Boyer Moore replace more than one pattern - c

I am working on a string search and replace project. I can only change 1 of the target pattern in the sentence. But I can find both.
Example: just do it. you will do it.
find: do
replace: think
expected---> just think it. you will think it.
what actually happened ---> just do it. you will think it.
How can I replace both of them?
I read the sentence from file input.txt
# include <limits.h>
# include <string.h>
# include <stdio.h>
#include <sys/time.h>
# define NO_OF_CHARS 256
# define MAX 10000
int sum = 0;
int control = 0;
// A utility function to get maximum of two integers
int max (int a, int b) { return (a > b)? a: b; }
// The preprocessing function for Boyer Moore's bad character heuristic
void badCharHeuristic( char *str, int size, int badchar[NO_OF_CHARS]) {
int i;
// Initialize all occurrences as -1
for (i = 0; i < NO_OF_CHARS; i++)
badchar[i] = -1;
// Fill the actual value of last occurrence of a character
for (i = 0; i < size; i++)
badchar[(int) str[i]] = i;
}
/* A pattern searching function that uses Bad Character Heuristic of Boyer Moore Algorithm */
void search( char *txt, char *pat,char temp3[MAX],int k,char*r) {
int m = strlen(pat);
int n = strlen(txt);
char src[MAX],p[MAX],temp[MAX],temp2[MAX],tempP[MAX],out[MAX];
int badchar[NO_OF_CHARS],i,leng,l,count;
char v;
/* Fill the bad character array by calling the preprocessing function badCharHeuristic() for given pattern */
badCharHeuristic(pat, m, badchar);
leng = strlen(pat);
strcpy(tempP,r);
//strcat(tempP,"</mark>");
leng = strlen(pat);
l = strlen(txt);
int s = 0; // s is shift of the pattern with respect to text
while(s <= (n - m)) {
int j = m-1;
/* Keep reducing index j of pattern while characters of pattern and text are matching at this shift s */
while(j >= 0 && pat[j] == txt[s+j]) {
count++;
j--;
}
/* If the pattern is present at current shift, then index j will become -1 after the above loop */
if (j < 0) {
//printf("pattern occurs at shift = %d\n", s);
/* Shift the pattern so that the next character in text
aligns with the last occurrence of it in pattern.
The condition s+m < n is necessary for the case when
pattern occurs at the end of text */
printf("The desired pattern was found starting from %d. line at position %d\n",k,s+1);
strncpy(temp, txt, s);
temp[s] = '\0';
//strcat(temp,"<mark>");
control++;
strcat(temp,tempP);
for(i=0;i<MAX;i++) {
if((s+leng+i)<strlen(txt))
temp2[i] = txt[s+leng+i];
else
temp2[i] = v;
}
strcat(temp,temp2);
strcpy(temp3,temp);
s += (s+m < n)? m-badchar[txt[s+m]] : 1;
}
else
/* Shift the pattern so that the bad character in text
aligns with the last occurrence of it in pattern. The
max function is used to make sure that we get a positive
shift. We may get a negative shift if the last occurrence
of bad character in pattern is on the right side of the
current character. */
s += max(1, j - badchar[txt[s+j]]);
}
sum +=count;
}
/* Driver program to test above funtion */
int main() {
char txt[MAX],p[MAX],r[MAX],temp[MAX],temp2[MAX],tempP[MAX],out[MAX];
int k = 1;
FILE *input = fopen("input.txt","r");
FILE *output = fopen("output.txt","w");
printf("Enter the text in which pattern is to be searched:");
fgets(p, MAX, stdin);
printf("Enter the text in which pattern is to be replaced:");
fgets(r, MAX, stdin);
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
p[strlen(p)-1]='\0';
temp[1]='a';
while(!feof(input)){
if(fgets (txt, MAX, input)!=NULL) {
txt[strlen(txt)-1] = '\0';
search(txt, p,temp,k,r);
if(temp[1]!='a') {
fprintf(output,"%s\n",temp);
temp[1]='a';
}
else
fprintf(output,"%s\n",txt);
}
k++;
}
if(control==0) {
printf("\nThe pattern was not found in the given text\n\n");
}
gettimeofday(&tv2, NULL);
printf ("Total time = %f seconds\n", (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + (double) (tv2.tv_sec - tv1.tv_sec));
fclose(input);
fclose(output);
printf("The number of character comparison: %d\n",sum);
return 0;
}

Related

Searching a string by binary search

#include <stdio.h>
#include <string.h>
typedef struct {
char name[11];
int score;
} report;
int main() {
int n = 3;
report student[n];
for (int i = 0; i < 3; i++) {
scanf("%[^\#]#%d", student[i].name, &student[i].score);
}
// Input name that we search.
char search[11];
scanf("%s", search);
// bubble sort
for (int a = 0; a < n - 1; a++) {
for (int b = 0; b < n - 1 - a; b++) {
if (student[b].score < student[b+1].score) {
report temp;
strcpy(temp.name, student[b].name);
temp.score = student[b].score;
strcpy(student[b].name, student[b+1].name);
student[b].score = student[b+1].score;
strcpy(student[b+1].name, temp.name);
student[b+1].score = temp.score;
}
}
}
// binary search
int left = 0;
int right = n - 1;
int middleIndex;
int rank;
while (left <= right ) {
middleIndex = (int)(left + right) / 2;
if (strcmp(student[middleIndex].name, search) == 0) {
rank = middleIndex+1;
break;
} else if (strcmp(student[middleIndex].name, search) > 0) {
left = middleIndex + 1;
} else if (strcmp(student[middleIndex].name,search) < 0) {
right = middleIndex - 1;
}
}
// Rank of the student's name that we search.
printf("%d", rank);
return 0;
}
I want to create a program that will return a student ranking (from 3 students). The fourth line is the name that we searched. I put all the user input into a struct and sort it in descending order to represent students ranking. But the problem is, when it reach the binary search, it always return unexpected value. Could you guys help me solve the problem?
Sample Input :
Jojo#40
Ray#60
Liz#80
Jojo -> name that we searched.
""" [ {Liz, 80}, {Ray, 60}, {Jojo,40} ] """
Output : 3
At least one problem is that your names (except the first) will have a newline as the first character. That newline was left in the input stream when scanning the score.
Consequently your string compare doesn't work.
Add this
for (int a = 0; a < 3; a++) printf("|%s|\n", student[a].name);
printf("|%s|\n", search);
just after scan of search and you get the output:
|Jojo|
|
Ray|
|
Liz|
|Jojo|
As you can see there are "unexpected" newlines in front of both "Ray" and "Liz"
To solve that add a space here
scanf(" %[^\#]#%d"
^
space
As noted by #EricPostpischil in a comment, sorting by score and doing binary search by name makes no sense. The sort and the search must be based on the same.
BTW: When sorting arrays use qsort

How to return a matrix of all occurrences of given word/pattern?

I am trying to implement a function that's called Sniffer which gets two inputs and returns the correspond matrix.
First input is an integer binary array values (just zeros or ones)
second input is your searched word ( the word you choose it as argument to your function )
The functionally of the function :
Searching for occurrence of the given word within your given binary array.
At each occurrence of your word there's always follows 8 bits following it, assume that always the input is correct (it means that there's no possibility two occurrence occur one after the other without at least there's space 8bits (8 binary values)!).
Then the function must return a matrix(integer matrix) of those followed 8bit for each occurrence of the word sorted corresponded by every row of the matrix. (the functions returns just the first 8bit followed each occurrence of the Searched word)
This means:
First row has first 8 followed bit on first occurrence of the word.
Second row has first 8 followed bit on second occurrence of the word.
Third row has first 8 followed bit on third occurrence of the word.
Fourth row has first 8 followed bit on fourth occurrence of the word.
etc ...
I will elaborate by examples:
function structure is
Code:
int ** SnifferPattern(int* givenArray , int* SearchedWord);
//it returns the corresponded matrix so used int** ;
example 1:
givenArray = {1,0,1,0,1,1,1,1,1,1,1,1,0,0};
SearchedWord = {1,0,1,0};
so the function returns a matrix(size 1x8 - 8 corresponds to 8followed bit)
the first row is {1,1,1,1,1,1,1,1}, which is the first 8 bit followed
the word 1010 the matrix here with one row because there's just
one occurrence of the `SearchedWord` in the given array.
example 2:
givenArray = {1,1,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,0,1,0,1,0,1,0};
SearchedWord = {1,1,0,0}
so the function returns a matrix the first row is {1,1,1,1,1,1,1,1}
which is the first 8 bit followed the word 1010 for the first occurrence.
for the second occurrence we see the word appear , so the second row of
the returned matrix(size 2x8) will include the first 8bit followed the
word 1010 of the second occurrence. so second row of the matrix
is {1,0,1,0,1,0,1,0} so the returned matrix (size 2x8) has two rows
(because there's two occurrences of the SearchedWord) which each row
corresponds to each occurrence of the SearchedWord.
example 3:
givenArray = {1,1,1,0,1,1,1,1,1,1,1,1,0,0};
SearchedWord = {0,1,0}
so the function returns a matrix zero row (like an array with zero values)
size 1x8 (8 columns is corresponded to 8followed bit). There's no
occurrence of the SearchedWord within the givenArray so we return a zero
matrix. There's no overlap between the occurrences of the searchedWords ,
so we assume always correctness of input.
I will explain my algorithm (a pleasure if there's another suggestions more compatible to my case)
My algorithm is searching for the word at every occurrence and then take at every occurrence the first followed 8bit. I take them and store them in a matrix's rows. But it sounds much hard to complete with this.
what I succeeded / tried to implement in C is this:
int ** SnifferPattern(int* s ; int* w)
{
// s is the given array, w is the searched Word , number occurrences
// here 2 , so it should be two prints on the output of the program.
int n;
int a[1000];
int i=0;
int j;
int k = 0;
int l;
int found = 0;
int t = 0;
a[k++] = i;
j = 0;
for (i = 0; i < k; i++)
{
n = a[i] - j;
if (n == (sizeof(w)/sizeof(w[0])))
{
t = 0;
for (l = 0; w[l]; l++)
{
if (s[l + j] == w[l])
{
t++; // Matched a character.
}
}
if (t == (sizeof(w)/sizeof(w[0])))
{
found++; // We found a match!
printf("word occurred at location=%d \n", j); // Pint location
}
}
j = a[i] + 1;
}
}
int main() {
int s[1000] = {1,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1};
int w[1000] = {1,0,1};
// s is the given array, w is the searched Word , number occurrences
// here 2 , so it should be two prints on the output of the program.
SnifferPattern(s , w)
//I should print all the matrix's row in the main function .
return 0;
}
I think I have figured out what you need. And, as stated in your question ("at each occurrence of your word there's always follows 8 bits"), the following requires that a least 8-integers follow any match of w in s. Since you will include 8-integers in each row of the matrix you return, then using a pointer-to-array-of int[8] allows a single free() of the result in the caller.
Your sniffer function will loop over each integer in s keeping a counter index (ndx) of each time an integer in w matches the integer in s. When ndx equals the number of elements in w a match has been found and the next 8 integers are are collected as the columns in that row of your matrix using next8 as the index. You could write your sniffer function as:
#define AFTER 8
/* function returning allocated pointer to array of 8 int, *n of them */
int (*sniffer (int *s, int selem, int *w, int welem, int *n))[AFTER]
{
int ndx = 0, /* match index */
next8 = AFTER, /* counter for next 8 after match found */
found = 0, /* number of tiems w found in s */
(*matrix)[AFTER] = NULL;
for (int i = 0; i < selem; i++) { /* loop over each int in s */
if (next8 < AFTER) { /* count if next8 less than AFTER */
matrix[found-1][next8++] = s[i]; /* set next8 index in matrix row */
}
else if (s[i] == w[ndx]) { /* if s[i] matches w[ndx] */
if (++ndx == welem) { /* increment ndx, compare w/welem */
/* allocate storage for next row in matrix */
if (!(matrix = realloc (matrix, (found + 1) * sizeof *matrix))) {
perror ("realloc-matrix"); /* on failure, handle error */
*n = 0;
return NULL;
}
/* (optional) set all elements in row 0 */
memset (matrix[found], 0, sizeof *matrix);
found += 1; /* increment found count */
ndx = next8 = 0; /* reset ndx, set next8 to count */
}
}
else /* otherwise, not a match and not counting */
ndx = 0; /* set match index to 0 */
}
*n = found; /* update value at n to found, so n available in main */
return matrix; /* return allocated matrix */
}
(note: the number of elements in s is provided in selem and the number of elements in w is provided by welem)
Changing your s[] in main to int s[] = {1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0} so it is easy to verify the results, you could write you program as:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ARSZ 1000 /* if you need a constant, #define one (or more) */
#define AFTER 8
/* function returning allocated pointer to array of 8 int, *n of them */
int (*sniffer (int *s, int selem, int *w, int welem, int *n))[AFTER]
{
int ndx = 0, /* match index */
next8 = AFTER, /* counter for next 8 after match found */
found = 0, /* number of tiems w found in s */
(*matrix)[AFTER] = NULL;
for (int i = 0; i < selem; i++) { /* loop over each int in s */
if (next8 < AFTER) { /* count if next8 less than AFTER */
matrix[found-1][next8++] = s[i]; /* set next8 index in matrix row */
}
else if (s[i] == w[ndx]) { /* if s[i] matches w[ndx] */
if (++ndx == welem) { /* increment ndx, compare w/welem */
/* allocate storage for next row in matrix */
if (!(matrix = realloc (matrix, (found + 1) * sizeof *matrix))) {
perror ("realloc-matrix"); /* on failure, handle error */
*n = 0;
return NULL;
}
/* (optional) set all elements in row 0 */
memset (matrix[found], 0, sizeof *matrix);
found += 1; /* increment found count */
ndx = next8 = 0; /* reset ndx, set next8 to count */
}
}
else /* otherwise, not a match and not counting */
ndx = 0; /* set match index to 0 */
}
*n = found; /* update value at n to found, so n available in main */
return matrix; /* return allocated matrix */
}
int main (void) {
int s[] = {1,0,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0},
w[] = {1,0,1},
n = 0,
(*result)[AFTER] = NULL;
result = sniffer (s, sizeof s/sizeof *s, w, sizeof w/sizeof *w, &n);
for (int i = 0; i < n; i++) { /* loop over result matrix */
printf ("matrix[%d] = {", i); /* output prefix */
for (int j = 0; j < AFTER; j++) /* loop over column values */
printf (j ? ",%d" : "%d", result[i][j]); /* output column value */
puts ("}"); /* output suffix and \n */
}
free (result); /* free allocated memory */
}
Example Use/Output
$ ./bin/pattern_in_s
matrix[0] = {0,0,0,0,1,1,1,1}
matrix[1] = {1,1,1,1,0,0,0,0}
If I have misunderstood your question, please let me know in a comment below and I'm happy to help further. Let me know if you have any questions.
There are several issues you must solve.
How are arrays represented?
You have arrays of integers, whose valid values can be 0 or 1. How do you datermine the length of sich an array. There are basically two possibilities:
Use a sentinel value. That's how C strings are stored: The actual string is terminated by the special value '\0', the null terminator. The memory used to store the string may be larger. In your case, you could use a value that isn't used:
enum {
END = -1
};
int a[] = {0, 1, 0, 1, END};
The disadvantage here is that you must be careful not to forget the explicit terminator. Also, if you want to find out the length of an array, you must walk it to the end. (But that's not an issue with small arrays.)
Use an explicit length that goes along with the array.
int a[] = {1, 0, 1, 0};
int alen = sizeof(a) / sizeof(*a);
The disadvantage here is that you must pass the length to any function that operates on the array. The function f(a) does not know kow long a is; the function should be something like f(a, alen). (The sizeof(a) / sizeof(*a) mechanism works only when the array a is in scope. See here for a discussion on how to find the length of an array.)
You could, of course, define a struct that combines data and length.
How do you return an array from a function?
That's your actual question. Again there are several possibilities:
Return the array. That usually means to allocate the array you want to return on the heap with malloc, which means that the caller must call free on the result at some time. The caller must know how big the returned array is. You can use sentinels as described above or you could pass in a pointer to a sive variable, which the function fills in:
int **f(..., int *length) { ... }
Call this function like this:
int length;
int **p = f(..., &length);
for (int i = 0; i < length; i++) ...
Pass in an array and hve the function fill it. That means that the function must know about the size of the array. The return value can be used to return the actual size of the array:
int f(..., int **res, int max) { ... }
Call this function like this:
int *p[20];
int length = f(..., p, 20);
for (int i = 0; i < length; i++) ...
Let's apply this to your problem.
You want to match a pattern in a string and then return a list of all 8-bit sequences after the matches. Let's represent an array as array + length. Your function might then look like this:
int sniffer(const int *s, int slen, // haystack array + length
const int *w, int wlen, // needle array + length
const int **res, int reslen) // results array
{ ... }
It passes in the arrays s and w plus their lengths. It also passes in a third array plus its length. That array hold the results. The number of valid results – the number of rows in your matrix – is the returned value.
Call this function:
int s[] = {...};
int w[] = {...};
const int *res[8];
int n = sniffer(s, sizeof(s) / sizeof(*s),
w, sizeof(w) / sizeof(*w),
res, sizeof(res) / sizeof(*res));
What happens if there are more than reslen matches? The excess matches cannot be written, of course, but do they contribute to the return value? If they do, you could pass in an array length of 0 just to see how many matches there are. (That'
s what the string function snprintf does, in a way.) If they don't you get the exact length of the result array. Both strategies are valid.
Here's an example implementation. It uses your test case #2:
#include <stdlib.h>
#include <stdio.h>
/*
* test whether the next len elements of s and w match
*/
int match(const int *s, const int *w, int len)
{
while (len-- > 0) {
if (*s++ != *w++) return 0;
}
return 1;
}
int sniffer(const int *s, int slen, // haystack array + length
const int *w, int wlen, // needle array + length
const int **res, int reslen) // results array
{
int n = 0;
for (int i = 0; i <= slen - wlen - 8; i++) {
const int *p = s + i;
if (match(p, w, wlen)) {
if (n < reslen) res[n] = p + wlen;
n++;
}
}
return n;
}
int main(void)
{
int s[] = {1, 1, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 0,
1, 0, 1, 0, 1, 0, 1, 0};
int w[] = {1, 1, 0, 0};
const int *res[8];
int reslen = sizeof(res) / sizeof(*res);
int n = sniffer(s, sizeof(s) / sizeof(*s),
w, sizeof(w) / sizeof(*w),
res, reslen);
printf("%d patterns:\n", n);
for (int i = 0; i < n && i < reslen; i++) {
printf("[%d] {", i);
for (int j = 0; j < 8; j++) {
if (j) printf(", ");
printf("%d", res[i][j]);
}
printf("}\n");
}
return 0;
}

How to rearrange array using spaces?

I'm struggling with rearranging my array. I have used from single to multiple loops trying to put spaces (white characters) between two pairs of characters, but I was constantly rewriting the original input. So there is always an input of even length, for example ABCDEFGH. And my task would be to extend the size of the array by putting spaces after every 2 chars (except the last one).
So the output would be:
AB CD EF GH
So the size of output (if I'm correct) will be (2*input_len)-1
Thanks.
EDIT:
This is my code so far
// output = "ABCDEFGHIJKL
char c1;
char c2;
char c3;
int o_len = strlen(output);
for(int i = 2; i < o_len + olen/2; i = i + 3){
if(i == 2){
c1 = output[i];
c2 = output[i+1];
c3 = output[i+2];
output[i] = ' ';
output[i+1] = c1;
output[i+2] = c2;
}
else{
c1 = output[i];
c2 = output[i+1];
output[i] = ' ';
output[i+1] = c3;
output[i+2] = c1;
c3 = c2;
}
}
So the first 3 pairs are printed correctly, then it is all a mess.
Presuming you need to store the space separate result, probably the easiest way to go about inserting the spaces is simply to use a pair of pointers (one to your input string and one to your output string) and then just loop continually writing a pair to your output string, increment both pointers by 2, check whether you are out of characters in your input string (if so break; and nul-terminate your output string), otherwise write a space to your output string and repeat.
You can do it fairly simply using memcpy (or you can just copy 2-chars to the current pointer and pointer + 1, your choice, but since you already include string.h for strlen() -- make it easy on yourself) You can do something similar to:
#include <stdio.h>
#include <string.h>
#define ARRSZ 128 /* constant for no. of chars in output string */
int main (int argc, char **argv) {
char *instr = argc > 1 ? argv[1] : "ABCDEFGH", /* in string */
outstr[ARRSZ] = "", /* out string */
*ip = instr, *op = outstr; /* pointers to each */
size_t len = strlen (instr); /* len of instr */
if (len < 4) { /* validate at least 2-pairs worth of input provided */
fputs ("error: less than two-pairs to separate.\n", stderr);
return 1;
}
if (len & 1) { /* validate even number of characters */
fputs ("error: odd number of characters in instr.\n", stderr);
return 1;
}
if (ARRSZ < len + len / 2) { /* validate sufficient storage in outstr */
fputs ("error: insufficient storage in outstr.\n", stderr);
return 1;
}
for (;;) { /* loop continually */
memcpy (op, ip, 2); /* copy pair to op */
ip += 2; /* increment ip by 2 for next pair */
op += 2; /* increment op by 2 for next pair */
if (!*ip) /* check if last pair written */
break;
*op++ = ' '; /* write space between pairs in op */
}
*op = 0; /* nul-terminate outstr */
printf ("instr : %s\noutstr : %s\n", instr, outstr);
}
Example Use/Output
$ ./bin/strspaceseppairs
instr : ABCDEFGH
outstr : AB CD EF GH
$ ./bin/strspaceseppairs ABCDEFGHIJLMNOPQ
instr : ABCDEFGHIJLMNOPQ
outstr : AB CD EF GH IJ LM NO PQ
Odd number of chars:
$ ./bin/strspaceseppairs ABCDEFGHIJLMNOP
error: odd number of characters in instr.
Or short string:
$ ./bin/strspaceseppairs AB
error: less than two-pairs to separate.
Look things over and let me know if you have further questions.
Edit To Simply Output Single-Pair or Empty-String
Based upon the comment by #chqrlie it may make more sense rather than issuing a diagnostic for a short string, just to output it unchanged. Up to you. You can modify the first conditional and move it after the odd character check in that case, e.g.
if (len & 1) { /* validate even number of characters */
fputs ("error: odd number of characters in instr.\n", stderr);
return 1;
}
if (len < 4) { /* validate at least 2-pairs worth of input provided */
puts(instr); /* (otherwise output unchanged and exit) */
return 0;
}
You can decide how you want to handle any aspect of your program and make the changes accordingly.
I think you are looking for a piece of code like the one below:
This function returns the output splitted array, as you requested to save it.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
char* split_by_space(char* str, size_t length, size_t step) {
size_t i = 0, j = 0, spaces = (length / step);
char* splitted = malloc(length + spaces + 1);
for (i = 0, j = 0; i < length; ++i, ++j) {
if (i % step == 0 && i != 0) {
splitted[j] = ' ';
++j;
}
splitted[j] = str[i];
}
splitted[j] = '\0';
return splitted;
}
int main(void) {
// Use size_t instead of int.
size_t step = 2; // Also works with odd numbers.
char str[] = "ABCDEFGH";
char* new_str;
// Works with odd and even steps.
new_str = split_by_space(str, strlen(str), step);
printf("New splitted string is [%s]", new_str);
// Don't forget to clean the memory that the function allocated.
free(new_str);
return 0;
}
When run with a step value of 2, the above code, outputs:
New splitted string is [AB CD EF GH]
Inserting characters inside the array is cumbersome and cannot be done unless you know the array is large enough to accommodate the new string.
You probably want to allocate a new array and create the modified string there.
The length of the new string is not (2 * input_len) - 1, you insert a space every 2 characters, except the last 2: if the string has 2 or fewer characters, its length is unmodified, otherwise it increases by (input_len - 2) / 2. And in case the length is off, you should round this value to the next integer, which is done in integer arithmetics this way: (input_len - 2 + 1) / 2.
Here is an example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char *reformat_with_spaces(const char *str) {
size_t len = strlen(str);
size_t newlen = len > 2 ? len + (len - 2 + 1) / 2 : len;
char *out = malloc(newlen + 1);
if (out) {
for (size_t i = 0, j = 0; i < len; i++) {
if (i > 0 && i % 2 == 0) {
out[j++] = ' ';
}
out[j++] = str[i];
}
out[j] = '\0';
}
return out;
}
int main(void) {
char buf[256];
char *p;
while (fgets(buf, sizeof buf, stdin)) {
buf[strcspn(buf, "\n")] = '\0'; // strip the newline if any
p = reformat_with_spaces(buf);
if (p == NULL) {
fprintf(stderr, "out of memory\n");
return 1;
}
puts(p);
free(p);
}
return 0;
}
Try this,
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
void rearrange(char *str)
{
int len=strlen(str),n=0,i;
char *word=malloc((len+(int)(len/2)));
if(word==NULL)
{
printf("Memory Error");
exit(1);
}
for(i=0;i<len;i++)
{
if( i % 2 == 0 && i != 0)
{
word[n]=' ';
n++;
word[n]=str[i];
n++;
}
else
{
word[n]=str[i];
n++;
}
}
word[n]='\0';
strcpy(str,word);
free(word);
return;
}
int main()
{
char word[40];
printf("Enter word:");
scanf("%s",word);
rearrange(word);
printf("\n%s",word);
return 0;
}
See Below:
The rearrange function saves the letters in str into word. if the current position is divisible by 2 i.e i%2 it saves one space and letter into str, otherwise it saves letter only.

How to check first letter of one string with last letter of another string inside of same char array

How can I complete the function canArrangeWords() ?
Question : Given a set of words check if we can arrange them in a list such that the last letter of any word and first letter of another word are same. The input function canArrangeWords shall contain an integer num and array of words arr. num denotes the number of word in the list (1<=num<=100). arr shall contain words consisting of lower case letters between 'a' - 'z' only . return 1 if words can be arranged in that fashion and -1 if cannot.
Input : 4 pot ten nice eye
output : 1
input : 3 fox owl pond
output: -1
Please help me complete this program .
**
#include<stdio.h>
#include<string.h>
int canArrangewords(int,char [100][100]);
void main(){
int n ,count=0 , i ;
char arrayS[100][100];
scanf("%d",&n);
for (i = 0; i < n; ++i)
{
scanf("%s",arrayS[i]);
}
for(i=0;i<n;i++)
{
printf("%s",arrayS[i]);
printf("\n");
}
printf("%c\n",arrayS[2][4]);
canArrangewords(n , arrayS);
}
int canArrangewords(int n,char arrayS[100][100]){
int i , j ;
for ( i = 0; i < n; i++)
{
for ( j = i+1 ; j < strlen(arrayS[j+1]); i++)
{
int flag = strlen(arrayS[j+1]) - 1;
int temp = strcmp(arrayS[i][0],arrayS[j][flag]);
}
}
}
}
Well, first of all think of the way you can reach that answer.
If you only need to know if they can or can not be arranged and you do not have to do so your self you can use an empty array of int array[26] for each letter a-z.
The rule is that from all the first and last letters for all the words only two MAY appear an odd amount of times - the first letter of first word in list and the last letter in the last word in the list, the rest MUST appear an even amount of times. I would add a check to make sure the letters are lowercase as well. good luck!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MINASCII 97
#define LETTERS 26
void UpdateArray(char letter, int* arr)
{
if(arr[letter - MINASCII] == 0)
{
++arr[letter - MINASCII];
}
else
{
--arr[letter - MINASCII];/*for each second time same letter is seen reduce back to zero */
}
}
int canArrangewords(int wordNum, char* wordArr[])
{
int arr[LETTERS] = {0};
int i = 0;
int count = 0 ;
char first;
char last;
char* string;
for (i= 0; i< wordNum; ++i)
{
string = wordArr[i];
first = string[0];
last = string[strlen(string)-1];
UpdateArray(first, &arr[0]);
UpdateArray(last, &arr[0]);
}
for(i = 0; i< LETTERS; ++i)
{
count+=arr[i];
}
if(count == 2 || count == 0)/*either once each or twice -see word1 example in main*/
{
return 1;
}
return -1;
}
int main()
{
int i = 0;
char* words[] = {"pot", "ten", "nice", "eye"};
char* words1[] = {"pot", "ten", "nip"};
char* words2[] = {"fox", "owl", "pond"};
i = canArrangewords(4,words);
printf("%d\n", i);
i = canArrangewords(3,words1);
printf("%d\n", i);
i = canArrangewords(3,words2);
printf("%d\n", i);
return 0;
}
Change your array of words into an array of pointers to words. Then you can simply exchange the pointers.
To speed things up, instead of a pointer to a word, have it point to a structure:
struct WORD {
char *firstchar; // begin of word
char *lastchar; // last char of word
} *words[100]; // array of 100 pointers to words
To read the words:
char buf[100];
for (i = 0; i < n; ++i)
{
scanf("%s",buf);
int len= strlen(buf);
words[i]= malloc(sizeof(struct WORDS));
words[i]->firstchar= malloc(len+1);
strcpy(words[i]->firstchar, buf);
words[i]->lastchar= words[i]->firstchar + len-1;
}
Now compare and sort:
if (*words[i]->lastchar == *words[j]->firstchar) {
struct WORDS *tmp= words[i+1];
words[i+1]= words[j];
words[j]= tmp;
}
Do this in a loop, a kind of bubble sort. I leave that to you.

fast string search to find string array element which matches the givven pattern

I have an array of constant strings which I iterate through to find an index of element which is a string that contains a search pattern. Which search algorithm should I choose to improve the speed of finding this element? I am not limited in time before running the application for preparing the look up tables if any are necessary.
I corrected a question - I am not doing exact string match - I am searching for pattern inside the element, which is in an array
array of strings:
[0] Red fox jumps over the fence
[1] Blue table
[2] Red flowers on the fence
I need to find an element which contains word 'table' - in this case its element 1
I do like 50000 iterations of a set of 30 array which could contain up to 30000 strings of not less than 128 characters. Now I am using good-old strstr brute force which is too slow...
Ok, posting a part of my function, the first strstr - looks up in an uncut array of lines if there are any occurrences, then the brute search follows. I know I can speed this part, but I am not doing optimization on this approach...
// sheets[i].buffer - contains a page of a text which is split into lines
// fullfunccall - is the pattern
// sheets[i].cells[k] - are the pointers to lines in a buffer
for( i=0; i<sheetcount; i++) {
if( i!= currsheet && sheets[i].name && sheets[i].name[0] != '\0') {
if( strstr(sheets[i].buffer, fullfunccall )) {
usedexternally = 1;
int foundatleastone = 0;
for( k=0; k<sheets[i].numcells; k++ ) {
strncpy_s(testline, MAX_LINE_SIZE, sheets[i].cells[k]->line, sheets[i].cells[k]->linesize);
testline[sheets[i].cells[k]->linesize] = '\0';
if( strstr(testline, fullfunccall )) {
dependency_num++;
if( dependency_num >= MAX_CELL_DEPENDENCIES-1) {
printf("allocation for sheet cell dependencies is insuficcient\n");
return;
}
sheets[currsheet].cells[currcellposinsheet]->numdeps = dependency_num+1;
foundatleastone++;
sheets[currsheet].cells[currcellposinsheet]->deps[dependency_num] = &sheets[i].cells[k];
}
}
if( foundatleastone == 0 ) {
printf("error locating dependency for external func: %s\n", fullfunccall );
return;
}
}
};
}
You wrote that your 'haystack' (the set of strings to search through) is roughly 30000 strings with approx. 200 characters each. You also wrote that the 'needle' (the term to search for) is either a string of 5 or 20 characters.
Based on this, you could precompute a hashtable which maps any 5-character subsequence to the string(s) in the haystack it occurs in. For 30000 strings (200 characters each) there are at most 30000 * (200 - 5) = 5.850.000 different 5-character substrings. If you hash each of it to a 16bit checksum you'd need a minimum 11MB of memory (for the hash keys) plus some pointers pointing to the string(s) in which the substring occurs.
For instance, given a simplfied haystack of
static const char *haystack[] = { "12345", "123456", "23456", "345678" };
you precompute a hash map which maps any possible 5-character string such that
12345 => haystack[0], haystack[1]
23456 => haystack[1], haystack[2]
34567 => haystack[3]
45678 => haystack[4]
With this, you could take the first five characters of your given key (either 5 or 20 characters long), hash it and then do a normal strstr through all the strings to which the key is mapped by the hash.
For each sheet that you are treating, you could build a suffix array as described in this article. Before you start your search, read the sheet, find the line beginnings (as integer indices into the sheet buffer), create the suffix array and sort it as described in the article.
Now, if you are looking for the lines in which a pattern, say "table", occurs, you can search for the next entry after "table" and the next entry after "tablf", which is the first non-match, where you have moved the right-most letter, odometer-style.
If both indices are the same, there are no matches. If they are different, you'll get a list of pointers into the sheet:
"tab. And now ..."
----------------------------------------------------------------
"table and ..." 0x0100ab30
"table water for ..." 0x0100132b
"tablet computer ..." 0x01000208
----------------------------------------------------------------
"tabloid reporter ..."
This will give you a list of pointers from which, by subtracting the base pointer of the sheet buffer, you can get the integer offsets. Comparison with the line beginnings will give you the line numbers that correspond to these pointers. (The line numbers are sorted, so you can do binary search here.)
The memory overhead is an array of pointers that has the same size as the sheet buffer, so for 30,000 strings of 200 chars, that will be about 48MB on a 64-bit machine. (The overhead of the line indices is negligible.)
Sorting the array will take long, but it is done only once for each sheet.
Edit: The idea seems to work well. I have implemented it and can scan a dictionary of about 130,000 words on a text file of nearly 600k in less then one second:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define die(...) exit((fprintf(stderr, "Fatal: " __VA_ARGS__), \
putc(10, stderr), 1))
typedef struct Sheet Sheet;
struct Sheet {
size_t size; /* Number of chars */
char *buf; /* Null-terminated char buffer */
char **ptr; /* Pointers into char buffer */
size_t nline; /* number of lines */
int *line; /* array of offset of line beginnings */
size_t naux; /* size of scratch array */
char **aux; /* scratch array */
};
/*
* Count occurrence of c in zero-terminated string p.
*/
size_t strcount(const char *p, int c)
{
size_t n = 0;
for (;;) {
p = strchr(p, c);
if (p == NULL) return n;
p++;
n++;
}
return 0;
}
/*
* String comparison via pointers to strings.
*/
int pstrcmp(const void *a, const void *b)
{
const char *const *aa = a;
const char *const *bb = b;
return strcmp(*aa, *bb);
}
/*
* Pointer comparison.
*/
int ptrcmp(const void *a, const void *b)
{
const char *const *aa = a;
const char *const *bb = b;
if (*aa == *bb) return 0;
return (*aa < *bb) ? -1 : 1;
}
/*
* Create and prepare a sheet, i.e. a text file to search.
*/
Sheet *sheet_new(const char *fn)
{
Sheet *sheet;
FILE *f = fopen(fn, "r");
size_t n;
int last;
char *p;
char **pp;
if (f == NULL) die("Couldn't open %s", fn);
sheet = malloc(sizeof(*sheet));
if (sheet == NULL) die("Allocation failed");
fseek(f, 0, SEEK_END);
sheet->size = ftell(f);
fseek(f, 0, SEEK_SET);
sheet->buf = malloc(sheet->size + 1);
sheet->ptr = malloc(sheet->size * sizeof(*sheet->ptr));
if (sheet->buf == NULL) die("Allocation failed");
if (sheet->ptr == NULL) die("Allocation failed");
fread(sheet->buf, 1, sheet->size, f);
sheet->buf[sheet->size] = '\0';
fclose(f);
sheet->nline = strcount(sheet->buf, '\n');
sheet->line = malloc(sheet->nline * sizeof(*sheet->line));
sheet->aux = NULL;
sheet->naux = 0;
n = 0;
last = 0;
p = sheet->buf;
pp = sheet->ptr;
while (*p) {
*pp++ = p;
if (*p == '\n') {
sheet->line[n++] = last;
last = p - sheet->buf + 1;
}
p++;
}
qsort(sheet->ptr, sheet->size, sizeof(*sheet->ptr), pstrcmp);
return sheet;
}
/*
* Clean up sheet.
*/
void sheet_delete(Sheet *sheet)
{
free(sheet->buf);
free(sheet->ptr);
free(sheet->line);
free(sheet->aux);
free(sheet);
}
/*
* Binary range search for string pointers.
*/
static char **pstr_bsearch(const char *key,
char **arr, size_t high)
{
size_t low = 0;
while (low < high) {
size_t mid = (low + high) / 2;
int diff = strcmp(key, arr[mid]);
if (diff < 0) high = mid;
else low = mid + 1;
}
return arr + low;
}
/*
* Binary range search for line offsets.
*/
static const int *int_bsearch(int key, const int *arr, size_t high)
{
size_t low = 0;
while (low < high) {
size_t mid = (low + high) / 2;
int diff = key - arr[mid];
if (diff < 0) high = mid;
else low = mid + 1;
}
if (low < 1) return NULL;
return arr + low - 1;
}
/*
* Find occurrences of the string key in the sheet. Returns the
* number of lines in which the key occurs and assigns up to
* max lines to the line array. (If max is 0, line may be NULL.)
*/
int sheet_find(Sheet *sheet, char *key,
int line[], int max)
{
char **begin, **end;
int n = 0;
size_t i, m;
size_t last;
begin = pstr_bsearch(key, sheet->ptr, sheet->size);
if (begin == NULL) return 0;
key[strlen(key) - 1]++;
end = pstr_bsearch(key, sheet->ptr, sheet->size);
key[strlen(key) - 1]--;
if (end == NULL) return 0;
if (end == begin) return 0;
m = end - begin;
if (m > sheet->naux) {
if (sheet->naux == 0) sheet->naux = 0x100;
while (sheet->naux < m) sheet->naux *= 2;
sheet->aux = realloc(sheet->aux, sheet->naux * sizeof(*sheet->aux));
if (sheet->aux == NULL) die("Re-allocation failed");
}
memcpy(sheet->aux, begin, m * sizeof(*begin));
qsort(sheet->aux, m, sizeof(*begin), ptrcmp);
last = 0;
for (i = 0; i < m; i++) {
int offset = sheet->aux[i] - sheet->buf;
const int *p;
p = int_bsearch(offset, sheet->line + last, sheet->nline - last);
if (p) {
if (n < max) line[n] = p - sheet->line;
last = p - sheet->line + 1;
n++;
}
}
return n;
}
/*
* Example client code
*/
int main(int argc, char **argv)
{
Sheet *sheet;
FILE *f;
if (argc != 3) die("Usage: %s patterns corpus", *argv);
sheet = sheet_new(argv[2]);
f = fopen(argv[1], "r");
if (f == NULL) die("Can't open %s.", argv[1]);
for (;;) {
char str[80];
int line[50];
int i, n;
if (fgets(str, sizeof(str), f) == NULL) break;
strtok(str, "\n");
n = sheet_find(sheet, str, line, 50);
printf("%8d %s\n", n, str);
if (n > 50) n = 50;
for (i = 0; i < n; i++) printf(" [%d] %d\n", i, line[i] + 1);
}
fclose(f);
sheet_delete(sheet);
return 0;
}
The implementation has its rough edges, but it works. I'm not especially fond of the scratch array and the additional sorting on the found pointer range, but it turns out that even sorting the large suffix array doesn't take too long.
You can extend this solution to more sheets, if you like.
I believe the most practical would be DFA as it reads every character of input at most once - more precisely it reads every input char once and stops as soon as pattern will not match definitely (if set up properly). With DFA you can also check against multiple patterns simultaneously.
Two good (but different) implementations of DFA algorithms well tested in practice are
PIRE
Ragel
It's not possible say which fits your task unless you provide more on that.
edit: DFA stays for "Deterministic Finite Automata"
edit: as you indicated your patterns are exact substrings the most common solution is KMP algorithm (Knuth-Morris-Pratt)

Resources