Program to do word/phrase replacement exceeds the time limit - c

Sorry for the possibly long and dumb question, but I'm really stumped. I'm doing a task for the university. Its meaning is very simple. You need to implement a function that will change the "bad" phrases to "good". Input to the function is a text and a double array with good and bad words (in the left column the words that need to be replaced, and on the right column the words to be inserted instead of the bad words). The dictionary itself with bad and good words can have any size, but at the end there will always be a pair of NULL - NULL.
It is important to note that the program should not do anything to change the already replaced phrases. The line "termination specialist" contains the word "specialist", so the program must check to see if there are any words in the text that have already been replaced, so that the line "termination specialist" does not change into the line "termination person with certified level of knowledge". The check happens here.
The program must also make sure that the entered dictionary of good and bad words is correct, which means that a bad word cannot be the beginning of another bad word. This check happens in the function replaceInvalidity
Text and dictionary with words do not have to be meaningful. In the context of this task, it is simply a set of symbols, i.e. letters, numbers, symbols
I wrote a program that passes most of the tests, but for some reason at one of the tests it loops and exceeds the time limit (2 seconds). As a result, I get 0 points for the whole task.
I tried checking the memory with Valgrind, but it did not show any errors.
Full code:
#ifndef __PROGTEST__
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#endif /* __PROGTEST__ */
int replaceInvalidity(const char * (*replace)[2])
{
int size = 0;
for (int i = 0; replace[i][0] != NULL; i++)
size++;
for (int i = 0; i < size - 1; i++)
{
for (int j = i + 1; j < size; j++)
{
if (strlen(replace[i][0]) >= strlen(replace[j][0]))
{
if (strstr(replace[i][0], replace[j][0]) == replace[i][0])
return 1;
}
else
{
if (strstr(replace[j][0], replace[i][0]) == replace[j][0])
return 1;
}
}
}
return 0;
}
char *newSpeak(const char *text, const char * (*replace)[2])
{
if (replaceInvalidity(replace))
{
return NULL;
}
int i = 0, k = 0, flag= 0, Nlen = 0, Olen = 0, length = 0;
char *result = (char *)malloc(sizeof(char));
length = strlen(text);
for (i = 0, k = 0; i < length; i++, k++)
{
flag = 0;
for (int j = 0; replace[j][1] != NULL; j++)
{
if (strstr(&text[i], replace[j][1]) == &text[i])
{
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Nlen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
for (int j = 0; replace[j][0] != NULL; j++)
{
if (strstr(&text[i], replace[j][0]) == &text[i])
{
Olen = strlen(replace[j][0]);
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Olen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
result = (char *)realloc(result, (k + 2) * sizeof(char));
result[k] = text[i];
}
result[k] = '\0';
return result;
}
#ifndef __PROGTEST__
int main(int argc, char * argv[])
{
char *res;
const char * d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
const char * d2[][2] = {
{ "fail", "suboptimal result" },
{ "failure", "non-traditional success" },
{ NULL, NULL }
};
res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination specialist."));
free(res);
res = newSpeak("The student answered an incorrect answer.", d1);
assert(!strcmp(res, "The client answered an alternative answer."));
free(res);
res = newSpeak("He was dumb, his failure was expected.", d1);
assert(!strcmp(res, "He was cerebrally challenged, his non-traditional success was expected."));
free(res);
res = newSpeak("The evil teacher became a murderer.", d1);
assert(!strcmp(res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free(res);
res = newSpeak("Devil's advocate.", d1);
assert(!strcmp(res, "Dnicenest deprived's advocate."));
free(res);
res = newSpeak("Hello.", d2);
assert(!res);
return EXIT_SUCCESS;
}
#endif /* __PROGTEST__ */

I was not able to reproduce the issue after adding the missing includes and combining your 3 snippets. As you phrase the question as a performance issue, I reworked your code to reduce run-time from 0.476 s to 0.275 s per 1e6 calls.
Instead of calling strstr() per character of your input text for a given bad word, only call it once + number of times a given bad word is found in text. Proceed processing text after the replacement. This should make make a significant difference for large input.
Instead of using loops move data in your string use memmove() which is highly optimized.
Instead of calling realloc() for each replacement when the size of the replacement changes.
Removed replaceInvalidity() as I think you were protecting yourself of the replacement string being substring of the input to avoid an infinite loop. The implementation below avoid that by only looking at replacements after the fact.
result = realloc(result, ...) will leak memory on failure so handle the error by free'ing the original string and return NULL on error. strdup() error is handled similarly.
Problem description does not match your test case, so I revised the test case. If this is not correct please clarify expected behavior (only replace at most 1 bad word?).
#define _XOPEN_SOURCE 500
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *newSpeak(const char *text, const char *(*replace)[2]) {
char *result = strdup(text);
if(!result)
return NULL;
size_t result_len = strlen(result);
for(size_t i = 0; replace[i][0] && replace[i][0][0] && replace[i][1] && replace[i][1][0]; i++) {
size_t bad_len = strlen(replace[i][0]);
size_t good_len = strlen(replace[i][1]);
char *found = result;
for(;;) {
found = strstr(found, replace[i][0]);
if(!found)
break;
size_t offset = found - result;
if(bad_len < good_len) {
char *tmp = realloc(result, result_len + good_len - bad_len + 1);
if(!tmp) {
free(result);
return NULL;
}
result = tmp;
found = result + offset;
memmove(found + good_len, found + bad_len, result_len - offset - bad_len + 1);
} else if(bad_len > good_len) {
memmove(found + good_len, found + bad_len, result_len - offset - bad_len + 1);
char *tmp = realloc(result, result_len + good_len - bad_len + 1);
if(!tmp) {
free(result);
return NULL;
}
result = tmp;
found = result + offset;
}
result_len += good_len - bad_len;
memcpy(found, replace[i][1], good_len);
found += good_len;
}
}
return result;
}
int main(void) {
const char *d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
char *res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination person with certified level of knowledge."));
free(res);
}

Few suggestions:
There is a lot of unnecessary string traversal in replaceInvalidity() function. Functions like strlen(), strstr() etc., use them only when they are really needed, otherwise avoid them. Also, if the dictionary is supposed to be end with NULL then this is not needed:
for (int i = 0; replace[i][0] != NULL; i++) size++;
Use the check for NULL termination of dictionary directly in the for loop condition, instead of, first calculate the size and then use it.
The program must also make sure that the entered dictionary of good and bad words is correct, which means that a bad word cannot be the beginning of another bad word.
For this, you are using strstr() and it will parse the whole string to find out the substring even if the their first character does not match.
If a bad word cannot be the beginning of another bad word then simply start compare their characters from start and if any of the string reaches to end will make the dictionary invalid otherwise not.
In newSpeak() function, your program iterating the input string text character by character and for every character, first it is finding the whole dictionary good phrases as substring and if it is not found then same activity for whole dictionary bad phrases. If the input phrase is big and if there are too many number of elements in dictionary, this is going to take a lot of time in processing. You should think of something better here, may be - extract a word from input text and search for that word in dictionary and based on whole or partial or no match found in dictionary, process further.
You can do something like this ( below code is just for demonstration purpose):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#define A_B_EQUAL 0
#define A_SUBSTR 1
#define B_SUBSTR 2
#define A_B_NOTEQUAL 3
int strMatch (const char * a, const char * b) {
if (!a || !b) {
return A_B_NOTEQUAL;
}
while(*a && (*a == *b)) {
a++; b++;
}
if ((*a == '\0') && (*b == '\0')) {
return A_B_EQUAL;
} else if (*a == '\0') {
return A_SUBSTR;
} else if (*b == '\0') {
return B_SUBSTR;
}
return A_B_NOTEQUAL;
}
int replaceInvalidity (const char * (*replace)[2]) {
for (int i = 0; replace[i][0] && replace[i + 1][0]; i++) {
for (int j = i + 1; replace[j][0]; j++) {
if (strMatch (replace[j][0], replace[i][0]) != A_B_NOTEQUAL) {
fprintf (stdout, "Invalid entries found in dictionary - [%s, %s]\n", replace[j][0], replace[i][0]);
return 1;
}
}
}
return 0;
}
int findInDict (const char * (*replace)[2], const char * ps, int len) {
if (!replace || !ps || !len) {
fprintf (stderr, "(%s):Invalid argument...\n", __func__);
return -1;
}
int index = -1;
for (int i = 0; replace[i][0] && (index == -1); i++) {
if (strncmp (replace[i][0], ps, len) == 0) {
index = i;
}
if ((index != -1) && (replace[index][0][len] != '\0')) {
//dictionary entry partially matched, match rest
int res = strMatch (&ps[len], &replace[index][0][len]);
if ((res != A_B_EQUAL) && (res != B_SUBSTR)) {
index = -1;
}
}
}
return index;
}
char * newSpeak (const char * text, const char * (*replace)[2]) {
if ((!text) || (replaceInvalidity(replace))) {
fprintf (stderr, "(%s):Invalid argument...\n", __func__);
return NULL;
}
char * result = NULL;
int resultlen = 0;
while (*text) {
int ws_and_oc = 0;
int curr_text_len = 0;
const char * start = text;
const char * str = NULL;
while (isspace (*text) || !isalpha (*text)) {
ws_and_oc++; text++;
}
while (isalpha (*text)) {
curr_text_len++; text++;
}
int dict_index = findInDict (replace, start + ws_and_oc, curr_text_len);
if (dict_index >= 0) {
int len = strlen (replace [dict_index][0]);
// adjust the text pointer and curr_text_len when the dictionary bad word is a phrase and not just a word
text = (((text - start - ws_and_oc) == len) ? text : start + len + ws_and_oc);
curr_text_len = strlen (replace [dict_index][1]);
str = replace [dict_index][1];
} else {
str = start + ws_and_oc;
}
char * tmp;
result = realloc (tmp = result, resultlen + curr_text_len + ws_and_oc + 1);
if (result == NULL) {
fprintf (stderr, "(%s:%d):Failed to allocate memory...\n", __func__, __LINE__);
free (tmp);
return NULL;
}
for (int i = 0; i < ws_and_oc; ++i) {
result[resultlen++] = start[i];
}
for (int i = 0; i < curr_text_len; ++i) {
result[resultlen++] = str[i];
}
}
result[resultlen] = '\0';
return result;
}
int main (void) {
char * res;
const char * d1 [][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
res = newSpeak ("dumb termination specialist.", d1);
if (res) {
assert (!strcmp (res, "cerebrally challenged termination person with certified level of knowledge."));
free (res);
}
res = newSpeak ("The student answered an incorrect answer.", d1);
if (res) {
assert (!strcmp ( res, "The client answered an alternative answer."));
free (res);
}
res = newSpeak ("He was dumb, his failure was expected.", d1);
if (res) {
assert (!strcmp ( res, "He was cerebrally challenged, his non-traditional success was expected."));
free (res);
}
res = newSpeak ("The evil teacher became a murderer.", d1);
if (res) {
assert (!strcmp ( res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free (res);
}
return 0;
}
I have skipped a couple of test cases because of lack of clarity -
The first one is:
res = newSpeak ( "Devil's advocate.", d1 );
assert ( ! strcmp ( res, "Dnicenest deprived's advocate." ) );
free ( res );
here a substring of a word of phrase, which exists in dictionary, is replaced with good phrase. What if Devil is also exists in the dictionary? What should be the behaviour in this case? Should look for best match or first match (even partial will work fine)..?
May be, once you have clarity around it, you can make the appropriate changes in the findInDict() function.
and second is:
res = newSpeak ( "Hello.", d2 );
assert ( ! res );
Why this test case expect res to be NULL? Based on the information you have provide res should be Hello. and not NULL.

Related

getting corrupted top size error in a lexer i am making when having more than 10 characters to tokenize

so i am having an issue i do not know how to fix... basically the lexer has one main function: tokenize() (idk if the spelling is even correct). Anyways, at line 50 i am calling malloc to allocate some memory for the tokens which are going to get generated, the size being:
scriptsize * sizeof(struct token). but when i try to allocate more than 10 tokens: the program crashes with malloc(): corrupted top size, can you guys help me? thanks and have a good day.
lexer.c
// low level token types: used in early parsing, and tokenizing of the text
enum llt {separator, operator, number, string, identifier, character, newline};
// string representation of low level token types
char * strllt[] = {"separator", "operator", "number", "string", "identifier", "character", "newline"};
const char separators[] = "({[ \n\t,.)};]";
const char operators[] = "+*&%^/=-";
const char numbers[] = "0123456789.";
const char whitespaces[] = "\n\t ";
struct token {int type; char * value;};
int token_equals(struct token t1, struct token t2)
{
if(t1.type == t2.type & t1.value == t2.value) {return 1;}
return 0;
}
// returns pointer to concatonation of the two selected strings
char * stradd(char * base, char * adder, int size_a, int size_b)
{
char * combined_string = (char *)malloc(size_b);
for(int i=0;i<size_a;i++) {combined_string[i] = base[i];}
for(int i=0;i<size_b;i++) {combined_string[i+size_a] = adder[i];}
return combined_string;
}
// checks if the selected character is in the selected string
int contains(const char * string, char character, int strsize)
{
for(int i=0;i<strsize;i++)
{
if(character == string[i]) {return 1;}
}
return 0;
}
int get_token_type(char character)
{
if(contains(separators, character, sizeof(separators))) {return 0;}
if(contains(operators, character, sizeof(operators))) {return 1;}
if(contains(numbers, character, sizeof(numbers))) {return 2;}
return 4; // could be a string also
}
// generates a token array out of a text, core function of the lexer
struct token * tokenize(char * text, int text_size, int * size_ptr)
{
struct token * tokens = (struct token *)malloc(sizeof(struct token) * text_size);
int tokens_current_index = 0; // next offset for a new token
char * _identifier = 0; // used to generate multi-char tokens
int id_size = 0; // the size of _identifier
int line = 0; // the current line
char * strepr = 0; // string representation of the current line
int line_size = 0; // the size of strepr
for(int i=0;i<text_size;i++)
{
// the token type of the current character
int token_type = get_token_type(text[i]);
// the token is either a separator, or an operator
if(token_type < 2)
{
// the end of an identifier has been detected
if(_identifier != 0 )
{
tokens[tokens_current_index] = (struct token){identifier, _identifier};
_identifier = 0; tokens_current_index++; id_size = 0;
}
// add the newly detected token to the token array, if it is not a whitespace
if(!contains(whitespaces, text[i], sizeof(whitespaces)))
{
tokens[tokens_current_index] = (struct token){token_type, &text[i]};
tokens_current_index++;
}
else if(text[i] == '\n')
{
// add a newline token with value strepr (used in error handling)
tokens[tokens_current_index] = (struct token){newline, strepr};
line_size = 0; strepr = 0; line++; tokens_current_index++;
}
}
if(token_type == identifier)
{
// the character is the first letter in the identifier
if(_identifier == 0)
{
// make the first letter of the identifier the current letter
_identifier = ""; id_size = 1;
_identifier = stradd("", &text[i], 0, 1);
}
// the chatacter is not the first letter in the identifier
else
{
// append the new letter to the currently generating identifier
_identifier = stradd(_identifier, &text[i], id_size, 1);
id_size++;
}
}
if(token_type == number)
{
// the number is part of an identifier: for example a10
if(_identifier != 0)
{
_identifier = stradd(_identifier, &text[i], id_size, 1);
id_size++;
}
// the number is not part of an identifier: for example 100
else
{
int dot_amount = 0; // 1.1.1 causes an error, 1.1 does not
char * str_number = ""; // the number being generated
int num_size = 0; // the lenght of the number being generated
// generate the number token, similar to an identifier generation
for(int x=i;x<text_size;x++)
{
// check if the next letter is a number, if it is not, break
int next_token_type = get_token_type(text[x]);
if(next_token_type == number || text[i] == '.')
{
str_number = stradd(str_number, &text[i], num_size, 1);
if(text[i] == '.') {dot_amount++;} // used in error check
}
else
{
// the dot amount is invalid: for example 1..2
if(dot_amount > 1)
{
printf("syntax error in line %i: in %s\n", line, str_number);
exit(-1);
}
// add the newly generated number token to the token array
tokens[tokens_current_index] = (struct token){number, str_number};
tokens_current_index++; i--; break; // stop generating number
}
// add the current character to the strepr of the line
if(strepr == 0) {strepr = stradd("", &text[i], 0, 1); line_size++;}
else
{
strepr = stradd(strepr, &text[i], line_size, 1);
line_size++;
}
// update position of the lexer, and update num_size
i++; num_size++;
}
}
}
if(strepr == 0) {strepr = stradd("", &text[i], 0, 1); line_size++;}
else if(tokens[tokens_current_index-1].type != number)
{
strepr = stradd(strepr, &text[i], line_size, 1);
line_size++;
}
}
// add identifier to the end of the token array
if(_identifier != 0)
{
tokens[tokens_current_index] = (struct token){identifier, _identifier};
}
*size_ptr = tokens_current_index+1;
return tokens;
}
main.c
#include <stdlib.h>
#include <stdio.h>
#include "lexer.h"
int main()
{
char chars[] = "if(x == 3) {return false;";
int size;
struct token * tokens = tokenize(chars, sizeof(chars), &size);
printf("%i\n", size);
for(int i=0;i<size-1;i++)
{
if(tokens[i].type < 2)
{
printf("token at %i:\t(%s, %c)\n", i, strllt[tokens[i].type], *(tokens[i].value));
}
else
{
printf("token at %i:\t(%s, %s)\n", i, strllt[tokens[i].type], tokens[i].value);
}
}
}
note: the code is inefficient, i know, but i will try to fix this myself later on, since the program does not have to be very efficient right now

Is there a way to split an array of strings into subarray of strings on token

Basically, is there any way to split an array of strings into arrays of strings before and after a token ("|") in C.
An example is shown below.
char *input[] = {"hello","I","am","|","a","cool","|","guy"}
//code
and the result is 3 arrays, containing
{"Hello","I","am"}
{"a","cool"}
{"guy"}
I tried strtok but that seems to split a string into pieces, rather than an array of strings into new, separate, sub-arrays of strings. I also do not know exactly how many "|" tokens will be present, and will need an unknown amount of new arrays (safe to say it'd be less than 10). They will be passed to execvp so having it as one string and just remembering where to start and stop looking will not work.
They will be passed to execvp
Assuming the strings include the program to be executed (the 1st parameter to execvp()) and the strings will be used in the order of appearance as per this pointer-array
char *input[] = {"hello","I","am","|","a","cool","|","guy"}
then a possible simple solution without any duplications might look like this:
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
char * input[] = {"hello", "I", "am", "|",
"a", "cool", "|",
"guy", "|"}; /* note the additional trailing `"|"`. */
int main(void)
{
char ** pcurrent = input;
char ** pend = pcurrent + sizeof input / sizeof *input;
while (pcurrent < pend)
{
{
char ** ptmp = pcurrent;
while (ptmp < pend && **ptmp != '|')
{
++ptmp;
}
*ptmp = NULL;
}
{
pid_t pid = fork();
if ((pid_t) -1) == pid)
{
perror("fork() failed");
exit(EXIT_FAILURE);
}
if ((pid_t) 0) == pid) /* child */
{
execvp(pcurrent[0], pcurrent);
perror("execvp() failed");
exit(EXIT_FAILURE);
}
/* parent */
pcurrent = ptmp + 1;
}
} /* while (pcurrent < pend) */
} /* int main(void) */
You need manually to split the input array. And dynamically allocate a new place to store result. E.g. as:
#include <stdio.h>
#include <stdbool.h>
int main()
{
char *input[] = {"hello","I","am","|","a","cool","|","guy"};
int inputLength = sizeof(input)/sizeof(input[0]);
printf("inputLength - %d\n", inputLength);
const char ***result2DimArray = malloc(sizeof(char**) * inputLength);
int *result2DimArrayLengths = malloc(sizeof(int) * inputLength);
memset(result2DimArrayLengths, 0, sizeof(int) * inputLength);
const char **currentSection = 0;
int nextSectionNumber = 0;
for(int inputIndex = 0; inputIndex < inputLength; inputIndex++)
{
if(input[inputIndex][0] == '|')
{
currentSection = 0;
}
else
{
if(!currentSection)
{
currentSection = malloc(sizeof(char*) * inputLength);
result2DimArray[nextSectionNumber] = currentSection;
nextSectionNumber++;
}
*currentSection = input[inputIndex];
currentSection++;
result2DimArrayLengths[nextSectionNumber-1]++;
}
}
/*Checking the result*/
printf("total sections - %d\n", nextSectionNumber);
for(int i=0; i<nextSectionNumber;i++)
{
for(int j=0;j<result2DimArrayLengths[i];j++)
{
printf(result2DimArray[i][j]);
printf(", ");
}
puts("");
}
return 0;
}
Here is a solution which doesn't involve dynamic memory allocation.
Before going in to the details ...
I think it's useful when tackling a problem like this to think about how the "strings" are stored in memory. It might look something like in the attached picture. (The memory addresses are completely unrealistic - and there would be null terminators at the end of each string - but you get the idea).
As the picture shows, the vital information we need for each 'sub-array' can be stored in a <char **, int> pair. The char ** is the address of the first "string" in the sub-array; the int is the number of strings it contains.
We can use a struct string_array_t to store this information.
typedef struct {
// Pointer to first string in sub-array
char **p;
// Number of strings in sub-array
int count;
} string_array_t;
We allocate an array of these on the stack; thus no need for malloc() or free() - as long as we allocate enough sub-arrays.
string_array_t string_arrays[MAX_SUB_ARRAYS] = {0};
char *input[] = {"hello", "I", "am", "|", "a", "cool", "|", "guy"};
// Pointer to current sub-array
string_array_t *cur = NULL;
size_t n_sub_arrays = 1;
Initialize our counters and pointers:
int i = 0, j = 0, k = 0;
cur = &string_arrays[0];
size_t n_strings_total = sizeof(input) / sizeof(input[0]);
Then loop over the array.
for (i = 0; i < n_strings_total; i++) {
if (!strcmp(input[i], "|")) {
// Store total number of strings in this sub-array
cur->count = k;
k = 0;
// Switch to next sub-array
cur = &string_arrays[++j];
if (j >= MAX_SUB_ARRAYS) {
fprintf(stderr, "Not enough sub-arrays allocated ...\n");
break;
}
n_sub_arrays++;
continue;
}
if (k == 0) {
cur->p = &input[i];
}
k++;
}
cur->count = k;
Print the results.
printf("Found %zu sub arrays ...\n", n_sub_arrays);
for (i = 0; i < n_sub_arrays; i++) {
string_array_t *cur = &string_arrays[i];
for (j = 0; j < cur->count; j++) {
printf("%s ", *(cur->p++));
}
printf("\n");
}

Branching issue in C program

My C program needs to skip rest of the code if particular event occurs. I have used continue for that but there is some issue. I am not sure issue is due to that only or something different but it's something logic mistake.
I am sending word to BoyerMoore_positive(ch[i], strlen(ch[i]) ); function to verify whether it exist in particular word list or not. If exist then increment count values.
for skp = BoyerMoore_skip(ch[i], strlen(ch[i]) ); if word is present in this function then I want to skip rest of the code and continue with next word. So I incremented the i.
It checks "he is the you she am" this list. But when word is present in this list and come back after performing actions in the function it does not proceed to next word though I have incremented i. It keep looping in BoyerMoore_skip(ch[i], strlen(ch[i]) ); for some times and then stops without processing next word.
I know this is very specific issue to my program but any kinda help is highly appreciable. I may making some silly mistake.
code:
while ((NULL != word) && (50 > i))
{
ch[i] = strdup(word);
//printf("%s n", ch[i]);
skp = BoyerMoore_skip(ch[i], strlen(ch[i]) );
// printf("skip is %s \n",skp);
if(skp != NULL)
{
i++;
printf("in\n");
continue;
}
// I tried with keeping i++ and continue in seperate if(skp != NULL) but same result.
printf("\n hi2 \n");
str = BoyerMoore_positive(ch[i], strlen(ch[i]) );
str2= BoyerMoore_negative(ch[i], strlen(ch[i]) );
printf("Str is %s \n",str2);
if (str == NULL)
t++;
else {
printf("%s \n", ch[i]);
// puts("true");
pcount += 1;
printf("Positive count is: %d \n",pcount);
}
if(str2== NULL)
q++;
else {
printf("%s \n", ch[i]);
// puts("true");
ncount += 1;
printf("Nagative count is: %d \n",ncount);
}
i++;
word = strtok(NULL, " ");
if(str==NULL && str==NULL and skp !=NULL)
{
pcount=0;
ncount=0;
}
}
To test your thesis remove the continue statement and add the } else { branch with a closing end bracket.
If your code works then you know that the continue is skipping over some required logic. You can duplicate that missing logic back to the true portion of the if and reinstate the continue statement.
Yourword never changes once you choose to skip. You reach continue; and go right back to the top of the loop, replicating the same word and eventually encountering the same logic.
The code that actually advances word to your next token (the strtok() logic) is at the bottom of your loop; nowhere near the continue that is sending you back to the top. I think you need that logic replicated before the continue; after you've decided you need to skip and advance to the next token.
In fact, as I look at it now, there is no reason to have it down there either. word is only used to populate c[i] with a duplicate and is never used anywhere else (that i can see, anyway), so you could in-theory just advance it immediately after making your dupe, thereby having only one snippet of that code in your loop.
Not actually an answer, but a redesign:
#include <cstring>
#include <array>
#include <iostream>
class BoyerMooreSearch
{
public:
typedef std::size_t size_type;
private:
static const size_type SkipTableSize = (std::size_t((unsigned char)(-1))) + 1;
static const size_type Threshold = 5;
typedef std::array<std::size_t, SkipTableSize> SkipTable;
public:
BoyerMooreSearch(const char* substr, size_type substrlen)
: m_substr(substr), m_substrlen(substrlen)
{
m_skip.fill(m_substrlen);
size_type n = m_substrlen - 1;
const char* s = m_substr + 1;
while(n) m_skip[*s++] = n--;
}
BoyerMooreSearch(const char* substr)
: BoyerMooreSearch(substr, std::strlen(substr))
{}
BoyerMooreSearch(const std::string& substr)
: BoyerMooreSearch(substr.c_str(), substr.size())
{}
// No copy
BoyerMooreSearch(const BoyerMooreSearch&) = delete;
BoyerMooreSearch& operator = (const BoyerMooreSearch&) = delete;
private:
inline bool test_size(size_type strlen) const {
return (m_substrlen && m_substrlen < strlen);
}
inline bool brute(size_type strlen) const {
return m_substrlen < Threshold || strlen - m_substrlen <= m_substrlen;
}
public:
const char* find(const char* str, const size_type strlen) const
{
if( ! test_size(strlen)) return 0;
else {
const char* end = str + strlen - m_substrlen;
if( ! brute(strlen)) {
// Boyer-Moore
// ===========
while(str <= end) {
if(std::strncmp(str, m_substr, m_substrlen) == 0) return str;
str += m_skip[*(str + m_substrlen)];
}
}
else {
// Brute search
// ============
while(str <= end) {
if(std::strcmp(str, m_substr) == 0) return str;
++str;
}
}
return 0;
}
}
const char* find(const char* str) const {
return find(str, std::strlen(str));
}
const char* find(const std::string str) const {
return find(str.c_str(), str.size());
}
size_type count(const char* str, const size_type strlen) const
{
size_type result = 0;
if(test_size(strlen)) {
const char* end = str + strlen - m_substrlen;
if( ! brute(strlen)) {
// Boyer-Moore
// ===========
while(str <= end) {
if(std::strncmp(str, m_substr, m_substrlen) == 0) {
++result;
str += m_substrlen;
}
else str += m_skip[*(str + m_substrlen)];
}
}
else {
// Brute search
// ============
while(str <= end) {
if(std::strncmp(str, m_substr, m_substrlen) == 0) {
++result;
str += m_substrlen;
}
else ++str;
}
}
}
return result;
}
size_type count(const char* str) const {
return count(str, std::strlen(str));
}
size_type count(const std::string str) const {
return count(str.c_str(), str.size());
}
private:
const char* m_substr;
const size_type m_substrlen;;
SkipTable m_skip;
};
int main()
{
BoyerMooreSearch bms("Hello");
const char* str = "Hello World and Hello People";
std::cout << "Count: " << bms.count(str) << std::endl;
return 0;
}

C / parse string, what is the easiest way

I have a string like that:
4;4=3;1=0,2=2,3=1,4=1,5=1;0003013340f59bce000002aaf01620e620198b2240002710;
It is separated into sections by ";" and each section can have one or more key/value pairs like 5=1 and so on, as you can see.
I want to parse it in pure C and I started working with strtok as I am showing in code here:
const wuint8 section_delimiter[] = ";";
const wuint8 field_delimiter[] = ",";
const wuint8 value_delimiter[] = "=";
printf("%s\n",data->msg);
token = strtok(data->msg,section_delimiter);
while(token != NULL) {
indicator = atoi(token);
printf("indicator: %d\n", indicator);
switch(indicator) {
case TYPE_1: {
printf("type: %d\n",TYPE_1);
wuint16 i, headerType, headerSubType;
for(i = 1; i < TP_MAX; i++) {
if(i == atoi(token)) {
token = strtok(NULL,value_delimiter);
headerType = i;
headerSubType = atoi(token);
break;
}
}
break;
}
case TYPE_2: {
printf("type: %d\n",TYPE_3);
break;
}
case TYPE_3: {
printf("type: %d\n",TYPE_3);
break;
}
case TYPE_4: {
printf("type: %d\n",TYPE_4);
break;
}
I am not sure how to do that correctly.
It also gets complicated, because not every string has the same structure, sometimes only one or two sections can be present. E.g.: 3;4=3;1=0,2=2,3=1,4=1,5=1;
Is there a how to do that showing the best and most convenient way?
strtok can't, AFAICR, be used in nested loops like this due to the global state it manages itself.
I suggest parsing each semicolon-delimited part out first, then handling them sequentially - or just implement something akin to strtok for your semicolon case yourself, then happily use strtok in the inner loop.
Using strcspn(). Fixed buffers, results go into global variables. data[] buffer is altered (and thus needs to be writable). YMMV
/*
It is separated into sections by ";" and each section can have one or more
key/value pairs like 5=1 and so on, as you can see. I want to parse it in
pure C and I started working with strtok as I am showing in code here:
*/
char data[] = "4;4=3;1=0,2=2,3=1,4=1,5=1;0003013340f59bce000002aaf01620e620198b2240002710;" ;
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct header {
int l;
int r;
} headers[123];
unsigned nheader;
int indicator;
char rest [123];
int tokenise(char * buff);
unsigned tokenise2(struct header *dst, char * buff);
/****************/
int tokenise(char * buff)
{
char *ptrs[14];
unsigned nptr;
unsigned len, pos;
ptrs[nptr=0] = NULL;
for (len = pos=0; buff[pos]; pos += len ) {
len = strcspn(buff+pos, ";");
ptrs[nptr++] = buff+pos;
ptrs[nptr] = NULL;
if (!buff[pos+len] ) break;
buff[pos+len] = 0;
len +=1;
}
if ( nptr> 0 && ptrs[0]) indicator = atoi(ptrs[0]); else indicator = -1;
if ( nptr> 1 && ptrs[1]) nheader = tokenise2 (headers, ptrs[1] ); else nheader = 0;
if ( nptr> 2 && ptrs[2]) nheader += tokenise2 (headers+nheader, ptrs[2] ); else nheader += 0;
if ( nptr> 3 && ptrs[3]) strcpy (rest, ptrs[3]); else rest[0] = 0;
return 0; /* or something useful ... */
}
unsigned tokenise2(struct header *target, char * buff)
{
char *ptrs[123];
unsigned nptr, iptr;
unsigned len, pos;
ptrs[nptr=0] = NULL;
for (len = pos=0; buff[pos]; pos += len ) {
len = strcspn(buff+pos, "," );
ptrs[nptr++] = buff+pos;
ptrs[nptr] = NULL;
if (!buff[pos+len] ) break;
buff[pos+len] = 0;
len +=1;
}
for ( iptr=0; iptr < nptr; iptr++) {
if (! ptrs[iptr] ) break;
len = strcspn(ptrs[iptr], "=" );
if (!len) break;
target[iptr].l = atoi (ptrs[iptr] );
target[iptr].r = atoi (ptrs[iptr]+len+1 );
}
return iptr; /* something useful ... */
}
int main(void)
{
int rc;
unsigned idx;
fprintf(stderr, "Org=[%s]\n", data );
rc = tokenise(data);
printf("Indicator=%d\n", indicator );
for (idx=0; idx < nheader; idx++) {
printf("%u: %d=%d\n", idx, headers[idx].l , headers[idx].r );
}
printf("Rest=%s\n", rest );
return 0;
}

using functions in c (return value)

Learning C and having many doubts.
I have a function (lets say function 1) that calls another function (lets say function 2).
Function 2 calculates an array of string.
How can I use this array in function 1?
Some code example:
int find_errors(char* word)
{
char error[100];
/*Given the word, It will find the duplicate chars and store it in the
error array. */
return 0;
}
int find_word(char* word)
{
find_errors (word);
printf("%s\n", error);
return 0;
}
There are at least three possible approaches:
Use a global variable
pass a parameter between them
return a pointer from the function
There are multiple ways to do this.
1) Create a dynamic array and return a pointer to the array. This will require you to manually free the memory for the array at a later time.
#define NUM_ELEMS 50
// In find_error():
char* error = malloc(NUM_ELEMS * sizeof(char));
return error;
// In find_word():
char *error = find_errors();
// do stuff
free(error);
2) Pass a pointer to find_errors that it can use as the error array. This will not require you to manually free the memory.
// In find_word():
char error[NUM_ELEMS];
find_error(error);
3) Use a global array. May make it more difficult for other people to understand your code. Has other potential problems as well.
// In global scope:
char error[NUM_ELEMS];
Your question relates to "call-by-reference" and "call-by-value".
char* getNewValsToSet(void)
{
char* new_vals = (char*) malloc(sizeof(char[5]));
new_vals[4] = '\0';
return new_vals;
}
void setValuesEven(char* vals_to_set)
{
vals_to_set[0] = 'A';
vals_to_set[2] = 'C';
}
void setValuesOdd(char* vals_to_set)
{
vals_to_set[1] = 'B';
vals_to_set[3] = 'D';
}
int main(void)
{
char* some_vals_to_set = getNewValsToSet();
setValsEven(some_vals_to_set);
setValsOdd(some_vals_to_set);
// ... now has vals "ABCD"
free(some_vals_to_set); //cleanup
return 0;
}
If you have "doubts" about learning C, IMHO it's one of the best things you can do (no matter the language in which you work) because it will explain exactly how things work "under-the-hood" (which all high-level languages try to hide to some degree).
You need to declare the error array globally and use it just like you did.
EDIT: using global variables isn't the best practice in most of the cases, like this one.
Here is an example of what you are looking for with an awesome console output. It dynamically allocates the array to hold any number errors (duplicate characters in your case) that may occur.
//Only free errors if result is > 0
int find_errors(char* word, char** errors)
{
int num_errors = 0;
int word_length = strlen(word);
int ARRAY_SIZE = MIN(8, word_length);
char existing[word_length];
int existing_index = 0;
*errors = NULL;
for(int i = 0; i < word_length; i++)
{
char character = word[i];
//Search array
for (int n = 0; n < word_length; ++n ) {
if(n >= existing_index)
{
existing[n] = character;
existing_index++;
break;
}
if (existing[n] == character) {
num_errors++;
if(!*errors)
*errors = (char*)malloc(ARRAY_SIZE * sizeof(char));
//Check if we need to resize array
if(num_errors >= ARRAY_SIZE)
{
ARRAY_SIZE *= 2;
ARRAY_SIZE = MIN(ARRAY_SIZE, word_length);
char *tmp = (char*)malloc(ARRAY_SIZE * sizeof(char));
memcpy(tmp, *errors, (unsigned long)ARRAY_SIZE);
free(*errors);
*errors = tmp;
}
//Set the error character
(*errors)[num_errors - 1] = character;
break;
}
}
}
return num_errors;
}
int find_word(char* word)
{
char* errors;
int errCount = find_errors (word, &errors);
if(errCount > 0)
{
printf("Invalid Characters: ");
for(int i =0; i < errCount; i++)
{
printf("%c ", errors[i]);
}
printf("\n");
free(errors);
}
return 0;
}
int main(int argc, char *argv[])
{
find_word("YWPEIT");
find_word("Hello World");
find_word("XxxxXXxXXoooooooOOOOOOOOOOOOOOOooooooooOOOOOOOOOOOOooooooOOO");
}

Resources