Related
The particular problem I have is that in my main function, I have added a print statement before and after I call the "bad" function. It always shows the before statement, but never the after statement. I also added a print statement to the end of the "bad" function, and I can see that it runs properly to the very last line of the "bad" function, so it should return normally. After the functions last print and before the main function print, I get the segfault. Any ideas? Here is the code:
int main(int argc, char* argv[])
{
char myItem[100];
int i = 0;
while (i < 100) {
scanf("%[^\n]", myItem);
i++;
if (myItem == EOF) {
break;
}
int c;
while ((c = getchar()) != '\n' && c != EOF);
//printf("string read in from user typing: %s\n", myItem);
printf("i = %d\n", i);
emailFilter(myItem);
printf("done with email filter in main\n");
//printf("item from this pass is:%s\n\n", myItem);
}
return 0;
}
and the "bad" function:
void emailFilter(char* mySubject)
{
printf(" Just entered the emailFilter() .\n");
char * event_holder[5]; //holds five separate char ptrs
for (int i = 0; i < 5; i++)
{
event_holder[i] = ((char*)malloc(100 * sizeof(char*)));
}
char command_type = parseSubject(mySubject, event_holder); //parses subject line and fills event_holder. returns command type, from parsing
//call proper parsing result
if (command_type == 'C')
{
create(event_holder);
}
else if (command_type == 'X')
{
change(event_holder);
}
else if (command_type == 'D')
{
delete(event_holder);
}
printf("Leaving emailfilter()...\n");
}
and running this code provides me:
$:
i = 1
Just entered the emailFilter() .
C, Meeting ,01/12/2019,15:30,NEB202
Leaving emailfilter()...
done with email filter in main
i = 2
Just entered the emailFilter() .
Leaving emailfilter()...
Segmentation fault
This shows that I always make it through the function, but still don't return properly.
Here is my entire code to reproduce the error.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct node {
char * event_data[5];
struct node * next;
};
struct node *head = NULL;
struct node *current = NULL;
char* earliest = NULL;
char* substring (char* orig, char* dest, int offset, int len)
{
int input_len = strlen (orig);
if (offset + len > input_len)
{
return NULL;
}
strncpy (dest, orig + offset, len);
//add null char \0 to end
char * term = "\0";
strncpy (dest + len, term, 1);
return dest;
}
char * firstItem(char* shortenedSubject)
{
int i = 0;
int currentLength = 0;
int currentCharIndex = 0;
int eventIndex = 0;
char * toReturn = (char*)malloc(100);
while ((shortenedSubject[currentLength] != '\0') && (shortenedSubject[currentLength] != ',') )//50 is my safety num to make sure it exits eventually
{
currentLength++;
}
if (shortenedSubject[currentLength] == ',') {
substring(shortenedSubject, toReturn, 0, currentLength);
}
return toReturn;
}
char parseSubject(char* subject,char * eventDataToReturn[5]) //returns "what type of command called, or none"
{
char toReturn;
char * shortenedSubject = (char*)malloc(100);
substring(subject,shortenedSubject,9,strlen(subject)-9);//put substring into tempString
int currentCharIndex = 0;// well feed into index of substring()
int eventIndex = 0; //lets us know which event to fill in
int currentLength = 0;//lets us know length of current event
int i = 0; //which char in temp string were alooking at
char * action = firstItem(shortenedSubject);
if (strlen(action) == 1)
{
if ( action[0] == 'C')
{
toReturn = 'C';
}
else if (action[0] == 'X')
{
toReturn = 'X';
}
else if (action[0] == 'D')
{
toReturn = 'D';
}
else
{
toReturn = 'N'; //not valid
//invalid email command, do nothing
}
}
else
{
toReturn = 'N'; //not valid
//invalid email command, do nothing
}
char* debug2;
while ((shortenedSubject[i] != '\0') && (i <= 50) )//50 is my safety num to make sure it exits eventually
{
char debugvar = shortenedSubject[i];
currentLength++;
if (shortenedSubject[i] == ',')
{
//eventDataToReturn[i] = substring2(shortenedSubject,currentCharIndex,currentLength);
substring(shortenedSubject,eventDataToReturn[eventIndex],currentCharIndex,currentLength-1);
debug2 = eventDataToReturn[eventIndex];
currentCharIndex= i +1;
eventIndex++;
currentLength = 0;
//i++;
}
i++;
}
substring(shortenedSubject,eventDataToReturn[4],currentCharIndex,currentLength);
return toReturn;
}
void printEventData(char* my_event_data[])
{
//printf("\nPrinting event data...\n");
for (int i = 1; i < 4; i++)
{
printf("%s,",my_event_data[i]);
}
//print last entry, no comma
printf("%s",my_event_data[4]);
}
void printEventsInorder()
{
struct node * ptr = head;
while (ptr != NULL)//if not empty, check each one and add when ready
{
printEventData(ptr->event_data);
printf("\n");
ptr = ptr->next;
}
}
void insertFront(char* my_event_data[5])
{
struct node *link = (struct node*) malloc(sizeof(struct node));
link->next = NULL;
for (int i = 0; i < 5; i++)
{
link->event_data[i] = my_event_data[i];
}
head = link;
}
int isEarlier(char* event_data_L[5], char* event_data_R[5])
{// will be given like 12:30 12:45,turn timeL into timeL1 and timeL2, and time R1 and timeR2
//compare dates for earlier
int month_L,day_L,year_L;
int month_R,day_R,year_R;
char* char_holder;
substring(event_data_L[2],char_holder,0,2);//extract first half of time
month_L = atoi(char_holder); //convert first half of time to int
substring(event_data_L[2],char_holder,3,2);//extract first half of time
day_L = atoi(char_holder); //convert first half of time to int
substring(event_data_L[2],char_holder,6,4);//extract first half of time
year_L = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,0,2);//extract first half of time
month_R = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,3,2);//extract first half of time
day_R = atoi(char_holder); //convert first half of time to int
substring(event_data_R[2],char_holder,6,4);//extract first half of time
year_R = atoi(char_holder); //convert first half of time to int
int time_L1,time_L2,time_R1,time_R2;
substring(event_data_L[3],char_holder,0,2);//extract first half of time
time_L1 = atoi(char_holder); //convert first half of time to int
substring(event_data_L[3],char_holder,3,2);//extract second half of time
time_L2 = atoi(char_holder); //convert second half of time to int
substring(event_data_R[3],char_holder,0,2);
time_R1 = atoi(char_holder);
substring(event_data_R[3],char_holder,3,2);
time_R2 = atoi(char_holder);
//convert to 2 ints, first compare left ints, then right ints
if(year_L < year_R)
{
return 1;
}
else if ( year_L == year_R)
{
if (month_L < month_L)
{
return 1;
}
else if (month_L == month_L)
{
if (day_L < day_R)
{
return 1;
}
else if (day_L == day_R)
{
if (time_L1 < time_R1)
{
return 1;
}
else if (time_L1 == time_R1)
{
if (time_L2 < time_R2)
{
return 1;
}
else if (time_L2 == time_R2)
{
return 2;
}
else//else, time is greater
{
return 3;
}
}
else //left time is greater, return 3
{
return 3;
}
}
else
{
return 3;
}
}
else
{
return 3;
}
}
else //its left is greater than right so return 3 to show that
{
return 3;
}
}
void create(char* my_event_data[5]) {
//print required sentence
char * debugvar2 = my_event_data[3];
if (head == NULL)//if empty calendar, just add it
{
insertFront(my_event_data);
//printf("EARLIEST bc empty list, \n");
printf("C, ");
printEventData(my_event_data);
printf("\n");
return;
}
else
{
struct node *link = (struct node*) malloc(sizeof(struct node));
link->next = NULL;
for (int i = 1; i < 5; i++)
{
link->event_data[i] = my_event_data[i];
}
struct node *ptr = head;
struct node *prev = NULL;
if (ptr->next == NULL) //if this is the last node to check against
{
if (isEarlier(my_event_data, ptr->event_data) == 1)
{ //check against it
printf("C, ");
printEventData(my_event_data);
printf("\n");
if (prev != NULL) //if this is first item in linked list...
{
link->next = head; //assign something before head
head = link; //move head to that thing
}
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr;
return;
}
else //else is equal to or later, so tack it on after:
{
ptr->next = link;
}
}
else
{
while (ptr->next != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if (isEarlier(my_event_data,ptr->event_data) == 1)
{
if (head == ptr) //if earlier than head... insert and print
{
//printf("earlier than head!");
printf("C, ");
printEventData(my_event_data);
printf("\n");
link->next = ptr;
head = link;
}
else //if earlier than non head, insert, but dont print
{
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr;
}
return;
}
else
{
prev = ptr;
ptr = ptr->next;
}
}
if (isEarlier(my_event_data,ptr->event_data) == 1) //while ptr-> is null now
{
printf("C, ");
printEventData(my_event_data);
printf("\n");
if (prev != NULL)
{
prev->next = link;
}
link->next = ptr->next;
return;
}
else
{
prev = link;
link = ptr;
}
}
return;
}
//if it gets here, it is the latest meeting, tack it on the end
//prev->ptr = link;
}
void change(char* my_event_data[5]) {
//create a link
struct node *ptr = head;
while (ptr->next != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if (*ptr->event_data[1] == *my_event_data[1])
{
for (int i = 1; i < 5; i++)
{
ptr->event_data[i] = my_event_data[i];
}
printf("X, ");
printEventData(my_event_data);
printf("\n");
return;
}
ptr = ptr->next;
}
if (*ptr->event_data[1] == *my_event_data[1]) //check final node
{
for (int i = 0; i < 5; i++)
{
ptr->event_data[i] = my_event_data[i];
}
printf("X, ");
printEventData(my_event_data);
printf("\n");
return;
}
printf("event to change not found");
return;
//if it gets here, nothing matched the title to change
}
void delete(char* my_event_data[5])
{
struct node *ptr = head;
struct node *prev = NULL;
while (ptr != NULL)//if not empty, check each one and add when ready
{
//if next node is later than current, we are done with insertion
if ( strcmp( ptr->event_data[1], my_event_data[1] ) == 0) // if title matches, delete it
{
if (prev != NULL)
{
prev->next = ptr->next;
}
if (ptr == head)
{
head = ptr->next;
}
free(ptr);
printf("D, ");
printEventData(my_event_data);
printf("\n");
return;
}
prev = ptr;
ptr = ptr->next;
}
}
void emailFilter(char* mySubject)
{
if (strlen(mySubject) < 9)
{
return;
}
char * event_holder[5]; //holds five separate char ptrs
for (int i = 0; i < 5; i++)
{
event_holder[i] = ((char*)malloc(100 * sizeof(char*)));
}
char command_type = parseSubject(mySubject, event_holder); //parses subject line and fills event_holder. returns command type, from parsing
//call proper parsing result
if (command_type == 'C')
{
create(event_holder);
}
else if (command_type == 'X')
{
change(event_holder);
}
else if (command_type == 'D')
{
delete(event_holder);
}
}
int main(int argc, char* argv[])
{
char myItem[100];
int i = 0;
while (i < 100)
{
scanf("%[^\n]", myItem);
i++;
if ( myItem == EOF )
{
break;
}
int c;
while ((c = getchar()) != '\n' && c != EOF);
printf("i = %d\n", i);
emailFilter(myItem);
}
return 0;
}
Also please note that this error happens when I use a txt file as STDIN via the ">" symbol on the command line. Here is the file I use:
Subject: C,Meeting ,01/12/2019,15:30,NEB202
Subject: C,Meeting ,01/12/2019,16:30,NEB202
Subject: C,Meeting ,01/12/2019,11:30,NEB202
Having tried to find something to contribute, there's this:
The code is dealing with the date/time. Below is the declaration and use of a "destination buffer" into which is copied fragments of the string:
int isEarlier(char* event_data_L[5], char* event_data_R[5])
{// will be given like 12:30 12:45 // ....
//compare dates for earlier
int month_L,day_L,year_L;
int month_R,day_R,year_R;
char* char_holder;
substring(event_data_L[2],char_holder,0,2);//extract first half of time
month_L = atoi(char_holder); //convert first half of time to int
//...
Notice that char_holder isn't pointing anywhere in particular. UB...
While it represents a beginner's approach, it is actually painful to see code like this. Below is a more concise version of isEarlier() (untested.)
int isEarlier( char *ed_L[5], char *ed_R[5] ) {
char l[16], r[16];
memcpy( l + 0, ed_L[2][6],4 ); // YYYY
memcpy( l + 4, ed_L[2][0],2 ); // MM
memcpy( l + 6, ed_L[2][3],2 ); // DD
memcpy( l + 8, ed_L[3][0],2 ); // hh
memcpy( l + 10, ed_L[3][3],2 ); // mm
memcpy( r + 0, ed_R[2][6],4 ); // YYYY
memcpy( r + 4, ed_R[2][0],2 ); // MM
memcpy( r + 6, ed_R[2][3],2 ); // DD
memcpy( r + 8, ed_R[3][0],2 ); // hh
memcpy( r + 10, ed_R[3][3],2 ); // mm
int res = memcmp( l, r, 12 );
return res < 0 ? 1 : res == 0 ? 2 : 3;
}
Note: The sample data provided indicates 2 digits for both month and day, and is ambiguous as to "mm/dd" or "dd/mm" format. The offset values used here come from the OP code.
One way to reduce the possibility of bugs in code is to both write less but more capable code, if you can, and to perform "unit testing" on code that you write. Focus on one function at a time and do not use global variables. Another is to become as familiar as you can with the proven capabilities of functions in the standard library.
EDIT: Looking at this answer, it occurs to me that this function should, itself, be refactored:
void reformatDateTime( char *d, char *s[5] ) {
memcpy( d + 0, s[2][6],4 ); // YYYY
memcpy( d + 4, s[2][0],2 ); // MM
memcpy( d + 6, s[2][3],2 ); // DD
memcpy( d + 8, s[3][0],2 ); // hh
memcpy( d + 10, s[3][3],2 ); // mm
}
int isEarlier( char *ed_L[5], char *ed_R[5] ) {
char l[16], r[16];
reformatDateTime( l, ed_L );
reformatDateTime( r, ed_R );
int res = memcmp( l, r, 12 );
res = res < 0 ? 1 : res == 0 ? 2 : 3;
printf( "isEarlier() '%.12s' vs '%.12s' result %d\n", l, r, res ); // debug
return res;
}
and I can see that it runs properly
You contradict yourself, you say that it sometimes or always seg faults. It's rather unlikely that some C code would crash at the point of leaving a function, since there's no "RAII" and in this case no multi-threading either. A stack corruption could have destroyed the function return address however.
The best way of debugging is not so much about focusing on the symptom, as it trying to pinpoint where something goes wrong. You've already done as much, so that's most of the debug effort already done.
One way of debugging from there is step #1: stare at the function for one minute. After around 5 seconds: event_holder[i] = ((char*)malloc(100 * sizeof(char*))); Well that's an obvious bug. After some 30 seconds more: wait, who cleans up this memory? The delete function perhaps but why is it then executed conditionally? (Turns out delete doesn't free() memory though.) The function leaks memory, another bug. Then after one full minute we realize that parseSubject does a whole lot of things and we'll need to dig through that one in detail if we want to weed out every possible chance of bugs. And it will take a lot more time to get to the bottom of that. But we already found 2 blatant bugs just by glancing at the code.
Fix the bugs, try again, is the problem gone?
At another glance there's a bug in main(), myItem == EOF is senseless and shouldn't compile. This suggests that you are compiling with way too lax warning levels or ignoring warnings, either is a very bad thing. What compiler options are recommended for beginners learning C?
We might note that extensive use of "magic numbers" make the code hard to read. It is also usually a sure sign of brittle code. Where do these 5 and 9 and so on come from? Use named constants. We will also fairly quickly note the lack of const correctness in something that's only supposed to parse, not change data. And so on.
I didn't read the code in detail, but the overall lack of following best practices and 3 bugs found just by brief glances suggests there's a whole lot more bugs in there.
Sorry for the possibly long and dumb question, but I'm really stumped. I'm doing a task for the university. Its meaning is very simple. You need to implement a function that will change the "bad" phrases to "good". Input to the function is a text and a double array with good and bad words (in the left column the words that need to be replaced, and on the right column the words to be inserted instead of the bad words). The dictionary itself with bad and good words can have any size, but at the end there will always be a pair of NULL - NULL.
It is important to note that the program should not do anything to change the already replaced phrases. The line "termination specialist" contains the word "specialist", so the program must check to see if there are any words in the text that have already been replaced, so that the line "termination specialist" does not change into the line "termination person with certified level of knowledge". The check happens here.
The program must also make sure that the entered dictionary of good and bad words is correct, which means that a bad word cannot be the beginning of another bad word. This check happens in the function replaceInvalidity
Text and dictionary with words do not have to be meaningful. In the context of this task, it is simply a set of symbols, i.e. letters, numbers, symbols
I wrote a program that passes most of the tests, but for some reason at one of the tests it loops and exceeds the time limit (2 seconds). As a result, I get 0 points for the whole task.
I tried checking the memory with Valgrind, but it did not show any errors.
Full code:
#ifndef __PROGTEST__
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#endif /* __PROGTEST__ */
int replaceInvalidity(const char * (*replace)[2])
{
int size = 0;
for (int i = 0; replace[i][0] != NULL; i++)
size++;
for (int i = 0; i < size - 1; i++)
{
for (int j = i + 1; j < size; j++)
{
if (strlen(replace[i][0]) >= strlen(replace[j][0]))
{
if (strstr(replace[i][0], replace[j][0]) == replace[i][0])
return 1;
}
else
{
if (strstr(replace[j][0], replace[i][0]) == replace[j][0])
return 1;
}
}
}
return 0;
}
char *newSpeak(const char *text, const char * (*replace)[2])
{
if (replaceInvalidity(replace))
{
return NULL;
}
int i = 0, k = 0, flag= 0, Nlen = 0, Olen = 0, length = 0;
char *result = (char *)malloc(sizeof(char));
length = strlen(text);
for (i = 0, k = 0; i < length; i++, k++)
{
flag = 0;
for (int j = 0; replace[j][1] != NULL; j++)
{
if (strstr(&text[i], replace[j][1]) == &text[i])
{
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Nlen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
for (int j = 0; replace[j][0] != NULL; j++)
{
if (strstr(&text[i], replace[j][0]) == &text[i])
{
Olen = strlen(replace[j][0]);
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Olen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
result = (char *)realloc(result, (k + 2) * sizeof(char));
result[k] = text[i];
}
result[k] = '\0';
return result;
}
#ifndef __PROGTEST__
int main(int argc, char * argv[])
{
char *res;
const char * d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
const char * d2[][2] = {
{ "fail", "suboptimal result" },
{ "failure", "non-traditional success" },
{ NULL, NULL }
};
res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination specialist."));
free(res);
res = newSpeak("The student answered an incorrect answer.", d1);
assert(!strcmp(res, "The client answered an alternative answer."));
free(res);
res = newSpeak("He was dumb, his failure was expected.", d1);
assert(!strcmp(res, "He was cerebrally challenged, his non-traditional success was expected."));
free(res);
res = newSpeak("The evil teacher became a murderer.", d1);
assert(!strcmp(res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free(res);
res = newSpeak("Devil's advocate.", d1);
assert(!strcmp(res, "Dnicenest deprived's advocate."));
free(res);
res = newSpeak("Hello.", d2);
assert(!res);
return EXIT_SUCCESS;
}
#endif /* __PROGTEST__ */
I was not able to reproduce the issue after adding the missing includes and combining your 3 snippets. As you phrase the question as a performance issue, I reworked your code to reduce run-time from 0.476 s to 0.275 s per 1e6 calls.
Instead of calling strstr() per character of your input text for a given bad word, only call it once + number of times a given bad word is found in text. Proceed processing text after the replacement. This should make make a significant difference for large input.
Instead of using loops move data in your string use memmove() which is highly optimized.
Instead of calling realloc() for each replacement when the size of the replacement changes.
Removed replaceInvalidity() as I think you were protecting yourself of the replacement string being substring of the input to avoid an infinite loop. The implementation below avoid that by only looking at replacements after the fact.
result = realloc(result, ...) will leak memory on failure so handle the error by free'ing the original string and return NULL on error. strdup() error is handled similarly.
Problem description does not match your test case, so I revised the test case. If this is not correct please clarify expected behavior (only replace at most 1 bad word?).
#define _XOPEN_SOURCE 500
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *newSpeak(const char *text, const char *(*replace)[2]) {
char *result = strdup(text);
if(!result)
return NULL;
size_t result_len = strlen(result);
for(size_t i = 0; replace[i][0] && replace[i][0][0] && replace[i][1] && replace[i][1][0]; i++) {
size_t bad_len = strlen(replace[i][0]);
size_t good_len = strlen(replace[i][1]);
char *found = result;
for(;;) {
found = strstr(found, replace[i][0]);
if(!found)
break;
size_t offset = found - result;
if(bad_len < good_len) {
char *tmp = realloc(result, result_len + good_len - bad_len + 1);
if(!tmp) {
free(result);
return NULL;
}
result = tmp;
found = result + offset;
memmove(found + good_len, found + bad_len, result_len - offset - bad_len + 1);
} else if(bad_len > good_len) {
memmove(found + good_len, found + bad_len, result_len - offset - bad_len + 1);
char *tmp = realloc(result, result_len + good_len - bad_len + 1);
if(!tmp) {
free(result);
return NULL;
}
result = tmp;
found = result + offset;
}
result_len += good_len - bad_len;
memcpy(found, replace[i][1], good_len);
found += good_len;
}
}
return result;
}
int main(void) {
const char *d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
char *res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination person with certified level of knowledge."));
free(res);
}
Few suggestions:
There is a lot of unnecessary string traversal in replaceInvalidity() function. Functions like strlen(), strstr() etc., use them only when they are really needed, otherwise avoid them. Also, if the dictionary is supposed to be end with NULL then this is not needed:
for (int i = 0; replace[i][0] != NULL; i++) size++;
Use the check for NULL termination of dictionary directly in the for loop condition, instead of, first calculate the size and then use it.
The program must also make sure that the entered dictionary of good and bad words is correct, which means that a bad word cannot be the beginning of another bad word.
For this, you are using strstr() and it will parse the whole string to find out the substring even if the their first character does not match.
If a bad word cannot be the beginning of another bad word then simply start compare their characters from start and if any of the string reaches to end will make the dictionary invalid otherwise not.
In newSpeak() function, your program iterating the input string text character by character and for every character, first it is finding the whole dictionary good phrases as substring and if it is not found then same activity for whole dictionary bad phrases. If the input phrase is big and if there are too many number of elements in dictionary, this is going to take a lot of time in processing. You should think of something better here, may be - extract a word from input text and search for that word in dictionary and based on whole or partial or no match found in dictionary, process further.
You can do something like this ( below code is just for demonstration purpose):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#define A_B_EQUAL 0
#define A_SUBSTR 1
#define B_SUBSTR 2
#define A_B_NOTEQUAL 3
int strMatch (const char * a, const char * b) {
if (!a || !b) {
return A_B_NOTEQUAL;
}
while(*a && (*a == *b)) {
a++; b++;
}
if ((*a == '\0') && (*b == '\0')) {
return A_B_EQUAL;
} else if (*a == '\0') {
return A_SUBSTR;
} else if (*b == '\0') {
return B_SUBSTR;
}
return A_B_NOTEQUAL;
}
int replaceInvalidity (const char * (*replace)[2]) {
for (int i = 0; replace[i][0] && replace[i + 1][0]; i++) {
for (int j = i + 1; replace[j][0]; j++) {
if (strMatch (replace[j][0], replace[i][0]) != A_B_NOTEQUAL) {
fprintf (stdout, "Invalid entries found in dictionary - [%s, %s]\n", replace[j][0], replace[i][0]);
return 1;
}
}
}
return 0;
}
int findInDict (const char * (*replace)[2], const char * ps, int len) {
if (!replace || !ps || !len) {
fprintf (stderr, "(%s):Invalid argument...\n", __func__);
return -1;
}
int index = -1;
for (int i = 0; replace[i][0] && (index == -1); i++) {
if (strncmp (replace[i][0], ps, len) == 0) {
index = i;
}
if ((index != -1) && (replace[index][0][len] != '\0')) {
//dictionary entry partially matched, match rest
int res = strMatch (&ps[len], &replace[index][0][len]);
if ((res != A_B_EQUAL) && (res != B_SUBSTR)) {
index = -1;
}
}
}
return index;
}
char * newSpeak (const char * text, const char * (*replace)[2]) {
if ((!text) || (replaceInvalidity(replace))) {
fprintf (stderr, "(%s):Invalid argument...\n", __func__);
return NULL;
}
char * result = NULL;
int resultlen = 0;
while (*text) {
int ws_and_oc = 0;
int curr_text_len = 0;
const char * start = text;
const char * str = NULL;
while (isspace (*text) || !isalpha (*text)) {
ws_and_oc++; text++;
}
while (isalpha (*text)) {
curr_text_len++; text++;
}
int dict_index = findInDict (replace, start + ws_and_oc, curr_text_len);
if (dict_index >= 0) {
int len = strlen (replace [dict_index][0]);
// adjust the text pointer and curr_text_len when the dictionary bad word is a phrase and not just a word
text = (((text - start - ws_and_oc) == len) ? text : start + len + ws_and_oc);
curr_text_len = strlen (replace [dict_index][1]);
str = replace [dict_index][1];
} else {
str = start + ws_and_oc;
}
char * tmp;
result = realloc (tmp = result, resultlen + curr_text_len + ws_and_oc + 1);
if (result == NULL) {
fprintf (stderr, "(%s:%d):Failed to allocate memory...\n", __func__, __LINE__);
free (tmp);
return NULL;
}
for (int i = 0; i < ws_and_oc; ++i) {
result[resultlen++] = start[i];
}
for (int i = 0; i < curr_text_len; ++i) {
result[resultlen++] = str[i];
}
}
result[resultlen] = '\0';
return result;
}
int main (void) {
char * res;
const char * d1 [][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
res = newSpeak ("dumb termination specialist.", d1);
if (res) {
assert (!strcmp (res, "cerebrally challenged termination person with certified level of knowledge."));
free (res);
}
res = newSpeak ("The student answered an incorrect answer.", d1);
if (res) {
assert (!strcmp ( res, "The client answered an alternative answer."));
free (res);
}
res = newSpeak ("He was dumb, his failure was expected.", d1);
if (res) {
assert (!strcmp ( res, "He was cerebrally challenged, his non-traditional success was expected."));
free (res);
}
res = newSpeak ("The evil teacher became a murderer.", d1);
if (res) {
assert (!strcmp ( res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free (res);
}
return 0;
}
I have skipped a couple of test cases because of lack of clarity -
The first one is:
res = newSpeak ( "Devil's advocate.", d1 );
assert ( ! strcmp ( res, "Dnicenest deprived's advocate." ) );
free ( res );
here a substring of a word of phrase, which exists in dictionary, is replaced with good phrase. What if Devil is also exists in the dictionary? What should be the behaviour in this case? Should look for best match or first match (even partial will work fine)..?
May be, once you have clarity around it, you can make the appropriate changes in the findInDict() function.
and second is:
res = newSpeak ( "Hello.", d2 );
assert ( ! res );
Why this test case expect res to be NULL? Based on the information you have provide res should be Hello. and not NULL.
I'm still new in C. I'm doing an enviroment variable task and I'm having a problem in processing my string. I would like to pass a variable that represent the environment variable, and replace a value that has ${...} with environment value if that string are same as the enviroment key. Here are the codes:
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
void replace_env(char *string, char *env)
{
int y = 0;
int x = 0;
int j = 0;
int i = 0;
int n = 0;
int val_length;
int location2[100];
char *tmp3[BUFSIZ];
char env_key[BUFSIZ];
char env_val[BUFSIZ];
char env_var[sizeof(env)][BUFSIZ];
char user_input[BUFSIZ];
char final_string[BUFSIZ];
char tmp_key[100][BUFSIZ];
tmp3[x]=env;
strncpy(env_var[x],tmp3[x],sizeof(tmp3));
for(x=0;env_var[y][x] != '=';x++) //this is to get the environment key
{
env_key[x] = env_var[y][x];
}
x++;
for(j=0;env_var[y][j] != '\0';j++) //this is to get the environment value
{
env_val[j]=env_var[y][x];
x++;
}
val_length = strlen(env_val);
j=0;
y=0;
strncpy(user_input,string,sizeof(user_input));
for(x = 0;user_input[x] !='\0';x++)
{
if (user_input[x] =='$')
{
x++;
if(user_input[x] == '{')
{
x++;
y=0;
while(user_input[x]!='}')
{
tmp_key[i][y] = user_input[x];
x++;
y++;
}
i++;
}
}
}
tmp_key[i][y]='\0';
i=0;
for(x = 0;user_input[x] !='\0';x++) //I think my problem is starting from here.
{
if (user_input[x] !='$')
{
final_string[j]=user_input[x];
j++;
}
else
{
x++;
if((user_input[x]== '{')&&(strncmp(tmp_key[i],env_key,sizeof(tmp_key))==0))
{
while(user_input[x]!='}')
{
x++;
}
strcat(final_string,env_val);
j=j+val_length;
}
else
{
final_string[j]=user_input[x];
j++;
}
}
}
printf("output result = %s \n",final_string);
}
int main() {
char s[100];
sprintf(s, "jack${ABC}zack${DEF}");
replace_env(s, "ABC=/home/fikrie/Documents");
replace_env(s, "DEF=/tmp");
if (strcmp(s, "jack/home/fikrie/Documentszack/tmp")==0) {
printf("pass\n");
} else {
printf("fail\n");
}
printf("--------------------------------------------------------\n");
return 0;
}
To make it more clearer, here are the result:
env_var = ABC=/home/fikrie/Documents
env_key = ABC
env_val = /home/fikrie/Documents
input = jack${ABC}zack${DEF}
after strcat result is = jack/home/fikrie/Documents
j value is 26
after strcat result is = jack/home/fikrie/Documentszack/home/fikrie/Documents
j value is 52
output result = jack/home/fikrie/Documentszack/home/fikrie/Documents
env_var = DEF=/tmp
env_key = DEF
env_val = /tmp
input = jack${ABC}zack${DEF}
output result = jack{ABC}zack{DEF}ocumentszack/home/fikrie/Documents
fail
--------------------------------------------------------
As you can see, ABC are sent into the replace_env function. And it does replace the ${ABC} properly, followed with a string zack.Then the problem occures where ${DEF} are replaced with ABC key and not maintained as ${DEF}
When the DEF are sent during the second call of replace_env function, things got more wierd. Both ABC and DEF are not recognized. Even worse, the character at the back are still there.
My expectation is:
For the first call of replace_env:
jack/home/Fikrie/Documentszack${DEF}
For the second call of replace_env:
jack/home/Fikrie/Documentszacl/tmp
after the strcmp passed, the final_string will be cleared again.
All help are really appreciated. I dont expect an answer. I prefer a knowledge or guidance rather than just solving it blankly. Just need a clear explanation on my fault because I have been editing this code for almost a month and everything looks so blurry now. I know there are ways to solve it using memory functions, allocation etc. But this task is about string manipulation. I am running this on Ubuntu OS. Sorry for my bad english.
I know you didn't ask for this, but consider this. Learning the C string functions is worth your time.
#include <stdio.h>
#include <string.h>
void sub(char *s, char *env, char *value) {
char buf[BUFSIZ], *src = s, *dst = buf;
int n = strlen(env);
while(*src) {
if(strncmp(src, env, n) == 0) {
dst += strlen(strcpy(dst, value));
src += strlen(env);
} else {
*dst++ = *src++;
}
}
*dst = 0;
strcpy(s, buf);
}
void replace_env(char *s, char *env) {
char copy[BUFSIZ], tmp[BUFSIZ];
strcpy(copy, env);
char *eq = strchr(copy, '=');
if(eq == 0) {
printf("No '=' found in '%s'\n", env);
return;
}
*eq = 0;
sprintf(tmp, "${%s}", copy);
sub(s, tmp, eq+1);
}
int main() {
char s[100];
sprintf(s, "jack${ABC}zack${DEF}");
replace_env(s, "ABC=/home/fikrie/Documents");
replace_env(s, "DEF=/tmp");
if (strcmp(s, "jack/home/fikrie/Documentszack/tmp")==0) {
printf("pass\n");
} else {
printf("fail\n");
}
printf("--------------------------------------------------------\n");
return 0;
}
Given a (char *) string, I want to find all occurrences of a substring and replace them with an alternate string. I do not see any simple function that achieves this in <string.h>.
The optimizer should eliminate most of the local variables. The tmp pointer is there to make sure strcpy doesn't have to walk the string to find the null. tmp points to the end of result after each call. (See Shlemiel the painter's algorithm for why strcpy can be annoying.)
// You must free the result if result is non-NULL.
char *str_replace(char *orig, char *rep, char *with) {
char *result; // the return string
char *ins; // the next insert point
char *tmp; // varies
int len_rep; // length of rep (the string to remove)
int len_with; // length of with (the string to replace rep with)
int len_front; // distance between rep and end of last rep
int count; // number of replacements
// sanity checks and initialization
if (!orig || !rep)
return NULL;
len_rep = strlen(rep);
if (len_rep == 0)
return NULL; // empty rep causes infinite loop during count
if (!with)
with = "";
len_with = strlen(with);
// count the number of replacements needed
ins = orig;
for (count = 0; tmp = strstr(ins, rep); ++count) {
ins = tmp + len_rep;
}
tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
if (!result)
return NULL;
// first time through the loop, all the variable are set correctly
// from here on,
// tmp points to the end of the result string
// ins points to the next occurrence of rep in orig
// orig points to the remainder of orig after "end of rep"
while (count--) {
ins = strstr(orig, rep);
len_front = ins - orig;
tmp = strncpy(tmp, orig, len_front) + len_front;
tmp = strcpy(tmp, with) + len_with;
orig += len_front + len_rep; // move to next "end of rep"
}
strcpy(tmp, orig);
return result;
}
This is not provided in the standard C library because, given only a char* you can't increase the memory allocated to the string if the replacement string is longer than the string being replaced.
You can do this using std::string more easily, but even there, no single function will do it for you.
There isn't one.
You'd need to roll your own using something like strstr and strcat or strcpy.
You could build your own replace function using strstr to find the substrings and strncpy to copy in parts to a new buffer.
Unless what you want to replace_with is the same length as what you you want to replace, then it's probably best to use a new buffer to copy the new string to.
Here's some sample code that does it.
#include <string.h>
#include <stdlib.h>
char * replace(
char const * const original,
char const * const pattern,
char const * const replacement
) {
size_t const replen = strlen(replacement);
size_t const patlen = strlen(pattern);
size_t const orilen = strlen(original);
size_t patcnt = 0;
const char * oriptr;
const char * patloc;
// find how many times the pattern occurs in the original string
for (oriptr = original; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
{
patcnt++;
}
{
// allocate memory for the new string
size_t const retlen = orilen + patcnt * (replen - patlen);
char * const returned = (char *) malloc( sizeof(char) * (retlen + 1) );
if (returned != NULL)
{
// copy the original string,
// replacing all the instances of the pattern
char * retptr = returned;
for (oriptr = original; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
{
size_t const skplen = patloc - oriptr;
// copy the section until the occurence of the pattern
strncpy(retptr, oriptr, skplen);
retptr += skplen;
// copy the replacement
strncpy(retptr, replacement, replen);
retptr += replen;
}
// copy the rest of the string.
strcpy(retptr, oriptr);
}
return returned;
}
}
#include <stdio.h>
int main(int argc, char * argv[])
{
if (argc != 4)
{
fprintf(stderr,"usage: %s <original text> <pattern> <replacement>\n", argv[0]);
exit(-1);
}
else
{
char * const newstr = replace(argv[1], argv[2], argv[3]);
if (newstr)
{
printf("%s\n", newstr);
free(newstr);
}
else
{
fprintf(stderr,"allocation error\n");
exit(-2);
}
}
return 0;
}
As strings in C can not dynamically grow inplace substitution will generally not work. Therefore you need to allocate space for a new string that has enough room for your substitution and then copy the parts from the original plus the substitution into the new string. To copy the parts you would use strncpy.
// Here is the code for unicode strings!
int mystrstr(wchar_t *txt1,wchar_t *txt2)
{
wchar_t *posstr=wcsstr(txt1,txt2);
if(posstr!=NULL)
{
return (posstr-txt1);
}else
{
return -1;
}
}
// assume: supplied buff is enough to hold generated text
void StringReplace(wchar_t *buff,wchar_t *txt1,wchar_t *txt2)
{
wchar_t *tmp;
wchar_t *nextStr;
int pos;
tmp=wcsdup(buff);
pos=mystrstr(tmp,txt1);
if(pos!=-1)
{
buff[0]=0;
wcsncpy(buff,tmp,pos);
buff[pos]=0;
wcscat(buff,txt2);
nextStr=tmp+pos+wcslen(txt1);
while(wcslen(nextStr)!=0)
{
pos=mystrstr(nextStr,txt1);
if(pos==-1)
{
wcscat(buff,nextStr);
break;
}
wcsncat(buff,nextStr,pos);
wcscat(buff,txt2);
nextStr=nextStr+pos+wcslen(txt1);
}
}
free(tmp);
}
The repl_str() function on creativeandcritical.net is fast and reliable. Also included on that page is a wide string variant, repl_wcs(), which can be used with Unicode strings including those encoded in UTF-8, through helper functions - demo code is linked from the page. Belated full disclosure: I am the author of that page and the functions on it.
Here is the one that I created based on these requirements:
Replace the pattern regardless of whether is was long or shorter.
Not use any malloc (explicit or implicit) to intrinsically avoid memory leaks.
Replace any number of occurrences of pattern.
Tolerate the replace string having a substring equal to the search string.
Does not have to check that the Line array is sufficient in size to hold the replacement. e.g. This does not work unless the caller knows that line is of sufficient size to hold the new string.
avoid use of strcat() to avoid overhead of scanning the entire string to append another string.
/* returns number of strings replaced.
*/
int replacestr(char *line, const char *search, const char *replace)
{
int count;
char *sp; // start of pattern
//printf("replacestr(%s, %s, %s)\n", line, search, replace);
if ((sp = strstr(line, search)) == NULL) {
return(0);
}
count = 1;
int sLen = strlen(search);
int rLen = strlen(replace);
if (sLen > rLen) {
// move from right to left
char *src = sp + sLen;
char *dst = sp + rLen;
while((*dst = *src) != '\0') { dst++; src++; }
} else if (sLen < rLen) {
// move from left to right
int tLen = strlen(sp) - sLen;
char *stop = sp + rLen;
char *src = sp + sLen + tLen;
char *dst = sp + rLen + tLen;
while(dst >= stop) { *dst = *src; dst--; src--; }
}
memcpy(sp, replace, rLen);
count += replacestr(sp + rLen, search, replace);
return(count);
}
Any suggestions for improving this code are cheerfully accepted. Just post the comment and I will test it.
i find most of the proposed functions hard to understand - so i came up with this:
static char *dull_replace(const char *in, const char *pattern, const char *by)
{
size_t outsize = strlen(in) + 1;
// TODO maybe avoid reallocing by counting the non-overlapping occurences of pattern
char *res = malloc(outsize);
// use this to iterate over the output
size_t resoffset = 0;
char *needle;
while (needle = strstr(in, pattern)) {
// copy everything up to the pattern
memcpy(res + resoffset, in, needle - in);
resoffset += needle - in;
// skip the pattern in the input-string
in = needle + strlen(pattern);
// adjust space for replacement
outsize = outsize - strlen(pattern) + strlen(by);
res = realloc(res, outsize);
// copy the pattern
memcpy(res + resoffset, by, strlen(by));
resoffset += strlen(by);
}
// copy the remaining input
strcpy(res + resoffset, in);
return res;
}
output must be free'd
a fix to fann95's response, using in-place modification of the string, and assuming the buffer pointed to by line is large enough to hold the resulting string.
static void replacestr(char *line, const char *search, const char *replace)
{
char *sp;
if ((sp = strstr(line, search)) == NULL) {
return;
}
int search_len = strlen(search);
int replace_len = strlen(replace);
int tail_len = strlen(sp+search_len);
memmove(sp+replace_len,sp+search_len,tail_len+1);
memcpy(sp, replace, replace_len);
}
/*замена символа в строке*/
char* replace_char(char* str, char in, char out) {
char * p = str;
while(p != '\0') {
if(*p == in)
*p == out;
++p;
}
return str;
}
This function only works if ur string has extra space for new length
void replace_str(char *str,char *org,char *rep)
{
char *ToRep = strstr(str,org);
char *Rest = (char*)malloc(strlen(ToRep));
strcpy(Rest,((ToRep)+strlen(org)));
strcpy(ToRep,rep);
strcat(ToRep,Rest);
free(Rest);
}
This only replaces First occurrence
Here goes mine, make them all char*, which makes calling easier...
char *strrpc(char *str,char *oldstr,char *newstr){
char bstr[strlen(str)];
memset(bstr,0,sizeof(bstr));
int i;
for(i = 0;i < strlen(str);i++){
if(!strncmp(str+i,oldstr,strlen(oldstr))){
strcat(bstr,newstr);
i += strlen(oldstr) - 1;
}else{
strncat(bstr,str + i,1);
}
}
strcpy(str,bstr);
return str;
}
There is a function in string.h but it works with char [] not char* but again it outputs a char* and not a char []
It is simple and beautiful
Supposing we want to replace 'and' in 'TheandQuickandBrownandFox'.
We first split with strtok and then join with snprintf defined in the stdio.h
char sometext[] = "TheandQuickandBrownandFox";
char* replaced = malloc(1024);
// split on the substring, here I am using (and)
char* token = strtok(sometext, "and");
snprintf(replaced, 1, "%s", ""); // initialise so we can compare
while(token) {
if (strcmp(replaced, "") < 1) {
// if it is the first one
snprintf(replaced, 1024, "%s", token);
token = NULL;
} else {
// put the space between the existing and new
snprintf(replaced, 1024, "%s %s", replaced, token);
token = NULL;
}
}
free(replaced);
This should give us:
The Quick Brown Fox
You can use this function (the comments explain how it works):
void strreplace(char *string, const char *find, const char *replaceWith){
if(strstr(string, find) != NULL){
char *temporaryString = malloc(strlen(strstr(string, find) + strlen(find)) + 1);
strcpy(temporaryString, strstr(string, find) + strlen(find)); //Create a string with what's after the replaced part
*strstr(string, find) = '\0'; //Take away the part to replace and the part after it in the initial string
strcat(string, replaceWith); //Concat the first part of the string with the part to replace with
strcat(string, temporaryString); //Concat the first part of the string with the part after the replaced part
free(temporaryString); //Free the memory to avoid memory leaks
}
}
DWORD ReplaceString(__inout PCHAR source, __in DWORD dwSourceLen, __in const char* pszTextToReplace, __in const char* pszReplaceWith)
{
DWORD dwRC = NO_ERROR;
PCHAR foundSeq = NULL;
PCHAR restOfString = NULL;
PCHAR searchStart = source;
size_t szReplStrcLen = strlen(pszReplaceWith), szRestOfStringLen = 0, sztextToReplaceLen = strlen(pszTextToReplace), remainingSpace = 0, dwSpaceRequired = 0;
if (strcmp(pszTextToReplace, "") == 0)
dwRC = ERROR_INVALID_PARAMETER;
else if (strcmp(pszTextToReplace, pszReplaceWith) != 0)
{
do
{
foundSeq = strstr(searchStart, pszTextToReplace);
if (foundSeq)
{
szRestOfStringLen = (strlen(foundSeq) - sztextToReplaceLen) + 1;
remainingSpace = dwSourceLen - (foundSeq - source);
dwSpaceRequired = szReplStrcLen + (szRestOfStringLen);
if (dwSpaceRequired > remainingSpace)
{
dwRC = ERROR_MORE_DATA;
}
else
{
restOfString = CMNUTIL_calloc(szRestOfStringLen, sizeof(CHAR));
strcpy_s(restOfString, szRestOfStringLen, foundSeq + sztextToReplaceLen);
strcpy_s(foundSeq, remainingSpace, pszReplaceWith);
strcat_s(foundSeq, remainingSpace, restOfString);
}
CMNUTIL_free(restOfString);
searchStart = foundSeq + szReplStrcLen; //search in the remaining str. (avoid loops when replWith contains textToRepl
}
} while (foundSeq && dwRC == NO_ERROR);
}
return dwRC;
}
char *replace(const char*instring, const char *old_part, const char *new_part)
{
#ifndef EXPECTED_REPLACEMENTS
#define EXPECTED_REPLACEMENTS 100
#endif
if(!instring || !old_part || !new_part)
{
return (char*)NULL;
}
size_t instring_len=strlen(instring);
size_t new_len=strlen(new_part);
size_t old_len=strlen(old_part);
if(instring_len<old_len || old_len==0)
{
return (char*)NULL;
}
const char *in=instring;
const char *found=NULL;
size_t count=0;
size_t out=0;
size_t ax=0;
char *outstring=NULL;
if(new_len> old_len )
{
size_t Diff=EXPECTED_REPLACEMENTS*(new_len-old_len);
size_t outstring_len=instring_len + Diff;
outstring =(char*) malloc(outstring_len);
if(!outstring){
return (char*)NULL;
}
while((found = strstr(in, old_part))!=NULL)
{
if(count==EXPECTED_REPLACEMENTS)
{
outstring_len+=Diff;
if((outstring=realloc(outstring,outstring_len))==NULL)
{
return (char*)NULL;
}
count=0;
}
ax=found-in;
strncpy(outstring+out,in,ax);
out+=ax;
strncpy(outstring+out,new_part,new_len);
out+=new_len;
in=found+old_len;
count++;
}
}
else
{
outstring =(char*) malloc(instring_len);
if(!outstring){
return (char*)NULL;
}
while((found = strstr(in, old_part))!=NULL)
{
ax=found-in;
strncpy(outstring+out,in,ax);
out+=ax;
strncpy(outstring+out,new_part,new_len);
out+=new_len;
in=found+old_len;
}
}
ax=(instring+instring_len)-in;
strncpy(outstring+out,in,ax);
out+=ax;
outstring[out]='\0';
return outstring;
}
Using only strlen from string.h
sorry for my English
char * str_replace(char * text,char * rep, char * repw){//text -> to replace in it | rep -> replace | repw -> replace with
int replen = strlen(rep),repwlen = strlen(repw),count;//some constant variables
for(int i=0;i<strlen(text);i++){//search for the first character from rep in text
if(text[i] == rep[0]){//if it found it
count = 1;//start searching from the next character to avoid repetition
for(int j=1;j<replen;j++){
if(text[i+j] == rep[j]){//see if the next character in text is the same as the next in the rep if not break
count++;
}else{
break;
}
}
if(count == replen){//if count equals to the lenght of the rep then we found the word that we want to replace in the text
if(replen < repwlen){
for(int l = strlen(text);l>i;l--){//cuz repwlen greater than replen we need to shift characters to the right to make space for the replacement to fit
text[l+repwlen-replen] = text[l];//shift by repwlen-replen
}
}
if(replen > repwlen){
for(int l=i+replen-repwlen;l<strlen(text);l++){//cuz replen greater than repwlen we need to shift the characters to the left
text[l-(replen-repwlen)] = text[l];//shift by replen-repwlen
}
text[strlen(text)-(replen-repwlen)] = '\0';//get rid of the last unwanted characters
}
for(int l=0;l<repwlen;l++){//replace rep with repwlen
text[i+l] = repw[l];
}
if(replen != repwlen){
i+=repwlen-1;//pass to the next character | try text "y" ,rep "y",repw "yy" without this line to understand
}
}
}
}
return text;
}
if you want strlen code to avoid calling string.h
int strlen(char * string){//use this code to avoid calling string.h
int lenght = 0;
while(string[lenght] != '\0'){
lenght++;
}
return lenght;
}
There you go....this is the function to replace every occurance of char x with char y within character string str
char *zStrrep(char *str, char x, char y){
char *tmp=str;
while(*tmp)
if(*tmp == x)
*tmp++ = y; /* assign first, then incement */
else
*tmp++;
// *tmp='\0'; -> we do not need this
return str;
}
An example usage could be
Exmaple Usage
char s[]="this is a trial string to test the function.";
char x=' ', y='_';
printf("%s\n",zStrrep(s,x,y));
Example Output
this_is_a_trial_string_to_test_the_function.
The function is from a string library I maintain on Github, you are more than welcome to have a look at other available functions or even contribute to the code :)
https://github.com/fnoyanisi/zString
EDIT:
#siride is right, the function above replaces chars only. Just wrote this one, which replaces character strings.
#include <stdio.h>
#include <stdlib.h>
/* replace every occurance of string x with string y */
char *zstring_replace_str(char *str, const char *x, const char *y){
char *tmp_str = str, *tmp_x = x, *dummy_ptr = tmp_x, *tmp_y = y;
int len_str=0, len_y=0, len_x=0;
/* string length */
for(; *tmp_y; ++len_y, ++tmp_y)
;
for(; *tmp_str; ++len_str, ++tmp_str)
;
for(; *tmp_x; ++len_x, ++tmp_x)
;
/* Bounds check */
if (len_y >= len_str)
return str;
/* reset tmp pointers */
tmp_y = y;
tmp_x = x;
for (tmp_str = str ; *tmp_str; ++tmp_str)
if(*tmp_str == *tmp_x) {
/* save tmp_str */
for (dummy_ptr=tmp_str; *dummy_ptr == *tmp_x; ++tmp_x, ++dummy_ptr)
if (*(tmp_x+1) == '\0' && ((dummy_ptr-str+len_y) < len_str)){
/* Reached end of x, we got something to replace then!
* Copy y only if there is enough room for it
*/
for(tmp_y=y; *tmp_y; ++tmp_y, ++tmp_str)
*tmp_str = *tmp_y;
}
/* reset tmp_x */
tmp_x = x;
}
return str;
}
int main()
{
char s[]="Free software is a matter of liberty, not price.\n"
"To understand the concept, you should think of 'free' \n"
"as in 'free speech', not as in 'free beer'";
printf("%s\n\n",s);
printf("%s\n",zstring_replace_str(s,"ree","XYZ"));
return 0;
}
And below is the output
Free software is a matter of liberty, not price.
To understand the concept, you should think of 'free'
as in 'free speech', not as in 'free beer'
FXYZ software is a matter of liberty, not price.
To understand the concept, you should think of 'fXYZ'
as in 'fXYZ speech', not as in 'fXYZ beer'
You can use strrep()
char* strrep ( const char * cadena,
const char * strf,
const char * strr
)
strrep (String Replace). Replaces strf with strr in cadena and returns the new string. You need to free the returned string in your code after using strrep.
Parameters:
cadena: The string with the text.
strf: The text to find.
strr: The replacement text.
Returns
The text updated wit the replacement.
Project can be found at https://github.com/ipserc/strrep
I have a string like that:
4;4=3;1=0,2=2,3=1,4=1,5=1;0003013340f59bce000002aaf01620e620198b2240002710;
It is separated into sections by ";" and each section can have one or more key/value pairs like 5=1 and so on, as you can see.
I want to parse it in pure C and I started working with strtok as I am showing in code here:
const wuint8 section_delimiter[] = ";";
const wuint8 field_delimiter[] = ",";
const wuint8 value_delimiter[] = "=";
printf("%s\n",data->msg);
token = strtok(data->msg,section_delimiter);
while(token != NULL) {
indicator = atoi(token);
printf("indicator: %d\n", indicator);
switch(indicator) {
case TYPE_1: {
printf("type: %d\n",TYPE_1);
wuint16 i, headerType, headerSubType;
for(i = 1; i < TP_MAX; i++) {
if(i == atoi(token)) {
token = strtok(NULL,value_delimiter);
headerType = i;
headerSubType = atoi(token);
break;
}
}
break;
}
case TYPE_2: {
printf("type: %d\n",TYPE_3);
break;
}
case TYPE_3: {
printf("type: %d\n",TYPE_3);
break;
}
case TYPE_4: {
printf("type: %d\n",TYPE_4);
break;
}
I am not sure how to do that correctly.
It also gets complicated, because not every string has the same structure, sometimes only one or two sections can be present. E.g.: 3;4=3;1=0,2=2,3=1,4=1,5=1;
Is there a how to do that showing the best and most convenient way?
strtok can't, AFAICR, be used in nested loops like this due to the global state it manages itself.
I suggest parsing each semicolon-delimited part out first, then handling them sequentially - or just implement something akin to strtok for your semicolon case yourself, then happily use strtok in the inner loop.
Using strcspn(). Fixed buffers, results go into global variables. data[] buffer is altered (and thus needs to be writable). YMMV
/*
It is separated into sections by ";" and each section can have one or more
key/value pairs like 5=1 and so on, as you can see. I want to parse it in
pure C and I started working with strtok as I am showing in code here:
*/
char data[] = "4;4=3;1=0,2=2,3=1,4=1,5=1;0003013340f59bce000002aaf01620e620198b2240002710;" ;
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
struct header {
int l;
int r;
} headers[123];
unsigned nheader;
int indicator;
char rest [123];
int tokenise(char * buff);
unsigned tokenise2(struct header *dst, char * buff);
/****************/
int tokenise(char * buff)
{
char *ptrs[14];
unsigned nptr;
unsigned len, pos;
ptrs[nptr=0] = NULL;
for (len = pos=0; buff[pos]; pos += len ) {
len = strcspn(buff+pos, ";");
ptrs[nptr++] = buff+pos;
ptrs[nptr] = NULL;
if (!buff[pos+len] ) break;
buff[pos+len] = 0;
len +=1;
}
if ( nptr> 0 && ptrs[0]) indicator = atoi(ptrs[0]); else indicator = -1;
if ( nptr> 1 && ptrs[1]) nheader = tokenise2 (headers, ptrs[1] ); else nheader = 0;
if ( nptr> 2 && ptrs[2]) nheader += tokenise2 (headers+nheader, ptrs[2] ); else nheader += 0;
if ( nptr> 3 && ptrs[3]) strcpy (rest, ptrs[3]); else rest[0] = 0;
return 0; /* or something useful ... */
}
unsigned tokenise2(struct header *target, char * buff)
{
char *ptrs[123];
unsigned nptr, iptr;
unsigned len, pos;
ptrs[nptr=0] = NULL;
for (len = pos=0; buff[pos]; pos += len ) {
len = strcspn(buff+pos, "," );
ptrs[nptr++] = buff+pos;
ptrs[nptr] = NULL;
if (!buff[pos+len] ) break;
buff[pos+len] = 0;
len +=1;
}
for ( iptr=0; iptr < nptr; iptr++) {
if (! ptrs[iptr] ) break;
len = strcspn(ptrs[iptr], "=" );
if (!len) break;
target[iptr].l = atoi (ptrs[iptr] );
target[iptr].r = atoi (ptrs[iptr]+len+1 );
}
return iptr; /* something useful ... */
}
int main(void)
{
int rc;
unsigned idx;
fprintf(stderr, "Org=[%s]\n", data );
rc = tokenise(data);
printf("Indicator=%d\n", indicator );
for (idx=0; idx < nheader; idx++) {
printf("%u: %d=%d\n", idx, headers[idx].l , headers[idx].r );
}
printf("Rest=%s\n", rest );
return 0;
}