How to split with multiple delimiters in C - c
I have this line of text:
32+-#3#2-#3#3
I need to separate numbers from each other. So basically the result would be like this:
3
2+-
3
2-
3
3
This is my code but it's not working properly because I have numbers with two digits:
#include <stdio.h>
#include <string.h>
/* Splits a fixed sample string on '#' with strtok() and prints each piece on its own line. */
int main(void) {
    char input[50] = "32-#3#2-#3#3";
    const char *separators = "#";

    /* strtok() receives the buffer on the first call and NULL on every later
       call; it returns NULL once no pieces remain. */
    for (char *piece = strtok(input, separators); piece != NULL;
         piece = strtok(NULL, separators)) {
        printf(" %s\n", piece);
    }
    return 0;
}
You can't do it with strtok (alone), because there is no delimiter between the numbers you want to split. It's easier without strtok, just print what you want printed and add a separator unless a character which belongs to the token follows:
#include <stdio.h>
/*
 * Prints the digits and sign characters of a sample string, skipping every
 * other character, and ends the line after each printed character unless the
 * character that follows is '+' or '-'.
 */
int main()
{
    char text[] = "32+-#3#2-#3#3";
    const char *cur = text;

    while (*cur != '\0') {
        char ch = *cur;
        char following = cur[1];
        int is_digit = (ch >= '0' && ch <= '9');
        int is_sign = (ch == '+' || ch == '-');

        if (is_digit || is_sign) {
            putchar(ch);
            /* A following sign stays on the current line, so only break the
               line when the next character is not a sign. */
            if (!(following == '+' || following == '-')) {
                putchar('\n');
            }
        }
        cur++;
    }
}
If you consider this too easy, you can use a regular expression to match the tokens:
#include <stdio.h>
#include <regex.h>
/*
 * Prints every token of a sample string, one per line, where a token is a
 * single digit followed by any number of '+' or '-' characters. Tokens are
 * located with the POSIX regular expression API.
 *
 * Returns 0 on success, 1 if the pattern cannot be compiled.
 */
int main()
{
    const char *string = "32+-#3#2-#3#3";
    regex_t reg;

    /* cflags == 0 selects POSIX basic regular expression syntax. */
    if (regcomp(&reg, "[0-9][+-]*", 0) != 0) {
        fprintf(stderr, "regcomp failed\n");
        return 1;
    }

    regmatch_t match = {0};
    /* Each search resumes right after the previous match; the offsets stored
       in `match` are relative to the start pointer passed to regexec(). */
    while (regexec(&reg, string += match.rm_eo, 1, &match, 0) == 0) {
        printf("%.*s\n", (int)(match.rm_eo - match.rm_so), string + match.rm_so);
    }

    regfree(&reg);
    return 0;
}
There is a simple way to achieve this, but in C it is a bit more complicated, since we don't have a vector type as in C++. Here is a pure C implementation, which can still be improved:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Splits src on every occurrence of pattern and stores a newly allocated,
 * NUL-terminated copy of each piece in consecutive slots of outvec.
 *
 * src      string to split; it is not modified.
 * pattern  separator string; an empty pattern yields src as a single piece.
 * outvec   array receiving the piece pointers; the caller must provide room
 *          for every piece and free() each stored pointer.
 * outsize  incremented once per stored piece; the caller must initialise it
 *          (normally to 0) before the call.
 *
 * If an allocation fails, splitting stops; pieces stored so far stay valid.
 */
void split_ss(const char* src,const char* pattern, char** outvec, size_t* outsize)
{
    const size_t pat_len = strlen(pattern);
    const char *piece = src;

    for (;;) {
        /* An empty pattern matches everywhere and would never advance. */
        const char *hit = (pat_len != 0) ? strstr(piece, pattern) : NULL;
        const size_t len = (hit != NULL) ? (size_t)(hit - piece) : strlen(piece);

        char *copy = malloc(len + 1); /* +1 for the terminating NUL */
        if (copy == NULL) {
            return;
        }
        memcpy(copy, piece, len);
        copy[len] = '\0';

        *outvec++ = copy;
        *outsize += 1;

        if (hit == NULL) {
            return; /* that was the last piece */
        }
        piece = hit + pat_len;
    }
}
/* Splits a sample string on '#' with split_ss(), prints each piece in brackets and frees it. */
int main()
{
    char* outdata[64] = {0};
    size_t size = 0; /* split_ss() only increments the count, so it must start at 0 */

    split_ss("32+-#3#2-#3#3", "#", outdata, &size);
    for (size_t i = 0; i < size; i++) {
        printf("[%s]\r\n", outdata[i]);
        free(outdata[i]); /* every piece was allocated by split_ss() */
    }
    return 0;
}
strstr is used so that the input can be split on a string rather than on a single character. The output is a poor man's 2D array (an array of pointers) together with a size used to iterate over it; don't forget to free each element.
strtok() is not the right tool for your purpose... As a matter of fact, strtok() is rarely the right tool for any purpose because of its tricky semantics and side effects.
A simple loop will do:
#include <stdio.h>
/*
 * Prints every character of a sample string except '#', one per line, keeping
 * any run of '+' or '-' characters on the same line as the character before it.
 */
int main(void) {
    char text[50] = "32+-#3#2-#3#3";
    size_t pos = 0;

    while (text[pos] != '\0') {
        if (text[pos] != '#') {
            putchar(text[pos]);
            /* Append the '+'/'-' run that directly follows to the same line. */
            while (text[pos + 1] == '+' || text[pos + 1] == '-') {
                pos++;
                putchar(text[pos]);
            }
            putchar('\n');
        }
        pos++;
    }
    return 0;
}
Related
Extracting the first two words in a sentence in C without pointers
I am getting used to writing eBPF code as of now and want to avoid using pointers in my BPF text due to how difficult it is to get a correct output out of it. Using strtok() seems to be out of the question due to all of the example codes requiring pointers. I also want to expand it to CSV files in the future since this is a means of practice for me. I was able to find another user's code here but it gives me an error with the BCC terminal due to the one pointer. char str[256]; bpf_probe_read_user(&str, sizeof(str), (void *)PT_REGS_RC(ctx)); char token[] = strtok(str, ","); char input[] ="first second third forth"; char delimiter[] = " "; char firstWord, *secondWord, *remainder, *context; int inputLength = strlen(input); char *inputCopy = (char*) calloc(inputLength + 1, sizeof(char)); strncpy(inputCopy, input, inputLength); str = strtok_r (inputCopy, delimiter, &context); secondWord = strtok_r (NULL, delimiter, &context); remainder = context; getchar(); free(inputCopy);
Pointers are powerful, and you won't be able to avoid them for very long. The time you invest in learning them is definitely worth it. Here is an example: #include <stdio.h> #include <stdlib.h> #include <string.h> /** Extracts the word with the index "n" in the string "str". Words are delimited by a blank space or the end of the string. }*/ char *getWord(char *str, int n) { int words = 0; int length = 0; int beginIndex = 0; int endIndex = 0; char currentchar; while ((currentchar = str[endIndex++]) != '\0') { if (currentchar == ' ') { if (n == words) break; if (length > 0) words++; length = 0; beginIndex = endIndex; continue; } length++; } if (n == words) { char *result = malloc(sizeof(char) * length + 1); if (result == NULL) { printf("Error while allocating memory!\n"); exit(1); } memcpy(result, str + beginIndex, length); result[length] = '\0'; return result; }else return NULL; } You can easily use the function: int main(int argc, char *argv[]) { char string[] = "Pointers are cool!"; char *word = getWord(string, 2); printf("The third word is: '%s'\n", word); free(word); //Don't forget to de-allocate the memory! return 0; }
Dynamic memory allocation for an array of pointers to char in C
I'm building a word counter program. To achieve this, I was thinking about saving the string the user inputted, and using strtok() to split the sentence with space as the delimiter. But first I want to allocate enough memory for each word. Let's say the sentence is "Hello World". I've already dynamically allocated memory for the string itself. Now I want to split Hello World into 2 strings, "Hello" and "World". My goal is to allocate enough memory so that there's not too much empty space but I also don't want to allocate too little space. Here is my code so far: #include <stdio.h> #include <stdlib.h> char *strmalloc(char **string); char *user_input = NULL; char *word_array[]; int main(void) { printf("Enter a sentence to find out the number of words: "); user_input = strmalloc(&user_input); return 0; } char *strmalloc(char **string) { char *tmp = NULL; size_t size = 0, index = 0; int ch; while ((ch = getchar()) != '\n' && ch != EOF) { if (size <= index) { size += 1; tmp = realloc(*string, size); if (!tmp) { free(*string); string = NULL; break; } *string = tmp; } (*string)[index++] = ch; } return *string; } How would I go about doing this? Should I do the splitting first or allocate the space required for the array first?
You can count words without splitting the sentence, here is an example : #include <stdio.h> #include <string.h> #include <stdlib.h> #include <ctype.h> // Change this to change the separator characters static inline char isSeparator(char ch) { return isspace(ch) || ispunct(ch); } char * jumpSeparator(char *string) { while(string[0] && isSeparator(string[0])) string++; return string; } char * findEndOfWord(char *string) { while (string[0] && !isSeparator(string[0])) string++; return string; } int countWords(char *string) { char * ptr = jumpSeparator(string); if (strlen(ptr) == 0) return 0; int count = 1; while((ptr = findEndOfWord(ptr)) && ptr[0]) { ptr = jumpSeparator(ptr); if (!ptr) break; count++; } return count; } int main() { char * sentence = "This is,a function... to||count words"; int count = countWords(sentence); printf("%d\n", count); //====> 7 } EDIT : Reusing the same functions here is another example that allocates substrings dynamically : int main() { char * sentence = "This is,a function... to||split words"; int count = countWords(sentence); char * ptr = sentence, *start, *end; char ** substrings = malloc(count * sizeof(char *)); int i=0; while((ptr = jumpSeparator(ptr)) && ptr[0]) { start = ptr; ptr = findEndOfWord(ptr); end = ptr; int len = end-start; char * newString = malloc(len + 1); memcpy(newString, start, len); newString[len] = 0; substrings[i++] = newString; } // Prints the result for(int i=0; i<count; i++) printf("%s\n", substrings[i]); // Frees the allocated memory for(int i=0; i<count; i++) free(substrings[i]); free(substrings); return 0; } Output : This is a function to split words
C: Strange behaviour with strtok()
I'm doing an exercice where I need to split a string into an array of strings. The number of delimiters is checked before (the code snippet posted is a stripped down version however it doesn't work too), then the string is transformed into lowercase and it gets split into 4 parts separated by the delimiter "-". Here's the code: #include <stdio.h> #include <string.h> #include <stdlib.h> #include <ctype.h> #define MAX_USERNAME_LENGHT 256 #define NUMBER_OF_ELEMENTS 4 void StringToArrayOfStrings(char *string, char **string_array, char *delimiter); void LowerString(char * string, int string_lenght); int main() { char string[MAX_USERNAME_LENGHT] = "Joseph-Lucy-Mike-Nick"; //Test string char *string_array[NUMBER_OF_ELEMENTS]; //We need four elements char delimiter[] = "-"; int counter = 0; //LowerString(string, strlen(string)); //printf("%s", string); StringToArrayOfStrings(string, string_array, delimiter); //Print each element of the string array for (counter = 0; counter < NUMBER_OF_ELEMENTS; counter++) { printf("\n%s\n", string_array[counter]); } return 0; } void LowerString(char * string, int string_lenght) { unsigned short int counter; for (counter = 0; counter < string_lenght; counter++) { string[counter] = tolower(string[counter]); } } void StringToArrayOfStrings(char *string, char **string_array, char *delimiter) { unsigned short int counter; char *token; token = strtok(string, delimiter); while(token != NULL) { string_array[counter++] = token; token = strtok(NULL, delimiter); } } I've been scratching my head for the past 2 hours and I wasn't able to fix it. This programs works only if the string is not printed or/and transformed in lowercase. The program crashes when entering the loop in StringToArrayOfStrings. Where's the problem? Thanks.
returning string of chars in C
I compiled my piece of code and it worked fine using printf, but I want the result to be returned without being printed: char *ft_strrev(char *str) { int i = 0; while (str[i] != '\0') { i++; } while (i != 0) { i--; } return str; } int main () { char *string; string = "amrani"; ft_strrev(string); } The main goal here is to reverse the input that was entered. How exactly can I use return to return the full string given to my variable string? Any tips?
There are two approaches to doing this: make a new string and return it or mutate the parameter in place. Here's a new string version per your clarification comment. Note that memory is allocated for the new string and the caller is expected to free the result: #include <stdio.h> #include <stdlib.h> #include <string.h> char *ft_strrev(char *str) { int len = strlen(str); char *reversed = malloc(sizeof(*reversed) * (len + 1)); reversed[len] = '\0'; while (*str != '\0') { reversed[--len] = *str++; } return reversed; } int main() { char *string = "amrani"; char *reversed = ft_strrev(string); printf("%s\n", reversed); free(reversed); } Note that many functions of this kind will include the length of the string as a second parameter so the function needn't call strlen.
This solution inverts the string in place. #include <stdio.h> #include <string.h> char *ft_strrev(char *str) { int len = strlen(str); for (int i = 0; i < len / 2; i++) { char temp = str[i]; str[i] = str[len - i - 1]; str[len - i - 1] = temp; } return str; } int main() { char string[] = "amrani"; ft_strrev(string); // return value of ft_strrev not used here printf("%s", string); } Be aware of the difference between this: char string[] = "amrani"; // string is an array of chars initialized with "amrani" and this: char *string = "amrani"; // string is a pointer to the string literal "amrani" Modifying a string literal results in undefined behaviour, most likely some crash on modern platforms.
Erase last members of line from text file
I have a text file as data.txt and I want to delete the last members of each line: Here's the text file: 2031,2,0,0,0,0,0,0,54,0, 2027,2,0,0,0,0,0,0,209,0, 2029,2,0,0,0,0,0,0,65,0, 2036,2,0,0,0,0,0,0,165,0, I would like to delete so it becomes: 2031,2,0,0,0,0,0,0, 2027,2,0,0,0,0,0,0, 2029,2,0,0,0,0,0,0, 2036,2,0,0,0,0,0,0, I'm working in C but as the numbers can have two or three digits, I'm not sure how to do this.
A couple of uses of strrchr() can do the job: #include <string.h> void zap_last_field(char *line) { char *last_comma = strrchr(line, ','); if (last_comma != 0) { *last_comma = '\0'; last_comma = strrchr(line, ','); if (last_comma != 0) *(last_comma + 1) = '\0'; } } Compiled code that seems to work. Note that given a string containing a single comma, it will zap that comma. If you don't want that to happen, then you have to work a little harder. Test code for zap_last_field() #include <string.h> extern void zap_last_field(char *line); void zap_last_field(char *line) { char *last_comma = strrchr(line, ','); if (last_comma != 0) { *last_comma = '\0'; last_comma = strrchr(line, ','); if (last_comma != 0) *(last_comma + 1) = '\0'; } } #include <stdio.h> #include <stdlib.h> int main(void) { char *line = malloc(4096); if (line != 0) { while (fgets(line, 4096, stdin) != 0) { printf("Line: %s", line); zap_last_field(line); printf("Zap1: %s\n", line); } free(line); } return(0); } This has been vetted with valgrind and is OK on both the original data file and the mangled data file listed below. The dynamic memory allocation is there to give valgrind the maximum chance of spotting any problems. I strongly suspect that the core dump reported in a comment happens because the alternative test code tried to pass a literal string to the function, which won't work because literal strings are not generally modifiable and this code modifies the string in situ. Test code for zap_last_n_fields() If you want to zap the last couple of fields (a controlled number of fields), then you'll probably want to pass in a count of the number of fields to be zapped and add a loop. Note that this code uses a VLA so it requires a C99 compiler. 
#include <string.h> extern void zap_last_n_fields(char *line, size_t nfields); void zap_last_n_fields(char *line, size_t nfields) { char *zapped[nfields+1]; for (size_t i = 0; i <= nfields; i++) { char *last_comma = strrchr(line, ','); if (last_comma != 0) { zapped[i] = last_comma; *last_comma = '\0'; } else { /* Undo the damage wrought above */ for (size_t j = 0; j < i; j++) *zapped[j] = ','; return; } } zapped[nfields][0] = ','; zapped[nfields][1] = '\0'; } #include <stdio.h> int main(void) { char line1[4096]; while (fgets(line1, sizeof(line1), stdin) != 0) { printf("Line: %s", line1); char line2[4096]; for (size_t i = 1; i <= 3; i++) { strcpy(line2, line1); zap_last_n_fields(line2, i); printf("Zap%zd: %s\n", i, line2); } } return(0); } Example run — using your data.txt as input: Line: 2031,2,0,0,0,0,0,0,54,0, Zap1: 2031,2,0,0,0,0,0,0,54, Zap2: 2031,2,0,0,0,0,0,0, Zap3: 2031,2,0,0,0,0,0, Line: 2027,2,0,0,0,0,0,0,209,0, Zap1: 2027,2,0,0,0,0,0,0,209, Zap2: 2027,2,0,0,0,0,0,0, Zap3: 2027,2,0,0,0,0,0, Line: 2029,2,0,0,0,0,0,0,65,0, Zap1: 2029,2,0,0,0,0,0,0,65, Zap2: 2029,2,0,0,0,0,0,0, Zap3: 2029,2,0,0,0,0,0, Line: 2036,2,0,0,0,0,0,0,165,0, Zap1: 2036,2,0,0,0,0,0,0,165, Zap2: 2036,2,0,0,0,0,0,0, Zap3: 2036,2,0,0,0,0,0, It also correctly handles a file such as: 2031,0,0, 2031,0, 2031, 2031 ,