C: Strange behaviour with strtok() - c

I'm doing an exercice where I need to split a string into an array of strings. The number of delimiters is checked before (the code snippet posted is a stripped down version however it doesn't work too), then the string is transformed into lowercase and it gets split into 4 parts separated by the delimiter "-". Here's the code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX_USERNAME_LENGHT 256
#define NUMBER_OF_ELEMENTS 4
void StringToArrayOfStrings(char *string, char **string_array, char *delimiter);
void LowerString(char * string, int string_lenght);
int main() {
char string[MAX_USERNAME_LENGHT] = "Joseph-Lucy-Mike-Nick"; //Test string
char *string_array[NUMBER_OF_ELEMENTS]; //We need four elements
char delimiter[] = "-";
int counter = 0;
//LowerString(string, strlen(string));
//printf("%s", string);
StringToArrayOfStrings(string, string_array, delimiter);
//Print each element of the string array
for (counter = 0; counter < NUMBER_OF_ELEMENTS; counter++) {
printf("\n%s\n", string_array[counter]);
}
return 0;
}
void LowerString(char * string, int string_lenght) {
unsigned short int counter;
for (counter = 0; counter < string_lenght; counter++) {
string[counter] = tolower(string[counter]);
}
}
void StringToArrayOfStrings(char *string, char **string_array, char *delimiter) {
unsigned short int counter;
char *token;
token = strtok(string, delimiter);
while(token != NULL) {
string_array[counter++] = token;
token = strtok(NULL, delimiter);
}
}
I've been scratching my head for the past 2 hours and I wasn't able to fix it. This programs works only if the string is not printed or/and transformed in lowercase. The program crashes when entering the loop in StringToArrayOfStrings. Where's the problem?
Thanks.

Related

How to split with multiple delimiters in C

I have this line of text:
32+-#3#2-#3#3
I need to separate numbers from each other. So basically the result would be like this:
3
2+-
3
2-
3
3
This is my code but it's not working properly because I have numbers with two digits:
#include <stdio.h>
#include <string.h>
int main(void) {
char string[50] = "32-#3#2-#3#3";
// Extract the first token
char *token = strtok(string, "#");
// loop through the string to extract all other tokens
while (token != NULL) {
printf(" %s\n", token); //printing each token
token = strtok(NULL, "#");
}
return 0;
}
You can't do it with strtok (alone), because there is no delimiter between the numbers you want to split. It's easier without strtok, just print what you want printed and add a separator unless a character which belongs to the token follows:
#include <stdio.h>
int main()
{
char string[] = "32+-#3#2-#3#3";
for (char *token = string; *token; ++token)
if ('0'<=*token && *token<='9' || *token=='+' || *token=='-')
{
putchar(*token);
if (token[1]!='+' && token[1]!='-') putchar('\n');
}
}
If you consider this too easy, you can use a regular expression to match the tokens:
#include <stdio.h>
#include <regex.h>
int main()
{
char *string = "32+-#3#2-#3#3";
regex_t reg;
regcomp(&reg, "[0-9][+-]*", 0);
regmatch_t match = {0};
while (regexec(&reg, string+=match.rm_eo, 1, &match, 0) == 0)
printf("%.*s\n", (int)(match.rm_eo-match.rm_so), string+match.rm_so);
}
There is a simple way to achieve this, but in C is a bit more complicated since we don't have vector as in C++ but I can suggest a pure C implementation which can be improved:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void split_ss(const char* src,const char* pattern, char** outvec, size_t* outsize)
{
const size_t pat_len = strlen(pattern);
char* begin = (char*) src;
const char* next = begin;
if ((begin = strstr((const char*)begin, pattern)) != 0x00) {
unsigned int size = begin - next;
*outvec = malloc(sizeof(char) * size);
memcpy(*outvec , next, size);
outvec++;
(*outsize)+=1;
split_ss(begin+pat_len, pattern, outvec, outsize);
} else {
unsigned int size = &src[strlen(src)-1] - next + 1;
*outvec = malloc(sizeof(char) * size);
memcpy(*outvec, next, size);
(*outsize) += 1;
}
}
int main()
{
char* outdata[64] = {0};
size_t size, i=0;
split_ss("32+-#3#2-#3#3", "#", outdata, &size);
for(i=0; i < size; i++) {
printf("[%s]\r\n", outdata[i]);
}
// make sure to free it
return 0;
}
strstr is used to split by string rather than a character. Also output is a poorman 2D array with out size to iterate it and don't forget to free it.
strtok() is not the right tool for you purpose... As a matter of fact strtok() is rarely the right tool for any purpose because of its tricky semantics and side effects.
A simple loop will do:
#include <stdio.h>
int main(void) {
char string[50] = "32+-#3#2-#3#3";
for (char *p = string; *p; p++) {
if (*p == '#')
continue;
putchar(*p);
while (p[1] == '+' || p[1] == '-')
putchar(*++p);
putchar('\n');
}
return 0;
}

I want to split a string into two strings in C

I want to split a string by the comma and separate the first number in the string into its own new string, the rest of the string I want to keep together.
So far I have tried this by using strtok() and I can get the first number into its own string, but now I can't figure out how to keep the rest of the string together.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
char testStr[] = "1000,first,second,third,+abc";
char *uidStr;
char *restOfstr;
int n;
//This is wrong, I know, but I want to populate
//the rest of the string after the first comma
//into a single string without the UID.
uidStr = strtok(testStr, ",");
while (n < 5)
{
restOfstr = strtok(NULL, ",");
n++;
}
return 0;
}
strtok works fine, you have to keep in mind that it returns a pointer to each tokenized word so you need two pointers one for the first token and other for the rest of the string.
Demo
#include <stdio.h>
#include <string.h>
int main()
{
char testStr[] = "1000,first,second,third,+abc";
char *uidStr; //pointer to uid
char *restOfstr; //pointers to the rest of the string
uidStr = strtok(testStr, ","); //uid remains in testStr
restOfstr = strtok(NULL, "\n"); //rest of the string
puts(uidStr); //or puts(testStr) to print uid
puts(restOfstr); //print rest of the string
return 0;
}
If you want a more secure function you can use strtok_s.
You can use strchr to find the first comma in the string.
Then using strncpy to get the number in the string.
The complete code:
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
int main()
{
char *str = "1000,first,second,third,+abc";
char *s = strchr(str, ',');
if(!s)
return -1;
char num[10];
strncpy(num, str, s-str);
num[s-str] = '\0';
int a = strtol(num, NULL, 10);
printf("num = %d\nthe remaining: %s\n", a, s+1);
return 0;
}
#include <string.h>
#include <stdio.h>
int main(int ac, char **av) {
while (--ac) {
char *p = *++av;
char *t = strtok(p, ",");
char *r = strtok(NULL,"");
printf("%s : %s\n", t, r);
}
return 0;
}
Note that the empty string "" passed to the second strtok means that it cannot find a deliminator, thus returns the rest of the string.
In addition to the excellent answers #mevets and #anastaciu have provided (I would go with these), this code will also work fine.
#include <string.h>
#include <stdio.h>
int main(int argc, char** argv) {
char _p[] = "1000,Hey,There";
char* str1 = strtok(_p, ",");
char* str2 = strtok(NULL, "");
return 0;
}

strtok in C crashes with char pointer

I have a char array in C with numbers separated by comma, and need to convert it to an int array. However when I try to use strtok, it crashes with EXC_BAD_ACCESS.
Can you help me please?
The method
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ARRAY_LEN (0x100)
#define OUTPUT_LEN (0x400)
unsigned int StaticAnalyze_load( char* data, char delimiter, int* array, unsigned int length ){
char *token;
int i=0;
// CRASHES HERE (BAD ACCESS)
token = strtok(data, &delimiter);
while( token != NULL ) {
array[i] = atoi(token);
token = strtok(NULL, &delimiter);
i++;
}
for(i=0;i<3;i++) {
printf("%d\n", array[i]);
}
return length;
}
Main
int main(int argc, const char * argv[]) {
char *data = "13,654,24,48,1,79,14456,-13,654,13,46,465,0,65,16,54,1,67,4,6,74,165,"
"4,-654,616,51,654,1,654,654,-61,654647,67,13,45,1,54,2,15,15,47,1,54";
int array[ARRAY_LEN]; // array, I need to fill-in with integers from the string above
unsigned int loaded = StaticAnalyze_load(data, ',', array, ARRAY_LEN);
return 0;
}
data in main is a pointer to a literal string that strtok cannot modify.
strchr could be used to identify the tokens.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ARRAY_LEN (0x100)
#define OUTPUT_LEN (0x400)
unsigned int StaticAnalyze_load( char* data, char delimiter, int* array, unsigned int length ){
char *token = data;
int i=0;
while( i < ARRAY_LEN && token != NULL ) {
array[i] = atoi(token);
token = strchr(token, delimiter);
if ( token) {
++token;
}
i++;
}
for(i=0;i<3;i++) {
printf("%d\n", array[i]);
}
return length;
}
int main(int argc, const char * argv[]) {
char *data = "13,654,24,48,1,79,14456,-13,654,13,46,465,0,65,16,54,1,67,4,6,74,165,"
"4,-654,616,51,654,1,654,654,-61,654647,67,13,45,1,54,2,15,15,47,1,54";
int array[ARRAY_LEN]; // array, I need to fill-in with integers from the string above
unsigned int loaded = StaticAnalyze_load(data, ',', array, ARRAY_LEN);
return 0;
}

Program Hangs when printing contents of array in loop

Hi, i am using the MinGW C Compiler with Code::Blocks and my code hangs when trying to print the contents of an array (well it is a custom data type).
For a quick summary: the program is taking the contents of a txt file and splits
the string up into individual words using a custom data type called a stringArray (the name explains itself). It then should print each word of the file to the user.
The problem is, it hangs and gives me the usual "[PROGRAM NAME HERE] is not responding." After pressing cancel it gives me this result:
Process returned -1073741819 (0xC0000005) execution time : 3.861 s
Press any key to continue.
I am a sort of beginner.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
typedef struct stringArray
{
char *string;
}stringArray;
const char delim[2] = " ";
int string_to_array(char *filecontents)
{
char *token;
token = strtok(filecontents, delim);
int i;
int dirtyContentsLength;
stringArray newContents[100];
for(i = 0; i < 100; i++)
{
newContents[i].string = "";
}
i = 0;
while (token != NULL)
{
newContents[i].string = token;
i++;
token = strtok(NULL, delim);
}
return newContents;
}
int open_file(char filename[30])
{
char *file_contents;
long input_file_size;
FILE *input_file = fopen(filename, "rb");
fseek(input_file, 0, SEEK_END);
input_file_size = ftell(input_file);
rewind(input_file);
file_contents = malloc(input_file_size * (sizeof(char)));
fread(file_contents, sizeof(char), input_file_size, input_file);
fclose(input_file);
return file_contents;
}
int lex(char filecontents[30])
{
char *tok = "";
int state = 0;
char *string = "";
}
int main(int argc, char *argv[] )
{
const char *cleanContents;
char *messyContents;
char input[30];
printf("What is the filename? ");
scanf("%s", input);
messyContents = open_file(input);
cleanContents = string_to_array(messyContents);
int contentsLength = sizeof(cleanContents) / sizeof(cleanContents[0]);
int i;
for(i = 0; i < contentsLength; i++)
{
printf("%s\n", cleanContents[i]);
}
printf("Done");
return 0;
}
You have multiple problems with your code:
string_to_array() is declared to return an int, but in reality it is returning a stringArray
Same with open_file() function, Declared to return an int, but actually returning a char*
string_to_array is returning an element that was declared locally. This means that once the function is returned, that memory is no longer valid, but it has passed it on to the caller.
Your structure name is misleading. A char* is a character array (a string). Thus the name charArray would be more appropriate. For the structure to be a string array it has to be a char**, ie an array of character arrays (array of strings)
Int the printf() in the main() function you are not passing the string (thus a compilation warning is generated)
You are not initializing memory to all 0. This is ideal as otherwise the memory will contain random data which will be interpreted as a string untill the first null terminator (\0 encountered)
The following code is a modified working version of what you are trying to achieve with comments about each change:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
typedef struct stringArray
{
char *string;
}stringArray;
const char delim[2] = " ";
// Now string_to_array takes the memory location to write output to as a first parameter so that the
// memory will reside in the callers scope (refer to problem 3 above)
// Additionally return type was now set to void (refer to problem 1)
void string_to_array(stringArray newContents[100], char *filecontents)
{
char *token;
token = strtok(filecontents, delim);
int i;
int dirtyContentsLength;
for(i = 0; i < 100; i++)
{
newContents[i].string = "";
}
i = 0;
while (token != NULL)
{
newContents[i].string = token;
i++;
token = strtok(NULL, delim);
}
// return now was removed. result written directly in memory passed as parameter by the caller.
}
// open_file changed to return a char* (refer to problem 2)
char* open_file(char filename[30])
{
char *file_contents;
long input_file_size;
FILE *input_file = fopen(filename, "rb");
fseek(input_file, 0, SEEK_END);
input_file_size = ftell(input_file);
rewind(input_file);
file_contents = malloc(input_file_size * (sizeof(char)));
fread(file_contents, sizeof(char), input_file_size, input_file);
fclose(input_file);
return file_contents;
}
int lex(char filecontents[30])
{
char *tok = "";
int state = 0;
char *string = "";
}
int main(int argc, char *argv[] )
{
stringArray cleanContents[100];
// Initializing memory to all 0s (refer to problem 6)
memset(cleanContents, 0 ,sizeof(cleanContents));
char *messyContents;
char input[30];
printf("What is the filename? ");
scanf("%s", input);
messyContents = open_file(input);
string_to_array(cleanContents, messyContents);
int contentsLength = sizeof(cleanContents) / sizeof(cleanContents[0]);
int i;
for(i = 0; i < contentsLength; i++)
{
// Checking that at least one character is present in the string before printing it...
if (cleanContents[i].string[0])
{
// Printing the string within the 'stringArray'. (refer to problem 5)
printf("%s\n", cleanContents[i].string);
}
}
printf("Done\n");
return 0;
}

Tokenized string of char to ints using atoi

I am trying to take user input: (1 345 44 23) and make it into a tokenized char string then into ints. Surprisingly I could not find much help for what I would think would be a common task.
Any ideas how to convert the char string into an in string using tokens?
My program crashes when it gets to the conversion (after the tokenization [I realize this is not a word]).
Thanks!
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define StrSZE 81
void strInput (char str[], int maxChars);
void custatoi(char * tokenArray[], int * data, int numOfTok);
int main(int argc, char *argv[])
{
char str[StrSZE];
char* tokenArray;
int maxChars=StrSZE-1, cont=1, numOfToken=0, i=0;
int* data;
strInput(str, maxChars);
tokenArray = strtok(str, " \t");
while (tokenArray)
{
printf("token: %s\n", tokenArray);
tokenArray = strtok(NULL, " \t");
numOfToken++;
}
data = (int *) malloc(numOfToken * sizeof(int));
custatoi(tokenArray, data, numOfToken);
system("PAUSE");
return 0;
}
void strInput (char str[], int maxChars)
{
char garbage;
int k=0;
str[0]='\0';
printf("Please type a string of whole numbers (intigers).\n\n");
while ((k<80) && ((str[k] = getchar()) != '\n'))
k++;
/* Clears the keyboard buffer. */
if (k==80)
while((garbage = getchar()) != '\n')
;
/* Place null at the end of the line read in from user */
str[k]='\0';
printf("str after input is: %s\n\n", str);
}
void custatoi(char * tokenArray[], int * data, int numOfTok)
{
int i;
for (i=0; i < numOfTok; i++)
data[i] = atoi(tokenArray[i]);
}
I corrected the errors in yours code: There was some mistakes in main(), tokenArray data type was not correct.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define StrSZE 81
void strInput (char str[], int maxChars);
void custatoi(char* tokenArray[], int * data, int numOfTok);
int main(int argc, char *argv[])
{
char str[StrSZE];
int maxChars=StrSZE-1, cont=1, numOfToken=0, i=0;
int* data;
char* tokenArray[50]; // Declared correctly
strInput(str, maxChars);
tokenArray[i] = strtok(str, " \t"); // Also made a change here!
while (tokenArray[i])
{
printf("token: %s\n", tokenArray[i]);
i++;
tokenArray[i] = strtok(NULL, " \t");
numOfToken++;
}
data = (int *) malloc(numOfToken * sizeof(int));
custatoi(tokenArray, data, numOfToken);
printf("data\n");
for(i=0;i<numOfToken;i++){
printf(" %d\n",data[i]);
}
system("PAUSE");
return 0;
}
void strInput (char str[], int maxChars)
{
char garbage;
int k=0;
str[0]='\0';
printf("Please type a string of whole numbers (intigers).\n\n");
while ((k<80) && ((str[k] = getchar()) != '\n'))
k++;
/* Clears the keyboard buffer. */
if (k==80)
while((garbage = getchar()) != '\n')
;
/* Place null at the end of the line read in from user */
str[k]='\0';
printf("str after input is: %s\n\n", str);
}
void custatoi(char* tokenArray[], int * data, int numOfTok)
{
int i;
for (i=0; i < numOfTok; i++)
data[i] = atoi(tokenArray[i]);
}
At the end of the strtok loop, tokenArray will be set to NULL. You then pass it to custatoi, which presumably crashes when it tries to dereference it.
Note that tokenArray is not an array of strings; it's just a single string pointer (or a pointer to an array of characters). If you want to accumulate the tokens into an array, you'll have to create a separate array for that purpose.
The main problem is that custatoi() expects to work with an array of pointers to char, while tokenArray in main() is a mere pointer to char. The original code never collects all pointers to tokens in the input string into an array that custatoi() expects, there isn't such an array in the original code.
Please study the fixed code:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define StrSZE 81
void custatoi(char* tokenArray[], int* data, int numOfTok);
int main(void)
{
char str[StrSZE];
char** tokenArray;
int numOfToken = 0, i;
int* data;
//strInput(str, maxChars);
strcpy(str, "1 345 44 23");
tokenArray = malloc(sizeof(char*));
tokenArray[numOfToken] = strtok(str, " \t");
while (tokenArray[numOfToken] != NULL)
{
printf("token: %s\n", tokenArray[numOfToken]);
numOfToken++;
tokenArray = realloc(tokenArray, sizeof(char*) * (numOfToken + 1));
tokenArray[numOfToken] = strtok(NULL, " \t");
}
data = malloc(numOfToken * sizeof(int));
custatoi(tokenArray, data, numOfToken);
for (i = 0; i < numOfToken; i++)
printf("data[%d]=%d\n", i, data[i]);
return 0;
}
void custatoi(char* tokenArray[], int* data, int numOfTok)
{
int i;
for (i=0; i < numOfTok; i++)
data[i] = atoi(tokenArray[i]);
}
Output (idone):
token: 1
token: 345
token: 44
token: 23
data[0]=1
data[1]=345
data[2]=44
data[3]=23

Resources