I get a segmentation fault the second time malloc runs:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int keywords_init(char *str, char ***str_arr);
/* Driver for the question: tokenizes a comma-separated list via keywords_init(). */
int main(void) {
/* A writable array (not a string literal): strtok() inside keywords_init() writes into it. */
char str[] = "keyword1,keyword2,keyword3";
/* Starts as NULL so the first realloc() in keywords_init() behaves like malloc(). */
char **str_arr = NULL;
int arr_elements;
/* The address of str_arr is passed so the callee can update the caller's pointer. */
arr_elements = keywords_init(str, &str_arr);
/* NOTE(review): arr_elements is not used afterwards and nothing allocated by keywords_init() is freed here. */
return 0;
}
/*
 * Splits str at commas with strtok() (str is modified in place), grows the
 * caller's pointer array by one slot per token and is intended to store a
 * heap copy of each token in it. Each token is also printed.
 * Returns the number of tokens seen.
 */
int keywords_init(char *str, char ***str_arr) {
int i;
char *pch;
/* String break */
pch = strtok(str, ",");
for (i = 0; pch != NULL; i++) {
/* Grow the caller's array to i+1 pointers; the realloc() result is not checked for NULL. */
*str_arr = realloc (*str_arr, (i+1)*sizeof(char *));
/* NOTE: [] binds tighter than unary *, so this parses as *(str_arr[i]), not (*str_arr)[i]. */
*str_arr[i] = malloc (strlen(pch) + 1);
/* Same precedence issue as the line above. */
strcpy(*str_arr[i], pch);
printf("%d: %s\n", i, pch);
pch = strtok (NULL, ",");
}
return i;
}
What confuses me is that if I don't pass the address of str_arr to keywords_init and use a double pointer instead of a triple one in keywords_init it works just fine.
You're getting bitten by operator precedence/associativity - change both occurrences of:
*str_arr[i]
to:
(*str_arr)[i]
Related
I created a function to split a string with comma.
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
char splitted_line[8][50];
/*
 * Tokenizes line at commas with strtok() and copies the tokens into the
 * global splitted_line table, printing each stored entry.
 */
void split(char *line){
printf("line 9\n");
/* strtok() writes '\0' terminators into line, so line must be writable. */
char *part = strtok(line, ",");
printf("line 11\n");
/* NOTE(review): the index starts at 1, so splitted_line[0] is never written. */
for(int i=1; i<8; i++){
/* NOTE(review): part is not checked; strtok() returns NULL once the tokens run out. */
strcpy(splitted_line[i], part);
printf("%s\n", splitted_line[i]);
part = strtok(NULL, ",");
}
}
/* Calls split() on a fixed comma-separated string. */
int main(){
/* NOTE: line points at a string literal, which must not be modified; split() passes it to strtok(). */
char *line = "123,456,789";
split(line);
return 0;
}
but the result after running is :
line 9
Segmentation fault (core dumped)
it seems the problem is in char *part = strtok(line, ","); but I don't know what's that.
strtok() will modify passed original string directly.
You must not modify string literals.
char *line = "123,456,789";
should be a modifiable array:
char line[] = "123,456,789";
Also don't forget to check if part is not NULL before doing strcpy(splitted_line[i], part);.
I am trying to split a line into an array of words, but I am stuck on how to do this in C. My skills in C aren't very good, so I can't think of a way to "execute" my idea. Here is what I have so far:
int beginIndex = 0;
int endIndex = 0;
int maxWords = 10;
while (1) {
while (!isspace(str)) {
endIndex++;
}
char *tmp = (string from 'str' from beginIndex to endIndex)
arr[wordCnt] = tmp;
wordCnt++;
beginIndex = endIndex;
if (wordCnt = maxWords) {
return;
}
}
In my method I receive (char *str, char *arr[10]), and str is the line that I want to split when I encounter a space. arr is the array where I want to store the words. Is there any way to copy the 'chunk' of string that I want from 'str' into my tmp variable? This is the best way that I can think of right now, perhaps it's a terrible idea. If so, I would be happy to get some documentation or tips on a better method.
You should check out the C Library function strtok. You simply feed it the string you want to break up and a string of delimiters.
Here is an example of how it works (taken from the linked site):
#include <stdio.h>
#include <string.h>
/* Demonstrates strtok(): prints each token of a sample sentence on its own line. */
int main ()
{
    char text[] = "- This, a sample string.";
    const char *separators = " ,.-";
    char *token;

    printf("Splitting string \"%s\" into tokens:\n", text);
    /* The first call passes the buffer; later calls pass NULL to continue the same scan. */
    for (token = strtok(text, separators); token != NULL; token = strtok(NULL, separators)) {
        printf("%s\n", token);
    }
    return 0;
}
In your case instead of printing each string you would assign the pointer returned by strtok to the next element in your array arr.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * Splits str into whitespace-separated words without modifying str.
 *
 * Each word is copied into freshly allocated memory and stored in arr; the
 * caller owns the copies and must free() each one. At most 10 words are
 * stored (the size of arr).
 *
 * Returns the number of words stored. Returns 0 if str or arr is NULL.
 * If an allocation fails, splitting stops and the words stored so far are
 * returned.
 */
int split(char *str, char *arr[10]){
    const int maxWords = 10;
    int wordCnt = 0;
    const char *p = str;

    if (str == NULL || arr == NULL)
        return 0;

    while (wordCnt < maxWords) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        while (isspace((unsigned char)*p))
            ++p;
        if (*p == '\0')
            break;

        const char *wordStart = p;
        while (*p != '\0' && !isspace((unsigned char)*p))
            ++p;

        size_t len = (size_t)(p - wordStart);
        char *copy = malloc(len + 1);
        if (copy == NULL)
            break;
        memcpy(copy, wordStart, len);
        copy[len] = '\0';
        arr[wordCnt++] = copy;
    }
    return wordCnt;
}
/* Splits a fixed sentence with split(), then prints and releases every word. */
int main(void) {
    char *words[10];
    int count = split("1st 2nd 3rd", words);
    int k = 0;

    while (k < count) {
        puts(words[k]);
        free(words[k]);   /* each word was allocated by split() */
        ++k;
    }
    return 0;
}
I'm trying to create a split function using strtok and a dynamic array.
However, I have no clue where things are going wrong: No informative error messages.
It does say segmentation fault, but I don't understand how the heap is corrupt or whatever causes that happens.
Would someone be willing to explain to me what is wrong and how to do it correctly?
Edit 11:16 CST code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **toArray(char **array, char str[], char sep[], int *count);
char** my_split(const char* str, char delim, int* size);
/* Driver from the question: tokenizes a fixed sentence with toArray() and prints the pieces. */
int main(int argc, char* argv[]) {
/* NOTE: test points at a string literal; toArray() hands it to strtok(), which writes into its argument. */
char* test = "Hello there lol";
/* NOTE: count is a null pointer here, not an int set to 0; it is dereferenced below and inside toArray(). */
int *count = 0;
/* Room for five char* entries. */
char **array = malloc(sizeof(char*) * 5);
toArray(array, test, " ", count);
printf("Count: %d\n", *count);
int array_i;
for (array_i = 0; array_i < *count; array_i++) {
printf("array %d: %s\n", array_i, array[array_i]);
/* NOTE: the entries are pointers returned by strtok() (into test), not results of malloc(). */
free(array[array_i]);
}
free(array);
/* NOTE(review): returns 1 on the normal path as well. */
return 1;
}
/*
 * Splits str in place at any character contained in sep and stores a pointer
 * to each token in array.
 *
 * array  caller-provided storage; must have room for every token found
 * str    writable, NUL-terminated string; strtok() overwrites separators in it
 * sep    set of separator characters
 * count  receives the number of tokens stored (0 when str has no tokens)
 *
 * The stored pointers refer to memory inside str, so they must not be passed
 * to free(). Returns array.
 */
char **toArray(char **array, char str[], char sep[], int *count) {
    int n = 0;

    /* First call scans str; subsequent calls pass NULL to continue the scan. */
    for (char *tok = strtok(str, sep); tok != NULL; tok = strtok(NULL, sep)) {
        array[n++] = tok;
    }
    *count = n;
    return array;
}
Compiler messages are our friends. I simply used them to track down your issues. Try the following, and compare what has been done with what you had. Pay special attention to the declaration and usage of pointer variables... :)
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
char **toArray(char **array, char str[], char sep[], int *count);
/*
 * Tokenizes a writable copy of the sentence with toArray() and prints the
 * tokens. Returns 0 on success and 1 if the pointer array cannot be allocated.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    /* An array, not a pointer to a literal: strtok() inside toArray() writes into it. */
    char test[] = "Hello there lol";
    int count = 0;

    /*
     * Storage for the token pointers. The "+ 1" comes from the original
     * answer and is discussed below the listing.
     * NOTE(review): the array stores pointers, not characters, so five
     * slots already suffice and the extra byte is not actually required.
     */
    char **array = malloc((sizeof(char*) * 5) + 1);
    if (array == NULL) {
        perror("malloc");
        return 1;
    }

    toArray(array, test, " ", &count);
    printf("Count: %d\n", count);
    for (int array_i = 0; array_i < count; array_i++) {
        /* The entries point into test; they were not malloc()ed, so they are not freed. */
        printf("array %d: %s\n", array_i, array[array_i]);
    }
    getchar();   /* wait for a key press before exiting */
    free(array);
    return 0;    /* 0 reports success to the environment */
}
/*
 * Tokenizes str in place (strtok() replaces separators with '\0') and records
 * the start of every token in array, which the caller must have sized for all
 * tokens. *count is set to the number of tokens recorded; it is 0 when str
 * contains no token at all. The recorded pointers alias str and must not be
 * freed individually. Returns array.
 */
char **toArray(char **array, char str[], char sep[], int *count) {
    int stored = 0;
    char *temp = strtok(str, sep);

    while (temp != NULL) {
        array[stored] = temp;
        ++stored;
        temp = strtok(NULL, sep);
    }
    *count = stored;
    return array;
}
[EDIT] Example Output:
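Also, the line char **array = malloc(sizeof(char*) * 5); was changed to
char **array = malloc(sizeof(char*) * 5 + 1); on the reasoning that "hello" is actually 5 chars plus a NUL char, '\0'. Note, however, that array holds char * pointers (one per token), not the characters of any string, so sizeof(char*) * 5 already provides room for five tokens; the extra byte is harmless but not required.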
Some rules of thumb for C string(s).
1) when using malloc or calloc, don't forget to allow room for '\0'.
`char *buf1;` //buffer needed to manipulate buf2
`char buf2[]="someString";`
`buf1 = malloc(strlen(buf2)+1);` or `buf1 = malloc(sizeof(buf2));`
(note:, no '+1'. see '4)' below. )
2) clear (initialize) new allocated variable before use. eg:
memset(buf, 0, strlen("someString")+1); //preferred, all bytes are zeroed
OR
buf[0]=0; //useful, but use with care (only first byte is zeroed.)
3) Free all dynamically allocated memory when done with it. Eg:
free(buf);
4) Using the strlen() function or the sizeof operator. (Both are popular for use in [mc]alloc().)
Given:
char *buf1 ="Hello"; //6 characters |H|e|l|l|o|\0|
char buf2[] ="Hello"; //6 characters |H|e|l|l|o|\0|
char buf3[5]="Hello"; //5 characters |H|e|l|l|o|
char buf4[5]="Hel"; //4 characters |H|e|l|\0| |
char buf5[5]="Helloo";//should get compile error, too many initializers
Compare strlen() - sizeof() results:
strlen(buf1); //->5 (requires +1 in malloc for new variable req'd to hold "Hello\0")
sizeof(buf1); //->4 (returns sizof (char *), not # chars in string)
strlen(buf2); //->5 (requires +1 in malloc for new variable req'd yo hold "Hello\0")
sizeof(buf2); //->6 (counts all chars, including '\0')
strlen(buf3); //-> (error: Missing terminating NULL in string argument)
sizeof(buf3); //->5 (counts all chars, but there is no '\0' in this string - wrong!)
strlen(buf4); //->3 (counts chars, but not '\0')
sizeof(buf4); //->5 (counts ALL allocated space, including '\0')
You are passing char *test = "Hello there lol"; to your toArray(). Unfortunately, the string is not modifiable, so when you try to modify it with strtok(), you get a segmentation fault.
The simplest fix is:
char test[] = "Hello there lol";
You also have:
int *count = 0;
and you call the function with:
toArray(array, test, " ", count);
You need an integer, and to pass its address:
int count = 0;
...
toArray(array, test, " ", &count);
You were also trying to free the strings that were pointed at by the elements in array, but those were never allocated (they are parts of the string test). Don't free what was not allocated with malloc() et al.
With those fixes in place, this code works:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **toArray(char **array, char str[], char sep[], int *count);
/*
 * Tokenizes a writable sentence with toArray() and prints the tokens.
 * Returns 0 on success, 1 if the pointer array cannot be allocated.
 */
int main(void)
{
    /* A modifiable array: strtok() inside toArray() overwrites the separators. */
    char test[] = "Hello there lol";
    int count = 0;
    char **array = malloc(5 * sizeof *array);   /* room for five token pointers */

    if (array == NULL)
    {
        perror("malloc");
        return 1;
    }
    toArray(array, test, " ", &count);
    printf("Count: %d\n", count);
    for (int i = 0; i < count; i++)
        printf("array %d: %s\n", i, array[i]);
    /* Only the pointer array was allocated; the tokens live inside test. */
    free(array);
    return 0;
}
/*
 * Breaks str into tokens separated by any character of sep.
 *
 * str is modified in place by strtok(); array receives one pointer per token
 * (pointing into str, so the tokens must not be freed) and must be large
 * enough for all of them. *count is set to the number of tokens, which is 0
 * for an empty or separator-only string. Returns array.
 */
char **toArray(char **array, char str[], char sep[], int *count)
{
    char *temp;

    *count = 0;
    for (temp = strtok(str, sep); temp != NULL; temp = strtok(NULL, sep))
        array[(*count)++] = temp;
    return array;
}
Output:
Count: 3
array 0: Hello
array 1: there
array 2: lol
I have a problem with strtok() - it does not return the input as expected.
/*
 * Tokenizes a copy of input at '-' and writes one value per token into
 * ctext, starting at index mlen.
 */
void parse_input(const char *input,unsigned char *ctext, int mlen){
/* Work on a duplicate because strtok() modifies the buffer it scans. */
char * str = strdup(input);
char * pch = strtok(str,"-");
while (pch != NULL)
{
/* NOTE: this converts the pointer value itself (the token's address), not the character it points to. */
ctext[mlen] = (int) pch;
pch = strtok (NULL, "-");
/* mlen is passed by value, so the caller does not observe this increment. */
mlen++;
}
On input like 1-2-3-4 I would want it to fill ctext with [1,2,3,4].
That doesn't work, however.
What am I doing wrong? Any help appreciated.
ctext[mlen] = (int) pch;
That stores the numeric value of the pointer, whereas you really want the character pointed to by the pointer. Time to read a good article/book/tutorial on pointers.
ctext[mlen] = *pch;
is what you're looking for.
You want to get the character in the first byte of pch -- not the address of pch
ctext[mlen] = *pch;
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Splits input at '-' and appends a pointer to each token to ctext.
 *
 * input  string to split; it is not modified (a private copy is tokenized)
 * ctext  caller-provided array; tokens are appended starting at index *mlen
 *        and the array must have room for all of them
 * mlen   in: index of the first free slot; out: advanced by the number of
 *        tokens appended
 *
 * Ownership: all tokens appended by one call share a single heap buffer
 * whose base address is the first token appended by that call (leading '-'
 * characters are skipped before copying to guarantee this). The caller
 * releases the buffer with free() on that first token. If no token is found,
 * nothing is appended and nothing needs to be freed.
 *
 * Does nothing when an argument is NULL or when the copy cannot be allocated.
 */
void parse_input(const char *input,unsigned char *ctext[], int *mlen){
    size_t size;
    char *str;
    char *pch;
    int appended = 0;

    if (input == NULL || ctext == NULL || mlen == NULL)
        return;

    /* Skip leading separators so the first token starts at the buffer base. */
    input += strspn(input, "-");
    size = strlen(input) + 1;
    str = malloc(size);
    if (str == NULL)
        return;
    memcpy(str, input, size);

    for (pch = strtok(str, "-"); pch != NULL; pch = strtok(NULL, "-")) {
        ctext[(*mlen)++] = (unsigned char*)pch;
        ++appended;
    }

    /* Without tokens the caller has no pointer to free, so release it here. */
    if (appended == 0)
        free(str);
}
/* Parses "1-2-3-4" with parse_input() and prints the tokens as "[ 1, 2, 3, 4 ]". */
int main(void){
    unsigned char *ctext[16];
    int mlen = 0;

    parse_input("1-2-3-4", ctext, &mlen);
    printf("[ ");
    for (int i = 0; i < mlen; ++i) {
        /* %s expects a char *, so convert from unsigned char * explicitly. */
        printf("%s", (char *)ctext[i]);
        if (i < mlen - 1)
            printf(", ");
    }
    printf(" ]\n");
    /*
     * All tokens live in one buffer duplicated by parse_input(); for this
     * input the first token is the start of that buffer, so freeing it
     * releases everything.
     */
    if (mlen > 0)
        free(ctext[0]);
    return 0;
}
I need to use strtok to read in a first and last name and separate them. How can I store the names so that I can use them independently in two separate char arrays?
#include <stdio.h>
#include <string.h>
/* Prints each space-separated word of a fixed string on its own line. */
int main ()
{
    char input[] = "test string.";
    char *word = strtok(input, " ");

    if (word != NULL) {
        do {
            printf("%s\n", word);
            word = strtok(NULL, " ");   /* NULL continues the previous scan */
        } while (word != NULL);
    }
    return 0;
}
Here is my take at a reasonably simple tokenize helper that
stores results in a dynamically growing array
null-terminating the array
keeps the input string safe (strtok modifies its input, which is undefined behaviour when the input is a string literal)
To make the code re-entrant, use the non-standard strtok_r
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Returns a malloc()ed copy of the NUL-terminated string s, or NULL on allocation failure. */
static char *tokenize_dup(const char *s)
{
    size_t size = strlen(s) + 1;
    char *copy = malloc(size);

    if (copy != NULL)
        memcpy(copy, s, size);
    return copy;
}

/*
 * Splits input at spaces and returns a NULL-terminated, heap-allocated array
 * of heap-allocated token copies. input itself is never modified.
 *
 * A NULL input is treated like an empty string (the result then contains
 * only the terminating NULL). The caller frees every token and then the
 * array. Returns NULL if any allocation fails; nothing is leaked in that
 * case.
 */
char** tokenize(const char* input)
{
    size_t count = 0;
    size_t capacity = 10;
    char **result;
    char *str;
    char *tok;

    if (input == NULL)
        input = "";

    /* strtok() writes into its argument, so tokenize a private copy. */
    str = tokenize_dup(input);
    if (str == NULL)
        return NULL;

    result = malloc(capacity * sizeof *result);
    if (result == NULL) {
        free(str);
        return NULL;
    }

    for (tok = strtok(str, " "); ; tok = strtok(NULL, " ")) {
        if (count >= capacity) {
            /* Keep the old pointer until realloc() is known to have succeeded. */
            char **grown = realloc(result, 2 * capacity * sizeof *result);
            if (grown == NULL)
                goto fail;
            result = grown;
            capacity *= 2;
        }
        if (tok == NULL) {
            result[count] = NULL;   /* array terminator */
            break;
        }
        result[count] = tokenize_dup(tok);
        if (result[count] == NULL)
            goto fail;
        ++count;
    }
    free(str);
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    free(str);
    return NULL;
}
/* Tokenizes a fixed string, prints every token, and releases all memory. */
int main ()
{
    char **tokens = tokenize("test string.");

    if (tokens != NULL) {
        size_t idx = 0;

        /* The array ends with a NULL entry. */
        while (tokens[idx] != NULL) {
            printf("%s\n", tokens[idx]);
            free(tokens[idx]);
            ++idx;
        }
    }
    free(tokens);
    return 0;
}
Here is a strtok-free reimplementation of that (uses strpbrk instead):
/*
 * strtok-free tokenizer: splits str at spaces and returns a NULL-terminated,
 * heap-allocated array of heap-allocated token copies. str is not modified.
 *
 * Runs of spaces, and leading or trailing spaces, never produce empty
 * tokens, so the result matches what a strtok()-based split returns.
 * A NULL str yields an array holding only the terminating NULL.
 * The caller frees every token and then the array. Returns NULL if an
 * allocation fails; nothing is leaked in that case.
 */
char** tokenize(const char* str)
{
    size_t count = 0;
    size_t capacity = 10;
    const char *s = (str != NULL) ? str : "";
    char **result = malloc(capacity * sizeof *result);

    if (result == NULL)
        return NULL;

    for (;;) {
        const char *e;
        size_t len;
        char *copy;

        if (count >= capacity) {
            /* Keep the old pointer until realloc() is known to have succeeded. */
            char **grown = realloc(result, 2 * capacity * sizeof *result);
            if (grown == NULL)
                goto fail;
            result = grown;
            capacity *= 2;
        }

        s += strspn(s, " ");        /* skip separators before the token */
        if (*s == '\0') {
            result[count] = NULL;   /* array terminator */
            break;
        }

        e = strpbrk(s, " ");        /* end of token, or NULL for the last one */
        len = (e != NULL) ? (size_t)(e - s) : strlen(s);
        copy = malloc(len + 1);
        if (copy == NULL)
            goto fail;
        memcpy(copy, s, len);
        copy[len] = '\0';
        result[count++] = copy;
        s += len;
    }
    return result;

fail:
    while (count > 0)
        free(result[--count]);
    free(result);
    return NULL;
}
Do you need to store them separately? Two pointers into a modified char array will yield two separate perfectly usable strings.
That is we transform this:
char str[] ="test string.";
Into this:
char str[] ="test\0string.";
^ ^
| |
char *s1 ----- |
char *s2 -----------
.
#include <stdio.h>
#include <string.h>
/*
 * Splits "first last" in place and prints it as "last, first".
 * Both names are pointers into str; no copies are made.
 */
int main ()
{
    char str[] ="test string.";
    char *firstname;
    char *lastname;

    firstname = strtok(str, " ");
    lastname = strtok(NULL, " ");
    /* A missing second word is printed as an empty string. */
    printf("%s, %s\n", lastname != NULL ? lastname : "", firstname);
    return 0;
}
What about using strcpy:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_NAMES 2
/*
 * Copies up to MAX_NAMES space-separated tokens of a fixed string into
 * individually allocated strings, prints them and frees them.
 * Returns 0 on success, 1 if a copy could not be allocated.
 */
int main ()
{
    char str[] ="test string.";
    char *names[MAX_NAMES] = { 0 };
    char *test;
    int i = 0;
    int status = 0;

    test = strtok (str," ");
    while (test != NULL && i < MAX_NAMES)
    {
        names[i] = malloc(strlen(test)+1);
        if (names[i] == NULL)
        {
            /* Never strcpy() into a failed allocation; stop and clean up below. */
            perror("malloc");
            status = 1;
            break;
        }
        strcpy(names[i++], test);
        test = strtok (NULL, " ");
    }

    /* Unused slots are still NULL from the initializer and are skipped. */
    for(i=0; i<MAX_NAMES; ++i)
    {
        if(names[i])
        {
            puts(names[i]);
            free(names[i]);
            names[i] = NULL;
        }
    }

    return status;
}
It contains much clutter to maintain a complete program and clean its resources, but the main point is to use strcpy to copy each token into its own string.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Splits str at any character of delimiter.
 *
 * str        string to split; it is not modified (a private copy is used)
 * delimiter  set of separator characters
 * len        receives the number of tokens found (set to 0 on failure)
 *
 * Returns a heap-allocated array of *len token pointers followed by a NULL
 * entry. All tokens live in one heap buffer whose base address is element 0
 * (leading delimiters are skipped before copying to guarantee this), so the
 * result is released with free(result[0]) followed by free(result). When no
 * token exists, element 0 is NULL and only the array itself is allocated.
 *
 * Returns NULL if an argument is NULL or an allocation fails.
 */
char** split(const char *str, const char *delimiter, size_t *len){
    char *text;
    char **ret;
    const char *scan;
    char *tok;
    size_t count = 0;
    size_t size;
    size_t i = 0;

    if (len == NULL)
        return NULL;
    *len = 0;
    if (str == NULL || delimiter == NULL)
        return NULL;

    /* Drop leading delimiters so the first token starts at the buffer base. */
    str += strspn(str, delimiter);
    size = strlen(str) + 1;
    text = malloc(size);
    if (text == NULL)
        return NULL;
    memcpy(text, str, size);

    /* Count tokens without modifying text: scan always sits on a token start. */
    for (scan = text; *scan != '\0'; scan += strspn(scan, delimiter)) {
        scan += strcspn(scan, delimiter);
        ++count;
    }

    /* One extra slot for the terminating NULL entry. */
    ret = malloc((count + 1) * sizeof *ret);
    if (ret == NULL) {
        free(text);
        return NULL;
    }

    for (tok = strtok(text, delimiter); tok != NULL && i < count; tok = strtok(NULL, delimiter))
        ret[i++] = tok;
    ret[i] = NULL;

    /* With no tokens nothing points into text, so release it here. */
    if (i == 0)
        free(text);

    *len = i;
    return ret;
}
/*
 * Releases a result returned by split(): first the text buffer, whose
 * address is stored in element 0, then the pointer array itself.
 * Passing NULL is allowed and does nothing.
 */
void free4split(char** sa){
    if (sa == NULL)
        return;
    free(sa[0]);   /* token text */
    free(sa);      /* pointer array */
}
/*
 * Splits a fixed sentence at whitespace and punctuation, prints each word
 * and releases the result. Returns 1 if split() fails.
 */
int main(void){
    char str[] ="test string.";
    size_t len = 0;
    char **words = split(str, " \t\r\n,.", &len);

    if (words == NULL) {
        fputs("split failed\n", stderr);
        return 1;
    }

    /*
     * The array is also NULL-terminated, so it could be walked with
     * for (char **wk = words; *wk; wk++) instead of using len.
     */
    for (size_t i = 0; i < len; ++i) {
        printf("%s\n", words[i]);
    }
    free4split(words);
    return 0;
}
/* result:
test
string
*/
Copy the results from strtok to a new buffer using a function such as
/*
 * Duplicates the NUL-terminated string s into newly allocated memory and
 * returns the copy; the caller is responsible for free()ing it.
 * On allocation failure an error is reported with perror() and the process
 * exits with status 1, so the function never returns NULL.
 */
char *xstrdup(char const *s)
{
    size_t size = strlen(s) + 1;   /* include the terminator */
    char *copy = malloc(size);

    if (copy != NULL) {
        memcpy(copy, s, size);
        return copy;
    }
    perror("memory allocation failed");
    exit(1);
}
Don't forget to free the return values when you're done with them.
IMO, you don't need (and probably don't want) to use strtok at all (as in, "for this, or much of anything else"). I think I'd use code something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Returns a malloc()ed, NUL-terminated copy of the characters in
 * [begin, end), or NULL if the allocation fails.
 */
static char *make_str(char const *begin, char const *end) {
    size_t len = (size_t)(end - begin);
    char *ret = malloc(len + 1);
    if (ret != NULL) {
        memcpy(ret, begin, len);
        ret[len] = '\0';
    }
    return ret;
}

/*
 * Splits input at any character of delims without modifying input.
 *
 * tokens  receives up to max heap-allocated token copies; the caller frees
 *         each stored entry
 * max     capacity of tokens
 * input   NUL-terminated string to split
 * delims  set of separator characters
 *
 * Leading, trailing and repeated delimiters never produce empty tokens.
 * Returns the number of tokens stored. If a copy cannot be allocated,
 * splitting stops and the tokens stored so far are returned.
 */
size_t tokenize(char *tokens[], size_t max, char const *input, char const *delims) {
    size_t count = 0;
    char const *start = input;

    while (count < max) {
        /*
         * strspn()/strcspn() stop at the terminator. strchr(delims, '\0')
         * would match the terminator of delims and walk past the end of
         * input.
         */
        start += strspn(start, delims);
        if (*start == '\0')
            break;

        char const *end = start + strcspn(start, delims);
        char *copy = make_str(start, end);
        if (copy == NULL)
            break;
        tokens[count++] = copy;

        /* Continue at the delimiter (or terminator), never beyond it. */
        start = end;
    }
    return count;
}
#ifdef TEST
#define MAX_TOKENS 10
/* Test driver: tokenizes a sample sentence, prints each token between bars and frees it. */
int main() {
    char *tokens[MAX_TOKENS];
    size_t num = tokenize(tokens, MAX_TOKENS, "This is a longer input string ", " ");

    /* The index has the same type as the count to avoid a signed/unsigned comparison. */
    for (size_t i = 0; i < num; i++) {
        if (tokens[i] != NULL)   /* an entry may be NULL if its copy could not be allocated */
            printf("|%s|\n", tokens[i]);
        free(tokens[i]);
    }
    return 0;
}
#endif
You can do something like this too.
/*
 * Splits a fixed "first second" string at its first space and prints both
 * parts. Both parts are pointers into str; no copies are made.
 */
int main ()
{
    char str[] ="test string.";
    char * temp1;
    char * temp2;

    /*
     * Locate the space BEFORE calling strtok(): strtok() overwrites that
     * space with '\0', after which strchr(str, ' ') could no longer find it.
     */
    temp2 = strchr(str, ' ');
    temp1 = strtok (str," ");
    if (temp2 != NULL)
        temp2++;        /* step over the separator to the second part */
    else
        temp2 = "";     /* no separator: the second part is empty */
    if (temp1 == NULL)
        temp1 = "";     /* never pass NULL to %s */
    printf ("Splitted string :%s, %s\n" , temp1 , temp2);
    return 0;
}