Split string with delimiter in C - segmentation faults, invalid free - c

I wrote a simple code to split string in C with delimiter. When I remove all my frees, code works great but gives memory leaks. When I dont remove free, it does not show memory leaks but gives segmentation fault .. What is wring and how to solve it?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
unsigned int countWords(char *stringLine)
{
unsigned int count = 0;
char* tmp = stringLine;
char* last = 0;
const char delim = '/';
while (*tmp)
{
if (delim == *tmp)
{
count++;
last = tmp;
}
tmp++;
}
return count;
}
char **getWordsFromString(char *stringLine)
{
char** sizeNames = 0;
unsigned int count = 0;
const char *delim = "/";
count = countWords(stringLine);
sizeNames = malloc(sizeof(char*) * count);
if(sizeNames == NULL)
{
return NULL;
}
if (sizeNames)
{
size_t idx = 0;
char* token = strtok(stringLine, delim);
while (token)
{
if(idx > count)
{
exit(-1);
}
*(sizeNames + idx++) = strdup(token);
token = strtok(0, delim);
}
if(idx == count - 1)
{
exit(-1);
}
*(sizeNames + idx) = 0;
}
return sizeNames;
}
void showWords(char *stringLine)
{
unsigned int size = countWords(stringLine), i = 0;
char** sizeNames = getWordsFromString(stringLine);
for (i = 0; *(sizeNames + i); i++)
{
printf("word=[%s]\n", *(sizeNames + i));
free(*(sizeNames + i));
}
printf("\n");
free(sizeNames);
}
int main()
{
char words[] = "hello/world/!/its/me/";
showWords(words);
return 0;
}

Variable sizeNames is an array of pointers, not a string (array of characters) that you need to terminate with a null-character.
So remove this:
*(sizeNames + idx) = 0;
And change this:
for (i=0; *(sizeNames+i); i++)
To this:
for (i=0; i<size; i++)

In getWordsFromString,
*(sizeNames + idx) = 0;
Writes one past the end of your allocated memory, and when you try to free it, you get a segfault. Try count+1 in the malloc:
sizeNames = malloc(sizeof(char*) * (count+1) );

Related

How to split a text into two dimensional array in c

I'm trying to split this string:
this is a text file
looking for the word cat
the program should print also cats
and crat and lcat but it shouldn’t
print the word caats
into a two dimensional arrays such that every line in the text is a line in the array.
For example:
lines[0][0] = 't'
lines[0][1] = 'h'
and so on. For now, this is my code:
void print_lines(char txt[]){
char lines[SIZE][SIZE];
int num_of_lines = fill_lines(txt, lines);
printf("lines: %d\n",num_of_lines );
int i;
for (i = 0; i < num_of_lines; i++)
{
printf("%s\n", lines[i]);
}
}
int fill_lines(char txt[], char lines[][]){
char copy[strlen(txt)];
memcpy(copy, txt, strlen(txt));
char *line = strtok(copy, "\n");
int i = 0;
while(line != NULL){
strcpy(lines[i][0], line);
line = strtok(NULL, "\n");
i++
}
return i + 1;
}
The problem I'm currently dealing with is an error in strcpy(lines[i], line) that reads:
expression must be a pointer to a complete object type
I have also tried memcpy(lines[i], line, strlen(line)).
Any help would be much appreciated.
I think this should work for you
Here I used '\n' as a delimiter
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
char **str_split(char *a_str, const char a_delim)
{
char **result = 0;
size_t count = 0;
char *tmp = a_str;
char *last_comma = 0;
char delim[2];
delim[0] = a_delim;
delim[1] = 0;
/* Count how many elements will be extracted. */
while (*tmp)
{
if (a_delim == *tmp)
{
count++;
last_comma = tmp;
}
tmp++;
}
/* Add space for trailing token. */
count += last_comma < (a_str + strlen(a_str) - 1);
/* Add space for terminating null string so caller
knows where the list of returned strings ends. */
count++;
result = malloc(sizeof(char *) * count);
if (result)
{
size_t idx = 0;
char *token = strtok(a_str, delim);
while (token)
{
assert(idx < count);
*(result + idx++) = strdup(token);
token = strtok(0, delim);
}
assert(idx == count - 1);
*(result + idx) = 0;
}
return result;
}
int main()
{
char text[] = "this is a text file\nlooking for the word cat\nthe program should print also cats\nand crat and lcat but it shouldn’t\nprint the word caats";
char **tokens;
printf("ORIGINAL TEXT:\n%s\n\n", text);
tokens = str_split(text, ',');
if (tokens)
{
int i;
for (i = 0; *(tokens + i); i++)
{
printf("%s\n", *(tokens + i));
free(*(tokens + i));
}
printf("\n");
free(tokens);
}
return 0;
}

Problems with Heap Corruption - C

I am making a "simple" print out string, append string and remove section from a string. The append and new string works sometimes, sometimes it outputs nothing.
When i do:
char * temp = malloc(newSize);
It just stops outputting anything.
I have commented everything out in sections, trying to find the problem. Can't seem to find the problem, but google keeps coming up with "Heap Corruption".
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char * data;
int length;
} String;
int str_getLength(const char * characters)
{
int index = 0;
while (1)
{
if (characters[index] == '\0') break;
index++;
}
return index;
}
String str_new(const char * characters)
{
String result;
result.length = str_getLength(characters);
result.data = malloc(result.length);
memcpy(result.data, characters, result.length);
return result;
}
void str_append(String * str, const char * characters)
{
int charsLength = str_getLength(characters);
str->data = realloc(str->data, charsLength);
for (int i = 0; i < charsLength; i++) {
str->data[i + str->length] = characters[i];
}
str->length = str->length + charsLength;
}
void str_remove(String * str, int startIndex, int endIndex)
{
if (startIndex < 0 || endIndex > str->length || endIndex < startIndex) {
return;
}
int chunkSize = endIndex - startIndex;
int newSize = str->length - chunkSize;
char * temp = malloc(newSize);
// for (int i = 0; i < str->length; i++)
// {
// if (i < startIndex || i > endIndex) {
// temp[i] = str->data[i];
// }
// }
// free(str->data);
// str->length = newSize;
// str->data = temp;
}
}
int main()
{
String str = str_new("Hello, ");
printf("%s\n", str.data);
str_append(&str, "this is my first C application.");
printf("%s\n", str.data);
str_remove(&str, 0, 3);
printf("%s\n", str.data);
free(str.data);
return 0;
}
I expected it to output a modified string, it doesn't and sometimes it outputs nothing.
I am a beginner, sorry if it's a quick fix.
Apart from the blaze answer.
There are few more problems.
// for (int i = 0; i < str->length; i++)
// {
// if (i < startIndex || i > endIndex) {
// temp[i] = str->data[i];
// }
// }
You will access out of bound for temp.
You need to maintain separate index for temp.
char * temp = malloc(newSize+1);
int k=0;
for (int i = 0; i < str->length; i++)
{
if (i < startIndex || i > endIndex) {
temp[k++] = str->data[i];
}
}
temp[k] = '\0';
free(str->data);
str->length = newSize;
str->data = temp;
And
You are not null terminating the string after append.
str->data = realloc(str->data, str->length + charsLength +1); //current length + new length + \0
for (int i = 0; i < charsLength; i++) {
str->data[i + str->length] = characters[i];
}
str->data[i + str->length] = '\0'; //null terminate the new string
str->length = str->length + charsLength;
There are two problems with your reallocation. First of all, you're not assigning the result of the realloc to str->data, so if the memory was reallocated to a different place, tr->data points to invalid memory afterwards. Second, you're not adding the sizes of the string and the appended part, you're just taking the size of the part that you're appending.
This here
realloc(str->data, charsLength);
Should be:
str->data = realloc(str->data, charsLength + str->length + 1);

How can I debug this segmentation fault?

I get a segfault the second time this statement runs:
chunks[i].argv[0] = malloc( strlen(token) * sizeof(char *) + 1 );
The code in context is:
/* TODO: modify str_split to do the copying of its input string if it needs to (e.g. if it uses strtok on it), and return a struct that has the number of "chunks" it split out and the list of chunks. */
struct str_list *list_split(char *a_str, const char a_delim) {
char **result = 0;
char **result2 = 0;
size_t count = 0;
char *tmp = a_str;
char *last_comma = 0;
size_t count2 = 0;
char *tmp2 = a_str;
char *last_space = 0;
char delim[2];
delim[0] = a_delim;
delim[1] = 0;
struct str_list *chunks = NULL;
/* Count how many elements will be extracted. */
while (*tmp) {
if (a_delim == *tmp) {
count++;
last_comma = tmp;
}
tmp++;
}
/* Add space for trailing token. */
count += last_comma < (a_str + strlen(a_str) - 1);
/* Add space for terminating null string so caller
knows where the list of returned strings ends. */
count++;
result = malloc(sizeof(char *) * count);
chunks = malloc(sizeof(chunks));
//chunks.size = malloc(sizeof(int));
// counter = (int) count + 1;
//chunks->size = counter;
if (result == NULL) {
printf("Error allocating memory!\n"); //print an error message
return chunks;; //return with failure
}
if (result) {
size_t idx = 0;
char *token = strtok(a_str, delim);
int i = 0;
while (token) {
assert(idx < count);
*(result + idx++) = strdup(token); /* memory leak! how to free() */;
token = strtok(0, delim);;
}
assert(idx == count - 1);
*(result + idx) = 0;
}
chunks->size = (int) count;
chunks->argv = alloc_argv((unsigned) chunks->size);
for (int i = 0; i < 2; i++) { //count is wrong
while (*tmp2) {
if (' ' == *tmp2) {
count2++;
last_space = tmp2;
}
tmp2++;
}
char* token = strtok(result[i], " ");
while (token) {
printf("token: %s\n", token);
printf("size: %d\n", chunks->size);
printf("result: %s\n", result[i]);
printf("i: %d\n", i);
chunks[i].argv[0] = malloc( strlen(token) * sizeof(char *) + 1 );
chunks[i].argv[0] = strdup(token);;
token = strtok(0, " ");
}
}
return chunks;
}
My debugger says nothing interesting. Can you see what is wrong and what should be done? The call to the above function is:
int run_cmd(const char *cmd) {
struct str_list *chunks = list_split(cmd, '|');
struct pipeline *pipe = alloc_pipeline(2); //size is the number of pipelines
for (int i = 0; i < 2; i++) {
printf("i %d", i);
for (int j = 0; j < 1; j++) {
pipe[i].data[j] = chunks[i].argv[j];
}
}
int status = execute_pipeline(pipe);
// free_pipeline(pipe);
// free_str_list(chunks);
return status;
}
The definition of my structs is
struct str_list {
char *name;
int size;
char **argv;
};
struct pipeline {
char *name;
int size;
char **data;
};
This line
chunks = malloc(sizeof(chunks));
That allocates the size of the chunks variable, which is a pointer and is usually only 4 or 8 bytes large (depending on if you're on a 32 or 64 bit system).
A str_list structure is larger than that, which means you will write out of bounds of allocated memory, leading to undefined behavior and most likely a crash.
You seem to be using two of this structure, judging by the loops, which means you need to allocate two full str_list structures, which is simplest done by e.g.
chunks = malloc(2 * sizeof *chunks);

When the string is number C language

I want to return nothing when the string is number
here is my code,
#include <string.h>
#include <ctype.h>
int num = 0;
char* findWord(char* subString) {
char* word = malloc(sizeof(char) * (strlen(subString) + 1));
int i = 0;
int Position = 0;
num = 0;
while (ispunct(subString[i]) != 0 || isspace(subString[i]) != 0) {
i++;
}
num = i;
while (ispunct(subString[i]) == 0 && isspace(subString[i]) == 0) {
word[Position] = subString[i];
i++;
Position++;
}
word[Position] = '\0';
return word;
}
char** wordList(const char* s) {
int len = strlen(s);
int i = 0;
char* Copyword = malloc(sizeof(char) * len);
strncpy(Copyword, s, len);
char** result = (char**) malloc(sizeof(char*) * (len + 1));
char* word = NULL;
word = findWord(Copyword);
char* wordEnd = Copyword;
while (*word != 0) {
result[i] = word;
wordEnd = wordEnd + strlen(word) + num;
word = findWord(wordEnd);
i++;
}
result[i] = '\0';
free(Copyword);
return result;
}
int main(void) {
char** words = wordList("1 23 456 789");
int i = 0;
while (words[i] != NULL) {
printf("%s\n", words[i]);
free(words[i]); // We're done with that word
i++;
}
free(words); // We're done with the list
return 0;
}
my code is ok when the string is sentence.
however, in this case, I want to print nothing(just like a space) when the string is number.
but what I go is
1
23
456
789
I expect to get
nothing shows here! just a space
For starters: You pass a non 0-terminated C-"string" (Copyword) to findWord() and in there call strlen() on it. This just doesn't crash your app by bad luck.

Three level indirection char pointer in C causes a segment fault

I got a segment fault error at the line with the comments that contains lots of equals signs below.
The function below str_spit, I wrote it because I want to split a string using a specific char, like a comma etc.
Please help.
int str_split(char *a_str, const char delim, char *** result)
{
int word_length = 0;
int cur_cursor = 0;
int last_cursor = -1;
int e_count = 0;
*result = (char **)malloc(6 * sizeof(char *));
char *char_element_pos = a_str;
while (*char_element_pos != '\0') {
if (*char_element_pos == delim) {
char *temp_word = malloc((word_length + 1) * sizeof(char));
int i = 0;
for (i = 0; i < word_length; i++) {
temp_word[i] = a_str[last_cursor + 1 + i];
}
temp_word[word_length] = '\0';
//
*result[e_count] = temp_word;//==============this line goes wrong :(
e_count++;
last_cursor = cur_cursor;
word_length = 0;
}
else {
word_length++;
}
cur_cursor++;
char_element_pos++;
}
char *temp_word = (char *) malloc((word_length + 1) * sizeof(char));
int i = 0;
for (i = 0; i < word_length; i++) {
temp_word[i] = a_str[last_cursor + 1 + i];
}
temp_word[word_length] = '\0';
*result[e_count] = temp_word;
return e_count + 1;
}
//this is my caller function====================
int teststr_split() {
char delim = ',';
char *testStr;
testStr = (char *) "abc,cde,fgh,klj,asdfasd,3234,adfk,ad9";
char **result;
int length = str_split(testStr, delim, &result);
if (length < 0) {
printf("allocate memroy failed ,error code is:%d", length);
exit(-1);
}
free(result);
return 0;
}
I think you mean
( *result )[e_count] = temp_word;//
instead of
*result[e_count] = temp_word;//
These two expressions are equivalent only when e_count is equal to 0.:)
[] has a higher precedence than *, so probably parentheses will solve THIS problem:
(*result)[e_count] = temp_word;
I didn't check for more problems in the code. Hint: strtok() might do your job just fine.

Resources