I'm trying to write a function that takes in a path (char *) and splits it into an array of strings based around the '/' delimiter. Simplified code below :
/*
 * Splits `path` on '/' and stores a heap-allocated copy of each component in
 * a NULL-terminated array that is grown with realloc. Every component is also
 * echoed to stderr. Returns the number of components found.
 *
 * NOTE(review): `out` is a by-value copy of the caller's pointer and is reset
 * to NULL on entry, so the array assembled here is never handed back to the
 * caller.
 */
int split_path(char * path, char ** out) {
out = NULL;
char * token = strtok(path, "/"); /* strtok writes terminators into path */
int count = 0;
while(token) {
out = realloc(out, sizeof(char*) * (++count)); /* one more slot per token */
out[count-1] = malloc(sizeof(char) * strlen(token)+1);
strcpy(out[count-1], token);
fprintf(stderr, "%s\n", out[count-1]);
token = strtok(NULL, "/");
}
out = realloc(out, sizeof(char*) * (count+1)); /* room for the NULL sentinel */
out[count] = NULL;
return count;
}
/* Demo driver: splits a fixed path, then prints the count and first entry. */
int main(int argc, char * argv[]) {
char path[] = "/home/pirates/are/cool/yeah";
char ** out; /* not initialized here */
int count = split_path(path, out); /* passes the pointer's value, not its address */
fprintf(stdout, "count: %d\n", count);
fprintf(stderr, "1st: %s\n", out[0]); // segfaults here
return 0;
}
All of the print statements in the split_path function print perfectly, the output looks like this :
count: 1, string: home
count: 2, string: pirates
count: 3, string: are
count: 4, string: cool
count: 5, string: yeah
count: 5
1st: ./a.out
[1] 5676 segmentation fault (core dumped) ./a.out
But for some reason when I get back to the main function the double-char-array is no longer valid. I thought that it might be because it was pointing to memory declared in that split_path function but I'm doing strcpy to get the strings into it so it shouldn't be pointing back to memory that is local to that function. Any help is greatly appreciated.
You are mismanaging the out parameter. The out variable in main() is never assigned a valid memory address, hence the segfault. Assigning to the out parameter inside split_path() only changes the function's local copy; it never updates the out variable in main(). You need to pass the address of the variable to split_path() so that the function can update the variable itself, and main() can then access the memory it points to.
Also note that strtok() modifies the string it is parsing, so you should make a copy and then parse the copy so the original does not get destroyed. Otherwise, consider using strchr() instead of strtok().
Try something more like this instead:
/*
 * Split `path` on '/' into a NULL-terminated array of heap-allocated strings.
 *
 * path: NUL-terminated string to split; it is not modified (a private copy is
 *       tokenised instead, because strtok writes into its input).
 * out:  receives the array. On success *out is never NULL, even when the path
 *       has no components; the caller frees each element and then the array.
 *
 * Each component is also echoed to stderr.
 * Returns the number of components, or -1 on a NULL path or allocation
 * failure (in which case *out is NULL and nothing is left allocated).
 */
int split_path(char * path, char *** out) {
    char **parts = NULL;
    char *tmp = NULL;
    int count = 0;

    *out = NULL;
    if (path == NULL)
        return -1;

    size_t path_len = strlen(path);
    tmp = malloc(path_len + 1);
    if (tmp == NULL)
        return -1;
    memcpy(tmp, path, path_len + 1);

    /* Start with just the sentinel so an empty result is still a valid array. */
    parts = malloc(sizeof *parts);
    if (parts == NULL)
        goto fail;
    parts[0] = NULL;

    for (char *token = strtok(tmp, "/"); token != NULL; token = strtok(NULL, "/")) {
        /* count entries + the new one + the sentinel */
        char **grown = realloc(parts, sizeof *parts * ((size_t)count + 2));
        if (grown == NULL)
            goto fail;
        parts = grown;

        size_t token_len = strlen(token);
        parts[count] = malloc(token_len + 1);
        if (parts[count] == NULL)
            goto fail;
        memcpy(parts[count], token, token_len + 1);
        fprintf(stderr, "%s\n", token);

        parts[++count] = NULL;
    }

    free(tmp);
    *out = parts;
    return count;

fail:
    for (int i = 0; i < count; i++)
        free(parts[i]);
    free(parts);
    free(tmp);
    return -1;
}
/*
 * Demo driver: splits a fixed path, prints the component count and the first
 * component, then releases every string and the array itself.
 */
int main(int argc, char * argv[]) {
    (void)argc;
    (void)argv;

    char path[] = "/home/pirates/are/cool/yeah";
    char ** out = NULL;
    int count = split_path(path, &out);
    if (count < 0) {
        fprintf(stderr, "split_path failed\n");
        return 1;
    }

    fprintf(stdout, "count: %d\n", count);
    if (count > 0)
        fprintf(stderr, "1st: %s\n", out[0]);

    /* The array owns each string, so free the elements before the array. */
    for (int i = 0; i < count; i++)
        free(out[i]);
    free(out);
    return 0;
}
And don't forget error handling. I've left it out of this example for brevity, but you should not leave it out of your real code.
Related
I am trying to extract words from a string like this:
(octopus kitten) (game cake) (soccer football)
I attempted doing this with the help of strtok (I do strcpy just for not modifying the original token/string, also used memcpy, but it does the same in my case).
Main function:
/*
 * Walks `row` one "(...)" group at a time: strtok splits on '(', the closing
 * ')' is cut off, and the group is handed to get_and_print_word.
 * Note that strtok keeps a single hidden parse position shared by every
 * caller in the program.
 */
int main(int argc, char * argv[]) {
    char row[] = "(octopus kitten) (game cake) (soccer football)";
    char * pch;
    pch = strtok(row, "(");
    while (pch != NULL) {
        pch[strcspn(pch, ")")] = '\0';
        /* The helper shown below is named get_and_print_word, not print_word. */
        get_and_print_word(pch);
        pch = strtok(NULL, "(");
    }
    return 0;
}
Function for getting and printing each word:
/*
 * Copies `str` into a local buffer and prints the first two space-separated
 * words of that copy, one per line.
 *
 * NOTE(review): this starts a new strtok scan; strtok has one hidden parse
 * position, so a strtok loop in the caller loses its place after this runs.
 */
void get_and_print_word(char str[]) {
char r[4000];
// for not modifying the original string
strcpy(r, str);
char * c = strtok(r, " "); /* begins a fresh scan on the local copy */
for (int i = 0; i < 2; i++) {
printf("%s\n", c);
c = strtok(NULL, " ");
}
}
It works absolutely fine on the first iteration, but after that pch points to a different memory address than expected (it should point to the address of the letter "g").
It works absolutely fine (it's just printing string within the brackets) if we remove get_and_print_word(pch):
/*
 * Prints the text of every "(...)" group in `row`, one group per line,
 * without splitting the group into words.
 */
int main(int argc, char * argv[]) {
    char row[] = "(octopus kitten) (game cake) (soccer football)";
    char *group;

    for (group = strtok(row, "("); group != NULL; group = strtok(NULL, "(")) {
        /* Drop the closing parenthesis and anything after it. */
        size_t close_at = strcspn(group, ")");
        group[close_at] = '\0';
        printf("%s\n", group);
    }
    return 0;
}
But that's not what I want to do, I need to get each word, not just a string of two words and space between them.
Using pch = strtok(NULL, " )(") is also not appropriate in my case, because I need to store each pair of words (each word, of course, should be a separate string) in its own individual struct, so I definitely need this function.
How to solve this issue and why it works like this?
Why not use regular expression :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
/*
 * Usage: regex STRING REGEX
 *
 * Prints every non-overlapping match of the POSIX extended regular expression
 * REGEX in STRING, one "found : <text>" line per match.
 * Returns EXIT_FAILURE on bad usage, an invalid pattern or allocation failure.
 */
int main (int argc, char *argv[])
{
    if (argc < 3) {
        fprintf (stderr, "usage: %s STRING REGEX\n", argc > 0 ? argv[0] : "regex");
        return EXIT_FAILURE;
    }

    const char *str_request = argv[1];
    const char *str_regex = argv[2];
    regex_t preg;

    int err = regcomp (&preg, str_regex, REG_EXTENDED);
    if (err != 0) {
        char message[256];
        regerror (err, &preg, message, sizeof message);
        fprintf (stderr, "regcomp: %s\n", message);
        return EXIT_FAILURE;
    }

    /* Slot 0 holds the whole match, slots 1..re_nsub the sub-expressions. */
    size_t nmatch = preg.re_nsub + 1;
    regmatch_t *pmatch = malloc (sizeof (*pmatch) * nmatch);
    if (pmatch == NULL) {
        regfree (&preg);
        return EXIT_FAILURE;
    }

    const char *cursor = str_request;
    int eflags = 0;
    while (regexec (&preg, cursor, nmatch, pmatch, eflags) == 0) {
        size_t start = (size_t) pmatch[0].rm_so;
        size_t end = (size_t) pmatch[0].rm_eo;

        printf ("found : %.*s\n", (int) (end - start), cursor + start);

        if (end == start) {
            /* Empty match: step over one character or we would loop forever. */
            if (cursor[end] == '\0')
                break;
            end++;
        }
        /* searching next occurrence */
        cursor += end;
        /* Later searches start mid-string, so '^' must not match there. */
        eflags = REG_NOTBOL;
    }

    regfree (&preg);
    free (pmatch);
    return 0;
}
[puppet#damageinc regex]$ ./regex "(octopus kitten) (game cake) (soccer football)" "([a-z]+)"
found : octopus
found : kitten
found : game
found : cake
found : soccer
found : football
So i am attempting to pass a string array (char** arguments) to a function, fill the array with values and then print those values after returning from the function. The problem occurs when I try to print the first value of "arguments" which gives me a segmentation fault. Why is this? when I print the values in the "getArguments" function all goes as expected. I am new to C and yes this is an assignment. I am not looking for you to write this code for me however I would like an explanation of this behaviour as I try to understand this concept.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define BUFFERSIZE 81
int getArguments(char** arguments, char* argument);
void getPath(char* pathBuffer);
int checkForDirectoryChange(char **arguments, int num_args);
/*
 * Reads one command line from stdin, strips the trailing newline, splits it
 * with getArguments and reports whether the first word was "cd".
 */
int main(int argc, char *argv[]){
char * command;
char ** arguments = NULL;
char * cd_path;
int len, pid, ret_code, cd_requested = 1;
char buffer[BUFFERSIZE];
/* Get user input and the first token */
printf("Enter a command: > ");
command = fgets(buffer,BUFFERSIZE,stdin);
printf("The command entered was %s",buffer);
len = strlen(buffer);
if(buffer[len-1] == '\n')
buffer[len-1]='\0';
/* `arguments` is passed by value, so it is still NULL after this call */
cd_requested = getArguments(arguments, command);
printf("The argument passed is now: %s\n", arguments[0]);
if(cd_requested == 0){
fprintf(stdout,"Change directory requested.\n");
}
/*
char * pathBuf;
getPath(pathBuf);
free the memory allocated */
/*
pid = fork();
if(pid){
wait(NULL);
}else{
ret_code = execvp(*arguments, arguments);
if(ret_code){
printf("The fork failed, exiting.");
exit(0);
}
}*/
}
/*
 * Splits `command` on spaces (in place, via strtok), collects the token
 * pointers in a NULL-terminated array grown with realloc, prints the array,
 * and returns strcmp("cd", first token), i.e. 0 when the first word is "cd".
 *
 * NOTE(review): `arguments` is a by-value copy of the caller's pointer; the
 * reallocated array is only reachable inside this function.
 */
int getArguments(char** arguments, char* command){
int n_spaces = 0,i;
char *token;
token = strtok(command, " ");
/* Loop until we have gotten all of the tokens */
while (token) {
arguments = realloc (arguments, sizeof (char*) * ++n_spaces);
if (arguments == NULL){
printf("Memory allocation failed: token - %d\n", n_spaces);
exit (-1); /* memory allocation failed */
}
arguments[n_spaces-1] = token; /* points into `command`, not a copy */
token = strtok (NULL, " ");
}
/* realloc one extra element for the last NULL */
arguments = realloc (arguments, sizeof (char*) * (n_spaces+1));
arguments[n_spaces] = 0;
/* print the result */
for (i = 0; i < (n_spaces+1); ++i)
printf ("arguments[%d] = %s\n", i, arguments[i]);
return strcmp("cd",arguments[0]);
}
/* Stub: ignores both parameters and always returns 0. */
int checkForDirectoryChange(char** arguments, int num_args){
    (void)arguments;
    (void)num_args;

    const int result = 0;
    return result;
}
/*
 * Queries the _CS_PATH configuration string with confstr: the first call
 * (NULL buffer, size 0) supplies the size to allocate, the second fills the
 * freshly allocated buffer. Aborts if the allocation fails.
 *
 * NOTE(review): `pathBuffer` is a by-value parameter that is overwritten with
 * the malloc result, so the caller never receives the buffer and it is not
 * freed here.
 */
void getPath(char* pathBuffer){
size_t n;
n = confstr(_CS_PATH, NULL, (size_t) 0);
pathBuffer = malloc(n);
if (pathBuffer == NULL)
abort();
confstr(_CS_PATH, pathBuffer, n);
}
This happens because getArguments() only reassigns its own local copy of the char** pointer. The arguments variable in main() is never updated, so it is still NULL when main() dereferences it.
You should define getArguments() as
/*
 * Splits `command` on spaces (in place, via strtok) and stores the token
 * pointers in a NULL-terminated array returned through `*arguments`.
 *
 * arguments: address of the caller's array pointer. `*arguments` must be NULL
 *            or a pointer previously obtained from malloc/realloc; on return
 *            it points to the (re)allocated array, which the caller frees.
 *            The tokens themselves point into `command` and are not copied.
 * command:   writable NUL-terminated command line.
 *
 * Prints the resulting array, then returns strcmp("cd", first token) -- 0 when
 * the first word is "cd" -- or a non-zero value when there are no tokens.
 * Exits the process if memory cannot be allocated.
 */
int getArguments(char*** arguments, char* command) {
    int n_spaces = 0, i;
    char **grown;
    char *token = strtok(command, " ");

    /* Loop until we have gotten all of the tokens */
    while (token) {
        /* Keep the old pointer until realloc is known to have succeeded. */
        grown = realloc (*arguments, sizeof (char*) * (size_t)(n_spaces + 1));
        if (grown == NULL){
            printf("Memory allocation failed: token - %d\n", n_spaces + 1);
            exit (-1); /* memory allocation failed */
        }
        *arguments = grown;
        (*arguments)[n_spaces++] = token;
        token = strtok (NULL, " ");
    }

    /* realloc one extra element for the last NULL */
    grown = realloc (*arguments, sizeof (char*) * (size_t)(n_spaces + 1));
    if (grown == NULL){
        printf("Memory allocation failed: token - %d\n", n_spaces + 1);
        exit (-1);
    }
    *arguments = grown;
    (*arguments)[n_spaces] = NULL;

    /* print the result, including the terminating NULL entry */
    for (i = 0; i < (n_spaces + 1); ++i)
        printf ("arguments[%d] = %s\n", i,
                (*arguments)[i] ? (*arguments)[i] : "(null)");

    if (n_spaces == 0)
        return 1; /* nothing was entered, so it cannot be "cd" */
    return strcmp("cd", (*arguments)[0]);
}
And call it as the following inside main().
cd_requested = getArguments(&arguments, command);
I tried really hard to search for a solution to this but I can't think of good enough keywords.
Currently I'm having troubles grasping the concept behind makeargv and it's usage with triple pointers (I have no idea what ***foo means, it doesn't seem to be as easy of a concept as **foo or *foo). So I made my own:
/*
 * Skips leading delimiters in `string`, copies the rest to the heap, counts
 * the tokens in a first strtok pass, restores the copy, then stores a pointer
 * to each token in `results` during a second pass. The token count is written
 * to `*numargs`.
 *
 * NOTE(review): `results` is a local array, so the returned pointer refers to
 * storage that ends with this call; the write to results[numtokens+1] is one
 * past the last element; `numtokens` is never assigned when the first strtok
 * returns NULL; `delims` is not declared in this snippet.
 */
const char **makeargv(char *string, int *numargs) {
string = string + strspn(string, delims);
char *copy = malloc(strlen(string) + 1);
int i;
strcpy(copy, string);
int numtokens;
/* first pass: count the tokens (this chops up `copy`) */
if (strtok(copy, delims) != NULL) {
for (numtokens = 1; strtok(NULL, delims) != NULL; numtokens++) {}
}
strcpy(copy, string); /* restore the copy for the second pass */
const char *results[numtokens+1];
results[0] = strtok(copy, delims);
for (i = 1; i < numtokens; i++) {
results[i] = strtok(NULL, delims);
}
results[numtokens+1] = NULL;
*numargs = numtokens;
return results;
}
Here's the part at where it breaks:
/*
 * Opens `filename`, reads it line by line with file_getline, skips lines of
 * length 1, splits every other line with makeargv and prints the first two
 * arguments.
 */
void parse_file(char* filename) {
char* line = malloc(160*sizeof(char));
FILE* fp = file_open(filename);
int i = 0;
int numargs = 0;
int *pointer = &numargs;
while((line = file_getline(line, fp)) != NULL) {
if (strlen(line) == 1){
continue;
}
const char **args = makeargv(line, pointer);
printf("%s\n", args[0]);
printf("%s\n", args[1]);
/* This prints out args[0], but then args[1] causes a seg fault. Even if I replace
the args[1] with another args[0] it still causes a seg fault */
}
fclose(fp);
/* NOTE(review): the loop ends with line == NULL, so this no longer frees the malloc'd buffer */
free(line);
}
I have a working array of strings. However when I try to print out the strings in the array, I can only print 1 of my choice and then it seg faults for any subsequent calls. lets pretend my array of strings is argv[3] = {"Yes", "no", "maybe"}, if i call argv[0], it will let me call "Yes", but any other calls (even if i call argv[0] again) do not work and cause a segfault. I can call any of the elements in the array, but once i call one the rest cease to work causing segfaults.
Help please? D: This is in C.
const char *results[numtokens+1];
This array "results" is a local variable, it is only available inside of "makeargv".
You'd better use malloc:
/* malloc takes a size in bytes, so multiply the element count by the element size */
const char **results = malloc((numtokens + 1) * sizeof *results);
And I believe there is memory leak in your code.
You will not be able to free the memory for "char *copy"
char *copy = malloc(strlen(string) + 1);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Split `string` on spaces, tabs and newlines into a NULL-terminated array.
 *
 * string:  NUL-terminated input; it is not modified (a heap copy is
 *          tokenised instead).
 * numargs: receives the number of tokens; must not be NULL.
 *
 * Ownership: the returned array is heap-allocated. When there is at least one
 * token, element 0 is also the start of the single heap block that holds all
 * the token text, so the caller releases everything with
 * `free(args[0]); free(args);`. When there are no tokens, element 0 is NULL
 * and only the array needs freeing.
 *
 * Returns NULL (with *numargs set to 0) if memory cannot be allocated.
 */
char **makeargv(char *string, int *numargs) {
    static const char *delims = " \t\n";

    *numargs = 0;

    /* Skip leading delimiters so the first token starts at copy[0]. */
    string = string + strspn(string, delims);
    size_t length = strlen(string);

    char *copy = malloc(length + 1);
    if (copy == NULL)
        return NULL;
    memcpy(copy, string, length + 1);

    /* First pass: count the tokens (this chops up `copy`). */
    int numtokens = 0;
    for (char *scan = copy; strtok(scan, delims); scan = NULL)
        ++numtokens;

    char **results = malloc(sizeof *results * ((size_t)numtokens + 1));
    if (results == NULL) {
        free(copy);
        return NULL;
    }

    /* Second pass on a restored copy: record where each token starts. */
    memcpy(copy, string, length + 1);
    char *scan = copy;
    int i;
    for (i = 0; i < numtokens; ++i, scan = NULL)
        results[i] = strtok(scan, delims);
    results[i] = NULL;

    /* With no tokens nothing points into `copy`, so release it here. */
    if (numtokens == 0)
        free(copy);

    *numargs = numtokens;
    return results;
}
/*
 * Opens `filename` for reading and returns the stream. If the file cannot be
 * opened, reports the reason with perror and terminates with exit status 1.
 */
FILE *file_open(char *filename){
    FILE *stream = fopen(filename, "r");

    if (stream != NULL)
        return stream;

    perror("file_open");
    exit(1);
}
/*
 * Reads `filename` line by line and prints every whitespace-separated word of
 * each line on its own row, followed by one blank row per line. Lines that
 * consist of a newline only are skipped.
 *
 * A line longer than the buffer is delivered by fgets in several pieces and
 * each piece is split on its own.
 */
void parse_file(char* filename) {
    enum { LINE_CAPACITY = 160 };
    char line[LINE_CAPACITY]; /* small and fixed-size, so no heap allocation needed */
    FILE* fp = file_open(filename);
    int numargs = 0;

    while (fgets(line, sizeof line, fp)) {
        if (*line == '\n')
            continue;

        char **args = makeargv(line, &numargs);
        if (args == NULL) {
            fprintf(stderr, "parse_file: out of memory\n");
            break;
        }

        for (int i = 0; i < numargs; ++i)
            printf("%s\n", args[i]);
        printf("\n");

        /* args[0] is the block holding the token text; free(NULL) is a no-op. */
        free(args[0]);
        free(args);
    }
    fclose(fp);
}
/* Usage: prog FILE -- prints the words of every line of FILE. */
int main(int argc, char *argv[]){
    if (argc < 2) {
        fprintf(stderr, "usage: %s FILE\n", argc > 0 ? argv[0] : "prog");
        return 1;
    }
    parse_file(argv[1]);
    return 0;
}
I have written the following program to resolve a path to several directory names
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Copies `path` into the local buffer `s`, prints it, and returns NULL when
 * the path consists only of '/' characters. Otherwise stores the first
 * '/'-separated component in `*name` and returns strtok_r's save pointer.
 *
 * NOTE(review): both `*name` and the returned pointer refer to the local
 * array `s`, whose lifetime ends when this function returns.
 */
char *
tokenizer(char *path, char **name){
char s[300];
char *buffer; /* strtok_r's save pointer; ends up pointing into s */
memcpy(s, path, strlen(path)+1);
printf("%s\n",s); // PROBLEM
int i=0;
/* count the leading '/' characters */
while(s[i] == '/'){
i++;
}
if (i == strlen(path)){
return NULL;
}
*name = strtok_r(s, "/", &buffer);
return buffer;
}
/*
 * Reads one word from stdin and repeatedly calls tokenizer, feeding each
 * returned pointer back in as the next path, printing the token each time.
 */
int main(void){
char str[300];
char *token, *p;
scanf("%s",str); /* NOTE(review): no field width, so input length is unbounded */
p = tokenizer(str, &token);
if (p != NULL)
printf("%s\n",token);
else
printf("Nothing left\n");
while((p=tokenizer(p, &token)) != NULL){
printf("%s\n",token);
}
}
Output of the above program
Input: a/b/c
Output: a/b/c
a/b/c
a
b/c
b
c
c
If I comment the line labelled PROBLEM
Input: a/b/c
Output: Some garbage value
Can somebody explain me the reason for this strange behavior?
Note:
I have realised that s is a stack-allocated variable and that it ceases to exist once tokenizer() returns to main(), but why does the program work when I use printf()?
In addition to what geekasaur says:
strtok_r's 3rd parameter is used incorrectly, in two ways:
1. It should be initialized to NULL before the first call.
2. It shouldn't be used in any way (you return it to the caller). It should only be passed to another strtok_r call.
You are returning a pointer into a stack-allocated string (buffer points into s); s's memory ceases to be meaningful after tokenizer returns.
You cannot do this
char s[300];
char *buffer;
...
*name = strtok_r(s, "/", &buffer);
return buffer;
Here buffer is a pointer to a s[300] position. s[300] is a function local variable allocated on the stack when the function is called and destroyed when the function returns.
So you are not returning a valid pointer, you cannot use that pointer out of the function.
Along with the observations that you're returning a pointer to a local variable, I think it's worth noting that your tokenizer is almost 100% pointless.
Most of what your tokenizer does is skip across any leading / characters before calling strtok_r -- but you're passing '/' as the delimiter character to strtok_r, which will automatically skip across any leading delimiter characters on it own.
Rather simpler code suffices to print out the components of a path without the delimiters:
char path[] = "a/b/c";
char *pos = NULL;
char *component = strtok_r(path, "/", &pos);
while (NULL != component) {
printf("%s\n", component);
component = strtok_r(NULL, "/", &pos);
}
Try this:
/*
 * Returns the next '/'-separated component of a path as a newly allocated
 * string.
 *
 * path: the string to start scanning, or NULL to continue after the component
 *       returned by the previous call. The string is not modified, but it
 *       must stay valid while the scan continues.
 * name: receives the same pointer that is returned.
 *
 * Only one separator is consumed per call, so "a//b" yields "a", "" and "b".
 * Returns NULL when the input is exhausted, when no scan has been started, or
 * when memory cannot be allocated. The caller frees each returned string.
 * The scan position is kept in a static variable, so only one scan can be in
 * progress at a time.
 */
char*
token(char * path, char ** name){
    static char * obuffer = NULL; /* where a call with path == NULL resumes */
    char * p = (path != NULL) ? path : obuffer;
    char * buffer;
    size_t length;

    /* Check for "nothing to scan" before allocating, so nothing is leaked. */
    if(!p || !*p) return NULL;

    /* Size the buffer from the component itself instead of a fixed 257 bytes. */
    length = strcspn(p, "/");
    buffer = malloc(length + 1);
    if(!buffer) return NULL;

    memcpy(buffer, p, length);
    buffer[length] = '\0';

    p += length;
    if(*p == '/')
        p++; /* remove the / from string. */

    obuffer = p;
    *name = buffer;
    return buffer;
}
/* Demo: prints every component of a sample path, releasing each as it goes. */
int main(void)
{
    char * s = "foo/baa/hehehe/";
    char * name = NULL;
    char * t = token(s, &name);
    while(t) {
        printf("%s\n", name);
        free(t); /* each component returned by token() is a separate allocation */
        t = token(NULL, &name);
    }
    return 0;
}
the output:
foo
baa
hehehe
But you are basically "reinventing the wheel" of strtok() function..
I wrote a simple url parser using strtok(). here's the code
#include <stdio.h>
#include <stdlib.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} aUrl;
/*
 * Splits `url` into protocol, host, port and path and stores them in `ret`.
 * protocol and host point into one duplicated copy of `url`; path points into
 * a second duplicate that starts right after the host.
 *
 * The order of the strtok calls matters: each call with a non-NULL first
 * argument starts a new scan, and the strtok(NULL, ":") below continues the
 * scan of the host string.
 */
void parse_url(char *url, aUrl *ret) {
printf("Parsing %s\n", url);
char *tmp = (char *)_strdup(url);
//char *protocol, *host, *port, *path;
int len = 0;
// protocol is now e.g. http: or https:
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2; /* +2 for the "//" after the scheme */
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
//printf("char at %d => %c", len, url[len]);
ret->path = (char *)_strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
// host is now e.g. address.com:8080
//tmp = (char *)_strdup(host);
//strtok(tmp, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":"); /* port text, if any; tmp no longer holds the duplicate */
if(tmp == NULL) {
if(strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if(strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
//host = (char *) strtok(NULL, "/");
}
/*
*
*/
/* Parses a sample URL and prints each of its parts. */
int main(int argc, char** argv) {
    aUrl myUrl;

    printf("hello moto\n");
    parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);
    printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n",
           myUrl.protocol,
           myUrl.host,
           myUrl.port,
           myUrl.path);

    return EXIT_SUCCESS;
}
As you can see, I use strtok() a lot so I can "slice" the url. I don't need to support urls different than http or https so the way it's done solves all of my problems.
My concern is (this is running on an embedded device) - Am I wasting memory ?
When I write something like
ret->protocol = (char *) strtok(tmp, "/");
And then later call
ret->protocol = (char *) strtok(ret->protocol, ":");
Does the memory that ret->protocol first pointed to remain allocated? I thought that maybe I should assign the result of the first call to a tmp pointer, point ret->protocol at the right portion of the string with the second strtok call, and then free(tmp).
What should be the best way to use strtok ?
To answer your question directly, strtok only returns a pointer to a location inside the string you give it as input-- it doesn't allocate new memory for you, so shouldn't need to call free on any of the pointers it gives you back in return.
For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.
Also note that your memory allocation is problematic here-- you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of "ret". Your caller will thus be responsible for free'ing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to know magically what pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)
strtok modifies the string in place, replacing each delimiter it finds with a NUL character ('\0'). Since strings in C are NUL-terminated, it now appears that your original pointer is pointing to a shorter string, even though the original buffer is still there and still occupies the same amount of memory (but with the delimiters replaced by NUL characters).
The short answer is this: Keep a pointer to the beginning of your string buffer, and have another pointer that is your "current" pointer into the string as you parse it. When you use strtok or iterate over the string in other ways you update the "current" pointer but leave the beginning pointer alone. When you're finished, free() the beginning pointer. No memory leaked.
Do you know you can continue parsing the string using NULL as first parameter of strtok?
First call:
char* token = strtok(string, delimiters);
Then:
token = strtok(NULL, other_delimiters);
This allow you to simplify your code:
/* Returns a heap copy of `text` with `suffix` appended, or NULL on failure. */
static char *copy_with_suffix(const char *text, const char *suffix)
{
    size_t text_len = strlen(text);
    size_t suffix_len = strlen(suffix);
    char *result = malloc(text_len + suffix_len + 1);

    if (result != NULL) {
        memcpy(result, text, text_len);
        memcpy(result + text_len, suffix, suffix_len + 1);
    }
    return result;
}

/*
 * Splits `url` into protocol, host and path using one continued strtok scan.
 *
 * url: NUL-terminated URL; it is not modified (a private copy is tokenised),
 *      so string literals are accepted.
 * ret: receives three separately allocated strings that the caller frees:
 *      protocol with "//" appended (e.g. "http://"), host (including any
 *      ":port" suffix) and path (without the leading '/', cut at '#').
 *      ret->port is not touched.
 *
 * Returns 0 on success. Returns -1 if an argument is NULL, any of the three
 * parts is missing, or memory runs out; the three strings are then NULL.
 */
int parse_url(char *url, aUrl *ret)
{
    int status = -1;
    char *work;
    char *token;

    if (url == NULL || ret == NULL)
        return -1;

    ret->protocol = NULL;
    ret->host = NULL;
    ret->path = NULL;

    work = copy_with_suffix(url, "");
    if (work == NULL)
        return -1;

    //get protocol
    token = strtok(work, "/");
    if (token == NULL)
        goto done;
    ret->protocol = copy_with_suffix(token, "//");
    if (ret->protocol == NULL)
        goto done;

    //get host -- strtok skips the whole "//" run by itself, so no extra
    //call is needed to step over the second '/'
    token = strtok(NULL, "/");
    if (token == NULL)
        goto done;
    ret->host = copy_with_suffix(token, "");
    if (ret->host == NULL)
        goto done;

    // get path
    token = strtok(NULL, "#");
    if (token == NULL)
        goto done;
    ret->path = copy_with_suffix(token, "");
    if (ret->path == NULL)
        goto done;

    status = 0;

done:
    if (status != 0) {
        free(ret->protocol);
        free(ret->host);
        free(ret->path);
        ret->protocol = NULL;
        ret->host = NULL;
        ret->path = NULL;
    }
    free(work);
    return status;
}
Note that I added a return value so the caller can tell whether parsing succeeded.
Thanks for sharing your code! I ran it inside valgrind and fixed two memory leaks generated by strdup functions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} URL;
/*
 * Splits a "scheme://host[:port]/path#fragment" URL into its parts.
 *
 * url: NUL-terminated URL; it is not modified.
 * ret: receives the result. protocol and host point into one heap block
 *      (protocol is its start) and path is a second heap block, so releasing
 *      ret->protocol and ret->path frees everything.
 *
 * port is the explicit port if present, otherwise 80 for "http", 443 for
 * "https" and 0 for anything else. path is "" when the URL has none.
 *
 * On any failure (NULL url, a URL that does not start with a scheme followed
 * by a host, or out of memory) every pointer field is NULL and port is 0.
 */
void parse_url(char *url, URL *ret) {
    ret->protocol = NULL;
    ret->host = NULL;
    ret->path = NULL;
    ret->port = 0;

    if (url == NULL)
        return;

    char *tmp = strdup(url);
    if (tmp == NULL)
        return;

    char *scheme = strtok(tmp, "/");                            /* "http:" */
    char *authority = (scheme != NULL) ? strtok(NULL, "/") : NULL; /* "host:port" */

    /* The scheme must be the start of the block so the caller can free it
     * through ret->protocol; a URL with a leading '/' fails that. */
    if (scheme != tmp || authority == NULL) {
        free(tmp);
        return;
    }

    /* The path starts after "<scheme>//<authority>"; stay inside url even if
     * there were fewer than two slashes. */
    size_t url_len = strlen(url);
    size_t len = strlen(scheme) + 2 + strlen(authority);
    if (len > url_len)
        len = url_len;

    char *path = strdup(url + len);
    if (path == NULL) {
        free(tmp);
        return;
    }
    /* Cut in place so the stored pointer is always the one to free. */
    path[strcspn(path, "#")] = '\0';
    scheme[strcspn(scheme, ":")] = '\0';

    ret->path = path;
    ret->protocol = scheme;
    ret->host = strtok(authority, ":");
    char *port_text = strtok(NULL, ":");

    if (port_text != NULL) {
        ret->port = atoi(port_text);
    } else if (strcmp(ret->protocol, "http") == 0) {
        ret->port = 80;
    } else if (strcmp(ret->protocol, "https") == 0) {
        ret->port = 443;
    }
}
/*
 * Releases the two heap blocks owned by a URL filled in by parse_url and
 * clears the pointers so the struct cannot be used or freed twice by mistake.
 * host is not freed on its own because parse_url makes it point into the
 * block that protocol starts. A NULL `url` is ignored.
 */
void free_url(URL *url) {
    if (url == NULL)
        return;

    free(url->path);
    free(url->protocol);
    url->path = NULL;
    url->protocol = NULL;
    url->host = NULL; /* pointed into the protocol block, which is now gone */
}
/* Parses a sample URL, prints its parts, then releases them. */
int main(int argc, char** argv) {
    URL url;

    parse_url("http://example.com:3000/Teste/asdf#coisa", &url);
    printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n",
           url.protocol,
           url.host,
           url.port,
           url.path);
    free_url(&url);

    return EXIT_SUCCESS;
}