Conditional jump or move depends on uninitialised value(s) in C

I have a function that returns a parsed_url structure, which looks like this:
typedef struct url_parser_url {
    char *protocol;
    char *host;
    int port;
    char *path;
    char *query_string;
    int host_exists;
} url_parser_url_t;
url_parser_url_t *parsed_url;
parsed_url = (url_parser_url_t *) malloc(sizeof(url_parser_url_t));
parse_url(address, true, parsed_url);
printf("parsed_url->path = %s\n", parsed_url->path);
The parse_url function looks like this:
int parse_url(char *url, bool verify_host, url_parser_url_t *parsed_url) {
    char *local_url = (char *) malloc(sizeof(char) * (strlen(url) + 1));
    char *token;
    char *token_host;
    char *host_port;
    char *token_ptr;
    char *host_token_ptr;
    char *path = NULL;

    strcpy(local_url, url);

    token = strtok_r(local_url, ":", &token_ptr);
    parsed_url->protocol = (char *) malloc(sizeof(char) * strlen(token) + 1);
    strcpy(parsed_url->protocol, token);

    token = strtok_r(NULL, "/", &token_ptr);
    if (token) {
        host_port = (char *) malloc(sizeof(char) * (strlen(token) + 1));
        strcpy(host_port, token);
    } else {
        host_port = (char *) malloc(sizeof(char) * 1);
        strcpy(host_port, "");
    }

    token_host = strtok_r(host_port, ":", &host_token_ptr);
    if (token_host) {
        parsed_url->host = (char *) malloc(
                sizeof(char) * strlen(token_host) + 1);
        strcpy(parsed_url->host, token_host);

        if (verify_host) {
            struct hostent *host;
            host = gethostbyname(parsed_url->host);
            if (host != NULL) {
                parsed_url->host_exists = 1;
            } else {
                parsed_url->host_exists = 0;
            }
        } else {
            parsed_url->host_exists = -1;
        }
    } else {
        parsed_url->host_exists = -1;
        parsed_url->host = NULL;
    }

    token_host = strtok_r(NULL, ":", &host_token_ptr);
    if (token_host)
        parsed_url->port = atoi(token_host);
    else
        parsed_url->port = 0;

    token_host = strtok_r(NULL, ":", &host_token_ptr);
    assert(token_host == NULL);

    token = strtok_r(NULL, "?", &token_ptr);
    parsed_url->path = NULL;
    if (token) {
        path = (char *) realloc(path, sizeof(char) * (strlen(token) + 2));
        strcpy(path, "/");
        strcat(path, token);

        parsed_url->path = (char *) malloc(sizeof(char) * strlen(path) + 1);
        strncpy(parsed_url->path, path, strlen(path));

        free(path);
    } else {
        parsed_url->path = (char *) malloc(sizeof(char) * 2);
        strcpy(parsed_url->path, "/");
    }

    token = strtok_r(NULL, "?", &token_ptr);
    if (token) {
        parsed_url->query_string = (char *) malloc(
                sizeof(char) * (strlen(token) + 1));
        strncpy(parsed_url->query_string, token, strlen(token));
    } else {
        parsed_url->query_string = NULL;
    }

    token = strtok_r(NULL, "?", &token_ptr);
    assert(token == NULL);

    free(local_url);
    free(host_port);

    return 0;
}
The problem is that when I call parse_url and then use the parsed_url->path member, it throws this error:
==16647== Conditional jump or move depends on uninitialised value(s)
Can anyone explain to me what is happening and why? Thank you.

And there it is. Although it is supposed to be a safer alternative to strcpy(), strncpy() has a nasty wart. The standard says:
The strncpy function copies not more than n characters (characters that follow a null character are not copied) from the array pointed to by s2 to the array pointed to by s1.
(C2011 7.24.2.4/2), and note 308 clarifies that
Thus, if there is no null character in the first n characters of the array pointed to by s2, the result will not be null-terminated.
The code that is ultimately responsible for your particular valgrind complaint is this:
strncpy(parsed_url->path, path, strlen(path));
Since by definition there cannot be a null character within the first strlen(path) characters of path, that strncpy() reliably fails to ensure that the copy is null-terminated. You have at least one other instance of the same problem in your code.
Since you seem to take sufficient care to ensure that enough space is available, one solution would be to switch from strncpy() to strcpy(). That would also be more efficient, because you would avoid duplicate calls to strlen().
As I noted in comments, however, if you're willing to rely on POSIX's strdup(), then that's cleaner than strlen() + malloc() + str[n]cpy(), and has the same semantics (you take responsibility for freeing the memory allocated for the copy). You wouldn't have even had the opportunity to make these errors if you had made your copies that way.
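For illustration, here is a minimal sketch of how the two affected copies could be rewritten along those lines (just one way to restructure it; error checks are omitted for brevity):
/* path: build "/" + token directly in the destination buffer,
   so the result is always null-terminated */
if (token) {
    parsed_url->path = malloc(strlen(token) + 2);   /* '/' + token + '\0' */
    strcpy(parsed_url->path, "/");
    strcat(parsed_url->path, token);
} else {
    parsed_url->path = strdup("/");
}

/* query string: strdup() allocates and null-terminates in one step */
token = strtok_r(NULL, "?", &token_ptr);
parsed_url->query_string = token ? strdup(token) : NULL;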

Related

Why do I get segmentation error when calling a parsing function in C

I have been trying to understand how the custom function below works to parse lines from argv, but I keep getting segmentation errors.
I have been trying to debug it for hours, but I cannot find the bug that is eating into "restricted memory".
The function takes a string literal and a delimiter string (not a character).
When I use valgrind to audit the function, it reports a SIGSEGV when calling strtok.
I understand strtok cannot work directly with string literals because that can cause undefined behaviour. So I decided to copy the str to a local variable first.
Yes, I tried using an array as the copy too, but it still throws the segmentation error.
What I really don't understand is: why is strtok not getting enough memory?
char **splitstring(char *str, const char *delim)
{
    int i, wn;
    char **array;
    char *token;
    char *copy;

    copy = malloc(strlen(str) + 1);
    if (copy == NULL)
    {
        perror("hsh");
        return (NULL);
    }
    i = 0;
    while (str[i])
    {
        copy[i] = str[i];
        i++;
    }
    copy[i] = '\0';
    token = strtok(copy, delim);
    array = malloc((sizeof(char *) * 2));
    array[0] = strdup(token);
    i = 1;
    wn = 3;
    while (token)
    {
        token = strtok(NULL, delim);
        array = realloc(array, (sizeof(char *) * (wn - 1)), (sizeof(char *) * wn));
        array[i] = strdup(token);
        i++;
        wn++;
    }
    free(copy);
    return (array);
}
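(For reference only, and not from the original thread: a minimal sketch of the usual shape of such a loop, assuming the standard two-argument realloc() and checking each token before calling strdup() on it. Allocation failures are left unchecked for brevity.)
/* needs <stdlib.h> and <string.h>; realloc()/strdup() failures unchecked */
char **splitstring(char *str, const char *delim)
{
    char **array = NULL;
    char *copy = strdup(str);
    char *token;
    size_t n = 0;

    if (copy == NULL)
        return (NULL);
    for (token = strtok(copy, delim); token != NULL; token = strtok(NULL, delim))
    {
        array = realloc(array, sizeof(char *) * (n + 2)); /* extra slot for NULL */
        array[n++] = strdup(token);
    }
    if (array != NULL)
        array[n] = NULL; /* callers stop at the NULL entry */
    free(copy);
    return (array);
}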

null terminate an array of strings

I am trying to figure out how to get my array of strings from get_arguments to be NULL-terminated, or, if that isn't the issue, how to get it to work in my execv call.
char ** get_arguments(const char * string) {
    char * copy = strdup(string);
    char * remove_newline = "";
    for (;;) {
        remove_newline = strpbrk(copy, "\n\t");
        if (remove_newline) {
            strcpy(remove_newline, "");
        }
        else {
            break;
        }
    }
    char (* temp)[16] = (char *) malloc(256 * sizeof(char));
    char * token = strtok(copy, " ");
    strcpy(temp[0], token);
    int i = 1;
    while (token && (token = strtok(NULL, " "))) {
        strcpy(temp[i], token);
        i++;
    }
    char * new_null;
    //new_null = NULL;
    //strcpy(temp[i], new_null);
    if (!temp[i]) printf("yup\n");
    int c = 0;
    for ( ; c <= i; c++) {
        printf("%s ", temp[c]);
    }
    return temp;
}
I am trying to read in a space-separated string, similar to find ./ -name *.h, and pass the pieces to execv.
char (* arguments)[16] = (char **) malloc(256 * sizeof(char));
//...numerous lines of unrelated code
pid = fork();
if (pid == 0) {
    arguments = get_arguments(input_string);
    char * para[] = {"find", "./", "-name", "*.h", NULL};
    execv("/usr/bin/find", (char * const *) arguments);
    //printf("%s\n", arguments[0]);
    printf("\nexec failed: %s\n", strerror(errno)); //ls -l -R
    exit(-1);
}
When I swap arguments in the execv call for para it works as intended, but trying to call with arguments returns exec failed: Bad address. If I remove the NULL from para I get the same issue. I've tried strcpy(temp, (char *) NULL), the version you see commented out in get_arguments, and a number of other things that I can't recall in their entirety, and my program ranges from Segmentation fault to failure to compile from attempting to strcpy NULL.
Changing the declarations of arguments and temp to char ** arguments = (char *) malloc(256 * sizeof(char)); and char ** temp = (char *) malloc(256 * sizeof(char)); clears up the "warning: initialization from incompatible pointer type" warning, but causes a segfault on all calls to get_arguments.
You want this:
char* temp[256]; // an array of 256 char*'s
char * token = strtok(copy, " ");
temp[0] = strdup(token);
int i = 1;
while (token && (token = strtok(NULL, " "))) {
    temp[i] = strdup(token);
    i++;
}
temp[i] = NULL;
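Note that temp above is a local array of pointers; if, as in the original code, it has to be returned from get_arguments(), it would need to be heap-allocated instead. A minimal sketch under that assumption (keeping the fixed cap of 256 entries from the question):
/* heap-allocate the pointer array so it survives the return */
char **temp = malloc(256 * sizeof(char *));
char *token = strtok(copy, " ");
int i = 0;
while (token) {
    temp[i++] = strdup(token);
    token = strtok(NULL, " ");
}
temp[i] = NULL;   /* execv() expects a NULL-terminated argument vector */
return temp;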

Segfault while accessing memory malloc'd in function

I'm trying to write a function that takes in a path (char *) and splits it into an array of strings based on the '/' delimiter. Simplified code below:
int split_path(char * path, char ** out) {
    out = NULL;
    char * token = strtok(path, "/");
    int count = 0;
    while (token) {
        out = realloc(out, sizeof(char*) * (++count));
        out[count-1] = malloc(sizeof(char) * strlen(token)+1);
        strcpy(out[count-1], token);
        fprintf(stderr, "%s\n", out[count-1]);
        token = strtok(NULL, "/");
    }
    out = realloc(out, sizeof(char*) * (count+1));
    out[count] = NULL;
    return count;
}

int main(int argc, char * argv[]) {
    char path[] = "/home/pirates/are/cool/yeah";
    char ** out;
    int count = split_path(path, out);
    fprintf(stdout, "count: %d\n", count);
    fprintf(stderr, "1st: %s\n", out[0]); // segfaults here
    return 0;
}
All of the print statements in the split_path function print perfectly; the output looks like this:
count: 1, string: home
count: 2, string: pirates
count: 3, string: are
count: 4, string: cool
count: 5, string: yeah
count: 5
1st: ./a.out
[1] 5676 segmentation fault (core dumped) ./a.out
But for some reason, when I get back to the main function, the char** array is no longer valid. I thought it might be because it was pointing to memory declared inside split_path, but I'm using strcpy to copy the strings into it, so it shouldn't be pointing back to memory that is local to that function. Any help is greatly appreciated.
You are mismanaging the out parameter. The out variable in main() is never assigned a valid memory address, hence the segfault. The out parameter in split_path() never updates the out variable in main(). You need to pass the address of the variable to split_path() so it can update the variable and access the memory that the variable points to.
Also note that strtok() modifies the string it is parsing, so you should make a copy and then parse the copy so the original does not get destroyed. Otherwise, consider using strchr() instead of strtok().
Try something more like this instead:
int split_path(char * path, char *** out) {
    *out = NULL;
    char * tmp = strdup(path);
    if (!tmp) { ... }
    char * token = strtok(tmp, "/");
    int count = 0;
    char ** newout;
    while (token) {
        newout = realloc(*out, sizeof(char*) * (++count));
        if (!newout) { ... }
        *out = newout;
        (*out)[count-1] = malloc(sizeof(char) * (strlen(token)+1));
        if (!(*out)[count-1]) { ... }
        strcpy((*out)[count-1], token);
        fprintf(stderr, "%s\n", token);
        token = strtok(NULL, "/");
    }
    newout = realloc(*out, sizeof(char*) * (count+1));
    if (!newout) { ... }
    *out = newout;
    (*out)[count] = NULL;
    free(tmp);
    return count;
}

int main(int argc, char * argv[]) {
    char path[] = "/home/pirates/are/cool/yeah";
    char ** out;
    int count = split_path(path, &out);
    fprintf(stdout, "count: %d\n", count);
    fprintf(stderr, "1st: %s\n", out[0]); // no longer segfaults
    free(out);
    return 0;
}
And don't forget error handling. I've left it out of this example for brevity, but you should not leave it out of your real code.
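One more detail worth adding (my note, not part of the original answer): since split_path() allocates each string with malloc(), the caller should free the individual elements before freeing the pointer array itself, e.g.:
/* free each string allocated by split_path(), then the array of pointers */
for (int i = 0; i < count; ++i)
    free(out[i]);
free(out);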

strtok and memory leaks

I wrote a simple URL parser using strtok(). Here's the code:
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    char *protocol;
    char *host;
    int port;
    char *path;
} aUrl;

void parse_url(char *url, aUrl *ret) {
    printf("Parsing %s\n", url);
    char *tmp = (char *) _strdup(url);
    //char *protocol, *host, *port, *path;
    int len = 0;
    // protocol is now e.g. http: or https:
    ret->protocol = (char *) strtok(tmp, "/");
    len = strlen(ret->protocol) + 2;
    ret->host = (char *) strtok(NULL, "/");
    len += strlen(ret->host);
    //printf("char at %d => %c", len, url[len]);
    ret->path = (char *) _strdup(&url[len]);
    ret->path = (char *) strtok(ret->path, "#");
    ret->protocol = (char *) strtok(ret->protocol, ":");
    // host is now e.g. address.com:8080
    //tmp = (char *) _strdup(host);
    //strtok(tmp, ":");
    ret->host = (char *) strtok(ret->host, ":");
    tmp = (char *) strtok(NULL, ":");
    if (tmp == NULL) {
        if (strcmp(ret->protocol, "http") == 0) {
            ret->port = 80;
        } else if (strcmp(ret->protocol, "https") == 0) {
            ret->port = 443;
        }
    } else {
        ret->port = atoi(tmp);
    }
    //host = (char *) strtok(NULL, "/");
}

/*
 *
 */
int main(int argc, char** argv) {
    printf("hello moto\n");
    aUrl myUrl;
    parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);
    printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n", myUrl.protocol, myUrl.host, myUrl.port, myUrl.path);
    return (EXIT_SUCCESS);
}
As you can see, I use strtok() a lot so I can "slice" the URL. I don't need to support URLs other than http or https, so the way it's done solves all of my problems.
My concern (this is running on an embedded device) is: am I wasting memory?
When I write something like
ret->protocol = (char *) strtok(tmp, "/");
And then later call
ret->protocol = (char *) strtok(ret->protocol, ":");
Does the string that my first ret->protocol pointer referred to remain in memory? I thought that maybe I should assign the first call to a tmp pointer, call strtok to point ret->protocol at the right portion of the string (the second call), and then free(tmp).
What is the best way to use strtok?
To answer your question directly: strtok only returns a pointer to a location inside the string you give it as input. It doesn't allocate new memory for you, so you shouldn't need to call free on any of the pointers it gives you back.
For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.
Also note that your memory allocation is problematic here: you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of ret. Your caller will thus be responsible for freeing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to magically know which pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)
strtok modifies the string in place, replacing delimiter characters with null characters ('\0'). Since strings in C are null-terminated, your original pointer now appears to point to a shorter string, even though the original buffer is still there and still occupies the same amount of memory (just with some characters replaced by '\0').
The short answer is this: keep a pointer to the beginning of your string buffer, and have another pointer that is your "current" position in the string as you parse it. When you use strtok or iterate over the string in other ways, you update the "current" pointer but leave the beginning pointer alone. When you're finished, free() the beginning pointer. No memory is leaked.
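A minimal sketch of that pattern (illustrative only; the names here are made up):
char *buf = strdup(input);     /* keep this pointer: it is the one you free */
char *cur = strtok(buf, "/");  /* tokens point into buf, no new allocations */
while (cur != NULL) {
    /* ... use cur ... */
    cur = strtok(NULL, "/");
}
free(buf);                     /* one free() for the whole buffer */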
Do you know you can continue parsing the string by using NULL as the first parameter of strtok?
First call:
char* token = strtok(string, delimiters);
Then:
token = strtok(NULL, other_delimiters);
This allow you to simplify your code:
int parse_url(char *url, aUrl *ret)
{
    // get protocol (e.g. "http:")
    char *token = strtok(url, "/");
    if (token == NULL)
        return -1;
    ret->protocol = malloc(strlen(token) + 3);   // room for "//" and '\0'
    strcpy(ret->protocol, token);
    strcat(ret->protocol, "//");
    // get host: strtok collapses the consecutive '/' delimiters,
    // so the next token is already the host
    token = strtok(NULL, "/");
    if (token == NULL)
        return -1;
    ret->host = strdup(token);
    // get path (everything up to an optional '#' fragment)
    token = strtok(NULL, "#");
    if (token == NULL)
        return -1;
    ret->path = strdup(token);
    // ...
    return 0;
}
You can see I added a return value so the caller knows whether parsing completed successfully.
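One caveat worth adding (my note, not from the original answer): this version calls strtok() on the caller's url buffer, so it must be passed a writable string, not a string literal. A usage sketch under that assumption:
/* strtok() modifies the buffer in place, so use a writable array,
   not a string literal */
char url[] = "http://teste.com/Teste/asdf#coisa";
aUrl myUrl;
if (parse_url(url, &myUrl) != 0)
    fprintf(stderr, "could not parse URL\n");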
Thanks for sharing your code! I ran it inside valgrind and fixed two memory leaks from the strdup() calls.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    char *protocol;
    char *host;
    int port;
    char *path;
} URL;

void parse_url(char *url, URL *ret) {
    char *tmp = (char *) strdup(url);
    int len = 0;

    ret->protocol = (char *) strtok(tmp, "/");
    len = strlen(ret->protocol) + 2;
    ret->host = (char *) strtok(NULL, "/");
    len += strlen(ret->host);
    ret->path = (char *) strdup(&url[len]);
    ret->path = (char *) strtok(ret->path, "#");
    ret->protocol = (char *) strtok(ret->protocol, ":");
    ret->host = (char *) strtok(ret->host, ":");
    tmp = (char *) strtok(NULL, ":");
    if (tmp == NULL) {
        if (strcmp(ret->protocol, "http") == 0) {
            ret->port = 80;
        } else if (strcmp(ret->protocol, "https") == 0) {
            ret->port = 443;
        }
    } else {
        ret->port = atoi(tmp);
    }
}

void free_url(URL *url) {
    free(url->path);
    free(url->protocol);
}

int main(int argc, char** argv) {
    URL url;
    parse_url("http://example.com:3000/Teste/asdf#coisa", &url);
    printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n", url.protocol, url.host, url.port, url.path);
    free_url(&url);
    return (EXIT_SUCCESS);
}

Concatenate path and basename

basename(3) and dirname(3) can split an absolute path into its respective components.
Short of using snprintf(3), is there a natural POSIX-compliant library call that does the inverse, i.e. takes a directory and a filename and concatenates them?
Manual concatenation works fine for me, but it can get a little tedious at times.
As far as I know there is no such function in POSIX. However, the GNU libc manual shows a nice helper function:
char *concat (const char *str, ...)
{
    va_list ap;
    size_t allocated = 100;
    char *result = (char *) malloc (allocated);

    if (result != NULL)
    {
        char *newp;
        char *wp;
        const char *s;

        va_start (ap, str);
        wp = result;
        for (s = str; s != NULL; s = va_arg (ap, const char *))
        {
            size_t len = strlen (s);

            /* Resize the allocated memory if necessary. */
            if (wp + len + 1 > result + allocated)
            {
                allocated = (allocated + len) * 2;
                newp = (char *) realloc (result, allocated);
                if (newp == NULL)
                {
                    free (result);
                    return NULL;
                }
                wp = newp + (wp - result);
                result = newp;
            }
            wp = mempcpy (wp, s, len);
        }

        /* Terminate the result string. */
        *wp++ = '\0';

        /* Resize memory to the optimal size. */
        newp = realloc (result, wp - result);
        if (newp != NULL)
            result = newp;

        va_end (ap);
    }
    return result;
}
usage:
char *path = concat(directory, "/", file, (char *) NULL);
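Two caveats worth flagging (my notes, not part of the original answer): the variadic loop stops at the first NULL argument, so the trailing (char *) NULL sentinel above is required, and mempcpy() has traditionally been a GNU extension, so on glibc the file defining concat() needs _GNU_SOURCE defined before its includes (or mempcpy replaced by memcpy plus a pointer adjustment). The result is heap-allocated, so the caller owns it:
/* the caller is responsible for releasing the buffer returned by concat() */
if (path != NULL) {
    /* ... use path ... */
    free(path);
}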
