Parse url path of GET request - c

I'm new to C and I've been working on this task for about 7 hours now - please don't say I didn't try.
I want to parse the path of a self-written webserver in C. Let's say I call
http://localhost:8080/hello/this/is/a/test.html
then the browser gets
GET /hello/this/is/a/test.html HTTP/1.1
I want to parse /hello/this/is/a/test.html, so the complete string between "GET " (note the white space after GET) and the first white space after /../../..html.
What I tried so far:
int main() {
...
char * getPathOfGetRequest(char *);
char *pathname = getPathOfGetRequest(buf);
printf("%s\n\n%s", buf, pathname);
...
}
char * getPathOfGetRequest(char *buf) {
char *startingGet = "GET ";
char buf_cpy[BUFLEN];
memcpy(buf_cpy, buf, sizeof(buf));
char *urlpath = malloc(1000);
char *path = malloc(1000);
urlpath = strstr(buf_cpy, startingGet);
char delimiter[] = " ";
path = strtok(urlpath, delimiter);
path = strtok(NULL, delimiter);
return path;
}
The pathname always only has 4 correct chars and may or may not be filled with other unrelated chars, like /hell32984cn)/$"§$. I guess it has something to do with strlen(startingGet), but I can't see the relationship between it. Where is my mistake?

Question code with commentary:
char * getPathOfGetRequest(char *buf) {
char *startingGet = "GET ";
char buf_cpy[BUFLEN];
memcpy(buf_cpy, buf, sizeof(buf));
The above memcpy will likely only copy 4 bytes from buf to buf_cpy.
This is due to buf being a pointer to a char.
sizeof(buf) is the size of a pointer (likely: 4).
Perhaps, instead of using 'sizeof()', it would have been better to use 'strlen()'.
char *urlpath = malloc(1000);
char *path = malloc(1000);
urlpath = strstr(buf_cpy, startingGet);
Perhaps the questioner is not clear on why urlpath was allocated 1000 bytes of memory. In any case, the above assignment will cause that 1000 bytes to be leaked, and defeats the purpose of the 'urlpath=malloc(1000)'.
The actual effect of the above statements is urlpath = buf_cpy;, as strstr() will return the position of the beginning of 'GET ' in the buf_copy.
char delimiter[] = " ";
path = strtok(urlpath, delimiter);
Likewise, the above assignment will cause the 1000 bytes allocated to path to be leaked, and defeats the purpose of the 'path=malloc(1000)' above.
path = strtok(NULL, delimiter);
return path;
}
An alternitive coding:
char *getPathOfGetRequest(const char *buf)
{
const char *start = buf;
const char *end;
char *path=NULL;
size_t pathLen;
/* Verify that there is a 'GET ' at the beginning of the string. */
if(strncmp("GET ", start, 4))
{
fprintf(stderr, "Parse error: 'GET ' is missing.\n");
goto CLEANUP;
}
/* Set the start pointer at the first character beyond the 'GET '. */
start += 4;
/* From the start position, set the end pointer to the first white-space character found in the string. */
end=start;
while(*end && !isspace(*end))
++end;
/* Calculate the path length, and allocate sufficient memory for the path plus string termination. */
pathLen = (end - start);
path = malloc(pathLen + 1);
if(NULL == path)
{
fprintf(stderr, "malloc() failed. \n");
goto CLEANUP;
}
/* Copy the path string to the path storage. */
memcpy(path, start, pathLen);
/* Terminate the string. */
path[pathLen] = '\0';
CLEANUP:
/* Return the allocated storage, or NULL in the event of an error, to the caller. */
return(path);
}
And, finally, if 'strtok()' must be used:
char *getPathOfGetRequest(char *buf)
{
char *path = NULL;
if(strtok(buf, " "))
{
path = strtok(NULL, " ");
if(path)
path=strdup(path);
}
return(path);
}

Related

Parser generating segfaults in C

I've been trying to get this to work for the past 2 weeks to no avail.
I have a project to create a shell that implements parsing and built-in commands. The issue I'm having is when I pass a char* to my parse function and it returns, when i try to access any part of it, I get a segfault. I've tried different methods including a struct holding a char** all with the same problems, so i'm guessing it's an issue with my parser. I would appreciate any help.
code for parser.c:
#define BUFSIZE 1024
#define TOK_BUFSIZE 64
#define TOK_DELIM " \t\r\n\a"
char*** Parse(char *line0){
char* null_ptr = 0;
char*** cmd = malloc(MAX_SIZE * sizeof(char**));
/*
char arg[] = argument
char* argv[] = argument array
char** cmd[] = array of argument arrays
*/
int bufsize = MAX_SIZE, cmdp = 0, argp = 0, com = FALSE, redir = FALSE;
char *token;
char* line = malloc(100*sizeof(char));
strcpy(line,line0);
token = strtok(line, TOK_DELIM);
while (token){
if (*token == ';'){ // new command string
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[0]=tmp1;
cmd[cmdp] = tmpa;
argp = 0;
cmdp++;
com = FALSE;
redir = FALSE;
}
else if (*token == '>' || *token == '<' || token == ">>"){ // redirects
argp = 0;
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[argp]=tmp1;
argp++;
printf("Redirect: %s\n",tmp1);
com = FALSE;
redir = TRUE;
}
else if (*token == '|'){ // pipe
printf("PIPE\n");
cmdp++;
argp = 0;
com = FALSE;
}
else if (redir){ // redirect file name
// redirect token stored in arg[]
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[argp]=tmp1;
cmd[cmdp]=tmpa;
argp = 0;
cmdp++;
redir = FALSE;
com = FALSE;
printf("File: %s\n", token);
}
else if (token == "&") // background
{
cmdp++;
argp = 0;
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[0]=tmp1;
cmd[cmdp]=tmpa;
printf("Background");
}
else if (!com && !redir){ // command entered
argp = 0;
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[argp] = tmp1;
argp++;
printf("Command %s\n", token);
com = TRUE;
}
else if (com){ // argument to command, all other redirects and pipes taken care of
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[argp] = tmp1;
argp++;
printf("Argument: %s\n", token);
//cmd[cmdp] = argv; // save current working argument array
//cmdp++;
}
// end of if else statements
token = strtok(NULL, TOK_DELIM);
} // end of while
cmdp++;
cmd[cmdp] = NULL;
return &cmd;
}
When I compiled your code on the command-line by typing in:
gcc /path/to/yourcodefilename.c -Wall -Wextra
But replacing /path/to/yourcodefilename.c with the actual filename of the code containing the main function that eventually calls your function (my file is test2.c), I received warnings. The first being:
./test2.c:21: error: 'aaa' undeclared (first use in this function)
./test2.c:21: error: (Each undeclared identifier is reported only once
./test2.c:21: error: for each function it appears in.)
And I received a few of those. "aaa" is replaced by the named of something you used inside your function that has not been previously defined. This includes the word TRUE and FALSE. To correct this, you can use at the top of your program:
#define FALSE n
#define TRUE y
where n and y are numbers representing false and true respectively. Another way to correct it is to include the header files containing definitions for "TRUE" and "FALSE".
The second thing I noticed on a few lines is:
warning: assignment makes integer from pointer without a cast
Make sure you don't convert data over from one type to another. For example, don't set a character variable to a pointer value.
For example, change:
tmp1[sizeof(token)] = null_ptr;
to:
tmp1[sizeof(token)] = '\0';
Because specifying an index to a char* means specifying a char, and null_ptr is of type char* and char* and char are not the same. What i did was assigned a null value that is a char.
I hope that helps you with some troubleshooting
There are several issues here:
You allocate cmd and its subarrays. You return an address to that array at the end of the function. The address has type char ****, which is not the correct return type. What's worse: That address is the address of a local variable, which goes out of scope immediately after returning. Return the handle you got from malloc instead:
char ***Parse(char *line0)
{
char ***cmd = malloc(MAX_SIZE * sizeof(*cmd));
// fill cmd
return cmd;
}
Your code is needlessly long, mostly because you code the steps to allocate memory, copy a string and null-terminate it explicitly. (Others have pointed out that you don't do the null termination properly. You also allocate a fixed size of 1024 bytes regardles of the actual string length, which is quite wasteful.) You could write a function to duplicate strings or use the non-standard, but widely available strdup; this would make your code easier to read.
All the temporary allocations are hard to follow. For example, in the branch if (!com && !redir), you allocate to tmpa, but you never store that value in cmd. The same goes for the redirection branch.
It's also not clear when you start a new command. There should be a new command just before parsing the first token, after encountering a pipe or after encountering a semicolon. You also start new commands for redirections and the background ampersand.
The comparison token == ">>" will always be false: token is an address in line and ">>" is a string literal stored n static memory. You should use strcmp to compare two strings.
In general, you want to allocate a new list when cmdp increases. In that case, argp is reset to zero. Otherwise, you just append to the current command.
I think that you complicate things by treating everything as special. I recommend to simplify the code and leave redirection and the background for the moment. They can easily be resolved when the command is called. (Your code sets the state with redir and com, but it never enforces file names after redirection, for example. You can do that easily when all the tokens are in place.)
The code below treats only pipes and semicolons as command separators. When the command is a pipe, the pipe token is prepended to the following command:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_SIZE 32
#define TOK_DELIM " \t\r\n\a"
char *sdup(const char *str)
{
size_t len = strlen(str);
char *dup = malloc(len + 1);
if (dup) {
memcpy(dup, str, len);
dup[len] = '\0';
}
return dup;
}
char ***parse(char *line0)
{
char *token;
char *line = sdup(line0);
token = strtok(line, TOK_DELIM);
if (token == NULL) return NULL;
char ***cmd = malloc(MAX_SIZE * sizeof(char **));
int cmdp = 0;
int argp = 0;
cmd[0] = malloc(MAX_SIZE * sizeof(*cmd[0]));
while (token) {
if (strcmp(token, ";") == 0 || strcmp(token, "|") == 0) {
// begin new command
cmd[cmdp][argp++] = NULL;
cmdp++;
if (cmdp + 1 == MAX_SIZE) break;
argp = 0;
cmd[cmdp] = malloc(MAX_SIZE * sizeof(*cmd[0]));
// prepend pipe token
if (*token == '|') {
cmd[cmdp][argp++] = sdup(token);
}
} else {
// append to current command
if (argp + 1 < MAX_SIZE) {
cmd[cmdp][argp++] = sdup(token);
}
}
token = strtok(NULL, TOK_DELIM);
}
// null-terminate arg and cmd lists
cmd[cmdp][argp] = NULL;
cmdp++;
cmd[cmdp] = NULL;
return cmd;
}
int main()
{
char ***cmd = parse("echo start ; ls -l | wc > output ; echo stop");
char ***p = cmd;
while (*p) {
char **q = *p;
while (*q) {
printf("'%s' ", *q);
free(*q);
q++;
}
puts("");
free(*p);
p++;
}
free(cmd);
return 0;
}
Further remarks:
I'm not sure whether the current format is suited to the task. It might be better to have a tree structure that takes care of pipes, semicolons and maybe as well &&and || and then have leaf nodes with the commands where the arguments are linked lists.
Tokenisation with strtok requires white-space between all tokens, but punctuation can usually be written without explicit space, e.g.: "./a.out>kk&". So you will need a better method of parsing.
At the moment, you allocate space for each string, which you must free later. If you create a token struct that describes a token as read-only view into the original string you can do without the allocations. The views are not null-terminated, though, so you will need comparison methis that work on starting pointer plus length, for example.

CSV file reading issue: with large amont of data

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* getfield(char* line, int num) {
char* tok = line;
char* result;
if (line)
{
do
{
if (!--num)
{
tok = strchr(line, ',');
if (tok == NULL)
{
tok = &line[strlen(line)];
}
size_t fieldlen = tok - line;
if (fieldlen)
{
result = (char*)malloc(fieldlen+1);
result[fieldlen] = '\0';
strncpy(result, line, fieldlen);
return result;
}
else
{
break;
}
}
tok = strchr(line, ',');
line = tok + 1;
} while (tok);
}
result = (char*)malloc(2);
strcpy(result, "0");
return result;
}
int main()
{
FILE* stream = fopen("data.csv", "r");
char line[1024];
char *pstr;int num1,num2,num3;
char* value1,value2,value3;
while (fgets(line, 1024, stream))
{
char* tmp = strdup(line);
value1=getfield(tmp, 1);
value2=getfield(tmp, 2);
value3=getfield(tmp, 3);
num1 =strtol(value1,&pstr,10);
num2 =strtol(value2,&pstr,10);
num3 =strtol(value3,&pstr,10)
free(value1);
free(value2);
free(value3);
printf("Fields 1,2,3 would be 1=%d 2=%d 3=%d\n", num1,num2,num3);
// NOTE strtok clobbers tmp
free(tmp);
}
}
above is my C code to read the file....
:::: data.csv ::::
10,34,30
10,33,
23,45,23
25,,45
above is my file..
here my issue is I can call the function with "num" field. so that for reading of every line I suppose to call the function 3 times.. !! so the performance is too low for the large data files.. can someone help me that I can call the function at once and It will return an array.. than I can easily store and print (e.g. for the first line array[0]=10,array[1]=34,array[2]=30 )
You could speed it up by creating a fast split function that will destroy your line (not to mention the many lurking segmentation faults and memory leaks; this code has NO error checking or freeing of resources):
#include <stdio.h>
#include <stdlib.h>
char **split(char *line, char sep, int fields) {
char **r = (char **)malloc(fields * sizeof(char*));
int lptr = 0, fptr = 0;
r[fptr++] = line;
while (line[lptr]) {
if (line[lptr] == sep) {
line[lptr] = '\0';
r[fptr] = &(line[lptr+1]);
fptr++;
}
lptr++;
}
return r;
}
int main(int argc, char **argv) {
char line[] = "some,info,in a line";
char **fields = split(line, ',', 3);
printf("0:%s 1:%s 2:%s\n", fields[0], fields[1], fields[2]);
}
result:
0:some 1:info 2:in a line
I haven't run timing test on your code, but I'll bet a nickel that the problems is using malloc(). That is SLOW.
What Bart means is that a char[] array can contain multiple strings, back-to-back. If you scan through the array as a single string once, changing all ',' characters to '\0', your last line would look like:
{ '2', '5', 0, 0, '4', '5', 0, ? rest of buffer }
^ ^ ^ !
The ^ carets below mark the positions where you'd record pointers to three strings. As you can see, they are equivalent to separate strings of "25", "", "45" in separate arrays. The ! below marks the 0 that ended the original string. Nothing beyond that has any meaning.
All this depends on being able to modify the original string in-place, probably rendering it useless for any further processing (like printing out the offending line if an invalid field is detected). However, you are already copying the original buffer for local use, so that shouldn't be a problem. I'd get rid of the malloc for that copy buffer too, by the way.
Code might look like:
while (fgets(line, 1024, stream))
{
char tmp[sizeof line]; /* this will save a malloc()/free() pair */
char *tok, *fence, *pstr;
char ch, *cp1=line, *cp2=tmp;
while (0 != (ch = *cp1++))
*cp2++ = (ch == ',') ? 0 : ch;
fence = cp2; /* remember end of string */
*fence = 0; /* and terminate final string */
tok = tmp; /* point to first token */
num1 =strtol(tok, &pstr, 10);
if (tok < fence) tok += strlen(tok) + 1;
num2 =strtol(tok,&pstr,10);
if (tok < fence) tok += strlen(tok) + 1;
num3 =strtol(tok,&pstr,10);
printf("Fields 1,2,3 would be 1=%d 2=%d 3=%d\n", num1,num2,num3);
}
Obviously you don't need a 1K buffer to handle three values, so there will be a loop to pull out the values. The if statement after the first two strtol() calls is your replacement for getfield(), which isn't needed any more.
After this is working, look at data validation. Nothing in this (or in the original) will detect invalid numbers.

C - split string into an array of strings

I'm not completely sure how to do this in C:
char* curToken = strtok(string, ";");
//curToken = "ls -l" we will say
//I need a array of strings containing "ls", "-l", and NULL for execvp()
How would I go about doing this?
Since you've already looked into strtok just continue down the same path and split your string using space (' ') as a delimiter, then use something as realloc to increase the size of the array containing the elements to be passed to execvp.
See the below example, but keep in mind that strtok will modify the string passed to it. If you don't want this to happen you are required to make a copy of the original string, using strcpy or similar function.
char str[]= "ls -l";
char ** res = NULL;
char * p = strtok (str, " ");
int n_spaces = 0, i;
/* split string and append tokens to 'res' */
while (p) {
res = realloc (res, sizeof (char*) * ++n_spaces);
if (res == NULL)
exit (-1); /* memory allocation failed */
res[n_spaces-1] = p;
p = strtok (NULL, " ");
}
/* realloc one extra element for the last NULL */
res = realloc (res, sizeof (char*) * (n_spaces+1));
res[n_spaces] = 0;
/* print the result */
for (i = 0; i < (n_spaces+1); ++i)
printf ("res[%d] = %s\n", i, res[i]);
/* free the memory allocated */
free (res);
res[0] = ls
res[1] = -l
res[2] = (null)
Here is an example of how to use strtok borrowed from MSDN.
And the relevant bits, you need to call it multiple times. The token char* is the part you would stuff into an array (you can figure that part out).
char string[] = "A string\tof ,,tokens\nand some more tokens";
char seps[] = " ,\t\n";
char *token;
int main( void )
{
printf( "Tokens:\n" );
/* Establish string and get the first token: */
token = strtok( string, seps );
while( token != NULL )
{
/* While there are tokens in "string" */
printf( " %s\n", token );
/* Get next token: */
token = strtok( NULL, seps );
}
}

remove characters from a c string

gcc 4.4.4 c89
I am reading in from a text file and the text file consists of names in double quotes.
"Simpson, Homer"
etc
However, I want to remove the double quotes from the string.
This is how I have done it, but I am not sure its the best way.
int get_string(FILE *in, char *temp)
{
char *quote = NULL;
/* Get the first line */
fgets(temp, STRING_SIZE, in);
printf("temp before [ %s ]\n", temp);
/* Find the second quote */
if((quote = strrchr(temp, '"')) == NULL) {
fprintf(stderr, "Text file incorrectly formatted\n");
return FALSE;
}
/* Replace with a nul to get rid of the second quote */
*quote = '\0';
/* Move the pointer to point pass the first quote */
temp++;
printf("temp after [ %s ]\n", temp);
return TRUE;
}
Many thanks for any suggestions,
No, this won't work. You are changing the parameter temp, but the calling function will still have an old value. The temp outside the function will point to the opening quote. You ought to move the characters in your buffer.
However I would suggest allocating the buffer in heap and returning a pointer to it, letting the caller free the buffer when needed. This seems to be a cleaner solution. Again, this way you won't rely on the caller to pass a sufficiently large buffer.
In general, a robust reading lines from a text file is not a trivial task in C, with its lack of automatic memory allocating functions. If possible to switch to C++, I would suggest trying much simpler C++ getline.
char *foo(char *str, int notme)
{
char *tmp=strdup(str);
char *p, *q;
for(p=str, q=tmp; *p; p++)
{
if((int)*p == notme) continue;
*q=*p;
q++;
}
strcpy(str, tmp);
free(tmp);
return str;
}
simple generic remove a char
is all lines look that way why not simple remove the first and the last char?
quote++; // move over second char
quote[strlen(quote)-1]='\0'; // remove last char
Don't know if this will help, it is a simple tokenizer i use
#include <stdlib.h>
#include <string.h>
int token(char* start, char* delim, char** tok, char** nextpos, char* sdelim, char* edelim) {
// Find beginning:
int len = 0;
char *scanner;
int dictionary[8];
int ptr;
for(ptr = 0; ptr < 8; ptr++) {
dictionary[ptr] = 0;
}
for(; *delim; delim++) {
dictionary[*delim / 32] |= 1 << *delim % 32;
}
if(sdelim) {
*sdelim = 0;
}
for(; *start; start++) {
if(!(dictionary[*start / 32] & 1 << *start % 32)) {
break;
}
if(sdelim) {
*sdelim = *start;
}
}
if(*start == 0) {
if(nextpos != NULL) {
*nextpos = start;
}
*tok = NULL;
return 0;
}
for(scanner = start; *scanner; scanner++) {
if(dictionary[*scanner / 32] & 1 << *scanner % 32) {
break;
}
len++;
}
if(edelim) {
*edelim = *scanner;
}
if(nextpos != NULL) {
*nextpos = scanner;
}
*tok = (char*)malloc(sizeof(char) * (len + 1));
if(*tok == NULL) {
return 0;
}
memcpy(*tok, start, len);
*(*tok + len) = 0;
return len + 1;
}
The parameters are:
char* start, (pointer to the string)
char* delim, (pointer to the delimiters used to break up the string)
char** tok, a reference (using &) to a char* variable that will hold the toke
char** nextpos, a reference (using &) to a char* variable that will hold the position after the last token.
char* sdelim, a reference (using &) to a char variable that will hold the value of the -start delimiter
char* edelim, a reference (using &) to a char varaible that will hold the value of the end delimiter
The last three are optional.
Pass in the start address, the delimeter is a ", and pass reference to a char * to hold the actual middle string.
The result is a newly allocated string so you have to free it.
int get_string(FILE *in, char *temp)
{
char *token = NULL;
/* Get the first line */
fgets(temp, STRING_SIZE, in);
printf("temp before [ %s ]\n", temp);
/* Find the second quote */
int length = token(temp, "\"", &token, NULL, NULL, NULL)
// DO STUFF WITH THE TOKEN
printf("temp after [ %s ]\n", token);
// DO STUFF WITH THE TOKEN
// FREE IT!!!
free(token);
return TRUE;
}
The tokenizer is a multipurpose tool that can be used in a crap ton of places, this being a very small example.
Suppose
string="\"Simpson, Homer\""
then
string_without_quotes=string+1;
string_without_quotes[strlen(string)-2]='\0';
ready!

strtok and memory leaks

I wrote a simple url parser using strtok(). here's the code
#include <stdio.h>
#include <stdlib.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} aUrl;
void parse_url(char *url, aUrl *ret) {
printf("Parsing %s\n", url);
char *tmp = (char *)_strdup(url);
//char *protocol, *host, *port, *path;
int len = 0;
// protocol agora eh por exemplo http: ou https:
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
//printf("char at %d => %c", len, url[len]);
ret->path = (char *)_strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
// host agora é por exemplo address.com:8080
//tmp = (char *)_strdup(host);
//strtok(tmp, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if(tmp == NULL) {
if(strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if(strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
//host = (char *) strtok(NULL, "/");
}
/*
*
*/
int main(int argc, char** argv) {
printf("hello moto\n");
aUrl myUrl;
parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);
printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n", myUrl.protocol, myUrl.host, myUrl.port, myUrl.path);
return (EXIT_SUCCESS);
}
As you can see, I use strtok() a lot so I can "slice" the url. I don't need to support urls different than http or https so the way it's done solves all of my problems.
My concern is (this is running on an embedded device) - Am I wasting memory ?
When I write something like
ret->protocol = (char *) strtok(tmp, "/");
And then later call
ret->protocol = (char *) strtok(ret->protocol, ":");
Does me first pointer ret->protocol held remain in memory ? I thought that maybe I should set the first call to a tmp pointer, call strtok pointing ret->protocol to the right portion of the string (the second call) and then free(tmp).
What should be the best way to use strtok ?
To answer your question directly, strtok only returns a pointer to a location inside the string you give it as input-- it doesn't allocate new memory for you, so shouldn't need to call free on any of the pointers it gives you back in return.
For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.
Also note that your memory allocation is problematic here-- you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of "ret". Your caller will thus be responsible for free'ing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to know magically what pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)
strtok modifies the string in place, replacing the specified characters with NULL. Since strings in C are NULL-terminated, it now appears that your original pointer is pointing to a shorter string, even though the original string is still there and still occupies the same amount of memory (but with characters replaced with NULL). The end of the string, I think, contains a double-NULL.
The short answer is this: Keep a pointer to the beginning of your string buffer, and have another pointer that is your "current" pointer into the string as you parse it. When you use strtok or iterate over the string in other ways you update the "current" pointer but leave the beginning pointer alone. When you're finished, free() the beginning pointer. No memory leaked.
Do you know you can continue parsing the string using NULL as first parameter of strtok?
First call:
char* token = strtok(string, delimiters);
Then:
token = strtok(NULL, other_delimiters);
This allow you to simplify your code:
int parse_url(char *url, aUrl *ret)
{
//get protocol
char* token = strtok(url, "/");
if( token == NULL )
return -1;
strcpy(ret->protocol, token);
strcat(ret->protocol, "//");
// skip next '/'
token = strtok(NULL, "/");
if( token == NULL )
return -1;
//get host
token = strtok(NULL, "/");
if( token == NULL )
return -1;
strcpy(ret->host, token);
// get path
token = strtok(NULL, "#");
if( token == NULL )
return -1;
strcpy(ret->path, token);
// ...
return 0;
}
You can see I had a return value to know if parsing was successfully done.
Thanks for sharing your code! I ran it inside valgrind and fixed two memory leaks generated by strdup functions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} URL;
void parse_url(char *url, URL *ret) {
char *tmp = (char *) strdup(url);
int len = 0;
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
ret->path = (char *) strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if (tmp == NULL) {
if (strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if (strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
}
void free_url(URL *url) {
free(url->path);
free(url->protocol);
}
int main(int argc, char** argv) {
URL url;
parse_url("http://example.com:3000/Teste/asdf#coisa", &url);
printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n", url.protocol, url.host, url.port, url.path);
free_url(&url);
return (EXIT_SUCCESS);
}

Resources