If I have:
char *tokenPtr = "testingpointerindex"
and I want to access everything after the 4th character, how would I go about that? I tried :
char *tokenPtr = "testingpointerindex";
char *host = tokenPtr + 4;
printf("%s\n",host);
return host;
It's just an outake but I hope it gives enough info, I get a bus error.
Thanks
EDIT:
The full code
char * getHost(char *buf){
char *tokenPtr;
tokenPtr = strtok(buf, "\r\n" );
printf("got token\n");
while ( tokenPtr != NULL ) {
if(strncmp(tokenPtr,"Host",4) == 0){
break;
}
else{
tokenPtr = strtok( NULL, "\r\n" );
}
}
char *host = tokenPtr + 7;
printf("%s\n",host);
return host;
}
int main(int argc, char *argv[])
{
char *msg = "GET /index.html HTTP/1.1\r\n Host: www.google.com\r\n\r\n";
getHost(msg);
}
The above code works fine.
However, there's one thing to mention: string literals (e.g. "testingpointerindex") are non-modifiable in C. Therefore you should use const char *, not char *.
Change:
char *tokenPtr = "testingpointerindex";
to
static char tokenPtr[] = "testingpointerindex";
In your example, tokenPtr is a string literal and string literal are non-modifiable.
The static specifier is required in the second example if you plan to return a pointer to an element of the array as automatic variables are discarded at the end of a function.
Note: I've updated this answer to conform to the new code posted by the OP's update
In the following code, there are a couple problems:
while ( tokenPtr != NULL ) {
if(strncmp(tokenPtr,"Host",4) == 0){
break;
}
else{
tokenPtr = strtok( NULL, "\r\n" );
}
}
char *host = tokenPtr + 7;
The first problem is that there is a possibility that you could exit the while-loop because tokenPtr is NULL ... you don't guard for that possibility. Secondly, you assume that tokenPtr is pointing to a string of at least length 8 after it's been returned from strtok, but that's not necessarily true either (It should be true in your example code, but in working code it might not be true).
Finally, strtok modifies the string it processes, and you're passing it a pointer to a string-literal, which is stored in a read-only memory segment, and should not be modified. You should call strdup to create your string, knowing that you'll have to call free on the returned pointer at some point. So for instance:
int main()
{
char *msg = strdup("GET /index.html HTTP/1.1\r\n Host: www.google.com\r\n\r\n");
getHost(msg);
free(msg);
return 0;
}
Fix:
char * getHost(char *buf){
char *tokenPtr;
tokenPtr = strtok(buf, "\r\n" );
printf("got token\n");
while ( tokenPtr != NULL ) {
if(strncmp(tokenPtr,"Host",4) == 0) break;
else {tokenPtr = strtok( NULL, "\r\n"); break;}
}
char *host;
host = &tokenPtr[7];
printf("%s\n", host);
}
int main(int argc, char *argv[])
{
char msg[100] = "GET /index.html HTTP/1.1\r\n Host: www.google.com\r\n\r\n";
getHost(msg);
}
Find out what was wrong ;)
Related
I want to write a program in C that displays each word of a whole sentence (taken as input) at a seperate line. This is what I have done so far:
void manipulate(char *buffer);
int get_words(char *buffer);
int main(){
char buff[100];
printf("sizeof %d\nstrlen %d\n", sizeof(buff), strlen(buff)); // Debugging reasons
bzero(buff, sizeof(buff));
printf("Give me the text:\n");
fgets(buff, sizeof(buff), stdin);
manipulate(buff);
return 0;
}
int get_words(char *buffer){ // Function that gets the word count, by counting the spaces.
int count;
int wordcount = 0;
char ch;
for (count = 0; count < strlen(buffer); count ++){
ch = buffer[count];
if((isblank(ch)) || (buffer[count] == '\0')){ // if the character is blank, or null byte add 1 to the wordcounter
wordcount += 1;
}
}
printf("%d\n\n", wordcount);
return wordcount;
}
void manipulate(char *buffer){
int words = get_words(buffer);
char *newbuff[words];
char *ptr;
int count = 0;
int count2 = 0;
char ch = '\n';
ptr = buffer;
bzero(newbuff, sizeof(newbuff));
for (count = 0; count < 100; count ++){
ch = buffer[count];
if (isblank(ch) || buffer[count] == '\0'){
buffer[count] = '\0';
if((newbuff[count2] = (char *)malloc(strlen(buffer))) == NULL) {
printf("MALLOC ERROR!\n");
exit(-1);
}
strcpy(newbuff[count2], ptr);
printf("\n%s\n",newbuff[count2]);
ptr = &buffer[count + 1];
count2 ++;
}
}
}
Although the output is what I want, I have really many black spaces after the final word displayed, and the malloc() returns NULL so the MALLOC ERROR! is displayed in the end.
I can understand that there is a mistake at my malloc() implementation, but I do not know what it is.
Is there another more elegant or generally better way to do it?
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
Take a look at this, and use whitespace characters as the delimiter. If you need more hints let me know.
From the website:
char * strtok ( char * str, const char * delimiters );
On a first call, the function expects a C string as argument for str, whose first character is used as the starting location to scan for tokens. In subsequent calls, the function expects a null pointer and uses the position right after the end of last token as the new starting location for scanning.
Once the terminating null character of str is found in a call to strtok, all subsequent calls to this function (with a null pointer as the first argument) return a null pointer.
Parameters
str
C string to truncate.
Notice that this string is modified by being broken into smaller strings (tokens).
Alternativelly [sic], a null pointer may be specified, in which case the function continues scanning where a previous successful call to the function ended.
delimiters
C string containing the delimiter characters.
These may vary from one call to another.
Return Value
A pointer to the last token found in string.
A null pointer is returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
For the fun of it here's an implementation based on the callback approach:
const char* find(const char* s,
const char* e,
int (*pred)(char))
{
while( s != e && !pred(*s) ) ++s;
return s;
}
void split_on_ws(const char* s,
const char* e,
void (*callback)(const char*, const char*))
{
const char* p = s;
while( s != e ) {
s = find(s, e, isspace);
callback(p, s);
p = s = find(s, e, isnotspace);
}
}
void handle_word(const char* s, const char* e)
{
// handle the word that starts at s and ends at e
}
int main()
{
split_on_ws(some_str, some_str + strlen(some_str), handle_word);
}
malloc(0) may (optionally) return NULL, depending on the implementation. Do you realize why you may be calling malloc(0)? Or more precisely, do you see where you are reading and writing beyond the size of your arrays?
Consider using strtok_r, as others have suggested, or something like:
void printWords(const char *string) {
// Make a local copy of the string that we can manipulate.
char * const copy = strdup(string);
char *space = copy;
// Find the next space in the string, and replace it with a newline.
while (space = strchr(space,' ')) *space = '\n';
// There are no more spaces in the string; print out our modified copy.
printf("%s\n", copy);
// Free our local copy
free(copy);
}
Something going wrong is get_words() always returning one less than the actual word count, so eventually you attempt to:
char *newbuff[words]; /* Words is one less than the actual number,
so this is declared to be too small. */
newbuff[count2] = (char *)malloc(strlen(buffer))
count2, eventually, is always one more than the number of elements you've declared for newbuff[]. Why malloc() isn't returning a valid ptr, though, I don't know.
You should be malloc'ing strlen(ptr), not strlen(buf). Also, your count2 should be limited to the number of words. When you get to the end of your string, you continue going over the zeros in your buffer and adding zero size strings to your array.
Just as an idea of a different style of string manipulation in C, here's an example which does not modify the source string, and does not use malloc. To find spaces I use the libc function strpbrk.
int print_words(const char *string, FILE *f)
{
static const char space_characters[] = " \t";
const char *next_space;
// Find the next space in the string
//
while ((next_space = strpbrk(string, space_characters)))
{
const char *p;
// If there are non-space characters between what we found
// and what we started from, print them.
//
if (next_space != string)
{
for (p=string; p<next_space; p++)
{
if(fputc(*p, f) == EOF)
{
return -1;
}
}
// Print a newline
//
if (fputc('\n', f) == EOF)
{
return -1;
}
}
// Advance next_space until we hit a non-space character
//
while (*next_space && strchr(space_characters, *next_space))
{
next_space++;
}
// Advance the string
//
string = next_space;
}
// Handle the case where there are no spaces left in the string
//
if (*string)
{
if (fprintf(f, "%s\n", string) < 0)
{
return -1;
}
}
return 0;
}
you can scan the char array looking for the token if you found it just print new line else print the char.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char *s;
s = malloc(1024 * sizeof(char));
scanf("%[^\n]", s);
s = realloc(s, strlen(s) + 1);
int len = strlen(s);
char delim =' ';
for(int i = 0; i < len; i++) {
if(s[i] == delim) {
printf("\n");
}
else {
printf("%c", s[i]);
}
}
free(s);
return 0;
}
char arr[50];
gets(arr);
int c=0,i,l;
l=strlen(arr);
for(i=0;i<l;i++){
if(arr[i]==32){
printf("\n");
}
else
printf("%c",arr[i]);
}
I've only found a few threads like this, and none with information that I am able to make any sense of. I'm programming a shell in C and I feel like it should be easy but my C programming is not so fresh. I'm having issues with passing a double pointer and the contents disappearing
I feel I am on the right track, and it sounds like it has something to do with initialization, but I've tried a few things, setting pointers to NULL just to be sure. Thanks.
void runProgram (char **cLine);
char **parse(char *str);
/*
*
*/
int main(int argc, char** argv)
{
char *cin = NULL;
ssize_t buffer = 0;
char **tempArgs = NULL;
printf(">");
while(1)
{
getline(&cin, &buffer, stdin);
tempArgs = parse(cin); //malloc, parse, and return
printf("passing %s", tempArgs[0]); //works just fine here, can see the string
runProgram(tempArgs); //enter this function and array is lost
}
return (EXIT_SUCCESS);
}
char** parse( char* str )
{
char *token = NULL;
char tokens[256];
char** args = malloc( 256 );
int i = 0;
strcpy( tokens, str );
args[i] = strtok( tokens, " " );
while( args[i] )
{
i++;
args[i] = strtok(NULL, " ");
}
args[i] = NULL;
return args;
}
Visible in main up until this function call
void runProgram (char **cLine)
{
//function that calls fork and execvp
}
The simplest fix is not to use tokens at all in the parse() function:
int main(void)
{
char *buffer = NULL;
size_t buflen = 0;
char **tempArgs = NULL;
printf("> ");
while (getline(&buffer, &buflen, stdin) != -1)
{
tempArgs = parse(buffer);
printf("passing %s", tempArgs[0]);
runProgram(tempArgs);
printf("> ");
free(tempArgs); // Free the space allocated by parse()
}
free(buffer); // Free the space allocated by getline()
return (EXIT_SUCCESS);
}
char **parse(char *str)
{
char **args = malloc(256);
if (args == 0)
…handle error appropriately…
int i = 0;
args[i] = strtok(str, " ");
// Bounds checking omitted
while (args[i])
args[++i] = strtok(NULL, " ");
return args;
}
Note that when the loop terminates, the array is already null terminated, so the extra assignment wasn't necessary (but it is better to be safe than sorry).
I need to split a char array into CSV's. Actually we can do the reverse of it using strtok() like:
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="This,a,sample,string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str,",");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, ",");
}
return 0;
}
But in my case, there's an char array suppose char bits[1024]="abcdefghijklmn". I need to get the output as a,b,c,d,e,f,g,h,i,j,k,m,n.
Is there any function or library to do this i.e. in terms of raw meaning, for every character it has to put a comma.
Just iterate over the string until you hit the end-of-string '\0' character. Or use the length of the data in the array (which may be smaller than the array size) and use a simple for loop.
This works for a null terminated string. But it will leave a dangling comma at the end.
void tokenise(char *s, char *d)
{
while(*d++ = *s++) *d++ = ',';
}
If you know the length of the string already, you can pass that through. This will not leave a dangling comma.
void tokenise(char *s, char *d, int length)
{
int i = 0;
while((*d++ = *s++) && ((i++)<(length-1))) *d++ = ',';
}
In both examples, s is a pointer to the source string and d points to the output tokenised string. It is up to the calling code to ensure the buffer d points to is sufficiently large.
you can use this simple function from old basic :
// ............................................................. string word at
char * word_at(char *tString, int upTo, char *dilim) {
int wcount;
char *rString, *temp;
temp= (char *) malloc(sizeof(char) * (strlen(tString)+1));
strcpy(temp, tString);
rString= strtok(temp, dilim);
wcount=1;
while (rString != NULL){
if (wcount==upTo) {
return rString;
}
rString= strtok(NULL, dilim);
wcount++;
}
return tString ;
}
parameter : string , index and character delimiter
return : word : ( char *)
If you find easy to implement it, then this could help you to start
char* split_all( char arr[], char ch )
{
char *new, *ptr;
new = ptr = calloc( 1, 2*strlen( arr ) ); // FIXME : Error checks
for( ; *(arr + 1) ; new++, arr++ )
{
*new = *arr;
new++;
*new = ch;
}
*new = *arr;
return ptr;
}
You can re-use, optimize this for your requirement. Its a quick and dirty solution, feel free to fix it..
say I pass an argument www.bbc.co.uk/news/world-us-canada-11893886
I need to separate www.bbc.co.uk from /news/world-us-canada-11893886 for a HTTP GET
I have tried using strtok and strcat but I come across weird splits at runtime.
I can get www.bbc.co.uk just fine using strtok( host, "/");
I have tried using a combination of strtok and strcat to try and get all the rest of the string from the first "/" but i get an output like this...
request: da-11893886
tempString: news/world!
host: www.bbc.co.uk
Path: news/world!da-11893886
If you look at this output, the strangest part is that it always cuts out the middle section.
In this case, the "-us-cana"
the section of the code is attached below
// testing purposes
printf("argv[1]: %s\n", argv[1] );
host = malloc(sizeof(argv[1]));
strcpy(host, argv[1]);
host = strtok(host, "/");
// get the request
request = malloc(sizeof(argv[1]) + sizeof(char)*6);
char *tok, *tempString;
tempString = malloc(sizeof(argv[1]));
tok = strtok( NULL, "\0");
while( tok ) {
strcpy(tempString, tok);
printf("request: %s\n", request);
request = strcat(tempString, request);
tok = strtok(NULL, "\0");
}
printf("host: %s\n", host);
printf("Path: %s\n", request);
Thanks for looking over this.
Any direction or even a link to a site where I can figure out how to do this would be much appreciated.
Here's some code that does more than you want. Note that this modifies the original string - you may want to make copies instead:
void split_request(char *request, char **protocol, char **addr, char **path)
{
char *ptr = strstr(request, "://");
if(NULL == ptr)
{
*protocol = NULL;
*addr = request;
}
else
{
*protocol = request;
*addr = ptr + 3;
*ptr = '\0';
}
ptr = strchr(*addr, '/');
if(NULL == ptr)
{
*path = NULL;
}
else
{
*path = ptr + 1;
*ptr = '\0';
}
}
Please excuse any typos/obvious errors. I'm typing this in a hurry as I have work to do :P
It should get you started though.
I have modified your code to work the way you are expecting
main(int argc, char *argv[])
{
char *request,*host,*req;
char *tok, *tempString;
printf("argv[1]: %s\n", argv[1] );
host = malloc(strlen(argv[1]));
strcpy(host, argv[1]);
host = strtok(host, "/");
tempString = malloc(strlen(argv[1]));
tok = strtok( NULL, "\0");
printf("sizeof(tok) %d\n",strlen(tok));
strncpy(tempString, tok,strlen(tok));
while( tok ) {
tok = strtok(NULL, "\0");
if (tok != NULL) {
strncat(tempString, tok,strlen(tok));
}else {
break;
}
}
request = tempString;
printf("host: %s\n", host);
printf("Path: %s\n", request);
}
~
Output
./tmp www.bbc.co.uk/news/world-us-canada-11893886/tmp.htmlargv[1]: www.bbc.co.uk/news/world-us-canada-11893886/tmp.html
sizeof(tok) 38
host: www.bbc.co.uk
Path: news/world-us-canada-11893886/tmp.html
bash-2.03$
~
Use strrchr() to find the last occurrence of '/' from the rear. You will then have a pointer to the start of 'the end of the web address' if you add one to that returned pointer.
Update
Assuming your URL does not start with http://, this aught to work
#include <stdio.h>
#include <string.h>
int main(void)
{
char url[] = "www.bbc.co.uk/news/world-us-canada-11893886";
int cnt;
char host[100];
char path[100];
char request[100];
strcpy(request, strrchr(url, '/'));
strcpy(host, url);
host[cnt = strcspn(url, "/")] = '\0';
strcpy(path, &url[cnt]);
printf("host: %s\npath: %s\nrequest: %s\n", host, path, request);
return 0;
}
Output
$ ./a.out
host: www.bbc.co.uk
path: /news/world-us-canada-11893886
request: /world-us-canada-11893886
strrchr() returns the LAST instance of the character. He wants the FIRST instance after any http:// string.
The answer is simple:
char *address_start = strchr(in_string+8, '/');
If it's non NULl then there you are at the first / of the path.
Why +8? Because "https://" is 8 characters long and even if there is no "http://" at the beginning, no IP or web address is less than 8 characters. Even "a.b.c.d" is 7 characters long and I don't believe an IPv4 dotted numerical notation has any legal public address with all single digits. I might be wrong though. Might be worth validating the string to check it's long enough first.
Anyway, you can always pre-validate the string to see if it begins with "http" or not to determine the offset to start searching at.
I wrote a simple url parser using strtok(). here's the code
#include <stdio.h>
#include <stdlib.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} aUrl;
void parse_url(char *url, aUrl *ret) {
printf("Parsing %s\n", url);
char *tmp = (char *)_strdup(url);
//char *protocol, *host, *port, *path;
int len = 0;
// protocol agora eh por exemplo http: ou https:
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
//printf("char at %d => %c", len, url[len]);
ret->path = (char *)_strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
// host agora é por exemplo address.com:8080
//tmp = (char *)_strdup(host);
//strtok(tmp, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if(tmp == NULL) {
if(strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if(strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
//host = (char *) strtok(NULL, "/");
}
/*
*
*/
int main(int argc, char** argv) {
printf("hello moto\n");
aUrl myUrl;
parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);
printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n", myUrl.protocol, myUrl.host, myUrl.port, myUrl.path);
return (EXIT_SUCCESS);
}
As you can see, I use strtok() a lot so I can "slice" the url. I don't need to support urls different than http or https so the way it's done solves all of my problems.
My concern is (this is running on an embedded device) - Am I wasting memory ?
When I write something like
ret->protocol = (char *) strtok(tmp, "/");
And then later call
ret->protocol = (char *) strtok(ret->protocol, ":");
Does me first pointer ret->protocol held remain in memory ? I thought that maybe I should set the first call to a tmp pointer, call strtok pointing ret->protocol to the right portion of the string (the second call) and then free(tmp).
What should be the best way to use strtok ?
To answer your question directly, strtok only returns a pointer to a location inside the string you give it as input-- it doesn't allocate new memory for you, so shouldn't need to call free on any of the pointers it gives you back in return.
For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.
Also note that your memory allocation is problematic here-- you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of "ret". Your caller will thus be responsible for free'ing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to know magically what pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)
strtok modifies the string in place, replacing the specified characters with NULL. Since strings in C are NULL-terminated, it now appears that your original pointer is pointing to a shorter string, even though the original string is still there and still occupies the same amount of memory (but with characters replaced with NULL). The end of the string, I think, contains a double-NULL.
The short answer is this: Keep a pointer to the beginning of your string buffer, and have another pointer that is your "current" pointer into the string as you parse it. When you use strtok or iterate over the string in other ways you update the "current" pointer but leave the beginning pointer alone. When you're finished, free() the beginning pointer. No memory leaked.
Do you know you can continue parsing the string using NULL as first parameter of strtok?
First call:
char* token = strtok(string, delimiters);
Then:
token = strtok(NULL, other_delimiters);
This allow you to simplify your code:
int parse_url(char *url, aUrl *ret)
{
//get protocol
char* token = strtok(url, "/");
if( token == NULL )
return -1;
strcpy(ret->protocol, token);
strcat(ret->protocol, "//");
// skip next '/'
token = strtok(NULL, "/");
if( token == NULL )
return -1;
//get host
token = strtok(NULL, "/");
if( token == NULL )
return -1;
strcpy(ret->host, token);
// get path
token = strtok(NULL, "#");
if( token == NULL )
return -1;
strcpy(ret->path, token);
// ...
return 0;
}
You can see I had a return value to know if parsing was successfully done.
Thanks for sharing your code! I ran it inside valgrind and fixed two memory leaks generated by strdup functions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} URL;
void parse_url(char *url, URL *ret) {
char *tmp = (char *) strdup(url);
int len = 0;
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
ret->path = (char *) strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if (tmp == NULL) {
if (strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if (strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
}
void free_url(URL *url) {
free(url->path);
free(url->protocol);
}
int main(int argc, char** argv) {
URL url;
parse_url("http://example.com:3000/Teste/asdf#coisa", &url);
printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n", url.protocol, url.host, url.port, url.path);
free_url(&url);
return (EXIT_SUCCESS);
}