I've only found a few threads like this, and none with information that I am able to make any sense of. I'm programming a shell in C and I feel like it should be easy but my C programming is not so fresh. I'm having issues with passing a double pointer and the contents disappearing
I feel I am on the right track, and it sounds like it has something to do with initialization, but I've tried a few things, setting pointers to NULL just to be sure. Thanks.
void runProgram (char **cLine);
char **parse(char *str);
/*
*
*/
int main(int argc, char** argv)
{
char *cin = NULL;
ssize_t buffer = 0;
char **tempArgs = NULL;
printf(">");
while(1)
{
getline(&cin, &buffer, stdin);
tempArgs = parse(cin); //malloc, parse, and return
printf("passing %s", tempArgs[0]); //works just fine here, can see the string
runProgram(tempArgs); //enter this function and array is lost
}
return (EXIT_SUCCESS);
}
char** parse( char* str )
{
char *token = NULL;
char tokens[256];
char** args = malloc( 256 );
int i = 0;
strcpy( tokens, str );
args[i] = strtok( tokens, " " );
while( args[i] )
{
i++;
args[i] = strtok(NULL, " ");
}
args[i] = NULL;
return args;
}
Visible in main up until this function call
void runProgram (char **cLine)
{
//function that calls fork and execvp
}
The simplest fix is not to use tokens at all in the parse() function:
int main(void)
{
char *buffer = NULL;
size_t buflen = 0;
char **tempArgs = NULL;
printf("> ");
while (getline(&buffer, &buflen, stdin) != -1)
{
tempArgs = parse(buffer);
printf("passing %s", tempArgs[0]);
runProgram(tempArgs);
printf("> ");
free(tempArgs); // Free the space allocated by parse()
}
free(buffer); // Free the space allocated by getline()
return (EXIT_SUCCESS);
}
char **parse(char *str)
{
char **args = malloc(256);
if (args == 0)
…handle error appropriately…
int i = 0;
args[i] = strtok(str, " ");
// Bounds checking omitted
while (args[i])
args[++i] = strtok(NULL, " ");
return args;
}
Note that when the loop terminates, the array is already null terminated, so the extra assignment wasn't necessary (but it is better to be safe than sorry).
Related
I am trying to extract words from a string like this:
(octopus kitten) (game cake) (soccer football)
I attempted doing this with the help of strtok (I do strcpy just for not modifying the original token/string, also used memcpy, but it does the same in my case).
Main function:
int main(int argc, char * argv[]) {
char row[] = "(octopus kitten) (game cake) (soccer football)";
char * pch;
pch = strtok(row, "(");
while (pch != NULL) {
pch[strcspn(pch, ")")] = '\0';
print_word(pch);
pch = strtok(NULL, "(");
}
return 0;
}
Function for getting and printing each word:
void get_and_print_word(char str[]) {
char r[4000];
// for not modifying the original string
strcpy(r, str);
char * c = strtok(r, " ");
for (int i = 0; i < 2; i++) {
printf("%s\n", c);
c = strtok(NULL, " ");
}
}
It works absolutely fine with a first iteration, but after pch starts to point to another adress of memory (but it should point to the adress of letter "g").
It works absolutely fine (it's just printing string within the brackets) if we remove get_and_print_word(pch):
int main(int argc, char * argv[]) {
char row[] = "(octopus kitten) (game cake) (soccer football)";
char * pch;
pch = strtok(row, "(");
while (pch != NULL) {
pch[strcspn(pch, ")")] = '\0';
printf("%s\n", pch);
pch = strtok(NULL, "(");
}
return 0;
}
But that's not what I want to do, I need to get each word, not just a string of two words and space between them.
Using pch = strtok(NULL, " )(") is also not appropriate in my case, cause I need to store each pair of words (each word, of couse, should be a separate string) in some individual
struct, so I definitely need this function.
How to solve this issue and why it works like this?
Why not use regular expression :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
int main (int argc, char *argv[])
{
int err;
regex_t preg;
const char *str_request = argv[1];
const char *str_regex = argv[2];
err = regcomp (&preg, str_regex, REG_EXTENDED);
if (err == 0) {
int match;
size_t nmatch = 0;
regmatch_t *pmatch = NULL;
nmatch = preg.re_nsub;
pmatch = malloc (sizeof (*pmatch) * nmatch);
char *buffer;
if (pmatch) {
buffer = (char *) str_request;
match = regexec (&preg, buffer, nmatch, pmatch, 0);
while (match == 0) {
char *found = NULL;
size_t size ;
int start, end;
start = pmatch[0].rm_so;
end = pmatch[0].rm_eo;
size = end - start;
found = malloc (sizeof (*found) * (size + 1));
if (found) {
strncpy (found, &buffer[start], size);
found[size] = '\0';
printf ("found : %s\n", found);
free (found);
}
//searching next occurence
match = regexec (&preg, (buffer += end), nmatch, pmatch, 0);
}
regfree (&preg);
free (pmatch);
}
}
return 0;
}
[puppet#damageinc regex]$ ./regex "(octopus kitten) (game cake) (soccer football)" "([a-z]+)"
found : octopus
found : kitten
found : game
found : cake
found : soccer
found : football
I am getting used to writing eBPF code as of now and want to avoid using pointers in my BPF text due to how difficult it is to get a correct output out of it. Using strtok() seems to be out of the question due to all of the example codes requiring pointers. I also want to expand it to CSV files in the future since this is a means of practice for me. I was able to find another user's code here but it gives me an error with the BCC terminal due to the one pointer.
char str[256];
bpf_probe_read_user(&str, sizeof(str), (void *)PT_REGS_RC(ctx));
char token[] = strtok(str, ",");
char input[] ="first second third forth";
char delimiter[] = " ";
char firstWord, *secondWord, *remainder, *context;
int inputLength = strlen(input);
char *inputCopy = (char*) calloc(inputLength + 1, sizeof(char));
strncpy(inputCopy, input, inputLength);
str = strtok_r (inputCopy, delimiter, &context);
secondWord = strtok_r (NULL, delimiter, &context);
remainder = context;
getchar();
free(inputCopy);
Pointers are powerful, and you wont be able to avoid them for very long. The time you invest in learning them is definitively worth it.
Here is an example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
Extracts the word with the index "n" in the string "str".
Words are delimited by a blank space or the end of the string.
}*/
char *getWord(char *str, int n)
{
int words = 0;
int length = 0;
int beginIndex = 0;
int endIndex = 0;
char currentchar;
while ((currentchar = str[endIndex++]) != '\0')
{
if (currentchar == ' ')
{
if (n == words)
break;
if (length > 0)
words++;
length = 0;
beginIndex = endIndex;
continue;
}
length++;
}
if (n == words)
{
char *result = malloc(sizeof(char) * length + 1);
if (result == NULL)
{
printf("Error while allocating memory!\n");
exit(1);
}
memcpy(result, str + beginIndex, length);
result[length] = '\0';
return result;
}else
return NULL;
}
You can easily use the function:
int main(int argc, char *argv[])
{
char string[] = "Pointers are cool!";
char *word = getWord(string, 2);
printf("The third word is: '%s'\n", word);
free(word); //Don't forget to de-allocate the memory!
return 0;
}
I have been trying to tokenize a string using SPACE as delimiter but it doesn't work. Does any one have suggestion on why it doesn't work?
Edit: tokenizing using:
strtok(string, " ");
The code is like the following
pch = strtok (str," ");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ");
}
Do it like this:
char s[256];
strcpy(s, "one two three");
char* token = strtok(s, " ");
while (token) {
printf("token: %s\n", token);
token = strtok(NULL, " ");
}
Note: strtok modifies the string its tokenising, so it cannot be a const char*.
Here's an example of strtok usage, keep in mind that strtok is destructive of its input string (and therefore can't ever be used on a string constant
char *p = strtok(str, " ");
while(p != NULL) {
printf("%s\n", p);
p = strtok(NULL, " ");
}
Basically the thing to note is that passing a NULL as the first parameter to strtok tells it to get the next token from the string it was previously tokenizing.
strtok can be very dangerous. It is not thread safe. Its intended use is to be called over and over in a loop, passing in the output from the previous call. The strtok function has an internal variable that stores the state of the strtok call. This state is not unique to each thread - it is global. If any other code uses strtok in another thread, you get problems. Not the kind of problems you want to track down either!
I'd recommend looking for a regex implementation, or using sscanf to pull apart the string.
Try this:
char strprint[256];
char text[256];
strcpy(text, "My string to test");
while ( sscanf( text, "%s %s", strprint, text) > 0 ) {
printf("token: %s\n", strprint);
}
Note: The 'text' string is destroyed as it's separated. This may not be the preferred behaviour =)
You can simplify the code by introducing an extra variable.
#include <string.h>
#include <stdio.h>
int main()
{
char str[100], *s = str, *t = NULL;
strcpy(str, "a space delimited string");
while ((t = strtok(s, " ")) != NULL) {
s = NULL;
printf(":%s:\n", t);
}
return 0;
}
I've made some string functions in order to split values, by using less pointers as I could because this code is intended to run on PIC18F processors. Those processors does not handle really good with pointers when you have few free RAM available:
#include <stdio.h>
#include <string.h>
char POSTREQ[255] = "pwd=123456&apply=Apply&d1=88&d2=100&pwr=1&mpx=Internal&stmo=Stereo&proc=Processor&cmp=Compressor&ip1=192&ip2=168&ip3=10&ip4=131&gw1=192&gw2=168&gw3=10&gw4=192&pt=80&lic=&A=A";
int findchar(char *string, int Start, char C) {
while((string[Start] != 0)) { Start++; if(string[Start] == C) return Start; }
return -1;
}
int findcharn(char *string, int Times, char C) {
int i = 0, pos = 0, fnd = 0;
while(i < Times) {
fnd = findchar(string, pos, C);
if(fnd < 0) return -1;
if(fnd > 0) pos = fnd;
i++;
}
return fnd;
}
void mid(char *in, char *out, int start, int end) {
int i = 0;
int size = end - start;
for(i = 0; i < size; i++){
out[i] = in[start + i + 1];
}
out[size] = 0;
}
void getvalue(char *out, int index) {
mid(POSTREQ, out, findcharn(POSTREQ, index, '='), (findcharn(POSTREQ, index, '&') - 1));
}
void main() {
char n_pwd[7];
char n_d1[7];
getvalue(n_d1, 1);
printf("Value: %s\n", n_d1);
}
When reading the strtok documentation, I see you need to pass in a NULL pointer after the first "initializing" call. Maybe you didn't do that. Just a guess of course.
Here is another strtok() implementation, which has the ability to recognize consecutive delimiters (standard library's strtok() does not have this)
The function is a part of BSD licensed string library, called zString. You are more than welcome to contribute :)
https://github.com/fnoyanisi/zString
char *zstring_strtok(char *str, const char *delim) {
static char *static_str=0; /* var to store last address */
int index=0, strlength=0; /* integers for indexes */
int found = 0; /* check if delim is found */
/* delimiter cannot be NULL
* if no more char left, return NULL as well
*/
if (delim==0 || (str == 0 && static_str == 0))
return 0;
if (str == 0)
str = static_str;
/* get length of string */
while(str[strlength])
strlength++;
/* find the first occurance of delim */
for (index=0;index<strlength;index++)
if (str[index]==delim[0]) {
found=1;
break;
}
/* if delim is not contained in str, return str */
if (!found) {
static_str = 0;
return str;
}
/* check for consecutive delimiters
*if first char is delim, return delim
*/
if (str[0]==delim[0]) {
static_str = (str + 1);
return (char *)delim;
}
/* terminate the string
* this assignmetn requires char[], so str has to
* be char[] rather than *char
*/
str[index] = '\0';
/* save the rest of the string */
if ((str + index + 1)!=0)
static_str = (str + index + 1);
else
static_str = 0;
return str;
}
As mentioned in previous posts, since strtok(), or the one I implmented above, relies on a static *char variable to preserve the location of last delimiter between consecutive calls, extra care should be taken while dealing with multi-threaded aplications.
int not_in_delimiter(char c, char *delim){
while(*delim != '\0'){
if(c == *delim) return 0;
delim++;
}
return 1;
}
char *token_separater(char *source, char *delimiter, char **last){
char *begin, *next_token;
char *sbegin;
/*Get the start of the token */
if(source)
begin = source;
else
begin = *last;
sbegin = begin;
/*Scan through the string till we find character in delimiter. */
while(*begin != '\0' && not_in_delimiter(*begin, delimiter)){
begin++;
}
/* Check if we have reached at of the string */
if(*begin == '\0') {
/* We dont need to come further, hence return NULL*/
*last = NULL;
return sbegin;
}
/* Scan the string till we find a character which is not in delimiter */
next_token = begin;
while(next_token != '\0' && !not_in_delimiter(*next_token, delimiter)) {
next_token++;
}
/* If we have not reached at the end of the string */
if(*next_token != '\0'){
*last = next_token--;
*next_token = '\0';
return sbegin;
}
}
void main(){
char string[10] = "abcb_dccc";
char delim[10] = "_";
char *token = NULL;
char *last = "" ;
token = token_separater(string, delim, &last);
printf("%s\n", token);
while(last){
token = token_separater(NULL, delim, &last);
printf("%s\n", token);
}
}
You can read detail analysis at blog mentioned in my profile :)
I tried really hard to search for a solution to this but I can't think of good enough keywords.
Currently I'm having troubles grasping the concept behind makeargv and it's usage with triple pointers (I have no idea what ***foo means, it doesn't seem to be as easy of a concept as **foo or *foo). So I made my own:
const char **makeargv(char *string, int *numargs) {
string = string + strspn(string, delims);
char *copy = malloc(strlen(string) + 1);
int i;
strcpy(copy, string);
int numtokens;
if (strtok(copy, delims) != NULL) {
for (numtokens = 1; strtok(NULL, delims) != NULL; numtokens++) {}
}
strcpy(copy, string);
const char *results[numtokens+1];
results[0] = strtok(copy, delims);
for (i = 1; i < numtokens; i++) {
results[i] = strtok(NULL, delims);
}
results[numtokens+1] = NULL;
*numargs = numtokens;
return results;
}
Here's the part at where it breaks:
void parse_file(char* filename) {
char* line = malloc(160*sizeof(char));
FILE* fp = file_open(filename);
int i = 0;
int numargs = 0;
int *pointer = &numargs;
while((line = file_getline(line, fp)) != NULL) {
if (strlen(line) == 1){
continue;
}
const char **args = makeargv(line, pointer);
printf("%s\n", args[0]);
printf("%s\n", args[1]);
/* This prints out args[0], but then args[1] causes a seg fault. Even if I replace
the args[1] with another args[0] it still causes a seg fault */
}
fclose(fp);
free(line);
}
I have a working array of strings. However when I try to print out the strings in the array, I can only print 1 of my choice and then it seg faults for any subsequent calls. lets pretend my array of strings is argv[3] = {"Yes", "no", "maybe"}, if i call argv[0], it will let me call "Yes", but any other calls (even if i call argv[0] again) do not work and cause a segfault. I can call any of the elements in the array, but once i call one the rest cease to work causing segfaults.
Help please? D: This is in C.
const char *results[numtokens+1];
This array "results" is a local variable, it is only available inside of "makeargv".
You'd better use malloc:
results = malloc(numtokens+1)
And I believe there is memory leak in your code.
You will not be able to free the memory for "char *copy"
char *copy = malloc(strlen(string) + 1);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **makeargv(char *string, int *numargs) {
static const char *delims = " \t\n";
string = string + strspn(string, delims);
char *copy = malloc(strlen(string) + 1), *p = copy;
strcpy(copy, string);
int numtokens;
for (numtokens = 0; strtok(p, delims); ++numtokens, p = NULL);
char **results = malloc(sizeof(char*)*(numtokens+1));
strcpy(copy, string);
int i;
p = copy;
for (i = 0; i < numtokens; ++i, p = NULL)
results[i] = strtok(p, delims);
results[i] = NULL;
*numargs = numtokens;
return results;
}
FILE *file_open(char *filename){
FILE *fp = fopen(filename, "r");
if(!fp){
perror("file_open");
exit(1);
}
return fp;
}
void parse_file(char* filename) {
char* line = malloc(160*sizeof(char));
FILE* fp = file_open(filename);
int i = 0, numargs = 0;
while(fgets(line, 160, fp)){
if (*line == '\n')
continue;
char **args = makeargv(line, &numargs);
for(i = 0;i<numargs;++i)
printf("%s\n", args[i]);
printf("\n");
if(args[0])
free(args[0]);
free(args);
}
fclose(fp);
free(line);
}
int main(int argc, char *argv[]){
parse_file(argv[1]);
return 0;
}
I have been trying to tokenize a string using SPACE as delimiter but it doesn't work. Does any one have suggestion on why it doesn't work?
Edit: tokenizing using:
strtok(string, " ");
The code is like the following
pch = strtok (str," ");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ");
}
Do it like this:
char s[256];
strcpy(s, "one two three");
char* token = strtok(s, " ");
while (token) {
printf("token: %s\n", token);
token = strtok(NULL, " ");
}
Note: strtok modifies the string its tokenising, so it cannot be a const char*.
Here's an example of strtok usage, keep in mind that strtok is destructive of its input string (and therefore can't ever be used on a string constant
char *p = strtok(str, " ");
while(p != NULL) {
printf("%s\n", p);
p = strtok(NULL, " ");
}
Basically the thing to note is that passing a NULL as the first parameter to strtok tells it to get the next token from the string it was previously tokenizing.
strtok can be very dangerous. It is not thread safe. Its intended use is to be called over and over in a loop, passing in the output from the previous call. The strtok function has an internal variable that stores the state of the strtok call. This state is not unique to each thread - it is global. If any other code uses strtok in another thread, you get problems. Not the kind of problems you want to track down either!
I'd recommend looking for a regex implementation, or using sscanf to pull apart the string.
Try this:
char strprint[256];
char text[256];
strcpy(text, "My string to test");
while ( sscanf( text, "%s %s", strprint, text) > 0 ) {
printf("token: %s\n", strprint);
}
Note: The 'text' string is destroyed as it's separated. This may not be the preferred behaviour =)
You can simplify the code by introducing an extra variable.
#include <string.h>
#include <stdio.h>
int main()
{
char str[100], *s = str, *t = NULL;
strcpy(str, "a space delimited string");
while ((t = strtok(s, " ")) != NULL) {
s = NULL;
printf(":%s:\n", t);
}
return 0;
}
I've made some string functions in order to split values, by using less pointers as I could because this code is intended to run on PIC18F processors. Those processors does not handle really good with pointers when you have few free RAM available:
#include <stdio.h>
#include <string.h>
char POSTREQ[255] = "pwd=123456&apply=Apply&d1=88&d2=100&pwr=1&mpx=Internal&stmo=Stereo&proc=Processor&cmp=Compressor&ip1=192&ip2=168&ip3=10&ip4=131&gw1=192&gw2=168&gw3=10&gw4=192&pt=80&lic=&A=A";
int findchar(char *string, int Start, char C) {
while((string[Start] != 0)) { Start++; if(string[Start] == C) return Start; }
return -1;
}
int findcharn(char *string, int Times, char C) {
int i = 0, pos = 0, fnd = 0;
while(i < Times) {
fnd = findchar(string, pos, C);
if(fnd < 0) return -1;
if(fnd > 0) pos = fnd;
i++;
}
return fnd;
}
void mid(char *in, char *out, int start, int end) {
int i = 0;
int size = end - start;
for(i = 0; i < size; i++){
out[i] = in[start + i + 1];
}
out[size] = 0;
}
void getvalue(char *out, int index) {
mid(POSTREQ, out, findcharn(POSTREQ, index, '='), (findcharn(POSTREQ, index, '&') - 1));
}
void main() {
char n_pwd[7];
char n_d1[7];
getvalue(n_d1, 1);
printf("Value: %s\n", n_d1);
}
When reading the strtok documentation, I see you need to pass in a NULL pointer after the first "initializing" call. Maybe you didn't do that. Just a guess of course.
Here is another strtok() implementation, which has the ability to recognize consecutive delimiters (standard library's strtok() does not have this)
The function is a part of BSD licensed string library, called zString. You are more than welcome to contribute :)
https://github.com/fnoyanisi/zString
char *zstring_strtok(char *str, const char *delim) {
static char *static_str=0; /* var to store last address */
int index=0, strlength=0; /* integers for indexes */
int found = 0; /* check if delim is found */
/* delimiter cannot be NULL
* if no more char left, return NULL as well
*/
if (delim==0 || (str == 0 && static_str == 0))
return 0;
if (str == 0)
str = static_str;
/* get length of string */
while(str[strlength])
strlength++;
/* find the first occurance of delim */
for (index=0;index<strlength;index++)
if (str[index]==delim[0]) {
found=1;
break;
}
/* if delim is not contained in str, return str */
if (!found) {
static_str = 0;
return str;
}
/* check for consecutive delimiters
*if first char is delim, return delim
*/
if (str[0]==delim[0]) {
static_str = (str + 1);
return (char *)delim;
}
/* terminate the string
* this assignmetn requires char[], so str has to
* be char[] rather than *char
*/
str[index] = '\0';
/* save the rest of the string */
if ((str + index + 1)!=0)
static_str = (str + index + 1);
else
static_str = 0;
return str;
}
As mentioned in previous posts, since strtok(), or the one I implmented above, relies on a static *char variable to preserve the location of last delimiter between consecutive calls, extra care should be taken while dealing with multi-threaded aplications.
int not_in_delimiter(char c, char *delim){
while(*delim != '\0'){
if(c == *delim) return 0;
delim++;
}
return 1;
}
char *token_separater(char *source, char *delimiter, char **last){
char *begin, *next_token;
char *sbegin;
/*Get the start of the token */
if(source)
begin = source;
else
begin = *last;
sbegin = begin;
/*Scan through the string till we find character in delimiter. */
while(*begin != '\0' && not_in_delimiter(*begin, delimiter)){
begin++;
}
/* Check if we have reached at of the string */
if(*begin == '\0') {
/* We dont need to come further, hence return NULL*/
*last = NULL;
return sbegin;
}
/* Scan the string till we find a character which is not in delimiter */
next_token = begin;
while(next_token != '\0' && !not_in_delimiter(*next_token, delimiter)) {
next_token++;
}
/* If we have not reached at the end of the string */
if(*next_token != '\0'){
*last = next_token--;
*next_token = '\0';
return sbegin;
}
}
void main(){
char string[10] = "abcb_dccc";
char delim[10] = "_";
char *token = NULL;
char *last = "" ;
token = token_separater(string, delim, &last);
printf("%s\n", token);
while(last){
token = token_separater(NULL, delim, &last);
printf("%s\n", token);
}
}
You can read detail analysis at blog mentioned in my profile :)