Just to clarify, I'm a complete novice in C programming.
I have a tokenize function that isn't behaving the way I expect. This is the server side: it reads from the FIFO (named pipe) that the client writes to; the client reads a file and passes its contents to the FIFO. The problem is that tokenize doesn't return its result in a format that execvp can process: running gdb tells me that it fails when calling the execute function in main(). (The append function appends a char to the string.)
One bug is that tokens is neither initialized nor allocated any memory.
Here is an example on how to initialize and allocate memory for tokens:
/*
 * Split `line` in place into tokens separated by spaces or tabs.
 *
 * `line` is first passed through append() (defined elsewhere; per the
 * question it appends one char to the string) and the returned buffer is
 * the one that strtok() cuts up.
 *
 * Returns a heap-allocated, NULL-terminated array of pointers into that
 * buffer, in the form execvp() expects for argv. The array is never NULL:
 * a line without tokens yields an array whose first element is NULL.
 * The caller frees the array itself; the tokens are not separate
 * allocations. On allocation failure the program exits with status 1.
 */
char **tokenize(char *line)
{
    const char *delimiter = " \t";
    char **tokens = NULL;
    size_t used = 0;
    size_t capacity = 0;
    char *token;

    line = append(line, '\0');
    token = strtok(line, delimiter);
    for (;;) {
        if (used == capacity) {
            /* Allocate more space, 10 slots at a time */
            char **grown;

            capacity += 10;
            grown = realloc(tokens, capacity * sizeof *tokens);
            if (grown == NULL) {
                exit(1);
            }
            tokens = grown;
        }
        /*
         * The terminating NULL goes through the same growth check as the
         * tokens, so it always has a slot, even for zero tokens or a
         * token count that is an exact multiple of 10.
         */
        tokens[used] = token;
        if (token == NULL) {
            break;
        }
        used++;
        token = strtok(NULL, delimiter);
    }
    return tokens;
}
This code will allocate memory for 10 tokens at a time. If the memory allocation fails, it will end the program with a non-zero return value to indicate failure.
Related
I'm trying to create an array of array of strings to prepare them to be shown in a table.
So I have a function that returns a buffer string with the list of some scanned wifi access points, and I'm using strsep to split it by "\n" and then again by "\t".
The loop runs fine until it reaches the end and when the while argument ((line = strsep(&buf, "\n"))) is evaluated it gives a SEGFAULT.
Short illustrative example, as requested by @Jabberwocky:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Count the '\n' bytes in the NUL-terminated string wap_scan.
 * A final line that is not terminated by '\n' is therefore not counted.
 */
static int
wap_scan_count_lines(char* wap_scan)
{
    int newlines = 0;

    for (const char *p = wap_scan; *p != '\0'; p++) {
        newlines += (*p == '\n');
    }
    return newlines;
}
/*
 * Minimal reproduction: splits a two-line scan result on "\n", then each
 * line after the header on "\t", storing strdup'd fields in scan_result.
 */
int main() {
char ***scan_result, *line=NULL, *item=NULL, *scan=NULL;
scan = strdup("bssid / frequency / signal level / flags / ssid\n"
"a8:6a:bb:e2:d6:ef 5785 -47 [WPA-PSK-CCMP+TKIP][WPA2-PSK-CCMP+TKIP][WPS][ESS] Fibertel WiFi114 5.8GHz");
/* Counts only '\n' bytes; the sample's last line has no trailing '\n'. */
int wap_scan_size = wap_scan_count_lines(scan);
/* Room for wap_scan_size entries; neither malloc nor strdup is checked. */
scan_result = malloc(wap_scan_size * sizeof(**scan_result));
int i = 0;
int item_len = sizeof (*scan_result);
/* strsep also returns the final, unterminated line before returning NULL. */
while((line = strsep(&scan, "\n")) != NULL ) {
/* Skip the header line; index 0 of scan_result is never written. */
if(i==0){
i++;
continue;
}
/* Five slots per line; j below is not checked against 5. */
char **scan_line = calloc(5, item_len);
int j = 0;
while ((item = strsep(&line, "\t")) != NULL) {
printf("%s\n", item);
scan_line[j++] = strdup(item);
}
/* i is not checked against wap_scan_size before this store. */
scan_result[i++] = scan_line;
}
return 0;
}
The real function that gives me the problem:
/*
 * Issues "SCAN" and then "SCAN_RESULTS" through wpa_ctrl_command() and
 * splits the returned text into a table: one char** row per line after the
 * header line, one strdup'd string per tab-separated field.
 *
 * len: receives the number of '\n' bytes found in the scan result.
 * Returns the table, or NULL (after printing a message) when a command
 * returns a negative value.
 */
char *** wifi_client_get_wap_list(int *len)
{
char ***scan_result;
char *buf, *buf_free, *cmd, *line, *item;
int ret, items_len;
cmd = strdup("SCAN");
ret = wpa_ctrl_command(cmd, NULL);
if (ret < 0) goto error;
/* The "SCAN" string held by cmd is not freed before cmd is overwritten. */
cmd = strdup("SCAN_RESULTS");
ret = wpa_ctrl_command(cmd, &buf); //RETURNS A STRING ON BUF ALLOCATED BY STRDUP
if (ret < 0){
/* NOTE(review): buf has no initializer; whether it is set on failure depends on wpa_ctrl_command - confirm. */
free(buf);
goto error;
}
*len = wap_scan_count_lines(buf); //NUMBER OF LINES IN THE SCAN RESULT
/* *len entries; rows are stored from index 1 upward because i is bumped for the header. */
scan_result = calloc(*len, sizeof(**scan_result));
int i = 0, j;
/* strsep advances buf, so keep the original pointer for free(). */
buf_free = buf;
items_len = sizeof (*scan_result);
while ((line = strsep(&buf, "\n"))){ //THIS GIVES THE SEGFAULT AT THE END
// SKIP FIRST LINE WITH HEADERS
if (i==0){
i++;
continue;
}
//if (strcmp(line, "") == 0) {
// break;
//}
//EACH LINE HAS 5 VALUES (bssid, freq, level,flags,ssid)
char **scan_line = calloc(5, items_len);
j = 0;
printf("INNER STEPS:\n");
/* j is not checked against the 5 slots allocated above. */
while((item = strsep(&line, "\t"))){
*(scan_line + j) = strdup(item);
printf("%d ", j);
j++;
}
/* i is not checked against *len before this store. */
*(scan_result + i) = scan_line;
printf("\nSTEP: %d\n", i);
i++;
}
free(buf_free);
free(cmd);
return scan_result;
error:
// #TODO: Handle error
if (ret == -2) {
printf("'%s' command timed out.\n", cmd);
} else if (ret < 0) {
printf("'%s' command failed.\n", cmd);
}
free(cmd);
return NULL;
}
Based on https://man7.org/linux/man-pages/man3/strsep.3.html the issue is that the loop will run one more time than you want it to, causing scan_result to overflow.
The relevant parts of the documentation are:
The strsep() function returns a pointer to the token, that is, it
returns the original value of *stringp.
and
If *stringp is NULL, the strsep() function returns NULL and does
nothing else. Otherwise, this function finds the first token in
the string *stringp, that is delimited by one of the bytes in the
string delim. This token is terminated by overwriting the
delimiter with a null byte ('\0'), and *stringp is updated to
point past the token. In case no delimiter was found, the token
is taken to be the entire string *stringp, and *stringp is made
NULL.
In wap_scan_count_lines you count the number of lines that are terminated with '\n'.
In the following 2 lines, you allocate the memory to hold the result based on the number of lines terminated with '\n'.
int wap_scan_size = wap_scan_count_lines(scan);
scan_result = malloc(wap_scan_size * sizeof(**scan_result));
However, the above quoted documentation for strsep() implies that in your simplified example the first wap_scan_size times strsep is called, at the end of the call the result will not be NULL and scan won't be set to NULL during the call. The next time through the call, scan will be set to NULL during the call but the result will not be NULL. This means that the body of the loop will be executed wap_scan_size + 1 times, causing a write past the end of scan_result.
There are at least two possible fixes, depending on whether you actually want to process any line at the end of the input that is not terminated by '\n'.
If you do need to process such lines, which seems more robust to me, particularly given that your simplified example ends with such a line, just allocate one extra entry in scan_result:
scan_result = malloc((wap_scan_size + 1) * sizeof(**scan_result));
If you are quite sure that you do not need to process such lines, but this seems incorrect to me, change:
while((line = strsep(&scan, "\n")) != NULL ) {
to
for(line = strsep(&scan, "\n"); scan != NULL; line = strsep(&scan, "\n") ) {
I read the previous questions on dynamic arrays in C however I was not able to relate the answers to my question.
I am taking commands from stdin using fgets, removing the newline character and then want to store each command delimited by a space in a dynamically allocated array of strings. I however am having a lot of trouble with the correct way to allocated and reallocate memory. I am compiling with clang and keep getting segmentation fault 11. I then used -fsanitize=address and keep getting:
==2286==ERROR: AddressSanitizer: heap-buffer-overflow on address
0x60200000eeb8 at pc 0x000108fb6f85 bp 0x7fff56c49560 sp 0x7fff56c49558
WRITE of size 8 at 0x60200000eeb8 thread T0
Here is my code:
// Sets a delimiter to split the input
const char *seperator = " ";
char *token = strtok(line, seperator);
// Requests sizeof(char) * sizeof(*cmds) bytes, i.e. the size of one char* element
char **cmds = (char **) malloc(sizeof(char) * sizeof(*cmds));
// Adds first token to array of delimited commands
cmds[0] = token;
int count = 1;
while (token != NULL) {
// NOTE: sep is not declared in this excerpt; the delimiter above is named seperator
token = strtok(NULL, sep);
if (token != NULL) {
// Requests (count + 1) * sizeof(char) bytes, while each element stored below is a char*
cmds = (char **) realloc(cmds, sizeof(char) * (count + 1));
// Adds next token array of delimited commands
cmds[count] = token;
count++;
}
}
You're not allocating enough memory. cmds is an array of pointers, so each element is sizeof(char *) bytes, not sizeof(char) bytes.
On the initial allocation you want 1 char *, then on subsequent allocations you want count + 1.
Also, don't cast the return value of malloc, as that can hide other problems, and don't forget to check for failures.
char **cmds = malloc(sizeof(char *) * 1);
if (cmds == NULL) {
perror("malloc failed");
exit(1);
}
...
cmds = realloc(cmds, sizeof(char *) * (count + 1));
if (cmds == NULL) {
perror("reallocfailed");
exit(1);
}
The first malloc is wrong. What will you get when you dereference cmds with *cmds before it's even allocated?
It also uses sizeof(char), which is wrong.
the right way would be..
// strtok modifies the string. So use writable string
char line[80] = "Hello my name is anand";
char *token = strtok(line, sep);
int count = 0;
// Alloc array of char* for total number of tokens we have right now
char **cmds = (char **) malloc(sizeof(char*) * (count + 1));
while (token != NULL)
{
/**
* Alloc memory for the actual token to be stored..
* token returned by strtok is just reference to the existing string
* in 'line'
*/
cmds[count] = malloc(sizeof(char) * ((strlen(token) + 1)));
// Adds tokens to array of delimited commands
strcpy(cmds[count], token);
count++;
token = strtok(NULL, sep);
if (token != NULL)
{
// resize array of tokens to store an extra token
char ** newCmds = (char **) realloc(cmds, sizeof(char*) * (count + 1));
// only if realloc was successful then use it.
if (newCmds != NULL)
{
cmds = newCmds;
}
}
}
First, sizeof(char) is always 1 by definition. And coding that does not make your code more readable.
But a pointer to char needs sizeof(char*) bytes (depending on the machine & ABI, that is often 8 or 4 bytes). I would at least suggest to compile your code with gcc -Wall -Wextra -g if using GCC.
At last, I find your code a bit inefficient. You are calling realloc at every loop. I would maintain a variable containing the allocated size
int allocsize = 4; // allocated size in number of elements
char **cmds = malloc(allocsize*sizeof(char*));
if (!cmds) { perror("malloc"); exit(EXIT_FAILURE); };
(BTW, always check the result of malloc; it can fail).
And to avoid realloc-ing every time, I would grow the allocated size in a geometric fashion, so inside the loop:
if (count>=allocsize) {
int newallocsize = (4*allocsize)/3+10;
cmds = realloc (cmds, newallocsize*sizeof(char*));
if (!cmds) { perror("realloc"); exit(EXIT_FAILURE); };
allocsize = newallocsize;
}
Alternatively, instead of keeping three variables: cmds, count, allocsize you could use a single struct ending with a flexible array member (and keeping its allocated and used sizes).
I've been trying to get this to work for the past 2 weeks to no avail.
I have a project to create a shell that implements parsing and built-in commands. The issue I'm having is when I pass a char* to my parse function and it returns, when i try to access any part of it, I get a segfault. I've tried different methods including a struct holding a char** all with the same problems, so i'm guessing it's an issue with my parser. I would appreciate any help.
code for parser.c:
#define BUFSIZE 1024
#define TOK_BUFSIZE 64
#define TOK_DELIM " \t\r\n\a"
/*
 * Tokenizes a copy of line0 on TOK_DELIM and walks the tokens, classifying
 * each one by its first character or by the com/redir state flags. Every
 * branch except the pipe branch copies the token into a fresh BUFSIZE-byte
 * buffer; most branches also print a trace line.
 *
 * MAX_SIZE, TRUE and FALSE are not defined in this excerpt.
 */
char*** Parse(char *line0){
char* null_ptr = 0;
char*** cmd = malloc(MAX_SIZE * sizeof(char**));
/*
char arg[] = argument
char* argv[] = argument array
char** cmd[] = array of argument arrays
*/
int bufsize = MAX_SIZE, cmdp = 0, argp = 0, com = FALSE, redir = FALSE;
char *token;
/* Fixed 100-byte working copy; strcpy does not bound the copy to that size. */
char* line = malloc(100*sizeof(char));
strcpy(line,line0);
token = strtok(line, TOK_DELIM);
while (token){
if (*token == ';'){ // new command string
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
/* sizeof(token) is the size of the pointer, not the length of the token;
   the same expression is used in every branch below. */
tmp1[sizeof(token)] = null_ptr;
tmpa[0]=tmp1;
cmd[cmdp] = tmpa;
argp = 0;
cmdp++;
com = FALSE;
redir = FALSE;
}
/* token == ">>" compares pointer values, not string contents. */
else if (*token == '>' || *token == '<' || token == ">>"){ // redirects
argp = 0;
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
/* tmpa is not stored in cmd in this branch. */
tmpa[argp]=tmp1;
argp++;
printf("Redirect: %s\n",tmp1);
com = FALSE;
redir = TRUE;
}
else if (*token == '|'){ // pipe
printf("PIPE\n");
cmdp++;
argp = 0;
com = FALSE;
}
else if (redir){ // redirect file name
// redirect token stored in arg[]
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[argp]=tmp1;
cmd[cmdp]=tmpa;
argp = 0;
cmdp++;
redir = FALSE;
com = FALSE;
printf("File: %s\n", token);
}
/* token == "&" is also a pointer comparison. */
else if (token == "&") // background
{
cmdp++;
argp = 0;
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[0]=tmp1;
cmd[cmdp]=tmpa;
printf("Background");
}
else if (!com && !redir){ // command entered
argp = 0;
char* tmp1 = malloc(BUFSIZE * sizeof(char));
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
/* tmpa is not stored in cmd in this branch. */
tmpa[argp] = tmp1;
argp++;
printf("Command %s\n", token);
com = TRUE;
}
else if (com){ // argument to command, all other redirects and pipes taken care of
char* tmp1 = malloc(BUFSIZE * sizeof(char));
/* A new tmpa is allocated for every argument and is not stored in cmd. */
char** tmpa = malloc(BUFSIZE * sizeof(char*));
strcpy(tmp1, token);
tmp1[sizeof(token)] = null_ptr;
tmpa[argp] = tmp1;
argp++;
printf("Argument: %s\n", token);
//cmd[cmdp] = argv; // save current working argument array
//cmdp++;
}
// end of if else statements
token = strtok(NULL, TOK_DELIM);
} // end of while
/* cmdp is advanced once more before the terminator is written, so the slot
   at the previous cmdp is not assigned here. */
cmdp++;
cmd[cmdp] = NULL;
/* Returns the address of the local variable cmd, not the malloc'd pointer it holds. */
return &cmd;
}
When I compiled your code on the command-line by typing in:
gcc /path/to/yourcodefilename.c -Wall -Wextra
(replacing /path/to/yourcodefilename.c with the actual name of the file containing the main function that eventually calls your function; my file is test2.c), I received errors and warnings. The first was:
./test2.c:21: error: 'aaa' undeclared (first use in this function)
./test2.c:21: error: (Each undeclared identifier is reported only once
./test2.c:21: error: for each function it appears in.)
And I received a few of those. "aaa" stands for the name of something you used inside your function that has not been previously defined. This includes the words TRUE and FALSE. To correct this, you can put at the top of your program:
#define FALSE n
#define TRUE y
where n and y are numbers representing false and true respectively. Another way to correct it is to include the header files containing definitions for "TRUE" and "FALSE".
The second thing I noticed on a few lines is:
warning: assignment makes integer from pointer without a cast
Make sure you don't convert data over from one type to another. For example, don't set a character variable to a pointer value.
For example, change:
tmp1[sizeof(token)] = null_ptr;
to:
tmp1[sizeof(token)] = '\0';
This is because indexing a char* yields a char, while null_ptr is of type char*, and char* and char are not the same type. What I did was assign a null value that is a char.
I hope that helps you with some troubleshooting
There are several issues here:
You allocate cmd and its subarrays. You return an address to that array at the end of the function. The address has type char ****, which is not the correct return type. What's worse: That address is the address of a local variable, which goes out of scope immediately after returning. Return the handle you got from malloc instead:
/*
 * Sketch of the corrected shape: allocate the outer array and return that
 * same malloc'd pointer, not the address of the local variable cmd.
 * MAX_SIZE is defined outside this excerpt.
 */
char ***Parse(char *line0)
{
char ***cmd = malloc(MAX_SIZE * sizeof(*cmd));
// fill cmd
return cmd;
}
Your code is needlessly long, mostly because you code the steps to allocate memory, copy a string and null-terminate it explicitly. (Others have pointed out that you don't do the null termination properly. You also allocate a fixed size of 1024 bytes regardles of the actual string length, which is quite wasteful.) You could write a function to duplicate strings or use the non-standard, but widely available strdup; this would make your code easier to read.
All the temporary allocations are hard to follow. For example, in the branch if (!com && !redir), you allocate to tmpa, but you never store that value in cmd. The same goes for the redirection branch.
It's also not clear when you start a new command. There should be a new command just before parsing the first token, after encountering a pipe or after encountering a semicolon. You also start new commands for redirections and the background ampersand.
The comparison token == ">>" will always be false: token is an address inside line, and ">>" is a string literal stored in static memory. You should use strcmp to compare two strings.
In general, you want to allocate a new list when cmdp increases. In that case, argp is reset to zero. Otherwise, you just append to the current command.
I think that you complicate things by treating everything as special. I recommend to simplify the code and leave redirection and the background for the moment. They can easily be resolved when the command is called. (Your code sets the state with redir and com, but it never enforces file names after redirection, for example. You can do that easily when all the tokens are in place.)
The code below treats only pipes and semicolons as command separators. When the command is a pipe, the pipe token is prepended to the following command:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Slots per array, terminator included: at most MAX_SIZE - 1 commands and
 * at most MAX_SIZE - 1 tokens per command. */
#define MAX_SIZE 32
#define TOK_DELIM " \t\r\n\a"

/*
 * Return a heap-allocated copy of str, or NULL if the allocation fails.
 * The caller owns the copy and must free() it.
 */
char *sdup(const char *str)
{
    size_t len = strlen(str);
    char *dup = malloc(len + 1);

    if (dup) {
        memcpy(dup, str, len);
        dup[len] = '\0';
    }
    return dup;
}

/* Free a command table built by parse(): every token, every row, the table. */
static void parse_free(char ***cmd)
{
    if (cmd == NULL) {
        return;
    }
    for (char ***row = cmd; *row != NULL; row++) {
        for (char **arg = *row; *arg != NULL; arg++) {
            free(*arg);
        }
        free(*row);
    }
    free(cmd);
}

/*
 * Split line0 into commands separated by ";" or "|" tokens.
 *
 * Tokens are delimited by TOK_DELIM, so separators must be surrounded by
 * white space. A "|" separator is kept as the first token of the command
 * that follows it; a ";" separator is dropped. line0 itself is not modified.
 *
 * Returns a NULL-terminated array of NULL-terminated token arrays. The
 * caller owns every token, every row and the outer array. Returns NULL when
 * line0 contains no tokens or when an allocation fails.
 *
 * Limits: once MAX_SIZE - 1 commands exist, parsing stops at the next
 * separator; tokens beyond MAX_SIZE - 1 in one command are dropped.
 */
char ***parse(char *line0)
{
    char ***cmd = NULL;
    int cmdp = 0;
    int argp = 0;
    char *token;
    char *line = sdup(line0);   /* strtok needs a writable copy */

    if (line == NULL) {
        return NULL;
    }
    token = strtok(line, TOK_DELIM);
    if (token == NULL) {
        free(line);
        return NULL;
    }

    /* calloc zero-fills, so every array is NULL-terminated at all times. */
    cmd = calloc(MAX_SIZE, sizeof *cmd);
    if (cmd == NULL) {
        goto fail;
    }
    cmd[0] = calloc(MAX_SIZE, sizeof *cmd[0]);
    if (cmd[0] == NULL) {
        goto fail;
    }

    for (; token != NULL; token = strtok(NULL, TOK_DELIM)) {
        int is_pipe = (strcmp(token, "|") == 0);

        if (is_pipe || strcmp(token, ";") == 0) {
            /* begin new command, keeping the last slot for the terminator */
            if (cmdp + 2 >= MAX_SIZE) {
                break;
            }
            cmdp++;
            argp = 0;
            cmd[cmdp] = calloc(MAX_SIZE, sizeof *cmd[cmdp]);
            if (cmd[cmdp] == NULL) {
                goto fail;
            }
            if (!is_pipe) {
                continue;
            }
            /* fall through to prepend the pipe token to the new command */
        } else if (argp + 1 >= MAX_SIZE) {
            /* current command is full: drop the extra token */
            continue;
        }

        cmd[cmdp][argp] = sdup(token);
        if (cmd[cmdp][argp] == NULL) {
            goto fail;
        }
        argp++;
    }

    free(line);
    return cmd;

fail:
    parse_free(cmd);
    free(line);
    return NULL;
}
/*
 * Demo driver: parse a sample command line, print each command's tokens
 * quoted on one line per command, and release everything parse() allocated.
 * Returns 1 if parse() yields no command table.
 */
int main(void)
{
    char ***cmd = parse("echo start ; ls -l | wc > output ; echo stop");

    /* parse() returns NULL when it finds no tokens. */
    if (cmd == NULL) {
        fputs("parse failed\n", stderr);
        return 1;
    }

    for (char ***p = cmd; *p != NULL; p++) {
        for (char **q = *p; *q != NULL; q++) {
            printf("'%s' ", *q);
            free(*q);
        }
        puts("");
        free(*p);
    }
    free(cmd);
    return 0;
}
Further remarks:
I'm not sure whether the current format is suited to the task. It might be better to have a tree structure that takes care of pipes, semicolons and maybe also && and ||, and then have leaf nodes with the commands, where the arguments are linked lists.
Tokenisation with strtok requires white-space between all tokens, but punctuation can usually be written without explicit space, e.g.: "./a.out>kk&". So you will need a better method of parsing.
At the moment, you allocate space for each string, which you must free later. If you create a token struct that describes a token as a read-only view into the original string, you can do without the allocations. The views are not null-terminated, though, so you will need comparison methods that work on a starting pointer plus a length, for example.
I am trying to write a program that accepts a user string and then reverses the order of the words in the string and prints it. My code works for most tries, however, it seg faults on certain occasions, for the same input.
On stepping through I found that the content of character pointers words[0] and words[1] are getting changed to garbage values/Null.
I set a watchpoint on the words[1] and words[0] character pointers that are getting corrupted (incorrect address), and can see that the content of these pointers changes at '_platform_memmove$VARIANT$Unknown () from /usr/lib/system/libsystem_platform.dylib'. I can't figure out how this gets invoked and what's causing the content of the pointers to be overwritten.
I have posted my code below and would like any assistance in figuring out where I am going wrong. I am sorry about the indentation issues.
/*
 * Splits s in place on single spaces with strtok, duplicates each word,
 * then builds and prints a new string holding the words in reverse order,
 * joined by single spaces.
 *
 * Returns the malloc'd result string. Exits the process if an allocation
 * fails.
 */
char* reverseWords(char *s) {
char** words = NULL;
int word_count = 0;
/*Create an array of all the words that appear in the string*/
/* Only ' ' is a delimiter, so a trailing '\n' in s stays attached to the last word. */
const char *delim = " ";
char *token;
token = strtok(s, delim);
while(token != NULL){
word_count++;
words = realloc(words, word_count * sizeof(char*));
if(words == NULL){
printf("malloc failed\n");
exit(0);
}
words[word_count - 1] = strdup(token);
token = strtok(NULL, delim);
}
/*Traverse the list backwards and check the words*/
int count = word_count;
/* s has already been cut up by strtok above, so strlen(s) stops at the
   first '\0' that strtok wrote into it. */
char *return_string = malloc(strlen(s) + 1);
if(return_string == NULL){
printf("malloc failed\n");
exit(0);
}
int offset = 0;
/* Not entered when word_count is 0, in which case return_string is never written. */
while(count > 0){
memcpy((char*)return_string + offset, words[count - 1], strlen(words[count - 1]));
free(words[count - 1]);
/* This strlen reads words[count - 1] after it was freed on the previous line. */
offset += strlen(words[count - 1]);
if(count != 1){
return_string[offset] = ' ';
offset++;
}
else {
return_string[offset] = '\0';
}
count--;
}
printf("%s\n",return_string);
free(words);
return return_string;
}
/*
 * Reads one line (up to 999 characters) from stdin into a heap buffer and
 * passes it to reverseWords(), which prints the result.
 */
int main(){
char *string = malloc(1000);
if(string == NULL){
printf("malloc failed\n");
exit(0);
}
/* The return value of fgets is not checked; fgets keeps the trailing '\n'. */
fgets(string, 1000, stdin);
/* Writes '\0' at the position where strlen already found the terminator. */
string[strlen(string)] = '\0';
/* Neither the returned string nor the input buffer is freed before exit. */
reverseWords(string);
return 0;
}
The problem is that the line
char *return_string = malloc(strlen(s) + 1);
doesn't allocate nearly enough memory to hold the output. For example, if the input string is "Hello world", you would expect strlen(s) to be 11. However, strlen(s) will actually return 5.
Why? Because strtok modifies the input line. Every time you call strtok, it finds the first delimiter and replaces it with a NUL character. So after the first while loop, the input string looks like this
Hello\0world\0
and calling strlen on that string will return 5.
So return_string is too small, and one or more of the memcpy calls will write past the end of the buffer, resulting in undefined behavior, e.g. a segmentation fault. The reason memmove shows up in your watchpoint is that the memcpy function internally invokes memmove as needed.
As @WhozCraig pointed out in the comments, you also need to make sure that you don't access memory after a call to free, so you need to swap these two lines:
free(words[count - 1]);
offset += strlen(words[count - 1]);
What's the problem of doing this:
/*
 * Opens `file` (not declared in this excerpt), reads it line by line with
 * getline(), and takes the first space-delimited token of each line.
 * param is unused. Always returns NULL; exits if the file cannot be opened.
 */
void *educator_func(void *param) {
char *lineE = (char *) malloc (1024);
size_t lenE = 1024;
ssize_t readE;
FILE * fpE;
fpE = fopen(file, "r");
if (fpE == NULL) {
printf("ERROR: couldnt open file\n");
exit(0);
}
while ((readE = getline(&lineE, &lenE, fpE)) != -1) {
char *pch2E = (char *) malloc (50);
/* Overwrites pch2E with strtok's return value; the pointer to the 50-byte block above is lost. */
pch2E = strtok(lineE, " ");
/* Frees the pointer strtok returned, not the 50-byte block. */
free(pch2E);
}
free(lineE);
fclose(fpE);
return NULL;
}
If i remove the line 'pch2E = strtok(lineE, " ");' it works fine...
why cant i do a strtok() there ? I tried with strtok_r() also but no luck, it gives me invalid free (Address 0x422af10 is 0 bytes inside a block of size 1,024 free'd)
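Your code is not doing what you think it is doing. The call pch2E = strtok(lineE, " "); replaces the value of pch2E with the return value of strtok, which is a pointer into lineE (or NULL if the line contains no token), not a freshly allocated copy. The 50 bytes you malloc'ed are therefore leaked, and free(pch2E) ends up freeing the buffer that getline manages through lineE, which is why you get the invalid free report.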
You can fix it as follows...
int firstPass = 1;
while ((readE = getline(&lineE, &lenE, fpE)) != -1)
{
char* pch2E = strtok( firstPass ? lineE : NULL, " ");
firstPass = 0;
}
free(lineE);
I should add, the more I look at your code, the more fundamentally flawed it looks to me. You need an inner loop in your code that deals with tokens while the outer loop is loading lines...
while ((readE = getline(&lineE, &lenE, fpE)) != -1)
{
char* pch2E;
int firstPass = 1;
while( (pch2E = strtok( firstPass ? lineE : NULL, " ")) != NULL )
{
firstPass = 0;
// do something with the pch2E return value
}
}
free(lineE);
strtok returns a pointer to the token, that is included in the string you have passed, so you can't free it, because it doesn't (always) point to something you've allocated with malloc.
That kind of assignment can't even work in C, if you wanted a function that would copy the token into a buffer, it would be something like this:
tokenize(char* string, char* delimiter, char* token);
And you would need to pass a valid pointer to token, for the function to copy the data in. In C to copy the data in the pointer, the function needs access to that pointer so it would be impossible for a function to do it on a return value.
An alternative (but worse) strategy would be a function that allocates memory internally and returns a pointer to a memory area that needs to be freed by the caller.
For your problem, strtok needs to be called several times to return all the tokens, until it returns null, so it should be:
while ((readE = getline(&lineE, &lenE, fpE)) != -1) {
    // The first strtok call on a line takes the buffer, later calls take NULL.
    // Every token, including the first one, reaches the loop body.
    // strtok returns pointers into lineE, so the tokens must not be freed.
    for (char *pch2E = strtok(lineE, " "); pch2E != NULL; pch2E = strtok(NULL, " ")) {
        //Do something with the token
    }
}