Unexpected behavior with my token function - c

I am trying to write a simple Shell in C. Eventually, I will implement forking of processes and piping etc. But, right now, I'm just trying to get all the logic worked out.
I have a partially working shell: When I type exit it exits... however, my token function doesn't seem to be working right.
What am I doing wrong here? I'm not sure why its seg-faulting.
Token prints out once in the while loop and then it seg-faults and crashes.
#include <stdio.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_BUF_SZ 1024
void checkForPipe(char *string, bool *pipe_bool);
void checkForRedirect(char *string, bool *redirect_bool);
void tokenizeInput(char *string, bool pipe, bool redirect);
int main()
{
char *ptr;
bool is_pipe = 0;
bool is_redirect_out = 0;
bool is_exit = 0;
ptr = (char*)malloc(MAX_BUF_SZ);
while(!is_exit)
{
// Diplay prompt
char cur_dir[MAX_BUF_SZ];
getcwd(cur_dir, MAX_BUF_SZ);
printf("SHELL:%s$ ", cur_dir);
fgets(ptr, MAX_BUF_SZ, stdin);
checkForPipe(ptr, &is_pipe);
checkForRedirect(ptr, &is_redirect_out);
printf("pipe flag = %d\n", is_pipe);
printf("redirect flag = %d\n", is_redirect_out);
if(strcmp(ptr, "exit\n") == 0)
{
is_exit = 1;
}
tokenizeInput(ptr, is_pipe, is_redirect_out);
}
return 0;
}
void checkForPipe(char *string, bool *pipe_bool)
{
char *check_for_pipes;
char *clean_compare;
check_for_pipes = (char*)malloc(MAX_BUF_SZ);
clean_compare = (char*)malloc(MAX_BUF_SZ);
strcpy(check_for_pipes, string);
strcpy(clean_compare, string);
char * token = strtok(check_for_pipes, "|");
if(strcmp(token, clean_compare) == 0)
{
free(clean_compare);
free(check_for_pipes);
}
else
{
*pipe_bool = 1;
free(clean_compare);
free(check_for_pipes);
}
}
void checkForRedirect(char *string, bool *redirect_bool)
{
char *check_for_redirects;
char *clean_compare;
check_for_redirects = (char*)malloc(MAX_BUF_SZ);
clean_compare = (char*)malloc(MAX_BUF_SZ);
strcpy(check_for_redirects, string);
strcpy(clean_compare, string);
char * token = strtok(check_for_redirects, ">");
if(strcmp(token, clean_compare) == 0)
{
free(clean_compare);
free(check_for_redirects);
}
else
{
*redirect_bool = 1;
free(clean_compare);
free(check_for_redirects);
}
}
void tokenizeInput(char *string, bool pipe, bool redirect)
{
char *copy_string;
copy_string = (char*)malloc(MAX_BUF_SZ);
strcpy(copy_string, string);
if(pipe == 0 && redirect == 0)
{
char **args = {NULL};
char *token = strtok(copy_string, " ");
int i = 0;
printf("%s\n", token);
while(token != NULL)
{
args[i] = token;
strtok(NULL, " ");
printf("%s\n", token);
i++;
}
}
/* printf("%s\n%s\n%s\n", args[0], args[1], args[2]); */
}

The problem is on args[i]
I modified your code as follows:
Supposing you have a pre-known number of token which is MAX_BUF_SZ.
You allocate MAX_BUF_SZ pointers of type char*
char **args = malloc(MAX_BUF_SZ * sizeof(char *));
and in the loop, you still have to allocate each pointer char* before using it:
while(token != NULL)
{
args[i] = (char *)malloc(strlen(token)+1);
printf("%s\n", token);
args[i] = token;
token = strtok(NULL, " ");
i++;
}
The whole functions is like this:
void tokenizeInput(char *string, bool pipe, bool redirect)
{
char *copy_string;
copy_string = (char*)malloc(MAX_BUF_SZ);
strcpy(copy_string, string);
// suppose we would have MAX_BUF_SZ tokens
char **args = malloc(MAX_BUF_SZ * sizeof(char *));
if(pipe == 0 && redirect == 0)
{
char *token = strtok(copy_string, " ");
int i = 0;
//printf("token %s\n", token);
while(token != NULL)
{
args[i] = (char *)malloc(strlen(token)+1);
printf("%s\n", token);
args[i] = token;
token = strtok(NULL, " ");
i++;
}
}
/* printf("%s\n%s\n%s\n", args[0], args[1], args[2]); */
}
Here is my example running :
SHELL:D:\Users\T0180694\Documents\Mes Outils Personnels\PAN\PAN_folder$ test is essai of you and me
pipe flag = 0
redirect flag = 0
test
is
essai
of
you
and
me

Related

Parsing command will only work for one word commands

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
#include <readline/readline.h>
#include <readline/history.h>
HIST_ENTRY;
void printPrompt()
{
printf("SB> ");
}
char *readCommandLine()
{
char* myComm = (char*)malloc(1024);
scanf("%s", myComm);
return myComm;
}
char *parseCmd(char *myComm, char **tokens, size_t *index)
{
char *tok;
const char *delim = " ";
*index = 0;
if (tok != NULL)
{
tokens[*index] = tok;
(*index)++;
}
else
{
tokens[*index] = "\0";
printf("%s\n", tok);
return;
}
while(tok != NULL)
{
tok = strtok(NULL, delim);
tokens[*index] = tok;
(*index)++;
}
tokens[*index] = NULL;
return myComm;
}
int isInternalCommand(char *c)
{
int value = 0;
if (strcmp(c, "exit") == 0)
{
value = 1;
printf("%s\n", "yes");
}
return value;
}
void execInternalCommand(char *c)
{
int val = 0;
char a[4];
if (strcmp(c,"exit") == 0)
{
val = val + 1;
}
switch(val)
{
case 1:
_exit(0);
break;
default:
break;
}
}
void executeCommand (char * c, char * const arguments)
{
execvp(c, arguments);
}
int main()
{
char *myComm;
char *parsed;
size_t num_args = 100;
char **tokens = malloc(sizeof(char *) * (num_args+1));
size_t *idx = (size_t *) malloc(sizeof(size_t));
pid_t pid, cpid;
int *status;
while(1)
{
printPrompt();
myComm = readCommandLine();
parsed = parseCmd(myComm, tokens, idx);
char *const *args = tokens;
if (isInternalCommand(parsed))
{
execInternalCommand(parsed);
}
else
{
pid = fork();
if (pid == 0)
{
executeCommand(parsed, args);
printf("%s\n", "executing...");
}
else if (pid > 0)
{
waitpid(cpid, &status, 0);
printf("%s\n", "waiting...");
}
else
{
}
}
free(idx);
}
return 0;
}
So, whenever I try and parse a command, it only works for one word commands.
For example, whenever I try mkdir ./something, it displays mkdir with a "missing operand", which means that it will not make the directory. Additionally, whenever I type man something, like man exec, it will tell something like "which man page do you want to look at?" How do I get around this? Looks like this is a parsing issue.
scanf("%s", myComm);
reads a single whitespace delimited word from stdin. As a result, your commands can only be single words. If you want to read a line, you should use fgets, or, better yet, getline, which will call malloc for you:
char *readCommandLine()
{
char* myComm = 0;
size_t buffer_length = 0;
size_t length = getline(&myComm, &buffer_length, stdin);
if (myComm[length] == '\n')
myComm[length--] = 0; /* strip off trailing newline, if any */
return myComm;
}

C spliting strings into an array outputs just the first token

I am writing a microshell program as homework at my university.
Everything goes well besides one function that's not doing exactly what I'd wish it did.
I am quite new to C programming, always used higher level languages.
In fact the only times I worked with C was fiddling around with Arduino.
So I've got a line of what user inputs on the prompt. I'm trying to split it into an array of strings separated by space.
I initialized an array with
char **args = NULL;
args = malloc(sizeof(char *) * LINE_LENGTH);
And I'm sending it to a function parse_line(line, args)
The function looks like this:
bool parse_line(char *line, char **arr) {
size_t i = 0;
char *point;
point = strtok(line, " ");
while (point != NULL) {
arr[i] = malloc(strlen(point) + 1);
strcpy(arr[i], point);
point = strtok(NULL, " ");
i++;
}
arr[i] = NULL;
if (!arr)
return false;
return true;
}
The thing is that afterwards in the arr resides only first token from the splitted up line.
I am debugging it and though variable 'point' gets the right values, they aren't copied into my array. Why? I don't know.
Oh.. and the line is an array of chars, dynamic one.
char * line = NULL;
line = read_input_line();
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#define LINE_LENGTH 50
void clear_screen();
void display_prompt(bool clearscr);
char * read_input_line();
void print_line(char *line);
bool parse_line(char *line, char **arr);
int main() {
bool initialRun = true;
while (true) {
// display prompt on the screen
display_prompt(initialRun);
if (initialRun)
initialRun = false;
// read input line from terminal
char * line = NULL;
line = read_input_line();
// basic commands
if (line == NULL)
continue;
if (strcmp(line, "exit") == 0) {
free(line);
exit(EXIT_SUCCESS);
} else if (strcmp(line, "clear") == 0) {
clear_screen();
continue;
}
// parse line into array
char **args = NULL;
args = malloc(sizeof(char *) * LINE_LENGTH);
if (!parse_line(line, args)) {
printf("Error during parsing command \n");
continue;
}
}
return 0;
}
void clear_screen() {
printf("\e[2J\e[H");
}
void display_prompt(bool clearscr) {
if (clearscr)
clear_screen();
printf(" > ");
}
char * read_input_line() {
char * line = (char *)malloc(sizeof(char) * LINE_LENGTH);
if (!fgets(line, LINE_LENGTH, stdin))
return NULL;
size_t len = strlen(line);
if (len > 0 && line[len-1] == '\n') {
line[--len] = '\0';
}
return line;
}
void print_line(char *line) {
printf("%s \n", line);
}
bool parse_line(char *line, char **arr) {
size_t i = 0;
char *point;
point = strtok(line, " ");
while (point != NULL) {
arr[i] = (char *)malloc(strlen(point) + 1);
strcpy(arr[i], point);
point = strtok(NULL, " ");
i++;
}
arr[i] = NULL;
for (int j=0; j<i; j++) {
printf("%s\n", arr[i]);
}
if (!arr)
return false;
return true;
}
Input:
ls -l -h
arr[i] = NULL;
for (int j=0; j<i; j++) {
printf("%s\n", arr[i]);
}
You are not using j as index so you send NULL to printf(). It's undefined behavior.
I propose you an example of implementation(still not the best but for a beginner that enough):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define LINE_LENGTH 50
char **parse_line(char *line, size_t *n);
int main(void) {
while (true) {
// display prompt on the screen
printf(" > ");
// read input line from terminal
char line[LINE_LENGTH];
if (!fgets(line, sizeof line, stdin)) {
return 1;
}
line[strcspn(line, "\n")] = '\0';
if (strcmp(line, "exit") == 0) {
exit(EXIT_SUCCESS);
} else if (strcmp(line, "clear") == 0) {
printf("\e[2J\e[H");
continue;
}
// parse line into array
size_t n;
char **args = parse_line(line, &n);
if (!args) {
printf("Error during parsing command \n");
continue;
}
for (size_t i = 0; i < n; i++) {
printf("%s\n", args[i]);
}
}
}
char **parse_line(char *line, size_t *n) {
char **arr = malloc(sizeof *arr);
size_t i = 0;
for (char *token = strtok(line, " "); token != NULL; token = strtok(NULL, " ")) {
char **tmp = realloc(arr, sizeof *tmp * (i + 2));
if (tmp == NULL) {
for (size_t j = 0; j < i; j++) {
free(arr[j]);
}
free(arr);
return NULL;
}
arr = tmp;
arr[i] = malloc(strlen(token) + 1);
if (arr[i] == NULL) {
for (size_t j = 0; j < i; j++) {
free(arr[j]);
}
free(arr);
return NULL;
}
strcpy(arr[i], token);
i++;
}
arr[i] = NULL;
*n = i;
return arr;
}

C - Find a word and get the next two

I am parsing a file in C, line by line. Here is an exemple of what I am trying to do :
I have a line for example :
word word word WORDTOFIND: word1 word2 word word
What I want to do is : When I find the word WORDTOFIND, get the two next words (word1 and word2 in this case) of the line. Is there an easy way to do that in C ? I know about the strstr function, but I don't find a way to get the next two words word1 and word2 after I found the good one.
One approach would be this:
int main(void)
{
char *str = "rated rat cat bat hat";
char *key = "rat ";
char *pointer = NULL;
char nwords = 2;
if ((pointer = strstr(str, key)) != NULL)
{
while (*pointer != ' ') pointer++;
while (nwords >= 0)
{
printf("%c", *pointer);
if (*pointer == ' ') {
nwords--;
} else if (*pointer == '\0') {
exit(0);
}
pointer++;
}
}
}
You can try an aproach like this, using strtok to parse the words at every space. This code also uses malloc and realloc to allocate space for an array of strings, and grows it when needed.
The code looks like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXCHAR 100
void exit_if_null(void *ptr, const char *msg);
char *stripped_word(char *word);
int
main(int argc, char const *argv[]) {
FILE *filename;
char line[MAXCHAR];
char *word, *newword;
char **allwords;
int init_size = 8, count = 0, i;
const char *key = "WORDTOFIND";
filename = fopen("files.txt", "r");
if (filename == NULL) {
fprintf(stderr, "%s\n", "Error reading file!");
exit(EXIT_FAILURE);
}
allwords = malloc(init_size * sizeof(*allwords));
exit_if_null(allwords, "Initial Allocation");
while (fgets(line, MAXCHAR, filename) != NULL) {
word = strtok(line, " \n");
while (word != NULL) {
if (count == init_size) {
init_size *= 2;
allwords = realloc(allwords, init_size * sizeof(*allwords));
}
allwords[count] = malloc(strlen(word)+1);
exit_if_null(allwords[count], "Initial Allocation");
newword = stripped_word(word);
strcpy(allwords[count], newword);
count++;
word = strtok(NULL, " \n");
free(newword);
}
}
for (i = 0; i < count; i++) {
if (strcmp(key, allwords[i]) == 0) {
printf("Next two words:\n");
printf("%s\n", allwords[i+1]);
printf("%s\n", allwords[i+2]);
}
free(allwords[i]);
allwords[i] = NULL;
}
free(allwords);
allwords = NULL;
return 0;
}
void
exit_if_null(void *ptr, const char *msg) {
if (!ptr) {
printf("Unexpected null pointer: %s\n", msg);
exit(EXIT_FAILURE);
}
}
char
*stripped_word(char *word) {
int i, pos = 0;
char *result;
result = malloc(strlen(word)+1);
exit_if_null(result, "Initial Allocation");
for (i = 0; word[i] != '\0'; i++) {
if (isalpha(word[i]) || isdigit(word[i])) {
result[pos++] = word[i];
}
}
result[pos] = '\0';
return result;
}

string operations - weird characters

Description:
I read userinput (e.g "ls -l /") with fgets() and invoke Parse() where it gets seperated ("ls" "-l" "\") for later usage.
The Problem is: the tokens from the the first cycle have weird characters(screenshot below) in it, but from thereon the output is fine.
I tried to initialize both Buffers with zeroes with no change in behaviour. Please explain what is happening in my first output.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <signal.h>
#include <errno.h>
typedef char* string;
char inputBuffer[512];
string parse[256];
int j,parseCount;
void Parse(void);
void Parse(void)
{
char buffer[512];
string token;
token = " ";
strcpy(buffer, inputBuffer);
j=0;
parse[j] = strtok (buffer, token);
while (parse[j] != NULL)
{
j++;
parse[j] = strtok (NULL, token);
}
parseCount =j;
}
int main (void)
{
printf(">> ");
fgets(inputBuffer, 512, stdin); /* input buffer, max.Input(char), whereFrom?*/
Parse();
for (j=0;j<parseCount;j++){
printf("[%d] %s\n",j, parse[j]);
}
return main();
}
This line
parse[j] = strtok (buffer, token);
stores memory addresses in buffer, which is local to Parse(). The memory representing buffer is invalidated upon the return of Parse(), so also the addresses stored in parse aren't valid anymore when trying to be dereferenced to print what they refer to.
To fix this have the calling function create a temporary working buffer and pass down to `Parse() a reference to it:
char * parse[256] = 0;
char buffer[512] = "";
size_t parseCount = 0;
void Parse(char * buffer)
{
const char * token = " ";
size_t j = 0;
parse[j] = strtok(buffer, token);
while (parse[j] != NULL)
{
j++;
parse[j] = strtok(NULL, token);
}
parseCount = j;
}
int main(void)
{
fgets(buffer, 512, stdin);
{
char buffer_tmp[512];
strcpy(buffer_tmp, buffer);
Parse(buffer_tmp);
for (size_t j = 0; j < parseCount; j++)
{
printf("[%zu] %s\n", j, parse[j]);
}
}
return 0;
}
As I don't like the globals, I'd prefer the following:
#include <stdio.h>
#include <string.h>
size_t parse(char * buffer, char ** parse)
{
const char * token = " ";
size_t j = 0;
parse[j] = strtok(buffer, token);
while (parse[j] != NULL)
{
j++;
parse[j] = strtok(NULL, token);
}
return j;
}
#define IN_MAX (512 + 1 + 1)
int main(void)
{
char buffer[IN_MAX] = "";
if (NULL != fgets(buffer, IN_MAX, stdin))
{
char buffer_tmp[IN_MAX];
strcpy(buffer_tmp, buffer);
{
size_t parse_count = 0;
char * parse[IN_MAX/2 + 1] = 0;
size_t parse_count = parse(buffer_tmp, parse);
for (size_t j = 0; j < parse_count; j++)
{
printf("[%zu] %s\n", j, parse[j]);
}
}
}
else if (ferror())
{
fprintf(stderr, "Error reading from inout stream.\n");
}
return 0;
}

String parse in C

Is there a better of parsing the below string instead of doing a strtok() to get each field.
"subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000"
Basically I want to retrieve the value for each field into another char buf's.
Here is my code. Just wanted to know if there is any other better way of doing it (any better string parsing algos)
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#define SUBJECT "subject="
#define CC_LIST "cc="
#define SERVER "server="
static void
get_value (const char *tok, char **rval_buf, size_t field_len)
{
size_t val_size = 0;
if (!tok || !rval_buf)
return;
val_size = strlen(tok + field_len) + 1;
*rval_buf = calloc(1, val_size);
if (*rval_buf) {
strlcpy(*rval_buf, tok + field_len, val_size);
}
}
int
main (int argc, char **argv)
{
/* hard coded buf for testing */
char buf[] = "subject=what&cc=bose#yahoo.com&server=smtp.yahoo.com:8000";
char *subject_text = NULL;
char *cc_list = NULL;
char *server_addr = NULL;
char *tok = NULL;
int field_len = 0;
int val_len = 0;
tok = strtok(buf, "&");
while(tok) {
/*
* Handle the token
*/
/* check if it is subject */
if (strstr(tok, SUBJECT)) {
get_value(tok, &subject_text, strlen(SUBJECT));
} else if (strstr(tok, CC_LIST)) { /* check if it is CC */
get_value(tok, &cc_list, strlen(CC_LIST));
} else if (strstr(tok, SERVER)) { /* check if it is server */
get_value(tok, &server_addr, strlen(SERVER));
}
tok = strtok(NULL, "&");
}
/* dump data */
fprintf(stdout, "\nSUBJECT: \"%s\"\nCC_LIST: \"%s\"\nSERVER: \"%s\" \n\n",
subject_text, cc_list, server_addr);
return EXIT_SUCCESS;
}
strstr searches for one string ("the needle") inside another ("the haystack"), but you really only want to know whether the needle is the beginning of the haystack.
Here's a small suggestion: (requires #include <stdbool> or change the booleans to ints. I like bools.)
static bool
getval(const char* haystack, const char** res, const char* needle, size_t len) {
if (haystack && 0 == strncmp(haystack, needle, len)) {
*res = strdup(haystack + len);
return true;
}
return false;
}
and later:
for (tok = strtok(buf, "&"); tok; tok = strtok(NULL, "&")) {
getval(tok, &subject_text, SUBJECT, strlen(SUBJECT)) ||
getval(tok, &cc_list, CC_LIST, strlen(CC_LIST)) ||
getval(tok, &server_addr, SERVER, strlen(SERVER));
}
You can actually get away with doing the strlen inside of getval, which cuts down a lot on the noise, because most modern compilers are clever enough to inline getval and constant-fold the length of a constant string.
Use strtok()
char *strtok(char *str, const char *delim)
You can put '&' as a delimeter
I wrote a quick-n-dirty splitter for you:
int split(char* input, char delim, char*** parts)
{
int count = 1;
char** result;
char* t = input;
while(*t != '\0')
{
if (*t++ == delim)
{
count++;
}
}
result = (char**)malloc(count * sizeof(char*));
t = input;
int i = 0;
result[i] = input;
while(*t != '\0')
{
if (*t == delim)
{
*t = '\0';
result[++i] = ++t;
}
else
{
t++;
}
}
*parts = result;
return count;
}
int main()
{
char raw[] = "subject=\"some text\"&cc=abcd&server=acd.com";
char* str = _strdup(raw);
char** parts;
char** keyval;
int cnt = split(str, '&', &parts);
for(int i=0; i<cnt; ++i)
{
split(parts[i], '=', &keyval);
printf("[%d]: %s <--> %s\n", i, keyval[0], keyval[1]);
free(keyval);
}
free(parts);
getchar();
return 0;
}
Output
[0]: subject <--> "some text"
[1]: cc <--> abcd
[2]: server <--> acd.com

Resources