C spliting strings into an array outputs just the first token - c

I am writing a microshell program as homework at my university.
Everything goes well besides one function that's not doing exactly what I'd wish it did.
I am quite new to C programming, always used higher level languages.
In fact the only times I worked with C was fiddling around with Arduino.
So I've got a line of what user inputs on the prompt. I'm trying to split it into an array of strings separated by space.
I initialized an array with
char **args = NULL;
args = malloc(sizeof(char *) * LINE_LENGTH);
And I'm sending it to a function parse_line(line, args)
The function looks like this:
bool parse_line(char *line, char **arr) {
size_t i = 0;
char *point;
point = strtok(line, " ");
while (point != NULL) {
arr[i] = malloc(strlen(point) + 1);
strcpy(arr[i], point);
point = strtok(NULL, " ");
i++;
}
arr[i] = NULL;
if (!arr)
return false;
return true;
}
The thing is that afterwards in the arr resides only first token from the splitted up line.
I am debugging it and though variable 'point' gets the right values, they aren't copied into my array. Why? I don't know.
Oh.. and the line is an array of chars, dynamic one.
char * line = NULL;
line = read_input_line();
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>
#define LINE_LENGTH 50
void clear_screen();
void display_prompt(bool clearscr);
char * read_input_line();
void print_line(char *line);
bool parse_line(char *line, char **arr);
int main() {
bool initialRun = true;
while (true) {
// display prompt on the screen
display_prompt(initialRun);
if (initialRun)
initialRun = false;
// read input line from terminal
char * line = NULL;
line = read_input_line();
// basic commands
if (line == NULL)
continue;
if (strcmp(line, "exit") == 0) {
free(line);
exit(EXIT_SUCCESS);
} else if (strcmp(line, "clear") == 0) {
clear_screen();
continue;
}
// parse line into array
char **args = NULL;
args = malloc(sizeof(char *) * LINE_LENGTH);
if (!parse_line(line, args)) {
printf("Error during parsing command \n");
continue;
}
}
return 0;
}
void clear_screen() {
printf("\e[2J\e[H");
}
void display_prompt(bool clearscr) {
if (clearscr)
clear_screen();
printf(" > ");
}
char * read_input_line() {
char * line = (char *)malloc(sizeof(char) * LINE_LENGTH);
if (!fgets(line, LINE_LENGTH, stdin))
return NULL;
size_t len = strlen(line);
if (len > 0 && line[len-1] == '\n') {
line[--len] = '\0';
}
return line;
}
void print_line(char *line) {
printf("%s \n", line);
}
bool parse_line(char *line, char **arr) {
size_t i = 0;
char *point;
point = strtok(line, " ");
while (point != NULL) {
arr[i] = (char *)malloc(strlen(point) + 1);
strcpy(arr[i], point);
point = strtok(NULL, " ");
i++;
}
arr[i] = NULL;
for (int j=0; j<i; j++) {
printf("%s\n", arr[i]);
}
if (!arr)
return false;
return true;
}
Input:
ls -l -h

arr[i] = NULL;
for (int j=0; j<i; j++) {
printf("%s\n", arr[i]);
}
You are not using j as index so you send NULL to printf(). It's undefined behavior.
I propose you an example of implementation(still not the best but for a beginner that enough):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define LINE_LENGTH 50
char **parse_line(char *line, size_t *n);
int main(void) {
while (true) {
// display prompt on the screen
printf(" > ");
// read input line from terminal
char line[LINE_LENGTH];
if (!fgets(line, sizeof line, stdin)) {
return 1;
}
line[strcspn(line, "\n")] = '\0';
if (strcmp(line, "exit") == 0) {
exit(EXIT_SUCCESS);
} else if (strcmp(line, "clear") == 0) {
printf("\e[2J\e[H");
continue;
}
// parse line into array
size_t n;
char **args = parse_line(line, &n);
if (!args) {
printf("Error during parsing command \n");
continue;
}
for (size_t i = 0; i < n; i++) {
printf("%s\n", args[i]);
}
}
}
char **parse_line(char *line, size_t *n) {
char **arr = malloc(sizeof *arr);
size_t i = 0;
for (char *token = strtok(line, " "); token != NULL; token = strtok(NULL, " ")) {
char **tmp = realloc(arr, sizeof *tmp * (i + 2));
if (tmp == NULL) {
for (size_t j = 0; j < i; j++) {
free(arr[j]);
}
free(arr);
return NULL;
}
arr = tmp;
arr[i] = malloc(strlen(token) + 1);
if (arr[i] == NULL) {
for (size_t j = 0; j < i; j++) {
free(arr[j]);
}
free(arr);
return NULL;
}
strcpy(arr[i], token);
i++;
}
arr[i] = NULL;
*n = i;
return arr;
}

Related

Reading in strings without header <string.h>

I am a newbie in C and for an exercise I have to write a program, where I can read in strings. If my reserved memory (length BUFFER_SIZE) isn't enough, it should reserve memory in increments of +=BUFFER_SIZE, as long as needed to read the string. I tried to write some functions to get this done, but it doesn't work. Can somebody please help me?
My code:
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#define BUFFER_SIZE 10
size_t string_length(char *string)
{
size_t i = 0;
while(string[i] != '\0')
{
i++;
}
return i;
}
void string_concatenate(char *string, char *string_to_chain)
{
size_t length = string_length(string);
for(size_t i = 0; *(string_to_chain + i) != '\0'; i++, length++)
{
string[length] = string_to_chain [i];
}
string[length] = '\0';
}
char *string_search(char *string, char character)
{
do
{
if (*string == character)
{
return (char*)string;
}
} while (*string++);
return NULL;
}
char *get_line()
{
int line_size = BUFFER_SIZE;
char* line = malloc(line_size * sizeof(char));
if(line == NULL)
{
return NULL;
}
printf("Bitte Text eingeben: \n");
fgets(line, BUFFER_SIZE, stdin);
char *new_line_character = string_search(line, '\n');
while(new_line_character == NULL)
{
line_size += BUFFER_SIZE;
line = realloc(line, (line_size * sizeof(char)));
if(line == NULL)
{
return NULL;
}
char *new_line = line + BUFFER_SIZE - 1;
fgets(new_line, line_size, stdin);
new_line_character = string_search(line, '\n');
}
*new_line_character = '\0';
return line;
}
}
int main(void) {
char *string = get_line();
printf("s%\n", string);
}

Invalid write of Size 8 when mallocing array of strings

I am trying to write my own Shell in C. I have a problem. I wrote my own _strtok function that uses strtok but returns all the tokens as an array of strings. For testing I use the string "ls -laR" defined in the main function. I get the valgrind error "Invalid write of size 8" when trying to malloc the number of chars in the second pointer in the array of strings named "Doubl". Why is it doing this? I am allocating the proper number of pointers to strings in the doubl array. Any insight or help would be appreciated
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
char **_strtok(char *str, char *delim)
{
char **doubl;
char *s = str;
char *string;
int i = 0;
while (*s)
{
if (*s == *delim)
i++;
s++;
}
doubl = malloc(sizeof(char *) * i + 1);
i = 0;
string = strtok(str, delim);
while (1)
{
doubl[i] = malloc(sizeof(char) * strlen(string) + 1);
strcpy(doubl[i], string);
i++;
if (string == NULL)
break;
string = strtok(NULL, delim);
}
return (doubl);
}
char *get_path(char **env)
{
char **check = env;
char *path = NULL;
char pth[] = "PATH";
int i, j, stop = 0;
for (i = 0; check[i] && stop == 0; i++)
{
for (j = 0; j < 4 && stop == 0; j++)
{
if (check[i][j] != pth[j])
break;
if (check[i][j] == pth[j] && j == 3)
{
path = malloc(strlen(check[i]));
strcpy(path, check[i]);
stop = 1;
}
}
}
return (path);
}
char **cmd_to_arg(char **cmd, char **env)
{
/* FREE PATH BEFORE END */
char *path = get_path(env);
char *slash = "/";
char **args = NULL, **check = _strtok(path, ":"), **checkStart = check, **cmdStart = cmd;
int status = -1, i = 0, j;
while (*checkStart)
{
strcat(*checkStart, slash);
strcat(*checkStart, cmd[0]);
status = access(*checkStart, F_OK | X_OK);
printf("%s\n", *checkStart);
if (status == 0)
break;
checkStart++;
}
for(;*cmdStart; i++, cmdStart++)
printf("%d\n", i);
args = malloc(sizeof(char *) * i);
args[0] = malloc(strlen(*checkStart));
strcpy(args[0], *checkStart);
puts(args[0]);
for (j = 1; j < i && cmd[j] != NULL; j++)
{
//printf("%d\n", j);
args[j] = malloc(strlen(cmd[j]) * sizeof(char));
strcpy(args[j], cmd[j]);
puts(args[j]);
}
return (args);
}
int main(int ac, char **av, char **env)
{
(void)ac, (void)av, (void)env;
char line[] = "ls laR";
//size_t size = 0;
char **cmd; //**cmdStart;
//int i = 0, j = 0;
cmd = _strtok(line, " ");
cmd = cmd_to_arg(cmd, env);
return (0);
}

Parsing command will only work for one word commands

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
#include <readline/readline.h>
#include <readline/history.h>
HIST_ENTRY;
void printPrompt()
{
printf("SB> ");
}
char *readCommandLine()
{
char* myComm = (char*)malloc(1024);
scanf("%s", myComm);
return myComm;
}
char *parseCmd(char *myComm, char **tokens, size_t *index)
{
char *tok;
const char *delim = " ";
*index = 0;
if (tok != NULL)
{
tokens[*index] = tok;
(*index)++;
}
else
{
tokens[*index] = "\0";
printf("%s\n", tok);
return;
}
while(tok != NULL)
{
tok = strtok(NULL, delim);
tokens[*index] = tok;
(*index)++;
}
tokens[*index] = NULL;
return myComm;
}
int isInternalCommand(char *c)
{
int value = 0;
if (strcmp(c, "exit") == 0)
{
value = 1;
printf("%s\n", "yes");
}
return value;
}
void execInternalCommand(char *c)
{
int val = 0;
char a[4];
if (strcmp(c,"exit") == 0)
{
val = val + 1;
}
switch(val)
{
case 1:
_exit(0);
break;
default:
break;
}
}
void executeCommand (char * c, char * const arguments)
{
execvp(c, arguments);
}
int main()
{
char *myComm;
char *parsed;
size_t num_args = 100;
char **tokens = malloc(sizeof(char *) * (num_args+1));
size_t *idx = (size_t *) malloc(sizeof(size_t));
pid_t pid, cpid;
int *status;
while(1)
{
printPrompt();
myComm = readCommandLine();
parsed = parseCmd(myComm, tokens, idx);
char *const *args = tokens;
if (isInternalCommand(parsed))
{
execInternalCommand(parsed);
}
else
{
pid = fork();
if (pid == 0)
{
executeCommand(parsed, args);
printf("%s\n", "executing...");
}
else if (pid > 0)
{
waitpid(cpid, &status, 0);
printf("%s\n", "waiting...");
}
else
{
}
}
free(idx);
}
return 0;
}
So, whenever I try and parse a command, it only works for one word commands.
For example, whenever I try mkdir ./something, it displays mkdir with a "missing operand", which means that it will not make the directory. Additionally, whenever I type man something, like man exec, it will tell something like "which man page do you want to look at?" How do I get around this? Looks like this is a parsing issue.
scanf("%s", myComm);
reads a single whitespace delimited word from stdin. As a result, your commands can only be single words. If you want to read a line, you should use fgets, or, better yet, getline, which will call malloc for you:
char *readCommandLine()
{
char* myComm = 0;
size_t buffer_length = 0;
size_t length = getline(&myComm, &buffer_length, stdin);
if (myComm[length] == '\n')
myComm[length--] = 0; /* strip off trailing newline, if any */
return myComm;
}

C - Find a word and get the next two

I am parsing a file in C, line by line. Here is an exemple of what I am trying to do :
I have a line for example :
word word word WORDTOFIND: word1 word2 word word
What I want to do is : When I find the word WORDTOFIND, get the two next words (word1 and word2 in this case) of the line. Is there an easy way to do that in C ? I know about the strstr function, but I don't find a way to get the next two words word1 and word2 after I found the good one.
One approach would be this:
int main(void)
{
char *str = "rated rat cat bat hat";
char *key = "rat ";
char *pointer = NULL;
char nwords = 2;
if ((pointer = strstr(str, key)) != NULL)
{
while (*pointer != ' ') pointer++;
while (nwords >= 0)
{
printf("%c", *pointer);
if (*pointer == ' ') {
nwords--;
} else if (*pointer == '\0') {
exit(0);
}
pointer++;
}
}
}
You can try an aproach like this, using strtok to parse the words at every space. This code also uses malloc and realloc to allocate space for an array of strings, and grows it when needed.
The code looks like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXCHAR 100
void exit_if_null(void *ptr, const char *msg);
char *stripped_word(char *word);
int
main(int argc, char const *argv[]) {
FILE *filename;
char line[MAXCHAR];
char *word, *newword;
char **allwords;
int init_size = 8, count = 0, i;
const char *key = "WORDTOFIND";
filename = fopen("files.txt", "r");
if (filename == NULL) {
fprintf(stderr, "%s\n", "Error reading file!");
exit(EXIT_FAILURE);
}
allwords = malloc(init_size * sizeof(*allwords));
exit_if_null(allwords, "Initial Allocation");
while (fgets(line, MAXCHAR, filename) != NULL) {
word = strtok(line, " \n");
while (word != NULL) {
if (count == init_size) {
init_size *= 2;
allwords = realloc(allwords, init_size * sizeof(*allwords));
}
allwords[count] = malloc(strlen(word)+1);
exit_if_null(allwords[count], "Initial Allocation");
newword = stripped_word(word);
strcpy(allwords[count], newword);
count++;
word = strtok(NULL, " \n");
free(newword);
}
}
for (i = 0; i < count; i++) {
if (strcmp(key, allwords[i]) == 0) {
printf("Next two words:\n");
printf("%s\n", allwords[i+1]);
printf("%s\n", allwords[i+2]);
}
free(allwords[i]);
allwords[i] = NULL;
}
free(allwords);
allwords = NULL;
return 0;
}
void
exit_if_null(void *ptr, const char *msg) {
if (!ptr) {
printf("Unexpected null pointer: %s\n", msg);
exit(EXIT_FAILURE);
}
}
char
*stripped_word(char *word) {
int i, pos = 0;
char *result;
result = malloc(strlen(word)+1);
exit_if_null(result, "Initial Allocation");
for (i = 0; word[i] != '\0'; i++) {
if (isalpha(word[i]) || isdigit(word[i])) {
result[pos++] = word[i];
}
}
result[pos] = '\0';
return result;
}

string operations - weird characters

Description:
I read userinput (e.g "ls -l /") with fgets() and invoke Parse() where it gets seperated ("ls" "-l" "\") for later usage.
The Problem is: the tokens from the the first cycle have weird characters(screenshot below) in it, but from thereon the output is fine.
I tried to initialize both Buffers with zeroes with no change in behaviour. Please explain what is happening in my first output.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <signal.h>
#include <errno.h>
typedef char* string;
char inputBuffer[512];
string parse[256];
int j,parseCount;
void Parse(void);
void Parse(void)
{
char buffer[512];
string token;
token = " ";
strcpy(buffer, inputBuffer);
j=0;
parse[j] = strtok (buffer, token);
while (parse[j] != NULL)
{
j++;
parse[j] = strtok (NULL, token);
}
parseCount =j;
}
int main (void)
{
printf(">> ");
fgets(inputBuffer, 512, stdin); /* input buffer, max.Input(char), whereFrom?*/
Parse();
for (j=0;j<parseCount;j++){
printf("[%d] %s\n",j, parse[j]);
}
return main();
}
This line
parse[j] = strtok (buffer, token);
stores memory addresses in buffer, which is local to Parse(). The memory representing buffer is invalidated upon the return of Parse(), so also the addresses stored in parse aren't valid anymore when trying to be dereferenced to print what they refer to.
To fix this have the calling function create a temporary working buffer and pass down to `Parse() a reference to it:
char * parse[256] = 0;
char buffer[512] = "";
size_t parseCount = 0;
void Parse(char * buffer)
{
const char * token = " ";
size_t j = 0;
parse[j] = strtok(buffer, token);
while (parse[j] != NULL)
{
j++;
parse[j] = strtok(NULL, token);
}
parseCount = j;
}
int main(void)
{
fgets(buffer, 512, stdin);
{
char buffer_tmp[512];
strcpy(buffer_tmp, buffer);
Parse(buffer_tmp);
for (size_t j = 0; j < parseCount; j++)
{
printf("[%zu] %s\n", j, parse[j]);
}
}
return 0;
}
As I don't like the globals, I'd prefer the following:
#include <stdio.h>
#include <string.h>
size_t parse(char * buffer, char ** parse)
{
const char * token = " ";
size_t j = 0;
parse[j] = strtok(buffer, token);
while (parse[j] != NULL)
{
j++;
parse[j] = strtok(NULL, token);
}
return j;
}
#define IN_MAX (512 + 1 + 1)
int main(void)
{
char buffer[IN_MAX] = "";
if (NULL != fgets(buffer, IN_MAX, stdin))
{
char buffer_tmp[IN_MAX];
strcpy(buffer_tmp, buffer);
{
size_t parse_count = 0;
char * parse[IN_MAX/2 + 1] = 0;
size_t parse_count = parse(buffer_tmp, parse);
for (size_t j = 0; j < parse_count; j++)
{
printf("[%zu] %s\n", j, parse[j]);
}
}
}
else if (ferror())
{
fprintf(stderr, "Error reading from inout stream.\n");
}
return 0;
}

Resources