Parsing array of chars into ***char - c

I've a array of chars as a result of gets() (which is a command inputed to my shell), for example "ls -l \ | sort". Now what I want to have is a char*** that would hold pointers to particular commands (so split by |). With my example:
*1[] = {"ls", "-l", "\", null}
*2[] = {"sort", null}
and my char*** would be {1,2}. The thing is, I don't know how many strings will be given to me in this array of characters, so I can't predefine that. What I have now is just splitting the array of chars into words by whitespaces and I can't figure out how to do what I actually need.
Also in my input/output above the function should react the same to "ls -l \|sort" and "ls -l \ | sort"
My code so far:
int parse(char *line, char **argv)
{
while (*line != '\0') {
while (*line == ' ' || *line == '\t' || *line == '\n')
*line++ = '\0';
*argv++ = line;
while (*line != '\0' && *line != ' ' && *line != '\t' && *line != '\n'){
line++;
}
}
*argv = '\0';
return 0;
}

To split a line, use strtok(). You can use this function with | as delimiter and then with :
#include <string.h>
int parse(char *line, char **argv, const char *delim)
{
int word_counter = 0
/* get the first word */
char *word = strtok(line, delim);
/* walk through other words */
while (word != NULL)
{
printf(" %s\n", word);
word_counter++;
/* save word somewhere */
word = strtok(NULL, delim);
}
printf("This string contains %d words separated with %s\n",word_counter,delim);
}

Related

how can i parse this file in c

how can split the word from its meaning
1. mammoth: large
My code:
void ReadFromFile(){
FILE *dictionary = fopen("dictionary.txt", "r");
char word[20];
char meaning[50];
while(fscanf(dictionary, "%[^:]:%[^\t]\t", word, meaning) == 2){
printf("%s %s\n", word, meaning);
}
fclose(dictionary);
Assuming the word and the meaning do not contain digits and dots,
my approach is the following:
First, split the input line on the digits and dots into the tokens which
have the form as word: meaning.
Next separate each token on the colon character.
As a finish up, remove the leading and trailing blank characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INFILE "dictionary.txt"
void split(char *str);
void separate(char *str);
char *trim(char *str);
/*
* split line on serial number into "word" and "meaning" pairs
* WARNING: the array of "str" is modified
*/
void
split(char *str)
{
char *tk; // pointer to each token
char delim[] = "0123456789."; // characters used in the serial number
tk = strtok(str, delim); // get the first token
while (tk != NULL) {
separate(tk); // separate each token
tk = strtok(NULL, delim); // get the next token
}
}
/*
* separate the pair into "word" and "meaning" and print them
*/
void
separate(char *str)
{
char *p;
if (NULL == (p = index(str, ':'))) {
// search a colon character in "str"
fprintf(stderr, "Illegal format: %s\n", str);
exit(1);
}
*p++ = '\0'; // terminate the "word" string
// now "p" points to the start of "meaning"
printf("%s %s\n", trim(str), trim(p));
}
/*
* remove leading and trailing whitespaces
* WARNING: the array of "str" is modified
*/
char *
trim(char *str)
{
char *p;
for (p = str; *p != '\0'; p++); // jump to the end of "str"
for (; p > str && (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n' || *p == '\0'); p--);
// rewind the pointer skipping blanks
*++p = '\0'; // chop the trailing blanks off
for (p = str; *p != '\0' && (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n'); p++);
// skip leading blanks
return p;
}
int
main()
{
FILE *fp;
char str[BUFSIZ];
if (NULL == (fp = fopen(INFILE, "r"))) {
perror(INFILE);
exit(1);
}
while (NULL != fgets(str, BUFSIZ, fp)) {
split(trim(str));
}
fclose(fp);
return 0;
}
Output:
foe enemy
vast huge
purchase buy
drowsy sleepy
absent missing
prank trick
[snip]
[Alternative]
I suppose C may not be a suitable language for this kind of string manipulations. High-level languages such as python, perl or ruby will solve it with much fewer codes. Here is an example with python which will produce the same results:
import re
with open("dictionary.txt") as f:
s = f.read()
for m in re.finditer(r'\d+\.\s*(.+?):\s*(\S+)', s):
print(m.group(1) + " " + m.group(2))

What might be wrong, ls doesn't work on my own shell

I have to write my own shell, and it's almost complete but I have a problem with ls/pwd
ls is giving me this error: cannot access'': No such file or directory
pwd is working in some way, but still it is giving me error:ignoring non-option arguments
https://i.stack.imgur.com/3IpRd.png
What might be the problem?
#include <sys/types.h>
#include <sys/wait.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
void parsowanie(char *line, char **argv)
{
while (*line != '\0') {
while (*line == ' ' || *line == '\n'|| *line == '\t' )
*line++ = '\0';
*argv++ = line;
while (*line != '\0' && *line!= ' ' && *line != '\t' && *line != '\n')
line++;
}
*argv = '\0';
}
int main()
{
printf("MicroShell\nAutor:Amadeusz Lewandowski\n");
char co_chcesz[500],cwd[500],login[50],*argv[1000],*to;
memset(argv,0,999);
pid_t pid;
int status;
while(strcmp(co_chcesz,"exit\n"))
{
sleep(1);
getcwd(cwd,sizeof(cwd));
getlogin_r(login,sizeof(login));
printf("[%s:%s]\n$ ",login,cwd);
fgets(co_chcesz,1000,stdin);
if (strcmp(co_chcesz,"help\n")==0)
{
printf("autor to miranda orange\n");
continue;
}
pid=fork();
if(pid==0)
{
if (execvp(*argv,argv)<0)
{
perror("The following error occurred");
printf("Value of errno:%d\n",errno);
break;
}
else
{
waitpid(pid,&status,0);
}
}
}
return 0;
}
Your command-line splitting function is wrong. Reformatted here to better show how it was implemented:
void parsowanie(char *line, char **argv)
{
while (*line != '\0') {
/* Eating whitespace */
while (*line == ' ' || *line == '\n'|| *line == '\t' )
*line++ = '\0';
/* Adding next command line argument to `argv` */
*argv++ = line;
while (*line != '\0' && *line!= ' ' && *line != '\t' && *line != '\n')
line++;
}
*argv = '\0';
}
The problem is that you add the next command line argument to argv before you actually know if there is a next command line argument. So we have to add a test before *argv++ = line;. The test in the while statement now becomes unnecessary.
void parsowanie(char *line, char **argv)
{
while (1) {
/* Eating whitespace */
while (*line == ' ' || *line == '\n'|| *line == '\t' )
*line++ = '\0';
/* Checking if there are more command line arguments */
if (!*line)
{
/* Terminating `argv` */
*argv = NULL;
return;
}
/* Adding next command line argument to `argv` */
*argv++ = line;
while (*line != '\0' && *line!= ' ' && *line != '\t' && *line != '\n')
line++;
}
}

two C functions trying to return the first word in a string

I have created the two following functions. The first, eatWrd, returns the first word in a string without any white spaces, and removes the first word from the input string:
MAX is a number representing the max length of a string
char* eatWrd(char * cmd)
{
int i = 0; //i will hold my place in cmd
int count = 0; //count will hold the position of the second word
int fw = 0; //fw will hold the position of the first word
char rest[MAX]; // rest will hold cmd without the first word
char word[MAX]; // word will hold the first word
// start by removing initial white spaces
while(cmd[i] == ' ' || cmd[i] == '\t'){
i++;
count++;
fw++;
}
// now start reading the first word until white spaces or terminating characters
while(cmd[i] != ' ' && cmd[i] != '\t' && cmd[i] != '\n' && cmd[i] != '\0'){
word[i-fw] = cmd[i];
i++;
count++;
}
word[i-fw] = '\0';
// now continue past white spaces after the first word
while(cmd[i] == ' ' || cmd[i] == '\t'){
i++;
count++;
}
// finally save the rest of cmd
while(cmd[i] != '\n' && cmd[i] != '\0'){
rest[i-count] = cmd[i];
i++;
}
rest[i-count] = '\0';
// reset cmd, and copy rest back into it
memset(cmd, 0, MAX);
strcpy(cmd, rest);
// return word as a char *
char *ret = word;
return ret;
}
The second, frstWrd, just returns the first word without modifying the input string:
// this function is very similar to the first without modifying cmd
char* frstWrd(char * cmd)
{
int i = 0;
int fw = 0;
char word[MAX];
while(cmd[i] == ' ' || cmd[i] == '\t'){
i++;
fw++;
}
while(cmd[i] != ' ' && cmd[i] != '\t' && cmd[i] != '\n' && cmd[i] != '\0'){
word[i-fw] = cmd[i];
i++;
}
word[i-fw] = '\0';
char *ret = word;
return ret;
}
To test the function, I used fgets to read a string from the User(me), and then I printed three strings (frstWrd(input), eatWrd(input), eatWrd(input)). I would have expected that given a string, "my name is tim" for example, the program would print "my my name", but instead it prints the third word three times over, "is is is":
// now simply test the functions
main()
{
char input[MAX];
fgets(input, MAX - 1, stdin);
printf("%s %s %s", frstWrd(input), eatWrd(input), eatWrd(input));
}
I have looked over my functions over and over and cannot see the mistake. I believe there is simply something I don't know about printf, or about using multiple string modification functions as arguments in another function. Any insight would be helpful thanks.
As I see rest and word are local variables in the function eatWrd. So it is bad practice to return pointer to such memory outside functions.
EDIT 1:
Also you should understand, that in line
printf("%s %s %s", frstWrd(input), eatWrd(input), eatWrd(input));
function eatWrd(input) could be called the first (before frstWrd(input)).
EDIT 2:
This can be usefull in finction eatWrd
//char rest[MAX]; // rest will hold cmd without the first word
char * rest = (char*) malloc(MAX);
And new main let be as:
int main()
{
char input[MAX];
fgets(input, MAX - 1, stdin);
printf("%s ", frstWrd(input));
printf("%s ", eatWrd(input));
printf("%s\n", eatWrd(input));
}
And in the end my solution for frstWrd (just to show how standard functions can be useful):
char* frstWrd(char * cmd)
{
char * word = (char *) malloc(MAX);
sscanf(cmd, "%s", word);
return word;
}

Seg fault while parsing a line into ***char

My code is supposed to parse an array of chars into ***char, so that it splits it first by '|' char and then by whitespaces, newline characters etc into words. Sample i/o:
I = ls -l | sort | unique
O =
*cmds[1] = {"ls", "-l", NULL};
*cmds[2] = {"sort", NULL};
*cmds[3] = {"unique", NULL};
above are pointers to char arrays, so split by words and then below is ***char with pointers to above pointers
char **cmds[] = {1, 2, 3, NULL};
Now, I don't see my mistake (probably because I am not so skilled in C), but program gives segfault the second I call parse(..) function from inside parsePipe(). Can anyone please help?
void parse(char *line, char **argv)
{
while (*line != '\0') {
while (*line == ' ' || *line == '\t' || *line == '\n')
*line++ = '\0';
*argv++ = line;
while (*line != '\0' && *line != ' ' && *line != '\t' && *line != '\n'){
line++;
}
}
*argv = '\0';
}
void parsePipe(char *line, char ***cmds)
{
char *cmd = strtok(line, "|");
int word_counter = 0;
while (cmd != NULL)
{
printf("Printing word -> %s\n", cmd);
word_counter++;
parse(cmd, *cmds++);
cmd = strtok(NULL, "|");
}
printf("This string contains %d words separated with |\n",word_counter);
}
void main(void)
{
char line[1024];
char **cmds[64];
while (1) {
printf("lsh -> ");
gets(line);
printf("\n");
parsePipe(line, cmds);
}
}
[too long for a comment]
This line
*argv++ = line; /* with char ** argv */
refers to invalid memory, as the code does *argv[n] (with char **argv[64]) which refers nothing.
The namings you use do not make live easier.
Try the following naming:
void parse(char *line, char **cmd)
{
while (*line != '\0') {
while (*line == ' ' || *line == '\t' || *line == '\n')
*line++ = '\0';
*cmd++ = line;
while (*line != '\0' && *line != ' ' && *line != '\t' && *line != '\n'){
line++;
}
}
*argv = '\0';
}
void parsePipe(char *line, char ***cmdline)
{
char *cmd = strtok(line, "|");
int word_counter = 0;
while (cmd != NULL)
{
printf("Printing word -> %s\n", cmd);
word_counter++;
parse(cmd, *cmdline++);
cmd = strtok(NULL, "|");
}
printf("This string contains %d words separated with |\n",word_counter);
}
void main(void)
{
char line[1024];
char **cmdline[64];
while (1) {
printf("lsh -> ");
gets(line);
printf("\n");
parsePipe(line, cmdline);
}
}
For none of the cmds used memory had been allocated.
So
*cmd++ = line;
fails, as cmd points nowhere, but gets dereferenced and the code tries to write to where it's pointing, which is nowhere, that is invalid memory.
Fixing this can be done by passing char*** to parse(char *** pcmd) and counting the tokens found
size_t nlines = 0;
...
++nlines.
and the doing a
*pcmd = realloc(*pcmd, nlines + 1); /* Allocate one more as needed to later find the end of the array. */
(*pcmd)[nlines -1] = line;
(*pcmd)[nlines] = NULL; /* Initialise the stopper, marking the end of the array. */
for each token found.
Obviously you need to call it like this:
parse(cmd, cmdline++);
To have all this work the inital array needs to initialised properly (as you should have done anyway):
char **cmdline[64] = {0};

In C, Save one char at a time

I am processing a string in which each word is separated by spaces. The < indicates it is a input redirection, and > indicates it is a output redirection.
Ex:
< Hello > World
I want to save the words in different variables (char *in, char *out )
How can I do that? I've looked through the string library and none seems to be able to do the job.
Here's what I have so far concerning this question.
char buff[MAXARGS];
char *str;
char *in;
char *out;
if( strchr(buff, '<') != NULL )
{
str = strchr(buff, '<');
str++;
if( *str == ' ' || *str == '\0' || *str == '\n' || *str == '\t' )
{
fprintf( stdout, "User did not specify file name!\n" );
}
else
in = str; // This will save Hello > World all together. I don't want that.
}
Thanks much.
To get you started, here's how you could do it assuming you are allowed to modify buff, and assuming a simple case (at most one < and at most one >).
First, get the position of the < and >
in = strchr(buff, '<');
out = strchr(buff, '>');
Then you artificially split the string:
if (in) {
*in = 0;
in++;
}
if (out) {
*out = 0;
out++;
}
At this point, buff points to a C-terminated string that has no < or >. in is either null, or points to a string that follows < and contains no >. Same for out.
Then you need to "strip" all these strings (remove whitespace), and check that after that they still contain meaningful data.
You'll need a whole lot more logic to get all the cases right, including rejecting invalid input.
u can use this..
char filename[max_path]
str1 = strchr(buff, '<');
str2 = strchr(buff, '>');
memcpy( filename , str1+1 , str2-str1-1 );
so the path between < and > will be in filename.
and
output = str2 + 1;
supposing your input patter is fixed as < input_name > output_name and you want to be able to extract input_name and output_name respectively.
One solution is to split the str using " <>". Following code will print out Hello , World consecutively. Saving them to in and out is left for you as an exercise :)
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="< Hello > World";
char *in;
char *out;
char *pch;
char *del=" <>";
pch = strtok (str,del);
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, del);
}
return 0;
}
You need to decide where the storage for the two filenames is allocated, and how you will know how much storage is provided.
static void find_filename(const char *str, char *name)
{
char c;
while ((c = *str++) != '\0' && isspace((unsigned char)c))
;
if (c != '\0')
{
*name++ = c;
while ((c = *str++) != '\0' && !isspace((unsigned char)c))
*name++ = c;
}
}
int find_io_redirection(const char *str, char *in, char *out)
{
const char *lt = strchr(str, '<');
const char *gt = strchr(str, '>');
if (lt != 0)
find_filename(lt+1, in);
if (gt != 0)
find_filename(gt+1, out);
return(lt != 0 && gt != 0);
}
This code simply assumes that you provide big enough strings for in and out. If you want to be safer, then you tell the function(s) how big each target string is. You can compress that code. You might decide you want to return the number of redirections. You might decide you should know about double output redirections, or double input redirections. You might decide you should return a bit mask indicating which redirections were present. With a considerably more complex interface, you might be able to indicate which parts of the input line represented the I/O redirection; this would help in the calling function to decide which parts of the line should now be ignored.
You'd use the code above like this:
char buffer[MAXCMDLEN];
char in[MAXFILENAMELEN];
char out[MAXFILENAMELEN];
if (fgets(buffer, sizeof(buffer), stdin) != 0)
{
if (find_io_redirection(buffer, in, out))
...process buffer know that I/O redirection is present...
else
...witter about missing I/O redirection...
}

Resources