So I'm working in C and have a char array and I want to split it everytime there is a space, "(", ")", or "{". But, I want to keep those character delimiters. For example, if my input was
void statement(int y){
I want my output to be
void
statement
(
int
y
)
{
What's the best way to go about this?
You can do it with the loop of your choice and a few conditional tests that basically boil down to:
if the current char is a delimiter;
if the previous char wasn't a delimiter, output a space before the delimiter;
if the delimiter (current char) is not a space, output char followed by newline.
(using the string of delimiters as the string in strchr and checking against the current char is a simple way of determining if the current char is a delim)
Putting that together in a short example, you could do something similar to:
#include <stdio.h>
#include <string.h>
int main (void) {
int c, last = 0; /* current & previous char */
const char *delims = " (){}"; /* delimiters */
while ((c = getchar()) != EOF) { /* read each char */
if (strchr (delims, c)) { /* if delimiter */
if (last && !strchr (delims, last)) /* if last not delimiter */
putchar ('\n'); /* precede char with newline */
if (c != ' ') { /* if current not space */
putchar (c); /* output delimiter */
putchar ('\n'); /* followed by newline */
}
}
else /* otherwise */
putchar (c); /* just output char */
last = c; /* set last to current */
}
}
Example Use/Output
Given your input string, the output matches what you have provided.
$ printf "void statement(int y){" | ./bin/getchar_delims
void
statement
(
int
y
)
{
Look things over and let me know if you have further questions.
You can try using strpbrk, which not only retains the delimiting chars by simply returning a pointer to the found delimiter, but also supports multiple delimiter characters.
For example, this should do what you want:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
int main(int argc, char *argv[]) {
char *input = "void statement(int y){a";
char *delims = " (){";
char *remaining = input;
char *token;
// while we find delimiting characters
while ((token = strpbrk(remaining, delims)) != NULL) {
// print the characters between the last found delimiter (or string beginning) and current delimiter
if (token - remaining > 0) {
printf("%.*s\n", token - remaining, remaining);
}
// Also print the delimiting character itself
printf("%c\n", *token);
// Offset remaining search string to character after the found delimiter
remaining = token + 1;
}
// Print any characters after the last delimiter
printf("%s\n", remaining);
return 0;
}
The output includes the spaces since you included as a delimiter. If you don't want this, wrap the delimiter character printing in a condition like this:
if (*token != ' ') {
printf("%c\n", *token);
}
Related
I just want the string without underscore. I tried below few codes all doesn't work:
string is char pointer from another function, it looks like this: " "_I_have_1_dog.dat)" "
void func1(char *string)
{
char buffer[256]="";
unsigned long count = 0;
count = sscanf_s(string, " \"%*c%255[^\"]\"", buffer, _countof(buffer));
output:
_I_have_1_dog.dat
count = sscanf_s(string, " \"%*[^_]_%255[^\"]\"", buffer, _countof(buffer));
output:
_I_have_1_dog.dat
count = sscanf_s(string, " \"[^_]_%255[^\"]\"", buffer, _countof(buffer));
output:
_I_have_1_dog.dat
count = sscanf_s(string, " \"[^_]_%255[^\"]\"", buffer, _countof(buffer));
output:
_I_have_1_dog.dat
Edit Based on Removing 1st char '_' Instead of All '_'
The easiest approach to remove a leading '_' is simply to shift all characters down by 1 in string if the first character is an '_'.
You can use the functions like memmove from string.h to do the same thing. (in a single function call) However, looping is just as easy.
A simple function using the loop method could be:
void rm_1st_underscore (char *string)
{
int i = 1; /* set index to 1 (2nd char in string) */
if (*string != '_') /* if 1st char not '_', just return */
return;
do /* loop over each char in string */
string[i-1] = string[i]; /* shift chars back by 1 in string */
while (string[i++] != 0); /* (note: causes '\0' to copy) */
}
A short example would be:
#include <stdio.h>
void rm_1st_underscore (char *string)
{
int i = 1; /* set index to 1 (2nd char in string) */
if (*string != '_') /* if 1st char not '_', just return */
return;
do /* loop over each char in string */
string[i-1] = string[i]; /* shift chars back by 1 in string */
while (string[i++] != 0); /* (note: causes '\0' to copy) */
}
int main (void) {
char str[] = "_I_have_1_dog.dat";
rm_1st_underscore (str);
puts (str);
}
Example Use/Output
$ ./bin/rmunderscore
I_have_1_dog.dat
Look things over and let me know if you still have questions. If you need help trying it with memmove let me know and I'll drop another example.
Using memmove
Since we are just removing the first '_' instead of all of them, using memmove makes it trivial. Simply include string.h and get the length of string and then call memmove copying from the second char in string back to the first, e.g.
...
#include <string.h>
void rm_1st_underscore (char *string)
{
size_t len = strlen (string);
memmove (string, string + 1, len);
}
...
(the output is the same)
I want to get the rest of the string after the first "/" symbol.
The expectation I wanna get is:
index/homepage/component.html/sdfsdf
int main(int argc, char *argv[]){
char link[] = "www.google.com/index/homepage/component.html/sdfsdf";
char *token;
char *temp[1000];
int i=0;
token = strtok(link, "/");
while(token != NULL){
temp[i] = token;
token = strtok(NULL, "/");
++i;
}
printf("the parse string: %s\n",*temp);
return 0;
}
the parse string: www.google.com
The other answers have covered a more prefered method, that is strchr(), but for compleatness this is how it can be done using regular expressions and sscanf.
this works by grabing everything befor the '/' and ignoring it by using %*[^/] (the * means it will not put it into a variable), then on the other side of '/' just getting everything till the end of line char is hit by %[^\n]
#include <stdio.h>
char link[] = "www.google.com/index/homepage/component.html/sdfsdf";
int main(int argc, char **argv){
char buff[128];
// Seporate string after first "/"
// %*[^/] goes untill it finds '/' and stops, but wont put it in a var
// "/" will eat the '/' charater
// %[^\n] goes untill it finds the '\n' char and puts it into buff
sscanf(link, "%*[^/]/%[^\n]", buff);
printf("%s\n", buff);
}
edit: added clarification of the '*'
Since you have one target character you are after in a longer string, to find the first occurrence, you can simply use strchr to return a pointer to the desired character within the larger string. In this case the first '/'. To use strchr to locate the pointer associated with the first '/' your call would simply be:
char *p = strchr (link, '/');
If p is not NULL then it has been found within the string. Recall, the character found was a separator character that was not wanted, so you advance the pointer by 1 so that it is pointing to the first character after the '/'.
A simple call to strlen() at that point will tell you how many characters you must copy from link to your string where you are saving the results. Then using the length, you can simply call memcpy to copy from link beginning at the pointer location to the end of the string (including the nul-terminating character) to obtain the wanted characters. A short example would be:
#include <stdio.h>
#include <string.h>
#define MAXTOK 2048
int main (void) {
char link[] = "www.google.com/index/homepage/component.html/sdfsdf",
*p = link,
token[MAXTOK]; /* declare array to hold results */
if ((p = strchr (link, '/'))) { /* check whether '/' found in line */
size_t len = strlen (++p); /* advance pointer by 1, get length */
if (len > MAXTOK - 1) { /* check if length exceeds available */
fputs ("error: string exceeds allowable length.\n", stderr);
return 1;
}
memcpy (token, p, len + 1); /* copy remaining part of string */
printf ("%s\n", token); /* output it */
}
}
EXample Use/Output
$ ./bin/charstr_rest
index/homepage/component.html/sdfsdf
Providing the desired result string. Look things over and let me know if you have questions.
Hope this Helps..First find out the Position of '/' Character in the String then build your Substring,have mentioned on each block of code Whats it does...
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
char link[] = "www.google.com/index/homepage/component.html/sdfsdf";
char linktemp[50];
int i=0,c=0;
////Finding the Position of First '/'
for(i=0;i<strlen(link);i++)
{
if(link[i]=='/')
{
break;
}
}
//If no '/' Character found
if(i==strlen(link))
{
printf("No '/' Character found in the String");
return 0;
}
////Creating the Substring
while(c<strlen(link)-i)
{
linktemp[c]=link[i+c+1];
c++;
}
linktemp[c] ='\0';
printf("the parse string: %s\n",linktemp);
return 0;
}
I am writing my own trim() in C. There is a structure which contains all string values, the structure is getting populated from the data coming from a file which contains spaces before and after the beginning of a word.
char *trim(char *string)
{
int stPos,endPos;
int len=strlen(string);
for(stPos=0;stPos<len && string[stPos]==' ';++stPos);
for(endPos=len-1;endPos>=0 && string[endPos]==' ';--endPos);
char *trimmedStr = (char*)malloc(len*sizeof(char));
strncpy(trimmedStr,string+stPos,endPos+1);
return trimmedStr;
}
int main()
{
char string1[]=" a sdf ie ";
char *string =trim(string1);
printf("%s",string);
return 0;
}
Above code is working fine, but i don't want to declare new variable that stores the trimmed word. As the structure contains around 100 variables.
Is there any way to do somthing like below where I dont need any second variable to print the trimmed string.
printf("%s",trim(string1));
I believe above print can create dangling pointer situation.
Also, is there any way where I don't have to charge original string as well, like if I print trim(string) it will print trimmed string and when i print only string, it will print original string
elcuco was faster. but it's done so here we go:
char *trim(char *string)
{
char *ptr = NULL;
while (*string == ' ') string++; // chomp away space at the start
ptr = string + strlen(string) - 1; // jump to the last char (-1 because '\0')
while (*ptr == ' '){ *ptr = '\0' ; ptr--; } ; // overwrite with end of string
return string; // return pointer to the modified start
}
If you don't want to alter the original string I'd write a special print instead:
void trim_print(char *string)
{
char *ptr = NULL;
while (*string == ' ') string++; // chomp away space at the start
ptr = string + strlen(string) - 1; // jump to the last char (-1 because '\0')
while (*ptr == ' '){ ptr--; } ; // find end of string
while (string <= ptr) { putchar(*string++); } // you get the picture
}
something like that.
You could the original string in order to do this. For trimming the prefix I just advance the pointer, and for the suffix, I actually add \0. If you want to keep the original starting as is, you will have to move memory (which makes this an O(n^2) time complexity solution, from an O(n) I provided).
#include <stdio.h>
char *trim(char *string)
{
// trim prefix
while ((*string) == ' ' ) {
string ++;
}
// find end of original string
char *c = string;
while (*c) {
c ++;
}
c--;
// trim suffix
while ((*c) == ' ' ) {
*c = '\0';
c--;
}
return string;
}
int main()
{
char string1[] = " abcdefg abcdf ";
char *string = trim(string1);
printf("String is [%s]\n",string);
return 0;
}
(re-thinking... is it really O(n^2)? Or is it O(2n) which is a higher O(n)...? I guess depending on implementation)
You can modify the function by giving the output in the same input string
void trim(char *string)
{
int i;
int stPos,endPos;
int len=strlen(string);
for(stPos=0;stPos<len && string[stPos]==' ';++stPos);
for(endPos=len-1;endPos>=0 && string[endPos]==' ';--endPos);
for (i=0; i<=(endPos-stPos); i++)
{
string[i] = string[i+stPos];
}
string[i] = '\0'; // terminate the string and discard the remaining spaces.
}
...is there any way where i don't have to charge original string as well, like if i do trim(string) it will print trimmed string and when i print only string, it will print original string – avinashse 8 mins ago
Yes, though it gets silly.
You could modify the original string.
trim(string);
printf("trimmed: %s\n", string);
The advantage is you have the option of duplicating the string if you want to retain the original.
char *original = strdup(string);
trim(string);
printf("trimmed: %s\n", string);
If you don't want to modify the original string, that means you need to allocate memory for the modified string. That memory then must be freed. That means a new variable to hold the pointer so you can free it.
char *trimmed = trim(original);
printf("trimmed: %s\n", trimmed);
free(trimmed);
You can get around this by passing a function pointer into trim and having trim manage all the memory for you.
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
void trim(char *string, void(*func)(char *) )
{
// Advance the pointer to the first non-space char
while( *string == ' ' ) {
string++;
}
// Shrink the length to the last non-space char.
size_t len = strlen(string);
while(string[len-1]==' ') {
len--;
}
// Copy the string to stack memory
char trimmedStr[len + 1];
strncpy(trimmedStr,string, len);
// strncpy does not add a null byte, add it ourselves.
trimmedStr[len] = '\0';
// pass the trimmed string into the user function.
func(trimmedStr);
}
void print_string(char *str) {
printf("'%s'\n", str);
}
int main()
{
char string[]=" a sdf ie ";
trim(string, print_string);
printf("original: '%s'\n", string);
return 0;
}
Ta da! One variable, the original is left unmodified, no memory leaks.
While function pointers have their uses, this is a bit silly.
It's C. Get used to managing memory. ¯\_(ツ)_/¯
Also, is there any way where I don't have to charge original string as
well, like if I print trim(string) it will print trimmed string and
when i print only string, it will print original string
Yes you can, but you cannot allocate new memory in the trim function as you will not be holding the return memory.
You can have a static char buffer in the trim function and operate on it.
Updated version of #elcuco answer.
#include <stdio.h>
char *trim(char *string)
{
static char buff[some max length];
// trim prefix
while ((*string) == ' ' ) {
string++;
}
// find end of original string
int i = 0;
while (*string) {
buff[i++] = *string;
string++;
}
// trim suffix
while ((buff[i]) == ' ' ) {
buff[i] = '\0';
i--;
}
return buff;
}
int main()
{
char string1[] = " abcdefg abcdf ";
char *string = trim(string1);
printf("String is [%s]\n",string);
return 0;
}
With this you don't need to worry about holding reference to trim function return.
Note: Previous values of buff will be overwritten with new call to trim function.
If you don't want to change the original, then you will need to make a copy, or pass a second array of sufficient size as a parameter to your function for filling. Otherwise a simple in-place trmming is fine -- so long as the original string is mutable.
An easy way to approach trimming on leading and trailing whitespace is to determine the number of leading whitespace characters to remove. Then simply use memmove to move from the first non-whitespace character back to the beginning of the string (don't forget to move the nul-character with the right portion of the string).
That leaves only removing trailing whitespace. An easy approach there is to loop from the end of the string toward the beginning, overwriting each character of trailing whitespace with a nul-character until your first non-whitespace character denoting the new end of string is found.
A simple implementation for that could be:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define DELIM " \t\n" /* whitespace constant delimiters for strspn */
/** trim leading and trailing whitespace from s, (s must be mutable) */
char *trim (char *s)
{
size_t beg = strspn (s, DELIM), /* no of chars of leading whitespace */
len = strlen (s); /* length of s */
if (beg == len) { /* string is all whitespace */
*s = 0; /* make s the empty-string */
return s;
}
memmove (s, s + beg, len - beg + 1); /* shift string to beginning */
for (int i = (int)(len - beg - 1); i >= 0; i--) { /* loop from end */
if (isspace(s[i])) /* checking if char is whitespace */
s[i] = 0; /* overwrite with nul-character */
else
break; /* otherwise - done */
}
return s; /* Return s */
}
int main (void) {
char string1[] = " a sdf ie ";
printf ("original: '%s'\n", string1);
printf ("trimmed : '%s'\n", trim(string1));
}
(note: additional intervening whitespace was added to your initial string to show that multiple intervening whitespace is left unchanged, the output is single-quoted to show the remaining text boundaries)
Example Use/Output
$ ./bin/strtrim
original: ' a sdf ie '
trimmed : 'a sdf ie'
Look things over and let me know if you have further questions.
I've tried to run this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char a[1000];
void eliminatesp() {
char buff1[1000], buff2[1000];
LOOP: sscanf(a,"%s %s",buff1,buff2);
sprintf(a,"%s%s", buff1, buff2);
for(int i=0; i<strlen(a); ++i) {
if(a[i]==' ') goto LOOP;
}
}
void eliminateline() {
char buff1[1000]; char buff2[1000];
LOOP: sscanf(a,"%s\n\n%s",buff1,buff2);
sprintf(a,"%s\n%s", buff1, buff2);
for(int i=0; i<strlen(a)-1; ++i) {
if(a[i]=='\n'&&a[i+1]=='\n') goto LOOP;
}
}
int main() {sprintf(a,"%s\n\n%s", "hello world","this is my program, cris");
eliminatesp();
eliminateline();
printf("%s",a); return 0;
return 0;
}
but the output was:
hello world
world
How can I correct it? I was trying to remove spaces and empty lines.
Going with your idea of using sscanf and sprintf you can actually eliminate both spaces and newlines in a single function, as sscanf will ignore all whitespace (including newlines) when reading the input stream. So something like this should work:
void eliminate() {
char buff1[1000], buff2[1000], b[1000];
char* p = a, *q = b, *pq = b;
sprintf(q, "%s", p);
while (q != NULL && *q != '\0')
{
if (iswspace(*q))
{
sscanf(pq, "%s %s", buff1, buff2);
sprintf(p, "%s%s", buff1, buff2);
p += strlen(buff1);
pq = ++q;
}
q++;
}
}
Pedro, while the %s format specifier does stop conversion on the first encountered whitespace, it isn't the only drawback to attempting to parse with sscanf. In order to use sscanf you will also need to use the %n conversion specifier (the number of characters consumed during conversion to the point the %n appears) and save the value as an integer (say offset). Your next conversion will begin a a + offset until you have exhausted all words in 'a'. This can be a tedious process.
A better approach can simply be to loop over all characters in 'a' copying non-whitespace and single-delimiting whitespace to the new buffer as you go. (I often find it easier to copy the full string to a new buffer (say 'b') and then read from 'b' writing the new compressed string back to 'a').
As you work your way down the original string, you use simple if else logic to determine whether to store the current (or last) character or whether to just skip it and get the next. There are many ways to do this, no one way more right than the other as long as they are reasonably close in efficiency. Making use of the <ctype.h> functions like isspace() makes things easier.
Also, in your code, avoid the use of global variables. There is no reason you can't declare 'a' in main() and pass it as a parameter to your eliminate functions. If you need a constant in your code, like 1000, then #define a constant and avoid sprinkling magic numbers throughout your code.
Below is an example putting all those pieces together, and combining both your eliminatesp and eliminateline functions into a single eliminatespline function that does both trim whitespace and eliminate blank lines. This will handle blank lines and considers lines containing only whitespace characters as blank.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXL 1000 /* if you need a constant, define one (or more) */
/** trim leading, compress included, and trim trailing whitespace.
* given non-empty string 'a', trim all leading whitespace, remove
* multiple included spaces and empty lines, and trim all trailing
* whitespace.
*/
void eliminatespline (char *a)
{
char b[MAXL] = "", /* buffer to hold copy of a */
*rp = b, /* read pointer to copy of a */
*wp = a, /* write pointer for a */
last = 0; /* last char before current */
if (!a || !*a) /* a NULL or empty - return */
return;
strcpy (b, a); /* copy a to b */
while (isspace (*rp)) /* skip leading whitespace */
rp++;
last = *rp++; /* fill last with 1st non-whitespace */
while (*rp) { /* loop over remaining chars in b */
/* last '\n' and current whitespace - advance read pointer */
if (last == '\n' && isspace(*rp)) {
rp++;
continue;
} /* last not current or last not space */
else if (last != *rp || !isspace (last))
*wp++ = last; /* write last, advance write pointer */
last = *rp++; /* update last, advance read pointer */
}
if (!isspace (last)) /* if last not space */
*wp++ = last; /* write last, advance write pointer */
*wp = 0; /* nul-terminate at write pointer */
}
int main() {
char a[] = " hello world\n \n\nthis is my program, cris ";
eliminatespline (a);
printf ("'%s'\n", a);
return 0;
}
note: the line being trimmed has both leading and trailing whitespace as well as embedded blank lines and lines containing only whitespace, e.g.
" hello world\n \n\nthis is my program, cris "
Example Use/Output
$ ./bin/elimspaceline
'hello world
this is my program, cris'
(note: the printf statements wraps the output in single-quotes to confirm all leading and trailing whitespace was eliminated.)
If you did want to use sscanf, you could essentially do the same thing with sscanf (using the %n specifier to report characters consumed) and a array of two-characters to treat the next character as a string, and do something like the following:
void eliminatespline (char *a)
{
char b[MAXL] = "", /* string to hold build w/whitespace removed */
word[MAXL] = "", /* string for each word */
c[2] = ""; /* string made of char after word */
int n = 0, /* number of chars consumed by sscanf */
offset = 0; /* offset from beginning of a */
size_t len; /* length of final string in b */
/* sscanf each word and char that follows, reporting consumed */
while (sscanf (a + offset, "%s%c%n", word, &c[0], &n) == 2) {
strcat (b, word); /* concatenate word */
strcat (b, c); /* concatenate next char */
offset += n; /* update offset with n */
}
len = strlen (b); /* get final length of b */
if (len && isspace(b[len - 1])) /* if last char is whitespace */
b[len - 1] = 0; /* remove last char */
strcpy (a, b); /* copy b to a */
}
Look things over, try both approaches and let me know if you have further questions.
I am trying to make this shell parse. How do I make the program implement parsing in a way so that commands that are in quotes will be parsed based on the starting and ending quotes and will consider it as one token? During the second while loop where I am printing out the tokens I think I need to put some sort of if statement, but I am not too sure. Any feedback/suggestions are greatly appreciated.
#include <stdio.h> //printf
#include <unistd.h> //isatty
#include <string.h> //strlen,sizeof,strtok
int main(int argc, char **argv[]){
int MaxLength = 1024; //size of buffer
int inloop = 1; //loop runs forever while 1
char buffer[MaxLength]; //buffer
bzero(buffer,sizeof(buffer)); //zeros out the buffer
char *command; //character pointer of strings
char *token; //tokens
const char s[] = "-,+,|, ";
/* part 1 isatty */
if (isatty(0))
{
while(inloop ==1) // check if the standard input is from terminal
{
printf("$");
command = fgets(buffer,sizeof(buffer),stdin); //fgets(string of char pointer,size of,input from where
token = strtok(command,s);
while (token !=NULL){
printf( " %s\n",token);
token = strtok(NULL, s); //checks for elements
}
if(strcmp(command,"exit\n")==0)
inloop =0;
}
}
else
printf("the standard input is NOT from a terminal\n");
return 0;
}
For an arbitrary command-line syntax, strtok is not the best function. It works for simple cases, where the words are delimited by special characters or white space, but there will come a time where you want to split something like this ls>out into three tokens. strtok can't handle this, because it needs to place its terminating zeros somewhere.
Here's a quick and dirty custom command-line parser:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
int error(const char *msg)
{
printf("Error: %s\n", msg);
return -1;
}
int token(const char *begin, const char *end)
{
printf("'%.*s'\n", end - begin, begin);
return 1;
}
int parse(const char *cmd)
{
const char *p = cmd;
int count = 0;
for (;;) {
while (isspace(*p)) p++;
if (*p == '\0') break;
if (*p == '"' || *p == '\'') {
int quote = *p++;
const char *begin = p;
while (*p && *p != quote) p++;
if (*p == '\0') return error("Unmachted quote");
count += token(begin, p);
p++;
continue;
}
if (strchr("<>()|", *p)) {
count += token(p, p + 1);
p++;
continue;
}
if (isalnum(*p)) {
const char *begin = p;
while (isalnum(*p)) p++;
count += token(begin, p);
continue;
}
return error("Illegal character");
}
return count;
}
This code understands words separated by white-space, words separated by single or double quotation marks and single-character operators. It doesn't understand escaped quotation marks inside quotes and non-alphanumeric characters such as the dot in words.
The code is not hard to understand and you can extend it easily to understand double-char operators such as >> or comments.
If you want to escape quotation marks, you'll have to recognise the escape character in parse and unescape it and possible other escape sequences in token.
First, you've declared argv to be an array of pointers to... pointers. In fact, it is an array of pointers to chars. So:
int main(int argc, char **argv){
The trend is you want to reach for [], which got you into incorrect code here, but the idiom in C/C++ is more commonly to use pointer syntax, e.g.:
const char* s = "-+| ";
FWIW.
Also, note that fgets() will return NULL when it hits end of file (e.g., the user types CTRL-D on *nix or CTRL-Z on DOS/Windows). You probably don't want a segment violation when that happens.
Also, bzero() is a nonportable function (you probably don't care in this context) and the C compiler will happily initialize an array to zeroes for you if you ask it to (possibly worth caring about; syntax demonstrated below).
Next, as soon as you allow quoted strings, the next language question that immediately arises is: "how do I quote a quote?". Then, you are immediately out of the territory that can be handled cleanly with strtok(). I'm not 100% sure how you want to break your string into tokens. Using strtok() in the way you do, I think the string "a|b" would produce two tokens, "a" and "b", making you overlook the "|". You're treating "|" and "-" and "+" like whitespace, to be ignored, which is not generally what a shell does. For example, given this command-line:
echo 'This isn''t so hard' | cp -n foo.h .. >foo.out
I would probably want to get the following list of tokens:
echo
'This isn''t so hard'
|
cp
-n
foo.h
..
>
foo.out
Usually, characters like '+' and '-' are not special for most shells' tokenizing process (unlike '|' and '&' and '<', etc. which are instructions to the shell that the spawned command never sees). They get passed onto the application that is then free to decide "'-' indicates this word is an option and not a filename" or whatever.
What follows is a version of your code that produces the output I described (which may or may not be exactly what you want) and allows either double or single-quoted arguments (trivial to extend to handle back-ticks too) that can contain quote marks of the same kind, etc.
#include <stdio.h> //printf
#include <unistd.h> //isatty
#include <string.h> //strlen,sizeof,strtok
#define MAXLENGTH 1024
int main(int argc, char **argv[]){
int inloop = 1; //loop runs forever while 1
char buffer[MAXLENGTH] = {'\0'}; //compiler inits entire array to NUL bytes
// bzero(buffer,sizeof(buffer)); //zeros out the buffer
char *command; //character pointer of strings
char *token; //tokens
char* rover;
const char* StopChars = "|&<> ";
size_t toklen;
/* part 1 isatty */
if (isatty(0))
{
while(inloop ==1) // check if the standard input is from terminal
{
printf("$");
token = command = fgets(buffer,sizeof(buffer),stdin); //fgets(string of char pointer,size of,input from where
if(command)
while(*token)
{
// skip leading whitespace
while(*token == ' ')
++token;
rover = token;
// if possible quoted string
if(*rover == '\'' || *rover == '\"')
{
char Quote = *rover++;
while(*rover)
if(*rover != Quote)
++rover;
else if(rover[1] == Quote)
rover += 2;
else
{
++rover;
break;
}
}
// else if special-meaning character token
else if(strchr(StopChars, *rover))
++rover;
// else generic token
else
while(*rover)
if(strchr(StopChars, *rover))
break;
else
++rover;
toklen = (size_t)(rover-token);
if(toklen)
printf(" %*.*s\n", toklen, toklen, token);
token = rover;
}
if(strcmp(command,"exit\n")==0)
inloop =0;
}
}
else
printf("the standard input is NOT from a terminal\n");
return 0;
}
Regarding your specific request: commands that are in quotes will be parsed based on the starting and ending quotes.
You can use strtok() by tokenizing on the " character. Here's how:
char a[]={"\"this is a set\" this is not"};
char *buf;
buf = strtok(a, "\"");
In that code snippet, buf will contain "this is a set"
Note the use of \ allowing the " character to used as a token delimiter.
Also, Not your main issue, but you need to:
Change this:
const char s[] = "-,+,|, "; //strtok will parse on -,+| and a " " (space)
To:
const char s[] = "-+| "; //strtok will parse on only -+| and a " " (space)
strtok() will parse out whatever you have in the delimiter string, including ","