How to Tokenize string[array]? - c

I need to tokenize a string from an array, i need just three words and ignore all tabs '\t' and spaces ' '
the array line[] is just a test case.
I debugged mine, the first array (supposed to carry only the first word) got filled by spaces & letters from 3 words, not stopping after the first word when a tab or space is met. BTW my program crashed. i suspect getting out of array bounds maybe.
What am I doing wrong?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char line[] = " CLOOP LDA buffer,x";
char array1[20] ="";
char array2[20] ="";
char array3[20] ="";
int i = 0;
for( i ; i<strlen(line) ; i++)
{
while ( (line[i] != ' ') && (line[i] != '\t'))
{
if(array1[0] == '\0')
{
int j = 0;
while(line[i] != ' ' && line[i] != '\t')
{
array1[j] = line[i];
i++;
j++;
}
}
if(array2[0] =='\0');
{
int k = 0;
while(line[i] != ' ' && line[i] != '\t')
{
array2[k] = line[i];
i++;
k++;
}
}
if(array3[0] == '\0')
{
int g = 0;
while(line[i] != ' ' && line[i] != '\t')
{
array3[g] = line[i];
i++;
g++;
}
}
}
}
printf("array 1: %s\n array2: %s\n array3: %s\n", array1, array2, array3);
return(0);
}

You are over-complicating things. First of all it is difficult to feed all 3 arrays at the same time. The processing for one token should be completely finished before moving to the other token.
I would propose to "eat" all the white spaces before starting to process a token.
That is done by:
// skip leading delimiters
if( skip_leading_delimiters )
{
if( is_delimiter( delimiters, line[i]) ) continue;
skip_leading_delimiters = 0;
}
After token is processes you can move to the next token and process it. I tried to preserve your concept and approach as much as I could. The amount of while loops has been reduced to 0 since // skip leading delimiters section takes care of it.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int is_delimiter(const char * delimiters, char c) // check for a delimiter
{
char *p = strchr (delimiters, c); // if not NULL c is separator
if (p) return 1; // delimeter
else return 0; // not a delimeter
}
int main()
{
char line[] = " CLOOP LDA buffer,x";
char array1[20];
char array2[20];
char array3[20];
int con1 = 1;
int con2 = 0;
int con3 = 0;
int con1s = 0;
int con2s = 0;
int con3s = 0;
int i = 0;
int j = 0;
int skip_leading_delimiters = 1;
char * delimiters = " \b";
for(i = 0; i < strlen(line); i++)
{
// skip leading delimiters
if( skip_leading_delimiters )
{
if( is_delimiter( delimiters, line[i]) ) continue;
skip_leading_delimiters = 0;
}
if(con1)
{
if(line[i] != ' ' && line[i] != '\t')
{
array1[j] = line[i];
j++;
array1[j] = 0;
con1s = 1;
}
else
{
if(con1s)
{
con1 = 0;
con2 = 1;
skip_leading_delimiters = 1;
j = 0;
}
continue;
}
}
if(con2)
{
if(line[i] != ' ' && line[i] != '\t')
{
array2[j] = line[i];
j++;
array2[j] = 0;
con2s = 1;
}
else
{
con2 = 0;
con3 = 1;
skip_leading_delimiters = 1;
j = 0;
continue;
}
}
if(con3)
{
if(line[i] != ' ' && line[i] != '\t')
{
array3[j] = line[i];
j++;
array3[j] = 0;
con3s = 1;
}
else
{
con3 = 0;
j = 0;
continue;
}
}
}
printf(" array1: %s\n array2: %s\n array3: %s\n", array1, array2, array3);
return(0);
}
Output:
array1: CLOOP
array2: LDA
array3: buffer,x

Related

turning morse into english from a txt file in C, only iterates 1 char in the string before stopping

Using the code below it only reads one char and does not convert morse to letter. My idea was to create a string of one morse "letter" and put it in the convert function, however only 1 char is being read since I am only seeing a single 1 printed on the screen after the string itself is printed. The string only consists of '-' , '.' , ' '. I was wondering if anyone knows what the solution might be.
char convertToLetter(M* data, char word[10]) {
int size = 0;
char correct;
while (size < 60)
{
int compare = strcmp(word, data->morse);
if (compare == 0) {
correct = data->letter;
}
data++;
size++;
}
correct = '\0';
return correct;
}
int main(){
//some code here for opening a file.
char curSent[200];
char letter[6] = "";
int i = 0;
char* fullString = (char*)malloc(1000 * sizeof(char));
fullString[0] = '\0';
while (fgets(curSent, 200, inFile) != NULL) {
if (curSent[0] != '\n') {
curSent[strlen(curSent) - 1] = '\0';
strcat_s(fullString,1000, curSent);
}
else {
printf("%s", fullString);
printf("\n\n");
int j = 0;
while (i < strlen(fullString)) {
if (fullString[i] != ' ') {
fullString[i] = letter[j];
i++;
j++;
printf("%d \n", 1);
}else if (fullString[i + 1] == ' ' && fullString[i] == ' ') {
printf("%d", 2);
printf(" %c", convertToLetter(dictionary, letter));
memset(letter, 0, strlen(letter));
j = 0;
i = i + 2;
}else if (fullString[i] == ' ') {
printf("%d", 3);
printf("%c", convertToLetter(dictionary, letter));
memset(letter, 0, strlen(letter));
j = 0;
i = i++;
}
}
memset(fullString, 0, strlen(fullString));
i = 0;
}
}
//printf("%s", fullString);
getchar();
return 0;
}

String Pattern Matching in C

I was trying this pattern matching method in C but whenever I give all the input, the vscode terminal waits for a while and just stops the program without any warnings/message. Can anyone point to what is wrong here?
#include <stdio.h>
#include <string.h>
int main()
{
char STR[100], PAT[100], REP[100], ANS[100];
int i, m, j, k, flag, slP, slR, len;
i = m = k = j = flag = len = 0;
printf("\nMain String: ");
gets(STR);
printf("\nPattern String: ");
gets(PAT);
slP = strlen(PAT);
printf("\nReplace String: ");
gets(REP);
slR = strlen(REP);
while (STR[i] != '\0')
{
if (STR[i] = PAT[j])
{
len = 0;
for (k = 0; k < slP; k++)
{
if (STR[k] = PAT[k])
len++;
}
if (len == slP)
{
flag = 1;
for (k = 0; k < slR; k++, m++)
ANS[m] = REP[k];
}
}
else
{
ANS[m] = STR[i];
m++;
i++;
}
}
if (flag == 0)
{
printf("\nPattern not found!");
}
else
{
ANS[m] = '\0';
printf("\nResultant String: %s\n", ANS);
}
return 0;
}
There are multiple problems in the code:
using gets() is risky, this function was removed from the C Standard because it cannot be used safely.
if (STR[i] = PAT[j]) copied the pattern to the string. You should use:
if (STR[i] == PAT[j])
similarly, if (STR[k] = PAT[k]) is incorrect. You should compare PAT[k] and STR[i + k]:
if (STR[i + k] == PAT[k])
you should test for buffer overflow for the output string as replacing a short string by a larger one may produce a string that will not fit in ANS
you do not increment i properly.
Here is a modified version:
#include <stdio.h>
int getstr(const char *prompt, char *dest, int size) {
int c, len = 0;
printf("%s", prompt);
while ((c = getchar()) != EOF && c != '\n') {
if (len + 1 < size)
dest[len++] = c;
}
if (size > 0)
dest[len] = '\0';
printf("\n");
if (c == EOF && len == 0)
return -1;
else
return len;
}
int main() {
char STR[100], PAT[100], REP[100], ANS[100];
int i, m, k, flag;
if (getstr("Main String: ", STR, sizeof STR) < 0)
return 1;
if (getstr("Pattern String: ", PAT, sizeof PAT) < 0)
return 1;
if (getstr("Replace String: ", REP, sizeof REP) < 0)
return 1;
i = m = flag = 0;
while (STR[i] != '\0') {
if (STR[i] == PAT[0]) { // initial match
// compare the rest of the pattern
for (k = 1; PAT[k] != '\0' && PAT[k] == STR[i + k]; k++)
continue;
if (PAT[k] == '\0') { // complete match
flag = 1;
// copy the replacement string
for (k = 0; REP[k] != '\0'; k++) {
if (m + 1 < sizeof ANS)
ANS[m++] = REP[k];
}
i += k; // skip the matching characters
continue;
}
}
// otherwise copy a single character
if (m + 1 < sizeof ANS)
ANS[m++] = STR[i];
i++;
}
ANS[m] = '\0';
if (flag == 0) {
printf("Pattern not found!\n");
} else {
printf("Resultant String: %s\n", ANS);
}
return 0;
}

Splitting a string to an array of strings

I'm trying to split a sentence the user inputs to an array of words so I can later manipulate the words separately as strings.
The code is compiling but prints only garbage after the user input.
I tried debugging but don't see the problem. Can someone help me fix it?
#include <stdio.h>
#include <string.h>
int main() {
char str[1000];
int i = 0;
char rev[1000][1000];
int r = 0;
puts("Enter text:");
gets(str);
int k, length = 0;
printf_s("So the words are:\n");
while (str[i] != '\0') {
if (str[i] == ' ') {
k = i - length;
do {
rev[r][k] = (str[k]);
k++;
} while (str[k] != ' ');
printf(" ");
length = (-1);
r++;
} else
if (str[i + 1] == '\0') {
k = i - length;
do {
rev[r][k] = (str[k]);
k++;
} while (str[k] != '\0');
length = 0;
r++;
}
length++;
i++;
}
for (int r = 0; r < 1000; r++)
printf("%s ", rev[r]);
return 0;
}
fix like this
#include <stdio.h>
int main(void) {
char str[1000];
char rev[1000][1000];
puts("Enter text:");
fgets(str, sizeof str, stdin);//Use fgets instead of gets. It has already been abolished.
int r = 0;
int k = 0;
for(int i = 0; str[i] != '\0'; ++i){
if (str[i] == ' ' || str[i] == '\n'){//is delimiter
if(k != 0){
rev[r++][k] = '\0';//add null-terminator and increment rows
k = 0;//reset store position
}
} else {
rev[r][k++] = str[i];
}
}
if(k != 0)//Lastly there was no delimiter
rev[r++][k] = '\0';
puts("So the words are:");
for (int i = 0; i < r; i++){
printf("%s", rev[i]);
if(i < r - 2)
printf(", ");
else if(i == r - 2)
printf(" and ");
}
return 0;
}
Replace you declaration
char rev[1000][1000];
with
char * rev[1000]; // We will need pointers only
int i = 0; // Index to previous array
and all your code after
puts( "Enter text:" );
with this:
fgets( str, 998, stdin ); // Safe way; don't use gets(str)
const char delim[] = ",; "; // Possible delimiters - comma, semicolon, space
char *word;
/* Get the first word */
word = strtok( str, delim );
rev[i++] = word;
/* Get the next words */
while( word != NULL )
{
word = strtok( NULL, delim );
rev[i++] = word;
}
/* Testing */
for (int r = 0; r < i - 1; r++)
printf( "%s\n", rev[r] );
return 0
}
As you can see, all dirty work is done with the strtok() function ("string to tokens") which walks through other and other words ("tokens"), recognizing them as delimited by one or more characters from the string delim.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int count_spaces(char *str)
{
if (str == NULL || strlen(str) <= 0)
return (0);
int i = 0, count = 0;
while (str[i])
{
if (str[i] == ' ')
count++;
i++;
}
return (count);
}
int count_char_from_pos(char *str, int pos)
{
if (str == NULL || strlen(str) <= 0)
return 0;
int i = pos, count = 0;
while (str[i] && str[i] != ' ')
{
count++;
i++;
}
return count;
}
char **get_words(char *str)
{
if (str == NULL || strlen(str) <= 0)
{
printf("Bad string inputed");
return NULL;
}
int i = 0, j = 0, k = 0;
char **dest;
if ((dest = malloc(sizeof(char*) * (count_spaces(str) + 1))) == NULL
|| (dest[0] = malloc(sizeof(char) * (count_char_from_pos(str, 0) + 1))) == NULL)
{
printf("Malloc failed\n");
return NULL;
}
while (str[i])
{
if (str[i] == ' ') {
dest[j++][k] = '\0';
if ((dest[j] = malloc(sizeof(char) * (count_char_from_pos(str, i) + 1))) == NULL)
{
printf("Malloc failed\n");
return NULL;
}
k = 0;
}
else {
dest[j][k++] = str[i];
}
i++;
}
dest[j][k] = 0;
dest[j + 1] = NULL;
return dest;
}
int main(void) {
char *line = NULL;
size_t n = 0;
getline(&line, &n, stdin);
printf("%s\n", line);
line[strlen(line) - 1] = 0;
printf("%s\n", line);
char **tab = get_words(line);
int i = 0;
while (tab[i])
{
printf("%s\n", tab[i++]);
}
}
here is a long but fully working example
get the user input
then send it to get_words function. It will get the number of words, the number of characters for each words, allocate everything in memory and writes chars then return it. You get a char ** and prints it just tested it it works
If you wish to split a string into an array of strings, you should consider the strtok function from #include <string.h>. The strtok function will the split the string on the given delimiter(s). For your case, it would the " ".
Using the strtok example from Tutorials Point:
#include <string.h>
#include <stdio.h>
int main(){
char str[80] = "This is - www.tutorialspoint.com - website";//The string you wish to split
const char s[] = "-";//The thing you want it to split from. But there is no need to this.
char *token;//Storing the string
/* get the first token */
token = strtok(str, s);//Split str one time using the delimiter s
/* walk through other tokens */
while( token != NULL )
{
printf( " %s\n", token );//Print the string
token = strtok(NULL, s);//Split the string again using the delimiter
}
return(0);
}

how to split words in a string in c?

I need to build a program that receives up to 30 chars from the user, and then to play with it.
For example, I need to reverses the sentence and then print it, or to rotate it.
I have been trying to copy the words of the sentence one by one to a matrix of [30][31], but it does not working... any ideas?
I cannot use pointers...
thanks for the help :)
#include <stdio.h>
#include <string.h>
void main(){
int i=0,
j=0,
wrongData=0,
charCounter=0,
word=0,
letter=0;
char st[100],
arr[100]={0},
mat[30][31]={0};
printf("Please, enter your sentence >");
gets(st);
while(i<strlen(st)){
if('A'<=st[i] && st[i]<='Z'){
charCounter++;
arr[j] = st[i];
i++;
j++;
} else if(st[i]==' '){
arr[j] = ' ';
i++;
j++;
while(st[i] == ' '){
i++;
}
} else if(st[i]=='\0'){
arr[j] = '\0';
break;
} else {
puts("ERROR: Incorrect data, try again.");
wrongData=1;
break;
}
if(wrongData==0){
if(charCounter>30){
puts("ERROR: Incorrect data, try again.");
}
}
}
puts(st);
puts(arr);
if(arr[j]==' '){
word++;
}
while(arr[j]!=' ' && letter<32){
strcpy(mat[word],arr);
}
if(arr[j]=='\0'){
mat[word][letter]=arr[j];
}
puts(mat[word]);
}
Taking into account your comment
the problem is that i need to reverse the words not the letters... for
example: if the string is cats hates dogs, i need to get at the end
dogs hates cats
then I think you mean something as the following
#include <stdio.h>
#include <ctype.h>
#include <string.h>
char * reverse_words( char s[] )
{
for ( char *p = s, *q = s; *p; p = q )
{
while ( isspace( ( unsigned char )*p ) ) ++p;
q = p;
while ( *q && !isspace( ( unsigned char )*q ) ) ++q;
for ( size_t i = 0; i < ( q - p ) / 2; i++ )
{
char c = p[i];
p[i] = q[-i-1];
q[-i-1] = c;
}
}
for ( size_t i = 0, n = strlen( s ); i < n / 2; i++ )
{
char c = s[i];
s[i] = s[n-i-1];
s[n-i-1] = c;
}
return s;
}
int main( void )
{
char s[] = "cats hates dogs";
puts( s );
puts( reverse_words( s ) );
return 0;
}
The program output is
cats hates dogs
dogs hates cats
Here is another approach. The idea is to go through the string and record the index where each word starts and ends. Then the words can be printed in reverse order afterwards. (btw - it will also be easy to rotate the words).
#include<stdio.h>
#include <string.h>
int main() {
char st[100] = "here we go again";
int start[30] = { 0 };
int end[30] = { 0 };
int count = 0;
int len = strlen(st);
int i, j;
// Find start and end index of each word
start[0] = 0;
for(i = 0; i < len; ++i)
{
if (st[i] == ' ')
{
end[count] = i;
++count;
start[count] = i + 1;
}
}
end[count] = len;
// Print the words in reverse order
for(i=count; i >= 0; --i)
{
for (j = start[i]; j < end[i]; ++j)
{
printf("%c", st[j]);
}
printf(" ");
}
printf("\n");
return 0;
}
output:
again go we here
fix your approach like this:
#include <stdio.h>
//#include <string.h>
#define MAX_LEN 30
int main(void){
int i, j, n, word;
char st[100], arr[100], mat[MAX_LEN / 2][MAX_LEN + 1];
printf("Please, enter your sentence (up to %d chars and A-Z or space)\n>", MAX_LEN);fflush(stdout);
scanf("%99[^\n]%*c", st);
//validate and reduce of spaces
for(j = i = 0; st[i]; ++i){
if(i > MAX_LEN){
fputs("ERROR: Incorrect data, try again.\n", stderr);
return 1;
}
if('A'<=st[i] && st[i]<='Z'){
arr[j++] = st[i];
} else if(st[i]==' '){
arr[j++] = ' ';
while(st[++i] == ' ')//Skip a continuous space
;
--i;//one back for next loop
} else {
fputs("ERROR: Incorrect data, try again.\n", stderr);
return 1;
}
}
arr[j] = '\0';//st[i]=='\0' never become true in loop
#if DEBUG
puts(st);
puts(arr);
#endif
//split to word
for(word = j = i = 0; arr[i];){
while(arr[i] == ' ')
++i;//skip space
while(arr[i] != ' ' && arr[i] != '\0')
mat[word][j++] = arr[i++];
mat[word++][j] = '\0';
j = 0;
}
#if DEBUG
for(i = 0; i < word; ++i)
puts(mat[i]);
#endif
puts("reverse word");
for(i = 0; i < word; ++i){
if(i)
putchar(' ');
printf("%s", mat[word-1-i]);
}
puts("\nrotate word");
printf("Please, enter number of rotate\n>");fflush(stdout);
scanf("%d", &n);
for(i = 0; i < word; ++i){
if(i)
putchar(' ');
printf("%s", mat[(i+n)%word]);//rotate left
}
}

C --- Remove all extra spaces in string excluding chars between specified characters

I have this string: print "Foo cakes are yum"
I need to somehow strip all extra whitespace but leave text between quotes alone. This is what i have so far:
char* clean_strip(char* string)
{
int d = 0, c = 0;
char* newstr;
while(string[c] != '\0'){
if(string[c] == ' '){
int temp = c + 1;
if(string[temp] != '\0'){
while(string[temp] == ' ' && string[temp] != '\0'){
if(string[temp] == ' '){
c++;
}
temp++;
}
}
}
newstr[d] = string[c];
c++;
d++;
}
return newstr;
}
This returns this string: print "Foo cakes are yum"
I need to be able to skip text between thw quotes so i get this: print "Foo cakes are yum".
Here is the same question but for php, i need a c answer: Remove spaces in string, excluding these in specified between specified characters
Please help.
Try this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* clean_strip(char* string)
{
int d = 0, c = 0;
char* newstr = malloc(strlen(string)+1);
int quoted = 0;
while(string[c] != '\0'){
if (string[c] == '"') quoted = !quoted;
if(!quoted && string[c] == ' '){
int temp = c + 1;
if(string[temp] != '\0'){
while(string[temp] == ' ' && string[temp] != '\0'){
if(string[temp] == ' '){
c++;
}
temp++;
}
}
}
newstr[d] = string[c];
c++;
d++;
}
newstr[d] = 0;
return newstr;
}
int main(int argc, char *argv[])
{
char *input = "print \"Foo cakes are yum\"";
char *output = clean_strip(input);
printf(output);
free(output);
return 0;
}
This will produce the output:
print "Foo cakes are yum"
It works by looking for the " character. If it's found it toggles the variable quoted. If quoted is true, then the whitespace removal is skipped.
Also, your original function never allocates memory for newstr. I added the newstr = malloc(...) part. It is important to allocate memory for strings before writing to them.
I simplified your logic a little.
int main(void)
{
char string[] = "print \"Foo cakes are yum\"";
int i = 0, j = 1, quoted=0;
if (string[0] == '"')
{
quoted=1;
}
for(i=1; i< strlen(string); i++)
{
if (string[i] == '"')
{
quoted = 1-quoted;
}
string[j] = string[i];
if (string[j-1]==' ' && string[j] ==' ' && !quoted)
{
;
}
else
{
j++;
}
}
string[j]='\0';
printf("%s\n",string);
return 0;
}

Resources