Analyzing Strings with sscanf - c

I need to parse a string that was previously read with fgets;
then I have a row from:
name age steps\n
mario 10 1 2 3 4\n
joe 15 3 5\n
max 20 9 3 2 4 5\n
there are a variable number of steps for each column,
then I can read name and age with
sscanf(mystring, "%s %d", name, &age);
after this I have a for cycle for read all steps
int step[20];
int index=0;
while(sscanf(mystring,"%d", &step[index++])>0);
but this cycle never ends populating all array data with the age column.

The reason this never ends is because you are constantly providing the same string to scan.
sscanf provides the %n conversion specifier, which stores the number of characters consumed so far into an int argument. That lets you move forward in your input string by that many characters before scanning again.
This'll work:
int step[20];
int index=0;
int readLen;
while(sscanf(mystring,"%d%n", &step[index++], &readLen)>0) {
mystring += readLen;
}

A working solution is given in the answer from sokkyoku.
Another possibility to read variable length lines is to use strtok like in the following code snippet:
#ifndef BUFFSIZE
#define BUFFSIZE 1024   /* fallback line capacity when the project does not define one */
#endif

/*
 * Read a whitespace-separated table from fin.
 *
 * The first line is treated as a header and skipped. Every following line
 * is echoed, then split on spaces/tabs with strtok and each token printed.
 *
 * Returns the number of data lines read (header excluded), or -1 when not
 * even the header line could be read.
 */
int getlines (FILE *fin)
{
    int nlines = 0;
    char line[BUFFSIZE] = {0};

    /* Consume the header line; the original read it into an undeclared buffer. */
    if (fgets(line, BUFFSIZE, fin) == NULL)
        return -1;

    while (fgets(line, BUFFSIZE, fin) != NULL) {
        /* Remove the '\n' or '\r' character */
        line[strcspn(line, "\r\n")] = 0;

        int count = 0;  /* number of tokens on this line, available for further processing */
        printf("line[%d] = %s\n", nlines, line);

        /* strtok writes NULs into line, which is why the line is echoed first. */
        for (char *p = strtok(line, " \t"); p != NULL; p = strtok(NULL, " \t")) {
            printf("%s ", p);
            ++count;
        }
        (void)count;
        printf("\n\n");
        ++nlines;
    }
    return nlines;
}
Explanation of the above function getlines:
Each line in the file fin is read using fgets and stored in the variable line.
Then each substring in line (separated by a white space or \t character) is extracted and the pointer to that substring stored in p, by means of the function strtok in the for loop (see for example this post for further example on strtok).
The function then just print p but you can do everything with the substring here.
I also count (++count) the number of items found in each line. At the end, the function getlines returns the number of lines read.

Related

How to read specific words from a file?

I have a file that contains words and their synonyms each on a separate line.
I am writing this code that should read the file line by line then display it starting from the second word which is the synonym.
I used the variable count in the first loop in order to be able to count the number of synonyms of each word because the number of synonyms differs from one to another. Moreover I used the condition synonyms[i]==',' because each synonym is separate by a comma.
The purpose of me writing such code is to put them in a binary search tree in order to have a full dictionary.
The code doesn't contain any error yet it is not working.
I have tried to each the loop but that didn't work too.
Sample input from the file:
abruptly - dead, short, suddenly
acquittance - release
adder - common, vipera
Sample expected output:
dead short suddenly
acquittance realse
common vipera
Here is the code:
// Code as posted in the question (it does not produce the expected output).
// Reads fp in two consecutive passes: an fgets loop, then an fscanf loop.
void LoadFile(FILE *fp){
// count is never initialized, is only ever incremented, and is never read.
int count;
int i;
char synonyms[50];
char word[50];
// First pass: the closing brace below ends this while loop, so fgets keeps
// reading lines (at most 49 characters each) until it returns NULL.
while(fgets(synonyms,50,fp)!=NULL){
// Counts every ',' and '\n' in the line just read.
for (i=0;i<strlen(synonyms);i++)
if (synonyms[i]==',' || synonyms[i]=='\n')
count++;
}
// Second pass: starts only after the fgets loop has stopped, i.e. after
// fgets already returned NULL on this same stream.
while(fscanf(fp,"%s",word)==1){
// The bound strlen(synonyms) changes inside the loop, because each
// iteration overwrites synonyms with the next token.
for(i=1;i<strlen(synonyms);i++){
// The result of the comparison is discarded; a failed read is not detected.
( fscanf(fp,"%s",synonyms)==1);
// Printed without any separator between tokens.
printf("%s",synonyms);
}
}
}
/* Open the synonyms file and hand it to LoadFile; always exits with status 0. */
int main(){
    char fn[]="C:/Users/CLICK ONCE/Desktop/Semester 4/i2206/Project/Synonyms.txt";
    FILE *fp=fopen(fn,"rt");

    if (fp!=NULL){
        LoadFile(fp);
        return 0;
    }

    /* Opening failed: report it and fall through to the same exit status. */
    printf("Cannot open this file");
    return 0;
}
Here is my solution. I have split the work into functions for readability. The actual parsing is done in the parse function. That function takes into account hyphenated compound words such as seventy-two. The word and its synonyms must be separated by a hyphen preceded by at least one space.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Trim leading and trailing whitespace characters in place.
// Warning: string is modified
//
// s: NUL-terminated string to trim; NULL is accepted and returned as-is.
// Returns s, whose contents now start at the first non-space character and
// end after the last one. An empty or all-blank string becomes "".
char* trim(char* s) {
    if (s == NULL)
        return NULL;

    size_t len = strlen(s);

    // Cut trailing whitespace. The len > 0 guard keeps s[len - 1] inside the
    // buffer for empty and all-blank input.
    while (len > 0 && isspace((unsigned char)s[len - 1]))
        s[--len] = '\0';

    // Skip leading whitespace; the terminating NUL is not a space, so the
    // scan always stops inside the string.
    char *start = s;
    while (isspace((unsigned char)*start)) {
        ++start;
        --len;
    }

    // Regions overlap, hence memmove; + 1 carries the terminator along.
    memmove(s, start, len + 1);
    return s;
}
// Parse one "word - synonym, synonym, ..." line and print the word followed
// by its synonyms, separated by single spaces, on one output line.
// Warning: string is modified
//
// line: NUL-terminated input line without its trailing newline.
// Returns 1 on success, 0 when line is NULL or contains no hyphen that is
// preceded by a space.
int parse(char* line)
{
    if (line == NULL) {
        printf("Missing input line\n");
        return 0;
    }

    // First find the word delimiter: a hyphen preceded by a space.
    // Hyphens inside compound words (e.g. seventy-two) are skipped.
    char *sep = strchr(line, '-');
    while (sep != NULL && !(sep > line && sep[-1] == ' '))
        sep = strchr(sep + 1, '-');

    if (sep == NULL) {
        printf("Missing hyphen\n");
        return 0;
    }

    // Split the line in two at the delimiter.
    *sep = 0;
    printf("%s ", trim(line));

    // Next find the synonyms, delimited by commas and/or spaces.
    char delim[] = ", ";
    for (char *token = strtok(sep + 1, delim); token != NULL; token = strtok(NULL, delim))
        printf("%s ", token);

    printf("\n");
    return 1;
}
// Feed every line of fp to parse().
// Returns 1 when all lines parsed successfully, 0 when fp is NULL or at
// least one line was rejected. Parsing continues after a rejected line.
int LoadFile(FILE* fp)
{
    char str[1024]; // Longest allowed line
    int ret = 1;

    if (fp == NULL) {
        printf("File not open\n");
        return 0;
    }

    for (;;) {
        if (fgets(str, sizeof(str), fp) == NULL)
            break;
        str[strcspn(str, "\r\n")] = 0; // Remove ending \n
        if (!parse(str))
            ret = 0;
    }
    return ret;
}
// Entry point: parse the synonyms file and print each entry.
//
// argv[1], when given, names the file to read; otherwise "Synonyms.txt" in
// the current directory is used.
// Exit status is EXIT_SUCCESS when every line parsed, EXIT_FAILURE when the
// file could not be opened or any line was rejected.
int main(int argc, char *argv[])
{
    const char *fn = (argc > 1) ? argv[1] : "Synonyms.txt";

    FILE *fp = fopen(fn, "rt");
    if (fp == NULL) {
        perror(fn);
        return EXIT_FAILURE;
    }

    int ok = LoadFile(fp);
    fclose(fp);

    // LoadFile reports success as 1; a process reports success as 0, so the
    // value must be translated rather than returned directly.
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
I think the biggest conceptual misunderstanding demonstrated in the code is a failure to understand how fgets and fscanf work.
Consider the following lines of code:
while(fgets(synonyms,50,fp)!=NULL){
...
while(fscanf(fp,"%49s",word)==1){
for(i=1;i<strlen(synonyms);i++){
fscanf(fp,"%49s",synonyms);
printf("%s",synonyms);
}
}
}
The fgets reads one line of the input. (Unless there is an input line that is greater than 49 characters long (48 + a newline), in which case fgets will only read the first 49 characters. The code should check for that condition and handle it.) The next fscanf then reads a word from the next line of input. The first line is effectively being discarded! If the input is formatted as expected, the 2nd scanf will read a single - into synonyms. This makes strlen(synonyms) evaluate to 1, so the for loop terminates. The while scanf loop then reads another word, and since synonyms still contains a string of length 1, the for loop is never entered. while scanf then proceeds to read the rest of the file. The next call to fgets returns NULL (since the fscanf loop has read to the end of the file) so the while/fgets loop terminates after 1 iteration.
I believe the intention was for the scanfs inside the while/fgets to operate on the line read by fgets. To do that, all the fscanf calls should be replaced by sscanf.

Redirection input in C: fgets() end of line "\n" interfering with strcmp()

Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Average of the 10 samples stored in column j of data.
 *
 * data: table indexed as data[sample][experiment]; rows 0..9 are read.
 * j:    experiment (column) index.
 * Returns the integer average; the fractional part is truncated.
 */
int individualAverage(int data[][20],int j)
{
    enum { SAMPLES = 10 };
    int sum = 0;

    for (int k = 0; k < SAMPLES; k++)
    {
        sum += data[k][j];
    }
    /* The original returned the sum itself, not the average. */
    return sum / SAMPLES;
}
// Code as posted in the question.
// Phase 1 reads pairs of lines (experiment name, then values) from stdin up
// to the "*** END ***" marker; phase 2 reads one more line and executes the
// space-separated commands found on it.
int main()
{
int var,indAvg=0;
// i counts completed experiments; it is only advanced after a values line.
int i=0,j,k;
char *experiments[20];
int data[10][20];
// str2 is declared but not referenced anywhere in this function.
char str[100],str2[100];
char *ptr, *token;
int no_line=1;
// The marker is accepted both with and without a trailing newline.
while(fgets(str,100,stdin) != NULL && (strcmp(str,"*** END ***") && strcmp(str,"*** END ***\n")))
{
// Even-numbered lines hold the values of the experiment named on the
// preceding odd-numbered line.
if(no_line % 2 == 0)
{
k=0;
token = strtok (str," ");
while (token != NULL)
{
sscanf (token, "%d", &var);
// Stored as data[sample][experiment]; k is not checked against 10.
data[k++][i] = var;
token = strtok (NULL," ");
}
i++;
}
else
{
// The name is copied exactly as fgets delivered it; nothing here removes
// the trailing '\n', which is what the question is about.
ptr = strdup(str);
experiments[i] = ptr;
}
no_line++;
}
// The instruction line; the return value of fgets is not checked.
fgets(str,100,stdin);
token = strtok(str," ");
// Both strcmp calls compare against the same literal "4".
while(token != NULL && (strcmp(token,"4") && strcmp(token,"4")))
{
sscanf (token, "%d", &var);
printf("DATA SET ANALYSIS\n1.\tShow all the data\n2.\tCalculate the average for an experiment\n3.\tCalculate the average across all experiments\n4.\tQuit\nSelection: %d\n\n",var);
// Only selections 1 and 2 are handled; there is no default case.
switch(var)
{
case 1 :
for(j=0;j<i;j++)
{
printf("%s",experiments[j]);
for(k=0;k<10;k++)
{
printf("%d ",data[k][j]);
}
printf("\n");
}
printf("\n");
break;
case 2 :
printf("What experiment would you like to use?\n");
token = strtok (NULL," ");
// NOTE(review): token is not checked for NULL, and the destination str is
// the same buffer strtok is tokenizing (token points into it).
sscanf (token, "%s", &str);
for(j=0;j<i;j++)
{
// experiments[j] still ends in '\n' while str does not, so this does not
// match a name that differs only by that newline.
if(strcmp(experiments[j],str) == 0)
{
indAvg = individualAverage(data,j);
printf("Experiment: %s",experiments[j]);
printf("The individual average of the experiment is %d\n",indAvg);
// Leaves the for loop.
break;
}
}
}
token = strtok(NULL," ");
}
}
OK, so I have a method that takes lines of redirection input. The lines come in pairs. First line is the name of an experiment, and the second line has the 10 values separated by spaces for that experiment. After these pairs, there is an ending line "*** END ***"
After this line, there is one last line holding the instructions of what to do with the data.
I'm currently having a problem where I've used fgets() to store the strings of the first pairs of lines into a variable which I declared as char *experiments[20];
Each of strings that this array is pointing to will have '\n' at the end of the string because of fgets()
Back to the last line of instructions. You have values 1-4. Right now I'm looking at instruction 2. It tells the average of an experiment. So after 2 on the last line, there must be the name of one of the experiments. I've used:
char str[100];
int var;
char *token;
token = strtok(str, " ");
sscanf (token, "%d", &var);
to get the first value on the line into var (pretend it's 2). So after that would be a string. Say it's Test 1, I'll use
token = strtok (NULL," ");
sscanf (token, "%s", &str);
to get the value into str, and then I'll compare it to experiments for all possible indexes.
HOWEVER, because fgets() gives '\n' at the end of the lines, all of the experiments strings will have '\n' at the end while str will just have the name of the experiment WITHOUT '\n' therefore they will never be equal even if '\n' is the only difference between the strings.
Any solutions?
Since you know that there may be a \n at the end of the string, you could check for it, and remove it if it's there:
/* Drop the final character of str when, and only when, it is a newline. */
size_t len = strlen(str);
if (len > 0) {
    char *last = &str[len - 1];
    if (*last == '\n')
        *last = '\0';
}
This would terminate the line at \n, so your strcmp would succeed. An alternative is to use strncmp, and pass the length of the target string. This runs the risk of false positives when there's a longer suffix than \n, though.
You could also read your data like this:
fscanf(f, "%99[^\n]", str);
You can make your own version of fgets that doesn't store the new-line character when it encounters one, and call it myfgets. Something like this would replicate fgets's behaviour, I think, produced with respect to the description given in MSDN:
/*
 * fgets variant that does not store the newline.
 *
 * Reads at most n - 1 characters from stream into str, stopping after a
 * '\n' (which is consumed but not stored) or at end of file, and always
 * NUL-terminates the result.
 *
 * Returns str on success, or NULL when n <= 0 or when end of file / a read
 * error occurs before any character was read. A final line that lacks a
 * newline is returned like any other line.
 */
char * myfgets( char * str, int n, FILE * stream ) {
    if ( n <= 0 ) return NULL; // won't accept less than or equal to zero length

    int currentPos = 0;
    // Stop one short of n so the terminator below always fits.
    while ( currentPos < n - 1 ) {
        int currentChar = fgetc( stream );
        if ( currentChar == EOF ) {
            if ( currentPos == 0 ) return NULL; // nothing read at all
            break;                              // keep the partial last line
        }
        if ( currentChar == '\n' ) break;
        // if these two lines were in reversed order,
        // it would behave same as the original fgets
        str[currentPos++] = (char)currentChar;
    }
    str[currentPos] = '\0';
    return str;
}
But of course the other solution is simpler, hehe...

Searching and Reading a text file

this is my first time asking a question on here so I'll try to do my best. I'm not that great at C, I'm only in Intermediate C programming.
I'm trying to write a program that reads a file, which I got working. But I have to search for a word and then save the word after it into an array. What I have right now is
for(x=0;x<=256;x++){
fscanf(file,"input %s",insouts[x][0]);
}
In the file there are lines that say "input A0;" and I want it to save "A0" to insouts[x][0]. 256 is just a number I picked because I don't know how many inputs it might have in the text file.
I have insouts declared as:
char * insouts[256][2];
Use fgets() & sscanf(). Separate I/O from format scanning.
#define N (256)
char insouts[N][2+1]; // note: no * and 2nd dimension is 3
for(size_t x = 0; x < N; x++){
char buf[100];
if (fgets(buf, sizeof buf, stdin) == NULL) {
break; // I/O error or EOF
}
int n = 0;
// 2 this is the max length of characters for insouts[x]. A \0 is appended.
// [A-Za-z0-9] this is the set of legitimate characters for insouts
// %n record the offset of the scanning up to that point.
int result = sscanf(buf, "input %2[A-Za-z0-9]; %n", insouts[x], &n);
if ((result != 1) || (buf[n] != '\0')) {
; // format error
}
}
You want to pass the address of the x'th element of the array and not the value stored there. You can use the address-of operator & to do this.
I think
for(x = 0;x < 256; x++){
fscanf(file,"input %s", &insouts[x][0]);
// you could use insouts[x], which should be equivalent to &insouts[x][0]
}
would do the trick :)
Also, you are only allocating 2 bytes for every string. Keep in mind that strings need to be terminated by a null character, so you should change the array allocation to
char insouts[256][3]; /* 256 strings of up to 2 characters plus '\0'; chars, not pointers */
However, I'm pretty sure the %s will match A0; and not just A0, so you might need to account for this as well. You can use %c together with a width to read a given number of characters. However, you have to add the null byte yourself. This should work (not tested):
/* Storage for the names themselves: 2 characters plus the terminator each. */
char insouts[256][3];
for(x = 0; x < 256; x++) {
    /*
     * The leading blank skips the newline left over from the previous line.
     * %2c reads exactly two characters and does not append a '\0'
     * (assumes every name is exactly two characters long).
     * Stop at the first line that does not match instead of leaving
     * insouts[x] uninitialized.
     */
    if (fscanf(file, " input %2c;", insouts[x]) != 1) {
        break;
    }
    insouts[x][2] = '\0';
}
/* x now holds the number of names that were read. */
Rather than trying to use fscanf why don't you use "getdelim" with ';' as the delimiter.
According to the man page
" getdelim() works like getline(), except that a line delimiter other than newline can be specified as the delimiter argument. As with getline(), a delimiter character is not added if one was not present in the input before end of file was reached."
So you can do something like (untested and uncompiled code)
char *line = NULL;
size_t n, read;
int alloc = 100;
int lc = 0;
char ** buff = calloc(alloc, sizeof(char *)); // since you don't know the file size have 100 buffer and realloc if you need more
FILE *fp = fopen("FILE TO BE READ ", "r");
int deli = (int)';';
while ((read = getline(&line, &n, fp)) != -1) {
printf("%s", line); // This should have "input A0;"
// you can use either sscanf or strtok here and get A0 out
char *out = null ;
sscanf(line, "input %s;", &out);
if (lc > alloc) {
alloc = alloc + 50;
buff = (char **) realloc(buff, sizeof(char *) * alloc);
}
buff[lc++] = out
}
int i = 0 ;
for (i = 0 ; i < lc; i++)
printf ("%s\n", buff[i]);

Read Txt file Language C

Hi guys I have this file struct:
0
2 4
0: 1(ab) 5(b)
1: 2(b) 6(a)
2: 0(a) 2(b)
3: 2(a) 6(b)
4: 5(ab)
5: 2(a) 6(b)
6: 4(b) 6(ab)
Each line will feed a struct with its data (numbers + letters).
What's the best way to read the line and get the strings I want?
Example:
0
2 4
0,1,ab,5,b
1,2,b,5,a
...
The lines may vary in size because we can have 1, 2, 3, .... numbers.
I already did it :
//struct
#define MAX_ 20

/* Outgoing connections of one state. */
struct otherstats {
    int conectstat[MAX_];   /* states this state connects to */
    int transitions[MAX_];  /* symbols on each connection, as ASCII */
};

/* File-scope table with one entry per state. */
struct otherstats tableStats[MAX_];

/* The whole automaton. */
struct sAutomate {
    int stat_initial;                    /* initial state */
    int stats_finals[MAX_];              /* final state(s) */
    struct otherstats tableStats[MAX_];  /* per-state table: 0 1 2 3 4 5 6 */
};
/* eXample that what i want ..using the example
sAutomate.stat_initial=0
sAutomate.stats_finals[0]=2
sAutomate.stats_finals[1]=4
Others Stats table
//0
sAutomate.tableStats[0].conectstat[0]=1;
sAutomate.tableStats[0].conectstat[1]=5;
sAutomate.tableStats[0].transitions[0]=ab;
sAutomate.tableStats[0].transitions[1]=b;
//1
sAutomate.tableStats[1].conectstat[0]=2;
sAutomate.tableStats[1].conectstat[1]=6;
sAutomate.tableStats[1].transitions[0]=b;
sAutomate.tableStats[1].transitions[1]=a;
///etc
*/
void scanfile(){ //function to read the file
struct sAutomate st; //initialize st struct
char filename[] = "txe.txt";
FILE *file = fopen ( filename, "r" );
char buf[81];
char parts[5][11];
fscanf(file,"%d", &st.stat_initial);//read first line
printf("initial state : %d \n", st.stat_initial);
fscanf(file,"%d",&st.stats_finals);
fscanf(file,"%d",&st.stats_finals);
while (fgets(buf, sizeof(buf), stdin) != NULL)
{
if (sscanf(buf, "%10[^:]: (%10[^(], %10[^)]), (%10[^(], %10[^)])",
parts[0], parts[1], parts[2], parts[3], parts[4]) == 5)
{
printf("parts: %s, %s, %s, %s, %s\n",
parts[0], parts[1], parts[2], parts[3], parts[4]);
}
else
{
printf("Invalid input: %s", buf);
}
}
//fclose
First problem I see is you're overwriting stats_finals:
fscanf(file,"%d",&st.stats_finals);
fscanf(file,"%d",&st.stats_finals);
What you wanted to do here was:
fscanf(file,"%d",&st.stats_finals[0]);
fscanf(file,"%d",&st.stats_finals[1]);
To save off both the "2" and the "4" from the text file.
Second major problem is you're reading from stdin:
while (fgets(buf, sizeof(buf), stdin) != NULL)
That doesn't read your text file, that reads input from the keyboard... So you wanted that to be:
while (fgets(buf, sizeof(buf), file) != NULL)
Third (minor) problem is that fscanf() will not read newlines, and fgets() will. This means when you go from reading your second stats_finals to the first read in the while loop, your first input will just be the left over newline character. That's not a big deal since you check for "invalid input", but it's worth noting.
Finally, your sscanf looks wrong to me:
sscanf(buf, "%10[^:]: (%10[^(], %10[^)]), (%10[^(], %10[^)])",
^ (the 10 in %10[...]) That's a width of 10; I don't think that's what you wanted...
^ (the commas) Why are you checking for commas? You didn't have any in your text file.
I think this is more what you were looking for:
sscanf(buf, "%[0-9]: %[0-9](%[^)]) %[0-9](%[^)])",
^
takes a digit (0 to 9)
EDIT
Missed your original point. If you don't know how long the strings will be that you're reading, you can't use sscanf(). It's that simple. :)
The scanf family assumes you know how many objects you'll be parsing and the format string takes in that many. There are other options however.
Read a single line with fgets as you're doing, but then you can tokenize it. Either with the C function strtok or by your own hand with a for loop.
One note however:
Since you don't know how long it is, this: char parts[5][11]; is not your best bet. This limits you to 2 entries... probably it would be better to do this dynamically (read the line then allocate the correct size to store your tokens in.)
If you really don't know how many numbers and letters the line will contain, why are you reading a fixed amount of numbers and letters?
You could read the whole line with fgets and then parse it with a tokenizer like strtok, something like this:
const char* const DELIMITERS = " ";
int i; // index for tableStats
char* token;
token = strtok(line, DELIMITERS);
// first integer
if (token == NULL || sscanf(token, "%d:", &i) < 1)
// error
/* it seems like you should have at least one element in your "list",
* otherwise this is not necessary
*/
token = strtok(NULL, DELIMITERS);
if (token == NULL || sscanf(token, "%d(%[^)])",
&(tableStats[i].connectstat[0]),
&(tableStats[i].transitions[0])) < 2)
// error
// read optional part
for (int j = 1; (token = strtok(NULL, DELIMITERS)) != NULL; ++j)
if (sscanf(token, "%d(%[^)])", &(tableStats[i].connectstat[j]),
&(tableStats[i].transitions[j])) < 3)
break;
Remember that strtok changes the string, make a copy of it if you still need it.
Obviously, this code is for the arbitrarily long lines; reading the first two lines is trivial.

Parsing text in C

I have a file like this:
...
words 13
more words 21
even more words 4
...
(General format is a string of non-digits, then a space, then any number of digits and a newline)
and I'd like to parse every line, putting the words into one field of the structure, and the number into the other. Right now I am using an ugly hack of reading the line while the chars are not numbers, then reading the rest. I believe there's a clearer way.
Edit: You can use pNum-buf to get the length of the alphabetical part of the string, and use strncpy() to copy that into another buffer. Be sure to add a '\0' to the end of the destination buffer. I would insert this code before the pNum++.
int len = pNum-buf;
strncpy(newBuf, buf, len-1);
newBuf[len] = '\0';
You could read the entire line into a buffer and then use:
char *pNum;
if (pNum = strrchr(buf, ' ')) {
pNum++;
}
to get a pointer to the number field.
fscanf(file, "%s %d", word, &value);
This gets the values directly into a string and an integer, and copes with variations in whitespace and numerical formats, etc.
Edit
Ooops, I forgot that you had spaces between the words.
In that case, I'd do the following. (Note that it truncates the original text in 'line')
// Scan to find the last space in the line
char *p = line;
char *lastSpace = null;
while(*p != '\0')
{
if (*p == ' ')
lastSpace = p;
p++;
}
if (lastSpace == null)
return("parse error");
// Replace the last space in the line with a NUL
*lastSpace = '\0';
// Advance past the NUL to the first character of the number field
lastSpace++;
char *word = text;
int number = atoi(lastSpace);
You can solve this using stdlib functions, but the above is likely to be more efficient as you're only searching for the characters you are interested in.
Given the description, I think I'd use a variant of this (now tested) C99 code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* One parsed input line: the text part and the number that ends the line. */
struct word_number
{
    char word[128];
    long number;
};

/*
 * Read one line of the form "<text> <number>" from fp into *wnp.
 *
 * The number is the last whitespace-separated field and may use any base
 * strtol accepts with base 0 (decimal, 0x hex, leading-0 octal).
 *
 * Returns 0 on success. Returns EOF at end of file and for any line that
 * cannot be parsed: too long, blank, no space, invalid or missing number,
 * or text part too long. *wnp is only modified on success.
 */
int read_word_number(FILE *fp, struct word_number *wnp)
{
    char buffer[140];
    if (fgets(buffer, sizeof(buffer), fp) == 0)
        return EOF;
    size_t len = strlen(buffer);
    if (len > 0 && buffer[len-1] == '\n')
        buffer[--len] = '\0';
    else if (!feof(fp))         // No newline and not the last line: too long to fit
        return EOF;
    if (len == 0)               // Blank line; also keeps buffer[len-1] in bounds
        return EOF;
    char *num = &buffer[len-1];
    while (num > buffer && !isspace((unsigned char)*num))
        num--;
    if (num == buffer)          // No space in input data
        return EOF;
    char *end;
    long value = strtol(num+1, &end, 0);
    if (end == num+1 || *end != '\0')   // Missing or invalid number as last word on line
        return EOF;
    *num = '\0';
    size_t wordlen = (size_t)(num - buffer);
    if (wordlen >= sizeof(wnp->word))   // Non-number part too long
        return EOF;
    // + 1 copies the terminator written above; without it word would keep
    // the tail of whatever it held before.
    memcpy(wnp->word, buffer, wordlen + 1);
    wnp->number = value;
    return(0);
}
/* Echo every "<text> <number>" line of stdin until read_word_number reports EOF. */
int main(void)
{
    struct word_number wn;

    for (;;) {
        if (read_word_number(stdin, &wn) == EOF)
            break;
        printf("Word <<%s>> Number %ld\n", wn.word, wn.number);
    }
    return(0);
}
You could improve the error reporting by returning different values for different problems.
You could make it work with dynamically allocated memory for the word portion of the lines.
You could make it work with longer lines than I allow.
You could scan backwards over digits instead of non-spaces - but this allows the user to write "abc 0x123" and the hex value is handled correctly.
You might prefer to ensure there are no digits in the word part; this code does not care.
You could try using strtok() to tokenize each line, and then check whether each token is a number or a word (a fairly trivial check once you have the token string - just look at the first character of the token).
Assuming that the number is immediately followed by '\n'.
you can read each line to chars buffer, use sscanf("%d") on the entire line to get the number, and then calculate the number of chars that this number takes at the end of the text string.
Depending on how complex your strings become you may want to use the PCRE library. At least that way you can compile a perl'ish regular expression to split your lines. It may be overkill though.
Given the description, here's what I'd do: read each line as a single string using fgets() (making sure the target buffer is large enough), then split the line using strtok(). To determine if each token is a word or a number, I'd use strtol() to attempt the conversion and check the error condition. Example:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifndef MAX_LINE_LENGTH
#define MAX_LINE_LENGTH 256   /* fallback when the project does not define it */
#endif
#ifndef MAX_STR_SIZE
#define MAX_STR_SIZE 64       /* fallback when the project does not define it */
#endif

/**
 * Read the next line from the file, splitting the tokens into
 * multiple strings and a single integer.
 *
 * in          stream to read from
 * strs        receives one NUL-terminated string per non-numeric token;
 *             a token longer than MAX_STR_SIZE - 1 is truncated. The
 *             caller must supply enough rows: the row count is not known
 *             here and cannot be checked.
 * numStrings  receives the number of strings stored
 * value       receives the last token that is entirely a decimal integer,
 *             or 0 when the line has none
 *
 * Assumes input lines never exceed MAX_LINE_LENGTH and that the integer
 * is the last thing on each line.
 *
 * Returns 1 when a line was read, 0 at EOF or on a read error (the output
 * parameters are then left untouched).
 */
int getNextLine(FILE *in, char (*strs)[MAX_STR_SIZE], int *numStrings, int *value)
{
    /* Whitespace, including the newline fgets keeps, separates tokens. */
    static const char delims[] = " \t\r\n";
    char buffer[MAX_LINE_LENGTH];

    if (fgets(buffer, sizeof buffer, in) == NULL)
    {
        /**
         * fgets() hit either EOF or error; either way return 0
         */
        return 0;
    }

    *numStrings = 0;
    *value = 0;
    for (char *token = strtok(buffer, delims); token != NULL; token = strtok(NULL, delims))
    {
        char *chk;
        long parsed = strtol(token, &chk, 10);
        if (chk != token && *chk == '\0')
        {
            /* The whole token converted: it is the number. */
            *value = (int) parsed;
        }
        else
        {
            /* snprintf always terminates and never writes past the row. */
            snprintf(strs[*numStrings], MAX_STR_SIZE, "%s", token);
            ++*numStrings;
        }
    }
    return 1;
}
/**
* sample main
*/
int main(void)
{
FILE *input;
char strings[MAX_NUM_STRINGS][MAX_STRING_LENGTH];
int numStrings;
int value;
input = fopen("datafile.txt", "r");
if (input)
{
while (getNextLine(input, &strings, &numStrings, &value))
{
/**
* Do something with strings and value here
*/
}
fclose(input);
}
return 0;
}

Resources