I am trying to parse a csv file with C where the separator is | using strtok. The problem is that some fields are empty and thus two separators are placed next to each other. It seems that strtok is just skipping all empty fields and just outputting the next non-empty field.
The thing is that I need to know at which position the token that is being read belongs to.
Here is a very small example to illustrate.
FILE
node|171933|||traffic_signals|||||40.4200658|-3.7016652
This line for instance has 10 fields, but only field 1,2,9 and 10 have some value in it.
CODE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void main()
{
FILE *fp;
char lineBuf[128];
char *token;
int i=0;
if((fp = fopen("test.txt", "r"))==NULL){
fprintf (stderr, "\nError when opening file\n");
return ;
}
fgets (lineBuf, sizeof(lineBuf), fp);
token=strtok(lineBuf, "|\n");
while(token!=NULL){
printf("Element %d: %s\n",i,token); i++;
token=strtok(NULL, "|\n");
}
}
OUTPUT
Element 0: node
Element 1: 171933
Element 2: traffic_signals
Element 3: 40.4200658
Element 4: -3.7016652
EXPECTED OUTPUT
Element 0: node
Element 1: 171933
Element 4: traffic_signals
Element 9: 40.4200658
Element 10: -3.7016652
Is there any other way to parse a line like this as expected? The number of elements in a line is not defined previously.
I already know that the behaviour shown by strtok is the usual behaviour I am just asking for another way of doing it to obtain the desired results, not for the explanation of why this happens.
I am not sure what platform you are on, but strsep() is the recommended replacement for what you are trying to do.
man strsep
while (fgets(buf, BUFSIZE, fp) != NULL) {
char *line = buf;
char *field;
int index = 0;
while ((field = strsep(&line, "|")) != NULL) {
/* note the trailing field will contain newline. */
printf("element %d = %s\n", index, field);
index++;
}
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//non skip version strtok
char *my_strtok_r(char *str, const char *delims, char **store){
char *p, *wk;
if(str != NULL){
*store = str;
}
if(*store == NULL) return NULL;
//*store += strspn(*store, delims);//skip delimiter
if(**store == '\0') return NULL;
p=strpbrk(wk=*store, delims);
if(p != NULL){
*p='\0';
*store = p + 1;
} else {
*store = NULL;
}
return wk;
}
char *my_strtok(char *str, const char *delims){
static char *p;
return my_strtok_r(str, delims, &p);
}
int main(void){
char lineBuf[128] = "node|171933|||traffic_signals|||||40.4200658|-3.7016652\n";
char *token;
int i=0;
token=my_strtok(lineBuf, "|\n");
while(token!=NULL){
if(*token)//token != "";
printf("Element %d: %s\n",i,token);
i++;
token=my_strtok(NULL, "|\n");
}
return 0;
}
Related
I am trying to read a CSV file of the following format:
5,455,78,5
12245,4,78
1,455,4557,1,8,9
I have managed to open the file but I have no idea how to interpret the data. All the data is written in the first column, but I do not know how many rows there are or how many entries there is in each row.
This is my code for opening the file.
printf("File chosen is: %s",file);
int p=0;
FILE *myFile = NULL;
myFile = fopen(file,"r");
if (myFile == NULL)
{
exit(1);
}
if (myFile != NULL)
{
printf("\n\nFile read succesfully");
}
This should parse your csv. After opening you file, read each line using fgets. Loop through until fgets returns NULL which indicates no line could be read and you reached the end of your file. Use strtok to parse your line from fgets using the comma as your delimiter.
#include <stdio.h> // file handling functions
#include <stdlib.h> // atoi
#include <string.h> // strtok
...
char buffer[80];
while (fgets(buffer, 80, myFile)) {
// If you only need the first column of each row
char *token = strtok(buffer, ",");
if (token) {
int n = atoi(token);
printf("%d\n", n);
}
// If you need all the values in a row
char *token = strtok(buffer, ",");
while (token) {
// Just printing each integer here but handle as needed
int n = atoi(token);
printf("%d\n", n);
token = strtok(NULL, ",");
}
}
...
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
const char* getfield(char* line, int num)
{
const char* tok;
for (tok = strtok(line, ",");
tok && *tok;
tok = strtok(NULL, ",\n"))
{
if (!--num)
return tok;
}
return NULL;
}
int main()
{
FILE *stream = fopen("yourfile.csv", "r");
int i = 0;
int j = 0;
printf("Choose a line to be given its elements: ");
scanf("%d", &j);
char line[1024];
while (fgets(line, 1024, stream))
{
char* tmp = _strdup(line);
i++;
printf("Element %d would be %s\n", i, getfield(tmp, j));
free(tmp);
}
}
Thank you for posting some code, but you don't mention what you wish to do with your data once you read it in.
I'll give you some pointers:
Use an array of known size to read your data into from the file and buffer it for processing. Run this in a loop. e.g.
char buffer[1000];
while (fgets(buffer, sizeof (buffer), myFile))
{
char *data = strtok(buffer, ",");
printf("Data %s\n", data);
/* Further processing of data */
data = strtok(NULL, ",");
}
fclose(myFile);
Process that buffer using strtok to separate out your strings. The token is the data delimiter which should be ',' but I'm not clear on whether you also have a newline character in there, but it needs to be consistent.
Handle your strings returned above.
I'm doing a project for school and I need to read from an .INI file to start my vars for the game. Problem is, I cannot seem to understand how strtok works and I'm really confused at this point.
I know I'm returning an empty struct because I have no idea how to put the specific values into the vars!
Here's my read_from_config.h
#ifndef READ_FROM_CONFIG_H
#define READ_FROM_CONFIG_H
#define MAXSTR 500
typedef struct {
unsigned int xdim;
unsigned int ydim;
unsigned int nzombies;
unsigned int nhumans;
unsigned int nzplayers;
unsigned int nhplayers;
unsigned int turns;
} CONFIG;
CONFIG read_config(char *argv[]);
#endif
And here is my read_from_config.c
#include "read_from_config.h"
#include "example.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
CONFIG read_config(char *argv[]) {
char str[MAXSTR];
FILE *fp = NULL;
char *filename = argv[1];
char *token;
fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Não foi possível abrir ficheiro de configuração.");
fprintf(stderr, "\nModo de uso: ./program_name config.ini");
}
while (fgets(str, MAXSTR, fp) != NULL) {
for (int i = 0; i != '\0'; i++) {
char *equal = strpbrk (str, "=");
if (equal) {
token = strtok(str, "=");
}
}
}
printf("%d", token[0]);
CONFIG config;
return config;
}
CONFIG.INI
; Dimension of the grid
xdim=20
ydim=20
; Inicial number of zombies and humans
nzombies=20
nhumans=20
; Number of zombies and humans controlled by players
nzplayers=0
nhplayers=1
; Number of maximum turns
maxturns=1000
The function strtok take a string only the first time it gets called. All
subsequent calls must be passed with NULL
man strtok
#include <string.h>
char *strtok(char *str, const char *delim);
DESCRIPTION
The strtok() function breaks a string into a sequence of zero or more nonempty tokens.
On the first call to strtok(), the string to be parsed should be specified in str.
In each subsequent call that should parse the same string, str must be NULL.
Example:
char line[] = "a,b,c,d,e,f\n"; // to simulate an fgets line
char *token = strtok(line, ",\n"); // you can add \n to the separator
// to get rid of the \n at the end
puts(token); // prints a
while(token = strtok(NULL, ",\n"))
puts(token); // prints b then c etc..
Keep in mind that strtok modifies the source, this is going to fail:
strtok("a,b,c", ",");
because string literals are not modifiable. In that case you have to make a copy
to a char[] or a dynamic allocated char*.
If you need to have the source intact after the parsing, then you definitively
need to make a copy.
In your code you do:
printf("%d", token[0]);
That's not incorrect but perhaps not what you want to do. This line doesn't
print the first character, it prints the ascii value of the first character.
This
printf("%c", token[0]);
will print the first character.
Also you are doing
CONFIG read_config(char *argv[]) {
...
CONFIG config;
return config;
}
You are returning an uninitialized CONFIG object, you are ignoring the parsing
and nothing is set in your config object.
Your parsing is also a little bit strange.
for (int i = 0; i != '\0'; i++)
The loop exits immediately because 0 == '\0'! I don't understand what you are
trying to do with it.
I would first create a helper function to populate the values of the config, see
set_config_val. Then you can parse it like this:
CONFIG read_config(char *argv[]) {
...
const char *delim = "=\n";
CONFIG config;
while (fgets(str, MAXSTR, fp) != NULL) {
if(strchr(str, '='))
{
char varname[100];
int value;
token = strtok(line, delim);
strcpy(varname, token);
token = strtok(NULL, delim);
value = atoi(token);
set_config_val(&config, varname, value);
} else {
fprintf(stderr, "Skipping line, no = found");
}
}
fclose(fp);
return config;
}
void set_config_val(CONFIG *config, const char *key, int val)
{
if(config == NULL)
return;
if(strcmp(key, "xdim") == 0)
config->xdim = val;
else if(strcmp(key, "ydim") == 0)
config->ydim = val;
...
}
I know how to loop through a file line by line by reading into a FILE* using fopen, fgets etc
but how can i look through a char array line by line using plain C?
i have googled a lot an can only find stuff that reads from a file.
#include <stdio.h>
char *sgets(char *s, int n, const char **strp){
if(**strp == '\0')return NULL;
int i;
for(i=0;i<n-1;++i, ++(*strp)){
s[i] = **strp;
if(**strp == '\0')
break;
if(**strp == '\n'){
s[i+1]='\0';
++(*strp);
break;
}
}
if(i==n-1)
s[i] = '\0';
return s;
}
int main(){
const char *data = "abc\nefg\nhhh\nij";
char buff[16];
const char **p = &data;
while(NULL!=sgets(buff, sizeof(buff), p))
printf("%s", buff);
return 0;
}
Reading a character array line by line : What does a line mean ? '\n' Perhaps.
so, iterate through the array.
int main()
{ char array[10]="ab\nbc\ncd\n";
int lines =0;int i=0;
while(array[i]!='\0')
{ if(array[i]!='\n')
printf("%c",array[i++]);
else { lines++;i++; printf("\n"); }
} return 0;
}
In case if you want to keep your separator flexible (e.g. you got "\r\n") and stick to libraries, strtok is handy:
#include <cstring>
int main() {
const char separator[3] = "\r\n";
char text[13] = "ab\r\ncd\r\nef\r\n";
char *line = NULL;
line = strtok(text, separator);
while (line != NULL) {
printf("%s\n", line); // call your function to process line
line = strtok(NULL, separator); // NULL means continue from where the previous successful call to strtok ended
}
system("pause"); // just prevent console from closing
}
How do i get the position of delimited separated string?
My text file looks like
at:x:25:25:Batch jobs daemon:/var/spool/atjobs:/bin/bash
avahi:x:109:111:User for Avahi:/var/run/avahi-daemon:/bin/false
beagleindex:x:110:112:User for Beagle indexing:/var/cache/beagle:/bin/bash
My C code looks like
#include<stdio.h>
int main(int argc, char *argv[])
{
char *str, *saveptr;
char ch[100];
char *sp;
FILE *f;
int j;
char searchString[20];
char *pos;
f = fopen("passwd", "r");
if (f == NULL)
{
printf("Error while opening the file");
}
while (fgets(ch, sizeof ch, f)!= NULL)
{
/*printf("%s\n", ch); */
for (j = 1, str = ch; ; j++, str= NULL)
{
char *token = strtok_r(str, ": ", &saveptr);
if (token == NULL)
break;
//printf("%s---\n---", token);
printf("%s",token);
}
}
fclose(f);
well, using strtok(str, ": ", will split your string on spaces as well as colons, which is probably not what you want. In addition, strtok treats multiple consecutive delimiter characters as a single delimiter (so it will never return an empty string between two colons), which is not what you want for parsing passwd.
Instead, you probably just want to use strchr:
while (fgets(ch, sizeof ch, f)!= NULL) {
char *token, *end;
for (j = 1, token = ch; token; j++, token = end) {
if ((end = strchr(token, ':'))) *end++ = 0;
...do something with token and j
I do not think you have to use strtok() just to get the position of a token separated by delimiters, rather simply walk through each line, and do a char by char comparison for the delimiter... (hope this will help you)
I prepared an input file called GetDelimPosition.txt:
at:x:25:25:Batch jobs daemon:/var/spool/atjobs:/bin/bash
avahi:x:109:111:User for Avahi:/var/run/avahi-daemon:/bin/false
jamil:x:25:25:Batch jobs daemon:/var/spool/atjobs:/bin/bash
javier:x:109:111:User for Avahi:/var/run/avahi-daemon:/bin/false
jiame:x:25:25:Batch jobs daemon:/var/spool/atjobs:/bin/bash
jose:x:109:111:User for Avahi:/var/run/avahi-daemon:/bin/false
And used the following code: (of course you will modify as needed)
#include <ansi_c.h>
//edit this line as needed:
#define FILE_LOC "C:\\dev\\play\\GetDelimPosition.txt"
int main(void)
{
FILE * fp;
char ch[260];
int line=-1;
int position[80][100]={0}; //lines x DelimPosition
memset(position, 0, 80*100*sizeof(int));
int i=-1,j=0, k=0;
int len;
fp = fopen(FILE_LOC, "r");
while (fgets(ch, sizeof ch, fp)!= NULL)
{
line++; //increment line
len = strlen(ch);
for(j=0;j<len;j++)
{
if(ch[j] == ':')
{
position[line][k] = j+1;//position of token (1 after delim)
k++; //increment position index for next token
}
}
k=0; //getting new line, zero position index
}
fclose(fp);
return 0;
}
To get the following results: (rows are lines in file, columns are positions of each token. First token is assumed at position 0, and not reported)
I'm trying to store different values that are taken from a file line by line. The lines in the text file read as something shown below
100000,player1,long title name
300000,someotherplayer,another long title name
45512845,thisplayer,one more long title name
I want to store each value that is comma separated into three different arrays, (int)number, (str)player_name, (str)title_name.
I have some code below, but it doesn't compile.
ptr_file=fopen("text.txt", "r");
char buffer[1000];
int line;
line = 0;
while(fgets(buffer, sizeof(buffer), ptr_file) != NULL){
char number[line]=strtok(buffer, ",");
char player_name[line]=strtok(NULL, ",");
char title_name[line]=strtrok(NULL, ",");
}
Can someone give me some advice on this?
So, there are a couple of issues with your code,
You open the file in mode "o" which I'm not really sure what it is, I suspect you want "r"
strtok returns a char * which you cannot assign to a char[].
One the second run through the loop you will overwrite the data in buffer.
I would do something like this:
struct player {
int number;
char player_name[64];
char title_name[256];
};
int main(void) {
FILE *ptrfile=fopen("text.txt", "r");
char buffer[1000];
int line;
struct player players[16];
line = 0;
if(ptrfile==NULL) return 0;
while(fgets(buffer, sizeof(buffer), ptrfile) != NULL){
if(strcmp(buffer, "") == 0) return 0;
char *number=strtok(buffer, ",");
char *player_name=strtok(NULL, ",");
char *title_name=strtok(NULL, ",");
players[line].number=atoi(number);
strcpy(players[line].player_name, player_name);
strcpy(players[line].title_name, title_name);;
line++;
}
fclose(ptrfile);
return 0
}
function strtok return a pointer, so it should be
char* p = strtok(...)
Check the reference here
This is something I did that was similar to what you seem to be doing. The problem you will find is that you want to make each value into a char* but you have to malloc each one then you can connect this char* into the array. It would also just be easier to do that with the numbers to then turn them into int later on.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main()
{
char *msg[100];
char temp[100];
int length, i;
int num = 0;
while((scanf("%s", &temp[0]) != EOF))
{
length = strlen(temp);
msg[num] = malloc((length+1 )* sizeof(char));
strcpy(msg[num], temp);
num++;
}
printf("There are %d words in the this input.\n", num);
for(i = 0; i < num; i++)
{
printf("%s\n", msg[i]);
}
return 0;
}
The thing with the malloc is that you will have to have each one unique because the words are all different sizes. I know this example isn't exactly what your doing but it will get you in the right direction.