Read comma-separated "quoted" strings from a file - c

I am new to C programming but have a bit of Java knowledge, so I want to write a program that reads strings stored in a file, possibly several names separated by comma, such as "boy","girl","car" etc. In Java I would use something like, string str[]=str1.split(" ");.
So I came up with several codes each time but none seems to work, here is my most recent code:
fscanf(fp,"%[^\n]",c);
But this essentially prints the whole line till a new line is found. I have also tried using
fscanf(fp,"%[^,]",c);
And if I use gets() it only gets the first string and ignores all others from the first comma.
This didn't give any reasonable output, it rather gave some minute(encoded) characters.
Please can anyone help me with how to pick out string values separated by comma and in quotes

You can use the strtok() function (declared in string.h) for this task. Read the file data into a sufficiently large string and then apply:
/* Fetch the first comma-separated token; strtok() splits full_file_string in place. */
str = strtok(full_file_string,",");
/* you can save this string in a 2 dimensional array of characters or print it */
while(NULL != str)
{
/* Passing NULL continues scanning the same string after the previous token. */
str=strtok(NULL,",");
/* print or save your next word here as you like; str is NULL here once no tokens remain, so check it before use */
}
for further reference see manpage of strtok.
Hope this might help you :)

fscanf doesn't work with regular expressions, but rather with placeholders. So you need to specify the placeholder for what you want to read, and then fscanf will get the next element that matches your pattern. To get what you want one would use something like:
char word[enough_space];
.
.
.
/* Each call reads one "quoted" field: skip leading white space, match the
   opening quote, store everything up to the closing quote in word, then
   consume the closing quote and any commas that follow.
   The loop stops as soon as a call does not store a field (end of file,
   a matching failure, or an empty "" field), so it cannot spin forever.
   NOTE: the %[ conversion has no width here, so word must be large enough
   for the longest field in the file. */
while(fscanf(fp, " \"%[^\"]\"%*[,]", word) == 1)
{
//Do something with your word.
}
Here you will be trying to get a string between two quotes. Note how the placeholder indicates which part of the match should be saved. On successive calls fscanf will get to the next match, and so on. When it has consumed the whole file it will return EOF.

The example below extracts each substring. The format of your file should be something like:
"boy","girl","car",
Notice that file string should end with ','
/*
 * Prints every comma-terminated field found on the first line of the file.
 *
 * The file is expected to look like  "boy","girl","car",  (note the trailing
 * comma: text after the last comma is not printed).
 *
 * Returns 0 on success and -1 if the file cannot be opened, measured,
 * buffered or read (an empty file counts as a read failure).
 */
int read_file_with_string_tokens() {
    FILE *file = fopen("your/file/path/and/name", "r");
    if (file == NULL) {
        return -1;
    }

    /* Use the file length as an upper bound for the length of the first line. */
    long f_size = -1;
    if (fseek(file, 0L, SEEK_END) == 0) {
        f_size = ftell(file);
    }
    if (f_size < 0 || fseek(file, 0L, SEEK_SET) != 0) {
        fclose(file);
        return -1;
    }

    /* calloc zero-fills, so the buffer is always a terminated string. */
    char *filebuffer = calloc((size_t)f_size + 2, 1);
    if (filebuffer == NULL) {
        fclose(file);
        return -1;
    }

    if (fgets(filebuffer, (int)(f_size + 1), file) == NULL) {
        fclose(file);
        free(filebuffer);
        return -1;
    }
    fclose(file);

    /* Print straight from the buffer with an explicit length: no fixed-size
       scratch array is needed, so a long field cannot overflow anything. */
    const char *start = filebuffer;
    for (const char *comma = strchr(start, ','); comma != NULL; comma = strchr(start, ',')) {
        printf("%.*s", (int)(comma - start), start);
        start = comma + 1;
    }

    free(filebuffer);
    return 0;
}

#include <stdio.h>
/*
 * Copies every comma-separated field of in.txt to out.txt, one field per line.
 * Returns 0 on success and 1 if either file cannot be opened.
 */
int main(void){
    char line[128];
    char word[32];
    int line_length;

    FILE *in = fopen("in.txt", "r");
    if (in == NULL) {
        perror("in.txt");
        return 1;
    }
    FILE *out = fopen("out.txt", "w");
    if (out == NULL) {
        perror("out.txt");
        fclose(in);
        return 1;
    }

    /* Read one line: the field width keeps the scan inside line[], %n records
       how many characters were stored, and the trailing "\n" in the format
       consumes the newline (and any following white space). */
    while (1 == fscanf(in, "%127[^\n]%n\n", line, &line_length)) {
        int pos = 0;
        /* The "- 1" bound is kept from the original loop: a single trailing
           character (for example a stray '\r') is not emitted as a field. */
        while (pos < line_length - 1) {
            /* The first %n records the end of the field, the second overwrites
               it with the position after the commas when any are present, so
               len is always set even for the last field of a line. */
            int len = 0;
            if (1 != sscanf(line + pos, "%31[^,]%n%*[,]%n", word, &len, &len)) {
                break;
            }
            fprintf(out, "%s\n", word);
            pos += len;
        }
    }

    fclose(out);
    fclose(in);
    return 0;
}
/* output result out.txt
"boy"
"girl"
"car"
...
*/

Related

how do i make operations on a specific line in a text file in c?

/* Question code: prints every line of omar.txt to standard output. */
void main(void)
{
FILE* textfile;
char line[1000];
textfile = fopen("omar.txt", "r");
if (textfile == NULL)
return 1; /* NOTE(review): returns a value from a function declared void */
/* fgets keeps the trailing newline of each line it reads */
while (fgets(line, 1000, textfile)) {
printf(line); /* NOTE(review): the text read from the file is used as the format string */
}
fclose(textfile);
}
so this code prints the whole content of a text file , what should I do to read the third line in the file for example ?
To read the nth line in a file you can do something like this
/* Count the lines as they are read; stop once the nth one is sitting in line. */
int i = 0;
while (fgets(line, 1000, textfile) != NULL) {
    if (++i != n) {
        continue;
    }
    // do stuff with nth line
    break;
}
This approach uses a counter to count until the nth iteration is reached. Once it is, you can do what you need to do with the nth line.
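Also, this may be unrelated, but you should never pass data as the format string itself, as you do in printf(line);. Any % sequence in the file is then interpreted as a conversion specifier, which is dangerous and could be used by an attacker to exploit the program. In your case fputs(line, stdout); is a better alternative (puts(line); also works, but it appends a second newline after the one that fgets keeps).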
For example:
/*
 * Reads line number `line` (counting from 1) of `fi` into `buff`.
 *
 * The stream is rewound first, then `line` successive fgets calls are made,
 * so `buff` ends up holding the requested line (including its newline when
 * one was read).  Each call reads at most buffsize - 1 characters, so the
 * result is only correct when no line up to the requested one is longer
 * than that.
 *
 * Returns 0 on success, -1 on invalid arguments (null stream or buffer,
 * zero-sized buffer, line number 0), on a failed rewind, or when the file
 * has fewer than `line` lines.
 */
int readNthLine(FILE *fi, char *buff, size_t buffsize, size_t line)
{
    /* Line 0 does not exist; reporting success would hand back an unfilled buffer. */
    if (!fi || !buff || buffsize == 0 || line == 0) return -1;

    if (fseek(fi, 0, SEEK_SET) != 0) return -1;

    for (size_t cline = 0; cline < line; cline++)
    {
        if (!fgets(buff, (int)buffsize, fi)) return -1;
    }
    return 0;
}
This very simple function will work only if the size of the buffer is larger than the length of the longest line in the file.
Of course, you should check the result of any I/O operation.

How to read from a file and parse it

I have a file .txt containing some values formatted like this:
0,30,25,10
Now, I open up the file and store it into an array
/* Fixed-size buffer that receives the raw bytes of the settings file. */
char imposta_tratt[300];
FILE *fp;
fp = fopen("/home/pi/Documents/imposta_trattamento.txt", "r");
if (fp == 0) return; /* give up when the file cannot be opened */
/* Requests one 300-byte item. NOTE(review): fread does not append a '\0' and its return value is not checked here */
fread(imposta_tratt, sizeof(imposta_tratt), 1, fp);
fclose(fp);
Now I expect to have the array filled with my data. I have the values separated by a , so I go on and parse it:
const char delim[2] = ",";
int t=0;
/* strtok splits imposta_tratt in place and returns the first token */
char *token = strtok(imposta_tratt, delim);
while (token!=NULL){
strcpy(tratt[t],token); /* copies the current token to wherever tratt[t] points */
token = strtok(NULL, delim); /* advance to the next token (NULL when none are left) */
tratt[t]=token; /* NOTE(review): reassigns the slot just written, this time with the NEXT token's pointer */
t++;
}
Here, referring to what's in the file .txt, I expect to have tratt[0]=0; tratt[1]=30; tratt[2]=25; and so on, but seems like I am missing something since it's not like this.
All I want is to have the values of the txt file stored in single variables. Can someone help?
What you are trying to achieve can simply be done using fgets():
/*
 * Reads the first line of `filename`, splits it on the characters in `delim`
 * and stores each token, converted to int, in `tratt`.
 *
 * filename  file to parse
 * tsizemax  capacity of tratt; tokens beyond it are ignored
 * tratt     receives the converted values
 * tsize     receives the number of values stored (0 on failure)
 * delim     set of delimiter characters, e.g. ","
 *
 * Returns true when a line was read and tokenized, false when an argument
 * is null, the file cannot be opened, or no line could be read.
 * A token with no leading digits is stored as 0.
 */
bool read_file_content(const char *filename, const size_t tsizemax, int tratt[tsizemax], size_t *tsize, const char *delim)
{
    if (!filename || !tratt || !tsize || !delim) return false;
    *tsize = 0;

    // Attempt to open filename.
    FILE *fp = fopen(filename, "r");
    if (!fp) return false;

    // Read one line, then release the file before parsing.
    char imposta_tratt[300];
    const bool got_line = fgets(imposta_tratt, sizeof imposta_tratt, fp) != NULL;
    fclose(fp);
    if (!got_line) return false;

    // strtok yields terminated tokens, so they can be converted in place;
    // strtol has defined behaviour for out-of-range input, unlike atoi.
    for (char *token = strtok(imposta_tratt, delim);
         token && *tsize < tsizemax;
         token = strtok(NULL, delim)) {
        tratt[(*tsize)++] = (int)strtol(token, NULL, 10);
    }
    return true;
}
const char *filename: The file you want to parse.
const size_t tsizemax: The maximum size of your tratt array. It is important to control the size, otherwise your code will have buffer overflow (think of when your file has more than 100 tokens, for example).
int tratt[tsizemax]: The array that will hold the values.
size_t *tsize: The number of tokens read (used in combination of tsizemax).
const char *delim: The delimiter(s), in your case a ,.
This is your main():
/* Parses in.txt with read_file_content and prints each value on its own line. */
int main(void)
{
    int values[100];
    size_t count = 0;

    if (read_file_content("in.txt", 100, values, &count, ",")) {
        size_t idx = 0;
        while (idx < count) {
            printf("%d\n", values[idx]);
            ++idx;
        }
        return 0;
    }

    puts("Failed");
    return 1;
}
Output:
0
30
25
10
Suppose "in.txt" has contents
0,30,25,10
The program below uses fscanf to read the integers into the tratt array, one by one. After each fscanf call we check that its return value is as expected. If it is not, the program prints which type of error occurred, closes the file and exits. Currently the program stops on any error, but you can make it behave differently depending on the error that occurred if you like.
As output, the program prints all of the integers read into the tratt array. The output is
0
30
25
10
Now this program assumes we know the number of elements we want to read into tratt. If we do not, we could allow for dynamically allocating more memory should the array need more elements or perhaps "in.txt" could contain a data structure, say, at the beginning/end of the file that records information about the file, such as the number of numbers in the file and the data type (a binary file would be best suited for this). These are just a couple of the possibilities.
A better approach might be to read characters in one-by-one (say, using getc) and use strtol to convert a sequence of character digits to a long int (I would have taken an approach similar to this).
Nevertheless, this approach is more succinct and should suffice.
#include <stdio.h>
#include <stdlib.h>
#define FILE_NAME "in.txt"
#define MAX_LEN 4
/* Reports why the last fscanf on fp did not convert a value, closes fp and
   terminates the program with a failure status. */
static void fail_fscanf(FILE *fp) {
    if (ferror(fp))
        printf("fscanf: read error\n");
    else if (feof(fp))
        printf("fscanf: end of file\n");
    else
        printf("fscanf: matching failure\n");
    fclose(fp);
    exit(EXIT_FAILURE);
}

/* Reads MAX_LEN comma-separated integers from FILE_NAME and prints them,
   one per line. */
int main(void) {
    int tratt[MAX_LEN];
    FILE *fp = fopen(FILE_NAME, "r"); /* open file for reading */

    if (fp == NULL) {
        printf("Cannot open %s\n", FILE_NAME);
        exit(EXIT_FAILURE);
    }

    /* the first value stands alone; every later one is preceded by a comma */
    for (int idx = 0; idx < MAX_LEN; idx++) {
        int converted;
        if (idx == 0)
            converted = fscanf(fp, "%d", &tratt[idx]);
        else
            converted = fscanf(fp, ",%d", &tratt[idx]);
        if (converted != 1)
            fail_fscanf(fp);
    }
    fclose(fp); /* close file */

    /* print integers stored in tratt */
    for (int idx = 0; idx < MAX_LEN; idx++)
        printf("%d\n", tratt[idx]);
    return 0;
}

Parsing contents of a textfile in C(Deleting parts, storing others)

I have a basic .txt file that may contain an unknown amount of pieces of data exactly in this format and I need to extract the second part after the '=' identifier. For example:
variable1=Hello
variable2=How
variable3=Are
variable4=You?
I need to extract "Hello" "How" "Are" and "You?" separately and store them into an array(removing/ignoring the variable name) and being able to call each word individually. I'm doing this in C and here is what I currently have.
#include <stdio.h>
#include <string.h>
/* Question code: tries to print the text after '=' on every line of testfile.txt. */
int main()
{
char*result;
char copy[256];
FILE * filePtr;
filePtr = fopen("testfile.txt", "r+");
strcpy(copy, "testfile.txt"); /* this content is overwritten by the first fgets below */
while(fgets(copy, 256, filePtr)!= NULL)
{
result = strchr(copy, '='); /* this pointer is replaced on the next line before it is used */
result = strtok(NULL, "="); /* NOTE(review): on the first pass no strtok(copy, ...) call has been made yet */
printf("%s",result); /* printed before the null check that follows */
if(result != 0)
{
*result = 0;
}
result = strtok(copy, "="); /* starts tokenizing the current line; this token itself is never printed */
}
return 0;
}
My current output is
(null)How
Are
You?
You do not need strtok, using strchr is enough.
no need to copy the filename to the copy buffer.
It is probably not necessary to open the file in update mode "r+" either.
Here is a corrected version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Collects the text after '=' from each line of testfile.txt (at most
 * MAX_WORDS entries), printing every value as it is stored.
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main(void) {
    enum { MAX_WORDS = 20 };
    char *words[MAX_WORDS];
    int n = 0;
    int status = 0;
    char copy[256];

    FILE *filePtr = fopen("testfile.txt", "r");
    if (filePtr == NULL) {
        perror("testfile.txt");
        return 1;
    }

    /* Stop reading once the array is full instead of writing past its end. */
    while (n < MAX_WORDS && fgets(copy, sizeof copy, filePtr) != NULL) {
        copy[strcspn(copy, "\n")] = '\0'; /* strip the \n if present */
        char *result = strchr(copy, '=');
        if (result == NULL) {
            continue;
        }
        char *word = strdup(result + 1);
        if (word == NULL) {
            status = 1;
            break;
        }
        words[n++] = word;
        printf("%s ", word);
    }
    printf("\n");
    fclose(filePtr);

    /* Every stored word was allocated by strdup and is owned by this function. */
    for (int i = 0; i < n; i++) {
        free(words[i]);
    }
    return status;
}
Note the one-liner that strips the trailing \n left at the end of copy by fgets(): copy[strcspn(copy, "\n")] = '\0';. It works even if fgets() did not see a \n before the end of the buffer or before the end of file. strcspn returns the number of characters at the start of copy that are not in the second argument, thus it returns the length of the line without the \n.
The words are collected into an array words of pointers to strings. Each word is copied into memory allocated by malloc by the strdup function. strdup is not part of Standard C, but part of Posix and probably present in your environment, possibly written as _strdup.
Note also that you should also test for failure to open the file, failure to allocate memory in strdup, and also handle more than 20 strings...
If there is a fixed set of words and you just want to strip the initial parts, you can use a simpler hardcoded approach:
/*
 * Reads exactly four "name=value" lines from testfile.txt and prints the
 * four values.  Each value is limited to 19 characters by the format.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(void) {
    char word1[20], word2[20], word3[20], word4[20];
    FILE *filePtr;
    filePtr = fopen("testfile.txt", "r");
    if (filePtr == NULL) {
        /* fscanf must not be handed a null stream */
        perror("testfile.txt");
        return 1;
    }
    /* %*[^=]= skips everything up to and including '=',
       %19[^\n] stores the rest of the line */
    if (fscanf(filePtr,
               "%*[^=]=%19[^\n]%*[^=]=%19[^\n]%*[^=]=%19[^\n]%*[^=]=%19[^\n]",
               word1, word2, word3, word4) == 4) {
        printf("%s %s %s %s\n", word1, word2, word3, word4);
        // perform whatever task with the arrays
    } else {
        printf("parse failed\n");
    }
    fclose(filePtr);
    return 0;
}

How to read line by line after i read a text into a buffer?

First , I read a text into a buffer by calling fread, and then I want to read it line by line, how to do it? I try to use a sscanf , but it seems not to work.
char textbuf[4096];
char line[256];
FILE *fp;
fp = fopen(argv[1],"r"); /* NOTE(review): the result is not checked before it is passed to fread */
memset(textbuf, 0, 4096);
/* Reads up to 4096 single bytes; a full read overwrites every zero written by memset, leaving no terminating '\0' */
fread(textbuf, 1, 4096, fp);
I know using fgets is a good way. I just want to know weather this method can do the same thing.
Try this:
fgets(textbuf, sizeof(textbuf), fp);
For read line by line you can use: fgets(line, 128, fp) or getline(&line, &size, fp);
EDIT
If you want to read it from a variable, look at strtok() function:
char * line = strtok(strdup(buffer), "\n");
while(line) {
printf("%s", line);
line = strtok(NULL, "\n");
}
You can find the location of the end-of-line character using strchr() like this:
char *eol = strchr(line, '\n');
Everything before *eol is the first line. Then advance from line to eol + 1, remove any subsequent \r or \n characters, and repeat the process until strchr() returns NULL to indicate there are no more newline characters. At that point, move any remaining data to the beginning of the buffer and read the next chunk from the file.
If you're concerned about efficiency you can avoid moving the data by using 2 buffers and alternating between them, but even the naive method is probably faster than fgets() if the file has many lines.
how about strtok
char *line;
line = strtok(texbuf, '\n');
You said "I know using fgets is a good way. I just want to know weather this method can do the same thing.", of course you can, you just re-implement fgets as in the c library. The c library doesn't actually read line by line, it reads in a whole chunk and gives you a line when you call fgets.
Not an efficient way, but a sample of the kind of things you have to do.
#include <stdio.h>
/* State of a hand-rolled buffered reader: the file is read in chunks into
   buf and handed out one byte at a time. */
typedef struct my_state {
    unsigned char * buf;   /* caller-supplied chunk buffer */
    int offset;            /* index of the next unread byte in buf */
    int buf_size;          /* capacity of buf in bytes */
    int left;              /* number of valid bytes currently in buf */
    FILE * file;           /* stream the chunks are read from */
} my_state_t;

/* Returns the next byte of the stream as a non-negative value, refilling the
   chunk buffer when it is exhausted, or -1 when no more data is available. */
int takeone(my_state_t * state) {
    if ((state->left - state->offset) <= 0) {
        if (state->buf_size <= 0) return -1;
        if (feof(state->file)) return -1;
        state->left = (int)fread(state->buf, 1, (size_t)state->buf_size, state->file);
        state->offset = 0;
        if (state->left == 0) return -1;
    }
    return state->buf[state->offset++];
}

/*
 * Copies the next line (or the next size - 1 bytes of a longer line) into
 * out and terminates it; the newline is kept when it was read.
 *
 * A byte is fetched only when there is room to store it, so an over-long
 * line is returned in consecutive pieces without losing characters.
 *
 * Returns 1 when at least one byte was stored, 0 at end of input or when
 * size leaves no room for data.
 */
int getaline(my_state_t * state, char * out, int size) {
    int stored = 0;

    if (size < 1) return 0;
    while (size > 1) {
        int c = takeone(state);
        if (c < 0) break;
        *out++ = (char)c;
        --size;
        ++stored;
        if (c == '\n') break;
    }
    *out = 0;
    return stored > 0;
}
/*
 * Prints every line of the file named by argv[1] (or of standard input when
 * no argument is given), each prefixed with "-> ".
 * Returns 1 if the named file cannot be opened, 0 otherwise.
 */
int main(int argc, char ** argv){
    /* unsigned char matches the type of my_state_t.buf */
    unsigned char textbuf[4096];
    char line[256];
    my_state_t fs;

    FILE *fp = (argc>1)? fopen(argv[1],"rb") : stdin;
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }

    fs.buf = textbuf;
    fs.offset = 0;
    fs.buf_size = (int)sizeof textbuf;
    fs.left = 0;
    fs.file = fp;

    while (getaline(&fs, line, (int)sizeof line)) {
        printf("-> %s", line);
    }

    /* only close what this function opened */
    if (fp != stdin) {
        fclose(fp);
    }
    return 0;
}

Replacing strings in files - C

I'm trying to make a program that replacing strings in files.
I got the program below that replacing all the occurrences of one string in the file, but now I need to extend so it'll replace multiple strings.
The trivial way is to run the program several times, each time with different string as input, but I'm looking for more efficient way to do it.
My input can be:
Set of strings to replace (each string appears once).
List of strings to replace by order of appearance (string can be at the list several times) but without knowing their offset.
Thanks for the help.
#include <stdio.h>
#include <string.h>
#define LINE_LEN 128
/*
 * Copies orig.txt to new.txt, wrapping every occurrence of a word read from
 * standard input in red/bold HTML markup.
 * Returns 0 on success, 1 when no word was entered or a file cannot be opened.
 *
 * Limitation: the file is processed in chunks of LINE_LEN+1 characters, so a
 * match that straddles two chunks of an over-long line is not detected.
 */
int main(){
    char fileOrig[32] = "orig.txt";
    char fileRepl[32] = "new.txt";
    char text2find[80];
    /* markup (36 characters) + up to 79 characters of search text + '\0' */
    char text2repl[128];

    printf("enter text to replace in the file:");
    /* the width keeps the input inside text2find */
    if (scanf("%79s", text2find) != 1) {
        fprintf(stderr, "no search text given\n");
        return 1;
    }
    snprintf(text2repl, sizeof text2repl, "%s%s%s",
             "<b><font color=\"#FF0000\">", text2find, "</font></b>");

    char buffer[LINE_LEN+2];
    char *buff_ptr, *find_ptr;
    size_t find_len = strlen(text2find);

    FILE *fp1 = fopen(fileOrig,"r");
    if (fp1 == NULL) {
        perror(fileOrig);
        return 1;
    }
    FILE *fp2 = fopen(fileRepl,"w");
    if (fp2 == NULL) {
        perror(fileRepl);
        fclose(fp1);
        return 1;
    }

    while(fgets(buffer,LINE_LEN+2,fp1)){
        buff_ptr = buffer;
        while ((find_ptr = strstr(buff_ptr,text2find))){
            /* copy the text in front of the match, then the replacement */
            while(buff_ptr < find_ptr)
                fputc((int)*buff_ptr++,fp2);
            fputs(text2repl,fp2);
            buff_ptr += find_len;
        }
        /* whatever follows the last match */
        fputs(buff_ptr,fp2);
    }
    fclose(fp2);
    fclose(fp1);
    return 0;
}
Sometimes things can get complicated. Say the strings to replace are {ab,ba} and they are to be replaced by {xy,yx} respectively, and the input file contains "aba". Now the output becomes order-dependent: it is "xya" if "ab" is replaced first and "ayx" if "ba" is replaced first.
Similar confusion can occur if the replacement of one string causes another string to be formed which belongs to the strings-to-replace list.
IMO, you should define what you want to do in this situations and then use an approach similar to what you have already done.
BTW, you can improve your string matching by using a finite-automaton-based approach, or an existing state-of-the-art algorithm like KMP or Boyer-Moore. An automaton-based approach such as Aho-Corasick will let you search for multiple strings at once.
I'm not sure if what you want is possible, but you might want to look into string search algorithms to make the search part of your algorithm more optimized. A naive search algorithm has, according to Wikipedia, complexity Θ((n-m+1) m), with n the length of your text and m the length of your search string. Take a look at the link, you can do significantly better.
Once you have all the offsets of the strings to replace, the actual replacing seems to be fairly straightforward.
I'm sorry I can't completely answer your question, but I thought this might give you some optimization ideas.
I think this will help you. Please use following Code to Search & Replace string .
Call this function from a top-level function like this:
replaceIPAddress( "System.cfg", "172.16.116.157", "127.0.0.1");
/*
 * Replaces every occurrence of text_to_find in the file confFileName with
 * text_to_replace.
 *
 * The result is written to "temp.txt" in the current directory, which is
 * then renamed over confFileName.  If a file cannot be opened or the write
 * fails, the original file is left untouched and a message is printed to
 * stderr where errno is available.
 *
 * Lines are processed in chunks of at most 511 characters; a match that
 * straddles two chunks of an over-long line is not detected.
 */
void replaceIPAddress( char * confFileName, char *text_to_find , char *text_to_replace )
{
    if (confFileName == NULL || text_to_find == NULL || text_to_replace == NULL)
        return;

    FILE *input = fopen(confFileName, "r");
    if (input == NULL)
    {
        perror(confFileName);
        return;
    }
    FILE *output = fopen("temp.txt", "w");
    if (output == NULL)
    {
        perror("temp.txt");
        fclose(input);
        return;
    }

    /* An empty search string matches everywhere without advancing, so it is
       treated as "nothing to replace". */
    const size_t find_len = strlen(text_to_find);
    char buffer[512];

    while (fgets(buffer, sizeof(buffer), input) != NULL)
    {
        char *cursor = buffer;
        char *pos;

        while (find_len > 0 && (pos = strstr(cursor, text_to_find)) != NULL)
        {
            /* Copy the text before the match, then the replacement text */
            fwrite(cursor, 1, (size_t)(pos - cursor), output);
            fputs(text_to_replace, output);
            cursor = pos + find_len;
        }
        /* Copy the remaining text after the last match */
        fputs(cursor, output);
    }
    fclose(input);

    /* Only replace the original when the temporary file is complete */
    int write_failed = ferror(output);
    if (fclose(output) != 0)
        write_failed = 1;
    if (write_failed)
    {
        remove("temp.txt");
        return;
    }

    /* Rename the temporary file to the original file */
    if (rename("temp.txt", confFileName) != 0)
        perror("rename");
}
//
// Find and replace data in a file
//
// This is not as straightforward a problem as it initially appears,
// because if you have the text
//
// "Jack is a pirate"
//
// And you wish to replace "is" with "was", the string needs to be longer,
// or else you end up with the following:
//
// "Jack wasapirate"
//
// This becomes more of a problem for larger text. For example, if we wanted
// to replace "Jack" with "Rumpelstiltskin", we'd end up with:
//
// "Rumpelstiltskin"
//
// Which completely overwrites our original text!!!
//
// In order to do this correctly, we either need to:
//
// 1. Read the entire file into a in-memory buffer
// 2. Write to a temporary file, then replace the original file
//
// Option #2 is easier to implement, and should work for your coursework.
//
#include <stdio.h>
#include <string.h>
/*
 * Replaces every occurrence of "is" with "was" in input.txt.
 *
 * The rewritten text goes to temp.txt, which is renamed over input.txt once
 * it has been written successfully.  Progress is traced on standard output.
 * Returns 0 on success, 1 if a file cannot be opened or written.
 */
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // We need a buffer to read in data
    enum { BufferSize = 0x1000 };
    char Buffer[BufferSize];
    char *InputFileName = "input.txt";
    char *TemporaryFileName = "temp.txt";
    // Our find and replace arguments
    char *Find = "is";
    char *Replace = "was";

    // Open the file for reading. 'r' means that it must already exist.
    FILE *Input = fopen(InputFileName, "r");
    if(NULL == Input)
    {
        printf("Could not open file");
        return 1;
    }
    // Our output file. 'w+' means to create the file if it doesnt exist, for writing.
    // It is opened only after the input is known to exist, so a missing input
    // does not leave an empty temporary file behind.
    FILE *Output = fopen(TemporaryFileName, "w+");
    if(NULL == Output)
    {
        printf("Could not open file");
        fclose(Input);
        return 1;
    }

    printf("Find: %s\n", Find);
    printf("Replace: %s\n", Replace);

    // For each line...
    while(NULL != fgets(Buffer, BufferSize, Input))
    {
        // For each incidence of "is"
        char *Stop = NULL;    // Where to stop copying (at 'is')
        char *Start = Buffer; // Start at the beginning of the line, and after each match
        printf("Line: %s\n", Buffer);
        while(1)
        {
            // Find next match
            Stop = strstr(Start, Find);
            if(NULL == Stop)
            {
                // Print the remaining text in the line
                fwrite(Start, 1, strlen(Start), Output);
                break;
            }
            // Write out everything between the end of the previous match, and the
            // beginning of the current match.
            //
            // For example, "Jack is a pirate who is cool" has two instances to
            // replace, so what we want to write is:
            // - "Jack "
            // - "was"
            // - " a pirate who "
            // - "was"
            // - " cool"
            printf("Match starts at: %s\n", Stop);
            // We have found a match! Copy everything from [Start, Stop)
            fwrite(Start, 1, (size_t)(Stop - Start), Output);
            // Write our replacement text
            fwrite(Replace, 1, strlen(Replace), Output);
            // Next time, we want to start searching after our 'match'
            Start = Stop + strlen(Find);
            printf("Search resumes at: %s\n", Start);
        }
    }

    // Close our files; a failed write or close means temp.txt is incomplete
    fclose(Input);
    int WriteFailed = ferror(Output);
    if(0 != fclose(Output))
    {
        WriteFailed = 1;
    }
    if(WriteFailed)
    {
        remove(TemporaryFileName);
        return 1;
    }

    // Rename the Output file to the Input file
    if(0 != rename(TemporaryFileName, InputFileName))
    {
        perror("rename");
        return 1;
    }
    return 0;
}

Resources