How to split a text file into multiple parts in c - c

What i need to do, is to take a file of n lines, and for every x lines, create a new file with the lines of the original file. An example would be this:
Original File:
stefano
angela
giuseppe
lucrezia
In this case, if x == 2, 3 file would be created, in order:
First file:
stefano
angela
Second FIle:
giuseppe
lucrezia
Third File:
lorenzo
What i've done so far is this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10
int getlines(FILE *fp)
{
int c = 0;
int ch;
do{
ch = fgetc(fp);
if(ch == '\n')
{
c++;
}
}while(ch != EOF);
fseek(fp, 0 , SEEK_SET);
return c;
}
int ix = 0;
void Split(FILE *fp, FILE **fpo, int step, int lines, int *mem)
{
FILE **fpo2 = NULL;
char * filename = malloc(sizeof(char)*64);
char * ext = ".txt";
char number[2];
for(int i = ix; i < *mem; i++)
{
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
if(!(fpo[i] = fopen(filename, "w")))
{
fprintf(stderr, "Error in writing\n");
exit(EXIT_FAILURE);
}
}
char ch;
int c = 0;
do{
ch = fgetc(fp);
printf("%c", ch);
if(ch == '\n')
{
c++;
}
if(c >= step)
{
c = 0;
ix++;
if(ix >= *mem && (ix*step) <= lines)
{
*mem = *mem + 1;
fpo2 = realloc(fpo, sizeof(FILE*)*(*mem));
Split(fp, fpo2, step, lines, mem);
}
}
putc(ch, fpo[ix]);
}while(ch != EOF);
}
int main()
{
FILE * fp;
if(!(fp = fopen("file.txt", "r")))
{
fprintf(stderr, "Error in opening file\n");
exit(EXIT_FAILURE);
}
int mem = N;
int lines = getlines(fp);
int step = lines/N;
FILE **fpo = malloc(sizeof(FILE *)*N);
Split(fp, fpo, step, lines, &mem);
exit(EXIT_SUCCESS);
}
I'm stack with segmentation error, i couldn't find the bug doing
gdb myprogram
run
bt
I really appreciate any help.
EDIT:
I've changed some things and now it works, but it creates an additional file that contains strange characters. I need to still adjust some things:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10
int getlines(FILE *fp)
{
int c = 0;
int ch;
do{
ch = fgetc(fp);
if(ch == '\n')
{
c++;
}
}while(ch != EOF);
fseek(fp, 0 , SEEK_SET);
return c;
}
int ix = 0;
void Split(FILE *fp, FILE **fpo, int step, int lines, int *mem)
{
FILE **fpo2 = NULL;
char * ext = ".txt";
for(int i = ix; i < *mem; i++)
{
char * filename = malloc(sizeof(char)*64);
char * number = malloc(sizeof(char)*64);
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
if(!(fpo[i] = fopen(filename, "w")))
{
fprintf(stderr, "Error in writing\n");
exit(EXIT_FAILURE);
}
free(number);
free(filename);
}
char ch;
int c = 0;
do{
ch = fgetc(fp);
printf("%c", ch);
if(ch == '\n')
{
c++;
}
if(c >= step)
{
c = 0;
ix++;
if(ix >= *mem && ((ix-1)*step) <= lines)
{
*mem = *mem + 1;
fpo2 = realloc(fpo, sizeof(FILE*)*(*mem));
Split(fp, fpo2, step, lines, mem);
}
}
putc(ch, fpo[ix]);
}while(ch != EOF);
}
int main()
{
FILE * fp;
if(!(fp = fopen("file.txt", "r")))
{
fprintf(stderr, "Error in opening file\n");
exit(EXIT_FAILURE);
}
int mem = N;
int lines = getlines(fp);
int step = lines/N;
FILE **fpo = malloc(sizeof(FILE *)*N);
Split(fp, fpo, step, lines, &mem);
exit(EXIT_SUCCESS);
}

There are a few problems in your code. But first I think you need to fix the most important thing
int step = lines/N;
Here step is 0 if your input file has less than N lines of text. This is because lines and N both are integer and integer division is rounding down.

I won't fix your code, but I'll help you with it. Some changes I
suggest:
Instead of getlines, use getline(3) from the standard
library.
fseek(fp, 0 , SEEK_SET) is pointless.
In char * filename = malloc(sizeof(char)*64), note that
both arguments to malloc are constant, and the size is arbitrary.
These days, it's safe to allocate filename buffers statically,
either on the stack or with static: char filename[PATH_MAX].
You'll want to use limits.h to get that constant.
Similarly you have no need to dynamically allocate your FILE
pointers.
Instead of
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
use sprintf(filename, "temp%d%s", i+1, ext)
get familiar with err(3) and friends, for your own convenience.
Finally, your recursive Split is -- how shall we say it? -- a nightmare. Your whole program
should be something like:
open input
while getline input
if nlines % N == 0
create output filename with 1 + n/N
open output
write output
nlines++

Related

pallindrome is not copied to next file but printed on output screen

I have a file named fp1 containing different names, some being palindromes, and have to read all names from fp1 and check if each name is a palindrome or not. If it's a palindrome the I need to print the name to screen and copy it to another file named fp.
Here's my program:
#include <stdio.h>
#include <conio.h>
#include <stdlib.h>
void main() {
FILE *fp, *fp1;
char m, y[100];
int k = 0, i = 0, t = 1, p = 0;
fp = fopen("C:\\Users\\HP\\Desktop\\New folder\\file 2.txt", "w");
fp1 = fopen("C:\\Users\\HP\\Desktop\\New folder\\file4.txt", "r");
if (fp == NULL) {
printf("error ");
exit(1);
}
if (fp1 == NULL) {
printf("error");
exit(1);
}
k = 0;
m = fgetc(fp1);
while (m != EOF) {
k = 0;
i = 0;
t = 1;
p = 0;
while (m != ' ') {
y[k] = m;
k = k + 1;
m = fgetc(fp1);
}
p = k - 1;
for (i = 0; i <= k - 1; i++) {
if (y[i] != y[p]) t = 0;
p = p - 1;
}
if (t == 1) {
fputs(y, fp);
printf("%s is a pallindrome\n", y);
}
m = fgetc(fp1);
}
fclose(fp);
fclose(fp1);
}
coping pallindrome from one file to next file
You are not null terminating your buffer before attempting to use the contents as a string. After placing the last valid character read by fgetc into the buffer, you must place a null terminating character (\0).
A character buffer without a null terminating byte is not a string. Passing such a buffer to fputs, or the printf specifier %s without a length bound, will invoke Undefined Behaviour.
fgetc returns an int, not a char. On systems where char is unsigned, you will not be able to reliably test against the negative value of EOF.
The inner while loop is not checking for EOF. When the file is exhausted, it will repeatedly assign EOF to the buffer, until the buffer overflows.
To that end, in general, the inner while loop does nothing to prevent a buffer overflow for longer inputs.
In a hosted environment, void main() is never the correct signature for main. Use int main(void) or int main(int argc, char **argv).
Note that fputs does not print a trailing newline. As is, you would fill the output file full of strings with no delineation.
The nested while loops are fairly clumsy, and I would suggest moving your palindrome logic to its own function.
Here is a refactored version of your program. This program discards the tails of overly long words ... but the buffer is reasonably large.
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#define BUFFER_SIZE 1024
FILE *open_file_or_die(const char *path, const char *mode)
{
FILE *file = fopen(path, mode);
if (!path) {
perror(path);
exit(EXIT_FAILURE);
}
return file;
}
int is_palindrome(const char *word, size_t len)
{
for (size_t i = 0; i < len / 2; i++)
if (word[i] != word[len - i - 1])
return 0;
return 1;
}
int main(void)
{
/*
FILE *input = open_file_or_die("C:\\Users\\HP\\Desktop\\New folder\\file4.txt", "r");
FILE *output = open_file_or_die("C:\\Users\\HP\\Desktop\\New folder\\file 2.txt", "w");
*/
FILE *input = stdin;
FILE *output = stdout;
char buffer[BUFFER_SIZE];
size_t length = 0;
int ch = 0;
while (EOF != ch) {
ch = fgetc(input);
if (isspace(ch) || EOF == ch) {
buffer[length] = '\0';
if (length && is_palindrome(buffer, length)) {
fputs(buffer, output);
fputc('\n', output);
printf("<%s> is a palindrome.\n", buffer);
}
length = 0;
} else if (length < BUFFER_SIZE - 1)
buffer[length++] = ch;
}
/*
fclose(input);
fclose(output);
*/
}

How to shuffle 2 different text file into 1?

#include <stdio.h>
int main(){
char temp[64];
FILE *fp1=fopen("data/1.txt","a");
FILE *fp2=fopen("data/2.txt","r");
while(fgets(temp,64,fp2)!=NULL){
fputs(temp,fp1);
}
fclose(fp1);
fclose(fp2);
return 0;
}
With such code I was able to combine 2 different text file into 1.
data/1.txt contents: abcdefghijk
data/2.txt contents: ABCDE
Outcome: abcdefghijkABCDE
However, I am struggling with shuffling 2 different text file.
Wanted result: aAbBcCdDeEfghijk
Followings are my current code.
#include <stdio.h>
#include <string.h>
int main(){
FILE *fp1,*fp2,*fp_out;
char ch1,ch2;
int result=1;
fp1=fopen("data/1.txt","r");
fp2=fopen("data/2.txt","r");
fp_out=fopen("data/out.txt","w");
//shuffling code area//
fclose(fp1);
fclose(fp2);
fclose(fp_out);
char buf[64]={};
fp_out=fopen("data/out.txt","r");
fgets(buf,64,fp_out);
if(!strncmp("aAbBcCdDeEfghijk",buf,64))
printf("PASS\n");
else
printf("FAIL\n");
fclose(fp_out);
return 0;
}
How can I design a code in "shuffling code area" in order to have outcomes like wanted result? I have thought about making 2 different FOR loops and combining but it kept showed an error.
This is some dirty way to do the job.
You can read the file which ever you want to write first character first and then read a character from second file and write both into third file one after the other.
Just adding extra code as per your need.
This just works for your case , not tested with many cases and corner cases.
#include <stdio.h>
#include <string.h>
int main(){
FILE *fp1,*fp2,*fp_out;
char ch1,ch2;
int result=1;
int file1_content_over = 0;
int file2_content_over = 0;
fp1 = fopen("data/1.txt","r");
fp2 = fopen("data/2.txt","r");
fp_out=fopen("data/out.txt","w");
//shuffling code area//
// read till file1_content_over or file2_content_over is not finished
while(! file1_content_over || !file2_content_over)
{
ch1 = fgetc(fp1);
ch2 = fgetc(fp2);
if(ch1 != EOF)
fputc(ch1,fp_out);
else
file1_content_over = 1;
if(ch2 != EOF)
fputc(ch2,fp_out);
else
file2_content_over = 1;
}
//shuffling code area//
fclose(fp1);
fclose(fp2);
fclose(fp_out);
char buf[64]={};
fp_out=fopen("data/out.txt","r");
fgets(buf,64,fp_out);
printf("buf = %s\n", buf);
if(!strncmp("aAbBcCdDeEfghijk",buf,strlen("aAbBcCdDeEfghijk")))
printf("PASS\n");
else
printf("FAIL\n");
fclose(fp_out);
return 0;
}
Working for me! Not the best optimized code, I didnt get to much time to that!
Main():
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX 100
int removingSPaces(char array[MAX], int sizeArray);
void orderChar(char bufFile1[MAX], char bufFile2[MAX], char bufOut[MAX], int maxSize, int sizeBuf1, int sizeBuf2);
int getChar(char buf[MAX], FILE *fp);
int main(){
FILE *fp1, *fp2, *fpOut;
char bufFile1[MAX] = {0}, bufFile2[MAX] = {0}, bufOut[MAX] = {0};
int sizeBuf1 = 0, sizeBuf2 = 0;
int maxSize=0;
if((fp1=fopen("file1.txt","r")) == NULL || (fp2=fopen("file2.txt","r")) == NULL || (fpOut=fopen("fileOut.txt","w")) == NULL){
perror("");
exit(1);
}
sizeBuf1 = getChar(bufFile1, fp1); //geting the chars from file1
fclose(fp1);
sizeBuf1 = removingSPaces(bufFile1, sizeBuf1); //removing the \n if exists from chars of file1
sizeBuf2 = getChar(bufFile2, fp2); //geting the chars from file2
fclose(fp2);
sizeBuf2 = removingSPaces(bufFile2, sizeBuf2); //removing the \n if exists from chars of file2
maxSize = sizeBuf1 + sizeBuf2; //Max Size to loop for
orderChar(bufFile1, bufFile2, bufOut, maxSize, sizeBuf1, sizeBuf2); //Order the chars!
fprintf(fpOut, "%s", bufOut); //Printing to the file
fclose(fpOut);
/* COPIED FROM YOUR CODE */
char buf[64]={0}; //Just added the 0, because you cant initialize the array like with only {}
if((fpOut=fopen("fileOut.txt", "r")) == NULL){
perror("");
exit(1);
}
fgets(buf,64, fpOut);
if(!strncmp("aAbBcCdDeEfghijk", buf, 64))
printf("PASS\n");
else
printf("FAIL\n");
fclose(fpOut);
/* COPIED FROM YOUR CODE */
return 0;
}
Functions():
int removingSPaces(char array[MAX], int sizeArray){
int size = sizeArray;
if(array[sizeArray -1] == '\n'){
array[sizeArray -1] = '\0';
size = strlen(array);
}
return size;
}
int getChar(char buf[MAX], FILE *fp){
char bufAux[MAX];
int size;
while(fgets(bufAux, sizeof(bufAux), fp)){
size = strlen(bufAux);
}
strcpy(buf, bufAux);
return size;
}
void orderChar(char bufFile1[MAX], char bufFile2[MAX], char bufOut[MAX], int maxSize, int sizeBuf1, int sizeBuf2){
int positionsF1=0, positionsF2=0;
int aux = 0; //This will starts organization by the first file! If you want to change it just change to 1;
for(int i=0; i < maxSize; i++){
if(aux == 0 && positionsF1 != sizeBuf1){
bufOut[i]=bufFile1[positionsF1];
if(positionsF2!=sizeBuf2){
aux = 1;
}
positionsF1++;
}else if(aux == 1 && positionsF2 != sizeBuf2){
bufOut[i]=bufFile2[positionsF2];
if(positionsF1!=sizeBuf1){
aux = 0;
}
positionsF2++;
}
}
}
Content of file 1:
abcdefghijk
Content of file 2:
ABCDE

Compare two binary files in C

I am writing a program to compare two binary files and plot the first difference. I want to read 16 bytes of data from each file continuously and compare them. For that I am storing 16 bytes from both file into char *buffer1, buffer2. When I print the output I am getting that buffer1 has both the data of file1 and file2.
The code is as follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void printConversion(char *buf1, char *buf2) {
size_t len = strlen(buf1);
char *binary = malloc(len * 8 + 1);
binary[0] = '\0';
for (size_t i = 0; i < len; ++i) {
char ch = buf1[i];
for (int j = 7; j >= 0; --j) {
if (ch & (1 << j)) {
strcat(binary,"1");
} else {
strcat(binary,"0");
}
}
}
printf("File1: %s\t", binary);
free(binary);
printf("File2:");
for (int i = 0; i < sizeof(buf2); i++) {
printf("%x", buf2[i] - '0');
}
}
void fileRead(FILE *fp, char *buf, int count) {
fseek(fp, count, SEEK_SET);
fread(buf, 1, 16, fp);
}
int fileSize(FILE *fp) {
fseek(fp, 0, SEEK_END);
int size = ftell(fp) + 1;
return size;
}
int main(int argc, char *argv[]) {
printf("***Binary File Comparator***\n ");
int count = 0;
int index = 0;
char buffer1[16];
char buffer2[16];
char buffer3[16];
char buffer4[16];
// Invalid Number of Arguments
if (argc < 3 || argc > 3) {
printf("Invalid Number of Arguments\n");
}
FILE *fp1, *fp2;
fp1 = fopen(argv[1], "rb");
int size = fileSize(fp1);
int size1 = size;
fclose(fp1);
while (size > 1) {
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer1, count);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer2, count);
if (size1 < count) {
int lastSize = count - size1;
count = count + lastSize;
fclose(fp2);
} else {
count = count+16;
fclose(fp2);
}
**printf("buffer1:%s\tbuffer2:%s\n", buffer1, buffer2)**;
size = size - 16;
int result = strcmp(buffer1, buffer2);
if (result != 0) {
for (int i = 0; i < sizeof(buffer1); i++) {
if (buffer1[i] != buffer2[i]) {
int count1 = (count - 16) + i;
index++;
if (index == 1) {
printf("Byte_Offset:%x\n", count1);
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer3, count1);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer4, count1);
fclose(fp2);
printConversion(buffer3, buffer4);
break;
}
} else {
continue;
}
}
}
}
}
I have tried to highlight the printf part that is printing my buffer1 and buffer2
The output is as follows:
buffer1:83867715933586928386771593358692 buffer2:8386771593358692
buffer1:49216227905963264921622790596326 buffer2:4921622790596326
buffer1:40267236116867294026723611686729 buffer2:4026723611686729
buffer1:82306223673529228230622367352922 buffer2:8230622367352922
buffer1:25869679356114222586967935611422 buffer2:2586967935611422
Can anybody help what I am doing wrong. Please point me the error and what optimization changes could be done in code. I am at learning stage your feedback will be very helpful.
You are complicating the task by reading 16 bytes at a time. If the goal is to indicate the first difference, just read one byte at a time from both files with getc() this way:
int compare_files(FILE *fp1, FILE *fp2) {
unsigned long pos;
int c1, c2;
for (pos = 0;; pos++) {
c1 = getc(fp1);
c2 = getc(fp2);
if (c1 != c2 || c1 == EOF)
break;
}
if (c1 == c2) {
printf("files are identical and have %lu bytes\n", pos);
return 0; // files are identical
} else
if (c1 == EOF) {
printf("file1 is included in file2, the first %lu bytes are identical\n", pos);
return 1;
} else
if (c2 == EOF) {
printf("file2 is included in file1, the first %lu bytes are identical\n", pos);
return 2;
} else {
printf("file1 and file2 differ at position %lu: 0x%02X <> 0x%02X\n", pos, c1, c2);
return 3;
}
}
In terms of efficiency, reading one byte at a time does not pose a problem if the streams are buffered. For large files, you can get better performance by memory mapping the file contents if available on the target system and for the given input streams.
Not an actual answer, but a word on optimisation. You can increase the speed of the program if you have a bigger buffer. Basically the larger the buffer the faster the program runs HOWEVER the speed you gain from just making it larger will increase logarithmically.
Here is a picture of a graph that will help you understand. Also, what i mentioned applies to any simmilar situation. This includes: Copying files, filling the sound buffer etc. Loading the entire file in your RAM first and operationg on it will usually be faster than loading parts of it. Ofc this is not possible with larger files but still this is what you should aim for if you want speed.
PS: I'm writting here because i don't have rep to comment.
EDIT: I came up with solution but since you did not state what you need to do with your buffer3 and buffer4 i packed it up inside a function.
If you are sure that you are only going to use 16 bytes as a buffer size, remove the nBufferSize parameter and replace the buffer dynamic allocation with a static one.
If after the execution you need the buffers, add them as parameters and keep the nBufferSize param. Keep in mind that if you intend to use them outside the function, you should also allocate them outside the function, so things don't get messy.
/** Returns 0 if files are identical, 1 if they are different and -1 if there
is an error. */
int FileCmp(char* szFile1, char* szFile2, int nBufferSize)
{
FILE *f1, *f2;
f1 = fopen(szFile1, "rb");
f2 = fopen(szFile2, "rb");
// Some error checking?
if (f1 == NULL || f2 == NULL)
return -1;
// You can check here for file sizes before you start comparing them.
// ...
// Start the comparrison.
/// Replace this part with static allocation. --------
char* lpBuffer1 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer1 == NULL) // close the files and return error.
{
fclose(f1);
fclose(f2);
return -1;
}
char* lpBuffer2 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer2 == NULL) // close the files, free buffer1 and return error.
{
free(lpBuffer1);
fclose(f1);
fclose(f2);
return -1;
}
/// --------------------------------------------------
while(1)
{
unsigned int uRead1 = fread(lpBuffer1, sizeof(char), nBufferSize, f1);
unsigned int uRead2 = fread(lpBuffer2, sizeof(char), nBufferSize, f2);
if (uRead1 != uRead2)
goto lFilesAreDifferent;
for(unsigned int i = 0; i < uRead1; i++)
if (lpBuffer1[i] != lpBuffer2[i])
goto lFilesAreDifferent;
if ((feof(f1) != 0) && (feof(f2) != 0))
break; // both files have nothing more to read and are identical.
goto lSkip;
lFilesAreDifferent:
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 1;
lSkip:;
}
// The files are the same. Close them, free the buffers and return 0.
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 0;
}
A simple Demo:
#define BUFFER_SIZE 16
int main(int nArgs, char** szArgs)
{
if (nArgs != 3)
{
printf("Invalid number of arguments.");
return 0;
}
int nResult = FileCmp(szArgs[1], szArgs[2], BUFFER_SIZE);
switch (nResult)
{
case 0: printf("Files [%s] and [%s] are identical.", szArgs[1], szArgs[2]); break;
case 1: printf("Files [%s] and [%s] are different.", szArgs[1], szArgs[2]); break;
case -1: printf("Error."); break;
}
return 0;
}
EDIT II: Personally, i have never used the C standard FILE library (it was either C++ fstream or pure win32 fileapi) so don't take my word here for granted but fread is the fastest function i could find (faster than fgets or fgetc). If you want even faster than this you should get into OS dependant functions (like ReadFile() for Windows).
chqrlie's solution using getc is absolutely the right way to do this. I wanted to address some points brought up in comments, and find it's best to do that with code. In one comment, I recommend pseudo code which could be confusing (namely, you can't write fwrite(file1...) || fwrite(file2 ...) because of the short circuit. But you can implement the idea of that with:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
* Compare two files, 16 bytes at a time. (Purely to demonstrate memcmp.
* Clearly, this should be implemented with getc.)
*/
FILE * xfopen(const char *, const char *);
size_t xfread(void *, FILE *, const char *);
int
main(int argc, char **argv)
{
FILE *fp[2];
size_t n[2];
char buf[2][16];
unsigned count = 0;
if(argc != 3) { return EXIT_FAILURE; }
fp[0] = xfopen(argv[1], "r");
fp[1] = xfopen(argv[2], "r");
do {
n[0] = xfread(buf[0], fp[0], argv[1]);
n[1] = xfread(buf[1], fp[1], argv[2]);
if( n[0] != n[1] || (n[0] && memcmp(buf[0], buf[1], n[0]))) {
fprintf(stderr, "files differ in block %u\n", count);
return 1;
}
count += 1;
} while(n[0]);
puts("files are identical");
return 0;
}
size_t
xfread(void *b, FILE *fp, const char *name)
{
size_t n = fread(b, 1, 16, fp);
if(n == 0 && ferror(fp)) {
fprintf(stderr, "Error reading %s\n", name);
exit(EXIT_FAILURE);
}
return n;
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = strcmp(path, "-") ? fopen(path, mode) : stdin;
if( fp == NULL ) {
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}

How to store text, line by line, to a 2D array in C?

again. I'm new to C. Still thinking in Python terms (readlines, append them to variable) so I'm having difficulties translating that to C. This is what I want to do: open a text file for reading, store each line in an array row by row, print it out to make sure it's stored.
This is how far I've got:
int main(){
FILE * fp = fopen("sometext.txt", "r");
char text[100][100];
if(fp == NULL){
printf("File not found!");
}
else{
char aLine[20];
int row = 0;
while(fgets(aLine, 20, fp) != NULL){
printf("%s", aLine);
//strcpy(text[row], aLine); Trying to append a line (as row)
return 0;
}
Please don't start with "invest in some more time and look somewhere else because it's easy and has been answered". I'm bad at this, and I'm trying.
You can try this. Basically you need an array of arrays to store each line. You find the length of the longest line in the file and you allocate space for it. Then rewind the pointer to the start of the file and use fgets to get each line from the file and strdup to allocate space and copy the line to the respective position. Hope this helps.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]) {
FILE * fp = fopen("sometext.txt", "r");
int maxLineSize = 0, count = 0;
char c;
while ((c = fgetc(fp)) != EOF) {
if (c == '\n' && count > maxLineSize) maxLineSize = count;
if (c == '\n') count = 0;
count++;
}
rewind(fp);
char ** lines = NULL;
char * line = calloc(maxLineSize, sizeof(char));
for (int i = 0 ; fgets(line, maxLineSize + 1, fp) != NULL ; i++) { // +1 for \0
lines = realloc(lines, (i + 1) * sizeof(char *));
line[strcspn(line, "\n")] = 0; // optional if you want to cut \n from the end of the line
lines[i] = strdup(line);
printf("%s\n", lines[i]);
memset(line, maxLineSize, '\0');
}
fclose(fp);
}
You could solve it without copy
The follow code could work:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <math.h>
int main()
{
FILE * fp = fopen("sometext.txt", "r");
if(fp == NULL){
printf("File not found!");
return -1;
}
char text[100][20];
int row = 0;
while(row < 100 && fgets(text[row], sizeof(text[0]), fp) != NULL)
++row;
for (int i= 0; i != row; ++i)
fputs(text[i], stdout);
return 0;
}

Compare each line from two different files and print the lines that are different in C

Supposing that I have two files like this:
file1.txt
john
is
the new
guy
file2.txt
man
the old
is
rick
cat
dog
I'd like to compare first line from file1 with all the lines from file2 and verify if it exist. If not, go two the second line from file1 and compare it with all the lines from file2.. and so on until eof is reached by file1.
The output that I expect is:
john
the new
guy
How I thought this should be done:
read file1 and file2
create a function which returns the line number of each of them
take the first line from file1 and compare it to all the lines from file2
do this until all the lines from file1 are wasted
Now, I don't know what I'm doing wrong, but I don't get the result that I expect:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int countlines(char *filename)
{
int ch = 0, lines = 0;
FILE *fp = fopen(filename, "r");
if (fp == NULL)
return 0;
do {
ch = fgetc(fp);
if (ch == '\n')
lines++;
} while (ch != EOF);
if (ch != '\n' && lines != 0)
lines++;
fclose(fp);
return lines;
}
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
int counter = 0;
for (int i = 0; i < countlines(argv[1]); i++)
{
fgets(buffer_line_template_file, 100, template_file);
for (int j = 0; j < countlines(argv[2]); j++)
{
fgets(buffer_line_data_file, 100, data_file);
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
counter++;
printf("%d", counter);
}
}
}
printf("\n\n");
return 0;
}
Could someone please point me into the right direction ? For testing purposes I created a counter at the end which was a part of a small debug. There should be the print() function
As per #chux answer I got the following simplified code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
while(fgets(buffer_line_template_file, 100, template_file))
{
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
printf("%s\n", buffer_line_template_file);
}
}
}
printf("\n\n");
return 0;
}
The above code is giving me the following output, which is not what is expected:
john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy
Problems with OP's code
Imprecise definition of line.
Excessive recalculation
Fuzzy determination of the number of lines in a file.
Unlike string, which has a precise definition in C, reading a line is not so well defined. The primary specificity issue: does a line contain the trailing '\n'. If the first answer is Yes, then does the last text in a file after a '\n' constitute a line? (Excessively long lines are another issue, but let us not deal with that today.)
Thus possibly some lines end with '\n' and others do not, fooling strcmp("dog", "dog\n").
The easiest solution is to read a line until either 1) a '\n' is encountered, 2) EOF occurs or 3) line buffer is full. Then after getting a line, lop off the potential trailing '\n'.
Now all lines code subsequently works with have no '\n'.
fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
OP's loop is incredible wasteful. Consider a file with 1000 lines. Code will loop, calling 1000 times countlines() (each countlines() call reads 1000 lines) times when one countlines() call would suffice.
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
There really is no need to count the line anyways, just continue until EOF (fgets() returns NULL). So no need to fix its fuzzy definition. (fuzzy-ness concerns same issues as #1)
int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while ((fgets(buffer_line_data_file, 100, data_file)) {
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
counter++;
printf("%d", counter);
}
}
}
Other simplifications possible - for another day.
FWIW, following counts lines of text allowing the last line in the file to optionally end with a '\n'.
unsigned long long FileLineCount(FILE *istream) {
unsigned long long LineCount = 0;
rewind(istream);
int previous = '\n';
int ch;
while ((ch = fgetc(inf)) != EOF) {
if (previous == '\n') LineCount++;
previous = ch;
}
return LineCount;
}
Note that this function may get a different result that fgets() calls. Consider a file of one line of 150 characters. fgets(..., 100,...) will report 2 lines. FileLineCount() reports 1.
[Edit] Updated code to conform to OP functionality.
int found = 0;
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
{
found = 1;
break;
}
}
if (!found) printf("%s\n", buffer_line_template_file);
This program prints the diff of two files file1.txt and file2.txt.
#include<stdio.h>
#include <stdlib.h>
#include <memory.h>
int main() {
FILE *fp1, *fp2;
int ch1, ch2;
char fname1[40], fname2[40];
char *line = NULL;
size_t len = 0;
ssize_t read;
char *line2 = NULL;
size_t len2 = 0;
ssize_t read2;
fp1 = fopen("file1.txt", "r");
fp2 = fopen("file2.txt", "r");
if (fp1 == NULL) {
printf("Cannot open %s for reading ", fname1);
exit(1);
} else if (fp2 == NULL) {
printf("Cannot open %s for reading ", fname2);
exit(1);
} else {
while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
if (!strcmp(line, line2)) {
printf("Retrieved diff on line %zu :\n", read);
printf("%s", line);
}
}
if (ch1 == ch2)
printf("Files are identical \n");
else if (ch1 != ch2)
printf("Files are Not identical \n");
fclose(fp1);
fclose(fp2);
}
return (0);
}
You already have a very good answer (and always will from chux), but here is a slightly different approach to the problem. It uses automatic storage to reading file2 into an array of strings and then compares each line in file1 against every line in file2 to determine whether it is unique. You can easily convert the code to dynamically allocate memory, but for sake of complexity that was omitted:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 256, MAXL = 512 };
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);
int main (int argc, char **argv) {
FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
size_t n1 = 0, n2 = 0;
if (!fp1 || !fp2) {
fprintf (stderr, "error: file open failed.\n");
return 1;
}
printf ("\nunique words in file1, not in file 2.\n\n");
file1infile2 (fp2, fp1, &n2, &n1);
printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
n1, n2);
return 0;
}
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
char buf[MAXC] = "";
char f2buf[MAXL][MAXC] = { "" };
size_t i;
*n1 = *n2 = 0;
while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0;
strcpy (f2buf[(*n2)++], buf);
}
while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0, (*n1)++;
for (i = 0; i < *n2; i++)
if (!(strcmp (f2buf[i], buf)))
goto matched;
printf (" %s\n", buf);
matched:;
}
}
Look over the code and let me know if you have any questions.
Example Use/Output
$ ./bin/f1inf2 dat/f1 dat/f2
unique words in file1, not in file 2.
john
the new
guy
analyzed 4 lines in file1 against 6 lines in file2.

Resources