Hi stackoverflow(ers)!
I'm learning Unix using the xv6 OS (documentation found here) and have been trying to write a tail utility in C. The expected output of:
tail <file> is to give the last 10 lines of the file
tail -<num> <file> is to give the last <num> lines of the file
tail <file1> <file2> ... is to give the last 10 lines of files <file1> <file2> ...
tail -<num> <file1> <file2> ... is to give the last <num> lines of <file1> <file2> ...
grep <pattern> | tail is to give the last 10 sentences which contain <pattern>
I have written two versions of tail, one implemented using a char* [] and the other by writing to a temporary file and then reading from it (both posted below).
My version implementing tail with char* [] seems to be closer to the actual command. However, in the version where I'm writing to a temporary file and then reading from it, I'm getting more lines as output and I'm not sure why. My guess is that while reading from one file and writing to another, the placement of the '\n' characters is getting messed up. I'd highly appreciate help in figuring it out!
Please don't get mad at me if I'm doing something silly. I'm new to C in Unix and only trying to learn.
tail.c using char* []
#include "types.h"
#include "stat.h"
#include "user.h"
#include "fcntl.h"
char buf [512];
void tail (int fd, int toSub) {
int n;
int numLines = 0;
int linesToPrint = 0;
char *buffer;
buffer = (char*) malloc (500000);
int buffSize = 0;
while ((n = read(fd, buf, sizeof(buf))) > 0) {
for (int i = 0; i<n; i++) {
buffer[buffSize] = (char)buf[i];
buffSize++;
if(buf[i] == '\n')
numLines++;
}
}
if (n < 0) {
printf (1, "tail: read error \n");
exit ();
}
if (numLines < toSub)
linesToPrint = 0;
linesToPrint = numLines - toSub;
int counter = 0;
for (int i = 0; i < buffSize; i++) {
if (counter >= linesToPrint)
printf(1,"%c",buffer[i]);
if (buffer[i] == '\n')
counter++;
}
free (buffer);
}
int main (int argc, char *argv[]) {
int toSub = 10;
int fd = -1;
if (argc <= 1) {
tail (0, toSub);
exit();
}
else if (argc > 1 && argv[1][0] == '-') {
char getToSub [10];
for (int k=1; k<strlen(argv[1]); k++) {
getToSub[k-1] = argv[1][k];
}
toSub = (atoi)(getToSub);
}
else {
if((fd = open (argv[1], toSub)) < 0) {
printf (1, "tail: cannot open %s\n", argv[1]);
exit ();
}
tail (fd, toSub);
close (fd);
}
if (argc > 2) {
for (int i=2; i<argc; i++) {
if((fd = open (argv[i], 0)) < 0) {
printf (1, "tail: cannot open %s\n", argv[i]);
exit ();
}
else {
tail (fd, toSub);
close (fd);
}
}
}
exit();
}
tail.c using write
#include "types.h"
#include "stat.h"
#include "user.h"
#include "fcntl.h"
char buf [512];
void tail (int fd, int toSub) {
int n;
int numLines;
int linesToPrint;
int ptrDump;
ptrDump = open ("tailDump", O_CREATE | O_RDWR);
while ((n = read(fd, buf, sizeof(buf))) > 0) {
write (ptrDump, buf, sizeof(buf));
for (int i = 0; i<n; i++) {
if(buf[i] == '\n')
numLines++;
}
}
if (n < 0) {
printf (1, "tail: read error \n");
exit ();
}
if (numLines < toSub)
linesToPrint = 0;
linesToPrint = numLines - toSub;
close (ptrDump);
ptrDump = open ("tailDump", 0);
int counter = 0;
while ((n = read(ptrDump, buf, sizeof(buf))) > 0) {
for (int i = 0; i<n; i++) {
if (counter > linesToPrint)
printf(1,"%c",buf[i]);
if (buf[i] == '\n')
counter++;
}
}
close (ptrDump);
unlink("tailDump");
}
int main (int argc, char *argv[]) {
int toSub = 10;
int fd = -1;
if (argc <= 1) {
tail (0, toSub);
exit();
}
else if (argc > 1 && argv[1][0] == '-') {
char getToSub [10];
for (int k=1; k<strlen(argv[1]); k++) {
getToSub[k-1] = argv[1][k];
}
toSub = (atoi)(getToSub);
}
else {
if((fd = open (argv[1], toSub)) < 0) {
printf (1, "tail: cannot open %s\n", argv[1]);
exit ();
}
tail (fd, toSub);
close (fd);
}
if (argc > 2) {
for (int i=2; i<argc; i++) {
if((fd = open (argv[i], 0)) < 0) {
printf (1, "tail: cannot open %s\n", argv[i]);
exit ();
}
else {
tail (fd, toSub);
close (fd);
}
}
}
exit();
}
I have the code put up on my GitHub (found here) as well, in tail_using_str.c and tail_using_file.c.
I think your problem is here:
while ((n = read(fd, buf, sizeof(buf))) > 0) {
    write (ptrDump, buf, sizeof(buf));
You read in n bytes but when you write, you write sizeof(buf) bytes. In other words, you may write too many bytes.
Maybe you want this instead:
while ((n = read(fd, buf, sizeof(buf))) > 0) {
    write (ptrDump, buf, n);   /* note: n, not sizeof(buf) */
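As a further aside, POSIX write() may itself write fewer bytes than requested (I don't believe xv6's write does short writes to ordinary files, so treat this as a portability sketch rather than a needed fix). A fully defensive copy loop, using the question's variable names, would look something like:

while ((n = read(fd, buf, sizeof(buf))) > 0) {
    int off = 0;
    while (off < n) {                      /* keep going until all n bytes are on disk */
        int w = write(ptrDump, buf + off, n - off);
        if (w < 0) {
            printf(1, "tail: write error\n");
            exit();
        }
        off += w;                          /* skip past the bytes already written */
    }
}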
Please don't get mad at me if I'm doing something silly. I'm new to C in Unix and only trying to learn.
Thus this answer, which is not strictly necessary, since the core question you've asked has already been answered. Your posted question actually raises a bunch more questions not explicitly asked, which I intend to answer here.
The expected output of: ... tail -<num> <file> is to give the last <num> lines of the file
According to who? Not according to POSIX, and not according to UNIX V7, where tail(1) first appeared.
(Well, actually tail(1) first appeared in PWB/UNIX, but that wasn't widely used.)
grep <pattern> | tail is to give the last 10 sentences which contain <pattern>
You mean last 10 lines, not sentences. grep does not produce sentences.
(Except in Soviet Unix, where grep sentences you!)
char *buffer;
buffer = (char*) malloc (500000);
This and the following exit call create a memory leak. You may say that it's harmless since the OS will give the memory back on program exit, but it's sloppy, and tools like Valgrind will call you on it.
Either free() your buffers before all possible exit points from the function, or declare this buffer on the stack instead:
char buffer[500000];
You might not be able to declare a buffer that big on the stack, depending on xv6's limits. A common modern limit for the stack size is 2 MiB, and that's for the entire stack, used by all of the functions in your deepest call chain. This is configurable on modern systems, but may not be configurable in xv6.
If you're forced to go with the malloc() option, you can do that on a single line:
char *buffer = (char*) malloc (500000);
Additionally:
it is bad style to have buf and buffer. Lazy. Give each buffer a purpose-driven name, like lineBuf and accumBuf
buffSize is confusingly named. It isn't clear which buffer it refers to, and it isn't the size of the buffer anyway. Call it something like accumBytes to solve both problems.
You're missing a bunch of #includes necessary on modern POSIX systems, and you have some that don't work on such systems. I'd see if xv6 has stdio.h, stdlib.h, string.h and unistd.h, and #include them for POSIX portability. I'd also see if you can #include types.h via sys/types.h, as that's necessary at least on macOS, and probably other Unixes. user.h isn't needed on modern systems, so if you don't actually need it on xv6, remove it.
Your in-memory variant reads the entire file into RAM and then skips back over the bytes in RAM it doesn't want to print. A bit of thought will show how you can both cut the buffer size down and avoid making two passes over the input data, as in the sketch below. (Hint: accumBuf[toSub][sizeof(lineBuf)]. Feel free to multiply the second term by some amount if you wish to allow lines longer than sizeof(lineBuf) bytes.)
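To make that hint concrete, here is a minimal sketch of the ring-of-lines idea (the names and the 512-byte line cap are mine, and it reads one byte at a time for brevity; a real version would keep the block reads):

#define MAXKEEP 10
#define LINEBUF 512

char ring[MAXKEEP][LINEBUF];   /* the last MAXKEEP lines seen so far */
int  lens[MAXKEEP];            /* byte length of each stored line */

void tail(int fd, int keep)
{
    char c;
    int pos = 0;    /* fill position in the line being assembled */
    int seen = 0;   /* total lines seen so far */
    if (keep > MAXKEEP)
        keep = MAXKEEP;
    while (read(fd, &c, 1) == 1) {
        ring[seen % keep][pos++] = c;
        if (c == '\n' || pos == LINEBUF) {   /* line complete (or split if over-long) */
            lens[seen % keep] = pos;
            seen++;
            pos = 0;
        }
    }
    if (pos > 0) {                           /* trailing fragment with no final '\n' */
        lens[seen % keep] = pos;
        seen++;
    }
    /* replay the surviving lines in order, oldest first */
    for (int i = (seen > keep ? seen - keep : 0); i < seen; i++)
        write(1, ring[i % keep], lens[i % keep]);
}

This needs only keep times LINEBUF bytes no matter how big the input file is, and it touches each input byte once.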
if(buf[i] == '\n') numLines++;
You should probably check for a non-'\n' byte at the end of the accumulation buffer and add another line for it. Lines without LF terminators aren't quite kosher, but the user expectation is typically that you treat that trailing fragment as a line.
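With the question's variable names, that check could be as simple as:

if (buffSize > 0 && buffer[buffSize-1] != '\n')
    numLines++;   /* count the unterminated trailing fragment as a line */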
printf (1, "tail: read error \n");
What is this 1, noise? Are you trying to specify stdout? In standard C that's only correct for write, not printf: printf() already sends to stdout, and you have to use fprintf() to send anywhere else. (To be fair, xv6's own printf() does take a file descriptor as its first argument, so this works there, but it won't survive a port to a POSIX libc.)
Since these calls are only in your error cases, it also suggests you haven't been testing the error paths.
That's another reason to write code for POSIX portability even though you're ultimately targeting xv6: modern Unix system C compilers are much stricter about the code they're willing to accept. Modern C compilers do much of what we had to rely on tools like lint for in the past.
exit()
exit(2) takes a parameter, the exit status code, traditionally 0 for a clean exit and nonzero for an error. The only reason your compiler is letting you get away with that is that early C compilers did not strictly check the argument list given against the function's declared parameters. In fact, xv6 is probably shipping a K&R compiler which didn't even have function prototypes to declare the parameter lists with. The programmer was expected to do the right thing without being warned.
linesToPrint = numLines - toSub;
That isn't "lines to print", it's "lines to skip printing". It took me a good 5 minutes of staring at the code to get past that semantic mismatch. The compiler doesn't care, but variable names aren't for the compiler. If they were only for the compiler, we'd just call them all a, b, etc.
printf("%c",buffer[i]);
Use putchar() here.
int counter = 0;
Again, lazy. Count of what?
I'm only halfway through the first program, but that's enough commentary. I hope you've learned a few things from this.
Related
I'm trying to read a large file that has one float per line in C. For this, I put together the code below. It works fine when testing on small data. However, when reading 600 million numbers this way, it is very slow. Any ideas for how I can speed it up? I'm generating the raw file via python, so re-formatting the data (to have multiple numbers in a line separated by commas for example) is also an option. Any insight into why this method is so slow would be greatly appreciated.
void read_file(float *W)
{
FILE *fp;
int i = 0;
// In this file, one row should contain only one NUMBER!!
// So flatten the matrix.
if (fp = fopen("C:\\Users\\rohit\\Documents\\GitHub\\base\\numerical\\c\\ReadFile1\\Debug\\data.txt", "r")) {
while (fscanf(fp, "%f", &W[i]) != EOF) {
++i;
}
fclose(fp);
}
fclose(fp);
scanf("%d",&i);
}
I encountered a similar problem years ago. The solution was to replace fscanf with fgets and strtod. This gave much more than a 10-fold improvement, if I recall correctly.
So your loop:
while (fscanf(fp, "%f", &W[i]) != EOF) {
    ++i;
}
should look something like:
while (fgets(buf, sizeof buf, fp)) {
    W[i++] = strtod(buf, 0);
}
Edit: Error checking is always a good idea. So adding this in, the simple two-liner grows to about ten lines:
char buf[80];
errno = 0;
while (!errno && fgets(buf, sizeof buf, fp)) {
    W[i++] = strtod(buf, 0);
}
if (errno) { // Maybe ERANGE or EINVAL from strtod, or a read error like EINTR
    int save = errno;
    printf("errno=%d reading line %d\n", save, i); // or perror()
    exit(1);
}
Edit 2: Regarding error checking, the input file could easily contain text such as nan or inf, perhaps from some upstream bug. But strtod and fscanf are perfectly happy to parse these. And this could cause mysterious problems in your code.
But it is easy enough to check. Add the code:
int bad = 0;
for (int j = 0; j < i; j++)
    bad += !isnormal(W[j]); // needs <math.h>; note !isnormal() also flags zeros and subnormals, so use !isfinite() if those are legal input
if (bad) {
    // ... handle error
}
Putting this in a separate, simple, loop makes it easier for the compiler to optimize (in theory), especially if you use something like #pragma GCC optimize ("unroll-loops").
(Comment: This is my second answer.) I see the OP asked in a comment:
Do you happen to have a sample in C for reading the binary floats by any chance?
A binary version would blow any ASCII version out of the water. And it is shorter.
Here the OP's function signature has been changed to take the maximum number of floats that fit in W, and to return the number actually read from the file.
size_t read_file(float *W, size_t maxlen)
{
    // "rb" matters here: the data is binary, and on Windows (as this path suggests) "r" would mangle it
    FILE *fp = fopen("C:\\Users\\rohit\\Documents\\GitHub\\base\\numerical\\c\\ReadFile1\\Debug\\data.txt", "rb");
    if (!fp)
        return 0;
    size_t n = fread(W, sizeof(float), maxlen, fp);
    fclose(fp);
    return n;
}
Or for something even faster, you could use mmap. But this is not available on Windows.
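For reference, a POSIX mmap version might look like the sketch below (my code, not the OP's; it assumes the file holds raw floats, as in the fread version above):

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

/* Map a file of raw floats into memory; *n receives the element count. */
float *map_floats(const char *path, size_t *n)
{
    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return NULL;
    struct stat st;
    if (fstat(fd, &st) != 0) { close(fd); return NULL; }
    void *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);                       /* the mapping survives the close */
    if (p == MAP_FAILED)
        return NULL;
    *n = st.st_size / sizeof(float);
    return (float *)p;               /* caller munmap()s with the byte size when done */
}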
Added: However, unbuffered I/O would perhaps be faster still. The following function uses a single malloc and a single unbuffered read to copy a file to the heap. (NB: not yet tested on large files; may need open64.)
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>

void *readFileToHeap(const char *file, int *len)
{
    void *retval = 0;
    ssize_t cnt;
    struct stat st;
    const int fd = open(file, O_RDONLY, 0);
    if (fd < 0)
        return printf("Cannot open %s\n", file), (void *)0;
    if (fstat(fd, &st))
        return perror("fstat()"), close(fd), (void *)0;
    if (!(retval = malloc(st.st_size)))
        return perror("malloc()"), close(fd), (void *)0;
    cnt = read(fd, retval, st.st_size);
    close(fd); // not the best: could clobber errno
    if (cnt < 0)
        return perror("read()"), free(retval), (void *)0;
    if (cnt != st.st_size)
        return printf("Partial read %zd\n", cnt), free(retval), (void *)0; // %zd: cnt is a ssize_t
    *len = cnt;
    return retval;
}
I have the code below (not mine) and am trying to get it to work out of curiosity.
It's from here: C low-level standard-in to accept filename then printing file contents to stdout.
int checkfile(void)
{
    char buffer[4096];
    char userInput[100];
    int input_file1;
    int n;

    /* Check file existence. */
    printf("Enter the filename: \n");
    fflush(NULL);
    if (!fgets(userInput, sizeof(userInput), stdin))
    {
        perror("fgets");
        exit(EXIT_FAILURE);
    }
    if ((input_file1 = open(userInput, O_RDONLY)) < 0)
    {
        perror(userInput);
        exit(1);
    }
    while ((n = read(input_file1, buffer, sizeof(buffer))) > 0)
    {
        if ((write(STDOUT_FILENO, buffer, n)) < 0)
        {
            perror("failed to display file to output");
            close(input_file1);
            exit(1);
        }
    }
}
Whenever I run the code and try to open a file called "success.txt" I get a segmentation fault:
"RUN FINISHED; Segmentation fault; core dumped; real time: 3s; user: 0ms; system: 0ms"
I'm also calling this from my main code as
checkfile();
if that makes any difference.
Could someone point out what I'm missing, because I can't see it at the moment? I have a feeling some of my variables are not set properly, but I'm unsure. Thank you.
if (!fgets(userInput,sizeof(userInput),stdin))
is wrong on a couple of accounts.
userInput does not point to any valid memory.
sizeof(userInput) is the same as sizeof(char*), which is not what you want.
Change
char *userInput;
to something like:
char userInput[100];
Next problem
if((input_file1 = open(userInput, O_RDONLY)) < 0)
That's wrong. The return value of open is an int. Type of input_file1 is FILE*. I am surprised you didn't get compiler errors/warnings.
Change
FILE *input_file1;
to
int input_file1;
And the next problem
It's probably caused by fgets() including the newline character in userInput. Add code to trim the newline.
int len = strlen(userInput);
if (len > 0 && userInput[len-1] == '\n')   /* guard len > 0 so an empty string can't index userInput[-1] */
{
    userInput[len-1] = '\0';
}
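If your libc has strcspn(), the same trim collapses to one (empty-string-safe) line:

userInput[strcspn(userInput, "\n")] = '\0';   /* truncate at the first newline, if any */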
This question already has answers here:
Counting lines, words, characters and top ten words?
(5 answers)
Count lines, words and characters in C
(4 answers)
Closed 9 years ago.
So the assignment is to emulate the unix command wc in C. I've got most of the structure down but I've got some problems with the actual counting pieces.
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>

int main(int argc, char *argv[]){
    int file;
    int newLine = 0, newWord = 0, newChar = 0, i = 0;
    char *string;
    char buf[101];
    file = open(argv[1], O_RDONLY, 0644);
    int charRead = read(file, buf, 101);
    if (file == -1){
        printf("file does not exist");
    }
    else{
        for (i; i < 100; i++){
            if (buf[i] != '\0'){
                newChar++;
            }
            if (buf[i] == ' '){
                newWord++;
            }
            if (buf[i] == '\n'){
                newLine++;
            }
        }
    }
    printf("%d\n", newWord);
    printf("%d\n", newLine);
    printf("%d\n", newChar);
    printf("%s", argv[1]);
    close(file);
}
So the line counter works perfectly well.
The word count is always one short unless there is a space at the end of the word. I've tried to ameliorate this by making the special case:
if(buf[i]!='\0' || (buf[i]=='\0' && buf[i]!=' '))
but this doesn't seem to work either.
The other problem is that the character count is always way off. I think it has something to do with the buffer size, but I can't seem to find much documentation on how to make the buffer work in this scenario.
Please advise. Thanks!
EDIT I looked at the answers given in the "duplicate" questions, and I don't think they really addressed the question you had. I have written a short program that does what you want (and is "safe" in that it handles any size of input). I tested it against a short file, where it gave identical results to wc.
#include <stdio.h>
#include <string.h>

int main(int argc, char* argv[]) {
    // count characters, words, lines
    int cCount = 0, wCount = 0, lCount = 0, length;
    char buf[1000];
    FILE *fp;
    if (argc < 2) {
        printf("usage: wordCount fileName\n");
        return -1;
    }
    if ((fp = fopen(argv[1], "r")) == NULL) {
        printf("unable to open %s\n", argv[1]);
        return -1;
    }
    while (fgets(buf, 1000, fp) != NULL) {
        int ii, inWord, nonWhite;
        lCount++;
        inWord = 0;
        length = strlen(buf);
        cCount += length;
        for (ii = 0; ii < length; ii++) {
            // 1 when the byte is not a space, newline, or tab
            nonWhite = (buf[ii] != ' ' && buf[ii] != '\n' && buf[ii] != '\t') ? 1 : 0;
            if (nonWhite == 1) {
                if (inWord != 1) wCount++;   // first non-white byte starts a new word
                inWord = 1;
            }
            if (nonWhite == 0 && inWord == 1) {
                inWord = 0;                  // whitespace ends the current word
            }
        }
    }
    printf("Characters: %d\nWords: %d\nLines: %d\n\n", cCount, wCount, lCount);
    return 0;
}
Note - if you have a line with more than 1000 characters in it, the above may give a false result; this could be addressed by using getline(), which is a very safe (POSIX, but not ISO C) function that takes care of allocating enough memory for whatever line is read in. I don't think you need to worry about it here.
If you do worry about it, you can use the same trick as above (the "inWord" state) and extend it to an "inLine" state (reset when you encounter a \n). Then you don't need the inner for loop. It is marginally more memory efficient, but slower.
I wrote a simple program that would open a csv file, read it, make a new csv file, and only write some of the columns (I don't want all of the columns and am hoping removing some will make the file more manageable). The file is 1.15GB, but fopen() doesn't have a problem with it. The segmentation fault happens in my while loop shortly after the first progress printf().
I tested on just the first few lines of the csv and the logic below does what I want. The strange section for when index == 0 is due to the last column being in the form (xxx, yyy)\n (the , in a comma separated value file is just ridiculous).
Here is the code, the while loop is the problem:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char** argv) {
    long size;
    FILE* inF = fopen("allCrimes.csv", "rb");
    if (!inF) {
        puts("fopen() error");
        return 0;
    }
    fseek(inF, 0, SEEK_END);
    size = ftell(inF);
    rewind(inF);
    printf("In file size = %ld bytes.\n", size);
    char* buf = malloc((size+1)*sizeof(char));
    if (fread(buf, 1, size, inF) != size) {
        puts("fread() error");
        return 0;
    }
    fclose(inF);
    buf[size] = '\0';

    FILE *outF = fopen("lessColumns.csv", "w");
    if (!outF) {
        puts("fopen() error");
        return 0;
    }
    int index = 0;
    char* currComma = strchr(buf, ',');
    fwrite(buf, 1, (int)(currComma-buf), outF);
    int progress = 0;
    while (currComma != NULL) {
        index++;
        index = (index%14 == 0) ? 0 : index;
        progress++;
        if (progress%1000 == 0) printf("%d\n", progress/1000);
        int start = (int)(currComma-buf);
        currComma = strchr(currComma+1, ',');
        if (!currComma) break;
        if ((index >= 3 && index <= 10) || index == 13) continue;
        int end = (int)(currComma-buf);
        int endMinusStart = end-start;
        char* newEntry = malloc((endMinusStart+1)*sizeof(char));
        strncpy(newEntry, buf+start, endMinusStart);
        newEntry[end+1] = '\0';
        if (index == 0) {
            char* findNewLine = strchr(newEntry, '\n');
            int newLinePos = (int)(findNewLine-newEntry);
            char* modifiedNewEntry = malloc((strlen(newEntry)-newLinePos+1)*sizeof(char));
            strcpy(modifiedNewEntry, newEntry+newLinePos);
            fwrite(modifiedNewEntry, 1, strlen(modifiedNewEntry), outF);
        }
        else fwrite(newEntry, 1, end-start, outF);
    }
    fclose(outF);
    return 0;
}
Edit: It turned out the problem was that the csv file had commas in places I was not expecting, which caused the logic to fail. I ended up writing a new parser that removes lines with the incorrect number of commas. It removed 243,875 lines (about 4% of the file). I'll post that code instead, as it at least reflects some of the comments about free():
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char** argv) {
    long size;
    FILE* inF = fopen("allCrimes.csv", "rb");
    if (!inF) {
        puts("fopen() error");
        return 0;
    }
    fseek(inF, 0, SEEK_END);
    size = ftell(inF);
    rewind(inF);
    printf("In file size = %ld bytes.\n", size);
    char* buf = malloc((size+1)*sizeof(char));
    if (fread(buf, 1, size, inF) != size) {
        puts("fread() error");
        return 0;
    }
    fclose(inF);
    buf[size] = '\0';

    FILE *outF = fopen("uniformCommaCount.csv", "w");
    if (!outF) {
        puts("fopen() error");
        return 0;
    }
    int numOmitted = 0;
    int start = 0;
    while (1) {
        char* currNewLine = strchr(buf+start, '\n');
        if (!currNewLine) {
            puts("Done");
            break;
        }
        int end = (int)(currNewLine-buf);
        char* entry = malloc((end-start+2)*sizeof(char));
        strncpy(entry, buf+start, end-start+1);
        entry[end-start+1] = '\0';
        int commaCount = 0;
        char* commaPointer = entry;
        for (; *commaPointer; commaPointer++)
            if (*commaPointer == ',') commaCount++;
        if (commaCount == 14) fwrite(entry, 1, end-start+1, outF);
        else numOmitted++;
        free(entry);
        start = end+1;
    }
    fclose(outF);
    printf("Omitted %d lines\n", numOmitted);
    return 0;
}
You're malloc'ing but never freeing. Possibly you run out of memory, one of your mallocs returns NULL, and the subsequent call to str(n)cpy segfaults.
Adding free(newEntry); and free(modifiedNewEntry); immediately after the respective fwrite calls should solve your memory shortage.
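Concretely, the tail of the loop body would become something like this (only the free() calls are new; everything else is the original code):

if (index == 0) {
    char* findNewLine = strchr(newEntry, '\n');
    int newLinePos = (int)(findNewLine-newEntry);
    char* modifiedNewEntry = malloc((strlen(newEntry)-newLinePos+1)*sizeof(char));
    strcpy(modifiedNewEntry, newEntry+newLinePos);
    fwrite(modifiedNewEntry, 1, strlen(modifiedNewEntry), outF);
    free(modifiedNewEntry);   /* done with the per-entry copy */
}
else fwrite(newEntry, 1, end-start, outF);
free(newEntry);               /* done with the column slice */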
Also note that inside your loop you compute offsets into the buffer buf, which contains the whole file. These offsets are held in variables of type int, whose maximum value on your system may be too small for the numbers you are handling (the file is over a gigabyte). Also note that adding large ints can overflow to a negative value, which is another possible cause of the segfault (a negative offset into buf takes you to some address outside the buffer, possibly not even readable).
The malloc(3) function can (and sometimes does) fail.
At least code something like
char* buf = malloc(size+1);
if (!buf) {
    // size is a long, hence %ld; strerror needs <string.h>, errno needs <errno.h>
    fprintf(stderr, "failed to malloc %ld bytes - %s\n",
            size+1, strerror(errno));
    exit(EXIT_FAILURE);
}
And I strongly suggest clearing the successful result of a malloc with memset(buf, 0, size+1) (or otherwise using calloc ....), not only because the following fread could fail (which you are testing) but to ease debugging and reproducibility.
And likewise for every other call to malloc or calloc (you should always test them against failure)....
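For example, a calloc version of the same allocation (a sketch; calloc zero-fills the block, and modern libcs also check the count-times-size multiplication for overflow):

char *buf = calloc(size + 1, 1);   /* zeroed, so the buffer is always NUL-terminated */
if (!buf) {
    perror("calloc");
    exit(EXIT_FAILURE);
}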
Notice that by definition sizeof(char) is always 1. Hence I removed it.
As others pointed out, you have a memory leak because you don't call free appropriately. A tool like valgrind could help.
You need to learn how to use the debugger (e.g. gdb). Don't forget to compile with all warnings and debugging information (e.g. gcc -Wall -g). And improve your code till you get no warnings.
Knowing how to use a debugger is an essential required skill when programming (particularly in C or C++). That debugging skill (and ability to use the debugger) will be useful in every C or C++ program you contribute to.
BTW, you could read your file line by line with getline(3) (which can also fail and you should test that).
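A sketch of that, assuming a POSIX libc (getline() allocates and grows the buffer for you and returns -1 at EOF or on error, so check errno, from <errno.h>, to tell the two apart):

char *line = NULL;
size_t cap = 0;
ssize_t len;
errno = 0;
while ((len = getline(&line, &cap, fp)) != -1) {
    /* process 'line' here: len bytes, usually ending in '\n' */
}
if (errno)        /* a real error, not just end-of-file */
    perror("getline");
free(line);       /* getline's buffer must be freed by the caller */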
I am currently attempting to write a program that will tell its user how many times the specified 8-bit byte appears in the specified file.
I have some groundwork laid out, but when it comes to making sure that the file makes it into an array or buffer or whatever format I should put the file data into to check for the bytes, I feel I'm probably very far off from using the correct methods.
After that, I need to check whatever the file data gets put in to for the byte specified, but I am also unsure how to do this.
I think I may be over-complicating this quite a bit, so explaining anything that needs to be changed or that can just be scrapped completely is greatly appreciated.
Hopefully didn't leave out any important details.
Everything seems to be running (this code compiles), but when I try to printf the final statement at the bottom, it does not spit out the statement.
I have a feeling I just did not set up the final for loop correctly at all.
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
//#define BUFFER_SIZE (4096)

main(int argc, char *argv[]){ //argc = arg count, argv = array of arguments
    char buffer[4096];
    int readBuffer;
    int b;
    int byteCount = 0;
    b = atoi(argv[2]);
    FILE *f = fopen(argv[1], "rb");
    unsigned long count = 0;
    int ch;
    if (argc != 3){ /* required number of args = 3 */
        fprintf(stderr, "Too few/many arguments given.\n");
        fprintf(stderr, "Proper usage: ./bcount path byte\n");
        exit(0);
    }
    else{ /* open and read file */
        if (f == 0){
            fprintf(stderr, "File could not be opened.\n");
            exit(0);
        }
    }
    if ((b <= -1) || (b >= 256)){ /* checks to see if the byte provided is between 0 & 255 */
        fprintf(stderr, "Byte provided must be between 0 and 255.\n");
        exit(0);
    }
    else{
        printf("Byte provided fits in range.\n");
    }
    int i = 0;
    int k;
    int newFile[i];
    fseek(f, 0, SEEK_END);
    int lengthOfFile = ftell(f);
    for (k = 0; k < sizeof(buffer); k++){
        while (fgets(buffer, lengthOfFile, f) != NULL){
            newFile[i] = buffer[k];
            i++;
        }
    }
    if (newFile[i] = buffer[k]){
        printf("same size\n");
    }
    for (i = 0; i < sizeof(newFile); i++){
        if (b == newFile[i]){
            byteCount++;
        }
        printf("Final for loop is working???\n");
    }
}
OP is mixing fgets() with binary reads of a file.
fgets() reads a file up to the buffer size provided or until reaching a \n byte. It is intended for text processing. The typical way to determine how much data was read via fgets() is to look for a final \n - which may or may not be there. The data read could have embedded NUL bytes in it, so it becomes problematic to know when to stop scanning the buffer: on a NUL byte or on a \n?
Fortunately this can all be dispensed with, including the file seek and buffers.
// "rb" should be used when looking at a file in binary. C11 7.21.5.3 3
FILE *f = fopen(argv[1], "rb");
b = atoi(argv[2]);
unsigned long byteCount = 0;
int ch;
while ((ch = fgetc(f)) != EOF) {
if (ch == b) {
byteCount++;
}
}
The OP error checking is good. But the for(k = 0; k < sizeof(buffer); k++){ loop and its contents had various issues. OP had if(b = newFile[i]){ which should have been if(b == newFile[i]){
Not really an ANSWER --
Chux corrected the code, this is just more than fits in a comment.
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    struct stat st;
    int rc = 0;
    if (argv[1])
    {
        rc = stat(argv[1], &st);
        if (rc == 0)
            printf("bytes in file %s: %ld\n", argv[1], (long)st.st_size); /* st_size is an off_t, so cast for %ld */
        else
        {
            perror("Cannot stat file");
            exit(EXIT_FAILURE);
        }
        return EXIT_SUCCESS;
    }
    return EXIT_FAILURE;
}
The stat() call is handy for getting file size and for determining file existence at the same time.
Applications use stat instead of reading the whole file, which is great for gigantic files.