Run time error in C

My code gives a runtime error when it reaches the middle of the file.
If I change the sizes of temp2 or temp1, it crashes at the start of the file instead.
I can't work out what mistake I am making in this file.
It runs fine on a small file of about 100 lines.
I am building a file-searching project, so I need to store big files that hold the directory listings of entire drives.
#include<stdio.h>
#include<string.h>
#include<windows.h>
#include<conio.h>
char file[99999];
void brek(char *p, char *q);
void main()
{
    FILE *fp;
    int x = 0, y = 0;
    int a = 0, b = 0;
    int g = 0;
    char temp1[10000]; // temp1 is simply for jumping to the date against the given directory or file
    // the main array storing the lines is temp2.
    char temp2[1000][1000];
    system("chdir C:\\Users\\Faraz\\Documents && dir /s > dir.txt");
    fp = fopen("C:\\Users\\Faraz\\Documents\\dir.txt", "r");
    while ((y = getc(fp)) != EOF)
    {
        file[x] = y;
        x++;
    }
    fclose(fp);
    file[x] = '\0';
    puts(&file[0]);
    // <----the copying of the file to the string "file" (globally declared) is done---->//
    getche();
    system("cls");
    // <-------------------start loop-------------------->//
    a = 0;
    while (file[a] != '\0') // <-------starting of the loop
    {
        while (file[a] != '/')
        {
            temp1[a] = file[a];
            a++;
        }
        temp1[a] = '\0';
        a = a - 2;
        b = 0;
        while (file[a] != '\n')
        {
            temp2[g][b] = file[a];
            b++;
            a++;
        }
        temp2[g][b] = '\0';
        puts(&temp2[g][0]);
        g++;
    }
    // <-----------------end loop---------------------->//
}

Try this instead:
#include <windows.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
    WIN32_FIND_DATAA fdFile;
    HANDLE hFind = NULL;
    const char *sPath = "C:\\Users\\Faraz\\Documents\\*.*";
    if((hFind = FindFirstFileA(sPath, &fdFile)) == INVALID_HANDLE_VALUE)
    {
        printf("no such directory %s\n", sPath);
        return -1;
    }
    do
    {
        printf("Directory: %s\n", fdFile.cFileName);
    }
    while(FindNextFileA(hFind, &fdFile)); //Find the next file.
    FindClose(hFind); //Always, Always, clean things up!
    return 0;
}

The answer I found is that you simply can't load a 1 GB file into 512 MB of memory.
So we define a buffer size at the top of the source file using
#define BUFFER_SIZE 512
and use fgets to read the file line by line, so we never exceed the memory space the OS allocates to us.
Dynamic memory allocation is not the solution to our problem either, as we would just start using heap space instead.
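As a rough sketch of that line-by-line approach (the dir.txt path is taken from the question; the body of the loop is just a placeholder for whatever searching you do per line):
#include <stdio.h>

#define BUFFER_SIZE 512

int main(void)
{
    char line[BUFFER_SIZE];
    FILE *fp = fopen("C:\\Users\\Faraz\\Documents\\dir.txt", "r");
    if (fp == NULL)
        return 1;
    while (fgets(line, sizeof line, fp) != NULL)
    {
        /* parse or search this single line here instead of keeping the whole file */
        fputs(line, stdout);
    }
    fclose(fp);
    return 0;
}
Note that a line of dir /s output longer than BUFFER_SIZE - 1 characters will simply arrive over several fgets calls.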

Related

How do you read what is in a directory after opening it?

Why can't I read what is in a directory? It keeps giving me segmentation faults.
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
int count(char* loc)
{
    int num = 0;
    char c;
    FILE *file = fopen(loc, "r");
    while( (c = fgetc(file)) != EOF) {
        if(c == '\n')
            num++;
    }
    int r = fclose(file);
    return num;
}

int main()
{
    int lines = 0;
    int files = 0;
    char* names[files];
    int i = 0;
    DIR* d = opendir("./visual"); //You can change this bit
    struct dirent *file;
    while((file = readdir(d)) != NULL){
        i++;
        names[i] = file->d_name;
        files++;
        printf("%s\n", names[i]);
    }
    closedir(d);
    printf("__________\n");
    for(int i = 0;i < files;i++){
        printf("i = %d\n", i);
        lines = lines + count(names[i]);
    }
    printf("you have written %d lines of code", lines);
}
Here you define an array of size 0.
int files = 0;
char* names[files];
Here (after i has already been incremented to 1, while files is still 0) you write to an element of that zero-element array; note the conflict?
names[i] = file->d_name;
Then you increase files.
files++;
This does not, however, change the size of the array, and even if it did, it would be too late.
Going on, I will quote WhozCraig's helpful comment (with permission):
Even fixing that, you're still in for an awakening. names[i] = file->d_name will store off a pointer to memory that is neither guaranteed, nor even likely, to be static for the lifetime of the enumeration.
It can/will be reused as you enumerate each file entry. And even if it didn't, all that memory is guaranteed to be off-limits once closedir is fired.
If you want to retain the file names, you need to make copies of them; not just save off pointers.
End of quote.
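To make the copying concrete, here is a minimal sketch using readdir and strdup (strdup is POSIX; the growing names array and its realloc bookkeeping are illustrative additions, and "./visual" is the directory from the question):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

int main(void)
{
    char **names = NULL;
    int files = 0;
    DIR *d = opendir("./visual");
    if (d == NULL)
        return 1;
    struct dirent *entry;
    while ((entry = readdir(d)) != NULL) {
        char **grown = realloc(names, (files + 1) * sizeof *names);
        if (grown == NULL)
            break;
        names = grown;
        names[files] = strdup(entry->d_name); /* copy the name, don't keep the pointer */
        if (names[files] != NULL)
            files++;
    }
    closedir(d); /* the copies remain valid after this */
    for (int i = 0; i < files; i++) {
        printf("%s\n", names[i]);
        free(names[i]);
    }
    free(names);
    return 0;
}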

Compare two binary files in C

I am writing a program to compare two binary files and report the first difference. I want to read 16 bytes of data from each file continuously and compare them. For that I am storing 16 bytes from each file into buffer1 and buffer2. When I print the output, buffer1 contains the data of both file1 and file2.
The code is as follows:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void printConversion(char *buf1, char *buf2) {
size_t len = strlen(buf1);
char *binary = malloc(len * 8 + 1);
binary[0] = '\0';
for (size_t i = 0; i < len; ++i) {
char ch = buf1[i];
for (int j = 7; j >= 0; --j) {
if (ch & (1 << j)) {
strcat(binary,"1");
} else {
strcat(binary,"0");
}
}
}
printf("File1: %s\t", binary);
free(binary);
printf("File2:");
for (int i = 0; i < sizeof(buf2); i++) {
printf("%x", buf2[i] - '0');
}
}
void fileRead(FILE *fp, char *buf, int count) {
fseek(fp, count, SEEK_SET);
fread(buf, 1, 16, fp);
}
int fileSize(FILE *fp) {
fseek(fp, 0, SEEK_END);
int size = ftell(fp) + 1;
return size;
}
int main(int argc, char *argv[]) {
printf("***Binary File Comparator***\n ");
int count = 0;
int index = 0;
char buffer1[16];
char buffer2[16];
char buffer3[16];
char buffer4[16];
// Invalid Number of Arguments
if (argc < 3 || argc > 3) {
printf("Invalid Number of Arguments\n");
}
FILE *fp1, *fp2;
fp1 = fopen(argv[1], "rb");
int size = fileSize(fp1);
int size1 = size;
fclose(fp1);
while (size > 1) {
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer1, count);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer2, count);
if (size1 < count) {
int lastSize = count - size1;
count = count + lastSize;
fclose(fp2);
} else {
count = count+16;
fclose(fp2);
}
printf("buffer1:%s\tbuffer2:%s\n", buffer1, buffer2);
size = size - 16;
int result = strcmp(buffer1, buffer2);
if (result != 0) {
for (int i = 0; i < sizeof(buffer1); i++) {
if (buffer1[i] != buffer2[i]) {
int count1 = (count - 16) + i;
index++;
if (index == 1) {
printf("Byte_Offset:%x\n", count1);
fp1 = fopen(argv[1], "rb");
fileRead(fp1, buffer3, count1);
fclose(fp1);
fp2 = fopen(argv[2], "rb");
fileRead(fp2, buffer4, count1);
fclose(fp2);
printConversion(buffer3, buffer4);
break;
}
} else {
continue;
}
}
}
}
}
The printf call near the end of the while loop is the one printing buffer1 and buffer2.
The output is as follows:
buffer1:83867715933586928386771593358692 buffer2:8386771593358692
buffer1:49216227905963264921622790596326 buffer2:4921622790596326
buffer1:40267236116867294026723611686729 buffer2:4026723611686729
buffer1:82306223673529228230622367352922 buffer2:8230622367352922
buffer1:25869679356114222586967935611422 buffer2:2586967935611422
Can anybody tell me what I am doing wrong? Please point out the error and any optimization changes that could be made to the code. I am still at the learning stage, so your feedback will be very helpful.
You are complicating the task by reading 16 bytes at a time. If the goal is to indicate the first difference, just read one byte at a time from both files with getc() this way:
int compare_files(FILE *fp1, FILE *fp2) {
    unsigned long pos;
    int c1, c2;
    for (pos = 0;; pos++) {
        c1 = getc(fp1);
        c2 = getc(fp2);
        if (c1 != c2 || c1 == EOF)
            break;
    }
    if (c1 == c2) {
        printf("files are identical and have %lu bytes\n", pos);
        return 0;  // files are identical
    } else if (c1 == EOF) {
        printf("file1 is included in file2, the first %lu bytes are identical\n", pos);
        return 1;
    } else if (c2 == EOF) {
        printf("file2 is included in file1, the first %lu bytes are identical\n", pos);
        return 2;
    } else {
        printf("file1 and file2 differ at position %lu: 0x%02X <> 0x%02X\n", pos, c1, c2);
        return 3;
    }
}
In terms of efficiency, reading one byte at a time does not pose a problem if the streams are buffered. For large files, you can get better performance by memory mapping the file contents if available on the target system and for the given input streams.
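For reference, a minimal sketch of that memory-mapping idea on POSIX systems (mmap, fstat and the helper name compare_mapped are assumptions of this sketch, not something the answer above specifies):
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

/* Returns 0 if the files are identical, 1 if they differ, -1 on error. */
int compare_mapped(const char *path1, const char *path2)
{
    int result = -1;
    struct stat st1, st2;
    int fd1 = open(path1, O_RDONLY);
    int fd2 = open(path2, O_RDONLY);
    if (fd1 >= 0 && fd2 >= 0 && fstat(fd1, &st1) == 0 && fstat(fd2, &st2) == 0) {
        if (st1.st_size != st2.st_size) {
            result = 1;                      /* different sizes, so different contents */
        } else if (st1.st_size == 0) {
            result = 0;                      /* two empty files are identical */
        } else {
            void *p1 = mmap(NULL, st1.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
            void *p2 = mmap(NULL, st2.st_size, PROT_READ, MAP_PRIVATE, fd2, 0);
            if (p1 != MAP_FAILED && p2 != MAP_FAILED)
                result = memcmp(p1, p2, st1.st_size) ? 1 : 0;
            if (p1 != MAP_FAILED) munmap(p1, st1.st_size);
            if (p2 != MAP_FAILED) munmap(p2, st2.st_size);
        }
    }
    if (fd1 >= 0) close(fd1);
    if (fd2 >= 0) close(fd2);
    return result;
}
This only reports whether the files differ; locating the first differing offset would still be done with a byte-by-byte loop like the one above.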
Not an actual answer, but a word on optimisation. You can increase the speed of the program by using a bigger buffer. Basically, the larger the buffer, the faster the program runs; HOWEVER, the speed you gain from just making it larger increases only logarithmically.
Here is a picture of a graph that will help you understand. Also, what I mentioned applies to any similar situation, including copying files, filling a sound buffer, and so on. Loading the entire file into RAM first and operating on it will usually be faster than loading parts of it. Of course this is not possible with very large files, but it is still what you should aim for if you want speed.
PS: I'm writing this as an answer because I don't have the rep to comment.
EDIT: I came up with a solution, but since you did not state what you need to do with buffer3 and buffer4, I packed it up inside a function.
If you are sure that you are only ever going to use 16 bytes as the buffer size, remove the nBufferSize parameter and replace the dynamic buffer allocation with a static one (a sketch of that variant follows the demo below).
If you need the buffers after the call, add them as parameters and keep the nBufferSize param. Keep in mind that if you intend to use them outside the function, you should also allocate them outside the function, so things don't get messy.
#include <stdio.h>
#include <stdlib.h>

/** Returns 0 if files are identical, 1 if they are different and -1 if there
    is an error. */
int FileCmp(char* szFile1, char* szFile2, int nBufferSize)
{
FILE *f1, *f2;
f1 = fopen(szFile1, "rb");
f2 = fopen(szFile2, "rb");
// Some error checking?
if (f1 == NULL || f2 == NULL)
return -1;
// You can check here for file sizes before you start comparing them.
// ...
// Start the comparrison.
/// Replace this part with static allocation. --------
char* lpBuffer1 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer1 == NULL) // close the files and return error.
{
fclose(f1);
fclose(f2);
return -1;
}
char* lpBuffer2 = malloc(sizeof(char)*nBufferSize);
if (lpBuffer2 == NULL) // close the files, free buffer1 and return error.
{
free(lpBuffer1);
fclose(f1);
fclose(f2);
return -1;
}
/// --------------------------------------------------
while(1)
{
unsigned int uRead1 = fread(lpBuffer1, sizeof(char), nBufferSize, f1);
unsigned int uRead2 = fread(lpBuffer2, sizeof(char), nBufferSize, f2);
if (uRead1 != uRead2)
goto lFilesAreDifferent;
for(unsigned int i = 0; i < uRead1; i++)
if (lpBuffer1[i] != lpBuffer2[i])
goto lFilesAreDifferent;
if ((feof(f1) != 0) && (feof(f2) != 0))
break; // both files have nothing more to read and are identical.
goto lSkip;
lFilesAreDifferent:
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 1;
lSkip:;
}
// The files are the same. Close them, free the buffers and return 0.
free(lpBuffer1);
free(lpBuffer2);
fclose(f1);
fclose(f2);
return 0;
}
A simple Demo:
#define BUFFER_SIZE 16
int main(int nArgs, char** szArgs)
{
if (nArgs != 3)
{
printf("Invalid number of arguments.");
return 0;
}
int nResult = FileCmp(szArgs[1], szArgs[2], BUFFER_SIZE);
switch (nResult)
{
case 0: printf("Files [%s] and [%s] are identical.", szArgs[1], szArgs[2]); break;
case 1: printf("Files [%s] and [%s] are different.", szArgs[1], szArgs[2]); break;
case -1: printf("Error."); break;
}
return 0;
}
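As mentioned above, here is a sketch of the static-buffer variant, assuming the block size really is fixed at 16 bytes (the name FileCmp16 and the goto-free loop are this sketch's own choices, not part of the answer):
#include <stdio.h>
#include <string.h>

/* Returns 0 if the files are identical, 1 if they differ, -1 on error. */
int FileCmp16(const char *szFile1, const char *szFile2)
{
    char buf1[16], buf2[16];
    int result = 0;
    FILE *f1 = fopen(szFile1, "rb");
    FILE *f2 = fopen(szFile2, "rb");
    if (f1 == NULL || f2 == NULL) {
        if (f1) fclose(f1);
        if (f2) fclose(f2);
        return -1;
    }
    for (;;) {
        size_t n1 = fread(buf1, 1, sizeof buf1, f1);
        size_t n2 = fread(buf2, 1, sizeof buf2, f2);
        if (n1 != n2 || memcmp(buf1, buf2, n1) != 0) {
            result = 1;                   /* different lengths or different bytes */
            break;
        }
        if (n1 < sizeof buf1) {           /* both files ended at the same point */
            if (ferror(f1) || ferror(f2))
                result = -1;              /* a read error, not a real end of file */
            break;
        }
    }
    fclose(f1);
    fclose(f2);
    return result;
}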
EDIT II: Personally, I have never used the C standard FILE library (it was either C++ fstream or pure Win32 file API for me), so don't take my word here for granted, but fread is the fastest function I could find (faster than fgets or fgetc). If you want to go even faster than this, you should look into OS-dependent functions (like ReadFile() for Windows).
chqrlie's solution using getc is absolutely the right way to do this. I wanted to address some points brought up in comments, and find it's best to do that with code. In one comment, I recommended pseudocode which could be confusing (namely, you can't write fwrite(file1...) || fwrite(file2 ...) because of the short circuit). But you can implement the idea of that with:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
* Compare two files, 16 bytes at a time. (Purely to demonstrate memcmp.
* Clearly, this should be implemented with getc.)
*/
FILE * xfopen(const char *, const char *);
size_t xfread(void *, FILE *, const char *);
int
main(int argc, char **argv)
{
FILE *fp[2];
size_t n[2];
char buf[2][16];
unsigned count = 0;
if(argc != 3) { return EXIT_FAILURE; }
fp[0] = xfopen(argv[1], "r");
fp[1] = xfopen(argv[2], "r");
do {
n[0] = xfread(buf[0], fp[0], argv[1]);
n[1] = xfread(buf[1], fp[1], argv[2]);
if( n[0] != n[1] || (n[0] && memcmp(buf[0], buf[1], n[0]))) {
fprintf(stderr, "files differ in block %u\n", count);
return 1;
}
count += 1;
} while(n[0]);
puts("files are identical");
return 0;
}
size_t
xfread(void *b, FILE *fp, const char *name)
{
size_t n = fread(b, 1, 16, fp);
if(n == 0 && ferror(fp)) {
fprintf(stderr, "Error reading %s\n", name);
exit(EXIT_FAILURE);
}
return n;
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = strcmp(path, "-") ? fopen(path, mode) : stdin;
if( fp == NULL ) {
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}

How to find words with capital letters in a char using c?

I'm trying to find all the words with capital letters in a string, but am unable to process my data structure. I seem to be able to print out fileContent, indicating that it is loading successfully, but my second function is not working on the file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* loadFile(char* fileName)
{
FILE *inputFile;
inputFile = fopen(fileName, "r");
//finds the end of the file
fseek(inputFile, 0, SEEK_END);
//stores the size of the file
int size = ftell(inputFile);
//Sets the scan to the start of the file
fseek(inputFile, 0, SEEK_SET);
char *documentStore = (char*)malloc(size);
int i = 0, c;
while((c = fgetc(inputFile)) != EOF)
{
documentStore[i] = c;
i++;
}
return documentStore;
}
void countImportantWords(char* fileContent, char** importantWords, int* frequencyWords)
{
int uniqueWordCount = 0;
int lengthWordStore = 10;
int i = 0;
int recording = 0;
char wordBuffer[50];
int wordBufferCount = 0;
int isWordPresent = 0;
while(fileContent[i] != EOF)
{
//To allocate more memory incase the structure is full
if(uniqueWordCount == lengthWordStore)
{
lengthWordStore += 10;
char** newWordStore = realloc(importantWords, lengthWordStore * sizeof(char*));
int* newFrequencyStore = realloc(frequencyWords, sizeof(int));
importantWords = newWordStore;
frequencyWords = newFrequencyStore;
}
printf("%s", wordBuffer);
//Conditions to fill if its a word
if(fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 0)
{
wordBuffer[0] = fileContent[i];
recording = 1;
}else if(fileContent[i] >= 'a' && fileContent[i] <= 'z' && recording == 1)
{
//each if is to check if the end of word is reached. Any character that is non alphabetical is considered end of word
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else if (fileContent[i] >= 'A' && fileContent[i] <= 'Z' && recording == 1)
{
wordBufferCount += 1;
wordBuffer[wordBufferCount] = fileContent[i];
} else {
//Adding a terminating character so that it strcpy only copies until that point
wordBuffer[wordBufferCount + 1] = '\0';
recording = 0;
//check to see if that word is in the array already, and if it is, it will just increment the frequency
for(int j = 0; j < uniqueWordCount; j++){
if(strcmp(wordBuffer, importantWords[j]) == 0)
{
frequencyWords[j] += 1;
isWordPresent = 1;
}
}
//if its not present, it should assign it to the structure
if(isWordPresent == 0)
{
char* wordStore = (char*)malloc(wordBufferCount * sizeof(char));
strcpy(wordStore, wordBuffer);
uniqueWordCount += 1;
importantWords[uniqueWordCount] = wordStore;
frequencyWords[uniqueWordCount] = 1;
}
}
i++;
}
}
int main() {
char fileName[50];
char *fileContent;
char **importantWords = (char**)malloc(10*sizeof(char**));
int *frequencyWords = (int*)malloc(10*sizeof(int));
printf("Please input the full file path: ");
scanf("%s", fileName);
fileContent = loadFile(fileName);
countImportantWords(fileContent, importantWords, frequencyWords);
int i = 0;
while(importantWords[i] != '\0')
{
printf("%s %d", importantWords[i], frequencyWords[i]);
i++;
}
return 0;
}
I've put in the full file so you can see how the structure was created in case that is the issue, but ideally the final loop would print out all the important words and their frequency. Currently I'm getting exit code 11, which I'm not sure the meaning of, but it may be worth mentioning. I'd really appreciate any help :)
You can simplify the process dramatically by utilising functions and learning to manage your memory. I wrote a short example which does not take punctuation into account. It just assumes every word is separated by a space, which you can customise at your discretion.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
char* readfile(char* filename){
char* data = NULL;
FILE* file = fopen(filename, "r");
if(file == NULL){
return NULL;
}
fseek(file, 0, SEEK_END);
long size = ftell(file)+1;
fseek(file, 0, SEEK_SET);
data = (char*)malloc(size);
if(data == NULL){
return NULL;
}
fgets(data, (int)size, file);
return data;
}
typedef struct uppercase_t{
char** word;
int count;
}uppercase;
void copy(uppercase* u,char* token){
size_t length = strlen(token);
u->word[u->count] = (char*)malloc(length+1);
if(u->word[u->count] == NULL){
return;
}
strcpy(u->word[u->count], token);
++u->count;
}
void createuppercasedata(uppercase* u, char* data){
const char delimeter[] = " ";
char* token = strtok(data, delimeter);
if(token == NULL){
return;
}
u->word = (char**)malloc((u->count+1) * sizeof(char*));
if(u->word == NULL){
return;
}
if(isupper(token[0])){
copy(u,token);
}
while(token != NULL){
token = strtok(0, delimeter);
if(token != NULL)
if(isupper(token[0])) {
char** reallocated = (char**)realloc(u->word, (u->count+1) * sizeof(char*));
if(reallocated == NULL){
return;
}
u->word = reallocated;
copy(u, token);
}
}
}
void destroyuppercasedata(uppercase* u){
for(int index = 0; index < u->count; ++index){
free(u->word[index]);
}
free(u->word);
}
int main(){
char filename[] = "textfile";
char* data = readfile(filename);
if(data == NULL){
return -1;
}
uppercase u = {0};
createuppercasedata(&u, data);
printf("found %i uppercase words\n",u.count);
for(int index = 0; index < u.count; ++index){
printf("%s\n", u.word[index]);
}
destroyuppercasedata(&u);
free(data);
}
The code will allocate a new pointer for each uppercase word and memory for the word to be copied into. It will free all the memory it allocated in the structure with destroyuppercasedata, and it will free the initial data that was read from the file. Error checking and memory management in C are really important, so utilise them properly.
This was the test file I used.
textfile
How many Uppercase words can Be Found In this text File the answer should be Seven
And this was the output to the terminal:
How
Uppercase
Be
Found
In
File
Seven

How to save in a string the contents of a text file

This is the code. Why, when I print the string, do I get all the words but also a strange symbol on the final row, a random ASCII character?
My objective is to save all the words in a string so I can operate on it.
For example I have this document:
Mario
Paul
Tyler
How can I save all the words in a string?
#include <stdio.h>
#include <stdlib.h>
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
int main(int argc, char *argv[]) {
int l,i=0,j=0,parole=0;
char A[10][10];
char leggiparola;
char testo[500];
FILE*fp;
fp=fopen("parole.txt","r");
if(fp!=NULL)
{
while(!feof(fp))
{
fscanf(fp,"%c",&leggiparola);
printf("%c", leggiparola);
testo[j]=leggiparola;
j++;
}
}
fclose(fp);
printf("%s",testo);
return 0;
}
Besides while(!feof(fp)) being "always wrong", you fail to 0-terminate the result string.
To do so place a
testo[j] = '\0'
just after the while-loop.
Instead of using fscanf, try with getc:
int leggiparola; /* This needs to be an int so it can also hold the unique
                    EOF value in addition to 256 different char values. */
...
while ( (leggiparola = getc(fp)) != EOF)
{
    printf("%c", leggiparola);
    testo[j++] = leggiparola;
    if (j == sizeof(testo)-1)
        break;
}
testo[j] = 0;
Here's fslurp. It's a bit messy due to the need to grow the buffer manually.
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

/*
load a text file into memory
*/
char *fslurp(FILE *fp)
{
char *answer;
char *temp;
int buffsize = 1024;
int i = 0;
int ch;
answer = malloc(1024);
if(!answer)
return 0;
while( (ch = fgetc(fp)) != EOF )
{
if(i == buffsize-2)
{
if(buffsize > INT_MAX - 100 - buffsize/10)
{
free(answer);
return 0;
}
buffsize = buffsize + 100 + buffsize/10;
temp = realloc(answer, buffsize);
if(temp == 0)
{
free(answer);
return 0;
}
answer = temp;
}
answer[i++] = (char) ch;
}
answer[i++] = 0;
temp = realloc(answer, i);
if(temp)
return temp;
else
return answer;
}
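A short usage sketch (the parole.txt name is borrowed from the question; error handling is kept minimal):
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    FILE *fp = fopen("parole.txt", "r");
    if (fp == NULL)
        return 1;
    char *text = fslurp(fp);   /* the function above */
    fclose(fp);
    if (text == NULL)
        return 1;
    printf("%s", text);
    free(text);
    return 0;
}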

Large numbers of input files causing program to not function at all

I'm working on a word frequency program that is designed to handle any number of input files. It works fine for smaller numbers of files, even if those files have tens of thousands of words, but when attempting to run it with a larger number of files (24 in the case I'm testing), it barely even begins reading from the first file before segfaulting.
typedef struct {
int noInFiles, numFiles, numToPrint;
char** fileNames;
FILE** files;
Hash hash;
} Freq;
void openFiles(Freq* freq) {
int i;
char* str;
freq->files = calloc(1,sizeof(FILE**));
for(i = 0; i < freq-> numFiles; i++) {
freq->files[i] = fopen(freq->fileNames[i],"r");
if(freq->files[i] == NULL) {
str = malloc(strlen(freq->fileNames[i]) + 5);
sprintf(str,"wf: %s",freq->fileNames[i]);
perror(str);
free(str);
exit(EXIT_FAILURE);
}
}
}
void wordCount(Freq* freq) {
int i, totalWords = 0;
char *word = NULL;
unsigned wordLength = 0, memSize = 0;
for(i = 0; i < freq->numFiles; i++) {
fprintf(stderr,"Counting from file %d named %s\n", i,freq->fileNames[i]);
while(EOF != getWord(&word, &wordLength, &memSize, freq->files[i], "file"))
{
addEntry(&(freq->hash), word, 1);
totalWords++;
free(word);
word = NULL;
}
}
freq->totalWords = totalWords;
}
Valgrind says that openFiles has an "Invalid write of size 4", but I have no idea what that means.
This line is problematic:
freq->files = calloc(1,sizeof(FILE**));
According to the struct, you need a FILE* for each of the files, but this line allocates room for just one pointer. Change it to this:
freq->files = calloc(freq->numFiles, sizeof(FILE*));
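Put back into the question's openFiles, the corrected allocation would look like this (the NULL check on calloc is an addition of this sketch, not part of the original answer):
void openFiles(Freq* freq) {
    int i;
    char* str;
    /* one FILE* slot per file, not a single pointer-sized block */
    freq->files = calloc(freq->numFiles, sizeof(FILE *));
    if (freq->files == NULL) {
        perror("wf: calloc");
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < freq->numFiles; i++) {
        freq->files[i] = fopen(freq->fileNames[i], "r");
        if (freq->files[i] == NULL) {
            str = malloc(strlen(freq->fileNames[i]) + 5);
            sprintf(str, "wf: %s", freq->fileNames[i]);
            perror(str);
            free(str);
            exit(EXIT_FAILURE);
        }
    }
}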

Resources