Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 years ago.
Improve this question
I need to make program that compare content of all files in directory and detects duplicates. Program works fine until it starts comparing files inside the for loop. I think there might be error in array but I can't find any. Any help will be appreciated.
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
int listFiles(char *path, int a, char ***array);
int main() {
char path[100];
char **array = NULL;
int a = 0;
printf("Enter path to list files: ");
scanf("%s", path);
listFiles(path, a, &array);
return 0;
}
int listFiles(char *basePath, int a, char ***array) {
char path[1000], c, *d = NULL;
FILE *input_file;
struct dirent *dp;
DIR *dir = opendir(basePath);
if (!dir) {
return 1;
}
while ((dp = readdir(dir)) != NULL) {
if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) {
strcpy(path, basePath);
strcat(path, "\\");
strcat(path, dp->d_name);
input_file = fopen(path, "r");
if (input_file == NULL) {
fclose(input_file);
}
if (input_file) {
printf("%s\n", path);
while ((c = getc(input_file)) != EOF)
d = realloc(d, (x + 1) * sizeof(*d));
(d)[x++] = c;
}
*array = realloc(*array, (a + 1) * sizeof(**array));
(*array)[a++] = d;
if (a > 1) {
for (int j = 0; j < a - 1; ++j) {
if (!memcmp(array[a], array[j], 999)) {
printf("duplikat\n");
free(array[a]);
--a;
break;
}
}
}
}
listFiles(path, a, array);
}
closedir(dir);
return 0;
}
while ((c = getc(input_file)) != EOF)
d=realloc(d, (x+1)*sizeof(*d));
(d)[x++] = c;
Is equal to:
while ((c = getc(input_file)) != EOF) {
d = realloc(d, (x+1)*sizeof(*d));
}
(d)[x++] = c;
Which reads from a file until EOF and reallocates a d pointer with the same size for each character in the file. Then is assigns to the last element in d the value of EOF. So the content of the file is not saved in d pointer.
Always explicitly use { and } (except for cases when don't).
Check for overflow.
size_t pathsize = sizeof(path);
assert(pathsize > strlen(basePath) + 1 + strlen(dp->d_name) + 1);
strcpy(path, basePath);
strcat(path, "\\");
strcat(path, dp->d_name);
Do similar checks everytime you do something dangerous.
What's the point of closing NULL file?
if(input_file == NULL){
fclose(input_file);
}
if (input_file){
The number 999 is very magical.
memcmp(array[a],array[j],999)
You incorrectly handle arrays. Or don't free memory. I don't know.
*array = realloc(*array, (a+1)*sizeof(**array));
(*array)[a++] = d;
...
free(array[a]);
There is a little point in char *** variable. Don't use *** (unless in cases where you use them). The char***array can be completely removed.
Start with a good abstraction. First write a function that will compare two files bool files_compare(const char *file1, const char *file2);. Then write a function that will list all files in a directory. Then for each pair of files listed compare them. There is a little need to no need in storing the content of the file in dynamic memory (what if you have files that have 10Gb and a system with 1Gb of memory?).
The listFiles function is untimately is endlessly recursively calling itself if there is at least one file in the directory.
int listFiles(char *basePath, int a, char ***array)
{
...
DIR *dir = opendir(basePath);
...
while ((dp = readdir(dir)) != NULL){
...
listFiles(path,a,array);
}
}
This code has many errors. It reuses old memory, incorrectly handles arrays, has some strange magic numbers, leaks memory, does not close opened files, is not protected against overflow, and opens the directory recursively.
Related
This question already has answers here:
Getting a stack overflow exception when declaring a large array
(8 answers)
Closed 1 year ago.
I have quite a big dictionary file. I want to take each line from the file and store it in an array so I can perform manipulations later. For example given the words:
aaaa
arggghhh
broooooo
Coooodee
If I call array[2], it should give me "broooooo". I have tried using the code below however I keep running into segmentation faults. Any help would be greatly appreciated.
Here is the code I have been trying:
int main (int argc, char* argv[]) {
char* file="/usr/share/dict/words";
FILE *dict;
char str[60];
char arr[80368][60];
int count = 0;
dict = fopen(file, "r");
if(dict == NULL){
perror("Error opening file");
return(-1);
}
while(fgets(str,sizeof(str),dict) != NULL){
strcpy(arr[count], str);
count++;
}
fclose(dict);
return 0;
}
char arr[80368][60];
This will try to allocate 4822080 bytes on the stack. The default maximum should be 8Mb so it is lower than that, but maybe it is configured lower on your system? You can inspect with ulimit -a | grep stack.
Does your program work if you test with a smaller input file, say generated with head -100 /usr/share/dict/words > input.txt, and then with the size of arr reduced correspondingly?
This is the best method to read file when you don't know the size.
If you want the file inside array you just need to use str_split else the file is inside char *
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fcntl.h>
#include <stddef.h>
char **str_split(char *cmd, char split_by)
{
char **argv = malloc(sizeof(char *) * strlen(cmd));
int pos = 0;
for (int i = 0; cmd[i] != '\0'; i++) {
if (cmd[i] == split_by || cmd[i] == '\0') {
cmd[i] = '\0';
argv[pos] = strdup(cmd);
pos++;
cmd += i + 1;
i = 0;
}
}
argv[pos] = strdup(cmd);
argv[pos + 1] = NULL;
return argv;
}
char *load_file(char *file_path)
{
int fd;
struct stat file_stat;
stat(file_path, &file_stat);
int file_size = file_stat.st_size;
char *str = malloc(sizeof(char) * file_size + 2);
if ((fd = open(file_path, O_RDONLY)) == -1) {
printf("error");
exit(84);
}
read(fd, str, file_size);
str[file_size] = '\0';
return str;
}
int main(int ac, char **av)
{
char *file_in_string = load_file(av[1]);
printf("this is the file in one string:\n%s\n", file_in_string);
char **file_in_array = str_split(file_in_string, '\n');
printf("this is the file inside array");
for (int i = 0; file_in_array[i]; i++)
printf("line [%d]: %s\n", i, file_in_array[i]);
}
Why can't I read what is in a directory, it keeps on giving me segmentation faults?
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
int count(char* loc)
{
int num = 0;
char c;
FILE *file = fopen(loc, "r");
while( (c = fgetc(file)) != EOF) {
if(c == '\n')
num++;
}
int r = fclose(file);
return num;
}
int main()
{
int lines = 0;
int files = 0;
char* names[files];
int i = 0;
DIR* d = opendir("./visual"); //You can change this bit
struct dirent *file;
while((file = readdir(d)) != NULL){
i++;
names[i] = file->d_name;
files++;
printf("%s\n", names[i]);
}
closedir(d);
printf("__________\n");
for(int i = 0;i < files;i++){
printf("i = %d\n", i);
lines = lines + count(names[i]);
}
printf("you have written %d lines of code", lines);
}
Here you define an array of size 0.
int files = 0;
char* names[files];
Here (while i and files are both 0) you access the first of those zero (note the conflict here?) elements in the array.
names[i] = file->d_name;
Then you increase files.
files++;
This does however not change the size of the array, and even if it would it would be too late.
Going on, I will quote WhozCraigs helpful comment (with permission):
Even fixing that, you're still in for an awakening. names[i] = file->d_name will store off a pointer to memory that is neither guaranteed, nor even likely, to be static for the lifetime of the enumeration.
It can/will be reused as you enumerate each file entry. And even if it didn't, all that memory is guaranteed to be off-limits once closedir is fired.
If you want to retain the file names, you need to make copies of them; not just save off pointers.
End of quote.
I'm creating a program which recursively finds all #include dependencies between C files in a specified directory and its child directories. Dependency paths should be absolute, so I use realpath to resolve relative paths and symbolic links. Since there can be many files I have decided to make the program multithreaded with OpenMP or pthreads.
The problem is that realpath resolves paths through the working directory. All threads share the same working directory so I would need to put a mutex on chdir and realpath.
Is there any alternate standard function to realpath which also takes the directory to resolve the path from as an argument?
There are a number of POSIX functions with the at suffix (such as openat()) which work with a specified directory. There isn't, however, a realpathat() function in POSIX. There also isn't an opendirat(), but there is fdopendir() which creates a DIR stream for an open directory file descriptor.
In a multithreaded program, any use of chdir() is fraught.
You should rethink your algorithm to use the various *at() functions to avoid needing to change directory at all. You'd open the directories for reading (open() or openat() with O_DIRECTORY, perhaps — though O_DIRECTORY isn't 100% necessary, nor is it supported on macOS) so that you can then access the files appropriately using the directory file descriptor in the *at() calls.
I worked a bit on a solution. It is by no means optimal but at least it seems to work. I created the function abspathat which turns a relative path into an absolute path. Then I use the built in readlinkat to fix the symlinks. The solution handles turns paths like "../code.c" "./code.c" "code.c" into "/dir/code.c". However it does currently not fix paths such as ../dir/../code.c, though why would anyone create such a path. Nor does it check if the file actually exists. Feel free to improve or do whatever you like with this code.
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <dirent.h>
#include <stdio.h>
/*****************************************************************************/
char *abspathat(char *dirpath, int dirlen, char *path);
/*****************************************************************************/
static const int MAX_FILEPATH = 4096;
/*****************************************************************************/
char *realpathat(int dirfd, char *dirpath, int dirlen, char *path) {
char *abs = abspathat(dirpath, dirlen, path);
char *buf = malloc(sizeof(char)*MAX_FILEPATH);
ssize_t size = readlinkat(dirfd, abs, buf, MAX_FILEPATH);
char *realpath;
if(size != -1) {
realpath = malloc(sizeof(size+1));
memcpy(realpath, buf, size);
realpath[size] = '\0';
free(abs);
} else {
realpath = abs;
}
free(buf);
return realpath;
}
/*---------------------------------------------------------------------------*/
char *abspathat(char *dirpath, int dirlen, char *path) {
/* If absolute */
if(path[0] == '/') {
return path;
}
int i;
char *right;
int d = 0;
int rlen = strlen(path);
int llen = 0;
if(path[0] == '.') {
if(path[1] == '.' && path[2] == '/') {
for(i = 3, d = 1; path[i] == '.'
&& path[i+1] == '.'
&& path[i+2] == '/'
&& i < rlen; i+=3) {
d++;
}
right = &path[i];
rlen -= i;
} else if(path[1] == '/') {
right = &path[2];
rlen -= 2;
}
} else {
right = &path[0];
}
for(i = dirlen - 1 - (dirpath[dirlen-1] == '/'); d && i; i--) {
if(dirpath[i] == '/') {
d--;
}
}
llen = i+1;
char *cpy = malloc(sizeof(char)*(llen + rlen + 2));
memcpy(cpy, dirpath, llen);
cpy[llen] = '/';
memcpy(cpy+llen+1, right, rlen);
cpy[llen+rlen+1] = '\0';
return cpy;
}
/*---------------------------------------------------------------------------*/
int main(int argc, char *argv[]) {
if(argc == 3) {
char *dirpath = argv[1];
DIR *d = opendir(dirpath);
char *path = argv[2];
char *resolved = realpathat(dirfd(d), dirpath, strlen(dirpath), path);
printf("%s\n", resolved);
} else {
printf("realpathat [directory] [filepath]\n");
}
return 0;
}
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
I am looping through my file structure with a C program. It I hit a new folder, i appending it's path to a linked list so I can look through all subdirectories iteratively.
The program consists of:
main function, calling the iterative function (which loops through the files)
When i loop through all the files once everything works fine. However when i have a while loop in my main function to call the iterative function more often, it always fails on the second time due to segmenatation error.
So i was investigating a bit and it seems that one element of the linked list is having an invalid address.
All addresses of my elements have this format and length: 0x2398ff0 or 0x2398ee0
However the illegal pointer has an address from 0x7f3770304c58
Does anyone have any thoughts why this address is so long?
I have checked throught printf("%p", element) every new address that gets added to the linked list, and this address never appear anywhere before in the code. It like magically appears.
I was thinking about a wild pointer maybe, but after i free any pointer i set it to NULL to, which should prevent this right?
Thanks for any tip. I haven't posted the code right now cause it is very long and thought maybe there are obvious things i just dont see.
EDIT: the entire code, including main function
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
void iterativefile(FILE *f, char** field, int looper){
DIR *d;
struct dirent *dir;
typedef struct nextpath { // Define element type of linked list
char *thispath;
struct nextpath *next;
}nextpath;
nextpath *startpath = malloc(sizeof(nextpath));
char * beginning = (char *) malloc(2); //create first element in linked list, starting on root node "."
strcpy(beginning, ".");
startpath->thispath = beginning;
int found = 0;
nextpath *currentzeiger = startpath;
nextpath *firstelement = startpath;
char *newdir, *currentfile, *currentpath;
do {
currentpath = currentzeiger->thispath;
d = opendir(currentpath);
if (!d){ //IF the path is invalid or cannot be opened
firstelement = currentzeiger->next;
free(currentzeiger);
currentzeiger = firstelement;
continue;
}
while((dir = readdir(d)) != NULL){
if (dir->d_type != DT_REG){ // current element is a directory -> add it to linked list
if (strcmp(dir->d_name, ".") != 0 && strcmp(dir->d_name, "..") != 0){
newdir = (char *) malloc(2+strlen(currentpath) + strlen(dir->d_name));
strcpy(newdir, currentpath);
strcat(newdir, "/");
strcat(newdir, dir->d_name);
nextpath *new = malloc(sizeof(nextpath)); // add new folder to linked list
new->thispath = NULL;
new->thispath = strdup(newdir);
new->next = currentzeiger->next;
currentzeiger->next = new;
free(newdir);
newdir = NULL;
}
}
else { // current element is a file -> check if already included in list, if not, add it
currentfile = (char *) malloc(2+strlen(currentpath)+strlen(dir->d_name));
strcpy(currentfile, currentpath);
strcat(currentfile, "/");
strcat(currentfile, dir->d_name);
found = 0;
if (field != NULL) {
for (int z = 0; z < looper; z++){
if (field[z] != NULL){
if(strcmp(currentfile,field[z]) == 0){
found = 1;
free(field[z]);
field[z] = NULL;
}
}
}
}
if (found == 0){
char *renamefile = (char *) malloc(strlen(currentpath) + 6);
strcpy(renamefile, currentpath);
strcat(renamefile, ".cbsm");
free(renamefile);
renamefile = NULL;
}
free(currentfile);
currentfile = NULL;
}
}
firstelement = currentzeiger->next;
free(currentzeiger->thispath);
currentzeiger->thispath = NULL;
free(currentzeiger);
currentzeiger = firstelement;
closedir(d);
}while(currentzeiger != NULL);
}
int main()
{
int counterofwhile = 1;
while(1){
printf("Loop number: %d\n", counterofwhile);
counterofwhile++;
FILE *fp = fopen("datasyn.txt", "rw+");
if (fp == NULL) {
printf("FILE ERROR");
FILE *fp = fopen("datasyn.txt", "ab+");
iterativefile(fp, NULL, 0);
}
else {
int lines = 0;
int ch = 0;
int len = 0;
int max_len = 0;
while((ch = fgetc(fp)) != EOF){
++len;
if (ch == '\n'){
if(max_len < len)
max_len = len;
++lines;
len = 0;
}
}
if (len)
++lines;
fprintf(stderr, "%d lines\n", lines);
if (lines > 0){
int numProgs = 0;
char *programs[lines];
char line[max_len + 1];
rewind(fp);
while(fgets(line, sizeof(line), fp)){
int new_line = strlen(line) - 1;
if (line[new_line] == '\n')
line[new_line] = '\0';
programs[numProgs++] = strdup(line);
}
iterativefile(fp, programs, numProgs);
for (int j = 0; j < numProgs; j++){
free(programs[j]);
}
}
else {
iterativefile(fp, NULL, 0);
}
sleep(1);
printf("Done\n");
fclose(fp);
}
}
return 0;
}
In the function iterativefile(), you don't use calloc() to allocate startpath and you don't set startpath->next to null. The memory returned by malloc() is not necessarily zeroed. When you subsequently use startpath->next, all hell breaks loose.
You also don't use the file pointer passed into iterativefile(). When you remove the parameter from the definition, you change the calls, and you've got a shadowed fp in main() (in the if (fp == NULL) block, you create a new FILE *fp which is really not needed). It really isn't clear what else is meant to happen; you've not given clear instructions on what the program is meant to be doing. I don't have the datasyn.txt file, but it shouldn't matter since the file stream is not used. I got lots of lines like FILE ERRORLoop number: 280 from the code, but no crash where previously I was getting a crash.
Compile with more warning options. I called the file fp17.c and compiled my hacked version using:
gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes \
-Wold-style-definition fp17.c -o fp17
With a few other simple changes (static before the function; int main(void)), the code compiled cleanly (and would have needed -Wshadow to spot the shadowing if it hadn't been for an 'unused variable' warning that pointed me to it).
My code gives a run time error when I reach the middle of the file.
If I change the values of temp2 or temp1 then it crashes at the start of the file.
I can't understand the error I am making in this file.
It runs smoothly on a small file which has 100 lines.
I am making a file searching project so I need to store big files which have directories of entire drives.
#include<stdio.h>
#include<string.h>
#include<windows.h>
#include<conio.h>
char file[99999];
void brek(char *p, char *q);
void main()
{
FILE *fp;
int x = 0, y = 0;
int a = 0, b = 0;
int g = 0;
char temp1[10000]; // temp1 is simply for jumping to the date against the given directory or file
// the main array storing the lines is temp2.
char temp2[1000][1000];
system("chdir C:\\Users\\Faraz\\Documents && dir /s > dir.txt");
fp = fopen("C:\\Users\\Faraz\\Documents\\dir.txt", "r");
while ((y = getc(fp)) != EOF)
{
file[x] = y;
x++;
}
fclose(fp);
file[x] = '\0';
puts(&file[0]);
// <----the copying of the file to the string "file (globally declared)"is
// done---->//
getche();
system("cls");
// <-------------------start loop-------------------->//
a = 0;
while (file[a] != '\0') // <-------starting of the loop
{
while (file[a] != '/')
{
temp1[a] = file[a];
a++;
}
temp1[a] = '\0';
a = a - 2;
b = 0;
while (file[a] != '\n')
{
temp2[g][b] = file[a];
b++;
a++;
}
temp2[g][b] = '\0';
puts(&temp2[g][0]);
g++;
}
// <-----------------end loop---------------------->//
}
Try this instead
int
main(int argc, char **argv)
{
LPWIN32_FIND_DATAA fdFile;
HANDLE hFind = NULL;
const char *sPath = "C:\\Users\\Faraz\\Documents\\*.*";
if((hFind = FindFirstFile(sPath, &fdFile)) == INVALID_HANDLE_VALUE)
{
printf("no such directory %s\n", sPath);
return -1;
}
do
{
printf("Directory: %s\n", fdFile->cFileName);
}
while(FindNextFile(hFind, &fdFile)); //Find the next file.
FindClose(hFind); //Always, Always, clean things up!
return 0;
}
THe answer i found out is that you simply cant load a 1gb file in a 512 mb memory.
So we define buffer size at the top of the string using
#define BUFFER_SIZE 512
and using fgets we read the file line - by - line hence not overloading our memory space allocated by OS.
Dynamic Memory Allocation is not the solution to our problem as we start using heap space.