I am trying to read all .txt files which is in the directory named "dataset". All text files has name like 1.txt, 2.txt, 3.txt... And then saving the contents of the files to a structure named FILES.
I used the dirent.h library and readdir( ) function as I saw in some sources. But the file name which program read from directory does not return correctly. Here is my related code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
typedef struct FILES{
char **words;
int wordCount;
}FILES;
void readFiles();
FILES *files;
int fileCount;
int main(){
readFiles();
return 0;
}
void readFiles(){
FILE *file;
DIR *directory;
struct dirent *filesInDirectory;
int counter;
fileCount = 1;
files = (FILES *)malloc(sizeof(FILES));
directory = opendir("dataset");
if(directory == NULL){
printf("Warning: The directory name that is given in the code is not
valid ..!");
return;
}else{
while((filesInDirectory = readdir(directory)) != NULL){
printf("%s\n", filesInDirectory->d_name);
file = fopen(filesInDirectory->d_name, "r+");
if(file == NULL){
printf("Warning: The file named %s could not open ..!",
filesInDirectory->d_name);
return;
}
files[fileCount-1].wordCount = 1;
files[fileCount-1].words = (char **)malloc(files[fileCount-
1].wordCount * sizeof(char *));
counter = 0;
while(!feof(file)){
files[fileCount-1].words[counter] = (char *)malloc(20 *
sizeof(char));
fscanf(file, "%s", files[fileCount-1].words[counter]);
files[fileCount-1].wordCount++;
files[fileCount-1].words = (char **)realloc(files[fileCount-
1].words, files[fileCount-1].wordCount * sizeof(char *));
counter++;
}
fileCount++;
fclose(file);
}
}
}
The file name that I printed in here "printf("%s\n", filesInDirectory->d_name);" is ".". Where am I doing wrong?
THIS IS NOT AN ANSWER BUT TOO BIG FOR A COMMENT
Here are your problems:
1.
files = (FILES *)malloc(sizeof(FILES));
This is enough size of one FILES. This is will not be enough
After
while((filesInDirectory = readdir(directory)) != NULL){
Put something like
size_t len = strlen(filesInDirectory->d_name);
if (len < 5 || strcmp(filesInDirectory->d_name + len - 4, ".txt") != 0) {
continue;
}
This will check to ensure files end in .txt
Consider using fstat to ensure that the file is a text file
Remove the casts on malloc
Perhaps use realloc for files
while ... feof is bad - see link above
fscanf(file, "%s", - Buffer overruns is possible. Do something about this. Read the manual page
Related
I'm trying to build a C portable code (for Windows, MacOS and Linux) that creates an output .txt file to receive the results of a numerical simulation.
Summarizing, the code takes the name of the file and the extension and checks if the file already exists in the directory. If so, it creates another file with the same name, but with a number between parenthesis (#) in the end to distinguish the old from the new one.
The problem is: it is working properly on the mac environment, however when I compile and run it on windows, the file is not created in the end of execution. I could not find what I'm doing wrong.
Also, I'm using Intel C/C++ Classic compiler. If I use another compiler, for example, the Intel® oneAPI DPC++/C++ Compiler for windows, it complains about the usage of sizeof(src) when I call the strncat(...) function.
So far, this is the version of the code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
bool file_exists(char *filename);
FILE *create_output_file(char *name, char *ext, bool (*file_exists)(char *));
#define funcname "test"
int main(void) {
// Create output files to store results
char *output_filename = malloc(200 * sizeof(*output_filename));
sprintf(output_filename, "%s_out", funcname);
printf("output_filename = %s\n", output_filename);
char *ext = ".txt";
FILE *output_file = create_output_file(output_filename, ext, file_exists);
}
bool file_exists(char *filename) {
// Try to open file with same name as filename
FILE *testfile = fopen(filename, "r");
// Bool variable to check the existence of file
bool exists = false;
// Check the existence of a file called filename
if (testfile != NULL) {
// Returns true if the file exists
exists = true;
}
// Close the file
fclose(testfile);
// Returns the existence of a file (1) = does exist, (0) = does not exist
return exists;
}
FILE *create_output_file(char *name, char *ext, bool (*file_exists)(char *)) {
// Form the full filename
name = strncat(name, ext, sizeof(ext));
printf("fullfilename = %s\n", name);
// Check if a file with the filename exists in the directory
if (file_exists(name)) {
// If exists, assign the same name with "(number)" to differentiate the new version
int j = 1;
char *numb = malloc(10 * sizeof(*numb));
sprintf(numb, "(%i)", j);
// Remove the extension from the name string
name[strlen(name) - strlen(ext)] = '\0';
// Add (number) to the name and then add the file extension again
name = strncat(name, numb, sizeof(numb));
name = strncat(name, ext, sizeof(ext));
// Check if the name with numbers exists until it doesn't
int limit = 1e1;
while (file_exists(name)) {
j++;
sprintf(numb, "(%i)", j);
if (j == limit) {
name[strlen(name) - strlen(numb) + 1 - strlen(ext)] = '\0';
limit = limit*10;
} else {
name[strlen(name) - strlen(numb) - strlen(ext)] = '\0';
}
name = strncat(name, numb, sizeof(numb));
name = strncat(name, ext, sizeof(ext));
}
// Free allocated memory
free(numb);
}
// After assign the proper name, create the output file
FILE *output_file = fopen(name, "w");
// Returns the file
return output_file;
}
What am I missing here?
There are multiple problems:
In file_exists you call fclose(testfile) even if fopen failed This has undefined behavior.
in create_output_file , your usage of sizeof is incorrect: in strncat(name, ext, sizeof(ext)); sizeof is applied to a pointer, hence it evaluates to the size of a pointer, not the length of the string it points to. You could write
strncat(name, ext, strlen(ext));
but it would be exactly equivalent to strcat(name, ext);
The function strncat is defined as
char *strncat(char *dest, const char *src, size_t n);
it copies at most n characters plus a null terminator from the string pointed to by src at the end of the string pointed to by dest.
The code is too complicated, you have multiple memory leaks and you do not check for buffer overflow when composing the filename.
Here is a simplified version:
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#define funcname "test"
bool file_exists(const char *filename) {
// Try to open file with same name as filename
FILE *testfile = fopen(filename, "r");
if (testfile != NULL) {
fclose(testfile);
return true;
} else {
return false;
}
}
#define MAX_FILE_NUM 10000
FILE *create_output_file(char *name, size_t size,
const char *ext,
bool (*file_exists)(const char *))
{
size_t len = strlen(name);
int i = 0;
snprintf(name + len, size - len, "%s", ext);
while (file_exists(name)) {
if (++i > MAX_FILE_NUM) {
// all file versions tried.
return NULL;
}
snprintf(name + len, size - len, "(%d)%s", i, ext);
}
return fopen(name, "w");
}
int main() {
// Create output files to store results
char output_filename[200];
snprintf(output_filename, sizeof output_filename, "%s_out", funcname);
printf("output_filename = %s\n", output_filename);
const char *ext = ".txt";
FILE *output_file = create_output_file(output_filename,
sizeof output_filename, ext, file_exists);
if (output_file == NULL) {
printf("could not open output file, last try: %s\n", output_filename);
} else {
printf("actual output_filename = %s\n", output_filename);
fclose(output_file);
}
return 0;
}
I am a beginner programmer (learning how to use things like hash tables and tries at present) and so am not well informed, and would value your advice.
I want to write a program that:
Receives a directory address as an argv.
Goes through each file in that directory, one by one (they will all be BMPs) and after reading to a buffer...
Performs a function on the RGB values in that buffer, nothing special -- imagine something like a box blur or a greyscale function.
Saves the buffer to a file in a new folder, closes the file in the original directory currently being accessed, and moves onto the next one until it reaches the final file.
I am experimenting with dirent as best I can, but no matter how I phrase this question, I end up with something that tells me how to read filenames and list them, and how to read those into a dirent struct that doesn't itself hold the file data; I get nothing about specifically accessing a directory and looking for files within them with the explicit purpose of fopen()ing them.
An excerpt of my code, to give you an example of my (probably awful) logic:
DIR *folder;
folder = opendir(argv[3]);
if (folder == NULL);
{
printf("Unable to read folder");
return 2;
}
struct dirent *input;
FILE *fileinput;
int files = 0;
// Use this file loop to go through each
while( (input = readdir(folder)) != NULL )
{
fileinput = fopen(input->d_name, "r");
if (filepointer != NULL)
{
// checks for file headers, open another FILE for writing, my actual function etc.
}
But again, it seems that the FOPEN there is accessing a copy of a name, and not the file itself indicated so. And I simply don't have the vocabulary to find a similar question answering this, on SO or elsewhere.
Would anyone mind pointing me in the right direction? Apologies for any hassle as I'm sure this is a very basic question...
ーーーーEDIT: requested to post updated code for review:
#include <dirent.h> //必要
#include <sys/types.h>
#include "helpers.h" //bmp.h declared within
#include <getopt.h> //parse argvs
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <string.h>
const int PATH_MAX = 260;
int main(int argc, char *argv[])
{
char *filters = "rbg";
char filter = getopt(argc, argv, filters);
if (filter == '?') {
printf("Invalid filter.\nUsage: ./colourfilter [flag]\n r = red\t b = blue\t g = green\n");
return 2;
}
if (getopt(argc, argv, filters) != -1) {
printf("Only one filter may be used.\n");
return 3;
}
// OPEN INPUT FOLDER
const char *inputs = "inputs";
DIR *infolder = opendir(inputs);
if (infolder == NULL) {
//fprintf(stderr,"Unable to read folder %s\n", infolder);
printf("Unable to read folder.\n");
return 4;
}
// Declare variables
struct dirent *input;
int counter = 0;
char name[8];
FILE *imgout;
while((input = readdir(infolder)) != NULL)
{
char path[PATH_MAX];
if (!strcmp(input->d_name, ".") || !strcmp(input->d_name, "..")) {
continue;
}
if ((size_t)snprintf(path, sizeof(path), "%s/%s", infolder, input->d_name) >= sizeof(path)) {
printf("Filename too long: %s/%s\n", infolder, input->d_name);
continue;
}
// FOPEN THINGS
// "Also make sure you open the BMP files as binary with "rb" and "wb".:" (see: https://stackoverflow.com/questions/71321367/)
sprintf(name, "%03i.bmp", counter);
FILE *imgin = fopen(path, "rb");
imgout = fopen(name, "wb");
if (imgin == NULL) {
printf("Could not open %s.\n", path);
return 7;
}
if (imgout == NULL) {
fclose(imgin);
printf("Could not create images.\n");
return 8;
}
BITMAPFILEHEADER bf;
fread(&bf, sizeof(BITMAPFILEHEADER), 1, imgin);
BITMAPINFOHEADER bi;
fread(&bi, sizeof(BITMAPINFOHEADER), 1, imgin);
// Ensure infile is (likely) a 24-bit uncompressed BMP 4.0
if (bf.bfType != 0x4d42 || bf.bfOffBits != 54 || bi.biSize != 40 ||
bi.biBitCount != 24 || bi.biCompression != 0)
{
fclose(imgout);
fclose(imgin);
printf("Unsupported file format.\n");
return 8;
} // ... other stuff after this for implementing functions etc.
The problem is fopen(input->d_name, "r"); tries to open the file in the current directory instead of the one specified in argv[3]. You must construct the path to the file in a separate string.
Also make sure you open the BMP files as binary with "rb" and "wb".
char *foldername = argv[3];
DIR *folder = opendir(foldername);
if (folder == NULL) {
fprintf(stderr, "Unable to read folder %s\n", foldername);
return 2;
}
struct dirent *input;
FILE *fileinput;
int files = 0;
// Use this file loop to go through each
while ((input = readdir(folder)) != NULL) {
char path[PATH_MAX];
if (!strcmp(input->d_name, ".") || !strcmp(input->d_name, "..")) }
continue;
}
if ((size_t)snprintf(path, sizeof path, "%s/%s", foldername, input->d_name) >= sizeof path) {
fprintf(stderr, "filename too long: %s/%s\n", foldername, input->d_name);
continue;
}
fileinput = fopen(path, "rb");
...
I'm making kind of an antivirus as a part of a project I got. I need to find if a binary file includes a certain string anywhere in it. What I'm trying to do is add 7 chars to an array, because i know the virus signature is 7 chars long, then compare to the string and if they are equal, means the file is infected. But, it doesn't work. it says "Exception thrown: read access violation.
currStr was 0x1110113.". Also, whenever I try to free arrays or free files, the program crashed. Any help would be appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#define ARR_SIZE 250
void Search_in_File(char* dir, char* str)
{
FILE *fp;
DIR *folder;
char currFilePath[ARR_SIZE] = { 0 };
struct dirent *entry;
int files = 0;
char ch = 0;
char* virusStr;
char* currStr;
char* buffer;
int fSize = 0;
int index = 0;
folder = opendir(dir); //Opening the folder
if (folder == NULL)
{
perror("Unable to read directory");
return(1);
}
virusStr = (char*)malloc(sizeof(char) * ARR_SIZE);
strcpy(virusStr, str); // Getting the virus detection string
while ((entry = readdir(folder))) // Reading each file in the folder
{
if ((entry->d_name)[0] != '.') //Making sure that we will go over valid files only
{
strcpy(currFilePath, dir); //Coppy the folder name into the file path
files++;
printf("File %3d: %s\n",
files,
entry->d_name
);
strcat(currFilePath, "/"); //Add slash so we can add the filename and get full path
strcat(currFilePath, entry->d_name); // Adding file name after the slash
fp = fopen(currFilePath, "rb"); //Opening the file with the full path
if (fp == NULL)
{
printf("Error opening file!\n");
}
fseek(fp, 0, SEEK_END);
fSize = ftell(fp);
fseek(fp, 0, SEEK_SET);
buffer = (char*)malloc(fSize + 1);
fread(buffer, sizeof(char), fSize, fp); // Reading the file content into a string
currStr = (char*)malloc(sizeof(char) * strlen(str) + 1); //Memory for the checking string
printf("%d", strlen(currStr));
printf("%p", &currStr);
for (index; index < fSize; index++)
{
printf("%c", buffer[index]);
if (buffer[index] != EOF)
{
//Error happens here
strcat(currStr[index], buffer[index]); // Adding the current char to the current string
// Checking if we have a string the same length as the virus signature
if ((index % strlen(virusStr)) == 0)
{
if (strcmp(currStr, virusStr) == 0) // Checking if we have the same string
{
printf("%s - Infected!", entry->d_name);
break;
}
}
}
currStr = '\0'; // Reset the current string
}
index = 0;
currFilePath[0] = '\0'; // Resetting the file path in order to get the new one
}
}
closedir(folder);
//fclose(fp); //Doesnt Work - ????
//free(virusStr); //Doesnt Work - ????
//free(currStr);//Doesnt Work - ????
//free(buffer);//Doesnt Work - ????
}
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 years ago.
Improve this question
I need to make program that compare content of all files in directory and detects duplicates. Program works fine until it starts comparing files inside the for loop. I think there might be error in array but I can't find any. Any help will be appreciated.
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
int listFiles(char *path, int a, char ***array);
int main() {
char path[100];
char **array = NULL;
int a = 0;
printf("Enter path to list files: ");
scanf("%s", path);
listFiles(path, a, &array);
return 0;
}
int listFiles(char *basePath, int a, char ***array) {
char path[1000], c, *d = NULL;
FILE *input_file;
struct dirent *dp;
DIR *dir = opendir(basePath);
if (!dir) {
return 1;
}
while ((dp = readdir(dir)) != NULL) {
if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) {
strcpy(path, basePath);
strcat(path, "\\");
strcat(path, dp->d_name);
input_file = fopen(path, "r");
if (input_file == NULL) {
fclose(input_file);
}
if (input_file) {
printf("%s\n", path);
while ((c = getc(input_file)) != EOF)
d = realloc(d, (x + 1) * sizeof(*d));
(d)[x++] = c;
}
*array = realloc(*array, (a + 1) * sizeof(**array));
(*array)[a++] = d;
if (a > 1) {
for (int j = 0; j < a - 1; ++j) {
if (!memcmp(array[a], array[j], 999)) {
printf("duplikat\n");
free(array[a]);
--a;
break;
}
}
}
}
listFiles(path, a, array);
}
closedir(dir);
return 0;
}
while ((c = getc(input_file)) != EOF)
d=realloc(d, (x+1)*sizeof(*d));
(d)[x++] = c;
Is equal to:
while ((c = getc(input_file)) != EOF) {
d = realloc(d, (x+1)*sizeof(*d));
}
(d)[x++] = c;
Which reads from a file until EOF and reallocates a d pointer with the same size for each character in the file. Then is assigns to the last element in d the value of EOF. So the content of the file is not saved in d pointer.
Always explicitly use { and } (except for cases when don't).
Check for overflow.
size_t pathsize = sizeof(path);
assert(pathsize > strlen(basePath) + 1 + strlen(dp->d_name) + 1);
strcpy(path, basePath);
strcat(path, "\\");
strcat(path, dp->d_name);
Do similar checks everytime you do something dangerous.
What's the point of closing NULL file?
if(input_file == NULL){
fclose(input_file);
}
if (input_file){
The number 999 is very magical.
memcmp(array[a],array[j],999)
You incorrectly handle arrays. Or don't free memory. I don't know.
*array = realloc(*array, (a+1)*sizeof(**array));
(*array)[a++] = d;
...
free(array[a]);
There is a little point in char *** variable. Don't use *** (unless in cases where you use them). The char***array can be completely removed.
Start with a good abstraction. First write a function that will compare two files bool files_compare(const char *file1, const char *file2);. Then write a function that will list all files in a directory. Then for each pair of files listed compare them. There is a little need to no need in storing the content of the file in dynamic memory (what if you have files that have 10Gb and a system with 1Gb of memory?).
The listFiles function is untimately is endlessly recursively calling itself if there is at least one file in the directory.
int listFiles(char *basePath, int a, char ***array)
{
...
DIR *dir = opendir(basePath);
...
while ((dp = readdir(dir)) != NULL){
...
listFiles(path,a,array);
}
}
This code has many errors. It reuses old memory, incorrectly handles arrays, has some strange magic numbers, leaks memory, does not close opened files, is not protected against overflow, and opens the directory recursively.
I wrote a code to read files
What is wrong in the following code I am always getting last filename if I print any arrayItem
#include <stdio.h>
#include <string.h>
char **get_files()
{
FILE *fp;
int status;
char file[1000];
char **files = NULL;
int i = 0;
/* Open the command for reading. */
fp = popen("ls", "r");
if (fp == NULL) {
printf("Failed to run command\n" );
//exit;
}
while (fgets(file, sizeof(file)-1, fp) != NULL) {
files = (char **)realloc(files, (i + 1) * sizeof(char *));
//files[i] = (char *)malloc(sizeof(char));
files[i] = file;
i++;
}
printf("%s", files[0]);
return files;
}
int main()
{
char **files = NULL;
int i =0 ;
files = get_files("");
}
you should use
files[i] = strdup(file);
instead of
files[i] = file;
The second version only lets files[i] point to your reading buffer which is always the same. With the next fgets, you'll overwrite the contents of file and thus the contents of file[i] which actually point to the same location in memory.
In fact, at the end, all your file[0]..file[n] will point to the same location as file does.
With strdup(..) you're allocating a new buffer and copying the contents of file there.
pclose is missing for your popen. popen is only POSIX, not C89/C99.
No memory-alloc check in the example, its your work ;-)
#include <stdio.h>
#include <stdlib.h>
char **get_files(char **list)
{
FILE *fp;
char file[1000];
int i=1;
/* Open the command for reading. */
fp = popen("ls -l", "rt");
if( !fp )
perror("Failed to run command\n" ),exit(1);
while( fgets(file, sizeof file , fp) ) {
list = realloc(list, ++i * sizeof*list );
memmove( list+1, list, (i-1)*sizeof*list);
*list = strcpy( malloc(strlen(file)+1), file);
}
pclose( fp );
return list;
}
main()
{
char **files = get_files(calloc(1,sizeof*files)), **start=files;
while( *files )
{
puts(*files);
free(*files++);
}
free(start);
return 0;
}
Calling popen() on 'ls' is a bad way to do this. Take a look at opendir(), readdir(), rewinddir() and closedir().
You are reusing the file array. After you've read a filename, you need to use strdup to take a copy of it, and put that copy into the files array. Otherwise, every element in files just points to the same string.
Your array in char file[1000] is single dimension, regardless of how you re-allocate memory (unless I'm missing something obvious). If you are reading an unknown number of files, then a linked list is probably the best way to go about this.