I need to loop trough a directory, data and read each file, that meets certain conditions, in a string and do something with it. For some reason it fails after the fseek call (the output is only the name of the first file in the directory).
Any idea what am I doing wrong?
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
void doAlgorithm(char *input) {
printf("%s\n", input);
}
int main(int argc, char** argv) {
struct dirent *dir;
DIR *d = opendir("data");
FILE *file;
while ((dir = readdir(d)) != NULL) {
if (strlen(dir->d_name) > 6 && dir->d_name[6] == 'i') {
printf("Filename: %s\n", dir->d_name);
file = fopen(dir->d_name, "r");
fseek(file, 0, SEEK_END);
long length = ftell(file);
fseek(file, 0, SEEK_SET);
printf(", Filesize: %ld\n", length);
char *buffer = malloc(length + 1);
fread(buffer, 1, length, file);
buffer[length] = '\0';
fclose(file);
doAlgorithm(buffer);
}
}
closedir(d);
return (EXIT_SUCCESS);
}
Your problem is that you file = fopen(dir->d_name, "r"); doesn't know where that file is in the directory. you need to give it the full path. You can do this;
struct dirent *dir;
// put the directory path here. on windows is \ instead of /
char *path = "/Users/adnis/CLion/Stackoverflow/testdir";
char *slash = "";
DIR *d = opendir(path);
FILE *file;
while ((dir = readdir(d)) != NULL) {
if (strlen(dir->d_name) > 6 && dir->d_name[6] == 'i') {
printf("Filename: %s\n", dir->d_name);
int length = strlen(path);
/*check if the path already contains a '/' at
the end before joining the filename to the directory*/
if(path[strlen(path)-1] != '/'){ //on windows is '\'
slash = "/";
}
length += strlen(dir->d_name)+2;
// allocate memory for the new path
// and make sure we have enough memory.
char *newpath = malloc(length);
assert(newpath != NULL);
snprintf(newpath,length,"%s%s%s",path,slash,dir->d_name);
file = fopen(newpath, "r");
if(file == NULL){
fprintf(stderr, "fopen: %s\n", strerror(errno));
break;
}
fseek(file, 0, SEEK_END);
long len = ftell(file);
fseek(file, SEEK_SET, 0);
char *buffer = malloc(len + 1);
fread(buffer, 1, len, file);
buffer[strlen(buffer)] = '\0';
printf("%s \n",buffer);
fclose(file);
}
}
closedir(d);
return (EXIT_SUCCESS);
I suggest that when reading directory you have to also try and avoid reading "." and ".." since they are just current directory and previous directory. something like this will help. In your while loop
if(strcmp(dir->d_name,".") == 0 || strcmp(dir->d_name,"..") == 0)
continue;
Related
I'm a beginner to C and wanted to code a simple function that reads the content of file and returns it as a string, as an exercise.
Here is my solution which I think works, but is there any obvious bad practices or unoptimal code here ? For example, I manually added a \0 at the end of the string, but I don't know if it is really necessary...
#include <stdio.h>
#include <stdlib.h>
char *readFile(char *path)
{
//open file
FILE *file = fopen(path, "r");
//if broken
if (file == NULL)
{
printf("Erreur");
return NULL;
}
//return variable
char *result;
//length of the file
int len;
fseek(file, 0, SEEK_END);
len = ftell(file);
fseek(file, 0, SEEK_SET);
//initialising return variable
result = (char*) malloc(sizeof(char) * (len + 1));
int c;
int i = 0;
while (feof(file) == 0)
{
c = fgetc(file);
if (c != EOF)
{
printf("%04x -> %c\n", c, c);
*(result + i) = c;
i++;
}
}
*(result + i) = '\0';
printf("len : %i\n", len);
fclose(file);
return result;
}
I'd replace this:
int c;
int i = 0;
while (feof(file) == 0)
{
c = fgetc(file);
if (c != EOF)
{
printf("%04x -> %c\n", c, c);
*(result + i) = c;
i++;
}
}
with this:
fread(file, 1, len, result);
It's much shorter
It's correct
It's certainly faster
There is still room for improvement though, for example you could add error handling, fread can fail.
Since you have already got the length of the file to be read, you could also read them at once instead char-by-char.
Another implmentation of your function, for example:
char *readFile(char *path)
{
//open file
FILE *file = fopen(path, "r");
//if broken
if (file == NULL)
{
printf("Erreur");
return NULL;
}
//return variable
char *result;
//length of the file
int len;
fseek(file, 0, SEEK_END);
len = ftell(file);
fseek(file, 0, SEEK_SET);
//initialising return variable
result = (char*) malloc(sizeof(char) * (len + 1));
size_t i = fread(result, sizeof(char), len, file);
*(result + i) = '\0';
printf("len : %i\n", len);
fclose(file);
return result;
}
How do I read a file into a string on Linux in C?
I came up with some code, but it's not working, and idk why. fgetc() always returns -1.
The file structure is something like this
.:
Files/
main.c
makefile
./Files:
test
Contents of main.c:
#include <stdio.h>
int fileLength(const char filePath[]);
void readFile(const char filePath[], char* outString);
int main()
{
char fileContents[fileLength("Files/test")];
readFile("Files/test", &fileContents);
printf("DEBUG: Address of fileContents is 0x%x\n", &fileContents);
printf("File contents:\n%s\n", fileContents);
return 0;
}
int fileLength(const char filePath[])
{
//Open the file
FILE* file;
if ((file = fopen(filePath, "r")) == NULL)
{
printf("ERROR: File (%s) cannot be opened.\n", filePath);
return -1;
}
//Find the length
fseek(file, 0, SEEK_END);
return ftell(file);
}
void readFile(const char filePath[], char* outString)
{
FILE* file;
//File reading
printf("DEBUG: File path is %s\n", filePath);
if ((file = fopen(filePath, "r")) == NULL)
{
printf("ERROR: File (%s) cannot be opened.\n", filePath);
exit(1);
}
//Get length of file and allocate the according amount of memory
fseek(file, 0, SEEK_END);
int fileLength = ftell(file);
printf("DEBUG: File length is %i\n", fileLength);
//Allocate string
char fileContent[fileLength];
//Read file to string
printf("DEBUG: File contents as digits:\n");
for (int i = 0; i < fileLength; i++)
{
fileContent[i] = fgetc(file);
printf("%d ", fileContent[i]);
}
printf("\n");
printf("DEBUG: Contents of file are:\n%s\n", fileContent);
fclose(file);
printf("DEBUG: outString is pointing to 0x%x\n", outString);
*outString = fileContent;
}
The output is usually just a bunch of question mark diamond things (running in terminal) that match the length of the file with a few other random chars thrown in at the end. The chars at the end change every time the program is run.
kaylum was right, the solution was to:
rewind() after finding the file length in readFile()
remember to fclose() when done
write directly to outString instead of using fileContent
The final code of main.c comes out to be:
#include <stdio.h>
int fileLength(const char filePath[]);
void readFile(const char filePath[], char* outString);
int main()
{
char fileContents[fileLength("Files/test")];
readFile("Files/test", &fileContents);
printf("File contents:\n%s\n", fileContents);
return 0;
}
int fileLength(const char filePath[])
{
//Open the file
FILE* file;
if ((file = fopen(filePath, "r")) == NULL)
{
printf("ERROR: File (%s) cannot be opened.\n", filePath);
return -1;
}
//Find the length
fseek(file, 0, SEEK_END);
int length = ftell(file);
fclose(file);
return length;
}
void readFile(const char filePath[], char* outString)
{
FILE* file;
//File reading
if ((file = fopen(filePath, "r")) == NULL)
{
printf("ERROR: File (%s) cannot be opened.\n", filePath);
exit(1);
}
//Get length of file and allocate the according amount of memory
fseek(file, 0, SEEK_END);
int fileLength = ftell(file);
rewind(file);
//Read file to string
for (int i = 0; i < fileLength; i++)
outString[i] = fgetc(file);
fclose(file);
}
I'm making kind of an antivirus as a part of a project I got. I need to find if a binary file includes a certain string anywhere in it. What I'm trying to do is add 7 chars to an array, because i know the virus signature is 7 chars long, then compare to the string and if they are equal, means the file is infected. But, it doesn't work. it says "Exception thrown: read access violation.
currStr was 0x1110113.". Also, whenever I try to free arrays or free files, the program crashed. Any help would be appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#define ARR_SIZE 250
void Search_in_File(char* dir, char* str)
{
FILE *fp;
DIR *folder;
char currFilePath[ARR_SIZE] = { 0 };
struct dirent *entry;
int files = 0;
char ch = 0;
char* virusStr;
char* currStr;
char* buffer;
int fSize = 0;
int index = 0;
folder = opendir(dir); //Opening the folder
if (folder == NULL)
{
perror("Unable to read directory");
return(1);
}
virusStr = (char*)malloc(sizeof(char) * ARR_SIZE);
strcpy(virusStr, str); // Getting the virus detection string
while ((entry = readdir(folder))) // Reading each file in the folder
{
if ((entry->d_name)[0] != '.') //Making sure that we will go over valid files only
{
strcpy(currFilePath, dir); //Coppy the folder name into the file path
files++;
printf("File %3d: %s\n",
files,
entry->d_name
);
strcat(currFilePath, "/"); //Add slash so we can add the filename and get full path
strcat(currFilePath, entry->d_name); // Adding file name after the slash
fp = fopen(currFilePath, "rb"); //Opening the file with the full path
if (fp == NULL)
{
printf("Error opening file!\n");
}
fseek(fp, 0, SEEK_END);
fSize = ftell(fp);
fseek(fp, 0, SEEK_SET);
buffer = (char*)malloc(fSize + 1);
fread(buffer, sizeof(char), fSize, fp); // Reading the file content into a string
currStr = (char*)malloc(sizeof(char) * strlen(str) + 1); //Memory for the checking string
printf("%d", strlen(currStr));
printf("%p", &currStr);
for (index; index < fSize; index++)
{
printf("%c", buffer[index]);
if (buffer[index] != EOF)
{
//Error happens here
strcat(currStr[index], buffer[index]); // Adding the current char to the current string
// Checking if we have a string the same length as the virus signature
if ((index % strlen(virusStr)) == 0)
{
if (strcmp(currStr, virusStr) == 0) // Checking if we have the same string
{
printf("%s - Infected!", entry->d_name);
break;
}
}
}
currStr = '\0'; // Reset the current string
}
index = 0;
currFilePath[0] = '\0'; // Resetting the file path in order to get the new one
}
}
closedir(folder);
//fclose(fp); //Doesnt Work - ????
//free(virusStr); //Doesnt Work - ????
//free(currStr);//Doesnt Work - ????
//free(buffer);//Doesnt Work - ????
}
I'm using miniz to create a .zip file in C, on Windows.
I used the doc to produce my code and it works. I can create an archive with the files I want, ONLY if I give relative path to the zip function.
I don't get why the "file_name" variable must be something like "../test/file.txt" and not "C:/../test/file.txt".
if (!(status = mz_zip_add_mem_to_archive_file_in_place(archive, file_name, data, strlen(data) + 1, s_pComment,
(uint16) strlen(s_pComment), MZ_BEST_COMPRESSION)))
return (merror("add file to archive failed !!"));
Before this function, I open my file, get the data inside and call the zip_function with it.
if (!(src = fopen(file_name, "r")))
return (merror("can't open this file"));
char *line = NULL;
char *data= NULL;
size_t n = 0;
getline(&line, &n, src);
data= strdup(line);
while (getline(&line, &n, src) != -1){
data = realloc(save, sizeof(char) * (strlen(data) + strlen(line)) + 1);
data = strcat(data, line);
}
fopen(src);
So I call the zip function with the archive name, the file name (with the absolute path) and the datas inside it (in char * format).
This is the "full" code : the function init_zip is the first function called by my program. The arg parameter is the archive name I want to be create(it can be an absolute path and works) and the args parameter are the names of the differents files I want to add to the archive file (relative path works but not absolute).
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint;
static const char *s_pComment = "";
static int isDirectory(const char *path) {
struct stat statbuf;
if (stat(path, &statbuf) != 0)
return 0;
return S_ISDIR(statbuf.st_mode);
}
int get_data(const char *archive, const char *file)
{
FILE *src;
if (!isDirectory(file)) {
if (!(src = fopen(file, "r")))
return (merror("can't open this file"));
char *line = NULL;
char *save = NULL;
size_t n = 0;
getline(&line, &n, src);
save = strdup(line);
while (getline(&line, &n, src) != -1) {
save = realloc(save, sizeof(char) * (strlen(save) + strlen(line)) + 1);
save = strcat(save, line);
}
printf("compressing %s ..\n", file);
if (m_compress(archive, file, save))
return (merror("compress function failed"));
printf(("\tOK.\n"));
fclose(src);
}
else
{
DIR *dir;
struct dirent *entry;
char *new_file;
if (!(dir = opendir(file)))
return (merror("opendir failed: ", "wrong directory path in init_zip.get_data command : ", file, NULL));
while ((entry = readdir(dir)) != NULL)
{
if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) {
new_file = add_path(file, entry->d_name);
get_data(archive, new_file);
}
}
if (new_file)
free(new_file);
closedir(dir);
}
}
int init_zip(const char *arg, const char **args)
{
printf("\nZIP cmd:\n >");
remove(arg);
for (int counter = 0; args[counter]; ++counter)
{
get_data(arg, args[counter]);
}
printf("All the files are added to %s archive file.\n", arg);
return (0);
}
int m_compress(const char *archive, const char *file_name, const char *data)
{
mz_bool status;
if (data)
if (!(status = mz_zip_add_mem_to_archive_file_in_place(archive, file_name, data, strlen(data) + 1, s_pComment,
(uint16) strlen(s_pComment), MZ_BEST_COMPRESSION)))
return (merror("add file to archive failed !!"));
else
if (!(status = mz_zip_add_mem_to_archive_file_in_place(archive, file_name, NULL, 0, "no comment", (uint16)strlen("no comment"), MZ_BEST_COMPRESSION)))
return (merror("add directory to archive failed !!"));
return (0);
}
This is the add_path() function used in get_data():
char *add_path(const char *str1, const char *str2)
{
char *path;
path = malloc(sizeof(char) * (strlen(str1) + 1 + strlen(str2) + 1));
path = strcpy(path, str1);
path = strcat(path, "/");
path = strcat(path, str2);
return (path);
}
Anyone knows something about it?
If nothing helps, then you should lookup the sources. Following the code in miniz on Github, file miniz_zip.c line 4297 I see:
mz_bool mz_zip_add_mem_to_archive_file_in_place(...
which calls function mz_zip_writer_validate_archive_name to check the second filename provided that it cannot start with a drive letter (line 3069) and if so
returns FALSE with error set to MZ_ZIP_INVALID_FILENAME.
As to why this second filename may not be an absolute path, I don't know. If it is important to you, you could get the code from Github and adapt it.
in my program, I provide a directory which contains text files. Each of the text files contain a few hundred lines in the following format
Username,Password,BloodType,Domain,Number
I then create a thread for each file in the directory which will merge-sort(by number) these lines into the array char* text_lines[6000];
I can't figure out why I'm getting a segmentation fault because I'm getting different output on every run.
Heres my code:
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <sys/types.h>
#include <dirent.h>
#include <string.h>
void store_line(char* line);
void* my_merge_sort(void* file);
char** text_lines;
int main(int argc, char* argv[])
{
if(argc != 2)
{
fprintf(stderr, "usage: ./coolsort <directory>\n");
}
else
{
text_lines = malloc(6000 * sizeof(char*));
DIR* the_directory;
int filecount = 0;
struct dirent* directory_files[50];
if((the_directory = opendir(argv[1])) != NULL)
{
//make a list of the files in the directory
while((directory_files[filecount++] = readdir(the_directory))) ;
filecount--;
//<<<DEBUGGING INFO>
int i;
fprintf(stderr,"there are %i files in %s:\n", filecount, argv[1]);
for(i = 0; i < filecount; i++)
{
fprintf(stderr, "%s\n",directory_files[i]->d_name);
}
char cwd[512];
chdir(argv[1]);
getcwd(cwd, sizeof(cwd));
fprintf(stderr, "the CWD is: %s\n", cwd);
//<DEBUGGING INFO>>>
//lets start some threads
pthread_t threads[filecount-2];
int x = 0;
for(i = 0; i < (filecount); i++ )
{
if (!strcmp (directory_files[i]->d_name, "."))
continue;
if (!strcmp (directory_files[i]->d_name, ".."))
continue;
pthread_create(&threads[x++], NULL, my_merge_sort, (void*)directory_files[i]->d_name);
}
//do stuff here
//
}
else
{
fprintf(stderr, "Failed to open directory: %s\n", argv[1]);
}
}
}
void* my_merge_sort(void* file)
{
fprintf(stderr, "We got into the function!\n");
FILE* fp = fopen(file, "r");
char* buffer;
char* line;
char delim[2] = "\n";
int numbytes;
//minimize I/O's by reading the entire file into memory;
fseek(fp, 0L, SEEK_END);
numbytes = ftell(fp);
fseek(fp, 0L, SEEK_SET);
buffer = (char*)calloc(numbytes, sizeof(char));
fread(buffer, sizeof(char), numbytes, fp);
fclose(fp);
//now read the buffer by '\n' delimiters
line = strtok(buffer, delim);
fprintf(stderr, "Heres the while loop\n");
while(line != NULL)
{
store_line(line);
line = strtok(buffer, NULL);
}
free(buffer);
}
void store_line(char* line)
{
//extract the ID.no, which is the fifth comma-seperated-token.
char delim[] = ",";
char* buff;
int id;
int i;
strtok(line, delim);
for(i = 0; i < 3; i++)
{
strtok(line, NULL);
}
buff = strtok(line, NULL);
id = atoi(buff);
//copy the line to text_lines[id]
memcpy(text_lines[id], line, strlen(line));
}
edit: I checked to make sure that it would fit into the initial array, and found that the highest ID is only 3000;
You use of strtok() is wrong:
line = strtok(buffer, NULL);
should be
line = strtok(NULL, delim);
Another mistakes should be fixed similarly.
The elements of text_lines are uninitialized:
text_lines = malloc(6000 * sizeof(char*));
this allocated 6000 pointers to char, but none of these pointers are initialized.