I am doing a recursive walk through directories to make changes to files. My change file function needs the full path of the file to be able to do stuff. However, what my program is doing right now is just getting the name of the current file or folder but not the full path.
My approach is to build a string and keep appending names to it until I have the full path. However, because I'm using recursion, I'm having trouble passing the string around so that more names can be appended to it.
This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <errno.h>
/*
 * Recursively prints the directory tree rooted at pathName, indenting each
 * entry by two spaces per nesting level.
 *
 * pathName: directory to open and list.
 * fullPath: forwarded unchanged to every recursive call; it is never read
 *           or written inside this function.
 * level:    current depth; only used to compute the print indentation.
 */
void recursiveWalk(const char *pathName, char *fullPath, int level) {
DIR *dir;
struct dirent *entry;
if (!(dir = opendir(pathName))) {
fprintf(stderr, "Could not open directory\n");
return;
}
if (!(entry = readdir(dir))) {
fprintf(stderr, "Could not read directory\n");
// NOTE(review): this return skips closedir(dir), so the handle stays open.
return;
}
do {
if (entry->d_type == DT_DIR) { // found subdirectory
char path[1024];
// Joined "<pathName>/<entry name>"; it only exists in this directory branch.
int len = snprintf(path, sizeof(path)-1, "%s/%s", pathName, entry->d_name); // len = length of the joined path
// NOTE(review): snprintf already NUL-terminates, and it returns the
// untruncated length, so if the name did not fit this index lies
// outside the buffer.
path[len] = 0;
// skip the "." and ".." entries; continue jumps to the readdir() in the loop condition
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
continue;
}
// "%*s" with an empty string prints level*2 spaces of indentation.
fprintf(stdout, "%*s[%s]\n", level*2, "", entry->d_name);
// Append fullPath to entry->d_name here
recursiveWalk(path, fullPath, level + 1);
}
else { // every entry whose d_type is not DT_DIR
// Only the bare entry name is available here, not the joined path.
fprintf(stdout, "%*s- %s\n", level*2, "", entry->d_name);
//changeFile(fullPath);
}
} while (entry = readdir(dir));
closedir(dir);
}
/* Entry point: walks the current directory, starting at depth 0. */
int main(int argn, char *argv[]) {
    const int startDepth = 0;

    recursiveWalk(".", "", startDepth);
    return 0;
}
Well there are a number of little problems in your code.
you never use nor change fullPath in recursiveWalk
your formats are weird : you use level*2 to limit the number of characters printed from an empty string
you compute the actual path only when you have found a directory, while you say you need it to change a file.
you add path[len] = 0 after a snprintf when snprintf already guarantees that the buffer is null terminated
But apart from that, you do correctly build the path of each directory to analyze by appending its name to the path passed in the initial call; it is just held in the pathName parameter and computed in the local path variable, not in fullPath.
So a possible fix for your code would be :
fix the formats for printf
remove the unused fullPath parameter from recursiveWalk
always compute path and use it in the file branch
comment out the unnecessary path[len] = '\0'
I also replaced while (entry = readdir(dir)); with while ((entry = readdir(dir))); to explicitly tell the compiler that I want to assign entry and then test its value, which removes the warning.
Possible code:
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <errno.h>
/*
 * Recursively walks the directory tree rooted at pathName and prints every
 * entry together with its nesting level.
 *
 * pathName: directory to open and list.
 * level:    depth of pathName relative to the starting directory.
 *
 * For each entry the full path "<pathName>/<entry name>" is built in a local
 * buffer, so it is available for files as well as for subdirectories.
 * Entries whose full path does not fit in the buffer are reported on stderr
 * and skipped instead of being processed under a truncated name.
 */
void recursiveWalk(const char *pathName, int level) {
    DIR *dir = opendir(pathName);
    struct dirent *entry;

    if (!dir) {
        fprintf(stderr, "Could not open directory\n");
        return;
    }
    entry = readdir(dir);
    if (!entry) {
        fprintf(stderr, "Could not read directory\n");
        closedir(dir);   /* do not leak the handle on this early exit */
        return;
    }
    do {
        char path[1024];
        int len;

        /* "." and ".." would make the walk loop forever or climb upwards */
        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
            continue;
        }
        /* snprintf always NUL-terminates, so no explicit path[len] = 0 is
         * needed; its return value is the untruncated length. */
        len = snprintf(path, sizeof(path), "%s/%s", pathName, entry->d_name);
        if (len < 0 || (size_t)len >= sizeof(path)) {
            fprintf(stderr, "Path too long: %s/%s\n", pathName, entry->d_name);
            continue;
        }
        if (entry->d_type == DT_DIR) { /* found subdirectory */
            fprintf(stdout, "%s [%s] (%d)\n", pathName, entry->d_name, level);
            recursiveWalk(path, level + 1);
        }
        else { /* files */
            fprintf(stdout, "%s (%d)\n", path, level);
            /* path holds the full path of the file: call changeFile(path) here */
        }
    } while ((entry = readdir(dir)));
    closedir(dir);
}
/* Entry point: walks the tree below the current directory from depth 0. */
int main(int argn, char *argv[]) {
    const int topLevel = 0;

    recursiveWalk(".", topLevel);
    return 0;
}
Recursion is a succinct way to express things (especially walking directories), but actually you should normally avoid it in practice. If the directory tree is deep enough, it will crash your software.
Using a queue eliminates the need for recursion, and is generally an efficient way to traverse.
I'm including the code I use to process directory trees in a project...
/*
 * Processes a single directory without recursing on the call stack.
 *
 * First pass: every regular file found in `dir` is handed to on_file().
 * Second pass (only when opts.recurse is set): every subdirectory other than
 * "." and ".." is wrapped by path_create() and appended to the `paths` queue.
 *
 * dir: path of the directory to scan.
 * dp:  forwarded unchanged to on_file().
 *
 * Returns -1 if the directory cannot be opened, 0 otherwise. Per-entry
 * failures are reported through print_error() and do not stop the scan.
 */
static int on_dir(const char* const dir, struct duplicate** dp) {
    bool r = opts.recurse;
    DIR* d = opendir(dir);
    if (!d)
        return -1;

    struct dirent* de;

    /* Pass 1: regular files. */
    while ((de = readdir(d))) {
        struct stat s;
        size_t bs = strlen(dir) + strlen(de->d_name) + 2; /* '/' + NUL */
        char b[bs];
        const char* const a = strjoin(b, dir, de->d_name, '/');
        if (lstat(a, &s)) {
            /* report the path that failed, not the DIR handle */
            print_error("unable to stat %s", a);
            continue;
        }
        if (S_ISREG(s.st_mode))
            if (on_file(a, &s, dp))
                print_error("unable to process file %s/%s", dir, de->d_name);
    }

    if (!r) {
        if (closedir(d))
            on_fatal("unable to close directory %s", dir);
        return 0;
    }

    /* Pass 2: queue subdirectories for later processing. */
    rewinddir(d);
    while ((de = readdir(d))) {
        struct stat ds;
        size_t bs = strlen(dir) + strlen(de->d_name) + 2; /* '/' + NUL */
        char b[bs];
        /* named `sub` so it does not shadow the DIR handle `d` */
        const char* const sub = strjoin(b, dir, de->d_name, '/');
        if (lstat(sub, &ds)) {
            print_error("unable to stat %s", sub);
            continue;
        }
        if (S_ISDIR(ds.st_mode)) {
            const char* const dot = ".";
            const char* const dotdot = "..";
            if (!strcmp(dot, de->d_name) || !strcmp(dotdot, de->d_name))
                continue;
            /* `b` lives on the stack, so the queued path gets its own copy */
            struct path* p = path_create(strcpy(fmalloc(bs), sub));
            queue_add(&paths, &p->queue);
        }
    }

    if (closedir(d))
        print_error("unable to close directory %s", dir);
    return 0;
}
and the code for strjoin
/*
 * Writes a, the separator c, and b back to back into d and NUL-terminates
 * the result. d must have room for strlen(a) + strlen(b) + 2 bytes.
 * Returns d.
 */
static inline char* strjoin(char* restrict const d, const char* restrict const a, const char* restrict const b, const char c) {
    const size_t head_len = strlen(a);
    const size_t tail_len = strlen(b);
    char* cursor = d;

    memcpy(cursor, a, head_len);
    cursor += head_len;
    *cursor++ = c;
    /* copy b together with its terminating NUL */
    memcpy(cursor, b, tail_len + 1);
    return d;
}
I'm hoping this helps. Please feel free to use any of the code you find in the git repository.
Related
In this code, I am trying to split the pathnames with the help of strtok(). I am watching the \n sign to differentiate different pathnames. Although when I print the path variable, it gives all the pathnames.
But when I split them and print the token[1] variable, it gives me the segmentation fault. I used token[0], but it gave me all the pathnames as it was printed with the path variable but without \n sign concatenated with pathnames.
/*
 * Lists the entries of dirname: for each entry it builds a string from
 * dirname and the entry name, appends a newline, splits the string on "\n"
 * with strtok() and prints the second token.
 *
 * NOTE(review): declared to return int, but no path returns a value.
 */
int watch(char *dirname) {
DIR *dir;
struct dirent *entity;
char path[500] = { 0 };
char *token[2];
dir = opendir(dirname);
if (dir != NULL) {
while (entity = readdir(dir)) {
// NOTE(review): "%s%s" puts no '/' between the directory and the entry name.
snprintf(path, sizeof(path), "%s%s", dirname, entity->d_name);
// NOTE(review): if snprintf filled the buffer (499 chars), this strcat
// writes one byte past the end of path.
strcat(path, "\n");
// path holds exactly one "\n", at its end, so the first call returns
// the whole name and the second call finds no further token.
token[0] = strtok(path, "\n");
token[1] = strtok(NULL, "\n");
// NOTE(review): token[1] is NULL here; passing NULL to "%s" is undefined behavior.
printf("%s\n", token[1]);
}
closedir(dir);
} else {
perror("Couldn't open the directory");
}
}
The given code does not make sense and it is unclear what the real purpose is.
As it stands token[1] will always be NULL. As the string to tokenize is something like "dirfilename\n" and then split on \n. The second call to strtok always returns NULL.
So the question remains, what is the purpose of this code?
When concatenating the dirname and filename, you do not insert a / between them. Use "%s/%s" to make it a pathname.
It is unclear why there should be any ; in the pathname. This might be a confusion with the PATH variable.
Here is a simple program to enumerate files from the directories in the PATH variable:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <dirent.h>
/*
 * Prints the full path of every entry (except "." and "..") in the
 * directory named by the first len characters of p.
 *
 * Returns the number of entries printed, 0 if the directory cannot be
 * opened, or -1 if the directory name does not fit in the local buffer.
 */
int list_path(const char *p, int len) {
    char path[1024];
    int count = 0;

    if (len + 2 > (int)sizeof(path))
        return -1;

    /* copy just this component; p is not NUL-terminated at len */
    snprintf(path, sizeof(path), "%.*s", len, p);

    DIR *dir = opendir(path);
    if (dir == NULL)
        return count;

    for (struct dirent *entity = readdir(dir); entity != NULL; entity = readdir(dir)) {
        const char *name = entity->d_name;

        if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
            continue;
        count++;
        /* overwrite the previous entry name right after the directory part */
        snprintf(path + len, sizeof(path) - len, "/%s", name);
        printf("%s\n", path);
    }
    closedir(dir);
    return count;
}
/* Lists the contents of every directory named in the PATH variable. */
int main() {
    const char *pathsep = ":"; // use `;` for Windows
    char *cursor = getenv("PATH");

    if (cursor == NULL)
        return 0;

    while (*cursor != '\0') {
        int len = strcspn(cursor, pathsep);

        list_path(cursor, len);
        cursor += len;
        if (*cursor != '\0')
            cursor++;   // step over the separator
    }
    return 0;
}
The following C code will list the amount of files and directories and will do it 4 times faster than the linux find command. I need only the count of the folders, not interested in the file count and even listing them. Is there a way to optimize the below code and make it more efficient?
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>
/*
 * Prints path, then every non-directory entry below it, recursing into
 * subdirectories. The caller's buffer is extended in place for each
 * subdirectory and restored afterwards.
 *
 * path: writable buffer holding the directory name.
 * size: total capacity of that buffer in bytes.
 */
void listdir(char *path, size_t size) {
    const size_t len = strlen(path);
    struct dirent *entry;
    DIR *dir = opendir(path);

    if (dir == NULL) {
        fprintf(stderr, "path not found: %s: %s\n",
                path, strerror(errno));
        return;
    }
    puts(path);
    for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
        char *name = entry->d_name;

        if (entry->d_type == DT_DIR) {
            const int is_dot = strcmp(name, ".") == 0;
            const int is_dotdot = strcmp(name, "..") == 0;

            if (is_dot || is_dotdot)
                continue;
            /* room needed: existing path + '/' + name + NUL */
            if (len + strlen(name) + 2 <= size) {
                path[len] = '/';
                strcpy(path + len + 1, name);
                listdir(path, size);
                path[len] = '\0';   /* restore the caller's path */
            } else {
                fprintf(stderr, "path too long: %s/%s\n", path, name);
            }
        } else {
            printf("%s/%s\n", path, name);
        }
    }
    closedir(dir);
}
/*
 * Entry point: expects the directory to list as the single command-line
 * argument, copies it into a local buffer and hands it to listdir().
 */
int main( int argc, char *argv[] ) {
if( argc == 2 ) {
printf("Path: %s\n", argv[1]);
}
else if( argc > 2 ) {
// NOTE(review): only prints a message; execution continues and argv[1] is still listed.
printf("Too many arguments supplied.\n");
}
else {
printf("One argument expected.\n");
return 0;
}
char path[1024];
// NOTE(review): always copies 1024 bytes regardless of the argument's
// length, so it can read past the end of argv[1], and a longer argument
// leaves path without a terminating NUL.
memcpy (path, argv[1],1024);
listdir(path, sizeof path);
return 0;
}
Removing the following lines will of course not display the files , but will not speed up the execution time :
} else {
printf("%s/%s\n", path, name);
}
If you are not interested in printing the filenames, just remove the printf statements.
Note however that there are some problems in the code:
memcpy(path, argv[1], 1024); may read beyond the end of the string pointed to by argv[1], which is undefined behavior, or not produce a proper C string, which leads to undefined behavior in the function listdir.
You could also avoid recomputing the length of the directory name in each recursive call.
Here is a modified version you can try:
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
/*
 * Counts the directories in the tree rooted at path, including path itself.
 *
 * path: writable buffer holding the directory name; extended in place for
 *       each subdirectory and restored before returning.
 * size: total capacity of the buffer in bytes.
 * len:  current length of the string in path.
 *
 * Returns 0 if path cannot be opened. A subdirectory whose name would not
 * fit in the buffer is counted but not descended into.
 */
long long countdirs(char *path, size_t size, size_t len) {
    DIR *dir = opendir(path);

    if (dir == NULL) {
        fprintf(stderr, "path not found: %s: %s\n",
                path, strerror(errno));
        return 0;
    }

    long long total = 1; // count this directory
    struct dirent *entry;

    while ((entry = readdir(dir)) != NULL) {
        if (entry->d_type != DT_DIR)
            continue;

        char *name = entry->d_name;
        if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
            continue;

        size_t name_len = strlen(name);
        /* room needed: existing path + '/' + name + NUL */
        if (len + name_len + 2 > size) {
            total++;
            fprintf(stderr, "path too long: %s/%s\n", path, name);
            continue;
        }
        path[len] = '/';
        memcpy(path + len + 1, name, name_len + 1);
        total += countdirs(path, size, len + 1 + name_len);
        path[len] = '\0';   /* restore the caller's path */
    }
    closedir(dir);
    return total;
}
/*
 * Entry point: copies the single directory argument into a work buffer and
 * prints how many directories the tree below it contains.
 * Exits with status 1 on a usage error or an over-long argument.
 */
int main(int argc, char *argv[]) {
    char buf[4096];

    if (argc != 2) {
        fprintf(stderr, "one argument expected.\n");
        return 1;
    }

    char *path = argv[1];
    size_t len = strlen(path);

    if (len >= sizeof(buf)) {
        fprintf(stderr, "path too long: %s\n", path);
        return 1;
    }
    memcpy(buf, path, len + 1);   /* includes the terminating NUL */

    long long ndirs = countdirs(buf, sizeof buf, len);
    printf("%s: %lld directories\n", path, ndirs);
    return 0;
}
Further notes:
The above code might fail if the directory tree is too deep or has loops. Failure may come from running out of handles causing opendir to fail.
You should try an alternative approach using the POSIX standard function nftw() as documented in this answer: https://stackoverflow.com/a/29402705/4593267
As suggested by EOF, since paths are not used, constructing them is not required. It might be safer and more efficient to use openat() and fdopendir(). (documented here: https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html https://pubs.opengroup.org/onlinepubs/9699919799/functions/fdopendir.html ).
There is little point in optimizing this function as most of the time is spent in the OS or waiting for the storage device. The effect of file system caching may be huge: I measured 15x on linux for 133000 directories. Using a different set of system calls may improve or worsen the speed, but small improvements are probably highly system specific.
I'm currently trying to make a c program that counts the total file size in bytes of a given directory, including the directory itself, all files within, and all files and directories in all sub-directories. Essentially, I'm being asked to program a replacement for du -b as a command.
I thought I had a working solution, but after the first directory this program thinks that all entries deeper down are directories, even when they're just regular files. This includes when I give it a directory that's one degree deeper directly, say by giving it the input ./Directory1 instead of just ..
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <dirent.h>
/*
 * Sums the sizes of the "." entry and the regular files in direc, recursing
 * into subdirectories, and prints progress along the way.
 *
 * Returns the accumulated total in bytes, or 0 if direc cannot be opened.
 */
int du_function(char direc[]) {
int total = 0;
// NOTE(review): fixed 100-byte work buffer; strcpy/strcat below do no bounds checking.
char str[100];
strcpy(str, direc);
struct stat sfile;
struct dirent *de;
DIR *dr = opendir(direc);
if (dr == NULL) {
printf("Could not open directory\n");
return 0;
}
while ((de = readdir(dr)) != NULL) {
printf("%s\n", de->d_name);
// NOTE(review): stat() gets the bare entry name, which is resolved against
// the process's current directory rather than direc. The return value is
// ignored, so on failure sfile keeps whatever it held before.
stat(de->d_name, &sfile);
if (S_ISDIR(sfile.st_mode) && strcmp(de->d_name, ".") != 0 &&
strcmp(de->d_name, "..") != 0) {
// Build "<direc>/<entry>" for the recursive call, then reset str to direc.
strcat(str, "/");
strcat(str, de->d_name);
printf("This is a directory called %s\n", str);
total = total + du_function(str);
strcpy(str, direc);
}
// "." is included so that the directory's own size is counted once.
if (S_ISREG(sfile.st_mode) || strcmp(de->d_name, ".") == 0) {
printf("Size in bytes = %ld\n", sfile.st_size);
total = total + sfile.st_size;
printf("The total is %d bytes so far.\n", total);
}
}
printf("The total is %d bytes.\n", total);
closedir(dr);
return total;
}
/*
 * Entry point: runs du_function() on the directory given as the first
 * argument, or on "." when no argument is supplied.
 */
int main(int argc, char *argv[]) {
if (argc < 2) {
// NOTE(review): a 1-element array holds '.' but has no room for the
// terminating NUL, so cwd is not a valid C string.
char cwd[1] = ".";
du_function(cwd);
} else
du_function(argv[1]);
return 0;
}
I'm at my wit's end here and have tried a variety of solutions, but for some reason S_ISDIR must be identifying things as directories that aren't (or receiving the wrong input from me, more likely.)
char cwd[1] = "."; is too small for a string. No room for the null character.
Use [] and let the compiler determine the needed size for the string.
char cwd[] = ".";
stat() call is based on local name and not full path.
#if 0
stat(de->d_name, &sfile);
#else
char buf[500];
snprintf(buf, sizeof buf, "%s/%s", direc, de->d_name);
stat(buf, &sfile);
#endif
Similar finding in (deleted) #PSkocik
Perhaps other issues exist.
Just like you construct the path for the recursive call, you must construct the filename with the leading directory for the stat system call. Your program only works for the files in the current directory.
An effective way to fix your program is pass a large enough buffer to the recursive function and build the path and filenames incrementally in place:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <dirent.h>
/*
 * Computes the disk usage, in bytes, of the tree rooted at path: the size of
 * each directory's own "." entry plus the sizes of all regular files, with
 * subdirectories handled recursively. Prints "<total>\t<path>" for every
 * directory visited.
 *
 * path: writable buffer holding the directory name. Entry names are appended
 *       in place and the buffer is restored to the original name on return.
 * size: total capacity of the buffer in bytes.
 *
 * Returns the total for this directory, or 0 if it cannot be opened.
 * Entries whose path does not fit in the buffer, or that cannot be stat'ed,
 * are reported and skipped.
 */
long int du_function(char path[], size_t size) {
    size_t len = strlen(path);
    long int total = 0;
    struct stat sfile;
    struct dirent *de;
    DIR *dr = opendir(path);

    if (dr == NULL) {
        printf("Could not open directory %s\n", path);
        return 0;
    }
    while ((de = readdir(dr)) != NULL) {
        int is_self;
        int n;

        if (strcmp(de->d_name, "..") == 0)
            continue;
        is_self = strcmp(de->d_name, ".") == 0;

        /* snprintf returns the length it wanted to write; the result was
         * truncated whenever that length does not leave room for the NUL,
         * i.e. when n >= available space (not just n > available space). */
        n = snprintf(path + len, size - len, "/%s", de->d_name);
        if (n < 0 || (size_t)n >= size - len) {
            path[len] = '\0';
            printf("Path too long: %s/%s\n", path, de->d_name);
            continue;
        }
        /* never look at sfile unless stat() actually filled it in */
        if (stat(path, &sfile) != 0) {
            printf("Could not stat %s\n", path);
            continue;
        }
        if (S_ISDIR(sfile.st_mode) && !is_self) {
            total = total + du_function(path, size);
        } else if (S_ISREG(sfile.st_mode) || is_self) {
            /* "." contributes the directory's own size */
            total = total + sfile.st_size;
        }
    }
    path[len] = '\0';   /* restore the caller's path */
    printf("%ld\t%s\n", total, path);
    closedir(dr);
    return total;
}
/*
 * Entry point: reports the disk usage of the directory given as the first
 * argument, or of "." when no argument is supplied.
 * Exits with status 1 if the argument does not fit in the path buffer.
 */
int main(int argc, char *argv[]) {
    char path[1024] = ".";

    if (argc > 1) {
        size_t len = strlen(argv[1]);

        /* the argument is user input: refuse it rather than overflow path */
        if (len >= sizeof path) {
            fprintf(stderr, "Path too long: %s\n", argv[1]);
            return 1;
        }
        memcpy(path, argv[1], len + 1);
    }
    du_function(path, sizeof path);
    return 0;
}
The goal is to compare files by size and filter those of the same size.
For that you need to compare every file to every file.
However, the first loop doesn't work, so the scan of the first directory is stuck at the first file.
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
/*
 * Entry point: opens the directories named by v[1] and v[2] and, for each
 * entry of the first, walks the second and prints the pairs it considers
 * equal in size.
 *
 * NOTE(review): argc is never checked before v[1]..v[3] are used, and the
 * function ends without a return statement or any closedir()/fclose().
 */
int main(int argc, char *v[]) {
struct dirent *d_verzeichnis1;
struct dirent *d_verzeichnis2;
DIR *dir1;
DIR *dir2;
// NOTE(review): "W" is not a valid fopen mode; the stream is never used afterwards.
FILE *file = fopen(v[3], "W");
// NOTE(review): neither opendir() result is checked before being passed to readdir().
dir1 = opendir(v[1]);
dir2 = opendir(v[2]);
struct stat filesize1;
struct stat filesize2;
while ((d_verzeichnis1 = readdir(dir1)) != NULL) {
// NOTE(review): stat() receives the bare entry name, not a path inside
// v[1]; its return value is ignored.
stat((d_verzeichnis1->d_name), &filesize1);
// NOTE(review): dir2 is never rewound, so this inner loop only yields
// entries during the first outer iteration.
while ((d_verzeichnis2 = readdir(dir2)) != NULL) {
stat((d_verzeichnis2->d_name), &filesize2);
// NOTE(review): the ';' ends the if statement, so the block below runs unconditionally.
if (filesize1.st_size == filesize2.st_size);
{
printf("%s und %s sind gleich\n",
d_verzeichnis1->d_name, d_verzeichnis2->d_name);
}
}
// NOTE(review): this extra readdir() consumes an entry that the loop
// condition then replaces, so every second entry of dir1 is skipped.
d_verzeichnis1 = readdir(dir1);
}
}
There are multiple problems in your code:
you should verify the actual number of arguments provided on the command line to avoid undefined behavior if fewer than 3 were provided.
fopen(v[3], "W"); uses an invalid mode string, you should use "w". It is unclear what this stream pointer is used for anyway.
dir1 and dir2 are not tested: you have undefined behavior if opendir() fails.
stat is called with the directory entry name, which is not a relative pathname to the file if the directory is different from the current directory. You should construct the path name from the directory name and entry name.
if (filesize1.st_size == filesize2.st_size); has an extra ; at the end of the line, causing the following block to execute unconditionally. You should use K&R style with `{` at the end of the line to avoid such silly mistakes.
the logic of parallel scan is incorrect: you should reopen or at least rewind the second directory for each entry in the first to allow a full scan for potential matches.
Here is a corrected version:
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/stat.h>
/*
 * Joins dir and name into dest, inserting a '/' between them unless dir is
 * empty or already ends with one.
 *
 * dest: output buffer.
 * size: capacity of dest in bytes.
 *
 * Returns dest on success, or NULL if the joined path (including its
 * terminating NUL) does not fit in size bytes.
 */
char *buildpath(char *dest, size_t size, const char *dir, const char *name) {
    const size_t dir_len = strlen(dir);
    const int needs_sep = dir_len > 0 && dir[dir_len - 1] != '/';
    const int written = snprintf(dest, size, "%s%s%s", dir, needs_sep ? "/" : "", name);

    /* a negative result becomes a huge unsigned value and is rejected too */
    return (unsigned)written < size ? dest : NULL;
}
/*
 * Entry point: cmpdir dir1 dir2
 *
 * For every regular file in dir1, rescans dir2 and prints each regular file
 * there that has the same size. Entries that are "." or "..", whose path is
 * too long, that cannot be stat'ed, or that are not regular files are
 * silently skipped. Exits with status 1 on a usage error or if either
 * directory cannot be opened.
 */
int main(int argc, char *argv[]) {
    char path1[1024];
    char path2[1024];
    struct stat filesize1;
    struct stat filesize2;

    if (argc < 3) {
        fprintf(stderr, "missing argument\n");
        fprintf(stderr, "usage: cmpdir dir1 dir2\n");
        return 1;
    }

    DIR *dir1 = opendir(argv[1]);
    if (!dir1) {
        fprintf(stderr, "cannt open directory %s: %s\n", argv[1], strerror(errno));
        return 1;
    }
    DIR *dir2 = opendir(argv[2]);
    if (!dir2) {
        fprintf(stderr, "cannt open directory %s: %s\n", argv[2], strerror(errno));
        return 1;
    }

    for (struct dirent *dp1 = readdir(dir1); dp1 != NULL; dp1 = readdir(dir1)) {
        const char *name1 = dp1->d_name;

        /* the checks short-circuit in this order: dot entries, path too
         * long, stat failure, not a regular file */
        if (strcmp(name1, ".") == 0 || strcmp(name1, "..") == 0
        ||  buildpath(path1, sizeof path1, argv[1], name1) == NULL
        ||  stat(path1, &filesize1) != 0
        ||  !S_ISREG(filesize1.st_mode))
            continue;

        /* restart the second directory for each candidate from the first */
        rewinddir(dir2);
        for (struct dirent *dp2 = readdir(dir2); dp2 != NULL; dp2 = readdir(dir2)) {
            const char *name2 = dp2->d_name;

            if (strcmp(name2, ".") == 0 || strcmp(name2, "..") == 0
            ||  buildpath(path2, sizeof path2, argv[2], name2) == NULL
            ||  stat(path2, &filesize2) != 0
            ||  !S_ISREG(filesize2.st_mode))
                continue;

            if (filesize1.st_size != filesize2.st_size)
                continue;

            printf("%s and %s have the same size %llu\n",
                   path1, path2, (unsigned long long)filesize1.st_size);
            /* perform actual comparison... */
        }
    }
    closedir(dir1);
    closedir(dir2);
    return 0;
}
I have a program that recursively prints the cwd size plus containing file sizes and repeats for each sub directory.
Recursive directory traversal function: (Note the reason for printf in this function and passing two strings is that the output needs to be in a special format so I can't just output the actual filepath. Also I am just learning about system calls to work with directories in ubuntu so if you have any comments on improvements on the code I would appreciate them (style or using something more simple to accomplish the same).
#include <sys/stat.h>
#include <limits.h>
#include <dirent.h>
#include <libgen.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Boolean results returned by the print helpers below. */
#define TRUE 1
#define FALSE 0
/* main() compares argc against FIRST_ARG: equal means no command-line arguments were given. */
#define FIRST_ARG 1
/* Not referenced by the code shown here. */
#define SECOND_ARG 2
/* File-scope state. printCurrentDir() declares its own local fileName, which hides this one. */
char* fileName;
/* Not referenced by the code shown here. */
FILE* fileToRead;
/* Used by main() to check that "." can be opened. */
DIR* directoryToRead;
/* Forward declarations. printFileSize, printSize and printDir have no definition in the code shown here. */
int printFileSize(char*);
int printWorkingSize(char*, char*);
int printSize(char*, char*);
int printDir(char*, char*);
int printCurrentDir(char*, char*);
int bytesToKbytes(long long, char*);
/*
 * Entry point: when run without arguments, prints the size of the current
 * directory and of everything below it. Exits with EXIT_FAILURE if "."
 * cannot be opened. With arguments it does nothing.
 */
int main(int argc, char** argv) {
    if (argc == FIRST_ARG) {
        /* make sure "." is readable before starting the walk */
        directoryToRead = opendir(".");
        if (directoryToRead == NULL) {
            exit(EXIT_FAILURE);
        }
        closedir(directoryToRead);
        directoryToRead = NULL;   /* the handle is closed; do not leave it dangling */

        /* NULL name = top level: printCurrentDir() then uses bare entry
         * names as display names instead of reading an uninitialized pointer */
        printCurrentDir(NULL, ".");
    }
    return 0;
}
/*
 * Prints the size of the directory at path, then walks its entries:
 * subdirectories are handled recursively and every other entry is passed to
 * printWorkingSize().
 *
 * name: display name of this directory, or NULL for the top level (entries
 *       are then displayed by their bare names).
 * path: path used to open and stat the directory.
 *
 * Returns TRUE on success. Returns FALSE if the directory cannot be opened
 * or stat'ed, or as soon as a nested call reports FALSE. The directory
 * handle is closed on every exit path. Entries whose display name or path
 * would not fit in the local buffers are reported on stderr and skipped.
 */
int printCurrentDir(char* name, char* path) {
    struct dirent *dir;
    struct stat statBuffer;
    char displayName[PATH_MAX + 1];   /* deliberately not called fileName: that is a global */
    char filePath[PATH_MAX + 1];
    int ok = TRUE;
    DIR* openDir = opendir(path);

    if (openDir == NULL) {
        printf("Could not open %s\n", path);
        return FALSE;   /* readdir() on a NULL handle is not an option */
    }
    if (stat(path, &statBuffer) != 0) {
        printf("Could not stat %s\n", path);
        closedir(openDir);
        return FALSE;
    }
    if (strcmp(path, ".") == 0) {
        printf("%lld .\n", (long long)statBuffer.st_size);
    } else {
        printf("%lld %s\n", (long long)statBuffer.st_size, name);
    }

    /* go through contents of current directory */
    while ((dir = readdir(openDir)) != NULL) {
        int n;
        int result;

        if ((strcmp(dir->d_name, "..") == 0) || (strcmp(dir->d_name, ".") == 0)) {
            continue;
        }

        if (name == NULL) {
            n = snprintf(displayName, sizeof displayName, "%s", dir->d_name);
        } else {
            n = snprintf(displayName, sizeof displayName, "%s/%s", name, dir->d_name);
        }
        if (n < 0 || (size_t)n >= sizeof displayName) {
            fprintf(stderr, "Name too long: %s\n", dir->d_name);
            continue;
        }

        n = snprintf(filePath, sizeof filePath, "%s/%s", path, dir->d_name);
        if (n < 0 || (size_t)n >= sizeof filePath) {
            fprintf(stderr, "Path too long: %s/%s\n", path, dir->d_name);
            continue;
        }

        if (dir->d_type == DT_DIR) { /* if the next file is a directory */
            result = printCurrentDir(displayName, filePath);
        } else {
            result = printWorkingSize(displayName, filePath);
        }
        if (!result) {
            ok = FALSE;
            break;   /* fall through to closedir() instead of returning directly */
        }
    }
    closedir(openDir);
    return ok;
}
/*
 * Prints one line for the entry at path: its size, right-aligned in a field
 * of at least five characters, followed by path. Sizes that bytesToKbytes()
 * accepts are shown in its abbreviated form; all others are shown in bytes.
 * The file argument is not used. Always returns TRUE.
 */
int printWorkingSize(char* file, char* path) {
    char sizeText[PATH_MAX];
    struct stat info;

    stat(path, &info);
    const long long byteCount = (long long)info.st_size;

    /* fall back to the plain byte count when no abbreviation was written */
    if (!bytesToKbytes(byteCount, sizeText)) {
        sprintf(sizeText, "%lld", byteCount);
    }
    printf("%5s %s\n", sizeText, path);
    return TRUE;
}
/*
 * Abbreviates a byte count of more than five digits as whole kilobytes
 * (units of 1000 bytes, rounded down), e.g. 123456 -> "123K" and
 * 1234567 -> "1234K".
 *
 * bytes:  the size to format.
 * result: output buffer; must have room for the digits, 'K' and the NUL
 *         (21 bytes is enough for any long long). It is left untouched
 *         when FALSE is returned.
 *
 * Returns TRUE if result was written, FALSE if bytes is 99999 or less and
 * the caller should print the plain byte count instead.
 */
int bytesToKbytes(long long bytes, char* result) {
    if (bytes <= 99999) {
        return FALSE;
    }
    /* divide instead of keeping the first three digits, so that values with
     * seven or more digits are not cut down to the wrong magnitude */
    sprintf(result, "%lldK", bytes / 1000);
    return TRUE;
}
It is operating system specific. On Linux and POSIX, you should simply use the nftw(3) library function, which is recursively reading the directories and already calling stat(2) for you.
The problem is effectively in the way you use bytesToKbytes. It returns a boolean to indicate if it wrote anything in result and :
you do not preload anything in result before calling bytesToKbytes
you do not test the return value to write anything in result if the size was less than 99999
You could use :
int printWorkingSize(char* file, char* path) {
struct stat statBuffer;
stat(path, &statBuffer);
char result[PATH_MAX];
if (! bytesToKbytes((long long)statBuffer.st_size, result)) {
sprintf(result, "%lld", (long long)statBuffer.st_size);
}
printf("%5s %s\n", result, path);
return TRUE;
}
You could also change bytesToKbytes to put the length in bytes into result if the size is less than 99999