How to get total size of subdirectories in C? - c

Is there a way to get the total size of subdirectories with recursivity?
For example:
Dir1
-- FileA.txt (90)
-- SubDir2 (4096)
---- FileB.txt (100)
---- FileC.txt (400)
When I try to get the size of SubDir2, it gives me 4186. The expected answer I wish to get is 4596.
My program counts the size of FileA instead of FileB and FileC. How to make it count FileB and FileC?
To start, I have to use these headers:
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
Then, I made this function to call it recursively:
/* Directory stream shared by main() and by every goDir() invocation. */
static DIR *dir;
/*
 * Reads entries from the file-scope stream `dir` and prints type, mode,
 * size and name for each one, adding each entry's st_size to a running total.
 * argv[1] is used as the directory prefix when building the path for lstat().
 */
void goDir(char* argv[])
{
struct dirent *dit;
struct stat st;
int size = 0;
/* static: a single accumulator shared by all (recursive) calls */
static int total_size = 0;
char filePath[NAME_MAX];
while ((dit = readdir(dir)) != NULL)
{
sprintf(filePath, "%s/%s", argv[1], dit->d_name); // To show correctly the size and name of files
/* The return value of lstat() is not checked here. */
lstat(filePath, &st);
// To skip . and ..
if ( (strcmp(dit->d_name, ".") == 0) || (strcmp(dit->d_name, "..") == 0) ) continue;
size = st.st_size;
if(S_ISDIR(st.st_mode))
{
/* NOTE(review): the recursive call receives the same argv and keeps reading
   from the same `dir` stream; no opendir() is performed for the subdirectory. */
goDir(argv); // For Recursivity
total_size += size;
printf("DIR\t");
printf("MODE: %lo\t", (unsigned long) st.st_mode);
/* For directories the running total is printed, not the entry's own size. */
printf("SIZE: %d\t", total_size);
printf("%s\n", dit->d_name);
}
else
{
total_size += size;
printf("FILES\t");
printf("MODE: %lo\t", (unsigned long) st.st_mode);
printf("SIZE: %d\t", size);
printf("%s\n", dit->d_name);
}
}
}
And then, my main program:
/* Entry point: expects exactly one argument, the directory to scan. */
int main (int argc, char *argv[])
{
if ( argc != 2 ) {
printf("Usage: Program <Directory>\n");
return 1;
}
/* Open the top-level directory into the file-scope `dir` that goDir() reads from. */
if ((dir = opendir(argv[1])) == NULL) return 1;
goDir(argv);
closedir(dir);
return 0;
}

Your goDir() function never opens a new directory, or closes the directory once it has finished. That is going to lead to problems — basically, you are not traversing down your directory hierarchy.
You'll need to make dir a local (non-static) variable in the goDir() function; you'll open and close directories; and instead of passing argv, you'll pass a directory name, etc.
While you're at it, you should change goDir() to return the size it finds, and lose the static variable total_size.
This code looks semi-plausible:
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
#ifndef NAME_MAX
#define NAME_MAX 1024
#endif
/*
 * Recursively sums the apparent sizes (st_size) of everything below `dirname`.
 *
 * For each entry other than "." and ".." one line is printed with its type,
 * mode, size and full path.  A subdirectory contributes its own st_size plus
 * the total of its contents.  Entries are examined with lstat(), so symbolic
 * links are counted as links and never followed.
 *
 * Returns the total in bytes, or 0 if `dirname` cannot be opened.  Entries
 * whose path does not fit the local buffer or that cannot be lstat()ed are
 * skipped.
 */
static long goDir(const char *dirname)
{
    /* Room for a full path; NAME_MAX only bounds a single path component. */
    enum { PATH_BUF_SIZE = 4096 };

    DIR *dir = opendir(dirname);
    if (dir == NULL)
        return 0;

    long total_size = 0;
    struct dirent *dit;
    while ((dit = readdir(dir)) != NULL)
    {
        if (strcmp(dit->d_name, ".") == 0 || strcmp(dit->d_name, "..") == 0)
            continue;

        char filePath[PATH_BUF_SIZE];
        int n = snprintf(filePath, sizeof filePath, "%s/%s", dirname, dit->d_name);
        if (n < 0 || (size_t)n >= sizeof filePath)
        {
            /* A truncated path would name the wrong file, so skip the entry. */
            fprintf(stderr, "path too long: %s/%s\n", dirname, dit->d_name);
            continue;
        }

        struct stat st;
        if (lstat(filePath, &st) != 0)
            continue;

        long size = st.st_size;
        if (S_ISDIR(st.st_mode))
        {
            long dir_size = goDir(filePath) + size;
            printf("DIR\t");
            printf("MODE: %lo\t", (unsigned long) st.st_mode);
            printf("SIZE: %ld\t", dir_size);
            printf("%s\n", filePath);
            total_size += dir_size;
        }
        else
        {
            total_size += size;
            printf("FILES\t");
            printf("MODE: %lo\t", (unsigned long) st.st_mode);
            printf("SIZE: %ld\t", size);
            printf("%s\n", filePath);
        }
    }
    /* Release the stream; otherwise every visited directory leaks a descriptor. */
    closedir(dir);
    return total_size;
}
/*
 * Entry point: takes exactly one directory argument, lets goDir() walk it
 * and prints the grand total.  Any other argument count prints a usage line
 * on stderr and exits with status 1.
 */
int main(int argc, char *argv[])
{
    if (argc == 2)
    {
        long grand_total = goDir(argv[1]);
        printf("Total size: %ld\n", grand_total);
        return 0;
    }

    fprintf(stderr, "Usage: %s <Directory>\n", argv[0]);
    return 1;
}
It compiles and runs. It does not give the same answer as du -k, mainly because it does not round up the size of files to an integral number of disk blocks. You'd need to look at st_blksize and/or st_blocks in the struct stat to get that information.

You'll have to use a hash table to keep track of the inodes you have already seen, so that files with several hard links are not counted twice.

Related

Fastest C code to recursively count directories in Linux (without files)

The following C code will list the amount of files and directories and will do it 4 times faster than the linux find command. I need only the count of the folders, not interested in the file count and even listing them. Is there a way to optimize the below code and make it more efficient?
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>
/*
 * Prints `path`, then walks its entries: subdirectories are descended into
 * recursively, every other entry is printed as "path/name".
 * `path` is a writable buffer of `size` bytes; each child name is appended to
 * it in place and the buffer is cut back to its original length afterwards.
 */
void listdir(char *path, size_t size) {
DIR *dir;
struct dirent *entry;
/* Remember where this directory's name ends so children can be appended there. */
size_t len = strlen(path);
if (!(dir = opendir(path))) {
fprintf(stderr, "path not found: %s: %s\n",
path, strerror(errno));
return;
}
puts(path);
while ((entry = readdir(dir)) != NULL) {
char *name = entry->d_name;
if (entry->d_type == DT_DIR) {
if (!strcmp(name, ".") || !strcmp(name, ".."))
continue;
/* +2 accounts for the '/' separator and the terminating NUL. */
if (len + strlen(name) + 2 > size) {
fprintf(stderr, "path too long: %s/%s\n", path, name);
} else {
path[len] = '/';
strcpy(path + len + 1, name);
listdir(path, size);
/* Truncate back to this directory's own path. */
path[len] = '\0';
}
} else {
printf("%s/%s\n", path, name);
}
}
closedir(dir);
}
/* Entry point: copies argv[1] into a local 1024-byte buffer and lists it. */
int main( int argc, char *argv[] ) {
if( argc == 2 ) {
printf("Path: %s\n", argv[1]);
}
else if( argc > 2 ) {
/* Only a message is printed here; execution continues below. */
printf("Too many arguments supplied.\n");
}
else {
printf("One argument expected.\n");
return 0;
}
char path[1024];
/* Copies a fixed 1024 bytes, independent of the actual length of argv[1]. */
memcpy (path, argv[1],1024);
listdir(path, sizeof path);
return 0;
}
Removing the following lines will of course stop the files from being displayed, but it does not speed up the execution time:
} else {
printf("%s/%s\n", path, name);
}
If you are not interested in printing the filenames, just remove the printf statements.
Note however that there are some problems in the code:
memcpy(path, argv[1], 1024); may read beyond the end of the string pointed to by argv[1], which is undefined behavior, or not produce a proper C string, which leads to undefined behavior in the function listdir.
You could also avoid recomputing the length of the directory name in each recursive call.
Here is a modified version you can try:
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
/*
 * Counts the directories in the tree rooted at `path`, the root included.
 *
 * path: writable buffer holding the directory name; child names are appended
 *       in place while descending and removed again before returning.
 * size: capacity of the buffer in bytes.
 * len:  current length of the string stored in `path`.
 *
 * Returns 0 when `path` cannot be opened (a message goes to stderr).  A
 * subdirectory whose full path would not fit in the buffer is counted but
 * not descended into.
 */
long long countdirs(char *path, size_t size, size_t len) {
    DIR *stream = opendir(path);
    if (stream == NULL) {
        fprintf(stderr, "path not found: %s: %s\n",
                path, strerror(errno));
        return 0;
    }

    long long found = 1; /* the directory we just opened */
    for (struct dirent *ent = readdir(stream); ent != NULL; ent = readdir(stream)) {
        /* Only entries reported as directories by readdir() are considered. */
        if (ent->d_type != DT_DIR)
            continue;

        const char *child = ent->d_name;
        if (strcmp(child, ".") == 0 || strcmp(child, "..") == 0)
            continue;

        size_t child_len = strlen(child);
        /* Need room for '/', the child name and the terminating NUL. */
        if (len + child_len + 2 > size) {
            found++;
            fprintf(stderr, "path too long: %s/%s\n", path, child);
            continue;
        }

        path[len] = '/';
        memcpy(path + len + 1, child, child_len + 1);
        found += countdirs(path, size, len + 1 + child_len);
        path[len] = '\0'; /* restore the buffer to this directory's path */
    }

    closedir(stream);
    return found;
}
/*
 * Entry point: expects exactly one argument naming the root directory,
 * copies it into a scratch buffer that countdirs() may extend in place,
 * and prints the number of directories found.
 */
int main(int argc, char *argv[]) {
    char scratch[4096];

    if (argc != 2) {
        fprintf(stderr, "one argument expected.\n");
        return 1;
    }

    const char *root = argv[1];
    size_t root_len = strlen(root);
    if (root_len >= sizeof(scratch)) {
        fprintf(stderr, "path too long: %s\n", root);
        return 1;
    }

    /* Copy the name together with its terminating NUL. */
    memcpy(scratch, root, root_len + 1);
    long long ndirs = countdirs(scratch, sizeof scratch, root_len);
    printf("%s: %lld directories\n", root, ndirs);
    return 0;
}
Further notes:
The above code might fail if the directory tree is too deep or has loops. Failure may come from running out of handles causing opendir to fail.
You should try an alternative approach using the POSIX standard function nftw() as documented in this answer: https://stackoverflow.com/a/29402705/4593267
As suggested by EOF, since paths are not used, constructing them is not required. It might be safer and more efficient to use openat() and fdopendir(). (documented here: https://pubs.opengroup.org/onlinepubs/9699919799/functions/open.html https://pubs.opengroup.org/onlinepubs/9699919799/functions/fdopendir.html ).
There is little point in optimizing this function, as most of the time is spent in the OS or waiting for the storage device. The effect of file system caching may be huge: I measured a 15x difference on Linux for 133000 directories. Using a different set of system calls may improve or worsen the speed, but small improvements are probably highly system-specific.

Trying to print total space in bytes in directory and all sub directories

I'm currently trying to make a c program that counts the total file size in bytes of a given directory, including the directory itself, all files within, and all files and directories in all sub-directories. Essentially, I'm being asked to program a replacement for du -b as a command.
I thought I had a working solution, but after the first directory this program thinks that all entries deeper down are directories, even when they're just regular files. This includes when I give it a directory that's one degree deeper directly, say by giving it the input ./Directory1 instead of just ..
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <dirent.h>
/*
 * Walks directory `direc`, printing each entry name, and returns the sum of
 * the sizes it collected; subdirectories are handled by recursive calls.
 */
int du_function(char direc[]) {
int total = 0;
/* Scratch copy of the directory name, used to build subdirectory paths. */
char str[100];
strcpy(str, direc);
struct stat sfile;
struct dirent *de;
DIR *dr = opendir(direc);
if (dr == NULL) {
printf("Could not open directory\n");
return 0;
}
while ((de = readdir(dr)) != NULL) {
printf("%s\n", de->d_name);
/* stat() is given the bare entry name, without the `direc` prefix. */
stat(de->d_name, &sfile);
if (S_ISDIR(sfile.st_mode) && strcmp(de->d_name, ".") != 0 &&
strcmp(de->d_name, "..") != 0) {
strcat(str, "/");
strcat(str, de->d_name);
printf("This is a directory called %s\n", str);
total = total + du_function(str);
/* Reset the scratch path for the next entry. */
strcpy(str, direc);
}
/* "." is counted too, so the directory's own size is included. */
if (S_ISREG(sfile.st_mode) || strcmp(de->d_name, ".") == 0) {
printf("Size in bytes = %ld\n", sfile.st_size);
total = total + sfile.st_size;
printf("The total is %d bytes so far.\n", total);
}
}
printf("The total is %d bytes.\n", total);
closedir(dr);
return total;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
char cwd[1] = ".";
du_function(cwd);
} else
du_function(argv[1]);
return 0;
}
I'm at my wit's end here and have tried a variety of solutions, but for some reason S_ISDIR must be identifying things as directories that aren't (or receiving the wrong input from me, more likely.)
char cwd[1] = "."; is too small for a string. There is no room for the null character.
Use [] and let the compiler determine the needed size for the string.
char cwd[] = ".";
stat() call is based on local name and not full path.
#if 0
stat(de->d_name, &sfile);
#else
char buf[500];
snprintf(buf, sizeof buf, "%s/%s", direc, de->d_name);
stat(buf, &sfile);
#endif
A similar finding was reported in a (since deleted) answer by @PSkocik.
Perhaps other issues exist.
Just like you construct the path for the recursive call, you must construct the filename with the leading directory for the stat system call. Your program only works for the files in the current directory.
An effective way to fix your program is pass a large enough buffer to the recursive function and build the path and filenames incrementally in place:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <dirent.h>
/*
 * Computes the total size in bytes of directory `path`: the directory itself
 * (via its "." entry), every regular file inside it, and, recursively, every
 * subdirectory.  One "<total>\t<path>" line is printed per directory.
 *
 * path: writable buffer holding the directory name; entry names are appended
 *       in place during the scan and the buffer is restored before returning.
 * size: capacity of the buffer in bytes.
 *
 * Returns the total, or 0 if the directory cannot be opened.  Entries whose
 * path would not fit in the buffer, or that cannot be stat()ed, are reported
 * and skipped.
 *
 * NOTE(review): stat() follows symbolic links, so a link pointing back up the
 * tree is descended into until the path no longer fits in the buffer.
 */
long int du_function(char path[], size_t size) {
    size_t len = strlen(path);
    size_t room = size - len;   /* bytes left in the buffer, NUL included */
    long int total = 0;

    DIR *dr = opendir(path);
    if (dr == NULL) {
        printf("Could not open directory %s\n", path);
        return 0;
    }

    struct dirent *de;
    while ((de = readdir(dr)) != NULL) {
        if (strcmp(de->d_name, "..") == 0)
            continue;
        int is_self = (strcmp(de->d_name, ".") == 0);

        /* snprintf returns the untruncated length, so n >= room means truncation. */
        int n = snprintf(path + len, room, "/%s", de->d_name);
        if (n < 0 || (size_t)n >= room) {
            path[len] = '\0';
            printf("Path too long: %s/%s\n", path, de->d_name);
            continue;
        }

        struct stat sfile;
        if (stat(path, &sfile) != 0) {
            /* Without valid stat data the entry can be neither typed nor sized. */
            fprintf(stderr, "Could not stat %s\n", path);
            continue;
        }

        if (S_ISDIR(sfile.st_mode) && !is_self) {
            total += du_function(path, size);
        } else if (S_ISREG(sfile.st_mode) || is_self) {
            total += sfile.st_size;
        }
    }

    path[len] = '\0';   /* restore the caller's path */
    printf("%ld\t%s\n", total, path);
    closedir(dr);
    return total;
}
/*
 * Entry point: scans argv[1] when given, otherwise the current directory.
 * The name is copied into a local buffer that du_function() extends in place;
 * an argument that does not fit in that buffer is rejected instead of being
 * copied past its end.
 */
int main(int argc, char *argv[]) {
    char path[1024] = ".";

    if (argc > 1) {
        size_t arglen = strlen(argv[1]);
        if (arglen >= sizeof path) {
            fprintf(stderr, "Path too long: %s\n", argv[1]);
            return 1;
        }
        memcpy(path, argv[1], arglen + 1);   /* includes the terminating NUL */
    }
    du_function(path, sizeof path);
    return 0;
}

How to compare multiple files based on only file type and size

The goal is to compare files by size and filter those of the same size.
For that you need to compare every file to every file.
However, the first loop doesn't work, so the search of the first directory is stuck at the first file.
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
/*
 * Reads entries from the directories v[1] and v[2] and prints a line for
 * pairs of entries, using stat() to obtain their sizes.
 */
int main(int argc, char *v[]) {
struct dirent *d_verzeichnis1;
struct dirent *d_verzeichnis2;
DIR *dir1;
DIR *dir2;
/* argc is never checked before v[1]..v[3] are used. */
FILE *file = fopen(v[3], "W");
dir1 = opendir(v[1]);
dir2 = opendir(v[2]);
struct stat filesize1;
struct stat filesize2;
while ((d_verzeichnis1 = readdir(dir1)) != NULL) {
/* stat() receives the bare entry name, without the directory prefix. */
stat((d_verzeichnis1->d_name), &filesize1);
/* dir2 is read onward from its current position; it is never rewound. */
while ((d_verzeichnis2 = readdir(dir2)) != NULL) {
stat((d_verzeichnis2->d_name), &filesize2);
/* The ';' ends the if statement, so the block below always runs. */
if (filesize1.st_size == filesize2.st_size);
{
printf("%s und %s sind gleich\n",
d_verzeichnis1->d_name, d_verzeichnis2->d_name);
}
}
/* A second readdir(dir1) per iteration, in addition to the one in the loop condition. */
d_verzeichnis1 = readdir(dir1);
}
}
There are multiple problems in your code:
you should verify the actual number of arguments provided on the command line to avoid undefined behavior if fewer than 3 were provided.
fopen(v[3], "W"); uses an invalid mode string, you should use "w". It is unclear what this stream pointer is used for anyway.
dir1 and dir2 are not tested: you have undefined behavior if opendir() fails.
stat is called with the directory entry name, which is not a relative pathname to the file if the directory is different from the current directory. You should construct the path name from the directory name and entry name.
if (filesize1.st_size == filesize2.st_size); has an extra ; at the end of the line, causing the following block to execute unconditionally. You should use K&R style, with the { at the end of the line, to avoid such silly mistakes.
the logic of parallel scan is incorrect: you should reopen or at least rewind the second directory for each entry in the first to allow a full scan for potential matches.
Here is a corrected version:
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/stat.h>
/*
 * Joins `dir` and `name` into `dest` (capacity `size` bytes), inserting a
 * '/' between them unless `dir` is empty or already ends with one.
 * Returns `dest` on success, or NULL when the result did not fit or
 * snprintf() reported an error.
 */
char *buildpath(char *dest, size_t size, const char *dir, const char *name) {
    size_t dir_len = strlen(dir);
    int needs_slash = (dir_len != 0 && dir[dir_len - 1] != '/');
    int written = snprintf(dest, size, "%s%s%s", dir, needs_slash ? "/" : "", name);

    /* A negative result becomes a huge unsigned value and is rejected as well. */
    return ((unsigned)written < size) ? dest : NULL;
}
/*
 * Builds the full path of `entry` inside `dir` into `path` and stats it.
 * Returns 1 only when the entry is a regular file whose stat data is now in
 * `st`; returns 0 for "." and "..", for paths that do not fit, for entries
 * that cannot be stat()ed and for anything that is not a regular file.
 */
static int regular_file_info(char *path, size_t size, const char *dir,
                             const struct dirent *entry, struct stat *st) {
    if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
        return 0;
    if (!buildpath(path, size, dir, entry->d_name))
        return 0;   /* path too long */
    if (stat(path, st))
        return 0;   /* cannot stat entry */
    return S_ISREG(st->st_mode) != 0;
}

/*
 * Entry point: cmpdir dir1 dir2.
 * For every regular file in dir1, rescans dir2 from the start and prints
 * each regular file there that has the same size.
 * Returns 1 on a usage error or when a directory cannot be opened.
 */
int main(int argc, char *argv[]) {
    char path1[1024];
    char path2[1024];
    struct dirent *dp1;
    struct dirent *dp2;
    DIR *dir1;
    DIR *dir2;
    struct stat filesize1;
    struct stat filesize2;

    if (argc < 3) {
        fprintf(stderr, "missing argument\n");
        fprintf(stderr, "usage: cmpdir dir1 dir2\n");
        return 1;
    }
    dir1 = opendir(argv[1]);
    if (dir1 == NULL) {
        fprintf(stderr, "cannot open directory %s: %s\n", argv[1], strerror(errno));
        return 1;
    }
    dir2 = opendir(argv[2]);
    if (dir2 == NULL) {
        fprintf(stderr, "cannot open directory %s: %s\n", argv[2], strerror(errno));
        closedir(dir1);   /* do not leak the first stream on this error path */
        return 1;
    }

    while ((dp1 = readdir(dir1)) != NULL) {
        if (!regular_file_info(path1, sizeof path1, argv[1], dp1, &filesize1))
            continue;

        /* Restart the second directory so every file in it is considered. */
        rewinddir(dir2);
        while ((dp2 = readdir(dir2)) != NULL) {
            if (!regular_file_info(path2, sizeof path2, argv[2], dp2, &filesize2))
                continue;
            if (filesize1.st_size == filesize2.st_size) {
                printf("%s and %s have the same size %llu\n",
                       path1, path2, (unsigned long long)filesize1.st_size);
                /* perform actual comparison... */
            }
        }
    }
    closedir(dir1);
    closedir(dir2);
    return 0;
}

How do I recursively go through folders and count total file size?

I am trying to make this work recursively so when it finds a folder it goes into the folder and finds the file size, then at the end print the total of all file sizes. However I cannot figure out how to get this to work recursively, I have tried many things. Also my count for total does not end up correctly even when I am not doing recursion. Any and all help is greatly appreciated.
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <string.h>
#include <stdlib.h>
void do_ls(char[]);
int total = 0;
int main(int ac, char *av[])
{
if (ac == 1)
do_ls(".");
else
{
while (--ac) {
printf("%s:\n", *++av);
do_ls(*av);
}
}
}
/*
 * Lists the entries of `dirname`: names for directories, size and name for
 * everything else, adding sizes to the file-scope `total`, which is printed
 * at the end of the call.
 */
void do_ls(char dirname[])
{
DIR *dir_ptr;
struct dirent *direntp;
struct stat info;
if ((dir_ptr = opendir(dirname)) == NULL)
fprintf(stderr, "ls01: cannot opern %s\n", dirname);
else
{
while((direntp = readdir(dir_ptr)) != NULL) {
/* stat() receives the bare entry name, without the `dirname` prefix. */
stat(direntp->d_name, &info);
if (S_ISDIR(info.st_mode))
printf("%s\n", direntp->d_name);
//I believe recursion goes here, I tried the following
//do_ls(direntp->d_name);
//do_ls(".");
//do_ls(dirname + '/' + direntp->d_name);
//none of these seemed to work.
else
printf("%d %s\n", (int)info.st_size, direntp->d_name);
/* Not part of the if/else above: runs for every entry, directories included. */
total += (int)info.st_size;
}
closedir(dir_ptr);
}
printf("Your total is: %d \n", total);
}
When you have a directory entry to recurse into, you must construct the path by concatenating the directory and the entry name with a / separator and call do_ls recursively.
In order to compute file sizes, you can use the stat system call, but you will need the pathname too, so construct it before testing the entry type (use malloc to allocate space for the concatenated string) and do not forget to free it after use.
Also ignore the . and .. entries and move the closedir() out of the while loop.
Here is an improved version that does not use a global variable for the
total size, but instead returns the cumulative size to the caller:
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
/*
 * Prints the size of `name` and returns it.
 *
 * For a directory the result is the sum of the results for all its entries
 * (the directory's own st_size is not added) and one line is printed for
 * each file and directory visited.  For anything else the result is st_size.
 *
 * Returns 0 if `name` cannot be stat()ed; a directory that cannot be opened
 * is reported and contributes 0.  Exits the program if memory for a child
 * path cannot be allocated.
 *
 * NOTE(review): stat() follows symbolic links, so a link to an ancestor
 * directory is descended into repeatedly until stat() itself fails.
 */
long long do_ls(const char *name) {
    struct stat info;
    long long total = 0;

    if (stat(name, &info)) {
        fprintf(stderr, "ls01: cannot stat %s\n", name);
        return 0;
    }

    if (!S_ISDIR(info.st_mode)) {
        total = info.st_size;
    } else {
        DIR *dir_ptr = opendir(name);
        if (dir_ptr == NULL) {
            fprintf(stderr, "ls01: cannot open directory %s\n", name);
        } else {
            size_t name_len = strlen(name);   /* same for every entry */
            struct dirent *direntp;

            while ((direntp = readdir(dir_ptr)) != NULL) {
                /* ignore current and parent directories */
                if (!strcmp(direntp->d_name, ".") || !strcmp(direntp->d_name, ".."))
                    continue;

                /* name + '/' + entry + NUL */
                size_t path_size = name_len + 1 + strlen(direntp->d_name) + 1;
                char *pathname = malloc(path_size);
                if (pathname == NULL) {
                    fprintf(stderr, "ls01: cannot allocate memory\n");
                    exit(1);
                }
                snprintf(pathname, path_size, "%s/%s", name, direntp->d_name);
                total += do_ls(pathname);
                free(pathname);
            }
            closedir(dir_ptr);
        }
    }
    printf("%10lld %s\n", total, name);
    return total;
}
/*
 * Entry point: with no arguments, sizes the current directory; otherwise
 * sizes every argument in order and prints the combined total.
 */
int main(int ac, char *av[]) {
    if (ac <= 1) {
        do_ls(".");
        return 0;
    }

    long long grand_total = 0;
    for (int arg = 1; arg < ac; arg++) {
        grand_total += do_ls(av[arg]);
    }
    printf("total is: %lld\n", grand_total);
    return 0;
}
When you have the filename building working as already advised, the global
int total = 0;
is inadequate for summing the size of files with
total += (int)info.st_size;
so I suggest
uint64_t total;
and then
total += info.st_size;
Besides that a global is poor practice, which can be improved by returning a value from the recursive function, when you have it working. The total then can be summed within the recursive function.
uint64_t do_ls(char[]);

C - Recursive call to traverse sub-directory and print all files + sizes (using stat)

I have a program that recursively prints the cwd size plus containing file sizes and repeats for each sub directory.
Recursive directory traversal function: (Note the reason for printf in this function and passing two strings is that the output needs to be in a special format so I can't just output the actual filepath. Also I am just learning about system calls to work with directories in ubuntu so if you have any comments on improvements on the code I would appreciate them (style or using something more simple to accomplish the same).
#include <sys/stat.h>
#include <limits.h>
#include <dirent.h>
#include <libgen.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define TRUE 1
#define FALSE 0
#define FIRST_ARG 1
#define SECOND_ARG 2
char* fileName;
FILE* fileToRead;
DIR* directoryToRead;
int printFileSize(char*);
int printWorkingSize(char*, char*);
int printSize(char*, char*);
int printDir(char*, char*);
int printCurrentDir(char*, char*);
int bytesToKbytes(long long, char*);
/* Entry point: when run without arguments, prints sizes for the current directory tree. */
int main(int argc, char** argv) {
if(argc == FIRST_ARG) {
char currentDir[PATH_MAX + 1] = "";
/* Declared without an initializer; passed below as the display-name prefix. */
char* currentDirectory;
/* Probe that "." can be opened; the stream is closed again right away. */
directoryToRead = opendir (".");
if (directoryToRead == NULL){
exit (EXIT_FAILURE);
}
closedir(directoryToRead);
printCurrentDir(currentDirectory, ".");
}
return 0;
}
/*
 * Prints the size of directory `path`, then processes each of its entries:
 * subdirectories recursively, other entries via printWorkingSize().
 * `name` is the display prefix for entries (NULL means no prefix) and `path`
 * is the real path used for opendir()/stat().
 * Returns TRUE, or FALSE as soon as a nested call returns FALSE.
 */
int printCurrentDir(char* name, char* path) {
struct dirent *dir;
struct stat statBuffer;
char fileName[PATH_MAX + 1];
char filePath[PATH_MAX + 1];
DIR* openDir;
if((openDir = opendir(path)) == NULL) {
/* Only a message is printed here; execution continues with openDir == NULL. */
printf("Could not open %s\n", path);
}
stat(path, &statBuffer);
if(strcmp(path, ".") == 0) {
printf("%lld .\n", (long long)statBuffer.st_size);
} else {
printf("%lld %s\n", (long long)statBuffer.st_size, name);
}
while (TRUE) { // go through contents of current directory
dir = readdir(openDir);
if(!dir) {
break;
}
if((strcmp(dir->d_name, "..") == 0) || (strcmp(dir->d_name,".") == 0)) {
continue;
}
/* Build the display name: "<name>/<entry>", or just the entry when name is NULL. */
if(name == NULL) {
strcpy(fileName, dir->d_name);
} else {
strcpy(fileName, name);
strcat(fileName, "/");
strcat(fileName, dir->d_name);
}
/* Build the real path used for the recursive call and for stat(). */
strcpy(filePath, path);
strcat(filePath, "/");
strcat(filePath, dir->d_name);
if(dir->d_type == DT_DIR) { // if the next file is a directory
if(!printCurrentDir(fileName, filePath)) {
return FALSE;
}
}
else if(!printWorkingSize(fileName, filePath)) {
return FALSE;
}
}
/* No closedir(openDir) is performed before returning. */
return TRUE;
}
// Prints the size of the file at `path` followed by `path` itself.
// The size is shown via bytesToKbytes() when that returns TRUE, otherwise
// as a plain byte count. `file` is not used in the body. Always returns TRUE.
int printWorkingSize(char* file, char* path) {
struct stat statBuffer;
// The return value of stat() is not checked.
stat(path, &statBuffer);
char result[PATH_MAX];
if(bytesToKbytes((long long)statBuffer.st_size, result) == FALSE) {
sprintf(result, "%lld", (long long)statBuffer.st_size);
}
printf("%5s %s\n", result, path);
return TRUE;
}
// Convert a byte count to kilobytes (units of 1000 bytes, rounded down) when
// it has more than 5 digits.
//
// bytes:  size in bytes.
// result: output buffer; must have room for at least 18 characters.
//
// Returns FALSE and leaves `result` untouched when bytes <= 99999.
// Otherwise writes "<kilobytes>K" (e.g. 123456 -> "123K", 1234567 -> "1234K")
// and returns TRUE.
int bytesToKbytes(long long bytes, char* result) {
    if (bytes <= 99999) {
        return FALSE;
    }
    // Divide rather than cut the decimal string after three digits, so that
    // values with seven or more digits keep their magnitude.
    sprintf(result, "%lldK", bytes / 1000);
    return TRUE;
}
It is operating system specific. On Linux and POSIX, you should simply use the nftw(3) library function, which is recursively reading the directories and already calling stat(2) for you.
The problem is effectively in the way you use bytesToKbytes. It returns a boolean to indicate if it wrote anything in result and :
you do not preload anything in result before calling bytesToKbytes
you do not test the return value to write anything into result if the size was less than 99999
You could use :
/*
 * Prints the size of the file at `path`, followed by `path` itself.
 * The size is shown in kilobytes when bytesToKbytes() converts it and as a
 * plain byte count otherwise.  If the file cannot be stat()ed, the error is
 * reported on stderr and nothing is printed for it.
 * `file` is accepted for interface compatibility but not used.
 * Always returns TRUE so that the caller keeps traversing.
 */
int printWorkingSize(char* file, char* path) {
    (void)file;

    struct stat statBuffer;
    if (stat(path, &statBuffer) != 0) {
        /* statBuffer holds no valid data here, so there is no size to print. */
        perror(path);
        return TRUE;
    }

    char result[32];   /* large enough for any long long plus a unit suffix */
    if (!bytesToKbytes((long long)statBuffer.st_size, result)) {
        snprintf(result, sizeof result, "%lld", (long long)statBuffer.st_size);
    }
    printf("%5s %s\n", result, path);
    return TRUE;
}
You could also change bytesToKbytes to write the length in bytes into result if the size is less than 99999.

Resources