I need to recursively list all directories and files in C programming. I have looked into FTW but that is not included with the 2 operating systems that I am using (Fedora and Minix). I am starting to get a big headache from all the different things that I have read over the past few hours.
If somebody knows of a code snippet I could look at that would be amazing, or if anyone can give me good direction on this I would be very grateful.
Why does everyone insist on reinventing the wheel again and again?
POSIX.1-2008 standardized the nftw() function, also defined in the Single Unix Specification v4 (SuSv4), and available in Linux (glibc, man 3 nftw), OS X, and most current BSD variants. It is not new at all.
Naïve opendir()/readdir()/closedir() -based implementations almost never handle the cases where directories or files are moved, renamed, or deleted during the tree traversal, whereas nftw() should handle them gracefully.
As an example, consider the following C program that lists the directory tree starting at the current working directory, or at each of the directories named on the command line, or just the files named at the command line:
/* We want POSIX.1-2008 + XSI, i.e. SuSv4, features */
#define _XOPEN_SOURCE 700
/* Added on 2017-06-25:
If the C library can support 64-bit file sizes
and offsets, using the standard names,
these defines tell the C library to do so. */
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64
#include <stdlib.h>
#include <unistd.h>
#include <ftw.h>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
/* POSIX.1 says each process has at least 20 file descriptors.
* Three of those belong to the standard streams.
* Here, we use a conservative estimate of 15 available;
* assuming we use at most two for other uses in this program,
* we should never run into any problems.
* Most trees are shallower than that, so it is efficient.
* Deeper trees are traversed fine, just a bit slower.
* (Linux allows typically hundreds to thousands of open files,
* so you'll probably never see any issues even if you used
* a much higher value, say a couple of hundred, but
* 15 is a safe, reasonable value.)
*/
#ifndef USE_FDS
#define USE_FDS 15
#endif
int print_entry(const char *filepath, const struct stat *info,
const int typeflag, struct FTW *pathinfo)
{
/* const char *const filename = filepath + pathinfo->base; */
const double bytes = (double)info->st_size; /* Not exact if large! */
struct tm mtime;
localtime_r(&(info->st_mtime), &mtime);
printf("%04d-%02d-%02d %02d:%02d:%02d",
mtime.tm_year+1900, mtime.tm_mon+1, mtime.tm_mday,
mtime.tm_hour, mtime.tm_min, mtime.tm_sec);
if (bytes >= 1099511627776.0)
printf(" %9.3f TiB", bytes / 1099511627776.0);
else
if (bytes >= 1073741824.0)
printf(" %9.3f GiB", bytes / 1073741824.0);
else
if (bytes >= 1048576.0)
printf(" %9.3f MiB", bytes / 1048576.0);
else
if (bytes >= 1024.0)
printf(" %9.3f KiB", bytes / 1024.0);
else
printf(" %9.0f B ", bytes);
if (typeflag == FTW_SL) {
char *target;
size_t maxlen = 1023;
ssize_t len;
while (1) {
target = malloc(maxlen + 1);
if (target == NULL)
return ENOMEM;
len = readlink(filepath, target, maxlen);
if (len == (ssize_t)-1) {
const int saved_errno = errno;
free(target);
return saved_errno;
}
if (len >= (ssize_t)maxlen) {
free(target);
maxlen += 1024;
continue;
}
target[len] = '\0';
break;
}
printf(" %s -> %s\n", filepath, target);
free(target);
} else
if (typeflag == FTW_SLN)
printf(" %s (dangling symlink)\n", filepath);
else
if (typeflag == FTW_F)
printf(" %s\n", filepath);
else
if (typeflag == FTW_D || typeflag == FTW_DP)
printf(" %s/\n", filepath);
else
if (typeflag == FTW_DNR)
printf(" %s/ (unreadable)\n", filepath);
else
printf(" %s (unknown)\n", filepath);
return 0;
}
int print_directory_tree(const char *const dirpath)
{
int result;
/* Invalid directory path? */
if (dirpath == NULL || *dirpath == '\0')
return errno = EINVAL;
result = nftw(dirpath, print_entry, USE_FDS, FTW_PHYS);
if (result >= 0)
errno = result;
return errno;
}
int main(int argc, char *argv[])
{
int arg;
if (argc < 2) {
if (print_directory_tree(".")) {
fprintf(stderr, "%s.\n", strerror(errno));
return EXIT_FAILURE;
}
} else {
for (arg = 1; arg < argc; arg++) {
if (print_directory_tree(argv[arg])) {
fprintf(stderr, "%s.\n", strerror(errno));
return EXIT_FAILURE;
}
}
}
return EXIT_SUCCESS;
}
Most of the code above is in print_entry(). Its task is to print out each directory entry. In print_directory_tree(), we tell nftw() to call it for each directory entry it sees.
The only hand-wavy detail above is the decision on how many file descriptors one should let nftw() use. If your program uses at most two extra file descriptors (in addition to the standard streams) during the file tree walk, 15 is known to be safe (on all systems having nftw() and being mostly POSIX-compliant).
In Linux, you could use sysconf(_SC_OPEN_MAX) to find the maximum number of open files, and subtract the number you use concurrently with the nftw() call, but I wouldn't bother (unless I knew the utility would be used mostly with pathologically deep directory structures). Fifteen descriptors does not limit the tree depth; nftw() just gets slower (and might not detect changes in a directory if walking a directory deeper than 13 directories from that one, although the tradeoffs and general ability to detect changes vary between systems and C library implementations). Just using a compile-time constant like that keeps the code portable -- it should work not just on Linux, but on Mac OS X and all current BSD variants, and most other not-too-old Unix variants, too.
In a comment, Ruslan mentioned that they had to switch to nftw64() because they had filesystem entries that required 64-bit sizes/offsets, and the "normal" version of nftw() failed with errno == EOVERFLOW. The correct solution is to not switch to GLIBC-specific 64-bit functions, but to define _LARGEFILE64_SOURCE and _FILE_OFFSET_BITS 64. These tell the C library to switch to 64-bit file sizes and offsets if possible, while using the standard functions (nftw(), fstat(), et cetera) and type names (off_t etc.).
Here is a recursive version:
#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
void listdir(const char *name, int indent)
{
DIR *dir;
struct dirent *entry;
if (!(dir = opendir(name)))
return;
while ((entry = readdir(dir)) != NULL) {
if (entry->d_type == DT_DIR) {
char path[1024];
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
continue;
snprintf(path, sizeof(path), "%s/%s", name, entry->d_name);
printf("%*s[%s]\n", indent, "", entry->d_name);
listdir(path, indent + 2);
} else {
printf("%*s- %s\n", indent, "", entry->d_name);
}
}
closedir(dir);
}
int main(void) {
listdir(".", 0);
return 0;
}
int is_directory_we_want_to_list(const char *parent, char *name) {
struct stat st_buf;
if (!strcmp(".", name) || !strcmp("..", name))
return 0;
char *path = alloca(strlen(name) + strlen(parent) + 2);
sprintf(path, "%s/%s", parent, name);
stat(path, &st_buf);
return S_ISDIR(st_buf.st_mode);
}
int list(const char *name) {
DIR *dir = opendir(name);
struct dirent *ent;
while (ent = readdir(dir)) {
char *entry_name = ent->d_name;
printf("%s\n", entry_name);
if (is_directory_we_want_to_list(name, entry_name)) {
// You can consider using alloca instead.
char *next = malloc(strlen(name) + strlen(entry_name) + 2);
sprintf(next, "%s/%s", name, entry_name);
list(next);
free(next);
}
}
closedir(dir);
}
Header files worth being skimmed in this context: stat.h, dirent.h. Bear in mind that the code above isn't checking for any errors which might occur.
A completely different approach is offered by ftw defined in ftw.h.
As I mentioned in my comment, I believe a recursive approach to have two inherent flaws to this task.
The first flaw is the limit on open files. This limit imposes a limit on deep traversal. If there are enough sub-folders, the recursive approach will break. (See edit regarding stack overflow)
The second flaw is a bit more subtle. The recursive approach makes it very hard to test for hard links. If a folder tree is cyclic (due to hard links), the recursive approach will break (hopefully without a stack overflow). (See edit regarding hard links)
However, it is quite simple to avoid these issues by replacing recursion with a single file descriptor and linked lists.
I assume this isn't a school project and that recursion is optional.
Here's an example application.
Use a.out ./ to view folder tree.
I apologize for the macros and stuff... I usually use inline functions, but I thought it would be easier to follow the code if it was all in a single function.
#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
int main(int argc, char const *argv[]) {
/* print use instruction unless a folder name was given */
if (argc < 2)
fprintf(stderr,
"\nuse:\n"
" %s <directory>\n"
"for example:\n"
" %s ./\n\n",
argv[0], argv[0]),
exit(0);
/*************** a small linked list macro implementation ***************/
typedef struct list_s {
struct list_s *next;
struct list_s *prev;
} list_s;
#define LIST_INIT(name) \
{ .next = &name, .prev = &name }
#define LIST_PUSH(dest, node) \
do { \
(node)->next = (dest)->next; \
(node)->prev = (dest); \
(node)->next->prev = (node); \
(dest)->next = (node); \
} while (0);
#define LIST_POP(list, var) \
if ((list)->next == (list)) { \
var = NULL; \
} else { \
var = (list)->next; \
(list)->next = var->next; \
var->next->prev = var->prev; \
}
/*************** a record (file / folder) item type ***************/
typedef struct record_s {
/* this is a flat processing queue. */
list_s queue;
/* this will list all queued and processed folders (cyclic protection) */
list_s folders;
/* this will list all the completed items (siblings and such) */
list_s list;
/* unique ID */
ino_t ino;
/* name length */
size_t len;
/* name string */
char name[];
} record_s;
/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name) \
((record_s *)(((uintptr_t)(node)) - \
((uintptr_t) & ((record_s *)0)->list_name)))
/* initializes a new record */
#define RECORD_INIT(name) \
(record_s){.queue = LIST_INIT((name).queue), \
.folders = LIST_INIT((name).folders), \
.list = LIST_INIT((name).list)}
/*************** the actual code ***************/
record_s records = RECORD_INIT(records);
record_s *pos, *item;
list_s *tmp;
DIR *dir;
struct dirent *entry;
/* initialize the root folder record and add it to the queue */
pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2);
*pos = RECORD_INIT(*pos);
pos->len = strlen(argv[1]);
memcpy(pos->name, argv[1], pos->len);
if (pos->name[pos->len - 1] != '/')
pos->name[pos->len++] = '/';
pos->name[pos->len] = 0;
/* push to queue, but also push to list (first item processed) */
LIST_PUSH(&records.queue, &pos->queue);
LIST_PUSH(&records.list, &pos->list);
/* as long as the queue has items to be processed, do so */
while (records.queue.next != &records.queue) {
/* pop queued item */
LIST_POP(&records.queue, tmp);
/* collect record to process */
pos = NODE2RECORD(tmp, queue);
/* add record to the processed folder list */
LIST_PUSH(&records.folders, &pos->folders);
/* process the folder and add all folder data to current list */
dir = opendir(pos->name);
if (!dir)
continue;
while ((entry = readdir(dir)) != NULL) {
/* create new item, copying it's path data and unique ID */
item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2);
*item = RECORD_INIT(*item);
item->len = pos->len + entry->d_namlen;
memcpy(item->name, pos->name, pos->len);
memcpy(item->name + pos->len, entry->d_name, entry->d_namlen);
item->name[item->len] = 0;
item->ino = entry->d_ino;
/* add item to the list, right after the `pos` item */
LIST_PUSH(&pos->list, &item->list);
/* unless it's a folder, we're done. */
if (entry->d_type != DT_DIR)
continue;
/* test for '.' and '..' */
if (entry->d_name[0] == '.' &&
(entry->d_name[1] == 0 ||
(entry->d_name[1] == '.' && entry->d_name[2] == 0)))
continue;
/* add folder marker */
item->name[item->len++] = '/';
item->name[item->len] = 0;
/* test for cyclic processing */
list_s *t = records.folders.next;
while (t != &records.folders) {
if (NODE2RECORD(t, folders)->ino == item->ino) {
/* we already processed this folder! */
break; /* this breaks from the small loop... */
}
t = t->next;
}
if (t != &records.folders)
continue; /* if we broke from the small loop, entry is done */
/* item is a new folder, add to queue */
LIST_PUSH(&records.queue, &item->queue);
}
closedir(dir);
}
/*************** Printing the results and cleaning up ***************/
while (records.list.next != &records.list) {
/* pop list item */
LIST_POP(&records.list, tmp);
/* collect and process record */
pos = NODE2RECORD(tmp, list);
fwrite(pos->name, pos->len, 1, stderr);
fwrite("\n", 1, 1, stderr);
/* free node */
free(pos);
}
return 0;
}
EDIT
#Stargateur mentioned in the comments that the recursive code will probably overflow the stack before reaching the open file limit.
Although I don't see how a stack-overflow is any better, this assessment is probably correct as long as the process isn't close to the file limit when invoked.
Another point mentioned by #Stargateur in the comments was that the depth of the recursive code is limited by the maximum amount of sub-directories (64000 on the ext4 filesystem) and that hard links are extremely unlikely (since hard links to folders aren't allowed on Linux/Unix).
This is good news if the code is running on Linux (which it is, according to the question), so this issue isn't a real concern (unless running the code on macOS or, maybe, Windows)... although 64K subfolders in recursion might blow the stack wide open.
Having said that, the none recursive option still has advantages, such as being able to easily add a limit to the amount of items processed as well as being able to cache the result.
P.S.
According to the comments, here's a non-recursive version of the code that doesn't check for cyclic hierarchies. It's faster and should be safe enough to use on a Linux machine where hard links to folders aren't allowed.
#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
int main(int argc, char const *argv[]) {
/* print use instruction unless a folder name was given */
if (argc < 2)
fprintf(stderr,
"\nuse:\n"
" %s <directory>\n"
"for example:\n"
" %s ./\n\n",
argv[0], argv[0]),
exit(0);
/*************** a small linked list macro implementation ***************/
typedef struct list_s {
struct list_s *next;
struct list_s *prev;
} list_s;
#define LIST_INIT(name) \
{ .next = &name, .prev = &name }
#define LIST_PUSH(dest, node) \
do { \
(node)->next = (dest)->next; \
(node)->prev = (dest); \
(node)->next->prev = (node); \
(dest)->next = (node); \
} while (0);
#define LIST_POP(list, var) \
if ((list)->next == (list)) { \
var = NULL; \
} else { \
var = (list)->next; \
(list)->next = var->next; \
var->next->prev = var->prev; \
}
/*************** a record (file / folder) item type ***************/
typedef struct record_s {
/* this is a flat processing queue. */
list_s queue;
/* this will list all the completed items (siblings and such) */
list_s list;
/* unique ID */
ino_t ino;
/* name length */
size_t len;
/* name string */
char name[];
} record_s;
/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name) \
((record_s *)(((uintptr_t)(node)) - \
((uintptr_t) & ((record_s *)0)->list_name)))
/* initializes a new record */
#define RECORD_INIT(name) \
(record_s){.queue = LIST_INIT((name).queue), .list = LIST_INIT((name).list)}
/*************** the actual code ***************/
record_s records = RECORD_INIT(records);
record_s *pos, *item;
list_s *tmp;
DIR *dir;
struct dirent *entry;
/* initialize the root folder record and add it to the queue */
pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2);
*pos = RECORD_INIT(*pos);
pos->len = strlen(argv[1]);
memcpy(pos->name, argv[1], pos->len);
if (pos->name[pos->len - 1] != '/')
pos->name[pos->len++] = '/';
pos->name[pos->len] = 0;
/* push to queue, but also push to list (first item processed) */
LIST_PUSH(&records.queue, &pos->queue);
LIST_PUSH(&records.list, &pos->list);
/* as long as the queue has items to be processed, do so */
while (records.queue.next != &records.queue) {
/* pop queued item */
LIST_POP(&records.queue, tmp);
/* collect record to process */
pos = NODE2RECORD(tmp, queue);
/* process the folder and add all folder data to current list */
dir = opendir(pos->name);
if (!dir)
continue;
while ((entry = readdir(dir)) != NULL) {
/* create new item, copying it's path data and unique ID */
item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2);
*item = RECORD_INIT(*item);
item->len = pos->len + entry->d_namlen;
memcpy(item->name, pos->name, pos->len);
memcpy(item->name + pos->len, entry->d_name, entry->d_namlen);
item->name[item->len] = 0;
item->ino = entry->d_ino;
/* add item to the list, right after the `pos` item */
LIST_PUSH(&pos->list, &item->list);
/* unless it's a folder, we're done. */
if (entry->d_type != DT_DIR)
continue;
/* test for '.' and '..' */
if (entry->d_name[0] == '.' &&
(entry->d_name[1] == 0 ||
(entry->d_name[1] == '.' && entry->d_name[2] == 0)))
continue;
/* add folder marker */
item->name[item->len++] = '/';
item->name[item->len] = 0;
/* item is a new folder, add to queue */
LIST_PUSH(&records.queue, &item->queue);
}
closedir(dir);
}
/*************** Printing the results and cleaning up ***************/
while (records.list.next != &records.list) {
/* pop list item */
LIST_POP(&records.list, tmp);
/* collect and process record */
pos = NODE2RECORD(tmp, list);
fwrite(pos->name, pos->len, 1, stderr);
fwrite("\n", 1, 1, stderr);
/* free node */
free(pos);
}
return 0;
}
Here is a simplified version that is recursive but uses much less stack space:
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>
void listdir(char *path, size_t size) {
DIR *dir;
struct dirent *entry;
size_t len = strlen(path);
if (!(dir = opendir(path))) {
fprintf(stderr, "path not found: %s: %s\n",
path, strerror(errno));
return;
}
puts(path);
while ((entry = readdir(dir)) != NULL) {
char *name = entry->d_name;
if (entry->d_type == DT_DIR) {
if (!strcmp(name, ".") || !strcmp(name, ".."))
continue;
if (len + strlen(name) + 2 > size) {
fprintf(stderr, "path too long: %s/%s\n", path, name);
} else {
path[len] = '/';
strcpy(path + len + 1, name);
listdir(path, size);
path[len] = '\0';
}
} else {
printf("%s/%s\n", path, name);
}
}
closedir(dir);
}
int main(void) {
char path[1024] = ".";
listdir(path, sizeof path);
return 0;
}
On my system, its output is exactly identical to that of find .
Walking a Directory Tree Without Constructing Path Names
This is a version that uses file descriptors to refer to directories, with fdopendir(), fstatat(), and openat() to walk a directory tree without having to construct any path names.
This is simpler to implement, and can be useful on systems with deeply-nested directory trees, where a full path name might exceed PATH_MAX - and note that PATH_MAX may not even exist.
The posted code is compressed, broken up, and all error checking removed to remove vertical scroll bars and improve readability. A complete example is at the end of the question.
Headers
#define _POSIX_C_SOURCE 200809L
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
Actual directory tree walk implementation:
// the actual walking is done by descriptor, not name
static int myftwImp( int dirfd )
{
DIR *dirp = fdopendir( dirfd );
for ( ;; )
{
struct dirent *dent = readdir( dirp );
if ( NULL == dent ) break;
if ( ( 0 == strcmp( ".", dent->d_name ) ) ||
( 0 == strcmp( "..", dent->d_name ) ) )
{
continue;
}
struct stat sb = { 0 };
fstatat( dirfd, dent->d_name, &sb, 0 );
if ( S_ISDIR( sb.st_mode ) )
{
printf( "dir: %s\n", dent->d_name );
int newdirfd = openat( dirfd, dent->d_name,
O_RDONLY | O_DIRECTORY );
myftwImp( newdirfd );
}
printf( " file: %s\n", dent->d_name );
}
// this will close the descriptor, too
closedir( dirp );
return( 0 );
}
Public call that uses directory name:
int myftw( const char *dirname )
{
int dirfd = open( dirname, O_RDONLY | O_DIRECTORY );
myftwImp( dirfd );
return( 0 );
}
Example use:
int main( int argc, char **argv )
{
int rc = myftw( argv[ 1 ] );
return( rc );
}
No error checking is done here for brevity. Real code should check all calls for errors and handle them appropriately.
Full code with error checking:
#define _POSIX_C_SOURCE 200809L
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
static int myftwImp( int dirfd )
{
DIR *dirp = fdopendir( dirfd );
if ( NULL == dirp )
{
return( -1 );
}
int rc = 0;
for ( ;; )
{
struct dirent *dent = readdir( dirp );
if ( NULL == dent )
{
break;
}
if ( 0 == strcmp( ".", dent->d_name ) )
{
continue;
}
if ( 0 == strcmp( "..", dent->d_name ) )
{
continue;
}
struct stat sb = { 0 };
rc = fstatat( dirfd, dent->d_name, &sb, 0 );
if ( 0 != rc )
{
break;
}
if ( S_ISDIR( sb.st_mode ) )
{
int newdirfd = openat( dirfd, dent->d_name, O_RDONLY | O_DIRECTORY );
if ( -1 == newdirfd )
{
rc = -1;
break;
}
printf( "dir: %s\n", dent->d_name );
rc = myftwImp( newdirfd );
if ( 0 != rc )
{
break;
}
}
printf( " file: %s\n", dent->d_name );
}
closedir( dirp );
return( rc );
}
int myftw( const char *dirname )
{
int dirfd = open( dirname, O_RDONLY | O_DIRECTORY );
if ( -1 == dirfd )
{
return( -1 );
}
int rc = myftwImp( dirfd );
return( rc );
}
int main( int argc, char **argv )
{
int rc = myftw( argv[ 1 ] );
return( rc );
}
I'm implementing parts of the Linux ls command in C. I want to sort the contents of directories lexicographically, which I've been doing using scandir(). This is easy enough for listing single directories, but I'm having trouble doing it for listing subdirectories recursively. My current code: (results in a segmentation faults once a directory type is reached)
void recursive(char* arg){
int i;
struct dirent **file_list;
int num;
char* next_dir;
num = scandir(arg, &file_list, NULL, alphasort);
for(i = 0; i < num; i++) {
if(file_list[i]->d_type == DT_DIR) {
if(strcmp(".", file_list[i]->d_name) != 0 && strcmp("..", file_list[i]->d_name) != 0) {
// Directories are printed with a colon to distinguish them from files
printf("%s: \n", file_list[i]->d_name);
strcpy(next_dir, arg);
strcat(next_dir, "/");
strcat(next_dir, file_list[i]->d_name);
printf("\n");
recursive(next_dir);
}
} else {
if(strcmp(".", file_list[i]->d_name) != 0 && strcmp("..", file_list[i]->d_name) != 0) {
printf("%s \n", file_list[i]->d_name);
}
}
}
}
int main(void) {
recursive(".");
return 0;
}
There are two recommended methods for traversing entire filesystem trees in Linux and other POSIXy systems:
nftw(): man 3 nftw
Given an initial path, a callback function, the maximum number of descriptors to use, and a set of flags, nftw() will call the callback function once for every filesystem object in the subtree. The order in which entries in the same directory is called is not specified, however.
This is the POSIX.1 (IEEE 1003) function.
fts_open()/fts_read()/fts_children()/fts_close(): man 3 fts
The fts interface provides a way to traverse filesystem hierarchies. The fts_children() provides a linked list of filesystem entries sorted by the comparison function specified in the fts_open() call. It is rather similar to how scandir() returns an array of filesystem entries, except that the two use very different structures to describe each filesystem entry.
Prior to glibc 2.23 (released in 2016), the Linux (glibc) fts implementation had bugs when using 64-bit file sizes (so on x86-64, or when compiling with -D_FILE_OFFSET_BITS=64).
These are BSD functions (FreeBSD/OpenBSD/macOS), but are available in Linux also.
Finally, there is also the atfile version of scandir(), scandirat(), that returns the filtered and sorted filesystem entries from a specific directory, but in addition to the pathname, it takes a file descriptor to the relative root directory to be used as a parameter. (If AT_FDCWD is used instead of a file descriptor, then scandirat() behaves like scandir().)
The simplest option here is to use nftw(), store all walked paths, and finally sort the paths. For example, walk.c:
// SPDX-License-Identifier: CC0-1.0
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
#include <stdlib.h>
#include <locale.h>
#include <ftw.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
struct entry {
/* Insert additional properties like 'off_t size' here. */
char *name; /* Always points to name part of pathname */
char pathname[]; /* Full path and name */
};
struct listing {
size_t max; /* Number of entries allocated for */
size_t num; /* Number of entries in the array */
struct entry **ent; /* Array of pointers, one per entry */
};
#define STRUCT_LISTING_INITIALIZER { 0, 0, NULL }
/* Locale-aware sort for arrays of struct entry pointers.
*/
static int entrysort(const void *ptr1, const void *ptr2)
{
const struct entry *ent1 = *(const struct entry **)ptr1;
const struct entry *ent2 = *(const struct entry **)ptr2;
return strcoll(ent1->pathname, ent2->pathname);
}
/* Global variable used by nftw_add() to add to the listing */
static struct listing *nftw_listing = NULL;
static int nftw_add(const char *pathname, const struct stat *info, int typeflag, struct FTW *ftwbuf)
{
const char *name = pathname + ftwbuf->base;
/* These generate no code, just silences the warnings about unused parameters. */
(void)info;
(void)typeflag;
/* Ignore "." and "..". */
if (name[0] == '.' && !name[1])
return 0;
if (name[0] == '.' && name[1] == '.' && !name[2])
return 0;
/* Make sure there is room for at least one more entry in the listing. */
if (nftw_listing->num >= nftw_listing->max) {
const size_t new_max = nftw_listing->num + 1000;
struct entry **new_ent;
new_ent = realloc(nftw_listing->ent, new_max * sizeof (struct entry *));
if (!new_ent)
return -ENOMEM;
nftw_listing->max = new_max;
nftw_listing->ent = new_ent;
}
const size_t pathnamelen = strlen(pathname);
struct entry *ent;
/* Allocate memory for this entry.
Remember to account for the name, and the end-of-string terminator, '\0', at end of name. */
ent = malloc(sizeof (struct entry) + pathnamelen + 1);
if (!ent)
return -ENOMEM;
/* Copy other filesystem entry properties to ent here; say 'ent->size = info->st_size;'. */
/* Copy pathname, including the end-of-string terminator, '\0'. */
memcpy(ent->pathname, pathname, pathnamelen + 1);
/* The name pointer is always to within the pathname. */
ent->name = ent->pathname + ftwbuf->base;
/* Append. */
nftw_listing->ent[nftw_listing->num++] = ent;
return 0;
}
/* Scan directory tree starting at path, adding the entries to struct listing.
Note: the listing must already have been properly initialized!
Returns 0 if success, nonzero if error; -1 if errno is set to indicate error.
*/
int scan_tree_sorted(struct listing *list, const char *path)
{
if (!list) {
errno = EINVAL;
return -1;
}
if (!path || !*path) {
errno = ENOENT;
return -1;
}
nftw_listing = list;
int result = nftw(path, nftw_add, 64, FTW_DEPTH);
nftw_listing = NULL;
if (result < 0) {
errno = -result;
return -1;
} else
if (result > 0) {
errno = 0;
return result;
}
if (list->num > 2)
qsort(list->ent, list->num, sizeof list->ent[0], entrysort);
return 0;
}
int main(int argc, char *argv[])
{
struct listing list = STRUCT_LISTING_INITIALIZER;
setlocale(LC_ALL, "");
if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
const char *arg0 = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", arg0);
fprintf(stderr, " %s .\n", arg0);
fprintf(stderr, " %s TREE [ TREE ... ]\n", arg0);
fprintf(stderr, "\n");
fprintf(stderr, "This program lists all files and directories starting at TREE,\n");
fprintf(stderr, "in sorted order.\n");
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
for (int arg = 1; arg < argc; arg++) {
if (scan_tree_sorted(&list, argv[arg])) {
fprintf(stderr, "%s: Error scanning directory tree: %s.\n", argv[arg], strerror(errno));
return EXIT_FAILURE;
}
}
printf("Found %zu entries:\n", list.num);
for (size_t i = 0; i < list.num; i++)
printf("\t%s\t(%s)\n", list.ent[i]->pathname, list.ent[i]->name);
return EXIT_SUCCESS;
}
Compile using gcc -Wall -Wextra -O2 walk.c -o walk, and run using e.g. ./walk ...
The scan_tree_sorted() function calls nftw() for the directory specified, updating the global variable nftw_listing so that the nftw_add() callback function can add each new directory entry to it. If the listing contains more that one entry afterwards, it is sorted using qsort() and a locale-aware comparison function (based on strcoll()).
nftw_add() skips . and .., and adds every other pathname to the listing structure nftw_listing. It automatically grows the array as needed in linear fashion; the new_max = nftw_listing->num + 1000; means we allocate in units of a thousand (pointers).
The scan_tree_sorted() can be called multiple times with the same listing as the target, if one wants to list disjoint subtrees in one listing. Note, however, that it does not check for duplicates, although those could easily be filtered out after the qsort.
So, basically, I have a program that forks off a child process and creates a directory with the name + the child processes ID. This is done in another part of the code.
So, lets say the user names the directory "TestDir#". It will be TestDir12342 or something similar in the end.
So, later on, the user could enter a search term for that directory by typing in TestDir#. I want to lope off the "#", and have chdir() search for a directory that begins with that name, "TestDir". I don't have to worry about repeat files or similarly named files for this program.
Does anyone know a simple way to do this with chdir()? I have tried many different test code, but I am at a lose.
I have also attempted to store the child process ID in the parent process, but for some reason I can never get them to match. I am aware that fork() gives the child process ID in return to the parent. Yet, for some reason, the program refuses to make them match.
So, I am trying this as a workaround (searching the beginning part of the file name). Thanks for any assistance if someone knows of a way to do this.
readdir can be used to get the entries of the directory.
The following searchFirstDir finds the first prefix-matched directory. (tested in Ubuntu Linux)
#include <stdio.h>
#include <unistd.h>
#include <dirent.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
int searchFirstDir(const char *workingDir, const char *prefix, char *resultBuffer, int bufferLen)
{
DIR *pDir = NULL;
int found = 0;
// opendir
{
pDir = opendir(workingDir);
if (pDir == NULL) {
perror("ERROR: opendir");
return -1;
}
}
// readdir
{
int ret;
struct dirent *pEntry;
struct dirent *result;
int prefixLen = strlen(prefix);
// refer: man readdir (in Linux)
{
long name_max = pathconf(workingDir, _PC_NAME_MAX);
if (name_max == -1) /* Limit not defined, or error */
name_max = 255; /* Take a guess */
size_t len = offsetof(struct dirent, d_name) + name_max + 1;
pEntry = malloc(len);
}
do {
ret = readdir_r(pDir, pEntry, &result);
if (ret) {
perror("ERROR: readdir_r");
break;
}
if (pEntry->d_type == DT_DIR && strncmp(pEntry->d_name, prefix, prefixLen) == 0) {
strncpy(resultBuffer, pEntry->d_name, bufferLen);
found++;
break;
}
} while(ret == 0 && result != NULL);
free(pEntry);
}
// closedir
closedir(pDir);
return found > 0 ? 0 : -1;
}
int main(int argc, char *argv)
{
char resultBuffer[255];
int ret = searchFirstDir("workingdirectory", "TestDir", resultBuffer, 255);
if (ret == 0) {
printf("First matched directory: %s\n", resultBuffer);
}
}
Yes, there is a way to perform the requested type of chdir taking advantage of globbing, i.e. filename expansion using a wildcard of "*", as follows:
#include <string.h>
#include <glob.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
/* Convert a wildcard pattern into a list of blank-separated
filenames which match the wildcard. */
char * glob_pattern(char *wildcard)
{
char *gfilename;
size_t cnt, length;
glob_t glob_results;
char **p;
glob(wildcard, GLOB_NOCHECK, 0, &glob_results);
/* How much space do we need? */
for (p = glob_results.gl_pathv, cnt = glob_results.gl_pathc;
cnt; p++, cnt--)
length += strlen(*p) + 1;
/* Allocate the space and generate the list. */
gfilename = (char *) calloc(length, sizeof(char));
for (p = glob_results.gl_pathv, cnt = glob_results.gl_pathc;
cnt; p++, cnt--)
{
strcat(gfilename, *p);
if (cnt > 1)
strcat(gfilename, " ");
}
globfree(&glob_results);
return gfilename;
}
int main() {
char *directory;
int ret;
directory = glob_pattern("te*");
ret = chdir (directory);
printf("Result of chdir: %d\n",ret);
}
Note: The "globbing" portion of the code comes from here
Linux has a glob utility so if you wish to do the same in C, you have to write the code yourself as this example portrays. When the program finishes however you will be back in the directory you originally used to run this script. When the code does a successful directory change, the return result is zero. Note, this code executed in a directory containing a subdirectory named "test".
Problem 1: what's the best data structure to save the directory structure?
Problem 2: I have tried to use a general tree to solve it, but there are a lot of problems:
The number of files under a directory is not certain. So the number of child nodes under a tree node is also not certain. and I try to add a keyword nchild to each node, showing nchild child nodes. so there are nchild pointers (saved with **child) to the child nodes. And once that, **child and *child should be dynamically allocated space with no certain child nodes. So you know, this is really difficult to release these spaces(and the program below is not called free()). Is there a better way to solve it?
And sometimes the program below would get the garbage characters when I output the directory tree, which make me really confused. while debugging it, found that is the function ent=readdir(pDir); has read garbage characters. But when I write another simple program to read the same directory, that goes well. I think the problem is the recursive function, but I didn't get any idea. I will be appreciated if some one can give me a idea. Thanks!
```
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <stdio.h>
typedef struct tree_file_s
{
char path[512];
time_t date;
char type;
long size;
int nchild;
struct tree_file_s **child;
} tree_file_t;
int dir_child_len(const char *dir)
{
int nchild = 0;
DIR *pDir;
struct dirent *ent;
pDir = opendir(dir);
while((ent=readdir(pDir)) != NULL)
{
if (strcmp(ent->d_name, ".")==0 || strcmp(ent->d_name, "..")==0)
{
continue;
}
nchild++;
}
return nchild;
}
void tree_create(tree_file_t *tft, const char *dir)
{
int nchild; // the tft has n child
DIR *pDir;
struct dirent *ent; // the directory dir dirent info
struct stat file_stat; // the new file's stat info
stat(dir, &file_stat);
nchild = dir_child_len(dir);
pDir = opendir(dir);
// Initialize the parent
//tft->path = calloc(1, strlen(dir)+1);
strcpy(tft->path, dir);
tft->date = file_stat.st_mtime;
tft->type = 'D';
tft->size = file_stat.st_size;
tft->nchild = nchild;
tft->child = calloc(1, nchild);
nchild = 0;
while ((ent=readdir(pDir)) != NULL)
{
if (ent->d_type & DT_DIR)
{
if (strcmp(ent->d_name, ".")==0 || strcmp(ent->d_name, "..")==0)
{
continue;
}
tree_file_t *new_dir = calloc(1, sizeof(tree_file_t));
tft->child[nchild] = new_dir;
char *new_path = calloc(1, strlen(dir)+strlen(ent->d_name)+1);
sprintf(new_path, "%s/%s", dir, ent->d_name);
tree_create(new_dir, new_path);
free(new_path);
} else {
tree_file_t *new_file = calloc(1, sizeof(tree_file_t));
char *new_path = calloc(1, strlen(dir)+strlen(ent->d_name)+1);
// new_file->path = calloc(1, strlen(dir)+strlen(ent->d_name)+1);
sprintf(new_path, "%s/%s", dir, ent->d_name);
stat(new_path, &file_stat);
strcpy(new_file->path, new_path);
free(new_path);
new_file->date = file_stat.st_mtime;
new_file->type = 'F';
new_file->size = file_stat.st_size;
new_file->nchild = 0;
new_file->child = 0;
tft->child[nchild] = new_file;
}
//free(new_path);
//new_path = 0;
nchild++;
}
}
void display_tree(tree_file_t *tft)
{
int nchild, i;
nchild = tft->nchild;
printf("%c: %s\n", tft->type, tft->path);
for(i = 0; i < nchild; i++)
{
if(tft->child[i]->type == 'F')
{
printf("%c: %s\n", tft->child[i]->type, tft->child[i]->path);
} else {
display_tree(tft->child[i]);
}
}
}
int main(int argc, const char *argv[])
{
if(argc != 2)
{
printf("Usage: a.out dir\n");
exit(0);
}
char dir[512];
strcpy(dir, argv[1]);
tree_file_t *tft = calloc(1, sizeof(tree_file_t));
tree_create(tft, dir);
display_tree(tft);
return 0;
}
```
When you allocate space for new_path you need to add 2 (one for the slash, one for the null terminator). And you never close the directories you open (use closedir()).
An even more serious error is this line:
tft->child = calloc(1, nchild);
which only allocates nchild bytes, not enough to hold nchild pointers! Try:
tft->child = calloc(nchild, sizeof(*tft->child));
Here i have one directory which has number of files.
I want to fill this files all information in one structure.
I have two structures which are following.
struct files {
char *file_name;
int file_size;
};
typedef struct file_header {
int file_count;
struct files file[variable as per number of files];
} metadata;
i want to make one header which contains all information regarding these files.
like if i have 3 files than i want to make this structure like this in file_count = 3 and how can i allocate second variable value? and want to store file name and file size as per file.
i want file structure like this
file_count = 3
file[0].file_name = "a.txt"
file[0].file_size = 1024
file[1].file_name = "b.txt"
file[1].file_size = 818
file[2].file_name = "c.txt"
file[2].file_size = 452
I have all logic about file name and file size but how can i fill these things in this structure.?
Code :
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
char path[1024] = "/home/test/main/Integration/testing/package_DIR";
//int count = 5;
struct files {
char *file_name;
int file_size;
};
typedef struct file_header {
int file_count;
struct files file[5];
} metadata;
metadata *create_header();
int main() {
FILE *file = fopen("/home/test/main/Integration/testing/file.txt", "w");
metadata *header;
header = create_header();
if(header != NULL)
{
printf("size of Header is %d\n",sizeof(metadata));
}
if (file != NULL) {
if (fwrite(&header, sizeof(metadata), 1, file) < 1) {
puts("short count on fwrite");
}
fclose(file);
}
file = fopen("/home/test/main/Integration/testing/file.txt", "rb");
if (file != NULL) {
metadata header = { 0 };
if (fread(&header, sizeof(header), 1, file) < 1) {
puts("short count on fread");
}
fclose(file);
printf("File Name = %s\n", header.file[0].file_name);
printf("File count = %d\n", header.file_count);
printf("File Size = %d\n", header.file[0].file_size);
}
return 0;
}
metadata *create_header()
{
int file_count = 0;
DIR * dirp;
struct dirent * entry;
dirp = opendir(path);
metadata *header = (metadata *)malloc(sizeof(metadata));
while ((entry = readdir(dirp)) != NULL) {
if (entry->d_type == DT_REG) { /* If the entry is a regular file */
header->file[file_count].file_name = (char *)malloc(sizeof(char)*strlen(entry->d_name));
strcpy(header->file[file_count].file_name,entry->d_name);
//Put static but i have logic for this i will apply later.
header->file[file_count].file_size = 10;
file_count++;
}
}
header->file_count = file_count;
closedir(dirp);
//printf("File Count : %d\n", file_count);
return header;
}
output :
size of Header is 88
ile Name = �~8
File count = 29205120
File Size = -586425488
Its shows different output. so whats problem here?
Among other things, you are using sizeof on a pointer variable, but seem to think that gives you the size of the object being pointed to. It doesn't. To do that, use the asterisk operator to make the expression have the type that the pointer points at:
printf("size of Header is %d\n", sizeof *metadata);
As a side note, notice that sizeof is not a function, so you don't need parenthesis. When you do see parenthesis, that's when they're part of the expression (a cast).
You are not leaving enough room for the null-terminator:
header->file[file_count].file_name = (char *)malloc(sizeof(char)*strlen(entry->d_name));