Access inode table to list all filenames - c

I would like to know the most efficient way to list the filenames on a POSIX system. Doing either:
$ ls -R
Or:
$ find /
Or:
$ du /
Or 100 other variations (links abound on StackOverflow/ServerFault about various ways to do this). However, this is way too slow on the filesystem I am on, CIFS -- for example, I have currently been running the ls -R for about two days (it takes about 50 hours to complete; there are tons of files and directories on the system -- several petabytes worth).
So I am wondering if this can be done at a lower level, hopefully in C, to list out the filenames from the inode database (example here). I don't need a recursive lookup of the entire path, only the top-level name/filename -- I would build out everything else manually. Is there a way to do this so that, instead of spending ~50 hours on an ls command with billions of recursive lookups (yes, results do get cached on successive runs, but most of it is uncached on the first run), the inode database itself can be dumped?
As an example, perhaps something like:
#filename,inode
myfile.mov,1234
myotherfile.csv,92033
But the main point here -- and why I asked this question -- is speed, not merely a command that produces the above output (such as $ ls -iR).

Here is a way to use getdents directly and recursively. I will update this shortly with timings comparing it to ls and the other standard Unix utilities:
#define _GNU_SOURCE
#include <dirent.h>
#include <string.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
#define BUF_SIZE (1024 * 1024)

struct linux_dirent {
    unsigned long  d_ino;
    off_t          d_off;
    unsigned short d_reclen;
    char           d_name[];
};

void print_files(const char *dir, FILE *out)
{
    // open the directory
    int fd = open(dir, O_RDONLY | O_DIRECTORY);
    if (fd == -1) handle_error("open");

    // grab a buffer to hold the raw directory entries
    char *buffer = malloc(BUF_SIZE);
    if (buffer == NULL) handle_error("malloc");

    // keep calling getdents until the directory is exhausted;
    // a single call is not guaranteed to return every entry
    for (;;) {
        long num_read = syscall(SYS_getdents, fd, buffer, BUF_SIZE);
        if (num_read == -1) handle_error("getdents");
        if (num_read == 0) break;

        for (long buffer_position = 0; buffer_position < num_read;) {
            struct linux_dirent *d = (struct linux_dirent *) (buffer + buffer_position);
            // d_type is stored in the last byte of each record
            char d_type = *(buffer + buffer_position + d->d_reclen - 1);
            buffer_position += d->d_reclen;

            // skip . and .. in the listing (but keep other dotfiles)
            if (strcmp(d->d_name, ".") == 0 || strcmp(d->d_name, "..") == 0)
                continue;

            // path = dir + '/' + name
            char path[4096];
            snprintf(path, sizeof path, "%s/%s", dir, d->d_name);

            // recursive call, as necessary
            if (d_type == DT_DIR)
                print_files(path, out);
            else if (d_type == DT_REG)
                fprintf(out, "%s\n", path);
        }
    }
    free(buffer);
    close(fd);
}

int main(int argc, char *argv[])
{
    const char *dir = argc > 1 ? argv[1] : ".";
    FILE *out = fopen("c-log.txt", "w");
    if (out == NULL) handle_error("fopen");
    fprintf(out, "-------------[ START ]---------------------\n");
    print_files(dir, out);
    fclose(out);
    return 0;
}
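Assuming the source is saved as getdents_list.c (the filename here is arbitrary), it can be compiled and pointed at a mount like so:
$ gcc -O2 getdents_list.c -o getdents_list
$ ./getdents_list /mnt/share
The list of regular files is written to c-log.txt in the current directory.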

Related

Hashing program not returning identical values for the same file

This hashing program I've created (which scans for new files and computes their hashes) seems to work; however, once I remove a file, for example test.c, and then replace it with the exact same file, it returns two different hash values. By this I mean that while the program is running, the first calculation might return a hash of 1234, for example, and after deleting and placing the same file back in the folder it then returns 2345.
There seems to be no order, as 1234 could be the result 5 times in a row. I wonder whether there's any strikingly obvious reason for this in the code?
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <sys/inotify.h>
#include <openssl/sha.h>

int main (int argc, char *argv[])
{
    int fd;
    unsigned char c[SHA512_DIGEST_LENGTH];
    int i;
    SHA512_CTX mdContext;
    int bytes;
    unsigned char data[1024];
    const int event_size = sizeof(struct inotify_event);
    const int buf_len = 1024 * (event_size + FILENAME_MAX);
    char *directory = "/home/joe/Documents/";
    char *hashDirectory = "/home/joe/Documents/_Hash/";
    char hashInBuf[100];
    char hashOutBuf[100];

    fd = inotify_init();
    if (fd < 0) {
        perror("inotify_init");
    }
    /* watch the directory for newly created files */
    inotify_add_watch(fd, directory, IN_CREATE);

    while (1) {
        char buff[buf_len];
        int no_of_events, count = 0;

        //SEARCH FOR NEW FILES WITHIN DIRECTORY
        no_of_events = read (fd, buff, buf_len);
        while (count < no_of_events) {
            struct inotify_event *event = (struct inotify_event *)&buff[count];
            if (event->len) {
                if ((event->mask & IN_CREATE))
                    if(!(event->mask & IN_ISDIR)) {
                        printf("\n%s has been created\n", event->name);
                        //CONJOIN DIRECTORY AND FILENAME / EXTENSION
                        snprintf(hashInBuf, sizeof(hashInBuf), "%s/%s", directory, event->name);
                        snprintf(hashOutBuf, sizeof(hashOutBuf), "%s/%s.txt", hashDirectory, event->name);
                        FILE *ftest = fopen(hashInBuf, "rb");
                        FILE *ftest2 = fopen(hashOutBuf, "wt");
                        //HASH FUNCTION
                        SHA512_Init (&mdContext);
                        while ((bytes = fread (data, 1, 1024, ftest)) != 0)
                            SHA512_Update (&mdContext, data, bytes);
                        SHA512_Final (c, &mdContext);
                        for(i = 0; i < SHA512_DIGEST_LENGTH; i++){
                            fprintf(ftest2, "%02x", c[i]);
                            printf("%02x", c[i]);
                        }
                        fclose (ftest);
                        fclose (ftest2);
                        fflush (stdout);
                    }
            }
            count += event_size + event->len;
        }
    }
    return 0;
}
Thank you in advance!
In this line
if ((event->mask & IN_CREATE))
you wait for the event that a file is created. Then your hashing function immediately starts running!
This can lead to the situation that the file is not fully written yet, so you hash only part of the file.
You should use the event IN_CLOSE_WRITE to make sure that the file has already been completely written.
Another option is to not create the files in this directory, but to create them in a temporary directory and subsequently move them into the target directory. The corresponding event is then IN_MOVED_TO.
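A minimal sketch of that watch setup (the directory path and function name are placeholders):
#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

/* Returns an inotify fd whose events fire only for complete files. */
int open_watch(const char *dir)
{
    int fd = inotify_init();
    if (fd < 0) {
        perror("inotify_init");
        return -1;
    }
    /* IN_CLOSE_WRITE: a file opened for writing was closed, so its
     * contents are complete. IN_MOVED_TO: a file was moved in. */
    if (inotify_add_watch(fd, dir, IN_CLOSE_WRITE | IN_MOVED_TO) < 0) {
        perror("inotify_add_watch");
        close(fd);
        return -1;
    }
    return fd; /* read() events from this fd as in the loop above */
}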

Is there a way to directly copy files from one folder to another without opening

I know this way of copying files, which I think is pretty much the standard way of copying files in C.
#include <stdio.h>
#include <stdlib.h>

int main()
{
    int ch;  /* int, not char, so the EOF comparison works */
    char source_file[20], target_file[20];
    FILE *source, *target;

    printf("Enter name of file to copy\n");
    gets(source_file);

    source = fopen(source_file, "r");
    if( source == NULL )
    {
        printf("Press any key to exit...\n");
        exit(EXIT_FAILURE);
    }

    printf("Enter name of target file\n");
    gets(target_file);

    target = fopen(target_file, "w");
    if( target == NULL )
    {
        fclose(source);
        printf("Press any key to exit...\n");
        exit(EXIT_FAILURE);
    }

    while( ( ch = fgetc(source) ) != EOF )
        fputc(ch, target);

    printf("File copied successfully.\n");

    fclose(source);
    fclose(target);
    return 0;
}
But this way opens the file and copies it character by character. The files I want to copy are HUGE and many, so this will take very, VERY long. Is there a way I can achieve my goal of copying these files directly? I know the terminal or command prompt are completely different things from the C language, but a simple
cp sourcefile.txt destinationfile.txt
can do the trick.
Are there any such commands or tricks in C that I can use? I cannot use
system("cp sourcefile.txt destinationfile.txt");
because I am writing a robust program that should work on Linux and Windows.
Well, what do you imagine the cp command itself does to copy files? It opens the source file in read mode and the destination file in write mode, and copies everything in binary chunks! OK, more can be involved if you pass other options to cp, but the copy itself is no more magic than that.
That being said, what you do is not that. You are copying the file character by character. Even if the standard library does some buffering, you are repeatedly calling a function when it could be avoided. And... never use gets. It has been deprecated for ages because it is insecure: if the user enters a looong file name (more than 19 characters here), you get a buffer overflow. And do not forget to test all I/O functions, including the output ones. When writing a huge file to external media such as a USB key, you could run out of space on the device, and your program would still claim it completed the copy successfully.
Copying loop could be something like:
#define SIZE 16384
char buffer[SIZE];
size_t crin, crout;

while ((crin = fread(buffer, 1, SIZE, source)) > 0) {
    crout = fwrite(buffer, 1, crin, target);
    if (crout != crin) { /* control everything could be written */
        perror("Write error");
        break;
    }
}
if (ferror(source)) { /* test read error (removal of removable media, ...) */
    perror("Read error");
}
A low-level optimization here would be to use POSIX functions directly instead of the standard library ones, because once you are doing binary I/O in big chunks, the buffering of the standard library gives no advantage; you simply pay its overhead.
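For instance, a minimal sketch of the same loop with POSIX open/read/write (the helper name and the 64 KiB chunk size are arbitrary choices):
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int copy_posix(const char *src, const char *dst)
{
    int in = open(src, O_RDONLY);
    if (in == -1) { perror("open source"); return -1; }
    int out = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (out == -1) { perror("open target"); close(in); return -1; }

    int rc = 0;
    char buf[1 << 16];
    ssize_t n;
    while ((n = read(in, buf, sizeof buf)) > 0) {
        /* a short write on a regular file is treated as an error here */
        if (write(out, buf, n) != n) {
            perror("write");
            rc = -1;
            break;
        }
    }
    if (n < 0) { perror("read"); rc = -1; }

    close(in);
    close(out);
    return rc;
}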
This is how I have moved a file in the past without having to open it:
#include <stdio.h>

int main()
{
    rename("C:\\oldFile.txt", "C:\\newfile.txt");
    return 0;
}
One thing to be aware of is that you're copying in the slowest possible way, because you're doing it character by character. One improvement would be to copy full lines or bigger text chunks, using fgets and fputs.
Even better is to not copy the file as a text file, but instead just as binary chunks. This is achieved by opening the file in binary mode with the b flag, e.g. target = fopen(target_file, "wb");, and using fread and fwrite instead of the put-character functions.
In both scenarios you have to use a temporary buffer of reasonable size (it could be the size of the file, or fixed). Determining the optimal size is not trivial.
Yet another way to copy -- and, according to my operating systems professor, what cp does -- is to use memory-mapped files.
How to use memory-mapped files is unfortunately not portable; it depends on your operating system, i.e. platform. For Unix, the manpage of mmap is your friend. This is an example Unix implementation of mine:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>

int main(int argc, const char * argv[]) {
    if (argc != 3)
    {
        fprintf(stderr, "Usage %s <SourceFile> <DestinationFile>\n", argv[0]);
        return EXIT_FAILURE;
    }
    int source_file_desc = open(argv[1], O_RDONLY);
    if (source_file_desc == -1) {
        perror("Can't open source file");
        return EXIT_FAILURE;
    }
    struct stat source_info;
    if (stat(argv[1], &source_info) != 0) {
        perror("Can't get source file infos");
        return EXIT_FAILURE;
    }
    void *source_mem = mmap(NULL, source_info.st_size, PROT_READ,
                            MAP_FILE|MAP_PRIVATE, source_file_desc, 0);
    if (source_mem == MAP_FAILED) {
        perror("Mapping source file failed");
        return EXIT_FAILURE;
    }
    /* O_CREAT requires a mode argument */
    int destination_file_desc = open(argv[2], O_TRUNC|O_CREAT|O_RDWR, 0666);
    if (destination_file_desc == -1) {
        perror("Can't open destination file");
        return EXIT_FAILURE;
    }
    if (chmod(argv[2], source_info.st_mode) != 0) {
        perror("Can't copy file permissions");
    }
    /* grow the destination to the source size by writing a dummy byte
     * at the end; mmap cannot extend a file by itself */
    if (lseek(destination_file_desc, source_info.st_size-1, SEEK_SET) == -1) {
        perror("Can't seek to new end of destination file");
        return EXIT_FAILURE;
    }
    unsigned char dummy = 0;
    if (write(destination_file_desc, &dummy, 1) == -1)
    {
        perror("Couldn't write dummy byte");
        return EXIT_FAILURE;
    }
    void *destination_mem = mmap(NULL, source_info.st_size, PROT_WRITE,
                                 MAP_FILE|MAP_SHARED, destination_file_desc, 0);
    if (destination_mem == MAP_FAILED) {
        perror("Mapping destination file failed");
        return EXIT_FAILURE;
    }
    memcpy(destination_mem, source_mem, source_info.st_size);
    munmap(source_mem, source_info.st_size);
    munmap(destination_mem, source_info.st_size);
    close(source_file_desc);
    close(destination_file_desc);
    return EXIT_SUCCESS;
}
If it’s not a problem that any changes to one copy would affect the other, you can create a link to the file. How this works depends on the OS.
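On POSIX systems, for instance, a hard link can be created with link(); a minimal sketch (Windows has its own APIs for this, and the filenames are placeholders):
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* link() adds a second directory entry pointing at the same inode;
     * no data is copied, and both names see the same contents. */
    if (link("sourcefile.txt", "destinationfile.txt") == -1)
        perror("link");
    return 0;
}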
If you want to optimize a file copy as much as possible using only the standard library, here is what I suggest (untested):
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern bool copy_file( FILE* dest, FILE* restrict src );
static bool error_helper( const char* file, int line, const char* msg );

#if defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__X86__) || defined(__I86__) || defined(__INTEL__) || defined(__386)
# define PAGE_SIZE 4096U
#else
# error "Define the page size on your system, or use a system call such as sysconf() to find it."
#endif

#define non_fatal_stdlib_error() error_helper( __FILE__, __LINE__, strerror(errno) )

bool copy_file( FILE* dest, FILE* restrict src )
{
    errno = 0;
    if ( !(dest = freopen( NULL, "w+", dest )) )
        return non_fatal_stdlib_error();
    /* Try to help the library out by turning buffering off and allocating an
     * aligned block; it might be able to detect that at runtime.
     * On the other hand, the unbuffered implementation might be worse. */
    setvbuf( src, NULL, _IONBF, BUFSIZ );
    setvbuf( dest, NULL, _IONBF, BUFSIZ );
    char* const buffer = aligned_alloc( PAGE_SIZE, PAGE_SIZE );
    if (!buffer)
        return non_fatal_stdlib_error();

    bool ok = true;
    size_t n = fread( buffer, 1, PAGE_SIZE, src );
    while ( PAGE_SIZE == n ) {
        const size_t written = fwrite( buffer, 1, PAGE_SIZE, dest );
        if ( written != PAGE_SIZE ) {
            ok = non_fatal_stdlib_error();
            break;
        }
        n = fread( buffer, 1, PAGE_SIZE, src );
    } // end while
    if ( ok && ferror(src) )
        ok = non_fatal_stdlib_error();
    if ( ok && n > 0 ) {
        const size_t written = fwrite( buffer, 1, n, dest );
        if ( written != n )
            ok = non_fatal_stdlib_error();
    }
    free(buffer); /* don't leak the transfer buffer */
    return ok;
}

bool error_helper( const char* file, int line, const char* msg )
{
    fflush(stdout);
    fprintf( stderr, "Error at %s, line %d: %s.\n", file, line, msg );
    fflush(stderr);
    return false;
}
This at least gives the library implementation a chance to detect that all reads and writes are single memory pages.
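As the #error branch above suggests, the page size can instead be queried at run time; a minimal sketch using sysconf():
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* _SC_PAGESIZE reports the system page size, avoiding the
     * hard-coded 4096 above. */
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size == -1)
        perror("sysconf");
    else
        printf("page size: %ld bytes\n", page_size);
    return 0;
}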

Applying fork() and pipe() (or fifo()) on counting words code

I've finally finished writing my word-counting code. It counts the total number of words in files (i.e. txt). Now I want to use multiple fork() calls to access and read every file; I studied them over the last week. I use a global variable to hold the number of counted words. As far as I know, if I apply fork(), each child gets its own copy of the global variables, so the parent's counts are not updated. To avoid that, I tried to use mmap() and similar functions, and that works. But I also want to use pipe() (or a FIFO, if possible) to communicate the counts.
I use the nftw() function to walk folders and files. My logic is shown in the picture below. How can I use fork() and pipe() (or a FIFO) with this code? fork() is really complicated for me because of my inexperience, and I'm new to pipe() and fork(). My idea is that if I can use fork() and pipe(), there will be a fork() for every file (i.e. txt), and the files are accessed through those forks. If there is another folder with files in it, one of the created processes forks again and accesses those files. I also try to explain this in the drawing below. Thank you; I want to learn how to use them.
int countInEveryFolder(const char *dir)
is used because I don't know how to count the files before the next folder inside the nftw() callback. The number of files is necessary because it is the number of fork() calls.
Every folder should be the parent of its files; the files are contained in the folder.
Code:
#define _XOPEN_SOURCE 500 /* for nftw() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <dirent.h>
#include <ftw.h>
#include <ctype.h>
#include <sys/mman.h>
#include <locale.h>
#include <errno.h>

#define MAX_PATH_LEN 2048

unsigned long total_words = 0UL;
unsigned long total_dirs = 0UL;
unsigned long total_files = 0UL;

// Just proves counting the number of files in a folder
int countInEveryFolder(const char *dir) {
    struct stat stDirInfo;
    struct dirent *stFiles;
    DIR *stDirIn;
    char szFullName[MAX_PATH_LEN];
    char szDirectory[MAX_PATH_LEN];
    struct stat stFileInfo;
    int numOfFile = 0;

    strncpy(szDirectory, dir, MAX_PATH_LEN - 1);

    if (lstat(szDirectory, &stDirInfo) < 0)
    {
        perror(szDirectory);
        return 0;
    }
    if (!S_ISDIR(stDirInfo.st_mode))
        return 0;
    if ((stDirIn = opendir(szDirectory)) == NULL)
    {
        perror(szDirectory);
        return 0;
    }
    while ((stFiles = readdir(stDirIn)) != NULL)
    {
        if (!strcmp(stFiles->d_name, ".") || !strcmp(stFiles->d_name, ".."))
            continue;
        snprintf(szFullName, sizeof szFullName, "%s/%s", szDirectory, stFiles->d_name);
        if (lstat(szFullName, &stFileInfo) < 0)
            perror(szFullName);
        /* is the entry a regular file? */
        if (S_ISREG(stFileInfo.st_mode))
        {
            printf("Filename: %s\n", szFullName);
            numOfFile++;
        }
    } // end while
    closedir(stDirIn);
    return numOfFile;
}

// Count words in a file.
unsigned long count_words_in_file(const char *const filename)
{
    unsigned long count = 0UL;
    int errnum = 0;
    int c;
    FILE *in;

    in = fopen(filename, "rt");
    if (in == NULL) {
        errnum = errno;
        fprintf(stderr, "%s: %s.\n", filename, strerror(errnum));
        errno = errnum;
        return 0UL;
    }
    /* Skip leading whitespace. */
    do {
        c = getc(in);
    } while (isspace(c));
    /* Token loop. */
    while (c != EOF) {
        /* This token is a word if it starts with a letter. */
        if (isalpha(c))
            count++;
        /* Skip the rest of this token. */
        while (!isspace(c) && c != EOF)
            c = getc(in);
        /* Skip the trailing whitespace. */
        while (isspace(c))
            c = getc(in);
    }
    /* Paranoid checking for I/O errors. */
    if (!feof(in) || ferror(in)) {
        fclose(in);
        fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
        errnum = EIO;
    } else
    if (fclose(in)) {
        fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
        errnum = EIO;
    }
    errno = errnum;
    return count;
}

// Recursively walk folders
int nftw_callback(const char *filepath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
{
    // Directory
    if (typeflag == FTW_DP || typeflag == FTW_D)
    {
        total_dirs++;
        printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
        //countInEveryFolder(filepath);
    }
    // Regular file
    else if (typeflag == FTW_F)
    {
        total_files++;
        total_words += count_words_in_file(filepath);
        printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
    }
    return 0;
}

/* Error message */
void err_sys(const char *msg)
{
    perror(msg);
    fflush(stdout);
    exit(EXIT_FAILURE);
}

int main(int argc, char *argv[])
{
    if (argc < 2) { /* avoid passing NULL to nftw() */
        fprintf(stderr, "Usage: %s <directory>\n", argv[0]);
        return EXIT_FAILURE;
    }
    total_files = total_dirs = total_words = 0UL;
    if (nftw(argv[1], nftw_callback, 15, FTW_PHYS) == 0) {
        /* Success! */
        printf("%s: %lu files, %lu directories, %lu words total.\n",
               argv[1], total_files, total_dirs, total_words);
    } else {
        /* Failed... */
        err_sys("nftw");
    }
    putchar('\n');
    //printf( "\nTotal words = %d\n\n", *wordCount);
    //printf( "\nTotal folders = %d\n\n", *folderCount);
    //printf( "\nTotal childs = %d\n\n", *childCount); //fork()
    return 0;
}
To start, I would write the program in two phases: a single-process phase in which all the file paths are queued up (into a linked list or deque), and a multi-process phase in which the worker processes receive work via their pipe() and send counts back to the main process via their pipe(). The main process would use select() to multiplex the input from its children.
Once you understand how to use select() with pipes, work on making the file-path discovery concurrent as well.
This design would be much easier to implement in Go, Node.js, or greenlets with Python, but learning how to do it in C gives you a level of understanding of the underlying operations that you don't get with newer languages.
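As a rough sketch of that second phase (simplified to one child per file with a blocking read per pipe instead of select(); count_words_in_file is the function from the question, the rest of the names are placeholders):
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

extern unsigned long count_words_in_file(const char *filename);

unsigned long count_forked(char *files[], int nfiles)
{
    unsigned long grand_total = 0;

    for (int i = 0; i < nfiles; i++) {
        int fds[2];
        if (pipe(fds) == -1) { perror("pipe"); exit(EXIT_FAILURE); }

        pid_t pid = fork();
        if (pid == -1) { perror("fork"); exit(EXIT_FAILURE); }

        if (pid == 0) {                 /* child: count one file */
            close(fds[0]);
            unsigned long n = count_words_in_file(files[i]);
            if (write(fds[1], &n, sizeof n) != (ssize_t) sizeof n)
                perror("write");
            _exit(EXIT_SUCCESS);
        }

        close(fds[1]);                  /* parent: read the child's count */
        unsigned long n = 0;
        if (read(fds[0], &n, sizeof n) == (ssize_t) sizeof n)
            grand_total += n;
        close(fds[0]);
        wait(NULL);
    }
    return grand_total;
}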

Go code to list files in a Linux directory using getdents()

As an exercise, I wanted to translate some C code that used many syscalls into Golang. I found this nice code example on Unix & Linux StackExchange:
/*
 * List directories using getdents() because ls, find and Python libraries
 * use readdir(), which is slower (but uses getdents() underneath).
 *
 * Compile with
 * ]$ gcc getdents.c -o getdents
 */
#define _GNU_SOURCE
#include <dirent.h>     /* Defines DT_* constants */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>

#define handle_error(msg) \
    do { perror(msg); exit(EXIT_FAILURE); } while (0)

struct linux_dirent {
    long           d_ino;
    off_t          d_off;
    unsigned short d_reclen;
    char           d_name[];
};

#define BUF_SIZE 1024*1024*5

int
main(int argc, char *argv[])
{
    int fd, nread;
    char buf[BUF_SIZE];
    struct linux_dirent *d;
    int bpos;
    char d_type;

    fd = open(argc > 1 ? argv[1] : ".", O_RDONLY | O_DIRECTORY);
    if (fd == -1)
        handle_error("open");

    for ( ; ; ) {
        nread = syscall(SYS_getdents, fd, buf, BUF_SIZE);
        if (nread == -1)
            handle_error("getdents");
        if (nread == 0)
            break;
        for (bpos = 0; bpos < nread;) {
            d = (struct linux_dirent *) (buf + bpos);
            d_type = *(buf + bpos + d->d_reclen - 1);
            if( d->d_ino != 0 && d_type == DT_REG ) {
                printf("%s\n", (char *) d->d_name);
            }
            bpos += d->d_reclen;
        }
    }
    exit(EXIT_SUCCESS);
}
I have only been able to code this using ioutil.ReadDir, which defeats the purpose. Does anyone have an idea of how to translate this?
I would avoid using this code. As written, it's wrong: on 32-bit systems and maybe even some 64-bit ones, SYS_getdents is the legacy syscall that doesn't provide d_type and lacks support for 64-bit inode numbers, which means you get gratuitous errors on modern filesystems. Even if you fix it, it's no better than inlining readdir, which does basically exactly the same thing internally.
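For reference, here is what the fixed 64-bit variant might look like, using SYS_getdents64, which always supplies d_type and 64-bit inode numbers (struct layout per the getdents64(2) man page; the buffer size is an arbitrary choice):
#define _GNU_SOURCE
#include <dirent.h>   /* DT_* constants */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>

struct linux_dirent64 {
    uint64_t       d_ino;     /* 64-bit inode number */
    int64_t        d_off;     /* offset to next record */
    unsigned short d_reclen;  /* size of this record */
    unsigned char  d_type;    /* file type, at a fixed position here */
    char           d_name[];  /* null-terminated filename */
};

int main(int argc, char *argv[])
{
    int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY | O_DIRECTORY);
    if (fd == -1) { perror("open"); exit(EXIT_FAILURE); }

    char buf[65536];
    for (;;) {
        long nread = syscall(SYS_getdents64, fd, buf, sizeof buf);
        if (nread == -1) { perror("getdents64"); exit(EXIT_FAILURE); }
        if (nread == 0)
            break;
        for (long bpos = 0; bpos < nread; ) {
            struct linux_dirent64 *d = (struct linux_dirent64 *)(buf + bpos);
            if (d->d_type == DT_REG)
                printf("%s\n", d->d_name);
            bpos += d->d_reclen;
        }
    }
    close(fd);
    exit(EXIT_SUCCESS);
}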

Infinite recursion while listing directories in linux

I am trying to write a program, part of which lists all directories (especially starting from /), but I have a problem with /proc/self, which is infinitely recursive (I get /proc/self/task/4300/fd/3/proc/self/task/4300/fd/3/proc/self/task/4300/fd/3/proc/... and so on). What is a nice way to deal with it?
EDIT: The program is written in C and I use opendir() and readdir().
You can use the S_ISLNK macro to test the st_mode field returned by a call to lstat. If the file is a symbolic link, do not try to follow it.
[user#machine:~]:./list | grep link
/proc/mounts is a symbolic link
/proc/self is a symbolic link
Example code
#include <stdio.h>      // For perror
#include <stdlib.h>
#include <limits.h>     // For PATH_MAX
#include <sys/types.h>  // For stat, opendir, readdir
#include <sys/stat.h>   // For stat
#include <unistd.h>     // For stat
#include <dirent.h>     // For opendir, readdir

const char *prefix = "/proc";

int main(void)
{
    DIR *dir;
    struct dirent *entry;
    int result;
    struct stat status;
    char path[PATH_MAX];

    dir = opendir(prefix);
    if (!dir)
    {
        perror("opendir");
        exit(1);
    }
    entry = readdir(dir);
    while (entry)
    {
        result = snprintf(path, sizeof(path), "%s", prefix);
        snprintf(&path[result], sizeof(path) - result, "/%s", entry->d_name);
        printf("%s", path);
        result = lstat(path, &status);
        if (-1 == result)
        {
            printf("\n");
            perror("stat");
            exit(2);
        }
        if (S_ISLNK(status.st_mode))
        {
            printf("%s", " is a symbolic link");
        }
        printf("\n");
        entry = readdir(dir);
    }
    return(0);
}
From path_resolution(7):
Length limit
There is a maximum length for pathnames. If the pathname (or some intermediate pathname obtained while resolving symbolic links) is too long, an ENAMETOOLONG error is returned ("File name too long").
I think you should employ similar behaviour: check for too long pathnames.
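Applied to a readdir() loop like the one below, that check could look like this fragment (spath and entry named as in that code):
/* snprintf returns the length it would have needed, so a result >= the
 * buffer size means the path was truncated; skip such entries. */
if (snprintf(spath, sizeof spath, "%s/%s", path, entry->d_name) >= (int) sizeof spath) {
    fprintf(stderr, "%s/%s: path too long, skipping\n", path, entry->d_name);
    continue;
}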
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
#include <sys/param.h>

/* Short & sweet recursive directory scan, finds regular files only.
   Good starting point, should work on Linux OS.
   Pass the root path and two counters; it returns the number of dirs
   and number of files found. */
void tree_scan( const char *path, int *ndirs, int *nfiles){
    DIR *dir;
    struct dirent *entry;
    char spath[MAXPATHLEN] = "";

    if( !(dir = opendir( path))){ perror("opendir"); exit(1);}

    for( entry = readdir( dir); entry; entry = readdir( dir)){
        sprintf( spath, "%s/%s", path, entry->d_name);
        if( entry->d_type == DT_REG){ (*nfiles)++; printf( "%s\n", spath);}
        /* symlinks have d_type DT_LNK, not DT_DIR, so they are never
           descended into -- which avoids the /proc/self loop */
        if( entry->d_type == DT_DIR &&
            (strcmp( ".", entry->d_name)) &&
            (strcmp( "..", entry->d_name))){
            (*ndirs)++; tree_scan( spath, ndirs, nfiles);
        }
    }
    closedir( dir);
}

/* Call it like so */
int i = 0, l = 0;
tree_scan( "/path", &i, &l);
printf("Scanned %d directories, %d files.\n", i, l);
I don't have a *nix terminal handy, but you could always take a look at the source for ls.c and see how it's done.
The source, as part of the GNU coreutils, can be found here.
I created an ls clone a few years ago in school, and I think I got around it by watching the pathname size, as ulidtko mentioned.
