I'm creating a program which recursively finds all #include dependencies between C files in a specified directory and its child directories. Dependency paths should be absolute, so I use realpath to resolve relative paths and symbolic links. Since there can be many files I have decided to make the program multithreaded with OpenMP or pthreads.
The problem is that realpath resolves paths through the working directory. All threads share the same working directory so I would need to put a mutex on chdir and realpath.
Is there any alternate standard function to realpath which also takes the directory to resolve the path from as an argument?
There are a number of POSIX functions with the at suffix (such as openat()) which work with a specified directory. There isn't, however, a realpathat() function in POSIX. There also isn't an opendirat(), but there is fdopendir() which creates a DIR stream for an open directory file descriptor.
In a multithreaded program, any use of chdir() is fraught.
You should rethink your algorithm to use the various *at() functions to avoid needing to change directory at all. You'd open the directories for reading (open() or openat() with O_DIRECTORY, perhaps — though O_DIRECTORY isn't 100% necessary, nor is it supported on macOS) so that you can then access the files appropriately using the directory file descriptor in the *at() calls.
I worked a bit on a solution. It is by no means optimal but at least it seems to work. I created the function abspathat which turns a relative path into an absolute path. Then I use the built in readlinkat to fix the symlinks. The solution handles turns paths like "../code.c" "./code.c" "code.c" into "/dir/code.c". However it does currently not fix paths such as ../dir/../code.c, though why would anyone create such a path. Nor does it check if the file actually exists. Feel free to improve or do whatever you like with this code.
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <dirent.h>
#include <stdio.h>
/*****************************************************************************/
char *abspathat(char *dirpath, int dirlen, char *path);
/*****************************************************************************/
static const int MAX_FILEPATH = 4096;
/*****************************************************************************/
char *realpathat(int dirfd, char *dirpath, int dirlen, char *path) {
char *abs = abspathat(dirpath, dirlen, path);
char *buf = malloc(sizeof(char)*MAX_FILEPATH);
ssize_t size = readlinkat(dirfd, abs, buf, MAX_FILEPATH);
char *realpath;
if(size != -1) {
realpath = malloc(sizeof(size+1));
memcpy(realpath, buf, size);
realpath[size] = '\0';
free(abs);
} else {
realpath = abs;
}
free(buf);
return realpath;
}
/*---------------------------------------------------------------------------*/
char *abspathat(char *dirpath, int dirlen, char *path) {
/* If absolute */
if(path[0] == '/') {
return path;
}
int i;
char *right;
int d = 0;
int rlen = strlen(path);
int llen = 0;
if(path[0] == '.') {
if(path[1] == '.' && path[2] == '/') {
for(i = 3, d = 1; path[i] == '.'
&& path[i+1] == '.'
&& path[i+2] == '/'
&& i < rlen; i+=3) {
d++;
}
right = &path[i];
rlen -= i;
} else if(path[1] == '/') {
right = &path[2];
rlen -= 2;
}
} else {
right = &path[0];
}
for(i = dirlen - 1 - (dirpath[dirlen-1] == '/'); d && i; i--) {
if(dirpath[i] == '/') {
d--;
}
}
llen = i+1;
char *cpy = malloc(sizeof(char)*(llen + rlen + 2));
memcpy(cpy, dirpath, llen);
cpy[llen] = '/';
memcpy(cpy+llen+1, right, rlen);
cpy[llen+rlen+1] = '\0';
return cpy;
}
/*---------------------------------------------------------------------------*/
int main(int argc, char *argv[]) {
if(argc == 3) {
char *dirpath = argv[1];
DIR *d = opendir(dirpath);
char *path = argv[2];
char *resolved = realpathat(dirfd(d), dirpath, strlen(dirpath), path);
printf("%s\n", resolved);
} else {
printf("realpathat [directory] [filepath]\n");
}
return 0;
}
Related
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 years ago.
Improve this question
I need to make program that compare content of all files in directory and detects duplicates. Program works fine until it starts comparing files inside the for loop. I think there might be error in array but I can't find any. Any help will be appreciated.
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
int listFiles(char *path, int a, char ***array);
int main() {
char path[100];
char **array = NULL;
int a = 0;
printf("Enter path to list files: ");
scanf("%s", path);
listFiles(path, a, &array);
return 0;
}
int listFiles(char *basePath, int a, char ***array) {
char path[1000], c, *d = NULL;
FILE *input_file;
struct dirent *dp;
DIR *dir = opendir(basePath);
if (!dir) {
return 1;
}
while ((dp = readdir(dir)) != NULL) {
if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) {
strcpy(path, basePath);
strcat(path, "\\");
strcat(path, dp->d_name);
input_file = fopen(path, "r");
if (input_file == NULL) {
fclose(input_file);
}
if (input_file) {
printf("%s\n", path);
while ((c = getc(input_file)) != EOF)
d = realloc(d, (x + 1) * sizeof(*d));
(d)[x++] = c;
}
*array = realloc(*array, (a + 1) * sizeof(**array));
(*array)[a++] = d;
if (a > 1) {
for (int j = 0; j < a - 1; ++j) {
if (!memcmp(array[a], array[j], 999)) {
printf("duplikat\n");
free(array[a]);
--a;
break;
}
}
}
}
listFiles(path, a, array);
}
closedir(dir);
return 0;
}
while ((c = getc(input_file)) != EOF)
d=realloc(d, (x+1)*sizeof(*d));
(d)[x++] = c;
Is equal to:
while ((c = getc(input_file)) != EOF) {
d = realloc(d, (x+1)*sizeof(*d));
}
(d)[x++] = c;
Which reads from a file until EOF and reallocates a d pointer with the same size for each character in the file. Then is assigns to the last element in d the value of EOF. So the content of the file is not saved in d pointer.
Always explicitly use { and } (except for cases when don't).
Check for overflow.
size_t pathsize = sizeof(path);
assert(pathsize > strlen(basePath) + 1 + strlen(dp->d_name) + 1);
strcpy(path, basePath);
strcat(path, "\\");
strcat(path, dp->d_name);
Do similar checks everytime you do something dangerous.
What's the point of closing NULL file?
if(input_file == NULL){
fclose(input_file);
}
if (input_file){
The number 999 is very magical.
memcmp(array[a],array[j],999)
You incorrectly handle arrays. Or don't free memory. I don't know.
*array = realloc(*array, (a+1)*sizeof(**array));
(*array)[a++] = d;
...
free(array[a]);
There is a little point in char *** variable. Don't use *** (unless in cases where you use them). The char***array can be completely removed.
Start with a good abstraction. First write a function that will compare two files bool files_compare(const char *file1, const char *file2);. Then write a function that will list all files in a directory. Then for each pair of files listed compare them. There is a little need to no need in storing the content of the file in dynamic memory (what if you have files that have 10Gb and a system with 1Gb of memory?).
The listFiles function is untimately is endlessly recursively calling itself if there is at least one file in the directory.
int listFiles(char *basePath, int a, char ***array)
{
...
DIR *dir = opendir(basePath);
...
while ((dp = readdir(dir)) != NULL){
...
listFiles(path,a,array);
}
}
This code has many errors. It reuses old memory, incorrectly handles arrays, has some strange magic numbers, leaks memory, does not close opened files, is not protected against overflow, and opens the directory recursively.
int main(int argc, char **argv)
{
char *dirFilename = "/home/sv5071184/sample";
DIR *directory = NULL;
directory = opendir (dirFilename);
if(directory == NULL)
return -1;
struct dirent *dirp;
while ((dirp = readdir (directory)) != NULL) {
if( ! (strcmp (dirp->d_name ,".done") == 0) )
{
printf ("%s\n", dirp->d_name);
}
}
if(closedir(directory) < 0)
return -1;
}
NOTE: i have updated the code . with this code am able to find all the files in a directory whereas i need to find only .done files
The function char *strstr(const char *haystack, const char *needle); give you the position of the substring 'needle' if exists. Then you just have to test if the substring is at the end of the file name.
EDIT:
If you want to keep your code as you did, in the while, change your if condition to:
strstr(dirp->d_name, ".done")!=NULL
Here is how you can find the extension using strrchr:
#include <string.h>
int main(int argc, char **argv)
{
char *dirFilename = "/home/sv5071184/sample";
DIR *directory = NULL;
directory = opendir (dirFilename);
if(directory == NULL)
return -1;
struct dirent *dirp;
while ((dirp = readdir (directory)) != NULL) {
char *dot = strrchr(dirp->d_name, '.'); /* Find last '.', if there is one */
if (dot && (strcmp(dot, ".done") == 0))
{
printf ("%s\n", dirp->d_name);
}
}
if(closedir(directory) < 0)
return -1;
}
Finding files that end in specific string (an extension) is very common thing to do with file names.
For the better or worse, under POSIX standard, file name is not separated into name and extension , and unfortunately C string library does not have a function that is like endsWith in Java or C#.
I think that you should write a simple utility function ends_with(char*, char*) that checks if one string ends with another and use it to check if ends_with(ent->d_name, ".done").
Hope this helps :)
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <dirent.h>
int main(int argc, char **argv)
{
char *dirFilename = "/home/sv5071184/sample";
DIR *directory = NULL;
directory = opendir (dirFilename);
if(directory == NULL)
return -1;
struct dirent *dirp;
while ((dirp = readdir (directory)) != NULL) {
if ( strstr(dirp->d_name , ".done" ))
{
printf( "found a .done file: %s\n", dirp->d_name );
}
}
if(closedir(directory) < 0)
return -1;
}
I have used this way and it is working.. thanks everyone
I am doing a recursive walk through directories to make changes to files. My change file function needs the full path of the file to be able to do stuff. However, what my program is doing right now is just getting the name of the current file or folder but not the full path.
My approach is that I would make a string and keeps appending names to it until I get the full path. However, because I'm doing recursion, I'm having troubles passing the string around to append more strings to it.
This is my code:
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <errno.h>
void recursiveWalk(const char *pathName, char *fullPath, int level) {
DIR *dir;
struct dirent *entry;
if (!(dir = opendir(pathName))) {
fprintf(stderr, "Could not open directory\n");
return;
}
if (!(entry = readdir(dir))) {
fprintf(stderr, "Could not read directory\n");
return;
}
do {
if (entry->d_type == DT_DIR) { // found subdirectory
char path[1024];
int len = snprintf(path, sizeof(path)-1, "%s/%s", pathName, entry->d_name); // get depth
path[len] = 0;
// skip hidden paths
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
continue;
}
fprintf(stdout, "%*s[%s]\n", level*2, "", entry->d_name);
// Append fullPath to entry->d_name here
recursiveWalk(path, fullPath, level + 1);
}
else { // files
fprintf(stdout, "%*s- %s\n", level*2, "", entry->d_name);
//changeFile(fullPath);
}
} while (entry = readdir(dir));
closedir(dir);
}
int main(int argn, char *argv[]) {
int level = 0;
recursiveWalk(".", "", level);
return 0;
}
Well there are a number of little problems in your code.
you never use nor change fullPath in recursiveWalk
your formats are weird : you use level*2 to limit the number of characters printed from an empty string
you compute the actual path only when you have found a directory, while you say you need it to change a file.
you add path[len] = 0 after a snprintf when snprintf guarantees that but buffer is null terminated
But apart from that, you correctly pass the path to the analyzed dir append to the path passed in initial call, but in pathName variable, and computed as path.
So a possible fix for your code would be :
fix the formats for printf
remove the unused fullPath parameter from recursiveWalk
allways compute path and use it in the file branch
comment out the unnecessary path[len] = '\0'
I also replaced while (entry = readdir(dir)); with while ((entry = readdir(dir))); to explicitely tell the compiler that I want to set entry and then test its value - and remove the warning
Possible code:
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include <dirent.h>
#include <unistd.h>
#include <sys/types.h>
#include <errno.h>
void recursiveWalk(const char *pathName, int level) {
DIR *dir;
struct dirent *entry;
if (!(dir = opendir(pathName))) {
fprintf(stderr, "Could not open directory\n");
return;
}
if (!(entry = readdir(dir))) {
fprintf(stderr, "Could not read directory\n");
return;
}
do {
char path[1024];
int len = snprintf(path, sizeof(path)-1, "%s/%s", pathName, entry->d_name); // get depth
// path[len] = 0;
if (entry->d_type == DT_DIR) { // found subdirectory
// skip hidden paths
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
continue;
}
fprintf(stdout, "%s [%s] (%d)\n", pathName, entry->d_name, level);
// Append fullPath to entry->d_name here
recursiveWalk(path, level + 1);
}
else { // files
fprintf(stdout, "%s (%d)\n", path, level);
//changeFile(fullPath);
}
} while ((entry = readdir(dir)));
closedir(dir);
}
int main(int argn, char *argv[]) {
int level = 0;
recursiveWalk(".", level);
return 0;
}
Recursion is a succinct way to express things (especially walking directories), but actually you should normally avoid it in practice. If the directory tree is deep enough, it will crash your software.
Using a queue eliminates the need for recursion, and is generally an efficient way to traverse.
I'm including the code I use to process directory trees in a project...
static int on_dir(const char* const dir, struct duplicate** dp) {
bool r = opts.recurse;
DIR* d = opendir(dir);
if (!d)
return - 1;
struct dirent* de;
while ((de = readdir(d))) {
struct stat s;
size_t bs = strlen(dir) + strlen(de->d_name) + 2;
char b[bs];
const char* const a = strjoin(b, dir, de->d_name, '/');
if (lstat(a, &s)) {
print_error("unable to stat %s", d);
continue;
}
if (S_ISREG(s.st_mode))
if (on_file(a, &s, dp))
print_error("unable to process file %s/%s", dir, de->d_name);
}
if (!r) {
if (closedir(d))
on_fatal("unable to close directory %s", dir);
return 0;
}
rewinddir(d);
while ((de = readdir(d))) {
struct stat ds;
size_t bs = strlen(dir) + strlen(de->d_name) + 2;
char b[bs];
const char* const d = strjoin(b, dir, de->d_name, '/');
if (lstat(d, &ds)) {
print_error("unable to stat %s", d);
continue;
}
if (S_ISDIR(ds.st_mode)) {
const char* const dot = ".";
const char* const dotdot = "..";
if (!strcmp(dot, de->d_name) || !strcmp(dotdot, de->d_name))
continue;
struct path* p = path_create(strcpy(fmalloc(bs), d));
queue_add(&paths, &p->queue);
}
}
if (closedir(d))
print_error("unable to close directory %s", dir);
return 0;
}
and the code for strjoin
static inline char* strjoin(char* restrict const d, const char* restrict const a, const char* restrict const b, const char c) {
size_t na = strlen(a);
size_t nb = strlen(b);
memcpy(d, a, na);
d[na] = c;
memcpy(d + na + 1, b, nb);
d[na + nb + 1] = '\0';
return d;
}
I'm hoping this helps. Please feel free to use any of the code you find in the git repository.
I would like to get names of only *.txt files in given directory, sth like this:
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <dirent.h>
int main(int argc, char **argv)
{
char *dirFilename = "dir";
DIR *directory = NULL;
directory = opendir (dirFilename);
if(directory == NULL)
return -1;
struct dirent *ent;
while ((ent = readdir (directory)) != NULL)
{
if(ent->d_name.extension == "txt")
printf ("%s\n", ent->d_name);
}
if(closedir(directory) < 0)
return -1;
return 0;
}
How can I do this in pure unixs c?
Firstly, Unix has no notion of file extensions, so there's no extension member on struct dirent. Second, you can't compare strings with ==. You can use something like
bool has_txt_extension(char const *name)
{
size_t len = strlen(name);
return len > 4 && strcmp(name + len - 4, ".txt") == 0;
}
The > 4 part ensures that the filename .txt is not matched.
(Obtain bool from <stdbool.h>.)
You can use the glob() function call for that. More info using your favourite search engine, Linux man pages, or here.
#include <glob.h>
#include <stdio.h>
int main(int argc, char **argv) {
const char *pattern = "./*.txt";
glob_t pglob;
glob(pattern, GLOB_ERR, NULL, &pglob);
printf("Found %d matches\n", pglob.gl_pathc);
printf("First match: %s\n", pglob.gl_pathv[0]);
globfree(&pglob);
return 0;
}
Possibility:
while ((ent = readdir (directory)) != NULL)
{
const size_t len = strlen(ent->d_name);
if (len > 4 &&
ent->d_name[len - 4] == '.' &&
ent->d_name[len - 3] == 't' &&
ent->d_name[len - 2] == 'x' &&
ent->d_name[len - 1] == 't')
{
printf ("%s\n", ent->d_name);
}
}
You're almost there, you just need to check if the filename ends with .txt. One way to do that is to use strcmp, strcasecmp, or memcmp:
while ((ent = readdir (directory)) != NULL)
{
int len = strlen(ent->d_name);
if(len > 4 && memcmp(ent->d_name + len - 4, ".txt", 4) == 0) // only checks lowercase
{
// It's a .txt file - now check that it's a regular file
char filename[PATH_MAX];
snprintf(filename, sizeof(filename), "%s/%s", dirFilename, ent->d_name);
struct stat st;
if(stat(filename, &st) == 0 && S_ISREG(st.st_mode))
{
// It's a regular file - process it
}
}
}
It's a good idea to verify that it's a regular file (and not a directory or other type of special file) by calling stat(2) on the full file path and checking the st_mode field with the S_ISxxx macros. Note that the d_type member of the DIR struct returned by readdir isn't always supported, so it's not a good idea to rely on it.
Alternatively, instead of using opendir, readdir, and closedir, you can use the glob(3) function:
glob_t globbuf;
if(glob("/path/to/dir/*.txt", 0, NULL, &globbuf) == 0)
{
int i;
for(i = 0; i < globbuf.gl_pathc; i++)
process_filename(globbuf.gl_pathv[i]);
}
globfree(&globbuf);
#BartFriedrich has points out the glob() function, however he didn't give an example of it's use. Very briefly (and wholly untested) you might try something like this
#include <glob.h>
#include <stdio.h>
void glob_example() {
glob_t g;
int i;
glob("*.txt", 0, NULL, &g);
for (i = 0; i < g.gl_pathc)
printf("matched: %s\n", g.pathv[i]);
globfree(&g)
}
glob() is actually a fairly complicated function in detail, and for more general file matching requirements I probably wouldn't use it, but it does handle your problem effectively. For more information, check out man glob on your linux machine or look at the man page online.
You could write a endswith function:
int endswith (const char *name, const char *suffix)
Just do a reverse-loop (start from the end) throught the suffix and check if each char is the same.
Is there a way to list all subdirectories in a given directory path in C? I was hoping I would be able to do it with the stat() function but it only works on files.
stat works on directories too.
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
int num_dirs(const char* path)
{
int dir_count = 0;
struct dirent* dent;
DIR* srcdir = opendir(path);
if (srcdir == NULL)
{
perror("opendir");
return -1;
}
while((dent = readdir(srcdir)) != NULL)
{
struct stat st;
if(strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0)
continue;
if (fstatat(dirfd(srcdir), dent->d_name, &st, 0) < 0)
{
perror(dent->d_name);
continue;
}
if (S_ISDIR(st.st_mode)) dir_count++;
}
closedir(srcdir);
return dir_count;
}
You want readdir(3).
/*
I had need in something like this not so long ago (my difference is I
needed recursive scan) so I added only some comments... Sorry for recursion
but I was short of time and this was only part of internal one-time tool.
*/
/* Print all the dirs starting from <path> [maybe recursive]. */
int print_dirs(const char *path, int recursive)
{
struct dirent *direntp = NULL;
DIR *dirp = NULL;
size_t path_len;
/* Check input parameters. */
if (!path)
return -1;
path_len = strlen(path);
if (!path || !path_len || (path_len > _POSIX_PATH_MAX))
return -1;
/* Open directory */
dirp = opendir(path);
if (dirp == NULL)
return -1;
while ((direntp = readdir(dirp)) != NULL)
{
/* For every directory entry... */
struct stat fstat;
char full_name[_POSIX_PATH_MAX + 1];
/* Calculate full name, check we are in file length limts */
if ((path_len + strlen(direntp->d_name) + 1) > _POSIX_PATH_MAX)
continue;
strcpy(full_name, path);
if (full_name[path_len - 1] != '/')
strcat(full_name, "/");
strcat(full_name, direntp->d_name);
/* Ignore special directories. */
if ((strcmp(direntp->d_name, ".") == 0) ||
(strcmp(direntp->d_name, "..") == 0))
continue;
/* Print only if it is really directory. */
if (stat(full_name, &fstat) < 0)
continue;
if (S_ISDIR(fstat.st_mode))
{
printf("%s\n", full_name);
if (recursive)
print_dirs(full_name, 1);
}
}
/* Finalize resources. */
(void)closedir(dirp);
return 0;
}
/* We are taking first argument as initial path name. */
int main(int argc, const char* argv[])
{
if (argc < 2)
return -1;
print_dirs(argv[1], 1);
return 0;
}
As others have noted, stat(2) works fine on files and devices of all types. It reads through symbolic links to the file at the far end; if you need the information about the symbolic link itself, use lstat(2).
To list the names of all directories within a single directory (non-recursively), use a combination of the readdir(3) family of functions.
To list the names of all directories recursively, use the ftw(3) or nftw(3) functions to do a 'file tree walk' (from whence cometh their names; 'n' is for 'new').