Related
I would like to know the most efficient way to list the filenames on a Posix system. Doing either:
$ ls -R
Or:
$ find /
Or:
$ du /
Or 100 other variations (links abound on StackOverflow/ServerFault about various ways to do this). However, this is way too slow on the filesystem I am on, cifs -- for example, I have currently been running the ls -R for about two days (it takes about 50 hours to complete -- there are tons of files and directories on the system -- several petabytes worth).
So I am wondering if this can be done at a lower-level, hopefully in C. to list out the filenames from the inode database (example here). I don't need a recursive lookup of the entire path, but only the top-level name | filename -- and I would build out everything else manually. Is there a way to do this so that hopefully instead of taking ~50 hours to do an ls command with the billions of recursive lookups (yes, it does get cached after successive runs, but not most of it on the first run) can the inode database itself be dumped?
As an example, perhaps something like:
#filename,inode
myfile.mov,1234
myotherfile.csv,92033
But the main point here -- and the reason I asked this question -- is speed, not merely finding a command that does the above (such as $ ls -iR).
Here is a way to directly use getdents recursively. I will update timings of this shortly to compare it to ls and the other standard unix utils:
#define _GNU_SOURCE
#include <dirent.h>
#include <string.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
/*
 * One variable-length directory record as it appears in the buffer filled by
 * the raw getdents system call.  The walker casts positions inside that
 * buffer to this type and steps from one record to the next using d_reclen.
 * The entry type is not a member here: the walker reads it from the last
 * byte of each record (offset d_reclen - 1).
 */
struct linux_dirent {
unsigned long d_ino;      /* presumably the inode number -- see getdents(2) */
off_t d_off;              /* not used by the code in this file */
unsigned short d_reclen;  /* total size of this record in bytes */
char d_name[];            /* NUL-terminated entry name */
};
/*
 * Recursively walk `dir` with the raw getdents system call and write the
 * path of every regular file found to `out`, one path per line.
 *
 *   dir  directory to list (not modified)
 *   out  stream that receives the "dir/name" lines
 *
 * Any failure to open a directory, read it, or allocate memory is fatal:
 * handle_error() prints a message and exits the process.
 *
 * Only entries typed DT_DIR (recursed into) and DT_REG (printed) are
 * handled; everything else, including DT_UNKNOWN, is ignored.
 */
void print_files(char* dir, FILE* out)
{
    int fd = open(dir, O_RDONLY | O_DIRECTORY);
    if (fd == -1) handle_error("Error opening file.\n");

    /* Initial size of the record buffer; it grows if the directory is larger. */
#define BUF_SIZE (1024*1024*1)
    /* Never call getdents with less free room than this. */
    enum { MIN_ROOM = 64 * 1024 };

    size_t capacity = BUF_SIZE;
    size_t used = 0;
    char* buffer = malloc(capacity);
    if (buffer == NULL) handle_error("Error malloc.\n");

    /*
     * Read the WHOLE directory before recursing.  getdents returns as many
     * records as fit and must be called again until it returns 0; a single
     * call silently drops the rest of a large directory.  Reading everything
     * first also lets us close the descriptor before descending, so only one
     * directory is open at any time regardless of tree depth.
     */
    for (;;) {
        if (capacity - used < MIN_ROOM) {
            char *grown = realloc(buffer, capacity * 2);
            if (grown == NULL) handle_error("Error malloc.\n");
            buffer = grown;
            capacity *= 2;
        }

        /* The kernel takes the count as an unsigned int; cap each request. */
        size_t room = capacity - used;
        if (room > BUF_SIZE) room = BUF_SIZE;

        long num_read = syscall(SYS_getdents, fd, buffer + used, (unsigned int) room);
        if (num_read == -1) handle_error("Error getdents syscall.\n");
        if (num_read == 0) break;               /* end of directory */
        used += (size_t) num_read;
    }
    close(fd);

    const size_t dir_len = strlen(dir);

    for (size_t position = 0; position < used;) {
        struct linux_dirent *d = (struct linux_dirent *) (buffer + position);
        /* The entry type is stored in the last byte of each record. */
        char d_type = *(buffer + position + d->d_reclen - 1);
        position += d->d_reclen;

        /* Skip only the "." and ".." links; hidden files are real files. */
        if (d->d_name[0] == '.' &&
            (d->d_name[1] == '\0' ||
             (d->d_name[1] == '.' && d->d_name[2] == '\0')))
            continue;

        if (d_type != DT_DIR && d_type != DT_REG)
            continue;

        /* path = dir + '/' + name, sized exactly so it can never overflow. */
        const size_t name_len = strlen(d->d_name);
        char *path = malloc(dir_len + 1 + name_len + 1);
        if (path == NULL) handle_error("Error malloc.\n");
        memcpy(path, dir, dir_len);
        path[dir_len] = '/';
        memcpy(path + dir_len + 1, d->d_name, name_len + 1);

        if (d_type == DT_DIR)
            print_files(path, out);
        else
            fprintf(out, "%s\n", path);

        free(path);
    }
    free(buffer);
}
/*
 * Entry point: lists every regular file below argv[1] (or "." when no
 * argument is given) into the file "c-log.txt" in the current directory.
 * Returns EXIT_SUCCESS when the log was written and closed without error.
 */
int main(int argc, char *argv[])
{
    /* Use the argument in place; copying it into a fixed buffer could overflow. */
    char *dir = argc > 1 ? argv[1] : ".";

    FILE *out = fopen("c-log.txt", "w");
    if (out == NULL) handle_error("Error opening c-log.txt");

    fprintf(out, "-------------[ START ]---------------------\n");
    print_files(dir, out);

    /* fclose flushes buffered output, so this is where a full disk shows up. */
    if (fclose(out) != 0) handle_error("Error closing c-log.txt");

    return EXIT_SUCCESS;
}
I want to open an image, and in Windows I do:
#include <windows.h>
..
ShellExecute(NULL, "open", "https://gsamaras.files.wordpress.com/2018/11/chronosgod.png", NULL, NULL, SW_SHOWNORMAL);
I would like to use a Linux approach, where it's so much easier to run something on the fly. Example:
char s[100];
snprintf(s, sizeof s, "%s %s", "xdg-open", "https://gsamaras.files.wordpress.com/2018/11/chronosgod.png");
system(s);
In my Ubuntu, it works. However, when running that in Wandbox (Live Demo), or in any other online compiler, I would most likely get an error:
sh: 1: xdg-open: not found
despite the fact that these online compilers seem to live in Linux (checked). I don't expect the online compiler to open a browser for me, but I did expect the code to run without an error. Ah, and forget Mac (personal laptop, limiting my machines).
Since I have no other Linux machine to check, my question is: Can I expect that this code will work in most of the major Linux distributions?
Maybe the fact that it failed on online compilers is misleading.
PS: This is for my post on God of Time, so no worries about security.
Although Antti Haapala already completely answered the question, I thought some comments about the approach, and an example function making safe use trivial, might be useful.
xdg-open is part of desktop integration utilities from freedesktop.org, as part of the Portland project. One can expect them to be available on any computer running a desktop environment participating in freedesktop.org. This includes GNOME, KDE, and Xfce.
Simply put, this is the recommended way of opening a resource (be it a file or URL) when a desktop environment is in use, in whatever application the user prefers.
If there is no desktop environment in use, then there is no reason to expect xdg-open to be available either.
For Linux, I would suggest using a dedicated function, perhaps along the following lines. First, a couple of internal helper functions:
#define _POSIX_C_SOURCE 200809L
#define _GNU_SOURCE
//
// SPDX-License-Identifier: CC0-1.0
//
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <dirent.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/* Number of bits in an unsigned long. */
#define ULONG_BITS (CHAR_BIT * sizeof (unsigned long))
/* Make descriptor `fd` refer to /dev/null, opened read-write.
   Returns 0 on success, otherwise an errno code (EINVAL when fd is -1).
   On the EINVAL and dup2-failure paths errno is also set to that code.
*/
static inline int devnullfd(const int fd)
{
    int nullfd;

    /* -1 is never a valid target descriptor. */
    if (fd == -1) {
        errno = EINVAL;
        return EINVAL;
    }

    /* Open /dev/null, retrying when interrupted by a signal. */
    for (;;) {
        nullfd = open("/dev/null", O_RDWR | O_NOCTTY);
        if (nullfd != -1)
            break;
        if (errno != EINTR)
            return errno;
    }

    /* The open already landed on the requested descriptor. */
    if (nullfd == fd)
        return 0;

    if (dup2(nullfd, fd) == -1) {
        /* close() may overwrite errno; report the dup2 failure. */
        const int cause = errno;
        close(nullfd);
        errno = cause;
        return cause;
    }

    return (close(nullfd) == -1) ? errno : 0;
}
/* Helper function to close all descriptors except the small ones
   selected in the mask: bit N set means "keep descriptor N open".
   Only descriptors smaller than the number of bits in an unsigned
   long can be kept; every larger descriptor is always closed.
   For obvious reasons, this is not thread safe, and is only
   intended to be used in recently forked child processes. */
static void closeall(const unsigned long mask)
{
    /* Number of descriptors the mask can describe. */
    const long mask_bits = (long)(CHAR_BIT * sizeof mask);
    DIR *dir;
    struct dirent *ent;
    int dfd;

    dir = opendir("/proc/self/fd/");
    if (!dir) {
        /* Cannot list open descriptors. Just try and close all. */
        long fd_max = sysconf(_SC_OPEN_MAX);
        long fd;

        /* Always cover the mask range, even if sysconf() failed (-1). */
        if (fd_max < mask_bits - 1)
            fd_max = mask_bits - 1;

        for (fd = 0; fd <= fd_max; fd++) {
            if (fd < mask_bits && (mask & (1uL << fd)))
                continue;
            close((int)fd);
        }
        return;
    }

    /* The directory stream's own descriptor must stay open while we iterate. */
    dfd = dirfd(dir);

    while ((ent = readdir(dir)) != NULL) {
        const char *p = ent->d_name;
        int fd = 0;

        /* Parse a decimal descriptor number, refusing to overflow int. */
        for (; *p >= '0' && *p <= '9'; p++) {
            const int digit = *p - '0';
            if (fd > (INT_MAX - digit) / 10)
                break;
            fd = 10 * fd + digit;
        }

        /* Skip names that are empty, not purely numeric, or out of range. */
        if (p == ent->d_name || *p)
            continue;

        if (fd == dfd)
            continue;

        /* Shift only by counts smaller than the type width; anything else is
           undefined and could make an unrelated mask bit protect this fd. */
        if (fd < mask_bits && (mask & (1uL << fd)))
            continue;

        close(fd);
    }

    closedir(dir);
}
closeall(0) tries hard to close all open file descriptors, and devnullfd(fd) tries to open fd to /dev/null. These are used to make sure that even if the user spoofs xdg-open, no file descriptors are leaked; only the file name or URL is passed.
On non-Linux POSIXy systems, you can replace them with something more suitable. On BSDs, use closefrom(), and handle the first ULONG_BITS descriptors (the ones the mask can describe) in a loop.
The xdg_open(file-or-url) function itself is something along the lines of
/* Open a file or URL with the user's preferred application by running
   xdg-open in a forked child, then wait for that child to finish.
   Returns 0 on success, an errno error code otherwise; on every path
   that picks the code itself, errno is set to the same value.
*/
int xdg_open(const char *file_or_url)
{
    /* xdg-open exit status -> errno code.  98 and 99 are produced by the
       child below; anything not listed maps to ENOSYS. */
    static const struct {
        int exit_status;
        int error;
    } status_map[] = {
        {  0, 0       },   /* No error. */
        {  1, EINVAL  },   /* Error in command line syntax. */
        {  2, ENOENT  },   /* File does not exist. */
        {  3, ENOSYS  },   /* A required tool could not be found. */
        {  4, EPROTO  },   /* Action failed. */
        { 98, EACCES  },   /* Could not drop privileges. */
        { 99, ENOPKG  },   /* xdg-open could not be executed. */
    };
    pid_t child, reaped;
    int status, result;
    size_t i;

    /* Reject NULL and empty arguments. */
    if (file_or_url == NULL || *file_or_url == '\0') {
        errno = EINVAL;
        return EINVAL;
    }

    child = fork();
    if (child == -1)
        return errno;

    if (child == 0) {
        /* Child process.  Use the real (not effective) identity. */
        const uid_t real_uid = getuid();
        const gid_t real_gid = getgid();

        /* Leak no descriptors to xdg-open; give it /dev/null instead. */
        closeall(0);
        devnullfd(STDIN_FILENO);
        devnullfd(STDOUT_FILENO);
        devnullfd(STDERR_FILENO);

        /* Drop elevated privileges, group first, then user. */
        if (setresgid(real_gid, real_gid, real_gid) == -1)
            _Exit(98);
        if (setresuid(real_uid, real_uid, real_uid) == -1)
            _Exit(98);

        /* Detach the child into a session of its own. */
        setsid();

        execlp("xdg-open", "xdg-open", file_or_url, (char *)0);

        /* Only reached if exec failed. xdg-open uses 0-5, we return 99. */
        _Exit(99);
    }

    /* Parent: reap the child, retrying when interrupted by a signal. */
    for (;;) {
        status = 0;
        reaped = waitpid(child, &status, 0);
        if (reaped != -1)
            break;
        if (errno != EINTR)
            return errno;
    }

    /* Killed by a signal: the closest available code is an I/O error. */
    if (!WIFEXITED(status)) {
        errno = EIO;
        return EIO;
    }

    result = ENOSYS;
    for (i = 0; i < sizeof status_map / sizeof status_map[0]; i++) {
        if (status_map[i].exit_status == WEXITSTATUS(status)) {
            result = status_map[i].error;
            break;
        }
    }

    /* errno is set even on success (to 0), exactly like the return value. */
    errno = result;
    return result;
}
As already mentioned, it tries hard to close all open file descriptors, redirects the standard streams to /dev/null, ensures the effective and real identity matches (in case this is used in a setuid binary), and passes success/failure using the child process exit status.
The setresuid() and setresgid() calls are only available on OSes that have saved user and group ids. On others, use seteuid(uid) and setegid() instead.
This implementation tries to balance user configurability with security. Users can set the PATH so that their favourite xdg-open gets executed, but the function tries to ensure that no sensitive information or privileges are leaked to that process.
(Environment variables could be filtered, but they should not contain sensitive information in the first place, and we don't really know which ones a desktop environment uses. So better not mess with them, to keep user surprises to a minimum.)
As a minimal test main(), try the following:
/* Minimal driver: calls xdg_open() on every command-line argument and
   reports the outcome of each on standard output.  Exits with
   EXIT_FAILURE if any argument could not be opened.  With no arguments,
   or with -h / --help first, prints usage to standard error instead. */
int main(int argc, char *argv[])
{
    int exit_code = EXIT_SUCCESS;
    int i;

    /* Short-circuit keeps argv[1] from being read when it does not exist. */
    const int show_usage = argc < 2
                        || strcmp(argv[1], "-h") == 0
                        || strcmp(argv[1], "--help") == 0;

    if (show_usage) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s FILE-OR-URL ...\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This example program opens each specified file or URL\n");
        fprintf(stderr, "xdg-open(1), and outputs success or failure for each.\n");
        fprintf(stderr, "\n");
        return EXIT_SUCCESS;
    }

    for (i = 1; i < argc; i++) {
        if (xdg_open(argv[i]) == 0) {
            printf("%s: Opened.\n", argv[i]);
            continue;
        }
        /* xdg_open() leaves its error code in errno as well. */
        printf("%s: %s.\n", argv[i], strerror(errno));
        exit_code = EXIT_FAILURE;
    }

    return exit_code;
}
As the SPDX license identifier states, this example code is licensed under Creative Commons Zero 1.0. Use it any way you wish, in any code you want.
The xdg-open is part of the xdg-utils. They're almost always installed with the GUI desktop of any Linux distribution.
A Linux distribution can be installed without any Graphical User Interface, on servers say, and most probably then they would lack xdg-open.
Instead of system, you could - and should - use fork + exec - if exec fails then xdg-open could not be executed.
The online compilers most probably don't have any Desktop GUI installed on them, thus the lack of that utility.
I know this way of copying files, which I think is pretty much standard way of copying files in C.
#include <stdio.h>
#include <stdlib.h>
/* Read one line from stdin into buf (capacity `size`) and strip the
   trailing newline.  Returns 1 when a non-empty name that fits was read,
   0 on end of input, read error, empty line or over-long line. */
static int read_file_name(char *buf, size_t size)
{
    size_t len = 0;

    if (fgets(buf, (int)size, stdin) == NULL)
        return 0;

    while (buf[len] != '\0' && buf[len] != '\n')
        len++;

    /* No newline and not at end of input: the line did not fit. */
    if (buf[len] != '\n' && !feof(stdin))
        return 0;

    buf[len] = '\0';
    return len > 0;
}

/* Prompts for a source and a target file name and copies the source to
   the target in binary chunks.  Exits with EXIT_FAILURE if a name cannot
   be read, a file cannot be opened, or any read or write fails. */
int main()
{
    char source_file[FILENAME_MAX], target_file[FILENAME_MAX];
    char chunk[16384];
    FILE *source, *target;
    size_t got;
    int failed = 0;

    printf("Enter name of file to copy\n");
    /* fgets-based input: gets() cannot bound what it writes. */
    if (!read_file_name(source_file, sizeof source_file))
    {
        printf("Press any key to exit...\n");
        exit(EXIT_FAILURE);
    }
    /* Binary mode so no byte is translated on platforms that distinguish it. */
    source = fopen(source_file, "rb");
    if( source == NULL )
    {
        printf("Press any key to exit...\n");
        exit(EXIT_FAILURE);
    }

    printf("Enter name of target file\n");
    if (!read_file_name(target_file, sizeof target_file))
    {
        fclose(source);
        printf("Press any key to exit...\n");
        exit(EXIT_FAILURE);
    }
    target = fopen(target_file, "wb");
    if( target == NULL )
    {
        fclose(source);
        printf("Press any key to exit...\n");
        exit(EXIT_FAILURE);
    }

    /* Copy block by block; a short write means the target is incomplete. */
    while ((got = fread(chunk, 1, sizeof chunk, source)) > 0)
    {
        if (fwrite(chunk, 1, got, target) != got)
        {
            perror("Write error");
            failed = 1;
            break;
        }
    }
    if (ferror(source))
    {
        perror("Read error");
        failed = 1;
    }

    fclose(source);
    /* fclose flushes buffered data, so it can fail when the disk is full. */
    if (fclose(target) != 0)
    {
        perror("Write error");
        failed = 1;
    }

    if (failed)
        exit(EXIT_FAILURE);

    printf("File copied successfully.\n");
    return 0;
}
But this way opens the file and copies line by line. The files I want to copy are HUGE and many. This way will take very VERY LONG. Is there a way I can achieve my goal of copying these files directly. I know terminal or command prompt are completely different things than C language, but a simple
cp sourcefile.txt destinationfile.txt
can do the trick.
Is there any such commands or tricks in C that I can use. I cannot use
system("cp sourcefile.txt destinationfile.txt");
command because I am writing a robust program that should work in Linux and windows.
Well, what do you imagine the cp command itself does to copy files? It opens the source file in read mode and the destination file in write mode, and copies everything in binary chunks! OK, more things can be involved if you pass other options to cp, but the copy itself is no more magic than that.
That being said, what you do is not that. You are copying the file character by character. Even if the standard library does some buffering, you are repeatedly calling a function when it could be avoided. And... never use gets. It has been deprecated for ages because it is insecure. If the user enters looong file names (more than 19 characters) you get a buffer overflow. And do not forget to check the result of every I/O function, including the output ones. When writing a huge file to external media such as a USB key, you could run out of space on the device, and your program would still report that the copy succeeded.
Copying loop could be something like:
#define SIZE 16384
char buffer[SIZE];
int crin, crout = 0;
while ((crin = fread(buffer, 1, SIZE, source)) > 0) {
crout = fwrite(buffer, 1, crin, target);
if (crout != crin) { /* control everything could be written */
perror("Write error");
crout = -1;
break;
}
if (crin < 0) { /* test read error (removal of amovible media, ...) */
perror("Read error");
}
A low level optimization here would be to directly use posix functions instead of standard library ones, because as soon as you are using binary IO in big chunks, the buffering of the standard library gives no advantage, and you simply have its overhead.
This is how i have moved a file in the past without having to open it:
#include <stdio.h>
/* Moves C:\oldFile.txt to C:\newfile.txt without opening it.
   Returns 0 on success and 1 (after printing the reason) when the
   rename fails. */
int main()
{
    if (rename("C:\\oldFile.txt", "C:\\newfile.txt") != 0) {
        perror("rename");
        return 1;
    }
    return 0;
}
One thing to be aware is that you're copying the slowest possible way, because you're doing it character by character. One improvement would be to copy full lines or bigger text chunks, using fgets and fputs
Even better is to not copy the file as a text file, but instead just as a binary chunk. This is achieved by opening the file in binary mode with the b flag, so e.g. target = fopen(target_file, "wb"); and using fread and fwrite instead of the put character functions.
In both scenarios you have to use a temporary buffer with a reasonable size (could be the size of the file or fixed). To determine the optimal size is not trivial.
Yet another way to copy, and according to my operating systems professor what cp does, is by using memory mapped files.
How to use memory-mapped files is unfortunately not portable; it depends on your operating system, i.e. your platform. On Unix, the man page of mmap is your friend. This is an example Unix implementation by me:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#include <errno.h>
#include <time.h>
#include <string.h>
#include <sys/shm.h>
#include <signal.h>
#include <stdbool.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
/*
 * Copy argv[1] to argv[2] by memory-mapping both files and memcpy()ing
 * between the mappings.  The destination gets the source's permission
 * bits.  Returns EXIT_SUCCESS only if every step succeeded; every failure
 * is reported on stderr and releases whatever had been acquired.
 *
 * Note: the destination is sized with ftruncate(), which may create a
 * sparse file; if the disk fills up while the mapping is written, the
 * process receives SIGBUS rather than an error return.
 */
int main(int argc, const char * argv[]) {
    int exit_status = EXIT_FAILURE;
    int source_file_desc = -1;
    int destination_file_desc = -1;
    void *source_mem = MAP_FAILED;
    void *destination_mem = MAP_FAILED;
    struct stat source_info;
    struct stat destination_info;
    size_t length = 0;

    if (argc != 3)
    {
        fprintf(stderr, "Usage %s <SourceFile> <DestinationFile>\n",argv[0]);
        return EXIT_FAILURE;
    }

    source_file_desc = open(argv[1], O_RDONLY);
    if (source_file_desc == -1) {
        perror("Can't open source file");
        goto cleanup;
    }

    /* fstat on the open descriptor: the path could be replaced in between. */
    if (fstat(source_file_desc, &source_info) != 0) {
        perror("Can't get source file infos");
        goto cleanup;
    }
    if (!S_ISREG(source_info.st_mode)) {
        fprintf(stderr, "Source is not a regular file\n");
        goto cleanup;
    }
    if ((uintmax_t)source_info.st_size > SIZE_MAX) {
        fprintf(stderr, "Source file is too large to map\n");
        goto cleanup;
    }
    length = (size_t)source_info.st_size;

    /* O_CREAT requires a mode argument.  No O_TRUNC here: if both names
       refer to the same file, truncating would destroy the source. */
    destination_file_desc = open(argv[2], O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
    if (destination_file_desc == -1) {
        perror("Can't open destination file");
        goto cleanup;
    }
    if (fstat(destination_file_desc, &destination_info) != 0) {
        perror("Can't get destination file infos");
        goto cleanup;
    }
    if (destination_info.st_dev == source_info.st_dev &&
        destination_info.st_ino == source_info.st_ino) {
        fprintf(stderr, "Source and destination are the same file\n");
        goto cleanup;
    }

    /* Losing the permission bits is reported but does not abort the copy. */
    if (fchmod(destination_file_desc, source_info.st_mode & 07777) != 0) {
        perror("Can't copy file permissions");
    }

    /* Give the destination exactly the source's size (shrinks or extends). */
    if (ftruncate(destination_file_desc, source_info.st_size) != 0) {
        perror("Can't resize destination file");
        goto cleanup;
    }

    /* A zero-length mapping is an error, so an empty file is done here. */
    if (length > 0) {
        source_mem = mmap(NULL, length, PROT_READ, MAP_FILE|MAP_PRIVATE, source_file_desc, 0);
        if (source_mem == MAP_FAILED) {
            perror("Mapping source file failed");
            goto cleanup;
        }

        destination_mem = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_FILE|MAP_SHARED, destination_file_desc, 0);
        if (destination_mem == MAP_FAILED) {
            perror("Mapping destination file failed");
            goto cleanup;
        }

        memcpy(destination_mem, source_mem, length);

        /* Push the data to the file so write-back errors are reported. */
        if (msync(destination_mem, length, MS_SYNC) != 0) {
            perror("Can't flush destination file");
            goto cleanup;
        }
    }

    exit_status = EXIT_SUCCESS;

cleanup:
    if (source_mem != MAP_FAILED)
        munmap(source_mem, length);
    if (destination_mem != MAP_FAILED)
        munmap(destination_mem, length);
    if (source_file_desc != -1)
        close(source_file_desc);
    if (destination_file_desc != -1 && close(destination_file_desc) != 0) {
        perror("Can't close destination file");
        exit_status = EXIT_FAILURE;
    }
    return exit_status;
}
If it’s not a problem that any changes to one copy would affect the other, you can create a link to the file. How this works depends on the OS.
If you want to optimize a file copy as much as possible using only the standard library, here is what I suggest (untested):
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern bool copy_file( FILE* dest, FILE* restrict src );
static bool error_helper( const char* file, int line, const char* msg );
#if defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__X86__) || defined(__I86__) || defined(__INTEL__) || defined(__386)
# define PAGE_SIZE 4096U
#else
# error "Define the page size on your system, or use a system call such as sysconf() to find it."
#endif
#define non_fatal_stdlib_error() error_helper( __FILE__, __LINE__, strerror(errno) )
/* Copy everything readable from `src` to `dest`, one PAGE_SIZE block at a
 * time, using only the standard library.
 *
 * `dest` is first reopened in "w+" mode through freopen(), so callers must
 * not keep using the pointer they passed in if that call fails.
 *
 * Returns true on success.  On failure the error is reported through
 * non_fatal_stdlib_error() and false is returned.  The transfer buffer is
 * released on every path.
 */
bool copy_file( FILE* dest, FILE* restrict src )
{
  bool ok = true;

  errno = 0;
  if ( !(dest = freopen( NULL, "w+", dest )) )
    return non_fatal_stdlib_error();

  /* Try to help the library out by turning buffering off and allocating an aligned block; it might be able to detect that at runtime.
   * On the other hand, the unbuffered implementation might be worse. */
  setvbuf( src, NULL, _IONBF, BUFSIZ );
  setvbuf( dest, NULL, _IONBF, BUFSIZ );

  char* const buffer = aligned_alloc( PAGE_SIZE, PAGE_SIZE );
  if (!buffer)
    return non_fatal_stdlib_error();

  for (;;) {
    const size_t n = fread( buffer, 1, PAGE_SIZE, src );

    /* A short read is either end of file or an error; tell them apart. */
    if ( n < PAGE_SIZE && ferror(src) ) {
      ok = non_fatal_stdlib_error();
      break;
    }

    if ( n > 0 && fwrite( buffer, 1, n, dest ) != n ) {
      ok = non_fatal_stdlib_error();
      break;
    }

    if ( n < PAGE_SIZE )
      break; /* last, possibly empty, block has been written */
  } // end for

  free(buffer);
  return ok;
}
/* Report an error location and message on stderr.
 * Always returns false, so callers can write `return error_helper(...)`.
 */
bool error_helper( const char* file, int line, const char* msg )
{
  FILE* const sink = stderr;

  /* Flush pending normal output first, then the report itself. */
  fflush(stdout);
  fprintf( sink, "Error at %s, line %d: %s.\n", file, line, msg );
  fflush(sink);

  return false;
}
This at least gives the library implementation a chance to detect that all reads and writes are single memory pages.
I'm writing a program in c that basically copies files, but I'm getting this error: Segmentation fault (core dumped). From what I'm reading I think it's because I'm trying to access memory that hasn't been allocated yet. I'm a newbie when it comes to c and I suck at pointers, so I was wondering if you guys could tell me which pointer is causing this and how to fix it if possible. Btw, this program is supposed to be a daemon, but I haven't put anything inside the infinite while loop at the bottom.
Here is my code:
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <syslog.h>
#include <string.h>
#include <dirent.h>
/* Limits and defaults used by the directory-sync daemon below. */
enum { MAX_ENTRIES = 30, DEFAULT_SLEEP_SECONDS = 300 };

/* Build "dir/name" in a freshly allocated string (caller frees).
   Returns NULL if memory cannot be allocated. */
static char *join_path(const char *dir, const char *name)
{
    size_t len = strlen(dir) + 1 + strlen(name) + 1;
    char *path = malloc(len);

    if (path != NULL)
        snprintf(path, len, "%s/%s", dir, name);
    return path;
}

/* Read up to `max` entry names from `dir` into `names` (each one a strdup'd
   copy the caller must free), then close the directory.
   Returns the number of names stored. */
static int list_directory(DIR *dir, char *names[], int max)
{
    struct dirent *ent;
    int count = 0;

    while (count < max && (ent = readdir(dir)) != NULL) {
        /* readdir may reuse its buffer, so keep a private copy. */
        char *copy = strdup(ent->d_name);
        if (copy == NULL) {
            perror("strdup");
            break;
        }
        names[count++] = copy;
    }
    closedir(dir);
    return count;
}

/* Copy the whole regular file src_path to dst_path.
   Returns 0 on success, -1 on failure or when the source is not a
   regular file (sub-directories are skipped this way). */
static int copy_regular_file(const char *src_path, const char *dst_path)
{
    unsigned char buffer[4096];
    struct stat info;
    int inft, outft;
    int result = -1;

    inft = open(src_path, O_RDONLY);
    if (inft == -1) {
        perror("open");
        return -1;
    }
    if (fstat(inft, &info) == -1 || !S_ISREG(info.st_mode)) {
        close(inft);
        return -1;
    }

    /* O_CREAT needs an explicit mode; reuse the source's permission bits. */
    outft = open(dst_path, O_CREAT | O_WRONLY | O_TRUNC, info.st_mode & 0777);
    if (outft == -1) {
        perror("open");
        close(inft);
        return -1;
    }

    for (;;) {
        ssize_t got = read(inft, buffer, sizeof(buffer));
        ssize_t done = 0;

        if (got == 0) {             /* end of file: everything was copied */
            result = 0;
            break;
        }
        if (got < 0) {
            if (errno == EINTR)
                continue;
            perror("read");
            break;
        }

        /* write() may accept fewer bytes than requested; keep going. */
        while (done < got) {
            ssize_t put = write(outft, buffer + done, (size_t)(got - done));
            if (put < 0) {
                if (errno == EINTR)
                    continue;
                perror("write");
                break;
            }
            done += put;
        }
        if (done < got)
            break;
    }

    close(inft);
    if (close(outft) == -1) {
        perror("close");
        result = -1;
    }
    return result;
}

/* Copies every entry that exists in the source directory (argv[1]) but not
   in the destination directory (argv[2]), then detaches as a daemon that
   sleeps in a loop.  An optional argv[3] gives the sleep interval in
   seconds (default DEFAULT_SLEEP_SECONDS).  At most MAX_ENTRIES entries
   per directory are considered. */
int main(int c, char *argv[]) {
    char *source, *destination;
    char *list1[MAX_ENTRIES], *list2[MAX_ENTRIES], *listDif[MAX_ENTRIES];
    int i, z, sizeSource, sizeDest, sizeDif = 0;
    unsigned int sleeper = DEFAULT_SLEEP_SECONDS;
    pid_t pid, sid;

    //Check number of arguments
    if(c<3)
    {
        printf("Daemon wrongly called\n");
        printf("How to use: <daemon name> <orginDirectory> <destinationDirectory> \n");
        printf("or : <daemon name> <orginDirectory> <destinationDirectory> <sleeperTime(seconds)>");
        return 0;
    }

    //Checks if sleeper time is given or will be the default 5 minutes
    if(c>=4)
    {
        char *end;
        long conv;

        errno = 0;
        conv = strtol(argv[3], &end, 10);
        sleeper = (unsigned int)conv;
        if(errno != 0 || end == argv[3] || *end != '\0' || conv <= 0
           || (unsigned long)sleeper != (unsigned long)conv)
        {
            printf("Number given for sleeper incorrect, it has to be an integer value.\n");
            return 0;
        }
    }

    //Get path of directories from arguments
    source = argv[1];
    destination = argv[2];

    //Check if directories exist
    DIR* dirSource = opendir(source);
    if (!dirSource)
    {
        printf("Source directory incorrect\n");
        return 0;
    }
    DIR* dirDest = opendir(destination);
    if (!dirDest)
    {
        printf("Destination directory incorrect\n");
        closedir(dirSource);
        return 0;
    }

    /* save all the files and directories within each directory */
    sizeSource = list_directory(dirSource, list1, MAX_ENTRIES);
    sizeDest = list_directory(dirDest, list2, MAX_ENTRIES);

    /* Find the names present in the source but missing in the destination */
    printf("Diferenças:\n");
    for(i=0;i<sizeSource;i++){
        int found = 0;
        for(z=0;z<sizeDest;z++){
            if(strcmp(list1[i],list2[z])==0){
                found = 1;
                break;
            }
        }
        if(!found) {
            printf("%s\n",list1[i]);
            listDif[sizeDif] = list1[i];   /* borrowed from list1, not owned */
            sizeDif++;
        }
    }

    /* Copy every missing file */
    for(z=0;z<sizeDif;z++){
        /* Paths need real storage: writing through an uninitialised
           pointer was the cause of the segmentation fault. */
        char *pathSource = join_path(source, listDif[z]);
        char *pathDest = join_path(destination, listDif[z]);

        if (pathSource == NULL || pathDest == NULL)
            perror("malloc");
        else
            copy_regular_file(pathSource, pathDest);

        free(pathSource);
        free(pathDest);
    }

    for(i=0;i<sizeSource;i++) free(list1[i]);
    for(i=0;i<sizeDest;i++) free(list2[i]);

    /* Nothing buffered may be left behind for the child to inherit. */
    fflush(stdout);

    /* Fork off the parent process */
    pid = fork();
    if (pid < 0) {
        exit(EXIT_FAILURE);
    }
    /* If we got a good PID, then
       we can exit the parent process. */
    if (pid > 0) {
        exit(EXIT_SUCCESS);
    }

    /* Change the file mode mask */
    umask(0);

    /* Open any logs here */

    /* Create a new SID for the child process */
    sid = setsid();
    if (sid < 0) {
        /* Log the failure */
        exit(EXIT_FAILURE);
    }

    /* Change the current working directory */
    if ((chdir("/")) < 0) {
        /* Log the failure */
        exit(EXIT_FAILURE);
    }

    /* Close out the standard file descriptors */
    close(STDIN_FILENO);
    close(STDOUT_FILENO);
    close(STDERR_FILENO);

    /* Daemon-specific initialization goes here */

    /* The Big Loop: sleep between rounds instead of spinning on the CPU */
    for (;;) {
        sleep(sleeper);
    }
}
The result of ls is:
ubuntu#ubuntu:~/Desktop$ ls
Concatenar_Strings.c core D2 daemon.c examples.desktop
Concatenar_Strings.c~ D1 daemon daemon.c~ ubiquity.desktop
D1 and D2 are folders, and in D1 are three text documents that I want to copy into D2.
One other question, is this a delayed error or an immediate one? Because I doubt this message would appear on a code line that with two integers.
Thanks in advance guys.
This loop is wrong:
while ((ent = readdir (dirSource)) != NULL) {
list1[sizeSource] = ent->d_name;
Probably, ent points to the same memory block every time, and the readdir function updates it. So when you save that pointer, you end up with your list containing invalid pointers (probably end up all pointing to the same string). Further, the string may be deallocated once you got to the end of the directory.
If you want to use the result of readdir after closing the directory or after calling readdir again you will need to take a copy of the data. In this case you can use strdup and it is usually good style to free the string at the end of the operation.
This may or may not have been the cause of your segfault. Another thing to check is that you should break out of your loops if sizeSource or sizeDest hits 30.
In the strcmp loop, you should really set dif = 0 at the start of the i loop, instead of in an else block.
Update: (more code shown by OP)
char *pathSource, *pathDest;
strcpy(pathSource, source);
You are copying to a wild pointer, which is a likely cause of segfaults. strcpy does not allocate any memory, it expects that you have already allocated enough.
One possible fix would be:
char pathSource[strlen(source) + 1 + strlen(listDif[z]) + 1];
sprintf(pathSource, "%s/%s", source, listDif[z]);
Alternatively (without using VLA):
char pathSource[MAX_PATH]; // where MAX_PATH is some large number
snprintf(pathSource, MAX_PATH, "%s/%s", source, listDif[z]);
Do the same thing for pathDest.
NB. Consider moving the closedir lines up to after the readdir loops; generally speaking you should open and close a resource as close as possible to the times you start and finish using them respectively; this makes your code easier to maintain.
I want to be able to check to see if a file could be opened on Linux (for read or for read and write). However I don't have control of the code which will be opening the file, so I can't do what I would normally do which is to open it and then handle the error.
I appreciate that there will always be race conditions on any check due to permissions changing after the call has returned but before the open call, but I'm trying to avoid some undesirable error logging from a library which I have no control over.
I'm aware of stat, but I'd prefer not to need to try to replicate the logic of checking user IDs and group IDs.
You can use:
access("filename", R_OK);
or
euidaccess("filename", R_OK);
These check whether your UID or EUID, respectively, has read access to the file. (The UID and EUID will differ if you are running setuid.)
Use euidaccess or access, although you almost certainly always want to use the former.
(edit: the reason for adding this was that with this approach you can ensure you can avoid the race conditions. That said, it is quite a tricky approach, so maybe just coping with potential race conditions is a better practical approach).
If your goal is to shield the code that you do not own from unhandled errors, using LD_PRELOAD to intercept the open call itself might be of use. An example of it with malloc is here: Overriding 'malloc' using the LD_PRELOAD mechanism
Here is my quick improvisation of how you could do it - basically an interceptor that launches an interactive shell so that you can correct the error.
WARNING: lots of open calls actually do fail for legit reasons, e.g. when the program is going over different directories in the path trying to find the file, so treat this code as an educational example only to be used with this example code - if you are any close to real world use, your code definitely will need to be smarter. With all this said, let's get to the meat.
First, the "offensive" program that you do not have the control over:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/*
 * Stand-in for a program whose source we do not control: it opens
 * /tmp/unreadable read-only, reports the result, and on success prints the
 * beginning of what it read.  It is intentionally left naive; the
 * interceptor is what gets demonstrated.
 */
int main(int argc, char *argv[]) {
int res = 0;
printf("About to try to open the file...\n");
res = open("/tmp/unreadable", O_RDONLY);
printf("The result after opening: %d\n", res);
if (res < 0) {
/* perror appends the text for the errno left by the failed open. */
perror("Could not open, and here is what the errno says");
} else {
char buf[1024];
int fd = res;
/* res is reused: from here on it holds the byte count (or -1). */
res = read(fd, buf, sizeof(buf));
printf("Read %d bytes, here are the first few:\n", res);
/* NOTE(review): known bug, kept on purpose -- the terminator is written at
   a fixed offset regardless of how many bytes were read, so for shorter
   files the bytes between the data and offset 30 are uninitialised. */
buf[30] = 0;
printf("%s\n", buf);
close(fd);
}
}
Then the interceptor:
#include <stdio.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdlib.h>
#define __USE_GNU
#include <dlfcn.h>
static int (*real_open)(const char *pathname, int flags, ...)=NULL;
/* Look up the next definition of "open" in library search order and store
   it in real_open.  If the lookup fails, the dlerror() text is printed to
   stderr and real_open is left NULL. */
static void __open_trace_init(void)
{
    real_open = dlsym(RTLD_NEXT, "open");

    if (real_open != NULL)
        return;

    fprintf(stderr, "Error in `dlsym`: %s\n", dlerror());
}
/*
 * Interposed replacement for open(): traces every call on stderr and
 * forwards it to the real open().  When the real call fails, an
 * interactive /bin/sh is started so the user can fix the cause, and the
 * call is retried until it succeeds.
 *
 * Returns the descriptor from the real open(), or -1 if the real open()
 * could not be located at all.  Educational code: a failing open() is
 * often legitimate, and this wrapper never lets such a failure through.
 */
int open(const char *pathname, int flags, ...)
{
    int has_mode = (flags & O_CREAT) != 0;
    mode_t mode = 0;
    int res = 0;

    if(real_open==NULL)
        __open_trace_init();
    /* Lookup failed: calling through a NULL pointer would crash the host. */
    if(real_open==NULL)
        return -1;

#ifdef O_TMPFILE
    /* O_TMPFILE carries a mode argument too, just like O_CREAT. */
    if ((flags & O_TMPFILE) == O_TMPFILE)
        has_mode = 1;
#endif

    /* The optional third argument exists only when a file may be created;
       read it once, it does not change between retries. */
    if (has_mode) {
        va_list va;
        va_start(va, flags);
        mode = (mode_t)va_arg(va, int);
        va_end(va);
    }

    do {
        if (has_mode) {
            fprintf(stderr, "open(%s, %x, %x) = ", pathname, (unsigned)flags, (unsigned)mode);
            res = real_open(pathname, flags, mode);
        } else {
            fprintf(stderr, "open(%s, %x) = ", pathname, (unsigned)flags);
            res = real_open(pathname, flags);
        }
        fprintf(stderr, "%d\n", res);

        if (res < 0) {
            printf("The open has returned an error. Please correct and we retry.\n");
            /* Make sure the prompt is visible before the shell takes over. */
            fflush(stdout);
            system("/bin/sh");
        }
    } while (res < 0);

    return res;
}
And here is how it looks like when running:
ayourtch#ayourtch-lnx:~$ echo This is unreadable >/tmp/unreadable
ayourtch#ayourtch-lnx:~$ chmod 0 /tmp/unreadable
ayourtch#ayourtch-lnx:~/misc/stackoverflow$ LD_PRELOAD=./intercept ./a.out
About to try to open the file...
open(/tmp/unreadable, 0) = -1
The open has returned an error. Please correct and we retry.
open(/dev/tty, 802) = 3
open(/dev/tty, 802) = 3
open(/home/ayourtch/.bash_history, 0) = 3
open(/home/ayourtch/.bash_history, 0) = 3
open(/lib/terminfo/x/xterm, 0) = 3
open(/etc/inputrc, 0) = 3
sh-4.1$ ls -al /tmp/unreadable
---------- 1 ayourtch ayourtch 19 2011-10-18 13:03 /tmp/unreadable
sh-4.1$ chmod 444 /tmp/unreadable
sh-4.1$ exit
open(/home/ayourtch/.bash_history, 401) = 3
open(/home/ayourtch/.bash_history, 0) = 3
open(/home/ayourtch/.bash_history, 201) = 3
open(/tmp/unreadable, 0) = 3
The result after opening: 3
Read 19 bytes, here are the first few:
This is unreadable
�0
ayourtch#ayourtch-lnx:~/misc/stackoverflow$
By the way this example also exposes an obvious bug in the first "test" code - I should have checked that the number of the chars read was at least 30 and put the null char accordingly.
Anyway, that code is supposed to be buggy and outside of the control, so it is kind of good to have a bug in it - else you would not need to use this kind of hack :-)