I've found on google code that was over 50 lines long and that's completely unnecessary for what I'm trying to do.
I want to make a very simple cp implementation in C.
Just so I can play with the buffer sizes and see how it affects performance.
I want to use only Linux API calls like read() and write() but I'm having no luck.
I want a buffer that is defined as a certain size so data from file1 can be read into buffer and then written to file2 and that continues until file1 has reached EOF.
Here is what I tried but it doesn't do anything
#include <stdio.h>
#include <sys/types.h>
#define BUFSIZE 1024
int main(int argc, char* argv[]){
FILE fp1, fp2;
char buf[1024];
int pos;
fp1 = open(argv[1], "r");
fp2 = open(argv[2], "w");
while((pos=read(fp1, &buf, 1024)) != 0)
{
write(fp2, &buf, 1024);
}
return 0;
}
The way it would work is ./mycopy file1.txt file2.txt
This code has an important problem, the fact that you always write 1024 bytes regardless of how many you read.
Also:
You don't check the number of command line arguments.
You don't check if the source file exists (if it opens).
You don't check that the destination file opens (permission issues).
You pass the address of the array which has a different type than the pointer to the first element to the array.
The type of fp1 is wrong, as well as that of fp2.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
int main(int argc, char **argv)
{
char buffer[1024];
int files[2];
ssize_t count;
/* Check for insufficient parameters */
if (argc < 3)
return -1;
files[0] = open(argv[1], O_RDONLY);
if (files[0] == -1) /* Check if file opened */
return -1;
files[1] = open(argv[2], O_WRONLY | O_CREAT | S_IRUSR | S_IWUSR);
if (files[1] == -1) /* Check if file opened (permissions problems ...) */
{
close(files[0]);
return -1;
}
while ((count = read(files[0], buffer, sizeof(buffer))) != 0)
write(files[1], buffer, count);
return 0;
}
Go to section 8.3 of the K&R "The C Programming Language". There you will see an example of what you want to accomplish. Try using different buffer sizes and you will end up seeing a point where the performance tops.
#include <stdio.h>
int cpy(char *, char *);
int main(int argc, char *argv[])
{
char *fn1 = argv[1];
char *fn2 = argv[2];
if (cpy(fn2, fn1) == -1) {
perror("cpy");
return 1;
}
reurn 0;
}
int cpy(char *fnDest, char *fnSrc)
{
FILE *fpDest, *fpSrc;
int c;
if ((fpDest = fopen(fnDest, "w")) && (fpSrc = fopen(fnSrc, "r"))) {
while ((c = getc(fpSrc)) != EOF)
putc(fpDest);
fclose(fpDest);
fclose(fpSrc);
return 0;
}
return -1;
}
First, we get the two file names from the command line (argv[1] and argv[2]). The reason we don't start from *argv, is that it contains the program name.
We then call our cpy function, which copies the contents of the second named file to the contents of the first named file.
Within cpy, we declare two file pointers: fpDest, the destination file pointer, and fpSrc, the source file pointer. We also declare c, the character that will be read. It is of type int, because EOF does not fit in a char.
If we could open the files succesfully(if fopen does not return NULL), we get characters from fpSrc and copy them onto fpDest, as long as the character we have read is not EOF. Once we have seen EOF, we close our file pointers, and return 0, the success indicator. If we could not open the files, -1 is returned. The caller can check the return value for -1, and if it is, print an error message.
Good question. Related to another good question:
How can I copy a file on Unix using C?
There are two approaches to the "simplest" implementation of cp. One approach uses a file copying system call function of some kind - the closest thing we get to a C function version of the Unix cp command. The other approach uses a buffer and read/write system call functions, either directly, or using a FILE wrapper.
It's likely the file copying system calls that take place solely in kernel-owned memory are faster than the system calls that take place in both kernel- and user-owned memory, especially in a network filesystem setting (copying between machines). But that would require testing (e.g. with Unix command time) and will be dependent on the hardware where the code is compiled and executed.
It's also likely that someone with an OS that doesn't have the standard Unix library will want to use your code. Then you'd want to use the buffer read/write version, since it only depends on <stdlib.h> and <stdio.h> (and friends).
<unistd.h>
Here's an example that uses function copy_file_range from the unix standard library <unistd.h>, to copy a source file to a (possible non-existent) destination file. The copy takes place in kernel space.
/* copy.c
*
* Defines function copy:
*
* Copy source file to destination file on the same filesystem (possibly NFS).
* If the destination file does not exist, it is created. If the destination
* file does exist, the old data is truncated to zero and replaced by the
* source data. The copy takes place in the kernel space.
*
* Compile with:
*
* gcc copy.c -o copy -Wall -g
*/
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
/* On versions of glibc < 2.27, need to use syscall.
*
* To determine glibc version used by gcc, compute an integer representing the
* version. The strides are chosen to allow enough space for two-digit
* minor version and patch level.
*
*/
#define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __gnuc_patchlevel__)
#if GCC_VERSION < 22700
static loff_t copy_file_range(int in, loff_t* off_in, int out,
loff_t* off_out, size_t s, unsigned int flags)
{
return syscall(__NR_copy_file_range, in, off_in, out, off_out, s,
flags);
}
#endif
/* The copy function.
*/
int copy(const char* src, const char* dst){
int in, out;
struct stat stat;
loff_t s, n;
if(0>(in = open(src, O_RDONLY))){
perror("open(src, ...)");
exit(EXIT_FAILURE);
}
if(fstat(in, &stat)){
perror("fstat(in, ...)");
exit(EXIT_FAILURE);
}
s = stat.st_size;
if(0>(out = open(dst, O_CREAT|O_WRONLY|O_TRUNC, 0644))){
perror("open(dst, ...)");
exit(EXIT_FAILURE);
}
do{
if(1>(n = copy_file_range(in, NULL, out, NULL, s, 0))){
perror("copy_file_range(...)");
exit(EXIT_FAILURE);
}
s-=n;
}while(0<s && 0<n);
close(in);
close(out);
return EXIT_SUCCESS;
}
/* Test it out.
*
* BASH:
*
* gcc copy.c -o copy -Wall -g
* echo 'Hello, world!' > src.txt
* ./copy src.txt dst.txt
* [ -z "$(diff src.txt dst.txt)" ]
*
*/
int main(int argc, char* argv[argc]){
if(argc!=3){
printf("Usage: %s <SOURCE> <DESTINATION>", argv[0]);
exit(EXIT_FAILURE);
}
copy(argv[1], argv[2]);
return EXIT_SUCCESS;
}
It's based on the example in my Ubuntu 20.x Linux distribution's man page for copy_file_range. Check your man pages for it with:
> man copy_file_range
Then hit j or Enter until you get to the example section. Or search by typing /example.
<stdio.h>/<stdlib.h> only
Here's an example that only uses stdlib/stdio. The downside is it uses an intermediate buffer in user-space.
/* copy.c
*
* Compile with:
*
* gcc copy.c -o copy -Wall -g
*
* Defines function copy:
*
* Copy a source file to a destination file. If the destination file already
* exists, this clobbers it. If the destination file does not exist, it is
* created.
*
* Uses a buffer in user-space, so may not perform as well as
* copy_file_range, which copies in kernel-space.
*
*/
#include <stdlib.h>
#include <stdio.h>
#define BUF_SIZE 65536 //2^16
int copy(const char* in_path, const char* out_path){
size_t n;
FILE* in=NULL, * out=NULL;
char* buf = calloc(BUF_SIZE, 1);
if((in = fopen(in_path, "rb")) && (out = fopen(out_path, "wb")))
while((n = fread(buf, 1, BUF_SIZE, in)) && fwrite(buf, 1, n, out));
free(buf);
if(in) fclose(in);
if(out) fclose(out);
return EXIT_SUCCESS;
}
/* Test it out.
*
* BASH:
*
* gcc copy.c -o copy -Wall -g
* echo 'Hello, world!' > src.txt
* ./copy src.txt dst.txt
* [ -z "$(diff src.txt dst.txt)" ]
*
*/
int main(int argc, char* argv[argc]){
if(argc!=3){
printf("Usage: %s <SOURCE> <DESTINATION>\n", argv[0]);
exit(EXIT_FAILURE);
}
return copy(argv[1], argv[2]);
}
Another way to ensure portability in general while still working with a Unix-like C API is to develop with GNOME (e.g. GLib, GIO)
https://docs.gtk.org/glib/
https://docs.gtk.org/gio/
Related
My setup: gcc-4.9.2, UTF-8 environment.
The following C-program works in ASCII, but does not in UTF-8.
Create input file:
echo -n 'привет мир' > /tmp/вход
This is test.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 10
int main(void)
{
char buf[SIZE+1];
char *pat = "привет мир";
char str[SIZE+2];
FILE *f1;
FILE *f2;
f1 = fopen("/tmp/вход","r");
f2 = fopen("/tmp/выход","w");
if (fread(buf, 1, SIZE, f1) > 0) {
buf[SIZE] = 0;
if (strncmp(buf, pat, SIZE) == 0) {
sprintf(str, "% 11s\n", buf);
fwrite(str, 1, SIZE+2, f2);
}
}
fclose(f1);
fclose(f2);
exit(0);
}
Check the result:
./test; grep -q ' привет мир' /tmp/выход && echo OK
What should be done to make UTF-8 code work as if it was ASCII code - not to bother how many bytes a symbol takes, etc. In other words: what to change in the example to treat any UTF-8 symbol as a single unit (that includes argv, STDIN, STDOUT, STDERR, file input, output and the program code)?
#define SIZE 10
The buffer size of 10 is insufficient to store the UTF-8 string привет мир. Try changing it to a larger value. On my system (Ubuntu 12.04, gcc 4.8.1), changing it to 20, worked perfectly.
UTF-8 is a multibyte encoding which uses between 1 and 4 bytes per character. So, it is safer to use 40 as the buffer size above.
There is a big discussion at How many bytes does one Unicode character take? which might be interesting.
Siddhartha Ghosh's answer gives you the basic problem. Fixing your code requires more work, though.
I used the following script (chk-utf8-test.sh):
echo -n 'привет мир' > вход
make utf8-test
./utf8-test
grep -q 'привет мир' выход && echo OK
I called your program utf8-test.c and amended the source like this, removing the references to /tmp, and being more careful with lengths:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 40
int main(void)
{
char buf[SIZE + 1];
char *pat = "привет мир";
char str[SIZE + 2];
FILE *f1 = fopen("вход", "r");
FILE *f2 = fopen("выход", "w");
if (f1 == 0 || f2 == 0)
{
fprintf(stderr, "Failed to open one or both files\n");
return(1);
}
size_t nbytes;
if ((nbytes = fread(buf, 1, SIZE, f1)) > 0)
{
buf[nbytes] = 0;
if (strncmp(buf, pat, nbytes) == 0)
{
sprintf(str, "%.*s\n", (int)nbytes, buf);
fwrite(str, 1, nbytes, f2);
}
}
fclose(f1);
fclose(f2);
return(0);
}
And when I ran the script, I got:
$ bash -x chk-utf8-test.sh
+ '[' -f /etc/bashrc ']'
+ . /etc/bashrc
++ '[' -z '' ']'
++ return
+ alias 'r=fc -e -'
+ echo -n 'привет мир'
+ make utf8-test
gcc -O3 -g -std=c11 -Wall -Wextra -Werror utf8-test.c -o utf8-test
+ ./utf8-test
+ grep -q 'привет мир' $'в?\213?\205од'
+ echo OK
OK
$
For the record, I was using GCC 5.1.0 on Mac OS X 10.10.3.
This is more of a corollary to the other answers, but I'll try to explain this from a slightly different angle.
Here is Jonathan Leffler's version of your code, with three slight changes: (1) I made explicit the actual individual bytes in the UTF-8 strings; and (2) I modified the sprintf formatting string width specifier to hopefully do what you are actually attempting to do. Also tangentially (3) I used perror to get a slightly more useful error message when something fails.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 40
int main(void)
{
char buf[SIZE + 1];
char *pat = "\320\277\321\200\320\270\320\262\320\265\321\202"
" \320\274\320\270\321\200"; /* "привет мир" */
char str[SIZE + 2];
FILE *f1 = fopen("\320\262\321\205\320\276\320\264", "r"); /* "вход" */
FILE *f2 = fopen("\320\262\321\213\321\205\320\276\320\264", "w"); /* "выход" */
if (f1 == 0 || f2 == 0)
{
perror("Failed to open one or both files"); /* use perror() */
return(1);
}
size_t nbytes;
if ((nbytes = fread(buf, 1, SIZE, f1)) > 0)
{
buf[nbytes] = 0;
if (strncmp(buf, pat, nbytes) == 0)
{
sprintf(str, "%*s\n", 1+(int)nbytes, buf); /* nbytes+1 length specifier */
fwrite(str, 1, 1+nbytes, f2); /* +1 here too */
}
}
fclose(f1);
fclose(f2);
return(0);
}
The behavior of sprintf with a positive numeric width specifier is to pad with spaces from the left, so the space you tried to use is superfluous. But you have to make sure the target field is wider than the string you are printing in order for any padding to actually take place.
Just to make this answer self-contained, I will repeat what others have already said. A traditional char is always exactly one byte, but one character in UTF-8 is usually not exactly one byte, except when all your characters are actually ASCII. One of the attractions of UTF-8 is that legacy C code doesn't need to know anything about UTF-8 in order to continue to work, but of course, the assumption that one char is one glyph cannot hold. (As you can see, for example, the glyph п in "привет мир" maps to the two bytes -- and hence, two chars -- "\320\277".)
This is clearly less than ideal, but demonstrates that you can treat UTF-8 as "just bytes" if your code doesn't particularly care about glyph semantics. If yours does, you are better off switching to wchar_t as outlined e.g. here: http://www.gnu.org/software/libc/manual/html_node/Extended-Char-Intro.html
However, the standard wchar_t is less than ideal when the standard expectation is UTF-8. See e.g. the GNU libunistring documentation for a less intrusive alternative, and a bit of background. With that, you should be able to replace char with uint8_t and the various str* functions with u8_str* replacements and be done. The assumption that one glyph equals one byte will still need to be addressed, but that becomes a minor technicality in your example program. An adaptation is available at http://ideone.com/p0VfXq (though unfortunately the library is not available on http://ideone.com/ so it cannot be demonstrated there).
The following code works as required:
#include <stdio.h>
#include <locale.h>
#include <stdlib.h>
#include <wchar.h>
#define SIZE 10
int main(void)
{
setlocale(LC_ALL, "");
wchar_t buf[SIZE+1];
wchar_t *pat = L"привет мир";
wchar_t str[SIZE+2];
FILE *f1;
FILE *f2;
f1 = fopen("/tmp/вход","r");
f2 = fopen("/tmp/выход","w");
fgetws(buf, SIZE+1, f1);
if (wcsncmp(buf, pat, SIZE) == 0) {
swprintf(str, SIZE+2, L"% 11ls", buf);
fputws(str, f2);
}
fclose(f1);
fclose(f2);
exit(0);
}
Probably your test.c file is not stored in UTF-8 format and for that reason "привет мир" string is ASCII - and the comparison failed. Change text encoding of source file and try again.
I have a program that I'm doing for class where I need to take the content of one file, reverse it, and write that reversed content to another file. I have written a program that successfully does this (after much googling as I am new to the C programming language). The problem however is that my professor wants us to submit the program in a certain way with a couple supporting .h and .c files (which I understand is good practice). So I was hoping someone could help me understand exactly how I can take my already existing program and make it into one that is to his specifications, which are as follows:
he would like a file named "file_utils.h" that has function signatures and guards for the following two functions
int read_file( char* filename, char **buffer );
int write_file( char* filename, char *buffer, int size);
thus far I have created this file to try and accomplish this.
#ifndef UTILS_H
#define UTILS_H
int read_file(char* filename, char **buffer);
int write_file(char* filename, char *buffer, int size);
#endif
he would like a file named "file_utils.c" that has the implemented code for the previous two functions
he would like a file named "reverse.c" that accepts command arguments, includes a main function, and calls the functions from the previous two files.
now. I understand how this is supposed to work, but as I'm looking at the program I wrote my way I'm unsure how to actually accomplish the same result by adhering to the previously mentioned specifications.
Below is the program that successfully accomplishes the desired functionality
#include<stdlib.h>
#include<stdio.h>
#include<fcntl.h>
#include<string.h>
#include<sys/stat.h>
#include<unistd.h>
int main(int argc, char *argv[]) {
int file1, file2, char_count, x, k;
char buffer;
// if the number of parameters passed are not correct, exit
//
if (argc != 3) {
fprintf(stderr, "usage %s <file1> <file2>", argv[0]);
exit(EXIT_FAILURE);
}
// if the origin file cannot be opened for whatever reason, exit
// S_IRUSR specifies that this file is to be read by only the file owner
//
if ((file1 = open(argv[1], S_IRUSR)) < 0) {
fprintf(stderr, "The origin-file is inaccessible");
exit(EXIT_FAILURE);
}
// if the destination-file cannot be opened for whatever reason, exit
// S_IWUSR specifies that this file is to be written to by only the file owner
//
if ((file2 = creat(argv[2], S_IWUSR)) < 0) {
fprintf(stderr, "The destination-file is inaccessible");
exit(EXIT_FAILURE);
}
// SEEK_END is used to place the read/write pointer at the end of the file
//
char_count = lseek(file1, (off_t) 0, SEEK_END);
printf("origin-file size is %d\n", char_count - 1);
for (k = char_count - 1; k >= 0; k--) {
lseek(file1, (off_t) k, SEEK_SET);
x = read(file1, &buffer, 1);
if (x != 1) {
fprintf(stderr, "can't read 1 byte");
exit(-1);
}
x = write(file2, &buffer, 1);
if (x != 1) {
fprintf(stderr, "can't write 1 byte");
exit(-1);
}
}
write(STDOUT_FILENO, "Reversal & Transfer Complete\n", 5);
close(file1);
close(file2);
return 0;
}
any insight as to how I can accomplish this "re-factoring" of sorts would be much appreciated, thanks!
The assignment demands a different architecture than your program. Unfortunately, this will not be a refactoring but a rewrite.
You have most of the pieces of read_file and write_file already: opening the file, determining its length, error handling. Those can be copy-pasted into the new functions.
But read_file should call malloc and read the file into memory, which is different.
You should create a new function in reverse.c, called by main, to reverse the bytes in a memory buffer.
After that function runs, write_file should attempt to open the file, and only do its error checking at that point.
Your simple program is superior because it validates the output file before any I/O, and it requires less memory. Its behavior satisfies the assignment, but its form does not.
The intended behavior of the C program below is to copy its own executable file to a new randomly-named file, then execve that file, ad nauseum. This should create many, many copies of the executable. This is obviously a terrible idea, but it is nevertheless what I am trying to do.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
int main(int argc, char* argv[]) {
/* Obtain name of current executable */
const char* binName = argv[0];
/* Print message */
printf("Hello from %s!\n", binName);
/* Create name of new executable */
char newBinName[] = "tmpXXXXXX";
mkstemp(newBinName);
/* Determine size of current executable */
struct stat st;
stat(binName, &st);
const int binSize = st.st_size;
/* Copy current executable to memory */
char* binData = (char*) malloc(sizeof(char) * binSize);
FILE* binFile = fopen(binName, "rb");
fread(binData, sizeof(char), binSize, binFile);
fclose(binFile);
/* Write executable in memory to new file */
binFile = fopen(newBinName, "wb");
fwrite(binData, sizeof(char), binSize, binFile);
fclose(binFile);
/* Make new file executable */
chmod(newBinName, S_IRUSR | S_IWUSR |S_IXUSR);
/* Run new file executable */
execve(
newBinName,
(char*[]) {
newBinName,
NULL
},
NULL);
/* If this code runs, then there has been an error. */
perror("execve");
return EXIT_FAILURE;
}
Instead, though, the following is output:
Hello from ./execTest
execve: Text file busy
I presume that the text file is "busy" because ./execTest is still accessing it... but I do close the file stream to that file. What is it that I'm doing wrong?
From the manpage of mkstemp:
On success, these functions return the file descriptor of the
temporary file.
You discard the file descriptor returned to you by mkstemp, effectively leaking it. Probably it is a writable file descriptor and thus will cause execve to fail with ETXTBSY (which occurs when there are writable fds open). Can you try close() on the return value of mkstemp and see if that improves the behavior?
General point of feedback: When coding in C you should be in the habit of looking at return values. Failure to observe their meaning and error status is often indicative of a bug.
Im trying to use the md5sum command in a C program, right now im using dirent.h to get all the files in a folder, now, I want to get all the md5 of all those files, I am doing this:
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <syslog.h>
#include <string.h>
#include <dirent.h>
int main(void){
char *word = ".gz";
int i=0;
char *word2 = ".";
char *word3 = "..";
unsigned int md5;
DIR *d;
struct dirent *dir;
d = opendir(".");
if (d) {
while ((dir = readdir(d)) != NULL)
{
if((strstr(dir->d_name, word) == NULL) && (strcmp(dir->d_name, word2) != 0) && (strcmp(dir->d_name, word3)!= 0)) {
md5 = system("md5sum dir->d_name");
printf("The md5 of %s is %d\n", dir->d_name, md5);
}
}
}
return(0);
}
but when I run it, it says, for example:
md5sum: dir-: No such file or directory
The md5 of ej1_signal.c is 256
md5sum: dir-: No such file or directory
The md5 of pipeL.c is 256
Could you please explain me why is this happening? Thanks !
The system function doesn't returns you what you think. system is used to launch a command and when that command finished, it (generally) exits with an exit code. This is the value you catched.
What you need is the output of the command not its return value. So what you need is popen which lets you launch some external command and read/write to it through a pipe. See http://pubs.opengroup.org/onlinepubs/009695399/functions/popen.html for example.
system does not return the output of a command. To get the output of a command, you need to create a process and tie the standard output stream to a file descriptor you can read data off in the other process. For an example on how to do that, you can refer to the pipe man page (section 2).
Another option is to use a library that provides an MD5 implementation (eg. OpenSSL). The man page of EVP_DigestInit (section 3) provides an example for that.
Another problem is that your code tries to calculate the digest of d->d_name, not the file which name is in d->d_name. You could use sprintf or strncat with a suitably sized buffer (ie. the length of the static string part md5sum plus the maximum size of the file name (usually 256 bytes, may vary between library implementations and file systems) plus another byte for safely terminating the string (as some implementations may report an unterminated string in d->d_name)). Please note that this does not apply if you use a library for digest calculation, as the library uses either the file name or you need to pass the file contents to a library function (eg. EVP_DigestUpdate).
The first problem is that you launch a new shell process executing "md5sum dir->d_name", meaning it does a md5 on the "file" named dir->d_name, instead of using the value you get from readdir.
So you could add a temp variable, and prepare the command in it prior to running system.
limits.h is for Linux, adjust it if necessary to get the max length of a path
...
#include <linux/limits.h>
char temp[PATH_MAX];
then instead of
md5 = system("md5sum dir->d_name");
add
strcpy(temp, "md5sum ");
strcat(temp, dir->d_name);
system(temp);
as for the other problem (system will not return the md5 string), this will display the md5 of the file in the directory. And you can just remove the printf ...
There is no command in C to return the output of an external command, but there exists popen you can just open a command as a FILE * and read the output from it. This is how you can do it, and it's all explained within the code
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
int main(void)
{
DIR *d;
struct dirent *dir;
d = opendir(".");
if (d == NULL)
return -1;
while ((dir = readdir(d)) != NULL)
{
char command[sizeof dir->d_name + 10];
struct stat st;
FILE *pipe;
if (stat(dir->d_name, &st) == -1)
continue;
/* check if the entry is a directory, md5sum does not work with them */
if (S_ISDIR(st.st_mode) != 0)
continue;
/*
* md5sum dir->d_name will pass `dir->d_name` as the argument to the md5sum command,
* we need to build the command string, I like snprintf in this case
*/
snprintf(command, sizeof command, "md5sum \"%s\"", dir->d_name);
/*
* Open the pipe, it will execute the new command in a new process (fork)
* and create a pipe for communication with the current porcess
*/
pipe = popen(command, "r");
if (pipe != NULL)
{
char md5[33];
/* read the md5 digest string from the command output */
fread(md5, 1, sizeof md5 - 1, pipe);
/* append a null terminator */
md5[sizeof md5 - 1] = '\0';
printf("The md5 of %s is %s\n", dir->d_name, md5);
}
/* close the pipe */
pclose(pipe);
}
/* you should always call closedir() if opendir() succeded */
closedir(d);
return 0;
}
Let me explain what I'm trying to realize:
I have a encrypted tar file. I can decrypt it in memory, but obviously I can't write the decrypted data back to hard disk as a real file. The decrypted data is structured as a char* buffer in memory; how can I untar it in memory?
I can't find answer with libtar library.
I also tried to untar it with execlp("tar", "tar", "-xvO", (void*)0).
But it didn't work as I thought.
Anyone can give me a hint of the best solution? Thanks!
I suspect that libtar is the answer.
Using libtar, you can specify your own functions for opening/closing, reading and writing.
From the manpage:
int tar_open(TAR **t, char *pathname, tartype_t *type, int oflags,
int mode, int options);
The tar_open() function opens a tar archive file corresponding to the
filename named by the pathname argument. The oflags argument must be
either O_RDONLY or O_WRONLY.
The type argument specifies the access methods for the given file
type. The tartype_t structure has members named openfunc(), closefunc(),
readfunc() and writefunc(), which are pointers to the functions for opening,
closing, reading, and writing the file, respectively. If type is NULL,
the file type defaults to a normal file, and the standard open(),
close(), read(), and write() functions are used.
I made an example, how to read file contents from an in-memory tar. The is_file_in_tar() function returns the length and the starting position of the named file if it is stored in the tar:
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
struct tar {
char name[100]; char _unused[24];
char size[12]; char _padding[376];
} *tar;
int is_file_in_tar( struct tar *tar, char *name, char **start, int *length ){
for( ; tar->name[0]; tar+=1+(*length+511)/512 ){
sscanf( tar->size, "%o", length);
if( !strcmp(tar->name,name) ){ *start = (char*)(tar+1); return 1; }
}
return 0;
}
int main(){
int fd=open( "libtar-1.2.11.tar", O_RDONLY );
tar=mmap(NULL, 808960, PROT_READ, MAP_PRIVATE, fd, 0);
char *start; int length; char name[]="libtar-1.2.11/TODO";
if( is_file_in_tar(tar,name,&start,&length) ) printf("%.*s",length,start);
}
You can execute tar utility redirected to stdout. (tar --to-stdout). You should run it using forkpty() or popen() in order to read the output.
I've done for that with this code. Try it!
FILE*fp;
if( fp = popen("/bin/tar -xv -C /target/dir", "w") )
{
fwrite(tar_buffer,1,tar_size,fp);
pclose(fp);
printf("Untar End %d Save file\n", tar_size);
}
Just untar to in-memory tmpfs using a normal untar operation.