I am trying to create a C program which has a text.txt file with 50 lines. This text.txt file should be split into 5 files such as text_part1.txt, text_part2.txt and so on. The 50 lines in the text.txt file should be copied equally to 10 lines each in 5 files.
All these has to be done by using command line arguments. I am a beginner in C and have just started to code. I don't know how to use command line arguments.
#include <stdio.h>
#include<stdlib.h>
#include<string.h>
int main()
{
FILE *ptr_readfile;
FILE *ptr_writefile;
char line [100];
char fileoutputname[10];
int filecounter=1, linecounter=1;
ptr_readfile = fopen("C:/home/dir/sample_pg/data/text.txt","r");
if (!ptr_readfile)
return 1;
sprintf(fileoutputname, "file_part%d", filecounter);
ptr_writefile = fopen(fileoutputname, "w");
while (fgets(line, sizeof line, ptr_readfile)!=NULL)
{
if (linecounter == 5)
{
fclose(ptr_writefile);
linecounter = 1;
filecounter++;
sprintf(fileoutputname, "file_part%d", filecounter);
ptr_writefile = fopen(fileoutputname, "w");
if (!ptr_writefile)
return 1;
}
fprintf(ptr_writefile,"%s\n", line);
linecounter++;
}
fclose(ptr_readfile);
return 0;
}
To get the program's arguments, you need to define your main function with an argument count (conventionally named argc) and and argument array (conventionally named argv), so something as
int main(int argc, char**argv) {
for (int ix=1; ix<argc; ix++) {
FILE* fil = fopen(argv[ix], "r");
if (!fil) { perror(argv[ix]); exit(EXIT_FAILURE); };
When you compile this (with some other needed code) into an executable foo.exe and run foo.exe a b c on the terminal, argc is 4 and you have
argc == 4 &&
strcmp(argv[0], "foo.exe") == 0 &&
strcmp(argv[1], "a") == 0 &&
strcmp(argv[2], "b") == 0 &&
strcmp(argv[3], "c") == 0 &&
argv[4] == NULL
Notice that it is a good habit to call perror on failure of a function like fopen
BTW, you forgot to call fclose in your program. You might learn more about fflush also. And you should prefer snprintf to sprintf to avoid buffer overflows. Learn more about, and be very scared of, undefined behavior.
Please take the habit of compiling with all warnings & debug info (e.g. gcc -Wall -Wextra -g if using GCC....) then learn how to use the debugger.
Read perror(3), fopen(3), fclose(3), fflush(3), snprintf(3) and take the habit to read the documentation of every function that you want to use.
See also csplit; you might take some inspiration by studying the source code of the free software package coreutils implementing it on Linux.
Related
I'm quite new to C. I faced a problem while studying the last chapter of K&R.
I'm trying to implement fopen() and fillbuf() function by using system calls, open and read.
I exactly copied the source code from the book but repeatedly get segmentation error after I compile.
fp->fd = fd;
fp->cnt = 0;
fp->base = NULL;
fp->flag = (*mode=='r')? _READ : _WRITE;
Why does error occur? Here is my complete code.
#include<fcntl.h>
#include<unistd.h>
#include<stdlib.h>
#define PERM 0644
#define EOF (-1)
#define BUFSIZE 1024
#define OPEN_MAX 20
typedef struct _iobuf{
int cnt;
char *ptr;
char *base;
int flag;
int fd;
} myFILE;
enum _flags {
_READ = 01,
_WRITE = 02,
_UNBUF = 04,
_EOF = 010,
_ERR = 020
};
myFILE _iob[OPEN_MAX]={
{0, (char *) 0, (char *) 0, _READ, 0 },
{0, (char *) 0, (char *) 0, _WRITE, 1 },
{0, (char *) 0, (char *) 0, _WRITE | _UNBUF, 2 }
};
#define stdin (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])
#define getc(p) ( --(p)->cnt>=0 ? (unsigned char) *(p)->ptr++ : _fillbuf(p) )
int _fillbuf(myFILE *fp)
{
int bufsize;
if((fp->flag & (_READ|_EOF|_ERR))!=_READ)
return EOF;
bufsize=(fp->flag & _UNBUF)? 1 : BUFSIZE;
if(fp->base==NULL)
if((fp->base=(char *)malloc(bufsize))==NULL)
return EOF;
fp->ptr=fp->base;
fp->cnt=read(fp->fd, fp->ptr, bufsize);
if(--fp->cnt<0){
if(fp->cnt == -1)
fp->flag |= _EOF;
else
fp->flag |= _ERR;
return EOF;
}
return (unsigned char) *fp->ptr++;
}
myFILE *myfopen(char *name, char *mode)
{
int fd;
myFILE *fp;
if(*mode!='r' && *mode!='w' && *mode!='a')
return NULL;
for(fp=_iob; fp<_iob+OPEN_MAX; fp++)
if((fp->flag & (_READ | _WRITE))==0)
break;
if(fp>=_iob+OPEN_MAX)
return NULL;
if(*mode=='w')
fd=creat(name, PERM);
else if(*mode=='a'){
if((fd=open(name, O_WRONLY, 0))==-1)
fd=creat(name, PERM);
lseek(fd, 0L, 2);
} else
fd=open(name, O_RDONLY, 0);
if(fd==-1)
return NULL;
fp->fd = fd;
fp->cnt = 0;
fp->base = NULL;
fp->flag = (*mode=='r')? _READ : _WRITE;
return fp;
}
int main(int argc, char *argv[])
{
myFILE *fp;
int c;
if((fp=myfopen(argv[1], "r"))!=NULL)
write(1, "opened\n", sizeof("opened\n"));
while((c=getc(fp))!=EOF)
write(1, &c, sizeof(c));
return 0;
}
EDIT: Please see Jonathan Leffler's answer. It is more accurate and provides a better diagnosis. My answer works, but there is a better way to do things.
I see the problem.
myFILE *fp;
if(*mode!='r' && *mode!='w' && *mode!='a')
return NULL;
for(fp=_iob; fp<_iob+OPEN_MAX; fp++)
if((fp->flag & (_READ | _WRITE))==0) // marked line
break;
When you reach the marked line, you try to dereference the fp pointer. Since it is (likely, but not certainly) initialized to zero (but I should say NULL), you are dereferencing a null pointer. Boom. Segfault.
Here's what you need to change.
myFILE *fp = (myFILE *)malloc(sizeof(myFILE));
Be sure to #include <malloc.h> to use malloc.
Also your close function should later free() your myFILE to prevent memory leaks.
A different analysis of the code in the question
The code shown in the question consists of parts, but not all, of the code from K&R "The C Programming Language, 2nd Edition" (1988; my copy is marked 'Based on Draft Proposed ANSI C'), pages 176-178, plus a sample main program that is not from the book at all. The name of the type was changed from FILE to myFILE too, and fopen() was renamed to myfopen(). I note that the expressions in the code in the question have many fewer spaces than the original code in K&R. The compiler doesn't mind; human readers generally prefer spaces around operators.
As stated in another (later) question and answer, the diagnosis given by Mark Yisri in the currently accepted answer is incorrect — the problem is not a null pointer in the for loop. The prescribed remedy works (as long as the program is invoked correctly), but the memory allocation is not necessary. Fortunately for all concerned, the fclose() function was not included in the implementations, so it wasn't possible to close a file once it was opened.
In particular, the loop:
for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
if ((fp->flag & (_READ | _WRITE)) == 0)
break;
is perfectly OK because the array _iob is defined as:
FILE _iob[OPEN_MAX] = {
…initializers for stdin, stdout, stderr…
};
This is an array of structures, not structure pointers. The first three elements are initialized explicitly; the remaining elements are implicitly initialized to all zeros. Consequently, there is no chance of there being a null pointer in fp as it steps through the array. The loop might also be written as:
for (fp = &_iob[0]; fp < &_iob[OPEN_MAX]; fp++)
if ((fp->flag & (_READ | _WRITE)) == 0)
break;
Empirically, if the code shown in the question (including the main(), which was not — repeat not — written by K&R) is invoked correctly, it works without crashing. However, the code in the main() program does not protect itself from:
Being invoked without a non-null argv[1].
Being invoked with a non-existent or non-readable file name in argv[1].
These are very common problems, and with the main program as written, either could cause the program to crash.
Although it is hard to be sure 16 months later, it seems likely to me that the problem was in the way that the program was invoked rather than anything else. If the main program is written more-or-less appropriately, you end up with code similar to this (you also need to add #include <string.h> to the list of included headers):
int main(int argc, char *argv[])
{
myFILE *fp;
int c;
if (argc != 2)
{
static const char usage[] = "Usage: mystdio filename\n";
write(2, usage, sizeof(usage) - 1);
return 1;
}
if ((fp = myfopen(argv[1], "r")) == NULL)
{
static const char filenotopened[] = "mystdio: failed to open file ";
write(2, filenotopened, sizeof(filenotopened) - 1);
write(2, argv[1], strlen(argv[1]));
write(2, "\n", 1);
return 1;
}
write(1, "opened\n", sizeof("opened\n"));
while ((c = getc(fp)) != EOF)
write(1, &c, sizeof(c));
return 0;
}
This can't use fprintf() etc because the surrogate implementation of the standard I/O library is not complete. Writing the errors direct to file descriptor 2 (standard error) with write() is fiddly, if not painful. It also means that I've taken shortcuts like assuming that the program is called mystdio rather than actually using argv[0] in the error messages. However, if it is invoked without any file name (or if more than one file name is given), or if the named file cannot be opened for reading, then it produces a more or less appropriate error message — and does not crash.
Leading underscores
Note that the C standard reserves identifiers starting with underscores.
You should not create function, variable or macro names that start with an underscore, in general. C11 §7.1.3 Reserved identifiers says (in part):
All identifiers that begin with an underscore and either an uppercase letter or another underscore are always reserved for any use.
All identifiers that begin with an underscore are always reserved for use as identifiers with file scope in both the ordinary and tag name spaces.
See also What does double underscore (__const) mean in C?
In fairness, K&R were essentially describing the standard implementation of the standard I/O library at the time when the 1st Edition was written (1978), modernized sufficiently to be using function prototype notation in the 2nd Edition. The original code was on pages 165-168 of the 1st Edition.
Even today, if you are implementing the standard library, you would use names starting with underscores precisely because they are reserved for use 'by the implementation'. If you are not implementing the standard library, you do not use names starting with underscores because that uses the namespace reserved for the implementation. Most people, most of the time, are not writing the standard library — most people should not be using leading underscores.
Basically what I want to do is have a program with int main(argc, *argv[]) and instead of writing chars into command line, I want to have my program read those words from a file. How could I accomplish this? Is there a special command in Linux for that?
You can use standard redirect operations in a *nix shell to pass files as input:
./myprogram < inputfile.txt
This statement executes your program (myprogram) and pumps the data inside of inputfile.txt to your program
You can also redirect the output of program to a file in a similar fashion:
./myprogram > outputfile.txt
Instead of doing
for(int i = 1; i < argc; i++)
{
insert(&trie, argv[i]);
}
you could doing something like
FILE *input;
char *line;
....
while (fscanf(input, "%ms", &line) != EOF) {
insert(&trie, line);
/* If you make a copy of line in `insert()`, you should
* free `line` at here; if you do not, free it later. */
free(line);
}
Use redirection
yourprogram < youtextfile
will offer the content of yourtextfile as standard input (stdin) to yourprogram. Likewise
yourprogram > yourothertextfile
will send everything the program writes to standard output (stdout) to yourothertextfile
You'll notice when reading man pages that most system calls have a version that works directly with stdin or stdout
For example consider the printf family:
printf ("hello world\n");
is a shorter version of
fprintf (stdout,"hello world\n");
and the same goes for scanf and stdin.
This is only the most basic usage of redirection, which in my opinion is one of the key aspects of "the unix way of doing things". As such, you'll find lots of articles and tutorials that show examples that are a lot more advanced than what I wrote here. Have a look at this Linux Documentation Project page on redirection to get started.
EDIT: getting fed input via redirection ior interactively "looks" the same to the program, so it will react the same to redirected input as it does to console input. This means that if your program expects data line-wise (eg because it uses gets() to read lines), the input text file should be organized in lines.
By default, every program you execute on POSIX-compliant systems has three file descriptors open (see <unistd.h> for the macros' definition): the standard input (STDOUT_FILENO), the standard output (STDOUT_FILENO), and the error output (STDERR_FILENO), which is tied to the console.
Since you said you want read lines, I believe the ssize_t getline(char **lineptr, size_t *n, FILE *stream) function can do the job. It takes a stream (FILE pointer) as a third argument, so you must either use fopen(3) to open a file, or a combination of open(2) and fdopen(3).
Getting inspiration from man 3 getline, here is a program demonstrating what you want:
#define _GNU_SOURCE
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *fp;
size_t len;
char *line;
ssize_t bytes_read;
len = 0;
line = NULL;
if (argc > 1)
{
fp = fopen(argv[1], "r");
if (fp == NULL)
{
perror(*argv);
exit(EXIT_FAILURE);
}
}
else
fp = stdin;
while ((bytes_read = getline(&line, &len, fp)) != -1)
printf("[%2zi] %s", bytes_read, line);
free(line);
exit(EXIT_SUCCESS);
}
Without arguments, this program reads lines from the standard input: you can either feed it lines like echo "This is a line of 31 characters" | ./a.out or execute it directly and write your input from there (finish with ^D).
With a file as an argument, it will output every line from the file, and then exit.
You can have your executable read its arguments on the command line and use xargs, the special Linux command for passing the contents of a file to a command as arguments.
An alternative to xargs is parallel.
I need to write the Solaris the 'line' application on Solaris, which is very simple, on Linux. It was developed for scripting, it takes one line of stdin and outputs it to stdout. I wrote an extraordinarily simple C program to do this. I used both getline and fgets and they produce the same outcome:
int main(int argc, char* argv[])
{
char buf[256];
char* line = NULL;
if (fgets(line, 256, stdin) != NULL)
//if (getline(&line, &len, stdin) != -1)
{
printf("%s", line);
}
return EXIT_SUCCESS;
}
The C-Shell script, loop_file looks like this:
#!/bin/csh -f
while(1)
set line = `app`
echo "Got Line : $line"
end
I kick off the process using the following:
./loop_file < textFile.txt
It's a large file, the first line is printed out just but the next line is almost 4096 characters after, the third line is almost 4096 after that, etc. It is not a line-by-line read. I've even tried forgetting using C and using awk in the while loop instead.
Any ideas?
By the way, don't say - don't use CSH - it's legacy code I'm required to port :)
while(1)
set line = `app`
set name = $line[0];
set address = $line[1];
set purpose = $line[2];
end
stdin is buffered which could be the reason why some of the lines from the text file are missed out. You could possibly make stdin unbuffered using setvbuf(fd, NULL, _IONBF, 0) or equivalent before fgets call in your C program and address this issue.
Alternatively, you can read character by character following the suit of line utility. Maybe something on the lines of :
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
int main(void)
{
char c;
while(read(STDIN_FILENO , &c, 1) > 0 && c != '\n')
putchar(c);
return EXIT_SUCCESS;
}
Hope this helps!
I have implemented my own dynamic-memory version of getline function:
char * fgetline(FILE * f)
Starts with 30 character buffer and when the buffer is full allocate a new one copy the contents and free the old buffer. When we get EOF or \n we return from the function.
I want to use this function to implement a version of the program tail. Input comes from stdin, output goes to stdout. If the first argument begins with -, everything after the - is the number of lines to print. The default number of lines to print is 10, when no argument is given.
I have thought until now that I should use the function:
int atoi (const char *s)
from stdlib.h and have an array of pointers to lines but I don't know exactly how to do this.
Any ideas?
Declare your main function as
int main (int argc, char**argv) {
}
If you compile your program to myprog executable, and invoke it as myprog -20 somefile anotherfile then you have:
argc == 4
&& strcmp(argv[0], "myprog") == 0
&& strcmp(argv[1], "-20") == 0
&& strcmp(argv[2], "somefile") == 0
&& strcmp(argv[3], "anotherfile") == 0
&& argv[4] == NULL
in other words, you might want to have your program containing
int nblines = 10;
int main(int argc, char**argv) {
int argix = 1;
if (argc>1) {
if (argv[1][0]=='-')
{
nblines = atoi(argv[1]+1);
argix = 2;
}
for (; argix < argc; argix++)
tail (argv[argix]);
}
return 0;
}
It is up to you to implement your void tail(char*filename); function appropriately. Don't forget to compile with all warnings & debugging info, e.g. with gcc -Wall -g on Linux. Use your debugger (gdb on Linux) to debug your program. Take into account that fopen can fail, and use errno to display an appropriate error message.
Notice that you don't need your fgetline function. The getline(3)
function is standard (in Posix 2008) and is dynamically allocating the line buffer.
I've been trying to get this code to work for hours! All I need to do is open a file to see if it is real and readable. I'm new to C so I'm sure there is something stupid I'm missing. Here is the code (shorthand, but copied):
#include <stdio.h>
main() {
char fpath[200];
char file = "/test/file.this";
sprintf(fpath,"~cs4352/projects/proj0%s",file);
FILE *fp = fopen(fpath,"r");
if(fp==NULL) {
printf("There is no file on the server");
exit(1);
}
fclose(fp);
//do more stuff
}
I have also verified that the path is correctly specifying a real file that I have read permissions to. Any other ideas?
Edit 1: I do know that the fpath ends up as "~cs4352/projects/proj0/test/file.this"
Edit 2: I have also tried the using the absolute file path. In both cases, I can verify that the paths are properly built via ls.
Edit 3: There errno is 2... I'm currently trying to track what that means in google.
Edit 4: Ok, errno of 2 is "There is no such file or directory". I am getting this when the reference path in fopen is "/home/courses1/cs4352/projects/proj0/index.html" which I verified does exist and I have read rights to it. As for the C code listed below, there may be a few semantic/newbie errors in it, but gcc does not give me any compile time warnings, and the code works exactly as it should except that it says that it keeps spitting errno of 2. In other words, I know that all the strings/char array are working properly, but the only thing that could be an issue is the fopen() call.
Solution: Ok, the access() procedure is what helped me the most (and what i am still using as it is less code, not to mention the more elegant way of doing it). The problem actually came from something that I didn't explain to you all (because I didn't see it until I used access()). To derrive the file, I was splitting strings using strtok() and was only splitting on " \n", but because this is a UNIX system, I needed to add "\r" to it as well. Once I fixed that, everything fell into place, and I'm sure that the fopen() function would work as well, but I have not tested it.
Thank you all for your helpful suggestions, and especially to Paul Beckingham for finding this wonderful solution.
Cheers!
The "~" is expanded by the shell, and is not expanded by fopen.
To test the existence and readability of a file, consider using the POSIX.1 "access" function:
#include <unistd.h>
if (access ("/path/to/file", F_OK | R_OK) == 0)
{
// file exists and is readable
}
First, file needs to be declared as char* or const char*, not simply char as you've written. But this might just be a typo, the compiler should at least give a warning there.
Secondly, use an absolute path (or a path relative to the current directory), not shell syntax with ~. The substitution of ~cs4352 by the respective home directory is usually done by the shell, but you are directly opening the file. So you are trying to open a file in a ~cs4352 subdirectory of your current working directory, which I guess is not what you want.
Other people have probably produced the equivalent (every modern shell, for example), but here's some code that will expand a filename with ~ or ~user notation.
#if __STDC_VERSION__ >= 199901L
#define _XOPEN_SOURCE 600
#else
#define _XOPEN_SOURCE 500
#endif
#include <assert.h>
#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
char *relfname(const char *name, char *buffer, size_t bufsiz)
{
assert(name != 0 && buffer != 0 && bufsiz != 0);
if (name[0] != '~')
strncpy(buffer, name, bufsiz);
else
{
const char *copy;
struct passwd *usr = 0;
if (name[1] == '/' || name[1] == '\0')
{
usr = getpwuid(getuid());
copy = &name[1];
}
else
{
char username[PATH_MAX];
copy = strchr(name, '/');
if (copy == 0)
copy = name + strlen(name);
strncpy(username, &name[1], copy - &name[1]);
username[copy - &name[1]] = '\0';
usr = getpwnam(username);
}
if (usr == 0)
return(0);
snprintf(buffer, bufsiz, "%s%s", usr->pw_dir, copy);
}
buffer[bufsiz-1] = '\0';
return buffer;
}
#ifdef TEST
static struct { const char *name; int result; } files[] =
{
{ "/etc/passwd", 1 },
{ "~/.profile", 1 },
{ "~root/.profile", 1 },
{ "~nonexistent/.profile", 0 },
};
#define DIM(x) (sizeof(x)/sizeof(*(x)))
int main(void)
{
int i;
int fail = 0;
for (i = 0; i < DIM(files); i++)
{
char buffer[PATH_MAX];
char *name = relfname(files[i].name, buffer, sizeof(buffer));
if (name == 0 && files[i].result != 0)
{
fail++;
printf("!! FAIL !! %s\n", files[i].name);
}
else if (name != 0 && files[i].result == 0)
{
fail++;
printf("!! FAIL !! %s --> %s (unexpectedly)\n", files[i].name, name);
}
else if (name == 0)
printf("** PASS ** %s (no match)\n", files[i].name);
else
printf("** PASS ** %s -> %s\n", files[i].name, name);
}
return((fail == 0) ? EXIT_SUCCESS : EXIT_FAILURE);
}
#endif
You could try examining errno for more information on why you're not getting a valid FILE*.
BTW-- in unix the global value errno is set by some library and system calls when they need to return more information than just "it didn't work". It is only guaranteed to be good immediately after the relevant call.
char file = "/test/file.this";
You probably want
char *file = "/test/file.this";
Are you sure you do not mean
~/cs4352/projects/proj0%s"
for your home directory?
To sum up:
Use char *file=/test/file.this";
Don't expect fopen() to do shell substitution on ~ because it won't. Use the full path or use a relative path and make sure the current directory is approrpriate.
error 2 means the file wasn't found. It wasn't found because of item #2 on this list.
For extra credit, using sprintf() like this to write into a buffer that's allocated on the stack is a dangerous habit. Look up and use snprintf(), at the very least.
As someone else here mentioned, using access() would be a better way to do what you're attempting here.