I need to get proper Polish characters "ąężźćśół". I used some solutions like setlocale, system("chcp ..."), and wchar_t. Everything goes well as long as I don't use files/lists. wscanf, wprintf and wchar_t work perfectly.
But if I try to read something from a file, save it into a list (or even an array), and then print it to the screen, I can't get proper Polish characters; in the case of the lists I get different results from time to time, for example z` or A2, like random characters from nowhere. I've been trying to get good results by using fscanf and fgets with the w (wide) variations, but it doesn't work. Did I do something wrong?
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <locale.h>

struct dyk{
    wchar_t line[200];
    struct dyk *next;
};
typedef struct dyk dyk;

void printdyk(char name[100]){
    dyk *wyp;
    wyp = malloc(sizeof(dyk));
    wchar_t yt[100];
    FILE *dyktando;

    dyktando = fopen(name, "r+");
    if(dyktando == NULL){
        wprintf(L"Błąd otwarcia pliku!\n"); //Can't open file
    }else{
        fgets(&wyp->line, sizeof(dyk), dyktando); //reading from file and send to the list
        wprintf(L"%s\n", wyp->line); //write text from the list on the screen
        wchar_t yt[100];
        wscanf(L"%s", &yt); //testing strings comparing, so I have to put some variables
        int n=strcmp(yt, wyp->line); //str compare
        printf("%d", n); //result, it gives me -1 every time
    }
    fclose(dyktando);
}
I tested the function with a txt file that contains only one character, "ż", and it can't read it from the file properly. At the start of the main function I put these 2 lines:
system("chcp 852");
setlocale(LC_ALL, ".852");
I'm using Code::Blocks with the mingw32-gcc compiler, and no extra flags.
You are not using wchar_t compatible functions everywhere in your code. In particular:
fgets(&wyp->line, sizeof(dyk), dyktando); //reading from file and send to the list
The wchar_t compatible version is fgetws. Also, wyp->line (without the & operator) is the correct argument.
int n=strcmp(yt, wyp->line); //str compare
wcscmp should be used instead.
Also note that sizeof on a wchar_t array is not correct when a function expects a length in characters rather than in bytes (as fgetws() does).
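Putting those three fixes together, a minimal corrected sketch of the OP's function might look like this (the structure is kept from the question; stripping the trailing newline that fgetws() keeps is my addition, and setlocale() is assumed to have been called in main() as discussed below):

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

struct dyk {
    wchar_t line[200];
    struct dyk *next;
};
typedef struct dyk dyk;

void printdyk(const char *name)
{
    FILE *dyktando = fopen(name, "r");
    if (dyktando == NULL) {
        wprintf(L"Błąd otwarcia pliku!\n");
        return;
    }

    dyk *wyp = malloc(sizeof *wyp);
    /* fgetws() wants the length in wide characters, not bytes */
    if (wyp && fgetws(wyp->line, sizeof wyp->line / sizeof wyp->line[0], dyktando)) {
        wyp->line[wcscspn(wyp->line, L"\r\n")] = L'\0'; /* strip the kept newline */
        wprintf(L"%ls\n", wyp->line);

        wchar_t yt[100];
        wscanf(L"%99ls", yt);            /* an array decays to a pointer; no & needed */
        int n = wcscmp(yt, wyp->line);   /* wide-character comparison */
        wprintf(L"%d\n", n);
    }

    free(wyp);
    fclose(dyktando);
}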
A comment OP (Amatheon) made indicates that the true underlying problem is how to properly read files using wide-character functions.
To ensure maximum compatibility and portability, let's restrict to C99. Consider the following example program:
#include <stdlib.h>
#include <locale.h>
#include <string.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
#include <errno.h>
#ifdef USE_ERRNO_CONSTANTS
#define SET_ERRNO(value) (errno = (value))
#else
#define SET_ERRNO(value)
#endif
ssize_t get_wide_delimited(wchar_t **lineptr, size_t *sizeptr, wint_t delim, FILE *stream)
{
    wchar_t *line = NULL;
    size_t size = 0;
    size_t used = 0;
    wint_t wc;

    if (!lineptr || !sizeptr || !stream) {
        /* Invalid function parameters. NULL pointers are not allowed. */
        SET_ERRNO(EINVAL);
        return -1;
    }

    if (ferror(stream)) {
        /* Stream is already in error state. */
        SET_ERRNO(EIO);
        return -1;
    }

    if (*sizeptr > 0) {
        line = *lineptr;
        size = *sizeptr;
    } else {
        *lineptr = NULL;
    }

    while (1) {
        wc = fgetwc(stream);
        if (wc == WEOF || wc == delim)
            break;

        if (used + 1 > size) {
            /* Growth policy. We wish to allocate a chunk of memory at once,
               so we don't need to do realloc() too often as it is a bit slow,
               relatively speaking. On the other hand, we don't want to do
               too large allocations, because that would waste memory.
               Anything that makes 'size' larger than 'used' will work.
            */
            if (used < 254)
                size = 256;
            else if (used < 65536)
                size = 2 * used;
            else
                size = (used | 65535) + 65521;

            line = realloc(line, size * sizeof (wchar_t));
            if (!line) {
                /* Out of memory. */
                SET_ERRNO(ENOMEM);
                return -1;
            }

            *lineptr = line;
            *sizeptr = size;
        }

        line[used++] = wc;
    }

    if (wc == WEOF) {
        /* Verify that the WEOF did not indicate a read error. */
        if (ferror(stream)) {
            /* Read error. */
            SET_ERRNO(EIO);
            return -1;
        }
    }

    /* Ensure there is enough room for the delimiter and end-of-string mark. */
    if (used + 2 > size) {
        /* We could reuse the reallocation policy here,
           with the exception that the minimum is used + 2, not used + 1.
           For simplicity, we use the minimum reallocation instead.
        */
        size = used + 2;
        line = realloc(line, size * sizeof (wchar_t));
        if (!line) {
            /* Out of memory. */
            SET_ERRNO(ENOMEM);
            return -1;
        }

        *lineptr = line;
        *sizeptr = size;
    }

    /* Append the delimiter, unless end-of-stream mark. */
    if (wc != WEOF)
        line[used++] = wc;

    /* Append the end-of-string nul wide char,
       but do not include it in the returned length. */
    line[used] = L'\0';

    /* Success! */
    return (ssize_t)used;
}

ssize_t get_wide_line(wchar_t **lineptr, size_t *sizeptr, FILE *stream)
{
    return get_wide_delimited(lineptr, sizeptr, L'\n', stream);
}
int main(int argc, char *argv[])
{
    wchar_t *line = NULL, *p;
    size_t size = 0;
    unsigned long linenum;
    FILE *in;
    int arg;

    if (!setlocale(LC_ALL, ""))
        fprintf(stderr, "Warning: Your C library does not support your current locale.\n");
    if (fwide(stdout, 1) < 1)
        fprintf(stderr, "Warning: Your C library does not support wide standard output.\n");

    if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s FILENAME [ FILENAME ... ]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program will output the named files, using wide I/O.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    for (arg = 1; arg < argc; arg++) {
        in = fopen(argv[arg], "r");
        if (!in) {
            fprintf(stderr, "%s: %s.\n", argv[arg], strerror(errno));
            return EXIT_FAILURE;
        }

        if (fwide(in, 1) < 1) {
            fprintf(stderr, "%s: Wide input is not supported from this file.\n", argv[arg]);
            fclose(in);
            return EXIT_FAILURE;
        }

        linenum = 0;

        while (get_wide_line(&line, &size, in) > 0) {
            linenum++;

            /* We use another pointer to the line for simplicity.
               We must not modify 'line' (except via 'free(line); line=NULL; size=0;'
               or a similar reallocation), because it points to a dynamically allocated buffer. */
            p = line;

            /* Remove leading whitespace. */
            while (iswspace(*p))
                p++;

            /* Trim off the line at the first occurrence of newline or carriage return.
               (The line will also end at the first embedded nul wide character, L'\0',
               if the file contains any.) */
            p[wcscspn(p, L"\r\n")] = L'\0';

            wprintf(L"%s: Line %lu: '%ls', %zu characters.\n", argv[arg], linenum, p, wcslen(p));
        }

        if (ferror(in)) {
            fprintf(stderr, "%s: Read error.\n", argv[arg]);
            fclose(in);
            return EXIT_FAILURE;
        }

        if (fclose(in)) {
            fprintf(stderr, "%s: Delayed read error.\n", argv[arg]);
            return EXIT_FAILURE;
        }

        wprintf(L"%s: Total %lu lines read.\n", argv[arg], linenum);
        fflush(stdout);
    }

    free(line);
    line = NULL;
    size = 0;

    return EXIT_SUCCESS;
}
Because the EINVAL, EIO, and ENOMEM errno constants are not defined in the C standards, the get_wide_line() and get_wide_delimited() functions only set errno if you define the USE_ERRNO_CONSTANTS preprocessor macro.
get_wide_line() and get_wide_delimited() are reimplementations of the getwline() and getwdelim() functions from ISO/IEC TR 24731-2:2010; the wide-character equivalents of the POSIX.1 getline() and getdelim() functions. Unlike fgets() or fgetws(), they use a dynamically allocated buffer to hold the line, so there are no fixed line length limits, other than available memory.
I've explicitly marked the code to be under Creative Commons Zero license: No Rights Reserved. It means you can use it in your own code, under whatever license you want.
Note: I would really love users to push their vendors and C standard committee members to get these included in the bog-standard C library part in the next version of the C standard. As you can see from above, they can be implemented in standard C already; it is just that the C library itself can do the same much more efficiently. The GNU C library is a perfect example of that (although even they are stalling with the implementation, because of the lack of standardization). Just think how many buffer overflow bugs would be avoided if people used getline()/getdelim()/getwline()/getwdelim() instead of fgets()/fgetws()! And it avoids having to think about what the maximum reasonable line length in each instance would be, too. Win-win!
(In fact, we could switch the return type to size_t, and use 0 instead of -1 as the error indicator. That would limit the changes to the text of the C standard to the addition of the four functions. It saddens and irritates me to no end, to have such a significant group of trivial functions so callously and ignorantly overlooked, for no sensible reason. Please, bug your vendors and any C standards committee members you have access to about this, as incessantly and relentlessly as you can manage. Both you and they deserve it.)
The essential parts of the program are
if (!setlocale(LC_ALL, ""))
This tells the C library to use the locale the user has specified.
Please, do not hardcode the locale value into your programs. In most operating systems, all you need to do is to change the LANG or LC_ALL environment variable to the locale you want to use, before running your program.
You might think that "well, I can hardcode it this time, because this is the locale used for this data", but even that can be a mistake, because new locales can be created at any time. This is particularly annoying when the character set part is hardcoded. For example, the ISO 8859 single-byte character set used in Western Europe is ISO 8859-15, not ISO 8859-1, because ISO 8859-15 has the € character in it, whereas ISO 8859-1 does not. If you have hardcoded ISO 8859-1 in your program, then it cannot correctly handle the € character at all.
if (fwide(stream, 1) < 1) for both stdout and file handles
While the C library does internally do an equivalent of the fwide() call based on which type of I/O function you use on the file handle the very first time, the explicit check is much better.
In particular, if the C library cannot support wide I/O to the file or stream represented by the handle, fwide() will return negative. (Unless the second parameter is also zero, it should never return zero; because of the issues in standardization, I recommend a strict return value check approach in this case, to catch vendors who decide to try to make life as difficult as possible for programmers trying to write portable code while technically still fulfilling the standard text, like Microsoft is doing. They even stuffed the C standard committee with their own representatives, so they could tweak C11 away from C99 features they didn't want to support, plus get a stamp of approval of their own nonstandard extensions nobody used before, to help create barriers for developers writing portable C code. Yeah, I don't trust their behaviour at all.)
ssize_t len = get_wide_line(&line, &size, handle);
If you initialize wchar_t *line = NULL; and size_t size = 0; prior to first call to get_wide_line() or get_wide_delimited(), the function will dynamically resize the buffer as needed.
The return value is negative if and only if an error occurs. (The functions should never return zero.)
When a line is read successfully, the return value reflects the number of wide characters in the buffer, including the delimiter (the newline L'\n', in the case of get_wide_line()), and is always positive (greater than zero). The contents of the buffer will have a terminating end-of-wide-string character, L'\0', but it is not counted in the return value.
Note that when the delimiter is not L'\0', the buffer may contain embedded wide nul characters, L'\0'. In that case, len > wcslen(line).
The above example program skips any leading whitespace on each input line, and trims off the line at the first linefeed (L'\n'), carriage return (L'\r'), or nul (L'\0'). Because of this, the return value len is only checked for success (a positive return value, greater than zero).
free(line); line = NULL; size = 0;
It is okay to discard the line at any point its contents are no longer needed. I recommend explicitly setting the line pointer to NULL, and the size to zero, to avoid use-after-free bugs. Furthermore, that allows any following get_wide_line() or get_wide_delimited() to correctly dynamically allocate a new buffer.
ferror(handle) after a wide input function fails
Just like with narrow streams and EOF, there are two cases why wide input functions might return WEOF (or return -1, depending on the function): because there is no more input, or because a read error occurred.
There is no reason whatsoever to write computer programs that ignore read or write errors, without reporting them to the user. Sure, they are rare, but not so rare that a programmer can sanely expect them to never occur. (In fact, with Flash memory on flimsy circuits stored in weak plastic housings and subjected to human-sized stresses (I've sat on mine time and time again), the errors aren't that rare.) It is just evil, rather similar to food preparers being too lazy to wash their hands, causing fecal bacteria outbreaks every now and then. Don't be a fecal bacteria spreader equivalent programmer.
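For example, a stripped-down read loop using the functions above, distinguishing end-of-input from a read error ('in' is an open FILE handle; this mirrors the main() program earlier):

wchar_t *line = NULL;
size_t size = 0;
ssize_t len;

while ((len = get_wide_line(&line, &size, in)) > 0) {
    /* process 'line'; len counts wide characters, excluding the L'\0' */
}
if (ferror(in)) {
    /* a read error occurred, as opposed to plain end of input */
    fprintf(stderr, "Read error.\n");
}
free(line);
line = NULL;
size = 0;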
Let's say you have a harebrained lecturer who does not allow you to use the above get_wide_line() or get_wide_delimited() functions.
Don't worry. We can implement the same program using fgetws(), if we restrict lines to some fixed upper limit (in wide characters). Lines longer than that will be read as two or more lines instead:
#include <stdlib.h>
#include <locale.h>
#include <string.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>
#include <errno.h>
#ifndef MAX_WIDE_LINE_LEN
#define MAX_WIDE_LINE_LEN 1023
#endif
int main(int argc, char *argv[])
{
    wchar_t line[MAX_WIDE_LINE_LEN + 1], *p;
    unsigned long linenum;
    FILE *in;
    int arg;

    if (!setlocale(LC_ALL, ""))
        fprintf(stderr, "Warning: Your C library does not support your current locale.\n");
    if (fwide(stdout, 1) < 1)
        fprintf(stderr, "Warning: Your C library does not support wide standard output.\n");

    if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s FILENAME [ FILENAME ... ]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program will output the named files, using wide I/O.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    for (arg = 1; arg < argc; arg++) {
        in = fopen(argv[arg], "r");
        if (!in) {
            fprintf(stderr, "%s: %s.\n", argv[arg], strerror(errno));
            return EXIT_FAILURE;
        }

        if (fwide(in, 1) < 1) {
            fprintf(stderr, "%s: Wide input is not supported from this file.\n", argv[arg]);
            fclose(in);
            return EXIT_FAILURE;
        }

        linenum = 0;

        while (1) {
            /* If line is an array, (sizeof line / sizeof line[0]) evaluates to
               the number of elements in it. This does not work if line is a pointer
               to dynamically allocated memory. In that case, you need to remember
               the number of wide characters you allocated for in a separate variable,
               and use that variable here instead. */
            p = fgetws(line, sizeof line / sizeof line[0], in);
            if (!p)
                break;

            /* Have a new line. */
            linenum++;

            /* Remove leading whitespace. */
            while (iswspace(*p))
                p++;

            /* Trim off the line at the first occurrence of newline or carriage return.
               (The line will also end at the first embedded nul wide character, L'\0',
               if the file contains any.) */
            p[wcscspn(p, L"\r\n")] = L'\0';

            wprintf(L"%s: Line %lu: '%ls', %zu characters.\n", argv[arg], linenum, p, wcslen(p));
        }

        if (ferror(in)) {
            fprintf(stderr, "%s: Read error.\n", argv[arg]);
            fclose(in);
            return EXIT_FAILURE;
        }

        if (fclose(in)) {
            fprintf(stderr, "%s: Delayed read error.\n", argv[arg]);
            return EXIT_FAILURE;
        }

        wprintf(L"%s: Total %lu lines read.\n", argv[arg], linenum);
        fflush(stdout);
    }

    return EXIT_SUCCESS;
}
Aside from the function used to read each line, the difference is that instead of keeping the while loop condition as while ((p = fgetws(line, ...))) { ... }, I changed it to the while (1) { p = fgetws(line, ...); if (!p) break; ... } form, which I believe is more readable.
I did deliberately show the longer, more complicated-looking one first, and this simpler one last, in the hopes that you would see that the more complicated-looking one actually has the simpler main() -- if we don't just count lines of code or something equally silly, but look at how many opportunities for mistakes there are.
As OP themselves wrote in a comment, the size of the buffer passed to fgets() or fgetws() is a real issue. There are rules of thumb, but they all suffer from being fragile against edits (especially the differences between arrays and pointers). With getline()/getdelim()/getwline()/getwdelim()/get_wide_line()/get_wide_delimited(), the rule of thumb is wchar_t *line = NULL; size_t size = 0; ssize_t len; and len = get_wide_line(&line, &size, handle);. No variations, and simple to remember and use. Plus it gets rid of any fixed limitations.
I'm quite new to C. I faced a problem while studying the last chapter of K&R.
I'm trying to implement the fopen() and fillbuf() functions by using the system calls open and read.
I copied the source code from the book exactly, but I repeatedly get a segmentation fault when I run the compiled program.
fp->fd = fd;
fp->cnt = 0;
fp->base = NULL;
fp->flag = (*mode=='r')? _READ : _WRITE;
Why does the error occur? Here is my complete code.
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>

#define PERM     0644
#define EOF      (-1)
#define BUFSIZE  1024
#define OPEN_MAX 20

typedef struct _iobuf{
    int cnt;
    char *ptr;
    char *base;
    int flag;
    int fd;
} myFILE;

enum _flags {
    _READ  = 01,
    _WRITE = 02,
    _UNBUF = 04,
    _EOF   = 010,
    _ERR   = 020
};

myFILE _iob[OPEN_MAX]={
    {0, (char *) 0, (char *) 0, _READ, 0 },
    {0, (char *) 0, (char *) 0, _WRITE, 1 },
    {0, (char *) 0, (char *) 0, _WRITE | _UNBUF, 2 }
};

#define stdin  (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])

#define getc(p) ( --(p)->cnt>=0 ? (unsigned char) *(p)->ptr++ : _fillbuf(p) )

int _fillbuf(myFILE *fp)
{
    int bufsize;

    if((fp->flag & (_READ|_EOF|_ERR))!=_READ)
        return EOF;
    bufsize=(fp->flag & _UNBUF)? 1 : BUFSIZE;
    if(fp->base==NULL)
        if((fp->base=(char *)malloc(bufsize))==NULL)
            return EOF;
    fp->ptr=fp->base;
    fp->cnt=read(fp->fd, fp->ptr, bufsize);
    if(--fp->cnt<0){
        if(fp->cnt == -1)
            fp->flag |= _EOF;
        else
            fp->flag |= _ERR;
        return EOF;
    }
    return (unsigned char) *fp->ptr++;
}

myFILE *myfopen(char *name, char *mode)
{
    int fd;
    myFILE *fp;

    if(*mode!='r' && *mode!='w' && *mode!='a')
        return NULL;
    for(fp=_iob; fp<_iob+OPEN_MAX; fp++)
        if((fp->flag & (_READ | _WRITE))==0)
            break;
    if(fp>=_iob+OPEN_MAX)
        return NULL;

    if(*mode=='w')
        fd=creat(name, PERM);
    else if(*mode=='a'){
        if((fd=open(name, O_WRONLY, 0))==-1)
            fd=creat(name, PERM);
        lseek(fd, 0L, 2);
    } else
        fd=open(name, O_RDONLY, 0);
    if(fd==-1)
        return NULL;

    fp->fd = fd;
    fp->cnt = 0;
    fp->base = NULL;
    fp->flag = (*mode=='r')? _READ : _WRITE;
    return fp;
}

int main(int argc, char *argv[])
{
    myFILE *fp;
    int c;

    if((fp=myfopen(argv[1], "r"))!=NULL)
        write(1, "opened\n", sizeof("opened\n"));
    while((c=getc(fp))!=EOF)
        write(1, &c, sizeof(c));
    return 0;
}
EDIT: Please see Jonathan Leffler's answer. It is more accurate and provides a better diagnosis. My answer works, but there is a better way to do things.
I see the problem.
myFILE *fp;

if(*mode!='r' && *mode!='w' && *mode!='a')
    return NULL;
for(fp=_iob; fp<_iob+OPEN_MAX; fp++)
    if((fp->flag & (_READ | _WRITE))==0)  // marked line
        break;
When you reach the marked line, you try to dereference the fp pointer. Since it is (likely, but not certainly) initialized to zero (but I should say NULL), you are dereferencing a null pointer. Boom. Segfault.
Here's what you need to change.
myFILE *fp = (myFILE *)malloc(sizeof(myFILE));
Be sure to #include <stdlib.h>, which declares malloc.
Also your close function should later free() your myFILE to prevent memory leaks.
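For illustration, such a close function might look like this sketch (the name myfclose and the details are mine, not from K&R):

int myfclose(myFILE *fp)
{
    int rc = close(fp->fd);  /* a full implementation would flush buffered writes first */
    free(fp->base);          /* buffer allocated lazily by _fillbuf() */
    fp->base = NULL;
    fp->flag = 0;            /* with the _iob[] approach, this marks the slot free */
    free(fp);                /* only if fp was malloc()ed as suggested above */
    return rc;
}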
A different analysis of the code in the question
The code shown in the question consists of parts, but not all, of the code from K&R "The C Programming Language, 2nd Edition" (1988; my copy is marked 'Based on Draft Proposed ANSI C'), pages 176-178, plus a sample main program that is not from the book at all. The name of the type was changed from FILE to myFILE too, and fopen() was renamed to myfopen(). I note that the expressions in the code in the question have many fewer spaces than the original code in K&R. The compiler doesn't mind; human readers generally prefer spaces around operators.
As stated in another (later) question and answer, the diagnosis given by Mark Yisri in the currently accepted answer is incorrect — the problem is not a null pointer in the for loop. The prescribed remedy works (as long as the program is invoked correctly), but the memory allocation is not necessary. Fortunately for all concerned, the fclose() function was not included in the implementations, so it wasn't possible to close a file once it was opened.
In particular, the loop:
for (fp = _iob; fp < _iob + OPEN_MAX; fp++)
    if ((fp->flag & (_READ | _WRITE)) == 0)
        break;
is perfectly OK because the array _iob is defined as:
FILE _iob[OPEN_MAX] = {
    …initializers for stdin, stdout, stderr…
};
This is an array of structures, not structure pointers. The first three elements are initialized explicitly; the remaining elements are implicitly initialized to all zeros. Consequently, there is no chance of there being a null pointer in fp as it steps through the array. The loop might also be written as:
for (fp = &_iob[0]; fp < &_iob[OPEN_MAX]; fp++)
    if ((fp->flag & (_READ | _WRITE)) == 0)
        break;
Empirically, if the code shown in the question (including the main(), which was not — repeat not — written by K&R) is invoked correctly, it works without crashing. However, the code in the main() program does not protect itself from:
Being invoked without a non-null argv[1].
Being invoked with a non-existent or non-readable file name in argv[1].
These are very common problems, and with the main program as written, either could cause the program to crash.
Although it is hard to be sure 16 months later, it seems likely to me that the problem was in the way that the program was invoked rather than anything else. If the main program is written more-or-less appropriately, you end up with code similar to this (you also need to add #include <string.h> to the list of included headers):
int main(int argc, char *argv[])
{
    myFILE *fp;
    int c;

    if (argc != 2)
    {
        static const char usage[] = "Usage: mystdio filename\n";
        write(2, usage, sizeof(usage) - 1);
        return 1;
    }
    if ((fp = myfopen(argv[1], "r")) == NULL)
    {
        static const char filenotopened[] = "mystdio: failed to open file ";
        write(2, filenotopened, sizeof(filenotopened) - 1);
        write(2, argv[1], strlen(argv[1]));
        write(2, "\n", 1);
        return 1;
    }
    write(1, "opened\n", sizeof("opened\n"));
    while ((c = getc(fp)) != EOF)
        write(1, &c, sizeof(c));
    return 0;
}
This can't use fprintf() etc because the surrogate implementation of the standard I/O library is not complete. Writing the errors direct to file descriptor 2 (standard error) with write() is fiddly, if not painful. It also means that I've taken shortcuts like assuming that the program is called mystdio rather than actually using argv[0] in the error messages. However, if it is invoked without any file name (or if more than one file name is given), or if the named file cannot be opened for reading, then it produces a more or less appropriate error message — and does not crash.
Leading underscores
Note that the C standard reserves identifiers starting with underscores.
You should not create function, variable or macro names that start with an underscore, in general. C11 §7.1.3 Reserved identifiers says (in part):
All identifiers that begin with an underscore and either an uppercase letter or another underscore are always reserved for any use.
All identifiers that begin with an underscore are always reserved for use as identifiers with file scope in both the ordinary and tag name spaces.
See also What does double underscore (__const) mean in C?
In fairness, K&R were essentially describing the standard implementation of the standard I/O library at the time when the 1st Edition was written (1978), modernized sufficiently to be using function prototype notation in the 2nd Edition. The original code was on pages 165-168 of the 1st Edition.
Even today, if you are implementing the standard library, you would use names starting with underscores precisely because they are reserved for use 'by the implementation'. If you are not implementing the standard library, you do not use names starting with underscores because that uses the namespace reserved for the implementation. Most people, most of the time, are not writing the standard library — most people should not be using leading underscores.
Basically what I want to do is have a program with int main(int argc, char *argv[]) and, instead of typing the words on the command line, have my program read them from a file. How could I accomplish this? Is there a special command in Linux for that?
You can use standard redirect operations in a *nix shell to pass files as input:
./myprogram < inputfile.txt
This statement executes your program (myprogram) and pumps the data inside inputfile.txt into your program's standard input.
You can also redirect the output of program to a file in a similar fashion:
./myprogram > outputfile.txt
Instead of doing
for(int i = 1; i < argc; i++)
{
    insert(&trie, argv[i]);
}
you could do something like
FILE *input;
char *line;
....

/* the %ms modifier (POSIX.1-2008) makes fscanf() allocate the buffer for us */
while (fscanf(input, "%ms", &line) != EOF) {
    insert(&trie, line);
    /* If insert() makes its own copy of line, free line here;
     * if it keeps the pointer, free it later instead. */
    free(line);
}
Use redirection:
yourprogram < yourtextfile
will offer the content of yourtextfile as standard input (stdin) to yourprogram. Likewise,
yourprogram > yourothertextfile
will send everything the program writes to standard output (stdout) to yourothertextfile.
You'll notice when reading man pages that many standard I/O functions have a shorthand version that works directly with stdin or stdout.
For example, consider the printf family:
printf ("hello world\n");
is a shorter version of
fprintf (stdout,"hello world\n");
and the same goes for scanf and stdin.
This is only the most basic usage of redirection, which in my opinion is one of the key aspects of "the unix way of doing things". As such, you'll find lots of articles and tutorials that show examples that are a lot more advanced than what I wrote here. Have a look at this Linux Documentation Project page on redirection to get started.
EDIT: Getting fed input via redirection or interactively "looks" the same to the program, so it will react the same to redirected input as it does to console input. This means that if your program expects data line-wise (e.g. because it uses gets() to read lines), the input text file should be organized in lines.
By default, every program you execute on POSIX-compliant systems has three file descriptors open (see <unistd.h> for the macros' definitions): the standard input (STDIN_FILENO), the standard output (STDOUT_FILENO), and the error output (STDERR_FILENO), all initially tied to the console.
Since you said you want to read lines, I believe the ssize_t getline(char **lineptr, size_t *n, FILE *stream) function can do the job. It takes a stream (FILE pointer) as its third argument, so you must either use fopen(3) to open a file, or a combination of open(2) and fdopen(3).
Getting inspiration from man 3 getline, here is a program demonstrating what you want:
#define _GNU_SOURCE
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    FILE *fp;
    size_t len;
    char *line;
    ssize_t bytes_read;

    len = 0;
    line = NULL;

    if (argc > 1)
    {
        fp = fopen(argv[1], "r");
        if (fp == NULL)
        {
            perror(*argv);
            exit(EXIT_FAILURE);
        }
    }
    else
        fp = stdin;

    while ((bytes_read = getline(&line, &len, fp)) != -1)
        printf("[%2zi] %s", bytes_read, line);

    free(line);
    exit(EXIT_SUCCESS);
}
Without arguments, this program reads lines from the standard input: you can either feed it lines like echo "This is a line of 31 characters" | ./a.out or execute it directly and write your input from there (finish with ^D).
With a file as an argument, it will output every line from the file, and then exit.
You can keep having your executable read its arguments from the command line and use xargs, the standard utility for passing the contents of a file to a command as arguments.
An alternative to xargs is GNU parallel.
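For example, with a whitespace-separated word list in words.txt (the file name is just a placeholder):
xargs ./myprogram < words.txt
This runs ./myprogram with the words of words.txt as its command-line arguments, splitting the work into several invocations if there are too many for one.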
There are a bunch of posts on this site describing various methods to print out the lines of a text file:
Posix-style,
reading IP addresses,
Fixed line length.
They all seem to be tailored to a specific example.
It would be great to have the Clearest and Most Concise and Easiest way to simply: print each line of any text file to the screen. Preferably with detailed explanations of what each line does.
Points for brevity and clarity.
#include <stdio.h>

static void cat(FILE *fp)
{
    char buffer[4096];
    size_t nbytes;

    while ((nbytes = fread(buffer, sizeof(char), sizeof(buffer), fp)) != 0)
        fwrite(buffer, sizeof(char), nbytes, stdout);
}

int main(int argc, char **argv)
{
    FILE *fp;
    const char *file;

    while ((file = *++argv) != 0)
    {
        if ((fp = fopen(file, "r")) != 0)
        {
            cat(fp);
            fclose(fp);
        }
    }
    return(0);
}
The cat() function is not strictly necessary, but I'd rather use it. The main program steps through each command line argument and opens the named file. If it succeeds, it calls the cat() function to print its contents. Since the call to fopen() does not specify "rb", it is opened as a text file. If the file is not opened, this code silently ignores the issue. If no files are specified, nothing is printed at all.
The cat() function simply reads blocks of text up to 4096 bytes at a time, and writes them to standard output ('the screen'). It stops when there's no more to read.
If you want to extend the code to read standard input when no file is specified, then you can use:
if (argc == 1)
    cat(stdin);
else
{
    ...while loop as now...
}
which is one of the reasons for having the cat() function written as shown.
This code does not pay direct attention to newlines — or lines of any sort. If you want to process it formally one line at a time, then you can do several things:
static void cat(FILE *fp)
{
    char buffer[4096];

    while (fgets(buffer, sizeof(buffer), fp) != 0)
        fputs(buffer, stdout);
}
This will read and write one line at a time. If any line is longer than 4095 bytes, it will read the line in two or more operations and write it in the same number of operations. Note that this assumes a text file in a way that the version using fread() and fwrite() does not. On POSIX systems, the version with fread() and fwrite() will handle arbitrary binary files with null bytes ('\0') in the data, but the version using fgets() and fputs() will not. Both the versions so far are strictly standard C (any version of the standard) as they don't use any platform-specific extensions; they are about as portable as code can be.
Alternatively again, if you have the POSIX 2008 getline() function, you can use that, but you need #include <stdlib.h> too (because you end up having to release the memory it allocates):
static void cat(FILE *fp)
{
    char *buffer = 0;
    size_t buflen = 0;

    while (getline(&buffer, &buflen, fp) != -1)
        fputs(buffer, stdout);
    free(buffer);
}
This version, too, will not handle binary data (meaning data with null bytes in it). It could be upgraded to do so, of course:
static void cat(FILE *fp)
{
    char *buffer = 0;
    size_t buflen = 0;
    ssize_t nbytes;

    while ((nbytes = getline(&buffer, &buflen, fp)) != -1)
        fwrite(buffer, sizeof(char), nbytes, stdout);
    free(buffer);
}
The getline() function reports how many bytes it read (there's a null byte after that), but the fwrite() function is the only one that takes a stream of arbitrary bytes and writes them all to the given stream.
Well, here is a very short solution I eventually made. I imagine there is something fundamentally wrong with it, otherwise it would have been suggested, but I figured I would post it here and hope someone tears it apart:
#include <stdio.h>

int main(void)
{
    FILE *MyFile;
    int c;

    MyFile = fopen("C:\\YourFile.txt", "r"); /* the backslash must be escaped */
    if (MyFile == NULL)
        return 1;
    c = fgetc(MyFile);
    while (c != EOF)
    {
        printf("%c", c);
        c = fgetc(MyFile);
    }
    fclose(MyFile);
    return 0;
}
@Dlinet, you are trying to learn some useful lessons on how to organize a program. I won't post code because there is already a really excellent answer; I cannot possibly improve upon it. But I would like to recommend a book to you.
The book is called Software Tools in Pascal. The language is Pascal, not C, but for reading the book this will cause no serious hardship. They start out implementing simple tools like the one in this example (which on UNIX is called cat) and they move on to more advanced stuff. Not only do they teach great lessons on how to organize this sort of program, they also cover language design issues. (There are problems in Pascal that really vex them, and if you know C you will realize that C doesn't have those problems.)
The book is out of print now, but I found it to be hugely valuable when I was learning to write code. The so-called "left corner design" methodology serves me well to this day.
I encourage you to find a used copy on Amazon or wherever. Amazon has used copies starting at $0.02 plus $4 shipping.
http://www.amazon.com/Software-Tools-Pascal-Brian-Kernighan/dp/0201103427
It would be an educational exercise to study the programs in this book and implement them in C. Any Linux system already has more-powerful and fully-debugged versions of these programs, but it would not be a waste of your time to work through this book and learn how to write this stuff.
Alternatively you could install FreePascal on your computer and use it to run the programs from the book.
Good luck and may you always enjoy software development!
If you want something prebaked, there's cat on POSIX systems.
If you want to write it yourself, here's the basic layout:
Check to make sure the file name, permissions, and path are valid.
Read till the newline separator in a loop (\n on Unix, \r\n on Windows/DOS).
Check for errors. If one occurred, print the error and abort.
Print the line to the screen.
Repeat.
The point is, there isn't really a specific way to do it. Just read, then write, and repeat. With some error checking, you've got cat all over again.
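A minimal sketch of that layout (using fgets(); the 4096-byte line limit is an arbitrary choice):

#include <stdio.h>

int main(int argc, char *argv[])
{
    char line[4096];
    FILE *fp;

    if (argc != 2) {                      /* check a file name was given */
        fprintf(stderr, "Usage: %s FILE\n", argv[0]);
        return 1;
    }
    fp = fopen(argv[1], "r");             /* path/permission check */
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }
    while (fgets(line, sizeof line, fp) != NULL) /* read up to each newline, in a loop */
        fputs(line, stdout);              /* print the line to the screen */
    if (ferror(fp)) {                     /* check for a read error */
        perror(argv[1]);
        fclose(fp);
        return 1;
    }
    fclose(fp);
    return 0;
}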
This looks like a simple question, but I didn't find anything similar here.
Since there is no file copy function in C, we have to implement file copying ourselves, but I don't like reinventing the wheel even for trivial stuff like that, so I'd like to ask the cloud:
What code would you recommend for file copying using fopen()/fread()/fwrite()?
What code would you recommend for file copying using open()/read()/write()?
This code should be portable (Windows/Mac/Linux/BSD/QNX/you name it), stable, time tested, fast, and memory efficient. Getting into a specific system's internals to squeeze out some more performance is welcomed (like getting the filesystem's cluster size).
This seems like a trivial question but, for example, the source code for the cp command isn't 10 lines of C code.
This is the function I use when I need to copy from one file to another - with test harness:
/*
@(#)File:           $RCSfile: fcopy.c,v $
@(#)Version:        $Revision: 1.11 $
@(#)Last changed:   $Date: 2008/02/11 07:28:06 $
@(#)Purpose:        Copy the rest of file1 to file2
@(#)Author:         J Leffler
@(#)Modified:       1991,1997,2000,2003,2005,2008
*/
/*TABSTOP=4*/

#include "jlss.h"
#include "stderr.h"

#ifndef lint
/* Prevent over-aggressive optimizers from eliminating ID string */
const char jlss_id_fcopy_c[] = "@(#)$Id: fcopy.c,v 1.11 2008/02/11 07:28:06 jleffler Exp $";
#endif /* lint */

void fcopy(FILE *f1, FILE *f2)
{
    char buffer[BUFSIZ];
    size_t n;

    while ((n = fread(buffer, sizeof(char), sizeof(buffer), f1)) > 0)
    {
        if (fwrite(buffer, sizeof(char), n, f2) != n)
            err_syserr("write failed\n");
    }
}

#ifdef TEST

int main(int argc, char **argv)
{
    FILE *fp1;
    FILE *fp2;

    err_setarg0(argv[0]);
    if (argc != 3)
        err_usage("from to");
    if ((fp1 = fopen(argv[1], "rb")) == 0)
        err_syserr("cannot open file %s for reading\n", argv[1]);
    if ((fp2 = fopen(argv[2], "wb")) == 0)
        err_syserr("cannot open file %s for writing\n", argv[2]);
    fcopy(fp1, fp2);
    return(0);
}

#endif /* TEST */
Clearly, this version uses file pointers from standard I/O and not file descriptors, but it is reasonably efficient and about as portable as it can be.
Well, except the error function - that's peculiar to me. As long as you handle errors cleanly, you should be OK. The "jlss.h" header declares fcopy(); the "stderr.h" header declares err_syserr() amongst many other similar error reporting functions. A simple version of the function follows - the real one adds the program name and does some other stuff.
#include "stderr.h"
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
void err_syserr(const char *fmt, ...)
{
int errnum = errno;
va_list args;
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
if (errnum != 0)
fprintf(stderr, "(%d: %s)\n", errnum, strerror(errnum));
exit(1);
}
The code above may be treated as having a modern BSD license or GPL v3 at your choice.
As far as the actual I/O goes, the code I've written a million times in various guises for copying data from one stream to another goes something like this. It returns 0 on success, or -1 with errno set on error (in which case any number of bytes might have been copied).
Note that for copying regular files, you can skip the EAGAIN stuff, since regular files are always blocking I/O. But inevitably if you write this code, someone will use it on other types of file descriptors, so consider it a freebie.
There's a file-specific optimisation that GNU cp does, which I haven't bothered with here: for long blocks of 0 bytes, instead of writing them, you just extend the output file by seeking past their end.
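That optimisation might look roughly like the sketch below. all_zeros() is a hypothetical helper, and a real implementation must also ftruncate() the output at the end so a trailing hole still counts in the file size:

/* Sketch only: inside the write loop, for a buffer that is entirely zero bytes */
if (all_zeros(buf, bytestowrite))
    lseek(fdout, bytestowrite, SEEK_CUR);  /* leave a hole instead of writing */
else
    write(fdout, buf, bytestowrite);       /* normal path (plus error handling) */

The general-purpose routine, without that trick, follows: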
#include <poll.h>    /* headers needed by this snippet */
#include <unistd.h>
#include <errno.h>

void block(int fd, int event) {
    struct pollfd topoll;
    topoll.fd = fd;
    topoll.events = event;
    poll(&topoll, 1, -1);
    // no need to check errors - if the stream is bust then the
    // next read/write will tell us
}

int copy_data_buffer(int fdin, int fdout, void *buf, size_t bufsize) {
    for(;;) {
        char *pos;   /* char*, not void*, so the pointer arithmetic below is portable */

        // read data to buffer
        ssize_t bytestowrite = read(fdin, buf, bufsize);
        if (bytestowrite == 0) break; // end of input
        if (bytestowrite == -1) {
            if (errno == EINTR) continue; // signal handled
            if (errno == EAGAIN) {
                block(fdin, POLLIN);
                continue;
            }
            return -1; // error
        }

        // write data from buffer
        pos = buf;
        while (bytestowrite > 0) {
            ssize_t bytes_written = write(fdout, pos, bytestowrite);
            if (bytes_written == -1) {
                if (errno == EINTR) continue; // signal handled
                if (errno == EAGAIN) {
                    block(fdout, POLLOUT);
                    continue;
                }
                return -1; // error
            }
            bytestowrite -= bytes_written;
            pos += bytes_written;
        }
    }
    return 0; // success
}
// Default value. I think it will get close to maximum speed on most
// systems, short of using mmap etc. But porters / integrators
// might want to set it smaller, if the system is very memory
// constrained and they don't want this routine to starve
// concurrent ops of memory. And they might want to set it larger
// if I'm completely wrong and larger buffers improve performance.
// It's worth trying several MB at least once, although with huge
// allocations you have to watch for the linux
// "crash on access instead of returning 0" behaviour for failed malloc.
#ifndef FILECOPY_BUFFER_SIZE
#define FILECOPY_BUFFER_SIZE (64*1024)
#endif

int copy_data(int fdin, int fdout) {
    // optional exercise for reader: take the file size as a parameter,
    // and don't use a buffer any bigger than that. This prevents
    // memory-hogging if FILECOPY_BUFFER_SIZE is very large and the file
    // is small.
    for (size_t bufsize = FILECOPY_BUFFER_SIZE; bufsize >= 256; bufsize /= 2) {
        void *buffer = malloc(bufsize);
        if (buffer != NULL) {
            int result = copy_data_buffer(fdin, fdout, buffer, bufsize);
            free(buffer);
            return result;
        }
    }
    // could use a stack buffer here instead of failing, if desired.
    // 128 bytes ought to fit on any stack worth having, but again
    // this could be made configurable.
    return -1; // errno is ENOMEM
}
To open the input file (O_BINARY is a Windows extension; define it as 0 on POSIX systems):
int fdin = open(infile, O_RDONLY|O_BINARY, 0);
if (fdin == -1) return -1;
Opening the output file is tricksy. As a basis, you want:
int fdout = open(outfile, O_WRONLY|O_BINARY|O_CREAT|O_TRUNC, 0x1ff); /* 0x1ff == 0777 */
if (fdout == -1) {
    close(fdin);
    return -1;
}
But there are confounding factors:
you need to special-case when the files are the same, and I can't remember how to do that portably.
if the output filename is a directory, you might want to copy the file into the directory.
if the output file already exists (open with O_EXCL to determine this and check for EEXIST on error), you might want to do something different, as cp -i does (see the sketch below).
you might want the permissions of the output file to reflect those of the input file.
you might want other platform-specific meta-data to be copied.
you may or may not wish to unlink the output file on error.
Obviously the answers to all these questions could be "do the same as cp". In which case the answer to the original question is "ignore everything I or anyone else has said, and use the source of cp".
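For the third point in the list above, the O_EXCL probe might look like this (a sketch only; assumes POSIX <fcntl.h> and <errno.h>, error handling elided):

int fdout = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0666);
if (fdout == -1 && errno == EEXIST) {
    /* the output file already exists: prompt, overwrite, or bail out,
       much as cp -i would */
}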
Btw, getting the filesystem's cluster size is next to useless. You'll almost always see speed increasing with buffer size long after you've passed the size of a disk block.
The size of each read should be a multiple of 512 (the sector size); 4096 is a good one.
Here is a very easy and clear example: Copy a file. Since it is written in ANSI C without any platform-specific function calls, I think this one would be pretty much portable.
Depending on what you mean by copying a file, it is certainly far from trivial. If you mean copying the content only, then there is almost nothing to do. But generally, you need to copy the metadata of the file, and that's surely platform dependent. I don't know of any C library which does what you want in a portable manner. Just handling the filename by itself is no trivial matter if you care about portability.
In C++, there is the filesystem library in Boost.
One thing I found when implementing my own file copy, and it seems obvious but it's not: I/O is slow. You can pretty much time your copy's speed by how many I/O operations you do. So clearly you need to do as few of them as possible.
The best results I found were when I got myself a ginormous buffer, read the entire source file into it in one I/O, then wrote the entire buffer back out in one I/O. If I even had to do it in 10 batches, it got way slow. Trying to read and write each byte, like a naive coder might try first, was just painful.
The accepted answer written by Steve Jessop does not answer the first part of the question; Jonathan Leffler's does, but gets it slightly wrong: the code should be written as
while ((n = fread(buffer, 1, sizeof(buffer), f1)) > 0)
    if (fwrite(buffer, n, 1, f2) != 1)
        /* we got a write error here */
/* test ferror(f1) for read errors */
Explanation:
sizeof(char) == 1 by definition, always: it does not matter how many bits are in it, 8 (in most cases), 9, 11 or 32 (on some DSPs, for example); the size of char is one. Note that this is not an error in the original, just extra code.
The fwrite function writes up to nmemb (second argument) elements of the specified size (third argument); it is not required to write exactly nmemb elements. To fix this you must either write the rest of the data that was read, or just write one element of size n and let fwrite do all the work (see the sketch after this list). (This point is debatable, i.e. whether fwrite must write all the data or not, but in my version short writes are impossible until an error occurs.)
You should test for read errors too: just test ferror(f1) at the end of the loop.
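A sketch of the retry-loop alternative mentioned in the second point, handling short writes byte by byte (my addition, not from the original answer; buffer, n and f2 are the names used above):

size_t written = 0;
while (written < n) {
    size_t w = fwrite(buffer + written, 1, n - written, f2);
    if (w == 0)
        break;          /* write error; check ferror(f2) */
    written += w;
}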
Note that you probably need to disable buffering on both the input and output files to prevent triple buffering: first on read into f1's buffer, second in our code, third on write into f2's buffer:
setvbuf(f1, NULL, _IONBF, 0);
setvbuf(f2, NULL, _IONBF, 0);
(The internal buffers should probably be of size BUFSIZ.)