So I am trying to make a function print_file_rows() that prints the first ten rows of a file. If the file has more than 10 rows it works perfectly fine but if there's 10 or less it starts printing garbage. Any ideas on how I can fix this? (MUST ONLY USE THE SYSTEM FUNCTIONS OPEN/READ/WRITE/CLOSE)
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
/*
 * Prints the first ten lines of the file at `path` with printf.
 * The whole file is first read into a heap buffer that is doubled whenever
 * fewer than 100 bytes of headroom remain after a read.
 *
 * NOTE(review): read() never writes a '\0' after the data, yet the print loop
 * below stops only on '\0' or after ten newlines. When the file holds fewer
 * than ten newlines the loop therefore walks past the bytes that were read.
 */
void print_file_rows(char *path)
{
int fd = open(path, O_RDONLY);
if (fd < 0)
{
/* NOTE(review): a void function cannot return a value; a bare `return;` is presumably intended. */
return NULL;
}
size_t size = 100;
size_t offset = 0;
/* NOTE(review): read() returns ssize_t; held in a size_t, an error result of -1 is not seen as negative. */
size_t res;
char *buff = malloc(size);
/* Append up to 100 bytes per call until read() reports 0 (end of file). */
while((res = read(fd, buff + offset, 100)) != 0)
{
offset += res;
/* Keep room for the next 100-byte read. */
if (offset + 100 > size)
{
size *= 2;
buff = realloc(buff, size);
}
}
close(fd);
/* j counts the newlines printed so far. */
int j = 0;
for(int i = 0;buff[i] != '\0'; i++)
{
if(j == 10)
{
break;
}
if(buff[i] == '\n')
{
j++;
}
printf("%c", buff[i]);
}
free(buff);
}
/* Entry point: prints the leading rows of "a.txt" and reports success. */
int main(void)
{
    print_file_rows("a.txt");

    return 0;
}
You do not need any buffers. It is most likely buffered on the OS level so you may print char by char.
/*
 * Copies the first `nrows` lines of the file at `path` to standard output,
 * one byte at a time, using only open/read/write/close.
 *
 * path  - file to read.
 * nrows - maximum number of newline-terminated rows to print.
 *
 * Returns the number of requested rows that were not printed (0 when all
 * `nrows` rows were found), or -1 if the file could not be opened or a
 * read()/write() call failed.
 */
int print_file_rows(char *path, size_t nrows)
{
    int fd = open(path, O_RDONLY);

    /* 0 is a valid descriptor; only a negative value signals failure. */
    if (fd < 0)
        return -1;

    int result = 0;
    char c;

    while (nrows > 0) {
        ssize_t got = read(fd, &c, 1);

        if (got == 0)               /* end of file before nrows rows */
            break;
        if (got < 0 || write(STDOUT_FILENO, &c, 1) != 1) {
            result = -1;
            break;
        }
        if (c == '\n')
            nrows--;
    }

    if (result == 0)
        result = (int)nrows;

    /* Only reached with a descriptor that was actually opened. */
    close(fd);
    return result;
}
/* Entry point: prints up to ten rows of "a.txt" and reports a failure of the helper. */
int main(void)
{
    int status = print_file_rows("a.txt", 10);

    if (status == -1) {
        printf("Something has gone wrong\n");
    }
    return 0;
}
From man 2 read:
SYNOPSIS
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
DESCRIPTION
read() attempts to read up to count bytes from file descriptor fd into the buffer starting at buf.
read is for reading raw bytes, and as such has no notion of strings. It does not place a NUL terminating byte ('\0') at the end of the buffer. If you are going to treat the data you read as a string, you must terminate the buffer yourself.
To make room for this NUL terminating byte you should always allocate one extra byte in your buffer (i.e., read one less byte than your maximum).
We can see the return value is actually of type ssize_t, rather than size_t, which allows for a negative value to signal failure:
On error, -1 is returned, and errno is set to indicate the error.
This means we will need to check that the return value is greater than zero, rather than not zero which would cause the offset to be decremented on error.
With all that said, note that this answer from a similar question posted just yesterday shows how to achieve this without the use of a dynamic buffer. You can simply read the file one byte at a time and stop reading when you've encountered 10 newline characters.
If you do want to understand how to read a file into a dynamic buffer, then here is an example using the calculated offset to NUL terminate the buffer as it grows. Note that reading the entire file this way is inefficient for this task (especially for a large file).
(Note: the call to write, instead of printf)
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Writes the first ten lines of the file at `path` to standard output.
 *
 * The file is read in 100-byte chunks into a growing heap buffer that is
 * kept NUL-terminated after every successful read, then scanned byte by
 * byte until ten newlines have been written or the terminator is reached.
 * No error handling is performed for open, malloc or realloc.
 */
void print_file_rows(const char *path)
{
    enum { CHUNK = 100, MAX_LINES = 10 };

    int fd = open(path, O_RDONLY);
    size_t capacity = CHUNK;
    size_t used = 0;
    /* One spare byte beyond `capacity` always holds the terminator. */
    char *text = malloc(capacity + 1);

    for (;;) {
        ssize_t got = read(fd, text + used, CHUNK);

        if (got <= 0)
            break;
        used += got;
        text[used] = '\0';
        /* Guarantee room for the next full chunk. */
        if (used + CHUNK > capacity) {
            capacity *= 2;
            text = realloc(text, capacity + 1);
        }
    }
    close(fd);

    int printed = 0;
    const char *cursor = text;

    while (printed < MAX_LINES && *cursor != '\0') {
        write(STDOUT_FILENO, cursor, 1);
        if (*cursor == '\n')
            printed++;
        cursor++;
    }
    free(text);
}
/* Entry point: prints the leading rows of "a.txt". */
int main(void)
{
    print_file_rows("a.txt");

    return 0;
}
(Error handling omitted for code brevity. malloc, realloc, and open can all fail, and should normally be handled.)
Related
I've got this homework, which is to make an implementation of the "tail" command in Linux and this is what i have so far:
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
/*
 * Allocates a fresh buffer of size + 1 chars and copies the first `size`
 * chars of `data` into it. The old buffer is left untouched (it is not
 * freed here). Returns the new buffer.
 */
char *resize(char *data, int size)
{
    char *grown = malloc(sizeof(char) * (size + 1));
    const char *from = data;
    char *to = grown;

    for (int left = size; left > 0; left--)
        *to++ = *from++;

    return grown;
}
/*
 * Reads standard input one byte at a time into a heap buffer that is grown
 * by one char per byte via resize(), and returns that buffer.
 *
 * NOTE(review): the incoming value of `data` is overwritten immediately, and
 * the parameter is passed by value, so the caller only gets the buffer
 * through the return value.
 */
char *readStdIn(char *data)
{
char buff, end = '\n';
int rState, size = 1;
data = malloc(size * sizeof(char));
/* NOTE(review): sizeof(data) is the size of the pointer, not of the allocation. */
printf("%ld\n", sizeof(data));
while ((rState = read(STDIN_FILENO, &buff, 1)) > 0)
{
/* NOTE(review): unreachable - the loop condition already requires rState > 0. */
if(rState < 0)
{
if(errno == EINTR) rState = 0;
else
{
perror("read()");
return 0;
}
}
/* Grow to size + 1 chars, then store the new byte at the end of the old data. */
data = resize(data, size);
data[size - 1] = buff;
size++;
}
/* NOTE(review): still prints the pointer size; `size` holds the running count. */
printf("%ld\n", sizeof(data));
/* On end of input, emit a newline on standard output. */
if(rState == 0) write(STDOUT_FILENO, &end, 1);
return data;
}
/*
 * Intended to count newlines in `data` from the end; currently always
 * returns 0 and prints nothing.
 *
 * NOTE(review): sizeof(data) is the size of the pointer, not the length of
 * the data. The `for` statement ends in ';', so its body is empty and the
 * `if` below runs once after the loop, when position is -1.
 */
int printLines(char *data)
{
int lines = 0, position;// counter;
for(position = sizeof(data) - 1; position > -1; position--);
if (data[position] == '\n') lines++;
return 0;
}
/*
 * Entry point. With no command-line arguments, reads standard input and
 * passes the data to printLines(); returns 1 on failure, 0 otherwise.
 */
int main(int argc, char *argv[])
{
char *data = 0;
if(argc == 1)
{
/* NOTE(review): the buffer returned by readStdIn() is discarded, so `data` is still 0 on the next line. */
readStdIn(data);
if(data == 0) return 1;
if(printLines(data) != 0) return 1;
}
return 0;
}
In readStdIn() I'm supposed to save what i have read in the standard input and put it in char* data, so that i can find the last 10 lines(or find that there are less than 10 lines) of the input. The problem is, that when i call resize(), data doesn't resize, and I can't find out why. It's most likely a problem in the resize() itself, but i can't figure it out. The idea is that resize() increases the size of data by 1 char.
I hope I was thorough enough in my explanation.
Your printf is incorrect. You are printing the size of a pointer, which will be a fixed number depending on your architecture. You should instead print out the size directly.
printf("%d\n", size);
Your resize is correct except that you failed to free the previous memory, so you have a memory leak. You can fix this by adding a free() before you return.
/*
 * Grows the heap buffer `data` so that it can hold size + 1 chars, keeping
 * its existing contents.
 *
 * data - buffer previously obtained from malloc/realloc (or NULL).
 * size - number of chars currently stored in `data`.
 *
 * Returns the resized buffer, which replaces `data` (the old pointer must no
 * longer be used), or NULL if size is negative or memory is exhausted. On a
 * NULL return `data` is left allocated and unchanged, so the caller still
 * owns it.
 */
char *resize(char *data, int size)
{
    if (size < 0)
        return NULL;

    /* realloc copies the old contents and releases the old block itself. */
    return realloc(data, ((size_t)size + 1) * sizeof(char));
}
I'm a newbie and I'm trying to implement my own readline() with POSIX IO primitives.
I've written the code:
/*
 * Reads bytes from `fd` one at a time into a local buffer until a newline,
 * end of file, or BUFSIZ bytes, appends '\n' and a terminating 0, and copies
 * at most `size` bytes of the result into *line with strncpy.
 *
 * Returns -1 if `line` is NULL or read() fails, 0 if the buffer starts with
 * '\n', '\0' or '\r', otherwise the number of bytes placed in the local
 * buffer (excluding the terminator).
 *
 * NOTE(review): `ch` is allocated on every call and never freed.
 * NOTE(review): the loop can exit with index == BUFSIZ, after which the two
 * stores below write past the end of `buf`.
 * NOTE(review): a line that already ends in '\n' gets a second '\n' appended.
 */
int readline(int fd, char **line, size_t size)
{
char *ch = malloc(sizeof(char));
char buf[BUFSIZ];
int index = 0;
int nr;
if (NULL == line)
return -1;
do
{
nr = read(fd, ch, 1);
if (nr == -1)
{
perror("read()");
return -1;
} else
if (0 == nr)
break;
buf[index++] = (*ch);
} while ((*ch != '\n') && (index < BUFSIZ));
/* Drop a trailing carriage return, if that was the last byte stored. */
if ((index) && (buf[index-1] == '\r'))
{
index--;
}
buf[index++] = '\n';
buf[index] = 0; /* null-terminate */
printf("index = %d\n", index);
/* strncpy copies at most `size` bytes and does not terminate when truncating. */
strncpy(*line, buf, size);
if ((buf[0] == '\n') || (buf[0] == '\0') || (buf[0] == '\r'))
return 0;
return index;
}
/*
 * Entry point: prints /etc/passwd to standard output line by line using
 * readline(), stopping when readline() returns 0 or a negative value.
 */
int main(void)
{
int fd;
ssize_t nbytes;
char *line = malloc(BUFSIZ*sizeof(char));
fd = open("/etc/passwd", O_RDONLY);
/* NOTE(review): sizeof(line) is the size of the pointer, not the BUFSIZ bytes allocated above. */
while((nbytes = readline(fd, &line, sizeof(line))) > 0)
{
write(1, line, nbytes);
}
close(fd);
return 0;
}
But it reads only 8 characters of each line. I spent much time to fix the code, but didn't get anywhere. I also read many topics on this website, most of them use the same algorithm, but it is only my code that does not work!
The problem is your use of the sizeof operator to get the size of the allocated memory. The sizeof operator will not give you the size of what a pointer points to; it will give you the size of the pointer itself.
In your case, it's 8 bytes which equals 64 bits, which is the standard pointer size on 64-bit systems.
You need to keep track of the sizes you allocate yourself, and in this case pass BUFSIZ as the argument instead.
I've got a C program that reproduces a server using FIFOs. The program reads two lines from an input FIFO — a number n and a string str— and writes on an output FIFO n lines, each of which is a single occurrence of str. I wrote the following code.
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#define MAX_SIZE 256
/*
 * Reads bytes from `fd` one at a time into `buffer` until a newline is read
 * or read() returns 0, and returns `buffer`. The newline is not stored.
 *
 * NOTE(review): no terminating '\0' is written, so bytes from an earlier,
 * longer line remain in `buffer` after the new data.
 * NOTE(review): a read() error (-1) does not end the loop, and nothing
 * bounds `i` against the size of `buffer`.
 */
char *readline(int fd, char *buffer) {
char c;
int i = 0;
while (read(fd, &c, 1) != 0) {
if (c == '\n')
break;
buffer[i++] = c;
}
return buffer;
}
/*
 * Entry point: creates the two FIFOs named by argv[1] and argv[2], then
 * loops forever reading a count line and a text line from the input FIFO
 * and writing the text line `n` times to the output FIFO.
 */
int main(int argc, char** argv) {
mkfifo(argv[1], 0666);
mkfifo(argv[2], 0666);
int in = open(argv[1], O_RDONLY);
int out = open(argv[2] ,O_WRONLY);
char line[MAX_SIZE];
memset(line, 0, MAX_SIZE);
int n, i;
while (1) {
/* NOTE(review): readline() returns `line` itself, so source and destination of strcpy() are the same buffer. */
strcpy(line, readline(in, line));
/* NOTE(review): the sscanf() result is not checked; on failure `n` keeps its previous value. */
sscanf(line, "%d", &n);
strcpy(line, readline(in, line));
for (i = 0; i < n; i++) {
write(out, line, strlen(line));
write(out, "\n", 1);
}
}
/* Not reached: the loop above has no exit. */
close(in);
close(out);
return 0;
}
This program compiles and runs with no errors, but it outputs a different number of occurrences of the string at each execution. For example, if the two input lines in the input FIFO are 5\nhello, it then prints out from 1 to 25 occurrences of hello at each run (frequency appears to be completely random).
I've been stuck on this for two days. Please give me some help.
I make no claims or warrants that I even know what your program does, as it has been 20 years since I had any pressing need to work with FIFO's at the system level. But one thing is clear. Two days is a long time to work on something without ever running it in a debugger, of which doing so would have exposed a number of problems.
First, readline() never terminates the string it is passed. This isn't as important the first time around as the second and beyond, since shorter data may be present in the input lines. Furthermore, read() can fail, and in doing so does not return 0, the only condition on your loop which will break. That failure should break the loop and be reflected in the return result. Because you return the buffer pointer, a reasonable failure-result could be NULL:
Consider something like this:
/*
 * Reads one line from `fd` into `buffer`, one byte per read() call.
 *
 * Bytes are stored until a newline (which is not stored) or end of file,
 * then the result is NUL-terminated. Returns `buffer` on success, or NULL
 * if read() reports an error; in that case no terminator is written.
 * The caller must supply a buffer large enough for the longest line.
 */
char *readline(int fd, char *buffer)
{
    char *out = buffer;
    char ch = 0;
    ssize_t got;

    while ((got = read(fd, &ch, 1)) > 0 && ch != '\n')
        *out++ = ch;

    if (got < 0)
        return NULL;

    *out = '\0';
    return buffer;
}
One could argue that it should return NULL if the buffer is empty, since you can't put a 0-length packet on a FIFO. I leave that for you to decide, but it is a potential hole in your algorithm, to be sure.
Next, the strcpy() function has undefined behavior if the buffers submitted overlap. Since readline() returns the very buffer that was passed in, and that same buffer is also the target of strcpy(), your program is executing UB. From all that I see, strcpy() is useless in this program in the first place, and shouldn't even be there at all.
This is clearly wrong:
/* Quoted from the question: readline() returns `line`, so each strcpy() copies a buffer onto itself. */
strcpy(line, readline(in, line));
/* The sscanf() result is ignored, so a failed parse leaves `n` unchanged. */
sscanf(line, "%d", &n);
strcpy(line, readline(in, line));
for (i = 0; i < n; i++) {
write(out, line, strlen(line));
write(out, "\n", 1);
}
The above should be this:
/* Read the count line; a NULL result from readline() skips everything below. */
if (readline(in, line))
{
/* Continue only when exactly one integer was parsed into n. */
if (sscanf(line, "%d", &n) == 1)
{
/* Read the text line that is to be repeated. */
if (readline(in, line))
{
/* Emit the text line n times, each followed by a newline. */
for (i = 0; i < n; i++)
{
write(out, line, strlen(line));
write(out, "\n", 1);
}
}
}
}
assuming the changes to readline() as prescribed were made. These could be combined into a single three-expression if-clause, but as written above it is at-least debuggable. In other words, via short-circuit eval there should be no problems doing this:
/*
 * Same checks as a nested form, combined with &&: the count line must be
 * read, parsed as one integer, and the text line must be read, in that
 * order; evaluation stops at the first step that fails.
 */
if (readline(in, line) &&
sscanf(line, "%d", &n) == 1 &&
readline(in, line))
{
/* Emit the text line n times, each followed by a newline. */
for (i = 0; i < n; i++)
{
write(out, line, strlen(line));
write(out, "\n", 1);
}
}
but I would advise you keep the former until you have thoroughly debugged this.
Finally, note that readline() is still a buffer overflow just waiting to happen. You really should pass a max-len to that function and limit that potential possibility, or dynamically manage the buffer.
The code does not initialise line for each iteration, so if readline() does not read anything it leaves line's content untouched.
And you do not test whether sscanf() fails, so the code does not recognize that n is left unchanged; the last value of line gets printed out n times and everything starts over again ...
Also, readline() fails to check whether read() failed.
The lesson to learn from this exercise is to always test whether a (system) call failed.
/*
 * Reads one line from `fd` into `buf`, one byte per read() call.
 *
 * Returns the number of bytes read (including the newline) once a newline
 * has been stored and the buffer NUL-terminated; 0 if end of file is reached
 * before any byte was read; -1 if read() fails with an error other than
 * EINTR or if realloc() fails; and -1 with errno set to EINVAL if the loop
 * ends without a newline (end of file after partial data, or nbytes - 1
 * bytes read).
 *
 * NOTE(review): `allocSize` is not declared in this function - presumably a
 * file-scope variable tracking the allocation size of `buf`; confirm.
 * NOTE(review): `buf` is a by-value parameter, so the pointer returned by
 * realloc() below is not visible to the caller.
 */
int readline(int fd, char *buf, int nbytes) {
int numread = 0;
int value; /* result of read(); the checks below act on this count */
/* Controls */
while (numread < nbytes - 1) {
value = read(fd, buf + numread, 1);
/* Interrupted by a signal: retry the read. */
if ((value == -1) && (errno == EINTR))
continue;
if ( (value == 0) && (numread == 0) )
return 0;
if (value == 0)
break;
if (value == -1)
return -1;
numread++;
/* Reallocate to expand the buffer. */
if( numread == allocSize-2 ){
allocSize*=2; /* when allocSize is no longer sufficient, double it so that
buf can be expanded */
buf=realloc(buf,allocSize);
if( buf == NULL ){
fprintf(stderr,"Failed to reallocate!\n");
return -1;
}
}
/* If the byte just read is '\n', terminate the string and return the number of bytes read. */
if (buf[numread-1] == '\n') {
buf[numread] = '\0';
return numread;
}
}
errno = EINVAL;
return -1;
}
I'm trying to read in bits using the read function and I'm not sure how I'm supposed to printf the results using the buffer.
currently the code fragment is as follows
/* Opens the file named by argv[1] and prints each chunk that read() returns. */
char *infile = argv[1];
char *ptr = buff;
int fd = open(infile, O_RDONLY); /* read only */
assert(fd > -1);
/* NOTE(review): read() returns ssize_t; a char cannot hold every possible byte count. */
char n;
while((n = read(fd, ptr, SIZE)) > 0){ /*loops that reads the file until it returns empty */
/* NOTE(review): the data is used as the format string and is not NUL-terminated by read(). */
printf(ptr);
}
The data read into ptr may contain \0 bytes, format specifiers and is not necessarily \0 terminated. All good reasons not to use printf(ptr). Instead:
// char n;
/* ssize_t matches the return type of read(). */
ssize_t n;
while((n = read(fd, ptr, SIZE)) > 0) {
ssize_t i;
/* Print exactly the n bytes that were read, each as two hex digits. */
for (i = 0; i < n; i++) {
printf(" %02hhX", ptr[i]);
// On older compilers use --> printf(" %02X", (unsigned) ptr[i]);
}
}
Here is the code that does the job for you:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <string.h>
#define SIZE 1024
/*
 * Copies a file to standard output in SIZE-byte chunks.
 *
 * The file name is taken from argv[1] when given and defaults to "Text.txt".
 * Exactly the number of bytes returned by each read() is written, so the
 * buffer does not need a terminating '\0' and embedded '\0' bytes are kept.
 * Returns 0; aborts via assert if the file cannot be opened.
 */
int main(int argc, char* argv[])
{
    const char *infile = (argc > 1) ? argv[1] : "Text.txt";
    char ptrBuffer[SIZE];
    int fd = open(infile, O_RDONLY); /* read only */
    assert(fd > -1);

    ssize_t n;
    /* Loop until read() reports end of file (0) or an error (-1). */
    while ((n = read(fd, ptrBuffer, SIZE)) > 0) {
        /* read() does not NUL-terminate, so "%s" would overrun a full buffer. */
        fwrite(ptrBuffer, 1, (size_t)n, stdout);
    }

    close(fd);
    return 0;
}
You can read file name as parameter.
Even if ptr is a string, you need to use printf("%s", ptr);, not printf(ptr);
However, after you call
read(fd, ptr, SIZE)
ptr is rarely a string (strings need to be null-terminated). You need to use a loop and choose the format you need. For example:
/* Print each of the n bytes read, not the first byte n times; the cast
   keeps bytes above 0x7F from being sign-extended by %02X. */
for (int i = 0; i < n; i++)
    printf("%02X ", (unsigned char)ptr[i]);
I want count the characters (in various charsets) in a file and I'm using the function 'mbtowc' to detect the characters. I cannot figure out why the chars and results values are different. Here is my example:
/* Counts bytes and multibyte characters of "chinese_test", reading BUFFER_SIZE bytes per block. */
char buf[BUFFER_SIZE + MB_LEN_MAX];
int fd = open ("chinese_test", O_RDONLY);
unsigned int bytes, chars;
int bytes_read;
bytes = chars = 0;
while((bytes_read = read(fd, buf, BUFFER_SIZE)) > 0) {
wchar_t wc_buf[BUFFER_SIZE], *wcp;
char *p;
int n = 0;
bytes += bytes_read;
p = buf;
wcp = wc_buf;
/* NOTE(review): the loop stops at the first mbtowc() result <= 0, so the
   rest of the block goes uncounted; it is bounded by that result rather
   than by bytes_read. */
while((n = mbtowc(wcp, p, MB_LEN_MAX)) > 0) {
p += n;
wcp++;
chars++;
}
}
/* NOTE(review): chars and bytes are unsigned int but printed with %d. */
printf("chars: %d\tbytes: %d\n", chars, bytes);
I tested the function with a text containing some GB2312 characters, but the chars and bytes values differ far more than expected.
My test returns -> chars: 4638 | bytes: 17473
but 'wc' linux command returns: chars: 16770 | bytes: 17473
Why this difference? What did I do wrong?
Now I have this code, but there are still some differences in the result.
/* Second attempt: counts bytes and multibyte characters of "test_chinese",
   skipping one byte whenever mbtowc() cannot decode a character. */
char buf[BUFFER_SIZE * MB_LEN_MAX];
int fd = open ("test_chinese", O_RDONLY), filled = 0;
unsigned int bytes, chars;
int bytes_read;
bytes = chars = 0;
while((bytes_read = read(fd, buf, BUFFER_SIZE)) > 0) {
wchar_t wc_buf[BUFFER_SIZE], *wcp;
char *p;
int n = 0;
bytes += bytes_read;
p = buf;
wcp = wc_buf;
/* bytes_read is reused as the number of bytes of this block still to be examined. */
while(bytes_read > 0) {
n = mbtowc(NULL, p, MB_LEN_MAX);
/* Undecodable byte (or a '\0'): step over it without counting a character. */
if (n <= 0) {
p++;
bytes_read--;
continue;
}
p += n;
bytes_read -= n;
chars++;
}
}
printf("\n\nchars: %d\tbytes: %d\n", chars, bytes);
The problem is a combination of your BUFFER_SIZE, the file size of chinese_test and the byte alignment of wchar_t. As proof, try drastically increasing BUFFER_SIZE- you should start getting the answer you want.
What is happening is that your program works for the first block of text that it receives. But think about what happens in your code if a character is split between the first and second blocks as follows:
| First Block | Second Block |
| [wchar_t] [wchar_t] ... [wchar_t] [wchar_t] ... |
| [1,2,3,4] [1,2,3,4] ... [1,2,3,4] [1,2,3,4] ... |
Your code will begin the second block on the 3rd byte in the first character, and that will not be recognized as a valid character. Since mbtowc will return -1 when it does not find a valid character, your loop will immediately end and will count zero characters for that entire block. The same will apply for the following blocks.
EDIT:
Another issue I noticed is that you need to set the locale in order for mbtowc to work correctly. Taking all of these issues into account, I wrote the following which returns the same character count as wc for me:
#include <stdlib.h>
#include <stdio.h>
#include <locale.h>
int BUFFER_SIZE = 1024;
const char *DEFAULT_F_IN = "chinese_test";
/* Running totals for a file: bytes decoded so far and characters found in them. */
struct counts {
    int bytes;
    int chars;
};

/*
 * Counts the multibyte characters in buf[0 .. buf_size) using the current
 * locale and adds the results to *c.
 *
 * c        - totals to update; bytes grows by the bytes decoded, chars by
 *            the characters found.
 * buf      - block of raw file data (need not be NUL-terminated).
 * buf_size - number of valid bytes in buf.
 *
 * Returns the number of trailing bytes that could not be decoded: 0 when the
 * whole block was consumed, otherwise the length of the undecodable tail
 * (for example a character split across two blocks).
 */
int count_block(struct counts *c, char *buf, int buf_size)
{
    int offset = 0;

    /* Drop any conversion state left behind by an earlier failed decode. */
    mbtowc(NULL, NULL, 0);

    while (offset < buf_size) {
        /* Never let mbtowc look past the valid bytes of this block. */
        size_t avail = (size_t)(buf_size - offset);
        size_t limit = avail < (size_t)MB_CUR_MAX ? avail : (size_t)MB_CUR_MAX;
        int n = mbtowc(NULL, buf + offset, limit);

        if (n < 0) {
            break;
        }
        if (n == 0) {
            n = 1;  /* a '\0' byte is a one-byte character, not end of data */
        }
        offset += n;
        c->bytes += n;
        c->chars++;
    }
    return buf_size - offset;
}
/*
 * Fills *c with the byte and character counts of the stream `fd`.
 *
 * The stream is read in BUFFER_SIZE-byte blocks. When a block ends in the
 * middle of a multibyte character, the stream is rewound by the length of
 * the undecoded tail so the next block starts on that character.
 *
 * Exits the process with status 1 if the undecoded tail is at least
 * MB_CUR_MAX bytes long (the data is not valid in the current locale) or if
 * the stream cannot be repositioned.
 */
void get_counts(struct counts *c, FILE *fd)
{
    char buf[BUFFER_SIZE];
    size_t bytes_read;

    c->bytes = 0;
    c->chars = 0;

    while ((bytes_read = fread(buf, sizeof(*buf), BUFFER_SIZE, fd)) > 0) {
        int remaining = count_block(c, buf, (int)bytes_read);

        if (remaining == 0) {
            continue;
        }
        if ((size_t)remaining >= (size_t)MB_CUR_MAX) {
            perror("Error");
            exit(1);
        }
        /* Nothing in this block could be decoded: it is a truncated character
           at end of file. Rewinding would re-read the same bytes forever. */
        if ((size_t)remaining == bytes_read) {
            break;
        }
        if (fseek(fd, -(long)remaining, SEEK_CUR) != 0) {
            perror("Error");
            exit(1);
        }
    }
}
/*
 * Entry point: prints the character and byte counts of the file named by
 * argv[1], or of DEFAULT_F_IN when no argument is given.
 * Returns 0 on success and 1 if the file cannot be opened.
 */
int main(int argc, char *argv[]) {
    const char *path = (argc > 1) ? argv[1] : DEFAULT_F_IN;
    FILE *fd = fopen(path, "rb");

    if (fd == NULL) {
        perror(path);
        return 1;
    }

    /* Multibyte decoding follows the locale taken from the environment. */
    setlocale(LC_ALL, "");

    struct counts c;
    get_counts(&c, fd);
    fclose(fd);

    printf("chars: %d\tbytes: %d\n", c.chars, c.bytes);
    return 0;
}