C mmap implementation for linux command - c

I have the following code which basically reproduce the functionality of the wc command in linux. My question is how I can rewrite the code using mmap? I know I can use struct stat sb; and then char *file_in_memory = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); but I can't get it work/ I don't know how to implement it correctly in the while loop while ((n = read(file, buffer, LUNG_BUF - 1)) > 0). In my tries after I run the code it will display only values of 0.
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#define LUNG_BUF 4096
int main(int argc, char** argv)
{
int bytes = 0;
int words = 0;
int newLine = 0;
int max_value; // the maximum of the above three
int dim; // string width of the max value
char buffer[LUNG_BUF];
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
if ( argc !=2 )
{
printf( "No file name\n%s", argv[0]);
}
else
{
int file = open(argv[1], O_RDONLY);
if(file < 0)
{
printf("can not open :%s\n",argv[1]);
}
else
{
char *thefile = argv[1];
size_t n;
while ((n = read(file, buffer, LUNG_BUF - 1)) > 0)
{
buffer[n] = '\0';
char *ptr = buffer;
while (*ptr)
{
bytes++;
if (*ptr == ' ' || *ptr == '\t')
{
state = WHITESPACE;
}
else if (*ptr == '\n')
{
newLine++;
state = WHITESPACE;
}
else
{
if (state == WHITESPACE)
{
words++;
}
state = WORD;
}
ptr++;
}
}
// find out the largest value of all and determine the printed width of it
max_value = newLine;
if (words > max_value)
max_value = words;
if (bytes > max_value)
max_value = bytes;
dim = snprintf(NULL, 0, "%d", max_value);
// print lines, words, bytes and filename aligned to the longest number
printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, thefile);
}
}
}
The script that I was trying:
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#define LUNG_BUF 4096
int main(int argc, char** argv)
{
int bytes = 0;
int words = 0;
int newLine = 0;
int max_value; // the maximum of the above three
int dim; // string width of the max value
char buffer[LUNG_BUF];
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
if ( argc !=2 )
{
printf( "No file name\n%s", argv[0]);
}
else
{
int file = open(argv[1], O_RDONLY);
if(file < 0)
{
printf("can not open :%s\n",argv[1]);
}
else
{
char *thefile = argv[1];
size_t n;
struct stat sb;
char *file_in_memory = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
for(int i=0;i<=sb.st_size;i++)
{
buffer[i] = '\0';
char *ptr = buffer;
while (*ptr)
{
bytes++;
if (*ptr == ' ' || *ptr == '\t')
{
state = WHITESPACE;
}
else if (*ptr == '\n')
{
newLine++;
state = WHITESPACE;
}
else
{
if (state == WHITESPACE)
{
words++;
}
state = WORD;
}
ptr++;
}
}
// find out the largest value of all and determine the printed width of it
max_value = newLine;
if (words > max_value)
max_value = words;
if (bytes > max_value)
max_value = bytes;
dim = snprintf(NULL, 0, "%d", max_value);
// print lines, words, bytes and filename aligned to the longest number
printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, thefile);
munmap(file_in_memory, sb.st_size);
close(file);
}
}
}

The code you posted above didn't compile and had quite a few problems. I've tidied it up a bit below, hopefully this will help. I tried not to change it too much so you could see what I did.
You hadn't actually called stat and the fd variable you had passed to mmap was not the variable you used to open the file.
I would always compile your code with "-Wall -Werror" if you can.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
int main(int argc, char** argv)
{
if ( argc !=2 )
{
printf( "No file name\n%s", argv[0]);
exit(-1);
}
char *fileName = argv[1];
int file = open(fileName, O_RDONLY);
if(file < 0)
{
perror("Error: ");
exit(-1);
}
struct stat sb = {0};
stat(fileName, &sb);
char *filePtr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, file, 0);
if (filePtr == MAP_FAILED)
{
perror("Error:");
exit(-1);
}
int bytes = sb.st_size;
int words = 0;
int newLine = 0;
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
for(size_t pos=0;pos<=sb.st_size;pos++)
{
if (state == WHITESPACE)
{
if (filePtr[pos] == '\n')
{
newLine++;
}
else if ((filePtr[pos] != ' ') && (filePtr[pos] != '\t'))
{
state = WORD;
}
}
else // (state == WORD)
{
if (filePtr[pos] == ' ' || filePtr[pos] == '\t')
{
state = WHITESPACE;
words++;
}
else if (filePtr[pos] == '\n')
{
state = WHITESPACE;
words++;
newLine++;
}
}
}
// Max value is always bytes
int dim = snprintf(NULL, 0, "%d", bytes);
// print lines, words, bytes and filename aligned to the longest number
printf("%*d %*d %*d %s\n", dim, newLine, dim, words, dim, bytes, fileName);
munmap(filePtr, sb.st_size);
close(file);
}

Related

Why am I getting heap-buffer-overflow in this C code?

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
char *get_next_line(int fd);
int main (void)
{
int i = 0;
char *s;
int fd;
fd = open("./text", O_RDONLY);
s = get_next_line (fd);
}
char *get_next_line(int fd)
{
char *buf;
char c;
int nread;
int cnt;
if (fd < 0 || BUFFER_SIZE < 1)
return (NULL);
buf = (char*)malloc(BUFFER_SIZE + 1);
if (!buf)
return (NULL);
while(nread = (read(fd, &c, 1)) > 0)
{
*buf = c;
buf++;
cnt++;
if (c == '\n')
break;
}
if (nread < 0)
return (NULL);
*buf = '\n';
printf("%s\n", buf);
return (buf - cnt - 1);
}
When I compile with no flags, I just get two empty line. Compiling with -fsanitize=address and I know heap-buffer-overflow happens at the line printf("%s\n", buf);
But I don't know why this happen. I tried STDIN to fix it but didn't work. Can someone check this please?
You are not terminating the buf with null character.
*buf = '\n';
*buf = '\0';
Make sure you reserve the space for null character while allocating memory to buf.
Free the memory if number of bytes read are less than 0.
if (nread < 0) {
return (NULL);
}
to
if (nread < 0) {
free(startAddress);
return (NULL);
}
You can have temporary pointer to preserve the starting address of buf instead of calculating the starting address.
char *get_next_line(int fd)
{
char *buf;
char c;
int nread;
if (fd < 0 || BUFFER_SIZE < 1)
return (NULL);
buf = (char*)malloc(BUFFER_SIZE + 2);
if (!buf)
return (NULL);
char *startAddress = buf;
while(nread = (read(fd, &c, 1)) > 0)
{
*buf = c;
buf++;
if (c == '\n')
break;
}
if (nread < 0) {
free(startAddress);
return (NULL);
}
*buf = '\0';
printf("%s\n", buf);
return startAddress;
}

Unix HEAD command implementation in C fails on larger lines

I am currently implementing the Unix HEAD command with C and using only system functions. So far, it works perfectly on files, which have lines with less length than the one that I specified for my buffer:
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define LINES_TO_READ 10
#define BUFF_SIZE 4096
int main(int argc, char const *argv[]) {
for (ssize_t i = 1; i < argc; i++) {
const char *filename = argv[i];
int fd = open(filename, O_RDONLY);
if (fd < 0) {
perror("open");
return -1;
}
char ch, buffer[BUFF_SIZE];
size_t index = 0, lines = 1;
ssize_t rresult, wresult;
// Read the file byte by byte
while ((rresult = read(fd, &ch, 1)) != 0) {
if (rresult < 0) {
perror("read");
return -1;
}
// Check if the current character is a new line (the line ends here)
if (ch == '\n') {
buffer[index] = ch;
buffer[index + 1] = '\0';
ch = 0;
index = 0;
// Print the line
wresult = 0;
ssize_t buffer_length = strlen(buffer);
while (wresult != buffer_length) {
ssize_t res = write(STDOUT_FILENO, buffer + wresult, buffer_length - wresult);
if (wresult < 0) {
perror("write");
return -1;
}
wresult += res;
}
// Stop if we read 10 lines already
if (lines == LINES_TO_READ) {
break;
}
lines++;
} else {
buffer[index++] = ch;
}
}
if (close(fd) < 0) {
perror("close");
return -1;
}
}
return 0;
}
And it works on files, which have a line length with less than BUFF_SIZE (as now set, 4096).
How to avoid this and make it work for whatever the line length is?
Don't read one byte at a time. Read a chunk (4096 or 8192 bytes are reasonable sizes, or use PIPE_BUF (from limits.h)) into a buffer. Output each character while counting newlines. If you print enough newlines, terminate. If you reach the end of the buffer and haven't printed enough lines, read more data into the buffer.

How to get a line from csv file with a custom fgets

I'm currently writing a program in C that reads in from a CSV file, I have a defined buffer size but am having trouble separating each line from the buffer. I can see where the line ends by checking for a '\n' char. I cannot extract that line from the buffer for parsing however. Anybody have some ideas?
#ifndef BUFFSIZE
#define BUFFSIZE 4096
#endif
int main() {
int fd;
int fdBin;
char * buf = malloc(BUFFSIZE);
int count = 0;
bool EOFFlag = false;
fd = open("SongCSV.csv", O_RDONLY);
fdBin = open("BinarySongData.bin", O_CREAT | O_WRONLY, "0600");
if (fd == -1) {
printf("failed to open a file\n");
exit(1);
}
off_t offset = 0;
off_t offsetOld = 0;
int readBytes;
while (!EOFFlag) {
offsetOld = offset;
offset = lseek(fd, offset - offsetOld, SEEK_CUR);
readBytes = read(fd, buf, BUFFSIZE);
printf("\n\n%lld\n\n", (offset));
int i = 0;
int commaCounter = 0;
while (i < readBytes) {
if (buf[i] != '\n') {
}
if (buf[i] == '\n') {
printf("\t\t THIS IS END OF LINE \t%d", i);
commaCounter = 0;
}
if (buf[i] == ',') {
commaCounter++;
if (commaCounter == 4) {
printf("****Album Name****");
}
}
write(fdBin, buf, BUFFSIZE);
printf("%c", buf[i]);
i++;
}
if (readBytes < BUFFSIZE) {
EOFFlag = true;
printf("\nREACHED END OF FILE");
}
printf("\n");
printf("AA: END OF LINE ****%d*****", count);
count++;
}
close(fd);
close(fdBin);
return 0;
}
I do it this way, easy and simple. I just did it quickly, any doubts just ask me, Cheers.
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
int main()
{
int len = sending();
char *firstline;
int i = 0;
char buf[0];
int rd ;
int fd = open("hey.csv", O_RDONLY);
rd = read(fd, buf, 1);
firstline = malloc(sizeof(char) * len);
while (i != len)
{
firstline[i] = buf[0];
i++;
rd = read(fd, buf, 1);
}
firstline[i] = '\0';
printf("%s\n", firstline);
return (0);
}
int sending()
{
int fd = open("hey.csv", O_RDONLY);
char buf[1];
int r = 0;
r = read(fd, buf, 1);
int len = 0;
while (buf[0] != '\n')//getting exact size to malloc
{
len++;
r = read(fd, buf, 1);
}
return len;
}

Trying to use pipe to read from / write to another program

I'm trying to write a program which read output of another program and write to the program as input.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void)
{
char str[30];
printf("Input string : ");
fflush(stdout);
scanf("%s", &str);
fflush(stdout);
printf("entered string is %s\n", str);
return 0;
}
This program1 is a simple program reading input from stdin and print the string entered.
And here in the program2, I tried to create 2 pipes and execute the program1.
And read the output of program1 and get user input and deliver the string user entered to program1.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
typedef struct pipe_rw
{
pid_t cpid;
int pipe_r[2];
int pipe_w[2];
} RWPIPE;
char *get_user_input(void)
{
char buf[128];
char *input;
char ch;
int n;
int len = 0;
memset(buf, 0x0, 128);
while((ch = fgetc(stdin)) != 0xa)
{
buf[len] = ch;
len++;
}
input = malloc(sizeof(char) * (len));
strncpy(input, buf, (len));
return input;
}
int pclose_rw(RWPIPE *rwp)
{
int status, ret = 0;
if (rwp)
{
if (rwp->cpid > 0)
{
kill(rwp->cpid, SIGTERM);
do {
ret = waitpid(rwp->cpid, &status, WUNTRACED|WCONTINUED);
} while (!WIFEXITED(status) && !WIFSIGNALED(status));
}
close(rwp->pipe_r[0]);
close(rwp->pipe_w[1]);
free(rwp);
}
return ret;
}
RWPIPE *popen_rw(const char *command)
{
RWPIPE *rwp = (RWPIPE *)malloc(sizeof(*rwp));
if (rwp == NULL)
return NULL;
memset(rwp, 0x00, sizeof(*rwp));
if (pipe(rwp->pipe_r) != 0 || pipe(rwp->pipe_w) != 0)
{
free(rwp);
return NULL;
}
rwp->cpid = fork();
if (rwp->cpid == -1)
{
free(rwp);
return NULL;
}
if (rwp->cpid == 0)
{
dup2(rwp->pipe_w[0], STDIN_FILENO);
dup2(rwp->pipe_r[1], STDOUT_FILENO);
close(rwp->pipe_r[0]);
close(rwp->pipe_r[1]);
close(rwp->pipe_w[0]);
close(rwp->pipe_w[1]);
execl(command, command, NULL);
printf("Error: fail to exec command - %s ..\n", command);
exit (1);
}
else
{
close(rwp->pipe_r[1]);
close(rwp->pipe_w[0]);
}
return rwp;
}
ssize_t read_p(RWPIPE *rwp, void *buf, size_t count)
{
return read(rwp->pipe_r[0], buf, count);
}
ssize_t write_p(RWPIPE *rwp, const void *buf, size_t count)
{
return write(rwp->pipe_w[1], buf, count);
}
int main(void)
{
char rbuf[BUFSIZ], wbuf[BUFSIZ];
int ret, len, n = 0;
char *string;
RWPIPE *rwp = popen_rw("./read_write");
if (rwp == NULL)
{
printf("Error: fail to open command ..\n");
return EXIT_FAILURE;
}
while (1)
{
memset(rbuf, 0x00, sizeof(rbuf));
if (read_p(rwp, rbuf, sizeof(rbuf)) < 1)
{
printf("No more input..\n");
break;
}
printf("%s", rbuf);
string = get_user_input();
len = strlen(string);
ret = write_p(rwp, string, len);
if (ret != len)
{
printf("Write %d bytes (expected %d) ..\n", ret, len);
break;
}
printf("end");
}
pclose_rw(rwp);
return EXIT_SUCCESS;
}
If run the program2 reads output of program1 successfully.
And it gets user input but it failed to give the string entered from user to program1.
[root#localhost test_code]# ./rw_pipe
Input string : 1234
^C
Please give me some ideas why it works like this.
Your primary problem is that the data written to the child does not end with a newline, so the child is not aware that the message is complete (it isn't complete) and the child is still busy reading while the parent is waiting for a response — a deadlock.
This code adds some instrumentation and fixes the problem by including the newline in the string read by get_input().
The original program expects two lots of input (one in response to the prompt from read_write, the other in response to the echoed output), but dies from a SIGPIPE when it tries to send the second input to the now-exited child. The code below circumvents that by ignoring SIGPIPE signals, which means that the parent gets a write error instead of being killed by the signal.
There's an unusual control flow between the two programs, and if you made read_write into an iterative program, you'd see that it generates two outputs for a single input. That's not the way it's usually done, of course. Fixing that is outside of the scope of the immediate exercise, though.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
typedef struct pipe_rw
{
pid_t cpid;
int pipe_r[2];
int pipe_w[2];
} RWPIPE;
static char *get_user_input(void)
{
char buf[128];
char *input;
char ch;
size_t len = 0;
while((ch = fgetc(stdin)) != '\n' && ch != EOF && len < sizeof(buf) - 2)
buf[len++] = ch;
buf[len++] = '\n';
buf[len] = '\0';
input = malloc(sizeof(char) * (len + 1));
strncpy(input, buf, (len + 1));
printf("Got: [%s]\n", input);
return input;
}
static int pclose_rw(RWPIPE *rwp)
{
int status, ret = 0;
if (rwp)
{
if (rwp->cpid > 0)
{
kill(rwp->cpid, SIGTERM);
do {
ret = waitpid(rwp->cpid, &status, WUNTRACED|WCONTINUED);
} while (!WIFEXITED(status) && !WIFSIGNALED(status));
}
close(rwp->pipe_r[0]);
close(rwp->pipe_w[1]);
free(rwp);
}
return ret;
}
static RWPIPE *popen_rw(const char *command)
{
RWPIPE *rwp = (RWPIPE *)malloc(sizeof(*rwp));
if (rwp == NULL)
return NULL;
memset(rwp, 0x00, sizeof(*rwp));
if (pipe(rwp->pipe_r) != 0 || pipe(rwp->pipe_w) != 0)
{
free(rwp);
return NULL;
}
rwp->cpid = fork();
if (rwp->cpid == -1)
{
free(rwp);
return NULL;
}
if (rwp->cpid == 0)
{
dup2(rwp->pipe_w[0], STDIN_FILENO);
dup2(rwp->pipe_r[1], STDOUT_FILENO);
close(rwp->pipe_r[0]);
close(rwp->pipe_r[1]);
close(rwp->pipe_w[0]);
close(rwp->pipe_w[1]);
execl(command, command, NULL);
fprintf(stderr, "Error: fail to exec command '%s'.\n", command);
exit (1);
}
else
{
close(rwp->pipe_r[1]);
close(rwp->pipe_w[0]);
}
return rwp;
}
static ssize_t read_p(RWPIPE *rwp, void *buf, size_t count)
{
return read(rwp->pipe_r[0], buf, count);
}
static ssize_t write_p(RWPIPE *rwp, const void *buf, size_t count)
{
return write(rwp->pipe_w[1], buf, count);
}
int main(void)
{
char rbuf[BUFSIZ];
int ret, len;
char *string;
signal(SIGPIPE, SIG_IGN);
RWPIPE *rwp = popen_rw("./read_write");
if (rwp == NULL)
{
printf("Error: fail to open command ..\n");
return EXIT_FAILURE;
}
while (1)
{
memset(rbuf, 0x00, sizeof(rbuf));
if (read_p(rwp, rbuf, sizeof(rbuf)) <= 0)
{
printf("No more input..\n");
break;
}
printf("From child: [%s]\n", rbuf);
string = get_user_input();
len = strlen(string);
printf("Length %d: [%s]\n", len, string);
ret = write_p(rwp, string, len);
if (ret != len)
{
fprintf(stderr, "Write %d bytes (expected %d) ..\n", ret, len);
break;
}
printf("end cycle\n");
}
printf("End of loop\n");
pclose_rw(rwp);
return EXIT_SUCCESS;
}
Sample run
The program is rwpipe53; the input I typed was Ocelot and Grumble.
$ ./rwpipe53
From child: [Input string : ]
Ocelot
Got: [Ocelot
]
Length 7: [Ocelot
]
end cycle
From child: [entered string is Ocelot
]
Grumble
Got: [Grumble
]
Length 8: [Grumble
]
Write -1 bytes (expected 8) ..
End of loop
$
Note how the square brackets (any pair of marker symbols can be used if you prefer) shows where the data starts and ends. I find that a valuable technique when debugging code.

Writing to a Named Pipe not showing the full content of a string

I am trying to read data from 2 named pipe and write it to another named pipe concatenating the content from 2 inputs. But why my output only shows the string from first input?
Here is my code:
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#define MAX_REC_SIZE 1024
int open_fifo(char *name, int mode) {
mode = mode == O_RDONLY ? (O_RDONLY | O_NONBLOCK): mode;
int fd;
if (access(name, F_OK) == -1) {
if(mkfifo(name, 0777) != 0) {
fprintf(stderr, "Could not create fifo %s\n", name);
exit(EXIT_FAILURE);
}
}
fd = open(name, mode);;
return fd;
}
void read_fifo(int fd, char *out_r) {
memset (out_r, '\0', MAX_REC_SIZE);
do {
if(read(fd, out_r, MAX_REC_SIZE) > 0) {
out_r = strtok(out_r, "\n");
return;
}
} while (1);
}
void write_fifo(int fd, char *out_w) {
write(fd, out_w, sizeof(out_w));
}
int main()
{
int pipe_fd[3], i;
char *pipe_nm[] = {"./in_pipe_1", "./in_pipe_2", "./out_pipe_1"};
int read_mode = O_RDONLY;
int write_mode = O_WRONLY;
char out[MAX_REC_SIZE];
char out_store[MAX_REC_SIZE];
for(i=0; i<3; i++) {
pipe_fd[i] = open_fifo(pipe_nm[i], i == 2 ? write_mode : read_mode);
}
read_fifo(pipe_fd[0], out);
strcpy(out_store, out);
read_fifo(pipe_fd[1], out);
strcat(out_store, out);
strcat(out_store, "\n");
write_fifo(pipe_fd[2], out_store);
return 0;
}
A suspicious part of your code is:
write(fd, out_w, sizeof(out_w))
Here, out_w is not an array, and the sizeof operator would yield the size of a char * pointer, not the length of the block.
You should pass the length of out_store to your write_fifo function.
Also, I'm not really sure what your intent is when using the strtok function.

Resources