Printing from a buffer using the read(2) function in C - c

I'm trying to read in bits using the read function and I'm not sure how I'm supposed to printf the results using the buffer.
currently the code fragment is as follows
/* Question fragment: open the file named by argv[1] and pass every chunk
 * returned by read() straight to printf(). */
char *infile = argv[1];
char *ptr = buff;
int fd = open(infile, O_RDONLY); /* read only */
assert(fd > -1);
/* NOTE(review): n is a char, but read() returns ssize_t; if SIZE exceeds
 * CHAR_MAX the byte count may not fit. */
char n;
while((n = read(fd, ptr, SIZE)) > 0){ /*loops that reads the file until it returns empty */
/* NOTE(review): the data just read is used as the format string, and
 * read() does not NUL-terminate it. */
printf(ptr);
}

The data read into ptr may contain \0 bytes, format specifiers and is not necessarily \0 terminated. All good reasons not to use printf(ptr). Instead:
/* read() returns ssize_t (not char): a byte count, 0 at end of file or -1 on error. */
ssize_t n;
while ((n = read(fd, ptr, SIZE)) > 0) {
    /* Dump exactly the n bytes that were read, never more. */
    const char *stop = ptr + n;
    for (const char *p = ptr; p != stop; ++p) {
        printf(" %02hhX", *p);
        /* Without hh support use: printf(" %02X", (unsigned char) *p); */
    }
}

Here is the code that does the job for you:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <string.h>
#define SIZE 1024
/*
 * Copy a file to standard output using read(2).
 *
 * The file name is taken from argv[1]; when no argument is given the
 * default "Text.txt" is used.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a read fails.
 */
int main(int argc, char* argv[])
{
    const char *infile = (argc > 1) ? argv[1] : "Text.txt";
    char ptrBuffer[SIZE];
    ssize_t n; /* read() returns ssize_t: byte count, 0 at EOF, -1 on error */

    int fd = open(infile, O_RDONLY); /* read only */
    if (fd < 0) {
        perror(infile);
        return 1;
    }

    /*
     * read() does not NUL-terminate the buffer and the data may contain
     * '\0' bytes, so write exactly the n bytes received instead of
     * treating the buffer as a C string.
     */
    while ((n = read(fd, ptrBuffer, sizeof ptrBuffer)) > 0) {
        fwrite(ptrBuffer, 1, (size_t)n, stdout);
    }
    if (n < 0) {
        perror("read");
    }

    close(fd);
    return (n < 0) ? 1 : 0;
}
You can also take the file name from the command line (argv[1]) instead of hard-coding it.

Even if ptr is a string, you need to use printf("%s", ptr);, not printf(ptr);
However, after you call
read(fd, ptr, SIZE)
ptr is rarely a string (strings need to be null-terminated). You need to use a loop and choose the format you need. For example:
/* Print each of the n bytes that were read as two hex digits.  Index the
 * buffer (not *ptr, which is always the first byte) and convert through
 * unsigned char so bytes >= 0x80 are not sign-extended. */
for (int i = 0; i < n; i++)
    printf("%02X ", (unsigned char)ptr[i]);

Related

How to use read() function to read data from a file in C?

I would like to read data from a file, I need to open file in binary form and read blocks of data at a time?
#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#include <fcntl.h>
#include <unistd.h>
/* Question code: tries to read one byte of "file_test.txt" with
 * open()/read() and print it. */
int main(int argc,char* argv[])
{
/* NOTE(review): n and nbyte are declared but never used. */
int n;
FILE * fp;
size_t nbyte;
/* NOTE(review): this declares 1024 pointers to unsigned char, not a
 * 1024-byte buffer. */
unsigned char * buffer[1024];
/* NOTE(review): open() returns an int file descriptor, yet the result is
 * stored in a FILE *. */
fp=open("file_test.txt",O_RDONLY);
read(fp,buffer,1);
/* NOTE(review): %s expects a pointer to a NUL-terminated string; buffer[0]
 * is a pointer object whose first byte was overwritten by read(). */
printf("%s\n",buffer[0]);
close(fp);
return 0;
}
open/read is the POSIX version of fopen/fread, open does not return FILE* pointer.
unsigned char * buffer[1024]; declares an array of 1024 pointers, not a byte buffer. You just need a plain buffer: unsigned char buffer[1024];
printf("%s\n",...); is for printing c-string, it cannot print binary data in general.
/*
 * Hex-dump "file_test.txt" to standard output, reading it in chunks of up
 * to 1024 bytes with open(2)/read(2).
 *
 * Returns 0 on success, 1 if the file cannot be opened or a read fails.
 */
int main(void)
{
    int flags = O_RDONLY;
#ifdef O_BINARY
    /* O_BINARY is not part of POSIX; request it only where it is defined. */
    flags |= O_BINARY;
#endif

    int fin = open("file_test.txt", flags);
    if (fin < 0) { /* open() reports failure with -1; 0 is a valid descriptor */
        perror("file_test.txt");
        return 1;
    }

    unsigned char buffer[1024];
    ssize_t size; /* signed on purpose: read() returns -1 on error */
    while ((size = read(fin, buffer, sizeof(buffer))) > 0) {
        for (ssize_t i = 0; i < size; i++)
            printf("%02X ", buffer[i]);
    }
    if (size < 0)
        perror("read");

    close(fin);
    return (size < 0) ? 1 : 0;
}

Problem with printing first 10 rows of a file

So I am trying to make a function print_file_rows() that prints the first ten rows of a file. If the file has more than 10 rows it works perfectly fine but if there's 10 or less it starts printing garbage. Any ideas on how I can fix this? (MUST ONLY USE THE SYSTEM FUNCTIONS OPEN/READ/WRITE/CLOSE)
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
/* Question code: reads the whole file at path into a growing heap buffer,
 * then prints characters until ten newlines have been seen. */
void print_file_rows(char *path)
{
int fd = open(path, O_RDONLY);
if (fd < 0)
{
/* NOTE(review): a void function cannot return a value. */
return NULL;
}
size_t size = 100;
size_t offset = 0;
/* NOTE(review): res is unsigned, so a -1 error return from read() is not
 * distinguishable from a large byte count in the loop test below. */
size_t res;
char *buff = malloc(size);
while((res = read(fd, buff + offset, 100)) != 0)
{
offset += res;
if (offset + 100 > size)
{
size *= 2;
/* NOTE(review): the results of malloc() and realloc() are not checked. */
buff = realloc(buff, size);
}
}
close(fd);
int j = 0;
/* NOTE(review): the loop stops on '\0', but nothing ever stores a
 * terminator in buff; read() does not add one. */
for(int i = 0;buff[i] != '\0'; i++)
{
if(j == 10)
{
break;
}
if(buff[i] == '\n')
{
j++;
}
printf("%c", buff[i]);
}
free(buff);
}
int main()
{
print_file_rows("a.txt");
return 0;
}
You do not need any buffers. It is most likely buffered on the OS level so you may print char by char.
/*
 * Copy the first nrows lines of the file at path to standard output, one
 * byte at a time, using only open/read/write/close.
 *
 * Returns the number of requested rows that were NOT printed (0 when the
 * file had at least nrows lines), or -1 if the file cannot be opened or
 * writing to standard output fails.
 */
int print_file_rows(char *path, size_t nrows)
{
    int fd = open(path, O_RDONLY);
    if (fd < 0) /* only a negative value means failure; 0 is a valid descriptor */
        return -1;

    int result = 0;
    char c;
    while (nrows && read(fd, &c, 1) == 1) {
        if (write(STDOUT_FILENO, &c, 1) != 1) {
            result = -1;
            break;
        }
        if (c == '\n')
            nrows--;
    }
    if (result == 0)
        result = (int)nrows;

    close(fd); /* reached only with a descriptor that was opened successfully */
    return result;
}
/* Entry point: show the first ten rows of "a.txt" and report a failure. */
int main()
{
    int remaining = print_file_rows("a.txt", 10);

    if (remaining == -1) {
        printf("Something has gone wrong\n");
    }
    return 0;
}
From man 2 read:
SYNOPSIS
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
DESCRIPTION
read() attempts to read up to count bytes from file descriptor fd into the buffer starting at buf.
read is for reading raw bytes, and as such has no notion of strings. It does not place a NUL terminating byte ('\0') at the end of the buffer. If you are going to treat the data you read as a string, you must terminate the buffer yourself.
To make room for this NUL terminating byte you should always allocate one extra byte in your buffer (i.e., read one byte less than your maximum).
We can see the return value is actually of type ssize_t, rather than size_t, which allows for
On error, -1 is returned, and errno is set to indicate the error.
This means we will need to check that the return value is greater than zero, rather than not zero which would cause the offset to be decremented on error.
With all that said, note that this answer from a similar question posted just yesterday shows how to achieve this without the use of a dynamic buffer. You can simply read the file one byte at a time and stop reading when you've encountered 10 newline characters.
If you do want to understand how to read a file into a dynamic buffer, then here is an example using the calculated offset to NUL terminate the buffer as it grows. Note that reading the entire file this way is inefficient for this task (especially for a large file).
(Note: the call to write, instead of printf)
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Read the whole file at path into a dynamically grown, NUL-terminated
 * buffer, then write its first ten lines to standard output.
 *
 * Nothing is printed when the file cannot be opened or the initial buffer
 * cannot be allocated.  If the buffer cannot be grown, or a read fails,
 * the data collected so far is used.
 */
void print_file_rows(const char *path)
{
    const size_t read_size = 100;
    size_t size = read_size;
    size_t offset = 0;
    ssize_t res;

    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return;

    /* One extra byte so there is always room for the terminator. */
    char *buff = malloc(size + 1);
    if (buff == NULL) {
        close(fd);
        return;
    }
    buff[0] = '\0'; /* an empty file must still yield a valid empty string */

    while ((res = read(fd, buff + offset, read_size)) > 0) {
        offset += (size_t)res;
        buff[offset] = '\0';
        if (offset + read_size > size) {
            size *= 2;
            /* Keep the old pointer until realloc() is known to have succeeded. */
            char *grown = realloc(buff, size + 1);
            if (grown == NULL)
                break;
            buff = grown;
        }
    }
    close(fd);

    int lines = 0;
    for (size_t i = 0; lines < 10 && buff[i] != '\0'; i++) {
        if (write(STDOUT_FILENO, &buff[i], 1) != 1)
            break;
        if (buff[i] == '\n')
            lines++;
    }
    free(buff);
}
int main(void)
{
print_file_rows("a.txt");
}
(Error handling omitted for code brevity. malloc, realloc, and open can all fail, and should normally be handled.)

How do I compare string pointers?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Question code: intended to order the rows of array by the string in
 * column 0, swapping string contents through a scratch buffer. */
void sort(char *array[1000][3], int size){
char *temp;
/* NOTE(review): this 30-byte scratch buffer is never freed. */
temp = malloc(30);
int j, i;
for(i = 0; i < size-1; i ++){
/* NOTE(review): j is initialised from itself (j+1) while still
 * uninitialised; it is never set from i. */
for(j = j+1; j < size; j ++){
if(strcmp(array[i][0],array[j][0]) > 0){
/* NOTE(review): these copy string contents between buffers; nothing here
 * checks that the destination is large enough. */
strcpy(temp, array[i][0]);
strcpy(array[i][0], array[j][0]);
strcpy(array[j][0], temp);
}
}
}
}
/* Question code: reads the CSV line by line, splits a heap copy of each
 * line on commas into array[counter][...], then sorts and prints the
 * first field before and after. */
int main(){
FILE * myfile;
myfile = fopen("/public/lab4/hurricanes.csv", "r");
char line[100];
char *token;
char *array[1000][3];
int counter = 0;
if(myfile == NULL){
perror("Could not open file");
return 1;
}
/* NOTE(review): counter is not checked against the 1000 rows of array. */
while(fgets(line, 100,myfile) != NULL){
token = (char*) malloc((strlen(line)+1) * sizeof(char));
strcpy(token,line);
token = strtok(token, ",");
/* NOTE(review): i is not limited to the 3 columns of a row, so a line with
 * more than three comma-separated fields stores past the row. */
for(int i = 0; token != NULL; i ++){
array[counter][i] = token;
token = strtok(NULL, ",");
}
counter ++;
}
printf("%s", array[0][0]);
sort(array, counter);
printf("%s", array[0][0]);
return 0;
}
The file gives info on hurricanes with each line looking similar to this
Easy,Category 4 hurricane,5-Sep,1950
Having trouble being able to compare some of the string pointers to sort them alphabetically. Not sure if I need to use malloc to allocate some memory or what I need to do. Right now the array is staying the exact same.
Well, your program has only a few drawbacks, all of them pointed out in comments in the corrected version. I have edited on top of your source code and tried to conserve as much of your original code as possible, so it is coded as you wrote it in the first place. Please read the code and don't hesitate to make any comment you want:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* better use constants, so in case you have to change them, you
* only change them here. */
#define MAX 1000
#define NFIELDS 3
/* this is for pretty formatting error messages. */
#define F(_fmt) __FILE__":%d:%s: "_fmt, __LINE__, __func__
/* if you pass the field number used to sort, then you
* have a more flexible way to sort (any field can be used)
*/
/*
 * Order the first size rows of array in ascending strcmp() order of the
 * string in column field.  Rows are exchanged as a whole by swapping their
 * NFIELDS pointers; the strings themselves are never copied.
 */
void sort(char *array[MAX][NFIELDS], int size, int field)
{
    for (int lo = 0; lo < size - 1; lo++) {
        /* Compare row lo with every later row; the inner index starts at lo + 1. */
        for (int hi = lo + 1; hi < size; hi++) {
            if (strcmp(array[lo][field], array[hi][field]) <= 0)
                continue; /* already in order */

            /* Out of order: exchange the two rows pointer by pointer. */
            for (int col = 0; col < NFIELDS; col++) {
                char *held = array[lo][col];
                array[lo][col] = array[hi][col];
                array[hi][col] = held;
            }
        }
    }
}
/*
 * Print the first size rows of array, one per line, as
 * "#<row>: <field0> <field1> ...".
 */
void print_array(char *array[MAX][NFIELDS], int size)
{
    for (int row = 0; row < size; row++) {
        char **fields = array[row];

        printf("#%d:", row);
        for (int col = 0; col < NFIELDS; col++)
            printf(" %s", fields[col]);
        printf("\n");
    }
}
/*
 * Load up to MAX lines of the hurricane CSV, split each line into NFIELDS
 * fields, print the table, sort it on the third field (index 2) and print
 * it again.
 *
 * Returns 0 on success and 1 if the file cannot be opened; exits with
 * status 1 if the file has more than MAX lines or memory runs out.
 */
int main()
{
    char *filename = "/public/lab4/hurricanes.csv";
    /* declare and initialise in one step */
    FILE *myfile = fopen(filename, "r");
    char line[100];
    char *array[MAX][NFIELDS];
    int counter = 0;

    if (myfile == NULL) {
        fprintf(stderr,
                F("FOPEN: %s: %s(errno = %d)\n"),
                filename, strerror(errno), errno);
        return 1;
    }

    /* sizeof keeps the limit in step with the declaration of line */
    while (fgets(line, sizeof line, myfile) != NULL) {
        /* refuse to store past the end of array */
        if (counter >= MAX) {
            fprintf(stderr, F("MAX NUMBER OF LINES EXHAUSTED (%d)\n"), MAX);
            exit(1);
        }

        /* strdup(3) allocates and copies in one call; its result (like
         * malloc's) needs no cast.  The copy stays alive because the
         * table keeps pointers into it. */
        char *token = strdup(line);
        if (!token) {
            fprintf(stderr, F("Not enough memory: %s (errno = %d)\n"),
                    strerror(errno), errno);
            exit(1);
        }

        /* strsep(3) instead of strtok(3): strtok treats a run of commas
         * as a single delimiter, while strsep yields an empty string for
         * every empty field.  '\n' is part of the separator set so the
         * last stored field does not keep the trailing newline. */
        int i;
        char *s;
        for (i = 0;
             (i < NFIELDS) && (s = strsep(&token, ",\n")) != NULL;
             i++)
        {
            array[counter][i] = s;
        }
        /* A short line leaves fields unset; point them at an empty string
         * so printing and sorting never read an uninitialised pointer. */
        for (; i < NFIELDS; i++) {
            array[counter][i] = "";
        }
        counter++;
    }
    fclose(myfile);

    print_array(array, counter);
    sort(array, counter, 2);
    printf("\n");
    print_array(array, counter);
    return 0;
}
Note: Never cast the result of malloc(3). The cast is a legacy from ancient times, when there was no void type to allow automatic pointer conversion. Casting malloc makes the compiler silently comply if you forget to #include <stdlib.h>, and this can cause an error if pointer types are not the same size as integer types (you get undefined behaviour because the compiler wrongly assumes that malloc() returns an int, but, as you have written the cast in the source, there will be no message from the compiler). Casting malloc(3) is a very bad habit, and makes it more difficult to find errors.

Converting Greek words to uppercase

I have to create a function that reads a file called grwords.txt containing around 540000 words which are written in Greek letters.
I have to convert these words to uppercase and fill an array called char **words.
This is what I have so far.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#include <ctype.h>
/* Forward declaration of the loader defined below. */
void fp();
/* Entry point: set the Windows console output code page to 1253, then run fp(). */
int main(int argc, char *argv[]) {
SetConsoleOutputCP(1253);
fp();
return 0;
}
/* Question code: intended to load up to 546490 words from grwords.txt into
 * the words table. */
void fp(){
char **words;
/* NOTE(review): the elements of words are char *, but the size is computed
 * with sizeof(int *). */
words = malloc(546490 * sizeof(int *));
/* NOTE(review): i is used here without a visible declaration. */
for (i = 0; i < 546490; i++)
words[i] = malloc(24 * sizeof(int));
FILE *file;
char *word;
size_t cnt;
file = fopen("grwords.txt", "rt");
if (file == NULL){
printf("File cannot be opened.\n");
exit(1);
}
cnt = 0;
/* NOTE(review): word is never pointed at any storage before fscanf() writes
 * through it; "%24s" can store up to 25 bytes including the terminator. */
while (1==fscanf(file, "%24s",word)){
if (cnt == 546490)
break;
strcpy(words[cnt++], word);
}
fclose(file);
}
I'm still trying to figure out pointers. I know that & makes a pointer from a value and * a value from a pointer. Updated the program and it successfully fills the array with the words from the file! I still have no idea how to convert Greek lowercase to uppercase.
Handling Greek words can be dependent on your platform.
First of all, you need to understand how file handling works. Here is what I wrote:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define bufSize 1024 // max lenght of word
// we are going to receive the .txt from cmd line
/*
 * Read whitespace-separated words from the file named by argv[1] and print
 * each word followed by its toupper() conversion.
 *
 * At most N words are stored; reading stops once the table is full.
 * Returns 0 on success, 1 on a usage error or if the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp;
    // Assume file has max 10 words
    const size_t N = 10;
    // N rows of bufSize characters: an array of N strings, each holding
    // at most bufSize - 1 characters plus the terminating '\0'.
    char buf[N][bufSize];

    // make sure we got the .txt
    if (argc != 2)
    {
        fprintf(stderr,
                "Usage: %s <soure-file>\n", argv[0]);
        return 1;
    }
    // open the file
    if ((fp = fopen(argv[1], "r")) == NULL)
    {   /* Open source file. */
        perror("fopen source-file");
        return 1;
    }

    // counters
    size_t i = 0, j;
    // The field width must leave room for the terminator: bufSize is
    // 1024, so at most 1023 characters may be stored.  The i < N test
    // keeps the row index inside the table.
    while (i < N && fscanf(fp, "%1023s", buf[i]) == 1)
    {
        // print what we read
        printf("%s\n", buf[i]);
        // make every letter of the word capital and print it
        for (j = 0; buf[i][j]; j++)
        {
            // toupper() needs a value representable as unsigned char
            // (or EOF); a plain char may be negative.
            putchar(toupper((unsigned char)buf[i][j]));
        }
        i++;
        printf("\ndone with this word\n");
    }
    // close the file
    fclose(fp);
    return 0;
}
For this test.txt file:
Georgios
Samaras
Γιώργος
Σαμαράς
the code would run as:
./exe test.txt
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
Γιώργος
done with this word
Σαμαράς
Σαμαράς
done with this word
As you can see, I could read the Greek words, but failed to convert them to upper case.
Once you got how file handling goes, you need to use wide characters to read a file with Greek words.
So, by just modifying the above code, we get:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>
#define bufSize 1024
/*
 * Wide-character version: read whitespace-separated words from the file
 * named by argv[1] and print each word followed by its towupper()
 * conversion.
 *
 * At most N words are stored; reading stops once the table is full.
 * Returns 0 on success, 1 on a usage error or if the file cannot be opened.
 */
int main(int argc, char *argv[])
{
    /* Character classification and conversion depend on the LC_CTYPE
     * locale; report it when the requested one is unavailable. */
    if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL)
        fprintf(stderr, "warning: locale en_GB.UTF-8 is not available\n");

    FILE *fp;
    const size_t N = 15;
    wchar_t buf[N][bufSize];

    if (argc != 2)
    {
        fprintf(stderr,
                "Usage: %s <soure-file>\n", argv[0]);
        return 1;
    }
    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        perror("fopen source-file");
        return 1;
    }

    size_t i = 0, j;
    /* The field width must leave room for the terminator: bufSize is 1024,
     * so at most 1023 wide characters may be stored.  The i < N test keeps
     * the row index inside the table. */
    while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
    {
        wprintf( L"%ls\n\n", buf[i]);
        for (j = 0; buf[i][j]; j++)
        {
            putwchar(towupper(buf[i][j]));
        }
        i++;
        wprintf(L"\ndone with this word\n");
    }
    fclose(fp);
    return 0;
}
And now the output is this:
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
ΓΙΏΡΓΟΣ
done with this word
Σαμαράς
ΣΑΜΑΡΆΣ
done with this word
I see that you may want to create a function which reads the words. If you need a simple example of functions in C, you can visit my pseudo-site here.
As for the 2D array I mentioned above, this picture might help:
where N is the number of rows (equal to 4) and M is the number of columns (equal to 5). In the code above, N is N and M is bufSize. I explain more here, where you can also find code for dynamic allocation of a 2D array.
I now see that you are on Windows. I tested the code in Ubuntu.
For Windows you might want to take a good look at this question.
So, after you read all the above and understand them, you can see what you asked for with dynamic memory management.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>
#define bufSize 1024
/* Allocate a table of N rows, each with room for M wchar_t elements. */
wchar_t **get(int N, int M);
/* Free the N rows of table p, then the table itself. */
void free2Darray(wchar_t** p, int N);
/*
 * Dynamic-memory version: read whitespace-separated words from the file
 * named by argv[1] into a heap-allocated table and print each word
 * followed by its towupper() conversion.
 *
 * At most N words are stored; reading stops once the table is full.
 * Returns 0 on success, 1 on a usage error, if the file cannot be opened
 * or if the table cannot be allocated.
 */
int main(int argc, char *argv[])
{
    setlocale(LC_CTYPE, "en_GB.UTF-8");
    FILE *fp;
    const size_t N = 15;

    /* Validate the arguments and open the file before allocating, so the
     * early exits below have nothing to release. */
    if (argc != 2)
    {
        fprintf(stderr,
                "Usage: %s <soure-file>\n", argv[0]);
        return 1;
    }
    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        perror("fopen source-file");
        return 1;
    }

    wchar_t** buf = get(N, bufSize);
    if (buf == NULL)
    {
        fprintf(stderr, "out of memory\n");
        fclose(fp);
        return 1;
    }

    size_t i = 0, j;
    /* The field width must leave room for the terminator: bufSize is 1024,
     * so at most 1023 wide characters may be stored.  The i < N test keeps
     * the row index inside the table. */
    while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
    {
        wprintf( L"%ls\n", buf[i]);
        for (j = 0; buf[i][j]; j++)
        {
            putwchar(towupper(buf[i][j]));
        }
        i++;
        wprintf(L"\ndone with this word\n");
    }
    fclose(fp);
    // NEVER FORGET, FREE THE DYNAMIC MEMORY
    free2Darray(buf, N);
    return 0;
}
// We return the pointer
/*
 * Allocate a table of N rows, each with room for M wchar_t elements.
 *
 * Returns the table, or NULL if N or M is not positive or if any
 * allocation fails; on failure nothing stays allocated.  The caller
 * releases each row and then the table itself with free().
 */
wchar_t **get(int N, int M) /* Allocate the array */
{
    if (N <= 0 || M <= 0)
        return NULL;

    wchar_t **table = malloc((size_t)N * sizeof *table);
    if (table == NULL)
        return NULL;

    for (int i = 0; i < N; i++) {
        table[i] = malloc((size_t)M * sizeof *table[i]);
        if (table[i] == NULL) {
            /* Undo the rows allocated so far so nothing leaks. */
            while (i-- > 0)
                free(table[i]);
            free(table);
            return NULL;
        }
    }
    return table;
}
/*
 * Release a table of N separately allocated rows: every row first, then
 * the array of row pointers itself.
 */
void free2Darray(wchar_t** p, int N)
{
    int row = 0;

    while (row < N) {
        free(p[row]);
        row++;
    }
    free(p);
}
Note that this code is expected to work on Linux (tested on Ubuntu 12.04), not on Windows (tested on Win 7).

md5sum of file in Linux C

I want to find md5sum of a file in Linux C, Is there any API where I can send file name to get md5sum of that file.
There's code here.
Also, the openssl libs have md5 functions (from here):
#include <openssl/md5.h>
#include <unistd.h>
/*
 * Compute the MD5 digest of everything read from standard input and print
 * it as lowercase hex followed by a newline.
 *
 * NOTE(review): printf() needs <stdio.h>, which this snippet's visible
 * include list does not contain.
 */
int main()
{
    MD5_CTX c;
    char buf[512];
    unsigned char out[MD5_DIGEST_LENGTH];

    MD5_Init(&c);
    /* Feed the hash chunk by chunk until read() reports end of input
     * (0) or an error (-1). */
    for (;;) {
        ssize_t got = read(STDIN_FILENO, buf, 512);
        if (got <= 0)
            break;
        MD5_Update(&c, buf, got);
    }
    MD5_Final(out, &c);

    for (int k = 0; k < MD5_DIGEST_LENGTH; k++)
        printf("%02x", out[k]);
    printf("\n");
    return 0;
}
You can use popen to run md5sum and read the output:
#include <stdio.h>
#include <ctype.h>
#define STR_VALUE(val) #val
#define STR(name) STR_VALUE(name)
#define PATH_LEN 256
#define MD5_LEN 32
/*
 * Run the external md5sum program on file_name through popen() and copy
 * the leading hex digits of its output into md5_sum.
 *
 * file_name: path given to md5sum; at most PATH_LEN characters are used.
 * md5_sum:   output buffer with room for MD5_LEN + 1 bytes; it is
 *            NUL-terminated unless popen() fails.
 *
 * Returns 1 when exactly MD5_LEN hex digits were read, 0 otherwise
 * (including when popen() fails).
 *
 * NOTE(review): file_name is inserted into the command line unquoted, so
 * any shell metacharacters in it are interpreted by the shell -- do not
 * pass untrusted names.
 */
int CalcFileMD5(char *file_name, char *md5_sum)
{
/* The "%.<PATH_LEN>s" precision caps the file-name part of the command, so
 * the formatted string always fits in cmd. */
#define MD5SUM_CMD_FMT "md5sum %." STR(PATH_LEN) "s 2>/dev/null"
char cmd[PATH_LEN + sizeof (MD5SUM_CMD_FMT)];
sprintf(cmd, MD5SUM_CMD_FMT, file_name);
#undef MD5SUM_CMD_FMT
FILE *p = popen(cmd, "r");
if (p == NULL) return 0;
int i, ch;
/* Stop at the first character that is not a hex digit (this includes EOF). */
for (i = 0; i < MD5_LEN && isxdigit(ch = fgetc(p)); i++) {
*md5_sum++ = ch;
}
*md5_sum = '\0';
pclose(p);
return i == MD5_LEN;
}
/* Entry point: compute the MD5 sum of "~/testfile" through CalcFileMD5()
 * and report the result. */
int main(int argc, char *argv[])
{
    char md5[MD5_LEN + 1];
    int ok = CalcFileMD5("~/testfile", md5);

    if (ok) {
        printf("Success! MD5 sum is: %s\n", md5);
    } else {
        puts("Error occured!");
    }
}
You can use the mhash library (license is LGPL). On Debian systems:
sudo apt-get install libmhash-dev
See the man page man 3 mhash
But I don't think you can just give it the name of a file. You have to open the file yourself, read the data, and feed the data to this library's functions.
An easy answer to the question asked by Raja and using answer from sje397, the md5sum of a file can be calculated within the C program as below. Also notice that there is no need of writing the read command twice when you can use the do while loop.
/*
 * Compute the MD5 digest of the named file and print it to standard output
 * as lowercase hex followed by a newline.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a read error
 * occurs (nothing is printed in that case).
 */
int calculate_md5sum(char *filename)
{
    /* Binary mode: the digest must cover the bytes exactly as stored. */
    FILE *file_ptr = fopen(filename, "rb");
    if (file_ptr == NULL)
    {
        perror("Error opening file");
        return 1;
    }

    MD5_CTX c;
    char buf[512];
    size_t bytes; /* fread() returns size_t, never a negative value */
    unsigned char out[MD5_DIGEST_LENGTH];

    MD5_Init(&c);
    while ((bytes = fread(buf, 1, sizeof buf, file_ptr)) > 0)
    {
        MD5_Update(&c, buf, bytes);
    }

    /* fread() returns 0 both at end of file and on error; ferror() tells
     * the two apart.  Close the stream on every path. */
    int read_failed = ferror(file_ptr);
    fclose(file_ptr);
    if (read_failed)
    {
        fprintf(stderr, "Error reading file\n");
        return 1;
    }

    MD5_Final(out, &c);
    for (int n = 0; n < MD5_DIGEST_LENGTH; n++)
        printf("%02x", out[n]);
    printf("\n");
    return 0;
}
If you're looking to generate MD5 hash for a file and compare it with a string, you can use this.
Here, I have used D'Nabre's code from another SO answer and Michael Foukarakis's hex string to byte array code from this SO answer.
It needs to be linked against OpenSSL's crypto library, which provides the MD5 functions (gcc md5.c -o md5 -lcrypto), to work.
Sample usage:
/* Hash the file, then compare the digest with the expected hex string. */
/* NOTE(review): md5_for_file() hands the name to open(), which does not
 * expand "~" (that is done by a shell) -- confirm the path. */
unsigned char *file_hash = md5_for_file("~/testfile");
if (md5_is_match_str(file_hash, "b7be4ec867f9b0286b91dd40178774d6")) {
printf("Match\n");
} else {
printf("Mismatch\n");
}
/* The digest buffer is owned by the caller. */
free(file_hash);
md5.h:
#ifndef MD5_H
#define MD5_H
/**
 * Compute the MD5 digest of the named file.
 * The returned buffer is heap-allocated; the caller must free() it.
 */
unsigned char *md5_for_file(char *filename);
/**
 * Compare two binary MD5 digests.
 * Returns 1 when they are equal, 0 otherwise; either argument may be NULL,
 * in which case the result is 0.
 */
int md5_is_match(unsigned char *md5_1, unsigned char *md5_2);
/**
 * Compare a binary MD5 digest with a digest written as a hex string.
 * Returns 1 on a match, 0 otherwise; md5 may be NULL, in which case the
 * result is 0.
 */
int md5_is_match_str(unsigned char *md5, const char *md5_str);
#endif //MD5_H
md5.c:
#include "md5.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <openssl/md5.h>
// Print the MD5 sum as hex-digits.
/* Write the MD5_DIGEST_LENGTH bytes at md to standard output as lowercase
 * hex digits, followed by a newline. */
void print_md5_sum(unsigned char *md) {
    const unsigned char *stop = md + MD5_DIGEST_LENGTH;

    for (const unsigned char *byte = md; byte != stop; ++byte)
        printf("%02x", *byte);
    printf("\n");
}
// Get the size of the file by its file descriptor
/* Return the size in bytes of the file open on descriptor fd, as reported
 * by fstat().  Terminates the program with exit(-1) if fstat() fails. */
unsigned long get_size_by_fd(int fd) {
    struct stat info;
    int rc = fstat(fd, &info);

    if (rc >= 0)
        return info.st_size;
    exit(-1);
}
/*
 * Compute the MD5 digest of the named file by mapping it into memory.
 *
 * Prints the file name, its size and the resulting digest to standard
 * output.
 *
 * Returns a heap-allocated buffer of MD5_DIGEST_LENGTH bytes that the
 * caller must free(), or NULL if memory cannot be allocated, the file
 * cannot be opened, or it cannot be mapped.
 */
unsigned char *md5_for_file(char *filename) {
    int file_descript = -1;
    unsigned long file_size;
    unsigned char *result = malloc(sizeof(*result) * MD5_DIGEST_LENGTH);
    if (NULL == result) {
        printf("malloc failed\n");
        return NULL;
    }

    printf("using file:\t%s\n", filename);
    file_descript = open(filename, O_RDONLY);
    if (file_descript < 0) goto FAIL;

    file_size = get_size_by_fd(file_descript);
    printf("file size:\t%lu\n", file_size);

    if (file_size == 0) {
        /* mmap() rejects a zero-length mapping; hash the empty input directly. */
        MD5((const unsigned char *) "", 0, result);
    } else {
        void *file_buffer = mmap(NULL, file_size, PROT_READ, MAP_SHARED,
                                 file_descript, 0);
        if (file_buffer == MAP_FAILED) goto FAIL;
        MD5((unsigned char *) file_buffer, file_size, result);
        munmap(file_buffer, file_size);
    }

    close(file_descript); /* the descriptor is no longer needed */
    print_md5_sum(result);
    return result;

FAIL:
    if (file_descript >= 0) close(file_descript);
    free(result);
    return NULL;
}
/*
 * Compare two binary digests of MD5_DIGEST_LENGTH bytes.
 * Returns 1 when every byte is equal, 0 when they differ or when either
 * pointer is NULL.
 */
int md5_is_match(unsigned char *md5_1, unsigned char *md5_2) {
    if (md5_1 == NULL || md5_2 == NULL)
        return 0;

    return memcmp(md5_1, md5_2, MD5_DIGEST_LENGTH) == 0 ? 1 : 0;
}
/*
 * Compare a binary MD5 digest with a digest written as a hex string.
 *
 * md5:     binary digest of MD5_DIGEST_LENGTH bytes; may be NULL.
 * md5_str: hex string; its first 2 * MD5_DIGEST_LENGTH characters must be
 *          hex digits (either case).  May be NULL.
 *
 * Prints the parsed digest to standard output.
 *
 * Returns 1 on a match, 0 on a mismatch, when either argument is NULL, or
 * when md5_str does not start with enough hex digits.
 */
int md5_is_match_str(unsigned char *md5, const char *md5_str) {
    if (!md5 || !md5_str) { return 0; }

    /* Reject short or non-hex input up front so the parsing loop never
     * reads past the end of the string.  strspn() stops at the NUL. */
    const size_t hex_len = 2 * (size_t) MD5_DIGEST_LENGTH;
    if (strspn(md5_str, "0123456789abcdefABCDEF") < hex_len) { return 0; }

    /** Make byte array from md5_str */
    unsigned char md5_arr[MD5_DIGEST_LENGTH] = {0};
    const char *pos = md5_str;
    size_t count = 0;
    for (count = 0; count < sizeof(md5_arr) / sizeof(md5_arr[0]); count++) {
        if (sscanf(pos, "%2hhx", &md5_arr[count]) != 1) { return 0; }
        pos += 2;
    }
    for (count = 0; count < sizeof(md5_arr) / sizeof(md5_arr[0]); count++) {
        printf("%02x", md5_arr[count]);
    }
    printf("\n");

    /** actual comparison */
    return memcmp(md5, md5_arr, MD5_DIGEST_LENGTH) == 0;
}

Resources