Security considerations behind this code snippet in C

Security considerations behind this code snippet in C - c

I found this code on git and wanted to use it, but someone made a comment about a security bug in it. I don't seem to identify it:
int32_t read_arrbuff(FILE *f, uint32_t *arrmap) {
int32_t i = 0, n_map;
fread(&n_map, sizeof(n_map), 1, f);
if (n_map > 256)
return -1;
while (n_map--) {
fread(&arrmap[i++], sizeof(uint32_t), 1, f);
}
return n_map;
}

Taken in isolation, the problems include:
You don't check that f is not NULL.
You don't check that arrmap is not NULL.
You don't check that the first fread() was successful.
You don't fully validate the value that is read; negative or zero values will throw things off badly.
You don't check that the second fread() was successful.
You always return -1, regardless of whether anything worked or failed.
Which of those are security problems? At some levels, all of them. In some contexts, you could assume that f and arrmap are legitimate without checking. Not checking that the reads are successful, especially the first, is a serious problem. The negative values for n_map would be a serious issue. Claiming success when every read failed would be a problem.
When the loop completes, n_map is set to -1 (it was zero before the post-decrement). So, you return -1 on failure or success. That's not helpful. It should almost certainly return the value of n_map that was read from file, so the caller can tell how many values are in the array.
It is usually best not to hard code a size limit like 256 into the program. The interface should include the array size and you should check against the passed array size.
Working to the original interface, you could use:
#include "stderr.h"
#include <stdio.h>
#include <inttypes.h>
extern int32_t read_arrbuff(FILE *f, uint32_t *arrmap);
int32_t read_arrbuff(FILE *f, uint32_t *arrmap)
{
int32_t n_map;
if (fread(&n_map, sizeof(n_map), 1, f) != 1)
err_syserr("failed to read data (count)\n");
if (n_map > 256 || n_map <= 0)
return -1;
for (int32_t i = 0; i < n_map; i++)
{
if (fread(&arrmap[i], sizeof(uint32_t), 1, f) != 1)
err_syserr("failed to read data (value %" PRId32 ")\n", i);
}
return n_map;
}
int main(int argc, char **argv)
{
err_setarg0(argv[0]);
if (argc != 1)
err_usage("");
char file[] = "data";
/* Create data file */
FILE *fp = fopen(file, "wb");
if (fp == NULL)
err_syserr("failed to open file '%s' for writing\n", file);
int32_t nmap = 32;
if (fwrite(&nmap, sizeof(nmap), 1, fp) != 1)
err_syserr("failed to write to file '%s'\n", file);
for (int32_t i = 0; i < nmap; i++)
{
if (fwrite(&i, sizeof(i), 1, fp) != 1)
err_syserr("failed to write to file '%s'\n", file);
}
fclose(fp);
/* Read data file */
fp = fopen(file, "rb");
if (fp == NULL)
err_syserr("failed to open file '%s' for reading\n", file);
uint32_t amap[256];
int32_t rc = read_arrbuff(fp, amap);
printf("rc = %" PRId32 "\n", rc);
for (int32_t i = 0; i < rc; i++)
printf("%3" PRId32 " = %3" PRId32 "\n", i, amap[i]);
fclose(fp);
return 0;
}
You can debate whether the unilateral exits imposed by err_syserr() are appropriate. (The declarations and source for the err_*() functions is in stderr.h and stderr.c, and is available from GitHub.)
An alternative version taking the maximum array size from a function parameter is:
#include "stderr.h"
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
extern int32_t read_arrbuff(FILE *f, int32_t a_size, uint32_t arrmap[a_size]);
int32_t read_arrbuff(FILE *f, int32_t a_size, uint32_t arrmap[a_size])
{
int32_t n_map;
assert(f != NULL && arrmap != NULL && a_size > 0 && a_size <= 256);
if (fread(&n_map, sizeof(n_map), 1, f) != 1)
{
err_sysrem("failed to read data (count)\n");
return -1;
}
if (n_map > a_size || n_map <= 0)
{
err_sysrem("count %" PRId32 " is out of range 1..%" PRId32 "\n",
n_map, a_size);
return -1;
}
for (int32_t i = 0; i < n_map; i++)
{
if (fread(&arrmap[i], sizeof(uint32_t), 1, f) != 1)
{
err_syserr("failed to read data (value %" PRId32 " of %" PRId32 ")\n",
i, n_map);
}
}
return n_map;
}
int main(int argc, char **argv)
{
err_setarg0(argv[0]);
if (argc != 1)
err_usage("");
char file[] = "data";
/* Create data file */
FILE *fp = fopen(file, "wb");
if (fp == NULL)
err_syserr("failed to open file '%s' for writing\n", file);
int32_t nmap = 32;
if (fwrite(&nmap, sizeof(nmap), 1, fp) != 1)
err_syserr("failed to write to file '%s'\n", file);
for (int32_t i = 0; i < nmap; i++)
{
if (fwrite(&i, sizeof(i), 1, fp) != 1)
err_syserr("failed to write to file '%s'\n", file);
}
fclose(fp);
/* Read data file */
fp = fopen(file, "rb");
if (fp == NULL)
err_syserr("failed to open file '%s' for reading\n", file);
enum { AMAP_SIZE = 256 };
uint32_t amap[AMAP_SIZE];
int32_t rc = read_arrbuff(fp, AMAP_SIZE, amap);
printf("rc = %" PRId32 "\n", rc);
for (int32_t i = 0; i < rc; i++)
printf("%3" PRId32 " = %3" PRId32 "\n", i, amap[i]);
fclose(fp);
return 0;
}
This version reports a specific error and returns on error. It makes semi-appropriate assertions on entry (the 256 limit is not necessarily appropriate, but is consistent with the original code).
The output is unexciting (and the same from both):
rc = 32
0 = 0
1 = 1
2 = 2
3 = 3
4 = 4
5 = 5
6 = 6
7 = 7
8 = 8
9 = 9
10 = 10
11 = 11
12 = 12
13 = 13
14 = 14
15 = 15
16 = 16
17 = 17
18 = 18
19 = 19
20 = 20
21 = 21
22 = 22
23 = 23
24 = 24
25 = 25
26 = 26
27 = 27
28 = 28
29 = 29
30 = 30
31 = 31

Related

realloc() C-language change value in int array

I'm trying to use realloc() every loop, so I only use necessary memory for my int array in C, but the output values are changed. Nevertheless, when using Valgrind on my code, I have the right values.
I'm doing the first day of Advent of Code 2022.
The input file is a .txt file that looks like this:
7569
1357
10134
4696
4423
8869
3562
6597
4038
9038
1352
8005
4811
6281
3961
4023
7234
3510
7728
1569
4583
7495
3941
6015
6531
2637
I was trying to sum numbers and store it in my array in a specific index, and if there is a blank line increase my index.
Given that example input, it should print like this:
elf [0] = 47207
elf [1] = 41509
elf [2] = 51243
What I got:
elf [245] = 63138
elf [246] = 181168
elf [247] = 41570
elf [248] = 36264
elf [249] = 59089
elf [250] = 185061
What I want (result using valgrind):
elf [245] = 63138
elf [246] = 52399
elf [247] = 41570
elf [248] = 36264
elf [249] = 59089
elf [250] = 56308
My code:
int *read_calories(char *filename)
{
FILE *fp = fopen(filename, "r");
char *line = NULL;
int i = 0;
size_t len = 0;
ssize_t nread;
struct stat size;
stat(filename, &size);
int tab_size = 1;
int *calories = malloc(sizeof(int) * 2);
if (fp == NULL)
{
perror("Can't open file\n");
exit(EXIT_FAILURE);
}
while ((nread = getline(&line, &len, fp)) != -1)
{
if (nread == 1) {
i++;
++tab_size;
calories = realloc(calories, tab_size * sizeof(int));
} else {
calories[i] += atoi(line);
}
}
calories[i + 1] = '\0';
free(line);
fclose(fp);
return calories;
}
int main()
{
int *calories = read_calories("input.txt");
for (int i = 0; calories[i] != '\0'; i++) {
printf("elf [%d] = %d \n", i, calories[i]);
}
free(calories);
return 0;
}

Your calorie reading code has good stuff in it, but is rather disorganized. The data allocated by malloc() is not zeroed, so using += in calories[i] += atoi(line); is not good. You've not shown the input data format.
It isn't clear whether you have to read a bunch of numbers up to a blank line and store the sum in the array (and rinse and repeat to EOF), or whether you just need to read numbers from the file and store those into the array.
Each line has a number to be stored separately
The code below assumes that each line contains a number that should be stored in the array. Adapting to the other style of processing is not difficult.
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern int *read_calories(const char *filename);
int *read_calories(const char *filename)
{
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Failed to open file '%s' for reading (%d: %s)\n", filename, errno, strerror(errno));
exit(EXIT_FAILURE);
}
int tab_used = 0;
int tab_size = 2;
int *calories = malloc(sizeof(int) * tab_size);
if (calories == NULL)
{
fprintf(stderr, "Failed to allocate memory (%d: %s)\n", errno, strerror(errno));
exit(EXIT_FAILURE);
}
char *line = NULL;
size_t len = 0;
while (getline(&line, &len, fp) != -1)
{
if (tab_used == tab_size - 1)
{
size_t new_size = 2 * tab_size;
void *new_data = realloc(calories, new_size * sizeof(int));
if (new_data == NULL)
{
fprintf(stderr, "Failed to allocate memory (%d: %s)\n", errno, strerror(errno));
exit(EXIT_FAILURE);
}
calories = new_data;
tab_size = new_size;
}
calories[tab_used++] = atoi(line);
}
calories[tab_used] = 0;
free(line);
fclose(fp);
return calories;
}
int main(void)
{
int *calories = read_calories("input.txt");
assert(calories != NULL);
for (int i = 0; calories[i] != 0; i++)
printf("elf [%d] = %d \n", i, calories[i]);
free(calories);
return 0;
}
I'm not keen on perror() — it does a job and is simple, but it's relatively hard to get good messages out of it. The code ensures there's an extra entry in the array for a zero entry at the end. It does not, however, spot a zero entry in the middle of the array. That would usually be caused by atoi() failing to convert the value.
I generated an input.txt file containing 10 random values between 100 and 1000:
478
459
499
997
237
423
185
630
964
594
The output from the program was:
elf [0] = 478
elf [1] = 459
elf [2] = 499
elf [3] = 997
elf [4] = 237
elf [5] = 423
elf [6] = 185
elf [7] = 630
elf [8] = 964
elf [9] = 594
Blocks of numbers to be summed, separated by blank lines
This code is closely based on the previous answer, but the 'add to the array' code is extracted into a function so it can be used twice. It might be better to use a structure to encapsulate the array details. I should probably also use size_t rather than int for the sizes.
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void add_to_array(int **table, int *tab_size, int *tab_used, int value)
{
if (*tab_used == *tab_size - 1)
{
size_t new_size = 2 * *tab_size;
void *new_data = realloc(*table, new_size * sizeof(int));
if (new_data == NULL)
{
fprintf(stderr, "Failed to allocate memory (%d: %s)\n", errno, strerror(errno));
exit(EXIT_FAILURE);
}
*table = new_data;
*tab_size = new_size;
}
(*table)[(*tab_used)++] = value;
}
static int *read_calories(const char *filename)
{
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Failed to open file '%s' for reading (%d: %s)\n", filename, errno, strerror(errno));
exit(EXIT_FAILURE);
}
int tab_used = 0;
int tab_size = 2;
int *calories = malloc(sizeof(int) * tab_size);
if (calories == NULL)
{
fprintf(stderr, "Failed to allocate memory (%d: %s)\n", errno, strerror(errno));
exit(EXIT_FAILURE);
}
char *line = NULL;
size_t len = 0;
int current_sum = 0;
ssize_t nread;
while ((nread = getline(&line, &len, fp)) != -1)
{
if (nread == 1)
{
add_to_array(&calories, &tab_size, &tab_used, current_sum);
current_sum = 0;
}
else
current_sum += atoi(line);
}
if (current_sum > 0)
add_to_array(&calories, &tab_size, &tab_used, current_sum);
calories[tab_used] = 0;
free(line);
fclose(fp);
return calories;
}
int main(void)
{
int *calories = read_calories("input.txt");
assert(calories != NULL);
for (int i = 0; calories[i] != 0; i++)
printf("elf [%d] = %d \n", i, calories[i]);
free(calories);
return 0;
}
Revised data file:
184
861
513
507
790
897
715
287
729
534
777
945
950
696
605
287
763
839
860
779
522
140
281
190
744
976
420
462
591
710
435
707
580
855
208
806
205
799
537
395
922
356
397
464
435
470
973
203
713
264
(Note that there isn't a blank line at the end!)
Output:
elf [0] = 2855
elf [1] = 4884
elf [2] = 2251
elf [3] = 3528
elf [4] = 2853
elf [5] = 4968
elf [6] = 1810
elf [7] = 932
elf [8] = 4017
elf [9] = 1180
Awk script to cross-check the result:
awk 'NF == 0 { print sum; sum = 0 } NF == 1 { sum += $1 } END { print sum }' input.txt
Results:
2855
4884
2251
3528
2853
4968
1810
932
4017
1180

Changing STDOUT to file in ncat source code

I managed to compile ncat. I am using -k option to keep server open. Instead of accepting data to STDOUT, my goal is to write to files instead. So far I was able to write to a file instead of STDOUT but my goal is to loop through new files on each new connection. Right now it is appending to the same filename_0 and f++ is not incrementing. Here is what I have so far. The original code will be below. The difference is in the else clause, basically if n is actually greater than 0. On each loop, n is 512 bytes until the last chunk. I just want to be able to have new files from each new connection. filename_0, filename_1, filename_3, etc.
MODIFIED CODE:
/* Read from a client socket and write to stdout. Return the number of bytes
read from the socket, or -1 on error. */
int read_socket(int recv_fd)
{
char buf[DEFAULT_TCP_BUF_LEN];
struct fdinfo *fdn;
int nbytes, pending;
int f = 0;
fdn = get_fdinfo(&client_fdlist, recv_fd);
ncat_assert(fdn != NULL);
nbytes = 0;
do {
int n, s;
n = ncat_recv(fdn, buf, 512, &pending);
if (n <= 0) {
if (o.debug)
logdebug("Closing fd %d.\n", recv_fd);
#ifdef HAVE_OPENSSL
if (o.ssl && fdn->ssl) {
if (nbytes == 0)
SSL_shutdown(fdn->ssl);
SSL_free(fdn->ssl);
}
#endif
close(recv_fd);
checked_fd_clr(recv_fd, &master_readfds);
rm_fd(&client_fdlist, recv_fd);
checked_fd_clr(recv_fd, &master_broadcastfds);
rm_fd(&broadcast_fdlist, recv_fd);
conn_inc--;
if (get_conn_count() == 0)
checked_fd_clr(STDIN_FILENO, &master_readfds);
return n;
}
else {
char filename[20];
snprintf(filename, sizeof(char) * 20, "filename_%i", f);
FILE *fp = fopen(filename, "a");
if (fp == NULL)
{
printf("Could not open file");
return 0;
}
//Write(STDOUT_FILENO, buf, n);
s = fwrite(buf, 1, n, fp);
fclose(fp);
f++;
nbytes += n;
}
} while (pending);
return nbytes;
}
ORIGINAL CODE:
int read_socket(int recv_fd)
{
char buf[DEFAULT_TCP_BUF_LEN];
struct fdinfo *fdn;
int nbytes, pending;
fdn = get_fdinfo(&client_fdlist, recv_fd);
ncat_assert(fdn != NULL);
nbytes = 0;
do {
int n;
n = ncat_recv(fdn, buf, sizeof(buf), &pending);
if (n <= 0) {
if (o.debug)
logdebug("Closing fd %d.\n", recv_fd);
#ifdef HAVE_OPENSSL
if (o.ssl && fdn->ssl) {
if (nbytes == 0)
SSL_shutdown(fdn->ssl);
SSL_free(fdn->ssl);
}
#endif
close(recv_fd);
checked_fd_clr(recv_fd, &master_readfds);
rm_fd(&client_fdlist, recv_fd);
checked_fd_clr(recv_fd, &master_broadcastfds);
rm_fd(&broadcast_fdlist, recv_fd);
conn_inc--;
if (get_conn_count() == 0)
checked_fd_clr(STDIN_FILENO, &master_readfds);
return n;
}
else {
Write(STDOUT_FILENO, buf, n);
nbytes += n;
}
} while (pending);
return nbytes;
}

I was able to figure out using the other functions involved. i passed a pointer into this function to write to it. the handler is a function i added the open() file pointer to.

Sync issue in Linux C reading and writing to a file concurrently

I'm trying to test possible problems for reading from and writing to same files concurrently on an embedded device which has embedded Linux on it.
I have two processes: Writer and Reader. As the names suggest, the Writer constantly writes 1 of 3 const strings to 10 files one-by-one and at the same time the Reader reads the same files and compares the outputs to be sure they are correct. In order to overcome synchronization issues, I thought I could use mandatory file locking mechanism.
Note: Reader process is actually a placeholder for an user application which will read the files in a similar fashion. Since it will not be in my control, advisory file locking will not be applicable.
I have mounted a tmpfs with mandatory file locking enabled.
mount -t tmpfs -o mand,size=1m tmpfs /tmp2
The Writer creates 10 files with mandatory file locking enabled. And in each iteration one of the three strings below is written to files.
const char* s1 = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679";
const char* s2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const char* s3 = "************************************************************************";
The Reader reads these files one-by-one and reports if there is a mismatch. And it occasionally (roughly once in every 1000 iterations, see the codes below) reads strings like:
"************************************************************************62862089986280348253421170679"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz********************"
This is exactly the problem I was expecting to encounter. The smaller strings are written on top of the bigger ones and the remaining characters are not deleted.
I tried to use fsync(fn) after writing to file, but it did not work. It should not be needed anyway, since this is a tmpfs.
What is the reason for this and what can I do to avoid this?
Here are the codes for the Writer and the Reader:
The Writer
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <errno.h>
const char* s1 = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679";
const char* s2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const char* s3 = "************************************************************************";
int main()
{
// create /tmp2/test directory
if (mkdir("/tmp2/test", 0777) == -1) {
if (errno != EEXIST) {
perror("mkdir");
return -1;
}
}
// create 10 input files input0, input1, ..., input9
for (int i = 0; i < 10; i++) {
char path[50] = "/tmp2/test/input";
sprintf(path + 16, "%d", i);
FILE* fp = fopen(path, "w");
if (fp == NULL) {
fprintf(stderr, "Unable to create file %s\n", path);
perror("create: ");
return -1;
}
chmod(path, 02644); //mandatory file locking enabled
fclose(fp);
}
// flock structure
struct flock fl;
fl.l_type = F_WRLCK;
fl.l_whence = SEEK_SET;
fl.l_start = 0;
fl.l_len = 0;
fl.l_pid = 0;
// loop
for(int step = 0; ; step++) {
usleep(50000);
for (int i = 0; i < 10; i++) {
char path[50] = "/tmp2/test/input";
sprintf(path + 16, "%d", i);
FILE* fp = fopen(path, "w+");
if (fp == NULL) {
fprintf(stderr, "Unable to open file %s\n", path);
perror("fopen: ");
return -1;
}
int fd = fileno(fp);
// place a write lock
fl.l_type = F_WRLCK;
if (fcntl(fd, F_SETLK, &fl) == -1) {
perror("lock: ");
return -1;
}
// write 1 of 3 strings
if (step % 3 == 0) {
write(fd, s1, strlen(s1));
} else if (step % 3 == 1) {
write(fd, s2, strlen(s2));
} else {
write(fd, s3, strlen(s3));
}
//fsync(fd); // fsync should not be needed since this is a tmpfs
// unlock
fl.l_type = F_UNLCK;
if (fcntl(fd, F_SETLK, &fl) == -1) {
perror("lock: ");
return -1;
}
fclose(fp);
}
}
return 0;
}
The Reader:
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <errno.h>
const char* s1 = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679";
const char* s2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const char* s3 = "************************************************************************";
int main()
{
// counters for errors
int err_ctr = 0;
int read_err_ctr = 0;
// loop
for(int step = 0; ; step++) {
usleep(50000);
for (int i = 0; i < 10; i++) {
char path[50] = "/tmp2/test/input";
sprintf(path + 16, "%d", i);
FILE* fp = fopen(path, "r");
if (fp == NULL) {
fprintf(stderr, "Unable to open file %s\n", path);
perror("read: ");
return -1;
}
// read the file
char reading[150];
if (fgets(reading, 150, fp) == NULL) {
fclose(fp);
read_err_ctr++;
printf("Step = %d; ReadError = %d; from %s\n", step, read_err_ctr, path);
continue;
}
fclose(fp);
// continue if the string matches
if (strcmp(reading, s1) == 0) {
continue;
} else if (strcmp(reading, s2) == 0) {
continue;
} else if (strcmp(reading, s3) == 0) {
continue;
}
// print error
err_ctr++;
printf("Step = %d; Error = %d; I've read %s from %s\n", step, err_ctr, reading, path);
}
}
return 0;
}

Linux C read file UNICODE formatted text (notepad Windows)

Is there a way to read a text file, under Linux with C, saved on Windows as "UNICODE" with notepad?
The text in Linux with nano editor looks like:
��T^#e^#s^#t^#
^#
but under vi editor is read properly as:
Test
I must specify the text is normal strings ANSI (no Unicode characters or foreign languages related).
Tried like this but no result:
#include <stdio.h>
#include <wchar.h>
#include <locale.h>
int main() {
char *loc = setlocale(LC_ALL, 0);
setlocale(LC_ALL, loc);
FILE * f = fopen("unicode.txt", "r");
wint_t c;
while((c = fgetwc(f)) != WEOF) {
wprintf(L"%lc\n", c);
}
return 0;
}
UPDATE:
Forgot to mention the file format is Little-endian UTF-16 Unicode text or UTF-16LE

Include <wchar.h>, set an UTF-8 locale (setlocale(LC_ALL, "en_US.UTF-8") is fine), open the file or stream in byte-oriented mode (handle=fopen(filename, "rb"), fwide(handle,-1), i.e. in not-wide mode). Then you can use
wint_t getwc_utf16le(FILE *const in)
{
int lo, hi, code, also;
if ((lo = getc(in)) == EOF)
return WEOF;
if ((hi = getc(in)) == EOF)
return lo; /* Or abort; input sequence ends prematurely */
code = lo + 256 * hi;
if (code < 0xD800 || code > 0xDBFF)
return code; /* Or abort; input sequence is not UTF16-LE */
if ((lo = getc(in)) == EOF)
return code; /* Or abort; input sequence ends prematurely */
if ((hi = getc(in)) == EOF) {
ungetc(lo, in);
return code; /* Or abort; input sequence ends prematurely */
}
/* Note: if ((lo + 256*hi) < 0xDC00 || (lo + 256*hi) > 0xDFFF)
* the input sequence is not valid UTF16-LE. */
return 0x10000 + ((code & 0x3FF) << 10) + ((lo + 256 * hi) & 0x3FF);
}
to read code points from such an input file, assuming it contains UTF16-LE data.
The above function is more permissive than strictly necessary, but it does parse all UTF16-LE I could throw at it (including the sometimes problematic U+100000..U+10FFFF code points), so if the input is correct, this function should handle it just fine.
Because the locale is set to UTF-8 in Linux, and Linux implementations support the full Unicode set, the code points match the ones produced by above functions, and you can safely use wide character functions (from <wchar.h>) to handle the input.
Often the first character in the file is BOM, "byte-order mark", 0xFEFF. You can ignore it if it is the first character in the file. Elsewhere it is the zero-width non-breaking space. In my experience, those two bytes at the start of a file that is supposed to be text, is quite reliable indicator that the file is UTF16-LE. (So, you could peek at the first two bytes, and if they match those, assume it is UTF16-LE.)
Remember that wide-character end-of-file is WEOF, not EOF.
Hope this helps.
Edited 20150505: Here is a helper function one could use instead, to read inputs (using low-level unistd.h interface), converting to UTF-8: read_utf8.h:
#ifndef READ_UTF8_H
#define READ_UTF8_H
/* Read input from file descriptor fd,
* convert it to UTF-8 (using "UTF8//TRANSLIT" iconv conversion),
* and appending to the specified buffer.
* (*dataptr) points to a dynamically allocated buffer (may reallocate),
* (*sizeptr) points to the size allocated for that buffer,
* (*usedptr) points to the amount of data already in the buffer.
* You may initialize the values to NULL,0,0, in which case they will
* be dynamically allocated as needed.
*/
int read_utf8(char **dataptr, size_t *sizeptr, size_t *usedptr, const int fd, const char *const charset);
#endif /* READ_UTF8_H */
read_utf8.c:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <iconv.h>
#include <string.h>
#include <errno.h>
#define INPUT_CHUNK 16384
#define OUTPUT_CHUNK 8192
int read_utf8(char **dataptr, size_t *sizeptr, size_t *usedptr, const int fd, const char *const charset)
{
char *data;
size_t size;
size_t used;
char *input_data;
size_t input_size, input_head, input_tail;
int input_more;
iconv_t conversion = (iconv_t)-1;
if (!dataptr || !sizeptr || !usedptr || fd == -1 || !charset || !*charset)
return errno = EINVAL;
if (*dataptr) {
data = *dataptr;
size = *sizeptr;
used = *usedptr;
if (used > size)
return errno = EINVAL;
} else {
data = NULL;
size = 0;
used = 0;
}
conversion = iconv_open("UTF8//TRANSLIT", charset);
if (conversion == (iconv_t)-1)
return errno = ENOTSUP;
input_size = INPUT_CHUNK;
input_data = malloc(input_size);
if (!input_data) {
if (conversion != (iconv_t)-1)
iconv_close(conversion);
errno = ENOMEM;
return 0;
}
input_head = 0;
input_tail = 0;
input_more = 1;
while (1) {
if (input_tail > input_head) {
if (input_head > 0) {
memmove(input_data, input_data + input_head, input_tail - input_head);
input_tail -= input_head;
input_head = 0;
}
} else {
input_head = 0;
input_tail = 0;
}
if (input_more && input_tail < input_size) {
ssize_t n;
do {
n = read(fd, input_data + input_tail, input_size - input_tail);
} while (n == (ssize_t)-1 && errno == EINTR);
if (n > (ssize_t)0)
input_tail += n;
else
if (n == (ssize_t)0)
input_more = 0;
else
if (n != (ssize_t)-1) {
free(input_data);
iconv_close(conversion);
return errno = EIO;
} else {
const int errcode = errno;
free(input_data);
iconv_close(conversion);
return errno = errcode;
}
}
if (input_head == 0 && input_tail == 0)
break;
if (used + OUTPUT_CHUNK > size) {
size = (used / (size_t)OUTPUT_CHUNK + (size_t)2) * (size_t)OUTPUT_CHUNK;
data = realloc(data, size);
if (!data) {
free(input_data);
iconv_close(conversion);
return errno = ENOMEM;
}
*dataptr = data;
*sizeptr = size;
}
{
char *source_ptr = input_data + input_head;
size_t source_len = input_tail - input_head;
char *target_ptr = data + used;
size_t target_len = size - used;
size_t n;
n = iconv(conversion, &source_ptr, &source_len, &target_ptr, &target_len);
if (n == (size_t)-1 && errno == EILSEQ) {
free(input_data);
iconv_close(conversion);
return errno = EILSEQ;
}
if (source_ptr == input_data + input_head && target_ptr == data + used) {
free(input_data);
iconv_close(conversion);
return errno = EDEADLK;
}
input_head = (size_t)(source_ptr - input_data);
used = (size_t)(target_ptr - data);
*usedptr = used;
}
}
free(input_data);
iconv_close(conversion);
if (used + 16 >= size) {
size = (used | 15) + 17;
data = realloc(data, size);
if (!data)
return errno = ENOMEM;
*dataptr = data;
*sizeptr = size;
memset(data + used, 0, size - used);
} else
if (used + 32 < size)
memset(data + used, 0, size - used);
else
memset(data + used, 0, 32);
return errno = 0;
}
and an example program, example.c, on how to use it:
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include "read_utf8.h"
int main(int argc, char *argv[])
{
char *file_buffer = NULL;
size_t file_allocd = 0;
size_t file_length = 0;
int fd;
if (argc != 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s FILENAME CHARSET\n", argv[0]);
fprintf(stderr, " %s FILENAME CHARSET//IGNORE\n", argv[0]);
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
do {
fd = open(argv[1], O_RDONLY | O_NOCTTY);
} while (fd == -1 && errno == EINTR);
if (fd == -1) {
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
if (read_utf8(&file_buffer, &file_allocd, &file_length, fd, argv[2])) {
if (errno == ENOTSUP)
fprintf(stderr, "%s: Unsupported character set.\n", argv[2]);
else
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
errno = EIO;
if (close(fd)) {
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return EXIT_FAILURE;
}
fprintf(stderr, "%s: read %zu bytes, allocated %zu.\n", argv[1], file_length, file_allocd);
if (file_length > 0)
if (fwrite(file_buffer, file_length, 1, stdout) != 1) {
fprintf(stderr, "Error writing to standard output.\n");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
This lets you read (either into an empty, dynamically allocated buffer, or append to an existing dynamically allocated buffer) using any character set supported by your system (use iconv --list to see the list), auto-converting the contents to UTF-8.
It uses a temporary input buffer (of INPUT_CHUNK bytes) to read the file part by part, and reallocates the output buffer in multiples of OUTPUT_CHUNK bytes, keeping at least OUTPUT_CHUNK bytes available for each conversion. The constants may need a bit of tuning for different use cases; they're by no means optimal or even suggested values. Larger ones lead to faster code, especially for INPUT_CHUNK, as most filesystems perform better when reading large chunks (2097152 is suggested size currently, if I/O performance is important) -- but you should have OUTPUT_CHUNK at similar size, or perhaps twice that, to reduce the number of reallocations needed. (You can trim the resulting buffer afterwards, to used+1 bytes, using realloc(), to avoid memory waste.)

Printing Partition Table - C program

I am trying to print a partition table using C programming language, everything seems to work fine: Opening and reading, but I don't understand why it is printing garbage values.
Here is the code:
struct partition
{
unsigned char drive;
unsigned char chs_begin[3];
unsigned char sys_type;
unsigned char chs_end[3];
unsigned char start_sector[4];
unsigned char nr_sector[4];
};
int main()
{
int gc = 0, i = 1, nr = 0, pos = -1, nw = 0;
int fd =0;
char buf[512] ;
struct partition *sp;
printf("Ok ");
if ( (fd = open("/dev/sda", O_RDONLY | O_SYNC )) == -1)
{
perror("Open");
exit(1);
}
printf("fd is %d \n", fd);
pos = lseek (fd, 0, SEEK_CUR);
printf("Position of pointer is :%d\n", pos);
if ((nr = read(fd, buf, sizeof(buf))) == -1)
{
perror("Read");
exit(1);
}
close(fd);
printf("Size of buf = %d\n and number of bytes read are %d ", sizeof(buf), nr);
if ((nw = write(1, buf, 64)) == -1)
{
printf("Write: Error");
exit(1);
}
printf("\n\n %d bytes are just been written on stdout\n", nw,"this can also be printed\n");
printf("\n\t\t*************Partition Table****************\n\n");
for (i=0 ; i<4 ; i++)
{
sp = (struct partition *)(buf + 446 + (16 * i));
putchar(sp -> drive);
}
return 0;
}
It is printing garbage instead of partition table.
I might have some basic understanding issues but I searched with Google for a long time but it did not really help. I also saw the source code of fdisk but it is beyond my understanding at this point. Could anyone please guide me? I am not expecting someone to clear my mistake and give me the working code. Just a sentence or two - or any link.

Try this:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
struct partition
{
unsigned char boot_flag; /* 0 = Not active, 0x80 = Active */
unsigned char chs_begin[3];
unsigned char sys_type; /* For example : 82 --> Linux swap, 83 --> Linux native partition, ... */
unsigned char chs_end[3];
unsigned char start_sector[4];
unsigned char nr_sector[4];
};
void string_in_hex(void *in_string, int in_string_size);
void dump_partition(struct partition *part, int partition_number);
void dump_partition(struct partition *part, int partition_number)
{
printf("Partition /dev/sda%d\n", partition_number + 1);
printf("boot_flag = %02X\n", part->boot_flag);
printf("chs_begin = ");
string_in_hex(part->chs_begin, 3);
printf("sys_type = %02X\n", part->sys_type);
printf("chs_end = ");
string_in_hex(part->chs_end, 3);
printf("start_sector = ");
string_in_hex(part->start_sector, 4);
printf("nr_sector = ");
string_in_hex(part->nr_sector, 4);
}
void string_in_hex(void *in_string, int in_string_size)
{
int i;
int k = 0;
for (i = 0; i < in_string_size; i++)
{
printf("%02x ", ((char *)in_string)[i]& 0xFF);
k = k + 1;
if (k == 16)
{
printf("\n");
k = 0;
}
}
printf("\n");
}
int main(int argc, char **argv)
{
int /*gc = 0,*/ i = 1, nr = 0, pos = -1/*, nw = 0*/;
int fd = 0;
char buf[512] ;
struct partition *sp;
int ret = 0;
printf("Ok ");
if ((fd = open("/dev/sda", O_RDONLY | O_SYNC)) == -1)
{
perror("Open");
exit(1);
}
printf("fd is %d\n", fd);
pos = lseek (fd, 0, SEEK_CUR);
printf("Position of pointer is :%d\n", pos);
if ((nr = read(fd, buf, sizeof(buf))) == -1)
{
perror("Read");
exit(1);
}
ret = close(fd);
if (ret == -1)
{
perror("close");
exit(1);
}
/* Dump the MBR buffer, you can compare it on your system with the output of the command:
* hexdump -n 512 -C /dev/sda
*/
string_in_hex(buf, 512);
printf("Size of buf = %d - and number of bytes read are %d\n", sizeof(buf), nr);
/*if ((nw = write(1, buf, 64)) == -1)
{
printf("Write: Error");
exit(1);
}
printf("\n\n%d bytes are just been written on stdout\nthis can also be printed\n", nw);
*/
//printf("\n\t\t*************Partition Table****************\n\n");
printf("\n\t\t*************THE 4 MAIN PARTITIONS****************\n\n");
/* Dump main partitions (4 partitions) */
/* Note : the 4 partitions you are trying to dump are not necessarily existing! */
for (i = 0 ; i < 4 ; i++)
{
sp = (struct partition *)(buf + 446 + (16 * i));
//putchar(sp->boot_flag);
dump_partition(sp, i);
}
return 0;
}