Reading a UTF-16 CSV file by char - c

Currently I am trying to read a UTF-16 encoded CSV file char by char, and convert each char into ASCII so I can process it. I later plan to convert my processed data back to UTF-16, but that is beside the point right now.
I know right off the bat I am doing this completely wrong, as I have never attempted anything like this before:
/*
 * Attempt from the question, with the read loop repaired.
 * Reads x.csv one byte at a time and echoes each byte to stdout.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(void)
{
    FILE *fp;
    int ch; /* int, not char, so EOF stays distinguishable from data bytes */

    if (!(fp = fopen("x.csv", "r"))) return 1;

    /* Read first, then test: ch is never used before it is assigned and
       EOF itself is never passed to printf. */
    while ((ch = fgetc(fp)) != EOF)
    {
        /* These casts only convert the single byte that was just read;
           they do not combine the two bytes of a UTF-16 code unit. */
        ch = (wchar_t) ch;
        ch = (char) ch;
        printf("%c", ch);
    }
    fclose(fp);
    return 0;
}
Wishfully thinking, I was hoping that this would work by magic for some reason, but that was not the case. How can I read a UTF-16 CSV file and convert it to ASCII? My guess is that since each UTF-16 char is two bytes (I think? — more precisely, each UTF-16 code unit is two bytes, and characters outside the Basic Multilingual Plane take two code units), I'm going to have to read two bytes at a time from the file into a variable of some datatype which I am not sure of. Then I guess I will have to check the bits of this variable to make sure it is valid ASCII and convert it from there? I don't know how I would do this though, and any help would be great.

You should use fgetwc. The below code should work in the presence of a byte-order mark, and an available locale named en_US.UTF-16.
#include <stdio.h>
#include <wchar.h>
#include <locale.h>
/*
 * Prints the contents of the UTF-16 file x.csv to stdout as ASCII.
 *
 * Relies on a locale named en_US.UTF-16 being installed so that fgetwc()
 * hands back 16-bit code units; many systems do not ship such a locale,
 * in which case the program reports that and exits with status 1.
 *
 * Returns 0 on success, 1 if the locale or the file is unavailable or a
 * read error occurs.
 */
int main(void) {
    if (setlocale(LC_ALL, "en_US.UTF-16") == NULL) {
        fputs("locale en_US.UTF-16 is not available\n", stderr);
        return 1;
    }

    FILE *fp = fopen("x.csv", "rb");
    if (fp == NULL) {
        perror("opening x.csv");
        return 1;
    }

    /* Read the byte-order mark with fgetwc() as well: a stream must not mix
       byte reads (fgetc) and wide reads (fgetwc). */
    wint_t ch = fgetwc(fp);
    /* A BOM that arrives as 0xFFFE means every unit is byte-swapped. */
    int order = (ch == 0xFFFE);
    if (ch != WEOF && ch != 0xFEFF && ch != 0xFFFE) {
        /* No BOM (or the locale already consumed it): keep the character. */
        ungetwc(ch, fp);
    }

    while ((ch = fgetwc(fp)) != WEOF) {
        unsigned long unit = (unsigned long)ch;
        if (order) {
            unit = ((unit >> 8) | (unit << 8)) & 0xFFFFUL;
        }
        /* Anything outside the ASCII range has no one-byte equivalent. */
        putchar(unit < 0x80 ? (int)unit : '?');
    }
    putchar('\n');

    int failed = ferror(fp);
    if (failed) {
        perror("reading x.csv");
    }
    fclose(fp);
    return failed ? 1 : 0;
}

This is my solution, thanks to the comments under my original question. Since every character in the CSV file is valid ASCII, the solution was as simple as this:
/*
 * Walks x.csv byte by byte and singles out every odd-numbered byte
 * (1st, 3rd, 5th, ...) as the ASCII half of a two-byte UTF-16 unit.
 * NOTE(review): this presumes little-endian UTF-16 containing only ASCII
 * characters, and a leading byte-order mark, if present, is not skipped -
 * confirm against the actual file.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(void)
{
    FILE *fp;
    int ch, i = 1;

    if (!(fp = fopen("x.csv", "r"))) return 1;

    /* Read before testing so ch is never used uninitialized. */
    while ((ch = fgetc(fp)) != EOF)
    {
        if (i % 2)
        {
            /* ch is valid ascii: process it here */
        }
        /* Count every byte; incrementing only inside the if would leave
           i stuck at 2 after the first byte. */
        i++;
    }
    fclose(fp);
    return 0;
}

Related

fgetc only reads UTF8 encoded file. not working for UTF16

My aim is to find the encoding of a text file by dividing the size of the file by the number of characters in the file, but fgetc only seems to read UTF-8 encoded files; it is not working for UTF-16. Kindly help me solve this problem, or suggest a substitute for fgetc if there is one.
#include <stdio.h>
#include <stdlib.h>
/* Declared up front so the call below is checked against a prototype. */
int findEncode(char *str);

/* Entry point: reports the guessed encoding of one hard-coded test file. */
int main(void)
{
    findEncode("C:\\UTF-8_TestCase\\TestCase1.txt");
    return 0;
}
/* Helpers defined further down in the file. */
int NumberOfCharecter(char *str);
int SizeOfFile(char *str);

/*
 * Guesses the encoding of the file named by str from the ratio
 * "file size / number of characters" and prints the guess
 * ("UTF-8", "UTF-16" or "UTF-32") to stdout.
 *
 * Returns 0 after printing a guess, -1 if either measurement is not
 * positive (empty or unreadable file), in which case nothing is guessed.
 */
int findEncode(char *str){
    int ch = NumberOfCharecter(str);
    int size = SizeOfFile(str);

    /* Guard the division below: an empty file yields a count of 0. */
    if (ch <= 0 || size <= 0) {
        fprintf(stderr, "Cannot determine the encoding of %s\n", str);
        return -1;
    }

    if(size/ch == 1){
        printf("UTF-8");
    }else if(size/ch == 2){
        printf("UTF-16");
    }else {
        printf("UTF-32");
    }
    return 0;
}
// Counts how many times fgetc() delivers a value before the loop stops,
// prints that count together with the file name, and returns it.
// NOTE(review): fgetc() delivers one byte per call, so this counts bytes,
// not encoded characters.
int NumberOfCharecter(char *str){
FILE *fptr;
// NOTE(review): fgetc() returns an int; storing it in a char makes the
// comparison with EOF below unreliable.
char ch;
int character=1; // starts at 1; the reported result is character-1
fptr=fopen(str,"r");
if(fptr==NULL)
{
printf("File does not exist or can not be opened.");
// NOTE(review): execution continues after this message, so the fgetc()
// below is still called with the NULL stream.
}
while(1)
{
ch = fgetc(fptr); //fgetc only reads UTF8 encoded file. not working for UTF16
if(ch==EOF)
break;
character++;
}
fclose(fptr);
printf("The number of characters in the file %s are : %d\n\n",str,character-1);
return character-1;
}
//SizeOfFile working well
/*
 * Returns the size in bytes of the file named by str and prints it.
 *
 * The file is opened read-only in binary mode, so read-only files work and
 * the end offset is a plain byte count.
 *
 * Returns -1 if the file cannot be opened, its size cannot be determined,
 * or the size does not fit in an int.
 */
int SizeOfFile(char *str) {
    FILE *fptr = fopen(str, "rb");
    if (fptr == NULL) {
        printf("File does not exist or can not be opened.");
        return -1;
    }

    long sz = -1;
    if (fseek(fptr, 0, SEEK_END) == 0) {
        sz = ftell(fptr); /* ftell reports a long, -1 on failure */
    }
    fclose(fptr);

    /* Reject failures and sizes that would not survive the cast to int. */
    if (sz < 0 || sz != (long)(int)sz) {
        return -1;
    }

    printf("the size of the file is %ld \n\n", sz);
    return (int)sz;
}
char ch;
…
ch = fgetc(fptr); //…
if(ch==EOF)
You wrongly assign the return value of fgetc() to a char; in order to compare it to EOF, you have to define int ch. After this, you'll find that NumberOfCharecter() returns the same number as SizeOfFile(), since the character read by fgetc() is not a character in the sense of an encoding, it's independent from that.

Using file and loop in c

In this program, how do I convert uppercase letters to lowercase and lowercase letters to uppercase at the same time?
I have tried many times but it is not working.
My expectations, Suppose for example
Input:
from read.txt (original content of the file:
Hello World)
Output
hELLO wORLD
This is my code....
(I can only convert in one direction. I could not convert uppercase to lowercase and lowercase to uppercase at the same time.)
#include<stdio.h>
#include<stdlib.h>
// Reads read.txt with fgetc() and prints each value after passing it
// through toupper().
int main()
{
FILE* file;
// NOTE(review): fgetc() returns an int; a char cannot reliably hold EOF.
char ch;
file = fopen("read.txt","r"); // NOTE(review): the result is not checked for NULL
// NOTE(review): ch is tested here before anything has been assigned to it.
while (ch != EOF)
{
// The value from the previous iteration is converted and printed first;
// the next character is only read at the end of the loop body.
ch = toupper(ch);
printf("%c", ch);
ch = fgetc(file);
}
fclose(file);
return 0;
}
You have plenty of errors here.
You first check the uninitialized ch variable, then use it and try to print it, and only then read it. The order has to be exactly the opposite.
ch has to be of type int to accommodate EOF
you need to check if the fopen was successful
/*
 * Prints the contents of read.txt to stdout with every character passed
 * through toupper(). Does nothing if the file cannot be opened.
 * Always returns 0.
 */
int main()
{
    FILE *in = fopen("read.txt", "r");
    int c;

    /* Nothing to do without an input stream. */
    if (in == NULL)
        return 0;

    /* Read first, test for EOF, then convert and emit. */
    for (c = fgetc(in); c != EOF; c = fgetc(in))
        putchar(toupper(c));

    fclose(in);
    return 0;
}
// Note that UPPER and lower chars differ by bit 5 (value 0x20).
// If you want to switch case for any [A-Za-z] in one step,
// an exclusive-OR (^ bitwise operator) can be used:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
/* Flips bit 5 (0x20), which switches the case of an ASCII letter. */
#define TOGGLE_CASE(c) ((c) ^ 0x20)

/*
 * Prints read.txt to stdout with the case of every letter swapped.
 * Non-letters are printed unchanged.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int
main(void)
{
    FILE *file;
    /* int rather than char: EOF must stay distinguishable from data, and
       isalpha() requires an unsigned char value or EOF. */
    int ch;

    if ((file = fopen("read.txt", "r")) == NULL) {
        /* fprintf(stderr, ...) is the ISO C equivalent of dprintf(2, ...) */
        fprintf(stderr, "fopen error\n");
        return (1);
    }
    while ((ch = fgetc(file)) != EOF)
        printf("%c", isalpha(ch) ? TOGGLE_CASE(ch) : ch);
    fclose(file);
    return (0);
}

Read file with comma separated hex values into array in C

How I can read a text file with comma separated hex values e.g. 0x58,0xA9,0x00 and put its values as elements of an array e.g. LR0
I need to replace this hard-coded with reading from the file:
const unsigned char LR0[] = {0x58,0xA9,0x00}
Here is what I wrote so far. printf("%c", ch); shows me what I need but when I uncomment strcat(LR0, ch); it fails at run time with a segment fault. I don't know if I should use strcat or anything else to append the elements of this LR0 array.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Opens test.txt and echoes it to stdout one character at a time.
// Exits with EXIT_FAILURE if the file cannot be opened.
int main() {
int ch; // int so that EOF can be told apart from data
FILE *fp;
//const unsigned char LR0 [1024] = "";
// NOTE(review): declared const with an empty initializer and never used by
// the active code; only the commented-out strcat below refers to it.
const unsigned char LR0 [] = {};
fp = fopen("test.txt", "r");
if (fp == NULL)
{
perror("Error while opening the file.\n");
exit(EXIT_FAILURE);
}
while((ch = fgetc(fp)) != EOF){
printf("%c", ch);
// Attempt to append to LR0, disabled because it crashed at run time.
//strcat(LR0, (char) ch);
}
fclose(fp);
return 0;
}
Sorry for such a basic question; I was not able to fix it by googling. I am not a C developer, and I am using gcc on Linux. My text file does not contain lines, so I cannot use this solution.
There are two problems in your code.
LR0 is declared as const with an empty initializer, so it is an array with no room for any elements; writing to it results in UB.
strcat needs both of its arguments to be of type char *, but your second argument is a single character (int ch; cast to char).
strcat(LR0, (char) ch)
You can use the fscanf with , as delimiter as below to read only the hex values discarding ,.
/*
 * Reads comma separated hex values such as "0x58,0xA9,0x00" from test.txt
 * into LR0 and prints each stored byte.
 * Reading stops at the first token that is not a hex number, at a value
 * larger than 0xFF, or when LR0 is full.
 * Exits with EXIT_FAILURE if the file cannot be opened.
 */
int main(void) {
    FILE *fp;
    unsigned char LR0[1024] = {0};
    size_t count = 0;
    unsigned int value;

    fp = fopen("test.txt", "r");
    if (fp == NULL) {
        /* perror appends ": <reason>\n" itself, so no newline here */
        perror("Error while opening the file");
        exit(EXIT_FAILURE);
    }

    /* %x accepts an optional 0x prefix; the ',' in the format consumes the
       separator when one follows. The bound check comes first so no value
       is read that could not be stored. */
    while (count < sizeof LR0 && fscanf(fp, "%x,", &value) == 1) {
        if (value > 0xFF) {
            break; /* does not fit in one byte */
        }
        LR0[count] = (unsigned char)value;
        printf("0x%02X\n", LR0[count]);
        count++;
    }
    fclose(fp);
    return 0;
}
const unsigned char LR0 [] = {}; implies a zero length array - that is not standard C yet allowed with some compilers.
strcat(LR0, (char) ch); attempts to 1) write to a const array LR0 and 2) write outside the array - it is only 0 length. Both of these are undefined behavior (UB).
I don't know if I should use strcat
Using str...() functions will not well handle input which may contain many "0x00, 0x00, ...".
How I can read a text file with comma separated hex values e.g. 0x58,0xA9,0x00 and put its values as elements of an array (?)
Read the file to determine its length and contents. I suggest a pass for each.
The below is untested, yet hopefully enough to get OP started. It has little error detection.
// Parse a text file like "0x58,0xA9,0x00"
// Return byte count. Return 0 on error.
/*
 * Parses a text stream of the form "0x58,0xA9,0x00" from its beginning.
 *
 * f    - stream to read; it is rewound first.
 * dest - receives the parsed bytes, or NULL to only count them.
 * num  - maximum number of values to parse.
 *
 * White space is tolerated around the commas and after the last number.
 * Returns the number of values parsed. Returns 0 on error: no data, a
 * missing ',' or "0x" prefix, or a value that does not fit in one byte.
 */
size_t read_comma_hex(FILE *f, unsigned char *dest, size_t num) {
    rewind(f);
    size_t i;
    for (i = 0; i < num; i++) {
        if (i) {
            int ch = fgetc(f);
            // Tolerate white space before the ',' or after the last number
            while (isspace(ch)) {
                ch = fgetc(f);
            }
            if (ch == EOF) break;        // no more data
            if (ch != ',') return 0;     // Fail, as ',' expected
        }
        // Parse into a full unsigned int so that an oversized value can be
        // detected instead of being stored through a too-narrow type.
        // The leading space in the format skips white space after a ','.
        unsigned int value;
        if (fscanf(f, " 0x%x", &value) != 1) return 0;
        if (value > 0xFF) return 0;      // does not fit in one byte
        if (dest) dest[i] = (unsigned char)value;
    }
    return i;
}
/*
 * Reads all comma separated hex values from f into an array sized to fit
 * and prints them back as "0x58,0xA9,0x00".
 * Makes two passes over the stream: one to count, one to store.
 * Prints nothing if the stream holds no data or cannot be parsed.
 */
void read_comma_hex_file(FILE *f) {
    // Pass 1: count the values without storing them
    size_t n = read_comma_hex(f, NULL, SIZE_MAX);
    if (n == 0) return; // no data or failure

    // OP wants an array - a variable length array sized by the count.
    // It must not be const, since pass 2 writes into it.
    // NOTE(review): a VLA lives on the stack; for very large files allocate
    // the memory instead.
    unsigned char LR0[n];

    // Pass 2: read data into the array
    if (read_comma_hex(f, LR0, n) != n) return;

    // use LR0 and n some how
    for (size_t i = 0; i < n; i++) {
        // separator goes after every element except the last
        printf("0x%02hhX%s", LR0[i], i + 1 < n ? "," : "");
    }
}

Adding chars to char array from csv.

I am trying to take a csv file and assign the strings from it to a char array. I've tried a couple of things with very little luck. This is what I have so far:
// Opens the file named by the first command-line argument and echoes it to
// stdout one character at a time.
// NOTE(review): neither argc nor the result of fopen() is checked before use.
int main(int argc, char* argv []) {
FILE *file = fopen( argv[1], "r" );
int x; // int so that EOF can be told apart from data
char strings[50]; // declared but not used by the code below
while ((x = fgetc(file)) != EOF) {
printf( "%c", x);
}
fclose(file);
return 0;
}
I've created the strings char array, but I'm unsure how to assign the contents of my csv into it. Any ideas?
Thanks in advance.
You can use fgets to directly read into array strings-
char strings[255]; //give size accordingly
while(fgets(strings,sizeof strings,file)!=NULL) { //iterate until fgets returns NULL
printf("%s", strings); //print string that is read from file
}
Note-
1.You have not checked value of argc as well as return of fopen . You should probably check them .
You can use your characterwise reading approach. Fill your temporary buffer as you go, but take care not to overflow it. When you encounter a separator – a comma for columns, a newline for rows – process the current buffer.
Reset the buffer, adjust the current row and column and repeat until you find the end of the file. When you want to store the content of a cell, be sure to make a copy, because the scratch buffer will be overwritten.
Here's a simple example that just prints the data:
#include <stdlib.h>
#include <stdio.h>
/*
 * Reads example.csv character by character and prints every cell as
 * "[row][col] 'text'". Cells are split on ',' and rows on '\n'.
 * Cell text longer than the scratch buffer is truncated.
 * Returns EXIT_FAILURE if the file cannot be opened, 0 otherwise.
 */
int main(void)
{
    FILE *file = fopen("example.csv", "r");
    if (file == NULL) {
        perror("example.csv");
        return EXIT_FAILURE;
    }

    char buf[50];       /* scratch buffer for the current cell */
    size_t nbuf = 0;    /* characters stored in buf so far */
    int row = 0;
    int col = 0;

    for (;;) {
        int c = fgetc(file);

        /* EOF right after a newline (or in an empty file): nothing pending. */
        if (c == EOF && nbuf == 0 && col == 0) break;

        if (c == '\n' || c == ',' || c == EOF) {
            /* A separator - or EOF on a line without a trailing newline -
               completes the current cell. */
            buf[nbuf] = '\0';
            printf("[%d][%d] '%s'\n", row, col, buf);
            nbuf = 0;

            if (c == EOF) break;
            if (c == ',') {
                col++;
            } else {
                col = 0;
                row++;
            }
        } else if (nbuf + 1 < sizeof(buf)) {
            /* keep one byte free for the terminator; extra input is dropped */
            buf[nbuf++] = (char)c;
        }
    }

    fclose(file);
    return 0;
}

Reading integers from a text file in c

I want to read input from a file into my C program. The input consists of numbers and other characters, and I want to tell the two apart. Since fscanf returns 0 when it encounters a non-integer, it does not seem suitable here. What should I do?
#include <stdio.h>
#include <ctype.h>
/*
 * Prints every integer found in data.txt, one per line, skipping over any
 * other characters in between.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(){
    int num, status;
    FILE *fp = fopen("data.txt", "r");
    if (fp == NULL) {
        perror("data.txt");
        return 1;
    }
    while(EOF!=(status = fscanf(fp, "%d", &num))){
        if(status == 1){
            printf("%d\n", num);
        } else { //if(status == 0){
            /* fscanf stopped at something that is not a number:
               drop one character ... */
            (void)fgetc(fp);//replace by #chux's suggestion
            int ch;
            /* ... then skip ahead to the next '-' or digit (or EOF). */
            while(EOF!=(ch=fgetc(fp)) && ch != '-' && !isdigit(ch));
            if(ch == '-'){
                /* A '-' only matters when a digit follows it directly. */
                int pch = fgetc(fp);
                if(isdigit(pch)){
                    /* Put both back so the next fscanf reads a negative
                       number. NOTE(review): the C standard guarantees only
                       one character of pushback; this relies on the library
                       accepting two. */
                    ungetc(pch, fp);
                    ungetc(ch, fp);
                }
            } else {
                /* A digit is handed back for the next fscanf; for EOF
                   ungetc does nothing. */
                ungetc(ch, fp);
            }
        }
    }
    fclose(fp);
    return 0;
}
Simply read the file one character at a time and check the range of each value: in ASCII, the characters '0' to '9' have the integer values 48 to 57, so a character c is a digit when '0' <= c && c <= '9' (or when isdigit(c) is true).
Hope it will helps you.

Resources