I have to create a function that reads a file called grwords.txt containing around 540000 words which are written in Greek letters.
I have to convert these words to uppercase and fill an array called char **words.
This is what I have so far.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#include <ctype.h>
void fp();
int main(int argc, char *argv[]) {
SetConsoleOutputCP(1253);
fp();
return 0;
}
void fp(){
char **words;
words = malloc(546490 * sizeof(int *));
for (i = 0; i < 546490; i++)
words[i] = malloc(24 * sizeof(int));
FILE *file;
char *word;
size_t cnt;
file = fopen("grwords.txt", "rt");
if (file == NULL){
printf("File cannot be opened.\n");
exit(1);
}
cnt = 0;
while (1==fscanf(file, "%24s",word)){
if (cnt == 546490)
break;
strcpy(words[cnt++], word);
}
fclose(file);
}
I'm still trying to figure out pointers. I know that & makes a pointer from a value and * a value from a pointer. Updated the program and it successfully fills the array with the words from the file! I still have no idea how to convert Greek lowercase to uppercase.
Handling Greek words can be dependent on your platform.
First of all, you need to understand how file handling works. Here is what I wrote:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define bufSize 1024 // max lenght of word
// we are going to receive the .txt from cmd line
int main(int argc, char *argv[])
{
FILE *fp;
// Assume file has max 10 words
const size_t N = 10;
// Allocate a 2D array of N rows
// and bufSize columns.
// You can think of it like an array
// of N strings, where every string
// has, at most, bufSize length.
char buf[N][bufSize];
// make sure we got the .txt
if (argc != 2)
{
fprintf(stderr,
"Usage: %s <soure-file>\n", argv[0]);
return 1;
}
// open the file
if ((fp = fopen(argv[1], "r")) == NULL)
{ /* Open source file. */
perror("fopen source-file");
return 1;
}
// we will use that for toupper()
char c;
// counters
int i = 0, j;
while (fscanf(fp, "%1024s", buf[i]) == 1)
{ /* While we don't reach the end of source. */
/* Read characters from source file to fill buffer. */
// print what we read
printf("%s\n", buf[i]);
j = 0;
// while we are on a letter of word placed
// in buf[i]
while (buf[i][j])
{
// make the letter capital and print it
c = buf[i][j];
putchar (toupper(c));
j++;
}
i++;
printf("\ndone with this word\n");
}
// close the file
fclose(fp);
return 0;
}
For this test.txt file:
Georgios
Samaras
Γιώργος
Σαμαράς
the code would run as:
./exe test.txt
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
Γιώργος
done with this word
Σαμαράς
Σαμαράς
done with this word
As you can see, I could read the Greek words, but failed to convert them in upper case ones.
Once you got how file handling goes, you need to use wide characters to read a file with Greek words.
So, by just modifying the above code, we get:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>
#define bufSize 1024
int main(int argc, char *argv[])
{
setlocale(LC_CTYPE, "en_GB.UTF-8");
FILE *fp;
const size_t N = 15;
wchar_t buf[N][bufSize];
if (argc != 2)
{
fprintf(stderr,
"Usage: %s <soure-file>\n", argv[0]);
return 1;
}
if ((fp = fopen(argv[1], "r")) == NULL)
{
perror("fopen source-file");
return 1;
}
wchar_t c;
int i = 0, j;
while (fwscanf(fp, L"%ls", buf[i]) == 1)
{
wprintf( L"%ls\n\n", buf[i]);
j = 0;
while (buf[i][j])
{
c = buf[i][j];
putwchar (towupper(c));
j++;
}
i++;
wprintf(L"\ndone with this word\n");
}
fclose(fp);
return 0;
}
And now the output is this:
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
ΓΙΏΡΓΟΣ
done with this word
Σαμαράς
ΣΑΜΑΡΆΣ
done with this word
I see that you may want to create a function which reads the words. If you need a simple example of functions in C, you can visit my pseudo-site here.
As for the 2D array I mentioned above, this picture might help:
where N is the number of rows (equal to 4) and M is the number of columns (equal to 5). In the code above, N is N and M is bufSize. I explain more here, were you can also found code for dynamic allocation of a 2D array.
I know see that you are on Windows. I tested the code in Ubuntu.
For Windows you might want to take a good look at this question.
So, after you read all the above and understand them, you can see what you asked for with dynamic memory management.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>
#define bufSize 1024
wchar_t **get(int N, int M);
void free2Darray(wchar_t** p, int N);
int main(int argc, char *argv[])
{
setlocale(LC_CTYPE, "en_GB.UTF-8");
FILE *fp;
const size_t N = 15;
wchar_t** buf = get(N, bufSize);
if (argc != 2)
{
fprintf(stderr,
"Usage: %s <soure-file>\n", argv[0]);
return 1;
}
if ((fp = fopen(argv[1], "r")) == NULL)
{
perror("fopen source-file");
return 1;
}
wchar_t c;
int i = 0, j;
while (fwscanf(fp, L"%ls", buf[i]) == 1)
{
wprintf( L"%ls\n", buf[i]);
j = 0;
while (buf[i][j])
{
c = buf[i][j];
putwchar (towupper(c));
j++;
}
i++;
wprintf(L"\ndone with this word\n");
}
fclose(fp);
// NEVER FORGET, FREE THE DYNAMIC MEMORY
free2Darray(buf, N);
return 0;
}
// We return the pointer
wchar_t **get(int N, int M) /* Allocate the array */
{
/* Check if allocation succeeded. (check for NULL pointer) */
int i;
wchar_t **table;
table = malloc(N*sizeof(wchar_t *));
for(i = 0 ; i < N ; i++)
table[i] = malloc( M*sizeof(wchar_t) );
return table;
}
void free2Darray(wchar_t** p, int N)
{
int i;
for(i = 0 ; i < N ; i++)
free(p[i]);
free(p);
}
Note that this code is expected to work on Linux (tested on Ubuntu 12.04), not on Windows (tested on Win 7).
Related
This question already has answers here:
How should character arrays be used as strings?
(4 answers)
Closed 12 months ago.
I have a file with an unknown number of strings and each of these strings is of an unknown length.
I would like to make each line of the file its own string in an array of strings.
I tried to use dynamic allocation in a char** array, but I don't think I'm approaching this correctly.
Below is the code I have tried. It's getting stuck in an infinite loop, and I can't figure out why.
(The text file I'm reading from ends with a line break, by the way.)
#include <getopt.h> //for getopts
#include <sys/stat.h> //to do file stat
#include <dirent.h>
#include <string.h>
#include <pwd.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h> //user macros
#include <stdlib.h>
#include <stdbool.h>
#include <libgen.h>
#include <errno.h>
int main(int argc, char *argv[]) {
//storing the filename inside string
char* filename = argv[1];
FILE *fp1 = fopen(filename, "r");
if (fp1 == NULL) {
fprintf(stderr, "Error: Cannot open '%s'. No such file or directory.\n", filename);
return EXIT_FAILURE;
}
/**
* we begin by getting the number of numbers in the file
* the number of numbers = number of lines = number of line breaks
*/
size_t numNumbers = 0;
// while((fscanf(fp1, "%*[^\n]"), fscanf(fp1, "%*c")) != EOF){
// numNumbers = numNumbers + 1;
// }
char c;
while((c = fgetc(fp1)) != EOF){
if(c == '\n'){
numNumbers++;
}
}
fclose(fp1);
FILE *fp2 = fopen(filename, "r");
char** arrayOfStrings = malloc(numNumbers * sizeof(char*));
for(int i = 0; i < numNumbers; i++) {
int len = 0;
if(((c = fgetc(fp1)) != '\n') && (c != EOF)){
len++;
}
arrayOfStrings[i] = malloc(len * sizeof(char));
}
printf("hello1\n");
//for(int i = 0; i < numNumbers; i++){
// fscanf(fp2, "%s", (arrayOfStrings[i]));
//}
fclose(fp2);
// for(int i = 0; i < numNumbers; i++){
// fprintf(stdout, "%s", arrayOfStrings[i]);
// }
return 0;
}
(I'm very new to C, so please go easy on me!)
In C, strings are terminated with a '0' byte, so it looks like your malloc for each string is 1 character too short -- you've only allowed space for the text.
In addition, you mean the count for the size of each line to be a while loop, not an if statement - right now you are counting each line as length "1".
Finally, you are reading off the end of the file in your commented out fscanf code because you haven't closed and reopened it.
Assuming you want to split the input to the strings by the newline character, would you please try:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[])
{
char *filename; // filename to read
char **arrayOfStrings = NULL; // array of strings
char line[BUFSIZ]; // line buffer while reading
char *p; // temporal pointer to the input line
int i, num; // counter for lines
FILE *fp; // file pointer to read
if (argc != 2) {
fprintf(stderr, "usage: %s file.txt\n", argv[0]);
return EXIT_FAILURE;
}
filename = argv[1];
if (NULL == (fp = fopen(filename, "r"))) {
perror(filename);
return EXIT_FAILURE;
}
// read the input file line by line
while (fgets(line, BUFSIZ, fp)) {
if ((p = strrchr(line, '\n'))) *p = '\0'; // remove trailing newline, if any
if ((p = strrchr(line, '\r'))) *p = '\0'; // remove trailing cr character, if any
if (NULL == (arrayOfStrings = realloc(arrayOfStrings, (num + 1) * sizeof(char **)))) {
// enlarge the array according to the line count
perror("realloc");
return EXIT_FAILURE;
}
if (NULL == (arrayOfStrings[num] = malloc(strlen(line) + 1))) {
// memory for the string of the line
perror("malloc");
return EXIT_FAILURE;
}
strcpy(arrayOfStrings[num], line);
num++;
}
// print the strings in the array
for (i = 0; i < num; i++) {
printf("%d %s\n", i, arrayOfStrings[i]);
}
fclose(fp);
return 0;
}
If the input file looks something like:
This
is
the
input.
Then the output will be:
0 This
1 is
2 the
3 input.
Hi I was trying to create an array of string of an undetermined length in c.
This is my code :
int main()
{
int lineCount=linesCount();
char text[lineCount][10];
printf("%d",lineCount);
FILE * fpointer = fopen("test.txt","r");
fgets(text,10,fpointer);
fclose(fpointer);
printf("%s",text);
return 0;
}
I would like to replace 10 in
char text[lineCount][10];
My code reads out a file I already made the amount of lines dynamic.
Since the line length is unpredictable I would like to replace 10 by a something dynamic.
Thanks in advance.
To do this cleanly, we want a char * array rather than an 2D char array:
char *text[lineCount];
And, we need to use memory from the heap to store the individual lines.
Also, don't "hardwire" so called "magic" numbers like 10. Use an enum or #define (e.g) #define MAXWID 10. Note that with the solution below, we obviate the need for using the magic number at all.
Also, note the use of sizeof(buf) below instead of a magic number.
And, we want [separate] loops when reading and printing.
Anyway, here's the refactored code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
linesCount(void)
{
return 23;
}
int
main(void)
{
int lineCount = linesCount();
char *text[lineCount];
char buf[10000];
printf("%d", lineCount);
// open file and _check_ the return
const char *file = "test.txt";
FILE *fpointer = fopen(file, "r");
if (fpointer == NULL) {
perror(file);
exit(1);
}
int i = 0;
while (fgets(buf, sizeof(buf), fpointer) != NULL) {
// strip newline
buf[strcspn(buf,"\n")] = 0;
// store line -- we must allocate this
text[i++] = strdup(buf);
}
fclose(fpointer);
for (i = 0; i < lineCount; ++i)
printf("%s\n", text[i]);
return 0;
}
UPDATE:
The above code is derived from your original code. But, it assumes that the linesCount function can predict the number of lines. And, it doesn't check against overflow of the fixed length text array.
Here is a more generalized version that will allow an arbitrary number of lines with varying line lengths:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
main(void)
{
int lineCount = 0;
char **text = NULL;
char buf[10000];
// open file and _check_ the return
const char *file = "test.txt";
FILE *fpointer = fopen(file, "r");
if (fpointer == NULL) {
perror(file);
exit(1);
}
int i = 0;
while (fgets(buf, sizeof(buf), fpointer) != NULL) {
// strip newline
buf[strcspn(buf,"\n")] = 0;
++lineCount;
// increase number of lines in array
text = realloc(text,sizeof(*text) * lineCount);
if (text == NULL) {
perror("realloc");
exit(1);
}
// store line -- we must allocate this
text[lineCount - 1] = strdup(buf);
}
fclose(fpointer);
// print the lines
for (i = 0; i < lineCount; ++i)
printf("%s\n", text[i]);
// more processing ...
// free the lines
for (i = 0; i < lineCount; ++i)
free(text[i]);
// free the list of lines
free(text);
return 0;
}
I am trying to read an input txt file from command line and find the most frequent character in that file for a school project. I can open the txt file and print it without an issue with the following code. Also the funcion below freqcount(), works perfectly when I give it a string from the command line. But I can't seem to make them work together. I think I'm messing up something while setting up the dest array down below. Any help would be appreciated.
Also, for non static sized strings, which one is generally better to use, malloc or calloc?
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <string.h>
#define DEST_SIZE 26 // An arbitrary size but longest string to work is 24
char freqcount(char * str){
// Construct character count array from the input
// string.
int len = strlen(str);
int max = 0; // Initialize max count
char result; // Initialize result
int count[255] = {0};
// Traversing through the string and maintaining
// the count of each character
for (int i = 0; i < len; i++) {
count[str[i]]++;
if (max < count[str[i]]) {
max = count[str[i]];
result = str[i];
}
}
return result;
}
//////////////////////////////////////////////////////////////////////
int main(int argc,char ** argv){
int i=0;
char dest[DEST_SIZE] = {0};
if(argc !=2){
perror("Error: ");
return -1;
}
FILE * f = fopen(argv[1], "r");
if (f == NULL) {
return -1;
}
int c;
while ( (c=fgetc(f)) != EOF && i++<DEST_SIZE ) {
printf("%c",c);
dest[i]=c;
char cnt=freqcount(dest);
printf("%c",cnt);
}
return EXIT_SUCCESS;
}
Sorry I forgot to add, originally call was after the loop such as;
(omitted the first part)
while ( (c=fgetc(f)) != EOF && i++<DEST_SIZE ) {
printf("%c",c);
dest[i]=c;
}
/*int l;
for (l=0; l<DEST_SIZE;l++){
if (dest[i] != 0){
printf("%c\n",dest[l]); // burda da arrayi okuyor ama array 255 long oldugu icin cogu bos
}
}*/
char cnt=freqcount(dest);
printf("%s",cnt);
return EXIT_SUCCESS;
}
when it is like this, the code returns the following with the input "An example Of the input.
An example
Of the input.(null)
Move the call of freqcount to after the while loop:
while ( (c=fgetc(f)) != EOF && i++<DEST_SIZE ) {
printf("%c",c);
dest[i]=c;
}
dest[i]='\0'; // terminate
char cnt=freqcount(dest);
printf("%c",cnt);
I'm trying to write a program that takes a file and a string by using standard POSIX functions, program counts all the characters in file which the string contains.
For example if the user writes:
count.exe x.txt abcd
The program calculates the number of each character: a, b, c, d in file x.txt
Sample message:
Number of 'a' characters in 'x.txt' file is: 4
Number of 'b' characters in 'x.txt' file is: 9
Number of 'c' characters in 'x.txt' file is: 7
Number of 'd' characters in 'x.txt' file is: 0
The code that I got so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#define BUFSIZE 1024
void exit_sys(const char* msg)
{
perror(msg);
exit(EXIT_FAILURE);
}
void exit_fail(const char* msg)
{
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}
int get_count(char* p, size_t size, char c)
{
int count = 0;
size_t i;
for (i = 0; i < size; ++i)
if (p[i] == c)
++count;
return count;
}
void run_count_characters_application(int argc, char** argv)
{
int fd;
char c;
char buf[BUFSIZE];
int n;
int count;
if (argc != 3)
exit_fail("usage: ./mycounter file character");
if (strlen(argv[2]) < 0)
exit_fail("You have to give at least one character");
c = argv[2][0];
if ((fd = open(argv[1], O_RDONLY)) < 0)
exit_sys("open");
count = 0;
while ((n = read(fd, buf, BUFSIZE)) > 0)
count += get_count(buf, n, c);
if (n < 0)
exit_sys("read");
printf("Count:%d\n", count);
close(fd);
}
int main(int argc, char** argv)
{
run_count_characters_application(argc, argv);
return 0;
}
The problem with what I got so far in this code is that it only counts one character (only the first character), I want to know how to make it read and count the other characters that I write in the command, thank you in advance :)
since you asked for an example in the comments:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void die(const char *reason)
{
fprintf(stderr, "%s\n", reason);
exit(EXIT_FAILURE);
}
int main(int argc, char **argv)
{
if (argc != 3)
die("usage: ./count file characters");
FILE *f = fopen(argv[1], "r");
if (f == NULL)
die("unable to open file");
unsigned int counter[UCHAR_MAX + 1] = { 0 };
int c;
while ((c = fgetc(f)) != EOF)
counter[c]++;
fclose(f);
size_t len = strlen(argv[2]);
for (unsigned int i = 0; i < len; i++) {
char c = argv[2][i];
unsigned int count = counter[c];
printf("Number of '%c' characters in '%s' file is: %u\n", c, argv[1], count);
}
}
$ cc count.c -o count
$ echo "bbaaafff" > test.txt
$ ./count test.txt afm
Number of 'a' characters in 'test.txt' file is: 3
Number of 'f' characters in 'test.txt' file is: 3
Number of 'm' characters in 'test.txt' file is: 0
$
the function fgetc()
return the character read as an unsigned char cast to an int or EOF on end of file or error.
there are UCHAR_MAX + 1 possible values that a unsigned char can store (typically 0 to 255) so I made an array that can be indexed by these values (counter) to store ours counts. think of it as a "map" from other languages that maps characters to their count.
then at the end, I loop over the characters from the input string and print their count.
as already mentioned in the comments, this works properly only for ASCII characters.
If I'm printing the whole string everything looks good, whitespace and indenting looks perfect (I'm loading the source file with this code).
But if I'm trying to print a single character in the buffer I'm getting letters where there are not supposed to be any.
For example, if I print buffer[2] I'm getting letters where it should be whitespace, but if I print the whole string the letters aren't there.
Here is my code that's not working:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(void) {
char *buffer = (char*) malloc(100*sizeof(char));
FILE *myFile;
myFile = fopen("thisSourceFile.c", "r");
if (!myFile) {
printf("could not open file");
}
else {
while(fgets(buffer,100,myFile)) {
printf("%c \n",buffer[2]);
}
}
fclose(myFile);
free(buffer);
buffer = NULL;
return 0;
}
OUTPUT:
n
n
n
t
h
I
y
f
p
l
w
p
}
r
u
e
As you can se it is printing letters where it should by whitespace. Those letters are not there if I print the whole string.
If you're interested in parsing a source file and processing each character, this might be a solution.
But there are two constants; charsand num_lines_to_read.
M.M mentions in the comments below that isprint() isn't fully portable and comes with some quirks to be careful of.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
int main(void) {
const int chars = 100; /* Num chars per line to read */
const int num_lines_to_read = 3; /* Num lines to read */
char *buffer = (char*) malloc(chars*sizeof(char));
int i = 0, j = 0;
FILE *myFile;
myFile = fopen("thisSourceFile.c", "r");
if (myFile == NULL) {
printf("could not open file");
fclose(myFile);
return 1;
}
for(i=0; i<num_lines_to_read; i++)
{
if(fgets(buffer,chars,myFile) != NULL)
{
while(isprint((unsigned char) buffer[j]))
{
printf("%c", (buffer[j]));
j++;
}
j=0;
}
}
fclose(myFile);
free(buffer);
return 0;
}
Example output (itself!):
#include <stdio.h>#include <stdlib.h>#include <ctype.h>