c program to read file and count words specified in array - arrays

I'm trying to read a file containing a paragraph, count the number of times specific words occur (words that I have specified and stored in an array) and then print that result to another file that would look something like,
systems, 2
computer, 3
programming, 6
and so on. Currently, all this code does is spit out every word in the paragraph and their respective counts. Any help would be much appreciated.
#include <stdio.h>
#include <string.h>
/*
 * Reads whitespace-separated words from "out2.dat" and writes one
 * "<word>, <count>" line per stored word to "out3.dat".
 * NOTE(review): because count starts at 0, the six preset words in arr1 are
 * never used as a filter; arr1 is overwritten with the words read from the
 * file, so every distinct word of the input ends up in the output.
 */
int main()
{
FILE* in;
FILE* out;
// Six search words are preset; the remaining 7 of the 13 rows start out empty.
char arr1[13][100] = { "systems", "programming", "computer", "applications", "language", "machine"};
// arr2[i] holds the occurrence count that belongs to arr1[i].
int arr2[180] = {0};
// Number of arr1/arr2 slots currently treated as "in use".
int count = 0;
char temp[150];
// NOTE(review): neither fopen result is checked before the streams are used.
in = fopen("out2.dat", "r");
out = fopen("out3.dat", "w");
// "%s" carries no field width, so a word of 150 or more characters overflows temp.
while (fscanf(in, "%s", temp) != EOF)
{
// check keeps its initial value 8 unless a match below sets it to 1.
int i, check = 8;
// Only the first count entries are searched, i.e. words stored by earlier iterations.
for (i = 0;i < count;i++)
{
if (strcmp(temp, arr1[i]) == 0)
{
arr2[i]++;
check = 1;
break;
}
}
// Word already stored and counted: read the next one.
if (check == 1) continue;
// New word: store it in the next slot, overwriting any preset search word there.
// NOTE(review): nothing stops count from reaching 13 (arr1 rows), and a word
// of 100 or more characters does not fit into one arr1 row.
strcpy(arr1[count], temp);
arr2[count++] = 1;
}
int i;
// Emit every stored word with its count.
for (i = 0; i < count; i++)
fprintf(out, "%s, %d\n", arr1[i], arr2[i]);
// NOTE(review): the streams are never closed explicitly.
return 0;
}

The use of count does not make much sense throughout this program.
It is declared as int count = 0;, and then used as the upper bound in this loop
for (i = 0; i < count; i++)
limiting which search words will be used. This also means that this loop will not be entered on the first iteration of the surrounding while loop.
As such, check != 1, so after this count is used as the index in arr1 at which the currently read "word" will be copied into
strcpy(arr1[count], temp);
which makes absolutely no sense. Why overwrite data you are searching for?
Then count is incremented to 1 after being used to set the first element of arr2 to 1.
On the second iteration of the while loop, the for loop will run for exactly one iteration, comparing the newly read "word" (temp) against the first element of arr1 (which is now the last "word" read).
If this matches: the first element in arr2 is incremented from 1 to 2, the string copy is skipped, and count is not incremented.
If this does not match, the new "word" is copied into the second element of arr1, the second element of arr2 is set to 1, and count is incremented to 2.
This spirals out of control from here.
Given the input shown above, this accesses arr1 out-of-bounds when count reaches 13.
With files that have a small selection of data (<= 13 unique "words", lengths < 100), this may accidentally "work" by populating arr1 with the words from the file. This will have the end effect of showing you the counts of each "word" in the input file.
Eventually, you will invoke Undefined Behavior when one of the following occurs:
fscanf(in, "%s", temp) reads a string that overflows the temp buffer.
count exceeds the bounds of arr1 or arr2.
strcpy(arr1[count], temp); copies a string that overflows a buffer in arr1.
Either fopen call fails, because the resulting NULL stream is then passed to fscanf or fprintf.
In addition to being unsafe, fscanf(in, "%s", temp) will consider anything other than whitespace as being part of a valid string. This includes trailing punctuation, which may or may not be an issue depending on which tokens you want to match (systems. vs. systems). You may need more robust parsing.
In any case, either create an array of structures composed of search words and frequencies, or, create two arrays of the same length to represent this data:
const char *words[6] = { "systems", "programming", "computer", "applications", "language", "machine"};
unsigned freq[6] = { 0 };
There is no need to copy anything. Remember to check whether fopen fails, and to limit %s with a field width when reading so as not to overflow the input buffer.
The rest of the program looks similar: test each input "word" against all search words; increment the corresponding frequency if a match.
An example using an array of structures:
#include <stdio.h>
#include <string.h>
/*
 * Count how often each search word occurs in "out2.dat" and write one
 * "<word>, <count>" line per search word to "out3.dat".
 *
 * Input tokens are whitespace-separated and compared verbatim, so a token
 * with attached punctuation ("systems.") does not match "systems".
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot
 * be written completely.
 */
int main(void) {
    struct {
        const char *word;
        unsigned freq;
    } search_words[] = {
        { "systems", 0 },
        { "programming", 0 },
        { "computer", 0 },
        { "applications", 0 },
        { "language", 0 },
        { "machine", 0 }
    };
    size_t length = sizeof search_words / sizeof *search_words;

    FILE *input_file = fopen("out2.dat", "r");
    FILE *output_file = fopen("out3.dat", "w");

    if (!input_file || !output_file) {
        /* Unlike free(NULL), fclose(NULL) is undefined behavior, so only
         * close the stream that was actually opened. */
        if (input_file) {
            fclose(input_file);
        }
        if (output_file) {
            fclose(output_file);
        }
        fprintf(stderr, "Could not access files.\n");
        return 1;
    }

    /* The field width 255 leaves room for the terminating '\0' in word. */
    char word[256];
    while (1 == fscanf(input_file, "%255s", word)) {
        for (size_t i = 0; i < length; i++) {
            if (0 == strcmp(word, search_words[i].word)) {
                search_words[i].freq++;
            }
        }
    }
    fclose(input_file);

    for (size_t i = 0; i < length; i++) {
        fprintf(output_file, "%s, %u\n",
                search_words[i].word,
                search_words[i].freq);
    }

    /* fclose flushes buffered output; a failure here means data was lost. */
    if (fclose(output_file) != 0) {
        fprintf(stderr, "Could not write results.\n");
        return 1;
    }
    return 0;
}
cat out3.dat:
systems, 1
programming, 1
computer, 2
applications, 2
language, 1
machine, 1

Related

HEAP CORRUPTION DETECTED: after normal block(#87)

I'm trying to write a program that gets the number of names from the user, then reads the names and saves them as strings in an array. After that, it sorts the names in the array alphabetically and prints them in order. The program works well, but the problem appears when I try to free the dynamic memory I allocated.
Here is the code:
#include <stdio.h>
#include <string.h>
#define STR_LEN 51
void myFgets(char str[], int n);
void sortString(char** arr, int numberOfStrings);
/*
 * Asks for a number of friends, reads that many names from stdin into a
 * dynamically grown array of dynamically allocated strings, sorts them with
 * sortString, prints them and frees everything.
 * NOTE(review): malloc, realloc and free are used, but <stdlib.h> is not among
 * the includes visible above this function.
 */
int main(void)
{
int i = 0, numberOfFriends = 0, sizeOfMemory = 0;
char name[STR_LEN] = { 0 };
// sizeOfMemory is still 0 here, so this requests zero bytes; the result of
// malloc is cast to char* although the variable is a char**.
char** arrOfNames = (char*)malloc(sizeof(int) * sizeOfMemory);
printf("Enter number of friends: ");
// NOTE(review): the scanf result is not checked.
scanf("%d", &numberOfFriends);
// Consume the character left in the input after the number.
getchar();
for (i = 0; i < numberOfFriends; i++) // Read each name and store a copy of it in the array.
{
printf("Enter name of friend %d: ", i + 1);
myFgets(name, STR_LEN); // Read one line (without the newline) into name.
sizeOfMemory += 1;
// Grow the pointer array by one slot. NOTE(review): the element size used is
// sizeof(int), while the elements stored are char* pointers.
arrOfNames = (char*)realloc(arrOfNames, sizeof(int) * sizeOfMemory);
arrOfNames[i] = (char*)malloc(sizeof(char) * strlen(name) + 1); // Exactly strlen(name) + 1 bytes for this name.
*(arrOfNames[i]) = '\0'; // Start from an empty string so strncat appends at position 0.
strncat(arrOfNames[i], name, strlen(name) + 1); // Copy the name into the new buffer.
}
sortString(arrOfNames, numberOfFriends); // Sort the stored names.
for (i = 0; i < numberOfFriends; i++)
{
printf("Friend %d: %s\n", i + 1, arrOfNames[i]);
}
// Release every name buffer, then the pointer array itself.
for (i = 0; i < numberOfFriends; i++)
{
free(arrOfNames[i]);
}
free(arrOfNames);
// Wait for one more character before exiting.
getchar();
return 0;
}
/*
Function will read one line from stdin with fgets and remove the newline
that might be at the end of the string - a known issue with fgets.
If nothing can be read (end of input or a read error), str is set to the
empty string instead of keeping whatever it contained before the call.
If n is not positive, str is left untouched because it has no usable room.
input: the buffer to read into, the size of the buffer in chars
(at most n - 1 chars are read, the last one is reserved for '\0')
*/
void myFgets(char str[], int n)
{
    if (n <= 0)
    {
        return;
    }

    if (fgets(str, n, stdin) == NULL)
    {
        /* On EOF fgets leaves the buffer unchanged and on error its contents
           are indeterminate, so hand back a well-defined empty string. */
        str[0] = '\0';
        return;
    }

    /* strcspn yields the index of the first '\n', or the string length when
       there is none, so this is a no-op for lines without a newline. */
    str[strcspn(str, "\n")] = '\0';
}
/*Sorts an array of strings into ascending strcmp order by exchanging the
CONTENTS of the strings (the pointers in arr stay where they are).
Input: arr - the array of strings, numberOfStrings - the number of entries in arr.
Output: None
NOTE(review): exchanging contents writes the text of arr[x] into the buffer of
arr[i] and vice versa, so every buffer must be large enough for the longest
string in the array. The caller in this file allocates exactly
strlen(name) + 1 bytes per entry, so exchanging strings of different lengths
writes past the end of the shorter buffer.*/
void sortString(char** arr, int numberOfStrings)
{
int i = 0, x = 0;
// Scratch buffer for one string during an exchange; holds at most STR_LEN - 1 characters.
char tmp[STR_LEN] = { 0 };
for (i = 0; i < numberOfStrings; i++) // Position i receives the smallest of the strings at i and after.
{
for (x = i + 1; x < numberOfStrings; x++) // Compare position i against every later position.
{
if (strcmp(arr[i], arr[x]) > 0) // arr[i] sorts after arr[x]: exchange their contents.
{
strncat(tmp, arr[i], strlen(arr[i])); // tmp is empty here, so this copies arr[i] into tmp.
// Overwrite arr[i] with the text of arr[x], then arr[x] with the saved text.
arr[i][0] = '\0';
strncat(arr[i], arr[x], strlen(arr[x]));
arr[x][0] = '\0';
strncat(arr[x], tmp, strlen(tmp));
// Empty tmp again so the next strncat into it starts at position 0.
tmp[0] = '\0';
}
}
}
}
After printing the names, when I try to free the names and the array, the very first call to free fails with the error: "HEAP CORRUPTION DETECTED: after normal block(#87)". By the way, I get this error only when I enter 4 or more players. If I enter 3 or fewer players, the program works properly.
Why does that happen and what I should do to fix it?
First of all remove the unnecessary (and partly wrong) casts of the return value of malloc and realloc. In other words: replace (char*)malloc(... with malloc(..., and the same for realloc.
Then there is a big problem here: realloc(arrOfNames, sizeof(int) * sizeOfMemory). You want to allocate an array of pointers, not an array of int, and the size of a pointer may or may not be the same as the size of an int. You need sizeof(char*) or rather the less error-prone sizeof(*arrOfNames) here.
Furthermore, this is too convoluted (but not actually wrong):
*(arrOfNames[i]) = '\0';
strncat(arrOfNames[i], name, strlen(name) + 1);
instead you can simply use this:
strcpy(arrOfNames[i], name);
Same thing in the sort function.
Keep your code simple.
But actually there are more problems in your sort function. You naively swap the contents of the strings (which by the way is inefficient), but the real problem is that if you copy a longer string, say "Walter" into a shorter one, say "Joe", you'll write beyond the end of the allocated memory for "Joe".
Instead of swapping the content of the strings just swap the pointers.
I suggest you take a pencil and a piece of paper and draw the pointers and the memory they point to.

Read lines of text file into array, using double pointer and malloc

I want to read lines of a text file, and the content of it is as below.
first
second
third
I've already written some code, but the result was different from what I expected. I hope you can help me a little. (code below)
/*
content of test.txt
first
second
third
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads "test.txt" line by line, stores a pointer for each line in lst,
 * prints each line as it is read and then prints all stored lines again.
 * NOTE(review): every lst entry is set to the address of the same local
 * buffer `line`; no per-line storage is allocated.
 */
int main() {
// double pointer for two string lines
// and the two string lines will be pointed by (char *) pointer
// NOTE(review): the fopen result is not checked.
FILE *fp = fopen("test.txt", "r");
char **lst = malloc(3 * sizeof(char *)); // corrected 2 -> 3
// read lines until feof(fp) is not Null
// Only 2 lines will be read by this loop
// NOTE(review): feof only reports end-of-file after a read has hit it, and
// i is not bounded by the 3 slots allocated for lst.
int i = 0;
while (!feof(fp)) {
// One 10-byte buffer, declared inside the loop body and reused for every line.
char line[10];
// NOTE(review): the fgets result is not checked before line is printed.
fgets(line, 10, fp);
// Stores the address of `line`, not a copy of its contents.
*(lst + i) = line;
printf("%d : %s", i, *(lst + i));
i++;
}
/*
Result:
0 : first
1 : second
2 : third // Why was this line printed here?
There are 3 lines, not 2 lines!
*/
printf("\n\n");
int j;
// Every lst[j] holds the same address, and `line` is out of scope here.
for (j = 0; j < i; j++) {
printf("%d : %s\n", j, *(lst + j));
}
/*
result:
0 : third
1 : third
2 : third
The whole lines were not printed correctly!
*/
free(lst);
return 0;
}
Expected output:
0 : first
1 : second
2 : third
Many thanks.
First and foremost, you are allocating space for an array of two char*s and you have a single statically sized buffer for a string. But you’re attempting to read three strings. Where do you think the space for the strings is coming from? You’re not allocating it.
You need to make your various numbers match up: allocate an array of three strings, and then allocate three string buffers:
char **lst = malloc(3 * sizeof *lst);
for (int i = 0; i < 3; i++) {
lst[i] = malloc(10);
fgets(lst[i], 10, fp);
}
And don’t forget to free all allocated buffers subsequently:
for (int i = 0; i < 3; i++) {
free(lst[i]);
}
free(lst);
… of course this code isn’t terribly great either since it hard-codes the number of lines you can read, and the maximum line length. But it should get you started.

Retrieving an array from a file and finding its size in C

I have a file that I have to read some numbers from and put them into an array. The only problem is that I don't know how to find the size of it. I am given the maximum size of the array but the numbers don't fill the array completely. I tried many different ways to make it work but it doesn't read the correct values from the file. Is there any other way to do it without sizeof?
#include<stdio.h>
#define MAX_NUMBER 25
/*
 * Attempts to read integers from "a20.dat" into test and then print each
 * element together with the computed element count.
 */
int main(void)
{
// NOTE(review): declared without a size although MAX_NUMBER is defined above.
int test[];
// Only assigned inside the read loop below.
int size;
FILE* sp_input;
// NOTE(review): used as the index in the read loop without being initialized.
int i;
sp_input = fopen("a20.dat", "r");
if (sp_input == NULL)
printf("\nUnable to open the file a20.dat\n");
else
{
// Reads until fscanf reports EOF; i is never advanced in this loop, so
// every value is stored through the same index.
while( fscanf(sp_input, "%d", &test[i])!=EOF)
{
// sizeof yields the declared size of the array, not the number of values read.
size=sizeof(test)/sizeof(test[0]);
}
// Print size elements, each followed by the element count.
for(i = 0; i < size; i++)
printf("\na[%d]=%d has a size of %d\n", i,test[i],size);
fclose(sp_input);
}
return 0;
}
If you increment i each time you successfully do a fscanf, it will serve as a count of the number of items read.
// Assumes test is declared with MAX_NUMBER elements: int test[MAX_NUMBER];
i = 0;
// Stop at the first failed conversion (EOF or non-numeric input) and never
// write past the end of the array, however many numbers the file holds.
while (i < MAX_NUMBER && fscanf(sp_input, "%d", &test[i]) == 1) {
    i = i + 1;
}
// Now, i is the number of items in the list, and test[0] .. test[i-1]
// are the items.
Edit: As @chux pointed out, in this case it's better to compare to 1, the expected number of items scanned, on each call. If bogus input is provided (non-digits), the conversion fails and you should stop reading.
Define a maximum size array and continue looping as able.
File input need not fill the array, just populate it as it can. Keep track, i, of how many of test[] was used and be sure not to overfill the array.
#define MAX_NUMBER 25
int test[MAX_NUMBER];
FILE* sp_input = fopen("a20.dat", "r");
...
// Use `size_t` for array indexing
size_t i;
// do not read too many `int`
for (i=0; i<MAX_NUMBER; i++) {
if (fscanf(sp_input, "%d", &test[i]) != 1) {
break;
}
printf("test[%zu]=%d\n", i, test[i]);
}

Reading a file into a 2d array

What is the best approach to take when reading in a file from C (essentially some sort of grid map of characters), and putting it into a 2-dimensional array of some sort in which each character can be accessed by its coordinates?
sample input file is something like:
ffflli
ffsdfg
fl979p
kl8dfj
and each character can be accessed by coordinates depending on their position. i.e. (0,3) for the bottom left coordinate
You can do something like:
FILE *f;
f = fopen("sample.txt","r");
char ch, strr[100], *str;
int row = 0, column = 0, i = 0, j = 0;
while(fgets(strr, sizeof strr, f)) {
row++;
if(column < strlen(strr) )
column = strlen(strr);
}
rewind(f);
char arr[row][column];
while(i < row) {
ch = fgetc(f);
while( ch != EOF || ch != '\n') {
arr[i][j] = ch;
j++;
}
i++;
}
fclose(f);
You can also avoid first iteration through the file if the file size is very large. It is to avoid wastage of memory by unnecessarily allocating large size to array.
I would suggest you an approach
while not end of file
if character pointed by file pointer is not '\n' or EOF
push character into array
This worked well for me. It iterates through an infile like the one provided, fills a provided 2D-array, and null-terminates unused slots.
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <memory.h>
/*
 * Read a text file into a fixed 10x10 character grid, one file line per grid
 * row, null-terminating every row.
 *
 * Each row keeps at most 9 characters plus the terminator; excess characters
 * on a line and lines beyond the tenth are ignored instead of being written
 * outside the grid.
 *
 * Returns 0 on success, EXIT_FAILURE if the file cannot be opened.
 */
int main(int argc, char* argv[])
{
    FILE* infile;
    char* file_path = "C:\\Your\\File\\Path\\Here.txt";
    char grid[10][10];
    unsigned int num_rows = 10;
    unsigned int num_columns = 10;

    (void)argc;
    (void)argv;

    // Open infile for reading.
    if (0 != fopen_s(&infile, file_path, "r"))
    {
        fprintf(stderr, "Could not open %s\n", file_path);
        return EXIT_FAILURE;
    }

    char character;
    unsigned int row = 0;
    unsigned int column = 0;

    // Iterate through the infile. Testing the read itself (instead of
    // feof() before the read) avoids processing the last character twice,
    // and the row test stops once the grid is full.
    while (row < num_rows && 1 == fscanf_s(infile, "%c", &character, 1))
    {
        // Check for newline, moving to next grid row when necessary.
        if (character == '\n')
        {
            // Null-terminate the row; column never exceeds num_columns - 1.
            grid[row][column] = '\0';
            // Move to beginning of next row.
            ++row;
            column = 0;
            continue;
        }

        // Store the character only while the row still has room for it
        // and for the terminator that follows.
        if (column < num_columns - 1)
        {
            grid[row][column] = character;
            ++column;
        }
    }

    // Null-terminate the row in progress (when the file does not end with a
    // newline) and mark all remaining rows as empty.
    for (; row < num_rows; ++row)
    {
        grid[row][column] = '\0';
        column = 0;
    }

    // Close file.
    fclose(infile);
    return 0;
}
A few caveats:
I assumed a 10x10 grid was sufficient. If this doesn't work for you, by all means change the dimensions. The algorithm should adjust fine (for reasonable dimension values).
Multiple newlines in succession will probably result in entirely empty rows. If you want to handle this differently, check for this case as you read input.
I assumed that null-terminating rows was desirable behavior. If I was wrong, feel free to do as you please.
I wrote this in Visual Studio, compiling with MSVC, hence the usage of fopen_s() and fscanf_s(). If this is undesirable, alteration to fopen() and fscanf() respectively should be relatively simple (mostly changing a couple of statements and function arguments).
I hope this helps you. Let me know if you have any questions.
In C you can access any 2D array stored in a 1D buffer (like a file) by casting the pointer to the correct type. Like this:
int buffer1D[20] = {
0, 1, 2, 3, 4,
1, 2, 3, 4, 5,
2, 3, 4, 5, 6,
3, 4, 5, 6, 7
};
int* bufferPtr = buffer1D;
int (*twoDPtr)[5]; //a pointer of the correct type
twoDPtr = (int (*)[5])bufferPtr; //the interesting part: the cast
Now you can access the buffer as a 2D array:
assert(twoDPtr[2][3] == 5);
The trick is, that twoDPtr is a pointer to a line array of the 2D data. So, when you perform pointer arithmetic on it with twoDPtr[2], it will skip the first two lines of the 2D data.

Fill dynamically sized array in C++ and use the values

I'd like to fill a char-array dynamically and check whether the contained values are valid integers, here's what I got so far:
for(int i = 0; i < 50000; i++)
{
if(input[i] == ',')
{
commaIndex = i;
}
}
commaIndex is the index of a comma inside a file, numerical values should have been entered before a comma, file looks like this: -44,5,19,-3,13,(etc), it's important for this part:
char *tempNumber = new char[commaIndex];
Fill tempNumber (which should presumably be just as big as the number due to my dynamic allocation) so I don't have a number in a size 50000 char-array (named input).
for(int i = 0; i < commaIndex; i++)
{
cout << i << "\n";
tempNumber[i] = input[i];
}
And now I want to use it:
if(!isValidInteger(tempNumber))
{
cout << "ERROR!\n";
}
Unfortunately, tempNumber always seems to be of size 4 regardless of the value of "commaIndex", i.e. I get the following output:
(Inputdata: 50000,3,-4)
commaIndex: 5
content of tempNumber: 5000 (one 0 missing)
commaIndex: 1
content of tempNumber: 3²²² (notice the 3 ^2s)
commaIndex: 2
content of tempNumber: -4²²
Any ideas?
One more thing: This is for a homework assignment and I am not allowed to use any object-oriented element of C++ (this includes strings and vectors, I've been there and I know it would be SO easy.)
Thanks,
Dennis
You might be interested by the strtol function.
You may also consider using strtok() with sscanf(). Note that strtol() returns 0 when no conversion can be performed, which is also a perfectly valid value, so detecting a parse error with it requires checking its end-pointer argument (and errno for range errors). sscanf(), on the other hand, returns the number of successfully read items, so you can easily check whether there was an error while reading a number.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Split a comma-separated string into tokens, convert every token that starts
 * with a valid integer and print the converted numbers, one per line.
 * Tokens that cannot be converted are reported with "Not a number: <token>"
 * and are not stored.
 *
 * Returns 0 on success, 1 if the result table cannot be allocated.
 */
int main()
{
    char str[] = "1,2,-3,+4,a6,6";

    /* Upper bound for the number of tokens: one more than the number of
     * commas. strtok skips empty fields, so there may be fewer tokens. */
    size_t max = 1;
    for (const char *p = str; *p != '\0'; ++p) {
        if (*p == ',') {
            ++max;
        }
    }

    int *nums = malloc(max * sizeof *nums);
    if (nums == NULL) {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }

    /* Tokenize the string by ',' and extract the numbers. The index only
     * advances on a successful conversion, so nums[0 .. count-1] are all
     * initialized. sscanf returns EOF for a blank token, hence "== 1". */
    size_t count = 0;
    for (char *pch = strtok(str, ","); pch != NULL; pch = strtok(NULL, ",")) {
        if (sscanf(pch, "%d", &nums[count]) == 1) {
            ++count;
        } else {
            printf("Not a number: %s\n", pch);
        }
    }

    /* Print only the numbers that were actually read. */
    for (size_t i = 0; i < count; ++i) {
        printf("%d\n", nums[i]);
    }

    free(nums);
    return 0;
}

Resources