How to parse bigger amount of words? - c

I have a program, which receives filename as an input, saves file contents into 2d char array and then outputs words. It works absolutely fine for about 400 words, but then, when I add more words, it crashes. Debugging showed that i am trying to access unused address, and I don't understand how is that possible considering that previous tests with lesser amount of words were successful.
The question is: what am i missing here?
FILE: functions.c
#include "Lab10.h"
#include <stdio.h>
#include <malloc.h>
char** parser(char* filename) {
FILE* fp;
fp = fopen(filename, "r");
char** str = (char**)calloc(N, sizeof(char*) * N);
if (!str)
{
printf("\n Allocation error");
return NULL;
}
char ch;
int space = 0, words = 0;
for (int i = 0; !feof(fp); i++) // Memory allocation
{
ch = fgetc(fp);
if (!is_ch(ch))
{
if (i != space)
{
if (!(str[words] = (char*)calloc(i - space, sizeof(char) * (i - space))))
{
printf("\n Allocation error");
return NULL;
}
words++;
}
while (!is_ch(ch) && !feof(fp))
{
ch = fgetc(fp);
i++;
}
if(!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
i--;
space = i;
}
}
fseek(fp, 0, SEEK_SET);
for (int i = 0; i < words; i++) // Copying words into 2d array
{
while (!is_ch(fgetc(fp)));
if (!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
int j = 0;
do {
if (((fscanf(fp, "%c", &str[i][j])) != 1))
break;
j++;
} while (is_ch(str[i][j-1]) && !feof(fp));
}
return str;
}
int is_ch(char ch)
{
return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'));
}
FILE: main.c
#define _CRT_SECURE_NO_WARNINGS
#include "Lab10.h"
#include <stdio.h>
#include <stdlib.h>
int main() {
char* filename = (char*)malloc(sizeof(char) * N);
if (!scanf("%s", filename) || filename == 0)
{
printf("\n Incorrect filename input");
return -1;
}
char** str = parser(filename);
printf("\n Contents of .txt file:");
for (int i = 0; str[i] != NULL; i++) {
printf("\n\t%d) ", i+1);
for (int j = 0; is_ch(str[i][j]); j++) {
printf("%c", str[i][j]);
}
}
return 0;
}

This answer was posted as a reply to one of the comments below the question itself. I tried writing readWord function, which recieves filepointer, reads one word and then returns pointer to the resulting array - that's eases the procedure, making it less complex. It works almost like fgets(), but it reads till non-character, instead of a newline
readWord function itself:
char* readWord(FILE* fp) {
char ch = 0;
while (!is_ch(ch))
{
ch = fgetc(fp);
if (ch == EOF || !ch)
return NULL;
}
int size = 1;
while (is_ch(ch))
{
if ((ch = fgetc(fp)) == EOF || !ch)
break;
size++;
}
fseek(fp, -(size * (int)sizeof(char)), 1);
if (ch != EOF || !ch)
size--;
char* word = (char*)calloc(size, sizeof(char) * size + 1);
if (!word)
{
printf("\n Allocation error.");
return NULL;
}
for (int i = 0; i < size; i++)
word[i] = fgetc(fp);
word[size] = '\0';
return word;
}
That's how i use it in main():
FILE* fp = fopen("test.txt", "r");
char* word;
while ((word = readWord(fp)) != NULL)
{
for (int i = 0; word[i] != '\0'; i++)
printf("%c", word[i]);
printf(" ");
}
Is there is anything i need to improve here? It works fine, but is it possible to somehow make it better?

Related

Trying to make QuizMaker in C, but some steps are ignored with !X

Basically I'm trying to make QuizMaker by asking user how many questions they want, then make a string of an array. I'm new to C, so I might be missing some little details. Please look at the screenshot of console which I included.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 4096
int input(char *str, int n);
int main() {
int a, i, length = 0;
printf("How many questions do you want?\n");
scanf("%d", &a);
char *strarr[a];
char buffer[BUFFER_SIZE]; //string holder
while (getchar() != '\n'); //in case there is \n, flush it
for(i = 0; i < a; i++) {
printf("Question number #%d:\n", i + 1);
length = input(buffer, BUFFER_SIZE);
//input method returns number of chars we've entered
strarr[i] = malloc((length) * sizeof(char));
//allocating memory for each pointers to array of chars
strcpy(strarr[i], buffer);
//copy the string you've just created to an array of strings
}
//printing results
printf("_____________\n");
for(i = 0; i < a; i++) {
printf("%s\n", strarr[i]);
}
return 0;
}
int input(char *str, int n) {
int ch, i = 0;
while ((ch = getchar()) != '\n') {
if (i < n)
str[i++] = ch;
str[i] = '\0';
}
return i;
}
There are some problems in the code:
the test if (i < n) is not strict enough: you should stop storing characters to the destination array before it is full to save space for the null terminator.
you must allocate one extra byte for the null terminator: malloc((length + 1) * sizeof(char)) or just malloc(length + 1) as sizeof(char) is 1 by definition.
you should test for EOF in addition to '\n' in the reading loops.
Here is a modified version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 4096
int input(char *str, int n);
int main() {
int a, c, i, length = 0;
printf("How many questions do you want?\n");
if (scanf("%d", &a) != 1 || a <= 0) {
fprintf(stderr, "invalid input\n");
return 1;
}
char *strarr[a];
char buffer[BUFFER_SIZE]; //string holder
// flush the rest of the input line
while ((c = getchar()) != EOF && c != '\n')
continue;
for (i = 0; i < a; i++) {
printf("Question number #%d:\n", i + 1);
//input method returns number of chars we've entered
length = input(buffer, BUFFER_SIZE);
//allocating memory for each pointers to array of chars
strarr[i] = malloc((length + 1) * sizeof(char));
if (strarr[i] == NULL) {
fprintf(stderr, "allocation error\n");
a = i;
break;
}
//copy the string you've just created to an array of strings
strcpy(strarr[i], buffer);
}
//printing results
printf("_____________\n");
for (i = 0; i < a; i++) {
printf("%s\n", strarr[i]);
}
return 0;
}
int input(char *str, int n) {
int ch, i = 0;
while ((ch = getchar()) != EOF && ch != '\n') {
if (i + 1 < n)
str[i++] = ch;
}
if (i < n)
str[i] = '\0';
return i;
}
Also note that you can use strdup() to allocate and copy the string in a single call:
for (i = 0; i < a; i++) {
printf("Question number #%d:\n", i + 1);
input(buffer, BUFFER_SIZE);
/* allocate a copy of the string */
strarr[i] = strdup(buffer);
if (strarr[i] == NULL) {
fprintf(stderr, "allocation error\n");
a = i;
break;
}
}

Get length of char array with null elements in C

Currently I am making a project that uses char arrays that have null elements. I want to be able to get the length of the array, in the sense of the number of elements that aren't null. This seemed reasonably trivial and I made this function:
int getWordLen(char word[]) {
int count = 0;
for (int i = 0; i < 512; i++) {
if (word[i] != '\0') {
count++;
}
}
printf("%d ", count);
return count;
}
However, every char array returns a length of 188. Any help would be appreciated.
This is the function I was calling it from:
void redact(Words * redactWords, char fileName[]) {
FILE * file = fopen(fileName, "r");
FILE * outputFile = fopen("outputFile.txt", "w+");
char word[512];
int i = 0;
char c;
while (c != EOF) {
c = getc(file);
if ((c > 96) && (c < 123)) {
word[i] = c;
i++;
continue;
}
else if ((c > 64) && (c < 91)) {
word[i] = c + 32;
i++;
continue;
}
i = 0;
if (isWordRedactWord(redactWords, word)) {
//write stars to file
char starStr[512];
for (int i = 0; i < getWordLen(word); i++) {
starStr[i] = '*';
}
fputs(starStr, outputFile);
}
else {
//write word to file
fputs(word, outputFile);
}
strcpy(word, emptyWord(word));
}
fclose(file);
fclose(outputFile);
}
In the initial while, I would only use while(!EOF).
Also, I believe you are using a lot more resources than necessary with the implementation of that for inside the while:
char starStr[512];
for (int i = 0; i < getWordLen(word); i++) {
starStr[i] = '*';
I suggest you to put it outside the while loop and see what happens.
If it is always giving you 188 of lenght, it is counting something that's constant, and may be related to that outer loop.
Hope you can solve it!

Passing a matrix using pointers in C

Compiles without warnings or errors, just the following code crashes when I try to either read from or write to the matrix constValues.
It needs to be passed to another function to be read from also; the function createOutputLine.
How can I point to the data held correctly so it can be modified and read from? Thanks.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
void createOutputFile(FILE*, int, char**);
char createOutputLine(int, int, char*, char**);
int main(int argc, char **argv) {
int removeComments = 0;
FILE *file;
if (argc > 1 && strcmp(argv[1], "-i") == 0) {
if (argc > 2) {
if (!(file = fopen(argv[2], "r"))) {
printf("Error: file not found");
return -1;
}
}
else {
printf("Error: no file specified");
return -1;
}
}
else {
printf("Error: command requires -i");
return -2;
}
createOutputFile(file, argc, argv);
fclose(file);
}
void createOutputFile(FILE *file, int argc, char **argv) {
fseek(file, 0, SEEK_SET);
char *data = (char*)malloc(2000);
FILE *header;
char name[20];
char *token = strtok(argv[2], ".");
strcpy(name, strcat(token, ".o"));
FILE *output = fopen(name, "w");
char constNames[10][15];
char **constValues[10][10];
int constsStored = 0;
while (fgets(data, 2000, file) != NULL) {
for (int i = 0; i < strlen(data); i++) {
int c = i;
bool linePrinted = false;
if (data[i] == '#' && data[i + 1] == 'd') {
for (c = i; c <= i + 7; c++) {
data[c] = '\0';
} int ch = 0;
while (data[c] != ' ') {
constNames[constsStored][ch] = data[c];
data[c] = '\0';
ch++;
c++;
} ch = 0;
while (data[c] != '\n') {
**constValues[constsStored][ch] = data[c]; //this line crashes
data[c] = '\0';
ch++;
c++;
}
if (data[c] == '\n') data[c] = '\0';
constsStored++;
}
for (int ch = 0; ch <= constsStored; ch++) {
if (data[i] == constNames[ch][0]) {
int ch2 = i + 1;
int ch3 = 1;
bool isConst = false;
while (data[ch2] != ' ') {
if (data[ch2] == constNames[ch][ch3] && isConst == false) isConst = true;
ch2++;
ch3++;
}
if (isConst || data[i + 1] == ' ') {
char line[200];
line[200] = createOutputLine(i, ch, data, **constValues);
fprintf(output, "%c", line[200]);
linePrinted = true;
}
}
}
if (!linePrinted)
fprintf(output, "%c", data[i]);
}
}
fclose(output);
free(data);
}
char createOutputLine(int i, int constElem, char *data, char **constValues) {
int ch = i;
int ch2 = 0;
char temp[200];
while (data[ch] != '\n' && data[ch] != ' ' && data[ch] != ';') {
temp[ch2] = data[ch];
printf("%c", data[ch]);
ch++;
ch2++;
}
char line[200];
ch2 = 0;
for (ch = i; ch <= sizeof(data); ch++) {
line[ch2] = data[ch];
ch2++;
}
for (ch = 0; ch <= 10; ch++) {
line[ch2] = constValues[constElem][ch];
ch2++;
}
for (ch = 0; ch <= sizeof(temp); ch++) {
line[ch2] = temp[ch];
ch2++;
}
line[ch2 + 1] = '\n';
return line[200];
}
A pointer shall point to an object before it can be derefenced. Full stop.
char **constValues[10][10]; just declares an 2D array of pointers to pointers to characters. And as it is an automatic array (neither statically nor dynamically allocated), its pointers are just uninitialized.
When you late use **constValues[constsStored][ch] = data[c];, you try to dereference an uninitialized pointer which is explicitely Undefined Behaviour. You are lucky to get an immediate crash, because UB consequences can be apparently unrelated problems.
The normal way is to declare arrays of objects, and use the addresses of those objects for pointers.
That's not all: C arrays are not first class citizens. You cannot assign to array, nor return it from a function. So this is plain wrong:
char line[200];
line[200] = createOutputLine(i, ch, data, **constValues);
It just assigns the unique character returned by the function past the end of the array!
So is this:
char line[200];
...
return line[200];
It does not return an array (C does not allow it) but the value of the byte that happens to live past the array.
I am sorry, but there are too many errors for me to fix them is such a long program.
You may find C hard and ask for help. But build small code containing only what you want to work on. And only when those small pieces work correctly, try to assemble them in a larger program.

Base change program in C complies but does not show any output

I'm recently new to programming but this assignment has proven to be my most difficult. The program is suppose to read in a .txt file with the following format
input_base number output_base
and outputs the results. the bases can only range from 2-36 For example:
input: 2 10 4
output: 2
My program reads in each part of the line and stores them respectively. I then go through the steps to convert the number into the output base and then print it backwards. My problem is that the program runs fine and prints all the stored values and calculated values, but once I add in my "base_conversion" function the program no longer works and my friend even said it gave him a segmentation fault. I don't really know what could be causing it. Any help would be appreciated. Thank you.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void read_file_to_buffer(FILE *file);
char *buffer = NULL;
void string_check(char *buffer);
int char_to_int(char c);
int int_conversion(int x, int y);
void base_conversion(int end_number, int out_base);
int main(int argc, char *argv[]){
FILE *file = NULL;
if ( argc != 2 ){
fprintf(stderr, "Error: To many/few arguments\n");
exit(EXIT_FAILURE);
}
else {
file = fopen(argv[1], "rb");
if (file == NULL) {
fprintf(stderr, "Error: could not open file '%s'\n", argv[1]);
exit(EXIT_FAILURE);
}
else{
printf("File %s opened!\n", argv[1]);
}
}
read_file_to_buffer(file);
string_check(buffer);
fclose(file);
free(buffer);
buffer = NULL;
exit(EXIT_SUCCESS);
}
void read_file_to_buffer(FILE *file) {
long file_size = 0;
if(buffer != NULL){
fprintf(stderr, "buffer in use\n");
exit(EXIT_FAILURE);
}
rewind(file);
if (fseek(file, 0, SEEK_END) != 0){
perror("Could not seek to end of file");
exit(EXIT_FAILURE);
}
file_size = ftell(file);
if (file_size < 0){
perror("could not tell size of file");
exit(EXIT_FAILURE);
}
rewind(file);
buffer = (char *)malloc(sizeof(char) * (file_size + 1));
if (buffer == NULL){
fprintf(stderr, "Could not allocate memory");
exit(EXIT_FAILURE);
}
if(fread(buffer, sizeof(char), (size_t)file_size, file) != file_size){
fprintf(stderr, "Could not read file\n");
exit(EXIT_FAILURE);
}
buffer[file_size] = '\0';
return;
}
void string_check(char *buffer){
int i = 0;
int j = 0;
char base_in[2];
char number[50];
char base_out[2];
int actual_number[100];
int end_number = 0;
int x = 0;
int y = 0;
int in_base;
int out_base;
while( buffer[i] != '\0'){
if (buffer[i] != '#' && buffer[i] != ' ' && buffer[i] != '\n' && buffer[i] != '\r'){
while (buffer[i] != ' '){
base_in[j] = buffer[i];
i++;
j++;
}
j = 0;
i++;
if (base_in[1] != '\0'){
x = char_to_int(base_in[0]);
y = char_to_int(base_in[1]);
in_base = int_conversion(x, y);
x = 0;
y = 0;
}
else{
in_base = char_to_int(base_in[0]);
}
while (buffer[i] != ' '){
number[j] = buffer[i];
i++;
j++;
}
int q;
q=j;
j=0;
while (number[j] != '\0'){
actual_number[j] = char_to_int(number[j]);
end_number = end_number + pow(in_base, q-1) * actual_number[j];
j++;
q--;
}
j = 0;
i++;
while (buffer[i] != '\n' && buffer[i] != '\0'){
base_out[j] = buffer[i];
i++;
j++;
}
if (base_out[1] != '\0'){
x = char_to_int(base_out[0]);
y = char_to_int(base_out[1]);
out_base = int_conversion(x, y);
x = 0;
y = 0;
}
else{
out_base = char_to_int(base_out[0]);
}
j = 0;
i++;
base_conversion(end_number, out_base);
}
else{
while (buffer[i] != '\n' && buffer[i] != '\0'){
i++;
}
}
i++;
}
return;
}
int char_to_int(char c){
char map[] = "0123456789abcdefghijklmnopqrstuvwxyz";
int result = -1;
char *next = map;
while(*next != '\0'){
if(*next == c){
result = next - map;
break;
}
next++;
}
return result;
}
int int_conversion(int x, int y){
int value;
value = (x * 10) + y;
return value;
}
void base_conversion(int end_number, int out_base){
int remainder[100];
char map[] = "0123456789abcdefghijklmnopqrstuvwxyz";
int index = 0;
int i;
while (end_number != 0){
remainder[index] = end_number % out_base;
end_number = end_number / out_base;
index++;
}
for (i=0; i<index; i++){
printf("%c", map[remainder[index-1]]);
}
printf("\n");
return;
}
OP's base_conversion() is messed.
Loop prints same character repeatedly.
for (i=0; i<index; i++){
printf("%c", map[remainder[index-1]]); // Why same character?
}
Code is using signed math and % and can create negative remainders which may be used as array index.
remainder[index] = end_number % out_base; // result may be negative.
...
printf("%c", map[remainder[index-1]]); // accessing out of bounds with negative
Suggested simplification.
void base_conversion_helper(unsigned end_number, int out_base){
if (end_number >= out_base) base_conversion_helper(end_number/out_base, out_base);
putchar("0123456789abcdefghijklmnopqrstuvwxyz"[end_number % outbase]);
}
void base_conversion(unsigned end_number, int out_base){
assert(out_base >= 2 && out_base <= 36);
base_conversion_helper(end_number, out_base);
putchar('\n');
}

Better Way Without Goto?

I have a program where my code uses a goto statement, and I want to get rid of it in a nice way, but I can't seem to find a solution. If goto is the best way, then please let me know. Here is a summary of the code:
//Counts how many times every word appears in a file
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define NUMWORDS 1000
#define WORDLEN 50
typedef struct
{
char word[WORDLEN + 1];
int num;
} Appearance;
int main(int argc, char *argv[]) {
FILE *readfile;
Appearance *appearlist[NUMWORDS] = {NULL};
char word[WORDLEN + 1];
int i;
//Get a valid filename and open the file, store pointer into readfile
...
char c;
while (c != EOF) {
skip: //Annoying label
//Get a word from readfile, store into word
...
if (word[0] != '\0') {
for (i = 0; i < NUMWORDS && appearlist[i]; i++) {
if (strcmp(appearlist[i] -> word, word) == 0) {
appearlist[i] -> num++;
goto skip; //Annoying goto
}
}
appearlist[i] = (Appearance *) malloc(sizeof(Appearance));
appearlist[i] -> num = 1;
strcpy(appearlist[i] -> word, word);
}
}
//Display results, free memory
...
return 0;
}
The problem is, I want to skip code that is outside of the loop I want to skip from. I would like to not create another variable only designed for this. If you want the full code, click on "Show code snippet."
//Counts how many times every word appears in a file
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define NUMWORDS 1000
#define WORDLEN 50
#define FILENAMELEN 50
typedef struct
{
char word[WORDLEN + 1];
int num;
} Appearance;
int main(int argc, char *argv[])
{
char filename[FILENAMELEN];
FILE *readfile;
Appearance *appearlist[NUMWORDS] = {NULL};
char word[WORDLEN + 1];
size_t ln;
int i;
if (argc == 2)
strncpy(filename, argv[1], sizeof(filename));
else {
printf("Enter a filename to count appearances from, or just press enter to quit: ");
fgets(filename, FILENAMELEN, stdin);
ln = strlen(filename) - 1;
if (filename[ln] == '\n')
filename[ln] = '\0';
}
while((readfile = fopen(filename, "r")) == NULL) {
if (filename[0] == '\0')
return 0;
printf("Invalid file! Please enter another filename, or just press enter to quit: ");
fgets(filename, FILENAMELEN, stdin);
ln = strlen(filename) - 1;
if (filename[ln] == '\n') filename[ln] = '\0';
}
char c;
while (c != EOF) {
skip:
for (i = 0; (c = getc(readfile)) != EOF && (isalnum(c) || c == '\''); i++) {
if (i >= WORDLEN) {
word[i] = '\0';
printf("\nWarning: word too long (over %d characters), trimming to: %s\n", WORDLEN, word);
while ((c = getc(readfile)) != EOF && (isalnum(c) || c == '\'')) ;
} else {
word[i] = tolower(c);
}
}
word[i] = '\0';
if (word[0] != '\0') {
for (i = 0; i < NUMWORDS && appearlist[i]; i++) {
if (strcmp(appearlist[i] -> word, word) == 0) {
appearlist[i] -> num++;
goto skip;
}
}
appearlist[i] = (Appearance *) malloc(sizeof(Appearance));
appearlist[i] -> num = 1;
strcpy(appearlist[i] -> word, word);
}
}
for (i = 0; i < NUMWORDS && appearlist[i]; i++) {
printf("Word: %s, Appearances: %d\n", appearlist[i] -> word, appearlist[i] -> num);
free(appearlist[i]);
}
return 0;
}
Using goto in this case is often considered acceptable.
Alternatives would be to set a variable so that you can continue in the outer loop after breaking from the inner one, or turning the whole segment that you want to escape from into a separate function, and returning from it instead of using goto.
I'm ignoring any other issues there may be with the code that aren't relevant to the question!
Put everything beginning with the 'if' statement into a separate method (let's call it "process" and replace the goto with return. Then the while-loop becomes:
while (c != EOF) {
//Get a word from readfile, store into word
...
process(...);
}
Sometimes using goto is a hint that code should use a helper function
static bool findword(Appearance *appearlist, size_t size, const char *word) {
for (size_t i = 0; i < size && appearlist[i]; i++) {
if (strcmp(appearlist[i]->word, word) == 0) {
appearlist[i]->num++;
return true;
}
}
return false;
}
while (c != EOF) {
//Get a word from readfile, store into word
...
if (word[0] != '\0') {
if (findword(appearlist, NUMWORDS, word)) {
continue;
}
appearlist[i] = (Appearance *) malloc(sizeof(Appearance));
appearlist[i] -> num = 1;
strcpy(appearlist[i] -> word, word);
}
}

Resources