Segmentation fault (core dumped) when reading file - c

I'm doing a simple project to read a input.txt text file and show some basic statistics about it. The file is supposed to be about the entrance on a company building. That's how the file is:
1 ; Visitor ; 10 ; 19 ; 2
2 ; 1 ; Worker ; 8 ; 0
3 ; 2 ; Director ; 12 ; 19
4 ; 5 ; Worker ; 18 ; 22
5 ; Visitor ; 8 ; 0 ; 3
Format is = ID ; Companions(if employee) ; Type ; Entrance Time ; Exit Time ; Services(if Visitor)
I got the program to read the file correctly (i guess), it reads the first worker correctly, but when it gets to the second one it reads the ID and suddenly exits with Segmentation fault (core dumped).
I'd really appreciate if someone with more knowledge could help since i have no idea what's happening, and other questions with the same error didn't help.
Here's the code:
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
typedef enum { false, true } bool;
char * removeSpaces(char *line) {
int counter = 0,i=0;
while (line[i]!='\0'){
if (line[i] == ' '){
i++;
} else{
line[counter] = line[i];
counter++;
i++;
}
}
return(line);
line[counter] = '\0';
}
int main (int argc, char *argv[]){
FILE * fp;
char * line = NULL;
char field[30];
size_t len = 0;
ssize_t read;
bool flag = false;
int i=0,j=0,k=0,counter=0; //variables to count on loops
fp = fopen("input.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, fp)) != -1) {
printf("%s", line);
if(strstr(line, "Worker") != NULL) { //determine the entrance type
line = removeSpaces(line);
printf("\nWORKER READ\n");
i = 0;
while(line[i] != ';'){ //Read ID
field[i] = line[i]; //saves the content of the field (all between ';')
i++;
}
field[i] = '\0';
printf("\nID: ");
for(i=0;field[i] != '\0';i++){
printf("%c",field[i]);
}
//memset(field,0,strlen(field));
i = 0; j = 0;
while(flag != true){ //Read Companions
if(line[i] == ';'){
flag = true; //keeps skipping the string till it finds the right field
}
if(flag == true){
j = i+1;
while(line[j] != ';'){
field[k] = line[j]; //saves the content of the field (all between ';')
j++; k++;
}
}
i++;
}
field[k] = '\0';
printf("\nCOMPANIONS: ");
for(i=0;field[i] != '\0';i++){
printf("%c",field[i]); //prints what the number of companions read
}
//memset(field,0,strlen(field));
i = 0; j = 0; k = 0; flag = false;
while(flag != true){ //Read Type
if(line[i] == ';'){
counter++;
if(counter == 2){
flag = true; //keeps skipping the string till it finds the right field
}
}
if(flag == true){
j = i+1;
while(line[j] != ';'){
field[k] = line[j]; //saves the content of the field (all between ';')
j++; k++;
}
}
i++;
}
field[k] = '\0';
printf("\nTIPO: ");
for(i=0;field[i] != '\0';i++){
printf("%c",field[i]); /prints the type of entrance read
}
//memset(field,0,strlen(field));
i = 0; j = 0; k = 0; flag = false; counter = 0;
while(flag != true){ //Read Entrance Time
if(line[i] == ';'){
counter++;
if(counter == 3){
flag = true; //keeps skipping the string till it finds the right field
}
}
if(flag == true){
j = i+1;
while(line[j] != ';'){
field[k] = line[j]; //saves it
j++; k++;
}
}
i++;
}
field[k] = '\0';
printf("\nENTRANCE: ");
for(i=0;field[i] != '\0';i++){
printf("%c",field[i]);
}
i = 0; j = 0; k = 0; flag = false; counter = 0;
while(flag != true){ //Read Exit Time
if(line[i] == ';'){
counter++;
if(counter == 4){
flag = true;
}
}
if(flag == true){
j = i+1;
while(line[j] != ';'){
field[k] = line[j];
j++; k++;
}
}
i++;
}
field[k] = '\0';
printf("\nSAIDA: ");
for(i=0;field[i] != '\0';i++){
printf("%c",field[i]);
}
printf("\n\n");
i = 0; j = 0; k = 0; flag = false;
memset(field,0,strlen(field));
} else if(strstr(line, "Director") != NULL){
//TODO
} else if(strstr(line, "Visitor") != NULL){
//TODO
}
}
return 0;
}

While I recommend sscanf or fscanf over writing complicated parsing code, it's also good for your coding skills to learn how to write parsers. After adjusting your code to use fgets instead of getline so that I could get it to compile, I hit a fault in this loop:
while ( flag != true )
{ //Read Type
if ( line[i] == ';' ) // <<<< Fault when i = 2488
{
counter++;
if ( counter == 2 )
{
flag = true; //keeps skipping the string till it finds the right field
}
}
if ( flag == true )
{
j = i + 1;
while ( line[j] != ';' )
{
field[k] = line[j]; //saves the content of the field (all between ';')
j++; k++;
}
}
i++;
}
You're incrementing i without reference to the length of the actual line, so at some point you are accessing a memory address that you do not own.
You should enable all warnings when you compile your code. Your compiler should be warning you of some of your mistakes. One of the important ones you should be seeing is "Unreachable code..." at line 27 in your removeSpaces function:
return(line);
line[counter] = '\0'; // This never executed.
See http://pubs.opengroup.org/onlinepubs/009696699/functions/fgets.html

sscanf could be used to parse the values from line.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main (int argc, char *argv[]){
FILE * fp;
char * line = NULL;
char id[30];
char companions[30];
char type[30];
char entrance[30];
char depart[30];
size_t len = 0;
ssize_t read;
fp = fopen("input.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, fp)) != -1) {
printf("%s", line);
if(strstr(line, "Worker") != NULL) { //determine the entrance type
printf("\nWORKER READ\n");
if ( 5 == sscanf ( line, "%29s ; %29s ; %29s ; %29s ; %29s"
, id, companions, type, entrance, depart)) {
printf("\nID: %s", id);
printf("\nCOMPANIONS: %s", companions);
printf("\nTIPO: %s", type);
printf("\nENTRANCE: %s", entrance);
printf("\nSAIDA: %s", depart);
printf("\n\n");
}
else {
printf ( "problem parsing Worker\n");
}
} else if(strstr(line, "Director") != NULL){
//TODO
} else if(strstr(line, "Visitor") != NULL){
//TODO
}
}
return 0;
}

Related

How to parse bigger amount of words?

I have a program, which receives filename as an input, saves file contents into 2d char array and then outputs words. It works absolutely fine for about 400 words, but then, when I add more words, it crashes. Debugging showed that i am trying to access unused address, and I don't understand how is that possible considering that previous tests with lesser amount of words were successful.
The question is: what am i missing here?
FILE: functions.c
#include "Lab10.h"
#include <stdio.h>
#include <malloc.h>
char** parser(char* filename) {
FILE* fp;
fp = fopen(filename, "r");
char** str = (char**)calloc(N, sizeof(char*) * N);
if (!str)
{
printf("\n Allocation error");
return NULL;
}
char ch;
int space = 0, words = 0;
for (int i = 0; !feof(fp); i++) // Memory allocation
{
ch = fgetc(fp);
if (!is_ch(ch))
{
if (i != space)
{
if (!(str[words] = (char*)calloc(i - space, sizeof(char) * (i - space))))
{
printf("\n Allocation error");
return NULL;
}
words++;
}
while (!is_ch(ch) && !feof(fp))
{
ch = fgetc(fp);
i++;
}
if(!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
i--;
space = i;
}
}
fseek(fp, 0, SEEK_SET);
for (int i = 0; i < words; i++) // Copying words into 2d array
{
while (!is_ch(fgetc(fp)));
if (!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
int j = 0;
do {
if (((fscanf(fp, "%c", &str[i][j])) != 1))
break;
j++;
} while (is_ch(str[i][j-1]) && !feof(fp));
}
return str;
}
int is_ch(char ch)
{
return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'));
}
FILE: main.c
#define _CRT_SECURE_NO_WARNINGS
#include "Lab10.h"
#include <stdio.h>
#include <stdlib.h>
int main() {
char* filename = (char*)malloc(sizeof(char) * N);
if (!scanf("%s", filename) || filename == 0)
{
printf("\n Incorrect filename input");
return -1;
}
char** str = parser(filename);
printf("\n Contents of .txt file:");
for (int i = 0; str[i] != NULL; i++) {
printf("\n\t%d) ", i+1);
for (int j = 0; is_ch(str[i][j]); j++) {
printf("%c", str[i][j]);
}
}
return 0;
}
This answer was posted as a reply to one of the comments below the question itself. I tried writing readWord function, which recieves filepointer, reads one word and then returns pointer to the resulting array - that's eases the procedure, making it less complex. It works almost like fgets(), but it reads till non-character, instead of a newline
readWord function itself:
char* readWord(FILE* fp) {
char ch = 0;
while (!is_ch(ch))
{
ch = fgetc(fp);
if (ch == EOF || !ch)
return NULL;
}
int size = 1;
while (is_ch(ch))
{
if ((ch = fgetc(fp)) == EOF || !ch)
break;
size++;
}
fseek(fp, -(size * (int)sizeof(char)), 1);
if (ch != EOF || !ch)
size--;
char* word = (char*)calloc(size, sizeof(char) * size + 1);
if (!word)
{
printf("\n Allocation error.");
return NULL;
}
for (int i = 0; i < size; i++)
word[i] = fgetc(fp);
word[size] = '\0';
return word;
}
That's how i use it in main():
FILE* fp = fopen("test.txt", "r");
char* word;
while ((word = readWord(fp)) != NULL)
{
for (int i = 0; word[i] != '\0'; i++)
printf("%c", word[i]);
printf(" ");
}
Is there is anything i need to improve here? It works fine, but is it possible to somehow make it better?

How to print a string based on the postion in c?

I am working on a question which requires me to print a string given a field-number at that position. The strings should be read from a file.
file.txt
C is a language.
lex lexical analyser
(blank line)
gcc is good
If the field-number is 2 (i.e the second word in the sentence). The program should output
is
lexical
(NULL)
is
I wrote a function but don't think its the correct way and that it would work for all cases. It should handle extra blanks or newlines.
while (fgets(buffer, MAX, file) != NULL) {
for (int i = 1; i < strlen(buffer); i++) {
if (count == field_number - 1) {
int j = i;
while (j < strlen(buffer) && buffer[j] != ' ') {
printf("%c", buffer[j++]);
}
printf("\n");
count = 0;
break;
}
if (buffer[i] == ' ' && buffer[i - 1] != ' ') {
count++;
}
}
}
I am a beginner. This code should be easy to understand.
This should work for all the cases,
int main() {
//FILE* file = fopen(__FILE__, "r");
//int field_number = 2;
int new_line = 0; // var to keep track of new line came or not
int word = 0;
int count = 0;
char c, prev_c;
while ((c = fgetc(file)) != EOF) {
// printf("[%c]", c);
// if a new line char comes it means you entered a new line
if(c == '\n') {
// you have to print the new line here on the output to handle
// empty line cases
printf("\n");
new_line = 1; // when line changes
word = 0; // no word has come in this new line so far
count = 0; // count becomes 0
} else if( c == ' ' && prev_c != ' ') {
if(word)
count++;
if(count == field_number) // if count exceeds field_number
new_line = 0; // wait till next line comes
} else if (new_line && count == field_number - 1) {
printf("%c", c);
} else {
word = 1; // fi a not new line or non space char comes, a word has come
}
prev_c = c;
}
return 0;
}

How can I get my Lexical analyzer program to print out more than just seperators?

Im writing a Lexical analyzer to read from a file and describe the text as either identifiers, keywords, separators, or operators. For some reason I am only able to output the separators, unless I delete or comment out the while loop with the print statement for the separators. When I delete that, the program prints out everything else correctly, skipping over the separators.
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>
int isKeyword(char buffer[]){
char keywords[32][10] = {"auto","break","case","char","const","continue","default",
"do","double","Else","enum","Function","Float","for","goto",
"If","Integer","long","register","Return","short","signed",
"sizeof","static","struct","switch","typedef","union",
"DOWhile","void","Write","while"};
int i, flag = 0;
for(i = 0; i < 32; ++i){
if(strcmp(keywords[i], buffer) == 0){
flag = 1;
break;
}
}
int k, flag2 = 0;
for (k = 0; k<32; ++k){
if(strcmp(keywords[i], buffer) == 0){
flag2 = 1;
break;
}
}
return flag;
return flag2;
}
int main(){
char ch, buffer2[15], operators[] = "+-*/%=";
char ch2, buffer[15], seperators[] = "{}(),;";
FILE *fp;
int i,k,j=0;
fp = fopen("input.txt","r");
if(fp == NULL){
printf("error while opening the file\n");
exit(0);
}
while((ch = fgetc(fp)) != EOF){
for(i = 0; i < 6; ++i){
if(ch == operators[i])
printf("%c is operator\n", ch);
while((ch2 = fgetc(fp)) != EOF){
for(k = 0; k < 6; ++k){
if(ch2 == seperators[k])
printf("%c is seperator\n", ch2);
}
}
if(isalnum(ch)){
buffer[j++] = ch;
}
else if((ch == ' ' || ch == '\n') && (j != 0)){
buffer[j] = '\0';
j = 0;
if(isKeyword(buffer) == 1)
printf("%s is keyword\n", buffer);
else
printf("%s is indentifier\n", buffer);
}
}
}
fclose(fp);
return 0;
}
The problem seems to be the second while loop containing the separator instructions, but I cant seem to figure out how to print that along with everything else.
I think this may help.There is no reason to create an additional char ch, or the buffer, when creating the separators. Just add it onto the end of the previous line. I just split it with a comma.This will clean your code up and allow you to keep the separators with the rest of the while loop code.
#include <stdio.h>
#define KEY 32
#define BUFFER_SIZE 15
int isKeyword(char buffer[])
{
char keywords[KEY][10] = { "auto","break","case","char","const","continue","default", "do","double","else","enum","extern","float","for","goto",
"if","int","long","register","return","short","signed", "sizeof","static","struct","switch","typedef","union", "unsigned","void","volatile","while"};
int i, flag = 0;
for (i = 0; i < KEY; ++i)
{
if (strcmp(keywords[i], buffer) == 0)
{
flag = 1;
break;
}
}
return flag;
}
int main()
{
char ch, buffer[BUFFER_SIZE], operators[] = "+-*/%=", separators[] = "(){}[]<>,";
FILE *fp;
int i, j = 0;
fp = fopen("Text.txt", "r");
while ((ch = fgetc(fp)) != EOF)
{
for (i = 0; i < 6; ++i)
{
if (ch == operators[i])
{
printf(" OPERATOR: %c \n", ch);
}
else if (ch == separators[i])
printf(" SEPARATOR: %c \n", ch);
}
if (isalnum(ch))
{
buffer[j++] = ch;
}
else if ((ch == ' ' || ch == '\n') && (j != 0))
{
buffer[j] = '\0';
j = 0;
{
if (isKeyword(buffer) == 1)
printf(" KEYWORD: %s \n", buffer);
else
printf(" IDENTIFIER: %s \n", buffer);
}
}
}
fclose(fp);
return 0;
}

Program to count lines, char, or words from file

I am writing a program to count words as practice but I am running into a problem where it is incorrectly counting no matter which option I choose.
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
int main(int argc, char **argv){
int totalcount = 0; //hold overall count
for(int i = 2; i < argc; i++){
int count = 0; //hold count for each file
int c; //temporarily hold char from file
FILE *file = fopen(argv[i], "r");
if (strcmp("-c",argv[1])){
while((c = fgetc(file)) != EOF){
count++;
}
}
else if(strcmp("-w",argv[1])){
bool toggle = false; //keeps track whether the next space or line indicates a word
while((c = fgetc(file)) != EOF){
if(!toggle && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))){
toggle = true;
}
if(toggle && ((c == '\n') || (c == ' '))){
count++;
toggle = false;
}
}
}
else{
while((c = fgetc(file)) != EOF){
if(c == '\n'){
count++;
}
}
}
printf("%d %s", count, argv[i]);
fclose(file);
totalcount += count;
}
if (argc > 3){
printf("%d total", totalcount);
}
return 0;
}
I don't know why my logic for char count doesn't work. I have ran through my logic when writing each section and it doesnt make sense to me why it would not me working. Any help would be greatly appreciated.
strcmp returns 0 when the strings equal, so never enter into the if/else statement
if (strcmp("-c",argv[1]) == 0){ //return value is 0
while((c = fgetc(file)) != EOF){
count++;
}
}
else if(strcmp("-w",argv[1]) == 0){ //return value is 0
bool toggle = false; //keeps track whether the next space or line indicates a word
while((c = fgetc(file)) != EOF){
if(!toggle && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))){
toggle = true;
}
if(toggle && ((c == '\n') || (c == ' '))){
count++;
toggle = false;
}
}
}
Hope it works for you
You can read file line by line, it may simplify the task
int get_lines_chars(const char *path)
{
/* Open templorary file */
FILE *fp = fopen(path, "r");
if (fp != NULL)
{
ssize_t read;
size_t len = 0;
char *line = NULL;
unsigned int line_no, char_no;
line_no = char_no = 0;
/* Read line-by-line */
while ((read = getline(&line, &len, fp)) != -1)
{
int curr_line = 0;
while (*line)
{
curr_line++;
char_no++;
line++;
}
line -= curr_line;
line_no++;
}
/* Cleanup */
fclose(fp);
if(line) free(line);
printf("File has %d lines and %d chars\n", line_no, char_no);
return 1;
}
return 0;
}

Base change program in C complies but does not show any output

I'm recently new to programming but this assignment has proven to be my most difficult. The program is suppose to read in a .txt file with the following format
input_base number output_base
and outputs the results. the bases can only range from 2-36 For example:
input: 2 10 4
output: 2
My program reads in each part of the line and stores them respectively. I then go through the steps to convert the number into the output base and then print it backwards. My problem is that the program runs fine and prints all the stored values and calculated values, but once I add in my "base_conversion" function the program no longer works and my friend even said it gave him a segmentation fault. I don't really know what could be causing it. Any help would be appreciated. Thank you.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void read_file_to_buffer(FILE *file);
char *buffer = NULL;
void string_check(char *buffer);
int char_to_int(char c);
int int_conversion(int x, int y);
void base_conversion(int end_number, int out_base);
int main(int argc, char *argv[]){
FILE *file = NULL;
if ( argc != 2 ){
fprintf(stderr, "Error: To many/few arguments\n");
exit(EXIT_FAILURE);
}
else {
file = fopen(argv[1], "rb");
if (file == NULL) {
fprintf(stderr, "Error: could not open file '%s'\n", argv[1]);
exit(EXIT_FAILURE);
}
else{
printf("File %s opened!\n", argv[1]);
}
}
read_file_to_buffer(file);
string_check(buffer);
fclose(file);
free(buffer);
buffer = NULL;
exit(EXIT_SUCCESS);
}
void read_file_to_buffer(FILE *file) {
long file_size = 0;
if(buffer != NULL){
fprintf(stderr, "buffer in use\n");
exit(EXIT_FAILURE);
}
rewind(file);
if (fseek(file, 0, SEEK_END) != 0){
perror("Could not seek to end of file");
exit(EXIT_FAILURE);
}
file_size = ftell(file);
if (file_size < 0){
perror("could not tell size of file");
exit(EXIT_FAILURE);
}
rewind(file);
buffer = (char *)malloc(sizeof(char) * (file_size + 1));
if (buffer == NULL){
fprintf(stderr, "Could not allocate memory");
exit(EXIT_FAILURE);
}
if(fread(buffer, sizeof(char), (size_t)file_size, file) != file_size){
fprintf(stderr, "Could not read file\n");
exit(EXIT_FAILURE);
}
buffer[file_size] = '\0';
return;
}
void string_check(char *buffer){
int i = 0;
int j = 0;
char base_in[2];
char number[50];
char base_out[2];
int actual_number[100];
int end_number = 0;
int x = 0;
int y = 0;
int in_base;
int out_base;
while( buffer[i] != '\0'){
if (buffer[i] != '#' && buffer[i] != ' ' && buffer[i] != '\n' && buffer[i] != '\r'){
while (buffer[i] != ' '){
base_in[j] = buffer[i];
i++;
j++;
}
j = 0;
i++;
if (base_in[1] != '\0'){
x = char_to_int(base_in[0]);
y = char_to_int(base_in[1]);
in_base = int_conversion(x, y);
x = 0;
y = 0;
}
else{
in_base = char_to_int(base_in[0]);
}
while (buffer[i] != ' '){
number[j] = buffer[i];
i++;
j++;
}
int q;
q=j;
j=0;
while (number[j] != '\0'){
actual_number[j] = char_to_int(number[j]);
end_number = end_number + pow(in_base, q-1) * actual_number[j];
j++;
q--;
}
j = 0;
i++;
while (buffer[i] != '\n' && buffer[i] != '\0'){
base_out[j] = buffer[i];
i++;
j++;
}
if (base_out[1] != '\0'){
x = char_to_int(base_out[0]);
y = char_to_int(base_out[1]);
out_base = int_conversion(x, y);
x = 0;
y = 0;
}
else{
out_base = char_to_int(base_out[0]);
}
j = 0;
i++;
base_conversion(end_number, out_base);
}
else{
while (buffer[i] != '\n' && buffer[i] != '\0'){
i++;
}
}
i++;
}
return;
}
int char_to_int(char c){
char map[] = "0123456789abcdefghijklmnopqrstuvwxyz";
int result = -1;
char *next = map;
while(*next != '\0'){
if(*next == c){
result = next - map;
break;
}
next++;
}
return result;
}
int int_conversion(int x, int y){
int value;
value = (x * 10) + y;
return value;
}
void base_conversion(int end_number, int out_base){
int remainder[100];
char map[] = "0123456789abcdefghijklmnopqrstuvwxyz";
int index = 0;
int i;
while (end_number != 0){
remainder[index] = end_number % out_base;
end_number = end_number / out_base;
index++;
}
for (i=0; i<index; i++){
printf("%c", map[remainder[index-1]]);
}
printf("\n");
return;
}
OP's base_conversion() is messed.
Loop prints same character repeatedly.
for (i=0; i<index; i++){
printf("%c", map[remainder[index-1]]); // Why same character?
}
Code is using signed math and % and can create negative remainders which may be used as array index.
remainder[index] = end_number % out_base; // result may be negative.
...
printf("%c", map[remainder[index-1]]); // accessing out of bounds with negative
Suggested simplification.
void base_conversion_helper(unsigned end_number, int out_base){
if (end_number >= out_base) base_conversion_helper(end_number/out_base, out_base);
putchar("0123456789abcdefghijklmnopqrstuvwxyz"[end_number % outbase]);
}
void base_conversion(unsigned end_number, int out_base){
assert(out_base >= 2 && out_base <= 36);
base_conversion_helper(end_number, out_base);
putchar('\n');
}

Resources