Segmentation Error in C program when reading text file - c

I want to print a bunch of words with their definitions in this format (word:defn) from a text file. However, I experience a segmentation error when running the program using gcc on a server. The weird thing is that when I compile the C program on a local desktop the program works perfectly.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int read_dict() {
FILE *fp;
int c;
char word[50];
char defn[500];
int sep = 0;
int doublenew = 0;
int i = 0;
fp = fopen("textfile.txt", "r");
if (fp == NULL) {
perror("Error in opening file");
return (-1);
}
while ((c = fgetc(fp)) != EOF) {
if (feof(fp)) {
break;
}
if (c == '.' && sep == 0) {
sep = 1;
word[i] = '\0';
//c = fgetc(fp);
i = 0;
} else
if (doublenew == 1 && c == '\n' && sep == 1) {
defn[i] = c;
i++;
defn[i] = '\0';
printf("%s %s", word, defn);
i = 0;
sep = 0;
doublenew = 0;
} else
if (c == '\n' && sep == 1) {
defn[i] = c;
doublenew = 1;
i++;
} else
if (sep == 0) {
word[i] = c;
i++;
} else
if (sep == 1) {
defn[i] = c;
i++;
doublenew = 0;
}
}
fclose(fp);
return 0;
}
text file:
COOKIE. is a small, flat, sweet, baked good, usually containing flour, eggs, sugar, and either butter, cooking oil or another oil or fat. It may include other ingredients such as raisins, oats, chocolate chips or nuts.
ICE CREAM. is a sweetened frozen food typically eaten as a snack or dessert.

The word length is limited to 49 characters and the definition to 499 characters, but you never check for overflow in your code. If, unlike your sample, the dictionary used on the server has longer words and/or definitions, your code invokes undefined behavior which can cause a segmentation fault.
Undefined behavior might also not cause any visible effect, as seems to be the case on your local machine. The code generated by the local compiler and that of the server may be different, due to a different version or different command line options.
Check for array boundaries to avoid this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int read_dict() {
FILE *fp;
int c;
char word[50];
char defn[500];
int sep = 0;
int doublenew = 0;
size_t i = 0;
fp = fopen("textfile.txt", "r");
if (fp == NULL) {
perror("Error in opening file");
return (-1);
}
while ((c = fgetc(fp)) != EOF) {
if (feof(fp)) {
break;
}
if (c == '\r') {
/* ignore CR characters inserted by Windows before LF */
continue;
}
if (c == '.' && sep == 0) {
sep = 1;
word[i] = '\0';
//c = fgetc(fp);
i = 0;
} else
if (doublenew == 1 && c == '\n' && sep == 1) {
if (i < sizeof(defn) - 1) {
defn[i] = c;
i++;
}
defn[i] = '\0';
printf("%s %s", word, defn);
i = 0;
sep = 0;
doublenew = 0;
} else
if (c == '\n' && sep == 1) {
if (i < sizeof(defn) - 1) {
defn[i] = c;
i++;
}
doublenew = 1;
} else
if (sep == 0) {
if (i < sizeof(word) - 1) {
word[i] = c;
i++;
}
} else
if (sep == 1) {
if (i < sizeof(defn) - 1) {
defn[i] = c;
i++;
}
doublenew = 0;
}
}
fclose(fp);
return 0;
}
Note: if nothing gets printed on the server, it means the file does not have 2 consecutive newline characters '\n'. If you are using the same file on your system and on the server, and if you are using Windows on your system and Linux on your server, the behavior of your program will be different on the '\r' characters used by Windows for the end of line. You must ignore these characters explicitly as they are only implicitly ignored on Windows, not on Linux. I modified the code above to account for this.

Related

How to parse bigger amount of words?

I have a program, which receives filename as an input, saves file contents into 2d char array and then outputs words. It works absolutely fine for about 400 words, but then, when I add more words, it crashes. Debugging showed that i am trying to access unused address, and I don't understand how is that possible considering that previous tests with lesser amount of words were successful.
The question is: what am i missing here?
FILE: functions.c
#include "Lab10.h"
#include <stdio.h>
#include <malloc.h>
char** parser(char* filename) {
FILE* fp;
fp = fopen(filename, "r");
char** str = (char**)calloc(N, sizeof(char*) * N);
if (!str)
{
printf("\n Allocation error");
return NULL;
}
char ch;
int space = 0, words = 0;
for (int i = 0; !feof(fp); i++) // Memory allocation
{
ch = fgetc(fp);
if (!is_ch(ch))
{
if (i != space)
{
if (!(str[words] = (char*)calloc(i - space, sizeof(char) * (i - space))))
{
printf("\n Allocation error");
return NULL;
}
words++;
}
while (!is_ch(ch) && !feof(fp))
{
ch = fgetc(fp);
i++;
}
if(!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
i--;
space = i;
}
}
fseek(fp, 0, SEEK_SET);
for (int i = 0; i < words; i++) // Copying words into 2d array
{
while (!is_ch(fgetc(fp)));
if (!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
int j = 0;
do {
if (((fscanf(fp, "%c", &str[i][j])) != 1))
break;
j++;
} while (is_ch(str[i][j-1]) && !feof(fp));
}
return str;
}
int is_ch(char ch)
{
return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'));
}
FILE: main.c
#define _CRT_SECURE_NO_WARNINGS
#include "Lab10.h"
#include <stdio.h>
#include <stdlib.h>
int main() {
char* filename = (char*)malloc(sizeof(char) * N);
if (!scanf("%s", filename) || filename == 0)
{
printf("\n Incorrect filename input");
return -1;
}
char** str = parser(filename);
printf("\n Contents of .txt file:");
for (int i = 0; str[i] != NULL; i++) {
printf("\n\t%d) ", i+1);
for (int j = 0; is_ch(str[i][j]); j++) {
printf("%c", str[i][j]);
}
}
return 0;
}
This answer was posted as a reply to one of the comments below the question itself. I tried writing readWord function, which recieves filepointer, reads one word and then returns pointer to the resulting array - that's eases the procedure, making it less complex. It works almost like fgets(), but it reads till non-character, instead of a newline
readWord function itself:
char* readWord(FILE* fp) {
char ch = 0;
while (!is_ch(ch))
{
ch = fgetc(fp);
if (ch == EOF || !ch)
return NULL;
}
int size = 1;
while (is_ch(ch))
{
if ((ch = fgetc(fp)) == EOF || !ch)
break;
size++;
}
fseek(fp, -(size * (int)sizeof(char)), 1);
if (ch != EOF || !ch)
size--;
char* word = (char*)calloc(size, sizeof(char) * size + 1);
if (!word)
{
printf("\n Allocation error.");
return NULL;
}
for (int i = 0; i < size; i++)
word[i] = fgetc(fp);
word[size] = '\0';
return word;
}
That's how i use it in main():
FILE* fp = fopen("test.txt", "r");
char* word;
while ((word = readWord(fp)) != NULL)
{
for (int i = 0; word[i] != '\0'; i++)
printf("%c", word[i]);
printf(" ");
}
Is there is anything i need to improve here? It works fine, but is it possible to somehow make it better?

Segmentation fault (core dumped) in C Snapshot

Snapshot
Introduction
Usual search engines receive a set of keywords and look for all the
documents that contain these keywords. The documents are listed in the
order of document significance. In this problem we consider the
significance of a document for a set of keywords is given by the
minimum number of words of the continuous piece of text that contains
all the searched keywords. For instance: consider the keywords “2008”
and “IEEEXtreme”, and the following two texts: “The registration for
the 2008 edition of IEEEXtreme is now open” and “IEEEXtreme 2008
edition is going to take place on March 8th 2008”. The significance of
the first text is 4, and of the second one is 2. If any of the given
words is not present in the text, the significance is zero.
Task
Please write a program that reads from the standard input a text in
which the words are separated only by spaces, and finds the
significance of text against the keywords given as the parameters to
your program.
Syntax
For the input text:
The registration for the 2008 edition of IEEEXtreme is now open
your program executed as:
> snapshot 2008 IEEEXtreme
should write 4 on the standard output. Note: if not all
the words are found, the program should return 0.
#include <stdio.h>
#include <string.h>
int compare(char *x, char *z) {
int a = 0;
if (strlen(x) == strlen(z)) {
while (a < strlen(x)) {
if (x[a] == z[a])
a++;
else
return 0;
}
return 1;
}
}
int verify(int q, int n, char *v) {
static int flag2 = 0;
static int error = 0;
if ((v[0] == '#') && (v[1] == '#') && (v[2] == '#') && (v[3] == '#')
&& (v[4] == '#')) {
flag2 = 1;
} else {
error++;
}
if ((q = n - 1) && flag2 == 1 && error == 0)
return 1;
else
return 0;
}
int main(int argc, char *argv[]) {
char text[1000];
char word[30];
FILE *fp = fopen("filename", "r");
int i = 0, j = 0, k = 0, y = 1, w = 1, t = 1, flag = 0, signifiancia = 0,
sucesso = 0;
while (feof(fp))
text[i++] = fgetc(fp);
text[i] = '\0';
while (text[j] != '\0') {
if (text[j] == ' ') {
j++;
word[k] = '\0';
k = 0;
while (y < argc) {
compare(argv[y], word);
if (1) {
flag = 1;
argv[y] = "#####";
signifiancia++;
y++;
} else {
if (flag = 1)
signifiancia++;
y++;
}
}
} else {
word[k] = text[j];
j++;
k++;
}
while (w < argc) {
verify(t, argc, argv[w]);
t++;
if (1) {
sucesso++;
printf("%d", signifiancia);
}
}
}
if (sucesso == 0)
printf("0");
}
The error given is:
Segmentation fault (core dumped)
At least these problems:
Missing return
When if (strlen(x) == strlen(z)) is false, function does not return anything.
Yet calling code does not use the return value anyways.
Assignment rather than compare
if (flag = 1)
Too many loops
Code iterates once too often.
while (feof(fp))
text[i++] = fgetc(fp);
Infinite loop
Once while (w < argc) { loop is entered, it appears to iterate infinitely - likely leading to UB.
Failure to prevent buffer over-runs

Brainfuck interpreter not running some codes

I'm kinda new to C programming and decided that making a brainfuck interpreter in C would be a good way to learn the language. I could write and tested with these bf codes:
this should print a hello world
++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.
this works as expected, so i thought my interpreter worked fine, but when I tested with a few variants of the hello world code, strange things happened.
this bf code should also print a hello world, but instead it prints out ²♣■■ÖFu ÖÖ■♦u
--<-<<+[+[<+>--->->->-<<<]>]<<--.<++++++.<<-..<<.<+.>>.>>.<<<.+++.>>.>>-.<<<+.
this bf code should also print a hello world, but instead the program gets stuck
+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+.
this is the code I wrote to interprete brainfuck:
#include <stdio.h>
int main(int argc, char const *argv[])
{
if (argc == 1)
{
printf("You must specify a file path\n");
return -1;
}
//amount of memory locations available
int mem = 30000;
//creating an integer array with mem positions
char arr[mem];
//current memory position
int index = 0;
//setting everything to 0
for (int i = 0; i < mem; i++)
{
arr[i] = 0;
}
FILE *file = fopen(argv[1], "r");
if (file == NULL)
{
printf("ERROR, file couldn't be read\n");
return -1;
}
//reading util the END OF THE FILE
char c;
while ((c = fgetc(file)) != EOF)
{
if (c == '+')
{
arr[index]++;
}
else if (c == '-')
{
arr[index]--;
}
else if (c == '>')
{
index++;
index %= mem;
}
else if (c == '<')
{
index--;
index %= mem;
}
else if (c == '.')
{
printf("%c", arr[index]);
}
else if (c == ',')
{
scanf("%c", &arr[index]);
}
else if (c == '[')
{
char temp = fgetc(file);
int skip = 0;
while (temp != ']' || skip != 0)
{
if (temp == '[')
skip++;
if (temp == ']' && skip > 0)
skip--;
temp = fgetc(file);
}
fseek(file, -1, SEEK_CUR);
}
else if (c == ']')
{
if (arr[index] != 0)
{
fseek(file, -2, SEEK_CUR);
char temp = fgetc(file);
int skip = 0;
while (temp != '[' || skip != 0)
{
if (temp == ']')
skip++;
if (temp == '[' && skip > 0)
skip--;
fseek(file, -2, SEEK_CUR);
temp = fgetc(file);
}
}
else
{
continue;
}
}
}
fclose(file);
return 0;
}
Thanks a lot if you can help me out on this one.
There's could be a problem with this piece of code, when index become negative.
index--;
index %= mem;
% operator retains the sign of the left argument, so -1 % mem is –1, not mem–1 you may expect.

Program to count lines, char, or words from file

I am writing a program to count words as practice but I am running into a problem where it is incorrectly counting no matter which option I choose.
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
int main(int argc, char **argv){
int totalcount = 0; //hold overall count
for(int i = 2; i < argc; i++){
int count = 0; //hold count for each file
int c; //temporarily hold char from file
FILE *file = fopen(argv[i], "r");
if (strcmp("-c",argv[1])){
while((c = fgetc(file)) != EOF){
count++;
}
}
else if(strcmp("-w",argv[1])){
bool toggle = false; //keeps track whether the next space or line indicates a word
while((c = fgetc(file)) != EOF){
if(!toggle && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))){
toggle = true;
}
if(toggle && ((c == '\n') || (c == ' '))){
count++;
toggle = false;
}
}
}
else{
while((c = fgetc(file)) != EOF){
if(c == '\n'){
count++;
}
}
}
printf("%d %s", count, argv[i]);
fclose(file);
totalcount += count;
}
if (argc > 3){
printf("%d total", totalcount);
}
return 0;
}
I don't know why my logic for char count doesn't work. I have ran through my logic when writing each section and it doesnt make sense to me why it would not me working. Any help would be greatly appreciated.
strcmp returns 0 when the strings equal, so never enter into the if/else statement
if (strcmp("-c",argv[1]) == 0){ //return value is 0
while((c = fgetc(file)) != EOF){
count++;
}
}
else if(strcmp("-w",argv[1]) == 0){ //return value is 0
bool toggle = false; //keeps track whether the next space or line indicates a word
while((c = fgetc(file)) != EOF){
if(!toggle && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))){
toggle = true;
}
if(toggle && ((c == '\n') || (c == ' '))){
count++;
toggle = false;
}
}
}
Hope it works for you
You can read file line by line, it may simplify the task
int get_lines_chars(const char *path)
{
/* Open templorary file */
FILE *fp = fopen(path, "r");
if (fp != NULL)
{
ssize_t read;
size_t len = 0;
char *line = NULL;
unsigned int line_no, char_no;
line_no = char_no = 0;
/* Read line-by-line */
while ((read = getline(&line, &len, fp)) != -1)
{
int curr_line = 0;
while (*line)
{
curr_line++;
char_no++;
line++;
}
line -= curr_line;
line_no++;
}
/* Cleanup */
fclose(fp);
if(line) free(line);
printf("File has %d lines and %d chars\n", line_no, char_no);
return 1;
}
return 0;
}

Segmentation fault (core dumped) in some cases

Any idea why this program will fail in downloading images from page #12 while it did pages 1-9 perfectly? I really don't know how can I debug. Maybe there's a problem with wget when it doesn't find the first image? http://img717.imageshack.us/img717/7954/white2u.png
#include <stdio.h>
#include <stdlib.h> // for using system calls
#include <unistd.h> // for sleep
int main ()
{
char body[] = "forum-post-body-content", notes[] = "p-comment-notes", img[] = "img src=", link[200], cmd[200]={0}, file[10];
int c, pos = 0, pos2 = 0, fin = 0, i, j, num = 0, found = 0;
FILE *fp;
for (i = 12; i <= 149; ++i)
{
sprintf(cmd,"wget -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'",i,i);
system(cmd);
sprintf(file, "page%d.txt", i);
fp = fopen (file, "r");
while ((c = fgetc(fp)) != EOF)
{
if (body[pos] == c)
{
if (pos == 22)
{
pos = 0;
while (fin == 0)
{
c = fgetc (fp);
if (feof (fp))
break;
if (notes[pos] == c)
{
if (pos == 14)
{
fin = 1;
pos = -1;
}
++pos;
}
else
{
if(pos > 0)
pos = 0;
}
if (img[pos2] == c)
{
if (pos2 == 7)
{
pos2 = 0;
while (found == 0)
{
c = fgetc (fp); // get char from file
link[pos2] = c;
if (pos2 > 0)
{
if(link[pos2-1] == 'g' && link[pos2] == '\"')
{
found = 1;
}
}
++pos2;
}
--pos2;
found = 0;
char link2[pos2];
for (j = 1; j < pos2; ++j)
{
link2[j - 1] = link[j];
}
link2[j - 1] = '\0';
sprintf(cmd, "wget -O /home/arturo/Dropbox/Digital_Renders/%d \'%s\'", ++num, link2);
system(cmd);
pos2 = -1;
}
++pos2;
}
else
{
if(pos2 > 0)
pos2 = 0;
}
}
fin = 0;
}
++pos;
}
else
pos = 0;
}
// closing file
fclose (fp);
if (remove (file))
fprintf(stderr, "Can't remove file\n");
}
}
char file[10];
"page12.txt" has 11 characters in it including the null character. Please just do something like char file[128]. Memory is cheap. Time spent debugging is expensive.
You have an overflow.
file[10];
page1.txt = 10 characters including the null terminator
page12.txt = 11 characters
Look into to the safe functions like snprintf()

Resources