Better Way Without Goto? - c

I have a program where my code uses a goto statement, and I want to get rid of it in a nice way, but I can't seem to find a solution. If goto is the best way, then please let me know. Here is a summary of the code:
//Counts how many times every word appears in a file
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define NUMWORDS 1000
#define WORDLEN 50
typedef struct
{
char word[WORDLEN + 1];
int num;
} Appearance;
int main(int argc, char *argv[]) {
FILE *readfile;
Appearance *appearlist[NUMWORDS] = {NULL};
char word[WORDLEN + 1];
int i;
//Get a valid filename and open the file, store pointer into readfile
...
char c;
while (c != EOF) {
skip: //Annoying label
//Get a word from readfile, store into word
...
if (word[0] != '\0') {
for (i = 0; i < NUMWORDS && appearlist[i]; i++) {
if (strcmp(appearlist[i] -> word, word) == 0) {
appearlist[i] -> num++;
goto skip; //Annoying goto
}
}
appearlist[i] = (Appearance *) malloc(sizeof(Appearance));
appearlist[i] -> num = 1;
strcpy(appearlist[i] -> word, word);
}
}
//Display results, free memory
...
return 0;
}
The problem is, I want to skip code that is outside of the loop I want to skip from. I would like to not create another variable only designed for this. If you want the full code, click on "Show code snippet."
//Counts how many times every word appears in a file
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define NUMWORDS 1000
#define WORDLEN 50
#define FILENAMELEN 50
typedef struct
{
char word[WORDLEN + 1];
int num;
} Appearance;
int main(int argc, char *argv[])
{
char filename[FILENAMELEN];
FILE *readfile;
Appearance *appearlist[NUMWORDS] = {NULL};
char word[WORDLEN + 1];
size_t ln;
int i;
if (argc == 2)
strncpy(filename, argv[1], sizeof(filename));
else {
printf("Enter a filename to count appearances from, or just press enter to quit: ");
fgets(filename, FILENAMELEN, stdin);
ln = strlen(filename) - 1;
if (filename[ln] == '\n')
filename[ln] = '\0';
}
while((readfile = fopen(filename, "r")) == NULL) {
if (filename[0] == '\0')
return 0;
printf("Invalid file! Please enter another filename, or just press enter to quit: ");
fgets(filename, FILENAMELEN, stdin);
ln = strlen(filename) - 1;
if (filename[ln] == '\n') filename[ln] = '\0';
}
char c;
while (c != EOF) {
skip:
for (i = 0; (c = getc(readfile)) != EOF && (isalnum(c) || c == '\''); i++) {
if (i >= WORDLEN) {
word[i] = '\0';
printf("\nWarning: word too long (over %d characters), trimming to: %s\n", WORDLEN, word);
while ((c = getc(readfile)) != EOF && (isalnum(c) || c == '\'')) ;
} else {
word[i] = tolower(c);
}
}
word[i] = '\0';
if (word[0] != '\0') {
for (i = 0; i < NUMWORDS && appearlist[i]; i++) {
if (strcmp(appearlist[i] -> word, word) == 0) {
appearlist[i] -> num++;
goto skip;
}
}
appearlist[i] = (Appearance *) malloc(sizeof(Appearance));
appearlist[i] -> num = 1;
strcpy(appearlist[i] -> word, word);
}
}
for (i = 0; i < NUMWORDS && appearlist[i]; i++) {
printf("Word: %s, Appearances: %d\n", appearlist[i] -> word, appearlist[i] -> num);
free(appearlist[i]);
}
return 0;
}

Using goto in this case is often considered acceptable.
Alternatives would be to set a variable so that you can continue in the outer loop after breaking from the inner one, or turning the whole segment that you want to escape from into a separate function, and returning from it instead of using goto.
I'm ignoring any other issues there may be with the code that aren't relevant to the question!

Put everything beginning with the 'if' statement into a separate method (let's call it "process" and replace the goto with return. Then the while-loop becomes:
while (c != EOF) {
//Get a word from readfile, store into word
...
process(...);
}

Sometimes using goto is a hint that code should use a helper function
static bool findword(Appearance *appearlist, size_t size, const char *word) {
for (size_t i = 0; i < size && appearlist[i]; i++) {
if (strcmp(appearlist[i]->word, word) == 0) {
appearlist[i]->num++;
return true;
}
}
return false;
}
while (c != EOF) {
//Get a word from readfile, store into word
...
if (word[0] != '\0') {
if (findword(appearlist, NUMWORDS, word)) {
continue;
}
appearlist[i] = (Appearance *) malloc(sizeof(Appearance));
appearlist[i] -> num = 1;
strcpy(appearlist[i] -> word, word);
}
}

Related

Finding palindrome in C

A palindrome is a word that reads the same from left to right and from right to left.
I wrote a program that finds palindromes from a console.
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#define SIZE 100
int main() {
int i = 0, c;
int left, right;
char string[SIZE];
while (EOF != (c = getchar()) || (c = getchar()) != '\n') {
if (isspace(c) != 0) {
if (i > 0) {
left = 0;
right = i - 1;
while (right > left) {
if (string[left] != string[right]) {
i = 0;
break;
}
++left;
--right;
}
if (left >= right) {
while (i > 0)
printf("%c", string[--i]);
printf("%c", c);
}
i = 0;
}
if (c == '\n')
break;
}
else {
string[i++] = c;
}
}
}
For example, we enter the words: dad sad mum. She outputs: dad mum. But if we write dad sad or dad mum sad. The output will be: dad mum.
That is, an extra space is printed when the last word we read is not a palindrome. How can you get around this situation?
Code is convoluted
First read input properly and form a string.
for (i = 0; i < SIZE - 1; i++) [
int ch = getchar();
if (ch == EOF || ch == '\n') {
break;
}
string[i++] = (char) ch;
}
string[i] = 0;
Then process the string in string[]. Only print spaces when needed.
const char *separator = "";
int i = 0;
while (string[i]) {
// Beginning of a word?
if (!isspace(string[i])) {
int start = i;
int end = i;
while (!isspace(string[end+1]) && string[end+1]) {
end++;
}
// At this point, start indexes the 1st char of the word
// and end indexes the last char of the word
// Now find if a palindrome
while (start <= end && string[start] == string[end]) {
start++;
end--;
}
// Found a palindrome?
if (start > end) {
fputs(separator, stdout);
separator = " "; // print a space _next_ time
while (!isspace(string[i]) && string[i]) {
fputc(string[i++], stdout);
}
} else {
i = end + 1;
}
} else {
i++;
}
}
fputc('\n', stdout);
Life is easier if you just read the string all at once, then process the string.
char s[1000];
fgets( s, sizeof(s), stdin );
char * p = strchr( s, '\n' );
if (p) *p = '\0';
If you wanted to read one character at a time you should read once, test twice:
int c;
while ( ((c = getchar()) != '\n') and (c != EOF) )
But trying to compute the palindrome-ness at the same time as reading seriously makes your algorithm waaaaay more complicated than it needs to be. Read a string first, then compute.
Now you can use integer indices from each end of the string. If you can get them to meet (or cross) then you’ve got a palindrome. Hint: put that in a function:
bool is_palindrome( const char * s )
{
int left = 0;
int right = strlen(s) - 1;
...
}

How to parse bigger amount of words?

I have a program, which receives filename as an input, saves file contents into 2d char array and then outputs words. It works absolutely fine for about 400 words, but then, when I add more words, it crashes. Debugging showed that i am trying to access unused address, and I don't understand how is that possible considering that previous tests with lesser amount of words were successful.
The question is: what am i missing here?
FILE: functions.c
#include "Lab10.h"
#include <stdio.h>
#include <malloc.h>
char** parser(char* filename) {
FILE* fp;
fp = fopen(filename, "r");
char** str = (char**)calloc(N, sizeof(char*) * N);
if (!str)
{
printf("\n Allocation error");
return NULL;
}
char ch;
int space = 0, words = 0;
for (int i = 0; !feof(fp); i++) // Memory allocation
{
ch = fgetc(fp);
if (!is_ch(ch))
{
if (i != space)
{
if (!(str[words] = (char*)calloc(i - space, sizeof(char) * (i - space))))
{
printf("\n Allocation error");
return NULL;
}
words++;
}
while (!is_ch(ch) && !feof(fp))
{
ch = fgetc(fp);
i++;
}
if(!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
i--;
space = i;
}
}
fseek(fp, 0, SEEK_SET);
for (int i = 0; i < words; i++) // Copying words into 2d array
{
while (!is_ch(fgetc(fp)));
if (!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
int j = 0;
do {
if (((fscanf(fp, "%c", &str[i][j])) != 1))
break;
j++;
} while (is_ch(str[i][j-1]) && !feof(fp));
}
return str;
}
int is_ch(char ch)
{
return ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'));
}
FILE: main.c
#define _CRT_SECURE_NO_WARNINGS
#include "Lab10.h"
#include <stdio.h>
#include <stdlib.h>
int main() {
char* filename = (char*)malloc(sizeof(char) * N);
if (!scanf("%s", filename) || filename == 0)
{
printf("\n Incorrect filename input");
return -1;
}
char** str = parser(filename);
printf("\n Contents of .txt file:");
for (int i = 0; str[i] != NULL; i++) {
printf("\n\t%d) ", i+1);
for (int j = 0; is_ch(str[i][j]); j++) {
printf("%c", str[i][j]);
}
}
return 0;
}
This answer was posted as a reply to one of the comments below the question itself. I tried writing readWord function, which recieves filepointer, reads one word and then returns pointer to the resulting array - that's eases the procedure, making it less complex. It works almost like fgets(), but it reads till non-character, instead of a newline
readWord function itself:
char* readWord(FILE* fp) {
char ch = 0;
while (!is_ch(ch))
{
ch = fgetc(fp);
if (ch == EOF || !ch)
return NULL;
}
int size = 1;
while (is_ch(ch))
{
if ((ch = fgetc(fp)) == EOF || !ch)
break;
size++;
}
fseek(fp, -(size * (int)sizeof(char)), 1);
if (ch != EOF || !ch)
size--;
char* word = (char*)calloc(size, sizeof(char) * size + 1);
if (!word)
{
printf("\n Allocation error.");
return NULL;
}
for (int i = 0; i < size; i++)
word[i] = fgetc(fp);
word[size] = '\0';
return word;
}
That's how i use it in main():
FILE* fp = fopen("test.txt", "r");
char* word;
while ((word = readWord(fp)) != NULL)
{
for (int i = 0; word[i] != '\0'; i++)
printf("%c", word[i]);
printf(" ");
}
Is there is anything i need to improve here? It works fine, but is it possible to somehow make it better?

C parser program to add list of numbers separated by ; over multiple lines taking input using read()

I want to write a program that takes numbers as inputs over multiple lines that are identified/separated by let's say ; character and print out their sum(s). Example:
1 2 3; 4 5 6; 7 8 9;(enter)
10 11 12;(enter)
exit(enter)
And I want the expected output to be exactly like:
List 1: 6 (sum of 1 2 3)
List 2: 15 (sum of 4 5 6)
List 3: 24 (sum of 7 8 9)
List 4: 33 (sum of 10 11 12)
sum of a b c, printing out this is not necessary, but their result as number is (enter), i.e. I'm pressing enter/getting to new line.
I am terminating when user types exit. But I am getting segmentation fault error in my code. Plus in this code the sum is also getting wrong values (I tried it separately).
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
int main() {
char *b;
int sum = 0;
int rc;
int i = 1;
while (strcasecmp(b, "exit") != 0) {
char buff[50];
rc = read(0, buff, 50);
if (rc == -1) {
perror("");
exit(0);
}
char *a = buff;
b = strtok(a, "\n");
char *c = strtok(b, ";");
while (c != NULL) {
char *d = strtok(c, " ");
while (d != NULL) {
int a = atoi(d);
sum += a;
d = strtok(NULL, " ");
printf("List %d: %d", i, sum);
i++;
}
c = strtok(NULL, ";");
}
}
}
You can use getchar and parse the integers on the go as below, without strtok.
int main() {
int sum = 0; int rc; int i = 0, j = 0;
char buff[50] = "";
while(1) {
if (i>= sizeof buff) break; //not enough memory
if (read(STDIN_FILENO, &buff[i], 1) < 1) {break;} //read error
if (strcasecmp(buff, "exit") == 0) break;
else if (buff[i] == ';'){
buff[i] = '\0';
int a = atoi(buff);
sum += a;
printf("sum = %d\n", sum);
sum = 0;
i = 0;
memset(buff, 0 , sizeof buff);
}
else if (buff[i] == ' '){
buff[i] = '\0';
int a = atoi(buff);
sum += a;
i = 0;
}
else if (buff[i] != '\n'){
i++;
}
}
}
There are multiple problems in your code:
b is an uninitialized pointer, reading and writing through it have undefined behavior, most likely the cause of the segmentation fault.
you should not use the POSIX low level functions to read input, it is non portable and the input might not be read in line chunks and will not be null terminated... Furthermore, a -1 return value is not always an error.
Use fgets() or other standard stream functions.
Here is a simple solution if you can assume that lists do not span multiple lines and are always terminated by ;:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int sumlist(int n, char *str) {
char *p, *q;
int sum = 0, term;
for (p = str;; p = q) {
p += strspn(p, " \t\n"); // skip blanks
if (*p == '\0')
break;
term = strtol(p, &q, 10);
if (q == p) {
printf("invalid input: %s\n", str);
return -1;
}
sum += term;
}
printf("List %d: %d (sum of %s)\n", n, sum, str);
return 0;
}
int main() {
char buf[200];
int n = 1;
char *p, *q;
while (fgets(buf, sizeof buf, stdin) {
for (p = str;;) {
p += strspn(p, " \t\n"); // skip initial blanks
if (*p == '\0')
break;
q = strchr(p, ';');
if (q != NULL)
*q = '\0';
if (p == q) {
p = q + 1; // skip empty lists
continue;
}
if (!strcmp(p, "exit"))
break;
sumlist(n++, p);
if (q == NULL)
break;
p = q + 1;
}
}
return 0;
}
If you cannot use fgets() or any standard stream functions, re-write your own version, reading one byte at a time from the OS handle with read() and carefully test for potential signal interrupts:
#include <errno.h>
#include <unistd.h>
char *my_gets(int hd, char *buf, size_t size) {
size_t i;
for (i = 0; i + 1 < size;) {
ssize_t n = read(hd, &buf[i], 1);
if (n != 1) {
if (n == -1 && errno == EINTR)
continue;
break;
}
if (buf[i++] == '\n')
break;
}
if (i == 0)
return NULL;
buf[i] = '\0';
return buf;
}
int main() {
char buf[200];
int n = 1;
char *p, *q;
while (my_gets(0, buf, sizeof buf) {
for (p = str;;) {
p += strspn(p, " \t\n"); // skip initial blanks
if (*p == '\0')
break;
q = strchr(p, ';');
if (q != NULL)
*q = '\0';
if (p == q) {
p = q + 1; // skip empty lists
continue;
}
if (!strcmp(p, "exit"))
break;
sumlist(n++, p);
if (q == NULL)
break;
p = q + 1;
}
}
return 0;
}
There are already working solutions here, but I'd like to suggest another one that might be helpful to understand some concepts.
Although you cannot use getc and ungetc, I would still address your problem in a way that uses the concept of a get_buf. My solution reads a character at a time and tries to turn it into a valid token that the main loop can switch on. In my opinion, that's a nice way to handle the parsing of simple 'languages' like the one you want to interpret. Also, it is pretty extensible & it's easy to add additional tokens (e.g. math operations like + - / *).
As a quick description what's happening: In get_char, a single byte is read from STDIN whenever the internal buffer is empty. If it is not, the character that's on the buffer is returned. This functionality is used by get_valid_token which returns either your delimiter ; or a (potentially multi-digit) number. Being able to 'unget' a character is required here. In main, we continuously get tokens and perform the appropriate action, nicely separating getting and interpretation of tokens. Obviously, this a quick and dirty program, but it might work for you.
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define BUF_SIZ 2 /* get_buf never buffers more than one char by design */
static char get_buf[BUF_SIZ];
static char *get_buf_ptr = get_buf;
char get_char(int fd)
{
char c;
/* check buffer first */
if (!(get_buf == get_buf_ptr))
return *get_buf_ptr--;
/* if buffer is empty, read from STDIN */
if ((read(fd, &c, 1)) == -1) {
perror("read");
exit(1);
}
return c;
}
void unget_char(char c)
{
*(++get_buf_ptr) = c;
}
void flush(int fd)
{
char c;
do {
read(fd, &c, 1);
} while (c != '\n');
}
char is_exit()
{
if ((get_char(STDIN_FILENO)) != 'x') return 0;
if ((get_char(STDIN_FILENO)) != 'i') return 0;
if ((get_char(STDIN_FILENO)) != 't') return 0;
flush(STDIN_FILENO); /* remove already buffered input */
return 1;
}
char *get_valid_token(void)
{
char c;
char *out;
char *out_ptr;
out_ptr = out = (char *)malloc(sizeof(char)*BUFSIZ);
while (1) {
c = get_char(STDIN_FILENO);
if (c == ';') {
*out = ';';
break;
} else if (isdigit(c)) {
*out = c;
out_ptr++;
/* get the rest of the digit */
while (1) {
c = get_char(STDIN_FILENO);
if (isdigit(c)) {
*out_ptr++ = c;
} else {
unget_char(c);
break;
}
}
*out_ptr = '\0';
break;
} else if (c == 'e') {
if (is_exit())
exit(0);
}
};
return out;
}
int main(void)
{
char *t;
int sum;
sum = 0;
while ((t = get_valid_token())) {
switch (*t) {
case ';':
fprintf(stderr, "sum: %d\n", sum);
sum = 0;
break;
default:
sum += atoi(t);
break;
}
free(t);
}
return 0;
}

My program doesn't find EOF symbol

I have this program which deletes from an .txt file all the words, which start and end with the same symbol. In my opinion it should work, but somehow it doesn't stop when EOF is reached and ir prints me some strange chinese symbols...
Here's the code:
#include <stdio.h>
#include <stdlib.h>
#define MAX 255
void search(char *symbolMass, FILE *duomFail, FILE *rezFail)
{
int i = 0, k =0, j =0, p = 0;
char symbol = 0;
char *rezMass;
char word[20];
rezMass = (char*)malloc(sizeof(char)*MAX);
while(simbolis != EOF)
{
printf("veikia");
symbol = symbolMass[i];
if (symbol != 32 && symbol != 10 && symbol != EOF)
{
word[j] = symbol;
i++;
j++;
}
else
{
word[j] = symbol;
i++;
if(word [0] == word[j - 1])
{
rezMass[k] = word[j];
k++;
}
else
{
for (p = 0; p <= j; p++, k++)
{
rezMass[k] = word[p];
}
}
j = 0;
}
}
for(i = 0; i <= k; i++)
symbolMass[i] = rezMass[i];
}
int main(int argc, char* argv[])
{
FILE *duom, *rez;
char *symbols;
symbols = (char*)malloc(sizeof(char)*MAX);
if (argc > 1)
{
duom = fopen (argv[1],"r");
rez =fopen (argv[2],"w");
if (duom != NULL)
{
while (symbols != NULL)
{
fgets(symbols, MAX, duom);
search(symbols, duom, rez);
fputs(symbols, rez);
}
fclose(duom);
}
else
{
printf("There is no file with name \"%s\"\n",argv[1]);
}
}
else
{
printf("The command has no arguments.\n");
}
fclose(rez);
free(simboliai);
return 0;
}
It works like this: it scans symbols and puts them into "word" till it reaches "space", "new line" or "EOF", then it checks if the word starts and ends with the same symbol, if yes, it prints only the "space", "new line" or "EOF", if not, then it prints the whole word.
Oh, and the code is wrote in C (usiing CodeBlocks if it matters).
That's because there is no EOF character in the buffer you pass to the search function. The buffer, symbolMass is a string and like all strings in C it's terminated by the special null-character '\0' (which incidentally happens to be the same as 0).
I suggest you change your loop to something like this
char symbol;
for (int i = 0; (symbol = symbolMass[i]) != '\0'; ++i)
{
...
}
Also, don't use "magic numbers" for characters, use the actual character literals instead, so instead of
if (symbol != 32 && symbol != 10 && symbol != EOF)
do
if (symbol != ' ' && symbol != '\n')

Is it possible for me to remove this last if statement?

I've written a small utility to open up executables and spit out certain printable strings it finds.
It works fine but I was wondering, is there some way I could remove one of these if statements? I was trying to see how I could arrange my conditionals so I wouldn't need the 3 different if statements but I don't see how I can do it with the current structure.
#include <stdio.h>
#define MAX_STR_SIZE 0x666
#define MIN_STR_SIZE 0x5
int main(int argc, char** argv)
{
int ch;
int pos = 0;
FILE* f;
char buff[MAX_STR_SIZE];
if (argc>1 && (f=fopen(argv[1], "rb")))
{
while ((ch=getc(f)) != EOF)
{
if (ch >= ' ' && ch <= 'z') // is printable char?
{
buff[pos++] = ch;
buff[pos] = '\0';
if (pos == (MAX_STR_SIZE-1)) // is current string > max length?
{
printf("%08x: %s\n", ftell(f), &buff[0]);
pos = 0;
}
}
else // non-printable char, print any string in buffer and start over
{
if (pos > (MIN_STR_SIZE - 1)) // is current string > min string?
{
printf("%08x: %s\n", ftell(f), &buff[0]); // print current string
}
pos = 0;
}
}
if (pos > (MIN_STR_SIZE - 1)) // any remaining string left to print?
{
printf("%08x: %s\n", ftell(f), &buff[0]);
}
fclose(f);
}
}
I believe this version eliminates most of the if statements (or at least collapses them together:
#include <stdio.h>
#define MAX_STR_SIZE 0x666
#define MIN_STR_SIZE 0x5
int main(int argc, char** argv)
{
int ch;
int pos = 0;
FILE* f;
char buff[MAX_STR_SIZE];
if (argc>1 && (f=fopen(argv[1], "rb")))
{
while ((ch = getc(f)) != EOF)
{
pos = 0;
while (ch >= ' ' && ch <= 'z' && pos < (MAX_STR_SIZE-1)) {
buff[pos++] = ch;
ch = getc(f);
}
if (pos > (MIN_STR_SIZE - 1)) // is current string > min string?
{
buff[pos] = '\0';
printf("%08x: %s\n", ftell(f), buff);
}
}
fclose(f);
}
}
The last if seems to be necessary for the current logic of the code.
However, the check in your code is not entirely correct. Why don't you use isprint() function to check whether a character is printable or not? Something like this:
if (isprint(c)) // is printable char?
{
//c is printable
}

Resources