How can I correct my output to what is intended? - c

I have a code called unscramble that takes two files, Jumbled.txt and dictionary.txt and finds if any words contain the same characters in both the files or not, for instance, here is a sample input for
Jumbled.txt:
Hello
Wassup
Rigga
Boyka
Popeye
dictionary.txt:
olleH
Yello
elloH
lloeH
aggiR
ggiRa
giRag
yokaB
Bakoy
kaBoy
eyePop
poePye
and the expected output of the code above is:
Hello: olleH elloH lloeH
Wassup: NO MATCHES
Rigga: aggiR ggiRa giRag
Boyka: yokaB Bakoy kaBoy
Popeye: eyePop poePye
Here is my code that attempts to solve it:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_LEN 50
#define MAX_DICT_WORDS 500000
#define MAX_JUMBLES 10000
char dict[MAX_DICT_WORDS][MAX_WORD_LEN + 1];
int dict_size;
char jumbles[MAX_JUMBLES][MAX_WORD_LEN + 1];
int jumbles_size;
int compare_chars(const void *a, const void *b) {
return *(char *)a - *(char *)b;
}
void sort_chars(char *s) {
qsort(s, strlen(s), sizeof(char), compare_chars);
}
int compare_words(const void *a, const void *b) {
return strcmp((char *)a, (char *)b);
}
void load_dict(const char *filename) {
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
perror("Error opening dictionary file");
exit(1);
}
dict_size = 0;
char word[MAX_WORD_LEN + 1];
while (fgets(word, MAX_WORD_LEN + 1, fp) != NULL) {
int len = strlen(word);
if (len > 0 && word[len - 1] == '\n') {
word[len - 1] = '\0'; // remove newline
}
if (len > 1 && len <= MAX_WORD_LEN) {
strcpy(dict[dict_size++], word);
}
}
fclose(fp);
qsort(dict, dict_size, sizeof(dict[0]), compare_words);
}
void load_jumbles(const char *filename) {
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
perror("Error opening jumbles file");
exit(1);
}
jumbles_size = 0;
char word[MAX_WORD_LEN + 1];
while (fgets(word, MAX_WORD_LEN + 1, fp) != NULL) {
int len = strlen(word);
if (len > 0 && word[len - 1] == '\n') {
word[len - 1] = '\0'; // remove newline
}
if (len > 1 && len <= MAX_WORD_LEN) {
strcpy(jumbles[jumbles_size++], word);
}
}
fclose(fp);
}
void unscramble() {
char sorted[MAX_WORD_LEN + 1];
for (int i = 0; i < jumbles_size; i++) {
strcpy(sorted, jumbles[i]);
sort_chars(sorted);
printf("%s: ", jumbles[i]);
int count = 0;
for (int j = 0; j < dict_size && count < 10; j++) {
if (strcmp(sorted, dict[j]) == 0) {
printf("%s ", dict[j]);
count++;
}
}
if (count == 0) {
printf("NO MATCHES");
}
printf("\n");
}
}
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Usage: %s <dictionary> <jumbles>\n", argv[0]);
return 1;
}
load_dict(argv[1]);
load_jumbles(argv[2]);
unscramble();
return 0;
}
However, when I do ./unscramble dictionary.txt Jumbled.txt, this is what I get:
xxxxxxxxx#LAPTOPxxxxxxxx:~$ ./unscramble dictionary.txt Jumbled.txt
lloeHH
: NO MATCHES
giRagR
kaBoyB
poePyep
I've been trying to debug this and change up my code but nothing is working, what is the problem here?

At least these errors:
Enable all compiler warnings
Fix all warnings.
Sorted dictionary
The dictionary set of words are sorted. If original file order expected, do not sort the dictionary.
// Do not expect
Hello: olleH elloH lloeH
// Expect
Hello: elloH lloeH olleH
Given OP's expected output, there is no need to sort the words
Unsorted words
Code relies on each individual word's letter are sorted, yet individual letters in the dictionary words are not sorted. Form a sorted string before passing to strcmp(sorted, dict[j]).
Code should not sort letters or words more than once
Do it once and save results.
Advanced: Sorting amiss when a char value is negative.
String should be sorted as unsigned char. That is what strcmp() does.
// return *(char*) a - *(char*) b;
return *(unsigned char*) a - *(unsigned char*) b;
Following test code suggested
FILE *outf = fopen("Jumbled.txt", "w");
fputs("Hello\n", outf);
fputs("Wassup\n", outf);
fputs("Rigga\n", outf);
fputs("Boyka\n", outf);
fputs("Popeye\n", outf);
fclose(outf);
outf = fopen("dictionary.txt", "w");
fputs("olleH\n", outf);
fputs("Yello\n", outf);
fputs("elloH\n", outf);
fputs("lloeH\n", outf);
fputs("aggiR\n", outf);
fputs("ggiRa\n", outf);
fputs("giRag\n", outf);
fputs("yokaB\n", outf);
fputs("Bakoy\n", outf);
fputs("kaBoy\n", outf);
fputs("eyePop\n", outf);
fputs("poePye\n", outf);
fclose(outf);

Related

How to get the length of the longest Line in a File in C

this is my first Question here so im grateful for every kind of Help.
Im trying to get the length of the longest Line in a File, so i can later calloc it and read the whole File in. My first attempt was Dynamic, but it didnt work.
My Code till now is:
FILE *inputData;
inputData = fopen("input.txt", "r");
char *input = NULL;
int longestLinelength = 0;
while(fscanf(inputData,"%[^\n]", input) != EOF) {
if(longestLineLength<strlen(input)){
longestLineLength=strlen(input);
}
}
fclose()
This code unfortunetly leads to a memory access error.
size_t longestLine(FILE *fi)
{
size_t largest = 0, current = 0;
int ch;
if(fi)
{
while((ch = fgetc(fi)) != EOF)
{
if(ch == '\n')
{
if(current > largest) largest = current;
current = 0;
}
else
{
current++;
}
}
if(current > largest) largest = current;
}
return largest;
}
I think the problem is not with realloc, but with a misunderstanding of how things work.
It would be best to read carefully what scanf does. And how pointers work.
input is a NULL pointer and you want to write to it, this causes a crash in the application. scanf needs allocated memory to write to, it does not allocate it itself. Generally I would suggest to use fgets instead of scanf as it is better to handle. The formating options of scanf can be done after you read it with fgets.
Probably this help this is based on the book The C Programming Language.
First we need a main function to get the lines in the file
int get_file_line(char line[], int maxline, FILE *fptr) {
int ch, i;
for (i = 0; i < (maxline - 1) && ((ch = getc(fptr)) != EOF) && (ch != '\n'); ++i) {
line[i] = ch;
}
if (ch == '\n') {
line[i] = ch;
++i;
}
line[i] = '\0';
return i;
}
Then we will store the data into a new array of chars
void copy(char to[], char from[]) {
int i = 0;
while (from[i] != '\0') {
to[i] = from[i];
i++;
}
}
And finally in the main function we gonna open the file and use the previous functions
FILE *ptr;
const char *file_name = "your_file.txt";
ptr = fopen(file_name, "r");
while ((len = get_file_line(line, MAXLINE, ptr)) > 0) {
if (len > max) {
max = len;
copy(longest, line);
}
}
fclose(ptr);
if (max > 0) {
printf("longest: %s\n", longest);
printf("len : %d\n", max);
}
All together
#include <stdio.h>
#define MAXLINE 1000
int get_file_line(char line[], int maxline, FILE *fptr) {
int ch, i;
for (i = 0; i < (maxline - 1) && ((ch = getc(fptr)) != EOF) && (ch != '\n'); ++i) {
line[i] = ch;
}
if (ch == '\n') {
line[i] = ch;
++i;
}
line[i] = '\0';
return i;
}
void copy(char to[], char from[]) {
int i = 0;
while (from[i] != '\0') {
to[i] = from[i];
i++;
}
}
int main() {
int len, max = 0;
char line[MAXLINE];
char longest[MAXLINE];
FILE *ptr;
const char *file_name = "your_file.txt";
ptr = fopen(file_name, "r");
while ((len = get_file_line(line, MAXLINE, ptr)) > 0) {
if (len > max) {
max = len;
copy(longest, line);
}
}
fclose(ptr);
if (max > 0) {
printf("longest: %s\n", longest);
printf("len : %d\n", max);
}
return 0;
}
I hope this was helpful
#include <stdio.h>
#include <string.h>
#define MAX_LINE_LENGTH 4096
static void process_file(char *filename);
int main(int argc, char **argv) {
int q;
if(argc <= 1) {
printf("Usage: %s <files>\n", argv[0]);
return 1;
}
for(q = 1; q < argc; q++) {
process_file(argv[q]);
}
return 0;
}
void process_file(char *filename) {
char buf[MAX_LINE_LENGTH] = {0};
FILE *file;
char line_val[MAX_LINE_LENGTH] = {0};
int line_len = -1;
int line_num = -1;
int cur_line = 1;
file = fopen(filename, "r");
if(file == NULL) {
return;
}
while(fgets(buf, MAX_LINE_LENGTH, file) != NULL) {
int len_tmp = strlen(buf) - 1;
if(buf[len_tmp] == '\n')
buf[len_tmp] = '\0';
if(line_len < len_tmp) {
strncpy(line_val, buf, len_tmp + 1);
line_len = len_tmp;
line_num = cur_line;
}
cur_line++;
/*printf("%s", buf);*/
}
fclose(file);
if(line_num < 1) {
return;
}
printf("%d:%s:%d:%s\n", line_len, filename, line_num, line_val);
}

How to split a text file into multiple parts in c

What i need to do, is to take a file of n lines, and for every x lines, create a new file with the lines of the original file. An example would be this:
Original File:
stefano
angela
giuseppe
lucrezia
In this case, if x == 2, 3 file would be created, in order:
First file:
stefano
angela
Second FIle:
giuseppe
lucrezia
Third File:
lorenzo
What i've done so far is this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10
int getlines(FILE *fp)
{
int c = 0;
int ch;
do{
ch = fgetc(fp);
if(ch == '\n')
{
c++;
}
}while(ch != EOF);
fseek(fp, 0 , SEEK_SET);
return c;
}
int ix = 0;
void Split(FILE *fp, FILE **fpo, int step, int lines, int *mem)
{
FILE **fpo2 = NULL;
char * filename = malloc(sizeof(char)*64);
char * ext = ".txt";
char number[2];
for(int i = ix; i < *mem; i++)
{
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
if(!(fpo[i] = fopen(filename, "w")))
{
fprintf(stderr, "Error in writing\n");
exit(EXIT_FAILURE);
}
}
char ch;
int c = 0;
do{
ch = fgetc(fp);
printf("%c", ch);
if(ch == '\n')
{
c++;
}
if(c >= step)
{
c = 0;
ix++;
if(ix >= *mem && (ix*step) <= lines)
{
*mem = *mem + 1;
fpo2 = realloc(fpo, sizeof(FILE*)*(*mem));
Split(fp, fpo2, step, lines, mem);
}
}
putc(ch, fpo[ix]);
}while(ch != EOF);
}
int main()
{
FILE * fp;
if(!(fp = fopen("file.txt", "r")))
{
fprintf(stderr, "Error in opening file\n");
exit(EXIT_FAILURE);
}
int mem = N;
int lines = getlines(fp);
int step = lines/N;
FILE **fpo = malloc(sizeof(FILE *)*N);
Split(fp, fpo, step, lines, &mem);
exit(EXIT_SUCCESS);
}
I'm stack with segmentation error, i couldn't find the bug doing
gdb myprogram
run
bt
I really appreciate any help.
EDIT:
I've changed some things and now it works, but it creates an additional file that contains strange characters. I need to still adjust some things:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 10
int getlines(FILE *fp)
{
int c = 0;
int ch;
do{
ch = fgetc(fp);
if(ch == '\n')
{
c++;
}
}while(ch != EOF);
fseek(fp, 0 , SEEK_SET);
return c;
}
int ix = 0;
void Split(FILE *fp, FILE **fpo, int step, int lines, int *mem)
{
FILE **fpo2 = NULL;
char * ext = ".txt";
for(int i = ix; i < *mem; i++)
{
char * filename = malloc(sizeof(char)*64);
char * number = malloc(sizeof(char)*64);
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
if(!(fpo[i] = fopen(filename, "w")))
{
fprintf(stderr, "Error in writing\n");
exit(EXIT_FAILURE);
}
free(number);
free(filename);
}
char ch;
int c = 0;
do{
ch = fgetc(fp);
printf("%c", ch);
if(ch == '\n')
{
c++;
}
if(c >= step)
{
c = 0;
ix++;
if(ix >= *mem && ((ix-1)*step) <= lines)
{
*mem = *mem + 1;
fpo2 = realloc(fpo, sizeof(FILE*)*(*mem));
Split(fp, fpo2, step, lines, mem);
}
}
putc(ch, fpo[ix]);
}while(ch != EOF);
}
int main()
{
FILE * fp;
if(!(fp = fopen("file.txt", "r")))
{
fprintf(stderr, "Error in opening file\n");
exit(EXIT_FAILURE);
}
int mem = N;
int lines = getlines(fp);
int step = lines/N;
FILE **fpo = malloc(sizeof(FILE *)*N);
Split(fp, fpo, step, lines, &mem);
exit(EXIT_SUCCESS);
}
There are a few problems in your code. But first I think you need to fix the most important thing
int step = lines/N;
Here step is 0 if your input file has less than N lines of text. This is because lines and N both are integer and integer division is rounding down.
I won't fix your code, but I'll help you with it. Some changes I
suggest:
Instead of getlines, use getline(3) from the standard
library.
fseek(fp, 0 , SEEK_SET) is pointless.
In char * filename = malloc(sizeof(char)*64), note that
both arguments to malloc are constant, and the size is arbitrary.
These days, it's safe to allocate filename buffers statically,
either on the stack or with static: char filename[PATH_MAX].
You'll want to use limits.h to get that constant.
Similarly you have no need to dynamically allocate your FILE
pointers.
Instead of
itoa(i+1, number,10);
strcpy(filename, "temp");
strcat(filename, number);
strcat(filename, ext);
use sprintf(filename, "temp%d%s", i+1, ext)
get familiar with err(3) and friends, for your own convenience.
Finally, your recursive Split is -- how shall we say it? -- a nightmare. Your whole program
should be something like:
open input
while getline input
if nlines % N == 0
create output filename with 1 + n/N
open output
write output
nlines++

Reading a file of strings to a string array

I'm trying to read a file (full of a word followed by a newline), to an array full of pointers to each string. Then print each word in the array, and count the number of words read. However it just prints no words and says 0 words imported.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define ARGS_REQUIRED 2
#define MAX_WORDS 50
#define MAX_WORD_LENGTH 1024
void read_file (char * argv[], char word_storage[]);
void usage (char * argv[]);
int main (int argc, char* argv[])
{
char word_storage[MAX_WORDS];
if (argc == ARGS_REQUIRED)
{
system("clear");
read_file(&argv[1], word_storage);
}
else
{
usage(&argv[0]);
}
return 0;
}
void usage (char * argv[])
{
printf("Incorrect usage, try: ./program_name %s\n", argv[1]);
}
void read_file (char * argv[], char word_storage[])
{
FILE * file_name;
char *word[MAX_WORDS][MAX_WORD_LENGTH];
int word_count = 0, i = 0, j;
if ((file_name = fopen(argv[0], "r")) == NULL)
{
printf("Cannot open file ... \n");
}
while (fscanf(file_name, "%s", *word[MAX_WORDS]) == 1)
{
for (j = 0; j < MAX_WORDS; j++)
{
if (printf("%s\n", *word[j]) == 1)
{
word_count++;
}
}
}
fclose(file_name);
printf("Imported words: %d\n", word_count);
}
You can modify your program as below. It worked for me. Please add error check for printf() function.
void read_file (char * argv[], char word_storage[])
{
FILE * file_name;
char word[MAX_WORDS][MAX_WORD_LENGTH];
int word_count = 0, i = 0, j;
if ((file_name = fopen(argv[0], "r")) == NULL)
{
printf("Cannot open file ... \n");
}
while (fscanf(file_name, "%s", word) == 1)
{
printf("%s\n",word);
word_count++;
/*
for (j = 0; j < MAX_WORDS; j++)
{
if (printf("%s\n", word[j]) == 1)
{
word_count++;
}
}
*/
}
fclose(file_name);
printf("Imported words: %d\n", word_count);
}

Search and Print all non-duplicate struct names inside input file

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Search and Print all non-duplicate struct names inside input file */
int main(int argc, char *argv[])
{
char temp[64], buf[64], filename[128], array[1024] = "";
char *ptr, *line = NULL;
char *tmp1, *tmp2;
ssize_t rv;
size_t len;
int count = 0;
FILE *fp;
if (argc < 2) {
printf("enter file name at cmd line...\n");
return -1;
}
sprintf(filename, argv[1], strlen(argv[1]));
fp = fopen(argv[1], "r");
if (!fp) {
printf("File could not be opened: %s\n", argv[1]);
return -1;
}
while ((rv = getline(&line, &len, fp)) != -1) {
ptr = strstr(line, "struct");
if (ptr) {
ptr += strlen("struct");
while (*ptr == ' ')
ptr++;
tmp1 = strchr(ptr, ' ');
tmp2 = strchr(ptr, ';');
len = 0;
if (tmp1 == NULL && tmp2 == NULL) {
continue;
}
else if (tmp1 == NULL && tmp2 != NULL) {
len = tmp2 - ptr;
}
else if (tmp1 != NULL && tmp2 == NULL) {
len = tmp1 - ptr;
}
else if (tmp1 && tmp2) {
len = tmp1 < tmp2 ? tmp1 - ptr : tmp2 - ptr;
}
if (len) {
snprintf(temp, len+1, "%s", ptr);
if (!strstr(array, temp)) {
sprintf(buf, "%2d. ", count++);
strcat(buf, temp);
strcat(array, buf);
strcat(array, "\n");
}
}
}
}
fclose(fp);
if (line)
free(line);
printf("%s\n", array);
return 0;
}
Above program finds struct names correctly, however I see chars like , and ) at the end of output names. How to remove it? Below is sample output:
[root#mnm-server programs]# ./a.out /usr/src/linux/drivers/net/ethernet/smsc/smsc911x.c
0. smsc911x_data
1. smsc911x_ops
2. smsc911x_platform_config
3. phy_device
4. mii_bus
5. net_device
6. napi_struct
7. regulator_bulk_data
8. clk
9. platform_device
10. smsc911x_data,
11. sk_buff
12. net_device_stats
13. netdev_hw_addr
14. sockaddr
15. ethtool_drvinfo
16. ethtool_eeprom
17. ethtool_ops
18. net_device_ops
19. ures,
20. resource
21. device_node
22. smsc911x_data))
23. dev_pm_ops
24. of_device_id
25. platform_driver
Notice output of line 10 and 22. One approach would be to do strchr for ,, ), ; and remove char from end. However, this is not a clean solution if the number of non-alphabetic characters increases.
NOTE: The best solution I found for this is here.
Thanks to inputs from Daniel Jour, the following code handles all cases of struct name* ptr;, struct name{ };, struct { };
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* Search and Print all non-duplicate struct names inside input file */
int main(int argc, char *argv[])
{
char temp[64], buf[64], filename[128], array[1024] = "";
char *ptr, *line = NULL;
size_t len;
int count = 0, flag = 0;
FILE *fp;
if (argc < 2) {
printf("enter file name at cmd line...\n");
return EXIT_FAILURE;
}
sprintf(filename, argv[1], strlen(argv[1]));
fp = fopen(argv[1], "r");
if (!fp) {
printf("File could not be opened: %s\n", argv[1]);
return EXIT_FAILURE;
}
while ((getline(&line, &len, fp)) != -1) {
ptr = flag ? line : strstr(line, "struct ");
if (ptr) {
if (!flag)
ptr += strlen("struct ");
while (*ptr == ' ')
ptr++;
len = 0;
while (isalnum(*ptr) || *ptr == '_' || *ptr == '{' || *ptr == '}') {
if (*ptr == '{') {
flag++;
}
else if (*ptr == '}') {
len = 0;
flag--;
do {
ptr++;
} while (*ptr == ' ');
ptr--;
}
else if ((*ptr != '{') || (*ptr != '}')) {
len++;
}
ptr++;
}
if (len && !flag) {
ptr -= len;
snprintf(temp, len+1, "%s", ptr);
if (!strstr(array, temp)) {
sprintf(buf, "%2d. ", count++);
strcat(buf, temp);
strcat(array, buf);
strcat(array, "\n");
}
}
}
}
fclose(fp);
if (line)
free(line);
printf("%s\n", array);
return EXIT_SUCCESS;
}
This program doesn't handle cases like func(struct x, struct y), interested users can fix it or just use grep -o "struct [^ ;,)]\+" # | awk '{print $2}' | sort -u. Output of the above program for pre-processed file hworld.i:
[root#server]# cat hworld.c
#include <stdio.h>
int main()
{
printf("hello world\n");
return 0;
}
[root#server]# gcc -Wall --save-temps hworld.c
[root#server]# ./find_structs hworld.i
0. _IO_FILE
1. _IO_marker
2. _IO_FILE_plus

Program to print and sum numbers in a text file

I want to write a program which print all numbers found in a file and then add them up. I have two problems:
How to add up the numbers I've printed?
Why in output_file do I have so many commas:
Here's my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define CHUNK 12
char *getWord(FILE *infile);
void clean(char *dirty);
char *getWord(FILE *infile)
{
char *word, *word2;
int length, cursor, c;
word = (char*)malloc(sizeof(char)*CHUNK);
if(word == NULL) return NULL;
length = CHUNK;
cursor = 0;
while(!isspace(c = getc(infile)) && !feof(infile))
{
word[cursor] = c;
cursor++;
if(cursor >= length)
{
length += CHUNK;
word2 = (char*)realloc(word, cursor);
if(word2 == NULL)
{
free(word2);
return NULL;
}
else
{
word = word2;
}
}
}
word[cursor] = '\0';
return word;
}
void clean(char *dirty)
{
int i = 0, j = 0;
char *temp;
temp = strdup(dirty);
while(i < strlen(temp))
{
if(isdigit(temp[i]))
{
dirty[j] = temp[i];
j++;
}
i++;
}
dirty[j] = '\0';
free(temp);
}
int main(int argc, char *argv[])
{
char *word;
FILE *infile, *outfile;
if(argc != 3)
{
printf("Missing argument!\n");
exit(1);
}
infile = fopen(argv[1], "r");
if(infile != NULL)
{
outfile = fopen(argv[2], "w");
if(outfile == NULL)
{
printf("Error, cannot open the outfile!\n");
abort();
}
else
{
while(!feof(infile))
{
word = getWord(infile);
if(word == NULL)
{
free(word);
abort();
}
clean(word);
fputs(word, outfile);
fputs(",", outfile);
free(word);
}
}
}
else
{
printf("Error, cannot open the outfile!\n");
abort();
}
fclose(infile);
fclose(outfile);
return 0;
}
infile:
You are getting , because of this -
fputs(",", outfile);
It is related in structure to the echo unix command. The core of the program could be simplified to something along the following lines:
int c, need_comma = 0;
while ((c = fgetc(infile)) != EOF) {
if (isdigit(c)) {
fputc(c, outfile);
need_comma = 1;
}
else {
if (need_comma == 1) {
fputc(',', outfile);
need_comma = 0;
}
}
}
this removes the need for getWord and clean functions.
This is just the printing part. the intermediate file is in CSV format,
which is structured and easy to parse and add the numbers (and print the
result to another file).

Resources