How to compare strings of two files? - c

I am a new c-language programmer.
I have got two files.
One consists of lines like:
84:1b:5e:a8:bf:7f
00:8e:f2:c0:13:cc
Another consists of lines like:
00-22-39
8C-FD-F0
My question is how can I using C language compare first half of line in the first file with a line in the second file?
Like: is 84:1b:5e equals to 8C-FD-F0?
I know the way to create an arrays to store those lines for the further comparison. But do I really need to create arrays?
P.S: comparison is case-insensitive

You haven't been very clear about what rules constitute a match. But if you want to compare the byte values, then you need to parse each line, converting it to those byte values.
You could use variations of strtok() to get the values from each line. However, a variation of sscanf() might be easier. Once you have the binary values from each file, then you can compare them.

Read the second file completely and store the contents in a sorted array. Then for each line read from the first file, binary search the sorted array to locate the match.
Implementation is below. It compiles with gcc.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("file1.txt", "rt");
f2 = fopen("file2.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}

Related

Longest word in file

My program needs to print longest word which contains only letters from a file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int checkString(const char s[]) {
unsigned char c;
while ((c = *s) && (isalpha(c) || isblank(c)))
++s;
return *s == '\0';
}
int main() {
char file_name[]="document.txt";
FILE *fp = fopen(file_name, "r");
char *largest = str;
int largest_len = 0;
while (fgets(file_name, 1000, fp) != NULL) {
char *temp = strtok(file_name, " ");
while (temp != NULL) {
if (strlen(temp) > largest_len) {
strcpy(largest, temp);
largest_len = strlen(largest);
}
temp = strtok(NULL, "\",.,1,2,4,5,6,7,8,9 ");
}
}
if(checkString(largest))
printf("%s", largest);
fclose(fp);
return 0;
}
In my code, if the largest word contains only letters it will be printed. How to modify this code to check next words if the largest doesn't contain only letters?
First of all, you cannot store the pointer to longest word like that. You re-use str for the next line and so the pointer is not likely to point to something useful.
Second, while strtok() appears simple, initially, I tend to apply a straightforward approach to a straightforward problem.
The problem is O(n) (where n is the length of the document). You just need to go through it character by character. Of course, since every line is ended by a \n, you can use the line based approach in this case.
So, instead of strtok, simply check each character, if it is a legal word character (an alphanumeric character, that is). You can easily do so with the standard library function isalpha() from header ctype.h.
Below is the program, copying the longest string into a dedicated buffer, using isalpha() and doing the line based reading of the file, just like the code in the original question did.
Of course, this code assumes, no line is ever longer than 999 characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <ctype.h>
static size_t gulp(const char* line, size_t istart, size_t len) {
size_t n = 0;
for (size_t i = istart; i < len; i++, n++) {
if (!isalpha(line[i])) {
break;
}
}
return n;
}
int main(int argc, const char * argv[]) {
FILE* f = fopen("document.txt","r");
char line[1000];
char longest_word[1000];
size_t longest_word_length = 0;
while (fgets(line, sizeof(line), f) != NULL) {
size_t i0 = 0;
size_t line_length = strlen(line);
while (i0 < line_length) {
if (isalpha(line[i0])) {
size_t n = gulp(line, i0, line_length);
if (n > longest_word_length) {
strncpy(longest_word, &line[i0], n);
longest_word[n] = '\0';
longest_word_length = n;
}
i0 = i0 + n;
} else {
i0++;
}
}
}
fclose(f);
f = NULL;
if (longest_word_length > 0) {
printf("longest word: %s (%lu characters)\n",
longest_word, longest_word_length);
}
return 0;
}
There are a number of problems here:
you use the same buffer (str) for two different uses: as a read buffer and to store the longest word. If you find the largest word in the first line, the word will be erased when reading the second line. Furthemore, if you find a rather long word at the beginning of a line, the strings pointed to by largest and temp could overlap which leads to undefined behaviour => use a different array or strdup (and free) for largest
you only use the space as possible separator. You should wonder whether you should add tab and/or punctuations
once you have got a word you should ensure that it only contains valid letters before testing its length and ignore it if for example it contains digits.
if a single line can be longer than 1000 characters, you should wrap the end of the current part before the beginning of the next one for the possible case where a long word would be splitted there.
For additional corner case processing, you should specify what to do if a word contains illegal characters but only at one side. For example if . is not used as a word delimiter, a word with an embedded . like "a.b" should be ignored, but a terminating . should only be stripped (like "example." should become "example"
I think the order you do things should be a bit different, here is an example
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
int isCandidate(char* word);
int main(int argc, char* argv[])
{
if (--argc == 0)
{
perror("not enough command line arguments, expecting a filename");
return -1;
}
++argv;
FILE* fp = fopen(*argv, "r");
if (fp == NULL)
{
perror(*argv);
return -1;
}
// get size of file
fseek(fp, 0L, SEEK_END);
long fileLength = ftell(fp);
if (fileLength < 1)
{
perror("file is empty");
return -1;
}
fseek(fp, 0L, SEEK_SET); // position file pointer at the beginning again
// allocate space for the whole file and then read it in
// for a text file it should be OK to do so since they
// normally are not that large.
char* buffer = malloc(fileLength+1);
if (fread(buffer, 1, fileLength, fp) != 0)
{
buffer[fileLength] = '\0'; // make sure the buffer ends with \0
}
else
{
perror("Failed reading into buffer");
return -1;
}
fclose(fp); // we are done with the file
const char filter[] = " \n\r";
char* longestWord = malloc(fileLength+1); // max length in theory
long unsigned int maxLength = 0;
for (char* token = strtok(buffer, filter); token != NULL; token = strtok(NULL, filter))
{
if (isCandidate(token))
{
if (strlen(token) > maxLength)
{
strcpy(longestWord, token);
maxLength = strlen(token);
}
}
}
printf("Longest word:'%s', len=%lu\n", longestWord, maxLength);
free(longestWord);
free(buffer);
}
int isCandidate(char* word)
{
if (word == NULL)
{
perror("invalid argument to isCandidate");
return 0;
}
for (char* ch = word; *ch; ++ch)
{
if (!isalpha(*ch)) return 0;
}
return 1;
}

How to get ASCII code for characters from a text file?

Update, Hello guys Thank you all for the help, my initial approach was wrong and I did not use ASCII codes at all.
Sorry for the late replay I had a half-day off today and made a new post for the complete code
there is no errors but the prgram is not working proberly ( this is an update of old post )
I wrote the program, and it is working with no errors But it is not giving me the results I wanted
My only problem is when I read a character how to check its ASCII and store it.
#include <stdio.h>
#include <string.h>
int main()
{
char dictionary[300];
char ch, temp1, temp2;
FILE *test;
test=fopen("HW2.txt","r");
for(int i=0;i<2000;i+=1)
{ ch=fgetc(test);
printf("%c",ch);
}
}
If we are talking about plain ASCII, values goes from 0 to 127, your table shoud look like:
int dictionary[128] = {0};
Regarding your question:
how to check its ASCII and store it
Consider a char being a tiny int, they are interchangeable and you don't need any conversion.
fgetc wants an int in order to handle EOF, and trying to read 2000 characters from a file containing less than 2000 bytes can have very bad consequences, to read the whole file:
int c;
while ((c = fgetc(test)) != EOF)
{
if ((c > 0) && (c < 128))
{
dictionary[c]++;
}
}
for (int i = 1; i < 128; i++)
{
if (dictionary[i] > 0)
{
printf("%c appeared %d times\n", i, dictionary[i]);
}
}
EDIT:
Rereading, I see that you want to store words, not chars, ok, then it's a bit more difficult but nothing terrible, do not limit yourself to 300 words, use dynamic memory:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// A struct to hold the words and the
// number of times it appears
struct words
{
size_t count;
char *word;
};
int main(void)
{
FILE *file;
file = fopen("HW2.txt", "r");
// Always check the result of fopen
if (file == NULL)
{
perror("fopen");
exit(EXIT_FAILURE);
}
struct words *words = NULL;
size_t nwords = 0;
char *word = NULL;
size_t nchars = 1;
size_t i;
int c;
// while there is text to scan
while ((c = fgetc(file)) != EOF)
{
if (isspace(c))
{
if (word != NULL)
{
// Search the word in the table
for (i = 0; i < nwords; i++)
{
// Found, increment the counter
if (strcmp(word, words[i].word) == 0)
{
words[i].count++;
free(word);
break;
}
}
// Not found, add the word to the table
if (i == nwords)
{
struct words *temp;
temp = realloc(words, sizeof(*temp) * (nwords + 1));
if (temp == NULL)
{
perror("realloc");
exit(EXIT_FAILURE);
}
words = temp;
words[nwords].word = word;
words[nwords].count = 1;
nwords++;
}
// Prepare the next word
word = NULL;
nchars = 1;
}
}
else
{
char *temp;
temp = realloc(word, nchars + 1);
if (temp == NULL)
{
perror("realloc");
exit(EXIT_FAILURE);
}
word = temp;
word[nchars - 1] = (char)c;
word[nchars++] = '\0';
}
}
for (i = 0; i < nwords; i++)
{
printf("%s appeared %zu times\n", words[i].word, words[i].count);
free(words[i].word);
}
free(words);
fclose(file);
return 0;
}
In C, characters are, essentially, their ASCII code (or rather, their char or unsigned char value). So once you read a character, you have its ASCII code already.
However, fgetc() doesn't always return the character it read for you; it may fail, for which reason it returns an int, not an unsigned char, which will be -1 in case of failure.
So:
You need to define an int variable to take the result of fgetc().
If it's not EOF, you can cast the result back into a unsigned char. That's your character, and it's ASCII value, at the same time.
PS - I'm ignoring non-ASCII characters, non-Latin languages etc. (But C mostly ignores them in its basic standard library functions too.)

Trouble reading a .txt file and storing into an array

I've been given a .txt file with a specific structure: each line has a String with 5 characters but with a random number of lines, and we should read the file and store it as we want.
I've tried doing it with a linked list and it worked just fine but as the size of the file grew up, the time it took to execute was too long. Since then i've been trying to store the Strings into an array of strings, so everything would be stored contiguously in memory. When executing, i get a segmentation fault error and i have no idea why. The code goes as follows:
int nLines (char *path)
{
int answer = 0;
FILE* fp;
fp = fopen(path,"r");
char line[6];
while (fgets(line, sizeof(line),fp))
{
answer++;
}
return answer;
}
int main (int argc, char *argv[])
{
FILE* fp;
fp = fopen(argv[1], "r");
int numberLines = nLines(argv[1]);
char **storage = malloc(numberLines * 6 * sizeof(char));
if(storage != NULL)
{
int i = 0;
char line [6];
while (fgets(line, sizeof(line),fp))
{
strcpy(storage[i], line);
i++;
}
}
free(storage);
}
The first function is supposed to return the number of lines there is in the file. With this information, i'm trying to allocate memory equal to the number of strings * the size of each string since i know before hand this value. I'm imagining the problem comes from the line:
char **storage = malloc (numberLines * 6 *sizeof(char));
I haven't touched C in a long time and i'm kinda rusty with the whole pointers and memory stuff. Can someone help please. Thank you!
your allocation is wrong
int main (int argc, char *argv[])
{
FILE* fp;
fp = fopen(argv[1], "r");
size_t numberLines = 0;
char **storage = NULL;
char line [8];
while (fgets(line, sizeof(line),fp))
{
storage = realloc(storage, (numberLines + 1) * sizeof(*storage));
storage[numberLines] = malloc(8);
strcpy(storage[numlines++], line);
}
/* ... */
}
you need to allocate space for the pointers, then space for the strings. It is demo only and you should implement the correct error handling (memory and file).
If one wants to truly have an on-line algorithm, one isn't going to have the number of lines available. The idiomatic way to have a contiguous dynamic container is to reallocate geometrically increasing capacity, like vector or ArrayList. C doesn't have that type built-in, but it's worth the extra code if one uses it a lot. For example, this reads from stdin until EOF and uses a Fibonacci sequence as it's capacities.
#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
/** One line of maximum 5 `char` plus 1 `NUL`. */
struct Line { char str[6]; };
/** A dynamic line array. */
struct LineArray {
struct Line *data; /* data -> (c0 < c1 || c0 == c1 == max_size) */
size_t capacity, next_capacity; /* !data -> !size, data -> size<=capacity */
size_t size;
};
/** Ensures `min_capacity` of `a`. Return success, otherwise, `errno` will be
set: `realloc` or `ERANGE` -- tried allocating more then can fit in `size_t`
or `realloc` doesn't follow [IEEE Std 1003.1-2001
](https://pubs.opengroup.org/onlinepubs/009695399/functions/realloc.html). */
static int reserve(struct LineArray *const a, const size_t min_capacity) {
size_t c0, c1;
struct Line *data;
const size_t max_size = (size_t)-1 / sizeof(struct Line *);
assert(a);
if(!a->data) {
if(!min_capacity) return 1;
c0 = 8, c1 = 13;
} else {
if(min_capacity <= a->capacity) return 1;
c0 = a->capacity, c1 = a->next_capacity;
}
if(min_capacity > max_size) return errno = ERANGE, 0;
assert(c0 < c1); /* Fibonacci: c0 ^= c1, c1 ^= c0, c0 ^= c1, c1 += c0. */
while(c0 < min_capacity) {
size_t temp = c0 + c1; c0 = c1; c1 = temp;
if(c1 > max_size || c1 < c0) c1 = max_size;
}
if(!(data = realloc(a->data, c0 * sizeof *a->data)))
{ if(!errno) errno = ERANGE; return 0; }
a->data = data;
a->capacity = c0;
a->next_capacity = c1;
return 1;
}
/** Adds one to the size of `a` and returns it (`push_back`.) Exceptional
return null and `errno` is `realloc` or `ERANGE`. */
static struct Line *new_line(struct LineArray *const a) {
assert(a);
if(a->size >= (size_t)-1) { errno = ERANGE; return 0; } /* Unlikely. */
if(!reserve(a, a->size + 1)) return 0; /* (Less) unlikely. */
return a->data + a->size++;
}
/** Destructor. */
static void linearray_(struct LineArray *const a) {
assert(a);
free(a->data);
a->data = 0, a->capacity = a->next_capacity = a->size = 0;
}
#include <string.h>
#include <stdio.h>
int main(void)
{
struct LineArray storage = { 0, 0, 0, 0 };
struct Line *s, *s_end;
size_t l = 0, line_len;
char line[7] = "";
int success = EXIT_FAILURE;
/* `line` must be capable of storing the "*[,5]\n\0". */
assert(sizeof line == sizeof ((struct Line *)0)->str + 1);
while (fgets(line, sizeof line, stdin))
{
l++;
line_len = strlen(line);
assert(line_len && line_len < sizeof line);
/* Too long. */
if(line[line_len - 1] != '\n') { errno = ERANGE; goto catch; }
/* Cut off the trailing new-line. */
line[line_len-- - 1] = '\0';
/* Store `line`. */
if(!(s = new_line(&storage))) goto catch;
strcpy(s->str, line);
}
if(ferror(stdin)) goto catch;
/* Print all. */
for(s = storage.data, s_end = s + storage.size; s < s_end; s++)
printf("stored: %s\n", s->str);
success = EXIT_SUCCESS;
goto finally;
catch:
perror("Error");
fprintf(stderr, "On line %lu: \"%s\".\n", (unsigned long)l, line);
finally:
linearray_(&storage);
return success;
}

Read tab delimited data to array in C

I have an input file in text format that looks like:
G: 5 10 20 30
C: 24 49 4.0 30.0
I'd like to set each of these to an array, array, respectively. I saw from this answer reading input parameters from a text file with C, a way to read some of the values, but how would I get the arrays G and C?
EDIT:
If I removed G:, and C: from the .txt file I could just run a for loop.
double *conc = (double*)malloc(properConfigs*sizeof(double));
double *G = (double*)malloc(properConfigs*sizeof(double));
for (int i=0;i<properConfigs;i++)
fscanf(inputfile,"%lf", &G[i]);
for (int i=0;i<properConfigs;i++)
fscanf(inputfile,"%lf", &conc[i]);
This would work, but I'd like to be able to account for someone saving the .txt file in a different order or at some point adding more rows (with different parameters).
I am not a fan of scanf, and would strongly encourage you to parse the line yourself. If you insist on using scanf, I recommend using the sscanf variant for this so you can check the line beforehand to see which array to write. I'm not sure why you're using named arrays at all, though. C is not very good at introspection, and you can make your program more flexible without trying to tie your input to particular symbols. Something like:
#include <stdio.h>
#include <stdlib.h>
#define properConfigs 4
void *Malloc(size_t s);
int
main(int argc, char **argv)
{
FILE *fp = argc > 1 ? fopen(argv[1],"r") : stdin;
double *G = Malloc( properConfigs * sizeof *G );
double *C = Malloc( properConfigs * sizeof *G );
int line_count = 0;
char line[256];
if( fp == NULL ) {
perror(argv[1]);
return 1;
}
while( line_count += 1, fgets( line, sizeof line, fp ) != NULL ) {
double *target = NULL;
switch(line[0]) {
case 'G': target = G; break;
case 'C': target = C; break;
}
if( target == NULL || 4 != sscanf(
line, "%*s%lf%lf%lf%lf",
target, target+1, target+2, target+3)) {
fprintf(stderr, "Bad input on line %d\n", line_count);
}
}
for(int i=0; i < 4; i += 1 ) {
printf ("G[%d] = %g\tC[%d] = %g\n", i, G[i], i, C[i]);
}
return ferror(fp);
}
void *Malloc(size_t s) {
void *r = malloc(s);
if(r == NULL) {
perror("malloc");
exit(EXIT_FAILURE);
}
return r;
}
Looks like your issue is atof() in c discards any white space after the first valid number. If you want to get all of the numbers you will have to split tmpstr2 and do each element separately in atof().
You can use strtok to split it into tokens then use atof() on each.
char temp[];
char *nums;
nums = strtok(temp, " \t");
int count = 0;
while (nums != NULL)
{
G[count] = atof(chrs);
nums = strtok(NULL, " \t");
count++;
}
Of course that is if you know before hand how many numbers you are going to get.
View this article for more info: Split string with delimiters in C

Why is my Program is Returning (null) and Gibberish rather than the Intended Output

Windows NT crashed.
I am the Blue Screen of Death.
No one hears your screams.
I am writing a program in which I have to read in haikus from a file, after which I must store all of the five syllable lines into one array and all of the seven syllable lines into another. I have to use an array of char* pointers to store each set of lines. I also can only allocate enough space at each pointer for the actual length of the string. As a result, for the haiku above, I should malloc 20, 31, and 28 bytes for those three lines, respectively (including the \0).
Once all the lines have been read in from the file and stored into the appropriate arrays, I must use a random number generator to pick a set of three lines to print to the screen.
#include <stdio.h>
#include <ctype.h>
#include <math.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
int main(int argc, char **argv)
{
if (argc != 2)
{
printf("This number of arguments is incorrect.");
return 1;
}
FILE *oldfile, *newfile;
char const *newfilename = "myhaiku.txt";
int linecntr = 0, fivesyllines = 0, svnsyllines = 0;
int freearr, newhaiku;
// char **fivesylptrs = malloc(200 * sizeof(char *));
// char **svnsylptrs = malloc(200 * sizeof(char *));
char *fivesylptrs[200], *svnsylptrs[100];
char line[129];
srand(time(NULL));
/* for (fivesyllines = 0; fivesyllines < 200; fivesyllines++)
{
fivesylptrs[fivesyllines] = (char *) malloc(129 * sizeof(char));
}
for (svnsyllines = 0; svnsyllines < 200; svnsyllines++)
{
svnsylptrs[svnsyllines] = (char *) malloc(129 * sizeof(char));
} */
oldfile = fopen(argv[1], "r");
newfile = fopen(newfilename, "w");
if (oldfile)
{
while (fgets(line, 129, oldfile) != NULL)
{
linecntr++;
if ((linecntr % 2) != 0)
{
// printf("%s", line);
fivesylptrs[fivesyllines] = (char *)malloc(129 * sizeof(char));
if (fivesylptrs[fivesyllines] == NULL)
{
printf("The memory for the five syllable strings wasn't allocated properly.\n");
}
strcpy(fivesylptrs[fivesyllines], line);
// printf("%s", fivesylptrs[fivesyllines]);
fivesylptrs[fivesyllines] = fivesylptrs[fivesyllines + 1];
// fivesyllines++;
}
else if ((linecntr % 2) == 0 && line[0] != '\n')
{
// printf("%s", line);
svnsylptrs[svnsyllines] = (char *)malloc(129 * sizeof(char));
if (svnsylptrs[svnsyllines] == NULL)
{
printf("The memory for the seven syllable strings wasn't allocated properly.\n");
}
strcpy(svnsylptrs[svnsyllines], line);
// printf("%s", svnsylptrs[svnsyllines]);
svnsylptrs[svnsyllines] = svnsylptrs[svnsyllines + 1];
// svnsyllines++;
}
}
}
else
{
printf("This file could not be opened, please try again.\n");
}
for (newhaiku = 0; newhaiku < 10; newhaiku++)
{
printf("%s", fivesylptrs[(rand() % 200)]);
printf("%s", svnsylptrs[(rand() % 100)]);
printf("%s", fivesylptrs[(rand() % 200)]);
}
fclose(oldfile);
fclose(newfile);
for (freearr = 0; freearr < 200; freearr++)
{
free(fivesylptrs[freearr]);
}
for (freearr = 0; freearr < 100; freearr++)
{
free(svnsylptrs[freearr]);
}
return 0;
}
I have tried quite a few different techniques to resolve the errors that I'm pretty sure I'm having with malloc, but I can't figure out exactly what I'm doing incorrectly. I would really appreciate any and all help!
svnsyllines and fivesyllines are not incremented so you allocate svnsylptrs and fivesylptrs at index 0 multiple times (leak). The rest of the array is never allocated even though you access it.
Two issues here:
Array indices for the arrays svnsylptrs and fivesylptrs are not incremented in the respective blocks.
The size of the array svnsylptrs is 200. But the for loop at the end only frees the first 100 elements. This will cause a memory leak.
for (freearr = 0; freearr < 100; freearr++) //This should check upto 200
{
free(svnsylptrs[freearr]);
}

Resources