I want to compare 2 files for identical lines: mytab2411.txt(15,017,210 bytes in size) and shadow.txt (569 bytes in size) but when I compiled this code and ran the program, I get a segmentation fault. I know that it's because the "mytab2411.txt" file exceeds the size of "char buf" but how do I go about solving this problem without overflowing the buffer?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("shadow.txt", "rt");
f2 = fopen("mytab2411.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}
Your method assumes that each line in the file is 1024 bytes long. In practice the lines can be up to 1024 bytes, but most lines are much shorter. Use strdup or malloc to allocate memory for each line based on line's length.
Store the lines in dynamically allocated arrays. This is about 15 MB of data and it should not be a problem unless there are resource limitations.
int main(void)
{
char buf[1024];
char **arr1 = NULL;
char **arr2 = NULL;
int size1 = 0;
int size2 = 0;
FILE * f1, *f2;
f1 = fopen("shadow.txt", "r");
f2 = fopen("mytab2411.txt", "r");
while(fgets(buf, 1024, f1))
{
size1++;
arr1 = realloc(arr1, sizeof(char*) * size1);
arr1[size1 - 1] = strdup(buf);
}
while(fgets(buf, 1024, f2))
{
size2++;
arr2 = realloc(arr2, sizeof(char*) * size2);
arr2[size2 - 1] = strdup(buf);
}
for(int i = 0; i < size1; i++)
for(int j = 0; j < size2; j++)
{
if(strcmp(arr1[i], arr2[j]) == 0)
printf("match %s\n", arr1[i]);
}
return 0;
}
Related
I attempt to malloc char** to store string, and free this, but I got this error. I can't understand why. The steps are as follows:
1:
char **pid_array = (char **)malloc(sizeof(char *) * MAX_LEN);
2:
pid_array[0] = (char *)malloc(sizeof(char) * SINGLE_LEN * MAX_LEN);
3:
free(pid_array); free(pid_array[0]);
The detailed code follows:
#include <assert.h>
#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define MAX_LEN 1000
#define SINGLE_LEN 10
int isPid(char *str) {
int len = strlen(str);
for (int i = 0; i < len; i++) {
if (isdigit(str[i]) == 0) {
return 1;
}
}
return 0;
}
void getFileName(char *dir_path, char *pid_array[], int *len) {
DIR *dir = opendir(dir_path);
if (dir == NULL) {
fprintf(stderr, "path open failed!\n");
exit(EXIT_FAILURE);
}
chdir(dir_path);
struct dirent *ent;
int i = 0;
while ((ent = readdir(dir)) != NULL) {
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
continue;
}
int size = strlen(ent->d_name);
if (isPid(ent->d_name) == 0) {
pid_array[i++] = ent->d_name;
}
}
*len = i;
closedir(dir);
}
int main(int argc, char *argv[]) {
int pflag, nflag, vflag;
pflag = 0;
nflag = 0;
vflag = 0;
int opt;
while ((opt = getopt(argc, argv, "pvn")) != -1) {
switch (opt) {
case 'p':
pflag = 1;
break;
case 'v':
vflag = 1;
break;
case 'n':
nflag = 1;
break;
}
}
printf("pflag=%d; nflag=%d; vflag=%d; optind=%d\n", pflag, nflag, vflag, optind);
char **pid_array = (char **)malloc(sizeof(char *) * MAX_LEN);
pid_array[0] = (char *)malloc(sizeof(char) * SINGLE_LEN * MAX_LEN);
for(int i=0; i < MAX_LEN; i++){
pid_array[i]=pid_array[i-1]+SINGLE_LEN;
}
/*
for (int i = 0; i < MAX_LEN; i++) {
pid_array[i] = (char *)malloc(sizeof(char) * SINGLE_LEN);
assert(pid_array[i] != NULL);
}
*/
for (int i = 0; i < MAX_LEN; i++) {
free(pid_array[i]);
}
int *pid_array_len = (int *)malloc(sizeof(int));
getFileName("/proc", pid_array, pid_array_len);
for (int i = 0; i < *pid_array_len; i++) {
printf("%d\n", atoi(pid_array[i]));
}
free(pid_array);
free(pid_array[0]);
free(pid_array_len);
return 0;
}
The error is follow:
error
The steps as noted are not correct.
if pid_array is char** then
*pid_array is char*
**pid_array is char
And you need to construct them as such. And free them in the reverse order. If you intend to have a vector of pointers at pid_array then your case is very very common: every C program gets one for free. The main prototype can be declared as
int main(int argc, char**argv);
The system knows how many char* to pass to the program, but in your case maybe the simplest (safest) way is to use encapsulation and build a block like this
typedef struct
{
size_t argc;
char** argv;
} Block;
I will let an example below.
a way to free the block properly
If you insist in using just the pointer you can easily adapt this. Anyway a possible implementation is
Block* delete (Block* blk)
{
if (blk == NULL) return NULL;
fprintf(
stderr, "Deleting block of %llu strings\n",
blk->argc);
for (int i = 0; i < blk->argc; i += 1)
free(blk->argv[i]);
free(blk->argv);
free(blk);
fprintf(stderr, "Deleted...\n");
return NULL;
}
The reason to return a pointer is to create a simple way to assure the pointer is invalidated as in
my_block = delete (my_block);
In the example
A block is created
is filled with strings of random size
the strings are printed
the block is deleted
main for the example
int main(void)
{
srand(220630);
const int size = MAX_LEN;
Block* my_block = build(size);
fill(my_block);
show(my_block, "a vector of numbered strings");
my_block = delete (my_block);
return 0;
}
the output
a vector of numbered strings
25 strings:
1 "#000#k"
2 "#001#swfsxji"
3 "#002#cn"
4 "#003#akmxhksqgb"
5 "#004#dqnegzryobmhucldx"
6 "#005#iiuqddvuvukkrs"
7 "#006#jxvlsolocgnvgjcrwh"
8 "#007#zylbzumyhmeswxuno"
9 "#008#ex"
10 "#009#ixinxqyxqydnswb"
11 "#010#ylxelydzqgs"
12 "#011#absdfpdjvgwhxcmzekr"
13 "#012#sceqzvmjskkrmszpth"
14 "#013#n"
15 "#014#rsmkrqhssjniqgphjp"
16 "#015#dgojvpflydevwudvv"
17 "#016#qbmaolgrskkqghhkgb"
18 "#017#uzsunopqpdawg"
19 "#018#rvdeaiooylywf"
20 "#019#zfejmgqxu"
21 "#020#fjubcmllylxqahvbfh"
22 "#021#zwanyivra"
23 "#022#vooropiugmuya"
24 "#023#js"
25 "#024#qzecia"
Deleting block of 25 strings
Deleted...
The complete C code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LEN 25
typedef struct
{
size_t argc;
char** argv;
} Block;
Block* build(size_t ttl);
Block* delete (Block* blk);
int fill(Block* bl);
int show(Block* blk, const char* title);
int main(void)
{
srand(220630);
const int size = MAX_LEN;
Block* my_block = build(size);
fill(my_block);
show(my_block, "a vector of numbered strings");
my_block = delete (my_block);
return 0;
}
Block* build(size_t ttl)
{
if (ttl == 0) return NULL;
Block* blk = (Block*)malloc(sizeof(Block));
if (blk == NULL) return NULL;
blk->argc = (ttl > MAX_LEN) ? MAX_LEN : ttl;
blk->argv = (char**)malloc(ttl * sizeof(char*));
if (blk->argv == NULL) return NULL;
for (int i = 0; i < ttl; i += 1)
*(blk->argv + i) = NULL;
return blk;
}
int fill(Block* bl)
{
const char prefix[] = "#nnn#"; // common prefix
char buffer[30] = {0};
char data[20] = {0};
for (int i = 0; i < bl->argc; i += 1)
{
int rest = 1 + rand() % 19;
for (int j = 0; j < rest; j += 1)
data[j] = 'a' + rand() % 26; // a single letter
data[rest] = 0; // terminates string
int res = sprintf(buffer, "#%03d#%s", i, data);
bl->argv[i] = (char*)malloc(strlen(buffer) + 1);
strcpy(bl->argv[i], buffer);
}
return 0;
}
int show(Block* blk, const char* title)
{
if (title != NULL) printf("%s\n", title);
printf("%llu strings:\n", blk->argc);
for (int i = 0; i < MAX_LEN; i += 1)
printf("%d\t \"%s\"\n", 1 + i, *(blk->argv + i));
printf("\n");
return 0;
}
Block* delete (Block* blk)
{
if (blk == NULL) return NULL;
fprintf(
stderr, "Deleting block of %llu strings\n",
blk->argc);
for (int i = 0; i < blk->argc; i += 1)
free(blk->argv[i]);
free(blk->argv);
free(blk);
fprintf(stderr, "Deleted...\n");
return NULL;
}
// https://stackoverflow.com/questions/72809939/
// how-do-i-use-free-properly-to-free-memory-when
// -using-malloc-for-char
I have the following inputs:
23 34 43 56 45
100 73
I want to input these two strings in different arrays integer arrays using pointer since the input size is apriori unknown.
I have written the following code, but I cannot get the integers into any of the two arrays.
#include <stdio.h>
#include <stdlib.h>
#define lent(x) (sizeof(x) / sizeof(x[0]))
#define Malloc(n, type) (type *)malloc((unsigned)((n) * sizeof(type)))
#define Realloc(ptr, n, type) (type *)realloc(ptr, (n) * sizeof(type))
void getInt_Stream(int *ptr)
{
int i = 0;
ptr = Malloc(i+1, int);
char c;
scanf("%c", &c);
while ((c != EOF) && (c != '\n'))
{
if (c >= '0' && c <= '9')
{
ptr[i] = ptr[i] * 10 + (c - '0');
}
else if (c == ' ')
{
i++;
ptr = Realloc(ptr, i+1,int);
ptr[i] = 0;
}
scanf("%c", &c);
}
}
int main()
{
int *arr1, *arr2;
getInt_Stream(arr1);
getInt_Stream(arr2);
int n1 = lent(arr1);
for (int i = 0; i < n1; i++)
{
printf(" arr1[%d] =%d\n", i, *(arr1+i));
}
return 0;
}
Also, I am getting some errors and warnings when I compile the program using
gcc prog.c -o prog -Wall -Wextra.
Please help with some hints. Thanks in advance.
Would you please try the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* get integer values from the file stream
* return the pointer to the array of integers
* the size of the array is stored in *n
*/
int *
getInt_Stream(FILE *fp, int *n)
{
char str[BUFSIZ]; // line buffer of the input stream
char *tk; // pointer to each token
char delim[] = " "; // delimiter of the tokens
int *arr = NULL; // array of intergers
int count = 0; // token counter
if (NULL == (fgets(str, BUFSIZ, fp))) {
// read a line and assign str
perror("fgets");
exit(1);
}
for (tk = strtok(str, delim); tk != NULL; tk = strtok(NULL, delim)) {
// get token one by one
if (NULL == (arr = realloc(arr, (count + 1) * sizeof(int)))) {
// allocate memory for the array
perror("realloc");
exit(1);
}
arr[count] = (int)strtol(tk, (char **)NULL, 10);
// assign the array element to int
count++;
}
*n = count; // number of the elements
return arr; // pointer to the array
}
int
main(int argc, char *argv[])
{
int *arr1, *arr2;
int n1, n2;
char *filename = argv[1];
FILE *fp;
int i;
if (argc != 2) {
fprintf(stderr, "usage: %s file.txt\n", argv[0]);
exit(1);
}
if (NULL == (fp = fopen(filename, "r"))) {
perror(filename);
exit(1);
}
arr1 = getInt_Stream(fp, &n1);
for (i = 0; i < n1; i++) {
printf("arr1[%d] = %d\n", i, arr1[i]);
}
arr2 = getInt_Stream(fp, &n2);
for (i = 0; i < n2; i++) {
printf("arr2[%d] = %d\n", i, arr2[i]);
}
free(arr1);
free(arr2);
fclose(fp);
return 0;
}
Output for the provided file:
$ ./a.out file.txt
arr1[0] = 23
arr1[1] = 34
arr1[2] = 43
arr1[3] = 56
arr1[4] = 45
arr2[0] = 100
arr2[1] = 73
I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optomize it for larger files (like 3 million numbers) using malloc, but don't know exactly where and how. I'd like to ask for help in this. (I'm a beginner)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
int sort(int size, int arr[])
{
for (int i = 0; i < size - 1; i++)
{
for (int j = 0; j < size - i - 1; j++)
{
if (arr[j] > arr[j + 1])
{
int swap = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = swap;
}
}
}
}
int main(int argc, char *argv[])
{
char *filename = argv[1];
char s[20];
if (argc == 1)
{
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
int arr[BUFFER];
int i = 0;
int size = 0;
while ((fgets(s, BUFFER, fp)) != NULL)
{
s[strlen(s) - 1] = '\0';
arr[i] = atoi(s);
++i;
++size;
}
fclose(fp);
sort(size, arr);
for (int i = 0; i < size; ++i)
{
printf("%d\n", arr[i]);
}
return 0;
}
Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
static int numcompar(const void *a, const void *b) {
const int *x = a;
const int *y = b;
// it is tempting to return *x - *y; but undefined behavior lurks
return *x < *y ? -1 : *x == *y ? 0 : 1;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
// TODO: handle error
abort();
}
char *filename = argv[1];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
abort();
}
// this will be our array
// note realloc(NULL is equal to malloc()
int *arr = NULL;
size_t arrcnt = 0;
// note - I am using fscanf for simplicity
int temp = 0;
while (fscanf(fp, "%d", &temp) == 1) {
// note - reallocating the space each number for the next number
void *tmp = realloc(arr, sizeof(*arr) * (arrcnt + 1));
if (tmp == NULL) {
free(arr);
fclose(fp);
abort();
}
arr = tmp;
// finally assignment
arr[arrcnt] = temp;
arrcnt++;
}
fclose(fp);
// writing sorting algorithms is boring
qsort(arr, arrcnt, sizeof(*arr), numcompar);
for (size_t i = 0; i < arrcnt; ++i) {
printf("%d\n", arr[i]);
}
free(arr);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to keep the number of the size of the array and "used" (assigned to) elements of the array separately and reallocate the array by a ratio greater then 1. There are voices that prefer to use the golden ratio number in such cases.
To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: insertion sort has quadratic time complexity, so it might take a long time for 3 million items, unless they are already sorted. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
static int compare_int(const void *pa, const void *pb) {
int a = *(const int *)pa;
int b = *(const int *)pb;
// return -1 if a < b, 0 if a == b and +1 if a > b
return (a > b) - (a < b);
}
int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
char buf[80];
size_t n = 0, size = 0;
int *array = NULL;
/* read the numbers */
while (fgets(buf, sizeof buf, fp)) {
if (n == size) {
/* increase size by at least 1.625 */
size_t newsize = size + size / 2 + size / 8 + 32;
int *newarray = realloc(array, newsize * sizeof(*array));
if (newarray == NULL) {
printf("cannot allocate space for %zu numbers\n", newsize);
free(array);
fclose(fp);
exit(1);
}
array = newarray;
size = newsize;
}
array[n++] = strtol(buf, NULL, 10);
}
fclose(fp);
/* sort the array */
qsort(array, n, sizeof(*array), compare_int);
for (size_t i = 0; i < n; i++) {
printf("%d\n", array[i]);
}
free(array);
return 0;
}
I'm allocating memory for my int *occurrences int *wordCounts and char **uniqueWords pointers and then at the end of the function that allocates the memory, i free them. However, when i compile the program i get an double free or corruption (!prev) aborting error. Is it caused by malloc,free or could it be due to how i initialize them inside the for loop ?
PS: I'm talking about the sortedCount() method, located towards the end
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#define MAX_STRING_SIZE 512 /* each line in the file can have up to 512 chars */
void populateWordsArray(int);
void reverse(int);
void first(int);
void middle(int);
void last(int);
int count(int, char*, int);
void sortedCount(int);
void determineUniqueWords(int *,char **, int);
void *malloc_or_end(size_t);
void* malloc_or_end(size_t sz) {
void *pointer;
pointer = malloc(sz);
if(pointer == NULL) {
printf("Out of memory, terminating.\n");
exit(-1);
}
return pointer;
}
/* turn into local */
FILE *file;
char **wordList;
void determineUniqueWords(int *occurrences, char **word, int N) {
int i = 0;
int j = 0;
for(i = 0; i < N; i++) {
if(occurrences[i] < 1) {
continue;
}
for(j = i + 1; j < N; j++) {
if(occurrences[j] == 1 && (strcmp(word[i],word[j])) == 0) {
occurrences[i]++;
occurrences[j] = 0;
}
}
}
}
/**
* Function populateWordsArray: reads N words from
* the given file and populates the wordList array with them.
* Has one argument: int N - the number of words to read.
* */
void populateWordsArray(int N) {
int i = 0;
while(i < N && (fscanf(file,"%s",wordList[i]) == 1)) { /* fscanf returns the number of successfully read items. If it's not 1, the read failed. Same as checking if fscanf reads the eof char. */
i++;
}
}
/**
* Function reverse: prints the words of the
* text file in reverse order.
* */
void reverse(int N) {
int i = 0;
for(i = N-1; i >= 0; i--) {
if(i == 0) {
printf("%s \n",wordList[i]);
} else if(strcmp(wordList[i],"") == 0) { /* improve this in main-> memory allocation */
continue;
}else {
printf("%s ",wordList[i]);
}
}
return;
}
/**
* Function first: Prints the first char of each
* word in the file.
* */
void first(int N) {
char firstChar;
int i = 0;
for(i = 0; i < N; i++) {
firstChar = *wordList[i];
printf("%c",firstChar);
}
printf("\n");
return;
}
/**
* Function middle: Prints the middle char of each word
* from the given file.
* */
void middle(int N) {
int middleIndex = 0;
int i = 0;
char midChar;
for(i = 0; i < N; i++) {
if((strlen(wordList[i]) % 2) == 0) { /* artios */
middleIndex = ((strlen(wordList[i]) / 2) - 1);
midChar = wordList[i][middleIndex];
}
else { /* peritos */
middleIndex = (int) ceil((strlen(wordList[i]) / 2));
midChar = wordList[i][middleIndex];
}
printf("%c",midChar);
}
printf("\n");
return;
}
/**
* Function last: Prints the last char of each
* word from the given file.
* */
void last(int N) {
int i = 0;
char lastChar;
int lastPos;
for(i = 0; i < N; i++) {
lastPos = strlen(wordList[i]) - 1;
lastChar = wordList[i][lastPos];
printf("%c",lastChar);
}
printf("\n");
return;
}
/**
* Function count: Prints the number of times
* that the selected word is found inside the N first words
* of the file.
* */
int count(int N, char *word, int callID) {
int i = 0;
int count = 0;
for(i = 0; i < N; i++) {
if(strcmp(word,wordList[i]) == 0) {
count++;
}
}
if(callID == 0) { /* if callID == 0 (main called count and we want the output) */
printf("%d",count);
printf("\n");
}
return count;
}
void sortedCount(int N) {
int i,j = 0;
int *occurrences;
int *wordCounts;
char **uniqueWords;
/* mem allocation */
uniqueWords = malloc_or_end(N * sizeof(char*)); /* worst case: every word is unique */
wordCounts = malloc_or_end(N * sizeof(int));
occurrences = malloc_or_end(N * sizeof(int));
/* initialize rootWord and occurrences for the "each word is unique and occurs only once" scenario */
for(i = 0; i < N; i++) {
uniqueWords[i] = malloc_or_end(MAX_STRING_SIZE * sizeof(char));
occurrences[i] = 1;
}
determineUniqueWords(occurrences,wordList,N);
/* populate the wordCounts & uniqueWords "arrays" with the appropriate data in order to sort them successfully */
for(i = 0; i < N; i++) {
if(occurrences[i] > 0) {
wordCounts[i] = count(N,wordList[i],1);
uniqueWords[i] = wordList[i];
}
}
for(i = 0; i < N; i++) {
free(uniqueWords[i]);
}
free(uniqueWords);
free(occurrences);
free(wordCounts);
return;
}
int main(int argc,char *argv[]) { /* argv[1] = op argv[2] = name argv[3] = <word> */
int N = -1;
int i = 0;
int spaceNum,nlNum = -1;
file = fopen(argv[2],"r");
if(file == (FILE *) NULL) { /* check if the file opened successfully */
fprintf(stderr,"Cannot open file\n");
}
fscanf(file,"%d",&N); /* get the N number */
wordList = malloc_or_end(N * sizeof(char *)); /* allocate memory for pointers */
for(i = 0; i < N; i++) {
wordList[i] = malloc_or_end(MAX_STRING_SIZE * sizeof(char)); /* allocate memory for strings */
}
populateWordsArray(N);
if(strcmp(argv[1],"-reverse") == 0) {
reverse(N);
} else if(strcmp(argv[1],"-first") == 0) {
first(N);
} else if(strcmp(argv[1],"-middle") == 0) {
middle(N);
} else if(strcmp(argv[1],"-last") == 0) {
last(N);
} else if((strcmp(argv[1],"-count") == 0) && argv[3] != NULL) {
i = count(N,argv[3],0);
} else if((strcmp(argv[1],"-sorted") == 0) && (strcmp(argv[3],"-count") == 0)) {
sortedCount(N);
} else {
/* i only wish i could print something here */
}
/* End of program operations */
for(i = 0; i < N; i++) {
free(wordList[i]);
}
free(wordList);
fclose(file);
return 0;
}
You are overwriting the value of a pointer to heap memory on line 185:
uniqueWords[i] = wordList[i];
This means that when you free it later, you are actually freeing the allocated rows in wordList. Now you have two problems:
When you free the wordList rows on lines 244-246, it will be a double-free
You are losing your reference to the uniqueWords rows.
Use strcpy to assign to a dynamically-allocated string rather than the = operation.
I'm trying to make a quick and simple signature detection program in C. It should read a binary file (.exe, ELF, a library, etc...) and search for binary data (sometimes strings, sometimes bytes);
I have a simple test program in C:
#include <stdio.h>
#include <unistd.h>
const char *str = "TestingOneTwoThree";
int main()
{
while(1)
{
fprintf(stdout, "%s %ld\n", str, (long)getpid());
sleep(1);
}
}
Here is the horspool algorithm I'm using. I adapted it directly from the wikipedia pseudocode found here: https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HORSPOOL_COUNT 256
#define BLOCK_SIZE 1024
#define MAX(a, b) a > b ? a : b
ssize_t horspool_find(const char *buf, size_t buflen, const char *egg, size_t egglen)
{
int table[HORSPOOL_COUNT];
ssize_t shift = 0, i, tmp;
for(i = 0; i < HORSPOOL_COUNT; ++i)
{
table[i] = (int)egglen;
}
for(i = 0; i < egglen - 1; ++i)
{
table[(int)egg[i]] = egglen - i - 1;
}
while(shift <= buflen - egglen)
{
i = egglen - 1;
while(buf[shift + i] == egg[i])
{
if(i == 0)
{
return shift;
}
i--;
}
shift += MAX(1, table[(int)buf[shift + egglen - 1]]);
}
return -1;
}
char *readfile(const char *filename, size_t *size)
{
int ch;
size_t used = 0, allocated = 0;
char *buf = NULL, *tmp = NULL;
FILE *f;
if((f = fopen(filename, "rb")) == NULL)
{
if(size) *size = 0;
return perror("fopen"), NULL;
}
while((ch=fgetc(f)) != EOF)
{
if(used >= allocated)
{
allocated += BLOCK_SIZE;
tmp = realloc(buf, allocated);
if(tmp == NULL)
{
free(buf);
if(size) *size = 0;
fclose(f);
return perror("realloc"), NULL;
}
buf = tmp;
}
buf[used++] = (char)ch;
}
fclose(f);
if(size) *size = used;
return realloc(buf, used);
}
ssize_t naivealg_find(const char *buf, size_t buflen, const char *find, size_t findlen)
{
size_t i, j, diff = buflen - findlen;
for(i = 0; i < diff; ++i)
{
for(j = 0; j < findlen; ++j)
{
if(buf[i+j] != find[j])
{
break;
}
}
if(j == findlen)
{
return (ssize_t)i;
}
}
return -1;
}
int main()
{
size_t size;
char *buf = readfile("./a.out", &size);
char *pat = "TestingOneTwoThree";
ssize_t pos1 = horspool_find(buf, size, pat, strlen(pat));
ssize_t pos2 = naivealg_find(buf, size, pat, strlen(pat));
fprintf(stdout, "Offsets: %zd ~ %zd\n", pos1, pos2);
return 0;
}
Output is something along the lines of:
Offsets: -1 ~ 2052
Notes:
The same buffer and "egg" work with the naive search implementation.
The horspool implementation seems to work correctly with normal strings as the buf and egg parameters.
Code was using a signed char and with binary data, from time to time, would index incorrectly with a negative index.
// table[(int)buf[shift + egglen - 1]]
table[(unsigned char )buf[shift + egglen - 1]]
This problem also exists in the the egg pattern.
// table[(int) egg[i]] = egglen - i - 1;
table[(unsigned char) egg[i]] = egglen - i - 1;
Other sign issues occur when buflen < egglen
// while (shift <= buflen - egglen)
// change to avoid underflow
while (shift + egglen <= buflen)
Also consider opening the file in binary and:
ssize_t shift,i; --> size_t shift,i;
int table[HORSPOOL_COUNT]; -- > size_t table[HORSPOOL_COUNT];
Add ()s to #define MAX(a, b) (((a) > (b)) ? (a) : (b))