Why does Horspool not work on binaries? - c

I'm trying to make a quick and simple signature detection program in C. It should read a binary file (.exe, ELF, a library, etc...) and search for binary data (sometimes strings, sometimes bytes);
I have a simple test program in C:
#include <stdio.h>
#include <unistd.h>
const char *str = "TestingOneTwoThree";
int main()
{
while(1)
{
fprintf(stdout, "%s %ld\n", str, (long)getpid());
sleep(1);
}
}
Here is the horspool algorithm I'm using. I adapted it directly from the wikipedia pseudocode found here: https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define HORSPOOL_COUNT 256
#define BLOCK_SIZE 1024
#define MAX(a, b) a > b ? a : b
ssize_t horspool_find(const char *buf, size_t buflen, const char *egg, size_t egglen)
{
int table[HORSPOOL_COUNT];
ssize_t shift = 0, i, tmp;
for(i = 0; i < HORSPOOL_COUNT; ++i)
{
table[i] = (int)egglen;
}
for(i = 0; i < egglen - 1; ++i)
{
table[(int)egg[i]] = egglen - i - 1;
}
while(shift <= buflen - egglen)
{
i = egglen - 1;
while(buf[shift + i] == egg[i])
{
if(i == 0)
{
return shift;
}
i--;
}
shift += MAX(1, table[(int)buf[shift + egglen - 1]]);
}
return -1;
}
char *readfile(const char *filename, size_t *size)
{
int ch;
size_t used = 0, allocated = 0;
char *buf = NULL, *tmp = NULL;
FILE *f;
if((f = fopen(filename, "rb")) == NULL)
{
if(size) *size = 0;
return perror("fopen"), NULL;
}
while((ch=fgetc(f)) != EOF)
{
if(used >= allocated)
{
allocated += BLOCK_SIZE;
tmp = realloc(buf, allocated);
if(tmp == NULL)
{
free(buf);
if(size) *size = 0;
fclose(f);
return perror("realloc"), NULL;
}
buf = tmp;
}
buf[used++] = (char)ch;
}
fclose(f);
if(size) *size = used;
return realloc(buf, used);
}
ssize_t naivealg_find(const char *buf, size_t buflen, const char *find, size_t findlen)
{
size_t i, j, diff = buflen - findlen;
for(i = 0; i < diff; ++i)
{
for(j = 0; j < findlen; ++j)
{
if(buf[i+j] != find[j])
{
break;
}
}
if(j == findlen)
{
return (ssize_t)i;
}
}
return -1;
}
int main()
{
size_t size;
char *buf = readfile("./a.out", &size);
char *pat = "TestingOneTwoThree";
ssize_t pos1 = horspool_find(buf, size, pat, strlen(pat));
ssize_t pos2 = naivealg_find(buf, size, pat, strlen(pat));
fprintf(stdout, "Offsets: %zd ~ %zd\n", pos1, pos2);
return 0;
}
Output is something along the lines of:
Offsets: -1 ~ 2052
Notes:
The same buffer and "egg" work with the naive search implementation.
The horspool implementation seems to work correctly with normal strings as the buf and egg parameters.

Code was using a signed char and with binary data, from time to time, would index incorrectly with a negative index.
// table[(int)buf[shift + egglen - 1]]
table[(unsigned char )buf[shift + egglen - 1]]
This problem also exists in the the egg pattern.
// table[(int) egg[i]] = egglen - i - 1;
table[(unsigned char) egg[i]] = egglen - i - 1;
Other sign issues occur when buflen < egglen
// while (shift <= buflen - egglen)
// change to avoid underflow
while (shift + egglen <= buflen)
Also consider opening the file in binary and:
ssize_t shift,i; --> size_t shift,i;
int table[HORSPOOL_COUNT]; -- > size_t table[HORSPOOL_COUNT];
Add ()s to #define MAX(a, b) (((a) > (b)) ? (a) : (b))

Related

How do I use free() properly to free memory when using malloc for char?

I attempt to malloc char** to store string, and free this, but I got this error. I can't understand why. The steps are as follows:
1:
char **pid_array = (char **)malloc(sizeof(char *) * MAX_LEN);
2:
pid_array[0] = (char *)malloc(sizeof(char) * SINGLE_LEN * MAX_LEN);
3:
free(pid_array); free(pid_array[0]);
The detailed code follows:
#include <assert.h>
#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define MAX_LEN 1000
#define SINGLE_LEN 10
int isPid(char *str) {
int len = strlen(str);
for (int i = 0; i < len; i++) {
if (isdigit(str[i]) == 0) {
return 1;
}
}
return 0;
}
void getFileName(char *dir_path, char *pid_array[], int *len) {
DIR *dir = opendir(dir_path);
if (dir == NULL) {
fprintf(stderr, "path open failed!\n");
exit(EXIT_FAILURE);
}
chdir(dir_path);
struct dirent *ent;
int i = 0;
while ((ent = readdir(dir)) != NULL) {
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
continue;
}
int size = strlen(ent->d_name);
if (isPid(ent->d_name) == 0) {
pid_array[i++] = ent->d_name;
}
}
*len = i;
closedir(dir);
}
int main(int argc, char *argv[]) {
int pflag, nflag, vflag;
pflag = 0;
nflag = 0;
vflag = 0;
int opt;
while ((opt = getopt(argc, argv, "pvn")) != -1) {
switch (opt) {
case 'p':
pflag = 1;
break;
case 'v':
vflag = 1;
break;
case 'n':
nflag = 1;
break;
}
}
printf("pflag=%d; nflag=%d; vflag=%d; optind=%d\n", pflag, nflag, vflag, optind);
char **pid_array = (char **)malloc(sizeof(char *) * MAX_LEN);
pid_array[0] = (char *)malloc(sizeof(char) * SINGLE_LEN * MAX_LEN);
for(int i=0; i < MAX_LEN; i++){
pid_array[i]=pid_array[i-1]+SINGLE_LEN;
}
/*
for (int i = 0; i < MAX_LEN; i++) {
pid_array[i] = (char *)malloc(sizeof(char) * SINGLE_LEN);
assert(pid_array[i] != NULL);
}
*/
for (int i = 0; i < MAX_LEN; i++) {
free(pid_array[i]);
}
int *pid_array_len = (int *)malloc(sizeof(int));
getFileName("/proc", pid_array, pid_array_len);
for (int i = 0; i < *pid_array_len; i++) {
printf("%d\n", atoi(pid_array[i]));
}
free(pid_array);
free(pid_array[0]);
free(pid_array_len);
return 0;
}
The error is follow:
error
The steps as noted are not correct.
if pid_array is char** then
*pid_array is char*
**pid_array is char
And you need to construct them as such. And free them in the reverse order. If you intend to have a vector of pointers at pid_array then your case is very very common: every C program gets one for free. The main prototype can be declared as
int main(int argc, char**argv);
The system knows how many char* to pass to the program, but in your case maybe the simplest (safest) way is to use encapsulation and build a block like this
typedef struct
{
size_t argc;
char** argv;
} Block;
I will let an example below.
a way to free the block properly
If you insist in using just the pointer you can easily adapt this. Anyway a possible implementation is
Block* delete (Block* blk)
{
if (blk == NULL) return NULL;
fprintf(
stderr, "Deleting block of %llu strings\n",
blk->argc);
for (int i = 0; i < blk->argc; i += 1)
free(blk->argv[i]);
free(blk->argv);
free(blk);
fprintf(stderr, "Deleted...\n");
return NULL;
}
The reason to return a pointer is to create a simple way to assure the pointer is invalidated as in
my_block = delete (my_block);
In the example
A block is created
is filled with strings of random size
the strings are printed
the block is deleted
main for the example
int main(void)
{
srand(220630);
const int size = MAX_LEN;
Block* my_block = build(size);
fill(my_block);
show(my_block, "a vector of numbered strings");
my_block = delete (my_block);
return 0;
}
the output
a vector of numbered strings
25 strings:
1 "#000#k"
2 "#001#swfsxji"
3 "#002#cn"
4 "#003#akmxhksqgb"
5 "#004#dqnegzryobmhucldx"
6 "#005#iiuqddvuvukkrs"
7 "#006#jxvlsolocgnvgjcrwh"
8 "#007#zylbzumyhmeswxuno"
9 "#008#ex"
10 "#009#ixinxqyxqydnswb"
11 "#010#ylxelydzqgs"
12 "#011#absdfpdjvgwhxcmzekr"
13 "#012#sceqzvmjskkrmszpth"
14 "#013#n"
15 "#014#rsmkrqhssjniqgphjp"
16 "#015#dgojvpflydevwudvv"
17 "#016#qbmaolgrskkqghhkgb"
18 "#017#uzsunopqpdawg"
19 "#018#rvdeaiooylywf"
20 "#019#zfejmgqxu"
21 "#020#fjubcmllylxqahvbfh"
22 "#021#zwanyivra"
23 "#022#vooropiugmuya"
24 "#023#js"
25 "#024#qzecia"
Deleting block of 25 strings
Deleted...
The complete C code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LEN 25
typedef struct
{
size_t argc;
char** argv;
} Block;
Block* build(size_t ttl);
Block* delete (Block* blk);
int fill(Block* bl);
int show(Block* blk, const char* title);
int main(void)
{
srand(220630);
const int size = MAX_LEN;
Block* my_block = build(size);
fill(my_block);
show(my_block, "a vector of numbered strings");
my_block = delete (my_block);
return 0;
}
Block* build(size_t ttl)
{
if (ttl == 0) return NULL;
Block* blk = (Block*)malloc(sizeof(Block));
if (blk == NULL) return NULL;
blk->argc = (ttl > MAX_LEN) ? MAX_LEN : ttl;
blk->argv = (char**)malloc(ttl * sizeof(char*));
if (blk->argv == NULL) return NULL;
for (int i = 0; i < ttl; i += 1)
*(blk->argv + i) = NULL;
return blk;
}
int fill(Block* bl)
{
const char prefix[] = "#nnn#"; // common prefix
char buffer[30] = {0};
char data[20] = {0};
for (int i = 0; i < bl->argc; i += 1)
{
int rest = 1 + rand() % 19;
for (int j = 0; j < rest; j += 1)
data[j] = 'a' + rand() % 26; // a single letter
data[rest] = 0; // terminates string
int res = sprintf(buffer, "#%03d#%s", i, data);
bl->argv[i] = (char*)malloc(strlen(buffer) + 1);
strcpy(bl->argv[i], buffer);
}
return 0;
}
int show(Block* blk, const char* title)
{
if (title != NULL) printf("%s\n", title);
printf("%llu strings:\n", blk->argc);
for (int i = 0; i < MAX_LEN; i += 1)
printf("%d\t \"%s\"\n", 1 + i, *(blk->argv + i));
printf("\n");
return 0;
}
Block* delete (Block* blk)
{
if (blk == NULL) return NULL;
fprintf(
stderr, "Deleting block of %llu strings\n",
blk->argc);
for (int i = 0; i < blk->argc; i += 1)
free(blk->argv[i]);
free(blk->argv);
free(blk);
fprintf(stderr, "Deleted...\n");
return NULL;
}
// https://stackoverflow.com/questions/72809939/
// how-do-i-use-free-properly-to-free-memory-when
// -using-malloc-for-char

Sort ints from a txt file

I need to sort ints from a file in ascending order and print them to the standard output. I can't modify the structure of the file.
The txt file looks like this:
41
65
68
35
51
...(one number in a row)
My program works just fine for small files, but I have to optomize it for larger files (like 3 million numbers) using malloc, but don't know exactly where and how. I'd like to ask for help in this. (I'm a beginner)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER 100000
int sort(int size, int arr[])
{
for (int i = 0; i < size - 1; i++)
{
for (int j = 0; j < size - i - 1; j++)
{
if (arr[j] > arr[j + 1])
{
int swap = arr[j];
arr[j] = arr[j + 1];
arr[j + 1] = swap;
}
}
}
}
int main(int argc, char *argv[])
{
char *filename = argv[1];
char s[20];
if (argc == 1)
{
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
FILE *fp = fopen(filename, "r");
if (fp == NULL)
{
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
int arr[BUFFER];
int i = 0;
int size = 0;
while ((fgets(s, BUFFER, fp)) != NULL)
{
s[strlen(s) - 1] = '\0';
arr[i] = atoi(s);
++i;
++size;
}
fclose(fp);
sort(size, arr);
for (int i = 0; i < size; ++i)
{
printf("%d\n", arr[i]);
}
return 0;
}
Your program could look like this:
#include <stdlib.h>
#include <stdio.h>
static int numcompar(const void *a, const void *b) {
const int *x = a;
const int *y = b;
// it is tempting to return *x - *y; but undefined behavior lurks
return *x < *y ? -1 : *x == *y ? 0 : 1;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
// TODO: handle error
abort();
}
char *filename = argv[1];
// open the file
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
abort();
}
// this will be our array
// note realloc(NULL is equal to malloc()
int *arr = NULL;
size_t arrcnt = 0;
// note - I am using fscanf for simplicity
int temp = 0;
while (fscanf(fp, "%d", &temp) == 1) {
// note - reallocating the space each number for the next number
void *tmp = realloc(arr, sizeof(*arr) * (arrcnt + 1));
if (tmp == NULL) {
free(arr);
fclose(fp);
abort();
}
arr = tmp;
// finally assignment
arr[arrcnt] = temp;
arrcnt++;
}
fclose(fp);
// writing sorting algorithms is boring
qsort(arr, arrcnt, sizeof(*arr), numcompar);
for (size_t i = 0; i < arrcnt; ++i) {
printf("%d\n", arr[i]);
}
free(arr);
}
Note that reallocating for one int at a time is inefficient - realloc is usually a costly function. The next step would be to keep the number of the size of the array and "used" (assigned to) elements of the array separately and reallocate the array by a ratio greater then 1. There are voices that prefer to use the golden ratio number in such cases.
To read an undetermined number of entries from the input file, you can allocate and reallocate an array using realloc() as more entries are read. For better performance it is recommended to increase the allocated size by a multiple instead of increasing linearly, especially one entry at a time.
Your sorting routine is inappropriate for large arrays: insertion sort has quadratic time complexity, so it might take a long time for 3 million items, unless they are already sorted. Use qsort() with a simple comparison function for this.
Here is a modified program:
#include <stdio.h>
#include <stdlib.h>
static int compare_int(const void *pa, const void *pb) {
int a = *(const int *)pa;
int b = *(const int *)pb;
// return -1 if a < b, 0 if a == b and +1 if a > b
return (a > b) - (a < b);
}
int main(int argc, char *argv[]) {
if (argc == 1) {
fprintf(stderr, "Error! Input then name of a .txt file\n");
exit(1);
}
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
fprintf(stderr, "Error! Can't open %s\n", filename);
exit(1);
}
char buf[80];
size_t n = 0, size = 0;
int *array = NULL;
/* read the numbers */
while (fgets(buf, sizeof buf, fp)) {
if (n == size) {
/* increase size by at least 1.625 */
size_t newsize = size + size / 2 + size / 8 + 32;
int *newarray = realloc(array, newsize * sizeof(*array));
if (newarray == NULL) {
printf("cannot allocate space for %zu numbers\n", newsize);
free(array);
fclose(fp);
exit(1);
}
array = newarray;
size = newsize;
}
array[n++] = strtol(buf, NULL, 10);
}
fclose(fp);
/* sort the array */
qsort(array, n, sizeof(*array), compare_int);
for (size_t i = 0; i < n; i++) {
printf("%d\n", array[i]);
}
free(array);
return 0;
}

How to order split command line arguments in lexicographical order using a function?

I'm working on a program that takes command line arguments and splits them in half and then orders them in lexicographical order.
For example:
hello, world!
would turn into:
he
ld!
llo
wor
I have a main method that reads through the arguments, a function that splits the arguments, and finally a function that is supposed to order the halves in lexicographical order. I can't get this to run properly because of argument type errors in the lexicographicalSort method and an incompatible pointer type in the main method. I'm having issues to correct these syntax errors, how exactly would I correct them? Also, is there anything here that would cause logical errors? This is what I have so far:
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
int splitString(char arg[], int n)
{
int len = strlen(arg);
int len1 = len/2;
int len2 = len - len1; // Compensate for possible odd length
char *s1 = malloc(len1 + 1); // one for the null terminator
memcpy(s1, arg, len1);
s1[len1] = '\0';
char *s2 = malloc(len2 + 1); // one for the null terminator
memcpy(s2, arg + len1, len2);
s2[len2] = '\0';
printf("%s\n", s1);
printf("%s\n", s2);
free(s1);
free(s2);
return 0;
}
int lexicographicalSort(char *arg[], int n)
{
char temp[50];
for(int i = 0; i < n; ++i)
scanf("%s[^\n]",arg[i]);
for(int i = 0; i < n - 1; ++i)
for(int j = i + 1; j < n ; ++j)
{
if(strcmp(arg[i], arg[j]) > 0)
{
strcpy(temp, arg[i]);
strcpy(arg[i], arg[j]);
strcpy(arg[j], temp);
}
}
for(int i = 0; i < n; ++i)
{
puts(arg[i]);
}
return 0;
}
int main(int argc, char *argv[])
{
if (argc > 1)
{
for (int i = 1; i < argc; i++)
{
int j = 1;
int k = strlen(argv[i]);
splitString(argv[i], j);
lexicographicalSort(argv[i], j);
}
}
}
Basic scheme is simple. Make an array of tuples {start_pointer, length}. Do some programming on args to split the args. Fill in the array as appropriate. Make sorting with qsort, or any other sort of your choise.
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
char *s = "hello, world! . hello.....";
char *pc;
int i, n, nargs;
struct pp{
char *p;
int l;
};
struct pp args[10], hargs[20];
struct pp *pargs;
int cmp(const void * v0, const void * v1) {
struct pp *pv0 = v0, *pv1 = v1;
return strncmp(pv0->p, pv1->p, pv0->l);
}
int main(void)
{
for(pc = s, i = 0; *pc; ++i){
sscanf(pc, "%*[^ ]%n", &n);
if(n > 0){
args[i].p = pc;
args[i].l = n;
}
for(pc += n, n = 0; isspace(*pc); ++pc);
}
for(nargs = i, i = 0; i < nargs; ++i)
printf("%d arg is: %.*s\n", i, args[i].l, args[i].p);
putchar('\n');
for(i = 0, pargs = hargs; i < nargs; ++i){
if(args[i].l == 1){
pargs->p = args[i].p;
pargs->l = 1;
pargs = pargs + 1;
}else {
pargs->p = args[i].p;
pargs->l = args[i].l / 2;
pargs = pargs + 1;
pargs->p = args[i].p + args[i].l / 2;
pargs->l = args[i].l - args[i].l / 2;
pargs = pargs + 1;
}
}
putchar('\n');
for(nargs = pargs - hargs, i = 0; i < nargs; ++i)
printf("%d arg is: %.*s\n", i, hargs[i].l, hargs[i].p);
qsort(hargs, nargs, sizeof(struct pp), cmp);
putchar('\n');
for(i = 0; i < nargs; ++i)
printf("%d arg is: %.*s\n", i, hargs[i].l, hargs[i].p);
return 0;
}
https://rextester.com/GSH22767
Upon splitting a C string, one needs one extra char to store extra null-terminator. There is one answer that bypasses this by storing the length. For completeness, this is closer to your original intention: allocating enough space to copy the programmes arguments. It probably works slower, but one is free to use the strings elsewhere in the programme.
#include <stdlib.h> /* malloc free EXIT qsort */
#include <stdio.h> /* fprintf */
#include <string.h> /* strlen memcpy */
#include <errno.h> /* errno */
static int strcompare(const void *a, const void *b) {
const char *a_str = *(const char *const*)a, *b_str = *(const char *const*)b;
return strcmp(a_str, b_str);
}
int main(int argc, char **argv) {
char *spacev = 0, **listv = 0;
size_t spacec = 0, listc = 0;
int is_done = 0;
do { /* "Try." */
int i;
char *sv;
size_t j;
/* This requires argc > 1. */
if(argc <= 1) { errno = EDOM; break; }
/* Allocate maximum space. */
for(i = 1; i < argc; i++) spacec += strlen(argv[i]) + 2;
if(!(spacev = malloc(spacec)) || !(listv = malloc(argc * 2))) break;
sv = spacev;
/* Copy and split the arguments. */
for(i = 1; i < argc; i++) {
const char *const word = argv[i];
const size_t word_len = strlen(word),
w0_len = word_len / 2, w1_len = word_len - w0_len;
if(w0_len) {
listv[listc++] = sv;
memcpy(sv, word, w0_len);
sv += w0_len;
*(sv++) = '\0';
}
if(w1_len) {
listv[listc++] = sv;
memcpy(sv, word + w0_len, w1_len);
sv += w1_len;
*(sv++) = '\0';
}
}
/* Sort. */
qsort(listv, listc, sizeof listv, &strcompare);
for(j = 0; j < listc; j++) printf("%s\n", listv[j]);
is_done = 1;
} while(0); if(!is_done) {
perror("split");
} {
free(spacev);
free(listv);
}
return is_done ? EXIT_SUCCESS : EXIT_FAILURE;
}
It is simpler than your original; instead of allocating each string individually, it counts the maximum number of chars needed (plus two for two null terminators) and allocates the block all at once (space.) The pointers to the new list also need allocating, the maximum is 2 * argc. Once you copy and modify the argument list, one has an actual array of strings that one can qsort.

file size exceeds buffer size

I want to compare 2 files for identical lines: mytab2411.txt(15,017,210 bytes in size) and shadow.txt (569 bytes in size) but when I compiled this code and ran the program, I get a segmentation fault. I know that it's because the "mytab2411.txt" file exceeds the size of "char buf" but how do I go about solving this problem without overflowing the buffer?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("shadow.txt", "rt");
f2 = fopen("mytab2411.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}
Your method assumes that each line in the file is 1024 bytes long. In practice the lines can be up to 1024 bytes, but most lines are much shorter. Use strdup or malloc to allocate memory for each line based on line's length.
Store the lines in dynamically allocated arrays. This is about 15 MB of data and it should not be a problem unless there are resource limitations.
int main(void)
{
char buf[1024];
char **arr1 = NULL;
char **arr2 = NULL;
int size1 = 0;
int size2 = 0;
FILE * f1, *f2;
f1 = fopen("shadow.txt", "r");
f2 = fopen("mytab2411.txt", "r");
while(fgets(buf, 1024, f1))
{
size1++;
arr1 = realloc(arr1, sizeof(char*) * size1);
arr1[size1 - 1] = strdup(buf);
}
while(fgets(buf, 1024, f2))
{
size2++;
arr2 = realloc(arr2, sizeof(char*) * size2);
arr2[size2 - 1] = strdup(buf);
}
for(int i = 0; i < size1; i++)
for(int j = 0; j < size2; j++)
{
if(strcmp(arr1[i], arr2[j]) == 0)
printf("match %s\n", arr1[i]);
}
return 0;
}

C: string replace in loop (c beginner)

I need to replace a strings in some text. I found this function here at stackoverflow:
char *replace(const char *s, const char *old, const char *new)
{
char *ret;
int i, count = 0;
size_t newlen = strlen(new);
size_t oldlen = strlen(old);
for (i = 0; s[i] != '\0'; i++) {
if (strstr(&s[i], old) == &s[i]) {
count++;
i += oldlen - 1;
}
}
ret = malloc(i + count * (newlen - oldlen));
if (ret == NULL)
exit(EXIT_FAILURE);
i = 0;
while (*s) {
if (strstr(s, old) == s) {
strcpy(&ret[i], new);
i += newlen;
s += oldlen;
} else
ret[i++] = *s++;
}
ret[i] = '\0';
return ret;
}
This function works for me fine for single replacement. But i need to replace a whole array "str2rep" to "replacement". So what i'm trying to do(im just a beginner)
****
#define MAXTEXT 39016
int l;
int j;
char *newsms = NULL;
char text[MAXTEXT];
char *str2rep[] = {":q:",":n:"};
char *replacement[] = {"?","\n"};
strcpy((char *)text,(char *)argv[5]);
l = sizeof(str2rep) / sizeof(*str2rep);
for(j = 0; j < l; j++)
{
newsms = replace(text,(char *)str2rep[j],(char *)replacement[j]);
strcpy(text,newsms);
free(newsms);
}
textlen = strlen(text);
This code even works locally, If I build it from single file... But this is asterisk module, so when this is being executed, asterisk stops with:
* glibc detected * /usr/sbin/asterisk: double free or corruption (!prev): 0x00007fa720006310 *
Issues:
ret = malloc(i + count * (newlen - oldlen)); is too small. Need + 1.
Consider what happens with replace("", "", ""). If your SO ref is this, it is wrong too.
Questionable results mixing signed/unsigned. count is signed. newlen, oldlen are unsigned.
I think the original code works OK, but I do not like using the wrap-around nature of unsigned math when it can be avoided which is what happens when newlen < oldlen.
// i + count * (newlen - oldlen)
size_t newsize = i + 1; // + 1 for above reason
if (newlen > oldlen) newsize += count * (newlen - oldlen);
if (newlen < oldlen) newsize -= count * (oldlen - newlen);
ret = malloc(newsize);
Insure enough space. #hyde Various approaches available here.
// strcpy(text, newsms);
if (strlen(newsms) >= sizeof text) Handle_Error();
strcpy(text, newsms);
Minor
No need for casts
// newsms = replace(text, (char *) str2rep[j], (char *) replacement[j]);
newsms = replace(text, str2rep[j], replacement[j]);
Better to use size_t for i. A pedantic solution would also use size_t count.
// int i;
size_t i;
I will suggest something that to me looks a bit more clear as an alternative, in place of a proper dynamic string implementation. Exception handling is left as an exercise for the reader to add. :)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *appendn(char *to, char *from, int length)
{
return strncat(realloc(to, strlen(to) + length + 1), from, length);
}
char *replace(char *string, char *find, char *sub)
{
char *result = calloc(1, 1);
while (1)
{
char *found = strstr(string, find);
if (!found)
break;
result = appendn(result, string, found - string);
result = appendn(result, sub, strlen(sub));
string = found + strlen(find);
}
return appendn(result, string, strlen(string));
}
int main()
{
const char text[] = "some [1] with [2] to [3] with other [2]";
char *find[] = {"[1]", "[2]", "[3]", NULL};
char *sub[] = {"text", "words", "replace"};
char *result, *s;
int i;
result = malloc(sizeof(text));
(void) strcpy(result, text);
for (i = 0; find[i]; i ++)
{
s = replace(result, find[i], sub[i]);
free(result);
result = s;
}
(void) printf("%s\n", result);
free(result);
}

Resources