c - make string compare function more brief - c

I wrote a function to compare 2 strings. It returns an int as the comparison result and takes an additional int pointer parameter through which it reports the maximum match length.
// compare 2 strings
#include <stdio.h>
/**
 * Compare two strings.
 *
 * @param sa
 *   string 1
 * @param sb
 *   string 2
 * @param len
 *   an int pointer passed in from outside to store the match length
 *
 * @return
 *   0 if equal, <0 if (a < b), >0 if (a > b)
 */
static int strCompare (char *sa, char *sb, int *len) {
// *len counts how many leading characters matched before the loop stopped.
for((*len)=0; *sa==*sb; sa++,sb++, (*len)++) {
// Both characters are equal here; stop at the terminator so neither
// pointer (nor the count) is advanced past '\0'.
if(!*sa)
break;
// printf("%c,%c\n", *sa, *sb);
}
// Difference of the characters at the stopping position (0 when both are '\0').
return *sa - *sb;
}
// Compares argv[1] with argv[2] and prints both strings, the comparison
// result and the match length.
int main(int argc, char *argv[]) {
// Two strings are needed in addition to the program name.
if(argc < 3) {
printf("need 2 arguments.\n");
return 0;
}
int matchLen = 0;
int result = strCompare(argv[1], argv[2], &matchLen);
printf("compare:\n\t%s\n\t%s\nresult: %d\nmatch length: %d\n", argv[1], argv[2],
result, matchLen);
return 0;
}
Question:
I want the loop to be more concise, e.g. avoid the if inside the for, but I couldn't figure it out myself. Can anyone help me write a more concise version with the same function interface?
(Please don't use libc function, I do this to improve my code style ;)

You might want to avoid the repeated reads and writes through the pointer while you are at it, and go for const-correctness:
/*
 * Compare two strings and report the length of their common prefix.
 * The count is kept in a local and written through len exactly once.
 * Returns the difference of the characters at the first position where the
 * strings differ (0 when they are equal).
 */
static int strCompare (const char *sa, const char *sb, int *len) {
    int matched = 0;
    for (; *sa != '\0' && *sa == *sb; ++sa, ++sb) {
        ++matched;
    }
    *len = matched;
    return *sa - *sb;
}
Or maybe better with restrict:
/*
 * Compare two strings, counting matching leading characters directly in *len.
 * len is restrict-qualified: the caller promises it does not alias the strings.
 * Returns the difference of the characters at the first position where the
 * strings differ (0 when they are equal).
 */
static int strCompare (const char *sa, const char *sb, int * restrict len) {
    for (*len = 0; *sa != '\0' && *sa == *sb; ++sa, ++sb) {
        *len += 1;
    }
    return *sa - *sb;
}
BTW: The only thing making the code more efficient in the first case is avoiding the repeated writes through len.
In the second, it's using restrict and thus reducing aliasing (which will also remove all but the last write).
Also, consider whether size_t would not be a better type for the length.

Perhaps something like:
/*
 * Compare a and b; *len receives the length of their common prefix,
 * computed from how far the cursor moved from the start of a.
 * Returns the difference of the characters where the scan stopped.
 */
static int str_compare(const char *a, const char *b, size_t *len) {
    const char *start = a;
    while (*a != '\0' && *a == *b) {
        a++;
        b++;
    }
    *len = a - start;
    return *a - *b;
}
As Duplicator has mentioned use const for input strings.
Also size_t is widely used for sizes and counts, so likely better.
Alternative by tracking length:
/*
 * Compare a and b using *n itself as the running index; on return *n is
 * the length of the common prefix.
 * Returns the difference of the characters at index *n.
 */
static int str_compare(const char *a, const char *b, size_t *n) {
    *n = 0;
    while (a[*n] != '\0' && a[*n] == b[*n]) {
        *n += 1;
    }
    return a[*n] - b[*n];
}
Does not look too good with all the indirection on n, but still.
As a side note: in main you should return 1 (or some other non-zero value) on error.

In your code the if condition is needed, because you are dereferencing the pointers. If you read past the end of a string into memory that was not allocated, you can get a segmentation fault, so to avoid this you
have to keep the if condition. Alternatively, you can move that check into the for loop's condition:
for((*len)=0; *sa==*sb && *sa!='\0' ; sa++,sb++, (*len)++);
So, to avoid the segmentation fault, you need this additional condition as a check.

Related

C arrays memcmp trouble

I need to write a code for my UNI, in which I have to receive 2 arrays of some data (it may be int array or char array or even float array) and compare n characters from pointers in both of the arrays, so far I am stuck with this
*note in other words - I need to write my own memcmp :
/*
 * Compare the first len bytes of two buffers.
 *
 * b1, b2 - addresses of the buffers to compare
 * len    - number of bytes to compare
 *
 * Returns 0 when all len bytes are equal, 1 at the first mismatch.
 */
int byte_compare(const void *b1,const void *b2,size_t len)
{
    /* The parameters are b1/b2; the original body referred to undeclared p1/p2. */
    const unsigned char *q1 = b1;
    const unsigned char *q2 = b2;

    /* Strictly less than len: "<=" would read one byte past the requested range. */
    for (size_t i = 0; i < len; i++)
    {
        if (q1[i] != q2[i])
            return 1;
    }
    return 0;
}
/* Test driver: runs byte_compare once on two char arrays and once on two int arrays. */
int main()
{
char str1[] = "abcheya";
char str2[] = "gtyheyb";
/* NOTE(review): str1[3] and str2[3] are char values, not addresses, while
 * byte_compare takes pointers - presumably &str1[3] / &str2[3] were intended. */
printf((byte_compare(str1[3],str2[3],3))?"not equal\n":"equal\n");
/*the output is equal*/
int arr1[]={1,2,3,4};
int arr2[]={1,2,7,59};
/* NOTE(review): same here - arr1[0] / arr2[0] are int values; the third
 * argument is a count of bytes, so 3 covers less than one int if int is 4 bytes. */
printf((byte_compare(arr1[0],arr2[0],3))?"not equal\n":"equal\n");
/*the output is also equal although it is not supposed to be*/
return 0;
}
when I compare strings it works just fine, but when I try to compare int or float it wont work properly, please help me.
You should compute the length in bytes when passing the len parameter. You can use a template for this:
// C++ only: compares len elements of type T by converting the element count
// into a byte count (len * sizeof(T)) and delegating to memcmp.
template<typename T>
int byte_compare(const T *b1, const T *b2, size_t len)
{
return memcmp(b1, b2, len * sizeof(T));
}
Edit note: The question has since been edited, so this answer no longer applies. It also uses C++, which is not applicable to C lessons. I'm leaving the answer here for those who have no restriction on using C++.
Edit: to write own memcmp function
/*
 * Hand-written memcmp: compares len bytes as unsigned char values.
 * Returns 0 if all bytes match, otherwise the difference between the first
 * pair of bytes that differ (negative if b1's byte is smaller).
 */
int byte_compare(const void *b1,const void *b2,size_t len)
{
    const unsigned char *lhs = b1;
    const unsigned char *rhs = b2;

    for (size_t i = 0; i < len; i++) {
        if (lhs[i] != rhs[i])
            return lhs[i] - rhs[i];
    }
    return 0;
}
You need to pass the size of the objects, not their length:
/* Thin wrapper over memcmp; size is the number of BYTES to compare,
 * i.e. element count times element size, not the element count. */
int byte_compare(const void *b1, const void *b2, size_t size)
{
return memcmp(b1, b2, size);
}
Your instructor probably does not want you to use a library call. So write a byte by byte comparison (aka memcmp).
Get a copy of K&R C (or another book on C), and you will find examples of how to process char(acter) values in a loop.
/*
 * Test two buffers for equality over len bytes.
 *
 * Returns 1 (true) when all len bytes are equal, 0 (false) when a byte
 * differs or when either pointer is null.
 */
int byte_compare(const void *b1,const void *b2,size_t len)
{
    if( !b1 || !b2 ) return 0; /* 0 represents false */

    const unsigned char *p1 = b1;
    /* Must start at b2; the original initialised p2 from itself, leaving it indeterminate. */
    const unsigned char *p2 = b2;

    while( len-- > 0 ) {
        if( *p1++ != *p2++ ) return 0;
    }
    return 1;
}

Generic binary search in C

I am having trouble with this code I wrote for a generic binary search.
when trying to execute the search on an array of strings I noticed that the array of strings, passed to binSearch function does not contain the strings.
can someone suggest a hint?
Much appreciation
#define SIZE 100
typedef unsigned char BYTE
please consider this main:
// Reads a list of strings and a search string, then reports whether
// stringBinSearch returned 1 for the search string.
void main()
{
char ** stringArr, stringToFind[SIZE];
int stringSize;
int res;
stringArr = getStringArr(&stringSize);
// string to find
// NOTE(review): gets() is given no buffer size, so input longer than
// SIZE - 1 characters overruns stringToFind.
gets(stringToFind);
res = stringBinSearch(stringArr, stringSize, stringToFind);
if (res == 1)
printf("The string %s was found\n", stringToFind);
else
printf("The string %s was not found\n", stringToFind);
}
// Reads a count followed by that many lines, and returns a malloc'ed array of
// malloc'ed copies of those lines. The count is stored in *stringSize.
char** getStringArr(int* stringSize)
{
int i, size, len;
char** arr;
char temp[SIZE];
scanf("%d", &size);
// Consume one character after the number (presumably the newline).
getchar();
arr = (char**)malloc(size * sizeof(char*));
checkAllocation(arr);
for (i = 0; i < size; i++)
{
// NOTE(review): gets() is given no buffer size; a line longer than SIZE - 1 overruns temp.
gets(temp);
len = strlen(temp);
// temp[len] is already the terminator found by strlen, so this store changes nothing.
temp[len] = '\0';
arr[i] = (char*)malloc((len+1) * sizeof(char));
checkAllocation(arr[i]);
strcpy(arr[i], temp);
}
*stringSize = size;
return arr;
}
// Searches stringArr (stringSize elements of type char*) for stringToFind via binSearch.
// NOTE(review): Item is passed as the char* itself, whereas binSearch hands the
// comparator the ADDRESS of each array slot (a char**) - the two comparator
// arguments therefore have different levels of indirection.
int stringBinSearch(char** stringArr, int stringSize, char* stringToFind)
{
return binSearch(stringArr, stringSize, sizeof(char*), stringToFind,compare2Strings);
}
int binSearch(void* Arr, int size, int ElemSize, void* Item, int(*compare)(void*, void*))
{
int left = 0, right = size - 1, place;
BOOL found = FALSE;
while (found == FALSE && left <= right)
{
place = (left + right) / 2;
if (compare(Item, (BYTE*)Arr + place*ElemSize) == 0)
found = TRUE;
else if (compare(Item, (BYTE*)Arr + place*ElemSize) < 0)
right = place - 1;
else
left = place + 1;
}
return found;
}
// Comparator: treats both arguments as char* and returns strcmp of them.
// NOTE(review): binSearch passes the address of an array slot as the second
// argument; for an array of char* that address is a char**, not a char*.
int compare2Strings(void* str1, void* str2)
{
char* elemA, *elemB;
elemA = (char*)str1;
elemB = (char*)str2;
return strcmp(elemA, elemB);
}
When you sort an array of int, the values passed are pointer to int, spelled int *. When you sort an array of strings (spelled char *), the values passed are pointer to string, spelled char **. You comparator is no use for comparing strings. As the inimitable BLUEPIXY said in their incredibly terse style — you need to modify the code to treat the passed void * arguments as char ** and not as char *.
With generic sorting, that's usually the end of the issue. With binary search, there's another issue that you run foul of. That is that the type of the item being searched for needs to be the same as the one of the entries in the array, so you need to pass a pointer to the item, not just the item.
So, adding material to allow the code to compile with minimal changes, changing from gets() to a cover for fgets() (because gets() is too dangerous to be used — ever! and programs that use it produce a warning when its used on macOS Sierra 10.12.5 — warning: this program uses gets(), which is unsafe.), and printing out the input data so you can see what's what, I end up with:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BOOL int
#define TRUE 1
#define FALSE 0
/*
 * Read one line from stdin into buffer (at most buflen - 1 characters) and
 * cut it at the first newline, if any.
 * Returns buffer on success, or a null pointer when fgets reports EOF/error.
 */
static inline char *sgets(size_t buflen, char *buffer)
{
    if (fgets(buffer, buflen, stdin) == 0)
        return 0;
    buffer[strcspn(buffer, "\n")] = '\0';
    return buffer;
}
#define checkAllocation(x) assert((x) != 0)
#define SIZE 100
typedef unsigned char BYTE;
char **getStringArr(int *stringSize);
int stringBinSearch(char **stringArr, int stringSize, char *stringToFind);
int binSearch(void *Arr, int size, int ElemSize, void *Item, int (*compare)(void *, void *));
int compare2Strings(void *str1, void *str2);
/*
 * Reads the string list and the search string, echoes both, then reports
 * whether stringBinSearch returned 1 for the search string.
 */
int main(void)
{
    int count;
    char **strings = getStringArr(&count);

    char needle[SIZE];
    sgets(sizeof(needle), needle);

    printf("Strings: %d\n", count);
    for (int idx = 0; idx < count; idx++)
        printf("[%d] = [%s]\n", idx, strings[idx]);
    printf("Search: [%s]\n", needle);

    int outcome = stringBinSearch(strings, count, needle);
    if (outcome != 1)
        printf("The string %s was not found\n", needle);
    else
        printf("The string %s was found\n", needle);
    return 0;
}
/*
 * Read a count followed by that many lines from stdin.
 *
 * stringSize - receives the number of strings actually stored
 *
 * Returns a malloc'ed array of malloc'ed strings owned by the caller.
 * Returns a null pointer (with *stringSize == 0) when no valid, non-negative
 * count could be read. If input ends before all lines were read, the strings
 * read so far are returned and *stringSize reflects that shorter count.
 */
char **getStringArr(int *stringSize)
{
    int size = 0;
    char temp[SIZE];

    *stringSize = 0;

    /* Without this check a failed scanf would leave size indeterminate. */
    if (scanf("%d", &size) != 1 || size < 0)
        return 0;
    getchar();  /* skip the character following the count (normally the newline) */

    /* Request at least one slot: malloc(0) may legitimately return a null
     * pointer, which checkAllocation would treat as a failure. */
    char **arr = malloc((size_t)(size > 0 ? size : 1) * sizeof *arr);
    checkAllocation(arr);

    int i;
    for (i = 0; i < size; i++)
    {
        /* On EOF/error temp holds no valid line; stop instead of copying it. */
        if (sgets(sizeof(temp), temp) == 0)
            break;
        arr[i] = malloc(strlen(temp) + 1);
        checkAllocation(arr[i]);
        strcpy(arr[i], temp);
    }
    *stringSize = i;
    return arr;
}
/* Searches stringArr (stringSize elements of type char *) for stringToFind.
 * The ADDRESS of stringToFind is passed as Item so that both comparator
 * arguments are char ** - binSearch hands the comparator the address of each
 * array slot. Returns binSearch's result. */
int stringBinSearch(char **stringArr, int stringSize, char *stringToFind)
{
return binSearch(stringArr, stringSize, sizeof(char *), &stringToFind, compare2Strings);
}
/*
 * Generic binary search over an array sorted consistently with compare.
 *
 * Arr      - start of the array
 * size     - number of elements
 * ElemSize - size in bytes of one element
 * Item     - pointer handed to compare as its first argument
 * compare  - called as compare(Item, address-of-element); returns <0, 0 or >0
 *
 * Returns TRUE when an element compares equal to Item, FALSE otherwise.
 */
int binSearch(void *Arr, int size, int ElemSize, void *Item, int (*compare)(void *, void *))
{
    int left = 0, right = size - 1;

    while (left <= right)
    {
        /* Equivalent to (left + right) / 2 but cannot overflow int. */
        int place = left + (right - left) / 2;
        /* Call the comparator once per probe and reuse the result. */
        int order = compare(Item, (BYTE *)Arr + (size_t)place * (size_t)ElemSize);

        if (order == 0)
            return TRUE;
        if (order < 0)
            right = place - 1;
        else
            left = place + 1;
    }
    return FALSE;
}
/* Comparator for arrays of char *: each argument is the address of a
 * char * slot; returns strcmp of the two strings those slots point to. */
int compare2Strings(void *str1, void *str2)
{
    char **lhs = str1;
    char **rhs = str2;
    return strcmp(*lhs, *rhs);
}
The key changes are:
compare2Strings() — compare the data in char ** values.
stringBinSearch() — pass the address of stringToFind.
AFAICR, any other change is cosmetic or 'infrastructure'.
Note that the return type of main() should be int — you can get away with void only on Windows where it is allowed.
Example run 1:
Data:
5
Antikythera
albatross
armadillo
pusillanimous
pygmalion
pygmalion
Output:
Strings: 5
[0] = [Antikythera]
[1] = [albatross]
[2] = [armadillo]
[3] = [pusillanimous]
[4] = [pygmalion]
Search: [pygmalion]
The string pygmalion was found
Example run 2:
Data file:
5
armadillo
pygmalion
Antikythera
pusillanimous
albatross
pygmalion
Output:
Strings: 5
[0] = [armadillo]
[1] = [pygmalion]
[2] = [Antikythera]
[3] = [pusillanimous]
[4] = [albatross]
Search: [pygmalion]
The string pygmalion was not found
The difference between the two sets of data is that in the first case, the strings are in correct sorted order — a prerequisite condition for successful (reliable) binary search — and in the second, the data is not in correct sorted order. (That said, I had one non-sorted order that still found 'pygmalion' — I used a different shuffle for the shown results. But the 'reliable' comment applies.)
Hello, your problem is the way you pass the array of strings to the binary search function. Because you need to pass an array of strings to it, your Arr parameter must be void** not void*:
int binSearch(void** Arr, int size, int ElemSize, void* Item, int(*compare)(void*, void*))
And in your function, whenever you want to access a string from your array, it is enough to access it like: (char*) *(Arr+place*ElemSize)
Your approach which is to write a generic binary search is right. However attempting to return early slows down a binary search. It also means you can't use the C++ convention that "less than" is the comparison operator defined. Wait until left and right equal each other, and return that.

qsort Segmentation Fault structs

So, my first question here, please be patient with me:
My task is to sort an array of structs (name, surname and another struct for the birthday, which consists of the year, month, day). I have to sort by birthdate and by using qsort.
My problem is, I looked up everything about qsort but i am not quite sure if my implementation is correct since I am new to C. I can create the executable program but it is not giving my any result only Segmentation Fault.
Here is my Code:
#include <stdio.h>
#include <stdlib.h>
/* Comparator signature expected by qsort. */
typedef int (*compfn) (const void*, const void*);
/* Calendar date. */
typedef struct {
unsigned year, month, day;
} date_t;
/* One person record; name and surname each hold up to 31 characters plus the terminator. */
typedef struct {
char name[32];
char surname[32];
date_t birthday;
}person_t;
/* Growable array of person_t. */
typedef struct {
unsigned n;      /* number of records stored in arr */
unsigned cap;    /* number of records arr has room for */
person_t *arr;   /* storage obtained with realloc */
} persons_t;
/* Orders two persons by birthday: year first, then month, then day.
 * NOTE(review): the fields are unsigned, so each subtraction is carried out
 * in unsigned arithmetic and only then converted to the int return type.
 * NOTE(review): the parameters are person_t *, not the const void * that
 * qsort's comparator type specifies; the caller casts to compfn. */
int compare(person_t *a, person_t *b){
if(a->birthday.year!=b->birthday.year){
return a->birthday.year-b->birthday.year;
}else{
if(a->birthday.month!=b->birthday.month){
return a->birthday.month-b->birthday.month;
}else{
return a->birthday.day-b->birthday.day;
}
}
}
/* Reads person records from the file named by argv[1], sorts them with
 * qsort and prints them. Returns 1 on a usage or open error, 0 otherwise. */
int main(int argc, char* argv[])
{
if (argc <= 1) {
fprintf(stderr, "syntax: %s <inputfile>\n", argv[0]);
return 1;
}
FILE* f = fopen(argv[1], "rt");
if (f == NULL) {
fprintf(stderr, "cannot open file %s\n", argv[1]);
return 1;
}
persons_t persons;
persons.n = 0;
persons.cap = 0;
persons.arr = NULL;
person_t p;
/* Each record is "name surname YYYY-MM-DD"; stop at the first record that
 * does not yield all five fields.
 * NOTE(review): the %s conversions carry no width, so a word longer than
 * 31 characters overruns name/surname. */
while (fscanf(f, "%s %s %4u-%2u-%2u", p.name, p.surname,
&p.birthday.year, &p.birthday.month, &p.birthday.day) == 5) {
/* Array full: grow to 1 slot at first, then double. */
if (persons.n == persons.cap) {
persons.cap = persons.cap == 0 ? 1 : 2 * persons.cap;
/* NOTE(review): the realloc result is not checked before use. */
persons.arr = realloc(persons.arr, persons.cap * sizeof(persons.arr[0]));
}
persons.arr[persons.n++] = p;
}
/* NOTE(review): this is the capacity in BYTES, yet it is passed to qsort as
 * the element count; persons.n is the number of records actually stored. */
int nitems = persons.cap*sizeof(persons.arr[0]);
int size = sizeof(persons.arr[0]);
qsort(persons.arr, nitems, size, (compfn)compare);
for (unsigned i = 0; i < persons.n; i++) {
person_t *p = persons.arr + i;
printf("%s %s %4u-%2u-%2u\n",
p->name, p->surname,
p->birthday.year, p->birthday.month, p->birthday.day);
}
fclose(f);
return 0;
}
I hope someone can help me,
Thanks in advance ;)
As far as _t-suffixed identifiers go, POSIX reserves them for the implementation (e.g. your compiler, and/or your standard library). It's very possible that your implementation already has a date_t type, and your code might be causing some kind of mischief. If you wish to avoid subtly and dangerously clashing identifiers wreaking all sorts of havoc, it's probably best to avoid them. Not to worry, you could always use '_s' to denote a struct type instead!
Whenever you're declaring a variable that represents an index within an array, use size_t as the type!
int compare(person_t *a, person_t *b){
...
qsort(persons.arr, nitems, size, (compfn)compare);
According to the qsort manual, the argument given as the comparator function should be an int (*compar)(const void *, const void *), and that's what you've given since you've cast to (compfn). As far as qsort is aware that function accepts two const void * arguments, which might differ in representation to person_t * arguments. This could certainly cause segfaults. Don't lie about the type of compare. Change it to look more like:
int compare(const void *x, const void *y) {
const person_s *a = x, *b = y;
/* ... */
}
... and you won't need the cast or the typedef.
Next, onto return values for that function. I have used implementations where-by lexically illogical return values cause segmentation faults. For example, if a <= b and b <= c, then a <= c, but your code doesn't guarantee this. In fact, using your code it is possible that a <= b, b <= c and a > c. I recommend making sure your code guarantees correspondence between the return value and lexical order. You can do so by returning 1 for greater than, 0 for equal to or -1 for less than.
/* Three-way comparison: -1 if x < y, 0 if x == y, 1 if x > y.
 * Each argument is evaluated twice, so pass side-effect-free expressions. */
#define lexical_order(x,y) (((x) > (y)) - ((x) < (y)))

/*
 * qsort comparator for person_s records: orders by birthday year, then
 * month, then day. Returns -1, 0 or 1.
 */
int compare(const void *x, const void *y){
    /* The second parameter must be named y: the original named it b, which
     * clashed with the local b and left y undeclared. */
    const person_s *a = x, *b = y;

    int order = lexical_order(a->birthday.year, b->birthday.year);
    if (order == 0)
        order = lexical_order(a->birthday.month, b->birthday.month);
    if (order == 0)
        order = lexical_order(a->birthday.day, b->birthday.day);
    return order;
}
I'm sure you're aware that you should be checking the return value of realloc... For example:
void *temp = realloc(persons.arr, persons.cap * sizeof(persons.arr[0]));
if (temp == NULL) { /* If we don't check return value prior *
* to assigning to persons.arr, we *
* might leak some memory... */
puts("Error in realloc");
free(persons.arr);
exit(-1);
}
persons.arr = temp;
Finally, and most importantly (this is probably your error), are you sure about this?
int nitems = persons.cap*sizeof(persons.arr[0]);
If you mean to pass this as the number of items to qsort (which is usual), then I think that should be:
size_t nitems = persons.n;
P.S. In case you missed it the second time, you should probably audit your code to make sure you're using size_t to store array indexes only.
P.P.S. Don't forget to free(persons.arr); at the end of your program, so you don't end up with reports of memory leaks when you use valgrind...
P.P.P.S. valgrind is awesome!
So you are allocating our array by doubling its size whenever needed, using persons.cap, but you are not filling all its elements, are you?
From your code, the actual number of persons is nitems = persons.n, not persons.cap. What if you retry your code with nitems=persons.n?
If you have unfilled elements in your array, it means the strings inside them are arbitrary (i.e person.name), so probably not null-terminated, and the crash will occur when you try to display them.

qsort() sorts one array of strings but segfaults on another one

I'm trying to read a bunch of names from a .txt file and copying them to an array as I go. I then want to sort the array using qsort(). Also, the file I'm reading is names.txt from Project Euler #22. Here is the code:
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* Return a newly malloc'ed copy of s, or NULL if the allocation fails. */
char *strdup(char *s)
{
    char *copy = (char *) malloc(strlen(s) + 1);

    if (copy == NULL)
        return NULL;
    strcpy(copy, s);
    return copy;
}
/* qsort comparator for an array of strings: a and b are the addresses of two
 * array slots; returns strcmp of the strings stored in them. */
int compare(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
/* Reads quoted names from names.txt into names[], then sorts the array with qsort. */
int main(void)
{
FILE *fp;
int c, i, j=0;
char name[100], *names[10000];
fp = fopen("names.txt", "r");
if (fp == NULL) {
printf("can't open file\n");
exit(0);
}
c = fgetc(fp); /* initialize c and skip first quotation mark */
while (c != EOF) { /* loop until no names are left */
i = 0;
while ((c=fgetc(fp)) != '"') /* copy chars to name until " is reached */
name[i++] = c;
name[i] = '\0';
/* j counts how many slots of names[] have been filled. */
names[j++] = strdup(name);
fgetc(fp); /* skip comma */
c = fgetc(fp);
}
size_t size = sizeof(names[0]);
/* NOTE(review): this is the full capacity of names[] (10000), not j; slots
 * at index j and above were never assigned. */
size_t count = sizeof(names)/size;
qsort((void **) names, count, size, &compare);
return 0;
}
Trying to sort the names array causes a segfault. However, if I instead try to sort an array of strings that is explicitly declared it works:
char *test[] = { "FOO", "BAR", "TEST" };
size_t size = sizeof(test[0]);
size_t count = sizeof(test)/size;
qsort((void **) test, count, size, &compare);
for (i = 0; i < 3; ++i)
printf("%s\n", test[i]);
return 0;
I suspect that the segfault is due to an error in my array "names", but if I loop through and print each element of "names" before trying to sort it does so without a problem.
Any help is much appreciated!
This line:
size_t count = sizeof(names)/size;
Will yield the entire length of your names array, not just the values you have initialized. If you entered fewer than 10000 names, you're going to have some invalid pointers in there, and when you try to sort them - KABOOM!
You can just use j instead of count, since you're using that to keep track of how many names have been input.
You are missing to initialise names.
The easiest way to do so is like this:
names[10000] = {NULL};
Also, the compare function is not prepared to handle the unused entries. You could modify it like this, treating unused entries like empty entries:
/*
 * qsort comparator for an array of strings in which unused slots hold a null
 * pointer; a null slot is treated as the empty string.
 */
int compare(const void *a, const void *b)
{
    /* a and b are the addresses of array slots; the value that may be null is
     * the pointer STORED in the slot, so that is what must be tested. */
    const char *ap = *(const char *const *) a;
    const char *bp = *(const char *const *) b;

    return strcmp(ap ? ap : "", bp ? bp : "");
}
Alternatively, you could sort all unused entries to the end:
/*
 * qsort comparator for an array of strings in which unused slots hold a null
 * pointer; unused slots sort after every real string.
 *
 * Returns strcmp of the two strings when both slots are in use, -1 when only
 * b's slot is unused, 1 when only a's slot is unused, 0 when both are unused.
 */
int compare(const void *a, const void *b)
{
    /* a and b cannot be dereferenced as void *; read the stored pointers first. */
    const char *ap = *(const char *const *) a;
    const char *bp = *(const char *const *) b;

    if (ap && bp)
        return strcmp(ap, bp);
    if (ap)
        return -1;
    if (bp)
        return 1;
    return 0;
}
Also, you are telling qsort() to always inspect all of names's entries, which is unnecessary.

how to create a substring without using malloc()

How can I implement a substring function such as the following that returns the substring but without using malloc() in the process so I don't have to worry about freeing the associated memory elsewhere in my code using the free() function. Is this even possible?
/*
 * Return a newly allocated copy of part of a string.
 *
 * string   - source string
 * position - 1-based index of the first character to copy (values below 1
 *            behave like 1)
 * length   - number of characters to copy (negative values behave like 0)
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free.
 * Prints a message and exits if the allocation fails. No bounds check is
 * made: the caller must ensure the requested range lies inside string.
 */
const char *substring(const char *string, int position, int length)
{
    /* A negative length would make length + 1 a nonsensical allocation size. */
    if (length < 0)
        length = 0;

    char *pointer = malloc((size_t)length + 1);
    if (pointer == NULL)
    {
        printf("Unable to allocate memory.\n");
        exit(EXIT_FAILURE);
    }

    if (position > 1)
        string += position - 1;

    int c;
    for (c = 0 ; c < length ; c++)
        pointer[c] = string[c];
    pointer[c] = '\0';

    /* Return the buffer just filled; the original returned an undeclared name. */
    return pointer;
}
UPDATE: 30 DEC 2012
Having considered all the answers and comments, it's clear that essentially what I'm trying to do is create a dynamically sized array (i.e. the substring), and that is not possible in C without somewhere along the way having to use some kind of malloc() function and a subsequent free() call on the substring pointer, or without the aid of a garbage collector. I attempted to integrate the libgc garbage collector as kindly suggested by @elhadi, but so far have not been able to get this to work in my Xcode project. So I have opted to stick with using the following code with malloc() and free().
/*
 * Return a newly allocated copy of len characters of srcString starting at
 * the 0-based index offset.
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free, or
 * NULL if an argument is invalid (null source, negative offset or length) or
 * the allocation fails. The caller must ensure offset + len does not exceed
 * the length of srcString.
 */
char * subStr(const char* srcString, const int offset, const int len)
{
    if (srcString == NULL || offset < 0 || len < 0)
        return NULL;

    char * sub = malloc((size_t)len + 1);
    /* Writing through a failed allocation would be undefined behaviour. */
    if (sub == NULL)
        return NULL;

    memcpy(sub, srcString + offset, (size_t)len);
    sub[len] = 0;
    return sub;
}
/* Demo: extracts 8 characters starting at offset 6 of message, prints them
 * and frees the buffer returned by subStr. */
int main()
{
const char * message = "hello universe";
char * sub = subStr( message, 6, 8 );
printf( "substring: [%s]", sub );
free(sub);
}
I see two options:
If you can destroy the source string (usually a bad thing):
{
string[ position + length] = 0;
return & string[ position ];
}
Note: (see Cole Johnsons note: free no longer works on the returned pointer!)
If you can't modify the source string:
Modify your methods signature so that the caller has to worry about it:
const char *substring(const char *source, char* destination, int position, int length)
And put the modified string into destination (and return it).
And do not even think about this:
const char *substring(const char *string, int position, int length)
{
char *pointer;
int c;
static char modifiedString[256];
...
return modifiedString;
}
Using a static variable inside the function for the modified results...
(This is not thread-safe (not re-entrant!) )
Use a local buffer (an auto array) and a function like this:
/*
 * Copy len characters of src, starting at index loc, into dst and terminate
 * the result. dst must have room for len + 1 bytes.
 */
void substr(char *dst, const char *src, size_t loc, size_t len)
{
    const char *from = &src[loc];

    memcpy(dst, from, len);
    *(dst + len) = '\0';
}
Call it like this:
const size_t size = 3;
char buf[size + 1]; // this is an auto array, it will be "freed" at the end of the scope
substr(buf, "abcdFOObar", 4, size);
Always ensure the buffer is at least len + 1 bytes long to avoid buffer overflow errors.
/*
 * Copy length characters of string, starting at the 1-based index position,
 * into the caller-supplied buffer substr and terminate it.
 * substr must have room for length + 1 bytes. Returns substr.
 */
const char *substring(const char *string, char *substr, int position, int length)
{
    const char *src = string;
    int skipped = 0;
    while (skipped < position - 1) {
        src++;
        skipped++;
    }

    int copied = 0;
    while (copied < length) {
        substr[copied] = src[copied];
        copied++;
    }
    substr[copied] = '\0';
    return substr;
}
calling function...
/* Demo: copies 4 characters starting at 1-based position 2 into the local
 * buffer substr, which is large enough for 9 characters plus the terminator. */
int main(int argc, char * argv[]) {
char substr[10];
substring("hello! World", &substr[0], 2, 4);
}
The best way to do it is:
/* Non-owning, read-only view of len characters starting at s.
 * No memory is allocated, so there is nothing to free. */
typedef struct vstr_t {
    const char *s;   /* first character of the view; const so string constants can be viewed */
    int len;         /* number of characters in the view */
} vstr_t;

/* Build a vstr_t viewing l characters starting at d.
 * A C99 compound literal replaces the GNU-only statement expression. */
#define vstr_set(d, l) ((vstr_t){ .s = (d), .len = (l) })

/* Expands to the (precision, pointer) argument pair consumed by a "%.*s"
 * conversion. vs is evaluated twice. */
#define vstr_fmt_arg(vs) (vs).len, (vs).s

int main()
{
    const char *message = "hello universe";
    /* The view must be taken from message; the original referenced an undeclared name. */
    printf( "substring: [%.*s]\n", vstr_fmt_arg(vstr_set(message + 6, 8)));
    return 0;
}
You can use a garbage collector, you allocate the memory the first time, the garbage collector will free the memory when no needed.
you should include
#include "gc.h"
in the main you should make something like
GC_INIT(); /* Optional on Linux/X86;*/
and your substr function is:
/*
 * Return a copy of len characters of buffer starting at the 0-based index
 * offset, allocated with GC_MALLOC so the caller does not free it explicitly.
 * Returns a null pointer if the allocation fails.
 */
char *substr(const char* buffer, const int offset, int len)
{
    /* sub must be a pointer: the original declared a single char. */
    char *sub = (char*)GC_MALLOC(len+1);
    if (!sub)
        return 0;
    memcpy(sub, buffer + offset, len);
    sub[len] = 0;
    return sub;
}
you should link with libgc.a

Resources