Converting a comma separated string to array - arrays

I have been trying to convert a string in array of integers, floats and characters. While I could get it work for integers and floats, there is some problem for characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char *s1;
int k, no=5;
char* variable = "R1,R2,R3,R4,R5";
void* value;
s1 = calloc(no,sizeof(char)*81);
for (k=0; k<no; k++) s1[k] = strdup(mchar);
ListChar(variable, s1, no, ",");
memcpy(value, s1, no*sizeof(char)*81);
free(s1);
int i;
for (i = 0; i < no; i++)
printf("%s", value[i]);
printf("\n");
return 0;
}
In the header file I have
#define mchar "A...(81times)"
Implementation:
int ListChar(char *buf, char *list, int maxloop, char* delim)
{
int n = 0;
char *s,*t;
s= strdup(buf);
t= strtok(s,delim);
while ( t && (n<maxloop))
{
if (list!=NULL) list[n] =strdup(t);
n++;
t=strtok(NULL,delim);
}
free(s);
return(n);
}
During the calloc memory assignment when I watch s1 its 0xsomeadress ""
After the for loop s1 becomes 0xsomeadress "Garbage value 81 times"
When s1 is assigned to list its still reads the same garbage value.
And when list [n] = strdup(t) list[0] reads the first block of garbage value like -21 '\221 ṗ'.
t is getting delimited correctly. I even tried initializing char *s1[81] = {"something"} and looping it on j but it wont work, same problem, and I need to free s1 at the end because this function runs for number of times. I did it for integers and floats by list[n]=atoi(t) it works fine. Can anyone suggest me something?

There seems to be a fundamental misunderstanding about how strings work. Your s1 clearly needs to be a char ** and the usage of strdup is incorrect. If s1 is of type char *, then s1[k] is of type char. But strdup returns a char *, so s1[k] = strdup ... is clearly an error which your compiler ought to warn you about. Perhaps you want something like:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void * xmalloc(size_t s);
void
ListChar(const char *buf, char **list, int maxloop, int delim)
{
char set[] = {delim, 0};
for( int n = 0; n < maxloop; n += 1 ){
size_t len = strcspn(buf, set);
list[n] = xmalloc(len + 1);
memcpy(list[n], buf, len);
buf += len + 1;
}
}
int
main(int argc, char **argv)
{
int delim = ',';
(void)argc; /* Suppress compiler warning */
while( *++argv ){
char **s1;
int k, num = 1;
char *input = *argv;
for( const char *p = input; *p; p += 1 ){
if( *p == delim ){
num += 1;
}
}
s1 = xmalloc(num * sizeof *s1);
ListChar(input, s1, num, delim);
for( int i = 0; i < num; i += 1 ){
printf("%s\n", s1[i]);
}
free(s1);
}
return 0;
}
void *
xmalloc(size_t s)
{
void *rv = malloc(s);
if( rv == NULL ){
perror("malloc");
exit(EXIT_FAILURE);
}
return rv;
}
Note that the above code scans each string twice, which is not ideal. Rather than scanning the string to find the number of delimiters and then parsing the string, it would be better to do both in one pass. But for the purposes of demonstrating how to break up the string, that seems like unnecessary complexity. (Though it's actually simpler, IMO)

Related

Segmentation fault - divide and conquer

I'm trying to write a program that displays the longest common prefix using the divide and conquer method. My code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
const char *lcpUtil(char str1[], char str2[])
{
char *result = NULL;
result = (char *)malloc(sizeof(char) * 100);
int n1 = strlen(str1), n2 = strlen(str2);
for(int i = 0, j = 0; i <= n1-1 && j <= n2-1; i++, j++)
{
if(str1[i] != str2[j])
break;
strncat(result, &str1[i], 1); // append the prefix to the result string
}
return (result);
}
const char *lcp(char **str, int l, int r)
{
char str1[100], str2[100];
if(l == r)
{
return (str[l]);
}
if (l < r)
{
int m = (l + r)/2;
strcpy(str1, lcp(str, l, r));
strcpy(str2, lcp(str, m+1, r));
}
return (lcpUtil(str1, str2));
}
int main(int argc, char **argv)
{
char *arr[4] = {"apple", "application", "april", "apartment"}; // ap
int n = 4;
char prefix[100];
strcpy(prefix, lcp(arr, 0 , n-1));
if(strlen(prefix))
{
printf("The longest common prefix: %s\n", prefix);
}
else
printf("There is no common prefix");
return 0;
}
If I run this, a get a segmentation fault, debugging with gdb says:
Program received signal SIGSEGV, Segmentation fault.
0x00005555555552b7 in lcp (str=<error reading variable: Cannot access memory at address 0x7fffff7fefa8>, l=<error reading variable: Cannot access memory at address 0x7fffff7fefa4>,
r=<error reading variable: Cannot access memory at address 0x7fffff7fefa0>) at lab11.c:23
#1 0x0000555555555325 in lcp (str=0x7fffffffde40, l=0, r=3) at lab11.c:30
I know I didn't free the memory allocated with malloc, but it should still be working. I'm not sure why this is happening. Any ideas?
An infinite recursion is invoked because the function lcp(str, l, r) calls lcp(str, l, r).
Also undefined behavior is invoked by using contents of uninitialized buffer allocated via malloc() and assigned to result in strncmp() called from lcpUtil.
#include <string.h> /* needed for size_t */
size_t lcp_len(char *left, char *right)
{
/* if the strings differ we are done */
if (*left != *right) return 0;
/* if we reached the end of the string(s) we are done) */
if ( !*left ) return 0;
/* add 1 to the length and compare the next two characters, recursively */
return 1+lcp_len(left+1, right+1);
}
size_t lcp_fetch(char *dest, char *left, char *right)
{
if (*left != *right) { *dest=0; return 0; }
if ( !*left ) { *dest=0; return 0; }
*dest = *left;
return 1+lcp_fetch(dest+1, left+1, right+1);
}
Now let's call it:
#include <stdio.h>
int main(int argc, char **argv)
{
size_t result_len;
char result[100];
if (argc < 3) return 1;
result_len = lcp_fetch(result, argv[1], argv[2] );
printf("%zu:%s\n", result_len, result );
return 0;
}

In C, how can one dynamically build an array of strings in a function and return to caller

RHEL6
I'm trying to implement a perl split funciton in a C subroutine which dynamically builds the array of strings. My attempt fails with a segfault. But it does not fail if I comment out the printf statement in the for loop (perhaps implying that the segfault is in where its getting built as opposed to how)
Here it is...
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int split(char *s, char **arr);
void main(int argc, char* argv[])
{
int x;
int arrsz;
char str[]="aaa:bbb:ccc";
char **arr;
arrsz=split(str,arr);
for(x=0;x<arrsz;x++) {
printf("%s\n",arr[x]);
}
exit(0);
}
/***********************************/
int split(char *str, char **arr) {
int arrsz=0;
char delim[2] = ":";
char *tok;
arr = malloc(sizeof(char **));
arr[0] = malloc(1);
arr[0] = '\0';
tok = strtok(str,delim);
while(tok != NULL) {
arrsz++;
arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
strcpy(arr[arrsz-1],tok);
arr[arrsz]=malloc(1);
arr[arrsz]='\0';
tok = strtok(NULL,delim);
}
return(arrsz);
}
I think the problem is in how I'm passing "arr" to the split function or how it's being received and used in the function. I say this because if I move the body of the function to main, it works there.
I tried dealing with arr inside the functions as it it was a (char ***), but that didn't work.
Can a C expert out there set me straight ?
The main error is that you should pass a pointer to the strings list to the split function, not the strings list itself, so you should use an ***arr:
int split(char *str, char ***arr);
And you should use & to pass the pointer in main:
...
arrsz=split(str,&arr);
...
In the function you could use a double pointer to avoid confusion and at the end assign that pointer to the parameter:
int split(char *str, char ***arrreturn) {
char **arr; //Use this strings list to add the strings
...
*arreturn = arr;
return(arrsz);
}
-You should not call realloc anytime you need to insert a string, but you could oversize it and increment its dimension if you need.
-I cannot see the need of assign '\0' at the end of the list if you have a variable with the length
-You can use strdup instead of malloc-strcpy funcs:
char *first = "ciao";
char *str = malloc(strlen(first) * sizeof(char));
strcpy(str, first);
Is equal to:
char *first = "ciao";
char *str = strdup(first);
I corrected your code:
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int split(char *str, char ***arrreturn);
void main(int argc, char *argv[]) {
int x;
int arrsz;
char str[] = "aaa:bbb:ccc";
char **arr;
arrsz = split(str, &arr);
for (x = 0; x < arrsz; x++) {
printf("%s\n", arr[x]);
}
exit(0);
}
/***********************************/
int split(char *str, char ***arrreturn) {
int arrsz = 1;
int len = 0;
char delim[2] = ":";
char *tok;
char **arr;
arr = malloc(sizeof(char **));
tok = strtok(str, delim);
while (tok != NULL) {
len++;
if (len >= arrsz) {
arrsz *= 2;
arr = realloc(arr, arrsz * sizeof(char **));
}
arr[len - 1] = strdup(tok);
tok = strtok(NULL, delim);
}
*arrreturn = arr;
return (len);
}
There are a few bugs. I've annotated and [partially] fixed bugs. It will still segfault. I added a refactored version that will work correctly.
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int split(char *s, char **arr);
void main(int argc, char* argv[])
{
int x;
int arrsz;
char str[]="aaa:bbb:ccc";
char **arr;
#if 1
#endif
arrsz=split(str,arr);
for(x=0;x<arrsz;x++) {
printf("%s\n",arr[x]);
}
exit(0);
}
/***********************************/
int split(char *str, char **arr) {
int arrsz=0;
char delim[2] = ":";
char *tok;
// NOTE/BUG: this function only changes arr within the function and does
// _not_ propagate it to the caller
arr = malloc(sizeof(char **));
// NOTE/BUG: this is replaced in the loop and leaks memory
#if 0
arr[0] = malloc(1);
arr[0] = '\0';
#endif
tok = strtok(str,delim);
while(tok != NULL) {
arrsz++;
// NOTE/BUG: this is incorrect -- it only adds a byte instead of another
// pointer (i.e. it doesn't allocate enough)
#if 0
arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
#else
arr = (char **)realloc(arr,sizeof(char *) * (arrsz + 1));
#endif
#if 0
arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
strcpy(arr[arrsz-1],tok);
#else
arr[arrsz-1] = strdup(tok);
#endif
// NOTE/BUG: this is wrong and leaks memory
#if 0
arr[arrsz]=malloc(1);
arr[arrsz]='\0';
#endif
tok = strtok(NULL,delim);
}
#if 1
arr[arrsz] = NULL;
#endif
return(arrsz);
}
But, as written, your function doesn't update caller's value of arr.
To fix your function, split would need arr to be defined as a "three star" pointer (e.g. char ***arr) which is considered cumbersome and very bad practice.
So, a better/simpler solution is to refactor the function and pass back arr as return (e.g. char **split(char *str,int *sizrtn):
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **split(char *s, int *arsiz);
int main(int argc, char* argv[])
{
int x;
int arrsz;
char str[]="aaa:bbb:ccc";
char **arr;
arrsz = 0;
arr = split(str,&arrsz);
for(x=0;x<arrsz;x++) {
printf("%s\n",arr[x]);
}
return 0;
}
/***********************************/
char **split(char *str, int *sizrtn)
{
int arrsz=0;
const char *delim = ":";
char *tok;
char **arr = NULL;
tok = strtok(str,delim);
while (tok != NULL) {
arrsz++;
arr = realloc(arr,sizeof(char *) * (arrsz + 1));
arr[arrsz - 1] = strdup(tok);
tok = strtok(NULL,delim);
}
if (arr == NULL)
arr = malloc(sizeof(*arr));
arr[arrsz] = NULL;
*sizrtn = arrsz;
return arr;
}
To modify an object in the caller's scope you must pass a pointer to the object - so you need one more level of indirection. There is also at least one semantic error in the implementation - assigning '\0' to the pointer returned by malloc(), will both invalidate the pointer and cause a memory leak.
Change split() prototype to:
int split( char* s, char*** arr ) ;
Then call it thus:
arrsz = split( str, &arr ) ;
And change the implementation:
int split( char* str, char*** arr )
{
int arrsz = 0 ;
char delim[2] = ":" ;
char* tok ;
*arr = malloc(sizeof(char**));
*arr[0] = malloc(1);
**arr[0] = '\0'; // <<< This is fixed too
tok = strtok( str, delim ) ;
while( tok != NULL )
{
arrsz++;
*arr = (char **)realloc(*arr,(arrsz*sizeof(char *))+1);
*arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
strcpy(*arr[arrsz-1],tok);
*arr[arrsz]=malloc(1);
*arr[arrsz]='\0';
tok = strtok(NULL,delim);
}
return(arrsz);
}
There may be other errors I have not spotted, but that is fundamental. Best from hereon debugged using a debugger rather then Q&A.
the following proposed code:
cleanly compiles
performs the desired functionality
properly checks for errors from system functions
eliminates any need to use a *** parameter -- google three star programer as to why that is bad
does not include header files those contents are not used
and now, the proposed code:
//#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char ** split(char *str, size_t *arrsz);
int main( void )
{
size_t x;
size_t arrsz;
char str[]="aaa:bbb:ccc";
char **arr=split(str,&arrsz);
for(x=0;x<arrsz;x++)
{
printf("%s\n",arr[x]);
}
exit(0);
}
/***********************************/
char ** split(char *str, size_t *arrsz)
{
char **arr = NULL;
size_t count = 0;
char delim[2] = ":";
char *tok;
tok = strtok(str,delim);
while(tok != NULL)
{
count++;
char **temp = realloc(arr,(count*sizeof(char *)));
if( !temp )
{
perror( "malloc failed" );
// perform cleanup and
free( arr );
exit( EXIT_FAILURE );
}
arr = temp;
arr[count-1] = strdup( tok );
if( !arr[count-1] )
{
perror( "strdup failed" );
// perform cleanup and
free( arr );
exit( EXIT_FAILURE );
}
tok = strtok(NULL,delim);
}
*arrsz = count;
return( arr );
}
OP's code does not return the allocated memory assigned to arr
int split(char *str, char **arr) {
...
// Memory allocated and assigned to local `arr`
// Yet `arr` is not returned.
// Calling code never sees the result of this assignment.
arr = malloc(sizeof(char **));
...
return(arrsz);
}
Instead, I took a whole new approach to mimic split /PATTERN/,EXPR.
I really wanted to avoid all the ** and *** programming.
IMO, a split() should not change the expression so directly using strtok() is out. A common implementation of strtok() effectively does a strspn() and strcspsn(), so coding those directly avoids the strtok().
The below returns a string list type. Various other function signatures could be used, this return type seemed natural for OP's goal. Another solution might return a NULL terminated array of char * pointers.
When memory allocations fails, it is detected and then code calls TBD_Code();. Unclear how OP wants to handle that. Code could print a message and exit or attempt some recovery.
#include <stdlib.h>
#include <string.h>
typedef struct {
size_t n;
char **strings;
} string_list;
string_list split(const char *pattern, const char *expr) {
string_list list = { 0, NULL };
size_t length;
// Find length of initial matching characters
while ((length = strspn(expr, pattern)), expr[length]) {
// Skip leading characters from `expr` that match the pattern
expr += length;
// Find length of characters NOT from the pattern
length = strcspn(expr, pattern);
// Allocate for 1 more pointer
void *tmp = realloc(list.strings, sizeof *(list.strings) * (list.n + 1));
if (tmp == NULL) TBD_Code();
list.strings = tmp;
//Allocate for the token and save it
list.strings[list.n] = malloc(length + 1u);
if (list.strings[list.n] == 0) TBD_Code();
memcpy(list.strings[list.n], expr, length);
list.strings[list.n][length] = '\0';
// Advance
list.n++;
expr += length;
}
return list;
}
void string_list_free(string_list list) {
if (list.strings) {
for (size_t i = 0; i < list.n; i++) {
free(list.strings[i]);
}
free(list.strings);
}
}
Test code
#include <stdio.h>
void print_string_list(string_list list) {
for (size_t i = 0; i < list.n; i++) {
printf("%zu: <%s>\n", i, list.strings[i]);
}
string_list_free(list);
}
int main(void) {
print_string_list(split(":", "aaa:bbb:ccc"));
print_string_list(split(":b", "aaa:bbb:ccc"));
print_string_list(split("a:", "aaa:bbb:ccc"));
print_string_list(split(":c", "aaa:bbb:ccc"));
}
Output
0: <aaa>
1: <bbb>
2: <ccc>
0: <aaa>
1: <ccc>
0: <bbb>
1: <ccc>
0: <aaa>
1: <bbb>

Generic binary search in C

I am having trouble with this code I wrote for a generic binary search.
when trying to execute the search on an array of strings I noticed that the array of strings, passed to binSearch function does not contain the strings.
can someone suggest a hint?
Much appreciation
#define SIZE 100
typedef unsigned char BYTE
please consider this main:
void main()
{
char ** stringArr, stringToFind[SIZE];
int stringSize;
int res;
stringArr = getStringArr(&stringSize);
// string to find
gets(stringToFind);
res = stringBinSearch(stringArr, stringSize, stringToFind);
if (res == 1)
printf("The string %s was found\n", stringToFind);
else
printf("The string %s was not found\n", stringToFind);
}
char** getStringArr(int* stringSize)
{
int i, size, len;
char** arr;
char temp[SIZE];
scanf("%d", &size);
getchar();
arr = (char**)malloc(size * sizeof(char*));
checkAllocation(arr);
for (i = 0; i < size; i++)
{
gets(temp);
len = strlen(temp);
temp[len] = '\0';
arr[i] = (char*)malloc((len+1) * sizeof(char));
checkAllocation(arr[i]);
strcpy(arr[i], temp);
}
*stringSize = size;
return arr;
}
int stringBinSearch(char** stringArr, int stringSize, char* stringToFind)
{
return binSearch(stringArr, stringSize, sizeof(char*), stringToFind,compare2Strings);
}
int binSearch(void* Arr, int size, int ElemSize, void* Item, int(*compare)(void*, void*))
{
int left = 0, right = size - 1, place;
BOOL found = FALSE;
while (found == FALSE && left <= right)
{
place = (left + right) / 2;
if (compare(Item, (BYTE*)Arr + place*ElemSize) == 0)
found = TRUE;
else if (compare(Item, (BYTE*)Arr + place*ElemSize) < 0)
right = place - 1;
else
left = place + 1;
}
return found;
}
int compare2Strings(void* str1, void* str2)
{
char* elemA, *elemB;
elemA = (char*)str1;
elemB = (char*)str2;
return strcmp(elemA, elemB);
}
When you sort an array of int, the values passed are pointer to int, spelled int *. When you sort an array of strings (spelled char *), the values passed are pointer to string, spelled char **. You comparator is no use for comparing strings. As the inimitable BLUEPIXY said in their incredibly terse style — you need to modify the code to treat the passed void * arguments as char ** and not as char *.
With generic sorting, that's usually the end of the issue. With binary search, there's another issue that you run foul of. That is that the type of the item being searched for needs to be the same as the one of the entries in the array, so you need to pass a pointer to the item, not just the item.
So, adding material to allow the code to compile with minimal changes, changing from gets() to a cover for fgets() (because gets() is too dangerous to be used — ever! and programs that use it produce a warning when its used on macOS Sierra 10.12.5 — warning: this program uses gets(), which is unsafe.), and printing out the input data so you can see what's what, I end up with:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BOOL int
#define TRUE 1
#define FALSE 0
static inline char *sgets(size_t buflen, char *buffer)
{
char *result = fgets(buffer, buflen, stdin);
if (result)
buffer[strcspn(buffer, "\n")] = '\0';
return result;
}
#define checkAllocation(x) assert((x) != 0)
#define SIZE 100
typedef unsigned char BYTE;
char **getStringArr(int *stringSize);
int stringBinSearch(char **stringArr, int stringSize, char *stringToFind);
int binSearch(void *Arr, int size, int ElemSize, void *Item, int (*compare)(void *, void *));
int compare2Strings(void *str1, void *str2);
int main(void)
{
char **stringArr, stringToFind[SIZE];
int stringSize;
int res;
stringArr = getStringArr(&stringSize);
sgets(sizeof(stringToFind), stringToFind);
printf("Strings: %d\n", stringSize);
for (int i = 0; i < stringSize; i++)
printf("[%d] = [%s]\n", i, stringArr[i]);
printf("Search: [%s]\n", stringToFind);
res = stringBinSearch(stringArr, stringSize, stringToFind);
if (res == 1)
printf("The string %s was found\n", stringToFind);
else
printf("The string %s was not found\n", stringToFind);
return 0;
}
char **getStringArr(int *stringSize)
{
int i, size, len;
char **arr;
char temp[SIZE];
scanf("%d", &size);
getchar();
arr = (char **)malloc(size * sizeof(char *));
checkAllocation(arr);
for (i = 0; i < size; i++)
{
sgets(sizeof(temp), temp);
len = strlen(temp);
temp[len] = '\0';
arr[i] = (char *)malloc((len + 1) * sizeof(char));
checkAllocation(arr[i]);
strcpy(arr[i], temp);
}
*stringSize = size;
return arr;
}
int stringBinSearch(char **stringArr, int stringSize, char *stringToFind)
{
return binSearch(stringArr, stringSize, sizeof(char *), &stringToFind, compare2Strings);
}
int binSearch(void *Arr, int size, int ElemSize, void *Item, int (*compare)(void *, void *))
{
int left = 0, right = size - 1, place;
BOOL found = FALSE;
while (found == FALSE && left <= right)
{
place = (left + right) / 2;
if (compare(Item, (BYTE *)Arr + place * ElemSize) == 0)
found = TRUE;
else if (compare(Item, (BYTE *)Arr + place * ElemSize) < 0)
right = place - 1;
else
left = place + 1;
}
return found;
}
int compare2Strings(void *str1, void *str2)
{
char *elemA = *(char **)str1;
char *elemB = *(char **)str2;
return strcmp(elemA, elemB);
}
The key changes are:
compare2Strings() — compare the data in char ** values.
stringBinSearch() — pass the address of stringToFind.
AFAICR, any other change is cosmetic or 'infrastructure'.
Note that the return type of main() should be int — you can get away with void only on Windows where it is allowed.
Example run 1:
Data:
5
Antikythera
albatross
armadillo
pusillanimous
pygmalion
pygmalion
Output:
Strings: 5
[0] = [Antikythera]
[1] = [albatross]
[2] = [armadillo]
[3] = [pusillanimous]
[4] = [pygmalion]
Search: [pygmalion]
The string pygmalion was found
Example run 2:
Data file:
5
armadillo
pygmalion
Antikythera
pusillanimous
albatross
pygmalion
Output:
Strings: 5
[0] = [armadillo]
[1] = [pygmalion]
[2] = [Antikythera]
[3] = [pusillanimous]
[4] = [albatross]
Search: [pygmalion]
The string pygmalion was not found
The difference between the two sets of data is that in the first case, the strings are in correct sorted order — a prerequisite condition for successful (reliable) binary search — and in the second, the data is not in correct sorted order. (That said, I had one non-sorted order that still found 'pygmalion' — I used a different shuffle for the shown results. But the 'reliable' comment applies.)
Hello your problem is the way you send the array of strings to the binary search function. Because you need to pass an array of strings to it your Arr parameter must be void** not void*
int binSearch(void** Arr, int size, int ElemSize, void* Item, int(*compare)(void*, void*))
And in your function whenever you want to acces a string from your array it will be enough to acces it like: (char*) *(Arr+place*ElemSize)
Your approach which is to write a generic binary search is right. However attempting to return early slows down a binary search. It also means you can't use the C++ convention that "less than" is the comparison operator defined. Wait until left and right equal each other, and return that.

How to concatenate 2 strings using malloc and not the library functions

I need to create a function to concatenate 2 strings, in my case they are already given. I will need to concatenate the strings 'hello' and 'world!' to make it into 'helloworld!'. However, I can't use library functions besides strlen(). I also need to use malloc. I understand malloc would create n amounts of bytes for memory, however, how would I make it so that it can return a string array if thats possible.
Here is what I have so far,
#include <stdio.h>
#include <string.h>
int *my_strcat(const char* const str1, const char *const str2)
{
int s1, s2, s3, i = 0;
char *a;
s1 = strlen(str1);
s2 = strlen(str2);
s3 = s1 + s2 + 1;
a = char *malloc(size_t s3);
for(i = 0; i < s1; i++)
a[i] = str1[i];
for(i = 0; i < s2; i++)
a[i+s1] = str2[i];
a[i]='\0';
return a;
}
int main(void)
{
printf("%s\n",my_strcat("Hello","world!"));
return 0;
}
Thanks to anyone who can help me out.
This problem is imo a bit simpler with pointers:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *mystrcat(char *a, char *b) {
char *p, *q, *rtn;
rtn = q = malloc(strlen(a) + strlen(b) + 1);
for (p = a; (*q = *p) != '\0'; ++p, ++q) {}
for (p = b; (*q = *p) != '\0'; ++p, ++q) {}
return rtn;
}
int main(void) {
char *rtn = mystrcat("Hello ", "world!");
printf("Returned: %s\n", rtn);
free(rtn);
return 0;
}
But you can do the same thing with indices:
char *mystrcat(char *a, char *b) {
char *rtn = malloc(strlen(a) + strlen(b) + 1);
int p, q = 0;
for (p = 0; (rtn[q] = a[p]) != '\0'; ++p, ++q) {}
for (p = 0; (rtn[q] = b[p]) != '\0'; ++p, ++q) {}
return rtn;
}
Here is an alternate fix. First, you forgot #include <stdlib.h> for malloc(). You return a pointer to char from the function my_strcat(), so you need to change the function prototype to reflect this. I also changed the const declarations so that the pointers are not const, only the values that they point to:
char * my_strcat(const char *str1, const char *str2);
Your call to malloc() is incorrectly cast, and there is no reason to do so anyway in C. It also looks like you were trying to cast the argument in malloc() to size_t. You can do so, but you have to surround the type identifier with parentheses:
a = malloc((size_t) s3);
Instead, I have changed the type declaration for s1, s2, s3, i to size_t since all of these variables are used in the context of string lengths and array indices.
The loops were the most significant change, and the reason that I changed the consts in the function prototype. Your loops looked fine, but you can also use pointers for this. You step through the strings by incrementing a pointer, incrementing a counter i, and store the value stored there in the ith location of a. At the end, the index i has been incremented to indicate the location one past the last character, and you store a '\0' there. Note that in your original code, the counter i was not incremented to indicate the location of the null terminator of the concatenated string, because you reset it when you looped through str2. #jpw shows one way of solving this problem.
I changed main() just a little. I declared a pointer to char to receive the return value from the function call. That way you can free() the allocated memory when you are through with it.
Here is the modified code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char * my_strcat(const char *str1, const char *str2)
{
size_t s1, s2, s3, i = 0;
char *a;
s1 = strlen(str1);
s2 = strlen(str2);
s3 = s1+s2+1;
a = malloc(s3);
while(*str1 != '\0') {
a[i] = *str1;
str1++;
i++;
}
while(*str2 != '\0') {
a[i] = *str2;
str2++;
i++;
}
a[i] = '\0'; // Here i = s1 + s2
return a;
}
int main(void)
{
char *str = my_strcat("Hello", "world!");
printf("%s\n", str);
/* Always free allocated memory! */
free(str);
return 0;
}
There are a few issues:
In the return from malloc you don't need to do any cast (you had the syntax for the cast wrong anyway) (see this for more information).
You need to include the header stdlib.h for the malloc function.
And most importantly, a[i]='\0'; in this i is not what you need it to be; you want to add the null char at the end which should be a[s3]='\0'; (the length of s1+s2).
This version should be correct (unless I missed something):
#include <stdio.h>
#include <stdlib.h> //for malloc
#include <string.h>
char *my_strcat(const char* const str1, const char *const str2)
{
int s1,s2,s3,i=0;
char *a;
s1 = strlen(str1);
s2 = strlen(str2);
s3 = s1+s2+1;
a = malloc(s3);
for(i = 0; i < s1; i++) {
a[i] = str1[i];
}
for(i = 0; i < s2; i++) {
a[i+s1] = str2[i];
}
a[s3-1] = '\0'; // you need the size of s1 + s2 + 1 here, but - 1 as it is 0-indexed
return a;
}
int main(void)
{
printf("%s\n",my_strcat("Hello","world!"));
return 0;
}
Testing with Ideone renders this output: Helloworld!

How can I find symmetric difference in the words in two strings in C?

For example I have two strings:
lihuayu zhangxuehui sunyunlei guolei fuwenxia
lihuayu lixin fuwenxia zhangxuehui
And I will get
sunyunlei guolei lixin
I wrote following code
#include<stdio.h>
#include<string.h>
#define STRINGSIZE 64
void main()
{
char *line1 = NULL;
char *line2 = NULL;
size_t size1;
size_t size2;
getline(&line1, &size1, stdin);
getline(&line2, &size2, stdin);
char* spilted1 = strtok(line1, " ");
while (spilted1 != NULL){
if (strstr(line2, spilted1) == NULL){
printf("%s", spilted1);
}
spilted1 = strtok(NULL, " ");
}
}
But it's obviously wrong in that I can't get those distinct words in string2.
I know how to do it in Python but have no idea how to do it in C.
Here's one way:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAX_WORDS = 64 };
static int split_words(char *buffer, char **words, int max_words)
{
char *token;
char *next = buffer;
int num_words = 0;
while ((token = strtok(next, " \n")) != 0 && num_words < max_words)
{
words[num_words++] = token;
next = NULL;
}
return num_words;
}
static int word_in_list(char *word, char **list, int list_size)
{
for (int i = 0; i < list_size; i++)
{
if (strcmp(word, list[i]) == 0)
return 1;
}
return 0;
}
/* Print words in list w1 that do not appear in list w2 */
static void print_unique(char **w1, int n1, char **w2, int n2)
{
for (int i = 0; i < n1; i++)
{
if (!word_in_list(w1[i], w2, n2))
printf("[%s]\n", w1[i]);
}
}
int main(void)
{
char *line1 = NULL;
char *line2 = NULL;
size_t size1 = 0;
size_t size2 = 0;
if (getline(&line1, &size1, stdin) > 0 &&
getline(&line2, &size2, stdin) > 0)
{
char *w1[MAX_WORDS];
char *w2[MAX_WORDS];
int n1 = split_words(line1, w1, MAX_WORDS);
int n2 = split_words(line2, w2, MAX_WORDS);
print_unique(w1, n1, w2, n2);
print_unique(w2, n2, w1, n1);
}
free(line1);
free(line2);
return 0;
}
/*
You'll need two
arrays of char pointers, one for each line of input. You'll split the
first line into the first array, and the second line into the second
array. Then you'll go through the two arrays of pointers, comparing
strings and counting only those that do not match any of the entries in
the other array. (What do you do if one input line itself contains
repeats — The Lion, the Witch, and the Wardrobe for example? Also, do
you need to treat The as the same as the in that example?)
You can use strtok_r() or strtok_s() if you have them available; at a
pinch, you could use strtok(), but it is dangerous to use that in
library code. And you'll need to use strcmp() to compare the strings
— plus macros/functions from <ctype.h> to handle case-conversion if
that's appropriate.
Also note that strtok() is destructive. If you've split string 1 with
it, you can't then search in string 1 when you split string 2. Also
note that strstr("then came a deluge", "the") matches, even though most
people would not regard the haystack string as containing the needle
word the.
*/
The algorithm used is quadratic in the number of words (it runs in O(N2) time); it compares each unique word in one list with every word in the other list. You can do things like sort the lists and eliminate duplicates (in O(N.log N) time), and then step through the two lists to find unique words in linear time. Being quadratic won't matter for tens of words, and probably not for hundreds of words, but would probably begin to matter after that.
Compilation:
$ gcc -O3 -g -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
> -Wold-style-definition -Werror uniq_words.c -o uniq_words
$
Example run:
$ cat data
lihuayu zhangxuehui sunyunlei guolei fuwenxia
lihuayu lixin fuwenxia zhangxuehui
$ uniq_words < data
[sunyunlei]
[guolei]
[lixin]
$
The square brackets around the data reassure me that the strings contain what I think they should contain.
Like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **split(const char *str, const char *delimiter, size_t *len);
int cmp(const void *a, const void *b);
void find_diff(char **a1, char **a2);
void drop(char **a);
int main(void){
char *line1 = NULL, *line2 = NULL;
size_t size1 = 0, size2 = 0;
getline(&line1, &size1, stdin);
getline(&line2, &size2, stdin);
//(1)split
size_t len1, len2;
char **array1 = split(line1, " \t\n", &len1);
char **array2 = split(line2, " \t\n", &len2);
//(2)sort
qsort(array1, len1, sizeof(*array1), cmp);
qsort(array2, len2, sizeof(*array2), cmp);
//(3)compare
find_diff(array1, array2);
drop(array1);drop(array2);
free(line1);free(line2);
return 0;
}
char **split(const char *str, const char *delimiter, size_t *len){
char *text, *p, *first, **array, **ret;
size_t c;
*len = 0;
text = strdup(str);//make clone
if(text == NULL) return NULL;
for(c = 0, p = text; p = strtok(p, delimiter); p = NULL)
++c;//count elements
ret = malloc(sizeof(char*)*(c+1));//+1 for NULL
if(ret==NULL){
free(text);
return NULL;
}
strcpy(text, str);//restore
array=ret;
for(p = text; p = strtok(p, delimiter); p = NULL)
*array++ = strdup(p);
*array = NULL;
*len = c;
free(text);
return ret;
}
int cmp(const void *a, const void *b){
return strcmp(*(char **)a, *(char **)b);
}
void find_diff(char **a1, char **a2){//arguments has been sorted
while(*a1 || *a2){
if(*a1 && a1[1] && !strcmp(*a1, a1[1])){
++a1;//distinct
continue;
}
if(*a2 && a2[1] && !strcmp(*a2, a2[1])){
++a2;
continue;
}
if(*a1 == NULL){
puts(*a2++);
} else if(*a2 == NULL){
puts(*a1++);
} else {
int result;
if((result=strcmp(*a1, *a2)) < 0){
puts(*a1++);
} else if(result > 0){
puts(*a2++);
} else {
++a1;
++a2;
}
}
}
}
void drop(char **a){
char **tmp = a;
while(*a)
free(*a++);
free(tmp);
}

Resources