C string append - c

I'm looking for an efficient method for appending multiple strings.
The way it should work is C++ std::string::append or JAVA StringBuffer.append.
I wrote a function which actually reallocs previous source pointer and does strcat.
I believe this is not an efficient method as compiler may implement this free and malloc.
Other way I could think of (like std::vector) is allocate memory in bulk (1KB for eg) and do strcpy. In that case every append call will check if the total required allocation is more than (1200 bytes) the amount allocated in bulk, realloc to 2KB. But in that case there will be some memory wasted.
I'm looking for a balance between the above but the preference is performance.
What other approaches are possible. Please suggest.

I would add each string to a list, and add the length of each new string to a running total. Then, when you're done, allocate space for that total, walk the list and strcpy each string to the newly allocated space.

The classical approach is to double the buffer every time it is too small.
Start out with a "reasonable" buffer, so you don't need to do realloc()s for sizes 1, 2, 4, 8, 16 which are going to be hit by a large number of your strings.
Starting out at 1024 bytes means you will have one realloc() if you hit 2048, a second if you hit 4096, and so on. If rampant memory consumption scares you, cap the growth rate once it hits something suitably big, like 65536 bytes or whatever, it depends on your data and memory tolerance.
Also make sure you buffer the current length, so you can do strcpy() without having to walk the string to find the length, first.

Sample function to concatenate strings
void
addToBuffer(char **content, char *buf) {
int textlen, oldtextlen;
textlen = strlen(buf);
if (*content == NULL)
oldtextlen = 0;
else
oldtextlen = strlen(*content);
*content = (char *) realloc( (void *) *content, (sizeof(char)) * (oldtextlen+textlen+1));
if ( oldtextlen != 0 ) {
strncpy(*content + oldtextlen, buf, textlen + 1);
} else {
strncpy(*content, buf, textlen + 1);
}
}
int main(void) {
char *content = NULL;
addToBuffer(&content, "test");
addToBuffer(&content, "test1");
}

I would do something like this:
typedef struct Stringbuffer {
int capacity; /* Maximum capacity. */
int length; /* Current length (excluding null terminator). */
char* characters; /* Pointer to characters. */
} Stringbuffer;
BOOL StringBuffer_init(Stringbuffer* buffer) {
buffer->capacity = 0;
buffer->length = 0;
buffer->characters = NULL;
}
void StringBuffer_del(Stringbuffer* buffer) {
if (!buffer)
return;
free(buffer->characters);
buffer->capacity = 0;
buffer->length = 0;
buffer->characters = NULL;
}
BOOL StringBuffer_add(Stringbuffer* buffer, char* string) {
int len;
int new_length;
if (!buffer)
return FALSE;
len = string ? strlen(string) : 0;
if (len == 0)
return TRUE;
new_length = buffer->length + len;
if (new_length >= new_capacity) {
int new_capacity;
new_capacity = buffer->capacity;
if (new_capacity == 0)
new_capacity = 16;
while (new_length >= new_capacity)
new_capacity *= 2;
new_characters = (char*)realloc(buffer->characters, new_capacity);
if (!new_characters)
return FALSE;
buffer->capacity = new_capacity;
buffer->characters = new_characters;
}
memmove(buffer->characters + buffer->length, string, len);
buffer->length = new_length;
buffer->characters[buffer->length] = '\0';
return TRUE;
}

Related

Splitting a string and store to the heap algorithm question

For this code below that I was writing. I was wondering, if I want to split the string but still retain the original string is this the best method?
Should the caller provided the ** char or should the function "split" make an additional malloc call and memory manage the ** char?
Also, I was wondering if this is the most optimizing method, or could I optimize the code better than this?
I still have not debug the code yet, I am a bit undecided whether if the caller manage the ** char or the function manage the pointer ** char.
#include <stdio.h>
#include <stdlib.h>
size_t split(const char * restrict string, const char splitChar, char ** restrict parts, const size_t maxParts){
size_t size = 100;
size_t partSize = 0;
size_t len = 0;
size_t newPart = 1;
char * tempMem;
/*
* We just reverse a long page of memory
* At reaching the space character that is the boundary of the new
*/
char * mem = (char*) malloc( sizeof(char) * size );
if ( mem == NULL ) return 0;
for ( size_t i = 0; string[i] != 0; i++ ) {
// If it is a split char we at a new part
if ( string[i] == splitChar) {
// If the last character was not the split character
// Then mem[len] = 0 and increase the len by 1.
if (newPart == 0) mem[len++] = 0;
newPart = 1;
continue;
} else {
// If this is a new part
// and not a split character
// we make a new pointer
if ( newPart == 1 ){
// if reach maxpart we break.
// It is okay here, to not worry about memory
if ( partSize == maxParts ) break;
parts[partSize++] = &mem[len];
newPart = 0;
}
mem[len++] = string[i];
if ( len == size ){
// if ran out of memory realloc.
tempMem = (char*)realloc(mem, sizeof(char) * (size << 1) );
// if fail quit loop
if ( tempMem == NULL ) {
// If we can't get more memory the last part could be corrupted
// We have to return.
// Otherwise the code below can seg.
// There maybe a better way than this.
return partSize--;
}
size = size << 1;
mem = tempMem;
}
}
}
// If we got here and still in a newPart that is fine no need
// an additional character.
if ( newPart != 1 ) mem[len++] = 0;
// realloc to give back the unneed memory
if ( len < size ) {
tempMem = (char*) realloc(mem, sizeof(char) * len );
// If the resizing did not fail but yielded a different
// memory block;
if ( tempMem != NULL && tempMem != mem ){
for ( size_t i = 0; i < partSize; i++ ){
parts[i] = tempMem + (parts[i] - mem);
}
}
}
return partSize;
}
int main(){
char * tStr = "This is a super long string just to test the str str adfasfas something split";
char * parts[10];
size_t len = split(tStr, ' ', parts, 10);
for (size_t i = 0; i < len; i++ ){
printf("%zu: %s\n", i, parts[i]);
}
}
What is "best" is very subjective, as well as use case dependent.
I personally would keep the parameters as input only, define a struct to contain the split result, and probably return such by value. The struct would probably contain pointers to memory allocation, so would also create a helper function free that memory. The parts might be stored as list of strings (copy string data) or index&len pairs for the original string (no string copies needed, but original string needs to remain valid).
But there are dozens of very different ways to do this in C, and all a bit klunky. You need to choose your flavor of klunkiness based on your use case.
About being "more optimized": unless you are coding for a very small embedded device or something, always choose a more robust, clear, easier to use, harder to use wrong over more micro-optimized. The useful kind of optimization turns, for example, O(n^2) to O(n log n). Turning O(3n) to O(2n) of a single function is almost always completely irrelevant (you are not going to do string splitting in a game engine inner rendering loop...).

Is this appender, with realloc function safe?

Just finished putting this function together from some man documentation, it takes a char* and appends a const char* to it, if the size of the char* is too small it reallocates it to something a little bigger and finally appends it. Its been a long time since I used c, so just checking in.
// append with realloc
int append(char *orig_str, const char *append_str) {
int result = 0; // fail by default
// is there enough space to append our data?
int req_space = strlen(orig_str) + strlen(append_str);
if (req_space > strlen(orig_str)) {
// just reallocate enough + 4096
int new_size = req_space;
char *new_str = realloc(orig_str, req_space * sizeof(char));
// resize success..
if(new_str != NULL) {
orig_str = new_str;
result = 1; // success
} else {
// the resize failed..
fprintf(stderr, "Couldn't reallocate memory\n");
}
} else {
result = 1;
}
// finally, append the data
if (result) {
strncat(orig_str, append_str, strlen(append_str));
}
// return 0 if Ok
return result;
}
This is not usable because you never tell the caller where the memory is that you got back from realloc.
You will need to either return a pointer, or pass orig_str by reference.
Also (as pointed out in comments) you need to do realloc(orig_str, req_space + 1); to allow space for the null terminator.
Your code has a some inefficient logic , compare with this fixed version:
bool append(char **p_orig_str, const char *append_str)
{
// no action required if appending an empty string
if ( append_str[0] == 0 )
return true;
size_t orig_len = strlen(*p_orig_str);
size_t req_space = orig_len + strlen(append_str) + 1;
char *new_str = realloc(*p_orig_str, req_space);
// resize success..
if(new_str == NULL)
{
fprintf(stderr, "Couldn't reallocate memory\n");
return false;
}
*p_orig_str = new_str;
strcpy(new_str + orig_len, append_str);
return true;
}
This logic doesn't make any sense:
// is there enough space to append our data?
int req_space = strlen(orig_str) + strlen(append_str);
if (req_space > strlen(orig_str)) {
As long as append_str has non-zero length, you're always going to have to re-allocate.
The main problem is that you're trying to track the size of your buffers with strlen. If your string is NUL-terminated (as it should be), your perceived buffer size is always going to be the exact length of the data in it, ignoring any extra.
If you want to work with buffers like this, you need to track the size in a separate size_t, or keep some sort of descriptor like this:
struct buffer {
void *buf;
size_t alloc_size;
size_t used_amt; /* Omit if strings are NUL-terminated */
}

Incorrect checksum for freed object on malloc

I get an
malloc: *** error for object 0x1001012f8: incorrect checksum for freed object
- object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
error in the following function:
char* substr(const char* source, const char* start, const char* end) {
char *path_start, *path_end, *path;
int path_len, needle_len = strlen(start);
path_start = strcasestr(source, start);
if (path_start != NULL) {
path_start += needle_len;
path_end = strcasestr(path_start, end);
path_len = path_end - path_start;
path = malloc(path_len + 1);
strncpy(path, path_start, path_len);
path[path_len] = '\0';
} else {
path = NULL;
}
return path;
}
How can I make this work? When I rewrite the function to allocate the memory using path[path_len + 1] it works just fine.
Now, the part I don't understand is, that I never even call free in any point of my application, as every allocated memory is needed for the program until it exists (which, AFAIK will invalidate every allocated memory anyway?!)
So, how can a freed object be corrupt if I never free one?
The function is called in this one:
char *read_response(int sock) {
int bytes_read;
char *buf = (char*)malloc(BUF_SIZE);
char *cur_position = buf;
while ((bytes_read = read(sock, cur_position, BUF_SIZE)) > 0) {
cur_position += bytes_read;
buf = realloc(buf, sizeof(buf) + BUF_SIZE);
}
int status = atoi(substr(buf, "HTTP/1.0 ", " "));
There is the realloc, am I using that wrong? I want to read the complete server response, so I have to reallocate after every iteration, don't I?
In read_response, you are probably overwriting the end of the buffer pointed to by buf.
The problem is that buf is a pointer, so sizeof(buf) will return the size of a pointer (probably 4 or 8 depending on your CPU). You are using sizeof as if buf were an array, which is not really the same thing as a pointer in C although they seem interchangeable in some contexts.
Instead of using sizeof, you need to be keeping track of the last size that you allocated for buf, and add BUF_SIZE to that each time you enlarge the buffer.
You should also consider that the read operation may be returning considerably fewer characters than BUF_SIZE on each call, so doing a realloc on buf in each iteration may be overkill. That probably won't cause any problems for you in terms of correctness, though; it will just use more memory than it needs to.
I would do something more like the code below.
#define MIN_BUF_SPACE_THRESHOLD (BUF_SIZE / 2)
char *read_response(int sock) {
int bytes_read;
char *buf = (char*)malloc(BUF_SIZE);
int cur_position = 0;
int space_left = BUF_SIZE;
if (buf == NULL) {
exit(1); /* or try to cope with out-of-memory situation */
}
while ((bytes_read = read(sock, buf + cur_position, space_left)) > 0) {
cur_position += bytes_read;
space_left -= bytes_read;
if (space_left < MIN_BUF_SPACE_THRESHOLD) {
buf = realloc(buf, cur_position + space_left + BUF_SIZE);
if (buf == NULL) {
exit(1); /* or try to cope with out-of-memory situation */
}
space_left += BUF_SIZE;
}
}
This version has the advantage of not trying to allocate more space if the read call comes back with only a few bytes of data.
This line
buf = realloc(buf, sizeof(buf) + BUF_SIZE);
is wrong. All reallocations are with the same size, BUF_SIZE + sizeof(char*). Then you are writing to unallocated memory when reading from the socket, overwriting memory previously freed by a realloc.
You have to keep track of the allocated size,
size_t current_buf_size = BUF_SIZE;
/* ... */
char *temp = realloc(buf, current_buf_size + BUF_SIZE);
if (temp == NULL) {
/* die or repair */
}
buf = temp;

Storing text in a char matrix in C

I want to take a text from the standard input and store it into an array of strings. But I want the array of strings to be dynamic in memory. My code right now is the following:
char** readStandard()
{
int size = 0;
char** textMatrix = (char**)malloc(size);
int index = 0;
char* currentString = (char*)malloc(10); //10 is the maximum char per string
while(fgets(currentString, 10, stdin) > 0)
{
size += 10;
textMatrix = (char**)realloc(textMatrix, size);
textMatrix[index] = currentString;
index++;
}
return textMatrix;
}
The result I have while printing is the last string read in all positions of the array.
Example
Reading:
hello
nice
to
meet
you
Printing:
you
you
you
you
you
Why? I've searched over the Internet. But I didn't find this kind of error.
You are storing the same address (currentString) over and over. Try something like
while(fgets(currentString, 10, stdin) > 0)
{
textMatrix[index] = strdup(currentString); /* Make copy, assign that. */
}
The function strdup is not standard (just widely available). It should be easy to implement it yourself with malloc + memcpy.
currentString always point to the same memory area and all the pointers in textMatrix will point to it
char** readStandard()
{
int size = 0;
char** textMatrix = (char**)malloc(size);
int index = 0;
char currentString[10];
while(fgets(currentString, 10, stdin) > 0)
{
size += sizeof(char*);
textMatrix = (char**)realloc(textMatrix, size);
textMatrix[index] = strdup(currentString);
index++;
}
return textMatrix;
}

Problems with 'Heap Buffer' Error in C

I get the following error in my C program:
Writing to heap after end of help buffer
Can you tell me what I'm missing?
char * path_delimiter(char * path)
{
int i = 0, index = 0, size = 0, length = (int)strlen(path);
char *tmp, *ans;
for(; i < length; i++) {
if(path[i] == PATH_DELIM[0]) {
break;
}
}
i++;
size = (int)strlen(path) - i;
ans = (char*)malloc(sizeof(path));
tmp = (char*)malloc(size);
strcpy(ans,path);
ans[i-1] = END_ARRAY;
if(size > 0)
{
strcpy(tmp,&path[i]);
realloc(path,size);
strcpy(path,tmp);
}
else
{
strcpy(path,ans);
}
free(tmp);
return ans;
}
This ...
sizeof(path)
... is the same as ...
sizeof(char *)
... which is the size of the pointer (not the size of the buffer which it's pointing to), so it's probably about 4.
So this ...
ans= (char*)malloc(sizeof(path));
... is a 4-byte buffer, and so this ...
strcpy(ans,path);
... is overwriting (writing past the end of) that buffer.
Instead of ...
malloc(sizeof(path));
... I think you want ...
malloc(strlen(path)+1);
You are not checking if malloc and realloc succeeded. More importantly, realloc may return a different handle which you are discarding.
Further, you have:
ans = malloc(sizeof(path));
...
strcpy(ans, path);
On the most common platform today, sizeof(path) is most likely 4 or maybe 8, regardless of the length of the character array path points to.
You normally need size = strlen(xxx) + 1; to allow for the null terminator on the string.
In this case, I think you need:
size = strlen(path) - i + 1;

Resources