Possible Buffer Overrun - c

I'm having an infuriating issue here where I'm crashing on malloc/calloc/strdup and I'm assuming currently that it's because of a buffer over run somewhere.
I'm finding this very difficult to find and I was wondering if any of you can offer me a hand. I'll post code snippets here, and link to full source.
File reading and array operations: (common.c)
Pastebin
char * S6_ReadFileBytes(const char* path)
FILE * file;
long length;
char * bytes = NULL;
file = fopen(path, "r");
fseek(file, 0, SEEK_END)
length = ftell(file);
fseek(file, 0, 0);
bytes = (char*)calloc(1, (size_t)length + 1);
fread(bytes, 1, (size_t)length, file);
return bytes;
S6_Array * S6_ArrayNew(size_t count, size_t typeSize)
S6_Array * a = (S6_Array*)malloc(sizeof(S6_Array));
a->typeSize = typeSize;
a->Length = count;
void * S6_ArrayGet(S6_Array * a, int idx)
return &((char*)a->Data)[idx * a->typeSize];
void S6_ArraySet(S6_Array * a, int idx, void * val)
memcpy(&((char*)a->Data)[idx * a->typeSize], val, a->typeSize);
void S6_ArrayGrow(S6_Array * a, int amount)
void * data;
data = realloc(a->Data, (a->Length + amount) * a->typeSize);
a->Data = data;
a->Length += amount;
void S6_ArrayPushBack(S6_Array * a, void* val)
S6_ArrayGrow(a, 1);
S6_ArraySet(a, a->Length - 1, val);
CSV Reading: (CSV.c)
Pastebin
void S6_CSV_PushRect(S6_Array ** rectangles, S6_Rectangle * rect)
if( !*rectangles )
*rectangles = S6_ArrayNew(1, sizeof(S6_Rectangle*));
S6_ArraySet(*rectangles, 0, &rect);
else
S6_ArrayPushBack(*rectangles, &rect);
int S6_CSV_ReadRects(const char* file, S6_Array ** rectangles)
char * bytes = S6_ReadFileBytes(file);
char * line;
char * nameIndex;
size_t nameLength;
S6_Rectangle * tempRect;
line = strtok( bytes , "\n");
while( line )
nameIndex = strstr(line, ",");
tempRect = (S6_Rectangle*)calloc(1, sizeof(S6_Rectangle));
nameLength = (size_t)(nameIndex - line) + 1;
strncpy(tempRect->name, line, nameLength-1);
tempRect->name[nameLength-1] = '\0';
sscanf(nameIndex, "%*[,]%d%*[,]%d%*[,]%d%*[,]%d", &tempRect->x, &tempRect->y, &tempRect->w, &tempRect->h)
S6_CSV_PushRect(rectangles , tempRect);
strtok(NULL, "\n");
free(bytes);
A function where I modify the array: (BinPacker.c)
Pastebin
int S6_BinPacker_Pack(S6_Array * rectangles, int binSize)
// This sort appears to be working fine. View pastebin for test.
qsort(rectangles->Data, rectangles->Length, sizeof(S6_Rectangle*), S6_BinPacker_CompareRects);
CSV Writing [CRASH]
: (CSV.c)
Pastebin
void S6_CSV_WriteRects(const char* file, S6_Array * rectangles)
char * bytes = NULL;
char buffer[128];
S6_Rectangle * tempRect;
size_t i;
for( i = 0; i < rectangles->Length; ++i)
tempRect = *(S6_Rectangle**)S6_ArrayGet(rectangles, i);
memset(buffer, '\0', sizeof(buffer));
sprintf(buffer,
"%s,%d,%d,%d,%d\n",
tempRect->name,
temprect->x,
temprect->y,
temprect->w,
temprect->h);
if( bytes )
bytes = strcat(bytes, _strdup(buffer));
else
bytes = _strdup(buffer);
So I'm crashing here on the strcat(bytes, _strdup(buffer)) line. When I separate it out It's still the string duplication or any sort of allocation I've tried.
I get the following break dialog from visual studio:
Windows has triggered a breakpoint in myapp.exe.
This may be due to a corruption of the heap, which indicates a bug in Slant6.Debug.exe or any of the DLLs it has loaded.
This may also be due to the user pressing F12 while Slant6.Debug.exe has focus.
The output window may have more diagnostic information.
And the break point it triggers is in tidtable.c on
PFLS_GETVALUE_FUNCTION flsGetValue = FLS_GETVALUE;
SOLUTION
strdup doesn't do any allocations, and even if it did I would be leaking like crazy. So instead of:
bytes = strcat(bytes, _strdup(buffer));
in CSV.c, I replaced it with some manual string concatenation that's easier for me to read (and remember).
size_t oldSize = strlen(bytes);
size_t bufferSize = strlen(buffer);
size_t newSize = oldSize + bufferSize ;
char * newMem = (char*)calloc(newSize + 1, 1);
memcpy(newMem, bytes, newSize);
memcpy(&newMem[oldSize], buffer, bufferSize);
free(bytes);
bytes = newMem;
/SOLUTION

I'm thinking that this line:
bytes = strcat(bytes, _strdup(buffer));
Does not do what you think it does.
You are making a copy of a string (buffer), and then concatenating that onto bytes. The duplicated string is never freed and
bytes is only as big as the last _strdup, thus doing a strcat will overflow the buffer.
You need to allocate (or reallocate) strlen(bytes) + strlen(buffer), etc. etc. for the strcat.

Related

Segmentation fault happened in Linux but works in Mac

I have a c code which wrote in my Mac laptop in Xcode but it didn't work in Linux system.
I run this code by two ways:
1.One is run in Eclipse but the while loop didn't look like finish. Please find the message below:
Please wait while calculating...
But not more message in the console. It looks like while loop can't finish by some reason.
2.The second way is that I complier the code directly in Linux environment by the command:
cc -std=c99 main.c -o main
Then run by the command:
./main
The message shows that:
Please wait while calculating... Segmentation fault (core dumped)
I checked by gdb
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7a9bd4a in ?? () from /lib/x86_64-linux-gnu/libc.so.6
My data is saved in:
/home/alan_yu/workspace/scandi.csv
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
char **split(char *line, char sep, int fields) {
char **r = (char **)malloc(fields * sizeof(char*));
int lptr = 0, fptr = 0;
r[fptr++] = line;
while (line[lptr]) {
if (line[lptr] == sep) {
line[lptr] = '\0';
r[fptr] = &(line[lptr+1]);
fptr++;
}
lptr++;
}
return r;
}
int cmpfunc (const void * a, const void * b)
{
return *(double *)a > *(double *)b ? 1 : -1;
}
#define LINE_SIZE 1000000
#define EXPECTED_STOCK_SIZE 10000000
void calculate2(char * fileName) {
printf("Please wait while calculating...\n");
// Open the file for reading.
FILE *file = fopen(fileName, "r");
// maximun size of the line to read.
// memory allocation for the line to read.
char* line = malloc(LINE_SIZE);
// char **stockNameArray = malloc( sizeof(char *) * EXPECTED_STOCK_SIZE);
// int stockNameArrayPos = 0;
double *bidArray = malloc( sizeof(double) * EXPECTED_STOCK_SIZE );
int bidArrayPos = 0;
double *askArray = malloc( sizeof(double) * EXPECTED_STOCK_SIZE);
int askArrayPos = 0;
double *spreadArray = malloc( sizeof(double) * EXPECTED_STOCK_SIZE);
int spreadArrayPos = 0;
double sum=0;
int i=0,j=0;
while (fgets(line, LINE_SIZE, file)!= NULL){
// printf("Please wait while ...%d\n ", j);
j++;
char **fields = split(line, ',', 15);
const char * volvbEquity = "VOLVB SS Equity";
int comp = strcmp(fields[0], volvbEquity);
if (comp == 0) {
double bidValue = atof(fields[2]);
double askValue = atof(fields[3]);
bidArray[bidArrayPos++] = bidValue;
askArray[askArrayPos++] = askValue;
if (askValue - bidValue > 0) {
double spreadValue = ((askValue - bidValue) / (askValue + bidValue) * 20000);
spreadArray[spreadArrayPos++] = spreadValue;
sum = sum + spreadValue;
}
}
}
//quick sort the spread
qsort(spreadArray, spreadArrayPos, sizeof(double), cmpfunc);
int mediumPos;
double mean;
double medium;
if(spreadArrayPos % 2 == 0) {
mediumPos = spreadArrayPos / 2;
medium = (spreadArray[mediumPos] + spreadArray[mediumPos+1]) / 2;
} else {
mediumPos = (spreadArrayPos)/2 + 1;
medium = spreadArray[mediumPos];
}
mean = sum / spreadArrayPos;
printf("Please find mean and medium %f %f\n", mean, medium);
free(bidArray);
free(askArray);
free(spreadArray);
}
int main(int argc, char **argv) {
calculate2("/home/alan_yu/workspace/scandi.csv");
return(0);
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
void split(char *line, char sep, char **fields) {
int lptr = 0, fptr = 0;
fields[fptr++] = line;
while (line[lptr]) {
if (line[lptr] == sep) {
line[lptr] = '\0';
fields[fptr] = &(line[lptr+1]);
fptr++;
}
lptr++;
}
}
int cmpfunc (const void * a, const void * b) {
return *(double *)a > *(double *)b ? 1 : -1;
}
#define LINE_SIZE 1000000
#define EXPECTED_STOCK_SIZE 10000000
#define COLUMN_NUM 15
void calculate2(char * fileName) {
printf("Please wait while calculating...\n");
// Open the file for reading.
FILE *file = fopen(fileName, "r");
// maximun size of the line to read.
// memory allocation for the line to read.
char* line = malloc(LINE_SIZE);
// char **stockNameArray = malloc( sizeof(char *) * EXPECTED_STOCK_SIZE);
// int stockNameArrayPos = 0;
double *bidArray = malloc( sizeof(double) * EXPECTED_STOCK_SIZE );
int bidArrayPos = 0;
double *askArray = malloc( sizeof(double) * EXPECTED_STOCK_SIZE);
int askArrayPos = 0;
double *spreadArray = malloc( sizeof(double) * EXPECTED_STOCK_SIZE);
int spreadArrayPos = 0;
double sum=0;
int i=0,j=0;
while (fgets(line, LINE_SIZE, file)!= NULL){
// printf("Please wait while ...%d\n ", j);
j++;
char **fields = malloc(sizeof(char *) * COLUMN_NUM);
split(line, ',', fields);
const char * volvbEquity = "VOLVB SS Equity";
int comp = strcmp(fields[0], volvbEquity);
if (comp == 0) {
double bidValue = atof(fields[2]);
double askValue = atof(fields[3]);
bidArray[bidArrayPos++] = bidValue;
askArray[askArrayPos++] = askValue;
if (askValue - bidValue > 0) {
double spreadValue = ((askValue - bidValue) / (askValue + bidValue) * 20000);
spreadArray[spreadArrayPos++] = spreadValue;
sum = sum + spreadValue;
}
}
// free memory for fields.
free(fields);
}
// free memory for the line variable.
free(line);
//quick sort the spread
qsort(spreadArray, spreadArrayPos, sizeof(double), cmpfunc);
int mediumPos;
double mean;
double medium;
if(spreadArrayPos % 2 == 0) {
mediumPos = spreadArrayPos / 2;
medium = (spreadArray[mediumPos] + spreadArray[mediumPos+1]) / 2;
} else {
mediumPos = (spreadArrayPos)/2 + 1;
medium = spreadArray[mediumPos];
}
mean = sum / spreadArrayPos;
printf("Please find mean and medium %f %f\n", mean, medium);
free(bidArray);
free(askArray);
free(spreadArray);
}
int main(int argc, char **argv) {
calculate2("/home/alan_yu/workspace/scandi.csv");
return(0);
}
On Linux, always run the program in valgrind if it is crashing.
It will not only tell you exactly what is wrong in your code, but also specify what code lines are responsible for the error.
You need to check the return value of fopen! I would suggest doing the same thing for malloc, but it's much less likely that malloc is failing due to a missing file or typographical error particularly if you're allocating large chunks! You wouldn't want to dereference a null pointer, right?
I'm assuming each line has at least four comma-separated fields, because you're using fields[3]. You should probably work out how to guard against using uninitialised values here. I'd start by re-engineering split so that you have some terminal NULL value or something in its output (and while we're on that topic, don't forget to free the return value).
Is it possible that you might be dividing by zero? That'd be something else you need to guard against.
Shouldn't cmpfunc return 0 when items are equal? I've seen implementations crash when return values for comparison functions for qsort and bsearch are inconsistent.
You claimed below in a comment that your lines have fifteen commas. This implies that you have sixteen fields (count them below), since the number of fields is n+1 where n is the number of separators.
field1, field2, field3, field4,
field4, field6, field7, field8,
field9, field10,field11,field12,
field13,field14,field15,field16
There are fifteen commas and sixteen fields in this table. You're only allocating enough for fifteen fields, however. This is a buffer overflow, more typical undefined behaviour.
Finally, I find out the problem comes from the
while (fgets(line, LINE_SIZE, file)!= NULL){
char **fields = split(line, ',', 15);
I have changed it to
char **fields = malloc(sizeof(char *) * 15 * 10000);
while (fgets(line, LINE_SIZE, file)!= NULL){
I haven't try to allocate a large memory to **fields after the while loop due to it takes too much memory to my machine.
It looks like under gcc compile that if I do:
while (fgets(line, LINE_SIZE, file)!= NULL){
char **fields = split(line, ',', 15);
It won't overwrite the **fields from last time. But it works in Mac
Not sure is that correct?
In the end, thanks for all of you guys help for my problem.

Dynamic array inside struct and malloc fail

Consider the following abstracted code that reads some bytes from a file:
typedef struct A{
int size;
char * dataArray;
}A
A load(char* filename, int inSize)
{
A newA;
newA.size = inSize;
FILE *filePtr;
filePtr = fopen(filename,"rb");
char buff[1];
int i = 0;
newA.dataArray = ( char*)malloc(sizeof(char) * newA.size);
for (i = 0; i < newA.size; i++)
{
fread(buff, sizeof(char), 1, filePtr);
newA.dataArray[i] = buff[0];
}
char* copyOfDataArray = (char*)malloc(sizeof(char) * newA.size);
for (i = 0; i < newA.size; i++)
{
fread(buff, sizeof(char), 1, filePtr);
copyOfDataArray[i] = newA.dataArray[i];
}
newA.dataArray = copyOfDataArray;
return newA
}
void Initialize()
{
A first = load("file1", 100);
A second = load("file2", 20);
}
Both calls to function load return the expected result (data array has the same bytes as the file). Variables first and second are never used again.
However after a couple of hundreds lines of code the program always crashes with:
*malloc.c:2451: sYSMALLOC: Assertion '(old_top == (..... failed.*
The crash always occurs on the same line of code, but that line has nothing to do with variables first, second or even with struct A whatsoever.
My question is: is my way of instancing and loading 'first' and 'second' wrong? Can it cause some kind of memory leak / memory overflow that crashes the program long after the load function has finished?
Bonus: The crash does not occur if I only load "file1", as soon as i load both "file1" and "file2" the crash reappears.
Sorry for the long question.
You have memory leaks there. You have to free the previously allocated memory in newA.dataArray, before you assign there a new memory.
As stated by Joachim, read operation is very time consuming and you shall read data in blocks to minimize overhead.
Additionally, you have to close file descriptors, otherwise they will be depleted soon.
There are many issue on the code as already given by others.
Please checks bellow
typedef struct A{
int size;
char * dataArray;
}A
A load(char* filename, int inSize)
{
A newA;
newA.size = inSize;
FILE *filePtr = NULL ; //Use NULL
char buff[1]; //Size of buffer is only 1 ,If needed increase that to copy more at a time
int i = 0;
filePtr = fopen(filename,"rb");
//Try to check for the filePtr == NULL or not
newA.dataArray = ( char*)malloc(sizeof(char) * newA.size);
//Same checking should be done here
for (i = 0; i < size; i++) //What is size
{
fread(buff, sizeof(char), 1, filePtr);
newA.dataArray[i] = char[0]; //What is char[0]
}
//instead this you can read the bytes in a single call, use that.
// fread(buff, sizeof(char), <size to read >, filePtr);
char* copyOfDataArray = (char*)malloc(sizeof(char) * newA.size);
for (i = 0; i < size; i++)
{
fread(buff, sizeof(char), 1, filePtr);
copyOfDataArray[i] = newA.dataArray[i];
}
//why reading again once you done above.
newA.dataArray = copyOfDataArray;
return newA; //Please check: How you can return a auto variable.
}
void Initialize()
{
A first = load("file1", 100);
A second = load("file2", 20);
}

Memory comparison causes system halt

I am working on a kernel module and I need to compare two buffers to find out if they are equivalent. I am using the memcmp function defined in the Linux kernel to do so. My first buffer is like this:
cache_buffer = (unsigned char *)vmalloc(4097);
cache_buffer[4096] = '/0';
The second buffer is from a page using the page_address() function.
page = bio_page(bio);
kmap(page);
write_buffer = (char *)page_address(page);
kunmap(page);
I have printed the contents of both buffers before hand and not only to they print correctly, but they also have the same content. So next, I do this:
result = memcmp(write_buffer, cache_buffer, 2048); // only comparing up to 2048 positions
This causes the kernel to freeze up and I cannot figure out why. I checked the implementation of memcmp and saw nothing that would cause the freeze. Can anyone suggest a cause?
Here is the memcmp implementation:
int memcmp(const void *cs, const void *ct, size_t count)
{
const unsigned char *su1, *su2;
int res = 0;
for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
if ((res = *su1 - *su2) != 0)
break;
return res;
}
EDIT: The function causing the freeze is memcmp. When I commented it out, everything worked. Also, when I did I memcmp as follows
memcmp(write_buffer, write_buffer, 2048); //comparing two write_buffers
Everything worked as well. Only when I throw the cache_buffer into the mix is when I get the error. Also, above is a simplification of my actual code. Here is the entire function:
static int compare_data(sector_t location, struct bio * bio, struct cache_c * dmc)
{
struct dm_io_region where;
unsigned long bits;
int segno;
struct bio_vec * bvec;
struct page * page;
unsigned char * cache_data;
char * temp_data;
char * write_data;
int result, length, i;
cache_data = (unsigned char *)vmalloc((dmc->block_size * 512) + 1);
where.bdev = dmc->cache_dev->bdev;
where.count = dmc->block_size;
where.sector = location << dmc->block_shift;
printk(KERN_DEBUG "place: %llu\n", where.sector);
dm_io_sync_vm(1, &where, READ, cache_data, &bits, dmc);
length = 0;
bio_for_each_segment(bvec, bio, segno)
{
if(segno == 0)
{
page = bio_page(bio);
kmap(page);
write_data = (char *)page_address(page);
//kunmap(page);
length += bvec->bv_len;
}
else
{
page = bio_page(bio);
kmap(page);
temp_data = strcat(write_data, (char *)page_address(page));
//kunmap(page);
write_data = temp_data;
length += bvec->bv_len;
}
}
printk(KERN_INFO "length: %u\n", length);
cache_data[dmc->block_size * 512] = '\0';
for(i = 0; i < 2048; i++)
{
printk("%c", write_data[i]);
}
printk("\n");
for(i = 0; i < 2048; i++)
{
printk("%c", cache_data[i]);
}
printk("\n");
result = memcmp(write_data, cache_data, length);
return result;
}
EDIT #2: Sorry guys. The problem was not memcmp. It was the result of memcmp. When ever it returned a positive or negative number, the function that called my function would play with some pointers, one of which was uninitialized. I don't know why I didn't realize it before. Thanks for trying to help though!
I'm no kernel expert, but I would assume you need to keep this memory mapped while doing the comparison? In other words, don't call kunmap until after the memcmp is complete. I would presume that calling it before will result in write_buffer pointing to a page which is no longer mapped.
Taking your code in the other question, here is a rough attempt at incremental. Still needs some cleanup, I'm sure:
static int compare_data(sector_t location, struct bio * bio, struct cache_c * dmc)
{
struct dm_io_region where;
unsigned long bits;
int segno;
struct bio_vec * bvec;
struct page * page;
unsigned char * cache_data;
char * temp_data;
char * write_data;
int length, i;
int result = 0;
size_t position = 0;
size_t max_size = (dmc->block_size * 512) + 1;
cache_data = (unsigned char *)vmalloc(max_size);
where.bdev = dmc->cache_dev->bdev;
where.count = dmc->block_size;
where.sector = location << dmc->block_shift;
printk(KERN_DEBUG "place: %llu\n", where.sector);
dm_io_sync_vm(1, &where, READ, cache_data, &bits, dmc);
bio_for_each_segment(bvec, bio, segno)
{
// Map the page into memory
page = bio_page(bio);
write_data = (char *)kmap(page);
length = bvec->bv_len;
// Make sure we don't go past the end
if(position >= max_size)
break;
if(position + length > max_size)
length = max_size - position;
// Compare the data
result = memcmp(write_data, cache_data + position, length);
position += length;
kunmap(page);
// If the memory is not equal, bail out now and return the result
if(result != 0)
break;
}
cache_data[dmc->block_size * 512] = '\0';
return result;
}

Incorrect checksum for freed object on malloc

I get an
malloc: *** error for object 0x1001012f8: incorrect checksum for freed object
- object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
error in the following function:
char* substr(const char* source, const char* start, const char* end) {
char *path_start, *path_end, *path;
int path_len, needle_len = strlen(start);
path_start = strcasestr(source, start);
if (path_start != NULL) {
path_start += needle_len;
path_end = strcasestr(path_start, end);
path_len = path_end - path_start;
path = malloc(path_len + 1);
strncpy(path, path_start, path_len);
path[path_len] = '\0';
} else {
path = NULL;
}
return path;
}
How can I make this work? When I rewrite the function to allocate the memory using path[path_len + 1] it works just fine.
Now, the part I don't understand is, that I never even call free in any point of my application, as every allocated memory is needed for the program until it exists (which, AFAIK will invalidate every allocated memory anyway?!)
So, how can a freed object be corrupt if I never free one?
The function is called in this one:
char *read_response(int sock) {
int bytes_read;
char *buf = (char*)malloc(BUF_SIZE);
char *cur_position = buf;
while ((bytes_read = read(sock, cur_position, BUF_SIZE)) > 0) {
cur_position += bytes_read;
buf = realloc(buf, sizeof(buf) + BUF_SIZE);
}
int status = atoi(substr(buf, "HTTP/1.0 ", " "));
There is the realloc, am I using that wrong? I want to read the complete server response, so I have to reallocate after every iteration, don't I?
In read_response, you are probably overwriting the end of the buffer pointed to by buf.
The problem is that buf is a pointer, so sizeof(buf) will return the size of a pointer (probably 4 or 8 depending on your CPU). You are using sizeof as if buf were an array, which is not really the same thing as a pointer in C although they seem interchangeable in some contexts.
Instead of using sizeof, you need to be keeping track of the last size that you allocated for buf, and add BUF_SIZE to that each time you enlarge the buffer.
You should also consider that the read operation may be returning considerably fewer characters than BUF_SIZE on each call, so doing a realloc on buf in each iteration may be overkill. That probably won't cause any problems for you in terms of correctness, though; it will just use more memory than it needs to.
I would do something more like the code below.
#define MIN_BUF_SPACE_THRESHOLD (BUF_SIZE / 2)
char *read_response(int sock) {
int bytes_read;
char *buf = (char*)malloc(BUF_SIZE);
int cur_position = 0;
int space_left = BUF_SIZE;
if (buf == NULL) {
exit(1); /* or try to cope with out-of-memory situation */
}
while ((bytes_read = read(sock, buf + cur_position, space_left)) > 0) {
cur_position += bytes_read;
space_left -= bytes_read;
if (space_left < MIN_BUF_SPACE_THRESHOLD) {
buf = realloc(buf, cur_position + space_left + BUF_SIZE);
if (buf == NULL) {
exit(1); /* or try to cope with out-of-memory situation */
}
space_left += BUF_SIZE;
}
}
This version has the advantage of not trying to allocate more space if the read call comes back with only a few bytes of data.
This line
buf = realloc(buf, sizeof(buf) + BUF_SIZE);
is wrong. All reallocations are with the same size, BUF_SIZE + sizeof(char*). Then you are writing to unallocated memory when reading from the socket, overwriting memory previously freed by a realloc.
You have to keep track of the allocated size,
size_t current_buf_size = BUF_SIZE;
/* ... */
char *temp = realloc(buf, current_buf_size + BUF_SIZE);
if (temp == NULL) {
/* die or repair */
}
buf = temp;

C string append

I'm looking for an efficient method for appending multiple strings.
The way it should work is C++ std::string::append or JAVA StringBuffer.append.
I wrote a function which actually reallocs previous source pointer and does strcat.
I believe this is not an efficient method as compiler may implement this free and malloc.
Other way I could think of (like std::vector) is allocate memory in bulk (1KB for eg) and do strcpy. In that case every append call will check if the total required allocation is more than (1200 bytes) the amount allocated in bulk, realloc to 2KB. But in that case there will be some memory wasted.
I'm looking for a balance between the above but the preference is performance.
What other approaches are possible. Please suggest.
I would add each string to a list, and add the length of each new string to a running total. Then, when you're done, allocate space for that total, walk the list and strcpy each string to the newly allocated space.
The classical approach is to double the buffer every time it is too small.
Start out with a "reasonable" buffer, so you don't need to do realloc()s for sizes 1, 2, 4, 8, 16 which are going to be hit by a large number of your strings.
Starting out at 1024 bytes means you will have one realloc() if you hit 2048, a second if you hit 4096, and so on. If rampant memory consumption scares you, cap the growth rate once it hits something suitably big, like 65536 bytes or whatever, it depends on your data and memory tolerance.
Also make sure you buffer the current length, so you can do strcpy() without having to walk the string to find the length, first.
Sample function to concatenate strings
void
addToBuffer(char **content, char *buf) {
int textlen, oldtextlen;
textlen = strlen(buf);
if (*content == NULL)
oldtextlen = 0;
else
oldtextlen = strlen(*content);
*content = (char *) realloc( (void *) *content, (sizeof(char)) * (oldtextlen+textlen+1));
if ( oldtextlen != 0 ) {
strncpy(*content + oldtextlen, buf, textlen + 1);
} else {
strncpy(*content, buf, textlen + 1);
}
}
int main(void) {
char *content = NULL;
addToBuffer(&content, "test");
addToBuffer(&content, "test1");
}
I would do something like this:
typedef struct Stringbuffer {
int capacity; /* Maximum capacity. */
int length; /* Current length (excluding null terminator). */
char* characters; /* Pointer to characters. */
} Stringbuffer;
BOOL StringBuffer_init(Stringbuffer* buffer) {
buffer->capacity = 0;
buffer->length = 0;
buffer->characters = NULL;
}
void StringBuffer_del(Stringbuffer* buffer) {
if (!buffer)
return;
free(buffer->characters);
buffer->capacity = 0;
buffer->length = 0;
buffer->characters = NULL;
}
BOOL StringBuffer_add(Stringbuffer* buffer, char* string) {
int len;
int new_length;
if (!buffer)
return FALSE;
len = string ? strlen(string) : 0;
if (len == 0)
return TRUE;
new_length = buffer->length + len;
if (new_length >= new_capacity) {
int new_capacity;
new_capacity = buffer->capacity;
if (new_capacity == 0)
new_capacity = 16;
while (new_length >= new_capacity)
new_capacity *= 2;
new_characters = (char*)realloc(buffer->characters, new_capacity);
if (!new_characters)
return FALSE;
buffer->capacity = new_capacity;
buffer->characters = new_characters;
}
memmove(buffer->characters + buffer->length, string, len);
buffer->length = new_length;
buffer->characters[buffer->length] = '\0';
return TRUE;
}

Resources