malloc()/free() in a loop - c

I have an assignment for a class where I have a text file, bikes.txt. The contents are attributes of bikes like so:
bike_id=16415
bike_station_id=455
bike_status=free
bike_id=6541
bike_station_id=1
bike_status=reserved
bike_id=5
bike_station_id=6451
bike_status=reserved
Right now I'm trying to read all bike_id's, and I have a question:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* getAttribute(const char* attr, const char* line);
int main(int argc, char** argv)
{
FILE* file = fopen("bikes.txt", "r");
if (file != NULL)
{
char line[256];
while (fgets(line, sizeof(line), file))
{
if (strstr(line, "bike_id=") != NULL)
{
char* bikeIdText = getAttribute("bike_id", line);
printf("\"%s\"", bikeIdText);
//free(bikeIdText);
//bikeIdText = NULL;
}
}
}
}
char* getAttribute(const char* attr, const char* line)
{
int lineLength = strlen(line);
int attrLength = strlen(attr);
// +1 because of "="
char* attrText = malloc(lineLength - attrLength + 1);
// +2 because of "=" and NEWLINE
memcpy(attrText, line + attrLength + 1, lineLength - (attrLength + 2));
return attrText;
}
The above code works. The output is:
"16415""6541""5"
The problem is that - if I'm right - the getAttribute() function will allocate more and more memory which won't get freed.
However, if I uncomment the free(bikeIdText); and bikeIdText = NULL; lines in main(), the output shows that the same memory location is used, because the longer values won't get overwritten by shorter ones.
The output in this case:
"16415""65415""55415"
How could I solve the problem?

This
char* attrText = malloc(lineLength - attrLength + 1);
shall be
char * attrText = malloc(lineLength - (attrLength + 1));
attrText[lineLength - (attrLength + 1) - 1] = '\0' ;
or the equivalent
char * attrText = malloc(lineLength - attrLength - 1);
attrText[lineLength - attrLength - 2] = '\0' ;
This assumes line to end with one addtional character.

Last character of the string is not set to '\0' therefore, the "%s" prints more than it should (%s stops output at \0 byte).
Try to malloc one byte more char* attrText = malloc(lineLength - attrLength + 2) and set, attrText[lineLength - attrLength + 1] = '\0' before return.

Related

Getting error when trying release heap-allocated memory

I am working on a program that reads text, line by line from input file. Once the line is read, the program reverses order of words in that string, prints it to the output file and starts reading next line. My program reads only specific number of characters from one line, meaning that if line contains more characters then that specific number, all of them have to skipped until next line is reached. My program seems to work fine.
One of the task requirements is to use dynamically allocated arrays. That is the part where my main problem lies. Once I try to free heap-allocated memory, the program fails with error message that says: HEAP CORRUPTION DETECTED. It must be that I messed up something while working with them. However, I am unable to find the real reason.
#include <stdio.h>
#include <stdlib.h>
#define BUFFER_SIZE 255
int readLine(FILE** stream, char** buffer, int* bufferSize);
void reverseString(char* buffer, char** reverse, int bufferSize, int lastLine);
int main(int argc, char** argv)
{
char* buffer = NULL;
char* reverse = NULL;
int bufferSize = 0;
int lastLine = 0;
FILE* intputStream = fopen(argv[1], "r");
FILE* outputStream = fopen(argv[2], "w");
if (intputStream == NULL || outputStream == NULL)
{
printf("Input or output file cannot be opened\n");
return 0;
}
while (!feof(intputStream))
{
lastLine = readLine(&intputStream, &buffer, &bufferSize);
reverse = (char*)malloc(sizeof(char) * bufferSize);
if (reverse != NULL)
{
reverseString(buffer, &reverse, bufferSize, lastLine);
fputs(reverse, outputStream);
}
}
fclose(intputStream);
fclose(outputStream);
free(buffer);
free(reverse);
return 0;
}
int readLine(FILE** stream, char** buffer, int* bufferSize)
{
char tempBuffer[BUFFER_SIZE] = { 0 };
int lastLine = 0;
if (*stream != NULL)
{
fgets(tempBuffer, BUFFER_SIZE, *stream);
char ignoredChar[100] = { 0 };
*bufferSize = strlen(tempBuffer);
// Ignoring in the same line left characters and checking if this is the last line
if (tempBuffer[(*bufferSize) - 1] != '\n')
{
fgets(ignoredChar, 100, *stream);
if (!feof(*stream))
lastLine = 1;
}
// Allocating memory and copying line to dynamically-allocated array
*buffer = (char*)malloc(sizeof(char) * (*bufferSize));
if (*buffer != NULL)
{
memcpy(*buffer, tempBuffer, (*bufferSize));
(*buffer)[(*bufferSize)] = '\0';
}
}
// Return whether or not the last line is read
return lastLine;
}
void reverseString(char* buffer, char** reverse, int bufferSize, int lastLine)
{
int startingValue = (lastLine ? bufferSize - 1 : bufferSize - 2);
int wordStart = startingValue, wordEnd = startingValue;
int index = 0;
while (wordStart > 0)
{
if (buffer[wordStart] == ' ')
{
int i = wordStart + 1;
while (i <= wordEnd)
(*reverse)[index++] = buffer[i++];
(*reverse)[index++] = ' ';
wordEnd = wordStart - 1;
}
wordStart--;
}
for (int i = 0; i <= wordEnd; i++)
{
(*reverse)[index] = buffer[i];
index++;
}
if (!lastLine)
(*reverse)[index++] = '\n';
(*reverse)[index] = '\0';
}
One of the problems is in readLine where you allocate and copy your string like this (code shortened to the relevant parts):
*bufferSize = strlen(tempBuffer);
*buffer = (char*)malloc(sizeof(char) * (*bufferSize));
(*buffer)[(*bufferSize)] = '\0';
This will not allocate space for the null-terminator. And you will write the null-terminator out of bounds of the allocated memory. That leads to undefined behavior.
You need to allocate an extra byte for the null-terminator:
*buffer = malloc(*bufferSize + 1); // +1 for null-terminator
[Note that I don't cast the result, and don't use sizeof(char) because it's specified to always be equal to 1.]
Another problem is because you don't include the null-terminator in the bufferSize the allocation for reverse in main will be wrong as well:
reverse = (char*)malloc(sizeof(char) * bufferSize);
Which should of course be changed to:
reverse = malloc(bufferSize + 1); // +1 for null-terminator

how to reverse the words in string with dot delimiter in c maintaining the dot in output

i want to reverse the words of string with dots in between them in c.
i tried using for loops in c.
#include <stdio.h>
#include <string.h>
int main()
{
char str[100];
scanf("%s",str);
printf("%s",str);
int length = strlen(str);
// Traverse string from end
int i;
for (i = length - 1; i >= 0; i--) {
if (str[i] == '.') {
// putting the NULL character at the
// position of space characters for
// next iteration.
str[i] = '.';
// Start from next charatcer
printf("%s ", &(str[i]) + 1);
}
}
// printing the last word
printf("%s", str);
return 0;
}
Example input
i.love.you
Example output
you.love.i
This is how I would do it (omitting error checking):
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
char str[100];
char *c;
scanf("%s", str);
while ((c = strrchr(str, '.'))) {
*c = 0;
printf("%s.", c + 1);
}
printf("%s\n", str);
return 0;
}
Output:
$ ./a.out
this.is.a.test.for.stackoverflow
stackoverflow.for.test.a.is.this
Well, there's two glaring errors:
You never actually NUL-terminate (not NULL) the string you're going to pass to printf. I suspect you copied that part from somewhere or somebody.
// putting the NULL character at the
// position of space characters for
// next iteration.
str[i] = '.';
Actually printing spaces instead of whatever delimiter:
printf("%s ", &(str[i]) + 1);
/* ^ */
What I'd do instead:
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
struct {
char input[100];
char result[100];
}testcases[] = {
{.input = "i.love.you",
.result = "you.love.i"},
{.input = ".hi.you",
.result = "you.hi."},
{.input = "there.is.no spoon",
.result = "no spoon.is.there"},
{.input = "..",
.result = ".."},
{.input = "oh.no.",
.result = ".no.oh"},
{.input = "",
.result = ""}
};
char *function(char * s) {
size_t max_length = strlen(s) + 1;
/* Choosing to allocate and return a new string,
but you can do whatever you like: */
char * scratch = malloc(max_length);
/* Keeping an offset to where we concatenate substrings in the output: */
size_t offset = 0;
for (char * delimiter = NULL; (delimiter = strrchr(s, '.')) != NULL;) {
/* NUL-terminate where delimiter was found, without modifying the input argument,
we'll need it later: */
*delimiter = 0;
/* Copy the substring after the delimiter: */
int nwritten = snprintf(scratch + offset, max_length - offset, "%s.", delimiter + 1);
/* Adjust offset: */
offset += nwritten;
}
/* Now copy what's left over (if anything) at the beginning of the input string: */
snprintf(scratch + offset, max_length - offset, "%s", s);
return scratch;
}
int main(void) {
for (size_t idx = 0; idx < sizeof testcases / sizeof *testcases; ++idx) {
char * reversed = function(testcases[idx].input);
assert(!strcmp(reversed, testcases[idx].result));
printf("Testcase #%lu passed\n", idx);
free (reversed);
}
return EXIT_SUCCESS;
}
Hope you have time to understand how it works before your homework's deadline is up. :)

How to replace a part of a string with another substring

I need the string "on" to be replaced with "in", strstr() function returns a pointer to a string so i figured assigning the new value to that pointer would work but it didn't
#include <stdio.h>
#include <string.h>
int main(void) {
char *m = "cat on couch";
*strstr(m, "on") = "in";
printf("%s\n", m);
}
Replacing a substring with another is easy if both substrings have the same length:
locate the position of the substring with strstr
if it is present, use memcpy to overwrite it with the new substring.
assigning the pointer with *strstr(m, "on") = "in"; is incorrect and should generate a compiler warning. You would avoid such mistakes with gcc -Wall -Werror.
note however that you cannot modify a string literal, you need to define an initialized array of char so you can modify it.
Here is a corrected version:
#include <stdio.h>
#include <string.h>
int main(void) {
char m[] = "cat on couch";
char *p = strstr(m, "on");
if (p != NULL) {
memcpy(p, "in", 2);
}
printf("%s\n", m);
return 0;
}
If the replacement is shorter, the code is a little more complicated:
#include <stdio.h>
#include <string.h>
int main(void) {
char m[] = "cat is out roaming";
char *p = strstr(m, "out");
if (p != NULL) {
memcpy(p, "in", 2);
memmove(p + 2, p + 3, strlen(p + 3) + 1);
}
printf("%s\n", m);
return 0;
}
In the generic case, it is even more complicated and the array must be large enough to accommodate for the length difference:
#include <stdio.h>
#include <string.h>
int main(void) {
char m[30] = "cat is inside the barn";
char *p = strstr(m, "inside");
if (p != NULL) {
memmove(p + 7, p + 6, strlen(p + 6) + 1);
memcpy(p, "outside", 7);
}
printf("%s\n", m);
return 0;
}
Here is a generic function that handles all cases:
#include <stdio.h>
#include <string.h>
char *strreplace(char *s, const char *s1, const char *s2) {
char *p = strstr(s, s1);
if (p != NULL) {
size_t len1 = strlen(s1);
size_t len2 = strlen(s2);
if (len1 != len2)
memmove(p + len2, p + len1, strlen(p + len1) + 1);
memcpy(p, s2, len2);
}
return s;
}
int main(void) {
char m[30] = "cat is inside the barn";
printf("%s\n", m);
printf("%s\n", strreplace(m, "inside", "in"));
printf("%s\n", strreplace(m, "in", "on"));
printf("%s\n", strreplace(m, "on", "outside"));
return 0;
}
There are a few problems with this approach. First, off, m is pointing to read-only memory, so attempting to overwrite the memory there it is undefined behavior.
Second, the line: strstr(m, "on") = "in" is not going to change the pointed-to string, but instead reassign the pointer.
Solution:
#include <stdio.h>
#include <string.h>
int main(void)
{
char m[] = "cat on couch";
memcpy(strstr(m, "on"), "in", 2);
printf("%s\n", m);
}
Note that if you had just used plain strcpy it would null-terminate after "cat in", so memcpy is necessary here. strncpy will also work, but you should read this discussion before using it.
It should also be known that if you are dealing with strings that are not hard-coded constants in your program, you should always check the return value of strstr, strchr, and related functions for NULL.
This function performs a generic pattern replace for all instances of a substring with a replacement string. It allocates a buffer of the correct size for the result. Behaviour is well defined for the case of the empty substring corresponding to the javascript replace() semantics. Where possible memcpy is used in place of strcpy.
/*
* strsub : substring and replace substring in strings.
*
* Function to replace a substring with a replacement string. Returns a
* buffer of the correct size containing the input string with all instances
* of the substring replaced by the replacement string.
*
* If the substring is empty the replace string is written before each character
* and at the end of the string.
*
* Returns NULL on error after setting the error number.
*
*/
char * strsub (char *input, char *substring, char *replace)
{
int number_of_matches = 0;
size_t substring_size = strlen(substring), replace_size = strlen(replace), buffer_size;
char *buffer, *bp, *ip;
/*
* Count the number of non overlapping substring occurences in the input string. This
* information is used to calculate the correct buffer size.
*/
if (substring_size)
{
ip = strstr(input, substring);
while (ip != NULL)
{
number_of_matches++;
ip = strstr(ip+substring_size, substring);
}
}
else
number_of_matches = strlen (input) + 1;
/*
* Allocate a buffer of the correct size for the output.
*/
buffer_size = strlen(input) + number_of_matches*(replace_size - substring_size) + 1;
if ((buffer = ((char *) malloc(buffer_size))) == NULL)
{
errno=ENOMEM;
return NULL;
}
/*
* Rescan the string replacing each occurence of a match with the replacement string.
* Take care to copy buffer content between matches or in the case of an empty find
* string one character.
*/
bp = buffer;
ip = strstr(input, substring);
while ((ip != NULL) && (*input != '\0'))
{
if (ip == input)
{
memcpy (bp, replace, replace_size+1);
bp += replace_size;
if (substring_size)
input += substring_size;
else
*(bp++) = *(input++);
ip = strstr(input, substring);
}
else
while (input != ip)
*(bp++) = *(input++);
}
/*
* Write any remaining suffix to the buffer, or in the case of an empty find string
* append the replacement pattern.
*/
if (substring_size)
strcpy (bp, input);
else
memcpy (bp, replace, replace_size+1);
return buffer;
}
For testing purposes I include a main program that uses the replacement function.
#define BUFSIZE 1024
char * read_string (const char * prompt)
{
char *buf, *bp;
if ((buf=(char *)malloc(BUFSIZE))==NULL)
{
error (0, ENOMEM, "Memory allocation failure in read_string");
return NULL;
}
else
bp=buf;
printf ("%s\n> ", prompt);
while ((*bp=getchar()) != '\n')bp++;
*bp = '\0';
return buf;
}
int main ()
{
char * input_string = read_string ("Please enter the input string");
char * pattern_string = read_string ("Please enter the test string");
char * replace_string = read_string ("Please enter the replacement string");
char * output_string = strsub (input_string, pattern_string, replace_string);
printf ("Result :\n> %s\n", output_string);
free (input_string);
free (pattern_string);
free (replace_string);
free (output_string);
exit(0);
}

CSV file reading issue: with large amont of data

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* getfield(char* line, int num) {
char* tok = line;
char* result;
if (line)
{
do
{
if (!--num)
{
tok = strchr(line, ',');
if (tok == NULL)
{
tok = &line[strlen(line)];
}
size_t fieldlen = tok - line;
if (fieldlen)
{
result = (char*)malloc(fieldlen+1);
result[fieldlen] = '\0';
strncpy(result, line, fieldlen);
return result;
}
else
{
break;
}
}
tok = strchr(line, ',');
line = tok + 1;
} while (tok);
}
result = (char*)malloc(2);
strcpy(result, "0");
return result;
}
int main()
{
FILE* stream = fopen("data.csv", "r");
char line[1024];
char *pstr;int num1,num2,num3;
char* value1,value2,value3;
while (fgets(line, 1024, stream))
{
char* tmp = strdup(line);
value1=getfield(tmp, 1);
value2=getfield(tmp, 2);
value3=getfield(tmp, 3);
num1 =strtol(value1,&pstr,10);
num2 =strtol(value2,&pstr,10);
num3 =strtol(value3,&pstr,10)
free(value1);
free(value2);
free(value3);
printf("Fields 1,2,3 would be 1=%d 2=%d 3=%d\n", num1,num2,num3);
// NOTE strtok clobbers tmp
free(tmp);
}
}
above is my C code to read the file....
:::: data.csv ::::
10,34,30
10,33,
23,45,23
25,,45
above is my file..
here my issue is I can call the function with "num" field. so that for reading of every line I suppose to call the function 3 times.. !! so the performance is too low for the large data files.. can someone help me that I can call the function at once and It will return an array.. than I can easily store and print (e.g. for the first line array[0]=10,array[1]=34,array[2]=30 )
You could speed it up by creating a fast split function that will destroy your line (not to mention the many lurking segmentation faults and memory leaks; this code has NO error checking or freeing of resources):
#include <stdio.h>
#include <stdlib.h>
char **split(char *line, char sep, int fields) {
char **r = (char **)malloc(fields * sizeof(char*));
int lptr = 0, fptr = 0;
r[fptr++] = line;
while (line[lptr]) {
if (line[lptr] == sep) {
line[lptr] = '\0';
r[fptr] = &(line[lptr+1]);
fptr++;
}
lptr++;
}
return r;
}
int main(int argc, char **argv) {
char line[] = "some,info,in a line";
char **fields = split(line, ',', 3);
printf("0:%s 1:%s 2:%s\n", fields[0], fields[1], fields[2]);
}
result:
0:some 1:info 2:in a line
I haven't run timing test on your code, but I'll bet a nickel that the problems is using malloc(). That is SLOW.
What Bart means is that a char[] array can contain multiple strings, back-to-back. If you scan through the array as a single string once, changing all ',' characters to '\0', your last line would look like:
{ '2', '5', 0, 0, '4', '5', 0, ? rest of buffer }
^ ^ ^ !
The ^ carets below mark the positions where you'd record pointers to three strings. As you can see, they are equivalent to separate strings of "25", "", "45" in separate arrays. The ! below marks the 0 that ended the original string. Nothing beyond that has any meaning.
All this depends on being able to modify the original string in-place, probably rendering it useless for any further processing (like printing out the offending line if an invalid field is detected). However, you are already copying the original buffer for local use, so that shouldn't be a problem. I'd get rid of the malloc for that copy buffer too, by the way.
Code might look like:
while (fgets(line, 1024, stream))
{
char tmp[sizeof line]; /* this will save a malloc()/free() pair */
char *tok, *fence, *pstr;
char ch, *cp1=line, *cp2=tmp;
while (0 != (ch = *cp1++))
*cp2++ = (ch == ',') ? 0 : ch;
fence = cp2; /* remember end of string */
*fence = 0; /* and terminate final string */
tok = tmp; /* point to first token */
num1 =strtol(tok, &pstr, 10);
if (tok < fence) tok += strlen(tok) + 1;
num2 =strtol(tok,&pstr,10);
if (tok < fence) tok += strlen(tok) + 1;
num3 =strtol(tok,&pstr,10);
printf("Fields 1,2,3 would be 1=%d 2=%d 3=%d\n", num1,num2,num3);
}
Obviously you don't need a 1K buffer to handle three values, so there will be a loop to pull out the values. The if statement after the first two strtol() calls is your replacement for getfield(), which isn't needed any more.
After this is working, look at data validation. Nothing in this (or in the original) will detect invalid numbers.

extract string value from a string

gcc 4.4.3
c89
I have the following string
sip:12387654345443222118765#xxx.xxx.xxx.xxx
How can I extract just the number? I just want the number.
12387654345443222118765
Many thanks for any advice,
There are lots of ways to do it, if the string is well-formatted you could use strchr() to search for the : and use strchr() again to search for the # and take everything in between.
Here is another method that looks for a continuous sequence of digits:
char *start = sipStr + strcspn(sipStr, "0123456789");
int len = strspn(start, "0123456789");
char *copy = malloc(len + 1);
memcpy(copy, start, len);
copy[len] = '\0'; //add null terminator
...
//don't forget to
free(copy);
It sounds like you want it as a numeric type, which is going to be difficult (it's too large to fit in an int or a long). In theory you could just do:
const char* original = "sip:12387654345443222118765#xxx.xxx.xxx.xxx";
long num = strtoul(original + 4, NULL, 10);
but it will overflow and strtoul will return -1. If you want it as a string and you know it's always going to be that exact length, you can just pull out the substring with strcpy/strncpy:
const char* original = "sip:12387654345443222118765#xxx.xxx.xxx.xxx";
char num[24];
strncpy(num, original + 4, 23);
num[23] = 0;
If you don't know it's going to be 23 characters long every time, you'll need to find the # sign in the original string first:
unsigned int num_length = strchr(original, '#') - (original + 4);
char* num = malloc(num_length + 1);
strncpy(num, original + 4, num_length);
num[num_length] = 0;
Use a regular expression :)
#include <regex.h>
regcomp() // compile your regex
regexec() // run your regex
regfree() // free your regex
:)
Have a look into the strtok or strtok_r functions.
Here is something that will deal with a variable width substring, which doesn't care about the starting position of the substring. For instance, if string was iax2:xxx#xx.xx.xx.xx, it would still work. It will, however return NULL if either delimiter can't be found.
It uses strchr() to find the delimiters, which lets us know where to start copying and where to stop. It returns an allocated string, the calling function must free() the returned pointer.
I'm pretty sure this is what you want?
Note: Edited from original to be more re-usable and a bit saner.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char *extract_string(const char *str, const char s1, const char s2)
{
char *ret = NULL, *pos1 = NULL, *pos2 = NULL;
size_t len;
if (str == NULL || s1 < 0 || s2 < 0)
return NULL;
pos1 = strchr(str, s1);
pos2 = strchr(str, s2);
if (! pos1 || ! pos2)
return NULL;
len = ((pos2 - str) - (pos1 - str) - 1);
ret = (char *) malloc(len + 1);
if (ret == NULL)
return NULL;
memcpy(ret, str + (pos1 - str) + 1, len);
ret[len] = '\0';
return ret;
}
int main(void)
{
const char *string = "sip:12387654345443222118765#xxx.xxx.xxx.xxx";
char *buff = NULL;
buff = extract_string(string, ':', '#');
if (buff == NULL)
return 1;
printf("The string extracted from %s is %s\n" , string, buff);
free(buff);
return 0;
}
You could easily modify that to not care if the second delimiter is not found and just copy everything to the right of the first. That's an exercise for the reader.

Resources