Negative input crashes program - c

I have the following function:
char *ft_substr(char const *s, unsigned int start, size_t len)
{
char *substr;
if (!s)
return (NULL);
if (start > ft_strlen(s))
len = 0;
if (start <= ft_strlen(s) && s[start] != '\0')
{
if (ft_strlen(s + start) < len)
len = ft_strlen(s + start);
}
substr = (char *)malloc(sizeof(*s) * (len + 1));
if (!substr)
return (NULL);
if (len == 0)
return (substr);
ft_strlcpy(substr, s + start, len + 1);
return (substr);
}
Some of the functions are self-made (strlen = ft_strlen etc).
The problem: if I pass in -1 for "start", my program segfaults because it is converted to 4.294.967.295. So it crashes when passed in that is my function body does not even execute making all protective measures useless. Even stranger, if I execute this on my Mac everything works fine, on Ubuntu 22.04 it does not.
Any suggestions? I am not allowed to change the function prototype.
Edit: I have to pass a series of tests and the following one I am failing constantly:
Error in test 4: ft_substr("hola", 4294967295, 0): Segmentation fault!
(The test actually inputs -1).
Also, I am not allowed to include a main that tests user input.
Edit 2: My main is simple and just calls the function with input "hola", -1, 3

You need to terminate the string you've copied with ft_strlcpy with '\0'.
Your bounds checking could also be made a little simpler.
Example:
char *ft_substr(char const *s, unsigned int start, size_t len)
{
if (!s) return NULL;
size_t slen = ft_strlen(s);
if (start > slen) { // start out of bounds, make empty string
start = 0;
len = 0;
} else if(start + len > slen) { // len is too large, shrink it
len = slen - start;
}
char *substr = malloc(len + 1);
if (!substr) return NULL;
ft_strlcpy(substr, s + start, len); // note: copy len, not len+1
substr[len] = '\0'; // terminate the string
return substr;
}

Related

Can't figure out where I have memory leak

I'm new to C and still don't really know how to work with valgrind. I'm doing a project where i need to create a function that returns a line of text from a file descriptor each time it's called using just one static variable.
Repeated calls (e.g., using a loop) to your get_next_line() function should let
you read the text file pointed to by the file descriptor, one line at a time.
I have come up with this but I can't find where the memory leak is:
char *output(char **backup, char *rbackup, int ret, int fd)
{
int value;
char *temp;
if (ret < 0)
return (NULL);
else if (ret == 0 && backup[fd] == NULL)
return (NULL);
value = (int)(ft_strchr(backup[fd], '\n') - backup[fd] + 1);
rbackup = ft_substr(backup[fd], 0, value);
temp = ft_substr(backup[fd], value, BUFFER_SIZE * BUFFER_SIZE);
free(backup[fd]);
if (temp[0] == '\0')
{
free(temp);
temp = NULL;
}
backup[fd] = temp;
return (rbackup);
}
char *get_next_line(int fd)
{
int ret;
char buf[BUFFER_SIZE + 1];
static char *backup[NUM_OF_FD];
char *rbackup;
if (fd < 0 || fd > NUM_OF_FD)
return (NULL);
while (ft_strchr(backup[fd], '\n') == NULL)
{
ret = read(fd, buf, BUFFER_SIZE);
buf[ret] = '\0';
if (ret <= 0)
break ;
if (backup[fd] == NULL)
backup[fd] = ft_strdup(buf);
else
{
rbackup = ft_strjoin(backup[fd], buf);
free(backup[fd]);
backup[fd] = rbackup;
}
}
return (output(backup, rbackup, ret, fd));
}
The ft_functions are equivalent to the LibC counterparts but in case of having a bug I'll post them here:
void *ft_memcpy(void *dst, const void *src, size_t n)
{
size_t i;
i = -1;
if ((dst != src) && n)
while (++i < n)
((unsigned char *)dst)[i] = ((unsigned char *)src)[i];
return (dst);
}
size_t ft_strlen(const char *s)
{
size_t i;
i = 0;
while (s[i])
{
i++;
}
return (i);
}
char *ft_strchr(const char *s, int c)
{
char chr;
chr = (char)c;
if (s == NULL)
return (NULL);
while (*s && *s != chr)
s++;
if (*s == chr)
return ((char *)s);
else
return (NULL);
}
char *ft_substr(char const *s, unsigned int start, size_t len)
{
char *str;
if (!s)
return (NULL);
if (len > ft_strlen(s))
len = ft_strlen(s);
if (start > ft_strlen(s))
len = 0;
str = malloc(sizeof(char) * (len + 1));
if (!str)
return (NULL);
str = ft_memcpy(str, &s[start], len);
str[len] = '\0';
return (str);
}
char *ft_strdup(const char *s1)
{
size_t len;
void *new;
len = ft_strlen(s1) + 1;
new = malloc(len);
if (new == NULL)
return (NULL);
return ((char *) ft_memcpy(new, s1, len));
}
char *ft_strjoin(char const *s1, char const *s2)
{
int i;
char *str;
size_t size;
if (!s1 || !s2)
return (NULL);
i = 0;
size = (ft_strlen(s1) + ft_strlen(s2) + 1);
str = malloc(sizeof(char) * size);
if (!str)
return (NULL);
while (*s1)
str[i++] = *s1++;
while (*s2)
str[i++] = *s2++;
str[i] = '\0';
return (str);
}
void *ft_memset(void *b, int c, size_t len)
{
size_t i;
i = -1;
while (++i < len)
((unsigned char *)b)[i] = (unsigned char)c;
return (b);
}
Is there any rookie mistake in my code?
Well I ran your code like this
int main(int a, char**b)
{
int f = open("poop.c", O_RDONLY);
for(int i = 0; i < 10; i++)
{
char *x = get_next_line(f);
printf("x");
free(x);
}
}
reading the first 10 lines. No leaks detected by valgrind (once I added the free in the loop)
It did moan about other things though
==3695== Invalid read of size 1
==3695== at 0x109270: ft_memcpy (poop.c:26)
==3695== by 0x1093BB: ft_substr (poop.c:70)
==3695== by 0x109619: output (poop.c:120)
==3695== by 0x109802: get_next_line (poop.c:155)
==3695== by 0x109857: main (poop.c:162)
==3695== Address 0x4a4a0a5 is 0 bytes after a block of size 101 alloc'd
==3695== at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==3695== by 0x109408: ft_strdup (poop.c:81)
==3695== by 0x109724: get_next_line (poop.c:147)
==3695== by 0x109857: main (poop.c:162)
==3695==
==3695== Conditional jump or move depends on uninitialised value(s)
==3695== at 0x109546: output (poop.c:114)
==3695== by 0x109802: get_next_line (poop.c:155)
==3695== by 0x109857: main (poop.c:162)
==3695==
==3695== Conditional jump or move depends on uninitialised value(s)
==3695== at 0x109556: output (poop.c:116)
==3695== by 0x109802: get_next_line (poop.c:155)
==3695== by 0x109857: main (poop.c:162)
==3695==
seems like the uninitialized read is caused by reading an empty line.
The invalid read is here
((unsigned char*)dst)[i] = ((unsigned char*)src)[i];
If you have Makefile for the C file, then you can add -g after flags in your Makefile. If you use compile the C file by clang or gcc directly, you can put -g after clang or gcc. Then you can get more information about your problem. -g is used for debugging. If you don't add that, Valgrind will only tell you which part you need to debug (like a specific function). But if you add -g, you will be able to know which line you need to debug with.
You definitely have a problem with:
static char *backup[NUM_OF_FD];
You check that fd is larger than NUM_OF_FD, but NUM_OF_FD would still try to index outside of the backup array.
if (fd < 0 || fd > NUM_OF_FD)
return (NULL);
should be
if (fd < 0 || fd >= NUM_OF_FD)
return (NULL);
Also be aware that read() can return -1, so this code could be problematic:
ret = read(fd, buf, BUFFER_SIZE);
buf[ret] = '\0';
if (ret <= 0)
break ;
Check the return value before using it.

Free, invalid pointer

I have a program, that splits strings based on the delimiter. I have also, 2 other functions, one that prints the returned array and another that frees the array.
My program prints the array and returns an error when the free array method is called. Below is the full code.
#include "stringsplit.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
unsigned long int getNofTokens(const char *string) {
char *stringCopy;
unsigned long int stringLength;
unsigned long int count = 0;
stringLength = (unsigned)strlen(string);
stringCopy = malloc((stringLength + 1) * sizeof(char));
strcpy(stringCopy, string);
if (strtok(stringCopy, " \t") != NULL) {
count++;
while (strtok(NULL, " \t") != NULL)
count++;
}
free(stringCopy);
return count;
}
char **split_string(const char *str, const char *split) {
unsigned long int count = getNofTokens(str);
char **result;
result = malloc(sizeof(char *) * count + 1);
char *tmp = malloc(sizeof(char) * strlen(str));
strcpy(tmp, str);
char *token = strtok(tmp, split);
int idx = 0;
while (token != NULL) {
result[idx++] = token;
token = strtok(NULL, split);
}
return result;
}
void print_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
char *currentPointer = split_string[i];
free(currentPointer);
}
free(split_string);
}
Also, do I need to explicitly add \0 at the end of the array or does strtok add it automatically?
There are some problems in your code:
[Major] the function getNofTokens() does not take the separator string as an argument, it counts the number of words separated by blanks, potentially returning an inconsistent count to its caller.
[Major] the size allocated in result = malloc(sizeof(char *) * count + 1); is incorrect: it should be:
result = malloc(sizeof(char *) * (count + 1));
Storing the trailing NULL pointer will write beyond the end of the allocated space.
[Major] storing the said NULL terminator at the end of the array is indeed necessary, as the block of memory returned by malloc() is uninitialized.
[Major] the copy of the string allocated and parsed by split_string cannot be safely freed because the pointer tmp is not saved anywhere. The pointer to the first token will be different from tmp in 2 cases: if the string contains only delimiters (no token found) or if the string starts with a delimiter (the initial delimiters will be skipped). In order to simplify the code and make it reliable, each token could be duplicated and tmp should be freed. In fact your free_split_string() function relies on this behavior. With the current implementation, the behavior is undefined.
[Minor] you use unsigned long and int inconsistently for strings lengths and array index variables. For consistency, you should use size_t for both.
[Remark] you should allocate string copies with strdup(). If this POSIX standard function is not available on your system, write a simple implementation.
[Major] you never test for memory allocation failure. This is OK for testing purposes and throw away code, but such potential failures should always be accounted for in production code.
[Remark] strtok() is a tricky function to use: it modifies the source string and keeps a hidden static state that makes it non-reentrant. You should avoid using this function although in this particular case it performs correctly, but if the caller of split_string or getNofTokens relied on this hidden state being preserved, it would get unexpected behavior.
Here is a modified version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *string, const char *split) {
char *tmp = strdup(string);
size_t count = 0;
if (strtok(tmp, split) != NULL) {
count++;
while (strtok(NULL, split) != NULL)
count++;
}
free(tmp);
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
char *tmp = strdup(str);
char *token = strtok(tmp, split);
size_t idx = 0;
while (token != NULL && idx < count) {
result[idx++] = strdup(token);
token = strtok(NULL, split);
}
result[idx] = NULL;
free(tmp);
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
Here is an alternative without strtok() and without intermediary allocations:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
size_t getNofTokens(const char *str, const char *split) {
size_t count = 0;
size_t pos = 0, len;
for (pos = 0;; pos += len) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
count++;
}
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t pos, len, idx;
for (pos = 0, idx = 0; idx < count; pos += len, idx++) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
result[idx] = strndup(str + pos, len);
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
EDIT After re-reading the specification in your comment, there seems to be some potential confusion as to the semantics of the split argument:
if split is a set of delimiters, the above code does the job. And the examples will be split as expected.
if split is an actual string to match explicitly, the above code only works by coincidence on the examples given in the comment.
To implement the latter semantics, you should use strstr() to search for the split substring in both getNofTokens and split_string.
Here is an example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *str, const char *split) {
const char *p;
size_t count = 1;
size_t len = strlen(split);
if (len == 0)
return strlen(str);
for (p = str; (p = strstr(p, split)) != NULL; p += len)
count++;
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t len = strlen(split);
size_t idx;
const char *p = str;
for (idx = 0; idx < count; idx++) {
const char *q = strstr(p, split);
if (q == NULL) {
q = p + strlen(p);
} else
if (q == p && *q != '\0') {
q++;
}
result[idx] = strndup(p, q - p);
p = q + len;
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
When debugging, take note of values that you got from malloc, strdup, etc. Let's call these values "the active set". It's just a name, so that we can refer to them. You get a pointer from those functions, you mentally add it to the active set. When you call free, you can only pass values from the active set, and after free returns, you mentally remove them from the set. Any other use of free is invalid and a bug.
You can easily find this out by putting breakpoints after all memory allocations, so that you can write down the pointer values, and then breakpoints on all frees, so that you can see if one of those pointer values got passed to free - since, again, to do otherwise is to misuse free.
This can be done also using "printf" debugging. Like this:
char *buf = malloc(...); // or strdup, or ...
fprintf(stderr, "+++ Alloc %8p\n", buf);
And then whenever you have free, do it again:
fprintf(stderr, "--- Free %8p\n", ptr);
free(ptr);
In the output of the program, you must be able to match every +++ with ---. If you see any --- with a value that wasn't earlier listed with a +++, there's your problem: that's the buggy invocation of free :)
I suggest using fprintf(stderr, ... instead of printf(..., since the former is typically unbuffered, so if your program crashes, you won't miss any output. printf is buffered on some architectures (and not buffered on others - so much for consistency).

How to malloc for getline implementation

I'm trying to add getline support to http-fs-wrapper and I have some malloc problems.
ssize_t _intercept_getdelim(int fd, char **lineptr, size_t *n, int delim)
{
intercept_t *obj = intercept[fd];
int counter;
size_t nc = sizeof(char);
counter = -1;
while (obj->offset < obj->size)
{
++counter;
if (*lineptr) {
*lineptr = realloc(*lineptr, (counter + 2) * nc);
}
else {
*lineptr = malloc(nc);
}
_intercept_read(fd, lineptr[counter], nc);
if (*lineptr[counter] == delim)
{
break;
}
}
*n = counter ? counter + 1 : counter;
*lineptr[counter + 2] = '\0';
// Why do we need a *n when the return value is the same??
return *n;
}
Here's the relevant section of _intercept_read:
size_t _intercept_read(int fd, void *buf, size_t count)
{
memcpy(buf, obj->ra_buf+bo, count);
When I step through this in gdb, the second iteration throws a SIGSEGV (from memcpy -- it's not the ending \0, it's still inside the loop). I also don't quite get what's the difference between the *n of getline/getdelim and the return value.
The difference between n and the return value is that n is always the buffer size, but the return value can be -1 for error states per posix spec. You aren't fully handling EOF (it should return -1 if it hits EOF and hasn't read anything yet).
A note, reallocing for every character is fairly inefficient. The standard pattern is to double the buffer size each time it is necessary. This is another way the return value and n can differ, since n is the buffer size, which can be much larger than the read character count it returns.
You also don't need to special case a starting null pointer, realloc internally calls malloc in that case.
buf = realloc(buf...) is an unsafe pattern, realloc can return null, you have to save the realloc result to a temp variable and check it before assigning, otherwise you both leak memory and can reference a null pointer.
I don't think there's actually space for the trailing null you're adding to the buffer at the end there.
This works:
ssize_t _intercept_getdelim(int fd, char **lineptr, size_t *n, int delim)
{
intercept_t *obj = intercept[fd];
int counter = -1;
char *c, *newbuf;
*n = 1;
*lineptr = malloc(*n);
while (obj->offset < obj->size)
{
++counter;
if (counter >= *n)
{
if ((newbuf = realloc(*lineptr, *n << 1)))
{
*n = *n << 1;
*lineptr = newbuf;
}
else
{
return -1;
}
}
c = *lineptr + counter;
_intercept_read(fd, c, nc);
if (*c == delim)
{
break;
}
}
if (counter > -1)
{
*(*lineptr + ++counter) = '\0';
}
return counter;
}

read user input without maxsize in C

In C i can use the char *fgets(char *s, int size, FILE *stream) function to read user input from stdin. But the size of the user input is limited to size.
How can i read user input of variable size?
In C you are responsible for your buffers, and responsible for their size. So you can not have some dynamic buffer ready for you.
So the only solution is to use a loop (either of fgets or fgetc - depends on your processing and on your stop condition)
If you go beyond C to C++, you will find that you can accept std::string objects of variable sizes (there you need to deal with word and/or line termination instead - and loop again)
This function reads from standard input until end-of-file is encountered, and returns the number of characters read. It should be fairly easy to modify it to read exactly one line, or alike.
ssize_t read_from_stdin(char **s)
{
char buf[1024];
char *p;
char *tmp;
ssize_t total;
size_t len;
size_t allocsize;
if (s == NULL) {
return -1;
}
total = 0;
allocsize = 1024;
p = malloc(allocsize);
if (p == NULL) {
*s = NULL;
return -1;
}
while(fgets(buf, sizeof(buf), stdin) != NULL) {
len = strlen(buf);
if (total + len >= allocsize) {
allocsize <<= 1;
tmp = realloc(p, allocsize);
if (tmp == NULL) {
free(p);
*s = NULL;
return -1;
}
p = tmp;
}
memcpy(p + total, buf, len);
total += len;
}
p[total] = 0;
*s = p;
return total;
}

Can't figure out why this trimming function won't work correctly

===============================================================================
void trim(const char * orig, char * dest)
{
size_t front = 0;
size_t end = sizeof(orig) - 1;
size_t counter = 0;
char * tmp = null;
if (sizeof(orig) > 0)
{
memset(dest, '\0', sizeof(dest));
/* Find the first non-space character */
while (isspace(orig[front]))
{
front++;
}
/* Find the last non-space character */
while (isspace(orig[end]))
{
end--;
}
tmp = strndup(orig + front, end - front + 1);
strncpy(dest, tmp, sizeof(dest) - 1);
free(tmp); //strndup automatically malloc space
}
}
===============================================================================
I have a string:
' ABCDEF/G01 '
The above function is supposed to remove the spaces and return to me:
'ABCDEF/G01'.
Instead, what I get back is:
'ABCDEF/'
Any ideas?
Note: the quotes are just to show you that spaces exist in the original string.
The strncpy is wrong. sizeof(dest) is not what you want (it's the size of a pointer on your machine). You probably want: end - front. Instead, try:
memcpy(dest, front + start, end - front);
dest[end] = 0;
The sizeof(dest) doesn't do what you think it does! It returns the size of the pointer, not the length of the string. You need to supply a maximum length of the destination to your function.
For the string orig you want to use the strlen function.
size_t end = sizeof(orig) - 1;
strncpy(dest, tmp, sizeof(dest) - 1);
You probably want strlen instead of sizeof here.
void trim(const char * orig, char * dest)
{
size_t front = 0;
size_t end = sizeof(orig) - 1;
In that code, sizeof(orig) is the size of a pointer. All pointers are the same size, probably 8 in your implementation. What you want to use is strlen(orig) instead.
Try this code (it doesn't use temporary memory):
void trim(const char * orig, char * dest)
{
size_t front = 0;
size_t end = strlen(orig)-1;
size_t counter = 0;
*dest = '\0';
if (strlen(orig) > 0)
{
/* Find the first non-space character */
while (front < end && isspace(orig[front]) )
{
front++;
}
/* Find the last non-space character */
while (front < end && isspace(orig[end]))
{
end--;
}
counter = front;
while ( counter <= end )
{
dest[counter-front] = orig[counter];
counter++;
}
}
}
NOTE: Not tested!
You must replace sizeof() to strlen() everywhere in your function.
Here is working edit:
void trim(const char * orig, char * dest)
{
size_t front = 0;
size_t end = strlen(orig)-1;
size_t counter = 0;
char * tmp = NULL;
if (strlen(orig) > 0)
{
memset(dest, '\0', strlen(dest));
/* Find the first non-space character */
while (isspace(orig[front]))
{
front++;
}
/* Find the last non-space character */
while (isspace(orig[end]))
{
end--;
}
tmp = strndup(orig + front, end - front + 1);
strncpy(dest, tmp, strlen(dest));
free(tmp); //strndup automatically malloc space
}
}
(I've tested it)

Resources