compressing the lines of a file using dynamic memory - c

i need to make a function that returns a compressed line with the following formats,
input:
pprrrinnnttttfff
output:
p2r3i1n3t4f3
and if the new string is larger than the original, return the original, can someone tell what is wrong with my code?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *comprimir(char *s);
int main(){
FILE* input;
char *lineptr = NULL;
size_t len=0, c;
input =fopen ("input.dat", "r");
while ((c = getline(&lineptr, &len, input))!= -1){
lineptr = comprimir(lineptr);
printf("%s", lineptr );
}
fclose(input);
}
char* comprimir (char *s){
int len1 = strlen(s), len2=0;
char *str, *in, *mystr;
mystr =(char*) calloc(len1*2, sizeof(char));
strcpy(mystr, s);
for (str =mystr, in=mystr; *str; str++){
len2 += 2;
if (len2 >= len1) {
free(mystr);
return s;
}
int count =1;
in[0] = str[0]; printf("%s",in[0] ); in++;
if (len2 > len1) return s;
while (str[0] == str[1]){
count++;
str++;
}
in[0] = '0' + count;
in++; printf("%s", in[0] );
if (len2 > len1) return s;
}
strcpy(s, in);
free(mystr);
return s;
}

sample to fix
char* comprimir (char *s){
int len1 = strlen(s), len2=0;
char *out, *in, *mystr;
mystr =malloc(len1 + 2);//2*len1 not required, +2 : "r" -> "r1"(len + NUM_len + NUL
//`s` not copy to mystr, avoid overwriting
for (out = mystr, in=s; *in;){
int count = 1;
*out++ = *in++;//Pre-increment `in` to reduce code.
while (in[-1] == in[0]){
count++;
in++;
}
int num_len = sprintf(out, "%d", count);//OK even count is more than 10
len2 += 1 + num_len;
if (len2 >= len1) {
free(mystr);
return s;
}
out += num_len;
}
*out = 0;//terminate by '\0'
strcpy(s, mystr);
free(mystr);
return s;
}

Related

Reading string line by line causes buffer overflow

This is the code I am trying to run, I should read a sequence of strings line by line.
int i;
char buf[100];
int o_size = 16;
char *output = malloc(o_size * sizeof(char));
strcpy(output, "");
for(i = 0; i < n; i++){
fgets(buf, 100, stdin);
strtok(buf,"\n");
char temp[100];
strcpy(temp, buf);
strtok(temp, " "); // The instruction is stored inside the temp variable
char temp1[100];
strcpy(temp1, strchr(buf, ' ') + 1);
strcpy(buf, temp1); // The input string is stored inside the buf variable (or the index line in the check case)
int h_val = poly_hash(buf, x, p, m);
if(!strcmp(temp, "add")){
...
}
else if(!strcmp(temp, "del")){
...
}
else if(!strcmp(temp, "find")){
...
}
else if(!strcmp(temp, "check")){
...
}
}
I also wrote a function to append to the output string, could this be the cause for the buffer overflow?
void to_output(char **output, int *o_size, char *string){
if(strlen(*output) + strlen(string) >= *o_size){
*o_size *= 2;
char *temp = malloc(*o_size * sizeof(char));
strcpy(temp, *output);
*output = realloc(*output, *o_size * sizeof(char));
strcpy(*output, temp);
free(temp);
}
strcat(*output, string);
}
Simpler is:
void to_output(char **output, int *o_size, char *string){
int outlen = strlen(*output) + strlen(string) + 1;
if (outlen > *o_size) {
*output = realloc(*output, outlen);
*o_size = outlen;
}
strcat(*output, string);
}

Getting valgrind errors with static char * [duplicate]

I have to recode an implementation of the getline() function, but using the file descriptor of the file and not a FILE *. I am only allowed to use malloc() and free(), along with 5 functions being 25 lines long at most.
I think I've done correctly the project although I am a beginner in C and my code isn't probably good.
When I run it, it works fine, but valgrind shows that I definetely lost x bytes, x depending of the file length and the READ_SIZE (macro defined in the header).
According to valgrind's --leak-check=full, I have a memory leak in the str_realloc_cat function, when I malloc dest. I tried but couldn't find where should I free / do something else?
Here below is my code:
char *get_next_line(const int fd)
{
static char *remaining = "";
char *buffer;
ssize_t cread;
size_t i;
i = 0;
if (remaining == NULL)
return (NULL);
if ((buffer = malloc(SOF(char) * READ_SIZE + 1)) == NULL ||
(cread = read(fd, buffer, READ_SIZE)) < 0)
return (NULL);
buffer[cread] = 0;
remaining = str_realloc_cat(remaining, buffer);
while (remaining[i])
{
if (remaining[i] == 10)
{
remaining[i] = 0;
buffer = str_create_cpy(remaining);
remaining = remaining + i + 1;
return (buffer);
}
i++;
}
return (check_eof(fd, buffer, remaining, cread));
}
char *str_realloc_cat(char *rem, char *buf)
{
size_t i;
size_t dest_i;
char *dest;
i = (dest_i = 0);
if ((dest = malloc(SOF(char) * (str_len(rem) + str_len(buf) + 1))) == NULL)
return (NULL);
while (rem[i])
{
dest[dest_i] = rem[i];
dest_i++;
i++;
}
i = 0;
while (buf[i])
{
dest[dest_i] = buf[i];
dest_i++;
i++;
}
dest[dest_i] = 0;
free(buf);
return (dest);
}
char *check_eof(const int fd, char *buffer, char *remaining, ssize_t cread)
{
if (cread == 0)
return (NULL);
if (cread < READ_SIZE)
{
buffer = remaining;
remaining = NULL;
return (buffer);
}
return (get_next_line(fd));
}
char *str_create_cpy(const char *src)
{
char *dest;
size_t i;
i = 0;
if ((dest = malloc(sizeof(char) * str_len(src) + 1)) == NULL)
return (NULL);
while (src[i])
{
dest[i] = src[i];
i++;
}
dest[i] = 0;
return (dest);
}
int str_len(const char *str)
{
size_t i;
i = 0;
while (str[i])
i++;
return (i);
}
And a main functon if you would like to test:
#define SOF(x) sizeof(x) // Why in the comments
int main(int ac, char **av)
{
int fd;
char *s;
UNUSED(ac);
if (!av[1])
return 1;
fd = open(av[1], O_RDONLY);
while ((s = get_next_line(fd)))
{
printf("%s\n", s);
free(s);
}
close(fd);
}
Your algorithm is bad:
You keep the buffer in a allocate memory
You don't use a structure to regroup your variable
You use magic number remaining[i] == 10
You use recursive you can stack overflow return get_next_line(fd). Never mind, I didn't read well you have a tail recursive, just be sure to have the optimization on your compile for it.
You have Spaghetti code.
etc.
You should rewrite your whole function with a better logic first use this structure:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define GNL_SIZE 4096
struct gnl_context {
char buffer[GNL_SIZE];
size_t i;
size_t read;
};
char *get_next_line_r(int fd, struct gnl_context *gnl_context);
char *get_next_line(int fd);
static char *read_buffer(struct gnl_context *gnl_context, char *str,
size_t *size) {
size_t i = gnl_context->i;
while (i < gnl_context->read && gnl_context->buffer[i] != '\n') {
i++;
}
size_t j = i - gnl_context->i;
char *ret = realloc(str, *size + j + 1);
if (ret == NULL) {
return NULL;
}
memcpy(ret + *size, gnl_context->buffer + gnl_context->i, j);
*size += j;
ret[*size] = '\0';
gnl_context->i = i;
return ret;
}
char *get_next_line_r(int fd, struct gnl_context *gnl_context) {
char *str = NULL;
size_t size = 0;
loop:
if (gnl_context->i == gnl_context->read) {
ssize_t ret = read(fd, gnl_context->buffer, GNL_SIZE);
if (ret <= 0) {
return str;
}
gnl_context->read = (size_t)ret;
gnl_context->i = 0;
}
char *tmp = read_buffer(gnl_context, str, &size);
if (tmp == NULL) {
return str;
}
if (gnl_context->i != gnl_context->read) {
gnl_context->i++;
return tmp;
}
str = tmp;
goto loop;
}
char *get_next_line(int fd) {
static struct gnl_context gnl_context;
return get_next_line_r(fd, &gnl_context);
}
int main(void) {
char *str;
while ((str = get_next_line(0)) != NULL) {
printf("%s\n", str);
free(str);
}
}
I am concerned about this line:
remaining = remaining + i + 1;
remaining is a pointer to the allocated buffer. On this line, you destroy it, which means that you cannot free() it anymore.

Output not as expected

I am supposed to write a program to extract Web addresses starting with www. and ending with .edu. The program displays Web address contained in the input entered by the user. If the input does not contain a web address that starts with www. and ends with .edu, the program should display a message that indicates such a web address cannot be found.
Input: http://www.usf.edu/admission
Output: www.usf.edu
Input: https://www.facebook.com/
Output: Web address starting with www. and ending with .edu not found
However when my program runs, it is not displaying the correct output. I don't have any compiler errors or warnings so I'm not sure where the issue could be.
// This program extracts the text from the website URL
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define STR_LEN 1000
void read_line(char *str, int n);
void pass_check(char *str);
void extract(char *s1, char *s2);
int main(void)
{
char instr[STR_LEN + 1];
char outstr[STR_LEN + 1];
printf("Please enter a URL: ");
read_line(instr, STR_LEN);
extract(instr, outstr);
puts(outstr);
pass_check(outstr);
return 0;
}
void extract(char *s1, char *s2) {
char *p, *q;
q = s2;
for (p = s1 + 7; *p != 0; p++) {
if (*p == '/')
break;
else {
*q = *p;
q++;
}
}
*q = '\0';
*p = '\0';
}
void read_line(char *str, int n) {
int ch;
int i = 0;
while ((ch = getchar()) != '\n') {
if (i < n) {
*str++ = ch;
i++;
}
}
*str = '\0';
}
void pass_check(char *str) {
const char *fref = "www";
const char *lref = "edu";
int len = strlen(str);
printf("%d", len);
char *l = &str[len - 3];
char f[STR_LEN + 1];
strncpy(f, str, 3);
if ((strcmp(f, fref) == 0) && strcmp(l, lref) == 0) {
printf("Output: ");
puts(str);
printf("\n");
} else
printf("Please only insert a .edu URL.");
}
The function strncpy() does not do what you think it does: strncpy(f, str, 3); will not append a null byte to f, so strcmp(f, fref); will actually have undefined behavior as f is uninitialized beyond the first 3 bytes.
Do not use this function, learn why from these blogs:
https://randomascii.wordpress.com/2013/04/03/stop-using-strncpy-already/
https://blog.liw.fi/posts/strncpy/
Also note that your readline() function will run an infinite loop is the file is empty or not terminated by a newline.
Here is a corrected version:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define STR_LEN 1000
void read_line(char *str, size_t n);
int extract(const char *str, char *dest);
int main(void) {
char instr[STR_LEN + 1];
char outstr[STR_LEN + 1];
printf("Please enter a URL: ");
read_line(instr, sizeof(instr));
if (extract(instr, outstr)) {
puts(outstr);
} else {
printf("Web address starting with www. and ending with .edu not found\n");
}
return 0;
}
int read_line(char *str, size size) {
int ch;
size_t i = 0;
while ((ch = getchar()) != EOF && c != '\n') {
if (i + 1 < size) {
str[i++] = ch;
}
}
str[i] = '\0';
return (ch == EOF && i == 0) ? EOF : i;
}
int extact(const char *str, char *dest) {
const char *p;
*dest = '\0';
for (;;) {
if ((p = strstr(str, "https://www.")) != NULL) {
p += 8; // skip the https:// prefix
} else
if ((p = strstr(str, "http://www.")) != NULL) {
p += 7; // skip the http:// prefix
} else {
break;
}
// URL starts with www.
size_t len = strcspn(p, "/ \n"); // compute length of website name
if (len > 8 && !memcmp(p + len - 4, ".edu", 4)) {
// copy website name, assuming dest is at least as large as str
strncat(dest, p, len);
return 1;
}
str = p + len;
}
return 0;
}

Coding a getline() implementation - Valgrind errors

I have to recode an implementation of the getline() function, but using the file descriptor of the file and not a FILE *. I am only allowed to use malloc() and free(), along with 5 functions being 25 lines long at most.
I think I've done correctly the project although I am a beginner in C and my code isn't probably good.
When I run it, it works fine, but valgrind shows that I definetely lost x bytes, x depending of the file length and the READ_SIZE (macro defined in the header).
According to valgrind's --leak-check=full, I have a memory leak in the str_realloc_cat function, when I malloc dest. I tried but couldn't find where should I free / do something else?
Here below is my code:
char *get_next_line(const int fd)
{
static char *remaining = "";
char *buffer;
ssize_t cread;
size_t i;
i = 0;
if (remaining == NULL)
return (NULL);
if ((buffer = malloc(SOF(char) * READ_SIZE + 1)) == NULL ||
(cread = read(fd, buffer, READ_SIZE)) < 0)
return (NULL);
buffer[cread] = 0;
remaining = str_realloc_cat(remaining, buffer);
while (remaining[i])
{
if (remaining[i] == 10)
{
remaining[i] = 0;
buffer = str_create_cpy(remaining);
remaining = remaining + i + 1;
return (buffer);
}
i++;
}
return (check_eof(fd, buffer, remaining, cread));
}
char *str_realloc_cat(char *rem, char *buf)
{
size_t i;
size_t dest_i;
char *dest;
i = (dest_i = 0);
if ((dest = malloc(SOF(char) * (str_len(rem) + str_len(buf) + 1))) == NULL)
return (NULL);
while (rem[i])
{
dest[dest_i] = rem[i];
dest_i++;
i++;
}
i = 0;
while (buf[i])
{
dest[dest_i] = buf[i];
dest_i++;
i++;
}
dest[dest_i] = 0;
free(buf);
return (dest);
}
char *check_eof(const int fd, char *buffer, char *remaining, ssize_t cread)
{
if (cread == 0)
return (NULL);
if (cread < READ_SIZE)
{
buffer = remaining;
remaining = NULL;
return (buffer);
}
return (get_next_line(fd));
}
char *str_create_cpy(const char *src)
{
char *dest;
size_t i;
i = 0;
if ((dest = malloc(sizeof(char) * str_len(src) + 1)) == NULL)
return (NULL);
while (src[i])
{
dest[i] = src[i];
i++;
}
dest[i] = 0;
return (dest);
}
int str_len(const char *str)
{
size_t i;
i = 0;
while (str[i])
i++;
return (i);
}
And a main functon if you would like to test:
#define SOF(x) sizeof(x) // Why in the comments
int main(int ac, char **av)
{
int fd;
char *s;
UNUSED(ac);
if (!av[1])
return 1;
fd = open(av[1], O_RDONLY);
while ((s = get_next_line(fd)))
{
printf("%s\n", s);
free(s);
}
close(fd);
}
Your algorithm is bad:
You keep the buffer in a allocate memory
You don't use a structure to regroup your variable
You use magic number remaining[i] == 10
You use recursive you can stack overflow return get_next_line(fd). Never mind, I didn't read well you have a tail recursive, just be sure to have the optimization on your compile for it.
You have Spaghetti code.
etc.
You should rewrite your whole function with a better logic first use this structure:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define GNL_SIZE 4096
struct gnl_context {
char buffer[GNL_SIZE];
size_t i;
size_t read;
};
char *get_next_line_r(int fd, struct gnl_context *gnl_context);
char *get_next_line(int fd);
static char *read_buffer(struct gnl_context *gnl_context, char *str,
size_t *size) {
size_t i = gnl_context->i;
while (i < gnl_context->read && gnl_context->buffer[i] != '\n') {
i++;
}
size_t j = i - gnl_context->i;
char *ret = realloc(str, *size + j + 1);
if (ret == NULL) {
return NULL;
}
memcpy(ret + *size, gnl_context->buffer + gnl_context->i, j);
*size += j;
ret[*size] = '\0';
gnl_context->i = i;
return ret;
}
char *get_next_line_r(int fd, struct gnl_context *gnl_context) {
char *str = NULL;
size_t size = 0;
loop:
if (gnl_context->i == gnl_context->read) {
ssize_t ret = read(fd, gnl_context->buffer, GNL_SIZE);
if (ret <= 0) {
return str;
}
gnl_context->read = (size_t)ret;
gnl_context->i = 0;
}
char *tmp = read_buffer(gnl_context, str, &size);
if (tmp == NULL) {
return str;
}
if (gnl_context->i != gnl_context->read) {
gnl_context->i++;
return tmp;
}
str = tmp;
goto loop;
}
char *get_next_line(int fd) {
static struct gnl_context gnl_context;
return get_next_line_r(fd, &gnl_context);
}
int main(void) {
char *str;
while ((str = get_next_line(0)) != NULL) {
printf("%s\n", str);
free(str);
}
}
I am concerned about this line:
remaining = remaining + i + 1;
remaining is a pointer to the allocated buffer. On this line, you destroy it, which means that you cannot free() it anymore.

Delete space from string in ansi C

I'm trying to write simple function trim space in the string in ansi C.
My str_utis.h:
#include <string.h>
const char* trim_str(char *input_str);
My str_utils.c:
const char* trim_str(char* input_str){
char* str = NULL;
int len = strlen(input_str);
int i = 0;
for (i = 0; i < len - 1; i++){
if (input_str[i] == ' ')
;
else
str += input_str[i];
}
return str;
}
When i try to execute it i got segfault:
int main(int argc, char** argv) {
const char* a = trim_str("Hey this is string");
printf("%s", a);
return 0;
}
why is it wrong? how can i write it correctly?
Thank you.
You cannot modify a string literal. It's UB. Copy the string out and modify its contents. Change
char* str = NULL;
int len = strlen(input_str);
to
size_t len = strlen( input_str );
char *str = malloc( len + 1 );
and then proceed to copy out the non-whitespace contents.
str isn't allocated, and you can't use += to append to a string anyway. Read up on strncat.
Go through this. this will remove blank spaces and tabs from both ends of the input string.
const char* trim_str(char* input_str){
char* str = NULL;
int len = strlen(input_str);
str = (char *)malloc(len+1);
int i = 0;
while(i < len && (input_str[i]==' ' || input_str[i]=='\t')){
++i;
}
int j = 0;
while(i< len && input_str[i]!=' ' && input_str[i]!='\t'){
str[j]= input_str[i];
++j;
++i;
}
str[j] = '\0';
return str;
}
first of all you need to get the new string size without spaces:
You don't want to allocate big strings if you don't have to.
const char* trim_str(char* input_str){
char* str = NULL;
int len = strlen(input_str);
int i = 0;
int newSize = 0;
for (i = 0; i < len - 1; i++){
if (input_str[i] == ' ')
;
else
newSize++;
}
str = malloc( newSize+ 1 );
str[newSize] = '\0'
// put the code of the copy bytes here...
return str;
}
char *trimdupstr(char *str)
{
size_t len, src,dst;
char *new;
if (!str) return NULL;
len = strlen (str);
new = malloc(len+1);
if (!new) return NULL;
for(src=dst=0; new[dst] = str[src++]; ) {
if (new[dst] != ' ') dst++;
}
return new;
}
And this one also removes tabs and CR/LF's:
char *trimdupstr(char *str)
{
size_t len, src,dst;
char *new;
if (!str) return NULL;
len = strlen (str);
new = malloc(len+1);
for (src=dst=0; str[src]; dst += len) {
len = strspn(str+src, "\t \r\n" );
src += len;
len = strcspn(str+src, "\t \r\n" );
memcpy (new+dst, str+src, len);
src += len;
}
new[dst] = 0;
return new;
}
This is a pretty safe method.
void rtrim(char instrng[]) {
assert (instrng != NULL);
if (strlen(instrng) == 0) {
return;
}
while (instrng[strlen(instrng)-1] == '\n' || instrng[strlen(instrng)-1] == ' ' ) {
instrng[strlen(instrng)-1] = '\0';
}
return;
}

Resources