get strings from the buffer - c

I wish to get strings from the buffer of raw bytes in the memory, will work well?
static int in = 0;
void *loadFile (FILE *fp)
{
fseek (fp, 0L, SEEK_END);
size_t size = ftell (fp);
fseek (fp, 0L, SEEK_SET);
char *buf = malloc (sizeof(char) * size);
if (!buf)
return NULL;
if (fread (buf, sizeof(char), size, fp) != size) {
free (buf);
return NULL;
}
return buf;
}
char *getString (void *buf)
{
char *l_buf = buf;
int i, j, num;
char *string = NULL;
for (i = in; l_buf[i] == '\n' || l_buf[i] == '\r'; i++);
for (j = i; l_buf[j] != '\n' && l_buf[j] != '\r'; j++);
num = j - i;
string = malloc (sizeof(char) * (num + 1));
if (!string)
return NULL;
in = j;
strncpy (string, &l_buf[i], num);
string[num] = '\0';
return string;
}

I believe there is at least one problem with the solution as proposed and that is there is no check to ensure you don't run off the end of the memory buffer in getString(). So one way to avoid this in your read code would be to add an explicit NULL to the end of the buffer like so
char *buf = malloc (sizeof(char) * (size + 1));
if (!buf)
return NULL;
if (fread (buf, sizeof(char), size, fp) != size) {
free (buf);
return NULL;
}
buf[size] = `\0`;
And then in your string extraction function add the a NULL check to the line termination tests, something like this:
for (i = in; l_buf[i] != '\0' && (l_buf[i] == '\n' || l_buf[i] == '\r'); i++);
if (l_buf[i] == '\0') {
/* Never saw the start of a line before the buffer ran out */
return NULL;
}
for (j = i; l_buf[i] != '\0' && l_buf[j] != '\n' && l_buf[j] != '\r'; j++);
if (i == j) {
return NULL;
}
There is another potential problem but as you didn't say whether you were running on UNIX or Windows or cared about portability here I can't be sure. The proposed code doesn't deal with line terminations that include both `\r' and '\n'.
I would also suggest to make the function re-entrant by replacing the global start position index with a parameter like so:
char *getString (void *buf, int *in) { ...
Then just update that pointer in getString() like so:
*in = j;

All references to buf[i] should be l_buf[i]. buf[i] is indexing from a void pointer (not what you want), but l_buf [i] is indexing from a char pointer.

Related

Can't compare Lines of a file in C

I got this piece of code:
void scanLinesforArray(FILE* file, char search[], int* lineNr){
char line[1024];
int line_count = 0;
while(fgets(line, sizeof(line),file) !=NULL){
++line_count;
printf("%d",line_count);
printf(line);
char *temp = malloc(strlen(line));
// strncpy(temp,line,sizeof(line));
// printf("%s\n",temp);
free(temp);
continue;
}
}
This will print all lines of the file, but as soon as I uncomment the strncpy(), the program just stops without error.
Same happens as soon as I use strstr() to compare the line to my search variable.
I tried the continue statement and other redundant things, but nothing helps.
Many problems:
Do not print a general string as a format
Code risks undefined behavior should the string contain a %.
// printf(line); // BAD
printf("%s", line);
// or
fputs(line, stdout);
Bad size
strncpy(temp,line,sizeof(line)); is like strncpy(temp,line, 1024);, yet temp points to less than 1024 allocated bytes. Code attempts to write outside allocated memory. Undefined behavior (UB).
Rarely should code use strncpy().
Bad specifier
%s expects a match string. temp does not point to a string as it lacks a null character. Instead allocated for the '\0'.
// printf("%s\n", temp);`.
char *temp = malloc(strlen(line) + 1); // + 1
strcpy(temp,line);
printf("<%s>", temp);
free(temp);
No compare
"Can't compare Lines of a file in C" is curious as there is no compare code.
Recall fgets() typically retains a '\n' in line[].
Perhaps
long scanLinesforArray(FILE* file, const char search[]){
char line[1024*4]; // Suggest wider buffer - should be at least as wide as the search string.
long line_count = 0; // Suggest wider type
while(fgets(line, sizeof line, file)) {
line_count++;
line[strcspn(line, "\n")] = 0; // Lop off potential \n
if (strcmp(line, search) == 0) {
return line_count;
}
}
return 0; // No match
}
Advanced: Sample better performance code.
long scanLinesforArray(FILE *file, const char search[]) {
size_t len = strlen(search);
size_t sz = len + 1;
if (sz < BUFSIZ) sz = BUFSIZ;
if (sz > INT_MAX) {
return -2; // Too big for fgets()
}
char *line = malloc(sz);
if (line == NULL) {
return -1;
}
long line_count = 0;
while (fgets(line, (int) sz, file)) {
line_count++;
if (memcmp(line, search, len) == 0) {
if (line[len] == '\n' || line[len] == 0) {
free(line);
return line_count;
}
}
}
free(line);
return 0; // No match
}

how can I append a char to a string allocating memory dynamically in C?

I wrote this code, but inserts garbage in the start of string:
void append(char *s, char c) {
int len = strlen(s);
s[len] = c;
s[len + 1] = '\0';
}
int main(void) {
char c, *s;
int i = 0;
s = malloc(sizeof(char));
while ((c = getchar()) != '\n') {
i++;
s = realloc(s, i * sizeof(char));
append(s, c);
}
printf("\n%s",s);
}
How can I do it?
There are multiple problems in your code:
you iterate until you read a newline ('\n') from the standard input stream. This will cause an endless loop if the end of file occurs before you read a newline, which would happen if you redirect standard input from an empty file.
c should be defined as int so you can test for EOF properly.
s should be null terminated at all times, you must set the first byte to '\0' after malloc() as this function does not initialize the memory it allocates.
i should be initialized to 1 so the first realloc() extends the array by 1 etc. As coded, your array is one byte too short to accommodate the extra character.
you should check for memory allocation failure.
for good style, you should free the allocated memory before exiting the program
main() should return an int, preferably 0 for success.
Here is a corrected version:
#include <stdio.h>
#include <stdlib.h>
/* append a character to a string, assuming s points to an array with enough space */
void append(char *s, char c) {
size_t len = strlen(s);
s[len] = c;
s[len + 1] = '\0';
}
int main(void) {
int c;
char *s;
size_t i = 1;
s = malloc(i * sizeof(char));
if (s == NULL) {
printf("memory allocation failure\n");
return 1;
}
*s = '\0';
while ((c = getchar()) != EOF && c != '\n') {
i++;
s = realloc(s, i * sizeof(char));
if (s == NULL) {
printf("memory allocation failure\n");
return 1;
}
append(s, c);
}
printf("%s\n", s);
free(s);
return 0;
}
when you call strlen it searches for a '\0' char to end the string. You don't have this char inside your string to the behavior of strlen is unpredictable.
Your append function is acually good.
Also, a minor thing, you need to add return 0; to your main function. And i should start from 1 instead if 0.
Here is how it should look:
int main(void){
char *s;
size_t i = 1;
s = malloc (i * sizeof(char));//Just for fun. The i is not needed.
if(s == NULL) {
fprintf(stderr, "Coul'd not allocate enough memory");
return 1;
}
s[0] = '\0';
for(char c = getchar(); c != '\n' && c != EOF; c = getchar()) {//it is not needed in this case to store the result as an int.
i++;
s = realloc (s,i * sizeof(char) );
if(s == NULL) {
fprintf(stderr, "Coul'd not allocate enough memory");
return 1;
}
append (s,c);
}
printf("%s\n",s);
return 0;
}
Thanks for the comments that helped me improve the code (and for my english). I am not perfect :)
The inner realloc needs to allocate one element more (for the trailing \0) and you have to initialize s[0] = '\0' before starting the loop.
Btw, you can replace your append by strcat() or write it like
size_t i = 0;
s = malloc(1);
/* TODO: check for s != NULL */
while ((c = getchar()) != '\n') {
s[i] = c;
i++;
s = realloc(s, i + 1);
/* TODO: check for s != NULL */
}
s[i] = '\0';

can anyone explain me how does this function work?

I don't understand what this function do. Can anyone explain me in detail please?
char *my_getline(FILE *stream) {
char *line = NULL;
size_t pos = 0;
int c;
while ((c = getc(stream)) != EOF) {
char *newp = realloc(line, pos + 2);
if (newp == NULL) {
free(line);
return NULL;
}
line = newp;
if (c == '\n')
break;
line[pos++] = (char)c;
}
if (line) {
line[pos] = '\0';
}
return line;
}
If you can add a comment on my code, I think that will help me. I want to search a substring in a string and I found this function code.
This is the main function:
int main(void) {
char *str, *sub;
size_t len1, len2, i, count = 0;
printf("Insert string :\n");
str = my_getline(stdin);
printf("insert substring :\n");
sub = my_getline(stdin);
if (str && sub) {
len1 = strlen(str);
len2 = strlen(sub);
for (i = 0; i + len2 <= len1; i++) {
if (!memcmp(str + i, sub, len2)) {
count++;
printf("Substring found at index : %d\n", i);
}
}
printf("in the number of: %d\n", count);
if (count == 0) {
printf("Substring not found\n");
}
}
free(str);
free(sub);
return 0;
}
I understand the main function but unable to understand the logic in function my_getline.
Please help me in understanding the logic. Thanks!
char *my_getline(FILE *stream) {
// pointer to the line to be read:
char *line = NULL;
// position of the next character:
size_t pos = 0;
// single character:
int c;
while ((c = getc(stream)) != EOF) { // read 1 character at a time until EOF
// allocate a new buffer with room for the char just read + a 0 terminator
// when `line` is NULL, this is the same as `malloc()`, otherwise it
// will change the size of the allocation:
char *newp = realloc(line, pos + 2);
// check for errors:
if (newp == NULL) {
free(line);
return NULL;
}
// no errors, assign new buffer to `line`:
line = newp;
// end of line found: we're done:
if (c == '\n')
break;
// otherwise add new character to the line:
line[pos++] = (char)c;
}
// if there was *anything* to read, add 0 terminator (marks end of string):
if (line) {
line[pos] = '\0';
}
return line;
}
That's about it. Note it's horribly inefficient for two reasons: It reads only one character at a time and it calls realloc() for each and every character.
A better solution would use e.g. fgets() and increase the buffer size in reasonable chunks, for example like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GETLINE_CHUNK 1024
static void xrealloc(void *bufPtr, size_t size)
{
void **buf = bufPtr;
void *tmp = realloc(*buf, size);
if (!tmp)
{
free(*buf);
*buf = 0;
}
*buf = tmp;
}
char *my_getline(FILE *stream)
{
// allocate first chunk:
char *buf = malloc(GETLINE_CHUNK);
if (!buf) return 0;
*buf = 0;
size_t pos = 0;
// read up to GETLINE_CHUNK bytes, until newline:
while (fgets(buf + pos, GETLINE_CHUNK, stream))
{
// look for newline:
char *nlPos = strchr(buf, '\n');
if (nlPos)
{
// found, then our line is complete
*nlPos = 0;
// shrink buffer to needed size
xrealloc(&buf, nlPos-buf+1);
return buf;
}
// set next offset to read
pos = strlen(buf);
// increase buffer size to have room for a whole other GETLINE_CHUNK:
xrealloc(&buf, pos + GETLINE_CHUNK);
if (!buf) return 0;
}
// if nothing was read, free buffer and return NULL:
if (*buf == 0)
{
free(buf);
buf = 0;
}
return buf;
}
int main(void)
{
char *line = my_getline(stdin);
if (line)
{
puts(line);
free(line);
}
else puts("no input!");
return 0;
}
Well this function gives you line, Lets go Step by Step:
char *my_getline(FILE *stream) {
char *line = NULL; //this is just pointer initialization
size_t pos = 0; //position variable definition and init
int c; //a variable to store temporary character
while ((c = getc(stream)) != EOF) //read every character till end of file
{
// To dynamically allocate memory, with reference to the
// number of character and plus '2' is just to compensate null
// character and the character(Since pos is 0)
char *newp = realloc(line, pos + 2);
if (newp == NULL) { // this is to check whether memory was alloacted properly or not.
free(line); //if not free line
return NULL;// break the program and return NULL
}
line = newp;// if memory is allocated properly store allocated memory in line pointer
if (c == '\n') //if new line is detected
break;// break the while loop
line[pos++] = (char)c; // store the character in dynamically allocated memory and new character in new location.
}
if (line) { //if line contains something then add a null character at last, to complete that string
line[pos] = '\0';
}
return line; //returns the content of line.
}
Hope this helps :)

c fgets retrieves the whole array?

I have a file named phobebook where i retrieve the number of contacts I have(here the int is assigned on variable cc), then saved the names, address etc.
problem is when I display the info, the details are there but they are separated with new lines. I tried to put the \0 but it seems it does not work.
typedef struct myphonebook{
char name[31];
char address[101];
char cellphone[11];
char email[21];
} Myphonebooktype;
FILE*db;
db = fopen("db.txt", "r");
fscanf(db, "%d" , &cc);
pb = (Myphonebooktype*)malloc(cc*sizeof(Myphonebooktype));
addcounter = cc;
for(i = 0; i<cc ; i++){
size_t lenn = strlen(pb[i].name);
if (pb[i].name[lenn - 1] == '\n') {
pb[i].name[lenn - 1] = '\0';
}
fgets(pb[i].name, sizeof(pb[i].name), db);
size_t lena = strlen(pb[i].address);
if (pb[i].address[lena - 1] == '\n') {
pb[i].address[lena - 1] = '\0';
}
fgets(pb[i].address, sizeof(pb[i].address), db);
size_t lenc = strlen(pb[i].cellphone);
if (pb[i].cellphone[lenc - 1] == '\n') {
pb[i].cellphone[lenc - 1] = '\0';
}
fgets(pb[i].cellphone, sizeof(pb[i].cellphone), db);
size_t lene = strlen(pb[i].email);
if (pb[i].email[lene - 1] == '\n') {
pb[i].email[lene - 1] = '\0';
}
fgets(pb[i].email, sizeof(pb[i].email), db);
}
You can't reference data in the newly allocated array of Myphonebooktype before initializing it. You calls to strlen() all generate undefined behavior, since the struct members haven't been initialized.
Also, don't cast the return value of malloc() in C.
As #unwind said, code is referencing uninitialized data before writing to it
size_t lenn = strlen(pb[i].name); // pb[i].name contents are not defined yet.
...
fgets(pb[i].name, sizeof(pb[i].name), db);
Suggest creating a function to handle the reading of the line.
void ReadLine(FILE *db, char *dest, size_t size) {
char buffer[size+2];
dest[0] = '\0';
if (fgets(buffer, sizeof buffer, db) != NULL) {
size_t len = strlen(buffer);
// Get rid of potential \n
if (len > 0 && buffer[len-1] == '\n') buffer[--len] = '\0';
strncpy(dest, buffer, size);
}
}
for(i = 0; i<cc ; i++) {
ReadLine(db, pb[i].name, sizeof pb[i].name);
ReadLine(db, pb[i].address, sizeof pb[i].address);
ReadLine(db, pb[i].cellphone, sizeof pb[i].cellphone);
ReadLine(db, pb[i].email, sizeof pb[i].email);
}
Additions could be made to ReadLine() to return EOF on NULL read, excessively long lines or \r concern as raised by #MadHatter. It's all in one function, so easier to maintain and enhance code.

Read line from file issue

I wrote this simple readline function, it can return each line length but it doesn't return a pointer to the allocated buffer. Another issue is the last line ignored(it doesn't return it):
FILE *passFile = NULL;
char *current = NULL;
size_t len = 0;
passFile = fopen("pass.txt", "r");
while(readline(passFile, &current, &len) != -1) {
printf("%s\n", current); // SEGMENTAION FAULT
printf("%d\n", len);
free(current);
current = NULL;
}
ssize_t
readline(FILE *file, char **bufPtr, size_t *len)
{
char c, *buf = NULL;
size_t n = 0;
buf = (char*)malloc(sizeof(char));
while((c = fgetc(file)) != '\n' && (c != EOF)) {
buf[n] = c;
++n;
buf = realloc(buf, n + 1);
}
buf[n] = '\0';
*bufPtr = buf;
*len = n;
if(c == EOF) // reach end of file
return -1;
return 0;
}
Your readline() function is not returning a pointer to allocated memory. In your call, current is never set, so the pointer is invalid and you get the error.
In C, functions are "call by value". Inside readline(), bufPtr is a copy of whatever was passed to readline(). Assigning to bufPtr merely overwrites the local copy and does not return a value that the calling code can see.
In pseudocode:
TYPE a;
define function foo(TYPE x)
{
x = new_value;
}
foo(a); // does not change a
This only changes the local copy of x and does not return a value. You change it to use a pointer... the function still gets a copy, but now it's a copy of a pointer, and it can use that pointer value to find the original variable. In pseudocode:
TYPE a;
define function foo(TYPE *px)
{
*px = new_value;
}
foo(&a); // does change a
Now, to change your function:
ssize_t
readline(FILE *file, char **pbufPtr, size_t *len)
{
// ...deleted...
buf[n] = '\0';
*pbufPtr = buf;
// ...deleted...
}
And you call it like so:
while(readline(passFile, &current, &len) != -1)
P.S. It is not a good idea to call realloc() the way you do here. It's potentially a very slow function, and for an input string of 65 characters you will call it 65 times. It would be better to use an internal buffer for the initial file input, then use malloc() to allocate a string that is just the right size and copy the string into the buffer. If the string is too long to fit in the internal buffer at once, use malloc() to get a big-enough place to copy out the part of the string you have in the internal buffer, then continue using the internal buffer to copy more of the string, and then call realloc() as needed. Basically I'm suggesting you have an internal buffer of size N, and copy the string in chunks of N characters at a time, thus minimizing the number of calls to realloc() while still allowing arbitrary-length input strings.
EDIT: Your last-line problem is that you return -1 when you hit end of file, even though there is a line to return.
Change your code so that you return -1 only if c == EOF and n == 0, so a final line that ends with EOF will be correctly returned.
You should also make readline() use the feof() function to check if file is at end-of-file, and if so, return -1 without calling malloc().
Basically, when you return -1, you don't want to call malloc(), and when you did call malloc() and copy data into it, you don't want to return -1! -1 should mean "you got nothing because we hit end of file". If you got something before we hit end of file, that's not -1, that is 0. Then the next call to readline() after that will return -1.
In your readline function you pass current by value. So if you change bufPtr inside your function, it doesn't change value of current outside. If you want to change value of current pass it by reference: &current and change readline() parameter to char **bufPTR.
You could pass current the way you did if you wanted to change something it points to, but you want to change where it points in first place.
replace your readlinefunction with this
char* readline(FILE *file, size_t *len)
{
char c, *buf = NULL;
size_t n = 0;
buf = (char*)malloc(sizeof(char));
while((c = fgetc(file)) != '\n' && (c != EOF)) {
buf[n] = c;
++n;
buf = realloc(buf, n + 1);
}
buf[n] = '\0';
bufPtr = buf;
*len = n;
if(c == EOF) // reach end of file
return NULL;
return buf;
}
and then in main replace this line while(readline(passFile, current, &len) != -1) with this while((current = readline(passFile, &len) != NULL)
Now it works:
ssize_t
readline(FILE *file, char **bufPtr, size_t *len)
{
if(feof(file)) // reach end of file
return -1;
char c, *buf = NULL;
size_t n = 0, portion = CHUNK;
buf = (char*)malloc(sizeof(char) * CHUNK);
while((c = fgetc(file)) != '\n' && (c != EOF)) {
buf[n] = c;
++n;
if(n == portion) {
buf = realloc(buf, CHUNK + n);
portion += n;
}
}
buf[n] = '\0';
*bufPtr = buf;
*len = n;
return 0;
}

Resources