I'm making a raytracing engine in C using the minilibX library.
I want to be able to read in a .conf file the configuration for the scene to display:
For example:
(Az#Az 117)cat universe.conf
#randomcomment
obj:eye:x:y:z
light:sun:100
light:moon:test
The number of objects can vary between 1 and the infinite.
From now on, I'm reading the file, copying each line 1 by 1 in a char **tab, and mallocing by the number of objects found, like this:
void open_file(int fd, struct s_img *m)
{
int i;
char *s;
int curs_obj;
int curs_light;
i = 0;
curs_light = 0;
curs_obj = 0;
while (s = get_next_line(fd))
{
i = i + 1;
if (s[0] == 'l')
{
m->lights[curs_light] = s;
curs_light = curs_light + 1;
}
else if (s[0] == 'o')
{
m->objs[curs_obj] = s;
curs_obj = curs_obj + 1;
}
else if (s[0] != '#')
{
show_error(i, s);
stop_parsing(m);
}
}
Now, I want to be able to store each information of each tab[i] in a new char **tab, 1 for each object, using the ':' as a separation.
So I need to initialize and malloc an undetermined number of char **tab. How can I do that?
(Ps: I hope my code and my english are good enough for you to understand. And I'm using only the very basic function, like read, write, open, malloc... and I'm re-building everything else, like printf, get_line, and so on)
You can't allocate an indeterminate amount of memory; malloc doesn't support it. What you can do is to allocate enough memory for now and revise that later:
size_t buffer = 10;
char **tab = malloc(buffer);
//...
if (indexOfObjectToCreate > buffer) {
buffer *= 2;
tab = realloc(tab, buffer);
}
I'd use an alternative approach (as this is c, not c++) and allocate simply large buffers as we go by:
char *my_malloc(size_t n) {
static size_t space_left = 0;
static char *base = NULL;
if (base==NULL || space_left < n) base=malloc(space_left=BIG_N);
base +=n; return base-n;
}
Disclaimer: I've omitted the garbage collection stuff and testing return values and all safety measures to keep the routine short.
Another way to think this is to read the file in to a large enough mallocated array (you can check it with ftell), scan the buffer, replace delimiters, line feeds etc. with ascii zero characters and remember the starting locations of keywords.
Related
For this code below that I was writing. I was wondering, if I want to split the string but still retain the original string is this the best method?
Should the caller provided the ** char or should the function "split" make an additional malloc call and memory manage the ** char?
Also, I was wondering if this is the most optimizing method, or could I optimize the code better than this?
I still have not debug the code yet, I am a bit undecided whether if the caller manage the ** char or the function manage the pointer ** char.
#include <stdio.h>
#include <stdlib.h>
size_t split(const char * restrict string, const char splitChar, char ** restrict parts, const size_t maxParts){
size_t size = 100;
size_t partSize = 0;
size_t len = 0;
size_t newPart = 1;
char * tempMem;
/*
* We just reverse a long page of memory
* At reaching the space character that is the boundary of the new
*/
char * mem = (char*) malloc( sizeof(char) * size );
if ( mem == NULL ) return 0;
for ( size_t i = 0; string[i] != 0; i++ ) {
// If it is a split char we at a new part
if ( string[i] == splitChar) {
// If the last character was not the split character
// Then mem[len] = 0 and increase the len by 1.
if (newPart == 0) mem[len++] = 0;
newPart = 1;
continue;
} else {
// If this is a new part
// and not a split character
// we make a new pointer
if ( newPart == 1 ){
// if reach maxpart we break.
// It is okay here, to not worry about memory
if ( partSize == maxParts ) break;
parts[partSize++] = &mem[len];
newPart = 0;
}
mem[len++] = string[i];
if ( len == size ){
// if ran out of memory realloc.
tempMem = (char*)realloc(mem, sizeof(char) * (size << 1) );
// if fail quit loop
if ( tempMem == NULL ) {
// If we can't get more memory the last part could be corrupted
// We have to return.
// Otherwise the code below can seg.
// There maybe a better way than this.
return partSize--;
}
size = size << 1;
mem = tempMem;
}
}
}
// If we got here and still in a newPart that is fine no need
// an additional character.
if ( newPart != 1 ) mem[len++] = 0;
// realloc to give back the unneed memory
if ( len < size ) {
tempMem = (char*) realloc(mem, sizeof(char) * len );
// If the resizing did not fail but yielded a different
// memory block;
if ( tempMem != NULL && tempMem != mem ){
for ( size_t i = 0; i < partSize; i++ ){
parts[i] = tempMem + (parts[i] - mem);
}
}
}
return partSize;
}
int main(){
char * tStr = "This is a super long string just to test the str str adfasfas something split";
char * parts[10];
size_t len = split(tStr, ' ', parts, 10);
for (size_t i = 0; i < len; i++ ){
printf("%zu: %s\n", i, parts[i]);
}
}
What is "best" is very subjective, as well as use case dependent.
I personally would keep the parameters as input only, define a struct to contain the split result, and probably return such by value. The struct would probably contain pointers to memory allocation, so would also create a helper function free that memory. The parts might be stored as list of strings (copy string data) or index&len pairs for the original string (no string copies needed, but original string needs to remain valid).
But there are dozens of very different ways to do this in C, and all a bit klunky. You need to choose your flavor of klunkiness based on your use case.
About being "more optimized": unless you are coding for a very small embedded device or something, always choose a more robust, clear, easier to use, harder to use wrong over more micro-optimized. The useful kind of optimization turns, for example, O(n^2) to O(n log n). Turning O(3n) to O(2n) of a single function is almost always completely irrelevant (you are not going to do string splitting in a game engine inner rendering loop...).
int getLineCount() {
int ret = 0;
char c;
while ((c = fgetc(stdin)) != EOF)
if (c == '\n')
ret++;
return ret + 1;
}
void fill(char *WORD) {
int charIndex = 0;
char c;
while ((c = fgetc(stdin)) != EOF) {
*(WORD + charIndex++) = c;
}
}
int main() {
int lineNum = getLineCount();
char *WORD = (char*)calloc(lineNum * 18,sizeof(int));
fill(WORD);
return 0;
}
Here is the part of my code, and my question is(as you can see):
I'm trying to read stdin's content twice, but after the getLineCount function, it stays at the EOF and I can't read it again in fill function.
Im taking stdin from the user with this command in Linux;
$./output < text_file.txt
Is there any way to roll back stdin to starting character? If not, how can I fix this problem?
Thanks.
You can use rewind(stdin) to set the stream back to the start of file, but be aware that it is not guaranteed to work, especially if the stream is a pipe, a terminal or a device.
Your allocation scheme is incorrect: you could compute the size of the file and then allocate that many bytes, but your current (char*)calloc(lineNum * 18,sizeof(int)); allocates 18 times the size of type int for each line. Some files with short lines will fit in this array while others will invoke undefined behavior.
Note that c must be defined as int for c = fgetc(stdin); to properly store all values including the EOF special value.
Don't use rewind.
You can, of course, save the data you read from stdin (potentially in a file if it's too large for main memory) and operate on that.
Another possibility is this:
struct callback {
void (*call) (char, void *);
void * data;
};
void with_characters_from(FILE * file, struct callback const * callbacks, size_t count) {
int c;
while ((c = fgetc(file)) != EOF) {
char character = c & 0xFF;
for (size_t i = 0; i < count; ++i) {
callbacks[i].call(character, callbacks[i].data);
}
}
}
You inverse control, such that no longer your functions are "pulling data out of" stdin, but rather the data (characters) are "pushed to" them. Note that this can lead to callback hell, and in C you sacrifice a good portion of type safety (as well as code clarity .. no first class functions / closures ... sigh).
A small test:
struct counter_data {
char const character;
unsigned count;
};
void counter (char character, void * vptr) {
struct counter_data * data = vptr;
if (character == data->character) {
++(data->count);
}
}
int main() {
struct counter_data data [2] = {
{'a', 0}, {'x', 0}};
struct callback callbacks [2] = {
{&counter, &(data [0])},
{&counter, &(data [1])}};
with_characters_from (stdin, callbacks, 2);
printf("Counted %c %u times \n", data [0].character, data [0].count);
printf("Counted %c %u times \n", data [1].character, data [1].count);
return 0;
}
As already noted, for your particular example, you should consider a completely different approach: If possible compute the required size beforehand. If you exceed that size (which you should always test for), then use realloc in order to get a larger chunk of memory.
I'd like to know what's the most memory efficient way to read & store a list of strings in C.
Each string may have a different length, so pre-allocating a big 2D array would be wasteful.
I also want to avoid a separate malloc for each string, as there may be many strings.
The strings will be read from a large buffer into this list data-structure I'm asking about.
Is it possible to store all strings separately with a single allocation of exactly the right size?
One idea I have is to store them contiguously in a buffer, then have a char * array pointing to the different parts in the buffer, which will have '\0's in it to delimit. I'm hoping there's a better way though.
struct list {
char *index[32];
char buf[];
};
The data-structure and strings will be strictly read-only.
Here's a mildly efficient format, assuming you know the length of all the strings in advance:
|| total size | string 1 | string 2 | ........ | string N | len(string N) | ... | len(string 2) | len(string 1) ||
You can store the lengths either in fixed-width integers or in variable-width integers, but the point is that you can jump to the end and scan all the lengths relatively efficiently, and from the length sum you can compute the offset of the string. You know when you reached the last string when there is no remaining space.
You can create your single buffer and store them contiguously, expanding the buffer as needed by using realloc(). But then you would need a second array to store string positions and maybe realloc() it as well, so I might simply create a dynamically allocated array and malloc() each string separately.
Find the number and total-length of all strings:
int num = 0;
int len = 0;
char* string = GetNextString(input);
while (string)
{
num += 1;
len += strlen(string);
string = GetNextString(input);
}
Rewind(input);
Then, allocate the following two buffers:
int* indexes = malloc(num*sizeof(int));
char* strings = malloc((num+len)*sizeof(char));
Finally, fill these two buffers:
int index = 0;
for (int i=0; i<num; i++)
{
indexes[i] = index;
string = GetNextString(input);
strcpy(strings+index,string);
index += strlen(string)+1;
}
After that, you can simply use strings[indexes[i]] in order to access the ith string.
Most efficient and memory efficient way is a two pass solution. In the first pass you calculate the total size for all strings, then you allocate the total memory block. In the second pass you read all strings using large buffers.
You can create a pointer array for the strings and calculate the difference between the pointers to get the string sizes. This way you save the null byte as end marker.
Here a complete example:
#include <stdio.h>
#include <memory.h>
#include <stdlib.h>
struct StringMap
{
char *data;
char **ptr;
long cPos;
};
void initStringMap(StringMap *stringMap, long numberOfStrings, long totalCharacters)
{
stringMap->data = (char*)malloc(sizeof(char)*(totalCharacters+1));
stringMap->ptr = (char**)malloc(sizeof(char*)*(numberOfStrings+2));
memset(stringMap->ptr, 0, sizeof(char*)*(numberOfStrings+1));
stringMap->ptr[0] = stringMap->data;
stringMap->ptr[1] = stringMap->data;
stringMap->cPos = 0;
}
void extendString(StringMap *stringMap, char *str, size_t size)
{
memcpy(stringMap->ptr[stringMap->cPos+1], str, size);
stringMap->ptr[stringMap->cPos+1] += size;
}
void endString(StringMap *stringMap)
{
stringMap->cPos++;
stringMap->ptr[stringMap->cPos+1] = stringMap->ptr[stringMap->cPos];
}
long numberOfStringsInStringMap(StringMap *stringMap)
{
return stringMap->cPos;
}
size_t stringSizeInStringMap(StringMap *stringMap, long index)
{
return stringMap->ptr[index+1] - stringMap->ptr[index];
}
char* stringinStringMap(StringMap *stringMap, long index)
{
return stringMap->ptr[index];
}
void freeStringMap(StringMap *stringMap)
{
free(stringMap->data);
free(stringMap->ptr);
}
int main()
{
// The interesting values
long numberOfStrings = 0;
long totalCharacters = 0;
// Scan the input for required information
FILE *fd = fopen("/path/to/large/textfile.txt", "r");
int bufferSize = 4096;
char *readBuffer = (char*)malloc(sizeof(char)*bufferSize);
int currentStringLength = 0;
ssize_t readBytes;
while ((readBytes = fread(readBuffer, sizeof(char), bufferSize, fd))>0) {
for (int i = 0; i < readBytes; ++i) {
const char c = readBuffer[i];
if (c != '\n') {
++currentStringLength;
} else {
++numberOfStrings;
totalCharacters += currentStringLength;
currentStringLength = 0;
}
}
}
// Display the found results
printf("Found %ld strings with total of %ld bytes\n", numberOfStrings, totalCharacters);
// Allocate the memory for the resource
StringMap stringMap;
initStringMap(&stringMap, numberOfStrings, totalCharacters);
// read all strings
rewind(fd);
while ((readBytes = fread(readBuffer, sizeof(char), bufferSize, fd))>0) {
char *stringStart = readBuffer;
for (int i = 0; i < readBytes; ++i) {
const char c = readBuffer[i];
if (c == '\n') {
extendString(&stringMap, stringStart, &readBuffer[i]-stringStart);
endString(&stringMap);
stringStart = &readBuffer[i+1];
}
}
if (stringStart < &readBuffer[readBytes]) {
extendString(&stringMap, stringStart, &readBuffer[readBytes]-stringStart);
}
}
endString(&stringMap);
fclose(fd);
// Ok read the list
numberOfStrings = numberOfStringsInStringMap(&stringMap);
printf("Number of strings in map: %ld\n", numberOfStrings);
for (long i = 0; i < numberOfStrings; ++i) {
size_t stringSize = stringSizeInStringMap(&stringMap, i);
char *buffer = (char*)malloc(stringSize+1);
memcpy(buffer, stringinStringMap(&stringMap, i), stringSize);
buffer[stringSize-1] = '\0';
printf("string %05ld size=%8ld : %s\n", i, stringSize, buffer);
free(buffer);
}
// free the resource
freeStringMap(&stringMap);
}
This example reads a very large text file, splits it into lines and creates an array with a string per line. It only needs two malloc calls. One for the pointer array and one for the sting block.
If it's strictly read-only as you've described, you can store the entire list of strings and their offsets in a single chunk of memory and read the whole thing with a single read.
The first sizeof(long) bytes stores the number of strings, n. The next n longs store the offsets into each string from the start of the string buffer which starts at position (n+1)*sizeof(long). You don't have to store the trailing zero for each string, but if you do, you can access each string with &str_buffer[offset[i]]. If you don't store the trailing '\0' then you would have to copy into a temporary buffer and append it yourself.
I'm trying to make a quick function that gets a word/argument in a string by its number:
char* arg(char* S, int Num) {
char* Return = "";
int Spaces = 0;
int i = 0;
for (i; i<strlen(S); i++) {
if (S[i] == ' ') {
Spaces++;
}
else if (Spaces == Num) {
//Want to append S[i] to Return here.
}
else if (Spaces > Num) {
return Return;
}
}
printf("%s-\n", Return);
return Return;
}
I can't find a way to put the characters into Return. I have found lots of posts that suggest strcat() or tricks with pointers, but every one segfaults. I've also seen people saying that malloc() should be used, but I'm not sure of how I'd used it in a loop like this.
I will not claim to understand what it is that you're trying to do, but your code has two problems:
You're assigning a read-only string to Return; that string will be in your
binary's data section, which is read-only, and if you try to modify it you will get a segfault.
Your for loop is O(n^2), because strlen() is O(n)
There are several different ways of solving the "how to return a string" problem. You can, for example:
Use malloc() / calloc() to allocate a new string, as has been suggested
Use asprintf(), which is similar but gives you formatting if you need
Pass an output string (and its maximum size) as a parameter to the function
The first two require the calling function to free() the returned value. The third allows the caller to decide how to allocate the string (stack or heap), but requires some sort of contract about the minumum size needed for the output string.
In your code, when the function returns, then Return will be gone as well, so this behavior is undefined. It might work, but you should never rely on it.
Typically in C, you'd want to pass the "return" string as an argument instead, so that you don't have to free it all the time. Both require a local variable on the caller's side, but malloc'ing it will require an additional call to free the allocated memory and is also more expensive than simply passing a pointer to a local variable.
As for appending to the string, just use array notation (keep track of the current char/index) and don't forget to add a null character at the end.
Example:
int arg(char* ptr, char* S, int Num) {
int i, Spaces = 0, cur = 0;
for (i=0; i<strlen(S); i++) {
if (S[i] == ' ') {
Spaces++;
}
else if (Spaces == Num) {
ptr[cur++] = S[i]; // append char
}
else if (Spaces > Num) {
ptr[cur] = '\0'; // insert null char
return 0; // returns 0 on success
}
}
ptr[cur] = '\0'; // insert null char
return (cur > 0 ? 0 : -1); // returns 0 on success, -1 on error
}
Then invoke it like so:
char myArg[50];
if (arg(myArg, "this is an example", 3) == 0) {
printf("arg is %s\n", myArg);
} else {
// arg not found
}
Just make sure you don't overflow ptr (e.g.: by passing its size and adding a check in the function).
There are numbers of ways you could improve your code, but let's just start by making it meet the standard. ;-)
P.S.: Don't malloc unless you need to. And in that case you don't.
char * Return; //by the way horrible name for a variable.
Return = malloc(<some size>);
......
......
*(Return + index) = *(S+i);
You can't assign anything to a string literal such as "".
You may want to use your loop to determine the offsets of the start of the word in your string that you're looking for. Then find its length by continuing through the string until you encounter the end or another space. Then, you can malloc an array of chars with size equal to the size of the offset+1 (For the null terminator.) Finally, copy the substring into this new buffer and return it.
Also, as mentioned above, you may want to remove the strlen call from the loop - most compilers will optimize it out but it is indeed a linear operation for every character in the array, making the loop O(n**2).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *arg(const char *S, unsigned int Num) {
char *Return = "";
const char *top, *p;
unsigned int Spaces = 0;
int i = 0;
Return=(char*)malloc(sizeof(char));
*Return = '\0';
if(S == NULL || *S=='\0') return Return;
p=top=S;
while(Spaces != Num){
if(NULL!=(p=strchr(top, ' '))){
++Spaces;
top=++p;
} else {
break;
}
}
if(Spaces < Num) return Return;
if(NULL!=(p=strchr(top, ' '))){
int len = p - top;
Return=(char*)realloc(Return, sizeof(char)*(len+1));
strncpy(Return, top, len);
Return[len]='\0';
} else {
free(Return);
Return=strdup(top);
}
//printf("%s-\n", Return);
return Return;
}
int main(){
char *word;
word=arg("make a quick function", 2);//quick
printf("\"%s\"\n", word);
free(word);
return 0;
}
I want to take a text from the standard input and store it into an array of strings. But I want the array of strings to be dynamic in memory. My code right now is the following:
char** readStandard()
{
int size = 0;
char** textMatrix = (char**)malloc(size);
int index = 0;
char* currentString = (char*)malloc(10); //10 is the maximum char per string
while(fgets(currentString, 10, stdin) > 0)
{
size += 10;
textMatrix = (char**)realloc(textMatrix, size);
textMatrix[index] = currentString;
index++;
}
return textMatrix;
}
The result I have while printing is the last string read in all positions of the array.
Example
Reading:
hello
nice
to
meet
you
Printing:
you
you
you
you
you
Why? I've searched over the Internet. But I didn't find this kind of error.
You are storing the same address (currentString) over and over. Try something like
while(fgets(currentString, 10, stdin) > 0)
{
textMatrix[index] = strdup(currentString); /* Make copy, assign that. */
}
The function strdup is not standard (just widely available). It should be easy to implement it yourself with malloc + memcpy.
currentString always point to the same memory area and all the pointers in textMatrix will point to it
char** readStandard()
{
int size = 0;
char** textMatrix = (char**)malloc(size);
int index = 0;
char currentString[10];
while(fgets(currentString, 10, stdin) > 0)
{
size += sizeof(char*);
textMatrix = (char**)realloc(textMatrix, size);
textMatrix[index] = strdup(currentString);
index++;
}
return textMatrix;
}