Creating array to hold 1000000 numbers - c

I wrote a code in C that read a text file with numbers into memory and the create an 2d int array to store them.
The file has the following format:
9
9 5 6 2235 45558 6 5544 56565 2
The first number is the size of the array and the second line holds as many numbers as the first line says.
MY problem is that the size of the array can't hold more than ~30.000 numbers. How can I make the following code so I can make the array hold until 1.000.000 numbers? I know that I should use some king of long integer but I couldn't do it.
Heres the code
#include <stdio.h>
#include <stdlib.h>
int is_end(char* input) {
return *input == 0;
}
int is_separator(char* input) {
return *input == '\n' || *input == ' ';
}
char* eat_separators(char* input) {
while (is_separator(input))
++input;
return input;
}
size_t count_lines(char* input) {
size_t rows = 1;
while (!is_end(input)) {
if (is_separator(input)) {
++rows;
input = eat_separators(input);
}
else {
++input;
}
}
return rows;
}
char** get_lines(char* input, size_t number_of_rows) {
char* from = input;
size_t length = 0;
size_t line = 0;
size_t i;
char** lines = (char**)malloc(number_of_rows * sizeof(char*));
do {
if (is_end(input) || is_separator(input)) {
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
length = 0;
++line;
input = eat_separators(input);
from = input;
}
else {
++length;
++input;
}
} while (!is_end(input));
/*
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
++line; */
return lines;
}
int main(int argc, char* argv[]) {
char** lines;
size_t size;
size_t number_of_rows;
int count;
int* children;
FILE *input, *output;
char *contents;
int fileSize = 0;
int i;
input = fopen("xxx.in", "r");
long int filepos = 0L;
fseek(input, 0L, SEEK_END);
fileSize = ftell(input);
fseek(input, 0L, SEEK_SET);
contents = (char*)malloc(fileSize + 1);
size = fread(contents, 1, fileSize, input);
contents[size] = 0;
fclose(input);
number_of_rows = count_lines(contents);
lines = get_lines(contents, number_of_rows);
if ((count = atoi(lines[0])) <= 0 || count > 1000000){
return 1;
}
children = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
if ((children[i] = atoi(lines[i + 1])) <= 0 )
return(-1);
}
// a check to see if everything stored in the array
for(i = 0;i<count;i++)
{
printf(" %d : %d\n", i, children[i]);
}
free(children);
free(lines);
// This is the end! Oh my dear friend, the end!
return 0;
}

First Let me explaint the reason of having only 30.000 number that will give reply to your question?
Basically you are trying to convert the character to ASCII values. Let us take the example of character x whos ASCII value is 120. You are changing the character x with 120, the storage capacity of x is 1 byte but the storage capacity of 120 is 3 bytes. So, basically you have to do memory allocation of 3 times higher the actual value computed as 1 byte is expanding into 3 bytes.
In Your code increase the memory allocation 3 times then your problem would be solved.

Related

My matrix vector multiplication is returning incorrect values

After digging deep into the internet I managed to read all the numbers in my csv to a matrix vector and also for the other single dimension vector from its related csv. The matrix csv file contains a matrix in the following format
91,86,94
12,54,88
79,58,66
The other input vector file contains the members of the one dimension vector as follows
14
20
22
So I expect the output Vector as a result of this multiplication to be for the first row as
91*14+86*20+94*22=5062
Instead of the above my C code is giving me an insane -1469150284 as the member of the first row of the resultant Vector, I suspected the initialization of the two dimension matrix at first but then even after using memset() to set all elements in the array to 0, I still get the same incorrect values.
The complete code on how I read the csvs and how I load each number into the arrays and how I multiply is provided below, help me trace the bug that is causing the multiplication error
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main() {
//define the three files for reading
FILE *matFile = fopen("test1_input_mat.csv", "r");
FILE *vecFile = fopen("test1_input_vec.csv", "r");
//we are writing to the below file
FILE *outFile = fopen("test1_out_vec.csv", "w");
//make sure the file exists
if (matFile == NULL) {
printf("%s","File does not exist");
//break and return an exit code to the operating system
return 99;
}
//define the dimensions of the matrix
int x = 3;
int y = 3;
//allocate memory to the matrix dynamically
int (*matrix_array)[x] = malloc(sizeof(int[x][y]));
//initialize all the members to zero
memset(matrix_array, 0, sizeof(matrix_array));
//read from the matFile and assign to the vector
char *r, l;
//create a buffer variable for the read file process
char buffer[255];
char line[255] = "";
char *replaced = NULL;
while (fgets(buffer, sizeof(buffer), matFile)) {
strncat(line, buffer, 255);
}
// printf("%s",line);
replaced = replaceWord(line, "\n", ",");
//printf("%s", replaced);
//now that we have the elements of the file in a line
//separated by commas
char delim[] = ",";
char *token;
//get the first token
token = strtok(replaced, delim);
//walk through other tokens
while (token != NULL) {
//parse this and add it to the array
int sub = atoi(token);
//assign the number to th array
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
matrix_array[i][j] = sub;
}
}
token = strtok(NULL, delim);
}
//allocate memory to one dimension array
int *vec = (int *)malloc(3 * sizeof(int));
//section below handles the parsing of numbers from the vector file
char vline[255] = "";
char vbuffer[255];
char concatenated[255];
char *replacing;
//read the vector file
while (fgets(vbuffer, 255, vecFile)) {
strncat(concatenated, vbuffer, sizeof(vbuffer));
}
//replace the new line characters with commas
replacing = replaceWord(concatenated, "\n", ",");
//now parse that into the one dimension vector
char *vtoken;
//get the first token
vtoken = strtok(replacing, delim);
//get the rest of the tokens
while (vtoken != NULL) {
int no = atoi(vtoken);
//append the numbers to the one dimension vector
for (int i = 0; i < 3; i++) {
vec[i] = no;
}
vtoken = strtok(NULL, delim);
}
//this is the section where we do the multiplication of the two
int *out_vec = (int *)malloc(3 * sizeof(int));
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
//assign the product of the multiplication to the right index in the vector
out_vec[j] += matrix_array[i][j] * vec[j];
}
}
//write the integers in the second to the out file
char str[255];
char fin[255];
for (int i = 0; i < 3; i++) {
printf("%d\n", out_vec[i]);
}
//close the matFile
fclose(matFile);
}
//method to replace the newline characters with commas
This code helped me replace the new line characters with commas in the single line comprised of all lines in the file
//this method replaces a string in the target string with another string
char *replaceWord(const char *s, const char *oldW,
const char *newW)
{
char *result;
int i, cnt = 0;
int newWlen = strlen(newW);
int oldWlen = strlen(oldW);
// Counting the number of times old word
// occur in the string
for (i = 0; s[i] != '\0'; i++) {
if (strstr(&s[i], oldW) == &s[i]) {
cnt++;
// Jumping to index after the old word.
i += oldWlen - 1;
}
}
// Making new string of enough length
result = (char *)malloc(i + cnt * (newWlen - oldWlen) + 1);
i = 0;
while (*s) {
// compare the substring with the result
if (strstr(s, oldW) == s) {
strcpy(&result[i], newW);
i += newWlen;
s += oldWlen;
} else
result[i++] = *s++;
}
result[i] = '\0';
return result;
}
I have tried to simplify and clean up your code as much as possible.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ROWS 3
#define COLS 3
int main(int argc, char* argv[])
{
FILE* matFile = fopen("test1_input_mat.csv", "r");
if (!matFile) {
puts("File does not exist");
return 99;
}
int (*matrix_array)[ROWS] = malloc(sizeof(int[ROWS][COLS]));
char buffer[255];
for (int row = 0; row < ROWS; row++) {
if (fgets(buffer, sizeof(buffer), matFile)) {
char *p = strtok(buffer, ",");
for (int col = 0; col < COLS; col++) {
if (!p) return 99;
matrix_array[row][col] = atoi(p);
p = strtok(NULL, ",");
}
}
}
fclose(matFile);
FILE* vecFile = fopen("test1_input_vec.csv", "r");
if (!vecFile) {
puts("File does not exist");
return 99;
}
int* vec = malloc(ROWS * sizeof(int));
for (int row = 0; row < ROWS; row++) {
if (fscanf(vecFile, "%d", &vec[row]) != 1) return 99;
}
fclose(vecFile);
int* out_vec = calloc(ROWS, sizeof(int));
for (int row = 0; row < ROWS; row++){
for (int col = 0; col < COLS; col++){
out_vec[row] += matrix_array[row][col] * vec[col];
}
}
for (int row = 0; row < ROWS; row++){
printf("%d\n", out_vec[row]);
}
free(matrix_array);
free(vec);
free(out_vec);
return 0;
}
One thing still to do is check the return values from malloc and calloc. Though it is unlikely in a small program like this, they can return NULL
Also, since you have hard-coded the size to be 3x3, there is no need for malloc/calloc.
#define ROWS 3
#define COLS 3
...
int matrix_array[ROWS][COLS];
int vec[ROWS];
int out_vec[ROWS] = {0}

Is my usage of fgets() and strtok() incorrect for parsing a multi-line input?

I'm writing an implementation of the Moore Voting algorithm for finding the majority element (i.e. the element which occurs more than size/2 times) in an array. The code should return the majority element if it exists or else it should return -1. Now my version of the majorityElement(int size, int arr[]) seems to work perfectly fine if I directly hardcode the integer array in the main() function and invoke it from there.
int majorityElement(int size, int arr[])
{
int majorityindex = 0;
int votes = 1;
int index;
for (index = 1; index < size; index++)
{
if (arr[index] == arr[majorityindex])
votes++;
else
votes--;
if (votes == 0)
{
majorityindex = index;
votes = 1;
}
}
int count = 0;
int i;
for (i = 0; i < size; i++)
{
if(arr[majorityindex] == arr[i])
count++;
}
if (count > (size/2))
return arr[majorityindex];
return -1;
}
However, I'm facing some issues if I try to read an input stream like these:
2
5
3 1 3 3 2
3
1 2 3
The first line of the input contains the number of test cases. The first line of the test case will be the size of the array and the second line will be the elements of the array.
I tried to read the input stream from within the main() function like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 100
int majorityElement(int size, int arr[]);
int main()
{
char buf[3];
fgets(buf, MAX, stdin);
int n = atoi(buf);
char a[3];
char b[MAX];
int i;
int count;
int* num;
for (i = 0; i < n; i++)
{
count = 0;
fgets(a, MAX, stdin);
fgets(b, MAX, stdin);
int x = atoi(a);
char* num[x];
int arr[x];
int k = 0;
char* token = strtok(b, " ");
while (token != NULL)
{
num[k] = token;
arr[k] = atoi(num[k]);
token = strtok(NULL, " ");
k++;
}
printf("%d\n", majorityElement(x, arr));
}
return 1;
}
I took the size of buf[] and a[] as 3 during declaration as they must have sufficient space for the \n character read by fgets() as well as the terminating \0 character. As far as I know, the atoi() function ignores the \n character while converting the character array (string) into an integer. I tried to store the first entry of the input (i.e. the number of entries) in a character array buf, converted it into a string and stored it in a variable n. Similarly, I tried to obtain the size of a test array in a variable x and the test arrays (second line of test case) in an integer array arr. While buf and n seem to obtain the correct values in all cases, I'm not quite sure about arr. I'm aware that fgets() leaves a terminal \n character and that might be causing some havoc during tokenization using strtok, although I can't finger at why. I tried submitting this code on GeeksForGeeks. It gives absolutely correct outputs for the sample test case:
2
5
3 1 3 3 2
3
1 2 3
that is
3
-1
However, when I try to "submit" my solution it says:
Possibly your code doesn't work correctly for multiple test-cases (TCs).
The first test case where your code failed:
Input:
4
1 2 2 1
Its Correct output is:
-1
And Your Code's output is:
1
I can't seem to make sense of this. If I manually write this in stdin:
1
4
1 2 2 1
the code outputs
-1
which is indeed the correct solution. This doesn't match with the output claimed during the submission i.e. 1. So I'm not really sure where I'm going wrong. Have I used fgets() or strtok() incorrectly in the main() function? Or is it something else?
Updated the main() function according to suggestions in the comments.
int main()
{
char buf[MAX];
fgets(buf, MAX, stdin);
int n = atoi(buf);
char a[MAX];
char b[MAX];
int i;
int count;
int* num;
for (i = 0; i < n; i++)
{
count = 0;
fgets(a, MAX, stdin);
fgets(b, sizeof(a), stdin);
a[sizeof(a)-1] = '\0';
b[sizeof(b)-1] = '\0';
int x = atoi(a);
int arr[x];
int k = 0;
char* token = strtok(b, " ");
while (token != NULL)
{
if (k > x)
break;
arr[k] = atoi(token);
token = strtok(NULL, " ");
k++;
}
printf("%d\n", majorityElement(x, arr));
}
return 1;
}
As pointed out by #Vlad, the MAX was set too low in my original array. The question says that the number of entries in an array is upper bounded by 10^7 and each array entry is upper bounded by 10^6 (7 digits). So MAX needs to be of the order 10^8. According to the suggestions in the comments, I'm now using dynamic allocation instead of variable length arrays.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 10000000
int majorityElement(int size, int arr[])
{
int majorityindex = 0;
int votes = 1;
int index;
for (index = 1; index < size; index++)
{
if (arr[index] == arr[majorityindex])
votes++;
else
votes--;
if (votes == 0)
{
majorityindex = index;
votes = 1;
}
}
int count = 0;
int i;
for (i = 0; i < size; i++)
{
if(arr[majorityindex] == arr[i])
count++;
}
if (count > (size/2))
return arr[majorityindex];
return -1;
}
int main()
{
char* buf = calloc (MAX, sizeof(char));
fgets(buf, MAX, stdin);
int n = atoi(buf);
char* a = calloc (MAX, sizeof(char));
char* b = calloc(MAX, sizeof(char));
int i;
for (i = 0; i < n; i++)
{
fgets(a, MAX, stdin);
fgets(b, MAX, stdin);
a[strlen(a)-1] = '\0';
b[strlen(b)-1] = '\0';
int x = atoi(a);
int *arr = calloc(x, sizeof(int));
int k = 0;
char* token = strtok(b, " ");
while (token != NULL)
{
if (k > x)
break;
arr[k] = atoi(token);
token = strtok(NULL, " ");
k++;
}
printf("%d\n", majorityElement(x, arr));
free(arr)
}
free(buf);
free(a);
free(b);
return 1;
}
If I set MAX to 10^7 then the code passes all the test cases and is accepted for submission. However, if I set MAX to 10^8 (as required), I get a segmentation fault. How to overcome this?
Your program has several drawbacks.
For example within the function main there are unused variables declared like
int count;
int* num;
The function does take into account that -1 can be a valid value of the array.
There is a problem with the number of elements that can be specified in a test. It is a very big number (according to the description 1 <= N <= 10000000). So the value of MAX equal to 100 is too low. As a result the data can be read incorrectly and not completely. Also there can occur problems with the variable length arrays.
There is no need to use the function fgets because each integer number can be read using scanf.
I could suggest the following solution. Try it and see whether it will pass the tests.
#include <stdio.h>
#include <stdlib.h>
size_t majorityElement( const int a[], size_t n )
{
size_t majority_index = 0;
for ( size_t i = 1, votes = 1; i < n; i++ )
{
if ( a[majority_index] == a[i] )
{
++votes;
}
else
{
--votes;
}
if ( votes == 0 )
{
majority_index = i;
++votes;
}
}
size_t count = 0;
for ( size_t i = 0; i < n; i++ ) count += a[i] == a[majority_index];
return n / 2 < count ? majority_index : n;
}
int main(void)
{
size_t n = 0;
scanf( "%zu", &n );
for ( size_t i = 0; i < n; i++ )
{
size_t m = 0;
scanf( "%zu", &m );
if ( m != 0 )
{
int *a = calloc( m, sizeof( int ) );
for ( size_t j = 0; j < m; j++ ) scanf( "%d", a + j );
size_t majority_index = majorityElement( a, m );
printf( "%d\n", majority_index == m ? -1 : a[majority_index] );
free( a );
}
}
return 0;
}
If it will not pass the tests then it seems there is a bug in tests.:)
Or if the function return type may not be changed then the function definition can look like
int majorityElement( const int a[], size_t n )
{
size_t majority_index = 0;
for ( size_t i = 1, votes = 1; i < n; i++ )
{
if ( a[majority_index] == a[i] )
{
++votes;
}
else
{
--votes;
}
if ( votes == 0 )
{
majority_index = i;
++votes;
}
}
size_t count = 0;
for ( size_t i = 0; i < n; i++ ) count += a[i] == a[majority_index];
return n / 2 < count ? a[majority_index] : -1;
}

C - function that compresses characters

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char * compress(char *input, int size){
char *inputa;
char compressedString[100];
inputa = (char*)malloc(sizeof(size));
snprintf (inputa, size, "%s", input);
int i = 0;
int x;
int counter;
while(i < size){
counter = 1;
x = i;
while (inputa[x] == inputa[x + 1] && (x+1) < size){
x++;
counter++;
}
if (i != x){
i = x;
}else{
i++;
}
}
return inputa;
}
main(){
char ez[] = "blaablaaa";
printf("%s \n", compress(ez, sizeof(ez)));
printf("%s", ez);
return 0;
}
So, I am trying to make this function that compresses consecutive characters (eg. "blaablaaa" to "bla2bla3"). My thought process is to put the inputa[x] on the compressed array and next to it the counter, but I can't seem to make it to work.
Lets take a look at these two lines:
inputa = (char*)malloc(sizeof(size));
snprintf (inputa, size, "%s", input);
size has type int, so sizeof(size) is the size of an integer, which is probably 4.
You used malloc to allocate 4 bytes.
Then you use snprintf to try to copy all of your input (blaablaaa, 10-bytes long) into a buffer that is only 4 bytes long.
10 bytes won't fit into a 4 byte buffer.
I'm not sure what you're trying to do there, but it is not correct.
1) Your allocated buffer was too short:
inputa = (char*)malloc(sizeof(size));
It allocates only 4 bytes.
You needed
inputa = (char*)malloc(sizeof(char)*size + 1 ));
2) You forgot to release the allocated memory.
3) The algorithm itself needed the improvements. Comments in the code:
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/* reverse: reverse string s in place */
void reverse(char s[])
{
int i, j;
char c;
for (i = 0, j = strlen(s)-1; i<j; i++, j--) {
c = s[i];
s[i] = s[j];
s[j] = c;
}
}
/* itoa is not a standard function */
/* itoa: convert n to characters in s */
void itoa1(int n, char s[])
{
int i, sign;
if ((sign = n) < 0) /* record sign */
n = -n; /* make n positive */
i = 0;
do { /* generate digits in reverse order */
s[i++] = n % 10 + '0'; /* get next digit */
} while ((n /= 10) > 0); /* delete it */
if (sign < 0)
s[i++] = '-';
s[i] = '\0';
reverse(s);
}
char * compress(char *input, int size){
int i = 0;
int r; // number of repetitions
char add[2]; // current character buffer
char rep[32]; // repetitions buffer
char c; // current character
char *compr = (char* )malloc(sizeof(char)*size + 1); // memory for the compressed string
compr[0] = 0; // terminate the buffer
add[1] = 0; // terminate the buffer
while(i < size){
c = add[0] = input[i]; // get a character
strcat(compr,add); // add to compr
r = 1; // default number of repetitions is one
while(1) // count and add to the string
{
if(c == input[i+1] )
{ // find how many characters follows c
r++; // number of repetition
i++; // moving along the input buffer
}
else
{
// check the r for number of repetitions
if( r > 1)
{
// there were repetitions:
// char * itoa ( int value, char * str, int base );
itoa1(r,rep); // get the number
strcat(compr,rep); // add repetition number to the compressed string
}
i++;// advance to the next character
break;
} // else
}// while
} //while
return compr;
}
int main(void){
char sg7[] = "BLaaaBBLLaaaaXXXaaY";
char ez[] = "blaablaaa";
char *ptr;
printf("%s \n", ptr = compress(sg7, strlen(sg7) ) );
printf("%s \n", sg7);
free(ptr);
printf("\n");
printf("%s \n", ptr = compress(ez, strlen(ez)));
printf("%s \n", ez);
free(ptr);
return 0;
}
Output:
BLa3B2L2a4X3a2Y
BLaaaBBLLaaaaXXXaaY
bla2bla3
blaablaaa
I hope it helps.

find special cases while comparing files in reverse order

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BS 12
void reverse(char * buffer, int size)
{
char tmp;
int i;
for(i = 0; i < size / 2; i++)
{
tmp = (char)buffer[i];
buffer[i] = buffer[size - i - 1];
buffer[size - i - 1] = tmp;
}
}
int compare_bin(char * buffer, char * buffer2, int size)
{
// because strncmp is only for string without \x00, so there must be a customized compare function
int i;
for(i = 0; i < size; i++)
{
if(buffer[i] != buffer2[i])
return 0;
}
return 1;
}
int main (const int argc, const char** argv)
{
if(argc != 3)
exit(-1);
int equal = 1;
char * buffer = malloc(BS), * buffer2 = malloc(BS);
FILE * f1, * f2;
f1 = fopen(argv[1], "r");
f2 = fopen(argv[2], "r");
fseek(f1, 0, SEEK_END);
fseek(f2, 0, SEEK_END);
long i = ftell(f1), j = ftell(f2);
if(i != j)
{
equal = 0;
goto endp;
}
fseek(f2, 0, SEEK_SET);
int need = 0;
int count;
int f2_pos = 0;
do
{
i = i - BS;
if(i < 0)
{
need = BS - abs((int)i);
i = 0;
}
else
need = BS;
fseek(f1, i, SEEK_SET);
count = fread(buffer, need, 1, f1);
reverse(buffer, count * need);
// fwrite(buffer, count * need, 1, f2);
fread(buffer2, need * need, 1, f2);
// printf("compare...\n");
// for(int i = 0; i < need * count; i++)
// {
// printf("%02hhX", buffer[i]);
// }
// printf("\n");
// for (int i = 0; i < need * count; i++)
// {
// printf("%02hhX", buffer2[i]);
// }
// printf("\n");
if(compare_bin(buffer, buffer2, need * count) == 0)
{
equal = 0;
break;
}
f2_pos += need * count;
fseek(f2, f2_pos, SEEK_SET);
if(i == 0)
break;
}while(i > 0);
fclose(f1);
fclose(f2);
free(buffer);
free(buffer2);
endp:
if(equal)
return 0;
else
{
printf("2 files not equal is reversed order\n");
return 1;
}
return 0;
}
So I write a program to compare file content in reverse order. I have already considered \x00 in binary file and strncmp isn't used. But there is still flaw. There is a test server to test this program. But I dont have access to it. This program always fails on that server. So there must be some special cases to make it fail. Any idea?
There are other ways around it. For instance, calculating MD5. But I want to fix this.
For the very first iteration where you read data you have
fread(buffer2, need * need, 1, f2);
The problem is that in that case need is 12, which is the size of the memory allocated for buffer2, but you ask to read 12 * 12 bytes.
If the second file is large enough, you will write out of bounds in the memory, leading to undefined behavior. If the file is not large enough then you won't read anything.
Also note that the order of the two middle arguments to fread matter. If you changed the order you would write out of bounds of the buffer both if the file is larger than need * need or not. You should really read count byte-sized object (the second argument should be 1 and the third should be count, which of course mean you need to change the order in the first call as well).
In short, your two fread calls should be
count = fread(buffer, 1, BS, f1);
fread(buffer2, 1, count, f2);
PS. Don't forget error checking.

Read a txt file with gets in C

I want to know what is the best option to read a txt file that contain two line of numbers using gets function in c and save them in an array within 1 second.
Assume the following example as an txt file called ooo.txt and it has the number 2.000.000 in the first line (which will be the size of the array) and 2.000.000 number in the second line that will be stored in the array.
Eg
2000000
59 595 45 492 89289 5 8959 (+1.999.993 numbers)
code i try (only the fcanf function)
int t_size;
fscanf(fp, "%d",&t_size); //bypass the first character!
int* my_array = NULL;
my_array = malloc(t_size*sizeof(*my_array));
if (my_array==NULL) {
printf("Error allocating memory!\n"); //print an error message
return 1; //return with failure
getchar();
}
int i =0;
for ( i = 0; i < t_size; i++ )
{
fscanf(fp, "%d",&my_array[i]); /*p[i] is the content of element at index i and &p[i] is the address of element at index i */
}
best, so far, code to make the procedure in 1 second
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
int is_end(char* input) {
return *input == 0;
}
int is_linebreak(char* input) {
return *input == '\r' || *input == '\n' || *input == ' ';
}
char* eat_linebreaks(char* input) {
while (is_linebreak(input))
++input;
return input;
}
size_t count_lines(char* input) {
char* p = input;
size_t rows = 1;
if (is_end(p))
return 0;
while (!is_end(p)) {
if (is_linebreak(p)) {
++rows;
p = eat_linebreaks(p);
}
else {
++p;
}
}
return rows;
}
/* split string by lines */
char** get_lines(char* input, size_t line_count) {
char* p = input;
char* from = input;
size_t length = 0;
size_t line = 0;
int i;
char** lines = (char**)malloc(line_count * sizeof(char*));
do {
if (is_end(p) || is_linebreak(p)) {
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
length = 0;
++line;
p = eat_linebreaks(p);
from = p;
}
else {
++length;
++p;
}
} while (!is_end(p));
// Copy the last line as well in case the input doesn't end in line-break
lines[line] = (char*)malloc(length + 1);
for (i = 0; i < length; ++i)
lines[line][i] = *(from + i);
lines[line][length] = 0;
++line;
return lines;
}
int main(int argc, char* argv[]) {
clock_t start;
unsigned long microseconds;
float seconds;
char** lines;
size_t size;
size_t number_of_rows;
int count;
int* my_array;
start = clock();
FILE *stream;
char *contents;
int fileSize = 0;
int i;
// Open file, find the size of it
stream = fopen(argv[1], "rb");
fseek(stream, 0L, SEEK_END);
fileSize = ftell(stream);
fseek(stream, 0L, SEEK_SET);
// Allocate space for the entire file content
contents = (char*)malloc(fileSize + 1);
// Stream file into memory
size = fread(contents, 1, fileSize, stream);
contents[size] = 0;
fclose(stream);
// Count rows in content
number_of_rows = count_lines(contents);
// Get array of char*, one for each line
lines = get_lines(contents, number_of_rows);
// Get the numbers out of the lines
count = atoi(lines[0]); // First row has count
my_array = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
my_array[i] = atoi(lines[i + 1]);
}
microseconds = clock() - start;
seconds = microseconds / 1000000.0f;
printf("Took %fs", seconds);
return 0;
}
First of all, you will want to use fgets instead to avoid dangerous buffer overflows. Second, you want to remove all punctuation from your numbers. Thus 2.000.000 becomes 2000000. Then you can use pointers and the strtol function to convert characters to integers; there are also other functions to convert to floats and other types.
Since code wants speed and IO is a typically bottle-neck, reading the entire file at once after using fstat() to find its length (#Charlon) makes some sense. Following is a quick parsing of that buffer.
// Stream file into memory
size = fread(contents, 1, fileSize, stream);
contents[size] = 0;
fclose(stream);
#if 1
// new code
size_t array_n;
int n;
if (sscanf(contents, "%zu%n", &array_n, &n) != 1) Handle_BadInput();
my_array = malloc(array_n * sizeof *my_array);
if (my_array == NULL) Handle_OOM();
char *p = &contents[n];
errno = 0;
char *endptr;
for (size_t count = 0; count < array_n; count++) {
my_array[count] = strtol(p, &endptr, 10);
if (p == endptr || errno)
Handle_BadInput();
p = endptr;
}
char ch;
if (sscanf(p, " %c", &ch) == 1) Handle_ExtraInput();
#else
//old code
// Count rows in content
number_of_rows = count_lines(contents);
// Get array of char*, one for each line
lines = get_lines(contents, number_of_rows);
// Get the numbers out of the lines
count = atoi(lines[0]); // First row has count
my_array = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
my_array[i] = atoi(lines[i + 1]);
}
#endif
Still prefer the scale-able approach of reading one number at a time.
The fastest way needs a lot of RAM :
1) open the file (man open)
2) use the fstat function to get the size of you file (man fstat)
3) read the file with a buffer malloc-ed with the size you just get at 2) (man malloc)
4) close the file (man close)
5) parse your buffer and transform each block of digits (each time until ' ' or '\0') to int
EDIT : if your RAM is not enough large, you need to create a get_next_int function that only stores in your buffer the next number in the file
EDIT 2 : You can read until you know the number of int you will need to store and compares this number with a security coef to the size of your ram, and use the good way so that your program won't set errno to ENOMEM if you know what I'm talking about ;)

Resources