How can I create an array of unique strings without knowing how many strings there are until I process the input file? There can be as many as 2 million strings, max length of 50.
My program is something like this. This works for 51 items then overwrites other data. I don't know how to add an element to the array, if possible.
main() {
char *DB_NAMES[51]; // i thought this gave me ptrs to chunks of 51
// but it's 51 pointers!
char *word;
while not eof {
...function to read big string
...function to separate big sting into words
...
processWord(ctr, DB_NAMES, word);
...
}
}
processWord(int ndx, char *array1[], char *word){
...function to find if word already exists...
//if word is new, store in array
array1[ndx]= (char *)malloc(sizeof(51)); // isn't this giving me a char[51]?
strcpy(array1[ndx],word);
...
}
You can first get the number of words in your file using the below logic and when you get the number of words in the file you can initialize the array size with the word count.
#include<stdio.h>
#define FILE_READ "file.txt"
/*
 * Estimates the number of words in FILE_READ by counting space characters.
 *
 * The counter starts at 1, so the printed value is "number of spaces + 1".
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    FILE *filp = fopen(FILE_READ, "r");
    if (filp == NULL) {
        printf("file not found\n");
        /* Stop here: passing a NULL stream to fgetc is undefined behaviour. */
        return 1;
    }

    int count = 1;
    /*
     * fgetc returns an int so that EOF can be told apart from every valid
     * character. Storing the result in a char either never matches EOF
     * (unsigned char) or mistakes byte 0xFF for EOF (signed char).
     */
    int c;
    while ((c = fgetc(filp)) != EOF) {
        if (c == ' ')
            count++;
    }
    fclose(filp);

    printf("worrds = %d\n", count);
    return 0;
}
Regards,
yanivx
Better not use a fixed string length; save memory space.
char **DB_NAMES = 0; // pointer to first char * ("string") in array; initially 0
Pass pointer by reference so that it can be altered. Moreover, you'll want the new ctr value in case a new word has been stored.
ctr = processWord(ctr, &DB_NAMES, word);
Change function processWord accordingly.
int processWord(int ndx, char ***array1a, char *word)
{ char **array1 = *array1a;
...function to find if word already exists...
// if word is new, store in array
{
array1 = realloc(array1, (ndx+1)*sizeof*array1); // one more string
if (!array1) exit(1); // out of memory
array1[ndx++] = strdup(word); // store word's copy
*array1a = array1; // return new array
}
return ndx; // return count
}
Related
Alright, I'm making a grading program/code that will have its own text file where it stores all the grades. And I thought of making a 2d array where the first "dimension" will be the student and second "dimension" the individual grade (if there's a smarter way of doing grades tell me, by the way I chose this method because it is the only way I know how I could later on just add more students or more grades) keep in mind that the number of grades and students isn't always set so there's no easy way out. Anyways I've tried something, and I think it only works with characters and not with integers (even though the grades will be 1-5).Also I want a way to print it out but I think this is the bigger problem. Anyways THANKS.
typedef char string [20];
string row;
int i=0,j=0;
char arr[20][20];
FILE *fp;
fp=fopen("grades.txt","r");
for(i=0;arr[i-1][j]!=EOF;i++)
{
fgets(row,sizeof(row),fp);//I used fgets so I could get the size of the line
for(j=0;j<strlen(row);j++)
{
fscanf(fp,"%c ",&arr[i][j]);
}
}
I don't know if it will help but I thought the text file would look something like this:
54455
43544
22443
21232
21121
fgets reports if it worked or not. So, reading till end of file or till the buffer is full:
for(i = 0; i < sizeof arr / sizeof *arr && fgets(arr[i], sizeof arr[i], fp); i++)
{
// probably remove the \n that fgets writes into the buffer
// otherwise nothing else to do
}
Side notes:
fgets reads from the file, no need for additional reads with fscanf
fgets reads a line including the newline character, remove it if you don't want it
you need to check if fopen worked
I found that the best way to store students' grades is in a struct. In general every student has a first name, a last name, grades, and so on; you can add whatever fields you want. I am fine with just fname, lname and grades.
// One student record: two fixed-size name buffers plus a separately
// allocated list of integer grades and its length.
typedef struct student_s {
char fname[25]; // first name, fixed 25-byte buffer
char lname[25]; // last name, fixed 25-byte buffer
int* grades; // pointer to this student's grades (allocated with malloc in the snippets below)
int count_of_grades; // Track number of grades for each student
} student_t;
By allocating a dynamic array of student_t you can get as many students as you want.
// Allocate array of structs
student_t* students = (student_t *) malloc(sizeof(student_t));
By using getline() you can read a whole line from the file at once (a line ends with \n). getline() is not part of standard C (it is a POSIX function), so you need to put #define _GNU_SOURCE (or #define _POSIX_C_SOURCE 200809L) before the first #include of your program.
while ((read_len = getline(&line, &len, fp)) != -1)
Every time getline() reads the next line of the file, count is incremented and the array is reallocated to hold one more student.
++count;
// Increase size of array beacause of new student to add
students = realloc(students, sizeof(student_t) * count);
if (students == NULL)
{
printf("Couldn't allocated memmory\n");
return 1;
}
Next step is to allocate grades array which will store all grades of specific student. Looping through line you can extract each grade. Then by just defining members of struct you can add grades for each student.
// Allocate array to store all grades from file for one student
// Count of grades does not have to be the same for every student
students[index].grades = (int *) malloc(sizeof(int) * (read_len-1));
// Iterate grades read from file
for (int i=0; i<read_len-1; ++i)
{
// char --> char *
char grade[2] = "\0";
grade[0] = line[i];
// Add grade to the array of grades
students[index].grades[i] = atoi(grade);
}
At the end, store the number of grades in the array so that the data is easy to work with later in your program.
// Track number of grades
students[index].count_of_grades = read_len-1;
++index
Full code:
#define _GNU_SOURCE // necessery to use getline()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// One student record: two fixed-size name buffers plus a separately
// allocated list of integer grades and its length.
typedef struct student_s {
char fname[25]; // first name, fixed 25-byte buffer
char lname[25]; // last name, fixed 25-byte buffer
int* grades; // pointer to this student's grades; main() allocates it with malloc
int count_of_grades; // number of entries stored in grades
} student_t;
/*
 * Reads "data.txt" line by line. Each line describes one student: every
 * character on the line (excluding the trailing newline, if there is one) is
 * converted to one single-digit grade. The students are collected in a
 * dynamically grown array, printed as "<fname>: g1 g2 ...", and released.
 *
 * argc and argv are not used.
 *
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main(int argc, char const *argv[])
{
    (void) argc;
    (void) argv;

    FILE *fp = fopen("data.txt", "r");
    if (fp == NULL)
    {
        return 1;
    }

    student_t *students = NULL; // realloc(NULL, n) acts like malloc(n)
    int count = 0;              // number of fully initialised students
    int status = 0;             // exit code; set to 1 on allocation failure
    char *line = NULL;          // buffer managed by getline(), freed below
    size_t len = 0;
    ssize_t read_len;

    // Read line by line from file until fp reaches end of file
    while ((read_len = getline(&line, &len, fp)) != -1)
    {
        // Drop the newline only when it is really there: the last line of a
        // file may end without one, and its final character is then a grade.
        size_t grade_count = (size_t) read_len;
        if (grade_count > 0 && line[grade_count - 1] == '\n')
        {
            --grade_count;
        }

        // Grow through a temporary so the existing array is not lost (and can
        // still be freed) when realloc fails.
        student_t *grown = realloc(students, sizeof *students * ((size_t) count + 1));
        if (grown == NULL)
        {
            printf("Couldn't allocated memmory\n");
            status = 1;
            break;
        }
        students = grown;

        student_t *current = &students[count];

        // Replace with your code, which adds name to struct or get rid of it (also from struct)
        // strcpy also copies the terminating '\0'; a 4-byte memcpy would leave
        // the name unterminated and printf("%s") would read past it.
        strcpy(current->fname, "John");
        strcpy(current->lname, "Wash");

        // Count of grades does not have to be the same for every student.
        // An empty line yields a student with no grades and no allocation.
        current->grades = NULL;
        current->count_of_grades = 0;
        if (grade_count > 0)
        {
            current->grades = malloc(sizeof *current->grades * grade_count);
            if (current->grades == NULL)
            {
                printf("Couldn't allocated memmory\n");
                status = 1;
                break;
            }
            for (size_t i = 0; i < grade_count; ++i)
            {
                // char --> one-character string, so atoi can convert it
                char grade[2] = { line[i], '\0' };
                current->grades[i] = atoi(grade);
            }
            current->count_of_grades = (int) grade_count;
        }
        ++count;
    }
    fclose(fp);
    free(line); // free(NULL) is a no-op, no guard needed

    // Print data from structs (skipped when reading was aborted)
    if (status == 0)
    {
        for (int i = 0; i < count; ++i)
        {
            printf("%s: ", students[i].fname);
            for (int j = 0; j < students[i].count_of_grades; ++j)
            {
                printf("%d ", students[i].grades[j]);
            }
            printf("\n");
        }
    }

    // Release every per-student grade array, then the array of students
    for (int i = 0; i < count; ++i)
    {
        free(students[i].grades);
    }
    free(students);

    return status;
}
C newbie here.
I'm trying to append a files content to an array line by line by looping over it using fgets in a while loop.
When trying to access the array however I get a segmentation fault while accessing the array on value array[1] or array[2]. When reading array[3] it does work. Am I doing something wrong?
Could someone point me into the right direction?
I'll leave my code below.
#include "library.h"
#include <stdio.h>
int main(int argc, char **argv) {
FILE *file = fopen("words.txt", "r");
char line[100];
char *array[] = {};
int i = 0;
while(fgets(line, sizeof(line), file) != NULL) {
i++;
array[i] = line;
}
printf("Line %d = %s\n", i , array[1]); //segmentation fault on array[1] and array[2] not on array[3]
}
Your code has two bugs.
There is no memory for array
You keep storing the address of line into array. What you need is to get the content of line into the array.
Try like:
#define MAX_LEN 100
char line[MAX_LEN];
size_t capacity = 256;
char (*array)[MAX_LEN] = malloc(capacity * sizeof *array);
if (array == NULL)
{
// Error
// Add error handling or just:
exit(1);
}
size_t i = 0;
while(fgets(line, sizeof(line), file) != NULL)
{
if (i == capacity)
{
capacity = 2 * capacity;
char (*tmp)[MAX_LEN] = realloc(array, capacity * sizeof *array);
if (tmp == NULL)
{
// Error
// Add error handling or just:
exit(1);
}
array = tmp;
}
strcpy(array[i], line);
++i;
}
... do something with array ...
free(array);
First, you must give your array a dimension. Otherwise its size is determined by its initializer; since your initializer is empty, your array is empty too.
So, it you want to be able to store, say, a hundred lines, you should write:
char *array[100] = {};
(I'm explaining this with a static dimension for the array, because the dynamic allocation approach is a bit more complex)
Then, when you write your line:
array[i] = line;
Do you realize that you're working with pointer?
line represents the address of the buffer where the characters of the line are stored. And you assign this address of the variable line to an array of pointers.
array[1] is the second pointer of your array of pointer. (because array indexation starts with 0 in C)
The segmentation fault occurs because your array has zero elements, so you cannot access the second (non-existent) element.
In addition, you always assign the same address (the address of the variable line) to every entry of your array.
In fact you've got only one line whose address you duplicate in your array
There are multiple errors that will prevent you from storing all the lines of the files in your array.
I'll edit this post ASAP to provide you with a working example of what you're trying to do.
What I have is a string, let's say char input[] = "one two three"; and what I want is a function that takes in two arguments, the input string and an array of strings where I want those words to be.
For example, in pseudo code, transferWords(input, words) would take every word in the input string and put it in the string array words so that words = {"one", "two", "three"}. I can't allocate memory (malloc(), etc...) to do this since the exercise does not allow me to.
What I've tried is using pointers but this isn't useful because if I happen to access words[21] it would be reading something else:
void transfer(char input[], char *words[20]){
char *p;
int i = 0;
p = strtok(input," \t\n");
while(p != 0)
{
words[i++] = p;
p = strtok(0, " \t\n");
}
}
where words would be initalized as char *words[20] = {0}; before.
How could I go about doing this?
(I am still pretty new to C and I'm not very used to it yet, so apologies if this is something obvious.)
If you are not able to resize your arrays, you must allocate them initially with the proper size. For any input array a, the max number of words is (n/2)+1, where n is the number of characters in a. We then know the max size of any word is n, as we could have an input string with only one word. If you declare your words array with this size, you can guarantee for any input you can capture all the words. You will, in many cases, waste some (or a lot) of space, but you will guarantee all possible words can be stored. I'm not sure how the allocation is done before hand, but see the following code for a general description.
int n;
//Get first the size of the input array...
scanf("%d", n);
//Now we need to get the entire input and allocate our arrays
char input[n + 1]; //plus one for the null terminator if we need it
char words[(n/2) + 1][n + 1]; //(n/2) + 1 max words with a max size of n for each
//plus one on n for the null terminator
//get input...
fgets(input, n, stdin);
//Now you can run your function
The more general and intuitive way of doing this is to use malloc and realloc to grow your array dynamically, so you don't waste so much space. Since you explicitly said you cannot do this, the approach above will work as well: it reserves a fixed, worst-case amount of space that is guaranteed to hold every possible combination of words.
Then, to move the strings from the input to the words array, use strcpy to copy the individual words to the words array.
/*
 * Copies every whitespace-separated word of input into words[0], words[1], ...
 *
 * input is tokenised in place with strtok (delimiters: space, tab, newline),
 * so it is modified by this call. Each words[k] that receives a word must
 * already point to writable storage big enough for that word and its
 * terminating '\0'; neither the number of words nor their length is checked.
 */
void transfer(char *input, char **words){
    int slot = 0;

    for (char *token = strtok(input, " \t\n");
         token != NULL;
         token = strtok(NULL, " \t\n")) {
        strcpy(words[slot], token);
        slot++;
    }
}
As a hint, name your functions and parameters more meaningful.
For Example:
/*
 * Tokenises str in place on whitespace and records a pointer to each word.
 *
 * The delimiters are carriage return, newline, tab, form feed and space.
 * str is modified by strtok, and the stored pointers point into str.
 * When fewer than length words are found, the slot following the last word
 * is set to NULL.
 *
 * @param str    a null-terminated, writable string, must not be NULL
 * @param words  an array of char pointers, must not be NULL
 * @param length the size of the array words, must be >0
 *
 * @return the number of words stored in words (never more than length)
 */
int split(char *str, char *words[], unsigned length)
{
    int count = 0;
    char *source = str;

    while (count < length) {
        char *token = strtok(source, "\r\n\t\f ");

        words[count] = token;
        if (token == NULL)
            return count;

        /* after the first call strtok continues from its saved position */
        source = NULL;
        count++;
    }
    return count;
}
/*
 * Demo driver: splits a fixed sentence with split() and prints the number
 * of words that were found.
 */
int main()
{
#define N 20
    char *words[N];
    /*
     * split() tokenises with strtok, which writes into its input, so the text
     * must live in writable storage. A local array provides that without any
     * heap allocation: there is no strdup() result that could be NULL, nothing
     * to free, and no dependency on the non-ISO-C strdup().
     */
    char input[] = "one two three";
    int num = split(input, words, N);
    printf("%d\n", num);
    return 0;
}
I'm writing a c code to read strings from stdin with scanf() and while loop (into a two-dimensional char array). My strategy is to use an input array to temporarily store each string and then assign it to a preword array (fixed sized). However, my strategy failed and all strings stored in my arrays are the same (the last string input). How to fix it?
I used fgets() and it works fine. However, I cannot use it to handle a second line of strings (from stdin): my fgets() reads only the first line, which is why I turned to scanf and a while loop.
#include<stdio.h>
#include<stdlib.h>
#define MAX 1000
#define size 50
int main ()
{
int count = 0;
char input[size];
char * preword[MAX];
while (scanf("%s",input)!= EOF){
preword[count] = input;
printf("preword[%d] is %s\n",count,preword[count]);
count++;
}
printf("the count is %d\n",count);
for (int i = 0; i < count; i++){
printf("preword[%d] is %s\n",i,preword[i]);
}
return 0;
}
I expect my input arrays from stdin will be stored in a two-dimensional char array. Below is the output in terminal after compilation. My input is a txt file, in which I have
hello world
I am a hero
It turns out that all strings stored in the two-d array are the last word.
preword[0] is hello
preword[1] is world
preword[2] is I
preword[3] is am
preword[4] is a
preword[5] is hero
the count is 6
preword[0] is hero
preword[1] is hero
preword[2] is hero
preword[3] is hero
preword[4] is hero
preword[5] is hero
Firstly here
char * preword[MAX];
preword is array of character pointer i.e each element is a char pointer & when you are doing like
preword[count] = input;
as @paddy pointed out, it stores the address of input in every element of preword, so they all hold the same pointer; no separate memory has been allocated for preword[count]. The correct way is to allocate memory for each pointer and then copy the string into it.
Also use fgets() instead of scanf() here. For e.g
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAX 1000
#define size 50
/*
 * Reads lines from stdin (at most MAX of them), stores a private heap copy
 * of each line without its trailing newline in preword[], echoes every line
 * as it is stored, then prints the count, prints all lines again and frees
 * them.
 *
 * Returns 0.
 */
int main (void)
{
    int count = 0;
    char input[size] = {0};
    char * preword[MAX] = {0};
    size_t retStrCspn = 0;

    /* Test count first so that no line is consumed that cannot be stored,
       and preword[] is never written past its last element. */
    while (count < MAX && fgets(input, size, stdin) != NULL){
        /* remove trailing new line if its stored at end of buffer by fgets();
           the returned length is reused to size the allocation */
        input[retStrCspn = strcspn(input, "\n")] = 0;
        preword[count] = malloc(retStrCspn + 1); /* Allocate memory for each pointer elements */
        if (preword[count] == NULL) {
            /* Out of memory: stop reading instead of silently dropping lines */
            fputs("memory allocation failed\n", stderr);
            break;
        }
        memcpy (preword[count], input, retStrCspn + 1); /* copy input buffer into each different memory location */
        printf("preword[%d] is %s\n",count,preword[count]);
        count++;
    }
    printf("the count is %d\n",count);
    for (int i = 0; i < count; i++){
        printf("preword[%d] is %s\n",i,preword[i]);
        /* Free the element just printed. Index i, not count: preword[count]
           is never one of the stored lines, so using it leaks every line. */
        free(preword[i]);
        preword[i] = NULL;
    }
    return 0;
}
I am trying to read the file and get file content and store it in 2 element array [0]: is for content [1]: is for NULL value. This code is working correctly when just I want to print it but I want to use as an array:
char ch;
FILE *file;
file = fopen("input.txt","r");
int allocated_size = 10;
int used_size = 0;
char c, *input, *tmp_input;
// allocate our buffer
input = (char*)malloc(allocated_size);
if (input == NULL) {
printf("Memory allocation error");
return 1;
}
while ((c = fgetc(file)) != EOF){
// make sure there's an empty one at the end to avoid
// having to do this check after the loop
if (used_size == allocated_size-1) {
allocated_size *= 2;
tmp_input = (char*)realloc(input, allocated_size);
if (tmp_input == NULL) {
free (input);
printf("Memory allocation error");
return 1;
}
input = tmp_input;
}
input[used_size++] = c;
}
// we are sure that there's a spot for last one
// because of if (used_size == allocated_size-1)
input[used_size] = '\0';
printf("\nEntered string in the file: %s\n", input);
But how can I use "input" like an array:
char *input[] = {"This is string value from file!", NULL};
For this case I can get access to the text in this way: input[0]
So in order to achieve this
char *input[] = {"This is string value from file!", NULL};
If I am understanding correctly from your write-up then declare input as this
char *input[2];
Then, every time you perform an operation on your string pointer (malloc, realloc, etc.), use input[0]. This way the array's first element will contain your text.
The reason behind this, the string in first record means you need array of char pointers.