C - Get random words from text a file - c

I have a text file which contains a list of words in a precise order.
I'm trying to create a function that return an array of words from this file. I managed to retrieve words in the same order as the file like this:
char *readDict(char *fileName) {
int i;
char * lines[100];
FILE *pf = fopen ("francais.txt", "r");
if (pf == NULL) {
printf("Unable to open the file");
} else {
for (i = 0; i < 100; i++) {
lines[i] = malloc(128);
fscanf(pf, "%s", lines[i]);
printf("%d: %s\n", i, lines[i]);
}
fclose(pf);
return *lines;
}
return "NULL";
}
My question is: How can I return an array with random words from the text file; Not as the file words order?
The file looks like this:
exemple1
exemple2
exemple3
exemple4

Reservoir sampling allows you to select a random number of elements from a stream of indeterminate size. Something like this could work (although untested):
char **reservoir_sample(const char *filename, int count) {
FILE *file;
char **lines;
char buf[LINE_MAX];
int i, n;
file = fopen(filename, "r");
lines = calloc(count, sizeof(char *));
for (n = 1; fgets(buf, LINE_MAX, file); n++) {
if (n <= count) {
lines[n - 1] = strdup(buf);
} else {
i = random() % n;
if (i < count) {
free(lines[i]);
lines[i] = strdup(buf);
}
}
}
fclose(file);
return lines;
}
This is "Algorithm R":
Read the first count lines into the sample array.
For each subsequent line, replace a random element of the sample array with probability count / n, where n is the line number.
At the end, the sample contains a set of random lines. (The order is not uniformly random, but you can fix that with a shuffle.)

If each line of the file contains one word, one possibility would be to open the file and count the number of lines first. Then rewind() the file stream and select a random number, sel, in the range of the number of words in the file. Next, call fgets() in a loop to read sel words into a buffer. The last word read can be copied into an array that stores the results. Rewind and repeat for each word desired.
Here is a program that uses the /usr/share/dict/words file that is typical on Linux systems. Note that if the number of lines in the file is greater than RAND_MAX (the largest number that can be returned by rand()), words with greater line numbers will be ignored. This number can be as small as 32767. In the GNU C Library RAND_MAX is 2147483647.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define MAX_WORD 100
#define NUM_WORDS 10
int main(void)
{
/* Open words file */
FILE *fp = fopen("/usr/share/dict/words", "r");
if (fp == NULL) {
perror("Unable to locate word list");
exit(EXIT_FAILURE);
}
/* Count words in file */
char word[MAX_WORD];
long wc = 0;
while (fgets(word, sizeof word, fp) != NULL) {
++wc;
}
/* Store random words in array */
char randwords[NUM_WORDS][MAX_WORD];
srand((unsigned) time(NULL));
for (size_t i = 0; i < NUM_WORDS; i++) {
rewind(fp);
int sel = rand() % wc + 1;
for (int j = 0; j < sel; j++) {
if (fgets(word, sizeof word, fp) == NULL) {
perror("Error in fgets()");
}
}
strcpy(randwords[i], word);
}
if (fclose(fp) != 0) {
perror("Unable to close file");
}
/* Display results */
for (size_t i = 0; i < NUM_WORDS; i++) {
printf("%s", randwords[i]);
}
return 0;
}
Program output:
biology's
lists
revamping
slitter
loftiness's
concur
solemnity's
memories
winch's
boosting
If blank lines in input are a concern, the selection loop can test for them and reset to select another word when they occur:
/* Store random words in array */
char randwords[NUM_WORDS][MAX_WORD];
srand((unsigned) time(NULL));
for (size_t i = 0; i < NUM_WORDS; i++) {
rewind(fp);
int sel = rand() % wc + 1;
for (int j = 0; j < sel; j++) {
if (fgets(word, sizeof word, fp) == NULL) {
perror("Error in fgets()");
}
}
if (word[0] == '\n') { // if line is blank
--i; // reset counter
continue; // and select another one
}
strcpy(randwords[i], word);
}
Note that if a file contains only blank lines, with the above modification the program would loop forever; it may be safer to count the number of blank lines selected in a row and skip until some reasonable threshold is reached. Better yet to verify that at least one line of the input file is not blank during the initial line-count:
/* Count words in file */
char word[MAX_WORD];
long wc = 0;
long nonblanks = 0;
while (fgets(word, sizeof word, fp) != NULL) {
++wc;
if (word[0] != '\n') {
++nonblanks;
}
}
if (nonblanks == 0) {
fprintf(stderr, "Input file contains only blank lines\n");
exit(EXIT_FAILURE);
}

Related

How to read the content from the second line onwards on a .txt file on C

I need help to read the numbers of a .txt file and put them in an array. But only from the second line onwards. I'm stuck and don't know where to go from the code that i built.
Example of the .txt file:
10 20
45000000
48000000
56000000
#define MAX 50
int main (void){
FILE *file;
int primNum;
int secNum;
int listOfNumers[50];
int numberOfLines = MAX;
int i = 0;
file = fopen("file.txt", "rt");
if (file == NULL)
{
printf("Error\n");
return 1;
}
fscanf(file, "%d %d\n", &primNum, &secNum);
printf("\n1st Number: %d",primNum);
printf("\n2nd Number: %d",secNum);
printf("List of Numbers");
for(i=0;i<numberOfLines;i++){
//Count the number from the second line onwards
}
fclose(file);
return 0;
}
You just need a loop to keep reading ints from file and populate the listOfNumers array until reading an int fails.
Since you don't know how many ints there are in the file, you could also allocate the memory dynamically. Example:
#include <stdio.h>
#include <stdlib.h>
int main(void) {
FILE* file = fopen("file.txt", "rt");
if(file == NULL) {
perror("file.txt");
return 1;
}
int primNum;
int secNum;
if(fscanf(file, "%d %d", &primNum, &secNum) != 2) {
fprintf(stderr, "failed reading primNum and secNum\n");
return 1;
}
unsigned numberOfLines = 0;
// allocate space for one `int`
int* listOfNumers = malloc((numberOfLines + 1) * sizeof *listOfNumers);
// the above could just be:
// int* listOfNumers = malloc(sizeof *listOfNumers);
while(fscanf(file, "%d", listOfNumers + numberOfLines) == 1) {
++numberOfLines;
// increase the allocated space by the sizeof 1 int
int* np = realloc(listOfNumers, (numberOfLines + 1) * sizeof *np);
if(np == NULL) break; // if allocating more space failed, break out
listOfNumers = np; // save the new pointer
}
fclose(file);
puts("List of Numbers:");
for(unsigned i = 0; i < numberOfLines; ++i) {
printf("%d\n", listOfNumers[i]);
}
free(listOfNumers); // free the dynamically allocated space
}
There are a few ways to approach this; if you know the size of the first line, you should be able to use fseek to move the position of the file than use getline to get each line of the file:
int fseek(FILE *stream, long offset, int whence);
The whence parameter can be:
SEEK_SET : the Beginning
SEEK_CUR : the current position
SEEK_END : the End
The other option would to encapsulate the entire file read in a while loop:
char *line = NULL;
size_t linecap = 0;
ssize_t linelen;
int counter = 0;
while((linelen = getline(&line, &linecap, file)) != -1){
if counter == 0{
sscanf(line, "%d %d\n", &primNum, &secNum);
}else{
//Process your line
}
counter++; //This would give you your total line length
}

string of undetermined length c

Hi I was trying to create an array of string of an undetermined length in c.
This is my code :
int main()
{
int lineCount=linesCount();
char text[lineCount][10];
printf("%d",lineCount);
FILE * fpointer = fopen("test.txt","r");
fgets(text,10,fpointer);
fclose(fpointer);
printf("%s",text);
return 0;
}
I would like to replace 10 in
char text[lineCount][10];
My code reads out a file I already made the amount of lines dynamic.
Since the line length is unpredictable I would like to replace 10 by a something dynamic.
Thanks in advance.
To do this cleanly, we want a char * array rather than an 2D char array:
char *text[lineCount];
And, we need to use memory from the heap to store the individual lines.
Also, don't "hardwire" so called "magic" numbers like 10. Use an enum or #define (e.g) #define MAXWID 10. Note that with the solution below, we obviate the need for using the magic number at all.
Also, note the use of sizeof(buf) below instead of a magic number.
And, we want [separate] loops when reading and printing.
Anyway, here's the refactored code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
linesCount(void)
{
return 23;
}
int
main(void)
{
int lineCount = linesCount();
char *text[lineCount];
char buf[10000];
printf("%d", lineCount);
// open file and _check_ the return
const char *file = "test.txt";
FILE *fpointer = fopen(file, "r");
if (fpointer == NULL) {
perror(file);
exit(1);
}
int i = 0;
while (fgets(buf, sizeof(buf), fpointer) != NULL) {
// strip newline
buf[strcspn(buf,"\n")] = 0;
// store line -- we must allocate this
text[i++] = strdup(buf);
}
fclose(fpointer);
for (i = 0; i < lineCount; ++i)
printf("%s\n", text[i]);
return 0;
}
UPDATE:
The above code is derived from your original code. But, it assumes that the linesCount function can predict the number of lines. And, it doesn't check against overflow of the fixed length text array.
Here is a more generalized version that will allow an arbitrary number of lines with varying line lengths:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
main(void)
{
int lineCount = 0;
char **text = NULL;
char buf[10000];
// open file and _check_ the return
const char *file = "test.txt";
FILE *fpointer = fopen(file, "r");
if (fpointer == NULL) {
perror(file);
exit(1);
}
int i = 0;
while (fgets(buf, sizeof(buf), fpointer) != NULL) {
// strip newline
buf[strcspn(buf,"\n")] = 0;
++lineCount;
// increase number of lines in array
text = realloc(text,sizeof(*text) * lineCount);
if (text == NULL) {
perror("realloc");
exit(1);
}
// store line -- we must allocate this
text[lineCount - 1] = strdup(buf);
}
fclose(fpointer);
// print the lines
for (i = 0; i < lineCount; ++i)
printf("%s\n", text[i]);
// more processing ...
// free the lines
for (i = 0; i < lineCount; ++i)
free(text[i]);
// free the list of lines
free(text);
return 0;
}

C project with files

I need some help with my C project:
I need to write a c program who receives 2 parameters:
1) The name of a text file(infile) which is in the same catalog
2) A number k>0
And creates 2 new files,outfile1 & outfile 2 as:
Outfile 1: k,2*k,3*k…. character of infile
Outfile 2: k,2*k,3*k…..line of infile
Example:
INFILE
Abcdefg
123456
XXXXXX
01010101
OUTFILE 1:
Cf25XX101
OUTFILE 2:
XXXXXX
I wrote some code ,but its not working. Any ideas?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char** read_lines(FILE* txt, int* count) {
char** array = NULL;
int i;
char line[100];
int line_count;
int line_length;
*count = 0;
line_count = 0;
while (fgets(line, sizeof(line), txt) != NULL) {
line_count++;
}
rewind(txt);
array = malloc(line_count * sizeof(char *));
if (array == NULL) {
return NULL;
}
for (i = 0; i < line_count; i++) {
fgets(line, sizeof(line), txt);
line_length = strlen(line);
line[line_length - 1] = '\0';
line_length--;
array[i] = malloc(line_length + 1);
strcpy(array[i], line);
}
*count = line_count;
return array;
}
int main(int argc, char * argv[]) {
char** array = NULL;
FILE* file = NULL;
const char* filename = NULL;
int i;
int line_count;
int k;
char c;
printf("ENTER ONE PHYSICAL NUMBER\n");
do{
if(k>0)
scanf("%d",&k);
else{
printf("ENTER ONE PHYSICAL NUMBER\n");
scanf("%d",&k);
}
}while(k<=0);
file = fopen("LEIT.txt", "rt");
if (file == NULL) {
printf("CANT OPEN FILE %s.\n", filename);
return 1;
}
array = read_lines(file, &line_count);
printf("ARRAY:\n");
for (i = 0; i < line_count; i++) {
printf("[%d]: %s\n", (i+1), array[i]);
}
printf("CALCULATING OUTFILE1 AND OUTFILE2\n");
printf("OUTFILE1:\n");
for(i=0;i<line_count;i++){
c=i*k;
printf("%c\n",array[c]);
}
printf("WRITING OUTFILE1 COMPLETE!\n");
printf("OUTFILE2:\n");
for(i=0;i<line_count;i++){
c=i*k;
printf("%c\n",array[c]);
}
printf("WRITING OUTFILE2 COMPLETE!\n");
return 0;
}
My actual problem is calculate and write into files (outfile1 and outfile2) the result...
You need to close file after finishing reading/writing it with fclose.
You can create and write strings to a file using fopen with correct mode.
You can output formatted string to a file by using fprintf.
It seems that you don't want to print the 0th character/line, so in the last for loop, i should start from 1 (or start from 0 but add 1 later).
array[c] is a string, not a character. So when printing it, you should use %s specifier instead of %c.
It is not a good idea using char as count in later for loops unless you know input file will be very short. signed char can only count to 127 before overflow (unsigned char can count to 255). But if you have a very long file, for example thousands of lines, this program would not work properly.
array is malloced in function char** read_lines(FILE* txt, int* count). After finish using it, you need to dealloc, or free it by calling
for (i = 0; i < line_count; i++) {
free(array[i]);
}
and followed by free(array). This avoids memory leakage.
Modified code is here. In the following code, char c is not used. This is the part where you process output files, and before return 0; in main function.
printf("CALCULATING OUTFILE1 AND OUTFILE2\n");
printf("OUTFILE1:\n");
// Since we finished using LEIT.txt, close it here.
fclose(file);
// Mode: "w" - Write file. "+" - Create if not exist.
// You can lso use "a+" (append file) here if previous record need to be preserved.
FILE *out1 = fopen("OUTFILE1.txt", "w+");
FILE *out2 = fopen("OUTFILE2.txt", "w+");
if ((out1 == NULL) || (out2 == NULL)) {
printf("CANT CREATE OUTPUT FILES.\n");
return 1;
}
// Out file 1.
unsigned int count = k;
for (i = 0; i < line_count; i++){
while (count < strlen(array[i])) {
// This just prints to stdout, but is good for debug.
printf("%c", array[i][count]);
// Write to the file.
fprintf(out1, "%c", array[i][count]);
// Calculate c for next char.
count += k + 1;
}
// Before go to next line, minus string length of current line.
count -= strlen(array[i]);
}
printf("\n");
printf("WRITING OUTFILE1 COMPLETE!\n");
// Close file.
fclose(out1);
// Out file 2.
printf("OUTFILE2:\n");
for (i = 1;i < line_count / k; i++){
count = i * k;
// This just prints to stdout, but is good for debug.
printf("%s\n", array[count]);
// Write to the file.
fprintf(out2, "%s\n", array[count]);
}
printf("WRITING OUTFILE2 COMPLETE!\n");
//Close file.
fclose(out2);
// dealloc malloced memory.
for (i = 0; i < line_count; i++) {
free(array[i]);
}
free(array);

Extracting data from input file?

I've been stuck on this problem for a while.
I'm creating a program that that reads in an input file (just a plain .Txt) This input file stores variables in the following format:
x
21
% This is a comment
y
3
And so on. My goal is to read this input file (done), and then store every variable in the file into a global variable in my c program. I.e. Global variable x will have the value 21 and y will have the value 3 in my c file, whilst comments are ignored.
I've thought about this for a while and can't figure out what functions to use. Any suggestions would be appreciated, thanks.
(note that these variables will always have the same names, but the order in which they are presented will differ from input file to input file).
I would suggest you to have a struct with two fields.
struct Foo
{
char var_name;
int var_value;
};
Then you create an array of these structs, with the size of the expected variables in your input file.
struct Foo input_array[n];
Then, as you read your file, you set the struct fields...
for(int i = 0; i < n ; i++){
input_array[i].var_name = input_var_name;
input_array[i].var_value = input_var_value;
}
Afterwards you print the values, and they will already be in order.
First of all, we need the variables to read the values into:
int x, y, z /* etc */;
Now, lets make an array of pointers so that we can easily access the variables:
int *array[] = { &x, &y, &z /* etc */ };
Now, we need a FILE* to access the file:
FILE* fp;
Opening the file:
fp = fopen("filename.txt", "r"); /* 'r' for reading */
Checking if the file opened successfully:
if(fp == NULL)
{
printf("Error opening file");
exit(-1); /* Exit the program */
}
Now, reading the file using fscanf:
int counter = 0; /* For keeping track of the array index */
for(;;) { /* Infinite loop */
int retVal = fscanf(fp, "%d", array[counter]); /* Capture return value of fscanf */
if(retVal == 1) /* Successfully scanned a number */
{
counter++;
}
else if(retVal == 0) /* Failed to scan a number */
{
fscanf(fp, "%*s"); /* Discard a word from the file */
}
else /* EOF */
{
break; /* Get out of the loop */
}
}
Now, printing the scanned data:
int i;
for(i = 0; i < counter; i++)
printf("%d", *array[i]);
and finally, closing the file:
fclose(fp);
Full code, added with #Jongware's suggestions:
int x, y, z /* etc */;
int *array[] = { &x, &y, &z /* etc */ };
char line[1024];
const char *varNames[] = { "x", "y", "z" };
int tmp = -1;
FILE* fp;
fp = fopen("filename.txt", "r"); /* 'r' for reading */
if(fp == NULL)
{
printf("Error opening file");
exit(-1); /* Exit the program */
}
int counter = 0;
for(fgets(line, sizeof(line), stdin)) {
if(line[0] == '%')
continue;
else
{
tmp = -1;
for(int i = 0; i < sizeof(varNames) / sizeof(*varNames); i++)
{
if(strcmp(line, varNames[i]) == 0)
{
tmp = i;
break;
}
}
fgets(line, sizeof(line), stdin);
sscanf(line, "%d", array[tmp]);
}
}
int i;
for(i = 0; i < counter; i++)
printf("%d", *array[i]);
fclose(fp);
All the above code is untested

fgets + dynamic malloc/realloc with a .txt as stdin

i'm trying to read lines of a file. txt, but without knowing the size of each lines...First I used the getline instruction (and works fine), but my teacher does not let me use that instruction, he says I can only use the fgets statement with malloc and realloc...
This is an input example, with variable line sizes:
[9.3,1.2,87.9]
[1.0,1.0]
[0.0,0.0,1.0]
As shown, each line defines a different vector with no size limit
Someone could help me implement this method?
Thank you very much.
NOTE: I forgot to mention, to compile the program I use these commands:
g++ -Wall-Wextra-Werror-pedantic main.c-o metbasicos.c metintermedios.c eda.exe
./eda.exe <eda.txt
I would say do something similar to this
while(fgets(buf, LEN, stdin)){
z = strtok(buf, ",");
*(*(matrix + i)) = atof(z);
for(j = 1; j < col; ++j){
z = strtok(NULL, ",");
*(*(matrix + i) + j) = atof(z);
}
++i;
}
The only extra thing you would have to take care of is making sure that you strip the brackets off of the first and last element.
Of course, if you don't know the size of the final array, you might need something like this:
struct data_t {
int nval; /* current number of values in array */
int max; /* allocated number of vlaues */
char **words; /* the data array */
};
enum {INIT = 1, GROW = 2};
...
while (fgets(buf, LEN, stdin)) {
if (data->words == NULL)
data->words = malloc(sizeof(char *));
else if (data->nval > data->max) {
data->words = realloc(data->words, GROW * data->max *sizeof(char *));
data->max = GROW * data->max;
}
z = strtok(buf, "\n");
*(data->words + i) = malloc(sizeof(char) * (strlen(z) + 1));
strcpy(*(data->words + i), z);
i++;
data->nval++;
}
data->nval--;
If you combine both of those while loops into a single one, you should be all set. The first one reads in floats, the second one is good for dynamically allocating space on the fly.
If you can use multiple steps, then use one function to get the information you need to malloc memory. (for example determine number of lines, and longest line) This function will do that for you (given the file name and location)
[EDIT] LineCount - This function will get you the number of lines, and the longest line so you can dynamically allocate memory in char **strings; in which to read the lines of the input file.
int lineCount(char *file, int *nLines)
{
FILE *fp;
int cnt=0, longest=0, numLines=0;
char c;
fp = fopen(file, "r");
while ( (c = fgetc ( fp) ) != EOF )
{
if ( c != '\n' )
{
cnt++;
if (cnt > longest) longest = cnt;
}
else
{
numLines++;
cnt= 0;
}
}
*nLines = numLines+1;//add one more
fclose(fp);
return longest+1;
}
Here is the implementation to read the input file you provided, using the function above to get the unknown dimensions of the input file...
#include <ansi_c.h>
#include <stdio.h>
#define FILENAME "c:\\dev\\play\\in.txt" //put your own path here
#define DELIM "- ,:;//_*&[]\n" //change this line as needed for search criteria
int lineCount(char *file, int *cnt);
void allocMemory(int numStrings, int max);
void freeMemory(int numStrings);
char **strings;
int main()
{
int numLines, longest, cnt, i;
FILE *fp;
longest = lineCount(FILENAME, &numLines);
char wordKeep[longest];
allocMemory(numLines, longest);
//read file into string arrays
fp = fopen(FILENAME, "r");
cnt=0;
i=0;
for(i=0;i<numLines;i++)
{
fgets(strings[i], longest, fp);
}
fclose(fp);
freeMemory(numLines);
getchar();
return 0;
}
int lineCount(char *file, int *nLines)
{
FILE *fp;
int cnt=0, longest=0, numLines=0;
char c;
fp = fopen(file, "r");
while ( (c = fgetc ( fp) ) != EOF )
{
if ( c != '\n' )
{
cnt++;
if (cnt > longest) longest = cnt;
}
else
{
numLines++;
cnt= 0;
}
}
*nLines = numLines+1;//add one more
fclose(fp);
return longest+1;
}
void allocMemory(int numStrings, int max)
{
int i;
// need number of lines by longest line for string containers
strings = calloc(sizeof(char*)*(numStrings+1), sizeof(char*));
for(i=0;i<numStrings; i++)
{
strings[i] = calloc(sizeof(char)*max + 1, sizeof(char));
}
}
void freeMemory(int numStrings)
{
int i;
for(i=0;i<numStrings; i++)
if(strings[i]) free(strings[i]);
free(strings);
}

Resources