fscanf() to read in only characters with no punctuation marks - c

I would like to read in some words (in this example first 20) from a text file (name specified as an argument in the command line). As the below code runs, I found it takes punctuation marks with characters too.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Reads up to wordCap whitespace-delimited tokens from the file named by
// argv[1] and prints each one together with its index.
int main(int argc, char * argv[]){
int wordCap = 20;
int wordc = 0;
// Zero-initialized table of wordCap word pointers.
char** ptr = (char **) calloc (wordCap, sizeof(char*));
// NOTE(review): argv[1] is used without checking argc first.
FILE *myFile = fopen (argv[1], "r");
if (!myFile) return 1;
rewind(myFile);
for (wordc = 0; wordc < wordCap; wordc++){
// Each word gets a fixed 30-byte buffer.
ptr[wordc] = (char *)malloc(30 * sizeof( char ) );
// "%s" stops only at whitespace, so punctuation stays attached to the word.
// NOTE(review): no field width and no return-value check — a token of 30+
// characters overruns the buffer, and at end of file nothing is stored.
fscanf(myFile, "%s", ptr[wordc]);
int length = strlen(ptr[wordc]);
// No effect: strlen() already found the terminator at this index.
ptr[wordc][length] = '\0';
printf("word[%d] is %s\n", wordc, ptr[wordc]);
}
return 0;
}
As I pass through the sentence: "Once when a Lion was asleep a little Mouse began running up and down upon him;", "him" will be followed with a semicolon.
I changed the fscanf() to be fscanf(myFile, "[a-z | A-Z]", ptr[wordc]);, it takes the whole sentence as a word.
How can I change it to make the correct output?

You could accept the semicolon and then remove it later, like so:
after you've stored the word in ptr[wordc]:
// Remove punctuation from ptr[wordc] in place. i advances only when the
// character at position i is kept.
i = 0;
while (i < strlen(ptr[wordc]))
{
if (strchr(".;,!?", ptr[wordc][i])) // add any char you want to delete to that string
// Shift the tail (terminator included) one position left; i is not advanced,
// so the character that moved into slot i is examined on the next pass.
memmove(&ptr[wordc][i], &ptr[wordc][i + 1], strlen(ptr[wordc]) - i);
else
i++;
}
if (strlen(ptr[wordc]) > 0) // to not print any word that was just punctuation beforehand
printf("word[%d] is %s\n", wordc, ptr[wordc]);
I haven't tested this code, so there might be a typo or something in it.
Alternatively you could switch
fscanf(myFile, "%s", ptr[wordc]);
for
fscanf(myFile, "%29[a-zA-Z]%*[^a-zA-Z]", ptr[wordc]);
to capture only letters. The 29 limits the word length so the buffer cannot overflow, since you allocate only 30 chars per word (29 letters plus the terminating '\0').

Related

C read file content into an array of strings

I need to load the contents of a file into two string arrays. I tried the following and it is not working.
file.txt contains 10 records and each record has two string values separated by whitespace.
CODE:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Attempts to read the lines of "file.txt" into arr and print two values per
// line.
int main(void) {
// NOTE(review): line is a 12x20 array, so fgets() below receives a
// char (*)[20] rather than a char *, and sizeof(line) is 240.
char line[12][20];
FILE *fptr = NULL;
int i = 0;
int tot = 0;
// NOTE(review): the result of fopen() is not checked before use.
fptr = fopen("file.txt", "r");
char arr[20][20];
while (fgets(line, sizeof(line), fptr)) {
// Copies the line just read into row i of arr; i is not bounded by the
// 20 rows of arr.
strcpy(arr[i],line);
i++;
}
tot=i;
for (int i=0; i<tot; i++) {
// NOTE(review): arr[i][0] and arr[i][1] are single chars, but "%s"
// expects a pointer to a string.
printf("first value %s",arr[i][0]);
printf("second value is %s",arr[i][1]);
printf("\n");
}
return 0;
}
If I understand correctly, you're trying to store data in a structure like:
{{"line1A", "line1B"}, {"line2A", "line2B"}, {"line3A", "line3B"}}
It looks like you need an array where each element consists of two arrays (strings), one for the first value and one for the second value on each line. If this is the case, you need a three dimensional array of chars.
In the example below I've declared arrayOfLines as array with 12 elements each of which has 2 arrays of chars (for your two values per line), with space for 20 chars in each string (NULL terminated char array)
There are some other problems with your code:
The first parameter for fgets() should be a char * - a pointer to a string buffer. Your code passes in a multi-dimensional array of chars.
Your while loop should continue until fgets returns NULL
You need to split each line into multiple strings
Check for buffer overruns when copying strings with strcpy()
In the example code I used strtok() delimited by a " " space character - you may need to play around with this - strtok can accept an array of chars to be used as a delimiter. In the example, I split the first string using the first space char, and the second string is delimited by the end of line.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads "file.txt", where every line holds two values separated by a space,
 * stores both values of each line in arrayOfLines and prints them back.
 *
 * Lines that do not contain two values (e.g. blank lines) are skipped, and
 * reading stops once arrayOfLines is full.
 *
 * Returns 0 on success, EXIT_FAILURE if the file cannot be opened or a line
 * does not fit into the line buffer.
 */
int main(void)
{
    // Array for 12 lines, each with 2 strings, each string max 20 chars
    // (19 characters plus the terminating '\0'). Adjust values as required.
    char arrayOfLines[12][2][20];
    const int maxLines = (int)(sizeof arrayOfLines / sizeof arrayOfLines[0]);
    int tot = 0;

    FILE *fptr = fopen("file.txt", "r");
    if (fptr == NULL) {
        perror("file.txt");
        return EXIT_FAILURE;
    }

    // line is no larger than one destination string, so a token cut out of
    // it can never overrun arrayOfLines[..][..] in the strcpy() calls below.
    char line[20];
    while (tot < maxLines && fgets(line, sizeof line, fptr) != NULL)
    {
        // Rudimentary error checking - if the string has no newline there
        // wasn't enough space in line, unless this is simply the last line
        // of a file that does not end with a newline.
        if (strchr(line, '\n') == NULL && fgetc(fptr) != EOF) {
            printf("Line too long...");
            fclose(fptr);
            return EXIT_FAILURE;
        }

        // Split string into tokens: the first value ends at the first space,
        // the second value runs to the end of the line.
        char *first = strtok(line, " ");
        char *second = strtok(NULL, "\r\n");
        if (first == NULL || second == NULL) {
            // Fewer than two values on this line; strcpy(NULL) would be
            // undefined behavior, so skip it.
            continue;
        }
        strcpy(arrayOfLines[tot][0], first);
        strcpy(arrayOfLines[tot][1], second);
        tot++;
    }
    fclose(fptr);

    for (int k = 0; k < tot; k++)
    {
        printf("first value %s\n", arrayOfLines[k][0]);
        printf("second value is %s\n", arrayOfLines[k][1]);
        printf("\n");
    }
    return 0;
}
printf("first value %s",arr[i][0]);
printf("second value is %s",arr[i][1]);
Basically, arr[i][0] and arr[i][1] are just the first two chars of the i-th line, not strings. To print the full string, do it like this: printf("%s",arr[i]); You said that the values are separated by whitespace, so each line you read from the file is stored whole in arr[i] (if the first line in the file contains "Hello World", your arr[0] will contain "Hello World"). If you want to split it across two printf calls, you need to print it char by char until the space.
Edit: I reminded myself about function sscanf you can use it to get data from file array like you whould do it with keyboard input
You can use this to do that
Code
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Reads "file.txt" line by line into arr (at most 20 lines of at most 19
 * characters each; longer lines are split across rows) and prints every
 * stored line.
 *
 * Exits with status 1 if the file cannot be opened, otherwise returns 0.
 */
int main(void){
    char line[20];
    char arr[20][20];
    const int maxLines = (int)(sizeof arr / sizeof arr[0]);
    int i = 0;

    FILE *fptr = fopen("file.txt", "r");
    if (!fptr) {
        printf("cant open file\n");
        exit(1);
    }

    // Stop when arr is full so the copy below never writes past its last row.
    while (i < maxLines && fgets(line, sizeof line, fptr)) {
        // strncpy() does not terminate the destination when the source fills
        // it completely, so terminate explicitly.
        strncpy(arr[i], line, sizeof arr[i] - 1);
        arr[i][sizeof arr[i] - 1] = '\0';
        i++;
    }
    fclose(fptr);

    for (int j = 0; j < i; j++) {
        printf("%s\n", arr[j]);
    }
    return 0;
}
Notes and changes I made on your code:
Check fptr, the return value of fopen(); if it's NULL, decide what to do.
Remove unnecessary tot variable and use another index j in last for loop.
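Use strncpy() instead of strcpy() to bound the copy. Note that strncpy() does not null-terminate the destination when the source is as long as the limit, so the terminator may have to be written explicitly.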
Correct way of print arr, printf("%s\n", arr[j]);
\n can be embed on first printf()

Get the text before and after strstr in C

I need to be able to extract the characters before and after a substring, currently I have the following code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Reads text from stdin, prints up to 50 characters starting at every
// occurrence of argv[1], and reports how many occurrences were found.
int main(int argc, char *argv[]){
char *text = (char *) malloc (10000000);
char *word = argv[1];
// NOTE(review): rep is incremented below without ever being initialized.
int rep;
FILE *f;
if(argc < 2)
{
printf("Usage: GET <website> | ./word_counter <word>\n");
exit(1);
}
// NOTE(review): this requests up to 10000000 items of 100 bytes each, while
// text holds 10000000 bytes; the data is also never null-terminated before
// strstr() and printf() treat it as a string.
fread(text, 100, 10000000, stdin);
const char *tmp = text;
f = fopen("output.txt", "w");
fprintf(f, "%s\n", "REPS");
// Each pass finds the next occurrence at or after tmp; tmp++ moves past the
// start of the current match so the search can continue.
while(tmp = strstr(tmp, word)){
printf("%.50s\n", tmp);
rep++;
tmp++;
}
printf("Word count: %d\n", rep);
fclose(f);
system("gedit output.txt");
return 0;
}
I made a copy of the original input so I could leave it untouched and get the "before" characters from it.
Using strstr() on tmp (the original input copy) I can find the instances of the word I'm looking for and print the first 50 characters. But knowing this, how can I access the 50 characters BEFORE this instance?
Any help will be appreciated. Thanks!
Apart from the printing question itself, there are a couple of errors in your code. I have corrected most of them; a short list is:
Always test if malloc succeeded.
fread(text, 100, 10000000, ..) reads far too much text: 100 * 10000000 = 1000000000 bytes, almost a full gigabyte. You only allocated enough memory for 10 MB.
You read from a text file and treat this data as a string. Therefore, you must make sure the data ends with a 0, else functions such as printf and strstr will try to continue reading after the end.
Your rep variable starts out uninitialized and therefore you will always see a random number.
Always free memory you allocated.
That said, it is slightly more efficient to use a dedicated function to print out text – if only to not put too much in your main. And since it's a function, you can add as many useful parameters into it as you want; I added before and after variables, so you can vary the number of characters shown.
For added niceness, this function prints a correct number of spaces when the phrase is found before the minimum number of before characters, so the results line up nicely. Also, since printing out characters such as tab and newlines will mess up your output, I replaced them with ?.
There is, admittedly, some repetition in print_range but in this case I went for clarity, rather than brevity.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LENGTH 10000000
/* Prints one character of context; tab, carriage return and newline are
 * shown as '?' so that they cannot break the one-line layout. */
static void print_visible_char (char c)
{
    if (c == '\t' || c == '\r' || c == '\n')
        putchar ('?');
    else
        putchar (c);
}

/*
 * Prints one line showing the phrase found at source_text[startindex]
 * together with its surrounding context.
 *
 *   source_text    null-terminated text that contains the phrase
 *   startindex     index of the first character of the phrase
 *   before         number of context characters to show before the phrase
 *   after          number of context characters to show after the phrase
 *   phrase_length  length of the phrase
 *
 * When fewer than `before` characters precede the phrase, the line is padded
 * with spaces on the left so that the phrase always starts in the same
 * column. Output stops early at the end of the text.
 */
void print_range (char *source_text, int startindex, int before, int after, int phrase_length)
{
    int i;

    if (before > startindex)
    {
        /* Pad for the context that does not exist, then show only the
         * characters that really precede the phrase. The phrase position
         * itself must not move. */
        for (i = 0; i < before - startindex; i++)
            putchar (' ');
        before = startindex;
    }

    for (i = 0; i < before; i++)
        print_visible_char (source_text[startindex - before + i]);

    for (i = 0; i < phrase_length; i++)
    {
        if (!source_text[startindex + i])
        {
            /* Text ended inside the phrase: nothing can follow it. */
            putchar ('\n');
            return;
        }
        print_visible_char (source_text[startindex + i]);
    }

    for (i = 0; i < after; i++)
    {
        if (!source_text[startindex + phrase_length + i])
            break;
        print_visible_char (source_text[startindex + phrase_length + i]);
    }
    putchar ('\n');
}
/*
 * Reads up to MAX_LENGTH bytes of text from stdin, prints every occurrence
 * of argv[1] with 16 characters of context on either side, and finally
 * prints the number of occurrences.
 *
 * Exits with status 1 when no (or an empty) search word is given and
 * returns -1 when the text buffer cannot be allocated.
 */
int main (int argc, char *argv[]){
    /* Validate the arguments before touching argv[1] or allocating. */
    if (argc < 2 || argv[1][0] == '\0')
    {
        printf("Usage: GET <website> | ./word_counter <word>\n");
        exit(1);
    }
    const char *word = argv[1];
    int word_length = (int) strlen(word);
    int rep = 0;

    /* One extra byte so that a full-size read still leaves room for the
     * string terminator. */
    char *text = malloc (MAX_LENGTH + 1);
    if (!text)
        return -1;

    /* Terminate right after the bytes actually read; everything beyond
     * that point in the buffer is uninitialized. */
    size_t bytes_read = fread(text, 1, MAX_LENGTH, stdin);
    text[bytes_read] = 0;

    const char *tmp = text;
    while ((tmp = strstr(tmp, word)) != NULL)
    {
        print_range (text, (int) (tmp - text), 16, 16, word_length);
        rep++;
        tmp++;  /* continue searching just past the start of this match */
    }

    free (text);
    printf ("Word count: %d\n", rep);
    return 0;
}
Result of running this on its own source code:
~/Documents $ ./wordcounter printf < wordcounter.c
tindex; i++)????printf (" ");???starti
-before+i]))????printf ("?");???else??
"?");???else????printf ("%c", source_t
before+i]);??}??printf ("{");??for (i=
rtindex+i]))????printf ("?");???else??
"?");???else????printf ("%c", source_t
tindex+i]);??}??printf ("}");??for (i=
_length+i]))????printf ("?");???else??
"?");???else????printf ("%c", source_t
length+i]);??}??printf ("\n");?}??int
argc < 2)??{??? printf("Usage: GET <we
?free (text);???printf ("Word count: %
Word count: 12

How to make a C program that can read a data and copy some in a variable?

I'm a student, I am wondering...
How can I make a program that can Get some data from my text file to a variable on my program and print them
Example:
My Text File
I,Ate,Cookies
She,Drink,Coffee
Tom,Wears,Pyjamas
My code
/* Opens the data file and asks the user for a name; the lookup itself is
 * not written yet. */
main()
{
/* NOTE(review): `fileopen` is not a standard library function - presumably
 * fopen() is meant. */
FILE *fp=fileopen("c:\\textfile.txt","r");
char name[20],action[20],item[20];
/* NOTE(review): `prinf` looks like a typo for printf. */
prinf("Enter name: \n");
/* NOTE(review): "%s" has no field width, so input longer than 19 characters
 * overruns name; &name has type char (*)[20] where "%s" expects char *. */
scanf("%s",&name);
/* I dont Know what to do next */
}
I though about some checking code:
/* NOTE(review): if name and nametxt are char arrays, == compares their
 * addresses, not their characters - strcmp() is presumably what is wanted. */
if (name==nametxt) /*nametxt is the first line on the text file */
{
printf("%s\n %s\n %s\n",name,action,item);
}
If the name is "I",the output would look like this :
Enter name:
I
I
Eat
Cookies
A help will satisfy my curiosity thanks in advance
You are reading characters from file until you receive new line character (\n) or fill an array, then you return characters stored in an array passed by caller.
From this returned array you may get separated values with strtok.
Repeat until you receive 0 from getline (Getline received EOF from file.)
Here is simple example with your own getline function which you may modify.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads one line from fp into s, storing at most lim-1 characters followed
 * by a terminating '\0'. The newline, if one was read, is kept in s.
 *
 *   s    destination buffer of at least lim bytes
 *   lim  size of s in bytes
 *   fp   stream to read from
 *
 * Returns the number of characters stored (not counting the terminator);
 * 0 means end of file, or that s has no room for any character.
 */
int getline(char s[],int lim, FILE * fp)
{
    /* c starts as EOF so that it has a defined value when the loop body
     * never runs (lim <= 1). */
    int c = EOF;
    int i = 0;

    /* No room even for the terminator: leave s untouched. */
    if (lim <= 0)
    {
        return 0;
    }

    while (i < lim-1 && (c=fgetc(fp))!=EOF && c!='\n')
    {
        s[i] = (char)c;
        ++i;
    }
    /* The loop stops before storing the newline; keep it as part of the
     * line. There is always room: it was read while i < lim-1. */
    if (c == '\n')
    {
        s[i] = (char)c;
        ++i;
    }
    s[i] = '\0';
    return i;
}
/*
 * Reads c:\textfile.txt line by line and prints every comma-separated field
 * on its own line.
 *
 * Returns 0 on success, EXIT_FAILURE if the file cannot be opened.
 */
int main()
{
    FILE * fp = fopen("c:\\textfile.txt", "r");
    char line[100];
    char * ptr;

    /* Reading from a NULL stream is undefined behavior, so stop here. */
    if (fp == NULL)
    {
        perror("c:\\textfile.txt");
        return EXIT_FAILURE;
    }

    while (getline(line, (int) sizeof line, fp))
    {
        /* strtok() cuts line at each comma and hands back one field per
         * call until none are left. */
        for (ptr = strtok(line, ","); ptr != NULL; ptr = strtok(NULL, ","))
        {
            printf(" %s\n", ptr);
        }
    }
    fclose(fp);
    return 0;
}
Output
I
Ate
Cookies
She
Drink
Coffee
Tom
Wears
Pyjamas
Storing strings into variable isnt tough, here is an example
strcpy(name, ptr);
But be careful, writing outside of bounds have undefined behavior.
strncpy(name, ptr, 100); You can limit number of copied characters with strncpy, but be careful, this function is error-prone.
You can do like this,
Go on reading characters from a file, after every character is read compare with ',' character.
If the character read is ',' then you have finished reading the name, otherwise store it in a character array and continue reading the file.
Once you hit ',' character, terminate the character array with null character(Now you have a complete name with you).
Compare this character array with a string you receive as input using a strcmp(String compare function). If its it matches decide what you wanna do?
I hope i am clear.
There are different ways to read data from a FILE * in C:
You read only one character : int fgetc(FILE *fp);.
You read a whole line : char *fgets(char *buf, int n, FILE *fp); (take care to buf, it must point to allocate memory).
You read a formatted string, which is your case here : int fscanf(FILE *stream, const char *format, ...), it works like printf() :
This way :
/* Read one "name,action,item" record; each field holds at most 19
 * characters plus the terminating '\0'. */
char name[20], action[20], item[20];
FILE *f = fopen("myfile.txt", "r");
if (! f)
    return;
/* fscanf() returns the number of fields it stored; print only when all
 * three were read. */
if (3 == fscanf(f, "%19[^,\n],%19[^,\n],%19[^,\n]\n", name, action, item))
    printf("%s %s %s\n", name, action, item);
fclose(f);
%19[^,\n] is used here to read one field of your line: it reads up to 19 characters and stops at a , or a \n, so the content of the line is read item by item.
start with like this
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DATA_FILE "data.txt"
#define LEN 19
#define SIZE (LEN+1)
//Stringification
#define S_(n) #n
#define S(n) S_(n)
enum { NOT_FOUND, FIND };
/*
 * Searches DATA_FILE for the record whose first field equals name.
 *
 * Records have the form "name,action,item", one per line. On a match the
 * record's second and third fields are left in action and item.
 *
 * Returns FIND when a matching record was read, NOT_FOUND otherwise.
 * Terminates the program if DATA_FILE cannot be opened.
 */
int pull_data(const char name[SIZE], char action[SIZE], char item[SIZE]){
    FILE *in = fopen(DATA_FILE, "r");
    if (in == NULL) {
        perror("fopen:");
        exit(EXIT_FAILURE);
    }

    char record_name[SIZE];
    int status = NOT_FOUND;

    /* Start from empty strings in case no record can be read at all. */
    item[0] = 0;
    action[0] = 0;

    /* The format expands to "%19[^,],%19[^,],%19[^\n]%*c": three
     * width-limited fields, then the end-of-line character is discarded.
     * Reading stops at the first match or at the first line that does not
     * yield three fields. */
    while (status == NOT_FOUND &&
           fscanf(in, "%" S(LEN) "[^,],%" S(LEN) "[^,],%" S(LEN) "[^\n]%*c",
                  record_name, action, item) == 3) {
        if (strcmp(name, record_name) == 0) {
            status = FIND;
        }
    }

    fclose(in);
    return status;
}
/*
 * Asks for a name, looks it up with pull_data() and prints the matching
 * record, or a "not found" message when there is none.
 */
int main(void){
    char name[SIZE], action[SIZE], item[SIZE];

    printf("Enter name: \n");

    /* The field width keeps the input within name; without a name there is
     * nothing to look up. */
    if (scanf("%" S(LEN) "s", name) != 1) {
        return 0;
    }

    if (pull_data(name, action, item) != FIND) {
        printf("%s not found.\n", name);
        return 0;
    }

    printf("%s\n%s\n%s\n", name, action, item);
    return 0;
}

Unexpected characters at the end of char array - C

I know similar questions have been posted before but I haven't been able to solve the issue for my case.
I have the following C code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Builds four strings of tLengths[i] repeated characters ('A'..'D') and
// writes each one to its own file "<i>_textN.txt".
int main()
{
char textChars[4] = { 'A', 'B', 'C', 'D' };
char noMatchChars[4] = { '1', '2', '3', '4' };
int tLengths[5] = { 14, 142, 1414, 14142, 141420 };
int i,j;
// Only the first length (i == 0) is processed.
for (i = 0; i < 1; i++)
{
// One extra element in each array for the string terminator.
char textString1[tLengths[i]+1];
char textString2[tLengths[i]+1];
char textString3[tLengths[i]+1];
char textString4[tLengths[i]+1];
for (j = 0; j < tLengths[i]; j++)
{
textString1[j] = textChars[0];
textString2[j] = textChars[1];
textString3[j] = textChars[2];
textString4[j] = textChars[3];
}
textString1[tLengths[i]] = '\0';
textString2[tLengths[i]] = '\0';
textString3[tLengths[i]] = '\0';
textString4[tLengths[i]] = '\0';
FILE *fp;
char filepathPattern[14];
// NOTE(review): "0_text1.txt" is 11 characters, so with its terminator it
// needs 12 bytes - one more than this array provides.
char filepathText[11];
// NOTE(review): sprintf() below writes at least one digit plus a terminator,
// which does not fit into a one-element array.
char iChar[1];
sprintf(iChar, "%d", i);
strcpy(filepathText, iChar);
strcat(filepathText, "_text1.txt");
fp = fopen(filepathText, "w");
// NOTE(review): the text is passed as the format string; the result of
// fopen() is not checked.
fprintf(fp, textString1);
fclose(fp);
// Clear the previous path before building the next one.
memset(filepathText,0,strlen(filepathText));
strcpy(filepathText, iChar);
strcat(filepathText, "_text2.txt");
fp = fopen(filepathText, "w");
fprintf(fp, textString2);
fclose(fp);
memset(filepathText,0,strlen(filepathText));
strcpy(filepathText, iChar);
strcat(filepathText, "_text3.txt");
fp = fopen(filepathText, "w");
fprintf(fp, textString3);
fclose(fp);
memset(filepathText,0,strlen(filepathText));
strcpy(filepathText, iChar);
strcat(filepathText, "_text4.txt");
fp = fopen(filepathText, "w");
fprintf(fp, textString4);
fclose(fp);
}
}
It works as expected for every string except textString4, which outputs 14 'D's as expected, followed by a random character and then 14 'C's (the previous string) for some reason; the other strings don't have this issue.
I assumed it was a memory issue but when I replaced
char textStringX[tLengths[i]+1];
with char *textStringX = malloc( sizeof(char) * ( tLengths[i] + 1 ) );
the result was identical.
I'm new to C so apologies if the solution to this is trivial.
The first issues are string related. The following lines:
char iChar[1];
sprintf(iChar, "%d", i);
Are a problem, in that you created a char array, iChar, with room for only one char, then in the very next line try to use a string function sprintf to place two chars into iChar: the value of i (0 at this point) and the NULL char. You need to create iChar with more space: i.e.
char iChar[3]; // will allow printing up to any two digit value + NULL.
Eg. zero would look like this: |0|\0|\0|
99 like this: |9|9|\0|
In C, without a NULL termination, you do not have a C string. The string functions will not work properly without a C string.
Because the write to iChar fails in the call above, the next lines in your code, also string functions always expect NULL terminated char arrays. anything else will cause them to fail:
strcpy(filepathText, iChar);
strcat(filepathText, "_text1.txt");
Because I do not know the contents of your text file, I cannot take your code beyond this. But address these string issues, then step through your code line by line. I believe most of the issues you cite will be addressed.

Detecting single character in string

So, I'm trying to detect a single character in a string. There must be no other characters besides whitespace and a null character. This is my first issue, as my code detects the character in a string with other characters (besides the whitespace).
My second issue, is I can't seem to figure out how best to read matrices from a file. I'm supposed to read the first line and get the ROWS x COLUMNS. Then I'm supposed to read the data into the a matrix array that is stored globally. Then reading the second matrix into a second matrix array (stored globally as well).
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#define MAXLINE 100
/* Dimensions of a matrix. */
typedef struct matrixStruct{
int rows;
int columns;
}matrixStruct;
/* Hand-rolled boolean type: false is 0 and true is 1. */
typedef int bool;
enum{
false,
true
};
/*
* Global 10x10 matrices. aMatrix1 is filled from the input file in main();
* aMatrix2 and multiMatrix are presumably meant for the second input matrix
* and the product - TODO confirm.
*/
int aMatrix1[10][10];
int aMatrix2[10][10];
int multiMatrix[10][10];
/* Opens the file named by argv[1], reads "rows columns" header lines and the
 * rows that follow them until a line containing '*' is seen, then prints
 * aMatrix1. */
int main(int argc, char** argv){
FILE *inputFile;
char tempLine[MAXLINE], *tempChar, *tempString;
char *endChar;
endChar = (char *)malloc(sizeof(char));
(*endChar) = '*';
bool readFile = true;
/* NOTE(review): argv[1] is used without checking argc. */
inputFile = fopen(argv[1], "r");
if(inputFile == NULL){
printf("File %s not found.\n", argv[1]);
perror("Error");
exit(EXIT_FAILURE);
}else{
printf("File opened!\n");
}
int numRow, numColumn, i, j, tempNum, count = 0;
do{
/* NOTE(review): the result of fgets() is not checked, so end of file is not
 * detected here. */
fgets(tempLine, MAXLINE, inputFile);
/* strchr() matches a '*' anywhere in the line, whatever else the line holds. */
tempChar = strchr(tempLine, '*');
if(tempChar != NULL){
printf("True # %s\ncount=%d\n",tempChar,count);
readFile = false;
}else{
/* Header line: number of rows and columns of the matrix that follows. */
sscanf(tempLine, "%d %d", &numRow, &numColumn);
count++;
for(i=0;i<numRow;i++){
fgets(tempLine, MAXLINE, inputFile);
for(j=0;j<numColumn;j++){
/* NOTE(review): tempNum is an int that is never assigned, while atoi()
 * expects a string; the line just read into tempLine is not parsed. */
aMatrix1[i][j] = atoi(tempNum);
}
}
}
}
while(readFile);
printf("aMatrix1[%d][%d]= \n", numRow, numColumn);
for(i=0; i < numRow;i++){
for(j=0; j < numColumn; j++){
printf("aMatrix[%d][%d] = %d\t", i, j, aMatrix1[i][j]);
}
printf("\n");
}
return (EXIT_SUCCESS);
}
For the first issue you could do what you suggested in your comment (regexp are an overkill here) - loop through the string, break on any non-whitespace char that's not what you expect, and count the ones that do match - you don't want 0 matches, and i guess also no more than 1.
However, I suggest you read the man page for strtok - I normally wouldn't suggest it as it's not thread-safe and has strange behaviors, but in this simple case it could work fine - provide whitespace chars as delimiters, and it will return the first non-whitespace string. If that string doesn't compare equal to "*" with strcmp, or if the next call to strtok doesn't return NULL, then it's not a match.
By the way - what do you plan to do with lines that aren't " .. * .. " or " ROWS x COLUMNS "? you're not handling them right now.
As for the second issue - strtok again could come to the rescue - repeated calls would just give you the whitespace-delimited numbers (as strings), and you'll be able to populate tempNum for each iteration.

Resources