Using strstr to find four different partial words - c

I am looking to find part of a string containing TP2, DP3, OP1, or OP2, in a text file.
On each line is a different set of characters and eventually these three characters are used, but they are never on the same line as each other.
I can get it to print once I find the OP2, but it will not print the three before it. If I comment out the OP2 it finds OP1 and if I do that to OP1 and OP2 it finds DP3 and so on.
I do not get why it cannot print out all four different ones once found.
I used two different methods one where I strcpy into a temp and one I just print it as is and neither work. Later I want it to print to the right of the = sign on the lines with the four search types, but I will work on that after I get the print issue fixed. Any help or reasons why would be much appreciated.
#include < stdio.h>
#include < stdlib.h>
#include < string.h>
#define MAX_LINE_LENGTH 150
int main(void) {
FILE *file1, *file2;
char parts[MAX_LINE_LENGTH+1];
int len = strlen(parts);
//char TP2[3] = "TP2";
char DP3[3] = "DP3";
char MOP1[3] = "OP1";
//char MOP2[3] = "OP2";
//char TP2Temp[MAX_LINE_LENGTH];
char DP3Temp[MAX_LINE_LENGTH];
char MOP1Temp[MAX_LINE_LENGTH];
//char MOP2Temp[MAX_LINE_LENGTH];
file1 = fopen("input.txt", "r");
file2 = fopen("output2.txt", "w");
if (file1 == NULL || file2 ==NULL) {
exit(1);
}
while(fgets(parts, sizeof(parts), file1)!=NULL){
if(parts[len -1 ] =='\n'){
parts[len -1 ] ='\0';
}
//if(strstr(parts, TP2)!=NULL){
// strcpy(TP2Temp, parts);
// fprintf(file2, "%s", TP2Temp);
//}
if(strstr(parts,DP3)!=NULL){
strcpy(DP3Temp, strstr(parts,DP3));
fprintf(file2, "%s", DP3Temp);
}
else if(strstr(parts, MOP1)!=NULL){
strcpy(MOP1Temp, strstr(parts,MOP1));
fprintf(file2, "%s", MOP1Temp);
}
/*else if(strstr(parts, MOP2)!=NULL){
strcpy(MOP2Temp, parts);
fprintf(file2, "%s", MOP2Temp);
}*/
}
fclose(file1);
fclose(file2);
return 0;
}
/*Here is the text file sample
TC_TP1[2]=1
TC_TP2[2]="9070036"
TC_TP3[2]=1
TC_TP4[2]=1
TC_TP5[2]=1
TC_TP6[2]=1
TC_TP7[2]=1
TC_DP1[2,1]=120
TC_DP2[2,1]=0
TC_DP3[2,1]=179.85
TC_DP4[2,1]=0
TC_DP5[2,1]=0
TC_MOP1[2,1]=3
TC_MOP2[2,1]=28
TC_MOP3[2,1]=0
TC_MOP4[2,1]=0
TC_TP1[3]=1
TC_TP2[3]="9005270"
TC_TP3[3]=1*/

char parts[MAX_LINE_LENGTH+1];
int len = strlen(parts);
parts is uninitialised in this code, and thus isn't guaranteed to contain a string. Even if it were to, len would be initialised the length of that garbage string, which is meaningless and thus useless.
char DP3[3] = "DP3";
If your understanding of strings is valid, you should realise there are four characters in these strings. The following program demonstrates this:
#include <stdio.h>
int main(void) {
printf("sizeof \"DP3\": %zu\n", sizeof "DP3");
}
You are reading a book to learn C, right? Your book would explain to you among many other things so we wouldn't need to, strstr requires its operands be strings, and strings always contain a terminating '\0'. Where's your terminating '\0'? How is strstr expected to know the length of the string pointed to by DP3?
Because the length of your tokens are at most three bytes, you currently only need to read and store at most three bytes at a time to conduct your search (four including the terminal byte explained above; untested&incomplete example below); this requirement could change, should you decide to introduce longer (or dynamically sized) tokens, your cursor will need to be as wide as your longest token.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef unsigned char item[4];
int item_cmp(void const *x, void const *y) {
return memcmp(x, y, sizeof (item));
}
int main(void) {
item cursor = "",
haystack[] = { "TP1", "OP0", "OP1", "OP2", "TP0", "DP3", "OOO" };
size_t size = fread(cursor, sizeof cursor - 1, 1, stdin),
nelem = sizeof haystack / sizeof *haystack;
int c = 0, e = !size;
qsort(haystack, nelem, sizeof *haystack, item_cmp);
do {
if (bsearch(cursor, haystack, nelem, sizeof *haystack, item_cmp)) {
printf("match found for %s\n", cursor);
}
memmove(cursor, cursor + 1, sizeof cursor - 1);
if (!e) {
c = fgetc(stdin);
e = c < 0 && feof(stdin);
}
cursor[size] = e || c == '\n' ? '\0' : c;
size -= e;
} while (size);
exit(0);
}

Thank you again BLUEPIXY, with your information I was able to make the changes I needed and was able to extract the data where it found TP2 and then the value after the equal sign. I am sure there is a nicer way to code this, but my solution is below. I will add a change to be able to take in any file name and the reason for the
MOP1Equal[strlen(MOP1Equal) -1] ='\0';
was to make it in columns for a csv file to put in excel and the
fprintf(file2, "\t%s", MOP1Equal+1);
where I add 1 was to get rid of the = sign.
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define MAX_LINE_LENGTH 150
int main(void) {
FILE *file1, *file2;
char parts[MAX_LINE_LENGTH+1] = "startingvaluebeforechange";
char TP2[4] = "TP2";
char DP3[4] = "DP3";
char MOP1[4] = "OP1";
char MOP2[4] = "OP2";
char Equal[2] = "=";
char TP2Temp[MAX_LINE_LENGTH];
char TP2Equal[MAX_LINE_LENGTH];
char DP3Temp[MAX_LINE_LENGTH];
char DP3Equal[MAX_LINE_LENGTH];
char MOP1Temp[MAX_LINE_LENGTH];
char MOP1Equal[MAX_LINE_LENGTH];
char MOP2Temp[MAX_LINE_LENGTH];
char MOP2Equal[MAX_LINE_LENGTH];
file1 = fopen("input.txt", "r");
file2 = fopen("output.txt", "w");
if (file1 == NULL || file2 ==NULL) {
exit(1);
}
while(fgets(parts, sizeof(parts), file1)!=NULL){
int len = strlen(parts);
if(parts[len -1 ] =='\n'){
parts[len -1 ] ='\0';
}
if(strstr(parts, TP2)!=NULL){
strcpy(TP2Temp, strstr(parts,TP2));
strcpy(TP2Equal, strstr(TP2Temp,Equal));
TP2Equal[strlen(TP2Equal) -2] ='\0';
fprintf(file2, "%s", TP2Equal+2);
}
if(strstr(parts,DP3)!=NULL){
strcpy(DP3Temp, strstr(parts,DP3));
strcpy(DP3Equal, strstr(DP3Temp,Equal));
DP3Equal[strlen(DP3Equal) -1] ='\0';
fprintf(file2, "\t%s", DP3Equal+1);
}
if(strstr(parts, MOP1)!=NULL){
strcpy(MOP1Temp, strstr(parts,MOP1));
strcpy(MOP1Equal, strstr(MOP1Temp,Equal));
MOP1Equal[strlen(MOP1Equal) -1] ='\0';
fprintf(file2, "\t%s", MOP1Equal+1);
}
if(strstr(parts, MOP2)!=NULL){
strcpy(MOP2Temp, strstr(parts,MOP2));
strcpy(MOP2Equal, strstr(MOP2Temp,Equal));
fprintf(file2, "\t%s", MOP2Equal+1);
}
}
fclose(file1);
fclose(file2);
return 0;
}

Related

Parsing a String in C, Without Strtok()

I need to parse a string in C by removing all non-alphabetic characters from it. To do this I am checking the ascii value of every char and making sure its within the correct bounds. It works just the way I want it to, so that's not the problem. What I am having trouble with, however, is storing the resulting strings after the parse is completed. (I am 3 weeks into C by the way) Also if you notice that I used weird sizes for the arrays, that's because I purposely made them bigger than they needed to be.
char * carry[2]; // This is to simulate argv
carry[1] = "hello1whats2up1"; // 0 is title so I placed at 1
char array[strlen(carry[1])]; // char array of string length
strcpy(array, carry[1]); // copied string to char array
char temp[strlen(carry[1]) + 1]; // Reusable char array
char * finalAnswer[10];
int m = 0, x = 0; // Indexes
if ((sizeof(carry))/8 > 1) { // We were given arguments
printf("Array: %lu\n\n", sizeof(array));
for (int i = 0; i < sizeof(array); i++)
{
if(isalpha(array[i])) { // A-Z & a-z
//printf("%s\n", temp);
temp[x] = array[i]; // Placing chars in temp array
x++;
}
else {
printf("String Length: %lu \nString Name: %s \nWord Index: %d \n\n",
strlen(temp), temp, m); // Testing Purposes
strcpy(finalAnswer[m], temp); // Copies temp into the final answer *** Source of Error
for(int w = 0; w < sizeof(temp); w++) { temp[w] = '\0'; } // Clears temp
x = 0;
m++;
}
}
printf("String Length: %lu \nString Name: %s \nWord Index: %d \n",
strlen(temp), temp, m); // Testing Purposes
strcpy(finalAnswer[m], temp);
for(int w = 0; w < sizeof(temp); w++) { temp[w] = '\0'; } // Clears temp
x = 0;
}
else { printf("No Arguments Given\n"); }
printf("\n");
** Edit
The error I keep getting is when I try copying temp to finalAnswer
** Edit 2
I solved the problem I was having with char * finalAnswer[10]
When I was trying to use strcpy on finalAnswer, I never specified the size that was needed to store the particular string. Works fine after I did it.
Since you have solved the actual string parsing, your last comment, I shall take as the actual requirement.
"... I want to create a list of words with varying length that can be accessed by index ..."
That is certainly not a task to be solved easily if one is "three weeks into C". Data structure that represents that is what main() second argument is:
// array (of unknown size)
// of pointers to char
char * argv[] ;
This can be written as an pointer to pointer:
// same data structure as char * []
char ** list_of_words ;
And this is pushing you straight into the deep waters of C. An non trivial C data structure. As a such it might require a bit more than four weeks of C.
But we can be creative. There is "inbuilt in C" one non trivial data structure we might use. A file.
We can write the words into the file. One word one line. And that is our output: list of words, separated by new line character, stored in a file.
We can even imagine and write a function that will read the word from that result "by index". As you (it seems) need.
// hint: there is a FILE * behind
int words_count = result_size () ;
const char * word = result_get_word(3) ;
Now, I have boldly gone ahead and have written "all" of it, beside that last "crucial" part. After all, I am sure you would like to contribute too.
So the working code (minus the result_size) and result_get_word() ) is alive and kicking here: https://wandbox.org/permlink/uLpAplNl6A3fgVGw
To avoid the "Wrath of Khan" I have also pasted it here:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
task: remove all non alpha chars from a given string, store the result
*/
int process_and_save (FILE *, const char *) ;
int dump_result(FILE *) ;
int main( const int argc, const char * argv[] )
{
const char * filename = "words.txt";
const char * to_parse = "0abra123ka456dabra789" ;
(void)(&argc) ; (void)argv ; // pacify the compiler warnings
printf("\nInput: %s", to_parse ) ;
int retval = process_and_save(fopen(filename, "w"), to_parse ) ;
if ( EXIT_FAILURE != retval )
{
printf("\n\nOutput:\n") ;
retval = dump_result(fopen(filename, "r"));
}
return retval ;
}
int process_and_save (FILE * fp, const char * input )
{
if(!fp) {
perror("File opening failed");
return EXIT_FAILURE;
}
//
char * walker = (char *)(input) ;
while ( walker++ )
{
if ( ! *walker ) break ;
if ( isalpha(*walker) ) {
fprintf( fp, "%c", *walker ) ;
// I am alpha but next one is not
// so write word end, next
if ( ! isalpha(*(walker +1) ) )
fprintf( fp, "\n" ) ;
}
}
fclose(fp);
return EXIT_SUCCESS;
}
int dump_result(FILE* fp )
{
if(!fp) {
perror("\nFile opening failed");
return EXIT_FAILURE;
}
int c; while ((c = fgetc(fp)) != EOF) { putchar(c); }
if (ferror(fp))
puts("\nI/O error when reading");
fclose(fp);
return EXIT_SUCCESS;
}
I think this is functional and does the job of parsing and storing the result. Not in the complex data structure but in the simple file. The rest should be easy. If need help please do let me know.

How to make a C program that can read a data and copy some in a variable?

I'm a student, I am wondering...
How can I make a program that can Get some data from my text file to a variable on my program and print them
Example:
My Text File
I,Ate,Cookies
She,Drink,Coffee
Tom,Wears,Pyjamas
My code
main()
{
FILE *fp=fileopen("c:\\textfile.txt","r");
char name[20],action[20],item[20];
prinf("Enter name: \n");
scanf("%s",&name);
/* I dont Know what to do next */
}
I though about some checking code:
if (name==nametxt) /*nametxt is the first line on the text file */
{
printf("%s\n %s\n %s\n",name,action,item);
}
If the name is "I",the output would look like this :
Enter name:
I
I
Eat
Cookies
A help will satisfy my curiosity thanks in advance
You are reading characters from file until you receive new line character (\n) or fill an array, then you return characters stored in an array passed by caller.
From this returned array you may get separated values with strtok.
Repeat until you receive 0 from getline (Getline received EOF from file.)
Here is simple example with your own getline function which you may modify.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int getline(char s[],int lim, FILE * fp)
{
int c, i;
for (i=0; i < lim-1 && (c=fgetc(fp))!=EOF && c!='\n'; ++i)
{
s[i] = c;
}
if (c == '\n')
{
s[i] = c;
++i;
}
s[i] = '\0';
return i;
}
int main()
{
FILE * fp = fopen("c:\\textfile.txt", "r");
char line[100];
char * ptr;
while (getline(line, 100, fp))
{
ptr = strtok(line, ",");
while( ptr != NULL )
{
printf(" %s\n", ptr);
ptr = strtok(NULL, ",");
}
}
return 0;
}
Output
I
Ate
Cookies
She
Drink
Coffee
Tom
Wears
Pyjamas
Storing strings into variable isnt tough, here is an example
strcpy(name, ptr);
But be careful, writing outside of bounds have undefined behavior.
strncpy(name, ptr, 100); You can limit number of copied characters with strncpy, but be careful, this function is error-prone.
You can do like this,
Go on reading characters from a file, after every character is read compare with ',' character.
If the character read is ',' then you have finished reading the name, otherwise store it in a character array and continue reading the file.
Once you hit ',' character, terminate the character array with null character(Now you have a complete name with you).
Compare this character array with a string you receive as input using a strcmp(String compare function). If its it matches decide what you wanna do?
I hope i am clear.
There is different ways to read data from a FILE * in C :
You read only one character : int fgetc(FILE *fp);.
You read a whole line : char *fgets(char *buf, int n, FILE *fp); (take care to buf, it must point to allocate memory).
You read a formatted string, which is your case here : int fscanf(FILE *stream, const char *format, ...), it works like printf() :
This way :
char name[20], action[20], item[20];
FILE *f = fopen("myfile.txt", "r");
if (! f)
return;
if (3 == fscanf(f, "%19[^,\n],%19[^,\n],%19[^,\n]\n", name, action, item))
printf("%s %s %s\n", name, action, item)
%30[^,\n], here is used to read of whole object of your line, except , or \n, which will read item by item the content of your string.
start with like this
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DATA_FILE "data.txt"
#define LEN 19
#define SIZE (LEN+1)
//Stringification
#define S_(n) #n
#define S(n) S_(n)
enum { NOT_FOUND, FIND };
int pull_data(const char name[SIZE], char action[SIZE], char item[SIZE]){
int ret = NOT_FOUND;
FILE *fp = fopen(DATA_FILE, "r");//fileopen --> fopen
if(fp == NULL){
perror("fopen:");
exit(EXIT_FAILURE);
} else {
char nametxt[SIZE];
*action = *item = 0;
while(fscanf(fp, "%" S(LEN) "[^,],%" S(LEN) "[^,],%" S(LEN) "[^\n]%*c", //"%19[^,],%19[^,],%19[^\n]%*c"
nametxt, action, item) == 3){
if(strcmp(name, nametxt) == 0){//Use strcmp for comparison of strings
ret = FIND;
break;
}
}
}
fclose(fp);
return ret;
}
int main(void){
char name[SIZE], action[SIZE], item[SIZE];
printf("Enter name: \n");//prinf --> printf
if(scanf("%" S(LEN) "s", name) == 1){
if(pull_data(name, action, item) == FIND){
printf("%s\n%s\n%s\n", name, action, item);
} else {
printf("%s not found.\n", name);
}
}
}

fscanf() to read in only characters with no punctuation marks

I would like to read in some words (in this example first 20) from a text file (name specified as an argument in the command line). As the below code runs, I found it takes punctuation marks with characters too.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char * argv[]){
int wordCap = 20;
int wordc = 0;
char** ptr = (char **) calloc (wordCap, sizeof(char*));
FILE *myFile = fopen (argv[1], "r");
if (!myFile) return 1;
rewind(myFile);
for (wordc = 0; wordc < wordCap; wordc++){
ptr[wordc] = (char *)malloc(30 * sizeof( char ) );
fscanf(myFile, "%s", ptr[wordc]);
int length = strlen(ptr[wordc]);
ptr[wordc][length] = '\0';
printf("word[%d] is %s\n", wordc, ptr[wordc]);
}
return 0;
}
As I pass through the sentence: "Once when a Lion was asleep a little Mouse began running up and down upon him;", "him" will be followed with a semicolon.
I changed the fscanf() to be fscanf(myFile, "[a-z | A-Z]", ptr[wordc]);, it takes the whole sentence as a word.
How can I change it to make the correct output?
You could accept the semi-colon and then remove it latter, like so:
after you've stored the word in ptr[wordc]:
i = 0;
while (i < strlen(ptr[wordc]))
{
if (strchr(".;,!?", ptr[wordc][i])) //add any char you wanna delete to that string
memmove(&ptr[wordc][i], &ptr[wordc][i + 1], strlen(ptr[wordc]) - i);
else
i++;
}
if (strlen(ptr[wordc]) > 0) // to not print any word that was just punctuations beforehand
printf("word[%d] is %s\n", wordc, ptr[wordc]);
I haven't tested this code, so there might be a typo or something in it.
Alternatively you could switch
fscanf(myFile, "%s", ptr[wordc]);
for
fscanf(myFile, "%29[a-zA-Z]%*[^a-zA-Z]", ptr[wordc]);
to capture only letters. the 29 limits word size so you don't get overflow since you're allocating size for only 30 chars

Storing values of file into array leads to weird behaviour

Let's say I've got the file
5f2
3f6
2f1
And the code:(The printf should print the second numbers (i.e 2,6, and 1) but it doesn't
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
int main (int argc, char * argv[])
{
FILE *ptr;
char str[100];
char * token;
int a, b, i;
int arr[4];
if(argc > 1)
{
ptr = fopen(argv[1],"r");
if(ptr == NULL)
{
exit(1);
}
}
else
{
exit(1);
}
//And I'm looking to parse the numbers between the "f" so..
while(fgets(str,100,ptr) != NULL)
{
token = strstr(str,"f");
if(token != NULL)
{
a = atol(str); // first number
b = atol(token+1); // second number
arr[i] = b; // store each b value (3 of em) into this array
}
i++;
printf("Values are %d\n",arr[i]); //should print 2,6 and 1
}
}
I've tried to move the printf outside the loop, but that seems to print an even weirder result, I've seen posts about storing integers from a file into an array before, however since this involves using strstr, I'm not exactly sure the procedure is the same.
int i,j=0;
while(fgets(str,sizeof(str),file) != NULL)
{
size_t n = strlen(str);
if(n>0 && str[n-1] == '\n')
str[n-1] = '\0';
i = str[strlen(str)-1] - '0'; /* Convert the character to int */
printf("%d\n",i);// Or save it to your int array arr[j++] = i;
}
Just move to the last character as shown and print it out as integer.
PS: fgets() comes with a newline character you need to suppress it as shown
You are never initializing i, then you are reading into arr[i] (which just happens to not crash right there), then increment i (to "undefined value + 1"), then print arr[i] -- i.e., you are writing to and reading from uninitialized memory.
Besides, your FILE * is ptr, not file. And you should get into the habit of using strtol() instead of atol(), because the former allows you to properly check for success (and recover from error).

Detecting single character in string

So, I'm trying to detect a single character in a string. There must be no other characters besides whitespace and a null character. This is my first issue, as my code detects the character in a string with other characters (besides the whitespace).
My second issue, is I can't seem to figure out how best to read matrices from a file. I'm supposed to read the first line and get the ROWS x COLUMNS. Then I'm supposed to read the data into the a matrix array that is stored globally. Then reading the second matrix into a second matrix array (stored globally as well).
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#define MAXLINE 100
typedef struct matrixStruct{
int rows;
int columns;
}matrixStruct;
typedef int bool;
enum{
false,
true
};
/*
*
*/
int aMatrix1[10][10];
int aMatrix2[10][10];
int multiMatrix[10][10];
int main(int argc, char** argv){
FILE *inputFile;
char tempLine[MAXLINE], *tempChar, *tempString;
char *endChar;
endChar = (char *)malloc(sizeof(char));
(*endChar) = '*';
bool readFile = true;
inputFile = fopen(argv[1], "r");
if(inputFile == NULL){
printf("File %s not found.\n", argv[1]);
perror("Error");
exit(EXIT_FAILURE);
}else{
printf("File opened!\n");
}
int numRow, numColumn, i, j, tempNum, count = 0;
do{
fgets(tempLine, MAXLINE, inputFile);
tempChar = strchr(tempLine, '*');
if(tempChar != NULL){
printf("True # %s\ncount=%d\n",tempChar,count);
readFile = false;
}else{
sscanf(tempLine, "%d %d", &numRow, &numColumn);
count++;
for(i=0;i<numRow;i++){
fgets(tempLine, MAXLINE, inputFile);
for(j=0;j<numColumn;j++){
aMatrix1[i][j] = atoi(tempNum);
}
}
}
}
while(readFile);
printf("aMatrix1[%d][%d]= \n", numRow, numColumn);
for(i=0; i < numRow;i++){
for(j=0; j < numColumn; j++){
printf("aMatrix[%d][%d] = %d\t", i, j, aMatrix1[i][j]);
}
printf("\n");
}
return (EXIT_SUCCESS);
}
For the first issue you could do what you suggested in your comment (regexp are an overkill here) - loop through the string, break on any non-whitespace char that's not what you expect, and count the ones that do match - you don't want 0 matches, and i guess also no more than 1.
However, I suggest you read the man page for strtok - I normally wouldn't suggest it as it's not thread-safe and has strange behaviors, but in this simple case it could work fine - provide whitespace chars as delimiters, and it would return the first non-whitespace string. If that's doesn't strcmp with "*", or if the next call to strtok doesn't return null, then it's not a match.
By the way - what do you plan to do with lines that aren't " .. * .. " or " ROWS x COLUMNS "? you're not handling them right now.
As for the second issue - strtok again could come to the rescue - repeated calls would just give you the whitespace-delimited numbers (as strings), and you'll be able to populate tempNum for each iteration.

Resources