How to identify a double \n\n with fgets/read/fread - c

I have a dot file, which i need to parse it into a graph structure using C.
My dot file contains 2 blocs seperated with a double \n:
the one is verticle
the other one is edges
I have tried with fgets since it reads until \n is met. It is helpfull to count the number of verticles but not whats comes after it. I don't have any idea of how i can do it
Right now I have a function like this (not complete)
int graphe_charger_dot(graphe *g, char *nom_fichier) {
FILE * f;
int n;
if ((f = fopen(nom_fichier, "r")) == 0) {
puts(nom_fichier);
puts("Erreur d'ouverture du fichier");
return -1;
}
char buffer[500];
fgets(buffer, sizeof(buffer),f);
;
if (strcmp(buffer, "graph {\n") != 0) {
puts("Erreur de compatibilité de fichier");
puts(nom_fichier);
return -1;
}
puts("Fichier ouvert et compatible");
puts("Lecture du buffer 1");
// reads the first verticles. the atoi is just here to test if i can convert an char to integer even with a non integer right after it.
fgets(buffer, sizeof(buffer),f);
n = atoi(buffer);
g->n += 1;
return 0;
}
My dot file
graph {
0;
1;
2;
3;
4;
5;
6;
7;
8;
0 -- 1;
0 -- 4;
0 -- 5;
0 -- 6;
0 -- 8;
1 -- 2;
1 -- 3;
1 -- 4;
1 -- 5;
2 -- 3;
2 -- 4;
2 -- 5;
2 -- 8;
3 -- 7;
5 -- 7;
5 -- 8;
6 -- 8;
7 -- 8;
}
and the structure of the graph
struct s_graphe {
int n; /* number of verticles */
int m; /* number of edges */
int adj[GRAPHE_ORDRE_MAX][GRAPHE_ORDRE_MAX];
/* Adjacency matrix of the graph */
};

How to identify a double \n\n ...
The prior line ended with a '\n'. With fgets(), test if buffer[0] == '\n' to see if a line begins and ends with a '\n' to detect the consecutive '\n'.
...
puts("Lecture du buffer 1");
//fgets(buffer, sizeof(buffer),f);
//n = atoi(buffer);
//g->n += 1;
while (fgets(buffer, sizeof buffer, f) && buffer[0] != '\n') {
// parse `buffer` for a vertex
}
while (fgets(buffer, sizeof buffer, f)) {
// parse `buffer` for an edge
}
My dot file contains 2 blocs separated with a double \n:
The below looks like 3 '\n' in a row. The first of the 3 ends the "8;\n" line.
7;\n
8;\n
\n
\n
0 -- 1;\n
0 -- 4;\n
In that case a little more work is needed. Perhaps
while (fgets(buffer, sizeof buffer, f) && buffer[0] != '\n') {
// parse `buffer` for a vertex
}
// Here code consumes 0 or more lines that are only `"\n"`
// Could use a counter to limit
int ch;
while ((c = fgetc(f)) == '\n');
ungetc(ch, f);
while (fgets(buffer, sizeof buffer, f)) {
// parse `buffer` for an edge
}

Related

How to count the frequency of the two first letters in a word from a dictionary?

I have a 143k lowcase word dictionary and I want to count the frequency of the first two letters
(ie: aa* = 14, ab* = 534, ac = 714 ... za = 65, ... zz = 0 ) and put it in a bidimensional array.
However I have no idea how to even go about iterating them without switches or a bunch of if elses I tried looking on google for a solution to this but I could only find counting amount of letters in the whole word and mostly only things in python.
I've sat here for a while thinking how could I do this and my brain keeps blocking this was what I came up with but I really don't know where to head.
int main (void) {
char *line = NULL;
size_t len = 0;
ssize_t read;
char *arr[143091];
FILE *fp = fopen("large", “r”);
if (*fp == NULL)
{
return 1;
}
int i = 0;
while ((read = getline(&line, &len, fp)) != -1)
{
arr[i] = line;
i++;
}
char c1 = 'a';
char c2 = 'a';
i = 0;
int j = 0;
while (c1 <= 'z')
{
while (arr[k][0] == c1)
{
while (arr[k][1] == c2)
{
}
c2++;
}
c1++;
}
fclose(fp);
if (line)
free(line);
return 0;
}
Am I being an idiot or am I just missing someting really basic? How can I go about this problem?
Edit: I forgot to mention that the dictionary is only lowercase and has some edge cases like just an a or an e and some words have ' (like e'erand e's) there are no accentuated latin characters and they are all accii lowercase
The code assumes that the input has one word per line without leading spaces and will count all words that start with two ASCII letters from 'a'..'z'. As the statement in the question is not fully clear, I further assume that the character encoding is ASCII or at least ASCII compatible. (The question states: "there are no accentuated latin characters and they are all accii lowercase")
If you want to include words that consist of only one letter or words that contain ', the calculation of the index values from the characters would be a bit more complicated. In this case I would add a function to calculate the index from the character value.
Also for non-ASCII letters the simple calculation of the array index would not work.
The program reads the input line by line without storing all lines, checks the input as defined above and converts the first two characters from range 'a'..'z' to index values in range 0..'z'-'a' to count the occurrence in a two-dimensional array.
#include <stdio.h>
#include <stdlib.h>
int main (void) {
char *line = NULL;
size_t len = 0;
ssize_t read;
/* Counter array, initialized with 0. The highest possible index will
* be 'z'-'a', so the size in each dimension is 1 more */
unsigned long count['z'-'a'+1]['z'-'a'+1] = {0};
FILE *fp = fopen("large", "r");
if (fp == NULL)
{
return 1;
}
while ((read = getline(&line, &len, fp)) != -1)
{
/* ignore short input */
if(read >= 2)
{
/* ignore other characters */
if((line[0] >= 'a') && (line[0] <= 'z') &&
(line[1] >= 'a') && (line[1] <= 'z'))
{
/* convert first 2 characters to array index range and count */
count[line[0]-'a'][line[1]-'a']++;
}
}
}
fclose(fp);
if (line)
free(line);
/* example output */
for(int i = 'a'-'a'; i <= 'z'-'a'; i++)
{
for(int j = 'a'-'a'; j <= 'z'-'a'; j++)
{
/* only print combinations that actually occurred */
if(count[i][j] > 0)
{
printf("%c%c %lu\n", i+'a', j+'a', count[i][j]);
}
}
}
return 0;
}
The example input
foo
a
foobar
bar
baz
fish
ford
results in
ba 2
fi 1
fo 3
The idea is to have a two-dimensional array, each dimension holding one of the first two characters of each line. The clever bit is that in C, even a string whose length as reported by strlen() to be 1 has two char's - the character and the trailing 0 at the end, so you don't need to special-case cases like "a". Its frequency is tracked in counts['a'][0].
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
/* Reads input on stdin, outputs to stdout. Using a multibyte
* character encoding will likely cause unusual output; don't do
* that. But it will work with encodings other than ASCII. Also handles
* mixed-cased input.
*/
int main(void) {
int *counts[UCHAR_MAX + 1] = { NULL };
char *line = NULL;
size_t bufsize = 0;
ssize_t len;
// Populate the frequency counts
while ((len = getline(&line, &bufsize, stdin)) > 0) {
if (line[len - 1] == '\n') { // Get rid of newline
line[len - 1] = 0;
}
if (line[0] == 0) { // Skip empty lines
continue;
}
unsigned fc = line[0];
unsigned sc = line[1];
if (!counts[fc]) { // Allocate the second dimension if needed
counts[fc] = calloc(UCHAR_MAX + 1, sizeof(int));
}
counts[fc][sc] += 1;
}
// Print out the frequency table.
for (int fc = 1; fc <= UCHAR_MAX; fc += 1) {
if (!counts[fc]) { // Skip unused first characters
continue;
}
if (counts[fc][0]) { // Single-character line count
printf("%c\t%d\n", fc, counts[fc][0]);
}
for (int sc = 1; sc <= UCHAR_MAX; sc += 1) {
if (counts[fc][sc]) {
printf("%c%c\t%d\n", fc, sc, counts[fc][sc]);
}
}
}
return 0;
}
Example:
$ perl -Ci -ne 'print if /^[[:ascii:]]+$/ && /^[[:lower:]]+$/' /usr/share/dict/american-english-large | ./freqs
a 1
aa 6
ab 483
ac 651
ad 497
ae 112
af 198
ag 235
ah 7
ai 161
etc.
How to count the frequency of the two first letters in a word from a dictionary?
Use a simple state machine to read one character at a time, detect when the character is first 2 letters of a word, then increment a 26x26 table. Words do not need to be on seperate lines. Any word length is allowed.
unsigned long long frequency[26][26] = { 0 }; // Set all to 0
FILE *fp = fopen("large", "r");
...
int ch;
// Below 2 objects are the state machine
int word[2];
int word_length = 0;
while ((ch = fgetc(fp)) != EOF) {
if (isalpha(ch)) {
if (word_length < 2) {
word[word_length++] = tolower(ch);
if (word_length == 2) { // 2nd letter just arrived
assert(word[0] >= 'a' && word[0] <= 'z'); // Note 1
assert(word[1] >= 'a' && word[1] <= 'z');
frequency[word[0] - 'a'][word[1] - 'a']++;
}
}
} else {
word_length = 0; // Make ready for a new word
}
}
for (int L0 = 'a'; L0 <= 'z'; L0++);
for (int L1 = 'a'; L1 <= 'z'; L1++);
unsigned long long sum = frequency[L0 - 'a'][L1 - 'a'];
if (sum) {
printf("%c%c %llu\n", L0, L1, sum);
...
Note 1, in locales that have more than a-z letters, like á, é, í, ó, ú, ü, ñ, additional needed. A simple approach is to use a frequency[256][256] - somewhat memory hoggish.
There is no need to read the entire dictionary into memory, or even to buffer lines. The dictionary consists of words, one per line. This means it has this structure:
"aardvark\nabacus\n"
The first two characters of the file are the first digraph. The other interesting digraphs are all characters which immediately follow a newline.
This can be read by a state machine, which we can code into a loop like this. Suppose f is the FILE * handle to the stream reading from the dictionary file:
for (;;) {
/* Read two characters from the dictionary file. */
int ch0 = getc(f);
int ch1 = getc(f);
/* Is the ch0 newline? That means we read an empty line,
and one character after that. So, let us move that character
into ch0, and read another ch1. Keep doing this until
ch0 is not a newline, and bail at EOF. */
while (ch0 == '\n' && ch1 != EOF) {
ch0 = ch1;
ch1 = getc(f);
}
/* After the above, if we have EOF, we are done: bail the loop */
if (ch0 == EOF || ch1 == EOF)
break;
/* We know that ch0 isn't newline. But ch1 could be newline;
i.e. we found a one-letter-long dictionary entry. We don't
process those, only two or more letters. */
if (ch1 != '\n') {
/* Here we put the code which looks up the ch0-ch1 pair
in our frequency table and increments the count. */
}
/* Now drop characters until the end of the line. If ch1
is newline, we are already there. If not, let's just use
ch1 for reading more characters until we get a newline. */
while (ch1 != '\n' && ch1 != EOF)
ch = getc(f);
/* Watch out for EOF in the middle of a line that isn't
newline-terminated. */
if (ch == EOF)
break;
}
I would do this with a state machine:
enum { begin, have_ch0, scan_eol } state = begin;
int ch0, ch1;
for (;;) {
int c = getc(f);
if (c == EOF)
break;
switch (state) {
case begin:
/* stay in begin state if newline seen */
if (c != \n') {
/* otherwise accumulate ch0,
and switch to have_ch0 state */
ch0 = c;
state = have_ch0;
}
break;
case have_ch0:
if (c == '\n') {
/* newline in ch0 state: back to begin */
state = begin;
} else {
/* we got a second character! */
ch1 = c;
/* code for processing ch0 and ch1 goes here! */
state = scan_eol; /* switch to scanning for EOL. */
}
break;
case scan_eol:
if (c == '\n') {
/* We got the newline we are looking for; go
to begin state. */
state = begin;
}
break;
}
}
Now we have a tidy loop around a single call to getc. EOF is checked in one place where we bail out of the loop. The state machine recognizes the situation when we have the first two characters of a line which is at least two characters long; there is a single place in the code where to put the logic for dealing with the two characters.
We are not allocating any buffers; we are not malloc-ing lines, so there is nothing to free. There is no limit on the dictionary size we can scan (just we have to watch for overflowing frequency counters).
You are started in the right direction. You do need a 2D array 27 x 27 for a single case (e.g. lowercase or uppercase), not including digits. To handle digits, just add another 11 x 11 array and map 2-digit frequencies there. The reason you can't use a flat 1D array and map to it without serious indexing gymnastics is that the ASCII sum of "ab" and "ba" would be the same.
The 2D array solves that problem allowing the map of the 1st character ASCII value to the first index, and the map of the ASCII of the 2nd character to the 2nd index or after in that row.
An easy way to think of it is to just take a lowercase example. Let's look at the word "accent". You have your 2D array:
+---+---+---+---+---+---+
| a | a | b | c | d | e | ...
+---+---+---+---+---+---+
| b | a | b | c | d | e | ...
+---+---+---+---+---+---+
| c | a | b | c | d | e | ...
+---+---+---+---+---+---+
...
The first column tracks the first letter and then the remaining columns (the next 'a' - 'z' characters) track the 2nd character that follows the first character. (you can do this will an array of struct holding the 1st char and a 26 char array as well -- up to you) This way, you remove ambiguity of which combination "ab" or "ba".
Now note -- you do not actually need a 27 x 27 arrays with the 1st column repeated. Recall, by mapping the ASCII value to the first index, it designates the first character associated with the row on its own, e.g. row[0][..] indicates the first character was 'a'. So a 26 x 26 array is fine (and the same for digits). So you simply need:
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
...
So the remainder of the approach is simple. Open the file, read the word into a buffer, validate there is a 1st character (e.g. not the nul-character), then validate the 2nd character (continue to get the next word if either validation fails). Convert both to lowercase (or add the additional arrays if tracking both cases -- that gets ugly). Now just map the ASCII value for each character to an index in the array, e.g.
int ltrfreq[ALPHABET][ALPHABET] = {{0}};
...
while (fgets (buf, SZBUF, fp)) { /* read each line into buf */
int ch1 = *buf, ch2; /* initialize ch1 with 1st char */
if (!ch1 || !isalpha(ch1)) /* validate 1st char or get next word */
continue;
ch2 = buf[1]; /* assign 2nd char */
if (!ch1 || !isalpha(ch2)) /* validate 2nd char or get next word */
continue;
ch1 = tolower (ch1); /* convert to lower to eliminate case */
ch2 = tolower (ch2);
ltrfreq[ch1-'a'][ch2-'a']++; /* map ASCII to index, increment */
}
With our example word "accent", that would increment the array element [0][2], so that corresponds to row 0 and column 2 for "ac" in:
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
... ^ [0][2]
Where you increment the value at that index. So ltrfreq[0][2]++ now holds the value 1 for the combination "ac" having been seen once. When encountered again, the element would be incremented to 2 and so on... Since the value is incremented it is imperative the array be initialized all zero when declared.
When you output the results, you just have to remember to subtract 1 from the j index when mapping from index back to ASCII, e.g.
for (int i = 0; i < ALPHABET; i++) /* loop over all 1st char index */
for (int j = 0; j < ALPHABET; j++) /* loop over all 2nd char index */
if (ltrfreq[i][j]) /* map i, j back to ASCII, output freq */
printf ("%c%c = %d\n", i + 'a', j + 'a', ltrfreq[i][j]);
That's it. Putting it altogether in an example that takes the filename to read as the first argument to the program (or reads from stdin if no argument is given), you would have:
#include <stdio.h>
#include <ctype.h>
#define ALPHABET 26
#define SZBUF 1024
int main (int argc, char **argv) {
char buf[SZBUF] = "";
int ltrfreq[ALPHABET][ALPHABET] = {{0}};
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while (fgets (buf, SZBUF, fp)) { /* read each line into buf */
int ch1 = *buf, ch2; /* initialize ch1 with 1st char */
if (!ch1 || !isalpha(ch1)) /* validate 1st char or get next word */
continue;
ch2 = buf[1]; /* assign 2nd char */
if (!ch1 || !isalpha(ch2)) /* validate 2nd char or get next word */
continue;
ch1 = tolower (ch1); /* convert to lower to eliminate case */
ch2 = tolower (ch2);
ltrfreq[ch1-'a'][ch2-'a']++; /* map ASCII to index, increment */
}
if (fp != stdin) /* close file if not stdin */
fclose (fp);
for (int i = 0; i < ALPHABET; i++) /* loop over all 1st char index */
for (int j = 0; j < ALPHABET; j++) /* loop over all 2nd char index */
if (ltrfreq[i][j]) /* map i, j back to ASCII, output freq */
printf ("%c%c = %d\n", i + 'a', j + 'a', ltrfreq[i][j]);
}
Example Input Dictionary
In the file dat/ltrfreq2.txt:
$ cat dat/ltrfreq2.txt
My
dog
has
fleas
and
my
cat
has
none
lucky
cat!
Example Use/Output
$ ./bin/ltrfreq2 dat/ltrfreq2.txt
an = 1
ca = 2
do = 1
fl = 1
ha = 2
lu = 1
my = 2
no = 1
Where both "cat" words accurately account for ca = 2, both "has" for ha = 2 and "My" and "my" for my = 2. The rest are just the 2 character prefixes for words that appear once in the dictionary.
Or with the entire 307993 words dictionary that comes with SuSE, timed to show the efficiency of the approach (all within 15 ms):
$ time ./bin/ltrfreq2 /var/lib/dict/words
aa = 40
ab = 990
ac = 1391
ad = 1032
ae = 338
af = 411
ag = 608
ah = 68
ai = 369
aj = 18
ak = 70
al = 2029
...
zn = 2
zo = 434
zr = 2
zs = 2
zu = 57
zw = 25
zy = 135
zz = 1
real 0m0.015s
user 0m0.015s
sys 0m0.001s
A bit about the array type. Since you have 143K words, that rules out using a short or unsigned short type -- just in case you have a bad dictionary with all 143K words being "aardvark".... The int type is more than capable of handling all words -- even if you have a bad dictionary containing only "aardvark".
Look things over and let me know if this is what you need, if not let me know where I misunderstood. Also, let me know if you have further questions.
Such job is more suitable for languages like Python, Perl, Ruby etc. instead of C. I suggest at least trying C++.
If you don't have to write it in C, here is my Python version: (since you didn't mention it in the question - are you working on an embedded system or something where C/ASM are the only options?)
FILENAME = '/etc/dictionaries-common/words'
with open(FILENAME) as f:
flattened = [ line[:2] for line in f ]
dic = {
key: flattened.count(key)
for key in sorted(frozenset(flattened))
}
for k, v in dic.items():
print(f'{k} = {v}')
Outputs:
A' = 1
AM = 2
AO = 2
AW = 2
Aa = 6
Ab = 44
Ac = 37
Ad = 68
Ae = 18
Af = 22
Ag = 36
Ah = 12
Ai = 17
Aj = 2
Ak = 14
Al = 284
Am = 91
An = 223
Ap = 44
Aq = 13
Ar = 185
As = 88
At = 56
Au = 81
Av = 28
Ax = 2
... ...

Order pair of ints inserted by input

Im getting input from user and then order a order of pairs if its possible
Case 1 - 4 pairs input [1,2][2,3][3,4][4,5] This a possible input so i should order this and show 1,2,3,4,5
Case 2 -4 pairs input [1,2][2,3][3,2][4,5] this is inculent so just show a message that doesnt make sense
Im trying doing this by using a adjacency matrix but now im stuck!
I create a adjacency matrix marking user input
Case 1 Matrix would be:
Let's say you have a 2D array to collect your pairs like:
#define MAXPAIRS 20 /* if you need a constant, #define one (or more) */
#define MAXC 1024
...
char line[MAXC]; /* buffer to hold each line of input */
int pairs[MAXPAIRS][2] = {{0}}, /* 2D array to hold pairs */
npairs = 0; /* number of pairs entered */
You can read each pair into your 2D array as follows:
while (npairs < MAXPAIRS) { /* loop while array not full */
printf ("enter pair [%2d] : ", npairs); /* prompt */
if (!fgets (line, MAXC, stdin)) { /* read/validate line */
puts ("(user canceled input)");
break;
}
else if (*line == '\n') /* end input on ENTER alone */
break;
/* parse integers from line into pair */
if (sscanf (line, "%d %d", &pairs[npairs][0], &pairs[npairs][1]) != 2) {
fputs (" error: invalid integer input.\n", stderr);
continue;
}
npairs++; /* increment counter */
}
(note: if you want your prompt to ask for "pair[1]... pair[2] ..." instead of "pair[0]... pair[1]...", you can use printf ("enter pair [%2d] : ", npairs + 1); adding 1 to the npairs value.)
Which simply collects pairs from the user until the array is full, or the user signals done entering input by pressing Enter alone on the line after the last input.
With your pairs stored, you can loop over the array validating that the array is incluent, between rows (if required) and between pair values, creating your matrix as you go:
int matrix[npairs+1][npairs+1]; /* using a VLA */
for (int i = 0; i < npairs + 1; i++) { /* loop over pairs + 1 */
/* 2nd row on, check value is 1-greater than previous row */
if (i && i < npairs && pairs[i][0] - pairs[i-1][0] != 1) {
fputs (" error: not incluent - between rows.\n", stderr);
return 1;
}
/* check that pair values are adjacent, increasing */
if (i < npairs && pairs[i][1] - pairs[i][0] != 1) {
fputs (" error not incluent - between pair values.\n", stderr);
return 1;
}
for (int j = 0; j < npairs + 1; j++) /* fill row in matrix */
matrix[i][j] = i < npairs && j == i + 1 ? 1 : 0;
}
If you have made it this far, you matrix is filled and your pairs incluent and you simply need to output the matrix, e.g.
puts ("\nmatrix:"); /* output matrix */
print_matrix (npairs+1, npairs+1, matrix);
Putting it altogether along with the print_matrix() function, you would have:
#include <stdio.h>
#define MAXPAIRS 20 /* if you need a constant, #define one (or more) */
#define MAXC 1024
/* simple print matrix function */
void print_matrix (int rows, int cols, int (*arr)[cols])
{
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++)
printf (j ? " %d" : "%d", arr[i][j]);
putchar ('\n');
}
}
int main (void) {
char line[MAXC]; /* buffer to hold each line of input */
int pairs[MAXPAIRS][2] = {{0}}, /* 2D array to hold pairs */
npairs = 0; /* number of pairs entered */
while (npairs < MAXPAIRS) { /* loop while array not full */
printf ("enter pair [%2d] : ", npairs); /* prompt */
if (!fgets (line, MAXC, stdin)) { /* read/validate line */
puts ("(user canceled input)");
break;
}
else if (*line == '\n') /* end input on ENTER alone */
break;
/* parse integers from line into pair */
if (sscanf (line, "%d %d", &pairs[npairs][0], &pairs[npairs][1]) != 2) {
fputs (" error: invalid integer input.\n", stderr);
continue;
}
npairs++; /* increment counter */
}
int matrix[npairs+1][npairs+1]; /* using a VLA */
for (int i = 0; i < npairs + 1; i++) { /* loop over pairs + 1 */
/* 2nd row on, check value is 1-greater than previous row */
if (i && i < npairs && pairs[i][0] - pairs[i-1][0] != 1) {
fputs (" error: not incluent - between rows.\n", stderr);
return 1;
}
/* check that pair values are adjacent, increasing */
if (i < npairs && pairs[i][1] - pairs[i][0] != 1) {
fputs (" error not incluent - between pair values.\n", stderr);
return 1;
}
for (int j = 0; j < npairs + 1; j++) /* fill row in matrix */
matrix[i][j] = i < npairs && j == i + 1 ? 1 : 0;
}
puts ("\nmatrix:"); /* output matrix */
print_matrix (npairs+1, npairs+1, matrix);
}
(note: the use of a ternary in several places above which has the form test ? if_true : if_false (a shorthand if (test) ... else ...) It can be used virtually anywhere, function parameters, etc.., anywhere a choice is needed.)
Example Use/Output
$ ./bin/incluentpairs
enter pair [ 0] : 1 2
enter pair [ 1] : 2 3
enter pair [ 2] : 3 4
enter pair [ 3] : 4 5
enter pair [ 4] :
matrix:
0 1 0 0 0
0 0 1 0 0
0 0 0 1 0
0 0 0 0 1
0 0 0 0 0
Edit Per-Comment
To output the message on error you can do:
for (int i = 0; i < npairs + 1; i++) { /* loop over pairs + 1 */
/* 2nd row on, check value is 1-greater than previous row */
if (i && i < npairs && pairs[i][0] - pairs[i-1][0] != 1) {
fputs (" error: not incluent - between rows.\n", stderr);
return 1;
}
/* check that pair values are adjacent, increasing */
if (i < npairs && pairs[i][1] - pairs[i][0] != 1) {
if (i) {
char msg[64];
sprintf (msg, "contradictory %d->%d %d->%d\n",
pairs[i-1][0], pairs[i-1][1], pairs[i][0], pairs[i][1]);
fputs (msg, stderr);
}
else
fputs (" error not incluent - between pair values.\n", stderr);
return 1;
}
for (int j = 0; j < npairs + 1; j++) /* fill row in matrix */
matrix[i][j] = i < npairs && j == i + 1 ? 1 : 0;
}
Example Use/Output
$ ./bin/incluentpairs
enter pair [ 0] : 1 2
enter pair [ 1] : 2 3
enter pair [ 2] : 3 2
enter pair [ 3] : 4 5
enter pair [ 4] :
contradictory 2->3 3->2
Look things over and let me know if you have questions (just leave a comment below).

Finding certain combination in a text file

I have a text file of combinations without repetition of 6 number ranging from 1 to 10, like this:
2 3 8 9 6 4
8 3 1 4 7 9
1 3 5 7 6 9
1 5 7 9 8 4
1 3 5 4 8 7
2 4 6 8 7 1
6 7 8 3 5 9
3 1 6 2 7 9
1 7 4 2 5 8
3 4 9 2 1 7
...
and I have a gold combination, let's say: 2, 1, 3, 9, 8, 5
I want to check how many times I have a combination in my text file that matches 5 numbers of the gold combination. This is my code attempt:
// Including C Standard Libraries
#include <stdint.h>
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
int main()
{
// Gold Combination
int n1 = 2;
int n2 = 1;
int n3 = 3;
int n4 = 9;
int n5 = 8;
int n6 = 5;
// Numbers of Matching Combinations
int match_comb = 0;
// Creating a file to see combinations content
char ch, file_name[25];
FILE *fp;
fp = fopen("combinations.txt", "r"); // Read Mode
if (fp == NULL)
{
perror("Error while opening the file.\n");
exit(EXIT_FAILURE);
}
int j = 0;
int mn = 0; // Number of matched numbers
int x[6] = {0,0,0,0,0,0};
char c;
while((c = fgetc(fp)) != EOF)
{
if(c == ' ' || c == '\n')
{
}
else
{
x[j] = c;
if (j == 5)
{
if(x[0]==n1 || x[0]==n2 || x[0]==n3 || x[0]==n5 || x[0]==n6){
mn += 1;
}if(x[1]==n1 || x[1]==n2 || x[1]==n3 || x[1]==n5 || x[1]==n6){
mn += 1;
}if(x[2]==n1 || x[2]==n2 || x[2]==n3 || x[2]==n5 || x[2]==n6){
mn += 1;
}if(x[3]==n1 || x[3]==n2 || x[3]==n3 || x[3]==n5 || x[3]==n6){
mn += 1;
}if(x[4]==n1 || x[4]==n2 || x[4]==n3 || x[4]==n5 || x[4]==n6){
mn += 1;
}if(x[5]==n1 || x[5]==n2 || x[5]==n3 || x[5]==n5 || x[5]==n6){
mn += 1;
}
if ( mn == 5)
{
match_comb += 1; // Adding One the the Match Combinantions counter
}
for (int i = 0; i < 6; ++i) // Resetting x array
{
x[i] = 0;
}
mn = 0; // Resetting
j = -1; // Resetting j
}
j += 1;
}
}
printf("Number of Matching Combinations:");
printf("%d", match_comb);
printf("\n");
fclose(fp);
return 0;
}
But, I think the code is not working, because it always says that there are 0 matched combinations .. Are there ways to simplify or make my code work?
also, this only works for the case of numbers with one digit, but in the case I have bigger range, let's say 1-20, I am not really sure how to gather the numbers from the text file .. I was thinking in a condition where there was a counter after every space, if the counter is one, take the character as a number of one digit, if the counter is two, gather the two characters and do something to tell the code to gather the two characters and use the resulted number, but I don't know how to do that ..
Edit:
int main()
{
// Gold Combination
int n1 = 20;
int n2 = 1;
int n3 = 35;
int n4 = 9;
int n5 = 18;
int n6 = 5;
// Numbers of Matching Combinations
int match_comb = 0;
// Creating a file to see combinations content
char ch, file_name[25];
FILE *fp;
fp = fopen("combinations.txt", "r"); // Read Mode
if (fp == NULL)
{
perror("Error while opening the file.\n");
exit(EXIT_FAILURE);
}
int j = 0;
int mn = 0; // Number of matched numbers
int x[6] = {0,0,0,0,0,0};
int c;
while((c = fgetc(fp)) != EOF)
{
//x[j] = fscanf(fp, "%d", &c);
fscanf(fp, "%d %d %d %d %d %d", &x[0], &x[1], &x[2], &x[3], &x[4], &x[5]);
printf("%d", x[0]);
printf(" ");
printf("%d", x[1]);
printf(" ");
printf("%d", x[2]);
printf(" ");
printf("%d", x[3]);
printf(" ");
printf("%d", x[4]);
printf(" ");
printf("%d", x[5]);
if(x[0]==n1 || x[0]==n2 || x[0]==n3 || x[0]==n5 || x[0]==n6){
mn += 1;
}if(x[1]==n1 || x[1]==n2 || x[1]==n3 || x[1]==n5 || x[1]==n6){
mn += 1;
}if(x[2]==n1 || x[2]==n2 || x[2]==n3 || x[2]==n5 || x[2]==n6){
mn += 1;
}if(x[3]==n1 || x[3]==n2 || x[3]==n3 || x[3]==n5 || x[3]==n6){
mn += 1;
}if(x[4]==n1 || x[4]==n2 || x[4]==n3 || x[4]==n5 || x[4]==n6){
mn += 1;
}if(x[5]==n1 || x[5]==n2 || x[5]==n3 || x[5]==n5 || x[5]==n6){
mn += 1;
}
if ( mn == 5)
{
match_comb += 1; // Adding One the the Match Combinantions counter
}
for (int i = 0; i < 6; ++i) // Resetting x array
{
x[i] = 0;
}
mn = 0; // Resetting
printf("\n");
}
printf("Number of Matching Combinations:");
printf("%d", match_comb);
printf("\n");
fclose(fp);
return 0;
}
The problem lies with:
x[j] = c;
This assigns a char to an integer. You need to convert c to an integer. For example by subtracting the character code of zero:
x[j] = c-'0';
You can use isdigit(c) to check whether c is really a digit.
Either with the help of the debugger or by using printf to show the exact values of the x[0], x[1], ... you get a clearer view of what was going wrong.
As for reading numbers of more than 1 digit, the best idea is to use a function such as fscanf(fp, "%d", &c) which automatically converts the read characters to a number. Note that if you use &c here, c needs to be an int, not a char.
If you want to work with fscanf, you need to remove the calls to fgetc (in your while-loop), because otherwise fgetc everytime removes a character. Removing a character is no problem when that's a space or a newline, but it is a problem for the first digit in the line. When fgetc can not be used anymore for checking end-of-file, use the return value of fscanf as explained in this post. For example:
while (true) // endless loop, but will end via a 'break'
{
// remove if(c == ' ' || c == '\n')
if (fscanf(fp, "%d", &c) != 1) // check whether fscanf found 1 input
break; // this jumps out of the while loop
.... // rest of your code
}
If you really want to use fgetc for reading the numbers, you need something like:
if (isdigit(c))
num = num * 10 + (c - '0');
and not yet putting num in the X-array until you encounter a non-digit. num needs to be reset to 0 thereafter.
As for the code you use for calculating the number of matches, it looks quite clever if you're fully new to programming. An improvement would be to also put the n values in an array and to use for-loops to check the number of matches.

How to detect data beyond certain value out of a fixed range

I've written a c program which can read a text file with single column of data. All the data can be read into the program with the following codes:
#include <stdio.h>
#include <cstdlib>
main()
{
char temp[20];
float t;
int a = 0, x = 0; //a is no. of data greater than 180 and x is no of data
FILE *fpt;
fpt = fopen("case i.txt", "r");
fscanf(fpt, "%s", temp); // read and display the column header
printf("%s\n", temp);
while (fscanf(fpt, "%f", &t) == 1)
{
printf("%.2f\n", t);
++x; //count for number of data
if (t > 180) {
++a; //count for number of data > 180
}
if (x > 2 && a == 2) { //place where bug is expected to occur
printf("2 out of 3 in a row is greater than 180");
a=0; //reset a back to zero
x=0;
}
}
fclose(fpt);
system("pause");
}
The problem comes when I want to detect like 2 out of 3 data are beyond 180 degree Celsius. I tried some ideas like when (no. of data > 2) and (two data > 180) then generate an error message, but it will have bug as it may have two data > 180 but when 4 data are read, that means it become 2 out of 4, not 2 out of 3, is it possible to be programmed? Thank you in advance for every help.
The following is the sample data and output:
You'll need to keep a "sliding window" of 3 values indicating how many are over 180.
So one approach would be something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(void)
{
char temp[20];
float t;
const int min_over = 2;
const int max_window = 3;
const int max_value = 180;
char over[max_window]; // a 1 means over, 0 otherwise
int oi = 0;
int num_values = 0;
FILE *fpt;
fpt = fopen("case i.txt", "r");
fscanf(fpt, "%s", temp); // read and display the column header
printf("%s\n", temp);
memset(over, 0, max_window);
while (fscanf(fpt, "%f", &t) == 1)
{
int num_hit, i;
printf("%.2f\n", t);
// Calculate num_hit: how many over in a window of max_window
//
over[oi] = (t > max_value) ? 1 : 0;
if (++oi >= max_window)
oi = 0;
for ( num_hit = i = 0; i < max_window; i++ ) num_hit += over[i];
// Only check for min_over/max_window after at least
// max_window values read; Reset the check
//
if ((++num_values >= max_window) && (num_hit >= min_over))
{
printf("It happened!\n");
memset(over, 0, max_window);
num_values = 0;
}
}
fclose(fpt);
system("pause");
}
Since you want a ratio of 2/3, that corresponds to min_over / max_window values.
I ran this on your commented data sample:
Temperature
190.00
190.00
170.00
It happened!
200.00
190.00
100.00
It happened!
100.00
190.00
190.00
It happened!
There are about a million billion different ways to do this, but you just need to keep track of how many samples exceed the threshold and then do whatever you want to do when you hit that mark.
Let's say, once you find your "2 out of 3" samples that exceed 180 you want to print the list and stop reading from the file:
FILE *fpt;
float t;
float samples[3] = {0}; // keep a record of 3 samples
int total = 0, i;
fpt = fopen("file1.txt", "r");
while (fscanf(fpt, "%f", &t) == 1) // read until there are no more samples
{
total = 0; // clear our counter
samples[2] = samples[1]; // toss out the old 3rd sample
samples[1] = samples[0]; // and shift them to make room for the
samples[0] = t; // one we just read
for(i = 0; i<3; i++)
if(samples[i] > 180) // if any are over 180
total++; // increment our counter
if(total == 2) { // if 2 of the 3 are over 180, we got 2 out of 3
printf("2 out of 3 samples are greater than 180!\n");
printf("1: %f\n2: %f\n3:%f\n", samples[2],samples[1],samples[0]);
break;
}
}
fclose(fpt);
It's not very efficient.. but should be pretty easy to understand.

Reading an integer and / or character without an array

I need to read an integer one by one until i read a '$', and then to determine the largest, smallest and so on. I could use a character variable and do it, but it works for numbers from 0 to 9. But how do I read integers of two or more digits and at the same time, detect a '$' - I used a char *, but I guess it is equivalent to an array, which I should not use here. Also, char holds a single number / char, hence not suitable for larger numbers. What should I do?
No arrays, no pointers, no tricky char-by-char read & convert. Just plain scanf and getchar.
#include <stdio.h>
int main()
{
int newValue=0; /* value being acquired */
int max; /* current maximum value */
int min; /* current minimum value */
int firstAcquired=0; /* boolean flag set to 1 after first acquisition */
int ch; /* used as temporary storage for the getchar() */
for(;;)
{
/* scanf returns the number of successfully acquired fields; here if it
returns 0 means that the value couldn't be acquired */
if(scanf("%d",&newValue)==0)
{
/* scanf failed, but it's guaranteed it put the offending character
back into the stream, from where we can get it */
ch=getchar();
if(ch=='$' || ch==EOF)
break;
else
/* from here to the break it's just to handle invalid input and EOF
gracefully; if you are not interested you can replace this stuff
with a random curse to the user */
{
puts("Invalid input, retry.");
/* Empty the buffer */
while((ch=getchar())!='\n' && ch!=EOF)
;
}
/* if it's EOF we exit */
if(ch==EOF)
break;
}
else
{
/* Everything went better than expected */
if(!firstAcquired || newValue>max)
max=newValue;
if(!firstAcquired || newValue<min)
min=newValue;
firstAcquired=1;
}
}
if(firstAcquired)
{
printf("The maximum value was %d\n", max);
printf("The minimum value was %d\n", min);
}
return 0;
}
In the interest of spoiling all the fun, showing off, outright overkill and darn tooting fun:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
namespace qi = boost::spirit::qi;
template <typename V>
void show_statistics(const V& data)
{
using namespace boost::spirit::karma;
std::cout << "data:\t"<< format('{' << auto_ % ", " << '}', data) << std::endl;
std::cout << "min:\t" << *std::min_element(data.begin(), data.end()) << std::endl;
std::cout << "max:\t" << *std::max_element(data.begin(), data.end()) << std::endl;
auto sum = std::accumulate(data.begin(), data.end(), 0);
std::cout << "sum:\t" << sum << std::endl;
std::cout << "avg:\t" << (1.0*sum) / data.size() << std::endl;
}
void dostats(const std::vector<int>& data) { show_statistics(data); }
int main()
{
std::cin.unsetf(std::ios::skipws);
auto f = boost::spirit::istream_iterator(std::cin);
decltype(f) l;
bool ok = qi::phrase_parse(f, l, +(+qi::int_ > "$") [ dostats ], qi::space);
if (f!=l)
std::cout << "Remaining input unparsed: " << std::string(f,l) << std::endl;
return ok? 0:255;
}
Demo:
Sample run:
sehe#natty:/tmp$ ./test2 <<< "1 2 3 4 5 $ 3 -9 0 0 0 $ 900 9000 $ unparsed trailing text"
data: {1, 2, 3, 4, 5}
min: 1
max: 5
sum: 15
avg: 3
data: {3, -9, 0, 0, 0}
min: -9
max: 3
sum: -6
avg: -1.2
data: {900, 9000}
min: 900
max: 9000
sum: 9900
avg: 4950
Remaining input unparsed: unparsed trailing text
You can use 'scanf("%s")' to read a group of characters. You can then check if the first character is a '%' and terminate if so. Otherwise, call atoi to convert to an integer. Store the largest and smallest in integer types, not character types.
Basically, the only time you have to deal with characters is when you read them in and check if it's a '$'. Otherwise, use integers all the way through.
If I'm getting what you want correctly it should be something like this:
int i = 0;
char c = getchar();
while (c != '$')
{
i = i * 10 + (c - '0');
c = getchar();
}
Hope it helped.
You can read char by char in a loop, check values and so on...
int i = 0;
char c = 0;
int size = 10;
int currentIndex = 0;
int* integers = malloc(sizeof(int) * size);
int counter = 0;
do
{
scanf("%c", &c);
if (c == ' ') // Match space, used a number separator
{
if (counter != 0 && i != 0)
{
if (currentIndex >= size)
{
size += 5;
integers = realloc(integers, size);
}
integers[currentIndex] = i;
currentIndex++;
}
counter = 0;
i = 0;
}
else if (c >= '0' && c <= '9')
{
i = (i * counter * 10) + (c - '0');
counter++;
}
}
while(c != '$');
Don't forget to free integers in the end!
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#define BUFF_SIZE 16
#define DATA_MAX_SIZE 64
int main(){
char buff[BUFF_SIZE];
int data[DATA_MAX_SIZE];
int i,value,counter = 0;
char *buffp,*p;
while(NULL!=fgets(buff,BUFF_SIZE,stdin)){
buff[BUFF_SIZE - 1]='\0';
buffp = buff;
next: while(isspace(*buffp))
++buffp;
if(*buffp == '\0')
continue;
value = strtol(buffp, &p, 0);
if(counter == DATA_MAX_SIZE){
printf("over data max size!\n");
break;
} else if(p != buffp){
data[counter++]=value;
if(*p == '\0' || *p == '\r'|| *p == '\n')
continue;
buffp = p;
goto next;
} else {
if(*p == '$')
break;
printf("format error\n");
break;
}
}
//check code
for(i=0;i<counter;++i){
printf("data[%d]=%d\n",i, data[i]);
}
return 0;
}
OUTPUT:
1 2 3
123
456
99 $
data[0]=1
data[1]=2
data[2]=3
data[3]=123
data[4]=456
data[5]=99
12345
4
$
data[0]=12345
data[1]=4

Resources