Snowflake - Read comma delimited file with blank values

Snowflake - Read comma delimited file with blank values - snowflake-cloud-data-platform

How to read a CSV file in Snowflake, file has commas between double quotes and some rows has 1/2 blank columns ?
This solution works if there are no blank values in the row. Can you please help enhance it so that it accepts blank values as well?
create or replace function SPLIT_QUOTED_STRING(STR string)
returns array
language javascript
as
$$
var arr = STR.match(/(".*?"|[^",\s]+)(?=\s*,|\s*$)/g);
for (var i = 0; i < arr.length; i++) {
arr[i] = arr[i].replace(/['"]+/g, '')
}
return arr;
$$;
SPLIT_QUOTED_STRING('o,,,"sadasdasd",123123123,"asdasdasd.www.org,123123,link.com",0')[1]::string
--this must give a blank value!
Thanks for the help!

Apparently you want code to split CSV values in a JavaScript UDF.
Instead of regular expressions, this code does the job better:
create or replace function SPLIT_QUOTED_STRING(STR string)
returns array
language javascript
as
$$
/**
 * Parse CSV text and return the fields of its first row.
 *
 * Fields are separated by commas. A field may be wrapped in double quotes,
 * in which case commas and line breaks inside it are kept literally and a
 * doubled quote ("") stands for one quote character. Blank fields, including
 * a blank field after a trailing comma, are returned as empty strings.
 *
 * @param {string} str CSV text; only the first row is returned.
 * @returns {string[]|undefined} Fields of the first row, or undefined when
 *          str is null/undefined or empty.
 */
function parseCSV(str) {
    // A missing input has no row to return; without this guard str.length throws.
    if (str == null) { return undefined; }
    var arr = [];
    var quote = false; // 'true' means we're inside a quoted field
    // Iterate over each character, keep track of current row and column (of the returned array)
    for (var row = 0, col = 0, c = 0; c < str.length; c++) {
        var cc = str[c], nc = str[c + 1]; // Current character, next character
        arr[row] = arr[row] || []; // Create a new row if necessary
        arr[row][col] = arr[row][col] || ''; // Create a new column (start with empty string) if necessary
        // A doubled quotation mark inside a quoted field is an escaped quote:
        // add one quotation mark to the current column and skip the next character
        if (cc == '"' && quote && nc == '"') { arr[row][col] += cc; ++c; continue; }
        // If it's just one quotation mark, begin/end quoted field
        if (cc == '"') { quote = !quote; continue; }
        // A comma outside quotes starts the next column. The column is created
        // right away so that a trailing comma still yields a final blank field.
        if (cc == ',' && !quote) { arr[row][++col] = ''; continue; }
        // If it's a newline (CRLF) and we're not in a quoted field, skip the next character
        // and move on to the next row and move to column 0 of that new row
        if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }
        // If it's a newline (LF or CR) and we're not in a quoted field,
        // move on to the next row and move to column 0 of that new row
        if (cc == '\n' && !quote) { ++row; col = 0; continue; }
        if (cc == '\r' && !quote) { ++row; col = 0; continue; }
        // Otherwise, append the current character to the current column
        arr[row][col] += cc;
    }
    return arr[0];
}
return parseCSV(STR);
$$;
select SPLIT_QUOTED_STRING('o,,,"sadasdasd",123123123,"asdasdasd.www.org,123123,link.com",0')
JS taken from this answer: https://stackoverflow.com/a/14991797/132438

If you want to read a CSV file that is separated by commas (the default delimiter for CSV files) and in which some columns are enclosed in double quotes, you can create a file format object and use the COPY command:
-- File format for comma-separated data whose fields may be wrapped in double quotes.
create or replace file format csvformat type=csv FIELD_OPTIONALLY_ENCLOSED_BY = '"';
-- Read the first seven columns of the staged file using that format.
-- Stage references are written with a leading @.
select $1, $2, $3, $4, $5, $6, $7
from @mystage (file_format => csvformat);
+----+----+----+-----------+-----------+-----------------------------------+----+
| $1 | $2 | $3 | $4 | $5 | $6 | $7 |
+----+----+----+-----------+-----------+-----------------------------------+----+
| o | | | sadasdasd | 123123123 | asdasdasd.www.org,123123,link.com | 0 |
+----+----+----+-----------+-----------+-----------------------------------+----+
FIELD_OPTIONALLY_ENCLOSED_BY https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#type-csv

Related

How to count the frequency of the two first letters in a word from a dictionary?

I have a 143k lowcase word dictionary and I want to count the frequency of the first two letters
(ie: aa* = 14, ab* = 534, ac = 714 ... za = 65, ... zz = 0 ) and put it in a bidimensional array.
However I have no idea how to even go about iterating them without switches or a bunch of if elses I tried looking on google for a solution to this but I could only find counting amount of letters in the whole word and mostly only things in python.
I've sat here for a while thinking how could I do this and my brain keeps blocking this was what I came up with but I really don't know where to head.
int main (void) {
char *line = NULL;
size_t len = 0;
ssize_t read;
char *arr[143091];
FILE *fp = fopen("large", “r”);
if (*fp == NULL)
{
return 1;
}
int i = 0;
while ((read = getline(&line, &len, fp)) != -1)
{
arr[i] = line;
i++;
}
char c1 = 'a';
char c2 = 'a';
i = 0;
int j = 0;
while (c1 <= 'z')
{
while (arr[k][0] == c1)
{
while (arr[k][1] == c2)
{
}
c2++;
}
c1++;
}
fclose(fp);
if (line)
free(line);
return 0;
}
Am I being an idiot or am I just missing something really basic? How can I go about this problem?
Edit: I forgot to mention that the dictionary is only lowercase and has some edge cases like just an a or an e and some words have ' (like e'erand e's) there are no accentuated latin characters and they are all accii lowercase

The code assumes that the input has one word per line without leading spaces and will count all words that start with two ASCII letters from 'a'..'z'. As the statement in the question is not fully clear, I further assume that the character encoding is ASCII or at least ASCII compatible. (The question states: "there are no accentuated latin characters and they are all accii lowercase")
If you want to include words that consist of only one letter or words that contain ', the calculation of the index values from the characters would be a bit more complicated. In this case I would add a function to calculate the index from the character value.
Also for non-ASCII letters the simple calculation of the array index would not work.
The program reads the input line by line without storing all lines, checks the input as defined above and converts the first two characters from range 'a'..'z' to index values in range 0..'z'-'a' to count the occurrence in a two-dimensional array.
#include <stdio.h>
#include <stdlib.h>
/* Tally how often each pair of leading lowercase letters starts a line of
 * the file "large", then print every pair that occurred with its count.
 * Returns 1 if the file cannot be opened, 0 otherwise. */
int main (void) {
    /* number of letters in 'a'..'z'; also the size of each table dimension */
    enum { LETTERS = 'z' - 'a' + 1 };
    unsigned long count[LETTERS][LETTERS] = {0};
    char *buffer = NULL;      /* line buffer owned by getline() */
    size_t capacity = 0;
    ssize_t nread;
    FILE *dict = fopen("large", "r");

    if (!dict)
        return 1;

    for (;;) {
        nread = getline(&buffer, &capacity, dict);
        if (nread == -1)
            break;
        /* lines shorter than two characters cannot hold a letter pair */
        if (nread < 2)
            continue;
        char first = buffer[0];
        char second = buffer[1];
        /* only pairs made of two letters from 'a'..'z' are counted */
        if (first < 'a' || first > 'z' || second < 'a' || second > 'z')
            continue;
        count[first - 'a'][second - 'a']++;
    }

    fclose(dict);
    free(buffer);             /* free(NULL) is a no-op */

    /* walk the table in row-major order: aa, ab, ..., zz */
    for (int pair = 0; pair < LETTERS * LETTERS; pair++) {
        int hi = pair / LETTERS;
        int lo = pair % LETTERS;
        if (count[hi][lo] == 0)
            continue;         /* only print combinations that actually occurred */
        printf("%c%c %lu\n", hi+'a', lo+'a', count[hi][lo]);
    }
    return 0;
}
The example input
foo
a
foobar
bar
baz
fish
ford
results in
ba 2
fi 1
fo 3

The idea is to have a two-dimensional array, each dimension holding one of the first two characters of each line. The clever bit is that in C, even a string whose length as reported by strlen() to be 1 has two char's - the character and the trailing 0 at the end, so you don't need to special-case cases like "a". Its frequency is tracked in counts['a'][0].
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
/* Reads input on stdin, outputs to stdout. Using a multibyte
* character encoding will likely cause unusual output; don't do
* that. But it will work with encodings other than ASCII. Also handles
* mixed-cased input.
*/
/* Count the first two characters of every non-empty line read from stdin
 * and print each pair with its frequency. A one-character line is counted
 * under [first][0], because the second "character" is then the string
 * terminator. Returns 0 on success, 1 if memory cannot be allocated. */
int main(void) {
  /* One lazily allocated row of counters per possible first byte. */
  int *counts[UCHAR_MAX + 1] = { NULL };
  char *line = NULL;
  size_t bufsize = 0;
  ssize_t len;
  // Populate the frequency counts
  while ((len = getline(&line, &bufsize, stdin)) > 0) {
    if (line[len - 1] == '\n') { // Get rid of newline
      line[len - 1] = 0;
    }
    if (line[0] == 0) { // Skip empty lines
      continue;
    }
    // Go through unsigned char first: plain char may be signed, and a
    // negative value converted to unsigned would index far out of bounds.
    unsigned fc = (unsigned char)line[0];
    unsigned sc = (unsigned char)line[1];
    if (!counts[fc]) { // Allocate the second dimension if needed
      counts[fc] = calloc(UCHAR_MAX + 1, sizeof(int));
      if (!counts[fc]) { // Out of memory: report and stop
        perror("calloc");
        free(line);
        return 1;
      }
    }
    counts[fc][sc] += 1;
  }
  free(line);
  // Print out the frequency table.
  for (int fc = 1; fc <= UCHAR_MAX; fc += 1) {
    if (!counts[fc]) { // Skip unused first characters
      continue;
    }
    if (counts[fc][0]) { // Single-character line count
      printf("%c\t%d\n", fc, counts[fc][0]);
    }
    for (int sc = 1; sc <= UCHAR_MAX; sc += 1) {
      if (counts[fc][sc]) {
        printf("%c%c\t%d\n", fc, sc, counts[fc][sc]);
      }
    }
    free(counts[fc]); // Row has been printed; release it
  }
  return 0;
}
Example:
$ perl -Ci -ne 'print if /^[[:ascii:]]+$/ && /^[[:lower:]]+$/' /usr/share/dict/american-english-large | ./freqs
a 1
aa 6
ab 483
ac 651
ad 497
ae 112
af 198
ag 235
ah 7
ai 161
etc.

How to count the frequency of the two first letters in a word from a dictionary?
Use a simple state machine to read one character at a time, detect when the character is one of the first 2 letters of a word, then increment a 26x26 table. Words do not need to be on separate lines. Any word length is allowed.
unsigned long long frequency[26][26] = { 0 }; // Set all to 0
FILE *fp = fopen("large", "r");
...
int ch;
// Below 2 objects are the state machine
int word[2];          // first two letters of the current word, lowercased
int word_length = 0;  // letters stored for the current word so far (0..2)
while ((ch = fgetc(fp)) != EOF) {
  if (isalpha(ch)) {
    if (word_length < 2) {
      word[word_length++] = tolower(ch);
      if (word_length == 2) { // 2nd letter just arrived
        assert(word[0] >= 'a' && word[0] <= 'z'); // Note 1
        assert(word[1] >= 'a' && word[1] <= 'z');
        frequency[word[0] - 'a'][word[1] - 'a']++;
      }
    }
    // Letters after the second one are ignored until the word ends
  } else {
    word_length = 0; // Make ready for a new word
  }
}
// Print every letter pair that occurred. The loop headers must not end in
// ';', otherwise the bodies are empty and L0/L1 are out of scope below.
for (int L0 = 'a'; L0 <= 'z'; L0++) {
  for (int L1 = 'a'; L1 <= 'z'; L1++) {
    unsigned long long sum = frequency[L0 - 'a'][L1 - 'a'];
    if (sum) {
      printf("%c%c %llu\n", L0, L1, sum);
    }
  }
}
Note 1: in locales that have letters beyond a-z, like á, é, í, ó, ú, ü, ñ, additional handling is needed. A simple approach is to use a frequency[256][256] table - somewhat memory hoggish.

There is no need to read the entire dictionary into memory, or even to buffer lines. The dictionary consists of words, one per line. This means it has this structure:
"aardvark\nabacus\n"
The first two characters of the file are the first digraph. The other interesting digraphs are all characters which immediately follow a newline.
This can be read by a state machine, which we can code into a loop like this. Suppose f is the FILE * handle to the stream reading from the dictionary file:
/* Walk the stream f line by line, looking only at the first two
   characters of each line and discarding the rest of the line. */
for (;;) {
  /* Read two characters from the dictionary file. */
  int ch0 = getc(f);
  int ch1 = getc(f);
  /* Is the ch0 newline? That means we read an empty line,
     and one character after that. So, let us move that character
     into ch0, and read another ch1. Keep doing this until
     ch0 is not a newline, and bail at EOF. */
  while (ch0 == '\n' && ch1 != EOF) {
    ch0 = ch1;
    ch1 = getc(f);
  }
  /* After the above, if we have EOF, we are done: bail the loop */
  if (ch0 == EOF || ch1 == EOF)
    break;
  /* We know that ch0 isn't newline. But ch1 could be newline;
     i.e. we found a one-letter-long dictionary entry. We don't
     process those, only two or more letters. */
  if (ch1 != '\n') {
    /* Here we put the code which looks up the ch0-ch1 pair
       in our frequency table and increments the count. */
  }
  /* Now drop characters until the end of the line. If ch1
     is newline, we are already there. If not, let's just use
     ch1 for reading more characters until we get a newline.
     The result must be stored back into ch1, otherwise the
     loop condition never changes. */
  while (ch1 != '\n' && ch1 != EOF)
    ch1 = getc(f);
  /* Watch out for EOF in the middle of a line that isn't
     newline-terminated. */
  if (ch1 == EOF)
    break;
}
I would do this with a state machine:
/* States: begin    - at the start of a line, nothing read yet
           have_ch0 - first character of the line is stored in ch0
           scan_eol - pair handled, skipping the rest of the line */
enum { begin, have_ch0, scan_eol } state = begin;
int ch0, ch1;
for (;;) {
  int c = getc(f);
  if (c == EOF)
    break;
  switch (state) {
  case begin:
    /* stay in begin state if newline seen */
    if (c != '\n') {
      /* otherwise accumulate ch0,
         and switch to have_ch0 state */
      ch0 = c;
      state = have_ch0;
    }
    break;
  case have_ch0:
    if (c == '\n') {
      /* newline in ch0 state: back to begin */
      state = begin;
    } else {
      /* we got a second character! */
      ch1 = c;
      /* code for processing ch0 and ch1 goes here! */
      state = scan_eol; /* switch to scanning for EOL. */
    }
    break;
  case scan_eol:
    if (c == '\n') {
      /* We got the newline we are looking for; go
         to begin state. */
      state = begin;
    }
    break;
  }
}
Now we have a tidy loop around a single call to getc. EOF is checked in one place where we bail out of the loop. The state machine recognizes the situation when we have the first two characters of a line which is at least two characters long; there is a single place in the code where to put the logic for dealing with the two characters.
We are not allocating any buffers; we are not malloc-ing lines, so there is nothing to free. There is no limit on the dictionary size we can scan (just we have to watch for overflowing frequency counters).

You are started in the right direction. You do need a 2D array 27 x 27 for a single case (e.g. lowercase or uppercase), not including digits. To handle digits, just add another 11 x 11 array and map 2-digit frequencies there. The reason you can't use a flat 1D array and map to it without serious indexing gymnastics is that the ASCII sum of "ab" and "ba" would be the same.
The 2D array solves that problem allowing the map of the 1st character ASCII value to the first index, and the map of the ASCII of the 2nd character to the 2nd index or after in that row.
An easy way to think of it is to just take a lowercase example. Let's look at the word "accent". You have your 2D array:
+---+---+---+---+---+---+
| a | a | b | c | d | e | ...
+---+---+---+---+---+---+
| b | a | b | c | d | e | ...
+---+---+---+---+---+---+
| c | a | b | c | d | e | ...
+---+---+---+---+---+---+
...
The first column tracks the first letter and then the remaining columns (the next 'a' - 'z' characters) track the 2nd character that follows the first character. (you can do this will an array of struct holding the 1st char and a 26 char array as well -- up to you) This way, you remove ambiguity of which combination "ab" or "ba".
Now note -- you do not actually need a 27 x 27 arrays with the 1st column repeated. Recall, by mapping the ASCII value to the first index, it designates the first character associated with the row on its own, e.g. row[0][..] indicates the first character was 'a'. So a 26 x 26 array is fine (and the same for digits). So you simply need:
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
...
So the remainder of the approach is simple. Open the file, read the word into a buffer, validate there is a 1st character (e.g. not the nul-character), then validate the 2nd character (continue to get the next word if either validation fails). Convert both to lowercase (or add the additional arrays if tracking both cases -- that gets ugly). Now just map the ASCII value for each character to an index in the array, e.g.
int ltrfreq[ALPHABET][ALPHABET] = {{0}};
...
while (fgets (buf, SZBUF, fp)) {        /* read each line into buf */
    /* read characters as unsigned char: the <ctype.h> functions are
     * undefined for negative values, which plain char may produce */
    int ch1 = (unsigned char)*buf, ch2; /* initialize ch1 with 1st char */
    if (!ch1 || !isalpha(ch1))          /* validate 1st char or get next word */
        continue;
    ch2 = (unsigned char)buf[1];        /* assign 2nd char */
    if (!ch2 || !isalpha(ch2))          /* validate 2nd char or get next word */
        continue;
    ch1 = tolower (ch1);                /* convert to lower to eliminate case */
    ch2 = tolower (ch2);
    ltrfreq[ch1-'a'][ch2-'a']++;        /* map ASCII to index, increment */
}
With our example word "accent", that would increment the array element [0][2], so that corresponds to row 0 and column 2 for "ac" in:
+---+---+---+---+---+
| a | b | c | d | e | ...
+---+---+---+---+---+
... ^ [0][2]
Where you increment the value at that index. So ltrfreq[0][2]++ now holds the value 1 for the combination "ac" having been seen once. When encountered again, the element would be incremented to 2 and so on... Since the value is incremented it is imperative the array be initialized all zero when declared.
When you output the results, you just map each index back to its ASCII character by adding 'a' (with the 26 x 26 layout no further offset is needed), e.g.
for (int i = 0; i < ALPHABET; i++) /* loop over all 1st char index */
for (int j = 0; j < ALPHABET; j++) /* loop over all 2nd char index */
if (ltrfreq[i][j]) /* map i, j back to ASCII, output freq */
printf ("%c%c = %d\n", i + 'a', j + 'a', ltrfreq[i][j]);
That's it. Putting it altogether in an example that takes the filename to read as the first argument to the program (or reads from stdin if no argument is given), you would have:
#include <stdio.h>
#include <ctype.h>
#define ALPHABET 26
#define SZBUF 1024
/* Count how often each pair of leading letters (case-insensitive) starts a
 * line of the input, then print every pair that occurred.
 * Input is the file named by the 1st argument, or stdin when none is given.
 * Returns 1 if the file cannot be opened, 0 otherwise. */
int main (int argc, char **argv) {

    char buf[SZBUF] = "";
    int ltrfreq[ALPHABET][ALPHABET] = {{0}};    /* must start all zero */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (fgets (buf, SZBUF, fp)) {        /* read each line into buf */
        /* read characters as unsigned char: the <ctype.h> functions are
         * undefined for negative values, which plain char may produce */
        int ch1 = (unsigned char)*buf, ch2; /* initialize ch1 with 1st char */
        if (!ch1 || !isalpha(ch1))          /* validate 1st char or get next word */
            continue;
        ch2 = (unsigned char)buf[1];        /* assign 2nd char */
        if (!ch2 || !isalpha(ch2))          /* validate 2nd char or get next word */
            continue;
        ch1 = tolower (ch1);                /* convert to lower to eliminate case */
        ch2 = tolower (ch2);
        ltrfreq[ch1-'a'][ch2-'a']++;        /* map ASCII to index, increment */
    }
    if (fp != stdin)   /* close file if not stdin */
        fclose (fp);

    for (int i = 0; i < ALPHABET; i++)      /* loop over all 1st char index */
        for (int j = 0; j < ALPHABET; j++)  /* loop over all 2nd char index */
            if (ltrfreq[i][j])      /* map i, j back to ASCII, output freq */
                printf ("%c%c = %d\n", i + 'a', j + 'a', ltrfreq[i][j]);

    return 0;
}
Example Input Dictionary
In the file dat/ltrfreq2.txt:
$ cat dat/ltrfreq2.txt
My
dog
has
fleas
and
my
cat
has
none
lucky
cat!
Example Use/Output
$ ./bin/ltrfreq2 dat/ltrfreq2.txt
an = 1
ca = 2
do = 1
fl = 1
ha = 2
lu = 1
my = 2
no = 1
Where both "cat" words accurately account for ca = 2, both "has" for ha = 2 and "My" and "my" for my = 2. The rest are just the 2 character prefixes for words that appear once in the dictionary.
Or with the entire 307993 words dictionary that comes with SuSE, timed to show the efficiency of the approach (all within 15 ms):
$ time ./bin/ltrfreq2 /var/lib/dict/words
aa = 40
ab = 990
ac = 1391
ad = 1032
ae = 338
af = 411
ag = 608
ah = 68
ai = 369
aj = 18
ak = 70
al = 2029
...
zn = 2
zo = 434
zr = 2
zs = 2
zu = 57
zw = 25
zy = 135
zz = 1
real 0m0.015s
user 0m0.015s
sys 0m0.001s
A bit about the array type. Since you have 143K words, that rules out using a short or unsigned short type -- just in case you have a bad dictionary with all 143K words being "aardvark".... The int type is more than capable of handling all words -- even if you have a bad dictionary containing only "aardvark".
Look things over and let me know if this is what you need, if not let me know where I misunderstood. Also, let me know if you have further questions.

Such job is more suitable for languages like Python, Perl, Ruby etc. instead of C. I suggest at least trying C++.
If you don't have to write it in C, here is my Python version: (since you didn't mention it in the question - are you working on an embedded system or something where C/ASM are the only options?)
# Count how often each two-character prefix starts a line of the word list
# and print the prefixes in sorted order with their counts.
from collections import Counter

FILENAME = '/etc/dictionaries-common/words'

with open(FILENAME) as f:
    # Drop the line break before slicing so that a one-letter word is
    # counted as "a" rather than as "a" followed by a newline.
    flattened = [line.rstrip('\n')[:2] for line in f]

# A single counting pass replaces calling list.count() once per distinct key.
tally = Counter(flattened)
dic = {key: tally[key] for key in sorted(tally)}

for k, v in dic.items():
    print(f'{k} = {v}')
Outputs:
A' = 1
AM = 2
AO = 2
AW = 2
Aa = 6
Ab = 44
Ac = 37
Ad = 68
Ae = 18
Af = 22
Ag = 36
Ah = 12
Ai = 17
Aj = 2
Ak = 14
Al = 284
Am = 91
An = 223
Ap = 44
Aq = 13
Ar = 185
As = 88
At = 56
Au = 81
Av = 28
Ax = 2
... ...

Parsing a CSV File Problems

I tried this to parse data given in a csv file into ID, AGE, and GPA fields in a "data" file, but I don't think I'm doing this right (when I tried printing the data, its printing weird numbers). What am I doing wrong?
char data[1000];
FILE *x = fopen("database.csv","rt");
char NAME[300];
int ID[300],AGE[300],GPA[300];
int i,j;
i = 0;
while(!feof(x)) {
fgets(data,999,x);
for (j = 0; j < 300 && data[i] != ','; j++, i++) {
ID[j] = data[i];
i++;
}
for (j = 0; j < 300 && data[i] != ','; j++, i++) {
NAME[j] = data[i];
i++;
}
for (j = 0; j < 300 && ( data[i] != '\0' || data[i] != '\r' || data[i] != data[i] != '\n'); j++, i++) {
GPA[j] = data[i];
}
}

First of all: for what you're doing, you probably want to look carefully at the functions strtok and atoi. But given the code you posted, that's perhaps still a bit too advanced, so I'm taking a longer way here.
Supposing that the line is something like
172,924,1182
then you need to parse those numbers. The number 172 is actually represented by two or four bytes in memory, in a very different format, and the byte "0" is nothing like the number 0. What you'll read is the ASCII code, which is 48 in decimal, or 0x30 in hex.
If you take the ASCII value of a single digit and subtract 48, you will get a number, because fortunately the numbers are stored in digit order, so "0" is 48, "1" is 49 and so on.
But you still have the problem of converting the three digits 1 7 2 into 172.
So once you have 'data':
(I have added commented code to deal with a unquoted, unescaped text field inside the CSV, since in your question you mention an AGE field, but then you seem to want to use a NAME field. The case when the text field is quoted or escaped is another can of worms entirely)
/* Parse the comma-separated decimal numbers held in the line buffer 'data'
   and store them by field position into ID[j], AGE[j] and GPA[j]. */
size_t i = 0;
int number = 0;
int c;
int field = 0; // Fields start at 0 (ID).
// size_t x = 0;
// A for loop that never ends until we issue a "break"
for(;;) {
    c = data[i++];
    // What character did we just read?
    // A field ends at a comma, at a line break (CR or LF) or at the string terminator.
    if ((',' == c) || ('\r' == c) || ('\n' == c) || ('\0' == c)) {
        // We have completed read of a number field. Which field was it?
        switch(field) {
            case 0: ID[j] = number; break;
            case 1: AGE[j] = number; break;
            // case 1: NAME[j][x] = 0; break; // we have already read in NAME, but we need the ASCIIZ string terminator.
            case 2: GPA[j] = number; break;
        }
        // Are we at the end of line?
        // Anything other than a comma ends the line. This includes the
        // string terminator, so a line without a trailing line break does
        // not make the loop run past the end of the buffer.
        if (',' != c) {
            // Yes, break the cycle and read the next line
            break;
        }
        // Read the next field. Reinitialize number.
        field++;
        number = 0;
        // x = 0; // if we had another text field
        continue;
    }
    // Each time we get a digit, the old value of number is shifted one order of magnitude, and c gets added. This is called Horner's algorithm:
    // Number   Read   You get
    // 0        "1"    0*10+1 = 1
    // 1        "7"    1*10+7 = 17
    // 17       "2"    17*10+2 = 172
    // 172      ","    Finished. Store 172 in the appropriate place.
    if (c >= '0' && c <= '9') {
        number = number * 10 + (c - '0');
    }
    /*
    switch (field) {
        case 1:
            NAME[j][x++] = c;
            break;
    }
    */
}

Reading in an array for maze file in C with an uneven size of cube and extra characters

I'm trying to figure out how to properly read in and store a maze from a .txt document in C. The max size of this maze will be 40x40 "cubes". After reading it in, I will be needing to solve it, by placing the path from the left hand corner to bottom right with a *. I'm used to using 2D Arrays, but this problem keeps tripping me up as I don't understand how to keep track of the rows and cols if they are not exactly even, or how I would even accurately print a * in the middle of each "cube" after I have solved it. I've read other examples of mazes that are composed of 1s and 0s, or all even # for the walls, which makes it easy to read in and keep track of, but not input like this. There will be other mazes on the same text file that I will need to read in after solving the first maze that will be separated by a double space. Below is an example of one of the mazes :
+---+---+---+---+---+
| | |
+---+ + + + +
| | | | | |
+ + + + +---+
| | | | |
+ + + +---+ +
| | | |
+ +---+---+ + +
| | |
+---+---+---+---+---+
Here is some of my code I am making so far to error check and read in characters. In it, I am attempting to initialize an array of 120x120, read in the current char, and convert these characters to either a -1 or 0 to correspond to a wall or empty space. :
/* Read in a grid as parameter of max 120x120 to account for '+' and'-' */
int readLine(int grid2D[120][120])
{
int row = 0;
int col = 0;
int isNewLine = TRUE;
/* Loop through while getchar does not equal EOF */
while ((c = getchar()) != EOF)
{
/* Check for foreign characters, return FALSE if found */
if ((c != '+') || (c != '-') || (c != '|') || (c != ' '))
{
/* If c = \n , avoid tripping error, and do nothing */
if(c == '\n'){}
else
errorFree = FALSE;
}
/* If character is a '+' '-' or '|', it is a wall, set to -1 to
use int's for wall tracking */
else if (row%2 == 0)
{
if(c == '|')
{
grid2D[row][col] = -1;
col++;
}
}
else if((c == '+') || (c == '-'))
{
grid2D[row][col] = -1;
col++;
}
else
{
if(c == '\n')
{
col = 0;
row++;
}
}
isNewLine = TRUE;
return isNewLine;
}
Any guidance would be greatly appreciated, I am not sure if the approach I am doing is correct. I believe I am currently error checking correctly, however I am struggling to understand how I should be keep track of the each "cube" since the chars for each "cube" are not even, they are more so dimensioned as 5x1 cubes (a +---+ for one side and a | for the other)

In response to your question and question in the comment, determining the row and column size is pretty straight forward. When you read a line of the array from the file with fgets, you can use strlen() to determine the number of characters (but note, it also contains the '\n' character - so you will need to subtract one - which you can do in combination with trimming the '\n' from the end)
Once you have read the first line and accounted for the '\n', set a variable that holds the number of characters (columns) in your array. Since you know your array is a cube, you can then compare the first-line length with the length of every other line read to validate all rows have the same number of columns.
As you are looping and handling the input of each line, you simply keep a row counter which when you are done with your read will hold the number of rows in your array.
There are two ways you can handle storage for your array. You can either declare an array large enough to hold your largest anticipated maze (while keeping it small enough to fit on the stack 256x512 is safe on both Linux and Windoze) or you can dynamically allocate storage for your columns and rows, using realloc() to allocate additional storage as required. (There you can handle maze sizes up to the memory limit of your computer - but it does add complexity)
Your "confusion" over my array needing to be, e.g. 11x21 is understandable. It all stems from the fact that characters on a terminal are roughly two-times taller than they are wide. So to print a "cube" of characters, you need roughly twice as many columns as you do rows. That isn't a problem at all. If you code your read of columns and rows properly and have variables tracking the number of columns and rows -- then the difference becomes nothing more than numbers that your code keeps track of in a couple of variables.
The following is a short example to address your stumbling blocks on the read of an unknown number or rows and columns up to a fixed maximum. (rather than dynamically allocating and reallocating -- which we can leave for later). To do so, we #define a constant for the maximum number of columns, and then knowing we need 1/2 that number of rows, #define a constant for that number of rows, e.g.
#include <stdio.h>
#include <string.h>
#define MAXC 512    /* declare consts for max NxN you can handle */
/* half as many rows as columns; parenthesized so the macro expands safely
 * inside any larger expression */
#define MAXR ((MAXC + 1) / 2)
Then it is a simple matter of declaring your variables to track the current row and col and the total number of rows and columns (nrow, ncol) along with declaring an array a[MAXR][MAXC] = {""}; to hold the maze. You can then either open your file if a filename is given as the 1st argument (or read from stdin by default if no argument is given). In either case, you can validate you have a stream open for reading, e.g.
size_t row = 0, col = 0, nrow = 0, ncol = 0;
char a[MAXR][MAXC+1] = {""}; /* delcare and initialize array */
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
With your file stream open for reading, now it is just a matter of reading all rows of data in the file. Since you expect all lines to have an even number of characters so that your maze is actually a cube, you can save the number of characters in the first line (after trimming the '\n') and use that to compare against the number of characters in every other line to validate you have a cube. As you read lines, you also need to protect your array bounds so you don't attempt to store more lines in your array than you have rows to hold, so a simple check of row < MAXR combined with your fgets (a[row], MAXC, fp) will impose that limit, e.g.
/* read one maze row per iteration, up to MAXR rows */
while (row < MAXR && fgets (a[row], MAXC, fp)) {
    size_t len = strlen (a[row]);       /* get length of row */
    if (len && a[row][len-1] == '\n')   /* validate it fits in array */
        a[row][--len] = 0;  /* remove trailing '\n' char from end */
    else if (len == MAXC - 1) {
        /* fgets stores at most MAXC-1 chars, so a full buffer without a
         * newline means the row did not fit */
        fprintf (stderr, "error: row exceeds %d chars.\n", MAXC - 2);
        return 1;
    }
    if (!row)       /* if 1st row - set expected ncol for each row */
        ncol = len;
    if (ncol != len) {  /* validate all other rows against 1st */
        /* %zu is the conversion specifier for size_t */
        fprintf (stderr, "error: unequal columns (%zu) on row (%zu)\n",
                 len, row);
        return 1;
    }
    /* your code goes here - example just outputs array */
    for (col = 0; col < ncol; col++)
        putchar (a[row][col]);
    putchar ('\n');
    row++;  /* advance row counter when done processing row */
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
nrow = row; /* save the total number of rows */
You now have all the rows and columns of stored and you have your nrow and ncol values set giving you your nrow x ncol array. The path logic I will leave to you, but I did want to provide an example of how to replace the ' ' with '*' in your path. The following does just that for every possible path character imposing the constrain that each '*' have an adjacent space (you can adjust as needed). Here we just loop 0 -> nrow-1 and nest a loop from 0 -> ncol-1 to loop over each character in the array.
The only wrinkle you have to pay attention to when checking adjacent cells in a row is that you must ensure you are not on the left edge of the maze when you check the column to the left, and not on the right edge of the maze when you check the column to the right (accessing elements beyond the bounds of your array will invoke Undefined Behavior).
You handle the edge checks simply as additions to the conditionals inside your if (...) statement, e.g.
/* you can make multiple passes over the array to determine your path.
* below is just an example of replacing the spaces in the path with
* asterisks.
*/
puts ("\nreplacing path spaces with asterisks\n");
for (row = 0; row < nrow; row++) {
for (col = 0; col < ncol; col++) {
/* if adjacents and current ' ', replace with '*' */
if (col && col < ncol - 1 && /* col > 0 && col < ncol-1 */
/* next checks adjacent and current all ' ' */
a[row][col-1] == ' ' && a[row][col] == ' ' &&
a[row][col+1] == ' ')
a[row][col] = '*'; /* if conditions met, set element '*' */
putchar (a[row][col]);
}
putchar ('\n');
}
Putting all the pieces together in a short example to read any maze up to 512 characters wide, you could do something like the following:
#include <stdio.h>
#include <string.h>
#define MAXC 512    /* declare consts for max NxN you can handle */
/* half as many rows as columns; parenthesized so the macro expands safely
 * inside any larger expression */
#define MAXR ((MAXC + 1) / 2)
/* Read a character maze from the file named by the 1st argument (stdin when
 * none is given), echo it, then print it again with every space that has a
 * space on both its left and right replaced by '*'.
 * Returns 1 on an open failure, an over-long row or rows of unequal length;
 * 0 otherwise. */
int main (int argc, char **argv) {

    size_t row = 0, col = 0, nrow = 0, ncol = 0;
    char a[MAXR][MAXC+1] = {""}; /* declare and initialize array */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (row < MAXR && fgets (a[row], MAXC, fp)) {
        size_t len = strlen (a[row]);       /* get length of row */
        if (len && a[row][len-1] == '\n')   /* validate it fits in array */
            a[row][--len] = 0;  /* remove trailing '\n' char from end */
        else if (len == MAXC - 1) {
            /* fgets stores at most MAXC-1 chars, so a full buffer without
             * a newline means the row did not fit */
            fprintf (stderr, "error: row exceeds %d chars.\n", MAXC - 2);
            return 1;
        }
        if (!row)       /* if 1st row - set expected ncol for each row */
            ncol = len;
        if (ncol != len) {  /* validate all other rows against 1st */
            /* %zu is the conversion specifier for size_t */
            fprintf (stderr, "error: unequal columns (%zu) on row (%zu)\n",
                     len, row);
            return 1;
        }
        /* your code goes here - example just outputs array */
        for (col = 0; col < ncol; col++)
            putchar (a[row][col]);
        putchar ('\n');
        row++;  /* advance row counter when done processing row */
    }
    if (fp != stdin) fclose (fp);   /* close file if not stdin */
    nrow = row;                     /* save the total number of rows */

    /* you can make multiple passes over the array to determine your path.
     * below is just an example of replacing the spaces in the path with
     * asterisks.
     */
    puts ("\nreplacing path spaces with asterisks\n");
    for (row = 0; row < nrow; row++) {
        for (col = 0; col < ncol; col++) {
            /* if adjacents and current ' ', replace with '*' */
            if (col && col < ncol - 1 &&    /* col > 0 && col < ncol-1 */
                    /* next checks adjacent and current all ' ' */
                    a[row][col-1] == ' ' && a[row][col] == ' ' &&
                    a[row][col+1] == ' ')
                a[row][col] = '*';  /* if conditions met, set element '*' */
            putchar (a[row][col]);
        }
        putchar ('\n');
    }

    return 0;
}
Example Use/Output
As indicated, the code simply reads and outputs the original maze and then makes a second pass over the maze outputting it with the path filled with '*'
$ ./bin/array2dread <dat/arrmaze.txt
+---+---+---+---+---+
| | |
+---+ + + + +
| | | | | |
+ + + + +---+
| | | | |
+ + + +---+ +
| | | |
+ +---+---+ + +
| | |
+---+---+---+---+---+
replacing path spaces with asterisks
+---+---+---+---+---+
| * * * | * * * * * |
+---+ * + * + * + * +
| * | * | * | * | * |
+ * + * + * + * +---+
| * | * | * | * * * |
+ * + * + * +---+ * +
| * * * | * | * * * |
+ * +---+---+ * + * +
| * * * * * * * | * |
+---+---+---+---+---+
Look things over and let me know if you have further questions.

Replacing characters within a string in AngularJS

Although I've seen similar questions, nothing quite answers why this doesn't work, and I'm unaware of an alternative.
I'm making a very simple calculator and when pulling the expression from the string I need to replace symbols such as '×' and '÷' with operators recognized by the eval.
Currently I'm trying to work through the string one character at a time and copy it into a new string, replacing where necessary. It seems that none of the if statements checking for characters in the string are ever called and I don't know why.
// Walk expressionPre one element at a time and append a translated form of
// each element to expressionPost. expressionPre and expressionPost are
// defined outside this snippet (presumably both strings — confirm).
for (var i = 0; i < (expressionPre.length) ; i++) {
// Debug output: show the element about to be examined.
alert(expressionPre[i]);
if (expressionPre[i] == "÷") {
expressionPost += "/";
} else if (expressionPre[i] === '×') {
expressionPost += "*";
alert("Finally!");
} else if (expressionPre[i] == "−") {
expressionPost += "-";
// Numeric test: `% 1 == 0` holds for anything that coerces to an integer.
// NOTE(review): when the element is a one-character string, a digit or a
// space passes this test, while characters such as "+", "(", ")" and "."
// coerce to NaN and are therefore not copied to expressionPost.
} else if (expressionPre[i] % 1 == 0) {
expressionPost += expressionPre[i];
}
// Debug output: indexes expressionPost with the INPUT index i.
// NOTE(review): the two only line up if exactly one character has been
// appended per iteration and expressionPost started out empty.
alert(expressionPost[i]);
}

As @beaver says, you should use the replace function directly.
This is a function that replaces all occurrences of a piece of text with another one:
/**
 * Replace every occurrence of `find` in `str` with `replace`.
 *
 * @param {string} str      Text to search.
 * @param {string|RegExp} find  Text to look for. A string is matched
 *     literally; a RegExp object is used as a pattern, as before.
 * @param {string} replace  Replacement text (the usual String.replace
 *     "$" substitution patterns still apply).
 * @returns {string} A new string with all occurrences replaced.
 */
function tools_replaceAll(str, find, replace) {
    // A plain string must be escaped before it is handed to RegExp:
    // otherwise operators such as "+", "*" or "(" throw a SyntaxError and
    // "." silently matches every character.
    var pattern = (find instanceof RegExp)
        ? find
        : String(find).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return str.replace(new RegExp(pattern, 'g'), replace);
}
// Demo: turn the calculator's display symbols into operators that
// JavaScript can evaluate, then show the converted expression.
var str = "(1 ÷ 2 ÷ 2) × 3 × 3 − 4− 4 + 5 + 5";
str = tools_replaceAll(str, "÷", "/");
str = tools_replaceAll(str, "×", "*");
// The expression contains U+2212 MINUS SIGN, not the ASCII hyphen-minus.
// Replacing "-" with "-" would be a no-op and leave the minus signs as-is.
str = tools_replaceAll(str, "\u2212", "-");
alert(str);

csv line parsing, printing extra line at end

I have been supplied a CSV line parser to use in a program, and it seems to have a bug where if there is a blank line at the end of the text file it prints out an empty line, like so
My input is this
test, does, this, work
1, second, line, same
My output looks like this. Not only does it cut off the last word, but it prints out the empty line.
test = 1
does = second
this = line
work = same
test =
What it's supposed to do is match every word with the coinciding one in the top line, which it does, but I don't know where the last "test = " is coming from. Here's the code I was supplied, any ideas about what's wrong with it would be a huge help. Thanks.
/*
 * An array of characters representing a single field: room for up to
 * MAX_CHARS characters plus the terminating NUL.
 */
typedef char f_string[MAX_CHARS+1] ; /* string for each field */
/*
 * A parsed CSV line: the number of fields, and up to MAX_FIELDS of the
 * fields themselves.
 */
typedef struct {
int nfields ; /* 0 => end of file */
f_string field[MAX_FIELDS] ; /* array of strings for fields */
} csv_line ;
/*
 * Returns true iff the character 'ch' ends a field. That is, ch is end of file,
 * a comma, or a newline.
 * 'ch' is an int because it carries the result of getchar(): narrowing it to
 * char would make the byte 0xFF compare equal to EOF where char is signed,
 * and would make EOF undetectable where char is unsigned.
 */
bool is_end_of_field(int ch) {
    return (ch == ',') || (ch == '\n') || (ch == EOF) ;
}
/*
 * Smaller of two integers (either one when they are equal).
 */
int min(int x, int y) {
    if ( x < y ) {
        return x ;
    }
    return y ;
}
/*
 * Read the next field from standard input. Returns the value of getchar() that
 * stopped (terminated) the field: a comma, a newline, or EOF.
 * At most MAX_CHARS characters are stored, since an f_string holds
 * MAX_CHARS+1 bytes including the NUL. Any further characters of an overlong
 * field are still consumed but discarded, so the stored field is always
 * NUL-terminated inside its buffer and the next read starts at the next field.
 */
int get_field(f_string field) {
    /**BEGIN_SOLN**/
    int i = 0 ;
    int next_char ;
    for ( next_char = getchar() ; ! is_end_of_field(next_char) ; next_char = getchar() ) {
        if ( i < MAX_CHARS ) {      /* keep room for the terminating NUL */
            field[i++] = next_char ;
        }
    }
    field[i] = '\0' ;
    return next_char ;
    /**END_SOLN**/
}
/*
* Read in a CSV line. No error checking is done on the number of fields or
* the size of any one field.
* On return, the fields have been filled in (and properly NUL-terminated), and
* nfields is the count of the number of valid fields.
* nfields == 0 means end of file was encountered.
*/
csv_line get_line() {
/**BEGIN_SOLN**/
csv_line line ;
int fi = 0 ; /* index of current field in line */
int stop_ch ; /* character that terminated the last field */
stop_ch = get_field(line.field[fi++]) ;
while ( stop_ch == ',' ) {
stop_ch = get_field(line.field[fi++]) ;
}
line.nfields = (stop_ch == EOF) ? 0 : fi ;
return line ;
/**END_SOLN**/
}
/*
 * Print one data line as "header = value" pairs, one pair per output line.
 * Only as many pairs are printed as both the header and the data line have
 * fields, i.e. the smaller of the two field counts.
 */
void print_csv(csv_line header, csv_line data) {
    /**BEGIN_SOLN**/
    int limit = min(header.nfields, data.nfields) ;
    int k = 0 ;
    while ( k < limit ) {
        printf("%s = %s\n", header.field[k], data.field[k]) ;
        ++k ;
    }
    /**END_SOLN**/
}
/*
 * Driver - the first CSV line read is the header; each following line is
 * printed against that header until get_line() reports a line with no
 * fields (end of file).
 */
int main() {
    csv_line header = get_line() ;
    csv_line current ;
    for ( current = get_line() ; current.nfields > 0 ; current = get_line() ) {
        print_csv(header, current) ;
    }
    return 0 ;
}

/*
 * True when 'ch' (a getchar() result) terminates a field: a comma, a
 * newline, or EOF.
 */
bool is_end_of_field(int ch) {
    switch ( ch ) {
    case ',' :
    case '\n' :
    case EOF :
        return true ;
    default :
        return false ;
    }
}
/*
 * Read the next field from standard input into 'field' (NUL-terminated).
 * Returns true when the field was ended by a comma, i.e. another field
 * follows on the same line; false when it was ended by a newline or EOF.
 * At most MAX_CHARS characters are stored, since an f_string holds
 * MAX_CHARS+1 bytes. The rest of an overlong field is consumed and discarded
 * so the buffer is never overrun.
 */
bool get_field(f_string field){
    int i = 0 ;
    int next_char ;
    while ( ! is_end_of_field(next_char = getchar()) ) {
        if ( i < MAX_CHARS ) {      /* keep room for the terminating NUL */
            field[i++] = next_char ;
        }
    }
    field[i] = '\0';
    return next_char == ',' ;
}
csv_line get_line() {
csv_line line ;
int fi = 0;
while(get_field(line.field[fi++]))
;
line.nfields = line.field[fi-1][0] ? fi : 0;
return line ;
}

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Snowflake - Read comma delimited file with blank values - snowflake-cloud-data-platform

Related

How to count the frequency of the two first letters in a word from a dictionary?

Parsing a CSV File Problems

Reading in an array for maze file in C with an uneven size of cube and extra characters

Replacing characters within a string in AngularJS

csv line parsing, printing extra line at end

Categories

Resources