Related
This is the code I made so far. I apologize if my buffer sizes are an overkill.
The idea is to read the entire configuration file (in this example, it's file.conf), and for now we assume it exists. I'll add error checking later.
Once the file is read into stack space, then the getcfg() function searches the configuration data for the specified name, and if it's found, returns the corresponding value. My function works when the configuration file contains leading spaces before names or values; such spaces are ignored.
Say this is my configuration file:
something=data
apples=oranges
fruit=banana
animals= cats
fried =chicken
My code will work correctly with the first four entries of the config file. for example, if I use "something" as the name, then "data" will be returned.
The last item won't work as of yet because of the trailing spaces after "fried" and before the =. I want to be able to have my function automatically remove those spaces, too, especially in case an option format such as
somethingelse = items
begins to be used. (Note the spaces on both sides of the = sign.)
What can I do to make a less CPU-intensive version of my program that also detects and removes trailing spaces from the name and value when processing the name and values?
Here's my current code:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
int getcfg(char* buf, char *name, char *val) {
int fl = 0, n = 0;
char cfg[1][10000], *p = buf;
memset(cfg, 0, sizeof(cfg));
while (*p) {
if (*p == '\n') {
if (strcmp(cfg[0], name) == 0) {
strcpy(val, cfg[1]);
return 1;
}
memset(cfg, 0, sizeof(cfg));
n = 0;
fl = 0;
} else {
if (*p == '=') {
n = 0;
fl = 1;
} else {
if (n != 0 || *p != ' ') {
cfg[fl][n] = *p;
n++;
}
}
}
p++;
}
return 0;
}
int main() {
char val[10000], buf[100000]; //val=value of config item, buf=buffer for entire config file ( > 100KB config file is nuts)
memset(buf, 0, sizeof(buf));
memset(val, 0, sizeof(val));
int h = open("file.conf", O_RDONLY);
if (read(h, buf, sizeof(buf)) < 1) {
printf("Can't read\n");
}
close(h);
printf("Value stat = %d ", getcfg(buf, "Item", val));
printf("Result = '%s'\n", val);
return 0;
}
Behold is a small (~15 lines) sscanf-based read_params() function which does the job. As a bonus, it understands the comments and complains about erroneous lines (if any):
$ cat config_file.c
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/errno.h>
#define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof (a)[0]))
enum { MAX_LEN=128 };
struct param {
char name[MAX_LEN];
char value[MAX_LEN];
};
void strtrim(char *s)
{
char *p = s + strlen(s);
while (--p >= s && isspace(*p))
*p = '\0';
}
int read_params(FILE *in, struct param *p, int max_params)
{
int ln, n=0;
char s[MAX_LEN];
for (ln=1; max_params > 0 && fgets(s, MAX_LEN, in); ln++) {
if (sscanf(s, " %[#\n\r]", p->name)) /* emty line or comment */
continue;
if (sscanf(s, " %[a-z_A-Z0-9] = %[^#\n\r]",
p->name, p->value) < 2) {
fprintf(stderr, "error at line %d: %s\n", ln, s);
return -1;
}
strtrim(p->value);
printf("%d: name='%s' value='%s'\n", ln, p->name, p->value);
p++, max_params--, n++;
}
return n;
}
int main(int argc, char *argv[])
{
FILE *f;
struct param p[32];
f = argc == 1 ? stdin : fopen(argv[1], "r");
if (f == NULL) {
fprintf(stderr, "failed to open `%s': %s\n", argv[1],
strerror(errno));
return 1;
}
if (read_params(f, p, ARRAY_SIZE(p)) < 0)
return 1;
return 0;
}
Let's see how it works (quotes mark the beginning and the end of each line for clarity):
$ cat bb | sed -e "s/^/'/" -e "s/$/'/" | cat -n
1 'msg = Hello World! '
2 'p1=v1'
3 ' p2=v2 # comment'
4 ' '
5 'P_3 =v3'
6 'p4= v4#comment'
7 ' P5 = v5 '
8 ' # comment'
9 'p6 ='
$ ./config_file bb
1: name='msg' value='Hello World!'
2: name='p1' value='v1'
3: name='p2' value='v2'
5: name='P_3' value='v3'
6: name='p4' value='v4'
7: name='P5' value='v5'
error at line 9: p6 =
Note: as an additional bonus, the value can be anything, except #\n\r chars, including spaces, as can be seen above with the 'Hello World!' example. If it's not what needed, add space and tab into the exception list at the second sscanf() for the value (or specify accepted characters there instead) and drop strtrim() function.
I'll provide a straight-forward version, with everything being done in main and no key:value saving - the function only recognizes where they are and print them. I used the input file you gave and added one more line in the end as something = more_data.
This version of the parser does not recognize multiple data itens (itens separated by spaces in the data fields, you'll have to figure it out as an exercise).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int main(void)
{
int fd = open("file.conf", O_RDONLY, 0);
int i = 0;
char kv[100];
char c;
while (read(fd,&c,1) == 1) {
/* ignoring spaces and tabs */
if (c == '\t' || c == ' ') continue;
else if (c == '=') {
/* finished reading a key */
kv[i] = 0x0;
printf("key found [%s] ", kv);
i = 0;
continue;
} else if (c == '\n') {
/* finished reading a value */
kv[i] = 0x0;
printf(" with data [%s]\n", kv);
i = 0;
continue;
}
kv[i++] = c;
}
close(fd);
return 0;
}
And the output is:
key found [something] with data [data]
key found [apples] with data [oranges]
key found [fruit] with data [banana]
key found [animals] with data [cats]
key found [fried] with data [chicken]
key found [something] with data [more_data]
Explanation
while (read(fd,&c,1) == 1): reads one character at a time from the file.
if (c == '\t' || c == ' ') continue;: this is responsible for ignoring the white-spaces and tabs wherever they are.
else if (c == '='): If the program finds a = character, it concludes that what it just read was a key and treats it. What's inside that if should be easy to understand.
else if (c == '\n'): Then it uses a new-line character to recognize the end of a value. Again, what's inside the if is not hard to understand.
kv[i++] = c;: This is where we save the char value into the buffer kv.
So, with some minor changes, you can adapt this bit of code to become a parsing function that will suit your needs.
Edit and new code
As pointed out by John Bollinger in the comments, using read inside a while to read one character at a time is very costly. I'll post a second version of the program using the same input method OP was using (reading the whole file at once into a buffer) and then parsing it with another function.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
void parse(char *s)
{
char c, kv[100];
int i;
while ((c = *s++)) {
/* ignoring spaces and tabs */
if (c == '\t' || c == ' ') continue;
else if (c == '=') {
/* finished reading a key */
kv[i] = 0x0;
printf("key found [%s] ", kv);
i = 0;
continue;
} else if (c == '\n') {
/* finished reading a value */
kv[i] = 0x0;
printf(" with data [%s]\n", kv);
i = 0;
continue;
}
kv[i++] = c;
}
}
int main(void)
{
int fd = open("file.conf", O_RDONLY, 0);
char buffer[1000];
/* use the reading method that suits you best */
read(fd, buffer, sizeof buffer);
/* only thing parse() expects is a null-terminated string */
parse(buffer);
close(fd);
return 0;
}
It is very unusual to read a whole config file into memory as a flat image, and especially to keep such an image as the internal representation. One would ordinarily parse the file contents into key/value pairs as you go, and store a representation of those pairs.
Also, your use of read() is incorrect, as you cannot safely assume that it will read all bytes of the file in one call. One normally must call read() in a loop, keeping track of the return value from each call to know both when the end of the file is reached and where in the buffer to put the next bytes read.
If the configuration is supposed to be completely generic, so that you don't know in advance what keywords to expect, then you might organize the configuration data in a hash table or a binary search tree, with the parameter names as the keys. If you do know what parameters to expect (or at least which to allow), then you might have a variable or a struct member for each one.
Naturally, the approach to parameter lookup must be paired correctly with the data structure in which you store the parameters. Any of the approaches I suggested will make looking up multiple configuration parameters far faster. They would also avoid wasting memory, and would adapt to extremely large configurations (or at least could do so).
How best to approach reading the file depends on details of your config file format, such as whether keys and/or values are permitted to contain internal spaces, whether more than one key/value pair may appear on the same line, and whether there is an upper bound on the allowed length of config file lines or of keys and values. Here's an approach that expects one key/value pair per line, supports keys and values that contain internal whitespace (but not newlines), but neither of which is longer than 1023 characters, and where keys are not permitted to contain the '=' character:
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
int main() {
char key[1024];
char value[1024];
FILE *config;
int done;
config = fopen("file.conf", "r");
if (!config) {
perror("while opening file.conf");
return 1;
}
do {
char nl = '\0';
int nfields = fscanf(config, " %1023[^=\n]= %1023[^\n]%c", key, value, &nl);
int i;
done = 1;
if (nfields == EOF) {
if (ferror(config)) {
/* handle read error ... */
perror("while reading file.conf");
} else {
/* trailing empty line(s); ignore ... */
}
break;
} else if (nfields == 3) {
if (nl != '\n') {
/* handle excessive-length value ... */
} else {
done = 0;
}
} else if (nfields == 1) {
/* handle excessive-length key ... */
break;
} else {
assert(nfields == 2);
/* last key/value pair, not followed by a newline */
}
if (key[0] == '=') {
/* handle missing key ... */
break;
}
/* successfully read a key / value pair; truncate trailing whitespace */
for (i = strlen(key); key[--i] == ' '; ) {
/* nothing */
}
key[i + 1] ='\0';
for (i = strlen(value); value[--i] == ' '; ) {
/* nothing */
}
value[i + 1] ='\0';
/* record the key / value pair somewhere (but here we just print it) ... */
printf("key: [%s] value: [%s]\n", key, value);
} while (!done);
fclose(config);
return 0;
}
Important points to note about that include:
No mechanism for storing the key / value pairs is provided. I gave you a few options, and there are others, but you must decide what's best for your own purposes. Rather, the program above addresses the problem of parsing your config data once for all, so that you can avoid parsing it de novo every time you perform a lookup.
The code relies on fscanf() to consume any leading whitespace before the key and value, but in order to accommodate internal whitespace in the key and value, it cannot do the same for trailing whitespace.
Instead, it manually trims trailing whitespace from key and value.
The fscanf() format uses explicit field widths to avoid buffer overruns. It uses the %[ and %c field descriptors to scan data that may be or include whitespace.
Although it may look longish, do note how much of that code is dedicated to error handling.
Divide and conquer.
Getting the data and parsing it are best handled with 2 separate routines.
1) Use fgets() or other code with read() to read a line
int foo(FILE *inf) {
char buffer[1000];
while (fgets(buffer, sizeof buffer, inf)) {
if (Parse_KeyValue(buffer, &key_offset, &value_offset)) {
fprintf(stderr, "Bad Line '%s'\n", buffer);
return 1;
}
printf("'%s'='%s'\n", &buffer[key_offset], &buffer[value_offset]);
}
}
2) Parse the line. (Sample unchecked code)
// 0: Success
// 1: failure
int Parse_KeyValue(char *line, size_t *key_offset, size_t *value_offset) {
char *p = line;
while (isspace((unsigned char) *p)) p++;
*key_offset = p - line;
const char *end = p;
while (*p != '=') {
if (*p == '\0') return 1; // fail, no `=` found
if (!isspace((unsigned char) *p)) {
end = p+1;
}
p++;
}
*end = '\0';
p++; // consume `=`
while (isspace((unsigned char) *p)) p++;
*value_offset = p - line;
end = p;
while (*p) {
if (!isspace((unsigned char) *p)) {
end = p+1;
}
p++;
}
*end = '\0';
return 0;
}
This does allow for valid "" key and value. Adjust as needed.
I'm currently doing an assignment where we are to recreate three switches of the cat command, -n/-T/-E. We are to compile and enter in two parameters, the switch and the file name. I store the textfile contents into a buffer.
int main(int argc, char *argv[]){
int index = 0;
int number = 1;
int fd, n, e, t;
n = e = t = 0;
char command[5];
char buffer[BUFFERSIZE];
strcpy(command, argv[1]);
fd = open(argv[2], O_RDONLY);
if( fd == -1)
{
perror(argv[2]);
exit(1);
}
read(fd, buffer,BUFFERSIZE);
if( !strcmp("cat", command)){
printf("%s\n", buffer);
}
else if( !strcmp("-n", command)){
n = 1;
}
else if( !strcmp("-E", command)){
e = 1;
}
else if( !strcmp("-T", command)){
t = 1;
}
else if( !strcmp("-nE", command) || !strcmp("-En", command)){
n = e = 1;
}
else if( !strcmp("-nT", command) || !strcmp("-Tn", command)){
n = t = 1;
}
else if( !strcmp("-ET", command) || !strcmp("-TE", command)){
t = e = 1;
}
else if( !strcmp("-nET", command) || !strcmp("-nTE", command) ||
!strcmp("-TnE", command) || !strcmp("-EnT", command) ||
!strcmp("-ETn", command) || !strcmp("-TEn", command)){
n = e = t = 1;
}
else{
printf("Invalid Switch Entry");
}
if(n){
printf("%d ", number++);
}
while(buffer[index++] != '\0' && ( n || e || t)){
if(buffer[index] == '\n' && e && n){
printf("$\n%d ", number++);
}
else if(buffer[index] == '\n' && e){
printf("$\n");
}
else if(buffer[index] == '\t' && t){
printf("^I");
}
else if(buffer[index] == '\n' && n){
printf("\n%d ", number++);
}
else {
printf("%c", buffer[index]);
}
}
printf("\n");
close(fd);
return 0;
}
Everything works perfectly except when I try to use the -n command. It adds an extra new line. I use a textfile that has
hello
hello
hello world!
instead of
1 hello
2 hello
3 hello world!
it will print out this:
1 hello
2 hello
3 hello world!
4
For some reason it adds the extra line after the world!
Am I missing something simple?
This might not fix your problem, but I don't see any code to put the terminating null character in buffer. Try:
// Reserve one character for the null terminator.
ssize_t n = read(fd, buffer, BUFFERSIZE-1);
if ( n == -1 )
{
// Deal with error.
printf("Unable to read the contents of the file.\n");
exit(1); //???
}
buffer[n] = '\0';
The three cat options that you implement have different "modes":
-T replaces a character (no tab is written);
-E prepends a character with additional output (the new-line character is still written);
-n prepends each line with additional output.
You can handle the first two modes directly. The third mode requires information from the character before: A new line starts at the start of the file and after a new-line character has been read. So you need a flag to keep track of that.
(Your code prints a line number after a new-line character is found. That means that you have to treat the first line explicitly and that you get one too many line umber at the end. After all, a file with n lines has n new-line characters and you print n + 1 line numbers.)
Other issues:
As R Sahu has pointed out, your input isn't null-terminated. You don't really need a null terminator here: read returns the number of bytes read or an error code. You can use that number as limit for index.
You incmenet index in the while condition, which means that you look at the character after the one you checked inside the loop, which might well be the null character. You will also miss the first character in the file.
In fact, you don't need a buffer here. When the file is larger than you buffer, you truncate it. You could call read in a loop until you read fewer bytes than BUFFERSIZE, but the simplest way in this case is to read one byte after the other and process it.
You use too many compound conditions. This isn't wrong per se, but it makes for complicated code. Your main loop reads like a big switch when there are in fact only a few special cases to treat.
The way you determine the flags is both too complicated and too restricted. You chack all combinations of flags, which is 6 for the case that all flags are given. What if you add another flag? Are you going to write 24 more strcmps? Look for the minus sign as first character and then at the letters one by one, setting flags and printing error messages as you go.
You don't need to copy argv[1] to command; you are only inspecting it. And you are introducing a source of error: If the second argument is longer than 4 characters, you will get undefined behaviour, very likely a crash.
If you don't give any options, the file name should be argv[1] instead of argv[2].
Putting this (sans the flag parsing) into practice:
FILE *f = fopen(argv[2], "r");
int newline = 1; // marker for line numbers
// Error checking
for (;;)
{
int c = fgetc(f); // read one character
if (c == EOF) break; // terminate loop on end of file
if (newline) {
if (n) printf("%5d ", number++);
newline = 0;
}
if (c == '\n') {
newline = 1;
if (e) putchar('$');
}
if (c == '\t' && t) {
putchar('^');
putchar('I');
} else {
putchar(c);
}
}
fclose(f);
Edit: If you are restricted to using the Unix open, close and read, you can still use the approach above. You need an additional loop that reads blocks of a certain size with read. The read function returns the value of the bytes read. If that is less than the number of bytes asked for, stop the loop.
The example below adds yet an additional loop that allows to concatenate several files.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define BUFFERSIZE 0x400
int main(int argc, char *argv[])
{
int n = 0;
int e = 0;
int t = 0;
int number = 0;
int first = 1;
while (first < argc && *argv[first] == '-') {
char *str = argv[first] + 1;
while (*str) {
switch (*str) {
case 'n': n = 1; break;
case 'E': e = 1; break;
case 'T': t = 1; break;
default: fprintf(stderr, "Unknown switch -%c.\n", *str);
exit(0);
}
str++;
}
first++;
}
while (first < argc) {
int fd = open(argv[first], O_RDONLY);
int newline = 1;
int bytes;
if (fd == -1) {
fprintf(stderr, "Could not open %s.\n", argv[first]);
exit(1);
}
do {
char buffer[BUFFERSIZE];
int i;
bytes = read(fd, buffer,BUFFERSIZE);
for (i = 0; i < bytes; i++) {
int c = buffer[i];
if (newline) {
if (n) printf("%5d ", number++);
newline = 0;
}
if (c == '\n') {
newline = 1;
if (e) putchar('$');
}
if (c == '\t' && t) {
putchar('^');
putchar('I');
} else {
putchar(c);
}
}
} while (bytes == BUFFERSIZE);
close(fd);
first++;
}
return 0;
}
Currently have this code (see below). It was working fine, but I needed to be able to manage empty lines, and also lines with comment. These comment lines are defined as : having "#" as the first character of a line. Initially, I would just loop 100 times, because I limit the storage into variable to 100 as well, but when skipping empty lines and remark lines, the simple counter to 100 doesn't work. Still, only the first 100 valid lines may be read, and stored into the "menu_choices" variable. Also, the length of each line should be limited to 100 characters (or, I have a variable of 100 characters, so 99+enter). I'll need to include that as well. I can't decide wheter I need an IF statement, or a while, or whatever.
int x;
char inputfile[512];
char menu_number[100];
char menu_choices[100][100];
printf("\nopening:%s\n",inputfile);
p_datafile=fopen(inputfile,"r");
x=1;
//for (x=1 ; x <= 100 ; x++ )
do
{
// read next line into variable
fgets(menu_choices[x],100,p_datafile);
if ( strcmp ( menu_choices[x] , "" ) == 0 ) break;
if ( strncmp(menu_choices[x],"#",1) )
{
printf("%d[lngth=%d]=%s",x,strlen(menu_choices[x]),menu_choices[x]);
x++;
}
else
{
printf("\n LINE WITH #");
}
sleep (1);
} while (1);
fclose(inputfile);
Can you improve the above code ?
To achieve what you describe, may be this could work.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int
main()
{
int lineCount;
char filename[512];
/* if you want to read 100 characters you need 1 extra for the termination 0 */
char menu_choices[100][100 + 1 /* termination 0 */];
int extraLines;
int lineLength;
FILE *p_datafile; // p_datafile was not declared...
int character;
int skipLine;
printf("enter filename please: ");
scanf("%511s", filename);
printf("\topening:%s\n", filename);
lineCount = 0;
p_datafile = fopen(filename, "r");
if (p_datafile == NULL)
return 0; // or perhaps return a value, since there is no context here I don't know
memset(menu_choices[0], 0, 101);
extraLines = 0;
lineLength = 0;
skipLine = 0;
while ((p_datafile != NULL) && ((character = getc(p_datafile)) != EOF))
{
if (character == '\n')
{
if ((lineLength > 0) && (skipLine == 0))
{
menu_choices[lineCount][lineLength] = 0;
lineCount += 1;
printf("valid line %d [length -> %d] -> %s\n", lineCount, lineLength, menu_choices[lineCount - 1]);
memset(menu_choices[lineCount], 0, 101);
}
else if (skipLine != 0)
{
fprintf(stderr, "line starts with #\n");
extraLines += 1;
}
else if (lineLength == 0)
{
fprintf(stderr, "line is empty\n");
extraLines += 1;
}
skipLine = 0;
lineLength = 0;
}
else if ((lineLength == 0) && (isspace(character) != 0))
{
/* Ignore spaces if non-space characters where not found yet. */
}
else
{
if ((lineLength == 0) && (character == '#'))
skipLine = 1; /* Ignore this line, it starts with */
else if (lineLength < 100)
{
menu_choices[lineCount][lineLength] = (char)character;
lineLength += 1;
}
}
}
fclose(p_datafile); // the FILE * not the filename
return 0;
}
I'm not very sure if i have understood your question, but it seems following points can help you to achieve your goal.
add a NULL check on p_datafile to check the success of fopen(). [assuming p_datafile is already defined as FILE *, which part you din't show us.]
instead of break; after if ( strcmp ( menu_choices[x] , "" ) == 0 ), use continue.
add a continue; after printf("\n LINE WITH #"); inside else block.
after if...else block, check if (x == 100), if true, break;
in fclose(), use p_datafile. It expects the file pointer, not the filename.
One answer was just before me but I'll post anyway. Bear in mind that fgets() reads the newline too, so I have tested for/ removed it. Also, your indexing: as far as possible use 0-based indexing, and make any adjustments between 0.. and 1.. for the human at the point of input and output.
#include <stdio.h>
#include <string.h>
int main()
{
int x, len;
char inputfile[512] = "lines.txt";
char menu_number[100];
char menu_choices[100][100];
FILE *p_datafile;
printf ("\nopening:%s\n",inputfile);
p_datafile=fopen (inputfile,"rt"); // it's a text file
if (p_datafile == NULL) {
printf ("Can't open file %s\n", inputfile);
exit (1);
}
x = 0; // get the array indexing right
while (x<100 && fgets(menu_choices[x], 100, p_datafile)) {
if (menu_choices[x][0] != '\n' && menu_choices[x][0] != '#') {
len = strlen (menu_choices[x]);
if (menu_choices[x][len-1] == '\n') // remove newline
menu_choices[x][len-1] = 0;
printf ("%s\n", menu_choices[x]);
// sleep (1);
x++;
}
}
fclose(p_datafile); // corrected mistake (don't use filename)
return 0;
}
Input file:
Line 1
Line 02
#not line 3
line three
Program output:
opening:lines.txt
Line 1
Line 02
line three
I'm working on the second half of a program for class and the objective of the program is simple, but I can't figure out what's causing this output for my program. Basically, we have to read a file, using a function we wrote for the string header. We should then print out all the four-letter words in that file, obviously ignoring punctuation and whitespace. I've got the logic for that down, but what I can't figure out is why, even though I check to see if the length of the string is 4 before printing it, I sometimes get output that's clearly longer than 4. Here is input text from the file I'm using.
This is a test of the program which will only print out the four letter words in this file. Let's see if it works!
And this is the output I'm getting...
This
test
willham
onlyham
fourtam
thissrm
filesrm
Here is the main program: http://pastebin.com/xviETPFm
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "mystring.h"
int fTerminate(char ch, int * pbDiscardChar);
int main(int argc, char ** argv) {
MYSTRING str;
FILE * in;
if((str = mystring_init_default()) == MYSTRING_STATUS_ERROR) {
printf("Error initializing MYSTRING object.\n");
return -1;
}
if((in = fopen("book.txt", "r")) == NULL) {
printf("Error opening file \"book.txt\". Does the file exist?\n");
return -1;
}
while(mystring_input(str, in, 1, fTerminate) != MYSTRING_STATUS_ERROR) {
if(mystring_size(str) == 4) {
mystring_output(str, stdout);
printf("\n");
}
}
mystring_destroy(&str);
return 0;
}
int fTerminate(char ch, int * pbDiscardChar) {
// Terminate on whitespace characters or non-alpha characters.
return (*pbDiscardChar = ((isspace(ch) || (isalpha(ch) == 0))?1:0));
}
And just in case you need it, here is the input function: http://pastebin.com/vD71hGEt
MyString_Status mystring_input(MYSTRING hString,
FILE * hFile,
int bIgnoreLeadingWhiteSpace,
int (*fTerminate)(char ch, int * pbDiscardChar)) {
char ch = '\0';
int eofCheck = 0;
int t, discard;
mystring_truncate(hString, 0);
if(hFile == NULL) return MYSTRING_STATUS_ERROR;
eofCheck = fscanf(hFile, "%c", &ch);
// If bIgnoreWhiteSpace is true, gobble leading whitespace.
if(bIgnoreLeadingWhiteSpace) {
while(isspace(ch)) {
eofCheck = fscanf(hFile, "%c", &ch);
if(eofCheck == EOF) return MYSTRING_STATUS_ERROR;
}
}
// Add all valid characters to the string, overwriting the old string.
while(eofCheck != EOF) {
t = fTerminate(ch, &discard);
if(discard == 0) mystring_push(hString, ch);
if(t) return MYSTRING_STATUS_SUCCESS;
eofCheck = fscanf(hFile, "%c", &ch);
}
if(eofCheck == EOF) return MYSTRING_STATUS_ERROR;
return MYSTRING_STATUS_SUCCESS;
}
It clearly works for the first two strings, so what happened with the rest of them? Does my computer just like ham?
I love the ideas presented in Brian Kernighan and Rob Pike's book, "The UNIX Programming Environment," where they focus on the point of working within an environment where you can put together many (small, precise, well understood) programs on the command line to accomplish many programming tasks.
I'm brushing up on strict ANSI C conventions and trying to stick to this philosophy. Somewhere in this book (I can get an exact page number if needed) they suggest that all programs in this environment should adhere to the following principles:
If input is presented on the command line, as an argument to the program itself, process that input.
If no input is presented on the command line, process input from stdin.
Here's a C program I wrote that will echo any input (numeric or alphabetic) that is a palindrome. My question specifically:
Is this a well behaved C program? In other words, is this what Kernighan and Pike were suggesting is the optimal behavior for a command line application like this?
#include <stdio.h>
#include <string.h> /* for strlen */
int main(int argc, char* argv[]) {
char r_string[100];
if (argc > 1) {
int length = (int)strlen(argv[1]);
int i = 0;
int j = length;
r_string[j] = (char)NULL;
j--;
for (i = 0; i < length; i++, j--) {
r_string[j] = argv[1][i];
}
if (strcmp(argv[1], r_string) == 0) {
printf("%s\n", argv[1]);
}
} else {
char* i_string;
while (scanf("%s", i_string) != EOF) {
int length = (int)strlen(i_string);
int i = 0;
int j = length;
r_string[j] = (char)NULL;
j--;
for (i = 0; i < length; i++, j--) {
r_string[j] = i_string[i];
}
if (strcmp(i_string, r_string) == 0) {
printf("%s\n", i_string);
}
}
}
return 0;
}
Yes, I think that you are following the R&K advice. As Hugo said, you could take the argumentas a filename, bu,t IMHO, for this simple program, I'd say that taking the parameter as the palindrome itself may make more sense.
Also, if you allow me extra advice, I would separate the functionality of reading a string from checking whether it is a palindrome or not, because you have that code duplicated right now.
int ispalindrome(const char* c) {
size_t len = strlen(c);
size_t limit = len/2;
size_t i;
for (i = 0; i < limit; i++) {
if(c[i]!=c[len-i-1]) break; /* Different character found */
}
return i==limit; /* If we reached limit, it's a palyndrome */
}
Of course, I am pretty sure this can be improved (it may even have a bug, I am typping quite fast), but once that you have your string, be either from command line or user input, you can call this function or a functiom like this.
NOTE: Edited to reflect comment from Mark, thanks a lot, Mark!
One problem that you have is a potential buffer overflow because you are writing an input of arbitrary length into a buffer with a fixed size. You can fix this by rejecting too long inputs or creating an array of the correct size dynamically. I would avoid using scanf.
Regarding the actual algorithm, you don't need to copy the string reversed and then compare the two strings. You could do the check using only a single copy of the string and a pointer at both ends, both moving in towards the middle.
Here is some code to show the principle:
char* a = /* pointer to first character in string */;
char* b = /* pointer to last character in string (excluding the null terminator) */;
while (a < b && *a == *b)
{
a++;
b--;
}
if (a >= b)
{
// Is palindrome.
}
I agree with Javier that you factor the palindrome checking code out into a separate function.
Regarding the principles you specified, I believe that these tools usually take their arguments as filenames whose content is to be processed. Instead, you are treating them like the input itself.
Take sort, for example. If you don't specify any arguments, the contents from stdin will be sorted. Otherwise, the contents in the file whose filename you specified will be sorted. It is not the arguments themselves that are processed.
The code for this would be something along these lines:
FILE * input = stdin;
if (argc > 1)
{
input = fopen(argv[1], "r");
// handle possible errors from the fopen
}
while (fscanf(input, "%s", i_string) != EOF)
// check if i_string is a palindrome and output to stdout
Also, you should be careful with the buffer overflow specified by Mark Byers.
You're not handling the string reading correctly. The i_string buffer is not initialized, and even if it were, you're should limit the number of bytes that scanf reads to avoid the mentioned overflow:
char i_string[1000];
while (scanf("999%s", i_string) != EOF)
if (is_palindrome(i_string)) /* Use any function defined in the other answers */
printf("%s\n", i_string);
You must always reserve one more byte (1000 vs 999) to account for the NULL string terminator. If you want to allow arbitrary length strings, I think you'll have to dinamically allocate the buffer, and resize it in case bigger strings are present. This would be slightly more complicated.
It is useful for text filters such as a program that prints only lines with palindromes to specify input files via command line arguments e.g., it allows:
$ palindromes input*.txt # file patterns
$ find -name '*.txt' -print0 | xargs -0 palindromes
It is common convention that is supported by many languages. Below are scripts in Perl, Python, C that has the same usage:
Usage: palindromes [FILE]
Print lines that are polindromes in each FILE.
With no FILE, or when FILE is -, read standard input.
in Perl
#!/usr/bin/perl -w
while (<>) { # read stdin or file(s) specified at command line
$line = $_;
s/^\s+//; # remove leading space
s/\s+$//; # remove trailing space
print $line if $_ eq reverse $_; # print line with a palindrome
}
in Python
#!/usr/bin/env python
import fileinput, sys
for line in fileinput.input(): # read stdin or file(s) specified at command line
s = line.strip() # strip whitespace characters
if s == s[::-1]: # is palindrome
sys.stdout.write(line)
in C
#!/usr/local/bin/tcc -run -Wall
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
enum {
MATCH,
NO_MATCH,
ERROR
};
bool is_palindrome(char *first, char *last) {
/** Whether a line defined by range [first, last) is a palindrome.
`last` points either to '\0' or after the last byte if there is no '\0'.
Leading and trailing spaces are ignored.
All characters including '\0' are allowed
*/
--last; // '\0'
for ( ; first < last && isspace(*first); ++first); // skip leading space
for ( ; first < last && isspace(*last); --last); // skip trailing space
for ( ; first < last; ++first, --last)
if (*first != *last)
return false;
return true;
}
int palindromes(FILE *fp) {
/** Print lines that are palindromes from the file.
Return 0 if any line was selected, 1 otherwise;
if any error occurs return 2
*/
int ret = NO_MATCH;
char *line = NULL;
size_t line_size = 0; // line size including terminating '\0' if any
ssize_t len = -1; // number of characters read, including '\n' if any,
// . but not including the terminating '\0'
while ((len = getline(&line, &line_size, fp)) != -1) {
if (is_palindrome(line, line + len)) {
if (printf("%s", line) < 0) {
ret = ERROR;
break;
}
else
ret = MATCH;
}
}
if (line)
free(line);
else
ret = ERROR;
if (!feof(fp))
ret = ERROR;
return ret;
}
int main(int argc, char* argv[]) {
int exit_code = NO_MATCH;
if (argc == 1) // no input file; read stdin
exit_code = palindromes(stdin);
else {
// process each input file
FILE *fp = NULL;
int ret = 0;
int i;
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-") == 0)
ret = palindromes(stdin);
else if ((fp = fopen(argv[i], "r")) != NULL) {
ret = palindromes(fp);
fclose(fp);
} else {
fprintf(stderr, "%s: %s: could not open: %s\n",
argv[0], argv[i], strerror(errno));
exit_code = ERROR;
}
if (ret == ERROR) {
fprintf(stderr, "%s: %s: error: %s\n",
argv[0], argv[i], strerror(errno));
exit_code = ERROR;
} else if (ret == MATCH && exit_code != ERROR)
// return MATCH if at least one line is a MATCH, propogate error
exit_code = MATCH;
}
}
return exit_code;
}
Exit status is 0 if any line was selected, 1 otherwise;
if any error occurs, the exit status is 2. It uses GNU getline() that allows arbitrary large lines as an input.