C Program - printf overlapping with other printf chars - c

I'm working on a program in C that will open, read, and close a file with Linux system calls, and print the contents of the file(s) to the screen. The command format is
$ mycat [-bens] f1 [f2 ...].
The switches are as follows:
-b displays the line number for each non-blank line, starting at 1
-e displays a '$' at the end of each line
-n displays the line number for every line
-s removes all empty lines from the output (effectively single-spacing the output)
The problem is that when I use the -b or -n switch, printf appears to be "overlapping" the line number with what the buffer is trying to print from the text file itself.
Here is the code I have written for the program:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#include <fcntl.h>
#include <getopt.h>
#define BUFFERSIZE 4096
void oops(char *, char *);
/*
 * mycat entry point.
 * Parses the -b/-e/-n/-s switches with getopt(), then opens every remaining
 * argument with open(), reads it in BUFFERSIZE-sized chunks with read() and
 * prints the buffer one character at a time with printf().
 * Returns 0 after the last file.  An unknown switch exits with EXIT_FAILURE;
 * open/read failures are reported through oops(), which exits.
 */
int main(int ac, char *av[])
{
int fd, numRead, curr, i, c;
char buf[BUFFERSIZE] = {0};
extern char *optarg;
extern int optind;
int tmpS = 0;
int tmpB = 0;
int bFlag = 0;
int eFlag = 0;
int nFlag = 0;
int sFlag = 0;
int bLineNum = 1;
int nLineNum = 1;
/* Flag processing in argument list */
while( (c = getopt(ac, av, "bens")) != -1)
{
switch(c)
{
case 'b':
bFlag = 1;
break;
case 'e':
eFlag = 1;
break;
case 'n':
nFlag = 1;
break;
case 's':
sFlag = 1;
break;
default:
exit(EXIT_FAILURE);
}
}
/* Scan through each argument after flag */
for(i = optind; i < ac; i++)
{
/* Error handling when opening each file */
/* NOTE(review): fd is never passed to close() anywhere in this function. */
if((fd = open(av[i], O_RDONLY)) == -1)
oops("Cannot open ", av[i]);
/* Read from file to buffer, until end is reached */
while( (numRead = read(fd, buf, BUFFERSIZE)) > 0)
{
/* Once buffer is filled, process each address in buffer */
/* NOTE(review): the bound is BUFFERSIZE, not numRead, so bytes beyond what
   read() returned are processed and printed as well. */
for(curr = 0; curr < BUFFERSIZE; curr++)
{
/* sFlag squeezes output, eliminating blank lines */
if(sFlag && buf[curr] == '\n')
{
tmpS = curr + 1;
/* NOTE(review): tmpS is never compared against the buffer size here. */
while(buf[tmpS] != '\r')
{
if(isspace(buf[tmpS]))
tmpS++;
else
break;
}
/* NOTE(review): resuming at tmpS + 1 means the character at tmpS itself
   is not printed. */
curr = tmpS + 1;
}
/* nFlag numbers each line, starting from 1 */
/* The number is emitted when '\n' is seen, i.e. before that '\n' is
   printed at the bottom of this iteration. */
if(nFlag && buf[curr] == '\n')
printf("%d ", nLineNum++);
/* eFlag puts a '$' at the end of every line */
/* Keyed on '\r', so it only fires for input that contains carriage returns. */
if(eFlag && buf[curr] == '\r')
printf(" $");
/* bFlag numbers every non-blank line, starting from 1 */
if(bFlag && buf[curr] == '\n')
{
tmpB = curr + 1;
/* NOTE(review): isEmptyLine() is used here without a prior declaration in
   this block, and the number is printed when it reports an EMPTY line. */
if(isEmptyLine(buf, tmpB))
printf("%d ", bLineNum++);
}
/* Print the current character in the buffer address */
printf("%c", buf[curr]);
}
}
if(numRead == -1)
oops("Read error from ", av[i]);
}
return 0;
}
/*
 * Report a fatal error on stderr and terminate the program.
 *   s1 - text printed after the "Error: " prefix
 *   s2 - prefix handed to perror(), which appends the errno description
 * Does not return: the process exits with status 1.
 */
void oops(char *s1, char *s2)
{
    FILE *err = stderr;

    fprintf(err, "Error: %s ", s1);
    perror(s2);
    exit(1);
}
/*
 * Report whether the text from buf[tmp] up to (not including) the next '\n'
 * consists only of whitespace.
 *
 *   buf - character buffer; it must contain a '\n' or a non-whitespace byte
 *         (a '\0' counts as non-whitespace) at or after index tmp, because
 *         the scan has no length limit of its own
 *   tmp - index at which scanning starts
 *
 * Returns 1 when only whitespace precedes the '\n', 0 otherwise.
 */
int isEmptyLine(char *buf, int tmp)
{
    for (; buf[tmp] != '\n'; tmp++) {
        /* <ctype.h> functions require an unsigned char value (or EOF); a
           plain char can be negative for bytes >= 0x80, which is undefined. */
        if (!isspace((unsigned char)buf[tmp]))
            return 0;
    }
    return 1;
}
Sample input (file1.txt):
An excerpt from LEARNING DOS FOR THE COMPLETE NOVICE, by Steven Woas, copyright 1993.
1. Change to the compressed drive and then issue a CHKDSK
command like so:
c: <ENTER>
chkdsk /f <ENTER>
The /F tells DOS to fix errors.
Another option is to do it like so:
dblspace /chkdsk /f <ENTER>
A shortcut for the DBLSPACE /CHKDSK /F command is:
dblspace /chk /f <ENTER>
Output with -n flag on and running:
sh-4.2$ ./main -n file1.txt
1 excerpt from LEARNING DOS FOR THE COMPLETE NOVICE, by Steven Woas, copyright 1993.
2
3 Change to the compressed drive and then issue a CHKDSK
4 command like so:
5
6 c: <ENTER>
7
8 chkdsk /f <ENTER>
9
10 The /F tells DOS to fix errors.
11
12 Another option is to do it like so:
13
14 dblspace /chkdsk /f <ENTER>
15
16 A shortcut for the DBLSPACE /CHKDSK /F command is:
17
18 dblspace /chk /f <ENTER>
I'm having the same problem with the -b flag and I don't know why. Does it have to do with \r and \n not being read properly?

Your program exhibits the misbehavior you describe for files with Windows- (DOS-)style line endings (\r\n), but different misbehavior for files with UNIX-style line endings (\n alone) and yet different misbehavior for files with classic Mac OS-style line endings (\r alone). Inasmuch as you seem to be assuming Windows-style line endings overall, I'll focus on that.
Consider what happens when your program reaches the end of a line. It first processes the \r character, ultimately printing it. This causes the output position to return to the beginning of the line (which is possible because the standard output is line-buffered by default). You then print the line number, overwriting whatever may have been there before, and finally print the \n character, causing the buffer to be flushed and the output to move to the next line.
You probably ought to recognize the \r\n sequence as a line ending, instead of trying to handle these characters individually. That may prove to be a bit challenging, as you need to account for the possibility that the pair is split across two read()s, but that shouldn't be too hard. This will also give you the opportunity to consider what to do if you encounter a lone \n and / or a lone \r, which your program could handle more gracefully than it now does.

Related

Read data to Text File and reserve de output with N character

I want read data from console and output to Text file with reserve of N character per variable of structure type.
The Text file is similar to:
1 111 1 Peter
22 22 2 John Lays
3 3 3 Anne Belgs
I do not know if I'm using the most correct functions.
Also I can not read ("carro.name") more than 1 word (example: John Lays)
struct estruturaCarro {
int id, potencia, avariado;
char name[11];
} carro;
...
//Read data to Text File:
...
printf("\n ID......:"); scanf("%d", &carro.id);
printf("\n Potencia:"); scanf("%d", &carro.potencia);
printf("\n Avariado:"); scanf("%d", &carro.avariado);
printf("\n NAME:"); scanf("%10[0-9a-zA-Z ]", carro.name); // or scanf("%[^\n]s",...)
fprintf(fp, "%-2d %-3d %-1d %-10s \n\n", carro.id, carro.potencia, carro.avariado, carro.name);
...
//Show File Text data:
...
int registos=0;
while(1)
{
fscanf(fp, "%d %d %d %-10s", &carro.id, &carro.potencia, &carro.avariado, carro.name);
if(feof(fp)){ break; }
printf("%-2d %-3d %-1d %-10s\n", carro.id, carro.potencia, carro.avariado, carro.name);
registos++;
}
printf("\nCarros=%d", registos);
As you say in your question you cannot use scanf to read a complex name including spaces.
But before searching for how to do it, you need to decide what to do.
Probably you do not want to memorize the extra spaces at the beginning and at the end (including the newline), and probably a name must not be empty.
But what about inside a complex name ? If the user enter John Lays do you save the name with the two spaces or you want to simplify to have only one ? Do you have to manage other special character like '-' (are John - Lays / John- Lays / John -Lays read as John-Lays ?).
What to do if the input string is longer than 10 characters ? Just to stop to read letting the rest for the next read or to bypass up to a newline ? Because you print a message before each input you clearly want an input per line and the rest of the line must be bypassed.
If you do not want to store the string exactly as it is entered, the best way is probably to write your own string-reading function.
You also have to decide what to do if the user do not enter a number for ID or Potencia or Avariado, currently you do not even detect the error, this is not a good way. So in that case do you abort all (exit program), or you redo the read ? Probably you prefer to read again, for that you need to bypass the invalid input, but what that means, to bypass all up to a newline ?
For instance :
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Prompt with msg and read one int from stdin into *v.
   On invalid input the rest of the line is discarded, "invalid value" is
   printed and the prompt is shown again.
   Returns 1 once a number has been read, 0 if end of input is reached first. */
int readInt(const char * msg, int * v)
{
    fputs(msg, stdout);
    while (scanf("%d", v) != 1) {
        int ch;

        /* not a number (or EOF): drop everything up to and including \n */
        do {
            ch = fgetc(stdin);
            if (ch == EOF)
                return 0;
        } while (ch != '\n');

        puts("invalid value"); /* the message could also be passed as an argument */
        fputs(msg, stdout);
    }
    return 1;
}
/* Prompt with msg and read one line from stdin into s.
   - leading and trailing whitespace is removed
   - every internal run of whitespace is stored as a single ' '
   - any other character is accepted as is ('-' and the like are not special)
   - leading blank lines are skipped, so the result is never empty
   - at most sz-1 characters are stored, followed by the null character
     (as fgets does); sz must be > 1
   - if the line is too long, the rest of it up to \n is discarded
   Returns 0 if end of input is reached before any character, else 1. */
int readStr(const char * msg, char * s, size_t sz)
{
    fputs(msg, stdout);

    /* Read the first character, skipping whitespace including \n.
       scanf returns EOF (not 0) at end of input, so the only success value
       to test for is 1. */
    if (scanf(" %c", s) != 1)
        return 0;

    size_t index = 1;
    /* Initialised so that the "skip the rest of the line" loop below is well
       defined even when the copy loop never runs (sz == 2). */
    int c = 0;

    sz -= 1;

    while (index != sz) {
        c = fgetc(stdin);

        if ((c == EOF) || (c == '\n'))
            break;

        if (!isspace(c))
            s[index++] = c;
        else if (s[index - 1] != ' ')
            s[index++] = ' '; /* collapse a run of whitespace into one space */
    }

    /* terminate the string, dropping a trailing space if there is one */
    s[(s[index - 1] != ' ') ? index : index-1] = 0;

    /* discard whatever is left of the line */
    while ((c != EOF) && (c != '\n'))
        c = fgetc(stdin);

    return 1;
}
/* ******************* */
/* One car record: three int fields and a name buffer of 11 bytes
   (10 characters plus the terminating null).
   `carro` is the single file-scope instance used by main(). */
struct estruturaCarro {
int id, potencia, avariado;
char name[11];
} carro;
/* Repeatedly read one record (ID, Potencia, Avariado, NAME) into the global
   `carro` and echo it.  Stops with 0 after a record whose name is "end";
   prints "EOF" and returns -1 if input ends in the middle of a record. */
int main()
{
    for (;;) {
        /* && short-circuits, so a later field is not prompted for once an
           earlier read has hit end of input */
        int complete = readInt("\n ID......:", &carro.id)
                    && readInt("\n Potencia:", &carro.potencia)
                    && readInt("\n Avariado:", &carro.avariado)
                    && readStr("\n NAME:", carro.name, sizeof(carro.name));

        if (!complete) {
            puts("EOF");
            return -1;
        }

        printf("%-2d %-3d %-1d '%-10s' \n\n", carro.id, carro.potencia, carro.avariado, carro.name);

        if (strcmp(carro.name, "end") == 0)
            break;
    }

    return 0;
}
Compilation and execution:
pi#raspberrypi:/tmp $ gcc -pedantic -Wextra -Wall r.c
pi#raspberrypi:/tmp $ ./a.out
ID......:aze
invalid value
ID......:qsd
invalid value
ID......:1
Potencia:2
Avariado:3
NAME:aze u iiiiiiiiiiiiiiiiii
1 2 3 'aze u iiii'
ID......:11
Potencia:22
Avariado:0
NAME: end
11 22 0 'end '
pi#raspberrypi:/tmp $
When you read in your file and supposing it was produced doing fprintf(fp, "%-2d %-3d %-1d %-10s", ...) :
char line[21]; /* each line has 20 characters newline included */
while (fgets(line, sizeof(line), fp) != NULL) {
if (sscanf(line, "%d %d %d", &carro.id, &carro.potencia, &carro.avariado) != 3)
/* abnormal case, invalid file */
break; /* anything else you want to do */
/* the string starts at the index 9 and has 10 characters out of the newline */
memcpy(carro.name, line + 9, 10);
carro.name[10] = 0;
/* ... */
}
Note that the name has trailing spaces if its length is less than 10 characters.
Or you can read in a way similar to the previous on stdin.

Prints new line after '\0' character in C

I'm currently doing an assignment where we are to recreate three switches of the cat command, -n/-T/-E. We are to compile and enter in two parameters, the switch and the file name. I store the textfile contents into a buffer.
/*
 * Partial cat clone: argv[1] is the switch string, argv[2] the file name.
 * Sets the n (number lines), e (show '$' at line ends) and t (show tabs as
 * "^I") flags from the switch, reads the file once into `buffer`, and prints
 * it character by character.  Returns 0; exits with 1 if the file cannot be
 * opened.
 */
int main(int argc, char *argv[]){
int index = 0;
int number = 1;
int fd, n, e, t;
n = e = t = 0;
char command[5];
char buffer[BUFFERSIZE];
/* NOTE(review): command holds 5 bytes, so an argv[1] longer than 4
   characters overflows it. */
strcpy(command, argv[1]);
fd = open(argv[2], O_RDONLY);
if( fd == -1)
{
perror(argv[2]);
exit(1);
}
/* NOTE(review): the return value of read() is ignored and nothing in this
   function writes a terminating '\0' into buffer. */
read(fd, buffer,BUFFERSIZE);
/* One strcmp per accepted spelling of the switch combination. */
if( !strcmp("cat", command)){
printf("%s\n", buffer);
}
else if( !strcmp("-n", command)){
n = 1;
}
else if( !strcmp("-E", command)){
e = 1;
}
else if( !strcmp("-T", command)){
t = 1;
}
else if( !strcmp("-nE", command) || !strcmp("-En", command)){
n = e = 1;
}
else if( !strcmp("-nT", command) || !strcmp("-Tn", command)){
n = t = 1;
}
else if( !strcmp("-ET", command) || !strcmp("-TE", command)){
t = e = 1;
}
else if( !strcmp("-nET", command) || !strcmp("-nTE", command) ||
!strcmp("-TnE", command) || !strcmp("-EnT", command) ||
!strcmp("-ETn", command) || !strcmp("-TEn", command)){
n = e = t = 1;
}
else{
/* Execution continues after this message; no flag is set. */
printf("Invalid Switch Entry");
}
/* The first line number is printed up front; later ones follow each '\n'. */
if(n){
printf("%d ", number++);
}
/* NOTE(review): index is incremented inside the condition, so the body
   looks at the character AFTER the one that was just tested against '\0'. */
while(buffer[index++] != '\0' && ( n || e || t)){
if(buffer[index] == '\n' && e && n){
printf("$\n%d ", number++);
}
else if(buffer[index] == '\n' && e){
printf("$\n");
}
else if(buffer[index] == '\t' && t){
printf("^I");
}
else if(buffer[index] == '\n' && n){
printf("\n%d ", number++);
}
else {
printf("%c", buffer[index]);
}
}
printf("\n");
close(fd);
return 0;
}
Everything works perfectly except when I try to use the -n command. It adds an extra new line. I use a textfile that has
hello
hello
hello world!
instead of
1 hello
2 hello
3 hello world!
it will print out this:
1 hello
2 hello
3 hello world!
4
For some reason it adds the extra line after the world!
Am I missing something simple?
This might not fix your problem, but I don't see any code to put the terminating null character in buffer. Try:
// Reserve one character for the null terminator.
ssize_t n = read(fd, buffer, BUFFERSIZE-1);
if ( n == -1 )
{
// Deal with error.
printf("Unable to read the contents of the file.\n");
exit(1); //???
}
buffer[n] = '\0';
The three cat options that you implement have different "modes":
-T replaces a character (no tab is written);
-E prepends a character with additional output (the new-line character is still written);
-n prepends each line with additional output.
You can handle the first two modes directly. The third mode requires information from the character before: A new line starts at the start of the file and after a new-line character has been read. So you need a flag to keep track of that.
(Your code prints a line number after a new-line character is found. That means that you have to treat the first line explicitly and that you get one line number too many at the end. After all, a file with n lines has n new-line characters and you print n + 1 line numbers.)
Other issues:
As R Sahu has pointed out, your input isn't null-terminated. You don't really need a null terminator here: read returns the number of bytes read or an error code. You can use that number as limit for index.
You increment index in the while condition, which means that you look at the character after the one you checked inside the loop, which might well be the null character. You will also miss the first character in the file.
In fact, you don't need a buffer here. When the file is larger than you buffer, you truncate it. You could call read in a loop until you read fewer bytes than BUFFERSIZE, but the simplest way in this case is to read one byte after the other and process it.
You use too many compound conditions. This isn't wrong per se, but it makes for complicated code. Your main loop reads like a big switch when there are in fact only a few special cases to treat.
The way you determine the flags is both too complicated and too restricted. You check all combinations of flags, which is 6 for the case that all flags are given. What if you add another flag? Are you going to write 24 more strcmps? Look for the minus sign as first character and then at the letters one by one, setting flags and printing error messages as you go.
You don't need to copy argv[1] to command; you are only inspecting it. And you are introducing a source of error: If the second argument is longer than 4 characters, you will get undefined behaviour, very likely a crash.
If you don't give any options, the file name should be argv[1] instead of argv[2].
Putting this (sans the flag parsing) into practice:
FILE *f = fopen(argv[2], "r");
int newline = 1; // marker for line numbers
// Error checking
for (;;)
{
int c = fgetc(f); // read one character
if (c == EOF) break; // terminate loop on end of file
if (newline) {
if (n) printf("%5d ", number++);
newline = 0;
}
if (c == '\n') {
newline = 1;
if (e) putchar('$');
}
if (c == '\t' && t) {
putchar('^');
putchar('I');
} else {
putchar(c);
}
}
fclose(f);
Edit: If you are restricted to using the Unix open, close and read, you can still use the approach above. You need an additional loop that reads blocks of a certain size with read. The read function returns the value of the bytes read. If that is less than the number of bytes asked for, stop the loop.
The example below adds yet an additional loop that allows to concatenate several files.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define BUFFERSIZE 0x400
/*
 * Minimal cat built on open/read/close.
 *
 * Leading arguments that start with '-' are switch groups; the letters may
 * be combined (e.g. -nE):
 *   n  prefix every line with its number, starting at 1
 *   E  print '$' before every newline
 *   T  print tabs as "^I"
 * All remaining arguments are files, printed in order.
 *
 * Returns 0 on success; exits with status 1 on an unknown switch or when a
 * file cannot be opened or read.
 */
int main(int argc, char *argv[])
{
    int n = 0;
    int e = 0;
    int t = 0;
    int number = 1;     /* first line is line 1, as with cat -n */
    int first = 1;

    while (first < argc && *argv[first] == '-') {
        const char *str = argv[first] + 1;

        while (*str) {
            switch (*str) {
            case 'n': n = 1; break;
            case 'E': e = 1; break;
            case 'T': t = 1; break;
            default:  fprintf(stderr, "Unknown switch -%c.\n", *str);
                      exit(1);  /* a usage error is a failure, not success */
            }
            str++;
        }
        first++;
    }

    while (first < argc) {
        char buffer[BUFFERSIZE];
        ssize_t bytes;
        ssize_t i;
        int newline = 1;    /* true when the next character starts a line */
        int fd = open(argv[first], O_RDONLY);

        if (fd == -1) {
            fprintf(stderr, "Could not open %s.\n", argv[first]);
            exit(1);
        }

        /* read() may return fewer bytes than requested before the end of the
           file, so keep going until it reports 0 (EOF) or -1 (error). */
        while ((bytes = read(fd, buffer, BUFFERSIZE)) > 0) {
            for (i = 0; i < bytes; i++) {
                int c = buffer[i];

                if (newline) {
                    if (n) printf("%5d ", number++);
                    newline = 0;
                }
                if (c == '\n') {
                    newline = 1;
                    if (e) putchar('$');
                }
                if (c == '\t' && t) {
                    putchar('^');
                    putchar('I');
                } else {
                    putchar(c);
                }
            }
        }

        if (bytes == -1) {
            fprintf(stderr, "Could not read %s.\n", argv[first]);
            close(fd);
            exit(1);
        }

        close(fd);
        first++;
    }

    return 0;
}

The number of characters of comments in a file (C programming)

I can't seem to get it right, tried everything, but..
/*
 * Interactive loop: asks for a file name, scans the file line by line for
 * two consecutive '/' characters, prints the accumulated count, then asks
 * whether to process another file.  `p`, `dir` and `checker()` are not
 * defined in this block.
 * Returns 0 when the file cannot be opened.
 * NOTE(review): there is no return statement on the normal exit path even
 * though the function is declared to return int.
 */
int commentChars() {
char str[256], fileName[256];
FILE *fp;
int i;
do{
long commentCount=0;
/* NOTE(review): the C standard leaves fflush() on an input stream undefined. */
fflush(stdin);
printf("%s\nEnter the name of the file in %s/", p, dir);
/* NOTE(review): gets() cannot limit how much it writes into fileName. */
gets(fileName);
if(!(fp=fopen(fileName, "r"))) {
printf("Error! File not found, try again");
return 0;
}
/* NOTE(review): feof() only becomes true after a read has failed, and the
   result of fgets() below is not checked. */
while(!feof(fp)) {
fgets(str,sizeof str,fp);
/* NOTE(review): the bound is the array size (inclusive), not the length of
   the line just read, so str[256] and str[257] are read; this `i` also
   shadows the outer `i`. */
for(int i=0;i<=sizeof str;i++) {
if(str[i] == '/' && str[i+1] == '/') {
/* Adds the whole line length minus 2 for every match; no break follows. */
commentCount += (strlen(str)-2);
}
}
}
fclose(fp);
printf("All the chars, contained in a comment: %ld\n", commentCount);
puts(p);
printf("Do you want to search for another file?<Y/N>: ");
i=checker();
}while(i);}
The result is "All the chars, containted in a comment: 0", even though I have comments.
And my second question was.. Analogically, how can I do the same for comments, containing /* */, seems like an impossible job for me.
I think you best use regular expressions. They seem scary, but they're really not that bad for things like this. You can always try playing some regex golf to practice ;-)
I'd approach it as follows:
Build a regular expression that captures comments
Scan your file for it
Count the characters in the match
Using some regex code and a bit about matching comments in C, I hacked this together which should allow you to count all the bytes that are part of a block style comment /* */ - Including the delimiters. I only tested it on OS X. I suppose you can handle the rest?
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#define MAX_ERROR_MSG 0x1000

/*
 * Compile regex_text into *r as an extended, newline-sensitive regex.
 *
 *   r          - regex object to fill in; release it with regfree() after a
 *                successful call
 *   regex_text - pattern source
 *
 * Returns 0 on success.  On failure prints the regerror() text and returns 1.
 */
int compile_regex(regex_t *r, char * regex_text)
{
    int flags = REG_EXTENDED | REG_NEWLINE;

    /* REG_ENHANCED is not defined by every <regex.h>; request it only where
       the header offers it, so the file still compiles elsewhere. */
#ifdef REG_ENHANCED
    flags |= REG_ENHANCED;
#endif

    int status = regcomp(r, regex_text, flags);
    if (status != 0) {
        char error_message[MAX_ERROR_MSG];

        regerror(status, r, error_message, sizeof error_message);
        printf ("Regex error compiling '%s': %s\n",
                regex_text, error_message);
        return 1;
    }
    return 0;
}
/*
 * Find every non-overlapping match of r in to_match, print each one, and add
 * the length in bytes of each match to *nbytes.
 *
 *   r        - compiled regex
 *   to_match - null-terminated text to search
 *   nbytes   - running total; incremented, never reset, by this function
 *
 * Returns the non-zero regexec() status that ended the search (REG_NOMATCH
 * once there are no further matches).
 */
int match_regex(regex_t *r, const char * to_match, long long *nbytes)
{
    /* Start of the text still to be searched */
    const char *p = to_match;
    /* Only the whole match is used; sub-expression groups are not needed */
    regmatch_t m[1];

    for (;;) {
        int nomatch = regexec (r, p, 1, m, 0);
        if (nomatch) {
            printf("No more matches.\n");
            return nomatch;
        }

        /* regoff_t has no fixed width, so convert before handing it to printf */
        long long length = (long long)(m[0].rm_eo - m[0].rm_so);

        *nbytes += length;
        printf("match length(bytes) : %lld\n", length);
        printf("Match: %.*s\n\n", (int)length, p + m[0].rm_so);

        if (m[0].rm_eo > 0) {
            p += m[0].rm_eo;
        } else if (*p != '\0') {
            /* Empty match at the current position: step over one character,
               otherwise the same empty match would be found forever. */
            p++;
        } else {
            /* Empty match at the very end of the text: nothing left to scan. */
            printf("No more matches.\n");
            return REG_NOMATCH;
        }
    }
}
/*
 * Usage: prog <filename>
 * Loads the whole file into memory, searches it for block comments and
 * prints the total number of bytes they occupy (delimiters included).
 * Returns 0 on success or after printing the usage line, 1 if the file
 * cannot be read, memory cannot be allocated or the regex does not compile.
 */
int main(int argc, char *argv[])
{
    regex_t r;
    char regex_text[128] = "/\\*(.|[\r\n])*?\\*/";
    long long comment_bytes = 0;
    char *file_contents;
    long input_file_size;
    size_t bytes_read;
    FILE *input_file;

    if(argc != 2) {
        printf("Usage : %s <filename>", argv[0]);
        return 0;
    }

    input_file = fopen(argv[1], "rb");
    if (input_file == NULL) {
        perror(argv[1]);
        return 1;
    }

    if (fseek(input_file, 0, SEEK_END) != 0 ||
        (input_file_size = ftell(input_file)) < 0) {
        perror(argv[1]);
        fclose(input_file);
        return 1;
    }
    rewind(input_file);

    /* One extra byte: regexec() needs a null-terminated string. */
    file_contents = malloc((size_t)input_file_size + 1);
    if (file_contents == NULL) {
        perror("malloc");
        fclose(input_file);
        return 1;
    }

    bytes_read = fread(file_contents, sizeof(char), (size_t)input_file_size, input_file);
    fclose(input_file);
    file_contents[bytes_read] = '\0';

    if (compile_regex(&r, regex_text) != 0) {
        free(file_contents);
        return 1;
    }

    match_regex(&r, file_contents, &comment_bytes);
    regfree(&r);
    free(file_contents);

    printf("Found %lld bytes in comments\n", comment_bytes);
    return 0;
}
This basically trivial modification of your code deals with several problems in your code.
You should not use feof() like that — `while (!feof(file))` is always wrong.
You should not read data that is not part of the string just read.
I've also refactored your code so that the function takes a file name, opens, counts and closes it, and reports on what it found.
#include <stdio.h>
#include <string.h>
// Revised interface - process a given file name, reporting
/*
 * Count the characters that follow the first double-slash comment introducer
 * on each line of `file` and print the total.
 *
 * The introducer itself is not counted; the newline that fgets() keeps is.
 * Lines are read in pieces of at most 255 characters.  The scan does not
 * know about string literals, so an introducer inside a string is counted
 * as a comment too.
 *
 * Prints an error on stderr and returns if the file cannot be opened.
 */
static void commentChars(char const *file)
{
    char str[256];
    FILE *fp;
    long commentCount = 0;

    if (!(fp = fopen(file, "r")))
    {
        fprintf(stderr, "Error! File %s not found\n", file);
        return;
    }

    while (fgets(str, sizeof(str), fp) != 0)
    {
        size_t len = strlen(str);

        for (size_t i = 0; i + 1 < len; i++)
        {
            if (str[i] == '/' && str[i + 1] == '/')
            {
                /* only what comes after the introducer belongs to the
                   comment, not the code that precedes it on the line */
                commentCount += (long)(len - i - 2);
                break;
            }
        }
    }

    fclose(fp);
    printf("%s: Number of characters contained in comments: %ld\n", file, commentCount);
}
/*
 * Run commentChars() on every file named on the command line, or on
 * /dev/stdin when the program is started without arguments.  Returns 0.
 */
int main(int argc, char **argv)
{
    if (argc != 1)
    {
        int next = 1;

        while (next < argc)
        {
            commentChars(argv[next]);
            next++;
        }
        return 0;
    }

    commentChars("/dev/stdin");
    return 0;
}
When run on the source code (ccc.c), it yields:
ccc.c: Number of characters contained in comments: 58
The comment isn't really complete (oops), but it serves to show what goes on. It counts the newline which fgets() preserves as part of the comment, though the // introducer is not counted.
Dealing with /* comments is harder. You need to spot a slash followed by a star, and then read up to the next star slash character pair. This is probably more easily done using character by character input than line-by-line input; you will, at least, need to be able to interleave character analysis with line input.
When you're ready for it, you can try this torture test on your program. It's what I use to check my comment stripper, SCC (which doesn't handle trigraphs — by conscious decision; if the source contains trigraphs, I have a trigraph remover which I use on the source first).
/*
#(#)File: $RCSfile: scc.test,v $
#(#)Version: $Revision: 1.7 $
#(#)Last changed: $Date: 2013/09/09 14:06:33 $
#(#)Purpose: Test file for program SCC
#(#)Author: J Leffler
*/
/*TABSTOP=4*/
// -- C++ comment
/*
Multiline C-style comment
#ifndef lint
static const char sccs[] = "#(#)$Id: scc.test,v 1.7 2013/09/09 14:06:33 jleffler Exp $";
#endif
*/
/*
Multi-line C-style comment
with embedded /* in line %C% which should generate a warning
if scc is run with the -w option
Two comment starts /* embedded /* in line %C% should generate one warning
*/
/* Comment */ Non-comment /* Comment Again */ Non-Comment Again /*
Comment again on the next line */
// A C++ comment with a C-style comment marker /* in the middle
This is plain text under C++ (C99) commenting - but comment body otherwise
// A C++ comment with a C-style comment end marker */ in the middle
The following C-style comment end marker should generate a warning
if scc is run with the -w option
*/
Two of these */ generate */ one warning
It is possible to have both warnings on a single line.
Eg:
*/ /* /* */ */
SCC has been trained to handle 'q' single quotes in most of
the aberrant forms that can be used. '\0', '\\', '\'', '\\
n' (a valid variant on '\n'), because the backslash followed
by newline is elided by the token scanning code in CPP before
any other processing occurs.
This is a legitimate equivalent to '\n' too: '\
\n', again because the backslash/newline processing occurs early.
The non-portable 'ab', '/*', '*/', '//' forms are handled OK too.
The following quote should generate a warning from SCC; a
compiler would not accept it. '
\n'
" */ /* SCC has been trained to know about strings /* */ */"!
"\"Double quotes embedded in strings, \\\" too\'!"
"And \
newlines in them"
"And escaped double quotes at the end of a string\""
aa '\\
n' OK
aa "\""
aa "\
\n"
This is followed by C++/C99 comment number 1.
// C++/C99 comment with \
continuation character \
on three source lines (this should not be seen with the -C flag)
The C++/C99 comment number 1 has finished.
This is followed by C++/C99 comment number 2.
/\
/\
C++/C99 comment (this should not be seen with the -C flag)
The C++/C99 comment number 2 has finished.
This is followed by regular C comment number 1.
/\
*\
Regular
comment
*\
/
The regular C comment number 1 has finished.
/\
\/ This is not a C++/C99 comment!
This is followed by C++/C99 comment number 3.
/\
\
\
/ But this is a C++/C99 comment!
The C++/C99 comment number 3 has finished.
/\
\* This is not a C or C++ comment!
This is followed by regular C comment number 2.
/\
*/ This is a regular C comment *\
but this is just a routine continuation *\
and that was not the end either - but this is *\
\
/
The regular C comment number 2 has finished.
This is followed by regular C comment number 3.
/\
\
\
\
* C comment */
The regular C comment number 3 has finished.
Note that \u1234 and \U0010FFF0 are legitimate Unicode characters
(officially universal character names) that could appear in an
id\u0065ntifier, a '\u0065' character constant, or in a "char\u0061cter\
string". Since these are mapped long after comments are eliminated,
they cannot affect the interpretation of /* comments */. In particular,
none of \u0002A. \U0000002A, \u002F and \U0000002F ever constitute part
of a comment delimiter ('*' or '/').
More double quoted string stuff:
if (logtable_out)
{
sprintf(logtable_out,
"insert into %s (bld_id, err_operation, err_expected, err_sql_stmt, err_sql_state)"
" values (\"%s\", \"%s\", \"%s\", \"", str_logtable, blade, operation, expected);
/* watch out for embedded double quotes. */
}
/* Non-terminated C-style comment at the end of the file
#include <stdio.h>
/*
 * Count the characters that lie inside C comments in the stream fp.
 *
 * The scanner is a small state machine:
 *   in_range_comment : between the slash-star and star-slash delimiters
 *                      (the delimiters themselves are not counted)
 *   in_line_comment  : after a double slash up to and including the newline
 *                      (the introducer is not counted, the newline is)
 *   in_string        : inside "..."
 *   in_char_constant : inside '...'
 *   none             : ordinary code
 * Comment markers inside string literals and character constants are ignored.
 *
 * Reads fp to end of file and returns the count.
 */
size_t counter(FILE *fp){
    int ch, chn;
    size_t count = 0;
    enum { none, in_line_comment, in_range_comment, in_string, in_char_constant } status;

    status = none;
    while(EOF!=(ch=fgetc(fp))){
        switch(status){
        case in_line_comment :
            if(ch == '\n'){
                status = none;
            }
            ++count;
            continue;
        case in_range_comment :
            if(ch == '*'){
                chn = fgetc(fp);
                if(chn == '/'){
                    status = none;
                    continue;
                }
                /* not the end marker: put the look-ahead back (a no-op for EOF) */
                ungetc(chn, fp);
            }
            ++count;
            continue;
        case in_string :
            if(ch == '\\'){
                /* A backslash escapes whatever follows, including another
                   backslash, so always consume the next character.  Looking
                   only for \" would mistake the closing quote of "\\" for an
                   escaped one. */
                (void)fgetc(fp);
            } else if(ch == '"'){
                status = none;
            }
            continue;
        case in_char_constant :
            if(ch == '\\'){
                /* same reasoning as for strings: '\\' ends at the next quote */
                (void)fgetc(fp);
            } else if(ch == '\''){
                status = none;
            }
            continue;
        case none :
            switch(ch){
            case '/':
                if('/' == (chn = fgetc(fp))){
                    status = in_line_comment;
                    continue;
                } else if('*' == chn){
                    status = in_range_comment;
                    continue;
                } else
                    ungetc(chn, fp);
                break;
            case '"':
                status = in_string;
                break;
            case '\'':
                status = in_char_constant;
                break;
            }
        }
    }
    return count;
}
/* Count the comment characters of the program text read from stdin and
   print the total on its own line. */
int main(void){
    FILE *fp = stdin;
    size_t c = counter(fp);

    /* %zu is the conversion for size_t; %lu only matches where size_t
       happens to be unsigned long */
    printf("%zu\n", c);
    return 0;
}

How to remove ^M ^J characters in linux

I have an external machine which send me results in my Raspberry pi. In my emulator Cutecom I have the results line by line without problems. I use Codeblocks and I wrote my own C application to read these data every 10 seconds. But something strange happens. Sometimes I have the results line by line and sometimes I have the strange characters ^M ^J at the end of each line and as a result I have terrible finals results. I think that these EOF characters are because the external machine has developed in Windows.
The good results
+PARAMETERS: 45 BYTES FROM 0000:0000 (063)
MACHINE_1:(AN=23.45,H=34.56,D=12.34)
The bad results
+PARAMETERS: 45 BYTES FROM 0000:0000 (063)^M^JMACHINE_1:
(AN=21.45,H=33.56,D=10.34)
Ok, until here the only problem is the way the command line shows the result but my results are ok. But if I try to use strtok to get some tokens then I have serious problems because of these characters. What can I do? Can I add something to escape these characters?This is the part of the code which I use to read data from the machine
char buff[300];
memset(buff, 0, sizeof(buff));
for (;;)
{
n=read(fd,buff,sizeof(buff));
sleep(1);
printf("%s", buff);
printf("\n");
....
....
You're just reading blocks of 300 characters, so there is no string termination \0.
You'll have to look at n to see how much data you have read and then process the data before printing it, i.e. look for the ^M^J and terminate the line, then continue reading the rest of the data.
FYI ^M^J (carriage return + line feed) is the Windows line termination (it's just ^J on Linux).
The following should read multiple messages, convert ^J to \n and ignore ^M.
Note this uses STDIN, not a serial port.
#include <stdio.h>
#include <unistd.h>
/*
 * Filter stdin to stdout, treating the two-character sequences "^M" and "^J"
 * (a literal caret followed by the letter) specially:
 *   "^J" is written as a newline
 *   "^M" is dropped
 * A caret followed by anything else is written unchanged.
 * Returns 0 at end of input, 1 if read() fails.
 */
int main(int argc, char** argv)
{
    int fd=STDIN_FILENO;
    ssize_t i,n;
    int c=0;            /* non-zero while a '^' is waiting for its next character */
    char buff[300];     /* only the first n bytes are ever examined, so it needs no clearing */

    (void)argc;
    (void)argv;

    /* read() returns 0 at end of input and -1 on error; looping without
       checking it would never terminate */
    while ((n=read(fd,buff,sizeof(buff))) > 0)
    {
        for (i=0; i<n; i++)
        {
            switch(buff[i])
            {
            case '^':
                if(c)
                {
                    // ^^ so output first ^
                    putchar('^');
                }
                else
                {
                    // Possible ^M or ^J
                    c++;
                }
                break;
            case 'M':
                if (c)
                {
                    // ignore ^M
                    c=0;
                }
                else
                {
                    // just M
                    putchar(buff[i]);
                }
                break;
            case 'J':
                if (c)
                {
                    // ^J is \n
                    putchar('\n');
                    c=0;
                }
                else
                {
                    // just J
                    putchar(buff[i]);
                }
                break;
            default:
                if (c)
                {
                    // ^ followed by other than J or M
                    putchar('^');
                    c=0;
                }
                putchar(buff[i]);
            }
        }
    }

    if (c)
    {
        // input ended right after a '^': it was an ordinary character
        putchar('^');
    }

    if (n < 0)
    {
        perror("read");
        return 1;
    }
    return 0;
}
I think you can still use strtok() with this. Just add ^M and ^J in the char *delimiters parameter.
Just execute the command "sed -e 's/\^\M$//g' filename"
or I got this from the website.
#!/usr/bin/python
# Interactively strip Windows carriage returns from files, in place.
# For every file name entered, removes each CR byte and each literal
# two-character "^M" sequence, leaving all other text (including ordinary
# '^' and 'M' characters) untouched.  Enter "q" to quit.

try:
    read_line = raw_input   # Python 2
except NameError:
    read_line = input       # Python 3

while True:
    path = read_line('Input file name:(input "q" to quit)')
    if path == 'q':
        break
    # Binary mode keeps the CR bytes visible on every platform.
    with open(path, 'rb') as src:
        data = src.read()
    cleaned = data.replace(b'\r', b'').replace(b'^M', b'')
    with open(path, 'wb') as dst:
        dst.write(cleaned)

Echo All Palindromes, in C

I love the ideas presented in Brian Kernighan and Rob Pike's book, "The UNIX Programming Environment," where they focus on the point of working within an environment where you can put together many (small, precise, well understood) programs on the command line to accomplish many programming tasks.
I'm brushing up on strict ANSI C conventions and trying to stick to this philosophy. Somewhere in this book (I can get an exact page number if needed) they suggest that all programs in this environment should adhere to the following principles:
If input is presented on the command line, as an argument to the program itself, process that input.
If no input is presented on the command line, process input from stdin.
Here's a C program I wrote that will echo any input (numeric or alphabetic) that is a palindrome. My question specifically:
Is this a well behaved C program? In other words, is this what Kernighan and Pike were suggesting is the optimal behavior for a command line application like this?
#include <stdio.h>
#include <string.h> /* for strlen */
/*
 * Echo palindromes.
 * With a command-line argument: prints argv[1] if it reads the same
 * backwards.  Without one: reads whitespace-separated words from stdin with
 * scanf("%s") and prints each word that is a palindrome.
 * The check builds the reversed string in r_string and compares it with
 * strcmp().  Returns 0.
 */
int main(int argc, char* argv[]) {
/* NOTE(review): 100 bytes, but nothing below limits the input length. */
char r_string[100];
if (argc > 1) {
int length = (int)strlen(argv[1]);
int i = 0;
int j = length;
/* terminator first, then the characters are filled in from the back */
r_string[j] = (char)NULL;
j--;
for (i = 0; i < length; i++, j--) {
r_string[j] = argv[1][i];
}
if (strcmp(argv[1], r_string) == 0) {
printf("%s\n", argv[1]);
}
} else {
/* NOTE(review): i_string is never assigned before scanf() writes through
   it, and "%s" carries no field width. */
char* i_string;
while (scanf("%s", i_string) != EOF) {
int length = (int)strlen(i_string);
int i = 0;
int j = length;
r_string[j] = (char)NULL;
j--;
for (i = 0; i < length; i++, j--) {
r_string[j] = i_string[i];
}
if (strcmp(i_string, r_string) == 0) {
printf("%s\n", i_string);
}
}
}
return 0;
}
Yes, I think that you are following Kernighan and Pike's advice. As Hugo said, you could take the argument as a filename, but, IMHO, for this simple program, I'd say that taking the parameter as the palindrome itself may make more sense.
Also, if you allow me extra advice, I would separate the functionality of reading a string from checking whether it is a palindrome or not, because you have that code duplicated right now.
/*
 * Return 1 when the NUL-terminated string c is a palindrome, 0 otherwise.
 * Walks inward from both ends, comparing one pair of characters per step.
 */
int ispalindrome(const char* c) {
    size_t head = 0;
    size_t tail = strlen(c); /* one past the last character */

    while (tail - head >= 2) {
        if (c[head] != c[tail - 1]) {
            return 0; /* mismatching pair: not a palindrome */
        }
        ++head;
        --tail;
    }
    return 1; /* every pair matched */
}
Of course, I am pretty sure this can be improved (it may even have a bug, as I am typing quite fast), but once you have your string, whether from the command line or from user input, you can call this function or a function like it.
NOTE: Edited to reflect comment from Mark, thanks a lot, Mark!
One problem that you have is a potential buffer overflow because you are writing an input of arbitrary length into a buffer with a fixed size. You can fix this by rejecting too long inputs or creating an array of the correct size dynamically. I would avoid using scanf.
Regarding the actual algorithm, you don't need to copy the string reversed and then compare the two strings. You could do the check using only a single copy of the string and a pointer at both ends, both moving in towards the middle.
Here is some code to show the principle:
/* Two-pointer check: compare the outermost characters and step both
   pointers toward the middle for as long as they match. */
char* a = /* pointer to first character in string */;
char* b = /* pointer to last character in string (excluding the null terminator) */;
while (a < b && *a == *b)
{
a++;
b--;
}
/* The loop stops either on a mismatch (a is still below b) or once the
   pointers have met or crossed, meaning every pair matched. */
if (a >= b)
{
// Is palindrome.
}
I agree with Javier that you should factor the palindrome checking code out into a separate function.
Regarding the principles you specified, I believe that these tools usually take their arguments as filenames whose content is to be processed. Instead, you are treating them like the input itself.
Take sort, for example. If you don't specify any arguments, the contents from stdin will be sorted. Otherwise, the contents in the file whose filename you specified will be sorted. It is not the arguments themselves that are processed.
The code for this would be something along these lines:
// Default to standard input; switch to the named file when an argument is given.
FILE * input = stdin;
if (argc > 1)
{
input = fopen(argv[1], "r");
// handle possible errors from the fopen
}
// Read one whitespace-delimited word at a time from whichever stream was chosen.
while (fscanf(input, "%s", i_string) != EOF)
// check if i_string is a palindrome and output to stdout
Also, you should be careful with the buffer overflow specified by Mark Byers.
You're not handling the string reading correctly. The i_string buffer is not initialized, and even if it were, you should limit the number of bytes that scanf reads to avoid the mentioned overflow:
char i_string[1000];
/* The field width goes between '%' and 's': "%999s" stores at most 999
   characters plus the terminating '\0', which exactly fills the buffer.
   scanf returns 1 for each word stored and EOF at end of input. */
while (scanf("%999s", i_string) == 1)
    if (is_palindrome(i_string)) /* Use any function defined in the other answers */
        printf("%s\n", i_string);
You must always reserve one more byte (1000 vs 999) to account for the NUL string terminator. If you want to allow arbitrary length strings, I think you'll have to dynamically allocate the buffer, and resize it in case bigger strings are present. This would be slightly more complicated.
It is useful for text filters such as a program that prints only lines with palindromes to specify input files via command line arguments e.g., it allows:
$ palindromes input*.txt # file patterns
$ find -name '*.txt' -print0 | xargs -0 palindromes
It is a common convention that is supported by many languages. Below are scripts in Perl, Python, and C that have the same usage:
Usage: palindromes [FILE]
Print lines that are palindromes in each FILE.
With no FILE, or when FILE is -, read standard input.
in Perl
#!/usr/bin/perl -w
# Print every input line that is a palindrome once leading and trailing
# whitespace is ignored. The line is printed exactly as it was read.
while (defined(my $line = <>)) { # <> reads stdin or the file(s) named on the command line
    my $trimmed = $line;
    $trimmed =~ s/^\s+//; # drop leading whitespace
    $trimmed =~ s/\s+$//; # drop trailing whitespace (including the newline)
    if ($trimmed eq scalar(reverse($trimmed))) {
        print $line;
    }
}
in Python
#!/usr/bin/env python
import fileinput, sys
# Echo each input line whose whitespace-stripped text is a palindrome.
# fileinput.input() yields lines from the files named on the command line,
# or from stdin when none are given.
for raw_line in fileinput.input():
    text = raw_line.strip()
    if text == "".join(reversed(text)):
        # write the line exactly as read, including its own newline
        sys.stdout.write(raw_line)
in C
#!/usr/local/bin/tcc -run -Wall
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Result codes returned by palindromes() and used as the process exit
   status. With no explicit values they number 0, 1, 2 in order. */
enum {
MATCH,
NO_MATCH,
ERROR
};
/**
 * Whether the bytes in the half-open range [first, last) form a palindrome.
 *
 * `last` points either at the terminating '\0' or one past the final byte
 * when there is no '\0'. Leading and trailing whitespace (as classified by
 * isspace) is ignored. All byte values, including embedded '\0', are
 * allowed. An empty range is a palindrome.
 *
 * @param first  pointer to the first byte of the range
 * @param last   pointer one past the last byte of the range
 * @return true if the trimmed range reads the same in both directions
 */
bool is_palindrome(char *first, char *last) {
    if (first == last)
        return true; /* nothing to compare; also avoids stepping before `first` */

    --last; /* now points at the final byte of the range */

    /* isspace() requires a value representable as unsigned char (or EOF);
       passing a negative plain char is undefined behavior. */
    while (first < last && isspace((unsigned char)*first))
        ++first; /* skip leading space */
    while (first < last && isspace((unsigned char)*last))
        --last; /* skip trailing space */

    for ( ; first < last; ++first, --last)
        if (*first != *last)
            return false;
    return true;
}
/**
 * Print the lines of `fp` that are palindromes to stdout.
 *
 * Lines are read with getline(), so they may be arbitrarily long. Each
 * selected line is written with its exact byte length, so a line that
 * contains an embedded '\0' is not truncated.
 *
 * @param fp  stream to read from; it is not closed here
 * @return MATCH (0) if any line was selected, NO_MATCH (1) if none was,
 *         ERROR (2) if reading or writing failed
 */
int palindromes(FILE *fp) {
    int ret = NO_MATCH;
    char *line = NULL;    /* buffer owned by getline(); released below */
    size_t line_size = 0; /* capacity of `line`, maintained by getline() */
    ssize_t len;          /* bytes read, including '\n' if any, excluding '\0' */

    while ((len = getline(&line, &line_size, fp)) != -1) {
        if (!is_palindrome(line, line + len))
            continue;
        if (fwrite(line, 1, (size_t)len, stdout) != (size_t)len) {
            ret = ERROR;
            break;
        }
        ret = MATCH;
    }

    /* free(NULL) is a no-op, so no guard is needed. A NULL buffer is not an
       error by itself: an allocation failure makes getline() return -1
       without reaching end-of-file, which the check below reports. */
    free(line);

    /* Leaving the loop without end-of-file means a read or write failed. */
    if (!feof(fp))
        ret = ERROR;
    return ret;
}
int main(int argc, char* argv[]) {
int exit_code = NO_MATCH;
if (argc == 1) // no input file; read stdin
exit_code = palindromes(stdin);
else {
// process each input file
FILE *fp = NULL;
int ret = 0;
int i;
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-") == 0)
ret = palindromes(stdin);
else if ((fp = fopen(argv[i], "r")) != NULL) {
ret = palindromes(fp);
fclose(fp);
} else {
fprintf(stderr, "%s: %s: could not open: %s\n",
argv[0], argv[i], strerror(errno));
exit_code = ERROR;
}
if (ret == ERROR) {
fprintf(stderr, "%s: %s: error: %s\n",
argv[0], argv[i], strerror(errno));
exit_code = ERROR;
} else if (ret == MATCH && exit_code != ERROR)
// return MATCH if at least one line is a MATCH, propogate error
exit_code = MATCH;
}
}
return exit_code;
}
Exit status is 0 if any line was selected, 1 otherwise;
if any error occurs, the exit status is 2. It uses GNU getline() that allows arbitrary large lines as an input.

Resources