Is there a C compiler that fails to compile this? - c

I was hanging out in my profiler for a while trying to figure out how to speed up a common log parser which was bottlenecked around the date parsing, and I tried various algorithms to speed things up.
The thing I tried that was fastest for me was also by far the most readable, but potentially non-standard C.
This worked quite well in GCC, icc, and my really old and picky SGI compiler. As it's a quite readable optimization, where doesn't it do what I want?
/*
 * Map a three-letter month abbreviation followed by '/' (e.g. "Jan/") to a
 * zero-based month index.
 *
 * Up to four bytes of input are packed, first byte most significant, into an
 * int that is then compared against multi-character character constants.
 * Returns 0..11 on a match and -1 otherwise.
 */
static int parseMonth(const char *input) {
int rv=-1;
int inputInt=0;
int i=0;
/* Stop at a NUL so a short string is never read past its terminator. */
for(i=0; i<4 && input[i]; i++) {
inputInt = (inputInt << 8) | input[i];
}
/* NOTE: the value of a constant such as 'Jan/' is implementation-defined. */
switch(inputInt) {
case 'Jan/': rv=0; break;
case 'Feb/': rv=1; break;
case 'Mar/': rv=2; break;
case 'Apr/': rv=3; break;
case 'May/': rv=4; break;
case 'Jun/': rv=5; break;
case 'Jul/': rv=6; break;
case 'Aug/': rv=7; break;
case 'Sep/': rv=8; break;
case 'Oct/': rv=9; break;
case 'Nov/': rv=10; break;
case 'Dec/': rv=11; break;
}
return rv;
}

Solaris 10 - SPARC - SUN Compiler.
Test code:
#include <stdio.h>
/*
 * Translate "Jan/" .. "Dec/" into a month index 0..11; anything else is -1.
 * At most four bytes are packed into an int (first byte most significant)
 * and matched against multi-character constants, whose values are
 * implementation-defined.
 */
static int parseMonth(const char *input) {
    int packed = 0;
    int pos = 0;

    /* Pack up to four bytes, stopping early at the terminating NUL. */
    while (pos < 4 && input[pos]) {
        packed = (packed << 8) | input[pos];
        pos++;
    }

    switch (packed) {
    case 'Jan/': return 0;
    case 'Feb/': return 1;
    case 'Mar/': return 2;
    case 'Apr/': return 3;
    case 'May/': return 4;
    case 'Jun/': return 5;
    case 'Jul/': return 6;
    case 'Aug/': return 7;
    case 'Sep/': return 8;
    case 'Oct/': return 9;
    case 'Nov/': return 10;
    case 'Dec/': return 11;
    default:     return -1;
    }
}
/* Expected results for parseMonth(): input text and the index it must yield. */
static const struct
{
    const char *data;   /* points at a string literal, so it is const */
    int result;
} test_case[] =
{
    { "Jan/", 0 },
    { "Feb/", 1 },
    { "Mar/", 2 },
    { "Apr/", 3 },
    { "May/", 4 },
    { "Jun/", 5 },
    { "Jul/", 6 },
    { "Aug/", 7 },
    { "Sep/", 8 },
    { "Oct/", 9 },
    { "Nov/", 10 },
    { "Dec/", 11 },
    { "aJ/n", -1 },
};

/* Element count of a true array (not valid for pointers). */
#define DIM(x) (sizeof(x)/sizeof(*(x)))

/* Run every test case; print one line per mismatch, nothing when all pass. */
int main(void)
{
    size_t i;

    for (i = 0; i < DIM(test_case); i++)
    {
        int result = parseMonth(test_case[i].data);

        if (result != test_case[i].result)
        {
            printf("!! FAIL !! %s (got %d, wanted %d)\n",
                   test_case[i].data, result, test_case[i].result);
        }
    }
    return(0);
}
Results (GCC 3.4.2 and Sun):
$ gcc -O xx.c -o xx
xx.c:14:14: warning: multi-character character constant
xx.c:15:14: warning: multi-character character constant
xx.c:16:14: warning: multi-character character constant
xx.c:17:14: warning: multi-character character constant
xx.c:18:14: warning: multi-character character constant
xx.c:19:14: warning: multi-character character constant
xx.c:20:14: warning: multi-character character constant
xx.c:21:14: warning: multi-character character constant
xx.c:22:14: warning: multi-character character constant
xx.c:23:14: warning: multi-character character constant
xx.c:24:14: warning: multi-character character constant
xx.c:25:14: warning: multi-character character constant
$ ./xx
$ cc -o xx xx.c
$ ./xx
!! FAIL !! Jan/ (got -1, wanted 0)
!! FAIL !! Feb/ (got -1, wanted 1)
!! FAIL !! Mar/ (got -1, wanted 2)
!! FAIL !! Apr/ (got -1, wanted 3)
!! FAIL !! May/ (got -1, wanted 4)
!! FAIL !! Jun/ (got -1, wanted 5)
!! FAIL !! Jul/ (got -1, wanted 6)
!! FAIL !! Aug/ (got -1, wanted 7)
!! FAIL !! Sep/ (got -1, wanted 8)
!! FAIL !! Oct/ (got -1, wanted 9)
!! FAIL !! Nov/ (got -1, wanted 10)
!! FAIL !! Dec/ (got -1, wanted 11)
$
Note that the last test case still passed - that is, it generated a -1.
Here's a revised - more verbose - version of parseMonth() which does work the same under both GCC and Sun C compiler:
#include <stdio.h>
/*
 * MONTH_CODE("Jan/") does not reduce to an integer constant (it indexes a
 * string literal), so it cannot be used as a case label. The MONTH_xxx
 * constants below spell the same packing out by hand.
 */
#define MONTH_CODE(x) (((((((x)[0]<<8)|(x)[1])<<8)|(x)[2])<<8)|(x)[3])
#define MONTH_JAN (((((('J'<<8)|'a')<<8)|'n')<<8)|'/')
#define MONTH_FEB (((((('F'<<8)|'e')<<8)|'b')<<8)|'/')
#define MONTH_MAR (((((('M'<<8)|'a')<<8)|'r')<<8)|'/')
#define MONTH_APR (((((('A'<<8)|'p')<<8)|'r')<<8)|'/')
#define MONTH_MAY (((((('M'<<8)|'a')<<8)|'y')<<8)|'/')
#define MONTH_JUN (((((('J'<<8)|'u')<<8)|'n')<<8)|'/')
#define MONTH_JUL (((((('J'<<8)|'u')<<8)|'l')<<8)|'/')
#define MONTH_AUG (((((('A'<<8)|'u')<<8)|'g')<<8)|'/')
#define MONTH_SEP (((((('S'<<8)|'e')<<8)|'p')<<8)|'/')
#define MONTH_OCT (((((('O'<<8)|'c')<<8)|'t')<<8)|'/')
#define MONTH_NOV (((((('N'<<8)|'o')<<8)|'v')<<8)|'/')
#define MONTH_DEC (((((('D'<<8)|'e')<<8)|'c')<<8)|'/')

/*
 * Map "Jan/" .. "Dec/" to 0..11; return -1 for anything else.
 *
 * Up to four bytes of input are packed, first byte most significant, and
 * compared with the MONTH_xxx codes. Packing is done in an unsigned int
 * with each byte taken as unsigned char, so a byte with the top bit set can
 * neither sign-extend over the bytes already packed nor cause a left shift
 * of a negative value.
 */
static int parseMonth(const char *input) {
    unsigned int packed = 0;
    int i;

    /* Stop at a NUL so a short string is never read past its terminator. */
    for (i = 0; i < 4 && input[i]; i++) {
        packed = (packed << 8) | (unsigned char)input[i];
    }

    switch (packed) {
    case MONTH_JAN: return 0;
    case MONTH_FEB: return 1;
    case MONTH_MAR: return 2;
    case MONTH_APR: return 3;
    case MONTH_MAY: return 4;
    case MONTH_JUN: return 5;
    case MONTH_JUL: return 6;
    case MONTH_AUG: return 7;
    case MONTH_SEP: return 8;
    case MONTH_OCT: return 9;
    case MONTH_NOV: return 10;
    case MONTH_DEC: return 11;
    default:        return -1;
    }
}
/* Expected results for parseMonth(): input text and the index it must yield. */
static const struct
{
    const char *data;   /* points at a string literal, so it is const */
    int result;
} test_case[] =
{
    { "Jan/", 0 },
    { "Feb/", 1 },
    { "Mar/", 2 },
    { "Apr/", 3 },
    { "May/", 4 },
    { "Jun/", 5 },
    { "Jul/", 6 },
    { "Aug/", 7 },
    { "Sep/", 8 },
    { "Oct/", 9 },
    { "Nov/", 10 },
    { "Dec/", 11 },
    { "aJ/n", -1 },
    { "/naJ", -1 },
};

/* Element count of a true array (not valid for pointers). */
#define DIM(x) (sizeof(x)/sizeof(*(x)))

/* Run every test case; print one line per mismatch, nothing when all pass. */
int main(void)
{
    size_t i;

    for (i = 0; i < DIM(test_case); i++)
    {
        int result = parseMonth(test_case[i].data);

        if (result != test_case[i].result)
        {
            printf("!! FAIL !! %s (got %d, wanted %d)\n",
                   test_case[i].data, result, test_case[i].result);
        }
    }
    return(0);
}
I wanted to use MONTH_CODE() but the compilers did not cooperate.

/* Reject anything that is not three non-NUL characters followed by '/'. */
if ( !input[0] || !input[1] || !input[2] || input[3] != '/' )
return -1;
/*
 * Decide on as few characters as possible. The letters are not fully
 * validated, so an unexpected abbreviation is mapped to some month instead
 * of being rejected.
 */
switch ( input[0] )
{
case 'F': return 1; // Feb
case 'S': return 8; // Sep
case 'O': return 9; // Oct
case 'N': return 10; // Nov
case 'D': return 11; // Dec;
case 'A': return input[1] == 'p' ? 3 : 7; // Apr, Aug
case 'M': return input[2] == 'r' ? 2 : 4; // Mar, May
// every other first letter is handled as if it were 'J'
default: return input[1] == 'a' ? 0 : (input[2] == 'n' ? 5 : 6); // Jan, Jun, Jul
}
Slightly less readable and not so much validating, but perhaps even faster, no?

You're just computing a hash of those four characters. Why not predefine some integer constants that compute the hash in the same way and use those? Same readability and you're not depending on any implementation specific idiosyncrasies of the compiler.
/*
 * Pack four characters into one 32-bit code, first character in the most
 * significant byte. Every shift is parenthesized because '+' binds tighter
 * than '<<'. The codes are macros rather than variables so that they are
 * integer constant expressions and can be used as case labels.
 */
#define MONTH_CODE(a, b, c, d) \
    (((uint32_t)(a) << 24) | ((uint32_t)(b) << 16) | \
     ((uint32_t)(c) << 8) | (uint32_t)(d))
#define MONTH_JAN MONTH_CODE('J', 'a', 'n', '/')
#define MONTH_FEB MONTH_CODE('F', 'e', 'b', '/')
#define MONTH_MAR MONTH_CODE('M', 'a', 'r', '/')
#define MONTH_APR MONTH_CODE('A', 'p', 'r', '/')
#define MONTH_MAY MONTH_CODE('M', 'a', 'y', '/')
#define MONTH_JUN MONTH_CODE('J', 'u', 'n', '/')
#define MONTH_JUL MONTH_CODE('J', 'u', 'l', '/')
#define MONTH_AUG MONTH_CODE('A', 'u', 'g', '/')
#define MONTH_SEP MONTH_CODE('S', 'e', 'p', '/')
#define MONTH_OCT MONTH_CODE('O', 'c', 't', '/')
#define MONTH_NOV MONTH_CODE('N', 'o', 'v', '/')
#define MONTH_DEC MONTH_CODE('D', 'e', 'c', '/')

/*
 * Map "Jan/" .. "Dec/" to 0..11.
 * Returns (uint32_t)-1 when the first four characters are not a month code.
 */
static uint32_t parseMonth(const char *input) {
    uint32_t rv = (uint32_t)-1;
    uint32_t inputInt = 0;
    int i;

    /* Stop at a NUL so a short string is never read past its terminator. */
    for (i = 0; i < 4 && input[i]; i++) {
        inputInt = (inputInt << 8) | (uint32_t)(input[i] & 0x7f); // clear top bit
    }

    switch (inputInt) {
    case MONTH_JAN: rv = 0; break;
    case MONTH_FEB: rv = 1; break;
    case MONTH_MAR: rv = 2; break;
    case MONTH_APR: rv = 3; break;
    case MONTH_MAY: rv = 4; break;
    case MONTH_JUN: rv = 5; break;
    case MONTH_JUL: rv = 6; break;
    case MONTH_AUG: rv = 7; break;
    case MONTH_SEP: rv = 8; break;
    case MONTH_OCT: rv = 9; break;
    case MONTH_NOV: rv = 10; break;
    case MONTH_DEC: rv = 11; break;
    default: break;
    }
    return rv;
}

I only know what the C Standard says about this (C99):
The value of an integer character
constant containing more than one
character (e.g., 'ab'), or containing
a character or escape sequence that
does not map to a single-byte
execution character, is
implementation-defined. If an integer
character constant contains a single
character or escape sequence, its
value is the one that results when an
object with type char whose value is
that of the single character or escape
sequence is converted to type int.
(6.4.4.4/10 taken from a draft)
So it's implementation defined. Meaning it is not guaranteed it works the same everywhere, but the behavior must be documented by the implementation. For example if int is only 16 bits wide in a particular implementation, then 'Jan/' can't be represented anymore like you intend it (char must be at least 8 bits, while a character literal is always of type int).

char *months = "Jan/Feb/Mar/Apr/May/Jun/Jul/Aug/Sep/Oct/Nov/Dec/";
char *p = strnstr(months, input, 4);
return p ? (p - months) / 4 : -1;

There are at least 3 things that keep this program from being portable:
Multi-character constants are implementation-defined so different compilers may handle them differently.
A byte can be more than 8 bits, there is plenty of hardware where the smallest addressable unit of memory is 16 or even 32 bits, you often find this in DSPs for example. If a byte is more than 8 bits then so will char since char is by definition one byte long; your program will not function properly on such systems.
Lastly, there are many machines where int is only 16-bits (which is the smallest size allowed for int) including embedded devices and legacy machines, your program will fail on these machines as well.

National Instrument's CVI 8.5 for Windows compiler fails on your original code with multiple warnings:
Warning: Excess characters in multibyte character literal ignored.
and errors of the form:
Duplicate case label '77'.
It succeeds on Jonathan's code.

I get warnings, but no errors (gcc). Seems to compile and operate fine. May not work for big-endian systems, though!
I wouldn't suggest this method, though. Perhaps you can xor instead of or-shift, to create a single byte. Then use the case statement on a byte (or, faster, use a LUT of the first N bits).

The fact that a four character constant is equivalent to a particular 32-bit integer is a non-standard feature often seen on compilers for MS Windows and Mac computers (and PalmOS, AFAICR).
On these systems a four character string is commonly used as a tag for identifying chunks of data files, or as an application / data-type identifier (e.g. "APPL").
It's a convenience then for the developer that they can store such a string into various data-structures without worrying about zero-byte termination, pointers, etc.

Comeau compiler
Comeau C/C++ 4.3.10.1 (Oct 6 2008 11:28:09) for ONLINE_EVALUATION_BETA2
Copyright 1988-2008 Comeau Computing. All rights reserved.
MODE:strict errors C99
"ComeauTest.c", line 11: warning: multicharacter character literal (potential
portability problem)
case 'Jan/': rv=0; break;
^
"ComeauTest.c", line 12: warning: multicharacter character literal (potential
portability problem)
case 'Feb/': rv=1; break;
^
"ComeauTest.c", line 13: warning: multicharacter character literal (potential
portability problem)
case 'Mar/': rv=2; break;
^
"ComeauTest.c", line 14: warning: multicharacter character literal (potential
portability problem)
case 'Apr/': rv=3; break;
^
"ComeauTest.c", line 15: warning: multicharacter character literal (potential
portability problem)
case 'May/': rv=4; break;
^
"ComeauTest.c", line 16: warning: multicharacter character literal (potential
portability problem)
case 'Jun/': rv=5; break;
^
"ComeauTest.c", line 17: warning: multicharacter character literal (potential
portability problem)
case 'Jul/': rv=6; break;
^
"ComeauTest.c", line 18: warning: multicharacter character literal (potential
portability problem)
case 'Aug/': rv=7; break;
^
"ComeauTest.c", line 19: warning: multicharacter character literal (potential
portability problem)
case 'Sep/': rv=8; break;
^
"ComeauTest.c", line 20: warning: multicharacter character literal (potential
portability problem)
case 'Oct/': rv=9; break;
^
"ComeauTest.c", line 21: warning: multicharacter character literal (potential
portability problem)
case 'Nov/': rv=10; break;
^
"ComeauTest.c", line 22: warning: multicharacter character literal (potential
portability problem)
case 'Dec/': rv=11; break;
^
"ComeauTest.c", line 1: warning: function "parseMonth" was declared but never
referenced
static int parseMonth(const char *input) {
^

Machine word size issues aside, your compiler may promote input[i] to a negative integer which will just set the upper bits of inputInt with or operation, so I suggest you to be explicit about signedness of char variables.
But since in US, no one cares about the 8th bit, it is probably a non-issue for you.

I'd sure love to see the profiling that shows this is your most significant bottleneck, but in any case if you're going to pull something like this, use a union instead of 50 instructions looping and shifting. Here's a little example program, I'll leave it to you to fit it into your program.
/* union -- demonstrate union for characters */
#include <stdio.h>
/* Overlay a short string with an int so the same bytes can be read as one value. */
union c4_i {
    char c4[5];   /* four characters plus the terminating NUL */
    int i;        /* the same storage viewed as an int */
};

union c4_i ex;

/* Store "abcd" through the char view, then print both views of the union. */
int main(void) {
    ex.c4[0] = 'a';
    ex.c4[1] = 'b';
    ex.c4[2] = 'c';
    ex.c4[3] = 'd';
    ex.c4[4] = '\0';
    /* %x takes an unsigned int, so convert the int view explicitly. */
    printf("%s 0x%08x\n", ex.c4, (unsigned int)ex.i);
    return 0;
}
Here's example output:
bash $ ./union
abcd 0x64636261
bash $

As mentioned by others, that code throws a bunch of warnings and is probably not endian-safe.
Was your original date parser hand-written as well? Have you tried strptime(3)?

Related

Using POSIX glob() function, gl_pathv is returning NULL — how do I get the file name?

I'm trying to get the first file name that ends in .c.
I have a file.c in my directory, and I expect this to be printed. However, (null) keeps getting printed and I don't understand why.
This is my current code:
#include <glob.h>
#include <stddef.h>
#include <stdio.h>
/* Expand "*.c" in the current directory, then print the match count and slot 0 of the result list. */
int main(int argc, char **argv) {
glob_t buf;
/* Together with GLOB_DOOFFS this asks glob() to leave two leading slots in gl_pathv. */
buf.gl_offs = 2;
glob("*.c", GLOB_DOOFFS, NULL, &buf);
printf("%ld", buf.gl_pathc);
/* Slot 0 is one of the reserved leading slots, not the first matched name. */
printf("%s", buf.gl_pathv[0]);
return 0;
}
Can anyone tell me what is going wrong?
You set buf.gl_offs = 2;, and you passed GLOB_DOOFFS to glob(), so you told it to leave two unused slots at the beginning of the gl_pathv list of pointers; it did what you asked, and those slots are NULL pointers. The idea behind the empty slots is that the code is likely to want to add a command name and maybe some arguments where those slots are, so that buf.gl_pathv could be passed to execv(), for example. The POSIX specification of glob() says "The first pointer after the last pathname shall be a null pointer" which is required by execv(), of course.
Use:
#include <glob.h>
#include <stddef.h>
#include <stdio.h>
/*
 * Expand "*.c" with two reserved leading slots and dump every slot of
 * gl_pathv: the reserved slots, the matched names and the terminating
 * null pointer.
 */
int main(void)
{
    glob_t buf;

    /* Honoured because GLOB_DOOFFS is passed: two leading NULL slots. */
    buf.gl_offs = 2;
    if (glob("*.c", GLOB_DOOFFS, NULL, &buf) != 0)
    {
        fprintf(stderr, "glob() failed\n");
        return 1;   /* non-zero so the caller can see the failure */
    }
    printf("%zu\n", buf.gl_pathc);   /* gl_pathc is a size_t */
    /* Slot 0 is a reserved NULL slot; this relies on printf() tolerating it. */
    printf("%s\n", buf.gl_pathv[0]);
    /* gl_pathc excludes the gl_offs reserved slots and the final null pointer. */
    for (size_t i = 0; i < buf.gl_pathc + buf.gl_offs + 1; i++)
    {
        printf("%zu: [%s]\n", i,
               (buf.gl_pathv[i] == NULL ? "<null>" : buf.gl_pathv[i]));
    }
    globfree(&buf);
    return 0;
}
This code is lazy; it is relying on printf("%s\n", buf.gl_pathv[0]); not to crash when passed a null pointer as a string (original code). That is not required by the C standard. The main loop shows what you should do for reliable code — you should map the null pointers before you call printf().
On my machine, it produced:
28
(null)
0: [<null>]
1: [<null>]
2: [alarm47.c]
3: [book41.c]
4: [checkeuid.c]
5: [dec37.c]
6: [glob61.c]
7: [grid17.c]
8: [hms37.c]
9: [int89.c]
10: [ll11.c]
11: [log41.c]
12: [pipe17.c]
13: [pipe29.c]
14: [pipe31.c]
15: [pipe43.c]
16: [pipe73.c]
17: [pipe83.c]
18: [prtree37.c]
19: [rc61.c]
20: [rep31.c]
21: [rn83.c]
22: [rn89.c]
23: [sh31.c]
24: [sh47.c]
25: [sh67.c]
26: [sh71.c]
27: [sh79.c]
28: [sh97.c]
29: [test-strsep.c]
30: [<null>]
Note that the count in buf.gl_pathc does not include the vacant slots specified by buf.gl_offs or the terminating null pointer. I had to check the list of file names in my directory — there are 28 .c files, as it happens. (An earlier version of the code didn't step all the way through the list of names.)
JFTR: I tested on a Mac running macOS Mojave 10.14.6.

What is the fastest way to reverse a power of two in C?

In the equation $2^x = a$:
What is the fastest way in C language to find x with a given power of two value (a) ?
Edit :
The exact mathematical solution is $x = \log_2(a)$.
As (a) is a positive integer and a power of two (no rational number, no equal to zero), this problem can be simplified as "looking for position of set bit".
This post is focused on lite embedded CPU systems. For example : ARM CORTEX M4.
a to x results :
a | x
-------
1 | 0
2 | 1
4 | 2
8 | 3
16 | 4
32 | 5
64 | 6
128 | 7
256 | 8
512 | 9
...
Option 1 : The dirty loop
/*
 * Return the exponent x for which (1 << x) equals value, found by trying
 * x = 0, 1, 2, ... in turn.
 *
 * value is expected to be a power of two; for any other value no exponent
 * ever satisfies the loop condition.
 */
unsigned int get_power_of_two_exponent(unsigned int value)
{
    unsigned int exponent;

    for (exponent = 0; (1 << exponent) != value; exponent++)
    {
        /* nothing to do: the loop header performs the search */
    }
    return exponent;
}
Option 2 : The weird trick
#include <stdint.h>
#if defined(__GNUC__)
/*
 * Return the index (0..31) of the most significant set bit of value, using
 * whichever count-leading-zeros builtin operates on a type of the same size
 * as uint32_t. If neither unsigned int nor unsigned long has that size the
 * process exits with status 127.
 */
static int highest_bit_set(uint32_t value)
{
    if (sizeof value == sizeof (unsigned int))
    {
        return 31 - __builtin_clz(value);
    }
    if (sizeof value == sizeof (unsigned long))
    {
        return 31 - __builtin_clzl(value);
    }
    exit(127); /* Weird architecture! */
}
#endif
Any faster options ?
Fastest in C is almost always look-up tables, at the expense of memory use. Assuming that the value is always exactly a power of 2, you can make a look-up table like this:
/*
 * Return log2(val) for an 8-bit power of two by table lookup.
 * Every table slot not listed below is zero-initialized, so any other
 * input (including 0) yields 0.
 */
uint8_t get_exponent (uint8_t val)
{
    static const uint8_t log2_of[256] =
    {
        [0x01] = 0,
        [0x02] = 1,
        [0x04] = 2,
        [0x08] = 3,
        [0x10] = 4,
        [0x20] = 5,
        [0x40] = 6,
        [0x80] = 7,
    };
    const uint8_t index = val & 0xFF;

    return log2_of[index];
}
It will return 0 in case you pass a value which isn't a power of 2.
This can be expanded further either by looping through for example the 4 bytes of a uint32_t and do 4 table-lookups. Or by making even bigger look-up tables.
On x86 I get the above to boil down to this tiny, branch-free machine code:
get_exponent:
movzx edi, dil
movzx eax, BYTE PTR byte.2173[rdi]
ret
(Swapping to uint_fast8_t gives identical code in this case.)
This answer is in dispute - see comment.
The fastest way, somewhat facetiously1, is to write
switch (a)
{
case 1: return 0;
case 2: return 1;
case 4: return 2;
...
Clearly there are as many labels as there are bits in the type, but this is still O(1).
You could even truncate a to a power of two using the idiom a ^ (a & (a - 1)), at the expense of portability given that only works if a is a 2's complement type.
1Although in C++ you could get the compiler to build the table with constexpr and metaprogramming techniques.
The best performances (on my embedded ARM CORTEX M4 CPU core) are obtained with :
Builtin CLZ solution (Count Leading Zero’s)
Moreover, the CLZ solution is by far much more memory efficient than the lookup table method which take the second place.
The lookup-table method is often still less efficient than the built-in CLZ because the table may be stored in slow memory such as external DDR RAM, where a single access can take a dozen cycles. In this example the effect is amplified by the fact that the instruction cache is enabled but the data cache is not. Besides, keeping such a large table in cache would not have been very appropriate.
It depends how big values you would like to search, and if there's the biggest possible input defined.
If x can be, for example, 100, searching from beginning (x = 0) with step x++, isn't elegant and optimized (100 checks). You can set step x+=5. If the result is lower than searched value, x+=5. If bigger - step back with x-- (max 4 Times). Size of step you can adjust to your needs.
If there's a "top-limit", you can create an array of possible x and implement binary search.
#Lundin's answer seems the best in terms of speed (just 3 assembly instructions!), but it may not be a good option for your embedded system. If huge LUTs are not an option:
The weird trick seems to be a fast option, I guess (you should benchmark each option and see actual results, though). You could use that one in case it exists, and fall back to the usual shifting otherwise:
#include <stdint.h>
/*
 * Return the index of the highest set bit of value (log2 for an exact
 * power of two), or -1 when value is 0.
 *
 * With GCC-compatible compilers a count-leading-zeros builtin is used;
 * otherwise the value is shifted right until it is exhausted.
 */
static int get_pow2_exp(uint32_t value)
{
    int x;

    /*
     * The clz builtins are undefined for 0, so handle it up front; -1 is
     * what the portable loop below produces for 0.
     */
    if (value == 0)
    {
        return -1;
    }
#if defined(__GNUC__)
    if (sizeof(unsigned int) == sizeof(value))
    {
        return 31 - __builtin_clz(value);
    }
    if (sizeof(unsigned long) == sizeof(value))
    {
        return 31 - __builtin_clzl(value);
    }
#endif
    for (x = -1; value; value >>= 1)
    {
        x++;
    }
    return x;
}
If you want to ensure that it is a power of two, you may use popcnt. Your while loop is an infinite loop in case the input is not a power of two, while mine just gives a solution based on the highest bit (which may be incorrect, depending on your needs).
2^x = a is the equation
Assuming 32 bit architecture and 'a' & 'x' as integers.
Here is my approach
/* Results and scratch state of My_Function(); all are overwritten by each call. */
uint32_t x;
uint8_t *ptr;
uint8_t ByteNo, BitNo, i;

/*
 * Scan the four bytes of a in memory order and set x to
 * (bit index within the byte) + 8 * (byte index) for the first byte that
 * holds exactly one set bit.
 *
 * BitNo uses 9 as its "nothing found" marker. A zero byte resets it to 9;
 * a byte with several bits set leaves it untouched. If no byte qualifies,
 * i ends at 4 and x becomes 9 + 4 * 8.
 * The byte index equals the significance only when the least significant
 * byte is stored first.
 */
void My_Function(uint32_t a)
{
    BitNo = 9;
    ByteNo = 9;
    ptr = (uint8_t *)&a;

    i = 0;
    while (i < 4)
    {
        const uint8_t byte = *ptr;

        if (byte == 0x00)
        {
            BitNo = 9;
        }
        else if ((byte & (byte - 1)) == 0)
        {
            /* exactly one bit set: find its position */
            uint8_t bit = 0;

            while ((byte >> bit) != 1)
            {
                bit++;
            }
            BitNo = bit;
        }

        if (BitNo != 9)
        {
            break;
        }
        ptr++;
        i++;
    }

    ByteNo = i;
    x = BitNo + (ByteNo * 8);
}
Another approach:
/*
 * Direct mapping from each 32-bit power of two in a to its exponent x.
 * Any other value of a reaches the default label and leaves x unchanged.
 */
switch(a)
{
case 0x00000001: x=0; break;
case 0x00000002: x=1; break;
case 0x00000004: x=2; break;
case 0x00000008: x=3; break;
case 0x00000010: x=4; break;
case 0x00000020: x=5; break;
case 0x00000040: x=6; break;
case 0x00000080: x=7; break;
case 0x00000100: x=8; break;
case 0x00000200: x=9; break;
case 0x00000400: x=10; break;
case 0x00000800: x=11; break;
case 0x00001000: x=12; break;
case 0x00002000: x=13; break;
case 0x00004000: x=14; break;
case 0x00008000: x=15; break;
case 0x00010000: x=16; break;
case 0x00020000: x=17; break;
case 0x00040000: x=18; break;
case 0x00080000: x=19; break;
case 0x00100000: x=20; break;
case 0x00200000: x=21; break;
case 0x00400000: x=22; break;
case 0x00800000: x=23; break;
case 0x01000000: x=24; break;
case 0x02000000: x=25; break;
case 0x04000000: x=26; break;
case 0x08000000: x=27; break;
case 0x10000000: x=28; break;
case 0x20000000: x=29; break;
case 0x40000000: x=30; break;
case 0x80000000: x=31; break;
default: break;//error condition
}

How can I use ranges in a switch case statement in C?

My logic is:
if number is between 1 to 10, execute first case statement
if number is from 20 to 30, execute second case statement
is there a solution other than the one below?
case '1' ... '10':
case '20' ... '30':
The GCC compiler supports, as a language extension, case ranges like:
switch(i) {
case 0 ... 9: return true;
default: return false;
}
This language extension is also accepted by Clang/LLVM. So use it if you can afford restricting your code to GCC & Clang compilers.
See also this.
I have no idea why this extension was not included in C11 standard.
Notice also that GCC accepts computed or indirect goto and labels as values. There are cases (in particular in
generated C code) where these features are useful. Examples could include some efficient bytecode interpreter. Some implementations of the Ocaml virtual machine are a good example.
void SwitchDemo(int value)
{
switch(value / 10)
{
case 0: ...; break; // 0 - 9
case 1: ...; break; // 10 - 19
...
}
}
or, specific to the question ranges:
void SwitchDemo(int value)
{
switch((value-1) / 10)
{
case 0: ...; break; // 1 - 10
case 1: ...; break; // 11 - 20
...
}
}
Option 1: switch on value / 10, so that case 0 covers 0-9, case 1 covers 10-19, and so on.
Option 2: use if
Option 3:
Another shabby way is using fall through cases like this:
#include <stdio.h>
/*
 * Demonstrate range handling with fall-through case labels: for each i in
 * 1..25, print which range it belongs to.
 */
int main(void) {
int i=1;
for(i=1;i<=25;i++)
{
switch(i)
{
/* Labels 1..10 carry no statements, so they all fall through to one printf. */
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
case 10:
printf("%d is in between 1-10\n", i);
break;
/* Labels 11..20 share the second printf in the same way. */
case 11:
case 12:
case 13:
case 14:
case 15:
case 16:
case 17:
case 18:
case 19:
case 20:
printf("%d is in between 11-20\n", i);
break;
/* The loop only produces 1..25, so default sees 21..25 here. */
default:
printf("%d is above 20\n", i);
}
}
return 0;
}
Output:
1 is in between 1-10
2 is in between 1-10
3 is in between 1-10
4 is in between 1-10
5 is in between 1-10
6 is in between 1-10
7 is in between 1-10
8 is in between 1-10
9 is in between 1-10
10 is in between 1-10
11 is in between 11-20
12 is in between 11-20
13 is in between 11-20
14 is in between 11-20
15 is in between 11-20
16 is in between 11-20
17 is in between 11-20
18 is in between 11-20
19 is in between 11-20
20 is in between 11-20
21 is above 20
22 is above 20
23 is above 20
24 is above 20
25 is above 20
https://ideone.com/Cw6HDO
C doesn't support case values other than single integers (or integer-like things -- characters, enumeration values). So your options are:
As suggested by pzaenger in a now-deleted comment: transform the number you're working with into something you can switch on (in this case, divide by 10).
Multiple case statements (taking advantage of fallthrough): case 1: case 2: case 3: ... case 10: do_something();
Use if rather than case.
In the C programming language the case statement used in a switch() statement must specify a value that the compiler can turn into a constant in some way. Each of the values used in the case statements must be unique within the scope of the switch(). The default keyword indicates the default if none of the case statements match the expression in the switch() statement.
As an aside, check out Duff's Device to show an interesting use of switch() and case. See How does Duff's device work?
So the following shows several examples of proper case statements in a switch():
#define XXVAL 2
/* The argument is parenthesized so that an expression such as 1 << n keeps its own grouping. */
#define CASETEST(x) ((x) + 5)
int iValue;
// set the value of the variable iValue at some point
switch (iValue) {
case 0:
// do the case if iValue == 0
break;
case XXVAL:
// do the case if iValue == XXVAL
break;
case CASETEST(3):
// do the case if iValue == CASETEST(3)
// works because preprocessor generates the source text which is
// then compiled and the expression can be resolved to a constant
break;
case CASETEST(5) * 2:
// do the case if iValue == CASETEST(5) * 2
// works because preprocessor generates the source text which is
// then compiled and the expression can be resolved to a constant
break;
default:
break;
}
What you can do if you still want to use a switch() with ranged case statements is to provide some mechanism to fold the expression into one or more specific constant values.
So in a simple, trivial example you could do something like the following. This is a trivial case to show the technique which ends up making the logic of the simple if statements opaque. This technique can be useful for complex decisions and classification that can be folded into a simple set of constants.
/*
 * Fold iValue into a small class code suitable for use as a switch selector:
 *   below 5000      -> 0
 *   5000 .. 9999    -> 1
 *   10000 .. 19999  -> 2
 *   20000 and above -> 9999 (meant to land in the switch's default)
 */
int foldit (int iValue)
{
    if (iValue < 5000) return 0;
    else if (iValue < 10000) return 1;
    else if (iValue < 20000) return 2;
    else return 9999; // triggers the default part of the switch
}
switch (foldit(iValue)) {
case 0:
// do what is needed for up to but not including 5000
break;
case 1:
// do what is needed for 5000 up to but not including 10000
break;
case 2:
// do what is needed for 10000 up to but not including 20000
break;
default:
// handle anything else
break;
}
Where the fold approach can be helpful is when you have several different results perhaps using a filter to try to classify a data item.
#define type1 0x00001
#define type2 0x00002
#define type3 0x00004
#define type4 0x00008
struct datatype {
int iVal;
int jVal;
};
/*
 * Placeholder classifiers, one per type flag. Each is meant to inspect thing
 * and return its own flag when it matches; as written, every one of them
 * returns 0 because the actual checks are left to the reader.
 */
unsigned long is_a_type1(struct datatype * thing)
{
unsigned long retVal = 0; // initialize to not a type1, set to type1 if turns out to be
// do checks for the type and if so set retVal to type1 if it matches
return retVal;
}
unsigned long is_a_type2(struct datatype * thing)
{
unsigned long retVal = 0; // initialize to not a type2, set to type2 if turns out to be
// do checks for the type and if so set retVal to type2 if it matches
return retVal;
}
unsigned long is_a_type3(struct datatype * thing)
{
unsigned long retVal = 0; // initialize to not a type3, set to type3 if turns out to be
// do checks for the type and if so set retVal to type3 if it matches
return retVal;
}
unsigned long is_a_type4(struct datatype * thing)
{
unsigned long retVal = 0; // initialize to not a type4, set to type4 if turns out to be
// do checks for the type and if so set retVal to type4 if it matches
return retVal;
}
/*
 * Run all four classifiers on thing, in order, and return the bitwise OR of
 * their results, so the return value has one bit for every classifier that
 * reported a match.
 */
unsigned long classify (struct datatype *thing)
{
    unsigned long flags = 0;

    flags |= is_a_type1(thing);   /* type1 check */
    flags |= is_a_type2(thing);   /* type2 check */
    flags |= is_a_type3(thing);   /* type3 check */
    flags |= is_a_type4(thing);   /* type4 check */

    return flags;
}
/*
 * Dispatch on the combined classification flags of myThing.
 * Note the "case type2:" label placed inside the if-body of the type1 case:
 * a switch may jump to a label nested in another statement.
 */
int main ()
{
struct datatype myThing;
// other source code then
switch (classify(&myThing)) {
case type1 | type2 | type3:
// do stuff if this is a type1, type2, and type3 but not type4
// that is, classify() determined that myThing matched all three types.
break;
case type1:
// do stuff if type1, which includes the stuff you do for type2 as well under
// special values of myThing.
if (myThing.iVal < 50) {
case type2:
// at this point we have type2 case stuff that we do. Code above is skipped
// and the switch () will jump straight to here if classify() is type2.
//
// Also stuff we do if type1 and myThing.iVal < 50;
// in other words this code is executed if classify(&myThing) is type2 or
// if classify(&myThing) is type1 and there is special processing for myThing.iVal < 50
break; // if classify() type2 or if classify() type1 and myThing.iVal < 50
}
// do stuff if only type1 and myThing.iVal >= 50
break;
case type2 | type3:
// do stuff if type2 and type3 matched but none of the others.
break;
default:
// any other case
break;
}
return 0;
}
Case labels in a C switch statement must be integer constant expressions; they cannot perform dynamic comparisons such as range checks.
Example of something which is, and is not, a "Constant Expression" in C?
For something this simple an if/else structure could be clearer and simpler, depending on the compiler your case statement may be translated into a series of branching comparison statements anyways.

Is there a way to easily check if a string is filled with either spaces / tabs / EOL and nothing else? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 7 years ago.
Improve this question
I am creating a program where I read lines 1 by 1 from a file. I wanted to implement a check where after the line is read and stored in a string it is checked to see if any one of its characters is not a space/tab/EOL. So essentially what I am trying to do is skip over the line and not store it if the entire line doesn't have any content other than spaces/tabs/EOLs.
Use strpbrk to search characters in a string. Returns a pointer to the position of the first occurrence or NULL if none found.
http://www.tutorialspoint.com/c_standard_library/c_function_strpbrk.htm
Let's call your string buffer and let's go through it one character at a time.
char *tmp = buffer; // give tmp the address of the buffer's first character.
// Assuming you have bool defined.
// If you don't, just switch this with whatever you prefer to use for bools.
bool bHasValidChar = false;
while(*tmp){
if(*tmp != ' ' && *tmp != '\t' && *tmp != '\n'){
// Cool, this character is not whitespace
bHasValidChar = true;
break;
}else{
// The character is whitespace
// Move on and test the next character
tmp++;
}
}
if(bHasValidChar){
// Contains non-whitespace. Do something.
}else{
// Str is only whitespace. Do something.
}
If you don't have bool (you may not have it if you're using an old version of C), then use an int instead of bool (with 1 & 0 in place of true and false), or whatever you prefer.
Hope this helps.
Note: This was typed off of my phone, so it has not been tested. I apologize for any errors in my post & code, but hopefully this will get you started down the right direction.
3 ways: see if any non-white-space exist
char nws[2];
if (sscanf(buf, "%1s", nws) == 1) foo(buf);
else skip(); // all white-space
// or if code wants to use specific ws characters
if (buf[strspn(buf, " \t\r\n")] != '\0') skip();
/*
 * Skip leading white-space (as classified by isspace) and return the first
 * character that follows. The result is 0 when the string is empty or holds
 * only white-space, because the terminating null character is not
 * white-space and ends the scan.
 */
int Interesting(const char *s) {
    const char *cursor;

    for (cursor = s; isspace((unsigned char) *cursor); ++cursor) {
        /* advance past white-space */
    }
    return *cursor;
}
Basically, you need to loop over the string until the end or until you find a character that is not one of the "allowed characters":
/*
 * Return 1 when every character of str is a space (32), TAB (9), line feed
 * (10) or carriage return (13), and 0 as soon as any other character is
 * found. An empty string yields 1.
 */
char only_whitespace(char const * str /* NULL terminated */) {
    char const *p = str;

    while (*p != '\0') {
        const char c = *p;

        if (c != 32 && c != 9 && c != 10 && c != 13) {
            return 0;
        }
        p++;
    }
    return 1;
}
If you want to be the cause of some WTFs in the future (by the one maintaining your code, this could also be your future self), add this for extra speed:
/*
 * Report whether a string consists solely of space, TAB, line feed and
 * carriage return characters, with a cheap bit test in front of the exact
 * comparison.
 *
 * str - NUL-terminated string to inspect
 * Returns 1 when every character is one of those four (an empty string
 * also yields 1), 0 as soon as any other character is met.
 */
char only_whitespace(char const * str /* NULL terminated */) {
    char current;
    while ((current = *str++) != '\0') {
        /* None of 32, 9, 10, 13 has bit 4 set or any bit from bit 6 upwards,
           so a character with such a bit can be rejected immediately. */
        if ((current >> 6) != 0 || (current & 16) != 0) {
            return 0;
        }
        /* 32 = space, 9 = TAB, 10 = line feed, 13 = carriage return */
        if (current != 32 && current != 9 && current != 10 && current != 13) {
            return 0;
        }
    }
    return 1;
}
Reason:
0b00100000 = 32
0b00001001 = 9
0b00001010 = 10
0b00001101 = 13
0b00010000 = 16
Wait a second...
I am creating a program where I read lines 1 by 1 from a file. [..]
If you're reading line by line, then there won't be any "end of line" character(s) in your line. That's the point of reading line by line. So you only need to check for space or tab characters.
With this, we can also go crazy:
0b00100000 = 32 (space)
0b00001001 = 9 (tab)
---------- bitwise or
0b00101001
---------- bitwise not
0b11010110 = 0xD6 (mask of bits that must not be set)
Now, assuming a 64-bit architecture, we can replicate that bit mask 8 times and use it to do a quick "pre scan" of the string, speeding up the check if we expect a high rate of strings that do not only consist of whitespace characters. Though we should know the length (in bytes) beforehand, otherwise obtaining the length will probably ruin any performance gains:
/*
 * Check one line for being whitespace-only, rejecting most other lines early
 * by testing eight characters at a time against a bit mask.
 *
 * str    - the line; must be NUL terminated, because the final verdict comes
 *          from only_whitespace(str)
 * length - number of characters (bytes) in str that may be read
 *
 * Returns 0 when any complete 8-byte group contains a character with a bit
 * set that neither a space nor a TAB has; otherwise returns whatever
 * only_whitespace(str) returns.
 */
char only_whitespace_with_prescan64(char const * const str /* one line */, size_t length) {
    /* 0xD6 == ~(32 | 9), repeated in every byte of the word. Written as one
       64-bit constant so no shift of a signed int can overflow. */
    uint64_t const mask64 = 0xD6D6D6D6D6D6D6D6ULL;
    size_t offset;
    /* length counts bytes, so only complete 8-byte groups inside the line are
       pre-scanned; the remaining tail is covered by only_whitespace() below. */
    for (offset = 0; length - offset >= 8; offset += 8) {
        uint64_t word = 0;
        unsigned shift;
        /* Assemble the word from single bytes instead of reading through a
           uint64_t pointer: str need not be 8-byte aligned, and char data
           must not be accessed as uint64_t. Byte order is irrelevant since
           the mask is the same in every byte. */
        for (shift = 0; shift < 8; ++shift) {
            word |= (uint64_t) (unsigned char) str[offset + shift] << (8 * shift);
        }
        if (word & mask64) {
            return 0;
        }
    }
    return only_whitespace(str);
}
Though whether this really brings any (noticeable) performance gain depends both on the frequency of its use and of course on the type of data you expect. Using stuff like that should only be done when you noticed while profiling your program that the function is actually a performance bottleneck.
NOTE: All of the above code is entirely untested.

How to indicate an error because all numbers are valid return values?

Compiler: GCC 4.4.5 (Code::Blocks)
Platform: Linux Kernel Version 2.6.32-5-686 (Debian)
I am currently writing a function that converts character strings into double longs. I am aware that there is already a function that does this. I am simply writing this function for practice but I currently don't know what to do.
My function worked correctly when it could only handle positive double longs because I could return -1 if there was an invalid character in the string. But I want the function to be able to handle negative double longs as well as positive. I don't know what I am supposed to do in this situation because now all real numbers are valid return values. I thought about continuing the conversion even if invalid characters are present by either converting the invalid characters into their decimal values or simply ignoring the invalid characters and picking out only valid characters 0-9 (decimal 48-57). I also thought about returning a pointer to a double long and using a NULL address to indicate that an invalid character was found, or maybe I could just set errno like the function readdir(). I am not sure how I would go about setting errno or if that is even allowed. So my question overall is what would you guys recommend I do in this situation? Also please note that I haven't included handling for negative double longs yet and the function will simply ignore invalid characters: for example $&3%7AJ89 will be converted to 3789.
/*
 * Convert the decimal number spelled out in cstr to a long double.
 *
 * Only the digits '0'-'9' and the first '.' are interpreted. Any other
 * character is skipped without shifting the place value of the digits
 * around it, so "$&3%7AJ89" yields 3789. There is no sign handling and no
 * error reporting. A trace line is printed to stdout for every character
 * that is examined.
 *
 * cstr - NUL-terminated string to convert
 * Returns the converted value; 0 for an empty string.
 */
double long cstrtodl(const char *cstr)
{
    double long power;
    double long dl = 0;
    int decimal_place;
    int point_index = -1; /* index of the first '.', or -1 when there is none */

    /* Start at index 0: starting at 1 would read past the terminator of an
       empty string and would never notice a leading '.'. */
    for(decimal_place = 0; cstr[decimal_place] != '\0'; decimal_place++)
    {
        if(cstr[decimal_place] == '.')
        {
            point_index = decimal_place;
            break;
        }
    }

    /* decimal_place now indexes the '.' or the terminator. Walk the integer
       part from its last character back to the first, so the place value
       grows by a factor of ten per digit. */
    for(decimal_place--, power = 1; decimal_place >= 0; decimal_place--, power *= 10)
    {
        char current = cstr[decimal_place];
        printf("[%i] = %i(%c)\nPOWER = %LF\nINTEGER = %LF\n", decimal_place, (int)current, current, power, dl);
        if(current >= '0' && current <= '9')
        {
            dl += (current - '0') * power;
        }
        else
        {
            /* Not a digit: cancel the loop's power *= 10 so the character
               does not occupy a decimal place. */
            power /= 10;
        }
    }

    /* Compare against -1 rather than 0 so a '.' at index 0 still counts. */
    if(point_index >= 0)
    {
        for(decimal_place = point_index+1, power = 10; cstr[decimal_place] != '\0'; decimal_place++, power *= 10)
        {
            char current = cstr[decimal_place];
            printf("[%i] = %i(%c)\nPOWER = %LF\nINTEGER = %LF\n", decimal_place, (int)current, current, power, dl);
            if(current >= '0' && current <= '9')
            {
                dl += (current - '0') / power;
            }
            else
            {
                /* Not a digit (this includes any further '.'): skip it
                   without consuming a decimal place. */
                power /= 10;
            }
        }
    }
    return dl;
}
Returning a pointer is complicated and inefficient, because you'd have to malloc a buffer and then remember to free it. That's a lot of overhead for returning a simple, fixed-size value. Instead, you can return a status code and write the result to a pointer:
// Alternative signature: the converted value is written through result,
// leaving the return value free to carry the status.
// returns zero on success, -1 on error
int cstrtodl(const char *cstr, long double *result);
For some purposes, it might also be useful to know how much of the string was consumed, if you don't necessarily read all of it. In that case, you can return a size_t or ssize_t, and either 0 (no input consumed) or -1 on error. The caller can then check whether anything unexpected follows the number in the input string.
Setting errno is perfectly allowed in C.
You have several options:
Using NAN as an error value. Keep in mind that you cannot check for NAN with ==. You must use isnan(x) or x!=x to check whether x is a NAN.
Adding an additional int *errorp argument to store a flag (and possibly an error code) for whether an error occurred. This option has two sub-options: you may want to write a 0 on success, or you may want to leave the previous contents alone on success so that the caller can make multiple calls and only check at the very end whether any of them failed.
Switching things around: Passing a pointer to where to store the result, and using the return value for an error code. This approach encourages checking for errors after every call, but makes it difficult to use the result directly in expressions, which can be annoying.
Reporting the error through special thread-local state: either errno, the floating point exception flags (warning: some machines don't support fenv/exceptions!), or your own thread-local object. Depending on your perspective this may be wrong/ugly since it's a hidden information channel, but it can also be the most convenient for the caller.
Reporting the error through global state. Please, never do this. It precludes multi-threaded and clean library use of your code.
errno can be used for this. You need to #include <errno.h> in order to use it. You set errno to some predefined value, and the caller then checks it:
// Clear errno before the call so that a non-zero value afterwards can only
// have been set during this call.
errno = 0;
cstrtodl(some_string);
if (errno != 0) {
// Error occurred.
}
Note that errno is thread-safe. If you write to it in one thread, the value isn't changed in other threads. So it's not just some global variable. It can even be not a variable at all, but internal compiler magic. But the point is, you can just treat it as if it were a thread-local variable.
Another way is to pass the error in an extra argument:
// Sketch of cstrtodl() reporting failure through an optional out-parameter.
// error may be NULL, in which case no status is written. Otherwise *error is
// set to SOME_CONSTANT_OR_MACRO when some_error_occured is true, and to 0
// when it is not.
// NOTE(review): the conversion itself and the return statement are elided
// here (the "// ..." line); this is not a complete definition.
double long cstrtodl(const char *cstr, int *error)
{
// ...
// Only write the status when the caller supplied somewhere to put it.
if (error != NULL) {
if (some_error_occured) {
*error = SOME_CONSTANT_OR_MACRO;
} else {
*error = 0;
}
}
}
The caller can then do:
int error;
cstrtodl(some_string, &error);
if (*error) {
// Error occured.
}
Or, if the caller isn't interested:
// A NULL error pointer is allowed: cstrtodl() checks for it before writing a status.
cstrtodl(some_string, NULL);
I would suggest either returning a pointer to the resulting double or NULL on error (as you suggested) or, as strtoul(), adding a pointer parameter which you point at the first unconverted character.
As for errno, it's just another global variable (just happens to be declared within libc) and it is just fine to assign your own value to it.

Resources