C libpcap resolve DLT entries, some nasty bug - c

while sort of writing my own sniffer, I found one example that only starts if it is talking ethernet. Other DLT_types have been ignored. They can be found in pcap-bpf.h I wrote some
lines, that try to implement a missing pcap_resolve_dlt(). It's really nasty code(1), seems to work, though I hit a nasty bug, where one needs to give a space to the corresponding number like:
user@debian:~/tmp$ ./resolve_dlt 114
DLT_LTALK 114
user@debian:~/tmp$ ./resolve_dlt 14
DLT_ATM_RFC1483 11
user@debian:~/tmp$ ./resolve_dlt " 14"
DLT_RAW 14
Maybe the approach itself is totally wrong and one should grep the pcap-bpf.h directly.
1) http://nopaste.info/4a2470cc83.html, uses strstr()
Kind Regards,
Charles
Tags: C libpcap DLT_

You are doing strstr(dlt[i],argv[1]) so the first "14" matches the text in "DLT_ATM_RFC1483", however the text " 14" matches the text in "DLT_RAW 14".

You could use the preprocessor's stringizing operator (#) to build a code-to-name table directly from the header's macros, and then compare numbers instead of searching text:
#include <stdlib.h>
#include <stdio.h>
#include <pcap-bpf.h>
/*
 * TAB_ENTRY(DLT_FOO) expands to { DLT_FOO, "DLT_FOO" }: the macro's numeric
 * value paired with its own spelling, which the preprocessor's stringizing
 * operator (#x) produces.
 */
#define TAB_ENTRY(x) { x, #x }

/*
 * Table mapping DLT_* link-layer type codes to their names.
 *
 * Every DLT_* macro is listed exactly once.  A repeated row carries the same
 * code as its first occurrence, so a first-match linear search could never
 * return it; such rows have been dropped.
 */
struct {
    long dlt_code;          /* numeric value of the DLT_* macro */
    const char *dlt_name;   /* the macro's name as a string */
} dlt_tab[] = {
    TAB_ENTRY(DLT_NULL),
    TAB_ENTRY(DLT_EN10MB),
    TAB_ENTRY(DLT_EN3MB),
    TAB_ENTRY(DLT_AX25),
    TAB_ENTRY(DLT_PRONET),
    TAB_ENTRY(DLT_CHAOS),
    TAB_ENTRY(DLT_IEEE802),
    TAB_ENTRY(DLT_ARCNET),
    TAB_ENTRY(DLT_SLIP),
    TAB_ENTRY(DLT_PPP),
    TAB_ENTRY(DLT_FDDI),
    TAB_ENTRY(DLT_ATM_RFC1483),
    TAB_ENTRY(DLT_RAW),
    TAB_ENTRY(DLT_SLIP_BSDOS),
    TAB_ENTRY(DLT_PPP_BSDOS),
    TAB_ENTRY(DLT_ATM_CLIP),
    TAB_ENTRY(DLT_REDBACK_SMARTEDGE),
    TAB_ENTRY(DLT_PPP_SERIAL),
    TAB_ENTRY(DLT_PPP_ETHER),
    TAB_ENTRY(DLT_SYMANTEC_FIREWALL),
    TAB_ENTRY(DLT_C_HDLC),
    TAB_ENTRY(DLT_IEEE802_11),
    TAB_ENTRY(DLT_FRELAY),
    TAB_ENTRY(DLT_LOOP),
    TAB_ENTRY(DLT_ENC),
    TAB_ENTRY(DLT_LINUX_SLL),
    TAB_ENTRY(DLT_LTALK),
    TAB_ENTRY(DLT_ECONET),
    TAB_ENTRY(DLT_IPFILTER),
    TAB_ENTRY(DLT_PFLOG),
    TAB_ENTRY(DLT_CISCO_IOS),
    TAB_ENTRY(DLT_PRISM_HEADER),
    TAB_ENTRY(DLT_AIRONET_HEADER),
    TAB_ENTRY(DLT_HHDLC),
    TAB_ENTRY(DLT_IP_OVER_FC),
    TAB_ENTRY(DLT_SUNATM),
    TAB_ENTRY(DLT_RIO),
    TAB_ENTRY(DLT_PCI_EXP),
    TAB_ENTRY(DLT_AURORA),
    TAB_ENTRY(DLT_IEEE802_11_RADIO),
    TAB_ENTRY(DLT_TZSP),
    TAB_ENTRY(DLT_ARCNET_LINUX),
    TAB_ENTRY(DLT_JUNIPER_MLPPP),
    TAB_ENTRY(DLT_JUNIPER_MLFR),
    TAB_ENTRY(DLT_JUNIPER_ES),
    TAB_ENTRY(DLT_JUNIPER_GGSN),
    TAB_ENTRY(DLT_JUNIPER_MFR),
    TAB_ENTRY(DLT_JUNIPER_ATM2),
    TAB_ENTRY(DLT_JUNIPER_SERVICES),
    TAB_ENTRY(DLT_JUNIPER_ATM1),
    TAB_ENTRY(DLT_APPLE_IP_OVER_IEEE1394),
    TAB_ENTRY(DLT_MTP2_WITH_PHDR),
    TAB_ENTRY(DLT_MTP2),
    TAB_ENTRY(DLT_MTP3),
    TAB_ENTRY(DLT_SCCP),
    TAB_ENTRY(DLT_DOCSIS),
    TAB_ENTRY(DLT_LINUX_IRDA),
    TAB_ENTRY(DLT_IBM_SP),
    TAB_ENTRY(DLT_IBM_SN),
    TAB_ENTRY(DLT_USER0),
    TAB_ENTRY(DLT_USER1),
    TAB_ENTRY(DLT_USER2),
    TAB_ENTRY(DLT_USER3),
    TAB_ENTRY(DLT_USER4),
    TAB_ENTRY(DLT_USER5),
    TAB_ENTRY(DLT_USER6),
    TAB_ENTRY(DLT_USER7),
    TAB_ENTRY(DLT_USER8),
    TAB_ENTRY(DLT_USER9),
    TAB_ENTRY(DLT_USER10),
    TAB_ENTRY(DLT_USER11),
    TAB_ENTRY(DLT_USER12),
    TAB_ENTRY(DLT_USER13),
    TAB_ENTRY(DLT_USER14),
    TAB_ENTRY(DLT_USER15),
    TAB_ENTRY(DLT_IEEE802_11_RADIO_AVS),
    TAB_ENTRY(DLT_JUNIPER_MONITOR),
    TAB_ENTRY(DLT_BACNET_MS_TP),
    TAB_ENTRY(DLT_PPP_PPPD),
    TAB_ENTRY(DLT_JUNIPER_PPPOE),
    TAB_ENTRY(DLT_JUNIPER_PPPOE_ATM),
    TAB_ENTRY(DLT_GPRS_LLC),
    TAB_ENTRY(DLT_GPF_T),
    TAB_ENTRY(DLT_GPF_F),
    TAB_ENTRY(DLT_GCOM_T1E1),
    TAB_ENTRY(DLT_GCOM_SERIAL),
    TAB_ENTRY(DLT_JUNIPER_PIC_PEER),
    TAB_ENTRY(DLT_ERF_ETH),
    TAB_ENTRY(DLT_ERF_POS),
    TAB_ENTRY(DLT_LINUX_LAPD),
    TAB_ENTRY(DLT_JUNIPER_ETHER),
    TAB_ENTRY(DLT_JUNIPER_PPP),
    TAB_ENTRY(DLT_JUNIPER_FRELAY),
    TAB_ENTRY(DLT_JUNIPER_CHDLC),
    TAB_ENTRY(DLT_MFR),
    TAB_ENTRY(DLT_JUNIPER_VP),
    TAB_ENTRY(DLT_A429),
    TAB_ENTRY(DLT_A653_ICM),
    TAB_ENTRY(DLT_USB),
    TAB_ENTRY(DLT_BLUETOOTH_HCI_H4),
    TAB_ENTRY(DLT_IEEE802_16_MAC_CPS),
    TAB_ENTRY(DLT_USB_LINUX),
    TAB_ENTRY(DLT_CAN20B),
    TAB_ENTRY(DLT_IEEE802_15_4_LINUX),
    TAB_ENTRY(DLT_PPI),
    TAB_ENTRY(DLT_IEEE802_16_MAC_CPS_RADIO),
    TAB_ENTRY(DLT_JUNIPER_ISM),
    TAB_ENTRY(DLT_IEEE802_15_4),
    TAB_ENTRY(DLT_SITA),
    TAB_ENTRY(DLT_ERF),
    TAB_ENTRY(DLT_RAIF1),
    TAB_ENTRY(DLT_IPMB),
    TAB_ENTRY(DLT_JUNIPER_ST),
    TAB_ENTRY(DLT_BLUETOOTH_HCI_H4_WITH_PHDR)
};
/*
 * Print the DLT_* name for the numeric code given as argv[1].
 *
 * The argument is parsed with strtol() using base 0, so decimal, octal (0...)
 * and hexadecimal (0x...) spellings are accepted.
 *
 * Exit status: 0 when the code is found, 1 when it is not in dlt_tab,
 * 2 when the argument is missing or is not a complete number.
 */
int main(int argc, char *argv[])
{
    const size_t count = sizeof dlt_tab / sizeof dlt_tab[0];
    char *endptr = NULL;
    long code = 0;
    size_t i;

    if (argc > 1) {
        code = strtol(argv[1], &endptr, 0);
    }

    /*
     * endptr is still NULL when no argument was given, equals argv[1] when
     * no digits were consumed, and points at a non-NUL character when the
     * number is followed by trailing garbage (e.g. "14abc").
     */
    if (endptr == NULL || endptr == argv[1] || *endptr != '\0') {
        fprintf(stderr, "Usage: %s <dlt_code>\n", argv[0]);
        exit(2);
    }

    for (i = 0; i < count; i++) {
        if (dlt_tab[i].dlt_code == code) {
            printf("%ld is %s\n", code, dlt_tab[i].dlt_name);
            return 0;
        }
    }

    printf("%ld not found\n", code);
    exit(1);
}
Example:
$ ./bpf 12
12 is DLT_RAW
$ ./bpf 120
120 is DLT_AIRONET_HEADER
(Note that 12 is DLT_RAW on Linux systems, not 14).

At least with newer versions of libpcap/WinPcap, you can use pcap_datalink_val_to_name to map a DLT_ value to the DLT_ name. resolve_dlt could just use strtol() on its first argument and pass the result to pcap_datalink_val_to_name (after, of course, checking for errors, and for values that don't fit in an int).

Related

C regex extraction

Please consider this C code:
#include <stdio.h>
#include <regex.h>
#include <string.h>
/*
 * Demo from the question: compile a POSIX regex, run it against a fixed
 * path and print the text that was matched.
 *
 * As written it prints an empty match: the copy below reads rm[1], the slot
 * for the first parenthesised group, but the pattern "cam" has no groups.
 * The whole-match offsets are in rm[0].
 */
int main(){
char * our_string = "/var/www/html/cameras/cam7/2020-01/15/cam7-2020-01-15-17-45-20-1037-03.h264";
regex_t re;
//int regex_int = regcomp(&re, "cam[:digit:]", 0);
int regex_int = regcomp(&re, "cam", 0);
/* regcomp() returns non-zero when the pattern cannot be compiled */
if (regex_int) {
fprintf(stderr, "regex failed to compile!");
return 1;
}
/* room for two match slots: rm[0] (whole match) and rm[1] (first group) */
regmatch_t rm[2];
/* regexec() returns non-zero when there is no match */
if ((regexec(&re, our_string, 2, rm,0)) ){
fprintf(stderr, "regex failed to exec!");
return 1;
}
/* zero-filled, so the copied bytes are always NUL-terminated */
char temp[8192] = {0};
/* NOTE(review): uses group slot rm[1]; with a group-less pattern this copies nothing */
memcpy(temp, our_string + rm[1].rm_so, rm[1].rm_eo - rm[1].rm_so);
printf("We got: %s\n", temp);
puts("Bye!");
/* NOTE(review): re is never released with regfree() */
return 0;
}
I am trying to extract camX out of our_string, and need help. In its current form, above code is turning blank:
$ ./a.out
We got:
Bye!
C regex is not my forte, Please help!
You have a couple of problems:
//int regex_int = regcomp(&re, "cam[:digit:]", 0)
If you want to match cam followed by a digit, you need (Besides uncommenting this line, of course, and commenting out the one beneath it), to put [:digit:] inside a bracket expression:
int regex_int = regcomp(&re, "cam[[:digit:]]", 0)
The second issue:
memcpy(temp, our_string + rm[1].rm_so, rm[1].rm_eo - rm[1].rm_so);
Neither of your regular expressions has any groups, so the second element of the rm array is not going to have anything useful in it. You need to use the first element, which has the offsets of the complete match:
memcpy(temp, our_string + rm[0].rm_so, rm[0].rm_eo - rm[0].rm_so);
You also have a memory leak because you don't have a regfree(&re); to free up memory allocated for the regular expression. Not a big deal in a simple demo program like this, but in something bigger or longer running or that does the matching in a loop, it'll become an issue.

How to compare my string, which is stored in an array, to function names from a complete library in c

After I enter a string in c and store it in for example char s[100], how can I compare that string to all function names in a math.h? For example, I enter pow and the result will look like this in stored form.
s[0]='p'
s[1]='o'
s[2]='w'
s[3]='\0'
Since my string is the equivalent of pow(), I want my program to recognise that and then call pow() during execution of my program. I know it is not that hard to do string comparison within the code, but that would mean that I would have to do string comparison for every function name in the library. I don't want to do that. How is it possible to compare my string against all names in the library without hard coding every comparison?
Thank you :)
You can't, not without doing work yourself. There are no names of functions present at runtime in general, and certainly not of functions you haven't called.
C is not a dynamic language, names are only used when compiling/linking.
Regular expressions in C
Try parsing the header files using FILE and use aforementioned link as a guide to check whether the function exists or not.
I tried to make a little sample about what I assume the questioner is looking for (eval.c):
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <assert.h>
/* mapping function names to function pointers and number of parameters */
/*
 * Each row of table[] ties the textual name of a <math.h> function to the
 * function itself and to the number of arguments main() must read for it.
 */
struct Entry {
const char *name; /* function name */
/* declared without a parameter list so one pointer type can hold both the
 * one- and the two-argument functions; the call site supplies the arguments */
double (*pFunc)(); /* function pointer */
int nArgs; /* number of arguments */
} table[] = {
/* REGISTER(f, n) expands to { "f", &f, n }; #FUNC stringizes the identifier */
#define REGISTER(FUNC, N_ARGS) { #FUNC, &FUNC, N_ARGS }
REGISTER(atan2, 2),
REGISTER(pow, 2),
/* NOTE(review): standard modf() takes (double, double *), but main() calls
 * two-argument entries with two doubles - confirm this entry is usable;
 * fmod() has the (double, double) shape */
REGISTER(modf, 2),
REGISTER(sin, 1),
REGISTER(cos, 1)
#undef REGISTER
};
/* let compiler count the number of entries */
enum { sizeTable = sizeof table / sizeof *table };
void printUsage(const char *argv0)
{
int i;
printf(
"Usage:\n"
" %s FUNC\n"
" where FUNC must be one of:\n", argv0);
for (i = 0; i < sizeTable; ++i) printf(" - %s\n", table[i].name);
}
/*
 * eval: apply the function named by argv[1] to numbers read from stdin.
 *
 * Numbers are consumed one or two at a time, depending on the function's
 * argument count, and each result is printed on its own line.
 *
 * Returns 0 at end of input, 1 on a read error, -1 when the function name is
 * missing or unknown (or when the table holds an unsupported argument count).
 */
int main(int argc, char **argv)
{
    const char *wanted;
    struct Entry *hit = NULL;
    int idx;

    /* the function name is the only command line argument */
    if (argc < 2) {
        fprintf(stderr, "ERROR: Missing function argument!\n");
        printUsage(argv[0]);
        return -1;
    }
    wanted = argv[1];

    /* linear search of the table by name */
    for (idx = 0; idx < sizeTable; ++idx) {
        if (strcmp(wanted, table[idx].name) == 0) {
            hit = &table[idx];
            break;
        }
    }
    if (hit == NULL) {
        fprintf(stderr, "ERROR! Unknown function '%s'!\n", wanted);
        printUsage(argv[0]);
        return -1;
    }

    /* keep evaluating until stdin is exhausted or a read fails */
    while (1) {
        if (hit->nArgs == 1) {
            double x, y;

            if (scanf("%lf", &x) != 1) {
                if (feof(stdin)) {
                    return 0; /* clean end of input */
                }
                fprintf(stderr, "Input ERROR!\n");
                return 1;
            }
            y = hit->pFunc(x);
            printf("%s(%f): %f\n", hit->name, x, y);
        } else if (hit->nArgs == 2) {
            double x1, x2, y;

            if (scanf("%lf %lf", &x1, &x2) != 2) {
                if (feof(stdin)) {
                    return 0; /* clean end of input */
                }
                fprintf(stderr, "Input ERROR!\n");
                return 1;
            }
            y = hit->pFunc(x1, x2);
            printf("%s(%f, %f): %f\n", hit->name, x1, x2, y);
        } else {
            /* only reachable if the table holds an unsupported count */
            fprintf(stderr,
                    "ERROR! Functions with %d arguments not yet implemented!\n",
                    hit->nArgs);
            assert(0);
            return -1;
        }
    }
}
I compiled and tested this with gcc in cygwin on Windows (64 bit):
$ gcc -std=c11 -o eval eval.c
$ ./eval
ERROR: Missing function argument!
Usage:
./eval FUNC
where FUNC must be one of:
- atan2
- pow
- modf
- sin
- cos
$ echo "1 2 3 4 5 6 7 8 9 10" | ./eval pow
pow(1.000000, 2.000000): 1.000000
pow(3.000000, 4.000000): 81.000000
pow(5.000000, 6.000000): 15625.000000
pow(7.000000, 8.000000): 5764801.000000
pow(9.000000, 10.000000): 3486784401.000000
$ echo "1 2 3 4 5 6 7 8 9 10" | ./eval sin
sin(1.000000): 0.841471
sin(2.000000): 0.909297
sin(3.000000): 0.141120
sin(4.000000): -0.756802
sin(5.000000): -0.958924
sin(6.000000): -0.279415
sin(7.000000): 0.656987
sin(8.000000): 0.989358
sin(9.000000): 0.412118
sin(10.000000): -0.544021
The usage of this application: the name of the function to apply is provided as command line argument. The values (to apply function to) are provided via standard input. In the sample session, I used echo and a pipe (|) to redirect the output of echo to the input of eval. (If eval is called stand-alone the numbers may be typed in by keyboard.)
Notes:
The table does the actual mapping of strings to function pointers. To solve that issue about the number of parameters, I considered this in struct Entry also.
The REGISTER macro is a trick to use the identifier as string constant also. The #FUNC is a stringize macro-operation (a typical C trick to prevent errors due to typos).
The sizeTable is another trick to prevent redundant definitions. I let the compiler count the number of entries. Thus, new entries may be added and it still will work without any other editing.
The actual trick is to provide a function pointer where the arguments are "left out". When it is called, the correct number of arguments is used and it works. (assuming, of course, the table initialization has been implemented carefully.) However, it would be a pain to do this in C++ because the functions with distinct number of arguments would need an appropriate function pointer with matching signature - horrible casts would be necessary. (Try to compile this with g++ -std=c++11 -c eval.c to see what I mean.)
For a productive solution, I would sort the entries by names (lexicographically) and apply a binary search (or even use hashing to be faster and more sophisticated). For this sample, I wanted to keep it simple.
math.h provides a lot of functions in "float flavor" also. These may not be added to this sample without additional effort. To support other than double arguments
some type info would have to be added to the table entries
the type info has to be considered somehow in the switch statement of evaluation.
...not to mention functions where argument types are distinct to each other (or return type). (I cannot remember whether math.h even provides such functions.)
Btw. this will work for non-math.h functions also...

How to extract filename from path

There should be something elegant in Linux API/POSIX to extract base file name from full path
See char *basename(char *path).
Or run the command "man 3 basename" on your target UNIX/POSIX system.
Use basename (which has odd corner case semantics) or do it yourself by calling strrchr(pathname, '/') and treating the whole string as a basename if it does not contain a '/' character.
Here's an example of a one-liner (given char * whoami) which illustrates the basic algorithm:
(whoami = strrchr(argv[0], '/')) ? ++whoami : (whoami = argv[0]);
an additional check is needed if NULL is a possibility. Also note that this just points into the original string -- a "strdup()" may be appropriate.
You could use strstr in case you are interested in the directory names too:
/*
 * Step over one '/'-terminated directory component per iteration; when the
 * loop ends, path points at the final component.
 *
 * A pre-tested while loop is used so that a path containing no '/' at all is
 * simply printed unchanged instead of handing a NULL pointer to strlen().
 */
const char *path = "ab/cde/fg.out";
const char *ssc;

while ((ssc = strstr(path, "/")) != NULL) {
    /* path now names the next component; a directory name could be read here */
    path = ssc + 1;
}
printf("%s\n", path);
The basename() function returns the last component of a path, which could be a folder name and not a file name. There are two versions of the basename() function: the GNU version and the POSIX version.
The GNU version can be found in string.h after you include #define _GNU_SOURCE:
#define _GNU_SOURCE
#include <string.h>
The GNU version uses const and does not modify the argument.
char * basename (const char *path)
This function is overridden by the XPG (POSIX) version if libgen.h is included.
char * basename (char *path)
This function may modify the argument by removing trailing '/' bytes. The result may be different from the GNU version in this case:
basename("foo/bar/")
will return the string "bar" if you use the XPG version and an empty string if you use the GNU version.
References:
basename (3) - Linux Man Pages
Function: char * basename (const char *filename), Finding Tokens in a String.
Of course if this is a Gnu/Linux only question then you could use the library functions.
https://linux.die.net/man/3/basename
And though some may disapprove these POSIX compliant Gnu Library functions do not use const. As library utility functions rarely do. If that is important to you I guess you will have to stick to your own functionality or maybe the following will be more to your taste?
#include <stdio.h>
#include <string.h>
/*
 * Print the last component of the path given as argv[1], or of argv[0] when
 * no argument is supplied.
 *
 * Trailing '/' characters are removed first (the argument string is edited in
 * place), so "/home/me/myprogram/" yields "myprogram".  A path consisting
 * only of "/" is printed as "/", and an empty string is printed as an empty
 * line rather than indexing before the start of the buffer.
 */
int main(int argc, char *argv[])
{
    char *input = (argc > 1) ? argv[1] : argv[0];
    size_t len = strlen(input);
    char *fn;

    /* strip every trailing '/', but never shorten a lone "/" */
    while (len > 1 && input[len - 1] == '/') {
        input[--len] = '\0';
    }

    fn = strrchr(input, '/');
    if (fn != NULL && fn[1] != '\0') {
        ++fn;           /* name starts right after the last '/' */
    } else {
        fn = input;     /* no separator, or the path is just "/" */
    }

    printf("%s\n", fn);
    return 0;
}
// Returns a pointer into `path` at the character following the last '/' or
// '\\'.  If the string contains neither separator, `path` itself is returned;
// a NULL argument yields NULL.  Works for any character type.
template<typename charType>
charType* getFileNameFromPath( charType* path )
{
    if( path == NULL )
        return NULL;

    charType* lastComponent = path;
    charType* scan = path;
    while( *scan != '\0' )
    {
        const bool isSeparator = ( *scan == '/' || *scan == '\\' );
        ++scan;
        if( isSeparator )
            lastComponent = scan;   // candidate start: just past the separator
    }
    return lastComponent;
}
call:
wchar_t * fileName = getFileNameFromPath < wchar_t > ( filePath );
(this is a c++)
You can escape slashes to backslash and use this code:
#include <stdio.h>
#include <string.h>
/*
 * Split a backslash-separated path with strtok(), printing every component
 * and finally the last one, which is the file name.
 */
int main(void)
{
    char path[] = "C:\\etc\\passwd.c"; /* strtok() edits this buffer in place */
    char temp[256];                     /* copy of the most recent component */
    char *token;

    for (token = strtok(path, "\\"); token != NULL; token = strtok(NULL, "\\")) {
        strcpy(temp, token);
        printf("%s\n", token);
    }

    printf("last filename: %s", temp);
    return 0;
}
I used a simpler way to get just the filename or last part in a path.
/*
 * Return a pointer to the last component of path.
 *
 * The result points into path, just past the last '/' or '\\'.  If path
 * contains neither separator, path itself is returned.  Nothing is printed
 * and path is not modified.
 *
 * Every position, including index 0, is examined, so "/x" yields "x" and an
 * empty string is returned unchanged.
 */
char * extract_file_name(char *path)
{
    size_t i = strlen(path);

    /* scan backwards; i is one past the character being tested */
    while (i > 0) {
        --i;
        if (path[i] == '\\' || path[i] == '/') {
            return path + i + 1;
        }
    }
    return path;
}
Input path = "C:/Users/me/Documents/somefile.txt"
Output = "somefile.txt"
@Nikolay Khilyuk offers the best solution, except:
1) Go back to using char *; there is absolutely no good reason for using const.
2) This code is not portable and is likely to fail on non-POSIX systems where '/' is not the file system delimiter, depending on the compiler implementation. For some Windows compilers you might want to test for '\\' instead of '/'. You might even test for the system and set the delimiter based on the result.
The function name is long but descriptive, no problem there. There is no way to ever be sure that a function will return a filename; you can only be sure that it can if the function is coded correctly, which you achieved. Though if someone uses it on a string that is not a path, it will obviously fail. I would probably have named it basename, as that would convey its purpose to many programmers. That is just my preference, though; your name is fine. As for the length of the string this function can handle, and why anyone thought that would be a concern: you are unlikely to deal with a path name longer than what this function can handle on an ANSI C compiler, because the index is a size_t. size_t is an unsigned integer type whose range is implementation-defined (commonly 0 to 4,294,967,295 on 32-bit targets and far larger on 64-bit ones).
I proofed your function with the following.
#include <stdio.h>
#include <string.h>
char* getFileNameFromPath(char* path);
/* Print the file-name part of the path this program was invoked with. */
int main(int argc, char *argv[])
{
    (void)argc; /* only argv[0] is inspected */

    printf("%s\n", getFileNameFromPath(argv[0]));
    return 0;
}
/*
 * Return a pointer to the part of path that follows its last '/'.
 *
 * The result points into path.  If there is no '/', path itself is returned.
 * A NULL path yields NULL.
 *
 * The index runs from strlen(path) down to 1 and tests path[i - 1].  That way
 * an empty string never wraps the unsigned index around, and a '/' in the
 * very first position (as in "/prog") is still found.
 */
char* getFileNameFromPath(char* path)
{
    size_t i;

    if (path == NULL) {
        return NULL;
    }
    for (i = strlen(path); i > 0; i--) {
        if (path[i - 1] == '/') {
            return &path[i];
        }
    }
    return path;
}
Worked great, though Daniel Kamil Kozar did find a 1 off error that I corrected above. The error would only show with a malformed absolute path but still the function should be able to handle bogus input. Do not listen to everyone that critiques you. Some people just like to have an opinion, even when it is not worth anything.
I do not like the strstr() solution as it will fail if filename is the same as a directory name in the path and yes that can and does happen especially on a POSIX system where executable files often do not have an extension, at least the first time which will mean you have to do multiple tests and searching the delimiter with strstr() is even more cumbersome as there is no way of knowing how many delimiters there might be. If you are wondering why a person would want the basename of an executable think busybox, egrep, fgrep etc...
I first thought strrchr() would be cumbersome to use, as it searches for characters rather than strings, so I did not find it nearly as viable or succinct as this solution. I stand corrected by Rad Lexus: it is not as cumbersome as I thought, because strrchr() returns a pointer to the last occurrence of the character, so the file name simply starts one position past that pointer.
Take Care
My example (improved):
#include <string.h>
// Returns a pointer into `path` at the character following the last
// occurrence of `separator` ('/' by default).  If the separator does not
// occur, `path` itself is returned; a null `path` is returned unchanged.
const char* getFileNameFromPath(const char* path, char separator = '/')
{
    if (path == nullptr)
        return path;

    // start one past the last character and walk back towards the front
    const char* cursor = path + strlen(path);
    while (cursor != path)
    {
        if (*(cursor - 1) == separator)
            return cursor;
        --cursor;
    }
    return path;
}

Getting file extension in C language

Say there is a file called 12345.jpg. In C, how can I get the file extension so that I can compare with some file extension? If there are any inbuilt functions, kindly please let me know.
A function to do that, along with a test harness:
#include <stdio.h>
#include <string.h>
/*
 * Return the extension of fspec, including the leading '.'.
 *
 * The result points into fspec at the last '.', or is an empty string when
 * there is no extension.  A '.' that belongs to a directory component (one
 * followed later by a '/', as in "dir.d/file") is not an extension.  A NULL
 * fspec yields an empty string.
 */
const char *getExt (const char *fspec) {
    const char *dot;
    const char *sep;

    if (fspec == NULL)
        return "";

    dot = strrchr (fspec, '.');
    if (dot == NULL)
        return "";  // fast method, could also use &(fspec[strlen(fspec)]).

    /* the last dot only counts if no path separator comes after it */
    sep = strrchr (fspec, '/');
    if (sep != NULL && sep > dot)
        return "";

    return dot;
}
/* Test harness: print the extension getExt() reports for each argument. */
int main (int argc, char *argv[]) {
    char **arg;

    for (arg = argv + 1; arg < argv + argc; arg++) {
        printf ("[%s] - > [%s]\n", *arg, getExt (*arg));
    }
    return 0;
}
Running this with:
./program abc abc. abc.1 .xyz abc.def abc.def.ghi
gives you:
[abc] - > []
[abc.] - > [.]
[abc.1] - > [.1]
[.xyz] - > [.xyz]
[abc.def] - > [.def]
[abc.def.ghi] - > [.ghi]
Probably:
#include <string.h>
char *extn = strrchr(filename, '.');
That will give you a pointer to the period of the extension, or a null pointer if there is no extension. You might need to do some more due diligence to ensure that there isn't a slash after the dot, amongst other things.
There's a portable CRT solution: _splitpath.
In windows there's also an undocumented shell32 API called PathGetExtension, but that's evil in so many ways that I probably shouldn't have noted that.
Use strchr
First array member will give you filename
Second array member will give you extension

Smart variadic expansion based on format string

I have a daemon that reads a configuration file in order to know where to write something. In the configuration file, a line like this exists:
output = /tmp/foo/%d/%s/output
Or, it may look like this:
output = /tmp/foo/%s/output/%d
... or simply like this:
output = /tmp/foo/%s/output
... or finally:
output = /tmp/output
I have that line as cfg->pathfmt within my program. What I am trying to do now is to come up with some clever way of using it.
A little more explanation, the path can contain up to two components to be formatted. %d will be expanded as a job ID (int), %s as a job name (string). The user may want to use one, both or none in the configuration file. I need to know what they want and in what order before I finally pass it to snprintf(). I can kind of narrow it down, but I keep wanting to talk to strtok() and that seems ugly.
I want to give users this kind of flexibility, however I'm getting lost looking for a sensible, portable way to implement it. I'm also at a complete and total loss for how to begin searching for this.
I'd be very happy if:
Someone could help me narrow down the search phrase to find good examples
Someone could post a link to some OSS project implementing this
Someone could post some pseudo code
I don't want the code written for me, I'm just really stuck on what (I think) should be something very simple and need some help taking the first bite. I really feel like I'm over thinking and overlooking the obvious.
The end result should be a boolean function like this:
bool output_sugar(const char *fmt, int jobid, const char *jobname, struct job *j);
It would then call snprintf() (sensibly) on j->outpath, returning false if some kind of garbage (i.e. % followed by something that is not s, d or %) is in the config line (or if the line is NULL). The sanity checks are easy; I'm just having a hard time getting the number (and order) of arguments to the format call correct.
Thanks in advance. Also, feel free to edit this title if you have the reputation to do so, as I said, I'm not quite sure how to ask the question in a single line. I think what I need is a parser, but it feels awkward using a full blown lexer / parser to handle one simple string.
Yes, you need a parser of some sort. It need not be complex, though:
/*
 * Expand the path template fmt into buffer (capacity buflen bytes).
 *
 *   %s  is replaced by jobname
 *   %d  is replaced by jobid in decimal
 *   %%  produces a single '%'
 *   a lone '%' at the very end of fmt also produces a single '%'
 *
 * The result is always NUL-terminated.  Running out of room or meeting any
 * other character after '%' is reported through err_exit().  All pointer
 * arguments must be non-null and buflen non-zero (checked with assert()).
 */
void format_filename(const char *fmt, int jobid, const char *jobname,
                     char *buffer, size_t buflen)
{
    const char *in = fmt;
    char *out = buffer;
    char *limit;

    assert(buffer != 0 && fmt != 0 && buflen != 0 && jobname != 0);
    limit = buffer + buflen - 1;    /* slot kept free for the final NUL */

    for (;;)
    {
        char ch = *in++;

        if (ch == '\0')
            break;
        if (out >= limit)
        {
            err_exit("buffer overflow in %s(): format = %s\n",
                     __func__, fmt);
            continue;
        }
        if (ch != '%')
        {
            *out++ = ch;            /* ordinary character: copy through */
            continue;
        }

        /* ch was '%': the next character selects the conversion */
        ch = *in++;
        switch (ch)
        {
        case '\0':                  /* template ends in a lone '%' */
            *out++ = '%';
            *out = '\0';
            return;
        case '%':
            *out++ = '%';
            break;
        case 's':
        {
            size_t len = strlen(jobname);
            if (len > limit - out)
                err_exit("buffer overflow on jobname in %s(): format = %s\n",
                         __func__, fmt);
            else
            {
                strcpy(out, jobname);
                out += len;
            }
            break;
        }
        case 'd':
        {
            int nchars = snprintf(out, limit - out, "%d", jobid);
            if (nchars < 0 || nchars >= limit - out)
                err_exit("format error on jobid in %s(); format = %s\n",
                         __func__, fmt);
            out += nchars;
            break;
        }
        default:
            err_exit("invalid format character %d in %s(): format = %s\n",
                     ch, __func__, fmt);
            break;
        }
    }
    *out = '\0';
}
Now tested code. Note that it supports the '%%' notation to allow the user to embed a single '%' in the output. Also, it treats a single '%' at the end of the string as valid and equivalent to '%%'. It calls err_exit() on error; you can choose alternative error strategies as suits your system. I simply assume you have included <assert.h>, <stdio.h> and <string.h> and the header for the err_exit() (variadic) function.
Test code...
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Write a printf-style message to stderr, then terminate the process with
 * exit status 1.  Does not return.
 */
static void err_exit(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(1);
}
... then format_filename() as above, then ...
/* Number of elements in a true array (not valid for pointers). */
#define DIM(x) (sizeof(x)/sizeof(*(x)))

/* Templates exercised by main(): both orders of %d/%s, "%%", and a lone
 * trailing '%'. */
static const char *format[] =
{
    "/tmp/%d/name/%s",
    "/tmp/%s/number/%d",
    "/tmp/%s.%d%%",
    "/tmp/%",
};

/* Expand every template with a fixed job id and name and print the result. */
int main(void)
{
    char buffer[64];
    const char **fmt;

    for (fmt = format; fmt < format + DIM(format); fmt++)
    {
        format_filename(*fmt, 1234, "job-name", buffer, sizeof(buffer));
        printf("fmt = %-20s; name = %s\n", *fmt, buffer);
    }
    return(0);
}
Using strtok is error-prone. You can instead treat your variables as a mini-language and process them with (f)lex and yacc. A simple lex specification would look like this:
%{
#include <stdio.h>
%}
%%
%d printf("%04d",jobid);
%s printf("%s",stripspaces(dirname));
%%
I made an ODBC wrapper that would let you do stuff like dbprintf("insert into blah values %s %D %T %Y", stuff here...); But it was many years ago and I bit it and parsed the format string using strtok.
If the number of options is small and you don't otherwise want/need the extra flexibility and complexity of a parser, you could simply search for each potential replacement substring using strstr().
If you have only the two options, you could tolerably create a four-branched if/else structure (only A, only B, both with A before B, both with B before A) in which to call sprintf() with the correctly ordered arguments. Otherwise, make multiple sprintf() calls, each of which replaces only the first replacement-marker in the format string. (This implies building a list of which replacements are needed and sorting them in appearance-order...)

Resources