Getting file extension in C language - c

Say there is a file called 12345.jpg. In C, how can I get the file extension so that I can compare with some file extension? If there are any inbuilt functions, kindly please let me know.

A function to do that, along with a test harness:
#include <stdio.h>
#include <string.h>
const char *getExt (const char *fspec) {
char *e = strrchr (fspec, '.');
if (e == NULL)
e = ""; // fast method, could also use &(fspec[strlen(fspec)]).
return e;
}
int main (int argc, char *argv[]) {
int i;
for (i = 1; i < argc; i++) {
printf ("[%s] - > [%s]\n", argv[i], getExt (argv[i]));
}
return 0;
}
Running this with:
./program abc abc. abc.1 .xyz abc.def abc.def.ghi
gives you:
[abc] - > []
[abc.] - > [.]
[abc.1] - > [.1]
[.xyz] - > [.xyz]
[abc.def] - > [.def]
[abc.def.ghi] - > [.ghi]

Probably:
#include <string.h>
char *extn = strrchr(filename, '.');
That will give you a pointer to the period of the extension, or a null pointer if there is no extension. You might need to do some more due diligence to ensure that there isn't a slash after the dot, amongst other things.

There's a portable CRT solution: _splitpath.
In windows there's also an undocumented shell32 API called PathGetExtension, but that's evil in so many ways that I probably shouldn't have noted that.

Use strchr
First array member will give you filename
Second array member will give you extension

Related

Unable to read memory error comes up every time I use the function below,

I tried to write a function similar to sscanf that forwards the input string pointer, so it could be used like scanf to scan multiple strings one after the other. when I tried to use the function
there was an exception so I ran it in debug and it is unable to read the input Strings value.
int scanStrAndMove(char **readString, char* formatString, char * writeString){
int forwardBy = 0;
while(isspace(*readString[forwardBy])){
forwardBy++;
}
int retVal = sscanf(*readString,formatString,writeString);
forwardBy += strlen(writeString) + strlen(formatString) - 2;
if(retVal > 0) *readString += forwardBy;
return retVal;
}
a screenshot of the problem
the calling of the function(line is of type char*)
edit: I tried to replace readString with another char to and it didn't solve the problem, thanks for all the answers.
edit: I saw the comments and tried to make a minimal reproducible example, try to run this with the function above:
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
char inputStr[10] = " cde fg";
char outputSrt[10];
scanStrAndMove(&inputStr,"%s",outputSrt);
printf("%s",outputSrt);
scanStrAndMove(&inputStr,"%s",outputSrt);
printf("%s",outputSrt)
}

Which lines are necessary to use espeak in our C/C++ program?

I found this code on the internet:
#include <string.h>
#include <malloc.h>
#include <espeak/speak_lib.h>
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 1000, Options=0;
void* user_data;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
char Voice[] = {"English"};
char text[30] = {"this is a english test"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
int main(int argc, char* argv[] )
{
output = AUDIO_OUTPUT_PLAYBACK;
int I, Run = 1, L;
espeak_Initialize(output, Buflength, path, Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en"; //Default to US English
espeak_VOICE voice;
memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
espeak_SetVoiceByProperties(&voice);
Size = strlen(text)+1;
espeak_Synth( text, Size, position, position_type, end_position, flags,
unique_identifier, user_data );
espeak_Synchronize( );
return 0;
}
I only want the espeak reads my strings in my program, and the above code can do it, but I want to know, are all of this code necessary for that purpose? (I mean is it possible to simplifying it?)
***Also I like to know are there a way to using espeak as a system function? I mean system("espeak "something" "); ?
The usage of eSpeak itself seems pretty minimal - you need to read the documentation for that. There are some minor C coding simplifications possible, but perhaps hardly worth the effort:
The memset() is unnecessary. The structure can be initialised to zero thus:
espeak_VOICE voice = {0} ;
If you declare text thus:
char text[] = "this is a English test";
Then you can avoid using strlen() and replace Size with sizeof(text).
The variables I, Run and L are unused and can be removed.
To be able to pass the text as a string on the command line, and thus be able to issue system( "espeak \"Say Something\"") ; for example, you simply need to pass argv[1] to espeak_Synth() instead of text (but you will need to reinstate the strlen() call to get the size.

pattern matching / extracting in c using regex.h

I need help extracting a substring from a string using regex.h in C.
In this example, I am trying to extract all occurrences of character 'e' from a string 'telephone'. Unfortunately, I get stuck identifying the offsets of those characters. I am listing code below:
#include <stdio.h>
#include <regex.h>
int main(void) {
const int size=10;
regex_t regex;
regmatch_t matchStruct[size];
char pattern[] = "(e)";
char str[] = "telephone";
int failure = regcomp(&regex, pattern, REG_EXTENDED);
if (failure) {
printf("Cannot compile");
}
int matchFailure = regexec(&regex, pattern, size, matchStruct, 0);
if (!matchFailure) {
printf("\nMatch!!");
} else {
printf("NO Match!!");
}
return 0;
}
So per GNU's manual, I should get all of the occurrences of 'e' when a character is parenthesized. However, I always get only the first occurrence.
Essentially, I want to be able to see something like:
matchStruct[1].rm_so = 1;
matchStruct[1].rm_so = 2;
matchStruct[2].rm_so = 4;
matchStruct[2].rm_so = 5;
matchStruct[3].rm_so = 7;
matchStruct[3].rm_so = 8;
or something along these lines. Any advice?
Please note that you are in fact not comparing your compiled regex against str ("telephone") but rather to your plain-text pattern. Check your second attribute to regexec. That fixed, proceed for instance to "regex in C language using functions regcomp and regexec toggles between first and second match" where the answer to your question is already given.

How to extract filename from path

There should be something elegant in Linux API/POSIX to extract base file name from full path
See char *basename(char *path).
Or run the command "man 3 basename" on your target UNIX/POSIX system.
Use basename (which has odd corner case semantics) or do it yourself by calling strrchr(pathname, '/') and treating the whole string as a basename if it does not contain a '/' character.
Here's an example of a one-liner (given char * whoami) which illustrates the basic algorithm:
(whoami = strrchr(argv[0], '/')) ? ++whoami : (whoami = argv[0]);
an additional check is needed if NULL is a possibility. Also note that this just points into the original string -- a "strdup()" may be appropriate.
You could use strstr in case you are interested in the directory names too:
char *path ="ab/cde/fg.out";
char *ssc;
int l = 0;
ssc = strstr(path, "/");
do{
l = strlen(ssc) + 1;
path = &path[strlen(path)-l+2];
ssc = strstr(path, "/");
}while(ssc);
printf("%s\n", path);
The basename() function returns the last component of a path, which could be a folder name and not a file name. There are two versions of the basename() function: the GNU version and the POSIX version.
The GNU version can be found in string.h after you include #define _GNU_SOURCE:
#define _GNU_SOURCE
#include <string.h>
The GNU version uses const and does not modify the argument.
char * basename (const char *path)
This function is overridden by the XPG (POSIX) version if libgen.h is included.
char * basename (char *path)
This function may modify the argument by removing trailing '/' bytes. The result may be different from the GNU version in this case:
basename("foo/bar/")
will return the string "bar" if you use the XPG version and an empty string if you use the GNU version.
References:
basename (3) - Linux Man Pages
Function: char * basename (const char *filename), Finding Tokens in a String.
Of course if this is a Gnu/Linux only question then you could use the library functions.
https://linux.die.net/man/3/basename
And though some may disapprove these POSIX compliant Gnu Library functions do not use const. As library utility functions rarely do. If that is important to you I guess you will have to stick to your own functionality or maybe the following will be more to your taste?
#include <stdio.h>
#include <string.h>
int main(int argc, char *argv[])
{
char *fn;
char *input;
if (argc > 1)
input = argv[1];
else
input = argv[0];
/* handle trailing '/' e.g.
input == "/home/me/myprogram/" */
if (input[(strlen(input) - 1)] == '/')
input[(strlen(input) - 1)] = '\0';
(fn = strrchr(input, '/')) ? ++fn : (fn = input);
printf("%s\n", fn);
return 0;
}
template<typename charType>
charType* getFileNameFromPath( charType* path )
{
if( path == NULL )
return NULL;
charType * pFileName = path;
for( charType * pCur = path; *pCur != '\0'; pCur++)
{
if( *pCur == '/' || *pCur == '\\' )
pFileName = pCur+1;
}
return pFileName;
}
call:
wchar_t * fileName = getFileNameFromPath < wchar_t > ( filePath );
(this is a c++)
You can escape slashes to backslash and use this code:
#include <stdio.h>
#include <string.h>
int main(void)
{
char path[] = "C:\\etc\\passwd.c"; //string with escaped slashes
char temp[256]; //result here
char *ch; //define this
ch = strtok(path, "\\"); //first split
while (ch != NULL) {
strcpy(temp, ch);//copy result
printf("%s\n", ch);
ch = strtok(NULL, "\\");//next split
}
printf("last filename: %s", temp);//result filename
return 0;
}
I used a simpler way to get just the filename or last part in a path.
char * extract_file_name(char *path)
{
int len = strlen(path);
int flag=0;
printf("\nlength of %s : %d",path, len);
for(int i=len-1; i>0; i--)
{
if(path[i]=='\\' || path[i]=='//' || path[i]=='/' )
{
flag=1;
path = path+i+1;
break;
}
}
return path;
}
Input path = "C:/Users/me/Documents/somefile.txt"
Output = "somefile.txt"
#Nikolay Khilyuk offers the best solution except.
1) Go back to using char *, there is absolutely no good reason for using const.
2) This code is not portable and is likely to fail on none POSIX systems where the / is not the file system delimiter depending on the compiler implementation. For some windows compilers you might want to test for '\' instead of '/'. You might even test for the system and set the delimiter based on the results.
The function name is long but descriptive, no problem there. There is no way to ever be sure that a function will return a filename, you can only be sure that it can if the function is coded correctly, which you achieved. Though if someone uses it on a string that is not a path obviously it will fail. I would have probably named it basename, as it would convey to many programmers what its purpose was. That is just my preference though based on my bias your name is fine. As far as the length of the string this function will handle and why anyone thought that would be a point? You will unlikely deal with a path name longer than what this function can handle on an ANSI C compiler. As size_t is defined as a unsigned long int which has a range of 0 to 4,294,967,295.
I proofed your function with the following.
#include <stdio.h>
#include <string.h>
char* getFileNameFromPath(char* path);
int main(int argc, char *argv[])
{
char *fn;
fn = getFileNameFromPath(argv[0]);
printf("%s\n", fn);
return 0;
}
char* getFileNameFromPath(char* path)
{
for(size_t i = strlen(path) - 1; i; i--)
{
if (path[i] == '/')
{
return &path[i+1];
}
}
return path;
}
Worked great, though Daniel Kamil Kozar did find a 1 off error that I corrected above. The error would only show with a malformed absolute path but still the function should be able to handle bogus input. Do not listen to everyone that critiques you. Some people just like to have an opinion, even when it is not worth anything.
I do not like the strstr() solution as it will fail if filename is the same as a directory name in the path and yes that can and does happen especially on a POSIX system where executable files often do not have an extension, at least the first time which will mean you have to do multiple tests and searching the delimiter with strstr() is even more cumbersome as there is no way of knowing how many delimiters there might be. If you are wondering why a person would want the basename of an executable think busybox, egrep, fgrep etc...
strrchar() would be cumbersome to implement as it searches for characters not strings so I do not find it nearly as viable or succinct as this solution. I stand corrected by Rad Lexus this would not be as cumbersome as I thought as strrchar() has the side effect of returning the index of the string beyond the character found.
Take Care
My example (improved):
#include <string.h>
const char* getFileNameFromPath(const char* path, char separator = '/')
{
if(path != nullptr)
{
for(size_t i = strlen(path); i > 0; --i)
{
if (path[i-1] == separator)
{
return &path[i];
}
}
}
return path;
}

C libpcap resolve DLT entries, some nasty bug

while sort of writing my own sniffer, I found one example that only starts if it is talking ethernet. Other DLT_types have been ignored. They can be found in pcap-bpf.h I wrote some
lines, that try to implement a missing pcap_resolve_dlt(). It's really nasty code(1), seems to work, though I hit a nasty bug, where one needs to give a space to the corresponding number like:
user#debian:~/tmp$ ./resolve_dlt 114
DLT_LTALK 114
user#debian:~/tmp$ ./resolve_dlt 14
DLT_ATM_RFC1483 11
user#debian:~/tmp$ ./resolve_dlt " 14"
DLT_RAW 14
Maybe the approach itself is totally wrong and one should grep the pcap-bpf.h directly.
1) http://nopaste.info/4a2470cc83.html, uses strstr()
Kind Regards,
Charles
Tags: C libpcap DLT_
You are doing strstr(dlt[i],argv[1]) so the first "14" matches the text in "DLT_ATM_RFC1483", however the text " 14" matches the text in "DLT_RAW 14".
You could use the token-pasting operator to make this work a little better:
#include <stdlib.h>
#include <stdio.h>
#include <pcap-bpf.h>
#define TAB_ENTRY(x) { x, #x }
struct {
long dlt_code;
const char *dlt_name;
} dlt_tab[] = {
TAB_ENTRY(DLT_NULL),
TAB_ENTRY(DLT_EN10MB),
TAB_ENTRY(DLT_EN3MB),
TAB_ENTRY(DLT_AX25),
TAB_ENTRY(DLT_PRONET),
TAB_ENTRY(DLT_CHAOS),
TAB_ENTRY(DLT_IEEE802),
TAB_ENTRY(DLT_ARCNET),
TAB_ENTRY(DLT_SLIP),
TAB_ENTRY(DLT_PPP),
TAB_ENTRY(DLT_FDDI),
TAB_ENTRY(DLT_ATM_RFC1483),
TAB_ENTRY(DLT_RAW),
TAB_ENTRY(DLT_RAW),
TAB_ENTRY(DLT_SLIP_BSDOS),
TAB_ENTRY(DLT_PPP_BSDOS),
TAB_ENTRY(DLT_SLIP_BSDOS),
TAB_ENTRY(DLT_PPP_BSDOS),
TAB_ENTRY(DLT_ATM_CLIP),
TAB_ENTRY(DLT_REDBACK_SMARTEDGE),
TAB_ENTRY(DLT_PPP_SERIAL),
TAB_ENTRY(DLT_PPP_ETHER),
TAB_ENTRY(DLT_SYMANTEC_FIREWALL),
TAB_ENTRY(DLT_C_HDLC),
TAB_ENTRY(DLT_C_HDLC),
TAB_ENTRY(DLT_IEEE802_11),
TAB_ENTRY(DLT_FRELAY),
TAB_ENTRY(DLT_LOOP),
TAB_ENTRY(DLT_LOOP),
TAB_ENTRY(DLT_ENC),
TAB_ENTRY(DLT_ENC),
TAB_ENTRY(DLT_LINUX_SLL),
TAB_ENTRY(DLT_LTALK),
TAB_ENTRY(DLT_ECONET),
TAB_ENTRY(DLT_IPFILTER),
TAB_ENTRY(DLT_PFLOG),
TAB_ENTRY(DLT_CISCO_IOS),
TAB_ENTRY(DLT_PRISM_HEADER),
TAB_ENTRY(DLT_AIRONET_HEADER),
TAB_ENTRY(DLT_HHDLC),
TAB_ENTRY(DLT_IP_OVER_FC),
TAB_ENTRY(DLT_SUNATM),
TAB_ENTRY(DLT_RIO),
TAB_ENTRY(DLT_PCI_EXP),
TAB_ENTRY(DLT_AURORA),
TAB_ENTRY(DLT_IEEE802_11_RADIO),
TAB_ENTRY(DLT_TZSP),
TAB_ENTRY(DLT_ARCNET_LINUX),
TAB_ENTRY(DLT_JUNIPER_MLPPP),
TAB_ENTRY(DLT_JUNIPER_MLFR),
TAB_ENTRY(DLT_JUNIPER_ES),
TAB_ENTRY(DLT_JUNIPER_GGSN),
TAB_ENTRY(DLT_JUNIPER_MFR),
TAB_ENTRY(DLT_JUNIPER_ATM2),
TAB_ENTRY(DLT_JUNIPER_SERVICES),
TAB_ENTRY(DLT_JUNIPER_ATM1),
TAB_ENTRY(DLT_APPLE_IP_OVER_IEEE1394),
TAB_ENTRY(DLT_MTP2_WITH_PHDR),
TAB_ENTRY(DLT_MTP2),
TAB_ENTRY(DLT_MTP3),
TAB_ENTRY(DLT_SCCP),
TAB_ENTRY(DLT_DOCSIS),
TAB_ENTRY(DLT_LINUX_IRDA),
TAB_ENTRY(DLT_IBM_SP),
TAB_ENTRY(DLT_IBM_SN),
TAB_ENTRY(DLT_USER0),
TAB_ENTRY(DLT_USER1),
TAB_ENTRY(DLT_USER2),
TAB_ENTRY(DLT_USER3),
TAB_ENTRY(DLT_USER4),
TAB_ENTRY(DLT_USER5),
TAB_ENTRY(DLT_USER6),
TAB_ENTRY(DLT_USER7),
TAB_ENTRY(DLT_USER8),
TAB_ENTRY(DLT_USER9),
TAB_ENTRY(DLT_USER10),
TAB_ENTRY(DLT_USER11),
TAB_ENTRY(DLT_USER12),
TAB_ENTRY(DLT_USER13),
TAB_ENTRY(DLT_USER14),
TAB_ENTRY(DLT_USER15),
TAB_ENTRY(DLT_IEEE802_11_RADIO_AVS),
TAB_ENTRY(DLT_JUNIPER_MONITOR),
TAB_ENTRY(DLT_BACNET_MS_TP),
TAB_ENTRY(DLT_PPP_PPPD),
TAB_ENTRY(DLT_PPP_PPPD),
TAB_ENTRY(DLT_PPP_PPPD),
TAB_ENTRY(DLT_JUNIPER_PPPOE),
TAB_ENTRY(DLT_JUNIPER_PPPOE_ATM),
TAB_ENTRY(DLT_GPRS_LLC),
TAB_ENTRY(DLT_GPF_T),
TAB_ENTRY(DLT_GPF_F),
TAB_ENTRY(DLT_GCOM_T1E1),
TAB_ENTRY(DLT_GCOM_SERIAL),
TAB_ENTRY(DLT_JUNIPER_PIC_PEER),
TAB_ENTRY(DLT_ERF_ETH),
TAB_ENTRY(DLT_ERF_POS),
TAB_ENTRY(DLT_LINUX_LAPD),
TAB_ENTRY(DLT_JUNIPER_ETHER),
TAB_ENTRY(DLT_JUNIPER_PPP),
TAB_ENTRY(DLT_JUNIPER_FRELAY),
TAB_ENTRY(DLT_JUNIPER_CHDLC),
TAB_ENTRY(DLT_MFR),
TAB_ENTRY(DLT_JUNIPER_VP),
TAB_ENTRY(DLT_A429),
TAB_ENTRY(DLT_A653_ICM),
TAB_ENTRY(DLT_USB),
TAB_ENTRY(DLT_BLUETOOTH_HCI_H4),
TAB_ENTRY(DLT_IEEE802_16_MAC_CPS),
TAB_ENTRY(DLT_USB_LINUX),
TAB_ENTRY(DLT_CAN20B),
TAB_ENTRY(DLT_IEEE802_15_4_LINUX),
TAB_ENTRY(DLT_PPI),
TAB_ENTRY(DLT_IEEE802_16_MAC_CPS_RADIO),
TAB_ENTRY(DLT_JUNIPER_ISM),
TAB_ENTRY(DLT_IEEE802_15_4),
TAB_ENTRY(DLT_SITA),
TAB_ENTRY(DLT_ERF),
TAB_ENTRY(DLT_RAIF1),
TAB_ENTRY(DLT_IPMB),
TAB_ENTRY(DLT_JUNIPER_ST),
TAB_ENTRY(DLT_BLUETOOTH_HCI_H4_WITH_PHDR)
};
int main(int argc, char *argv[])
{
char *endptr = NULL;
long code;
int i, found;
if (argc > 1)
code = strtol(argv[1], &endptr, 0);
if (!endptr || endptr == argv[1]) {
fprintf(stderr, "Usage: %s <dlt_code>\n", argv[0]);
exit(2);
}
found = 0;
for (i = 0; i < (sizeof dlt_tab / sizeof dlt_tab[0]); i++) {
if (dlt_tab[i].dlt_code == code) {
found = 1;
break;
}
}
if (!found) {
printf("%ld not found\n", code);
exit(1);
}
printf("%ld is %s\n", code, dlt_tab[i].dlt_name);
return 0;
}
Example:
$ ./bpf 12
12 is DLT_RAW
$ ./bpf 120
120 is DLT_AIRONET_HEADER
(Note that 12 is DLT_RAW on Linux systems, not 14).
At least with newer versions of libpcap/WinPcap, you can use pcap_datalink_val_to_name to map a DLT_ value to the DLT_ name. resolve_dlt could just use strtol() on its first argument and pass the result to pcap_datalink_val_to_name (after, of course, checking for errors, and for values that don't fit in an int).

Resources