Parse out the file extension from a file-path in C - c

I was previously using the following code to determine if a file was an .exe or .o file and thus set binFile to 1:
/* strstr() matches the needle anywhere inside fpath, not only at its end. */
if(strstr(fpath,".exe") != NULL || strstr(fpath,".o") != NULL)
binFile = 1;
Through debugging, I noticed that this method will also set binFile to 1 with files like foo.out or foo.execute. What I really want is to match '.exe\0' and '.o\0' but strstr() says it ignores the terminating NUL bytes. How should I go about this?
Thanks

#include <stdio.h>
#include <string.h>
/*
 * Report whether haystack finishes with needle.
 * Returns 1 when it does and 0 otherwise; an empty needle always matches.
 */
int endswith(const char* haystack, const char* needle)
{
    const size_t hay_len = strlen(haystack);
    const size_t tail_len = strlen(needle);

    /* A suffix can never be longer than the string it terminates. */
    if (hay_len < tail_len) {
        return 0;
    }

    /* Line the needle up against the last tail_len bytes of haystack. */
    const char *tail = haystack + (hay_len - tail_len);
    if (strcmp(tail, needle) != 0) {
        return 0;
    }
    return 1;
}
/* Command-line driver: reports whether argv[1] ends with argv[2]. */
int main(int argc, char** argv) {
    const int have_both_args = (argc == 3);
    if (!have_both_args) {
        printf("Usage: %s <string> <test-ending>\n", argv[0]);
        return 1;
    }

    const char *text = argv[1];
    const char *ending = argv[2];
    printf("Does \"%s\" end with \"%s\"? ", text, ending);

    /* Negated test, so the "No!" branch comes first. */
    if (!endswith(text, ending)) {
        printf("No!\n");
    } else {
        printf("Yes!\n");
    }
    return 0;
}

/* Everything from the last '.' onward is treated as the extension. */
char *ext = strrchr(fpath, '.');
if (ext != NULL) {
    const int is_exe = (strcmp(ext, ".exe") == 0);
    const int is_obj = (strcmp(ext, ".o") == 0);
    if (is_exe || is_obj) {
        binfile = 1;
    }
}
If your system has the BSD/POSIX strcasecmp, you should probably use that instead of strcmp.

/* Compare the tail of fpath against each extension. The length guards keep
   the subtraction from wrapping on names shorter than the extension.
   strlen() returns size_t, so the length is kept in that type. */
size_t iLen = strlen(fpath);
if ((iLen >= 4 && strcmp(&fpath[iLen - 4], ".exe") == 0)
    || (iLen >= 2 && strcmp(&fpath[iLen - 2], ".o") == 0)) {
    binfile = 1;
}
Edit added test on length, to handle very short file names.

You could check the character immediately after the match returned by strstr (that is, the match plus the length of the search string) to see if it is the terminating NUL. Example:
/* Note: strstr() returns the FIRST ".exe", so a name such as "a.exe.exe"
   is not recognised by this check. */
const char* p = strstr(fpath,".exe");
if (p != NULL && p[4] == '\0') { // 4 is the length of ".exe", so p[4] is the byte right after the match
    binFile = 1;
}

I like to get the extension and then check it.
/* Inspect the text after the last '.', ignoring letter case. */
char *suffix = strrchr(fpath,'.');
if (suffix != NULL)
{
    ++suffix; /* step past the '.' itself */
    if (strcasecmp(suffix,"exe") == 0)
    {
        // you got it
    }
}
Incrementing suffix is okay since you know it points at a period at that point. Incrementing it will at worst make it point at the null termination character, which will not bother strcasecmp at all.
You can easily check against a list of extensions this way too.

one way of doing it:
char * suffix = fpath;
char * i = fpath;
while (*i++) {
if (*i == '.') {
suffix = i;
}
}
if (!strcmp(suffix, ".o") || !strcmp(suffix, ".exe")) {
/* do stuff */
}

You can also use _splitpath/_wsplitpath which is part of the CRT.
http://msdn.microsoft.com/en-us/library/e737s6tf(v=vs.80).aspx

Related

C code to read config file and parse directives

I'm trying to read a config file and parse the config directives. So far I have the following code, I need advice on how to improve this or change it. Is this efficient? Thanks!
/* Configuration record as posted in the question.
   NOTE(review): no ';' follows the closing brace, so this does not compile as written. */
struct config
{
char host; /* a single char, not a string buffer */
char port; /* a single char */
}
/*
 * Print the value of a "host" or "port" directive found in buf.
 *
 * buf is one line of the config file. If it contains "host=" / "host ="
 * (or the "port" equivalents), the text after the first '=' is printed,
 * prefixed with "Host: " or "Port: ". Any other line prints nothing.
 *
 * Blanks after '=' are skipped one by one instead of assuming exactly one,
 * so "host=1.2.3.4" no longer loses the first character of its value.
 */
void parse_line(char *buf) {
    const char *label;

    if(strstr(buf, "host=") || strstr(buf, "host = ") || strstr(buf, "host= ") || strstr(buf, "host =")) {
        label = "Host";
    } else if(strstr(buf, "port=") || strstr(buf, "port = ") || strstr(buf, "port= ") || strstr(buf, "port =")) {
        label = "Port";
    } else {
        return;
    }

    /* Every pattern above contains '=', so this search cannot fail. */
    const char *value = strstr(buf, "=") + 1;
    while (*value == ' ' || *value == '\t') {
        value++;
    }
    printf("%s: %s", label, value);
}
/* Reads the file named by argv[1] line by line and passes each line whose
   first non-space character is not '#' to parse_line().
   NOTE(review): argc is never inspected, and the fopen() result is used
   without a NULL check (see the placeholder comment below). */
int main(int argc, char *argv[])
{
char *file_name;
FILE *file;
file_name = argv[1];
file = fopen(file_name, "r");
// check if file is NULL, etc..
char buffer[BUFSIZ];
char *line; /* not used in this function */
int i;
while(fgets(buffer, sizeof(buffer), file) != NULL) {
/* strlen(buffer) is re-evaluated each time this loop condition is tested */
for(i = 0; i < strlen(buffer); i++) { // iterate through the chars in a line
if(buffer[i] == '#') { // if char is a #, stop processing chars on this line
break;
} else if(buffer[i] == ' ') { // if char is whitespace, continue until something is found
continue;
} else {
parse_line(buffer); // if char is not a # and not whitespace, it is a config directive, parse it
break;
}
}
}
fclose(file);
return 0;
}
I am looking for a way to ignore # if it is a first character on a line, and also lines that are white spaces. I think my code does that, but is that efficient?
EDIT:
Thanks everyone for all the suggestions, I have managed to do this simple code to trim the white spaces, so that I wouldn't need all the strstr() calls.
/*
 * Write src to standard output with every space removed, stopping at the
 * first newline or '#' (or at the end of the string).
 */
void trim(char *src)
{
    for (const char *cursor = src; *cursor != '\0'; ++cursor) {
        const char ch = *cursor;

        /* A newline or the start of a comment ends the directive. */
        if (ch == '\n' || ch == '#') {
            return;
        }
        if (ch != ' ') {
            putchar(ch); // prints: host=1.2.3.4
        }
    }
}
/* Demonstrates trim() on one sample directive that carries a trailing comment. */
int main(void)
{
    char *sample_line = "host = 1.2.3.4 # this is a comment\n";

    trim(sample_line);

    return EXIT_SUCCESS;
}
It prints correctly: host=1.2.3.4 but now I need this in a variable to be further parsed. I think I will try to use strcpy.
EDIT 2:
I do not think that strcpy is the right choice. Those chars are printed out in a loop, so every time I use strcpy, the previous char is overwritten. I have tried this, but it does not work because only the host= part is placed into arr. The IP part is not placed into arr.. how can this be fixed..
/* NOTE(review): if src is the char * parameter of trim(), sizeof(src) is the
   size of that pointer, not the length of the string it points to. */
char arr[sizeof(src)];
for(i = 0; i < len; i++) {
if(src[i] == ' ') {
continue;
}
if(src[i] == '\n' || src[i] == '#') {
break;
}
printf("%c", src[i]); // prints: host=1.2.3.4
/* The destination uses the source index i, so the slots belonging to
   skipped spaces are never written. */
arr[i] = src[i];
}
int j;
for(j = 0; j < sizeof(arr); j++) {
printf("%c", arr[j]); //prints: host=
}
EDIT 3:
I found the correct way of placing chars into arr:
int i, count = 0;
for(i = 0; i < len; i++) {
if(src[i] == ' ') {
continue;
}
if(src[i] == '\n' || src[i] == '#') {
break;
}
arr[count] = src[i];
count++;
}
Your implementation is pretty fragile. Parsers really ought to verify syntax and return errors when they see something unexpected. For example, yours should detect missing fields and multiply defined ones.
Fortunately this parsing problem is simple enough for sscanf to handle everything:
skip blank lines,
skip comments
ignore any amount of whitespace
extract the key/value pairs
Here's code:
#include <stdio.h>
#define CONFIG_SIZE (256)
#define HOST_SET (1)
#define PORT_SET (2)

/* Parsed configuration plus a bitmask recording which fields have been seen. */
typedef struct config {
    unsigned set;            /* bitwise OR of HOST_SET / PORT_SET */
    char host[CONFIG_SIZE];
    unsigned long port;
} CONFIG;

/*
 * Parse one line of configuration text into *config.
 *
 * buf    - NUL-terminated line; leading whitespace is ignored.
 * config - record to update; config->set must have been initialised.
 *
 * Returns 0 for a blank line, a comment (first non-blank character is '#')
 * or a directive that was stored. Returns HOST_SET or PORT_SET when that
 * field had already been set (the new value is still stored), and 3 for
 * anything else.
 *
 * Each string conversion is capped at 255 characters (CONFIG_SIZE - 1), so
 * an over-long token is truncated instead of overflowing dummy or
 * config->host.
 */
int parse_config(char *buf, CONFIG *config) {
    char dummy[CONFIG_SIZE];

    if (sscanf(buf, " %255s", dummy) == EOF) return 0; // blank line
    if (sscanf(buf, " %255[#]", dummy) == 1) return 0; // comment
    if (sscanf(buf, " host = %255s", config->host) == 1) {
        if (config->set & HOST_SET) return HOST_SET; // error; host already set
        config->set |= HOST_SET;
        return 0;
    }
    if (sscanf(buf, " port = %lu", &config->port) == 1) {
        if (config->set & PORT_SET) return PORT_SET; // error; port already set
        config->set |= PORT_SET;
        return 0;
    }
    return 3; // syntax error
}
/* Mark every field of *config as not yet set; host and port themselves are left untouched. */
void init_config(CONFIG *config) {
config->set = 0u;
}
/*
 * Print *config to standard output as "[host=...,port=...]", showing
 * "<unset>" for any field whose bit is not present in config->set.
 * No newline is written.
 */
void print_config(CONFIG *config) {
    const char *host_text = "<unset>";
    if (config->set & HOST_SET) {
        host_text = config->host;
    }
    printf("[host=%s,port=", host_text);

    if (!(config->set & PORT_SET)) {
        printf("<unset>]");
        return;
    }
    printf("%lu]", config->port);
}
/*
 * Parse the configuration file named by argv[1], report every line that
 * parse_config() rejects on stderr, then print the resulting configuration.
 * Returns 1 on a usage error or when the file cannot be opened, else 0.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s CONFIG_FILE\n", argv[0]);
        return 1;
    }
    FILE *f = fopen(argv[1], "r");
    if (f == NULL) {
        /* Without this check fgets() would be handed a NULL stream. */
        perror(argv[1]);
        return 1;
    }
    char buf[CONFIG_SIZE];
    CONFIG config[1];
    init_config(config);
    int line_number = 0;
    while (fgets(buf, sizeof buf, f)) {
        ++line_number;
        int err = parse_config(buf, config);
        if (err) fprintf(stderr, "error line %d: %d\n", line_number, err);
    }
    fclose(f);
    print_config(config);
    return 0;
}
With this input:
# This is a comment
This isn't
# Non-leading comment
host = 123.456.789.10
###
port =42
port= 1
host=fruit.foo.bar
the output is
error line 3: 3
error line 10: 2
error line 11: 1
[host=fruit.foo.bar,port=1]
Note that when the parser discovers a field has already been set, it still uses the latest value in the config. It's easy enough to keep the original instead. I'll let you have that fun.
I think parse_line is a little bit rigid for my taste, I would use strtok
instead. Then you don't have to worry too much about spaces, like you do if you
have a space before the = sign.
Your struct is also wrong, host and port would only hold a character.
Besides port should be an integer. And you need a semicolon ; after the
struct definition.
/* Configuration values filled in by parse_line(). */
struct config
{
char host[100]; /* host name stored as a C string */
int port;
};
/*
 * Parse one "name=value" line into *config.
 *
 * config - record to update; must not be NULL.
 * buf    - line to parse; it is modified in place by strtok().
 *
 * Recognised names are "port" (converted with atoi) and "host" (copied,
 * truncated to fit config->host). Name and value are both passed through
 * trim() before use.
 *
 * Returns 1 when a recognised directive was stored, 0 for NULL arguments,
 * a line with no name or no value, or an unrecognised name.
 */
int parse_line(struct config *config, char *buf)
{
    if(config == NULL || buf == NULL)
        return 0;

    char varname[100];
    char value[100];
    const char* sep = "=\n"; // get also rid of newlines
    char *token;

    token = strtok(buf, sep);
    if(token == NULL)
    {
        // empty line: strtok found nothing to hand to strncpy
        return 0;
    }
    strncpy(varname, token, sizeof varname);
    varname[sizeof(varname) - 1] = 0; // making sure that varname is C-String
    trim(varname);

    token = strtok(NULL, sep);
    if(token == NULL)
    {
        // line not in format var=val
        return 0;
    }
    strncpy(value, token, sizeof value);
    value[sizeof(value) - 1] = 0; // terminate value using its own size
    trim(value);

    if(strcmp(varname, "port") == 0)
    {
        config->port = atoi(value);
        return 1;
    }
    if(strcmp(varname, "host") == 0)
    {
        strncpy(config->host, value, sizeof config->host);
        config->host[(sizeof config->host) - 1] = 0;
        return 1;
    }
    // var=val not recognized
    return 0;
}
Note that I used a function called trim. This function is not part of the
standard library. Below I posted a possible implementation of such a function.
I like using trim because it gets rid of white spaces. Now you can do this in
main:
struct config config;
// initializing
config.host[0] = 0;
config.port = 0;
int linecnt = 0;
while(fgets(buffer, sizeof(buffer), file) != NULL) {
    linecnt++;
    trim(buffer);
    /* Lines starting with '#' are skipped without being parsed; every
       other line that parse_line() rejects is reported and ignored. */
    if(buffer[0] != '#' && !parse_line(&config, buffer))
    {
        fprintf(stderr, "Error on line %d, ignoring.\n", linecnt);
    }
}
A possible implementation of trim
/*
 * Remove trailing whitespace (as classified by isspace) from src in place.
 * A NULL or empty src is left alone.
 */
void rtrim(char *src)
{
    if(src == NULL) return;

    size_t len = strlen(src);

    /* isspace() requires a value representable as unsigned char, so the
       cast protects against negative char values. */
    while(len > 0 && isspace((unsigned char)src[len - 1]))
    {
        len--;
        src[len] = 0;
    }
}
/*
 * Remove leading whitespace (as classified by isspace) from src in place by
 * shifting the rest of the string, terminator included, to the front.
 * A NULL or empty src is left alone.
 */
void ltrim(char *src)
{
    if(src == NULL) return;

    /* isspace() requires a value representable as unsigned char. The scan
       stops at the terminator because '\0' is not whitespace. */
    size_t lead = 0;
    while(isspace((unsigned char)src[lead]))
        lead++;

    if(lead > 0)
        memmove(src, src + lead, strlen(src + lead) + 1);
}
/* Strip whitespace from both ends of src in place: trailing first (rtrim), then leading (ltrim). */
void trim(char *src)
{
rtrim(src);
ltrim(src);
}
There are a few ways that you can improve performance:
Calling strstr() in this scenario is inefficient, because the presence of the "host" part of buf can be checked once instead of multiple times every time strstr() is called. Instead, make an if statement that checks if buf begins with "host", then check if buf contains the other elements. The same thing applies to the portion of code checking for the presence of "port".
In the loop in main, instead of doing this:
for(i = 0; i < strlen(buffer); i++) { // iterate through the chars in a line
if(buffer[i] == '#') { // if char is a #, stop processing chars on this line
break;
} else if(buffer[i] == ' ') { // if char is whitespace, continue until something is found
continue;
} else {
parse_line(buffer); // if char is not a # and not whitespace, it is a config directive, parse it
break;
}
do this:
for(i = 0; i < strlen(buffer); i++) { // iterate through the chars in a line
char temp = buffer[i];
if(temp == '#') { // if char is a #, stop processing chars on this line
break;
} else if (temp != ' ') {
parse_line(buffer); // if char is not a # and not whitespace, it is a config directive, parse it
break;
}
Checking to see if something is not equal to another is likely to be just as fast as checking if they are equal (at least on Intel, the je (jump equal) and jne (jump not equal) instructions exhibit the same latency of 1 cycle each), so the statement with the continue in it is not necessary. The temp variable is so that buffer[i] does not need to be calculated in the second if again in case the first if is false. Also, do what user3121023 stated below (same reason for performance as creating the temp variable).
You can use operating-system-specific functions (such as those from the library WINAPI/WIN32/WIN64 (synonyms) on Windows) instead of C standard library functions. Microsoft has very good documentation about their functions in the MSDN (Microsoft Developer Network) web site.
Use uint_fast8_t (defined in stdint.h, this typedef is set to the fastest integer type greater than or equal to the size in bits specified in the typedef) when performing operations on the host and port (but use chars when storing the variables on the disk, in order to make read i/o operations faster).
This isn't related to performance , but use return EXIT_SUCCESS; in main instead of return 0;, since using EXIT_SUCCESS is more readable and exhibits the same performance.
Honestly, I can't help but wonder if rolling your own parser is so great.
Why not use an existing JSON or YAML parser and test for keys in the parsed data?
This will be easily extendible by allowing for new keys to be added with very little effort and the common format of the configuration file makes it very easy for developers to edit.
If you are going to roll out your own parser, then some of the previously mentioned advice makes a lot of sense.
The biggest ones are: don't seek the whole buffer, read the single line that's in front of you and report any errors. Also, advance as you go.
Your parser should work correctly if someone would dump a GigaByte of garbage into the configuration file, so make no assumptions about the data.

Is there a fast way to interpose a character between strings?

I wrote this function that will generate a single string out of a file list.
(e.g. if I have a folder with FileA.txt, FileB.png and FileC I'll get as output this string: FileA.txtFileB.pngFileC). Now I want to add a / character between each filename. (e.g. FileA.txt/FileB.png/FileC/) Is there a way to do it in "one blow" without having to repeat the same operation twice?
In other words, is there a way to do something like:
original_string = append2(original_string, new_string, '/');
instead of having to do
append(original_string, new_string);
append(original_string, "/");
?
Here's the function I wrote as reference:
/**
* #brief Concatenate all file names in a file list (putting a '/' between each of them)
* #param file_list The file list to serialize.
* #return A string containing all files in the file list.
*/
char *file_list_tostring(struct file_list *file_list) {
char *final_string = NULL;
size_t final_len = 0;
struct file_node *list_iter = file_list->first;
while (list_iter != NULL) {
char *tmp = list_iter->filename;
size_t tmp_len = strlen(tmp);
char *s = realloc(final_string, final_len + tmp_len + 1); // +1 for '\0'
if (s == NULL) {
perror("realloc");
exit(EXIT_FAILURE);
}
final_string = s;
memcpy(final_string + final_len, tmp, tmp_len + 1);
final_len += tmp_len;
list_iter = list_iter->next;
}
return final_string;
}
Maybe there is a simple way to interpose a single character between two strings?
Note: I know there's nothing wrong in repeating the same operation twice, I'm asking this question to know if there is a better way of doing so!
Yes, you can do sprintf:
#include <stdio.h>
/*
 * Join three file names with '/' into a fixed buffer and print the result.
 * snprintf() bounds the write, and its return value is checked so that a
 * truncated result is reported instead of printed.
 */
int main(void)
{
    char var1[] = "FileA.txt";
    char var2[] = "FileB.png";
    char var3[] = "FileC";
    char result[30];

    int written = snprintf(result, sizeof result, "%s/%s/%s", var1, var2, var3);
    if (written < 0 || (size_t)written >= sizeof result) {
        fprintf(stderr, "result buffer is too small\n");
        return 1;
    }
    printf("result: %s\n", result);
    return 0;
}
And the result is like this:
result: FileA.txt/FileB.png/FileC
If you need, the variable result can be a pointer and allocate space based on your needs.
As Michael Burr mentioned in a comment to the question, it is best to walk the list/array twice. On the first pass, calculate the total length of the string needed. Next, allocate the memory needed for the entire string. On the second pass, copy the contents. Do not forget to account for, and append, the string-terminating nul byte (\0).
Consider the following example functions dupcat() and dupcats():
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
/*
 * Concatenate the 'count' strings in parts[] into one newly allocated
 * string. NULL entries are treated as empty strings.
 *
 * The buffer is rounded up so the text is followed by 9 to 16 '\0' bytes.
 * Never returns NULL: on allocation failure, or if the sanity check
 * detects an overrun, a message goes to stderr and the process exits.
 */
char *dupcat(const size_t count, const char *parts[])
{
    /* First pass: total length of all non-NULL parts. */
    size_t total = 0;
    for (size_t k = 0; k < count; k++) {
        if (parts[k] != NULL)
            total += strlen(parts[k]);
    }

    /* Round up to a multiple of 8, then add 9 bytes for terminator and padding. */
    const size_t capacity = (total | 7) + 9;

    char *buffer = malloc(capacity);
    if (buffer == NULL) {
        fprintf(stderr, "dupcat(): Out of memory; tried to allocate %zu bytes.\n", capacity);
        exit(EXIT_FAILURE);
    }

    /* Second pass: copy each part, byte by byte, right after the previous one. */
    char *cursor = buffer;
    for (size_t k = 0; k < count; k++) {
        const char *piece = parts[k];
        if (piece == NULL)
            continue;
        while (*piece != '\0') {
            *cursor = *piece;
            cursor++;
            piece++;
        }
    }

    /* The parts must not have grown between the two passes. */
    if (cursor >= buffer + capacity) {
        fprintf(stderr, "dupcat(): Arguments were modified during duplication; buffer overrun!\n");
        free(buffer); /* We can omit this free(), but only in case of exit(). */
        exit(EXIT_FAILURE);
    }

    /* Zero everything from the end of the text to the end of the buffer. */
    memset(cursor, '\0', (size_t)(buffer + capacity - cursor));
    return buffer;
}
/*
 * Variadic counterpart of dupcat(): concatenate 'count' const char *
 * arguments into one newly allocated string. NULL pointers are treated as
 * empty strings.
 *
 * The buffer is rounded up so the text is followed by 9 to 16 '\0' bytes.
 * Never returns NULL: on allocation failure, or if the sanity check
 * detects an overrun, a message goes to stderr and the process exits.
 */
char *dupcats(const size_t count, ...)
{
    va_list ap;

    /* First walk over the arguments: total length of the source strings. */
    size_t total = 0;
    va_start(ap, count);
    for (size_t k = 0; k < count; k++) {
        const char *piece = va_arg(ap, const char *);
        total += (piece != NULL) ? strlen(piece) : 0;
    }
    va_end(ap);

    /* Round up to a multiple of 8, then add 9 bytes for terminator and padding. */
    const size_t capacity = (total | 7) + 9;

    char *buffer = malloc(capacity);
    if (buffer == NULL) {
        fprintf(stderr, "dupcats(): Out of memory; tried to allocate %zu bytes.\n", capacity);
        exit(EXIT_FAILURE);
    }

    /* Second walk: copy each string, byte by byte, after the previous one. */
    char *cursor = buffer;
    va_start(ap, count);
    for (size_t k = 0; k < count; k++) {
        const char *piece = va_arg(ap, const char *);
        if (piece == NULL)
            continue;
        while (*piece != '\0') {
            *cursor = *piece;
            cursor++;
            piece++;
        }
    }
    va_end(ap);

    /* The strings must not have grown between the two walks. */
    if (cursor >= buffer + capacity) {
        fprintf(stderr, "dupcats(): Arguments were modified during duplication; buffer overrun!\n");
        free(buffer); /* We can omit this free(), but only in case of exit(). */
        exit(EXIT_FAILURE);
    }

    /* Zero everything from the end of the text to the end of the buffer. */
    memset(cursor, '\0', buffer + capacity - cursor);
    return buffer;
}
/* Demonstrates dupcat() on the command-line arguments and dupcats() on a fixed list. */
int main(int argc, char *argv[])
{
    /* Everything after the program name, joined with no separator. */
    char *from_args = dupcat(argc - 1, (const char **)(argv + 1));
    printf("Arguments concatenated: '%s'.\n", from_args);
    free(from_args);

    char *from_list = dupcats(5, "foo", "/", "bar", "/", "baz");
    printf("Concatenating 'foo', '/', 'bar', '/', and 'baz': '%s'.\n", from_list);
    free(from_list);

    return EXIT_SUCCESS;
}
Neither dupcat() nor dupcats() will ever return NULL: they will print an error message to standard error and exit, if an error occurs.
dupcat() takes an array of strings, and returns a dynamically allocated concatenated copy with at least eight bytes of nul padding.
dupcats() takes a variable number of pointers, and returns a dynamically allocated concatenated copy with at least eight bytes of nul padding.
Both functions treat NULL pointers as if they were empty strings. For both functions, the first parameter is the number of strings to concatenate.
(Since OP did not show the definitions of struct file_list or struct file_node, I did not bother to write a list-based version. However, it should be trivial to adapt from one of the two versions shown.)
In some cases, a variant that constructs a valid path from a fixed base part, with one or more relative file or directory names concatenated, and POSIXy ./ removed and ../ backtracked (but not out of base subtree), is very useful.
If carefully written, it allows the program to accept untrusted paths, relative to a specific subtree. (The combined paths are confined to that subtree, but symlinks and hardlinks can still be used to escape the subtree.)
One possible implementation is as follows:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
/*
 * Build a path from a fixed prefix plus a list of path parts.
 *
 * subtree - fixed prefix copied verbatim to the front of the result;
 *           NULL is handled like an empty string.
 * parts   - number of entries in part[].
 * part    - path parts to append; NULL and empty entries are skipped.
 *
 * The appended portion is then cleaned up in place: runs of '/' and "/."
 * collapse to a single '/', and "/.." removes the preceding component
 * without ever backing up past the end of the prefix.
 *
 * Returns a buffer obtained from malloc(), or NULL with errno set to ENOMEM
 * when the allocation fails. If the internal sanity check detects an
 * overrun, a message is printed and the process exits.
 */
char *dynamic_path(const char *const subtree,
const size_t parts,
const char *part[])
{
const size_t subtree_len = (subtree) ? strlen(subtree) : 0;
size_t parts_len = 0;
size_t total_len, i;
char *path, *mark, *curr;
/* Calculate the length of each individual part.
Include room for a leading slash.
*/
for (i = 0; i < parts; i++)
parts_len += (part[i]) ? 1 + strlen(part[i]) : 0;
/* Add room for the string-terminating '\0'.
We're paranoid, and add a bit more padding. */
total_len = ((subtree_len + parts_len) | 7) + 9;
/* Allocate memory for the combined path. */
path = malloc(total_len);
if (!path) {
errno = ENOMEM;
return NULL;
}
/* If the user specified a subtree, we use it as the fixed prefix. */
/* mark is the boundary the cleanup pass below never moves back across. */
if (subtree_len > 0) {
memcpy(path, subtree, subtree_len);
mark = path + subtree_len;
/* Omit a trailing /. We enforce it below anyway. */
if (parts > 0 && subtree_len > 1 && mark[-1] == '/')
--mark;
} else
mark = path;
/* Append the additional path parts. */
curr = mark;
for (i = 0; i < parts; i++) {
const size_t len = (part[i]) ? strlen(part[i]) : 0;
if (len > 0) {
/* Each path part is a separate file/directory name,
so there is an (implicit) slash before each one. */
if (part[i][0] != '/')
*(curr++) = '/';
memcpy(curr, part[i], len);
curr += len;
}
}
/* Sanity check. */
if (curr >= path + total_len) {
/* Buffer overrun occurred. */
fprintf(stderr, "Buffer overrun in dynamic_path()!\n");
free(path); /* Can be omitted if we exit(). */
exit(EXIT_FAILURE);
}
/* Terminate string (and clear padding). */
memset(curr, '\0', (size_t)(path + total_len - curr));
/* Cleanup pass.
Convert "/foo/../" to "/", but do not backtrack over mark.
Combine consecutive slashes and /./ to a single slash.
*/
{
/* src reads the unprocessed text and dst writes the cleaned text into
   the same buffer; dst never gets ahead of src. */
char *src = mark;
char *dst = mark;
while (*src)
if (src[0] == '/' && src[1] == '.' && src[2] == '.' && (!src[3] || src[3] == '/')) {
src += 3; /* Skip over /.. */
/* Backtrack, but do not underrun mark. */
if (dst > mark) {
dst--;
while (dst > mark && *dst != '/')
dst--;
}
/* Never consume the mark slash. */
if (dst == mark)
dst++;
} else
if (src[0] == '/' && src[1] == '.' && (!src[2] || src[2] == '/')) {
src += 2; /* Skip over /. */
/* Emit a slash only if the output does not already end with one. */
if (dst == mark || dst[-1] != '/')
*(dst++) = '/';
} else
if (src[0] == '/') {
src++;
if (dst == mark || dst[-1] != '/')
*(dst++) = '/';
} else
*(dst++) = *(src++);
/* Clear removed part. */
if (dst < src)
memset(dst, '\0', (size_t)(src - dst));
}
return path;
}
/*
 * Command-line driver for dynamic_path(): argv[1] is the prefix and any
 * further arguments are the parts to append. Prints the combined path.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "\nUsage: %s PREFIX [ PATHNAME ... ]\n\n", argv[0]);
        return EXIT_FAILURE;
    }

    char *combined = dynamic_path(argv[1], argc - 2, (const char **)(argv + 2));
    if (combined != NULL) {
        printf("%s\n", combined);
        free(combined);
        return EXIT_SUCCESS;
    }

    /* dynamic_path() left the reason for the failure in errno. */
    fprintf(stderr, "dynamic_path(): %s.\n", strerror(errno));
    return EXIT_FAILURE;
}
Note that I wrote the above version from scratch (and dedicate it to the public domain (CC0)), so you should thoroughly test it before relying on it in production use. (My intent is for it to be a useful example or basis that will help you write your own implementation tailored to your needs.)
If you do find any bugs or issues in it, let me know in a comment, so I can verify and fix.

conflict of using strcpy , strcat in C?

In the following code I'm trying to load a text file of words character by character
then I'm trying to save each whole word in hash table (array of strings)
but it seems that strcpy saves a whole word not a single char and I don't know why. Am I misusing strcpy and strcat?
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
# include <stdbool.h>
bool load(const char* dictionary);
#define LENGTH 45
/*
 * Load the dictionary file named by argv[1].
 * Returns 0 when load() succeeds, and 1 on a usage error or load failure.
 */
int main (int argc, char* argv[])
{
    /* Without this check a missing argument would pass NULL to fopen(). */
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s DICTIONARY\n", argc > 0 ? argv[0] : "program");
        return 1;
    }
    char* dictionary = argv[1];
    return load(dictionary) ? 0 : 1;
}
/*
 * Read the file named by 'dictionary' one character at a time, assemble
 * words, and store a heap copy of each word in a local table indexed by a
 * hash of the word.
 *
 * Returns false (after printing a message) when the file cannot be opened,
 * and true once the whole file has been read.
 */
bool load(const char* dictionary)
{
int index = 0, words = 0, kk = 0;
int lastl = 0, midl = 0;
char word[LENGTH + 1];
/* Local to this function and not initialised: entries hold indeterminate
   pointers until they are assigned below. */
char *wholeword[1001];
FILE* dic = fopen(dictionary, "r");
if (dic == NULL)
{
printf("Could not open %s.\n", dictionary);
return false;
}
for (int c = fgetc(dic); c != EOF; c = fgetc(dic))
{
// allow only alphabetical characters and apostrophes
if (isalpha(c) || (c == '\'' && index > 0))
{
// append character to word
word[index] = c;
index++;
// ignore alphabetical strings too long to be words
if (index > LENGTH)
{
// consume remainder of alphabetical string
while ((c = fgetc(dic)) != EOF && isalpha(c));
// prepare for new word
index = 0;
}
}
// ignore words with numbers (like MS Word can)
else if (isdigit(c))
{
// consume remainder of alphanumeric string
while ((c = fgetc(dic)) != EOF && isalnum(c));
// prepare for new word
index = 0;
}
// we must have found a whole word
else if (index > 0)
{
// terminate current word
word[index] = '\0';
/* lastl: index of the last character; midl: an index in 0..2 derived
   from the word length */
lastl = index - 1;
midl = (index - 1) % 3;
words++;
index = 0;
/* Hash built from the first, last and "mid" characters, reduced modulo 1000. */
int hashi = (word[0] + word[lastl]) * (word[midl] + 17) % 1000;
/* lastl + 2 bytes: the word's lastl + 1 characters plus the terminator */
wholeword[hashi] = (char*) malloc(sizeof(char) * (lastl + 2));
/* strcpy copies the whole word, terminator included. */
strcpy(wholeword[hashi], &word[0]); // ***
for (kk = 1; kk <= lastl + 1; kk++)
{
/* NOTE(review): the destination is indexed by 'words', not 'hashi', so it
   is generally a different and possibly never-assigned entry. */
strcat(wholeword[words], &word[kk]);
}
}
}
fclose(dic);
return true;
}
Strcpy doesn't copy a single char, it copies all chars until the next null ('\0') byte. To copy a single char in your code try:
wholeword[hashi] = &word[0];
instead of:
strcpy(wholeword[hashi], &word[0]);
Yes you are misusing strcpy and strcat: these functions copy a whole source string to the destination array (at the end of an existing string there for strcat).
The following lines:
wholeword[hashi] = (char*) malloc(sizeof(char) * (lastl + 2));
strcpy(wholeword[hashi], &word[0]); // ***
for (kk = 1; kk <= lastl + 1; kk++)
{
strcat(wholeword[words], &word[kk]);
}
}
Can be replaced with a single call to
wholeword[hashi] = strdup(word);
strdup() allocates the memory, copies the argument string to it and returns the pointer. It is available on all Posix systems, if you do not have it, use these 2 lines:
wholeword[hashi] = malloc(lastl + 2);
strcpy(wholeword[hashi], word);
Notes:
you assume your hash to be perfect, without collisions. As currently coded, a collision causes the previous word to be removed from the dictionary and its corresponding memory to be lost.
the dictionary char *wholeword[1001]; is a local variable in the load function. It is uninitialized, so there is no way to know if an entry is a valid pointer to a word. It should be allocated, initialized to NULL and returned to the caller.

Extract the file name and its extension in C

So we have a path string /home/user/music/thomas.mp3.
Where is the easy way to extract file name(without extension, "thomas") and it's extension ("mp3") from this string? A function for filename, and for extension. And only GNU libc in our hands.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_FILENAME_SIZE 256
/*
 * Return str without its extension (the text from the last '.' onward).
 *
 * When str has no '.', or its only '.' is the first character (for example
 * ".bashrc"), str itself is returned. Otherwise the result is a new string
 * from malloc() that the caller must free; compare the result with str to
 * tell the two cases apart. Returns NULL if the allocation fails.
 *
 * The copy is sized from the actual stem length, so long names are no
 * longer cut off at the size of a pointer.
 */
char *filename(char *str) {
    char *last = strrchr(str, '.');

    /* No extension to strip: hand the input back unchanged. */
    if (last == NULL || last == str)
        return str;

    const size_t stem_len = (size_t)(last - str);
    char *result = malloc(stem_len + 1);
    if (result == NULL)
        return NULL;

    memcpy(result, str, stem_len);
    result[stem_len] = '\0';
    return result;
}
/*
 * Return the extension of str: the text after its last '.'.
 *
 * When str has no '.', or its only '.' is the first character (for example
 * ".bashrc"), a pointer to a constant empty string is returned; it must not
 * be freed or written to. Otherwise the result is a new string from
 * malloc() that the caller must free. Returns NULL if the allocation fails.
 *
 * The copy is sized from the actual extension length, so long extensions
 * are no longer cut off at the size of a pointer.
 */
char *extname(char *str) {
    char *last = strrchr(str, '.');

    if (last == NULL || last == str)
        return ""; // Empty/NULL string

    const char *ext = last + 1;
    const size_t ext_size = strlen(ext) + 1; /* includes the terminator */
    char *result = malloc(ext_size);
    if (result == NULL)
        return NULL;

    memcpy(result, ext, ext_size);
    return result;
}
Use basename to get the filename and then you can use something like this to get the extension.
/*
 * Return a pointer to the extension inside filename: the text after its
 * last '.'. Returns an empty string when there is no '.' or when the only
 * '.' is the first character.
 *
 * Nothing is allocated. The result either points into filename or at a
 * constant empty string, so it must not be freed or written through.
 */
char *get_filename_ext(const char *filename) {
    const char *dot = strrchr(filename, '.');
    if(!dot || dot == filename) return "";
    /* The return type is kept for existing callers, so the const
       qualifier is dropped explicitly rather than implicitly. */
    return (char *)(dot + 1);
}
Edit:
Try something like.
#include <string.h>
#include <libgen.h>
/*
 * Print "<base>.<ext>" for path, where <base> is what basename() returns
 * and <ext> is what get_filename_ext() returns for that base.
 */
static void printFileInfo(char *path) {
    /* basename() is given a duplicate, never the caller's own string. */
    char *scratch = strdup(path);
    char *leaf = basename(scratch);
    const char *ext = get_filename_ext(leaf);

    printf("%s.%s\n", leaf, ext);

    free(scratch);
}
Regarding your actual code (all the other answers so far say to scrap that and do something else, which is good advice, however I am addressing your code as it contains blunders that it'd be good to learn about in advance of next time you try to write something).
Firstly:
strncpy(str, result, (size_t) (last-str) + 1);
is not good. You have dest and src around the wrong way; and further this function does not null-terminate the output (unless the input is short enough, which it isn't). Generally speaking strncpy is almost never a good solution to a problem; either strcpy if you know the length, or snprintf.
Simpler and less error-prone would be:
snprintf(result, sizeof result, "%.*s", (int)(last - str), str);
Similarly, in the other function,
snprintf(result, sizeof result, "%s", last + 1);
The snprintf function never overflows buffer and always produces a null-terminated string, so long as you get the buffer length right!
Now, even if you fixed those then you have another fundamental problem in that you are returning a pointer to a buffer that is destroyed when the function returns. You could fix ext by just returning last + 1, since that is null-terminated anyway. But for filename you have the usual set of options:
return a pointer and a length, and treat it as a length-counted string, not a null-terminated one
return pointer to mallocated memory
return pointer to static buffer
expect the caller to pass in a buffer and a buffer length, which you just write into
Finally, returning NULL on failure is probably a bad idea; if there is no . then return the whole string for filename, and an empty string for ext. Then the calling code does not have to contort itself with checks for NULL.
Here is a routine I use for that problem:
Separates original string into separate strings of path, file_name and extension.
Will work for Windows and Linux, relative or absolute style paths. Will handle directory names with embedded ".". Will handle file names without extensions.
/////////////////////////////////////////////////////////
//
// Example:
// Given path == "C:\\dir1\\dir2\\dir3\\file.exe"
// will return path_ as "C:\\dir1\\dir2\\dir3"
// Will return base_ as "file"
// Will return ext_ as "exe"
//
/////////////////////////////////////////////////////////
/*
 * Split 'path' into directory, base name and extension.
 *
 * path  - input path. It is modified while parsing (first character
 *         replaced, tokenised by strtok) and rewritten from a saved copy
 *         before returning.
 * path_ - receives the directory part.
 * base_ - receives the file name without its extension.
 * ext_  - receives the extension without the '.'.
 *
 * Nothing is written when path is NULL or does not start with one of the
 * forms tested below ("X:\", "\", "/", "./", ".\").
 * NOTE(review): MAX_PATHNAME_LEN is not defined in the code shown here, and
 * the output buffers are filled with strcpy, so the caller is presumably
 * expected to size them for the full path; confirm.
 */
void GetFileParts(char *path, char *path_, char *base_, char *ext_)
{
char *base;
char *ext;
char nameKeep[MAX_PATHNAME_LEN];
char pathKeep[MAX_PATHNAME_LEN];
char pathKeep2[MAX_PATHNAME_LEN]; //preserve original input string
char File_Ext[40];
char baseK[40];
int lenFullPath, lenExt_, lenBase_;
char *sDelim={0};
int iDelim=0;
int rel=0, i;
if(path)
{ //determine type of path string (C:\\, \\, /, ./, .\\)
if( (strlen(path) > 1) &&
(
((path[1] == ':' ) &&
(path[2] == '\\'))||
(path[0] == '\\') ||
(path[0] == '/' ) ||
((path[0] == '.' ) &&
(path[1] == '/' ))||
((path[0] == '.' ) &&
(path[1] == '\\'))
)
)
{
/* sDelim is the delimiter as a string (for strtok/strcat) and iDelim the
   same delimiter as a character. The calloc result is not checked. */
sDelim = calloc(5, sizeof(char));
/* // */if(path[0] == '\\') iDelim = '\\', strcpy(sDelim, "\\");
/* c:\\ */if(path[1] == ':' ) iDelim = '\\', strcpy(sDelim, "\\"); // also satisfies path[2] == '\\'
/* / */if(path[0] == '/' ) iDelim = '/' , strcpy(sDelim, "/" );
/* ./ */if((path[0] == '.')&&(path[1] == '/')) iDelim = '/' , strcpy(sDelim, "/" );
/* .\\ */if((path[0] == '.')&&(path[1] == '\\')) iDelim = '\\' , strcpy(sDelim, "\\" );
/* \\\\ */if((path[0] == '\\')&&(path[1] == '\\')) iDelim = '\\', strcpy(sDelim, "\\");
/* Temporarily replace a leading '.' so it is not found by the search for
   '.' below; path_[0] is set back to '.' at the end. */
if(path[0]=='.')
{
rel = 1;
path[0]='*';
}
if(!strstr(path, ".")) // if no filename, set path to have trailing delim,
{ //set others to "" and return
lenFullPath = strlen(path);
if(path[lenFullPath-1] != iDelim)
{
/* Appends to the caller's buffer; the outputs are cleared only here. */
strcat(path, sDelim);
path_[0]=0;
base_[0]=0;
ext_[0]=0;
}
}
else
{
nameKeep[0]=0; //works with C:\\dir1\file.txt
pathKeep[0]=0;
pathKeep2[0]=0; //preserves *path
File_Ext[0]=0;
baseK[0]=0;
//Get lenth of full path
lenFullPath = strlen(path);
strcpy(nameKeep, path);
strcpy(pathKeep, path);
strcpy(pathKeep2, path);
strcpy(path_, path); //capture path
//Get length of extension:
for(i=lenFullPath-1;i>=0;i--)
{
if(pathKeep[i]=='.') break;
}
lenExt_ = (lenFullPath - i) -1;
/* Walk the delimiter-separated tokens; File_Ext ends up holding the last one. */
base = strtok(path, sDelim);
while(base)
{
strcpy(File_Ext, base);
base = strtok(NULL, sDelim);
}
strcpy(baseK, File_Ext);
lenBase_ = strlen(baseK) - lenExt_;
baseK[lenBase_-1]=0; /* cut baseK just before the '.' */
strcpy(base_, baseK);
path_[lenFullPath -lenExt_ -lenBase_ -1] = 0;
/* The first token is discarded; the second token is taken as the extension. */
ext = strtok(File_Ext, ".");
ext = strtok(NULL, ".");
if(ext) strcpy(ext_, ext);
else strcpy(ext_, "");
}
/* NOTE(review): this restore also runs after the no-'.' branch above,
   where pathKeep2 was never written; confirm that is intended. */
memset(path, 0, lenFullPath);
strcpy(path, pathKeep2);
if(rel)path_[0]='.';//replace first "." for relative path
free(sDelim);
}
}
}
Here is an old-school algorithm that will do the trick.
char path[100] = "/home/user/music/thomas.mp3";
int offset_extension, offset_name;
int len = strlen(path);
int i;
for (i = len; i >= 0; i--) {
if (path[i] == '.')
break;
if (path[i] == '/') {
i = len;
break;
}
}
if (i == -1) {
fprintf(stderr,"Invalid path");
exit(EXIT_FAILURE);
}
offset_extension = i;
for (; i >= 0; i--)
if (path[i] == '/')
break;
if (i == -1) {
fprintf(stderr,"Invalid path");
exit(EXIT_FAILURE);
}
offset_name = i;
char *extension, name[100];
extension = &path[offset_extension+1];
memcpy(name, &path[offset_name+1], offset_extension - offset_name - 1);
Then you have both pieces of information in the variables name and extension:
printf("%s %s", name, extension);
This will print:
thomas mp3
I know this is old. But I tend to use strtok for things like this.
/* strtok example */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_TOKENS 20 /* Maximum number of tokens that are collected */
#define MAX_STRING 128 /* Maximum token size, including the terminating NUL */
/* Splits a sample path on '.' and '/' and prints every token on its own line.
   Tokens beyond MAX_TOKENS are ignored and tokens longer than
   MAX_STRING - 1 characters are truncated. */
int main ()
{
    char str[] ="/home/user/music/thomas.mp3";
    char sep[] = "./";
    char collect[MAX_TOKENS][MAX_STRING];
    /* Start from empty strings so every slot is well defined. */
    memset(collect, 0, sizeof collect);
    int ccount = 0;
    /* collect all separated text, stopping before the array overflows */
    for(char * pch = strtok (str, sep);
        pch != NULL && ccount < MAX_TOKENS;
        pch = strtok (NULL, sep)) {
        /* snprintf always NUL-terminates and never writes past MAX_STRING */
        snprintf( collect[ccount++], MAX_STRING, "%s", pch);
    }
    /* output tokens. */
    for(int i=0; i<ccount; ++i)
        printf ("Token: %s\n", collect[i]);
    return 0;
}
This is a rough example, and it makes it easy to deal with the tokens afterwards, i.e. the last token is the extension, the second-to-last is the basename, and so on.
I also find it useful for rebuilding paths for different platforms - replace / with \.

how to remove extension from file name?

I want to throw away the last three characters of a file name and get the rest.
I have this code:
/* Attempt to copy mystr without its last three characters. */
char* remove(char* mystr) {
char tmp[] = {0}; /* one-element array: room for a single char only */
unsigned int x;
/* strlen() returns an unsigned value, so the bound wraps around when
   mystr is shorter than three characters */
for (x = 0; x < (strlen(mystr) - 3); x++)
tmp[x] = mystr[x]; /* writes past the end of tmp as soon as x >= 1 */
return tmp; /* tmp is a local array: the returned pointer dangles */
}
Try:
/* Returns a malloc'd copy of myStr cut at its last '.', or an unmodified
   copy when it contains no '.'. Returns NULL when myStr is NULL or the
   copy cannot be allocated. The caller frees the result. */
char *remove(char* myStr) {
    if (myStr == NULL)
        return NULL;

    char *copy = malloc (strlen (myStr) + 1);
    if (copy == NULL)
        return NULL;
    strcpy (copy, myStr);

    /* Terminate the copy where the last '.' sits, if there is one. */
    char *dot = strrchr (copy, '.');
    if (dot != NULL)
        *dot = '\0';

    return copy;
}
You'll have to free the returned string yourself. It simply finds the last . in the string and replaces it with a null terminator character. It will handle errors (passing NULL or running out of memory) by returning NULL.
It won't work with things like /this.path/is_bad since it will find the . in the non-file portion, but you could handle this by also doing a strrchr of /, or whatever your path separator is, and ensuring its position is NULL or before the . position.
A more general purpose solution to this problem could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// remove_ext: copies a file spec, leaving out its "extension".
//   myStr   - the file spec to process.
//   extSep  - the character that introduces the extension.
//   pathSep - the character that separates path components
//             (0 means there is none to look for).
// Returns a newly allocated string: myStr cut at its last extSep,
// provided that separator is not followed by a pathSep. Otherwise
// the copy is returned unchanged. The caller must free the result.
// Returns NULL if myStr is NULL or the allocation fails.
char *remove_ext (char* myStr, char extSep, char pathSep) {
    if (myStr == NULL)
        return NULL;

    char *copy = malloc (strlen (myStr) + 1);
    if (copy == NULL)
        return NULL;
    strcpy (copy, myStr);

    // Locate the last extension separator and, when asked to, the
    // last path separator.
    char *ext = strrchr (copy, extSep);
    char *dir = NULL;
    if (pathSep != 0)
        dir = strrchr (copy, pathSep);

    // Cut only when the extension separator belongs to the final
    // path component.
    if (ext != NULL && (dir == NULL || dir < ext))
        *ext = '\0';

    return copy;
}
// Runs remove_ext over a fixed list of sample file specs and prints each
// result in square brackets, one per line.
int main (int c, char *v[]) {
    static const struct {
        char *spec;     // file spec handed to remove_ext
        char pathSep;   // path separator to use (0 = none)
    } cases[] = {
        { "hello",            '/' },
        { "hello.",           '/' },
        { "hello.txt",        '/' },
        { "hello.txt.txt",    '/' },
        { "/no.dot/in_path",  '/' },
        { "/has.dot/in.path", '/' },
        { "/no.dot/in_path",  0   },
    };

    for (size_t k = 0; k < sizeof cases / sizeof cases[0]; k++) {
        char *s = remove_ext (cases[k].spec, '.', cases[k].pathSep);
        printf ("[%s]\n", s);
        free (s);
    }
    return 0;
}
and this produces:
[hello]
[hello]
[hello]
[hello.txt]
[/no.dot/in_path]
[/has.dot/in]
[/no]
Use rindex to locate the "." character. If the string is writable, you can replace it with the string terminator char ('\0') and you're done.
char * rindex(const char *s, int c);
DESCRIPTION
The rindex() function locates the last character matching c (converted to a char) in the null-terminated string s.
If you literally just want to remove the last three characters, because you somehow know that your filename has an extension exactly three chars long (and you want to keep the dot):
/* Returns a malloc'd copy of filename without its last three characters.
   A name of three characters or fewer yields an empty string.
   Returns NULL if the allocation fails. The caller frees the result. */
char *remove_three(const char *filename) {
    size_t len = strlen(filename);
    /* Clamp: len - 3 would wrap around for names shorter than three chars. */
    size_t keep = (len > 3) ? len - 3 : 0;
    char *newfilename = malloc(keep + 1);
    if (!newfilename)
        return NULL;
    memcpy(newfilename, filename, keep);
    newfilename[keep] = 0;
    return newfilename;
}
Or let the caller provide the destination buffer (which they must ensure is long enough):
/* Copies filename without its last three characters into dst and returns
   dst. A name of three characters or fewer yields an empty string.
   dst must have room for the kept characters plus the terminating NUL. */
char *remove_three(char *dst, const char *filename) {
    size_t len = strlen(filename);
    /* Clamp: len - 3 would wrap around for names shorter than three chars. */
    size_t keep = (len > 3) ? len - 3 : 0;
    memcpy(dst, filename, keep);
    dst[keep] = 0;
    return dst;
}
If you want to generically remove a file extension, that's harder, and should normally use whatever filename-handling routines your platform provides (basename on POSIX, _wsplitpath_s on Windows) if there's any chance that you're dealing with a path rather than just the final part of the filename:
/* Writes "<dirname>/<basename up to and including its last '.'>" to dst.
   warning: may modify filename (basename() and dirname() are allowed to
   write to their argument). To avoid this, take a copy first.
   dst must not overlap filename and may need to be longer than filename,
   for example currently "file.txt" -> "./file.txt". For this reason it
   would be safer to pass in a length with dst, and/or allow dst to be
   NULL in which case return the length required */
void remove_extn(char *dst, char *filename) {
    /* Save the last component before calling dirname(): dirname() may cut
       filename at its final '/', after which basename(filename) would
       return the last directory instead of the file name. */
    strcpy(dst, basename(filename));
    char *dir = dirname(filename);
    size_t len = strlen(dir);
    /* Shift the saved name right to make room for "<dir>/" in front of it. */
    memmove(dst + len + 1, dst, strlen(dst) + 1);
    memcpy(dst, dir, len);
    dst[len] = '/';
    dst += len+1;
    char *dot = strrchr(dst, '.');
    /* retain the '.' To remove it do dot[0] = 0 */
    if (dot) dot[1] = 0;
}
Come to think of it, you might want to pass dst+1 rather than dst to strrchr, since a filename starting with a dot maybe shouldn't be truncated to just ".". Depends what it's for.
I would try the following algorithm:
last_dot = -1
for each char in str:
if char = '.':
last_dot = index(char)
if last_dot != -1:
str[last_dot] = '\0'
Just replace the dot with the NUL terminator ('\0'). If you know that your extension is always 3 characters long you can just do:
/* Cut a name at the dot of a three-character extension. */
char file[] = "test.png";
file[strlen(file) - 4] = 0; /* index strlen - 4 is the '.' when the extension has 3 chars */
puts(file);
This will output "test". Also, you shouldn't return a pointer to a local variable. The compiler will also warn you about this.
To get paxdiablo's second more general purpose solution to work in a C++ compiler I changed this line:
if ((retstr = malloc (strlen (mystr) + 1)) == NULL)
to:
if ((retstr = static_cast<char*>(malloc (strlen (mystr) + 1))) == NULL)
Hope this helps someone.
This should do the job:
// Returns a new[]-allocated, NUL-terminated copy of oldstr cut just before
// its last '.'. When there is no '.', or the only '.' is the first
// character, the whole string is copied. Release the result with delete[].
char* remove(char* oldstr) {
    // Measure the input.
    int oldlen = 0;
    while (oldstr[oldlen] != '\0') {
        ++oldlen;
    }
    // Walk back from the last character to the last '.'.
    int newlen = oldlen - 1;
    while (newlen > 0 && oldstr[newlen] != '.') {
        --newlen;
    }
    // <= 0 also covers the empty string, where newlen starts at -1.
    if (newlen <= 0) {
        newlen = oldlen;
    }
    // One extra element for the terminator.
    char* newstr = new char[newlen + 1];
    for (int i = 0; i < newlen; ++i) {
        newstr[i] = oldstr[i];
    }
    newstr[newlen] = '\0';
    return newstr;
}
Get location and just copy up to that location into a new char *.
i = 0;
n = 0;
while(argv[1][i] != '\0') { // get length of filename
i++; }
for(ii = 0; i > -1; i--) { // look for extension working backwards
if(argv[1][i] == '.') {
n = i; // char # of exension
break; } }
memcpy(new_filename, argv[1], n);
This is a simple way to change the extension.
....
char outputname[255]
sscanf(inputname,"%[^.]",outputname); // foo.bar => foo
sprintf(outputname,"%s.txt",outputname) // foo.txt <= foo
....
With configurable minimum file length and configurable maximum extension length. Returns index where extension was changed to null character, or -1 if no extension was found.
/* Cuts in_str at its last '.', provided that at least name_min_len
   characters precede the '.' and at most max_ext_len characters follow it.
   Returns the index that was overwritten with the null character, or -1
   if in_str is a null pointer or no such '.' exists. */
int32_t strip_extension(char *in_str)
{
    static const uint8_t name_min_len = 1;
    static const uint8_t max_ext_len = 4;
    if (!in_str)
        return -1;
    /* sizeof(in_str) is the size of the pointer, so measure the string. */
    int32_t len = 0;
    while (in_str[len] != '\0')
        len++;
    /* Lowest index the '.' may occupy: far enough from the end to respect
       max_ext_len, and never inside the first name_min_len characters. */
    int32_t lowest = len - 1 - max_ext_len;
    if (lowest < name_min_len)
        lowest = name_min_len;
    /* Check chars starting at end of string to find last '.' */
    for (int32_t i = len - 1; i >= lowest; i--)
    {
        if (in_str[i] == '.')
        {
            in_str[i] = '\0';
            return i;
        }
    }
    return -1;
}
I use this code:
/* Truncates s in place at the last '.' that comes after the last path
   separator ('/' or '\\'). s is left untouched when no such '.' exists. */
void remove_extension(char* s) {
    char *cut = 0;
    for (char *p = s; *p != '\0'; ++p) {
        switch (*p) {
        case '.':
            cut = p;    /* candidate: most recent dot seen so far */
            break;
        case '/':
        case '\\':
            cut = 0;    /* a dot before a separator is part of a directory name */
            break;
        default:
            break;
        }
    }
    if (cut != 0)
        *cut = '\0';
}
It handles the Windows path convention correctly (both / and \ can be path separators).

Resources