Extract the file name and its extension in C - c

So we have a path string /home/user/music/thomas.mp3.
Where is the easy way to extract file name(without extension, "thomas") and it's extension ("mp3") from this string? A function for filename, and for extension. And only GNU libc in our hands.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_FILENAME_SIZE 256
char *filename(char *str) {
char *result;
char *last;
if ((last = strrchr(str, '.')) != NULL ) {
if ((*last == '.') && (last == str))
return str;
else {
result = (char*) malloc(MAX_FILENAME_SIZE);
snprintf(result, sizeof result, "%.*s", (int)(last - str), str);
return result;
}
} else {
return str;
}
}
char *extname(char *str) {
char *result;
char *last;
if ((last = strrchr(str, '.')) != NULL) {
if ((*last == '.') && (last == str))
return "";
else {
result = (char*) malloc(MAX_FILENAME_SIZE);
snprintf(result, sizeof result, "%s", last + 1);
return result;
}
} else {
return ""; // Empty/NULL string
}
}

Use basename to get the filename and then you can use something like this to get the extension.
char *get_filename_ext(const char *filename) {
const char *dot = strrchr(filename, '.');
if(!dot || dot == filename) return "";
return dot + 1;
}
Edit:
Try something like.
#include <string.h>
#include <libgen.h>
static void printFileInfo(char *path) {
char *bname;
char *path2 = strdup(path);
bname = basename(path2);
printf("%s.%s\n",bname, get_filename_ext(bname));
free(path2);
}

Regarding your actual code (all the other answers so far say to scrap that and do something else, which is good advice, however I am addressing your code as it contains blunders that it'd be good to learn about in advance of next time you try to write something).
Firstly:
strncpy(str, result, (size_t) (last-str) + 1);
is not good. You have dest and src around the wrong way; and further this function does not null-terminate the output (unless the input is short enough, which it isn't). Generally speaking strncpy is almost never a good solution to a problem; either strcpy if you know the length, or snprintf.
Simpler and less error-prone would be:
snprintf(result, sizeof result, "%.*s", (int)(last - str), str);
Similary in the other function,
snprintf(result, sizeof result, "%s", last + 1);
The snprintf function never overflows buffer and always produces a null-terminated string, so long as you get the buffer length right!
Now, even if you fixed those then you have another fundamental problem in that you are returning a pointer to a buffer that is destroyed when the function returns. You could fix ext by just returning last + 1, since that is null-terminated anyway. But for filename you have the usual set of options:
return a pointer and a length, and treat it as a length-counted string, not a null-terminated one
return pointer to mallocated memory
return pointer to static buffer
expect the caller to pass in a buffer and a buffer length, which you just write into
Finally, returning NULL on failure is probably a bad idea; if there is no . then return the whole string for filename, and an empty string for ext. Then the calling code does not have to contort itself with checks for NULL.

Here is a routine I use for that problem:
Separates original string into separate strings of path, file_name and extension.
Will work for Windows and Linux, relative or absolute style paths. Will handle directory names with embedded ".". Will handle file names without extensions.
/////////////////////////////////////////////////////////
//
// Example:
// Given path == "C:\\dir1\\dir2\\dir3\\file.exe"
// will return path_ as "C:\\dir1\\dir2\\dir3"
// Will return base_ as "file"
// Will return ext_ as "exe"
//
/////////////////////////////////////////////////////////
void GetFileParts(char *path, char *path_, char *base_, char *ext_)
{
char *base;
char *ext;
char nameKeep[MAX_PATHNAME_LEN];
char pathKeep[MAX_PATHNAME_LEN];
char pathKeep2[MAX_PATHNAME_LEN]; //preserve original input string
char File_Ext[40];
char baseK[40];
int lenFullPath, lenExt_, lenBase_;
char *sDelim={0};
int iDelim=0;
int rel=0, i;
if(path)
{ //determine type of path string (C:\\, \\, /, ./, .\\)
if( (strlen(path) > 1) &&
(
((path[1] == ':' ) &&
(path[2] == '\\'))||
(path[0] == '\\') ||
(path[0] == '/' ) ||
((path[0] == '.' ) &&
(path[1] == '/' ))||
((path[0] == '.' ) &&
(path[1] == '\\'))
)
)
{
sDelim = calloc(5, sizeof(char));
/* // */if(path[0] == '\\') iDelim = '\\', strcpy(sDelim, "\\");
/* c:\\ */if(path[1] == ':' ) iDelim = '\\', strcpy(sDelim, "\\"); // also satisfies path[2] == '\\'
/* / */if(path[0] == '/' ) iDelim = '/' , strcpy(sDelim, "/" );
/* ./ */if((path[0] == '.')&&(path[1] == '/')) iDelim = '/' , strcpy(sDelim, "/" );
/* .\\ */if((path[0] == '.')&&(path[1] == '\\')) iDelim = '\\' , strcpy(sDelim, "\\" );
/* \\\\ */if((path[0] == '\\')&&(path[1] == '\\')) iDelim = '\\', strcpy(sDelim, "\\");
if(path[0]=='.')
{
rel = 1;
path[0]='*';
}
if(!strstr(path, ".")) // if no filename, set path to have trailing delim,
{ //set others to "" and return
lenFullPath = strlen(path);
if(path[lenFullPath-1] != iDelim)
{
strcat(path, sDelim);
path_[0]=0;
base_[0]=0;
ext_[0]=0;
}
}
else
{
nameKeep[0]=0; //works with C:\\dir1\file.txt
pathKeep[0]=0;
pathKeep2[0]=0; //preserves *path
File_Ext[0]=0;
baseK[0]=0;
//Get lenth of full path
lenFullPath = strlen(path);
strcpy(nameKeep, path);
strcpy(pathKeep, path);
strcpy(pathKeep2, path);
strcpy(path_, path); //capture path
//Get length of extension:
for(i=lenFullPath-1;i>=0;i--)
{
if(pathKeep[i]=='.') break;
}
lenExt_ = (lenFullPath - i) -1;
base = strtok(path, sDelim);
while(base)
{
strcpy(File_Ext, base);
base = strtok(NULL, sDelim);
}
strcpy(baseK, File_Ext);
lenBase_ = strlen(baseK) - lenExt_;
baseK[lenBase_-1]=0;
strcpy(base_, baseK);
path_[lenFullPath -lenExt_ -lenBase_ -1] = 0;
ext = strtok(File_Ext, ".");
ext = strtok(NULL, ".");
if(ext) strcpy(ext_, ext);
else strcpy(ext_, "");
}
memset(path, 0, lenFullPath);
strcpy(path, pathKeep2);
if(rel)path_[0]='.';//replace first "." for relative path
free(sDelim);
}
}
}

Here is an old-school algorithm that will do the trick.
char path[100] = "/home/user/music/thomas.mp3";
int offset_extension, offset_name;
int len = strlen(path);
int i;
for (i = len; i >= 0; i--) {
if (path[i] == '.')
break;
if (path[i] == '/') {
i = len;
break;
}
}
if (i == -1) {
fprintf(stderr,"Invalid path");
exit(EXIT_FAILURE);
}
offset_extension = i;
for (; i >= 0; i--)
if (path[i] == '/')
break;
if (i == -1) {
fprintf(stderr,"Invalid path");
exit(EXIT_FAILURE);
}
offset_name = i;
char *extension, name[100];
extension = &path[offset_extension+1];
memcpy(name, &path[offset_name+1], offset_extension - offset_name - 1);
Then you have both information under the variables name and extension
printf("%s %s", name, extension);
This will print:
thomas mp3

I know this is old. But I tend to use strtok for things like this.
/* strtok example */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_TOKENS 20 /* Some reasonable values */
#define MAX_STRING 128 /* Easy enough to make dynamic with mallocs */
int main ()
{
char str[] ="/home/user/music/thomas.mp3";
char sep[] = "./";
char collect[MAX_TOKENS][MAX_STRING];
/* Not really necessary, since \0 is added inplace. I do this out of habit. */
memset(collect, 0, MAX_TOKENS * MAX_STRING);
char * pch = strtok (str, sep);
int ccount = 0;
if(pch != NULL) {
/* collect all seperated text */
while(pch != NULL) {
strncpy( collect[ccount++], pch, strlen(pch));
pch = strtok (NULL, sep);
}
}
/* output tokens. */
for(int i=0; i<ccount; ++i)
printf ("Token: %s\n", collect[i]);
return 0;
}
This is a rough example, and it makes it easy to deal with the tokens afterwards. Ie the last token is the extension. Second last is the basename and so on.
I also find it useful for rebuilding paths for different platforms - replace / with \.

Related

Trying to replace text in files throws a error

The point of the script is to take three parameters. Find, replace, prefix. Find being the text to replace, replace being what to replace the text with, and prefix is a special case. If prefix is in the text, you replace the prefix (some text) with prefix+replace. I would like to know why the below code throws a error right after saying opened file. It only seems to throw an error if the text being replaced is repeated like "aaa", "bbb" where "a" is what is being replaced.
Opened file.txt
*** Error in `./a.out': malloc(): memory corruption: 0x00005652fbc55980 ***
There's also the occasionally seg fault after printing "Trying to replace for file ...". I'm not fluent in C and GDB on my system resulted in just missing library errors which has nothing to do with this.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
char concat(const char *s1, const char *s2)
{
char *result = calloc(strlen(s1)+strlen(s2)+1, 1);
strcpy(result, s1);
strcat(result, s2);
printf("Prefix will be replaced with %s.\n", result);
return result;
}
static int replaceString(char *buf, const char *find, const char *replace, const char *prefix)
{
int olen, rlen;
char *s, *d;
char *tmpbuf;
if (!buf || !*buf || !find || !*find || !replace)
return 0;
tmpbuf = calloc(strlen(buf) + 1, 1);
if (tmpbuf == NULL)
return 0;
olen = strlen(find);
rlen = strlen(replace);
s = buf;
d = tmpbuf;
while (*s) {
if (strncmp(s, find, olen) == 0) {
strcpy(d, replace);
s += olen;
d += rlen;
}
else
{
*d++ = *s++;
}
}
*d = '\0';
if(strcmp(buf, tmpbuf) == 0)
{
free(tmpbuf);
return 0;
}
else
{
strcpy(buf, tmpbuf);
free(tmpbuf);
printf("%s", buf);
printf("Replaced!\n");
return 1;
}
}
void getAndReplace(char* filename, char* find, char* replace, char* prefix)
{
long length;
FILE* f = fopen (filename, "r");
char* buffer = 0;
if (f)
{
fseek (f, 0, SEEK_END);
length = ftell (f);
fseek (f, 0, SEEK_SET);
buffer = calloc(length+1, 1); //If i use malloc here, any file other than the first has garbage added to it. Why?
if (buffer)
{
fread(buffer, 1, length, f);
}
fclose(f);
}
if(buffer)// && strlen(buffer) > 1)
{
int result = replaceString(buffer, find, replace, prefix);
if(result == 0)
{
printf("Trying to replace prefix.\n");
replace = concat(prefix, replace);
result = replaceString(buffer, prefix, replace, "");
}
else
{
printf("Successfully replaced %s with %s\n", find, replace);
}
if(result == 1)
{
FILE* fp = fopen(filename, "w+");
if(fp)
{
printf("Opened %s\n", filename);
fprintf(fp, buffer);
fclose(fp);
printf("File %s overwritten with changes.\n", filename);
}
}
else
{
printf("Nothing to replace for %s\n", filename);
}
}
else
{
printf("Empty file.");
}
if(buffer)
{
free(buffer);
}
}
int main(int argc, char **argv)
{
if(argc < 4)
{
printf("Not enough arguments given: ./hw3 <find> <replace> <prefix>\n");
return 1;
}
struct dirent *de;
DIR *dr = opendir(".");
if (dr == NULL)
{
printf("Could not open current directory\n");
return 0;
}
while ((de = readdir(dr)) != NULL)
{
if(strlen(de->d_name) > 4 && !strcmp(de->d_name + strlen(de->d_name) - 4, ".txt"))
{
printf("Trying to replace for file %s\n", de->d_name);
getAndReplace(de->d_name, argv[1], argv[2], argv[3]);
}
}
closedir(dr);
return 0;
}
I hope that you concat function
char concat(const char *s1, const char *s2);
is just a typo and you meant
char *concat(const char *s1, const char *s2);
otherwise the function would be returning a pointer as if it were a char.
Using valgrind would give more details where exactly you are reading/writing where you are not allowed to and
where you are leaking memory. Without that it's hard to pinpoint the exact
place. One thing I noticed is that depending on the length of find and replace,
you might not have enough memory for tmpbuf which would lead to a buffer
overflow.
I think that the best way to write the replaceString is by making it
allocate the memory it needs itself, rather than providing it a buffer to write into.
Because you are getting both find and replace from the user, you don't know
how large the resulting buffer will need to be. You could calculate it
beforehand, but you don't do that. If you want to pass a pre-allocated buffer to
replaceString, I'd pass it as a double pointer, so that replaceString can do
realloc on it when needed. Or allocate the memory in the function and return a
pointer to the allocated memory.
This would be my version:
char *replaceString(const char *haystack, const char *needle, const char *replace)
{
if(haystack == NULL || needle == NULL || replace == NULL)
return NULL;
char *dest = NULL, *tmp;
size_t needle_len = strlen(needle);
size_t replace_len = strlen(replace);
size_t curr_len = 0;
while(*haystack)
{
char *found = strstr(haystack, needle);
size_t copy_len1 = 0;
size_t new_size = 0;
size_t pre_found_len = 0;
if(found == NULL)
{
copy_len1 = strlen(haystack) + 1;
new_size = curr_len + copy_len1;
} else {
pre_found_len = found - haystack;
copy_len1 = pre_found_len;
new_size = curr_len + pre_found_len + replace_len + 1;
}
tmp = realloc(dest, new_size);
if(tmp == NULL)
{
free(dest);
return NULL;
}
dest = tmp;
strncpy(dest + curr_len, haystack, copy_len1);
if(found == NULL)
return dest; // last replacement, copied to the end
strncpy(dest + curr_len + pre_found_len, replace, replace_len + 1);
curr_len += pre_found_len + replace_len;
haystack += pre_found_len + needle_len;
}
return dest;
}
The idea in this version is similar to yours, but mine reallocates the memory as
it goes. I changed the name of the arguments to have the same name as the
strstr function does based on my documentation:
man strstr
char *strstr(const char *haystack, const char *needle);
Because I'm going to update haystack to point past the characters copied, I
use this loop:
while(*haystack)
{
...
}
which means it is going to stop when the '\0'-terminating byte is reached.
The first thing is to use strstr to locate a substring that matches needle.
Base on whether a substring is found, I calculate how much bytes I would need to
copy until the substring, and the new size of the buffer. After that I
reallocate the memory for the buffer and copy everything until the substring,
then append the replacement, update the curr_len variable and update the
haystack pointer to point past the substring.
If the substring is not found, no more replacements are needed. So we have to
copy the string pointed to by haystack and return the constructed string. The
new size of the destination is curr_len + strlen(haystack) + 1 (the +1
because I want the strncpy function to also copy the '\0'-terminating byte).
And it has to copy strlen(haystack) + 1 bytes. After the first strncpy, the
function returns dest.
If the substring is found, then we have to copy everything until the substring,
append the replacement and update the current length and the haystack pointer.
First I calculate the string until the found substring and save it in
pre_found_len. The new size of the destination will be
curr_len + pre_found_len + replace_len + 1 (the current length + length of
string until substring + the length of the replacement + 1 for the
'\0'-terminating byte). Now the first strncpy copies only pre_found_len
bytes. Then it copies the replacement.
Now you can call it like this:
int main(void)
{
const char *orig = "Is this the real life? Is this just fantasy?";
char *text = replaceString(orig, "a", "_A_");
if(text)
{
puts(orig);
puts(text);
}
free(text);
}
which will output:
Is this the real life? Is this just fantasy?
Is this the re_A_l life? Is this just f_A_nt_A_sy?
Now you can use this function in getAndReplace to replace the prefix:
char *getAndReplace(char* filename, char* find, char* replace, char* prefix)
{
...
char *rep1 = replaceString(buffer, find, replace);
if(rep1 == NULL)
{
// error
free(buffer);
return NULL;
}
char *prefix_rep = malloc(strlen(replace) + strlen(prefix) + 1);
if(prefix_rep == NULL)
{
// error
free(buffer);
free(rep1);
return NULL;
}
sprintf(prefix_rep, "%s%s", replace, prefix);
char *rep2 = replaceString(rep1, prefix, prefix_rep);
if(rep2 == NULL)
{
// error
free(buffer);
free(rep1);
free(prefix_rep);
return NULL;
}
// rep2 has all the replacements
...
// before leaving
free(buffer);
free(rep1);
free(prefix_rep);
// returning all replacements
return rep2;
}
When using malloc & co, don't forget to check if they return NULL and don't
forget to free the memory when not needed.

Is there a fast way to interpose a character between strings?

I wrote this function that will generate a single string out of a file list.
(e.g. if I have a folder with FileA.txt, FileB.png and FileC I'll get as output this string: FileA.txtFileB.pngFileC). Now I want to add a / character between each filename. (e.g. FileA.txt/FileB.png/FileC/) Is there a way to do it in "one blow" without having to repeat the same operation twice?
In other words, is there a way to do something like:
original_string = append2(original_string, new_string, '/');
instead of having to do
append(original_string, new_string);
append(original_string, "/");
?
Here's the function I wrote as reference:
/**
* #brief Concatenate all file names in a file list (putting a '/' between each of them)
* #param file_list The file list to serialize.
* #return A string containing all files in the file list.
*/
char *file_list_tostring(struct file_list *file_list) {
char *final_string = NULL;
size_t final_len = 0;
struct file_node *list_iter = file_list->first;
while (list_iter != NULL) {
char *tmp = list_iter->filename;
size_t tmp_len = strlen(tmp);
char *s = realloc(final_string, final_len + tmp_len + 1); // +1 for '\0'
if (s == NULL) {
perror("realloc");
exit(EXIT_FAILURE);
}
final_string = s;
memcpy(final_string + final_len, tmp, tmp_len + 1);
final_len += tmp_len;
list_iter = list_iter->next;
}
return final_string;
}
Maybe there is a simple way to interpose a single character between two strings?
Note: I know there's nothing wrong in repeating the same operation twice, I'm asking this question to know if there is a better way of doing so!
Yes, you can do sprintf:
#include <stdio.h>
int main()
{
char var1[] = "FileA.txt";
char var2[] = "FileB.png";
char var3[] = "FileC";
char result[30];
sprintf(result, "%s/%s/%s", var1, var2,var3);
printf("result: %s\n", result);
return 0;
}
And the result is like this:
result: FileA.txt/FileB.png/FileC
If you need, the variable result can be a pointer and allocate space based on your needs.
As Michael Burr mentioned in a comment to the question, it is best to walk the list/array twice. On the first pass, calculate the total length of the string needed. Next, allocate the memory needed for the entire string. On the second pass, copy the contents. Do not forget to account for, and append, the string-terminating nul byte (\0).
Consider the following example functions dupcat() and dupcats():
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
char *dupcat(const size_t count, const char *parts[])
{
size_t i, len = 0;
char *dst, *end;
/* Calculate total length of parts. Skip NULL parts. */
for (i = 0; i < count; i++)
len += (parts[i]) ? strlen(parts[i]) : 0;
/* Add room for '\0'.
We add an extra 8 to 15 '\0's, just because
it is sometimes useful, and we do a dynamic
allocation anyway. */
len = (len | 7) + 9;
/* Allocate memory. */
dst = malloc(len);
if (!dst) {
fprintf(stderr, "dupcat(): Out of memory; tried to allocate %zu bytes.\n", len);
exit(EXIT_FAILURE);
}
/* Copy parts. */
end = dst;
for (i = 0; i < count; i++) {
const char *src = parts[i];
/* We could use strlen() and memcpy(),
but a loop like this will work just as well. */
if (src)
while (*src)
*(end++) = *(src++);
}
/* Sanity check time! */
if (end >= dst + len) {
fprintf(stderr, "dupcat(): Arguments were modified during duplication; buffer overrun!\n");
free(dst); /* We can omit this free(), but only in case of exit(). */
exit(EXIT_FAILURE);
}
/* Terminate string (and clear padding). */
memset(end, '\0', (size_t)(dst + len - end));
/* Done! */
return dst;
}
char *dupcats(const size_t count, ...)
{
size_t i, len = 0;
char *dst, *end;
va_list args;
/* Calculate total length of 'count' source strings. */
va_start(args, count);
for (i = 0; i < count; i++) {
const char *src = va_arg(args, const char *);
if (src)
len += strlen(src);
}
va_end(args);
/* Add room for end-of-string '\0'.
Because it is often useful to know you have
at least one extra '\0' at the end of the string,
and we do a dynamic allocation anyway,
we pad the string with 9 to 16 '\0',
aligning 'len' to a multiple of 8. */
len = (len | 7) + 9;
/* Allocate memory for the string. */
dst = malloc(len);
if (!dst) {
fprintf(stderr, "dupcats(): Out of memory; tried to allocate %zu bytes.\n", len);
exit(EXIT_FAILURE);
}
/* Copy the source strings. */
end = dst;
va_start(args, count);
for (i = 0; i < count; i++) {
const char *src = va_arg(args, const char *);
/* We could use strlen() and memcpy() here;
however, this loop is easier to follow. */
if (src)
while (*src)
*(end++) = *(src++);
}
va_end(args);
/* Sanity check. */
if (end >= dst + len) {
fprintf(stderr, "dupcats(): Arguments were modified during duplication; buffer overrun!\n");
free(dst); /* We can omit this free(), but only in case of exit(). */
exit(EXIT_FAILURE);
}
/* Add end-of-string '\0' (filling the padding). */
memset(end, '\0', dst + len - end);
/* Done. */
return dst;
}
int main(int argc, char *argv[])
{
char *result;
result = dupcat(argc - 1, (const char **)(argv + 1));
printf("Arguments concatenated: '%s'.\n", result);
free(result);
result = dupcats(5, "foo", "/", "bar", "/", "baz");
printf("Concatenating 'foo', '/', 'bar', '/', and 'baz': '%s'.\n", result);
free(result);
return EXIT_SUCCESS;
}
Neither dupcat() nor dupcats() will ever return NULL: they will print an error message to standard error and exit, if an error occurs.
dupcat() takes an array of strings, and returns a dynamically allocated concatenated copy with at least eight bytes of nul padding.
dupcats() takes a variable number of pointers, and returns a dynamically allocated concatenated copy with at least eight bytes of nul padding.
Both functions treat NULL pointers as if they were empty strings. For both functions, the first parameter is the number of strings to concatenate.
(Since OP did not show the definitions of struct file_list or struct file_node, I did not bother to write a list-based version. However, it should be trivial to adapt from one of the two versions shown.)
In some cases, a variant that constructs a valid path from a fixed base part, with one or more relative file or directory names concatenated, and POSIXy ./ removed and ../ backtracked (but not out of base subtree), is very useful.
If carefully written, it allows the program to accept untrusted paths, relative to a specific subtree. (The combined paths are confined to that subtree, but symlinks and hardlinks can still be used to escape the subtree.)
One possible implementation is as follows:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
char *dynamic_path(const char *const subtree,
const size_t parts,
const char *part[])
{
const size_t subtree_len = (subtree) ? strlen(subtree) : 0;
size_t parts_len = 0;
size_t total_len, i;
char *path, *mark, *curr;
/* Calculate the length of each individual part.
Include room for a leading slash.
*/
for (i = 0; i < parts; i++)
parts_len += (part[i]) ? 1 + strlen(part[i]) : 0;
/* Add room for the string-terminating '\0'.
We're paranoid, and add a bit more padding. */
total_len = ((subtree_len + parts_len) | 7) + 9;
/* Allocate memory for the combined path. */
path = malloc(total_len);
if (!path) {
errno = ENOMEM;
return NULL;
}
/* If the user specified a subtree, we use it as the fixed prefix. */
if (subtree_len > 0) {
memcpy(path, subtree, subtree_len);
mark = path + subtree_len;
/* Omit a trailing /. We enforce it below anyway. */
if (parts > 0 && subtree_len > 1 && mark[-1] == '/')
--mark;
} else
mark = path;
/* Append the additional path parts. */
curr = mark;
for (i = 0; i < parts; i++) {
const size_t len = (part[i]) ? strlen(part[i]) : 0;
if (len > 0) {
/* Each path part is a separate file/directory name,
so there is an (implicit) slash before each one. */
if (part[i][0] != '/')
*(curr++) = '/';
memcpy(curr, part[i], len);
curr += len;
}
}
/* Sanity check. */
if (curr >= path + total_len) {
/* Buffer overrun occurred. */
fprintf(stderr, "Buffer overrun in dynamic_path()!\n");
free(path); /* Can be omitted if we exit(). */
exit(EXIT_FAILURE);
}
/* Terminate string (and clear padding). */
memset(curr, '\0', (size_t)(path + total_len - curr));
/* Cleanup pass.
Convert "/foo/../" to "/", but do not backtrack over mark.
Combine consecutive slashes and /./ to a single slash.
*/
{
char *src = mark;
char *dst = mark;
while (*src)
if (src[0] == '/' && src[1] == '.' && src[2] == '.' && (!src[3] || src[3] == '/')) {
src += 3; /* Skip over /.. */
/* Backtrack, but do not underrun mark. */
if (dst > mark) {
dst--;
while (dst > mark && *dst != '/')
dst--;
}
/* Never consume the mark slash. */
if (dst == mark)
dst++;
} else
if (src[0] == '/' && src[1] == '.' && (!src[2] || src[2] == '/')) {
src += 2; /* Skip over /. */
if (dst == mark || dst[-1] != '/')
*(dst++) = '/';
} else
if (src[0] == '/') {
src++;
if (dst == mark || dst[-1] != '/')
*(dst++) = '/';
} else
*(dst++) = *(src++);
/* Clear removed part. */
if (dst < src)
memset(dst, '\0', (size_t)(src - dst));
}
return path;
}
int main(int argc, char *argv[])
{
char *path;
if (argc < 2) {
fprintf(stderr, "\nUsage: %s PREFIX [ PATHNAME ... ]\n\n", argv[0]);
return EXIT_FAILURE;
}
path = dynamic_path(argv[1], argc - 2, (const char **)(argv + 2));
if (!path) {
fprintf(stderr, "dynamic_path(): %s.\n", strerror(errno));
return EXIT_FAILURE;
}
printf("%s\n", path);
free(path);
return EXIT_SUCCESS;
}
Note that I wrote the above version from scratch (and dedicate it to public domain (CC0)), so you should thoroughly test it before relying it on production use. (My intent is for it to be an useful example or basis, that will help you write your own implementation tailored to your needs.)
If you do find any bugs or issues in it, let me know in a comment, so I can verify and fix.

String Search and format in C

Just a quick one: in C I have a buffer full of data like below:
char buffer[255]="CODE=12345-MODE-12453-CODE1-12355"
My question is how to search through this. For example for the CODE=12345, section bear in mind that the numbers change, so I would like to search like this CODE=***** using wildcard or preset amount of spaces after the CODE= part.
This method wont compile last one left to try
#include <stdio.h>
#include <string.h>
#include <windows.h>
int main ()
{
char buf[255]="CODE=12345-MODE-12453-CODE1-12355";
#define TRIMSPACES(p) while(*p != '\0' && isspace((unsigned char)*p) != 0) ++p
#define NSTRIP(p, n) p += n
#define STRIP(p) ++p
char* getcode(const char *input)
{
char *p = (char*) input, *buf, *pbuf;
if((buf = malloc(256)) == NULL)
return NULL;
pbuf = buf;
while(*p != '\0') {
if(strncmp(p, "CODE", 3) == 0) {
NSTRIP(p, 4); //remove 'code'
TRIMSPACES(p);//trim white-space after 'code'
if(*p != '=')
return NULL;
STRIP(p); // remove '='
TRIMSPACES(p); //trim white-spaces after '='
/* copy the value until found a '-'
note: you must be control the size of it,
for avoid overflow. we allocated size, that's 256
or do subsequent calls to realloc()
*/
while(*p != '\0' && *p != '-')
*pbuf ++ = *p++;
// break;
}
p ++;
}
//put 0-terminator.
*pbuf ++ = '\0';
return buf;
}
//
}
You could use the sscanf() function:
int number;
sscanf(buffer, "CODE = %i", &number);
for that to work well your buffer has to be null terminated.
Another way to do it instead of sscanf():
char *input, *code;
input = strstr(buf, "CODE");
if(input == NULL) {
printf("Not found CODE=\n");
return -1;
}
code = strtok(strdup(input), "=");
if(code != NULL) {
code = strtok(NULL, "-");
printf("%s\n", code); // code = atoi(code);
} else {
//not found '='
}
Or more robust way.. a bit more complex:
#define TRIMSPACES(p) while(*p != '\0' && isspace((unsigned char)*p) != 0) ++p
#define NSTRIP(p, n) p += n
#define STRIP(p) ++p
char* getcode(const char *input, size_t limit)
{
char *p = (char*) input, *buf, *pbuf;
size_t i = 0;
while(*p != '\0') {
if(strncmp(p, "CODE", 3) == 0) {
NSTRIP(p, 4); //remove 'code'
TRIMSPACES(p);//trim all white-spaces after 'code'
/* check we have a '=' after CODE (without spaces).
if there is not, returns NULL
*/
if(*p != '=')
return NULL;
/* ok. We have.. now we don't need of it
just remove it from we output string.
*/
STRIP(p);
/* remove again all white-spaces after '=' */
TRIMSPACES(p);
/* the rest of string is not valid,
because are white-spaces values.
*/
if(*p == '\0')
return NULL;
/* allocate space for store the value
between code= and -.
this limit is set into second parameter.
*/
if((buf = malloc(limit)) == NULL)
return NULL;
/* copy the value until found a '-'
note: you must be control the size of it,
for don't overflow. we allocated 256 bytes.
if the string is greater it, do implementation with
subjecents call to realloc()
*/
pbuf = buf;
while(*p != '\0' && *p != '-' && i < limit) {
*pbuf ++ = *p++;
i ++;
}
*pbuf ++ = '\0';
return buf;
}
p ++;
}
return NULL;
}
And then:
char buf[255] = "foo baa CODE = 12345-MODE-12453-CODE-12355";
char *code = getcode(buf,256);
if(code != NULL) {
printf("code = %s\n", code);
free(code);
} else {
printf("Not found code.\n");
}
output:
code = 12345
Check out this online.
if you want to don't differentiate case, you can use the strncasecmp() that's POSIX function.
Assuming the CODE= part always comes at the beginning of the string, it's pretty easy:
sscanf(buffer, "CODE = %d", &number);
...but you want buffer to be char[255], not unsigned long.
Edit: If the CODE= part isn't necessarily at the beginning of the string, you can use strstr to find CODE in the buffer, do your sscanf starting from that point, then look immediately following that:
int codes[256];
char *pos = buffer;
size_t current = 0;
while ((pos=strstr(pos, "CODE")) != NULL) {
if (sscanf(pos, "CODE = %d", codes+current))
++current;
pos += 4;
}
Edit2:
For example, you'd use this something like this:
#include <stdio.h>
#include <string.h>
#include <windows.h>
int main ()
{
// This is full of other junk as well
char buffer[255]="CODE=12345 MODE-12453 CODE=12355" ;
int i;
int codes[256];
char *pos = buffer;
size_t current = 0;
while ((pos=strstr(pos, "CODE")) != NULL) {
if (sscanf(pos, "CODE = %d", codes+current))
++current;
pos += 4;
}
for (i=0; i<current; i++)
printf("%d\n", codes[i]);
return 0;
}
For me, this produces the following output:
12345
12355
...correctly reading the two "CODE=xxx" sections, but skipings over the "MODE=yyy" section.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *getcode(const char *str, const char *pattern){
//pattern: char is match, space is skip, * is collect
static const char *p=NULL;
char *retbuf, *pat;
int i, match, skip, patlen;
if(str != NULL) p=str;
if(p==NULL || *p=='\0') return NULL;
if(NULL==(retbuf=(char*)malloc((strlen(p)+1)*sizeof(char))))
return NULL;
pat = (char*)pattern;
patlen = strlen(pat);
i = match = skip = 0;
while(*p){
if(isspace(*p)){
++p;
++skip;
continue;
}
if(*pat){
if(*p == *pat){
++match;
++p;
++pat;
} else if(*pat == '*'){
++match;
retbuf[i++]=*p++;
++pat;
} else {
if(match){//reset
pat=(char*)pattern;
p -= match + skip -1;
i = match = skip = 0;
} else //next
++p;
}
} else {
break;
}
}
if(i){//has match
retbuf[i++]='\0';
retbuf=realloc(retbuf, i);
return retbuf;
} else {
free(retbuf);
return NULL;
}
}
int main (){
char *code;
code=getcode("CODE=12345-MODE-12453-CODE1-12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode(" CODE = 12345-MODE-12453-CODE1-12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode("CODE-12345-MODE-12453-CODE1-12355", "CODE=*****");
if(code==NULL)printf("not match\n");//not match
code=getcode("CODE=12345-MODE-12453-CODE=12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode(NULL, "CODE=*****");
printf("\"%s\"\n",code);//"12355"
free(code);
code=getcode("CODE=12345-MODE-12453-CODE1-12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode(NULL, "CODE1-*****");
printf("\"%s\"\n",code);//"12355"
free(code);
return 0;
}

how to remove extension from file name?

I want to throw the last three character from file name and get the rest?
I have this code:
char* remove(char* mystr) {
char tmp[] = {0};
unsigned int x;
for (x = 0; x < (strlen(mystr) - 3); x++)
tmp[x] = mystr[x];
return tmp;
}
Try:
char *remove(char* myStr) {
char *retStr;
char *lastExt;
if (myStr == NULL) return NULL;
if ((retStr = malloc (strlen (myStr) + 1)) == NULL) return NULL;
strcpy (retStr, myStr);
lastExt = strrchr (retStr, '.');
if (lastExt != NULL)
*lastExt = '\0';
return retStr;
}
You'll have to free the returned string yourself. It simply finds the last . in the string and replaces it with a null terminator character. It will handle errors (passing NULL or running out of memory) by returning NULL.
It won't work with things like /this.path/is_bad since it will find the . in the non-file portion but you could handle this by also doing a strrchr of /, or whatever your path separator is, and ensuring it's position is NULL or before the . position.
A more general purpose solution to this problem could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// remove_ext: removes the "extension" from a file spec.
// myStr is the string to process.
// extSep is the extension separator.
// pathSep is the path separator (0 means to ignore).
// Returns an allocated string identical to the original but
// with the extension removed. It must be freed when you're
// finished with it.
// If you pass in NULL or the new string can't be allocated,
// it returns NULL.
char *remove_ext (char* myStr, char extSep, char pathSep) {
char *retStr, *lastExt, *lastPath;
// Error checks and allocate string.
if (myStr == NULL) return NULL;
if ((retStr = malloc (strlen (myStr) + 1)) == NULL) return NULL;
// Make a copy and find the relevant characters.
strcpy (retStr, myStr);
lastExt = strrchr (retStr, extSep);
lastPath = (pathSep == 0) ? NULL : strrchr (retStr, pathSep);
// If it has an extension separator.
if (lastExt != NULL) {
// and it's to the right of the path separator.
if (lastPath != NULL) {
if (lastPath < lastExt) {
// then remove it.
*lastExt = '\0';
}
} else {
// Has extension separator with no path separator.
*lastExt = '\0';
}
}
// Return the modified string.
return retStr;
}
int main (int c, char *v[]) {
char *s;
printf ("[%s]\n", (s = remove_ext ("hello", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("hello.", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("hello.txt", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("hello.txt.txt", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("/no.dot/in_path", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("/has.dot/in.path", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("/no.dot/in_path", '.', 0))); free (s);
return 0;
}
and this produces:
[hello]
[hello]
[hello]
[hello.txt]
[/no.dot/in_path]
[/has.dot/in]
[/no]
Use rindex to locate the "." character. If the string is writable, you can replace it with the string terminator char ('\0') and you're done.
char * rindex(const char *s, int c);
DESCRIPTION
The rindex() function locates the last character matching c (converted to a char) in the null-terminated string s.
If you literally just want to remove the last three characters, because you somehow know that your filename has an extension exactly three chars long (and you want to keep the dot):
char *remove_three(const char *filename) {
size_t len = strlen(filename);
char *newfilename = malloc(len-2);
if (!newfilename) /* handle error */;
memcpy(newfilename, filename, len-3);
newfilename[len - 3] = 0;
return newfilename;
}
Or let the caller provide the destination buffer (which they must ensure is long enough):
char *remove_three(char *dst, const char *filename) {
size_t len = strlen(filename);
memcpy(dst, filename, len-3);
dst[len - 3] = 0;
return dst;
}
If you want to generically remove a file extension, that's harder, and should normally use whatever filename-handling routines your platform provides (basename on POSIX, _wsplitpath_s on Windows) if there's any chance that you're dealing with a path rather than just the final part of the filename:
/* warning: may modify filename. To avoid this, take a copy first
dst may need to be longer than filename, for example currently
"file.txt" -> "./file.txt". For this reason it would be safer to
pass in a length with dst, and/or allow dst to be NULL in which
case return the length required */
void remove_extn(char *dst, char *filename) {
strcpy(dst, dirname(filename));
size_t len = strlen(dst);
dst[len] = '/';
dst += len+1;
strcpy(dst, basename(filename));
char *dot = strrchr(dst, '.');
/* retain the '.' To remove it do dot[0] = 0 */
if (dot) dot[1] = 0;
}
Come to think of it, you might want to pass dst+1 rather than dst to strrchr, since a filename starting with a dot maybe shouldn't be truncated to just ".". Depends what it's for.
I would try the following algorithm:
last_dot = -1
for each char in str:
if char = '.':
last_dot = index(char)
if last_dot != -1:
str[last_dot] = '\0'
Just replace the dot with "0". If you know that your extension is always 3 characters long you can just do:
char file[] = "test.png";
file[strlen(file) - 4] = 0;
puts(file);
This will output "test". Also, you shouldn't return a pointer to a local variable. The compiler will also warn you about this.
To get paxdiablo's second more general purpose solution to work in a C++ compiler I changed this line:
if ((retstr = malloc (strlen (mystr) + 1)) == NULL)
to:
if ((retstr = static_cast<char*>(malloc (strlen (mystr) + 1))) == NULL)
Hope this helps someone.
This should do the job:
char* remove(char* oldstr) {
int oldlen = 0;
while(oldstr[oldlen] != NULL){
++oldlen;
}
int newlen = oldlen - 1;
while(newlen > 0 && mystr[newlen] != '.'){
--newlen;
}
if (newlen == 0) {
newlen = oldlen;
}
char* newstr = new char[newlen];
for (int i = 0; i < newlen; ++i){
newstr[i] = oldstr[i];
}
return newstr;
}
Get location and just copy up to that location into a new char *.
i = 0;
n = 0;
while(argv[1][i] != '\0') { // get length of filename
i++; }
for(ii = 0; i > -1; i--) { // look for extension working backwards
if(argv[1][i] == '.') {
n = i; // char # of exension
break; } }
memcpy(new_filename, argv[1], n);
This is simple way to change extension name.
....
char outputname[255]
sscanf(inputname,"%[^.]",outputname); // foo.bar => foo
sprintf(outputname,"%s.txt",outputname) // foo.txt <= foo
....
With configurable minimum file length and configurable maximum extension length. Returns index where extension was changed to null character, or -1 if no extension was found.
int32_t strip_extension(char *in_str)
{
static const uint8_t name_min_len = 1;
static const uint8_t max_ext_len = 4;
/* Check chars starting at end of string to find last '.' */
for (ssize_t i = sizeof(in_str); i > (name_min_len + max_ext_len); i--)
{
if (in_str[i] == '.')
{
in_str[i] = '\0';
return i;
}
}
return -1;
}
I use this code:
void remove_extension(char* s) {
char* dot = 0;
while (*s) {
if (*s == '.') dot = s; // last dot
else if (*s == '/' || *s == '\\') dot = 0; // ignore dots before path separators
s++;
}
if (dot) *dot = '\0';
}
It handles the Windows path convention correctly (both / and \ can be path separators).

Parse out the file extension from a file-path in C

I was previously using the following code to determine if a file was an .exe or .o file and thus set binFile to 1:
if(strstr(fpath,".exe") != NULL || strstr(fpath,".o") != NULL)
binFile = 1;
Through debugging, I noticed that this method will also set binFile to 1 with files like foo.out or foo.execute. What I really want is to match '.exe\0' and '.o\0' but strstr() says it ignores the terminating NUL bytes. How should I go about this?
Thanks
#include <stdio.h>
#include <string.h>
int endswith(const char* haystack, const char* needle)
{
size_t hlen;
size_t nlen;
/* find the length of both arguments -
if needle is longer than haystack, haystack can't end with needle */
hlen = strlen(haystack);
nlen = strlen(needle);
if(nlen > hlen) return 0;
/* see if the end of haystack equals needle */
return (strcmp(&haystack[hlen-nlen], needle)) == 0;
}
int main(int argc, char** argv) {
if(argc != 3) {
printf("Usage: %s <string> <test-ending>\n", argv[0]);
return 1;
}
printf("Does \"%s\" end with \"%s\"? ", argv[1], argv[2]);
if(endswith(argv[1], argv[2])) {
printf("Yes!\n");
} else {
printf("No!\n");
}
return 0;
}
char *ext = strrchr(fpath, '.');
if (ext && (!strcmp(ext, ".exe") || !strcmp(ext, ".o")))
binfile = 1;
If your system has the BSD/POSIX strcasecmp, you should probably use that instead of strcmp.
int iLen = strlen(fpath);
if ((iLen >= 4 && strcmp(&fpath[iLen - 4], ".exe") == 0)
|| (iLen >= 2 && strcmp(&fpath[iLen - 2], ".o") == 0))
binfile = 1;
Edit added test on length, to handle very short file names.
You could check one past the result of strstr (taking into account the length of the search string) to see if it is NULL. Example:
const char* p = strstr(fpath,".exe");
if (p != NULL && *(p + 4 + 1) == 0) // 4 is the length of ".exe"; +1 should get you to \0
binFile = 1;
I like to get the extension and then check it.
char *suffix = strrchr(fpath,'.');
if (suffix)
{
suffix++;
if (!strcasecmp(suffix,"exe"))
{
// you got it
}
}
Incrementing suffix is okay since you know it points at a found a period at that point. Incrementing it will at worst make it point at the null termination character, which will not bother strcasecmp at all.
You can easily check against a list of extensions this way too.
one way of doing it:
char * suffix = fpath;
char * i = fpath;
while (*i++) {
if (*i == '.') {
suffix = i;
}
}
if (!strcmp(suffix, ".o") || !strcmp(suffix, ".exe")) {
/* do stuff */
}
You can also use _splitpath/_wsplitpath which is part of the CRT.
http://msdn.microsoft.com/en-us/library/e737s6tf(v=vs.80).aspx

Resources