Related
I am currently working on a spam filter program that takes in and reads text files. In the initializeTraining function, I call the preprocess function which reads in each string from each line in a given text-file.
Once the newDict function is executed from the line first=newDict(string, NULL);, the program, however, returns an error stating that there is a load of null pointer of type 'char' at line while(string[i] !='\0' && i<WORDLENGTH) { in the newDict function.
It seems that the preprocess function is returning null pointers despite the fact that it still takes in the passed-in strings from the text file. Is there something that I'm doing wrong in the preprocess function?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
WORDLENGTH is the length of the word in the linked list named as dictionary
MAILSEPARATOR is the totken to differentiate the mails that are included in one file.
it is also the token to update updated in linkedlist
*/
#define WORDLENGTH 20
#define MAILSEPARATOR "#####"
/*
define DEBUG as 0 to disable debug mode and to 1 to enable the mode.
*/
#define DEBUG 0
typedef struct dictionary dict;
typedef dict* word_dict;
typedef enum {false, true} bool;
/*
linked list, count is for the total word count and
occur is the numbers of the mails that had the word
*/
struct dictionary{
char word[WORDLENGTH];
int occur;
int count;
word_dict next;
bool updated;
};
// if there is no matching words after searching, create a new node
word_dict newDict(char *string, word_dict next){
word_dict target = (word_dict)malloc(sizeof(dict));
int i = 0;
while(string[i] !='\0' && i<WORDLENGTH) {
target->word[i] = string[i];
i++;
}
target->count = 1;
target->next = next;
target->occur = 1;
target->updated = true;
return target;
}
/*
preprocessor, convert string to lowercase
and trim the puctuations at the back
*/
char* preprocess(char* string){
#if DEBUG
printf("\nbefore preprocess, string: %s \n", string);
#endif
int i=0;
while(string[i] != '\0') { // convert to lower case
if (string[i] >= 65 && string[i] < 90) {
string[i] += 32;
i++;
}
while(true) {
i--;
if(i < 0) {
#if DEBUG
printf("word of only punctuations \n");
#endif
return NULL;
} else if((string[i] >= 97 && string[i] <= 122) || (string[i] >= 48 && string[i] <= 57)){
string[i+1]='\0';
break;
}
}
i=0;
while(true) {
if ((string[i] >= 97 && string[i] <= 122) || (string[i] >= 48 && string[i] <= 57)){
break;
} else {
string = &string[i+1];
}
i++;
}
}
#if DEBUG
printf("_after preprocess, string: %s\n", string);
#endif
return string;
}
/*
initialize training
reads the sample mails and creates a linked list of
the percentages of the words occuring in the sample mails
*/
word_dict initializeTraining(char* filename){
FILE *fp = NULL;
fp = fopen(filename, "r");
if(fp == NULL) {
printf("no file found\n");
return NULL;
}
char* string;
string = (char*)malloc(sizeof(char)*50);
word_dict first = NULL;
fscanf(fp, "%s\n", string);
string = preprocess(string);
first = newDict(string, NULL);
while(fscanf(fp,"%s", string) == 1) {
first = searchDict(string, first);
}
fclose(fp);
free(string);
return first;
}
/*
tests whether the mail is pam or not
takes the filename of the test mail,
returns true or false depending on the email's content
*/
bool bayesian_spam_filter(char * filename_for_test_email) {
word_dict spamDict=initializeTraining("spam.txt");
word_dict nonspamDict=initializeTraining("not_spam.txt");
#if DEBUG
printDict(spamDict);
printDict(nonspamDict);
#endif
FILE *stream=NULL;
stream = fopen(filename_for_test_email, "r");
if(stream == NULL){
printf("no file found\n");
return false;
}
char* string;
string = (char*)malloc(sizeof(char)*50);
int ps, pn; // probability of spam mail and non-spam mail
double prob = 0.5;
while(fscanf(stream,"%s", string) == 1){
char* tempString; // for handling the errors happening from string being null during preprocessing
tempString = preprocess(string);
if(tempString == NULL){
continue;
}
if((ps = searchTest(tempString, spamDict)) != 0) {
if((pn = searchTest(tempString, nonspamDict)) != 0) {
printf("ps:%3d, pn:%3d, %s\n", ps, pn, tempString);
prob = prob * (double) ps / ((prob* (double)ps + (1 - prob) * (double) pn));
printf("this prob: %.10f\n", prob);
}
}
}
//printf("%d, %d \n", pSProduct, pNProduct);
//proba=(float)(pSProduct/(pSProduct+pNProduct));
printf("Probability of mail being spam: %.10f\n", prob);
fclose(stream);
free(string);
if (prob > 0.9) {
return true;
}
return false;
}
It seems that the preprocess function is returning null pointers
Hardly surprising when it contains a line return NULL;. At that point, you should instead set the first character of the string to '\0' and return it since the surrounding code expects to get the string returned in all cases.
Another problem can be found in this section:
i=0;
while(true) {
if ((string[i] >= 97 && string[i] <= 122) || (string[i] >= 48 && string[i] <= 57)){
break;
} else {
string = &string[i+1];
}
i++;
}
Let's say the string is ".a" and go through the loop. Since the first character is not a letter, we do not break, instead we update the string pointer so that it points to "a" now. Then, we increment i. In the next iteration, string[i] is the null terminator, which is not a letter, so we continue. Since we are past the string's data, what follows is undefined behavior.
The simple fix for this is to not increment i and rather stick with [0] since you always want to remove from the beginning. The proper fix is to use i but without incrementing the string pointer, since you want to free it later – you must call free on the pointer returned by malloc, therefore modifying the pointer leads to undefined behavior! Instead of returning the string from preprocess, return the offset from the start (as counted by i), this way freeing the string later will work properly. The calling code would then look like this:
int offset = preprocess(string);
first = newDict(string + offset, NULL);
My str_split function returns (or at least I think it does) a char** - so a list of strings essentially. It takes a string parameter, a char delimiter to split the string on, and a pointer to an int to place the number of strings detected.
The way I did it, which may be highly inefficient, is to make a buffer of x length (x = length of string), then copy element of string until we reach delimiter, or '\0' character. Then it copies the buffer to the char**, which is what we are returning (and has been malloced earlier, and can be freed from main()), then clears the buffer and repeats.
Although the algorithm may be iffy, the logic is definitely sound as my debug code (the _D) shows it's being copied correctly. The part I'm stuck on is when I make a char** in main, set it equal to my function. It doesn't return null, crash the program, or throw any errors, but it doesn't quite seem to work either. I'm assuming this is what is meant be the term Undefined Behavior.
Anyhow, after a lot of thinking (I'm new to all this) I tried something else, which you will see in the code, currently commented out. When I use malloc to copy the buffer to a new string, and pass that copy to aforementioned char**, it seems to work perfectly. HOWEVER, this creates an obvious memory leak as I can't free it later... so I'm lost.
When I did some research I found this post, which follows the idea of my code almost exactly and works, meaning there isn't an inherent problem with the format (return value, parameters, etc) of my str_split function. YET his only has 1 malloc, for the char**, and works just fine.
Below is my code. I've been trying to figure this out and it's scrambling my brain, so I'd really appreciate help!! Sorry in advance for the 'i', 'b', 'c' it's a bit convoluted I know.
Edit: should mention that with the following code,
ret[c] = buffer;
printf("Content of ret[%i] = \"%s\" \n", c, ret[c]);
it does indeed print correctly. It's only when I call the function from main that it gets weird. I'm guessing it's because it's out of scope ?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define DEBUG
#ifdef DEBUG
#define _D if (1)
#else
#define _D if (0)
#endif
char **str_split(char[], char, int*);
int count_char(char[], char);
int main(void) {
int num_strings = 0;
char **result = str_split("Helo_World_poopy_pants", '_', &num_strings);
if (result == NULL) {
printf("result is NULL\n");
return 0;
}
if (num_strings > 0) {
for (int i = 0; i < num_strings; i++) {
printf("\"%s\" \n", result[i]);
}
}
free(result);
return 0;
}
char **str_split(char string[], char delim, int *num_strings) {
int num_delim = count_char(string, delim);
*num_strings = num_delim + 1;
if (*num_strings < 2) {
return NULL;
}
//return value
char **ret = malloc((*num_strings) * sizeof(char*));
if (ret == NULL) {
_D printf("ret is null.\n");
return NULL;
}
int slen = strlen(string);
char buffer[slen];
/* b is the buffer index, c is the index for **ret */
int b = 0, c = 0;
for (int i = 0; i < slen + 1; i++) {
char cur = string[i];
if (cur == delim || cur == '\0') {
_D printf("Copying content of buffer to ret[%i]\n", c);
//char *tmp = malloc(sizeof(char) * slen + 1);
//strcpy(tmp, buffer);
//ret[c] = tmp;
ret[c] = buffer;
_D printf("Content of ret[%i] = \"%s\" \n", c, ret[c]);
//free(tmp);
c++;
b = 0;
continue;
}
//otherwise
_D printf("{%i} Copying char[%c] to index [%i] of buffer\n", c, cur, b);
buffer[b] = cur;
buffer[b+1] = '\0'; /* extend the null char */
b++;
_D printf("Buffer is now equal to: \"%s\"\n", buffer);
}
return ret;
}
int count_char(char base[], char c) {
int count = 0;
int i = 0;
while (base[i] != '\0') {
if (base[i++] == c) {
count++;
}
}
_D printf("Found %i occurence(s) of '%c'\n", count, c);
return count;
}
You are storing pointers to a buffer that exists on the stack. Using those pointers after returning from the function results in undefined behavior.
To get around this requires one of the following:
Allow the function to modify the input string (i.e. replace delimiters with null-terminator characters) and return pointers into it. The caller must be aware that this can happen. Note that supplying a string literal as you are doing here is illegal in C, so you would instead need to do:
char my_string[] = "Helo_World_poopy_pants";
char **result = str_split(my_string, '_', &num_strings);
In this case, the function should also make it clear that a string literal is not acceptable input, and define its first parameter as const char* string (instead of char string[]).
Allow the function to make a copy of the string and then modify the copy. You have expressed concerns about leaking this memory, but that concern is mostly to do with your program's design rather than a necessity.
It's perfectly valid to duplicate each string individually and then clean them all up later. The main issue is that it's inconvenient, and also slightly pointless.
Let's address the second point. You have several options, but if you insist that the result be easily cleaned-up with a call to free, then try this strategy:
When you allocate the pointer array, also make it large enough to hold a copy of the string:
// Allocate storage for `num_strings` pointers, plus a copy of the original string,
// then copy the string into memory immediately following the pointer storage.
char **ret = malloc((*num_strings) * sizeof(char*) + strlen(string) + 1);
char *buffer = (char*)&ret[*num_strings];
strcpy(buffer, string);
Now, do all your string operations on buffer. For example:
// Extract all delimited substrings. Here, buffer will always point at the
// current substring, and p will search for the delimiter. Once found,
// the substring is terminated, its pointer appended to the substring array,
// and then buffer is pointed at the next substring, if any.
int c = 0;
for(char *p = buffer; *buffer; ++p)
{
if (*p == delim || !*p) {
char *next = p;
if (*p) {
*p = '\0';
++next;
}
ret[c++] = buffer;
buffer = next;
}
}
When you need to clean up, it's just a single call to free, because everything was stored together.
The string pointers you store into the res with ret[c] = buffer; array point to an automatic array that goes out of scope when the function returns. The code subsequently has undefined behavior. You should allocate these strings with strdup().
Note also that it might not be appropriate to return NULL when the string does not contain a separator. Why not return an array with a single string?
Here is a simpler implementation:
#include <stdlib.h>
char **str_split(const char *string, char delim, int *num_strings) {
int i, n, from, to;
char **res;
for (n = 1, i = 0; string[i]; i++)
n += (string[i] == delim);
*num_strings = 0;
res = malloc(sizeof(*res) * n);
if (res == NULL)
return NULL;
for (i = from = to = 0;; from = to + 1) {
for (to = from; string[to] != delim && string[to] != '\0'; to++)
continue;
res[i] = malloc(to - from + 1);
if (res[i] == NULL) {
/* allocation failure: free memory allocated so far */
while (i > 0)
free(res[--i]);
free(res);
return NULL;
}
memcpy(res[i], string + from, to - from);
res[i][to - from] = '\0';
i++;
if (string[to] == '\0')
break;
}
*num_strings = n;
return res;
}
I have buffer problem on this line
strcpy_s(*(pWords + word_count), word_length, pWord);
I'm trying to read a file from argv[1] and print out every single word in that file and their occurrence, but I can't figure out whats wrong..?!?
int main(int argc, char* argv[])
{
char *delimiters = argv[2]; // Prose delimiters
char buf[BUF_LEN]; // Buffer for a line of keyboard input
size_t str_size = INIT_STR_EXT; // Current memory to store prose
char* filePath = argv[1];
FILE *fP ;
char* pStr = malloc(str_size); // Pointer to prose to be tokenized
*pStr = '\0'; // Set 1st character to null
fopen_s(&fP, filePath, "r");
fread(buf, BUF_LEN, 10, fP);
size_t maxWords = 10; // Current maximum word count
int word_count = 0; // Current word count
size_t word_length = 0; // Current word length
char** pWords = calloc(maxWords, sizeof(char*)); // Stores pointers to the words
int* pnWord = calloc(maxWords, sizeof(int)); // Stores count for each word
size_t str_len = strnlen_s(buf, BUF_LEN); // Length used by strtok_s()
char* ptr = NULL; // Pointer used by strtok_s()
char* pWord = strtok_s(buf, delimiters, &ptr); // Find 1st word
if (!pWord)
{
printf("No words found. Ending program.\n");
return 1;
}
bool new_word = true; // False for an existing word
while (pWord)
{
// Check for existing word
for (int i = 0; i < word_count; ++i)
if (strcmp(*(pWords + i), pWord) == 0)
{
++*(pnWord + i);
new_word = false;
break;
}
if (new_word) // Not NULL if new word
{
//Check for sufficient memory
if (word_count == maxWords)
{ // Get more space for pointers to words
maxWords += WORDS_INCR;
pWords = realloc(pWords, maxWords*sizeof(char*));
// Get more space for word counts
pnWord = realloc(pnWord, maxWords*sizeof(int));
}
// Found a new word so get memory for it and copy it there
word_length = ptr - pWord; // Length of new word
*(pWords + word_count) = malloc(word_length);
strcpy_s(*(pWords + word_count), word_length, pWord); // Copy to array
*(pnWord + word_count++) = 1; // Increment word count
}
else
new_word = true; // Reset new word flag
pWord = strtok_s(NULL, delimiters, &ptr); // Find subsequent word
}
strcpy_s adds a null byte to the end of the string. You need to malloc(word_length+1).
There are two problems with this line:
fread(buf, BUF_LEN, 10, fP);
Firstly the buffer is too small by a factor of 10 as you read 10 elements.
Second, it does not read the file further than BUF_LEN (previously, *10).
Also the code does not take care of newline chars, as I cannot pass that in argv[2] delimiter spec, even as " \\n".
I suggest you replace fread() with a loop of fgets(), and redefine the word delimiters.
#define BUF_LEN 1000 // plenty of room
...
char buf[BUF_LEN+1]; // allow for 0 terminator
char delimiters[] = " \n\t"; // predefined
...
//size_t str_len = strnlen_s(buf, BUF_LEN); // unnecessary
while (fgets(buf, BUF_LEN, fP) != NULL) { // new outer loop
char* ptr = NULL; // carry on as you were
...
}
Next, as others commented, increase the string space allocation
*(pWords + word_count) = malloc(word_length+1);
In addition, although you have used the "safe" string functions, you did not check argc or the result of any of fopen_s(), fread(), malloc(), calloc(), realloc(), nor have you closed the file or released memory.
Looks to me like you forgot to get an additional byte for the 0 character.
Despite that: Instead of allocating a fixed buffer size for your file, you could get the filesize with fseek using SEEK_END and an offset of 0 to allocate that much memory+1 byte
I want to read input from user using C program. I don't want to use array like,
char names[50];
because if the user gives string of length 10, then the remaining spaces are wasted.
If I use character pointer like,
char *names;
then I need to allocate memory for that in such a way of,
names = (char *)malloc(20 * sizeof(char));
In this case also, there is a possibility of memory wastage.
So, what I need is to dynamically allocate memory for a string which is of exactly same as the length of the string.
Lets assume,
If the user input is "stackoverflow", then the memory allocated should be of 14 (i.e. Length of the string = 13 and 1 additional space for '\0').
How could I achieve this?
Read one character at a time (using getc(stdin)) and grow the string (realloc) as you go.
Here's a function I wrote some time ago. Note it's intended only for text input.
char *getln()
{
char *line = NULL, *tmp = NULL;
size_t size = 0, index = 0;
int ch = EOF;
while (ch) {
ch = getc(stdin);
/* Check if we need to stop. */
if (ch == EOF || ch == '\n')
ch = 0;
/* Check if we need to expand. */
if (size <= index) {
size += CHUNK;
tmp = realloc(line, size);
if (!tmp) {
free(line);
line = NULL;
break;
}
line = tmp;
}
/* Actually store the thing. */
line[index++] = ch;
}
return line;
}
You could have an array that starts out with 10 elements. Read input character by character. If it goes over, realloc another 5 more. Not the best, but then you can free the other space later.
You can also use a regular expression, for instance the following piece of code:
char *names
scanf("%m[^\n]", &names)
will get the whole line from stdin, allocating dynamically the amount of space that it takes. After that, of course, you have to free names.
If you ought to spare memory, read char by char and realloc each time. Performance will die, but you'll spare this 10 bytes.
Another good tradeoff is to read in a function (using a local variable) then copying. So the big buffer will be function scoped.
Below is the code for creating dynamic string :
void main()
{
char *str, c;
int i = 0, j = 1;
str = (char*)malloc(sizeof(char));
printf("Enter String : ");
while (c != '\n') {
// read the input from keyboard standard input
c = getc(stdin);
// re-allocate (resize) memory for character read to be stored
str = (char*)realloc(str, j * sizeof(char));
// store read character by making pointer point to c
str[i] = c;
i++;
j++;
}
str[i] = '\0'; // at the end append null character to mark end of string
printf("\nThe entered string is : %s", str);
free(str); // important step the pointer declared must be made free
}
First, define a new function to read the input (according to the structure of your input) and store the string, which means the memory in stack used. Set the length of string to be enough for your input.
Second, use strlen to measure the exact used length of string stored before, and malloc to allocate memory in heap, whose length is defined by strlen. The code is shown below.
int strLength = strlen(strInStack);
if (strLength == 0) {
printf("\"strInStack\" is empty.\n");
}
else {
char *strInHeap = (char *)malloc((strLength+1) * sizeof(char));
strcpy(strInHeap, strInStack);
}
return strInHeap;
Finally, copy the value of strInStack to strInHeap using strcpy, and return the pointer to strInHeap. The strInStack will be freed automatically because it only exits in this sub-function.
This is a function snippet I wrote to scan the user input for a string and then store that string on an array of the same size as the user input. Note that I initialize j to the value of 2 to be able to store the '\0' character.
char* dynamicstring() {
char *str = NULL;
int i = 0, j = 2, c;
str = (char*)malloc(sizeof(char));
//error checking
if (str == NULL) {
printf("Error allocating memory\n");
exit(EXIT_FAILURE);
}
while((c = getc(stdin)) && c != '\n')
{
str[i] = c;
str = realloc(str,j*sizeof(char));
//error checking
if (str == NULL) {
printf("Error allocating memory\n");
free(str);
exit(EXIT_FAILURE);
}
i++;
j++;
}
str[i] = '\0';
return str;
}
In main(), you can declare another char* variable to store the return value of dynamicstring() and then free that char* variable when you're done using it.
Here's a snippet which I wrote which performs the same functionality.
This code is similar to the one written by Kunal Wadhwa.
char *dynamicCharString()
{
char *str, c;
int i = 0;
str = (char*)malloc(1*sizeof(char));
while(c = getc(stdin),c!='\n')
{
str[i] = c;
i++;
realloc(str,i*sizeof(char));
}
str[i] = '\0';
return str;
}
char* load_string()
{
char* string = (char*) malloc(sizeof(char));
*string = '\0';
int key;
int sizer = 2;
char sup[2] = {'\0'};
while( (key = getc(stdin)) != '\n')
{
string = realloc(string,sizer * sizeof(char));
sup[0] = (char) key;
strcat(string,sup);
sizer++
}
return string;
}
int main()
{
char* str;
str = load_string();
return 0;
}
realloc is a pretty expensive action...
here's my way of receiving a string, the realloc ratio is not 1:1 :
char* getAString()
{
//define two indexes, one for logical size, other for physical
int logSize = 0, phySize = 1;
char *res, c;
res = (char *)malloc(sizeof(char));
//get a char from user, first time outside the loop
c = getchar();
//define the condition to stop receiving data
while(c != '\n')
{
if(logSize == phySize)
{
phySize *= 2;
res = (char *)realloc(res, sizeof(char) * phySize);
}
res[logSize++] = c;
c = getchar();
}
//here we diminish string to actual logical size, plus one for \0
res = (char *)realloc(res, sizeof(char *) * (logSize + 1));
res[logSize] = '\0';
return res;
}
I want to throw the last three character from file name and get the rest?
I have this code:
char* remove(char* mystr) {
char tmp[] = {0};
unsigned int x;
for (x = 0; x < (strlen(mystr) - 3); x++)
tmp[x] = mystr[x];
return tmp;
}
Try:
char *remove(char* myStr) {
char *retStr;
char *lastExt;
if (myStr == NULL) return NULL;
if ((retStr = malloc (strlen (myStr) + 1)) == NULL) return NULL;
strcpy (retStr, myStr);
lastExt = strrchr (retStr, '.');
if (lastExt != NULL)
*lastExt = '\0';
return retStr;
}
You'll have to free the returned string yourself. It simply finds the last . in the string and replaces it with a null terminator character. It will handle errors (passing NULL or running out of memory) by returning NULL.
It won't work with things like /this.path/is_bad since it will find the . in the non-file portion but you could handle this by also doing a strrchr of /, or whatever your path separator is, and ensuring it's position is NULL or before the . position.
A more general purpose solution to this problem could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// remove_ext: removes the "extension" from a file spec.
// myStr is the string to process.
// extSep is the extension separator.
// pathSep is the path separator (0 means to ignore).
// Returns an allocated string identical to the original but
// with the extension removed. It must be freed when you're
// finished with it.
// If you pass in NULL or the new string can't be allocated,
// it returns NULL.
char *remove_ext (char* myStr, char extSep, char pathSep) {
char *retStr, *lastExt, *lastPath;
// Error checks and allocate string.
if (myStr == NULL) return NULL;
if ((retStr = malloc (strlen (myStr) + 1)) == NULL) return NULL;
// Make a copy and find the relevant characters.
strcpy (retStr, myStr);
lastExt = strrchr (retStr, extSep);
lastPath = (pathSep == 0) ? NULL : strrchr (retStr, pathSep);
// If it has an extension separator.
if (lastExt != NULL) {
// and it's to the right of the path separator.
if (lastPath != NULL) {
if (lastPath < lastExt) {
// then remove it.
*lastExt = '\0';
}
} else {
// Has extension separator with no path separator.
*lastExt = '\0';
}
}
// Return the modified string.
return retStr;
}
int main (int c, char *v[]) {
char *s;
printf ("[%s]\n", (s = remove_ext ("hello", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("hello.", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("hello.txt", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("hello.txt.txt", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("/no.dot/in_path", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("/has.dot/in.path", '.', '/'))); free (s);
printf ("[%s]\n", (s = remove_ext ("/no.dot/in_path", '.', 0))); free (s);
return 0;
}
and this produces:
[hello]
[hello]
[hello]
[hello.txt]
[/no.dot/in_path]
[/has.dot/in]
[/no]
Use rindex to locate the "." character. If the string is writable, you can replace it with the string terminator char ('\0') and you're done.
char * rindex(const char *s, int c);
DESCRIPTION
The rindex() function locates the last character matching c (converted to a char) in the null-terminated string s.
If you literally just want to remove the last three characters, because you somehow know that your filename has an extension exactly three chars long (and you want to keep the dot):
char *remove_three(const char *filename) {
size_t len = strlen(filename);
char *newfilename = malloc(len-2);
if (!newfilename) /* handle error */;
memcpy(newfilename, filename, len-3);
newfilename[len - 3] = 0;
return newfilename;
}
Or let the caller provide the destination buffer (which they must ensure is long enough):
char *remove_three(char *dst, const char *filename) {
size_t len = strlen(filename);
memcpy(dst, filename, len-3);
dst[len - 3] = 0;
return dst;
}
If you want to generically remove a file extension, that's harder, and should normally use whatever filename-handling routines your platform provides (basename on POSIX, _wsplitpath_s on Windows) if there's any chance that you're dealing with a path rather than just the final part of the filename:
/* warning: may modify filename. To avoid this, take a copy first
dst may need to be longer than filename, for example currently
"file.txt" -> "./file.txt". For this reason it would be safer to
pass in a length with dst, and/or allow dst to be NULL in which
case return the length required */
void remove_extn(char *dst, char *filename) {
strcpy(dst, dirname(filename));
size_t len = strlen(dst);
dst[len] = '/';
dst += len+1;
strcpy(dst, basename(filename));
char *dot = strrchr(dst, '.');
/* retain the '.' To remove it do dot[0] = 0 */
if (dot) dot[1] = 0;
}
Come to think of it, you might want to pass dst+1 rather than dst to strrchr, since a filename starting with a dot maybe shouldn't be truncated to just ".". Depends what it's for.
I would try the following algorithm:
last_dot = -1
for each char in str:
if char = '.':
last_dot = index(char)
if last_dot != -1:
str[last_dot] = '\0'
Just replace the dot with "0". If you know that your extension is always 3 characters long you can just do:
char file[] = "test.png";
file[strlen(file) - 4] = 0;
puts(file);
This will output "test". Also, you shouldn't return a pointer to a local variable. The compiler will also warn you about this.
To get paxdiablo's second more general purpose solution to work in a C++ compiler I changed this line:
if ((retstr = malloc (strlen (mystr) + 1)) == NULL)
to:
if ((retstr = static_cast<char*>(malloc (strlen (mystr) + 1))) == NULL)
Hope this helps someone.
This should do the job:
char* remove(char* oldstr) {
int oldlen = 0;
while(oldstr[oldlen] != NULL){
++oldlen;
}
int newlen = oldlen - 1;
while(newlen > 0 && mystr[newlen] != '.'){
--newlen;
}
if (newlen == 0) {
newlen = oldlen;
}
char* newstr = new char[newlen];
for (int i = 0; i < newlen; ++i){
newstr[i] = oldstr[i];
}
return newstr;
}
Get location and just copy up to that location into a new char *.
i = 0;
n = 0;
while(argv[1][i] != '\0') { // get length of filename
i++; }
for(ii = 0; i > -1; i--) { // look for extension working backwards
if(argv[1][i] == '.') {
n = i; // char # of exension
break; } }
memcpy(new_filename, argv[1], n);
This is simple way to change extension name.
....
char outputname[255]
sscanf(inputname,"%[^.]",outputname); // foo.bar => foo
sprintf(outputname,"%s.txt",outputname) // foo.txt <= foo
....
With configurable minimum file length and configurable maximum extension length. Returns index where extension was changed to null character, or -1 if no extension was found.
int32_t strip_extension(char *in_str)
{
static const uint8_t name_min_len = 1;
static const uint8_t max_ext_len = 4;
/* Check chars starting at end of string to find last '.' */
for (ssize_t i = sizeof(in_str); i > (name_min_len + max_ext_len); i--)
{
if (in_str[i] == '.')
{
in_str[i] = '\0';
return i;
}
}
return -1;
}
I use this code:
void remove_extension(char* s) {
char* dot = 0;
while (*s) {
if (*s == '.') dot = s; // last dot
else if (*s == '/' || *s == '\\') dot = 0; // ignore dots before path separators
s++;
}
if (dot) *dot = '\0';
}
It handles the Windows path convention correctly (both / and \ can be path separators).