Does a string contain a word from a list - c

I have a string and an array of keywords. If the string contains one of the keywords on the list, I want to check if the keyword is the only element of this string. If it's not, I want to return an error. Last thing, the string will always end with \n.
My keyword array is the following:
/* Keywords looked for by the CheckKeyword variants in this thread.
 * Order matters: those functions walk the table by position and skip the
 * first entry and the last two. */
const char * keywordsTable[] =
{
"INIT",
"BEGIN",
"END",
"ROUTINES",
"ENDROUTINES",
"ENDWHEN",
"WHEN",
"WHILE"
};
For instance if my string is "BEGIN\n", everything is fine. If my string is "BEGIN FOO\n" or "FOO BEGIN\n", I have to return an error. Finally if my string is "BEGINFOO\n", everything is fine. (error code is 1, else it's 0)
I've tried something (I don't know how to proceed):
/* Asker's unfinished attempt.
 * For each checked keyword it builds "KEYWORD\n" in a heap buffer and searches
 * for that text inside str. Returns 1 only from the inner else-branch below,
 * otherwise 0. The buffer is freed on both return paths. */
int CheckKeyword(char * str)
{
int nKeywords = sizeof(keywordsTable) / sizeof(keywordsTable[0]);
char * strTok = NULL;
// NOTE(review): the malloc result is used below without a NULL check.
char * keywrdWithLF = malloc(20);
// I don't want to check for the last two keywords nor the first
for (int i = 1; i < nKeywords - 2; i++)
{
strcpy_s(keywrdWithLF, 20, keywordsTable[i]);
strcat_s(keywrdWithLF, 20, "\n");
// Because the needle ends in '\n', only a keyword directly followed by a
// newline can match here.
strTok = strstr(str, keywrdWithLF);
// If my string contains a keyword
if (strTok != NULL)
{
// If the string contains other characters... and I'm stuck
// strcmp() is non-zero when str differs from "KEYWORD\n", so this empty
// branch is the "keyword plus other characters" case; nothing is reported.
if (strcmp(str, keywrdWithLF))
{
}
else
{
// NOTE(review): reached when str is exactly "KEYWORD\n"; returning 1 here
// looks inverted relative to the stated rule (1 = error) -- confirm.
free(keywrdWithLF);
return 1;
}
}
}
free(keywrdWithLF);
return 0;
}
Thank you in advance (please don't complain about my indent style, I have to use Whitesmiths indentation)!

/*
 * Report whether str contains one of the checked keywords as a separate word
 * next to other content.
 *
 * str: NUL-terminated line (per the question it ends with '\n'); not modified.
 *
 * Returns 1 (error) when a checked keyword
 *   - starts the string and is followed by a space or tab, or
 *   - is preceded by a space or tab and followed by any whitespace;
 * returns 0 otherwise (keyword alone, keyword glued to other letters such as
 * "BEGINFOO\n", or no keyword at all).
 *
 * Every occurrence of a keyword is examined, not just the first one, so
 * "XBEGIN BEGIN\n" is reported as an error.
 */
int CheckKeyword(char * str)
{
    const size_t nKeywords = sizeof(keywordsTable) / sizeof(keywordsTable[0]);

    /* The first keyword and the last two are deliberately not checked. */
    for (size_t i = 1; i + 2 < nKeywords; i++)
    {
        const size_t len = strlen(keywordsTable[i]);
        const char *hit = str;

        while ((hit = strstr(hit, keywordsTable[i])) != NULL)
        {
            if (hit == str)
            {
                if (hit[len] == ' ' || hit[len] == '\t')
                    return 1;
            }
            else if ((hit[-1] == ' ' || hit[-1] == '\t')
                     /* isspace is in <ctype.h>; it must not be handed a negative char */
                     && isspace((unsigned char)hit[len]))
            {
                return 1;
            }
            hit++; /* resume the search just past this occurrence */
        }
    }
    return 0;
}

Perhaps another method?
/*
 * Return 1 (error) when str contains one of the checked keywords as a
 * separate word together with other content, 0 otherwise.
 *
 * A keyword counts as a separate word when it is bounded on each side by the
 * start/end of the string or by a space or tab. A keyword that is the whole
 * string ("BEGIN\n") or that is glued to other characters ("BEGINFOO\n") is
 * not an error.
 *
 * Side effect: the first '\n' in str is overwritten with '\0'.
 */
int CheckKeyword(char * str)
{
    int rCode = 0;
    int nKeywords = sizeof(keywordsTable) / sizeof(keywordsTable[0]);
    const char *const *keyword = keywordsTable;
    char *cp;

    /* I assume that since str is defined as "char * str" and not
     * "const char * str", it is OK to modify the input string. Hence, why not
     * just eliminate the '\n' problem from the equation? */
    /* Eliminate the newline character from the end of the string. */
    if ((cp = strchr(str, '\n')) != NULL)
        *cp = '\0';

    // I don't want to check for the last two keywords nor the first.
    nKeywords -= 3;
    ++keyword;

    /* Loop through the keywords. */
    while (nKeywords > 0 && rCode == 0)
    {
        const size_t len = strlen(*keyword);

        // "I want to check if the keyword is the only element of this string."
        // "If it's not, I want to return an error."
        for (cp = strstr(str, *keyword); cp != NULL; cp = strstr(cp + 1, *keyword))
        {
            const int atStart = (cp == str);
            const int atEnd = (cp[len] == '\0');
            /* Check for stuff prior to the keyword. */
            const int blankBefore = !atStart && (cp[-1] == ' ' || cp[-1] == '\t');
            /* Check for stuff after the keyword. */
            // Finally if my string is "BEGINFOO\n", everything is fine.
            const int blankAfter = (cp[len] == ' ' || cp[len] == '\t');

            if ((atStart || blankBefore) && (atEnd || blankAfter) && !(atStart && atEnd))
            {
                rCode = 1;
                break;
            }
        }
        ++keyword;
        --nKeywords;
    }
    return(rCode);
}

Related

program adds on a string seemingly out of nowhere

I'm writing a code that reads in an input file from stdin, and outputs the exact same content (to stdout) except that replaces any words found in the "dictionary" in the following way, in the exact order:
if the word exactly as it is stored as a key in dictionary appears, then find the corresponding value pair and print that out instead.
if the word that's capitalized properly (eg., Thomas, with capital first letter and lowercase for everything else) is a valid key in the dictionary, print out the corresponding value pair instead
if the lowercased version is a valid key, print out its corresponding value
if there are no matches, just print things out as they are.
(All non-alphabetical characters are just printed out "normally".)
A problem I've been having though is that when I'm doing (2), somehow a character ('U') gets tagged on to the end of the "string" or copy2 array when I'm testing "IPSUM" (all cap).
For instance, see this output:
My outputs are in the lines with "<", and the ">" indicate what should've been. Based on the order I'm checking things, since IPSUM is not in the dictionary (see the end of this post for contents of dictionary), it goes to (2) where IPSUM should become Ipsum, and it should print out the corresponding value of Ipsum. But instead I get IpsumU, and so the dictionary doesn't recognize the word. But I'm not sure where the 'U' is coming from, since the input is exactly
IPSUM (all cap).
Could anyone help me figure out what might be wrong with my code?
//for reference:
/* One entry of a bucket; entries are chained through `next`. */
typedef struct HashBucketEntry {
void *key; /* presumably the dictionary word -- TODO confirm against the table code */
void *data; /* stored value; processInput casts it to char * and prints it */
struct HashBucketEntry *next; /* following entry in the same bucket, if any */
} HashBucketEntry;
/* Table header: bucket array plus the callbacks used to place and compare keys. */
typedef struct HashTable {
int size; /* processInput reduces hash values modulo this to pick a bucket */
unsigned int (*hashFunction)(void *); /* maps a key to an unsigned hash value */
int (*equalFunction)(void*, void*); /* key comparison; return convention not visible here */
HashBucketEntry **buckets; /* indexed by hash % size; a NULL slot is treated as empty */
} HashTable;
//We have a Hashtable *dictionary.
/* Asker's code: copy stdin to stdout, replacing each alphabetic word that has
 * a dictionary hit (tried as-is, then Capitalized, then lowercased) with the
 * data stored in the matching bucket. Uses the global `dictionary`. */
void processInput() {
//char c;
int c;
int i = 0;
//char * word = (char *) malloc(60 * sizeof(char));
char word[60];
// NOTE(review): the condition is an assignment, so the loop stops on a NUL
// byte; EOF is only handled by the `break` at the bottom.
while (c = getchar()) {
if (isalpha(c)) {
// NOTE(review): no bound on i -- a word of 60+ letters writes past word[].
word[i] = c;
i++;
} else {
// A non-letter ends the current word.
word[i] = '\0';
if (word[0] != '\0') {
//char * copy = (char *) malloc(60 * sizeof(char));
char copy[60];
strcpy(copy, word);
// Attempt 1: the word exactly as typed.
unsigned int location = (dictionary->hashFunction)(copy) % (dictionary->size);
char * word_in_dict;
// NOTE(review): only the first entry of the bucket is used and its key is
// never compared with the word -- confirm that this is intended.
if (dictionary->buckets[location] != NULL) {
word_in_dict = (char *) dictionary->buckets[location]->data;
} else {
word_in_dict = NULL;
}
// Attempt 2: first letter upper-cased, the rest lower-cased.
char copy2[60];
copy2[0] = toupper(copy[0]);
// This inner `i` shadows the outer word index.
// NOTE(review): the loop stops at copy's '\0' without writing one into
// copy2, so copy2 is not terminated and keeps whatever bytes were there.
for(int i = 1; copy[i]; i++){
copy2[i] = tolower(copy[i]);
}
unsigned int location2 = (dictionary->hashFunction)(copy2) % (dictionary->size);
char * word_in_dict2;
if (dictionary->buckets[location2] != NULL) { //somehow this is NULL when IPSUM, even though copy2 has correct string
word_in_dict2 = (char *) dictionary->buckets[location2]->data;
} else {
word_in_dict2 = NULL;
}
// Attempt 3: everything lower-cased.
char copy3[60];
// NOTE(review): same missing terminator as copy2.
for(int i = 0; copy[i]; i++){
copy3[i] = tolower(copy[i]);
}
unsigned int location3 = (dictionary->hashFunction)(copy3) % (dictionary->size);
char * word_in_dict3;
if (dictionary->buckets[location3] != NULL) {
word_in_dict3 = (char *) dictionary->buckets[location3]->data;
} else {
word_in_dict3 = NULL;
}
// Print the first hit in priority order, or the original word if none.
if (word_in_dict != NULL) {
fprintf(stdout, "%s", word_in_dict);
} else if (word_in_dict2 != NULL) {
fprintf(stdout, "%s", word_in_dict2);
} else if (word_in_dict3 != NULL) {
fprintf(stdout, "%s", word_in_dict3);
} else {
//fprintf(stdout, "%s", copy);
printf("%s", copy);
}
// NOTE(review): c may be EOF here (input ending right after a word), in
// which case EOF itself is passed to putchar.
putchar(c);
i = 0;
} else if (c != EOF) {
// No pending word: echo the separator unchanged.
putchar(c);
} else {
// EOF with no pending word: done.
break;
}
}
}
}
The dictionary contains only these entries:
ipsum i%##!
fubar fubar
IpSum XXXXX24
Ipsum YYYYY211
Any help would be really appreciated!
Update in response to the answer:
I changed the code for copy2 to this:
for(j = 1; j < strlen(copy); j++) {
if (j < sizeof(copy2)) {
copy2[j] = tolower(copy[j]);
}
}
(and did a similar thing to copy3). The second case works, but now the third case fails; things only seem to work if I change the second case but not the third case. Does anyone know why this is the case?
Your code to create modified copies of your input string, e.g.
char copy2[60];
copy2[0] = toupper(copy[0]);
for(int i = 1; copy[i]; i++){
copy2[i] = tolower(copy[i]);
}
does not copy the terminating '\0'. As automatic variables are not implicitly initialized, the corresponding memory may contain any data (from previous loop iterations or from unrelated code), which may then appear as trailing characters. You must append a '\0' character after the last character of your string.
This error may result in out-of-bounds access to the array when you access it as a string if there is no '\0' within the array bounds. (undefined behavior)
Your code itself might result in out-of-bounds access if the input string is too long. You should add a check to prevent access to array elements at i >= sizeof(copy2).
I suggest something like this:
char copy2[60];
copy2[0] = toupper(copy[0]);
/* avoid reading past the end of an empty string */
if(copy[0]) {
for(int i = 1; copy[i] && (i < sizeof(copy)-1); i++){
copy2[i] = tolower(copy[i]);
}
/* variable i will already be incremented here */
copy2[i] = '\0';
}
Edit as a response to a question in a comment:
You cannot combine strcpy and tolower, but you can copy the string first and modify the characters in-place afterwards.
Example:
char copy2[60];
/* Only copy when the string plus its terminator fits into copy2. */
if(strlen(copy) < sizeof(copy2)) {
    strcpy(copy2, copy);
    copy2[0] = toupper((unsigned char)copy2[0]);
    if(copy2[0]) {
        /* The length has been checked before, no need to check again here */
        for(size_t k = 1; copy2[k]; k++) {
            copy2[k] = tolower((unsigned char)copy2[k]);
        }
        /* the string is already terminated */
    }
} else {
    /* string too long, handle error */
}
or with truncating instead of reporting an error
char copy2[60];
/* Bound the copy by the DESTINATION size; strncpy does not terminate the
 * result when it truncates, so the terminator is written explicitly. */
strncpy(copy2, copy, sizeof(copy2)-1);
copy2[sizeof(copy2)-1] = '\0';
copy2[0] = toupper((unsigned char)copy2[0]);
if(copy2[0]) {
    /* Iterate over copy2 (already truncated and terminated), not over copy:
     * copy may be longer than copy2 and would run the index past the buffer. */
    for(size_t k = 1; copy2[k]; k++) {
        copy2[k] = tolower((unsigned char)copy2[k]);
    }
    /* the string is already terminated */
}

How to detect if user inserts data with commas (in a desired format) or not?

User inserts data in a format:" [NAME], [SURNAME], [INDEX] ". Errors codes:
0 -- everything is loaded to the structure properly
1 -- not loaded to structure properly (user did not use commas or p->name went wrong)
2 -- only name loaded properly
3 -- name and surname loaded properly (index went wrong)
struct student_t
{
char name[20];
char surname[40];
int index;
};
exapmples:
input: John, Deep
err_code: 2
input: John, Deep, 999
err_code: 0
input: NULL //(nothing)
err_code: 1
... so I'm unable to detect if user inserts for example: "John, Deep" (err 2) or "John, Deep, "(err 3) or "John, " (err 2),..(it results in err 1; or if everything is fine, err 0)
My attempts: //edit1: working version using this approach, further below this one.
/* Asker's first attempt (body of a function that receives `p` and `err_code`).
 * Reads one line, counts commas via first/last comma position, then splits
 * the line with strtok. */
char buffer[1024], *pch1, *pch2;
if (fgets(buffer,1024, stdin)!=NULL)
{
/* pch1 = first comma, pch2 = last comma; they differ only when the line
 * holds at least two commas. */
pch1=strchr(buffer, ',');
pch2=strrchr(buffer, ',');
if (pch1!=pch2 && pch1!=NULL) //detects if inserted are 2 different commas
{
char *name = strtok(buffer,","); // returns pointer to the beginning of the token
if (name) {//the place where "," occurs, becomes a "NULL" character
/* %19s stops at the first whitespace, so only the first word is stored */
sscanf(name," %19s", p->name); // skip leading spaces
char *surname = strtok(NULL,",");
if (surname) {
sscanf(surname," %39s", p->surname); // skip leading spaces
char *index = strtok(NULL,",");
if (index) {
/* NOTE(review): no end pointer is passed, so a non-numeric field is not
 * detected here; strtol just yields 0. */
p->index = (int)strtol(index, NULL, 10);
} else {*err_code=3; return NULL;} //only NAME and SURNAME correctly, INDEX is loaded wrong
} else {*err_code=2; return NULL;} //only NAME loaded correctly
}
} else if (pch1==pch2 && pch1!=NULL)
{//but if there is 1 comma, input may be like: "John, Deep" so name'd be ok
char *name = strtok(buffer,",");
if (name) {
sscanf(name," %19s", p->name);
char *surname = strtok(NULL,",");
if (surname) {
sscanf(surname," %39s", p->surname);
char *index = strtok(NULL,",");
if (index) {
p->index = (int)strtol(index, NULL, 10);
/* NOTE(review): p->index is read here although this fragment has not
 * assigned it on this path; the first two comparisons are duplicates. */
}else if (p->index==0||p->index==0||p->index==' ') {*err_code=2; return NULL;}
}
}
} else {*err_code=1; return NULL;} //if there were 0 commas, err_code=1
}
/* NOTE(review): p was already dereferenced above, so the p==NULL test comes
 * too late; with `char name[20]` as declared for student_t in this thread,
 * p->name is an array and never compares equal to NULL. */
if (p==NULL || p->name==NULL)
{
*err_code=1;
return NULL;
}
/* NOTE(review): same array-vs-NULL issue; this condition cannot be true. */
if (p->surname && p->name==NULL)
{
*err_code=2;
return NULL;
}
//because the above "if" construction didn't work, I added another one here:
/* NOTE(review): index is an int; comparing it with NULL is just another test
 * for 0, so a legitimately entered index of 0 is reported as error 3. */
if (p->index==NULL || p->index==0) //so if input was "John, Deep" then p->index should be NULL?
{
*err_code=3;
return NULL;
}
//edit1: Okay, this code works for me, everything goes as enwisaged. However it is very messy, so I'll try to adopt and use answers in another versions...
/* Asker's revised attempt (body of a function that receives `p` and
 * `err_code`). Same comma-counting approach as before; the surname is now
 * read with a scanset so it may contain spaces. */
char buffer[1024], *pch1, *pch2;
if (fgets(buffer,1024, stdin)!=NULL)
{
/* first and last comma; equal pointers mean exactly one comma (or none when NULL) */
pch1=strchr(buffer, ',');
pch2=strrchr(buffer, ',');
if (pch1!=pch2 && pch1!=NULL)
{
char *name = strtok(buffer,","); // returns pointer to the beginning of the token
if (name) { //the place where "," is occured becomes a "NULL" character
sscanf(name," %19s", p->name); // skip leading spaces
char *surname = strtok(NULL,",");
if (surname) {
/* reads up to 39 characters, stopping at a tab or newline */
sscanf(surname," %39[^\t\n]", p->surname); // skip leading spaces
char *index = strtok(NULL,",");
if (index) {
p->index = (int)strtol(index, NULL, 10);
/* strtol yields 0 when no digits were found, so 0 is used as the
 * "index missing" marker -- a real index of 0 is rejected as well. */
if (p->index==0) {*err_code=3; return NULL;}
} //else {*err_code=3; return NULL;} //only NAME and SURNAME correctly, INDEX is loaded wrong
} else {*err_code=2; return NULL;} //only NAME loaded correctly
}
} else if (pch1==pch2 && pch1!=NULL)
{
char *name = strtok(buffer,","); // returns pointer to the beginning of the token
if (name) { //the place where "," is occured becomes a "NULL" character
sscanf(name," %19s", p->name); // skip leading spaces
char *surname = strtok(NULL,",");
if (surname) {
sscanf(surname," %39[^\t\n]", p->surname); // skip leading spaces
char *index = strtok(NULL,",");
if (index) {
p->index = (int)strtol(index, NULL, 10);
/* NOTE(review): p->index is read here without having been assigned on
 * this path by this fragment. */
} else if (p->index==0||p->index==' ') {*err_code=2; return NULL;}
} else {*err_code=1; return NULL;}
} else {*err_code=2; return NULL;}
} else {*err_code=1; return NULL;}
}
/* NOTE(review): p has already been dereferenced above; with `char name[20]`
 * as declared for student_t in this thread, p->name==NULL is never true. */
if (p==NULL || p->name==NULL)
{
*err_code=1;
return NULL;
}
I have a feeling it could be done in a totally different way... I'll take all hints and answers to my heart and do my best to understand and learn them all.
//edit1: if my ugly code infuriated someone, I'd really be happy to do some gardener work and cut off some of these demonic bushes, to clean it a bit. I think some of if cases are not at all necessary for it to work...
PS. (It's a continuation of my previous problem where typed commas were assigned to structure: How to scanf commas, but with commas not assigned to a structure? C but in this topic there, I ask about doing it in a way that there'd be an information what user typed wrong)
Here, what you need is beyond what the scanf family of functions can do, because you want to strip blank characters at the beginning and the end of a name but still allow spaces inside a name. IMHO, you should use a dedicated function for that parsing. The code could be:
/* Find a token that is terminated by one of the characters in delims.
 * Blanks (space, tab, carriage return) at the beginning and the end of the
 * token are trimmed.
 *
 * start:  where to begin scanning (NUL-terminated string)
 * end:    out; set to one past the last non-blank character of the token,
 *         or to NULL when no delimiter was found
 * next:   out; set to the character after the delimiter, so the delimiter
 *         itself is next[-1]. Left untouched when no delimiter was found.
 * delims: set of delimiter characters
 *
 * Returns a pointer to the first non-blank character of the token (the token
 * may be empty, in which case the result equals *end), or NULL if no
 * delimiter was found.
 * */
const char* find(const char * start, char **end, char **next, const char *delims) {
    static const char blanks[] = " \t\r";

    start += strspn(start, blanks);     // trim blanks at the beginning
    *end = strpbrk(start, delims);      // search first delimiter
    if (*end == NULL) {                 // test the result, not the out-parameter itself
        return NULL;
    }
    *next = *end + 1;                   // next find will start after the delimiter

    // trim blanks at the end; (*end)[-1] lies inside the token, so it is never '\0'
    while (*end > start && strchr(blanks, (*end)[-1]) != NULL) {
        --*end;
    }
    return start;
}
// Copy the characters in [first, last) into dst (capacity cap, cap > 0),
// truncating if necessary and always terminating the result.
static void store_field(char *dst, size_t cap, const char *first, const char *last) {
    size_t len = (size_t)(last - first);
    if (len > cap - 1) len = cap - 1;
    memcpy(dst, first, len);
    dst[len] = '\0';
}

// Parse one line "[NAME], [SURNAME], [INDEX]" from stdin into *p.
//
// p:        record to fill
// err_code: out; 0 = all three fields loaded,
//                1 = nothing usable (no line, no comma, empty name),
//                2 = only the name was loaded,
//                3 = name and surname loaded, index missing or malformed
//
// Returns p when err_code is 0, NULL otherwise. Fields loaded before the
// failure stay stored in *p.
struct student_t* getstruct(struct student_t *p, int *err_code) {
    char buffer[1024];
    char *end, *next;
    const char delims[] = ",\r\n";

    *err_code = 1; // be prepared to worst case
    if (p == NULL || fgets(buffer, sizeof buffer, stdin) == NULL) {
        return NULL;
    }

    // NAME: must be non-empty and followed by a comma
    const char *name = find(buffer, &end, &next, delims);
    if (name == NULL || next[-1] != ',' || end == name) {
        return NULL;
    }
    store_field(p->name, sizeof p->name, name, end);
    *err_code = 2; // Ok, we have a name followed with a comma

    // SURNAME: same rules; the limit comes from the surname buffer itself
    const char *surname = find(next, &end, &next, delims);
    if (surname == NULL || next[-1] != ',' || end == surname) {
        return NULL;
    }
    store_field(p->surname, sizeof p->surname, surname, end);
    *err_code = 3; // Ok, we have a surname followed with a comma

    // INDEX: %d skips leading blanks; %1s succeeds only if some non-blank
    // character follows the number, so exactly one conversion means
    // "a number and nothing else". No trailing delimiter is required.
    char dummy[2];
    if (1 == sscanf(next, "%d%1s", &(p->index), dummy)) {
        *err_code = 0;
    }
    return (*err_code == 0) ? p : NULL;
}
I would use this pseudo-code:
isolate first comma-separated token
if no token, return 1
if length >= sizeof(s.name), return 1
copy first token to s.name
isolate second token
if no token, return 2
if length >= sizeof(s.surname), return 2
copy first token to s.surname
isolate third token
if no token, return 3
if token not numeric, return 3
set s.index = atoi( third token )
return 0
If you code that up in C, you should end up with something nice and short and clean and reliable, without too many annoyingly redundant checking and backtracking.
(Actually, if it was me, I'd use one general-purpose function to do the token isolating all at once, up front, then simply test if the number of found tokens was 0, 1, 2, 3, or more than 3. See this web page for additional ideas.)
Consider using return value from sscanf:
#include <stdio.h>
#include <assert.h>
/* Record filled by parse_input. */
typedef struct student_t
{
    char name[20];    /* at most 19 characters + '\0' */
    char surname[40]; /* at most 39 characters + '\0' */
    int index;
} student_t;

/* Result of parse_input: how many leading fields were loaded. */
enum parse_error_t {
    E_OK,                   /* name, surname and index loaded */
    E_NOT_LOADED_PROPERLY,  /* nothing loaded */
    E_ONLY_NAME_LOADED,     /* only the name loaded */
    E_NAME_SURNAME_LOADED   /* name and surname loaded, index missing */
};

/*
 * Parse "NAME, SURNAME, INDEX" from buf into *out.
 *
 * Fields are separated by commas and/or blanks. The field widths in the
 * format (19 and 39) are the sizes of name[] and surname[] minus one, so the
 * terminating '\0' always fits; the scansets stop at a comma so that the
 * separator is not stored as part of the name.
 *
 * Returns the parse_error_t value matching the number of fields converted;
 * a NULL buf or out, or an empty input, yields E_NOT_LOADED_PROPERLY.
 */
enum parse_error_t parse_input(char *buf, student_t *out) {
    int matches_count;

    if (buf == NULL || out == NULL) {
        return E_NOT_LOADED_PROPERLY;
    }
    matches_count = sscanf(buf, " %19[^, \t\r\n]%*[, \t]%39[^, \t\r\n]%*[, \t]%d",
                           out->name, out->surname, &out->index);
    switch(matches_count) {
    case 1:
        return E_ONLY_NAME_LOADED;
    case 2:
        return E_NAME_SURNAME_LOADED;
    case 3:
        return E_OK;
    case 0:
    default: /* also covers EOF (empty input) */
        return E_NOT_LOADED_PROPERLY;
    }
}
int main() {
char *in1 = NULL;
char *in2 = "John";
char *in3 = "John, Deep";
char *in4 = "John, Deep, 999";
student_t student;
assert(parse_input(in1, &student) == E_NOT_LOADED_PROPERLY);
assert(parse_input(in2, &student) == E_ONLY_NAME_LOADED);
assert(parse_input(in3, &student) == E_NAME_SURNAME_LOADED);
assert(parse_input(in4, &student) == E_OK);
}
String matching expression is based on this answer.

Tokenize String by using pointer

I'm trying to tokenize a string and here is my attempt.
/* Global working copy of the string being tokenized. */
char new_str[1024];
/* NOTE(review): strcpy is unbounded; an input of 1024+ characters overflows new_str. */
void tokenize_init(const char str[]){//copy the string into global section
strcpy(new_str,str);
}
/* Scan position inside new_str; persists between tokenize_next calls. */
int i = 0;
/* Asker's tokenizer: scan forward from i to the next non-letter, overwrite it
 * with '\0' and return. */
char *tokenize_next() {
/* Recomputed on every call: once a '\0' has been written into new_str below,
 * this is only the length of the first word. */
const int len = strlen(new_str);
for(; i <= len; i++) {
if ( i == len) {
return NULL;
}
if ((new_str[i] >= 'a' && new_str[i] <= 'z') ||
(new_str[i] >= 'A' && new_str[i] <= 'Z')) {
continue;
}else {
new_str[i] = '\0';
i = i + 1;
/* Always the start of the buffer, i.e. the first word, not the token
 * that was just delimited. */
return new_str;
}
}
return NULL;
}
// Driver: echo the sentence, then print every token on its own line.
int main(void) {
    char sentence[] = "This is a good-sentence for_testing 1 neat function.";
    char *nt;

    printf("%s\n", sentence);
    tokenize_init(sentence);

    nt = tokenize_next();
    while (nt != NULL) {
        printf("%s\n",nt);
        nt = tokenize_next();
    }
}
However, it just prints out the first word of the sentence (which is "This") and then stops. Can someone tell me why? My guess is that my new_str is not persistent, and that when the main function calls tokenize_next() again, new_str has become just the first word of the sentence. Thanks in advance.
The reason that it only prints out "This" is because you iterate to the first non-letter character which happens to be a space, and you replace this with a null terminating character at this line:
new_str[i] = '\0';
After that, it doesn't matter what you do to the rest of the string, it will only print up to that point. The next time tokenize_next is called the length of the string is no longer what you think it is because it is only counting the word "This" and since "i" has already reached that amount the function returns and so does every successive call to it:
if ( i == len)
{
return NULL;
}
To fix the function you would need to somehow update your pointer to look past that character on the next iteration.
However, this is quite kludgy. You are much better off using one of the mentioned functions such as strtok or strsep
UPDATE:
If you cannot use those functions then a redesign of your function would be ideal, however, per your request, try the following modifications:
#include <string.h>
#include <cstdio>
/* Working copy of the string being tokenized; tokens returned by
 * tokenize_next point into this buffer. */
char new_str[1024];
/* Start of the not-yet-tokenized remainder of new_str. */
char* str_accessor;

/* Copy str into the global buffer and rewind the tokenizer.
 * Input longer than the buffer is truncated instead of overflowing it. */
void tokenize_init(const char str[]){
    size_t len = strlen(str);
    if (len >= sizeof new_str) {
        len = sizeof new_str - 1;
    }
    memcpy(new_str, str, len);
    new_str[len] = '\0';
    str_accessor = new_str;
}

/* Kept only so existing references to this global still link; the tokenizer
 * no longer needs it. */
int i = 0;

/* ASCII letters are token characters; everything else separates tokens. */
static int is_token_char(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/* Return the next run of letters as a NUL-terminated string, or NULL when
 * the input is exhausted (or tokenize_init has not been called).
 * Any number of consecutive separators is skipped, and a final token that is
 * not followed by a separator is still returned. */
char* tokenize_next(void) {
    char *token;

    if (str_accessor == NULL) {
        return NULL;
    }
    /* skip separators in front of the token */
    while (*str_accessor != '\0' && !is_token_char(*str_accessor)) {
        str_accessor++;
    }
    if (*str_accessor == '\0') {
        return NULL;
    }
    token = str_accessor;
    while (is_token_char(*str_accessor)) {
        str_accessor++;
    }
    /* terminate the token; only step over the separator if there is one,
     * so the accessor never moves past the end of the string */
    if (*str_accessor != '\0') {
        *str_accessor = '\0';
        str_accessor++;
    }
    return token;
}
// Driver: echo the sentence, then print every token on its own line.
int main(void) {
    char sentence[] = "This is a good-sentence for_testing 1 neater function.";
    char *nt;

    printf("%s\n", sentence);
    tokenize_init(sentence);

    nt = tokenize_next();
    while (nt != NULL) {
        printf("%s\n",nt);
        nt = tokenize_next();
    }
}

how to remove extension from file name?

I want to throw the last three character from file name and get the rest?
I have this code:
/* Asker's attempt: copy all but the last three characters of mystr.
 * NOTE(review): the name matches remove() from <stdio.h> and will clash if
 * that header is included. */
char* remove(char* mystr) {
/* One-element array: every write at index >= 1 below lands outside it. */
char tmp[] = {0};
unsigned int x;
/* strlen() is unsigned, so for strings shorter than 3 the bound wraps around
 * to a huge value. No terminator is written after the copied characters. */
for (x = 0; x < (strlen(mystr) - 3); x++)
tmp[x] = mystr[x];
/* tmp is a local array; the returned pointer dangles once the function returns. */
return tmp;
}
Try:
/* Return a newly allocated copy of myStr cut at its last '.', or an exact
 * copy when it contains no '.'. Returns NULL for a NULL argument or when the
 * allocation fails. The caller frees the result. */
char *remove(char* myStr) {
    if (myStr == NULL) {
        return NULL;
    }

    size_t size = strlen (myStr) + 1; /* including the terminator */
    char *copy = malloc (size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy (copy, myStr, size);

    char *dot = strrchr (copy, '.');
    if (dot != NULL) {
        *dot = '\0';
    }
    return copy;
}
You'll have to free the returned string yourself. It simply finds the last . in the string and replaces it with a null terminator character. It will handle errors (passing NULL or running out of memory) by returning NULL.
It won't work with things like /this.path/is_bad since it will find the . in the non-file portion but you could handle this by also doing a strrchr of /, or whatever your path separator is, and ensuring it's position is NULL or before the . position.
A more general purpose solution to this problem could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// remove_ext: strip the "extension" from a file spec.
//   myStr   - the file spec to process.
//   extSep  - the character that introduces the extension.
//   pathSep - the path separator, or 0 to ignore path components.
// Returns a freshly allocated copy of myStr that ends just before the last
// extSep, provided that extSep is not located in front of the last pathSep;
// otherwise the copy is unchanged. The caller must free the result.
// Returns NULL when myStr is NULL or the copy cannot be allocated.
char *remove_ext (char* myStr, char extSep, char pathSep) {
    if (myStr == NULL) {
        return NULL;
    }

    // Duplicate the input, terminator included.
    size_t size = strlen (myStr) + 1;
    char *copy = malloc (size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy (copy, myStr, size);

    // Locate the last extension separator and, if requested, the last
    // path separator.
    char *dot = strrchr (copy, extSep);
    char *slash = (pathSep != 0) ? strrchr (copy, pathSep) : NULL;

    // Cut only when the extension separator belongs to the final component.
    if (dot != NULL && (slash == NULL || slash < dot)) {
        *dot = '\0';
    }
    return copy;
}
/* Demo: run remove_ext over a table of sample specs and print each result
 * in brackets. The arguments c and v are unused. */
int main (int c, char *v[]) {
    static const struct {
        char *spec;
        char pathSep;
    } cases[] = {
        { "hello",            '/' },
        { "hello.",           '/' },
        { "hello.txt",        '/' },
        { "hello.txt.txt",    '/' },
        { "/no.dot/in_path",  '/' },
        { "/has.dot/in.path", '/' },
        { "/no.dot/in_path",  0   },
    };
    int k;

    for (k = 0; k < (int) (sizeof cases / sizeof cases[0]); k++) {
        char *s = remove_ext (cases[k].spec, '.', cases[k].pathSep);
        printf ("[%s]\n", s);
        free (s);
    }
    return 0;
}
and this produces:
[hello]
[hello]
[hello]
[hello.txt]
[/no.dot/in_path]
[/has.dot/in]
[/no]
Use rindex to locate the "." character. If the string is writable, you can replace it with the string terminator char ('\0') and you're done.
char * rindex(const char *s, int c);
DESCRIPTION
The rindex() function locates the last character matching c (converted to a char) in the null-terminated string s.
If you literally just want to remove the last three characters, because you somehow know that your filename has an extension exactly three chars long (and you want to keep the dot):
/* Return a newly allocated copy of filename without its last three
 * characters (for "name.ext" the dot is kept: "name.").
 * A name of three characters or fewer yields an empty string.
 * Returns NULL if the allocation fails. The caller frees the result. */
char *remove_three(const char *filename) {
    size_t len = strlen(filename);
    /* guard the subtraction: size_t would wrap around for short names */
    size_t keep = (len > 3) ? len - 3 : 0;
    char *newfilename = malloc(keep + 1);
    if (!newfilename) {
        return NULL;
    }
    memcpy(newfilename, filename, keep);
    newfilename[keep] = 0;
    return newfilename;
}
Or let the caller provide the destination buffer (which they must ensure is long enough):
/* Copy filename without its last three characters into dst and return dst.
 * dst must have room for strlen(filename) - 2 bytes (at least 1).
 * A name of three characters or fewer yields an empty string. */
char *remove_three(char *dst, const char *filename) {
    size_t len = strlen(filename);
    /* guard the subtraction: size_t would wrap around for short names */
    size_t keep = (len > 3) ? len - 3 : 0;
    memcpy(dst, filename, keep);
    dst[keep] = 0;
    return dst;
}
If you want to generically remove a file extension, that's harder, and should normally use whatever filename-handling routines your platform provides (basename on POSIX, _wsplitpath_s on Windows) if there's any chance that you're dealing with a path rather than just the final part of the filename:
/* Write filename to dst as "<dirname>/<basename>", with everything after the
   last '.' of the base name removed (the '.' itself is retained).

   warning: may modify filename (dirname/basename are allowed to). To avoid
   this, take a copy first.
   dst may need to be longer than filename, for example currently
   "file.txt" -> "./file.". For this reason it would be safer to
   pass in a length with dst, and/or allow dst to be NULL in which
   case return the length required.
   dst must not overlap filename. */
void remove_extn(char *dst, char *filename) {
    /* Take the base name first and park it in dst: dirname() may overwrite
       part of filename (or reuse static storage), after which basename()
       would no longer see the original path. */
    const char *base = basename(filename);
    size_t base_len = strlen(base);
    memmove(dst, base, base_len + 1);

    const char *dir = dirname(filename);
    size_t dir_len = strlen(dir);
    /* no extra separator when the directory already ends in one ("/") */
    size_t sep = (dir_len > 0 && dir[dir_len - 1] == '/') ? 0 : 1;

    /* shift the parked base name right, then put the directory in front */
    memmove(dst + dir_len + sep, dst, base_len + 1);
    memcpy(dst, dir, dir_len);
    if (sep) {
        dst[dir_len] = '/';
    }

    /* search only the base name so dots in directories are left alone */
    char *dot = strrchr(dst + dir_len + sep, '.');
    /* retain the '.' To remove it do dot[0] = 0 */
    if (dot) dot[1] = 0;
}
Come to think of it, you might want to pass dst+1 rather than dst to strrchr, since a filename starting with a dot maybe shouldn't be truncated to just ".". Depends what it's for.
I would try the following algorithm:
last_dot = -1
for each char in str:
if char = '.':
last_dot = index(char)
if last_dot != -1:
str[last_dot] = '\0'
Just replace the dot with "0". If you know that your extension is always 3 characters long you can just do:
char file[] = "test.png";
file[strlen(file) - 4] = 0;
puts(file);
This will output "test". Also, you shouldn't return a pointer to a local variable. The compiler will also warn you about this.
To get paxdiablo's second more general purpose solution to work in a C++ compiler I changed this line:
if ((retstr = malloc (strlen (mystr) + 1)) == NULL)
to:
if ((retstr = static_cast<char*>(malloc (strlen (mystr) + 1))) == NULL)
Hope this helps someone.
This should do the job:
// Return a new[]-allocated, NUL-terminated copy of oldstr cut at its last '.'.
// A string without a '.', or whose only '.' is its first character, is
// copied unchanged. Returns 0 for a null argument.
// The caller releases the result with delete[].
char* remove(char* oldstr) {
    if (!oldstr) {
        return 0;
    }

    int oldlen = 0;
    while (oldstr[oldlen] != '\0') {
        ++oldlen;
    }

    // Search backwards for the last '.', ignoring index 0.
    int newlen = oldlen;
    for (int k = oldlen - 1; k > 0; --k) {
        if (oldstr[k] == '.') {
            newlen = k;
            break;
        }
    }

    // One extra byte for the terminator.
    char* newstr = new char[newlen + 1];
    for (int k = 0; k < newlen; ++k) {
        newstr[k] = oldstr[k];
    }
    newstr[newlen] = '\0';
    return newstr;
}
Get location and just copy up to that location into a new char *.
i = 0;
n = 0;
while(argv[1][i] != '\0') { // get length of filename
i++; }
for(ii = 0; i > -1; i--) { // look for extension working backwards
if(argv[1][i] == '.') {
n = i; // char # of exension
break; } }
memcpy(new_filename, argv[1], n);
This is simple way to change extension name.
....
char outputname[255]
sscanf(inputname,"%[^.]",outputname); // foo.bar => foo
sprintf(outputname,"%s.txt",outputname) // foo.txt <= foo
....
With configurable minimum file length and configurable maximum extension length. Returns index where extension was changed to null character, or -1 if no extension was found.
/*
 * Cut in_str at its last '.', in place.
 *
 * The '.' is only accepted when at least name_min_len characters precede it
 * and at most max_ext_len characters follow it.
 *
 * Returns the index at which the '.' was replaced by '\0', or -1 when no
 * acceptable extension was found (or in_str is NULL).
 */
int32_t strip_extension(char *in_str)
{
    static const uint8_t name_min_len = 1;
    static const uint8_t max_ext_len = 4;

    if (in_str == NULL)
    {
        return -1;
    }

    /* The length must come from strlen(); sizeof(in_str) is only the size of
     * the pointer. */
    const size_t len = strlen(in_str);

    /* Check chars starting at end of string to find last '.' */
    for (size_t i = len; i-- > name_min_len; )
    {
        if (len - i - 1 > max_ext_len)
        {
            break; /* whatever follows is already too long for an extension */
        }
        if (in_str[i] == '.')
        {
            in_str[i] = '\0';
            return (int32_t)i;
        }
    }
    return -1;
}
I use this code:
/* Truncate s in place at the last '.' that comes after the last path
 * separator ('/' or '\\'). s is left unchanged when no such '.' exists. */
void remove_extension(char* s) {
    char *cut = 0; /* candidate truncation point, if any */

    for (; *s != '\0'; ++s) {
        switch (*s) {
        case '.':
            cut = s;  /* most recent dot wins */
            break;
        case '/':
        case '\\':
            cut = 0;  /* dots before a separator belong to a directory name */
            break;
        default:
            break;
        }
    }
    if (cut != 0) {
        *cut = '\0';
    }
}
It handles the Windows path convention correctly (both / and \ can be path separators).

How to split a string into tokens in C?

How to split a string into tokens by '&' in C?
strtok / strtok_r
char *token;
char *state;
for (token = strtok_r(input, "&", &state);
token != NULL;
token = strtok_r(NULL, "&", &state))
{
...
}
I would do it something like this (using strchr()):
#include <string.h>
char *data = "this&&that&other";
char *next;
char *curr = data;
while ((next = strchr(curr, '&')) != NULL) {
/* process curr to next-1 */
curr = next + 1;
}
/* process the remaining string (the last token) */
strchr(const char *s, int c) returns a pointer to the next location of c in s, or NULL if c isn't found in s.
You might be able to use strtok(), however, I don't like strtok(), because:
it modifies the string being tokenized, so it doesn't work for literal strings, or is not very useful when you want to keep the string for other purposes. In that case, you must copy the string to a temporary first.
it merges adjacent delimiters, so if your string was "a&&b&c", the returned tokens are "a", "b", and "c". Note that there is no empty token after "a".
it is not thread-safe.
You can use the strtok() function as shown in the example below.
/// Function to parse a string in separate tokens
int parse_string(char pInputString[MAX_STRING_LENGTH],char *Delimiter,
char *pToken[MAX_TOKENS])
{
int i;
i = 0;
pToken[i] = strtok(pInputString, Delimiter);
i++;
while ((pToken[i] = strtok(NULL, Delimiter)) != NULL){
i++;
}
return i;
}
/// The array pTokens[] now contains the pointers to the start of each token in the (unchanged) original string.
sprintf(String,"Token1&Token2");
NrOfParameters = parse_string(String,"&",pTokens);
sprintf("%s, %s",pToken[0],pToken[1]);
For me, using the strtok() function is unintuitive and too complicated, so I created my own. As arguments it accepts the string to split, the character that separates the tokens, and a pointer through which the number of tokens found is returned (useful when printing the tokens in a loop). A disadvantage of this function is the fixed maximum length of each token.
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_LEN 32

/*
 * Split text at every split_char.
 *
 * text:       string to split; '\n' characters are dropped (helpful when the
 *             line came from fgets()).
 * split_char: the separator. Adjacent separators produce empty tokens; a
 *             separator that is the very last character does not start a new
 *             token.
 * w_count:    out; number of tokens returned (0 when NULL is returned).
 *
 * Returns a malloc'ed array of *w_count malloc'ed strings, each holding at
 * most MAX_WORD_LEN - 1 characters (longer tokens are truncated). Returns
 * NULL when text is shorter than two characters or an allocation fails.
 * The caller frees every string and then the array.
 */
char **txtspt(const char *text, char split_char, int *w_count)
{
    const size_t len = strlen(text);
    char **tokens;
    int words = 1;
    int k = 0;      /* token being filled */
    size_t j = 0;   /* write position inside that token */
    size_t i;

    *w_count = 0;
    if (len <= 1)
        return NULL;

    /* Words counting: a separator in the last position is not counted */
    for (i = 0; i + 1 < len; ++i)
    {
        if (text[i] == split_char)
            ++words;
    }

    /* Memory reservation: one pointer per word, one fixed buffer per word */
    tokens = malloc((size_t)words * sizeof *tokens);
    if (tokens == NULL)
        return NULL;
    for (k = 0; k < words; ++k)
    {
        tokens[k] = malloc(MAX_WORD_LEN);
        if (tokens[k] == NULL)
        {
            while (k > 0)
                free(tokens[--k]);
            free(tokens);
            return NULL;
        }
    }

    /* Splitting */
    k = 0;
    for (i = 0; i < len; ++i)
    {
        if (text[i] == split_char)
        {
            tokens[k][j] = '\0';
            if (k + 1 >= words)
                break;          /* trailing separator: no further token */
            ++k;
            j = 0;
        }
        else if (text[i] != '\n' && j < MAX_WORD_LEN - 1)
        {
            tokens[k][j++] = text[i];
        }
    }
    tokens[k][j] = '\0';        /* terminate the final token */

    *w_count = words;
    return tokens;
}

Resources