C99: Why is my string changing? - c

I have a function that is basically a rewritten strtok_r because that function is causing me grief.
char *betterStrtok(char *str, char *delim, char **loc)
{
int iter = 0;
char *tmp;
if(str)
{
char mod[strlen(str) + 2];
char *out = malloc(strlen(str) + 2);
char curr = str[0];
strcpy(mod, str);
while(curr)
{
tmp = strchr(delim, curr);
if(tmp)
{
mod[iter] = 0;
strcpy(out, mod);
*loc = &mod[iter + 1];
//printf("Inside function: \"%s\"\n", *loc);
return out;
}
if(curr)
{
curr = mod[++iter];
}
else
{
*loc = &mod[0];
strcpy(out, mod);
return out;
}
}
return NULL;
}
else
{
char mod[strlen(*loc) + 2];
strcpy(mod, *loc);
char *tloc = malloc(sizeof loc + 2);
char *out = malloc(strlen(*loc) + 2);
char curr = mod[0];
while(curr)
{
tmp = strchr(delim, curr);
if(tmp)
{
mod[iter] = 0;
strcpy(out, mod);
tloc = &mod[iter + 1];
strcpy(*loc, tloc);
return out;
}
if(curr)
{
curr = mod[++iter];
}
else
{
*loc = &mod[0];
strcpy(out, mod);
return out;
}
}
return NULL;
}
}
So my issue is *loc has the appropriate thing in it after the first pass, and when I check what's in it outside the function, it's mostly there except the last character is something weird. Let's say this is the setup:
char *addr = malloc(60);
char **supaddr = &addr;
char *strtotok = "Hello, world!";
char *thetok;
thetok = betterStrtok(strtotok, ",", supaddr);
printf("Outside function: \"%s\"\n", addr);
Adding print statements right before the return and right after calling the function shows something like this:
Inside function: " world!"
Outside function: " w"
The question is: how can I prevent the string from changing or how can I do something else so I can save the "rest" of the original string without returning it?

If you start replacing standard library (or POSIX) functions with your own implementations, first look hard at how the facility is used. For example, compare fgets() and getline().
If I were you, I'd probably use
size_t extract_token(const char *src_ptr, const size_t src_len,
char **token_ptr, size_t *token_size, size_t *token_len);
which extracts a token from a src_len -byte buffer at src_ptr. (Unlike string-based methods, this can handle embedded nul bytes.)
The return value is the number of characters consumed from src_ptr. The token is copied (expanded?) to dynamically allocated token_ptr. The allocated length is in token_size, and the length of the token in token_len.
If extract_token() encounters only whitespace but no token, it returns the number of whitespace chars consumed, with zero assigned to token_len. For simplicity, let's assume the function always sets errno; to zero if success, to a nonzero error code if an error occurs.
A simple loop that tokenizes lines read from standard input would be
char *line_ptr = NULL;
size_t line_size = 0;
ssize_t line_len;
long line_num = 0;
char *token_ptr = NULL;
size_t token_size = 0;
size_t token_len;
char *cur, *end;
size_t n;
while (1) {
line_len = getline(&line_ptr, &line_size, stdin);
line_num++;
if (line_len < 1) {
if (ferror(stdin) || !feof(stdin)) {
fprintf(stderr, "Standard input: Line %ld: Read error.\n", line_num);
return EXIT_FAILURE;
}
break;
}
cur = line_ptr;
end = line_ptr + line_len;
while (1) {
if (cur >= end) {
errno = 0;
cur = end;
break;
}
n = extract_token(cur, (size_t)(end - cur),
&token_ptr, &token_size, &token_len);
if (errno) {
/* cur + n is the offending character in input */
fprintf(stderr, "Standard input: Line %ld: Cannot tokenize line.\n", line_num);
exit(EXIT_FAILURE);
}
/* Do something with token;
token_ptr points to the token,
token_len is the length of the token
token_size is the size allocated for the token
*/
}
}
/* Since the line and token buffers are no longer needed,
free them. I like to clear the variables too, just in
case.
*/
free(line_ptr);
line_ptr = NULL;
line_size = 0;
free(token_ptr);
token_ptr = NULL;
token_size = 0;
Note that when reading files with clear record-and-field formatting, like CSV files, I do prefer to read the tokens directly from the file using a getline()-like interface, either
int next_field(char **ptr, size_t *size, size_t *len, FILE *in);
int next_record(FILE *in);
or
int next_wfield(wchar_t **ptr, size_t *size, size_t *len, FILE *in);
int next_wrecord(FILE *in);
where next_field() (or next_wfield() for wide input) obtains the next field in the current record, preferably handling de-quoting and de-escaping, and next_wrecord() skips any leftover fields in the current record, and moves to the beginning of next record.
Using fgetc() or fgetwc() the code implementing the above is quite straightforward (even if CSV quoting rules are implemented), although it won't be as fast as possible using more advanced methods. Because CSV and other such file formats are not optimal anyway, the slight loss in speed is normally neglible/unnoticeable. Most importantly, if you try it out, you'll see that code that uses next_field()/next_record() is quite robust, and easy to read, write, and maintain in the long term.

The answer was in the comments. Turns out if I changed mod to a pointer it worked perfectly.

Related

Free, invalid pointer

I have a program, that splits strings based on the delimiter. I have also, 2 other functions, one that prints the returned array and another that frees the array.
My program prints the array and returns an error when the free array method is called. Below is the full code.
#include "stringsplit.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
unsigned long int getNofTokens(const char *string) {
char *stringCopy;
unsigned long int stringLength;
unsigned long int count = 0;
stringLength = (unsigned)strlen(string);
stringCopy = malloc((stringLength + 1) * sizeof(char));
strcpy(stringCopy, string);
if (strtok(stringCopy, " \t") != NULL) {
count++;
while (strtok(NULL, " \t") != NULL)
count++;
}
free(stringCopy);
return count;
}
char **split_string(const char *str, const char *split) {
unsigned long int count = getNofTokens(str);
char **result;
result = malloc(sizeof(char *) * count + 1);
char *tmp = malloc(sizeof(char) * strlen(str));
strcpy(tmp, str);
char *token = strtok(tmp, split);
int idx = 0;
while (token != NULL) {
result[idx++] = token;
token = strtok(NULL, split);
}
return result;
}
void print_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (int i = 0; split_string[i] != NULL; i++) {
char *currentPointer = split_string[i];
free(currentPointer);
}
free(split_string);
}
Also, do I need to explicitly add \0 at the end of the array or does strtok add it automatically?
There are some problems in your code:
[Major] the function getNofTokens() does not take the separator string as an argument, it counts the number of words separated by blanks, potentially returning an inconsistent count to its caller.
[Major] the size allocated in result = malloc(sizeof(char *) * count + 1); is incorrect: it should be:
result = malloc(sizeof(char *) * (count + 1));
Storing the trailing NULL pointer will write beyond the end of the allocated space.
[Major] storing the said NULL terminator at the end of the array is indeed necessary, as the block of memory returned by malloc() is uninitialized.
[Major] the copy of the string allocated and parsed by split_string cannot be safely freed because the pointer tmp is not saved anywhere. The pointer to the first token will be different from tmp in 2 cases: if the string contains only delimiters (no token found) or if the string starts with a delimiter (the initial delimiters will be skipped). In order to simplify the code and make it reliable, each token could be duplicated and tmp should be freed. In fact your free_split_string() function relies on this behavior. With the current implementation, the behavior is undefined.
[Minor] you use unsigned long and int inconsistently for strings lengths and array index variables. For consistency, you should use size_t for both.
[Remark] you should allocate string copies with strdup(). If this POSIX standard function is not available on your system, write a simple implementation.
[Major] you never test for memory allocation failure. This is OK for testing purposes and throw away code, but such potential failures should always be accounted for in production code.
[Remark] strtok() is a tricky function to use: it modifies the source string and keeps a hidden static state that makes it non-reentrant. You should avoid using this function although in this particular case it performs correctly, but if the caller of split_string or getNofTokens relied on this hidden state being preserved, it would get unexpected behavior.
Here is a modified version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *string, const char *split) {
char *tmp = strdup(string);
size_t count = 0;
if (strtok(tmp, split) != NULL) {
count++;
while (strtok(NULL, split) != NULL)
count++;
}
free(tmp);
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
char *tmp = strdup(str);
char *token = strtok(tmp, split);
size_t idx = 0;
while (token != NULL && idx < count) {
result[idx++] = strdup(token);
token = strtok(NULL, split);
}
result[idx] = NULL;
free(tmp);
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
Here is an alternative without strtok() and without intermediary allocations:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
size_t getNofTokens(const char *str, const char *split) {
size_t count = 0;
size_t pos = 0, len;
for (pos = 0;; pos += len) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
count++;
}
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t pos, len, idx;
for (pos = 0, idx = 0; idx < count; pos += len, idx++) {
pos += strspn(str + pos, split); // skip delimiters
len = strcspn(str + pos, split); // parse token
if (len == '\0')
break;
result[idx] = strndup(str + pos, len);
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
EDIT After re-reading the specification in your comment, there seems to be some potential confusion as to the semantics of the split argument:
if split is a set of delimiters, the above code does the job. And the examples will be split as expected.
if split is an actual string to match explicitly, the above code only works by coincidence on the examples given in the comment.
To implement the latter semantics, you should use strstr() to search for the split substring in both getNofTokens and split_string.
Here is an example:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "stringsplit.h"
/* Split string by another string, return split parts + NULL in array.
*
* Parameters:
* str: the string to split
* split: the string to split str with
*
* Returns:
* A dynamically reserved array of dynamically reserved string parts.
*
* For example called with "Test string split" and " ",
* returns ["Test", "string", "split", NULL].
* Or called with "Another - test" and " - ",
* returns ["Another", "test", NULL].
*/
size_t getNofTokens(const char *str, const char *split) {
const char *p;
size_t count = 1;
size_t len = strlen(split);
if (len == 0)
return strlen(str);
for (p = str; (p = strstr(p, split)) != NULL; p += len)
count++;
return count;
}
char **split_string(const char *str, const char *split) {
size_t count = getNofTokens(str, split);
char **result = malloc(sizeof(*result) * (count + 1));
size_t len = strlen(split);
size_t idx;
const char *p = str;
for (idx = 0; idx < count; idx++) {
const char *q = strstr(p, split);
if (q == NULL) {
q = p + strlen(p);
} else
if (q == p && *q != '\0') {
q++;
}
result[idx] = strndup(p, q - p);
p = q + len;
}
result[idx] = NULL;
return result;
}
void print_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
printf("%s\n", split_string[i]);
}
}
void free_split_string(char **split_string) {
for (size_t i = 0; split_string[i] != NULL; i++) {
free(split_string[i]);
}
free(split_string);
}
When debugging, take note of values that you got from malloc, strdup, etc. Let's call these values "the active set". It's just a name, so that we can refer to them. You get a pointer from those functions, you mentally add it to the active set. When you call free, you can only pass values from the active set, and after free returns, you mentally remove them from the set. Any other use of free is invalid and a bug.
You can easily find this out by putting breakpoints after all memory allocations, so that you can write down the pointer values, and then breakpoints on all frees, so that you can see if one of those pointer values got passed to free - since, again, to do otherwise is to misuse free.
This can be done also using "printf" debugging. Like this:
char *buf = malloc(...); // or strdup, or ...
fprintf(stderr, "+++ Alloc %8p\n", buf);
And then whenever you have free, do it again:
fprintf(stderr, "--- Free %8p\n", ptr);
free(ptr);
In the output of the program, you must be able to match every +++ with ---. If you see any --- with a value that wasn't earlier listed with a +++, there's your problem: that's the buggy invocation of free :)
I suggest using fprintf(stderr, ... instead of printf(..., since the former is typically unbuffered, so if your program crashes, you won't miss any output. printf is buffered on some architectures (and not buffered on others - so much for consistency).

Searching an array for a specific character [duplicate]

I want to write a program in C that displays each word of a whole sentence (taken as input) at a seperate line. This is what I have done so far:
void manipulate(char *buffer);
int get_words(char *buffer);
int main(){
char buff[100];
printf("sizeof %d\nstrlen %d\n", sizeof(buff), strlen(buff)); // Debugging reasons
bzero(buff, sizeof(buff));
printf("Give me the text:\n");
fgets(buff, sizeof(buff), stdin);
manipulate(buff);
return 0;
}
int get_words(char *buffer){ // Function that gets the word count, by counting the spaces.
int count;
int wordcount = 0;
char ch;
for (count = 0; count < strlen(buffer); count ++){
ch = buffer[count];
if((isblank(ch)) || (buffer[count] == '\0')){ // if the character is blank, or null byte add 1 to the wordcounter
wordcount += 1;
}
}
printf("%d\n\n", wordcount);
return wordcount;
}
void manipulate(char *buffer){
int words = get_words(buffer);
char *newbuff[words];
char *ptr;
int count = 0;
int count2 = 0;
char ch = '\n';
ptr = buffer;
bzero(newbuff, sizeof(newbuff));
for (count = 0; count < 100; count ++){
ch = buffer[count];
if (isblank(ch) || buffer[count] == '\0'){
buffer[count] = '\0';
if((newbuff[count2] = (char *)malloc(strlen(buffer))) == NULL) {
printf("MALLOC ERROR!\n");
exit(-1);
}
strcpy(newbuff[count2], ptr);
printf("\n%s\n",newbuff[count2]);
ptr = &buffer[count + 1];
count2 ++;
}
}
}
Although the output is what I want, I have really many black spaces after the final word displayed, and the malloc() returns NULL so the MALLOC ERROR! is displayed in the end.
I can understand that there is a mistake at my malloc() implementation, but I do not know what it is.
Is there another more elegant or generally better way to do it?
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
Take a look at this, and use whitespace characters as the delimiter. If you need more hints let me know.
From the website:
char * strtok ( char * str, const char * delimiters );
On a first call, the function expects a C string as argument for str, whose first character is used as the starting location to scan for tokens. In subsequent calls, the function expects a null pointer and uses the position right after the end of last token as the new starting location for scanning.
Once the terminating null character of str is found in a call to strtok, all subsequent calls to this function (with a null pointer as the first argument) return a null pointer.
Parameters
str
C string to truncate.
Notice that this string is modified by being broken into smaller strings (tokens).
Alternativelly [sic], a null pointer may be specified, in which case the function continues scanning where a previous successful call to the function ended.
delimiters
C string containing the delimiter characters.
These may vary from one call to another.
Return Value
A pointer to the last token found in string.
A null pointer is returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
For the fun of it here's an implementation based on the callback approach:
const char* find(const char* s,
const char* e,
int (*pred)(char))
{
while( s != e && !pred(*s) ) ++s;
return s;
}
void split_on_ws(const char* s,
const char* e,
void (*callback)(const char*, const char*))
{
const char* p = s;
while( s != e ) {
s = find(s, e, isspace);
callback(p, s);
p = s = find(s, e, isnotspace);
}
}
void handle_word(const char* s, const char* e)
{
// handle the word that starts at s and ends at e
}
int main()
{
split_on_ws(some_str, some_str + strlen(some_str), handle_word);
}
malloc(0) may (optionally) return NULL, depending on the implementation. Do you realize why you may be calling malloc(0)? Or more precisely, do you see where you are reading and writing beyond the size of your arrays?
Consider using strtok_r, as others have suggested, or something like:
void printWords(const char *string) {
// Make a local copy of the string that we can manipulate.
char * const copy = strdup(string);
char *space = copy;
// Find the next space in the string, and replace it with a newline.
while (space = strchr(space,' ')) *space = '\n';
// There are no more spaces in the string; print out our modified copy.
printf("%s\n", copy);
// Free our local copy
free(copy);
}
Something going wrong is get_words() always returning one less than the actual word count, so eventually you attempt to:
char *newbuff[words]; /* Words is one less than the actual number,
so this is declared to be too small. */
newbuff[count2] = (char *)malloc(strlen(buffer))
count2, eventually, is always one more than the number of elements you've declared for newbuff[]. Why malloc() isn't returning a valid ptr, though, I don't know.
You should be malloc'ing strlen(ptr), not strlen(buf). Also, your count2 should be limited to the number of words. When you get to the end of your string, you continue going over the zeros in your buffer and adding zero size strings to your array.
Just as an idea of a different style of string manipulation in C, here's an example which does not modify the source string, and does not use malloc. To find spaces I use the libc function strpbrk.
int print_words(const char *string, FILE *f)
{
static const char space_characters[] = " \t";
const char *next_space;
// Find the next space in the string
//
while ((next_space = strpbrk(string, space_characters)))
{
const char *p;
// If there are non-space characters between what we found
// and what we started from, print them.
//
if (next_space != string)
{
for (p=string; p<next_space; p++)
{
if(fputc(*p, f) == EOF)
{
return -1;
}
}
// Print a newline
//
if (fputc('\n', f) == EOF)
{
return -1;
}
}
// Advance next_space until we hit a non-space character
//
while (*next_space && strchr(space_characters, *next_space))
{
next_space++;
}
// Advance the string
//
string = next_space;
}
// Handle the case where there are no spaces left in the string
//
if (*string)
{
if (fprintf(f, "%s\n", string) < 0)
{
return -1;
}
}
return 0;
}
you can scan the char array looking for the token if you found it just print new line else print the char.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char *s;
s = malloc(1024 * sizeof(char));
scanf("%[^\n]", s);
s = realloc(s, strlen(s) + 1);
int len = strlen(s);
char delim =' ';
for(int i = 0; i < len; i++) {
if(s[i] == delim) {
printf("\n");
}
else {
printf("%c", s[i]);
}
}
free(s);
return 0;
}
char arr[50];
gets(arr);
int c=0,i,l;
l=strlen(arr);
for(i=0;i<l;i++){
if(arr[i]==32){
printf("\n");
}
else
printf("%c",arr[i]);
}

read user input without maxsize in C

In C i can use the char *fgets(char *s, int size, FILE *stream) function to read user input from stdin. But the size of the user input is limited to size.
How can i read user input of variable size?
In C you are responsible for your buffers, and responsible for their size. So you can not have some dynamic buffer ready for you.
So the only solution is to use a loop (either of fgets or fgetc - depends on your processing and on your stop condition)
If you go beyond C to C++, you will find that you can accept std::string objects of variable sizes (there you need to deal with word and/or line termination instead - and loop again)
This function reads from standard input until end-of-file is encountered, and returns the number of characters read. It should be fairly easy to modify it to read exactly one line, or alike.
ssize_t read_from_stdin(char **s)
{
char buf[1024];
char *p;
char *tmp;
ssize_t total;
size_t len;
size_t allocsize;
if (s == NULL) {
return -1;
}
total = 0;
allocsize = 1024;
p = malloc(allocsize);
if (p == NULL) {
*s = NULL;
return -1;
}
while(fgets(buf, sizeof(buf), stdin) != NULL) {
len = strlen(buf);
if (total + len >= allocsize) {
allocsize <<= 1;
tmp = realloc(p, allocsize);
if (tmp == NULL) {
free(p);
*s = NULL;
return -1;
}
p = tmp;
}
memcpy(p + total, buf, len);
total += len;
}
p[total] = 0;
*s = p;
return total;
}

Split string in C every white space

I want to write a program in C that displays each word of a whole sentence (taken as input) at a seperate line. This is what I have done so far:
void manipulate(char *buffer);
int get_words(char *buffer);
int main(){
char buff[100];
printf("sizeof %d\nstrlen %d\n", sizeof(buff), strlen(buff)); // Debugging reasons
bzero(buff, sizeof(buff));
printf("Give me the text:\n");
fgets(buff, sizeof(buff), stdin);
manipulate(buff);
return 0;
}
int get_words(char *buffer){ // Function that gets the word count, by counting the spaces.
int count;
int wordcount = 0;
char ch;
for (count = 0; count < strlen(buffer); count ++){
ch = buffer[count];
if((isblank(ch)) || (buffer[count] == '\0')){ // if the character is blank, or null byte add 1 to the wordcounter
wordcount += 1;
}
}
printf("%d\n\n", wordcount);
return wordcount;
}
void manipulate(char *buffer){
int words = get_words(buffer);
char *newbuff[words];
char *ptr;
int count = 0;
int count2 = 0;
char ch = '\n';
ptr = buffer;
bzero(newbuff, sizeof(newbuff));
for (count = 0; count < 100; count ++){
ch = buffer[count];
if (isblank(ch) || buffer[count] == '\0'){
buffer[count] = '\0';
if((newbuff[count2] = (char *)malloc(strlen(buffer))) == NULL) {
printf("MALLOC ERROR!\n");
exit(-1);
}
strcpy(newbuff[count2], ptr);
printf("\n%s\n",newbuff[count2]);
ptr = &buffer[count + 1];
count2 ++;
}
}
}
Although the output is what I want, I have really many black spaces after the final word displayed, and the malloc() returns NULL so the MALLOC ERROR! is displayed in the end.
I can understand that there is a mistake at my malloc() implementation, but I do not know what it is.
Is there another more elegant or generally better way to do it?
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
Take a look at this, and use whitespace characters as the delimiter. If you need more hints let me know.
From the website:
char * strtok ( char * str, const char * delimiters );
On a first call, the function expects a C string as argument for str, whose first character is used as the starting location to scan for tokens. In subsequent calls, the function expects a null pointer and uses the position right after the end of last token as the new starting location for scanning.
Once the terminating null character of str is found in a call to strtok, all subsequent calls to this function (with a null pointer as the first argument) return a null pointer.
Parameters
str
C string to truncate.
Notice that this string is modified by being broken into smaller strings (tokens).
Alternativelly [sic], a null pointer may be specified, in which case the function continues scanning where a previous successful call to the function ended.
delimiters
C string containing the delimiter characters.
These may vary from one call to another.
Return Value
A pointer to the last token found in string.
A null pointer is returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
For the fun of it here's an implementation based on the callback approach:
const char* find(const char* s,
const char* e,
int (*pred)(char))
{
while( s != e && !pred(*s) ) ++s;
return s;
}
void split_on_ws(const char* s,
const char* e,
void (*callback)(const char*, const char*))
{
const char* p = s;
while( s != e ) {
s = find(s, e, isspace);
callback(p, s);
p = s = find(s, e, isnotspace);
}
}
void handle_word(const char* s, const char* e)
{
// handle the word that starts at s and ends at e
}
int main()
{
split_on_ws(some_str, some_str + strlen(some_str), handle_word);
}
malloc(0) may (optionally) return NULL, depending on the implementation. Do you realize why you may be calling malloc(0)? Or more precisely, do you see where you are reading and writing beyond the size of your arrays?
Consider using strtok_r, as others have suggested, or something like:
void printWords(const char *string) {
// Make a local copy of the string that we can manipulate.
char * const copy = strdup(string);
char *space = copy;
// Find the next space in the string, and replace it with a newline.
while (space = strchr(space,' ')) *space = '\n';
// There are no more spaces in the string; print out our modified copy.
printf("%s\n", copy);
// Free our local copy
free(copy);
}
Something going wrong is get_words() always returning one less than the actual word count, so eventually you attempt to:
char *newbuff[words]; /* Words is one less than the actual number,
so this is declared to be too small. */
newbuff[count2] = (char *)malloc(strlen(buffer))
count2, eventually, is always one more than the number of elements you've declared for newbuff[]. Why malloc() isn't returning a valid ptr, though, I don't know.
You should be malloc'ing strlen(ptr), not strlen(buf). Also, your count2 should be limited to the number of words. When you get to the end of your string, you continue going over the zeros in your buffer and adding zero size strings to your array.
Just as an idea of a different style of string manipulation in C, here's an example which does not modify the source string, and does not use malloc. To find spaces I use the libc function strpbrk.
int print_words(const char *string, FILE *f)
{
static const char space_characters[] = " \t";
const char *next_space;
// Find the next space in the string
//
while ((next_space = strpbrk(string, space_characters)))
{
const char *p;
// If there are non-space characters between what we found
// and what we started from, print them.
//
if (next_space != string)
{
for (p=string; p<next_space; p++)
{
if(fputc(*p, f) == EOF)
{
return -1;
}
}
// Print a newline
//
if (fputc('\n', f) == EOF)
{
return -1;
}
}
// Advance next_space until we hit a non-space character
//
while (*next_space && strchr(space_characters, *next_space))
{
next_space++;
}
// Advance the string
//
string = next_space;
}
// Handle the case where there are no spaces left in the string
//
if (*string)
{
if (fprintf(f, "%s\n", string) < 0)
{
return -1;
}
}
return 0;
}
you can scan the char array looking for the token if you found it just print new line else print the char.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char *s;
s = malloc(1024 * sizeof(char));
scanf("%[^\n]", s);
s = realloc(s, strlen(s) + 1);
int len = strlen(s);
char delim =' ';
for(int i = 0; i < len; i++) {
if(s[i] == delim) {
printf("\n");
}
else {
printf("%c", s[i]);
}
}
free(s);
return 0;
}
char arr[50];
gets(arr);
int c=0,i,l;
l=strlen(arr);
for(i=0;i<l;i++){
if(arr[i]==32){
printf("\n");
}
else
printf("%c",arr[i]);
}

remove characters from a c string

gcc 4.4.4 c89
I am reading in from a text file and the text file consists of names in double quotes.
"Simpson, Homer"
etc
However, I want to remove the double quotes from the string.
This is how I have done it, but I am not sure its the best way.
int get_string(FILE *in, char *temp)
{
char *quote = NULL;
/* Get the first line */
fgets(temp, STRING_SIZE, in);
printf("temp before [ %s ]\n", temp);
/* Find the second quote */
if((quote = strrchr(temp, '"')) == NULL) {
fprintf(stderr, "Text file incorrectly formatted\n");
return FALSE;
}
/* Replace with a nul to get rid of the second quote */
*quote = '\0';
/* Move the pointer to point pass the first quote */
temp++;
printf("temp after [ %s ]\n", temp);
return TRUE;
}
Many thanks for any suggestions,
No, this won't work. You are changing the parameter temp, but the calling function will still have an old value. The temp outside the function will point to the opening quote. You ought to move the characters in your buffer.
However I would suggest allocating the buffer in heap and returning a pointer to it, letting the caller free the buffer when needed. This seems to be a cleaner solution. Again, this way you won't rely on the caller to pass a sufficiently large buffer.
In general, a robust reading lines from a text file is not a trivial task in C, with its lack of automatic memory allocating functions. If possible to switch to C++, I would suggest trying much simpler C++ getline.
char *foo(char *str, int notme)
{
char *tmp=strdup(str);
char *p, *q;
for(p=str, q=tmp; *p; p++)
{
if((int)*p == notme) continue;
*q=*p;
q++;
}
strcpy(str, tmp);
free(tmp);
return str;
}
simple generic remove a char
is all lines look that way why not simple remove the first and the last char?
quote++; // move over second char
quote[strlen(quote)-1]='\0'; // remove last char
Don't know if this will help, it is a simple tokenizer i use
#include <stdlib.h>
#include <string.h>
int token(char* start, char* delim, char** tok, char** nextpos, char* sdelim, char* edelim) {
// Find beginning:
int len = 0;
char *scanner;
int dictionary[8];
int ptr;
for(ptr = 0; ptr < 8; ptr++) {
dictionary[ptr] = 0;
}
for(; *delim; delim++) {
dictionary[*delim / 32] |= 1 << *delim % 32;
}
if(sdelim) {
*sdelim = 0;
}
for(; *start; start++) {
if(!(dictionary[*start / 32] & 1 << *start % 32)) {
break;
}
if(sdelim) {
*sdelim = *start;
}
}
if(*start == 0) {
if(nextpos != NULL) {
*nextpos = start;
}
*tok = NULL;
return 0;
}
for(scanner = start; *scanner; scanner++) {
if(dictionary[*scanner / 32] & 1 << *scanner % 32) {
break;
}
len++;
}
if(edelim) {
*edelim = *scanner;
}
if(nextpos != NULL) {
*nextpos = scanner;
}
*tok = (char*)malloc(sizeof(char) * (len + 1));
if(*tok == NULL) {
return 0;
}
memcpy(*tok, start, len);
*(*tok + len) = 0;
return len + 1;
}
The parameters are:
char* start, (pointer to the string)
char* delim, (pointer to the delimiters used to break up the string)
char** tok, a reference (using &) to a char* variable that will hold the toke
char** nextpos, a reference (using &) to a char* variable that will hold the position after the last token.
char* sdelim, a reference (using &) to a char variable that will hold the value of the -start delimiter
char* edelim, a reference (using &) to a char varaible that will hold the value of the end delimiter
The last three are optional.
Pass in the start address, the delimeter is a ", and pass reference to a char * to hold the actual middle string.
The result is a newly allocated string so you have to free it.
int get_string(FILE *in, char *temp)
{
char *token = NULL;
/* Get the first line */
fgets(temp, STRING_SIZE, in);
printf("temp before [ %s ]\n", temp);
/* Find the second quote */
int length = token(temp, "\"", &token, NULL, NULL, NULL)
// DO STUFF WITH THE TOKEN
printf("temp after [ %s ]\n", token);
// DO STUFF WITH THE TOKEN
// FREE IT!!!
free(token);
return TRUE;
}
The tokenizer is a multipurpose tool that can be used in a crap ton of places, this being a very small example.
Suppose
string="\"Simpson, Homer\""
then
string_without_quotes=string+1;
string_without_quotes[strlen(string)-2]='\0';
ready!

Resources