Simple question:
If i have a line in an input file that looks looks like:
Hello#Great#Day#Today
how can I scan in each word individually as its own array, in other words tell C to stop scanning when it reaches the # character and then go in the next iteration of the loop to scan the next word as a separate array?
This is assuming you are reading through stdin. Definitely take a look at #Whoz kick start approach as well (very similar to this).
What you would want to do is create a dynamic array and populate it with every byte read through stdin. You would then want to create an array of character pointers that will point to the first character in every "word", where you define a word as every character before a '#' character (delimiter). You would then iterate through that array of characters and populate the array of character pointers with the memory addresses of the first character in each word.
Use strtok() to tokenize your input by the specified character.
http://www.cplusplus.com/reference/cstring/strtok/
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str,"#");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, "#");
}
In two stages, I have used something like this:
#include <ansi_c.h>
//tokenizing a string
int GetCount(char *in, char *delim, int *m);
int GetStrings(char *in, char *delim, int count, char **out);
void main(void)
{
int count, maxlen, i;
char inpString[]={"Hello#Greatest#Day#Today"};
char *resultBuf[10];
//get a count of strings to store
count = GetCount(inpString, "#", &maxlen);
for(i=0;i<10;i++)
{
resultBuf[i] = calloc(maxlen+1, sizeof(char));
}
//Store strings in arrays
GetStrings(inpString, "#", count, resultBuf);
for(i=0;i<count;i++)
{
printf("%s\n", resultBuf[i]);
free(resultBuf[i];
}
}
//Gets count of tokens (delimited strings)
int GetCount(char *in, char *delim, int *m)
{
char *buf=0;
char temp1[10]={0};
char *inStr;
int count = 0;
int max = 0, keepMax = 0;
if(in)
{
inStr = calloc(strlen(in)+1, sizeof(char));
strcpy(inStr, in);
if(strlen(inStr) > 1)
{
count = 0;
buf = strtok(inStr, delim);
while(buf)
{
strcpy(temp1, buf);
max = strlen(temp1);
(max > keepMax)?(keepMax = max):(keepMax == keepMax);
count++;
buf = strtok(NULL, delim);
}
*m = keepMax;
}
free(inStr);
}
return count;
}
//Gets array of strings
int GetStrings(char *in, char *delim, int count, char **out)
{
char *buf=0;
char *inStr;
int i = 0;
if(in)
{
inStr = calloc(strlen(in)+1, sizeof(char));
strcpy(inStr, in);
if(strlen(inStr) > 1)
{
buf = strtok(inStr, delim);
while(buf)
{
strcpy(out[i], buf);
buf = strtok(NULL, delim);
i++;
}
}
free(inStr);
}
return 0;
}
Related
I want to write a program in C that displays each word of a whole sentence (taken as input) at a seperate line. This is what I have done so far:
void manipulate(char *buffer);
int get_words(char *buffer);
int main(){
char buff[100];
printf("sizeof %d\nstrlen %d\n", sizeof(buff), strlen(buff)); // Debugging reasons
bzero(buff, sizeof(buff));
printf("Give me the text:\n");
fgets(buff, sizeof(buff), stdin);
manipulate(buff);
return 0;
}
int get_words(char *buffer){ // Function that gets the word count, by counting the spaces.
int count;
int wordcount = 0;
char ch;
for (count = 0; count < strlen(buffer); count ++){
ch = buffer[count];
if((isblank(ch)) || (buffer[count] == '\0')){ // if the character is blank, or null byte add 1 to the wordcounter
wordcount += 1;
}
}
printf("%d\n\n", wordcount);
return wordcount;
}
void manipulate(char *buffer){
int words = get_words(buffer);
char *newbuff[words];
char *ptr;
int count = 0;
int count2 = 0;
char ch = '\n';
ptr = buffer;
bzero(newbuff, sizeof(newbuff));
for (count = 0; count < 100; count ++){
ch = buffer[count];
if (isblank(ch) || buffer[count] == '\0'){
buffer[count] = '\0';
if((newbuff[count2] = (char *)malloc(strlen(buffer))) == NULL) {
printf("MALLOC ERROR!\n");
exit(-1);
}
strcpy(newbuff[count2], ptr);
printf("\n%s\n",newbuff[count2]);
ptr = &buffer[count + 1];
count2 ++;
}
}
}
Although the output is what I want, I have really many black spaces after the final word displayed, and the malloc() returns NULL so the MALLOC ERROR! is displayed in the end.
I can understand that there is a mistake at my malloc() implementation, but I do not know what it is.
Is there another more elegant or generally better way to do it?
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
Take a look at this, and use whitespace characters as the delimiter. If you need more hints let me know.
From the website:
char * strtok ( char * str, const char * delimiters );
On a first call, the function expects a C string as argument for str, whose first character is used as the starting location to scan for tokens. In subsequent calls, the function expects a null pointer and uses the position right after the end of last token as the new starting location for scanning.
Once the terminating null character of str is found in a call to strtok, all subsequent calls to this function (with a null pointer as the first argument) return a null pointer.
Parameters
str
C string to truncate.
Notice that this string is modified by being broken into smaller strings (tokens).
Alternativelly [sic], a null pointer may be specified, in which case the function continues scanning where a previous successful call to the function ended.
delimiters
C string containing the delimiter characters.
These may vary from one call to another.
Return Value
A pointer to the last token found in string.
A null pointer is returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
For the fun of it here's an implementation based on the callback approach:
const char* find(const char* s,
const char* e,
int (*pred)(char))
{
while( s != e && !pred(*s) ) ++s;
return s;
}
void split_on_ws(const char* s,
const char* e,
void (*callback)(const char*, const char*))
{
const char* p = s;
while( s != e ) {
s = find(s, e, isspace);
callback(p, s);
p = s = find(s, e, isnotspace);
}
}
void handle_word(const char* s, const char* e)
{
// handle the word that starts at s and ends at e
}
int main()
{
split_on_ws(some_str, some_str + strlen(some_str), handle_word);
}
malloc(0) may (optionally) return NULL, depending on the implementation. Do you realize why you may be calling malloc(0)? Or more precisely, do you see where you are reading and writing beyond the size of your arrays?
Consider using strtok_r, as others have suggested, or something like:
void printWords(const char *string) {
// Make a local copy of the string that we can manipulate.
char * const copy = strdup(string);
char *space = copy;
// Find the next space in the string, and replace it with a newline.
while (space = strchr(space,' ')) *space = '\n';
// There are no more spaces in the string; print out our modified copy.
printf("%s\n", copy);
// Free our local copy
free(copy);
}
Something going wrong is get_words() always returning one less than the actual word count, so eventually you attempt to:
char *newbuff[words]; /* Words is one less than the actual number,
so this is declared to be too small. */
newbuff[count2] = (char *)malloc(strlen(buffer))
count2, eventually, is always one more than the number of elements you've declared for newbuff[]. Why malloc() isn't returning a valid ptr, though, I don't know.
You should be malloc'ing strlen(ptr), not strlen(buf). Also, your count2 should be limited to the number of words. When you get to the end of your string, you continue going over the zeros in your buffer and adding zero size strings to your array.
Just as an idea of a different style of string manipulation in C, here's an example which does not modify the source string, and does not use malloc. To find spaces I use the libc function strpbrk.
int print_words(const char *string, FILE *f)
{
static const char space_characters[] = " \t";
const char *next_space;
// Find the next space in the string
//
while ((next_space = strpbrk(string, space_characters)))
{
const char *p;
// If there are non-space characters between what we found
// and what we started from, print them.
//
if (next_space != string)
{
for (p=string; p<next_space; p++)
{
if(fputc(*p, f) == EOF)
{
return -1;
}
}
// Print a newline
//
if (fputc('\n', f) == EOF)
{
return -1;
}
}
// Advance next_space until we hit a non-space character
//
while (*next_space && strchr(space_characters, *next_space))
{
next_space++;
}
// Advance the string
//
string = next_space;
}
// Handle the case where there are no spaces left in the string
//
if (*string)
{
if (fprintf(f, "%s\n", string) < 0)
{
return -1;
}
}
return 0;
}
you can scan the char array looking for the token if you found it just print new line else print the char.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char *s;
s = malloc(1024 * sizeof(char));
scanf("%[^\n]", s);
s = realloc(s, strlen(s) + 1);
int len = strlen(s);
char delim =' ';
for(int i = 0; i < len; i++) {
if(s[i] == delim) {
printf("\n");
}
else {
printf("%c", s[i]);
}
}
free(s);
return 0;
}
char arr[50];
gets(arr);
int c=0,i,l;
l=strlen(arr);
for(i=0;i<l;i++){
if(arr[i]==32){
printf("\n");
}
else
printf("%c",arr[i]);
}
I was having some difficulties when trying to print out the string pointer after dynamically insert a character at the front of char array.
The parameter *str is a dynamic char array from my main whereas the input is a single character which should append to the first element of the dynamic array after executing the insert().
int main(){
//code snippet. I removed other part to keep the question short
printf("How many characters do you want to input: ");
scanf("%d", &n);
str = malloc(n + 1);
printf("Input the string class: ");
scanf("%s", str);
//switch statement
case '1':
printf("What is the character you want to insert: ");
scanf(" %c", &input);
insert(&str, input);
break;
}
return 0;
}
void insert(char *str, char input) {
char *new_str;
int i, len = strlen(str);
new_str = malloc(len + 1);
new_str[0] = input;
strncpy(&new_str[1], str, len - 1);
new_str[len] = 0;
for (i = 0; i < len; i++) {
printf("%c", new_str[i]);
}
}
When I tried to loop thru the new_str and print out the string array, it gives me weird symbols and I have no idea what are they. Any ideas?
EDIT
The expected output as below:
How many characters do you want to input: 5
Input the string:datas
The string is: datas
Do you want to 1-insert or 2-remove or 3-quit?: 1
What is the character you want to insert: a
Resulting string: adata
The output I am getting:
Alternative version, avoiding any string copy functions. (Since, alter the strlen() you already know the string length to copy, you don't need any more string functions)
char * insert_a_character(char * str, char ch)
{
char * new;
size_t len;
if (!str) return NULL;
len = strlen(str);
new = malloc (1+len+1);
if (!new) retun NULL;
new[0] = ch;
memcpy(new+1, str, len);
new[len+1] = 0;
return new;
}
I assume that the caller will free if required for orig
char * insert(char *orig, char input) {
char * new_str = malloc(strlen(orig) + 2); // An extra one for null
strcpy(new_str + 1, orig);
new_str[0] = input;
printf("%s", new_str); // To print it out
return new_str; // The caller needs to free this;
}
That should work.
Assembling all comments:
void insert(char *str, char input) {
char *new_str;
int i, len = strlen(str);
new_str = malloc(len + 2);
new_str[0] = input;
strcpy(new_str+1, str);
new_str[len+1] = 0;
for (i = 0; i <= len; i++) {
printf("%c", new_str[i]);
}
}
Of course you still need to do something with the new string, such as returning it or freeing it.
I have this code which will remove the first occurrence of the word from the sentence:
#include "stdio.h"
#include "string.h"
int delete(char *source, char *word);
void main(void) {
char sentence[500];
char word[30];
printf("Please enter a sentence. Max 499 chars. \n");
fgets(sentence, 500, stdin);
printf("Please enter a word to be deleted from sentence. Max 29 chars. \n");
scanf("%s", word);
delete(sentence, word);
printf("%s", sentence);
}
int delete(char *source, char *word) {
char *p;
char temp[500], temp2[500];
if(!(p = strstr(source, word))) {
printf("Word was not found in the sentence.\n");
return 0;
}
strcpy(temp, source);
temp[p - source] = '\0';
strcpy(temp2, p + strlen(word));
strcat(temp, temp2);
strcpy(source, temp);
return 1;
}
How would I modify it to delete all occurrences of the word in the given sentence? Can i still use the strstr function in this case?
Thanks for the help!
Open to completely different ways of doing this too.
P.S. This might sound like a homework question, but it's actually a past midterm question which I'd like to resolve to prepare for my midterm!
As a side question, if I use fgets(word, 30, stdin) instead of scanf("%s", word), it no longer works and tells me that the word was not found in the sentence. Why?
Try the following
#include <stdio.h>
#include <string.h>
size_t delete( char *source, const char *word )
{
size_t n = strlen( word );
size_t count = 0;
if ( n != 0 )
{
char *p = source;
while ( ( p = strstr( p, word ) ) != NULL )
{
char *t = p;
char *s = p + n;
while ( ( *t++ = *s++ ) );
++count;
}
}
return count;
}
int main( void )
{
char s[] = "abxabyababz";
printf( "%zu\n", delete( s, "ab" ) );
puts( s );
return 0;
}
The output is
4
xyz
As for the question about fgets then it includes the new line character in the string. You have to remove it from the string.
How would I modify it to delete all occurrences of the word in the given sentence?
There are many ways, as you have suggested, and since you are Open to completely different ways of doing this too...
Here is a different idea:
A sentence uses white space to separate words. You can use that to help solve the problem. Consider implementing these steps using fgets(), strtok() and strcat() to break apart the string, and reassemble it without the string to remove.
0) create line buffer sufficient length to read lines from file
(or pass in line buffer as an argument)
1) use while(fgets(...) to get new line from file
2) create char *buf={0};
3) create char *new_str; (calloc() memory to new_str >= length of line buffer)
4) loop on buf = strtok();, using " \t\n" as the delimiter
Inside loop:
a. if (strcmp(buf, str_to_remove) != 0) //approve next token for concatenation
{ strcat(new_str, buf); strcat(new_str, " ");}//if not str_to_remove,
//concatenate token, and a space
5) free allocated memory
new_str now contains sentence without occurrences of str_to_remove.
Here is a demo using this set of steps (pretty much)
int delete(char *str, char *str_to_remove)
{
char *buf;
char *new_str;
new_str = calloc(strlen(str)+1, sizeof(char));
buf = strtok(str, " \t\n");
while(buf)
{
if(strcmp(buf, str_to_remove) != 0)
{
strcat(new_str, buf);
strcat(new_str, " ");
}
buf = strtok(NULL, " \t\n");
}
printf("%s\n", new_str);
free(new_str);
getchar();
return 0;
}
int main(void)
{
delete("this sentence had a withh bad withh word", "withh");
return 0;
}
I need to split a char array into CSV's. Actually we can do the reverse of it using strtok() like:
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="This,a,sample,string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str,",");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, ",");
}
return 0;
}
But in my case, there's an char array suppose char bits[1024]="abcdefghijklmn". I need to get the output as a,b,c,d,e,f,g,h,i,j,k,m,n.
Is there any function or library to do this i.e. in terms of raw meaning, for every character it has to put a comma.
Just iterate over the string until you hit the end-of-string '\0' character. Or use the length of the data in the array (which may be smaller than the array size) and use a simple for loop.
This works for a null terminated string. But it will leave a dangling comma at the end.
void tokenise(char *s, char *d)
{
while(*d++ = *s++) *d++ = ',';
}
If you know the length of the string already, you can pass that through. This will not leave a dangling comma.
void tokenise(char *s, char *d, int length)
{
int i = 0;
while((*d++ = *s++) && ((i++)<(length-1))) *d++ = ',';
}
In both examples, s is a pointer to the source string and d points to the output tokenised string. It is up to the calling code to ensure the buffer d points to is sufficiently large.
you can use this simple function from old basic :
// ............................................................. string word at
char * word_at(char *tString, int upTo, char *dilim) {
int wcount;
char *rString, *temp;
temp= (char *) malloc(sizeof(char) * (strlen(tString)+1));
strcpy(temp, tString);
rString= strtok(temp, dilim);
wcount=1;
while (rString != NULL){
if (wcount==upTo) {
return rString;
}
rString= strtok(NULL, dilim);
wcount++;
}
return tString ;
}
parameter : string , index and character delimiter
return : word : ( char *)
If you find easy to implement it, then this could help you to start
char* split_all( char arr[], char ch )
{
char *new, *ptr;
new = ptr = calloc( 1, 2*strlen( arr ) ); // FIXME : Error checks
for( ; *(arr + 1) ; new++, arr++ )
{
*new = *arr;
new++;
*new = ch;
}
*new = *arr;
return ptr;
}
You can re-use, optimize this for your requirement. Its a quick and dirty solution, feel free to fix it..
I want to write a program in C that displays each word of a whole sentence (taken as input) at a seperate line. This is what I have done so far:
void manipulate(char *buffer);
int get_words(char *buffer);
int main(){
char buff[100];
printf("sizeof %d\nstrlen %d\n", sizeof(buff), strlen(buff)); // Debugging reasons
bzero(buff, sizeof(buff));
printf("Give me the text:\n");
fgets(buff, sizeof(buff), stdin);
manipulate(buff);
return 0;
}
int get_words(char *buffer){ // Function that gets the word count, by counting the spaces.
int count;
int wordcount = 0;
char ch;
for (count = 0; count < strlen(buffer); count ++){
ch = buffer[count];
if((isblank(ch)) || (buffer[count] == '\0')){ // if the character is blank, or null byte add 1 to the wordcounter
wordcount += 1;
}
}
printf("%d\n\n", wordcount);
return wordcount;
}
void manipulate(char *buffer){
int words = get_words(buffer);
char *newbuff[words];
char *ptr;
int count = 0;
int count2 = 0;
char ch = '\n';
ptr = buffer;
bzero(newbuff, sizeof(newbuff));
for (count = 0; count < 100; count ++){
ch = buffer[count];
if (isblank(ch) || buffer[count] == '\0'){
buffer[count] = '\0';
if((newbuff[count2] = (char *)malloc(strlen(buffer))) == NULL) {
printf("MALLOC ERROR!\n");
exit(-1);
}
strcpy(newbuff[count2], ptr);
printf("\n%s\n",newbuff[count2]);
ptr = &buffer[count + 1];
count2 ++;
}
}
}
Although the output is what I want, I have really many black spaces after the final word displayed, and the malloc() returns NULL so the MALLOC ERROR! is displayed in the end.
I can understand that there is a mistake at my malloc() implementation, but I do not know what it is.
Is there another more elegant or generally better way to do it?
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
Take a look at this, and use whitespace characters as the delimiter. If you need more hints let me know.
From the website:
char * strtok ( char * str, const char * delimiters );
On a first call, the function expects a C string as argument for str, whose first character is used as the starting location to scan for tokens. In subsequent calls, the function expects a null pointer and uses the position right after the end of last token as the new starting location for scanning.
Once the terminating null character of str is found in a call to strtok, all subsequent calls to this function (with a null pointer as the first argument) return a null pointer.
Parameters
str
C string to truncate.
Notice that this string is modified by being broken into smaller strings (tokens).
Alternativelly [sic], a null pointer may be specified, in which case the function continues scanning where a previous successful call to the function ended.
delimiters
C string containing the delimiter characters.
These may vary from one call to another.
Return Value
A pointer to the last token found in string.
A null pointer is returned if there are no tokens left to retrieve.
Example
/* strtok example */
#include <stdio.h>
#include <string.h>
int main ()
{
char str[] ="- This, a sample string.";
char * pch;
printf ("Splitting string \"%s\" into tokens:\n",str);
pch = strtok (str," ,.-");
while (pch != NULL)
{
printf ("%s\n",pch);
pch = strtok (NULL, " ,.-");
}
return 0;
}
For the fun of it here's an implementation based on the callback approach:
const char* find(const char* s,
const char* e,
int (*pred)(char))
{
while( s != e && !pred(*s) ) ++s;
return s;
}
void split_on_ws(const char* s,
const char* e,
void (*callback)(const char*, const char*))
{
const char* p = s;
while( s != e ) {
s = find(s, e, isspace);
callback(p, s);
p = s = find(s, e, isnotspace);
}
}
void handle_word(const char* s, const char* e)
{
// handle the word that starts at s and ends at e
}
int main()
{
split_on_ws(some_str, some_str + strlen(some_str), handle_word);
}
malloc(0) may (optionally) return NULL, depending on the implementation. Do you realize why you may be calling malloc(0)? Or more precisely, do you see where you are reading and writing beyond the size of your arrays?
Consider using strtok_r, as others have suggested, or something like:
void printWords(const char *string) {
// Make a local copy of the string that we can manipulate.
char * const copy = strdup(string);
char *space = copy;
// Find the next space in the string, and replace it with a newline.
while (space = strchr(space,' ')) *space = '\n';
// There are no more spaces in the string; print out our modified copy.
printf("%s\n", copy);
// Free our local copy
free(copy);
}
Something going wrong is get_words() always returning one less than the actual word count, so eventually you attempt to:
char *newbuff[words]; /* Words is one less than the actual number,
so this is declared to be too small. */
newbuff[count2] = (char *)malloc(strlen(buffer))
count2, eventually, is always one more than the number of elements you've declared for newbuff[]. Why malloc() isn't returning a valid ptr, though, I don't know.
You should be malloc'ing strlen(ptr), not strlen(buf). Also, your count2 should be limited to the number of words. When you get to the end of your string, you continue going over the zeros in your buffer and adding zero size strings to your array.
Just as an idea of a different style of string manipulation in C, here's an example which does not modify the source string, and does not use malloc. To find spaces I use the libc function strpbrk.
int print_words(const char *string, FILE *f)
{
static const char space_characters[] = " \t";
const char *next_space;
// Find the next space in the string
//
while ((next_space = strpbrk(string, space_characters)))
{
const char *p;
// If there are non-space characters between what we found
// and what we started from, print them.
//
if (next_space != string)
{
for (p=string; p<next_space; p++)
{
if(fputc(*p, f) == EOF)
{
return -1;
}
}
// Print a newline
//
if (fputc('\n', f) == EOF)
{
return -1;
}
}
// Advance next_space until we hit a non-space character
//
while (*next_space && strchr(space_characters, *next_space))
{
next_space++;
}
// Advance the string
//
string = next_space;
}
// Handle the case where there are no spaces left in the string
//
if (*string)
{
if (fprintf(f, "%s\n", string) < 0)
{
return -1;
}
}
return 0;
}
you can scan the char array looking for the token if you found it just print new line else print the char.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
char *s;
s = malloc(1024 * sizeof(char));
scanf("%[^\n]", s);
s = realloc(s, strlen(s) + 1);
int len = strlen(s);
char delim =' ';
for(int i = 0; i < len; i++) {
if(s[i] == delim) {
printf("\n");
}
else {
printf("%c", s[i]);
}
}
free(s);
return 0;
}
char arr[50];
gets(arr);
int c=0,i,l;
l=strlen(arr);
for(i=0;i<l;i++){
if(arr[i]==32){
printf("\n");
}
else
printf("%c",arr[i]);
}