How to delete Text in C? - c

Here is basically the problem. I am given a huge file. A text, that has a lot of blank spaces. I must write a program that removes the blank spaces, creates lines of exactly 80 characters long without splitting any word, and it will align the text to left and right simultaneously (justify text); The text is justified by placing additional spaces between words so that the line will end with a word and start with word, being exactly 80 chars long.
Yes this is a homework, but I am allowed to get any kind of online help. My code this far is able to do everything but align the text (justify):
Code:
#include <stdio.h>
#include "catalin.h"
/*
 * Reads "asimov.in" (newlines become blanks), collapses runs of blanks,
 * wraps the text at 80 columns and writes the result to "out.out".
 * Returns 0 on success, 1 when either file cannot be opened.
 */
int main()
{
    /* static: the buffers are far too large to place on the stack safely */
    static char text[145000], blank[1450000];
    FILE *input, *output;
    int c;                  /* int, not char, so EOF stays distinguishable from data */
    int f = 80, i = 0, l;

    input = fopen("asimov.in", "r");
    if (input == NULL) {
        perror("asimov.in");
        return 1;
    }
    /* stop one short of the buffer size to leave room for the terminator */
    while (i < (int)sizeof text - 1 && (c = fgetc(input)) != EOF) {
        if (c == '\n') c = ' ';
        text[i] = (char)c;
        i++;
    }
    text[i] = '\0';         /* blankremove() scans until the terminating zero */
    fclose(input);

    blankremove(text, blank);
    wrap(blank, f);
    l = lenght(blank);      /* NOTE(review): result is unused; call kept as in the original */
    (void)l;

    output = fopen("out.out", "w");
    if (output == NULL) {
        perror("out.out");
        return 1;
    }
    fputs(blank, output);   /* never pass file content as a printf format string */
    fclose(output);
    return 0;
}
/*
 * Copies text into blank, replacing every run of consecutive ' ' characters
 * by a single ' '. A leading or trailing run is reduced to one blank, not
 * removed. blank must be at least as large as text.
 * Returns the length of the string written to blank.
 */
int blankremove(char text[], char blank[])
{
    int src, dst = 0;

    for (src = 0; text[src] != '\0'; src++) {
        /* skip a blank when the previous character written was also a blank */
        if (text[src] == ' ' && dst > 0 && blank[dst - 1] == ' ')
            continue;
        blank[dst++] = text[src];
    }
    blank[dst] = '\0';
    return dst;
}
/*
 * Wraps s in place by turning selected blanks into '\n' so that no line is
 * longer than wrapline characters where possible. A word that is itself
 * longer than wrapline is left intact and the line is broken right after it.
 * After wrapping, a ruler (wrapline blanks followed by "|") is printed to
 * stdout, as in the original.
 */
void wrap(char s[], const int wrapline)
{
    int i, k;
    int lastwrap = 0;   /* index of the first character of the current line */

    for (i = 0; s[i] != '\0'; ++i) {
        if (i - lastwrap < wrapline)
            continue;

        /* search backwards for the last blank that keeps the line within the
           limit; never look before the current line, or an earlier line
           would be split and lastwrap would move backwards */
        for (k = i; k > 0 && k >= lastwrap; --k) {
            if (k - lastwrap <= wrapline && s[k] == ' ')
                break;
        }
        if (k > 0 && k >= lastwrap) {
            s[k] = '\n';
            lastwrap = k + 1;
        } else if (s[i] == ' ') {
            /* the line holds a single over-long word: break right after it */
            s[i] = '\n';
            lastwrap = i + 1;
        }
    }
    for (i = 0; i < wrapline; ++i) printf(" ");
    printf("|\n");
}
All I need is some help on creating a function that justifies the text. "justified—text is aligned along the left margin, and letter- and word-spacing is adjusted so that the text falls flush with both margins, also known as fully justified or full justification;" The spaces created when doing justification should be placed uniformly. No libraries should be used other than the default.

Ignoring the many bugs in your existing code, you need to think about what you're trying to achieve.
Think about a simpler example to start with. Say your source text is "Hello world" and you're justifying it to a width of 15. "Hello world" is 11 characters long, which is 4 fewer than we need. There is 1 space in the string, so you know you need to make that space become 5 spaces so that it becomes "Hello     world".
Next example: "I like bees!" - that is 12 characters, but it has 2 spaces and you need an extra 3 spaces in there. One of those spaces has to become 2 spaces and the other 3 spaces to fill out the 15 characters.
So your code needs to firstly, count how many spaces are in the line you're currently working with. You can do that whilst you're working out where to wrap the line and also if you track where the last space is, you don't then need to back track to find it again.
Secondly, know how many extra characters it needs to pad it out by.
And finally find the spaces within the line and add extra spaces evenly amongst them. You'd be better off working with a new string at this point because whilst it's possible to insert spaces into s, it's complicated and more likely to introduce more bugs.

Using fscanf will read words and exclude whitespace.
Then add words while the length of the line is less than 80.
Add extra spaces to right justify the line.
#include <stdio.h>
int len ( char *str);
char *cat ( char *to, char *from);
char *cpy ( char *to, char *from);
char *lastchr ( char *str, int ch);
char *justify ( char *str, int wordcount, int width);
//reads words from asimov.in, packs them into lines shorter than 80 characters,
//justifies each line and writes it to out.out. returns 0 in all cases.
int main( void) {
    char word[100] = "";
    char line[102] = "";//room for a 99 character word, one space and the terminating zero
    char filenamein[] = "asimov.in";
    char filenameout[] = "out.out";
    int length = 0;
    int wordcount = 0;
    int pending = 0;
    FILE *pfin = NULL;
    FILE *pfout = NULL;
    if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
        perror ( filenamein);
        return 0;
    }
    if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
        fclose ( pfin);
        perror ( filenameout);
        return 0;
    }
    while ( 1 == fscanf ( pfin, "%99s", word)) {//read a word from file. will exclude whitespace
        length = len ( word);
        if ( 80 > len ( line) + length) {//add to line if it will fit
            cat ( line, word);
            cat ( line, " ");
            wordcount++;//needed in case more than one extra space per word
        }
        else {//adding last word would be more than 80
            if ( line[0]) {//nothing to write when the very first word does not fit
                justify ( line, wordcount, 80);
                fprintf ( pfout, "%s\n", line);
            }
            cpy ( line, word);//copy pending word to line
            cat ( line, " ");//add a space
            wordcount = 1;//reset wordcount
        }
        pending = 1;//line always holds at least one unwritten word here
    }
    if ( pending) {//write the last, partially filled line
        justify ( line, wordcount, 80);
        fprintf ( pfout, "%s\n", line);
    }
    fclose ( pfin);
    fclose ( pfout);
    return 0;
}
//number of characters in str before the terminating zero
int len ( char *str) {
    const char *cursor = str;
    while ( *cursor) {//walk to the terminating zero
        cursor++;
    }
    return (int)( cursor - str);
}
//appends from to the end of to and returns to.
//to must have room for both strings and the terminating zero.
char *cat ( char *to, char *from) {
    int at = 0;
    while ( to[at]) {//find the terminating zero of to
        at++;
    }
    for ( int i = 0; from[i]; i++) {
        to[at++] = from[i];
    }
    to[at] = 0;//terminate
    return to;
}
//copies from into to, including the terminating zero, and returns to
char *cpy ( char *to, char *from) {
    char *out = to;
    *to = 0;//empty the destination first, as the original did before appending
    while ( *from) {
        *out++ = *from++;
    }
    *out = 0;//terminate
    return to;
}
//returns a pointer to the last occurrence of ch in str, or NULL if there is none.
//the terminating zero itself is never matched.
char *lastchr ( char *str, int ch) {
    char *match = NULL;
    for ( char *scan = str; *scan; scan++) {
        if ( ch == *scan) {
            match = scan;//remember the most recent match and keep going
        }
    }
    return match;
}
//justifies str in place to exactly width characters and returns str.
//a single trailing space (as appended by the caller after each word) is removed,
//then the missing spaces are spread over the gaps between words: every gap gets
//the same amount and the remainder goes, one each, to the rightmost gaps.
//str must have room for width characters plus the terminating zero.
//a line with no gap, or one already width or longer, is only stripped of its
//trailing space. wordcount is kept for interface compatibility; the gaps are
//counted from the text itself.
char *justify ( char *str, int wordcount, int width) {
    int length = 0;
    int gaps = 0;
    ( void) wordcount;
    if ( NULL == str) {
        return str;
    }
    while ( str[length]) {
        length++;
    }
    if ( length && ' ' == str[length - 1]) {
        str[--length] = 0;//drop the trailing space
    }
    for ( int i = 0; i < length; ++i) {
        if ( ' ' == str[i]) {
            gaps++;
        }
    }
    int addspaces = width - length;//number of spaces that must be inserted
    if ( addspaces <= 0 || 0 == gaps) {
        return str;//nothing to pad or nowhere to pad
    }
    int each = addspaces / gaps;//every gap gets this many
    int extra = addspaces % gaps;//this many gaps get one more
    int from = length - 1;
    int to = width - 1;
    int gap = 0;//gaps handled so far, counted from the right
    str[width] = 0;//terminate the justified line
    //shift characters toward the end. to never falls below from, so nothing
    //is overwritten before it has been read
    while ( from >= 0) {
        str[to--] = str[from];
        if ( ' ' == str[from]) {
            int pad = each + ( gap < extra ? 1 : 0);
            while ( pad--) {
                str[to--] = ' ';
            }
            gap++;
        }
        from--;
    }
    return str;
}
EDIT:
#include <stdio.h>
#define WIDTH 80
#define SIZE ( WIDTH + 20)
int len ( char *str);
char *cat ( char *to, char *from);
char *cpy ( char *to, char *from);
char *lastchr ( char *str, int ch);
char *justify ( char *str, int wordcount, int width);
int scanword ( FILE *pfread, int size, char *word);
//reads words with scanword, packs them into lines shorter than WIDTH,
//justifies each line and writes it to out.out. a word consisting of a single
//newline (reported by scanword for consecutive newlines) ends the current line
//and writes a blank line. returns 0 in all cases.
int main( void) {
    char word[SIZE] = "";
    char line[SIZE] = "";
    char filenamein[] = "asimov.in";
    char filenameout[] = "out.out";
    int length = 0;
    int wordcount = 0;
    int pending = 0;
    FILE *pfin = NULL;
    FILE *pfout = NULL;
    if ( NULL == ( pfin = fopen ( filenamein, "r"))) {
        perror ( filenamein);
        return 0;
    }
    if ( NULL == ( pfout = fopen ( filenameout, "w"))) {
        fclose ( pfin);
        perror ( filenameout);
        return 0;
    }
    while ( 1 == scanword ( pfin, WIDTH, word)) {//read a word from file
        length = len ( word);
        if ( '\n' != word[0] && WIDTH > len ( line) + length) {//add to line if it will fit
            if ( 0 != word[0]) {
                cat ( line, word);
                cat ( line, " ");
                wordcount++;//needed in case more than one extra space per word
                pending = 1;//a line is pending
            }
        }
        else {//paragraph or adding last word would be more than WIDTH
            if ( len ( line)) {//line has content
                justify ( line, wordcount, WIDTH);
                fprintf ( pfout, "%s\n", line);
            }
            if ( '\n' == word[0]) {
                fprintf ( pfout, "\n");//print a blank line for paragraph
            }
            line[0] = 0;
            wordcount = 0;//reset wordcount
            pending = 0;//nothing pending
            if ( 0 != word[0] && '\n' != word[0]) {//word is not empty and is not newline
                cpy ( line, word);//copy pending word to line
                cat ( line, " ");//add a space
                wordcount = 1;//reset wordcount
                pending = 1;//the carried-over word must still be written
            }
        }
    }
    if ( pending) {//print pending line
        if ( len ( line)) {//line has content
            justify ( line, wordcount, WIDTH);
            fprintf ( pfout, "%s\n", line);
        }
    }
    fclose ( pfin);
    fclose ( pfout);
    return 0;
}
//reads the next word from pfread into word (at most size - 1 characters plus
//the terminating zero). leading spaces and tabs are skipped.
//returns 1 when there is something to process:
//  - a word ended by a space, tab, newline, end of file or a full buffer
//  - an empty word for a single newline with no preceding characters
//  - the one character word "\n" for a newline that directly follows a newline
//returns 0 at end of file with nothing read, or when a zero byte is read.
int scanword ( FILE *pfread, int size, char *word) {
    static int nl = 0;//static to retain value between function calls
    int ch = 0;
    int max = size - 1;//max characters that can fit in word and leave one to terminate
    *word = 0;//first character. zero terminate. empty line
    while ( max && ( ch = fgetc ( pfread))) {//read a character until max is zero
        if ( EOF == ch) {//end of file
            if ( max == size - 1) {
                return 0;//no other characters read
            }
            return 1;//process the other characters that were read
        }
        if ( '\n' == ch) {//read a newline
            if ( '\n' == nl) {//consecutive newlines
                *word = nl;
                word++;
                *word = 0;
                return 1;
            }
            nl = ch;//set for first single newline
            return 1;
        }
        nl = 0;//reset to zero as prior character was not newline
        if ( ' ' == ch || '\t' == ch) {//read space or tab
            if ( max == size - 1) {//no characters in word so far
                continue;//consume leading space and tab
            }
            return 1;//process the word read
        }
        *word = ch;//assign character to word
        word++;//increment pointer to next character
        *word = 0;//zero terminate
        max--;//deduct. one less character can be read into word
    }
    //a full buffer is a word to process, not end of input. the rest of an
    //over-long word is returned by the next call
    return max ? 0 : 1;
}
//counts the characters of str up to, but not including, the terminating zero
int len ( char *str) {
    int count;
    for ( count = 0; str[count]; count++) {
        //nothing to do. the loop header does the counting
    }
    return count;
}
//appends from to the end of to and returns to.
//to must be large enough for the combined string and the terminating zero.
char *cat ( char *to, char *from) {
    char *tail = to;
    while ( *tail) {//advance to the terminating zero of to
        tail++;
    }
    do {
        *tail++ = *from;//copies the terminating zero as the last step
    } while ( *from++);
    return to;
}
//replaces the content of to with a copy of from and returns to
char *cpy ( char *to, char *from) {
    int at = 0;
    *to = 0;//start from an empty destination, as the original did before appending
    while ( *from) {
        to[at++] = *from++;
    }
    to[at] = 0;//terminate
    return to;
}
//returns a pointer to the last character of str equal to ch, or NULL when
//there is none. the terminating zero is never matched.
char *lastchr ( char *str, int ch) {
    int at = 0;
    while ( str[at]) {//find the end of the string
        at++;
    }
    while ( at > 0) {//walk back. the first match seen is the last one in str
        at--;
        if ( ch == str[at]) {
            return str + at;
        }
    }
    return NULL;
}
//justifies str in place to exactly width characters and returns str.
//a single trailing space (as appended by the caller after each word) is removed,
//then the missing spaces are distributed uniformly over the gaps between words:
//every gap gets the same amount and the remainder goes, one each, to the
//rightmost gaps. str must have room for width characters plus the terminating
//zero. a line with no gap, or one already width or longer, is only stripped of
//its trailing space. wordcount is kept for interface compatibility; the gaps
//are counted from the text itself.
char *justify ( char *str, int wordcount, int width) {
    int length = 0;
    int gaps = 0;
    ( void) wordcount;
    if ( NULL == str) {
        return str;
    }
    while ( str[length]) {
        length++;
    }
    if ( length && ' ' == str[length - 1]) {
        str[--length] = 0;//drop the trailing space
    }
    for ( int i = 0; i < length; ++i) {
        if ( ' ' == str[i]) {
            gaps++;
        }
    }
    int addspaces = width - length;//number of spaces that must be inserted
    if ( addspaces <= 0 || 0 == gaps) {
        return str;//nothing to pad or nowhere to pad
    }
    int each = addspaces / gaps;//every gap gets this many
    int extra = addspaces % gaps;//this many gaps get one more
    int from = length - 1;
    int to = width - 1;
    int gap = 0;//gaps handled so far, counted from the right
    str[width] = 0;//terminate the justified line
    //shift characters toward the end. to never falls below from, so nothing
    //is overwritten before it has been read
    while ( from >= 0) {
        str[to--] = str[from];
        if ( ' ' == str[from]) {
            int pad = each + ( gap < extra ? 1 : 0);
            while ( pad--) {
                str[to--] = ' ';
            }
            gap++;
        }
        from--;
    }
    return str;
}

Related

What causes the crash while using strcat and and memset?

I am trying to implement a split function, which receives an array of chars and a delimiter that decides which chars go into each string of the resulting array.
I have a problem with strcat and memset; can someone explain my mistakes to me?
/*
 * Splits str at every occurrence of delimiter.
 * Returns a calloc'ed array of calloc'ed strings, followed by a NULL entry,
 * or NULL when an allocation fails. The caller frees every string and then
 * the array. The number of entries comes from num_items(); entries beyond
 * that count are never written.
 * NOTE(review): num_items() is defined elsewhere; this assumes it returns the
 * number of delimiter-separated items in str - confirm.
 */
char** split(const char* str, char delimiter)
{
    size_t len = strlen(str);
    size_t ch = 0;              /* characters stored in the current item */
    unsigned int word = 0;      /* index of the current item */
    unsigned int size = num_items(str, delimiter);
    /* size+1 zeroed pointers: the last one stays NULL as end marker */
    char** split = calloc(size + 1, sizeof *split);
    if (split == NULL)
        return NULL;
    for (size_t i = 0; i < len && word < size; i++)
    {
        if (split[word] == NULL)
        {
            /* the pointers start out NULL, so every item needs its own
               zero-filled buffer; the rest of str is always enough room */
            split[word] = calloc(len - i + 1, 1);
            if (split[word] == NULL)
            {
                while (word > 0)
                    free(split[--word]);
                free(split);
                return NULL;
            }
        }
        if (str[i] != delimiter)
        {
            /* store one character; strcat would append the whole tail of str */
            split[word][ch] = str[i];
            ch++;
        }else{
            ch = 0;
            word++;
        }
    }
    return split;
}
Memory needs to be allocated for the pointers and the strings they point to.
For a single character, it can be assigned directly. No need for strcat and strcat expects pointers to zero terminated strings.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Splits str at every occurrence of delimiter.
 * Returns a heap-allocated array of heap-allocated strings terminated by a
 * NULL entry; the caller frees every string and then the array. Consecutive
 * delimiters produce empty strings. Returns NULL for an empty str. When an
 * allocation fails, a message is written to stderr and whatever has been
 * built so far is returned.
 */
char** split(const char* str, char delimiter)
{
    char** split= NULL;
    size_t ch=0;                /* characters stored in the current item */
    unsigned int size=0;        /* index of the current item */
    size_t len = strlen ( str);
    for(size_t i=0; i<len; i++)
    {
        if(ch==0)
        {
            char** temp= realloc(split, sizeof *split * (size+2));//allocate pointer
            if ( ! temp) {
                fprintf ( stderr, "problem malloc\n");
                return split;
            }
            split = temp;
            split[size + 1] = NULL;//sentinel
            split[size] = calloc ( 1, len + 1);//allocate for string
            if ( ! split[size]) {
                fprintf ( stderr, "problem calloc\n");
                return split;
            }
        }
        if(str[i]!=delimiter)
        {
            split[size][ch] = str[i];//assign character
            ch++;
        }else{
            char *tmp = realloc ( split[size], ch + 1);//reallocate to exact size
            if ( tmp) {
                split[size] = tmp;//realloc may move the block: keep the new address
            }
            //a failed shrink is harmless: the larger buffer is still valid
            ch=0;
            size++;
        }
    }
    return split;
}
//demo: split a sentence at spaces, print one word per line, then release everything
int main ( void) {
    char *text = "a bc def ghij klmno pqr st u v wzyx";
    char **words = split ( text, ' ');
    if ( words) {
        for ( int at = 0; words[at]; ++at) {
            printf ( "%s\n", words[at]);
        }
        for ( int at = 0; words[at]; ++at) {
            free ( words[at]);
        }
    }
    free ( words);
    return 0;
}

searching for EXACT string with spaces in . txt file from C program

/* One book record. The code that fills or frees these fields is not shown here. */
struct Book {
char *title; /* presumably a NUL-terminated title string; ownership not visible here - confirm */
char *authors; /* presumably a NUL-terminated author list; ownership not visible here - confirm */
unsigned int year; /* presumably the publication year - confirm */
unsigned int copies; /* presumably the number of copies held - confirm */
};
/*
 * Looks for title in "library.txt", one line at a time.
 * Returns 1 (and prints "book found") when a line contains title as a
 * substring, 0 when no line does, -1 when the file cannot be opened.
 * title is only read. Lines longer than the buffer are examined in pieces.
 * NOTE(review): this is a substring match, so "he" matches "hello"; an exact
 * match needs a comparison against the field that holds the title.
 */
int existance_of_book(char title[])
{
    char string[256];
    int found = 0;
    ptr_to_library = fopen("library.txt", "r");
    if(ptr_to_library == NULL)
    {
        printf("\nERROR: cannot open file\n");
        return -1;
    }
    /* read into the local buffer: sizeof(title) would be the size of a
       pointer, and reading into title would destroy the search key */
    while (!found && fgets(string, sizeof(string), ptr_to_library) != NULL)
    {
        if(strstr(string, title) != NULL)
        {
            printf("book found\n");
            found = 1;
        }
    }
    fclose(ptr_to_library); /* the original leaked the stream on every path */
    ptr_to_library = NULL;
    return found;
}
I am trying to search for a string in a file, but because the string I am searching for contains a space, this function is unable to find it. The function also reports a match when, for example, the .txt file contains "hello" and the string passed to the function is "he". Is there a way to search for the exact string in a file even if it contains spaces?
Use strstr to find the sub-string.
Check that the sub-string is either at the beginning of the line or is preceded by punctuation or whitespace.
Also check that the sub-string is either at the end of the line or trailed by punctuation or whitespace.
If fgets is used to obtain the sub-string to find, be sure to use strcspn to remove the trailing newline. In the line from the file, the trailing newline should not matter, but this code uses strcspn to remove it.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define SIZE 1024
//prints every line of library.txt that contains the search text as a whole
//word, i.e. bounded on both sides by the line edge, punctuation or whitespace
int main ( void) {
    char find[SIZE] = "he";
    char line[SIZE] = "";
    char const *filename = "library.txt";
    FILE *pf = fopen ( filename, "r");
    if ( NULL == pf) {
        perror ( filename);
        exit ( EXIT_FAILURE);
    }
    int length = strlen ( find);
    while ( fgets ( line, SIZE, pf)) {//read lines until end of file
        line[strcspn ( line, "\n")] = 0;//remove newline
        //try every occurrence in the line, restarting one character after each
        for ( char *match = strstr ( line, find); match; match = strstr ( match + 1, find)) {
            unsigned char after = (unsigned char)match[length];
            int leftok = match == line //first of line
                || ispunct ( (unsigned char)match[-1])
                || isspace ( (unsigned char)match[-1]);
            int rightok = 0 == after //end of line
                || ispunct ( after)
                || isspace ( after);
            if ( leftok && rightok) {
                printf ( "found %s in %s\n", find, line);
                break;//report each line once
            }
        }
    }
    fclose ( pf);
    return 0;
}

In C, trying to remove extra whiteSpace in char* and replace with only one space between words

I am trying to remove unnecessary whitespace from my char* for future use. Basically, I want to only have one space between words and remove any additional spaces, tabs, or new line characters in between words. The current code I have almost works I believe, but I am unable to read the memory of the individual characters I am storing in my array. Also any solution must not have a maximum character size so if dynamic memory allocation is needed that would need to be considered as well. Is there a way to get this working? Thanks
EDIT 1: trailing and leading spaces should also be removed. Thanks to @Vlad from Moscow for the clarification.
/* Demo driver: runs removeAdditionalWhiteSpace on a sample string. */
int main()
{
    /* the function writes through its argument, so it needs writable,
       initialised storage; the original passed an uninitialised pointer */
    char buffer[] = "  some\t text \n with   extra   whitespace  ";
    char* fileString1 = buffer;
    fileString1=removeAdditionalWhiteSpace(fileString1);
    return 0;
}
/*
 * Collapses whitespace in wordList in place: every run of blanks, tabs and
 * newlines between two words becomes a single blank, and leading and
 * trailing runs are removed. Works on strings of any length because the
 * result is never longer than the input.
 * Returns wordList, or NULL when wordList is NULL.
 */
char* removeAdditionalWhiteSpace(char* wordList)
{
    char* src;
    char* dst = wordList;
    int pendingSpace = 0;   /* a separator is owed before the next word */

    if (wordList == NULL)
        return NULL;
    for (src = wordList; *src != '\0'; ++src)
    {
        if (*src == ' ' || *src == '\n' || *src == '\t')
        {
            /* only remember the separator once a word has been written,
               which drops leading whitespace */
            pendingSpace = (dst != wordList);
        }
        else
        {
            if (pendingSpace)
            {
                *dst++ = ' ';
                pendingSpace = 0;
            }
            *dst++ = *src;
        }
    }
    *dst = '\0';            /* a still-pending separator is trailing: drop it */
    return wordList;
}
Your function does not make sense and has undefined behavior.
For example the variable characterHolder was not initialized and it is added to pointer finalList[i]
char characterHolder; // <===
char* finalList[strlen(wordList)];
char* delimeter = wordList;
int i = 0;
do
{
finalList[i] += characterHolder; // <===
//….
If you need to remove redundant white spaces from a string including its leading and trailing white spaces then the function can look as it is shown in the demonstrative program below.
#include <stdio.h>
#include <ctype.h>
/*
 * Rewrites s in place so that leading and trailing white space is removed and
 * every inner run of white-space characters becomes one ' '. Returns s.
 */
char * remove_duplicate_spaces( char *s )
{
    const char *in = s;
    char *out = s;

    while ( isspace( ( unsigned char )*in ) ) ++in;

    while ( *in )
    {
        if ( isspace( ( unsigned char )*in ) )
        {
            do ++in; while ( isspace( ( unsigned char )*in ) );
            /* a run followed by the end of the string is trailing: skip it */
            if ( *in ) *out++ = ' ';
        }
        else
        {
            *out++ = *in++;
        }
    }
    *out = '\0';

    return s;
}
/* Demo: print the sample string before and after white-space clean-up. */
int main(void)
{
    char sample[] = "\t\tIt is\t\ta very\nlong\nstring.\t\t";
    char *cleaned;

    printf( "\"%s\"\n", sample );
    cleaned = remove_duplicate_spaces( sample );
    printf( "\"%s\"\n", cleaned );
    return 0;
}
Its output is
" It is a very
long
string. "
"It is a very long string."
Man that looks super complicated.
Here's a simple function to remove whitespace from the string:" This is a test \t of some \n Extra white space. "
#include <stdio.h>
#include <ctype.h>
/*
 * Normalises white space in text in place: leading and trailing white space
 * is removed and every inner run of white-space characters becomes a single
 * ' '. The original version dropped the last character whenever the input
 * did not end in white space.
 */
void removeWS(char* text)
{
    char* d = text;     /* write position, never ahead of the read position */

    /* the ctype functions need the value of an unsigned char */
    while (isspace((unsigned char)*text)) ++text;
    while (*text)
    {
        if (isspace((unsigned char)*text))
        {
            while (isspace((unsigned char)*text)) ++text;
            /* emit the separator only when another word follows */
            if (*text) *d++ = ' ';
        }
        else
        {
            *d++ = *text++;
        }
    }
    *d = '\0';
}
/* Demo: clean up the sample string in place and print it. */
int main(void) {
    char sample[] = " This is a test \t of some \n Extra white space. ";

    removeWS(sample);
    printf("%s\n", sample);
    return 0;
}
Sample Output:
Success #stdin #stdout 0s 4284KB
This is a test of some Extra white space.
Minimalistic approach:
/*
 * Collapses white space in str in place: leading and trailing white space is
 * removed and each inner run becomes a single ' '.
 * Returns the length of the resulting string.
 */
size_t delspaces(char * str)
{
    size_t src, dst;
    size_t cnt;     /* white-space characters seen since the last word character */

    for (cnt=src=dst=0; (str[dst] = str[src++]) != '\0'; ) {
        if (isspace((unsigned char)str[dst])) {
            /* keep only the first separator of a run, and none at the start */
            if (dst && !cnt++) str[dst++] = ' ';
            continue;
        }
        cnt=0;
        dst++;
    }

    // remove trailing spaces
    while (dst && isspace((unsigned char)str[dst-1])) str[--dst] = 0;

    // return the string length of the resulting string
    // (which could be useful for the caller)
    return dst;
}
Final note: the last while() could be an if(), since there can be only one trailing space.

Merge two specific columns from a line in C language

I want to merge two specific columns from a line in C. The line looks like "hello world hello world": it consists of some words separated by white space. Below is my code. In this function, c1 and c2 are the column numbers, and the array key receives the merged string. However, it does not run correctly.
char *LinetoKey(char *line, int c1, int c2, char key[COLSIZE]){
char *col2 = (char *)malloc(sizeof(char));
while (*line != '\0' && isspace(*line) )
line++;
while(*line != '\0' && c1 != 0){
if(isspace(*line)){
while(*line != '\0' && isspace(*line))
line++;
c1--;
c2--;
}else
line++;
}
while (*line != '\0' && *line != '\n' && (isspace(*line)==0))
*key++ = *line++;
*key = '\0';
while(*line != '\0' && c2 != 0){
if(isspace(*line)){
while(*line != '\0' && isspace(*line))
line++;
c2--;
}else
line++;
}
while (*line != '\0' && *line != '\n' && isspace(*line)==0)
*col2++ = *line++;
*col2 = '\0';
strcat(key,col2);
return key;
}
Here's a possible solution using strtok(). It can handle an arbitrary number of columns (increase the size of buf if necessary), and will still work if the order of the columns is reversed (i.e. c1 > c2). The function returns 1 on success (tokens successfully merged), 0 otherwise.
Note that strtok() modifies its argument - so I have copied input to a temporary buffer, char buf[64].
/*
 * Merge two space-separated 'tokens' of a string.
 * Columns are zero-indexed; the token at c1 comes first in dest, followed
 * directly by the token at c2, whatever their order in input.
 *
 * input is not modified. It must be shorter than 64 characters.
 * dest is written only on success and must be large enough for both tokens.
 *
 * Return: 1 on success, 0 on failure (equal columns, input too long, or a
 * column that does not exist)
 */
int merge_cols(char *input, int c1, int c2, char *dest) {
    char buf[64];
    int col = 0;
    char *tok = NULL, *at_c1 = NULL, *at_c2 = NULL;

    if (c1 == c2) {
        fprintf(stderr, "Columns can not be the same !");
        return 0;
    }
    if (strlen(input) > sizeof(buf) - 1) return 0;
    /*
     * strtok() is _destructive_, so copy the input to
     * a buffer.
     */
    strcpy(buf, input);
    for (tok = strtok(buf, " "); tok && !(at_c1 && at_c2); tok = strtok(NULL, " "), col++) {
        /* remember each column under its own name, so no swap is needed */
        if (col == c1)
            at_c1 = tok;
        else if (col == c2)
            at_c2 = tok;
    }
    /* with only one column found there is nothing valid to write to dest */
    if (!at_c1 || !at_c2) return 0;
    strcpy(dest, at_c1);
    strcat(dest, at_c2);
    return 1;
}
Sample usage:
char *input = "one two three four five six seven eight";
char dest[128];
// The columns can be reversed ...
int merged = merge_cols(input, 7, 1, dest);
if (merged)
puts(dest);
Note also that it is very easy to use different delimiters when using strtok() - so if you wanted to use comma- or tab-separated input instead of spaces, you just change the second argument when calling it.
It is not clear what you're trying to do. If what David Collins suggested is what you're looking for - word concatenation by word index - here's a starting point (demo):
Your function must minimize the number of string traversals. To help this, the code below is using char** instead of char* (sort of "char streams").
The function must be able to count the number of characters the result will have, prior to actual concatenation, in order to be able to allocate the destination string on free store. If catwords is called with null destination, it only counts the length of the result string.
Regarding the actual implementation, you will have to traverse the string word by word and decide whether to copy or skip the word. See the code below for the following functions:
nextword - skips white-spaces until it finds a non-white-space character.
copyword - copies the current word if destination is valid or skips it if not. It returns the number of copied/skipped characters.
#include <ctype.h>
#include <stdlib.h>
// Advances *ps past any white-space characters, stopping at the first
// non-white-space character or at the terminating zero.
void nextword( const char** ps )
{
    // isspace() is only defined for unsigned char values (and EOF)
    while ( **ps && isspace( ( unsigned char )**ps ) )
        ++*ps;
}
// Consumes the word at *ps (everything up to the next white-space character
// or the terminating zero) and advances *ps past it.
// When pd and *pd are both non-null the characters are also copied to *pd,
// which is advanced accordingly; no terminator is written.
// Returns the number of characters consumed.
int copyword( char** const pd, const char** ps )
{
    // remember the starting point
    const char* b = *ps;
    const int copying = pd && *pd;

    // isspace() is only defined for unsigned char values (and EOF)
    while ( **ps && !isspace( ( unsigned char )**ps ) )
    {
        if ( copying )
            *( *pd )++ = **ps;
        ++*ps;
    }
    // return the length
    return ( int )( *ps - b );
}
// Concatenates selected words of s into d.
// c lists the zero-based word indices to take and ends with a negative
// value; a word is taken when the running word index equals the current
// entry of c. When d is null nothing is written and only the length is computed.
// Returns the number of characters the result has, without the terminator.
int catwords( char* d, const char* s, const int* c )
{
    int total = 0;
    int word = 0;       // index of the word s currently points at
    int pick = 0;       // index into c of the next wanted word

    nextword( &s );
    while ( *s && c[ pick ] > -1 )
    {
        if ( word == c[ pick ] )
        {
            total += copyword( &d, &s );
            ++pick;
        }
        else
        {
            copyword( 0, &s ); // just skip the current word
        }
        nextword( &s );
        ++word;
    }
    if ( d )
        *d = '\0';
    return total;
}
// Demo: one run into a fixed buffer, one into a buffer sized by a counting pass.
int main()
{
    // static buffer test
    {
        int picks[] = { 0, 3, -1 };
        char out[ 1024 ];
        catwords( out, "Hello world. Hello world!", picks );
        puts( out );
    }
    // dynamic buffer test
    {
        const char* text = "The greatness of a man is not in how much wealth he acquires, but in his integrity and his ability to affect those around him positively.";
        int picks[] = { 1, 5, 16, -1 };
        // a null destination only counts; add one for the terminator
        int needed = catwords( 0, text, picks ) + 1;
        char* out = (char*)malloc( needed * sizeof( char ) );
        catwords( out, text, picks );
        puts( out );
        free( out );
    }
    return 0;
}

Trim a string in C [duplicate]

This question already has answers here:
How do I trim leading/trailing whitespace in a standard way?
(40 answers)
Closed 5 years ago.
Briefly:
I'm after the equivalent of .NET's String.Trim in C using the win32 and standard C api (compiling with MSVC2008 so I have access to all the C++ stuff if needed, but I am just trying to trim a char*).
Given that there is strchr, strtok, and all manner of other string functions, surely there should be a trim function, or one that can be repurposed...
Thanks
There is no standard library function to do this, but it's not too hard to roll your own. There is an existing question on SO about doing this that was answered with source code.
This made me want to write my own - I didn't like the ones that had been provided. Seems to me there should be 3 functions.
/* Returns a pointer to the first non-white-space character of s (or to its
   terminator). s itself is not modified. */
char *ltrim(char *s)
{
    /* isspace() is only defined for unsigned char values (and EOF) */
    while(isspace((unsigned char)*s)) s++;
    return s;
}
/* Removes trailing white space from s in place and returns s.
   Empty and all-white-space strings become (or stay) empty; the original
   read before the start of the buffer in those cases. */
char *rtrim(char *s)
{
    char* back = s + strlen(s);    /* one past the last kept character */
    while(back > s && isspace((unsigned char)back[-1])) --back;
    *back = '\0';
    return s;
}
/* Trims both ends: skips leading white space, then cuts trailing white
   space. Returns a pointer into s, not necessarily s itself. */
char *trim(char *s)
{
    char *left_trimmed = ltrim(s);
    return rtrim(left_trimmed);
}
You can use the standard isspace() function in ctype.h to achieve this. Simply compare the beginning and end characters of your character array until both ends no longer have spaces.
"spaces" include:
' ' (0x20) space (SPC)
'\t' (0x09) horizontal tab (TAB)
'\n' (0x0a) newline (LF)
'\v' (0x0b) vertical tab (VT)
'\f' (0x0c) feed (FF)
'\r' (0x0d) carriage return (CR)
although there is no function which will do all of the work for you, you will have to roll your own solution to compare each side of the given character array repeatedly until no spaces remain.
Edit:
Since you have access to C++, Boost has a trim implementation waiting for you to make your life a lot easier.
Surprised to see such implementations. I usually do trim like this:
/* Removes trailing white space from s in place and returns s.
   Returns NULL for a NULL argument; an empty string is returned untouched.
   NOTE(review): leading white space is left in place, as in the original. */
char *trim(char *s) {
    size_t len;
    if (!s)
        return NULL; // handle NULL string
    if (!*s)
        return s; // handle empty string
    len = strlen(s);
    /* count down with an index: a pointer must not be moved before s */
    while (len > 0 && isspace((unsigned char)s[len - 1]))
        --len;
    s[len] = '\0';
    return s;
}
It is fast and reliable - serves me many years.
/* Removes white space on both sides of s in place, then prints the result
   followed by a newline. An empty or all-white-space string becomes empty. */
void trim (char *s)
{
    char *start = s;
    size_t len;
    while (isspace ((unsigned char)*start)) start++; // skip left side white spaces
    len = strlen (start);
    // the bound on len keeps the scan inside the string for blank input
    while (len > 0 && isspace ((unsigned char)start[len - 1])) len--; // skip right side white spaces
    // move the kept part to the front so the caller sees the left trim too
    memmove (s, start, len);
    s[len] = '\0';
    printf ("%s\n", s);
}
#include "stdafx.h"
#include <string.h>
#include <ctype.h>
char* trim(char* input);
/* Test driver: prints every sample trimmed, together with its length
   before trimming. */
int _tmain(int argc, _TCHAR* argv[])
{
    char sz1[]=" MQRFH ";
    char sz2[]=" MQRFH";
    char sz3[]=" MQR FH";
    char sz4[]="MQRFH ";
    char sz5[]="MQRFH";
    char sz6[]="M";
    char sz7[]="M ";
    char sz8[]=" M";
    char sz9[]="";
    char sz10[]=" ";
    char* cases[] = { sz1, sz2, sz3, sz4, sz5, sz6, sz7, sz8, sz9, sz10 };
    int i;
    for (i = 0; i < (int)(sizeof cases / sizeof cases[0]); i++)
    {
        /* measure before trimming: trim() changes the buffer, and the order
           in which function arguments are evaluated is unspecified */
        unsigned int before = (unsigned int)strlen(cases[i]);
        /* %u with an unsigned int: strlen() returns size_t, which must not
           be printed with %d */
        printf("sz%d:[%s] %u\n", i + 1, trim(cases[i]), before);
    }
    return 0;
}
/* Skips leading white space: returns a pointer to the first character of s
   that is not white space (possibly the terminator). s is not modified. */
char *ltrim(char *s)
{
    /* cast: isspace() must not be given a negative char value */
    while(isspace((unsigned char)*s)) s++;
    return s;
}
/* Cuts trailing white space off s in place and returns s.
   The original guarded the empty string only; for an all-white-space string
   it scanned back past the start of the buffer. */
char *rtrim(char *s)
{
    size_t len = strlen(s);
    while(len > 0 && isspace((unsigned char)s[len - 1]))
        --len;
    /* only write when something is cut, so an already-trimmed string is
       never written to */
    if(s[len] != '\0')
        s[len] = '\0';
    return s;
}
/* Trims both ends of s. The result points at the first non-white-space
   character, so it may differ from s. */
char *trim(char *s)
{
    char *start = ltrim(s);
    return rtrim(start);
}
Output:
sz1:[MQRFH] 9
sz2:[MQRFH] 6
sz3:[MQR FH] 8
sz4:[MQRFH] 7
sz5:[MQRFH] 5
sz6:[M] 1
sz7:[M] 2
sz8:[M] 2
sz9:[] 0
sz10:[] 8
I like it when the return value always equals the argument. This way, if the string array has been allocated with malloc(), it can safely be free() again.
/* Remove leading whitespaces.
   Shifts the rest of the string to the front of s, so the returned pointer
   is always s itself. NULL and empty strings are returned unchanged. */
char *ltrim(char *const s)
{
    if(s && *s) {
        const char *cur = s;
        /* cast: isspace() must not be given a negative char value */
        while(isspace((unsigned char)*cur))
            ++cur;
        if(cur != s)
            memmove(s, cur, strlen(cur) + 1);   /* +1 moves the terminator too */
    }
    return s;
}
/* Remove trailing whitespaces.
   Works in place and returns s. NULL and empty strings are returned
   unchanged. */
char *rtrim(char *const s)
{
    if(s && *s) {
        size_t len = strlen(s);
        /* cast: isspace() must not be given a negative char value */
        while(len > 0 && isspace((unsigned char)s[len - 1]))
            --len;
        s[len] = '\0';
    }
    return s;
}
/* Remove leading and trailing whitespaces.
   The right side is trimmed first, then the left; both helpers return their
   argument, so the result is always s. */
char *trim(char *const s)
{
    return ltrim(rtrim(s)); // order matters
}
/* Removes leading ' ' characters from str in place. Other white-space
   characters are left alone, as in the original. No temporary buffer is
   used, so strings of any length are handled. */
void ltrim(char str[PATH_MAX])
{
    size_t skip = 0;
    while (str[skip] == ' ')
        skip++;
    if (skip > 0)
        memmove(str, str + skip, strlen(str + skip) + 1);   /* +1 moves the terminator too */
}
/* Trims white space from both ends of str in place. */
static inline void ut_trim(char * str) {
    char * start = str;
    char * end = str + strlen(str);    /* one past the last kept character */
    /* trim right; end never moves before start */
    while (end > start && isspace((unsigned char)end[-1]))
        end--;
    *end = '\0';
    while (isspace((unsigned char)*start)) /* trim left */
        start++;
    if (start != str) /* there is leading white space to remove */
        memmove(str, start, (size_t)(end - start) + 1);
}
How about this... It only requires one iteration over the string (doesn't use strlen, which iterates over the string). When the function returns you get a pointer to the start of the trimmed string which is null terminated. The string is trimmed of spaces from the left (until the first character is found). The string is also trimmed of all trailing spaces after the last nonspace character.
/* Trims input in a single pass. Returns a pointer to the first character
   that isSpace() does not classify as a space; the string is cut right after
   the last such character. The original never cut anything on the right,
   because the terminator itself counted as a non-space character. */
char* trim(char* input) {
    char* start = input;
    while (isSpace(*start)) { //trim left
        start++;
    }
    char* end = start; //one past the last non-space character seen so far
    for (char* ptr = start; *ptr != '\0'; ptr++) { //trim right
        if (!isSpace(*ptr)) {
            end = ptr + 1;
        }
    }
    *end = '\0'; //terminate the trimmed string with a null
    return start;
}
/* True for blank, newline, tab, form feed and carriage return; false for
   every other character. */
bool isSpace(char c) {
    return c == ' ' || c == '\n' || c == '\t' || c == '\f' || c == '\r';
}
/* Removes every leading and/or trailing occurrence of ch from szStr in place.
   iMode 0:ALL, 1:Left, 2:Right
   Returns szStr, or NULL when szStr is NULL. No temporary buffer is used, so
   strings of any length are handled. */
char* Trim(char* szStr,const char ch, int iMode)
{
    size_t begin = 0;   /* index of the first kept character */
    size_t end;         /* one past the last kept character */

    if (szStr == NULL)
        return NULL;

    end = strlen(szStr);
    if (iMode != 2) {
        while (begin < end && szStr[begin] == ch)
            ++begin;
    }
    if (iMode != 1) {
        while (end > begin && szStr[end - 1] == ch)
            --end;
    }
    memmove(szStr, szStr + begin, end - begin);
    szStr[end - begin] = '\0';
    return szStr;
}
Here's my implementation, behaving like the built-in string functions in libc (that is, it expects a c-string, it modifies it and returns it to the caller).
It trims leading spaces & shifts the remaining chars to the left, as it parses the string from left to right. It then marks a new end of string and starts parsing it backwards, replacing trailing spaces with '\0's until it finds either a non-space char or the start of the string. I believe those are the minimum possible iterations for this particular task.
// ----------------------------------------------------------------------------
// trim leading & trailing spaces from string s (return modified string s)
// alg:
// - skip leading spaces, via cp1
// - shift remaining *cp1's to the left, via cp2
// - mark a new end of string
// - replace trailing spaces with '\0', via cp2
// - return the trimmed s
//
char *s_trim(char *s)
{
    char *cp1; // for parsing the whole s
    char *cp2; // for shifting & padding

    // skip leading spaces, shift remaining chars
    // (cast: isspace() must not be given a negative char value)
    for (cp1=s; isspace((unsigned char)*cp1); cp1++ ) // skip leading spaces, via cp1
        ;
    for (cp2=s; *cp1; cp1++, cp2++) // shift left remaining chars, via cp2
        *cp2 = *cp1;
    *cp2 = 0; // mark new end of string for s

    // replace trailing spaces with '\0'; cp2 stays one past the last kept
    // character, so it never moves before s
    while ( cp2 > s && isspace((unsigned char)cp2[-1]) )
        *--cp2 = 0; // pad with '\0's
    return s;
}
Not the best way but it works
// Returns a newly allocated copy of str with every white-space character
// removed (inner ones included). str is not modified.
// The caller owns the result and must release it with delete[].
char* Trim(char* str)
{
    size_t len = strlen(str);
    char* buff = new char[len + 1];    // +1: room for the terminator
    size_t i = 0;
    // a plain loop also handles the empty string, where the original
    // do/while read past the terminator
    for (; *str != '\0'; ++str) {
        if (!isspace((unsigned char)*str))
            buff[i++] = *str;
    }
    buff[i] = '\0';
    return buff;
}
/* Removes every white-space character from str in place, inner ones
   included. Single pass: each kept character is copied at most once. */
void inPlaceStrTrim(char* str) {
    char *out = str;    /* write position, never ahead of the read position */
    for (const char *in = str; *in != '\0'; in++) {
        if (!isspace((unsigned char)*in)) {
            *out++ = *in;
        }
    }
    *out = '\0';
}
Easiest thing to do is a simple loop. I'm going to assume that you want the trimmed string returned in place.
/* Removes every white-space character from s in place (inner ones included)
   and returns s. No temporary buffer is needed: the write position never
   overtakes the read position. */
char *
strTrim(char * s){
    size_t ix, jx = 0;

    for(ix = 0; s[ix] != '\0'; ix++){
        if(!isspace((unsigned char)s[ix]))
            s[jx++] = s[ix];
    }
    s[jx] = '\0';   /* the original strncpy() never copied the terminator */
    return s;       /* modifies s in place *and* returns it */
}
This gets rid of embedded blanks too, if String.Trim doesn't then it needs a bit more logic.

Resources