C - Determining alphabetical order of characters/strings - c

I'm trying to write a function that compares two strings (s1 and s2) and works out whether s1 comes before, after or is equal to the s2 string, alphabetically (in the same way as a dictionary is read). If s1 comes before s2 it should return -1. If it's equal to s2 it should return 0. If it comes after s2 it should return 1.
I'm having difficulty getting the function to work - I can only seem to get returns for the first chars in each string and only using the same case. Grateful for any help you can give.
Here's the code so far:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
int cmpstr(const char *, const char *);
int main()
{
    /* Evaluate the three sample comparisons first, then report them in order. */
    const int first = cmpstr("Hello", "World");
    const int second = cmpstr("Hello", "Hello");
    const int third = cmpstr("World", "Hello");

    printf("Test 1: %d\n", first);
    printf("Test 2: %d\n", second);
    printf("Test 3: %d\n", third);
    return 0;
}
/*
 * Compares s1 and s2 and returns -1, 1 or 0.
 * As written, only the first character of each string is ever examined:
 * every path through the loop body returns during the first iteration.
 */
int cmpstr(const char *s1, const char *s2)
{
/*compare corresponding string characters until null is reached*/
while(*s1 != '\0' && *s2 != '\0' )
{
if (*s1 < *s2)
{
return -1;
}
else if (*s1 > *s2)
{
return 1;
}
else
{
/* Returns as soon as the current pair of characters is equal, so the two
   increments below are never executed. */
return 0;
s1++;
s2++;
}
}
/* Reached only when at least one of the two strings is empty. */
return 0;
}

Just remove the final else branch, keep the two increments at the end of the loop body, and put return 0 after the loop. The strings are equal only if neither the if condition nor the else-if condition is ever true, so when the loop finishes without returning, the function returns 0.
/*
 * Compares s1 and s2 in dictionary (byte-wise) order.
 *
 * Returns -1 if s1 sorts before s2, 1 if it sorts after, and 0 if both
 * strings are identical. A string that is a proper prefix of the other
 * sorts first, e.g. "Hello" comes before "Hello there".
 */
int cmpstr(const char *s1, const char *s2)
{
    /* Skip the common prefix; stop at the first difference or at the end of s1. */
    while (*s1 != '\0' && *s1 == *s2)
    {
        s1++;
        s2++;
    }

    /* Compare as unsigned char so bytes above 0x7F order after ASCII.
       The terminator is 0, the smallest value, so the shorter string wins. */
    const unsigned char c1 = (unsigned char)*s1;
    const unsigned char c2 = (unsigned char)*s2;

    if (c1 < c2)
    {
        return -1;
    }
    if (c1 > c2)
    {
        return 1;
    }
    return 0;
}

Your code has a very obvious mistake: the return 0 statement makes the s1++; s2++; statements unreachable code (your compiler should have warned you about that).
But it also has a conceptual mistake, as it ignores situations where s1 is longer than s2 or vice versa. So with your approach (once the return 0 problem is corrected), "Hello" and "Hello there" would compare equal.
See the following code, which works in a different manner. It skips equal characters until one (or both) of the strings has ended or a difference is found. Then, according to this state, the result is determined:
/*
 * Compares s1 and s2 lexicographically.
 *
 * Returns -1 if s1 is lexicographically lower than s2, 1 if it is greater,
 * and 0 if the strings are equal. Characters are compared as unsigned char,
 * so bytes above 0x7F are not treated as negative where plain char is signed.
 */
int cmpstr(const char *s1, const char *s2)
{
    // move forward until either one of the strings ends or the first difference is detected.
    while (*s1 && *s2 && *s1 == *s2) {
        s1++;
        s2++;
    }

    const unsigned char c1 = (unsigned char)*s1;
    const unsigned char c2 = (unsigned char)*s2;

    // if both strings are equal, s1 and s2 have reached their ends and c1 == c2 == 0
    // otherwise c1 and c2 are the first differing characters; a terminator (0)
    // on either side makes the shorter string the lower one
    if (c1 < c2)
        return -1;
    if (c1 > c2)
        return 1;
    return 0;
}

Removing 'return 0' in else statement will work. If the chars are equal in same level, you need to look next ones until the equality breaks.
Edit: Also, you need to handle the case where the strings have different lengths; the version below does that and additionally ignores letter case.
/*
 * Case-insensitive comparison of s1 and s2.
 *
 * Returns -1 if s1 sorts before s2, 1 if it sorts after, and 0 if the
 * strings are equal when letter case is ignored. A string that is a proper
 * prefix of the other sorts first.
 */
int cmpstrMY(const char *s1, const char *s2)
{
    for (;; s1++, s2++)
    {
        /* tolower() requires a value representable as unsigned char (or EOF),
           so convert before calling it; the results are compared as int. */
        const int sc1 = tolower((unsigned char)*s1);
        const int sc2 = tolower((unsigned char)*s2);

        if (sc1 != sc2)
        {
            /* Also covers unequal lengths: the terminator (0) is the
               smallest possible value, so the shorter string is the lower one. */
            return (sc1 < sc2) ? -1 : 1;
        }
        if (sc1 == '\0')
        {
            /* Both strings ended at the same position. */
            return 0;
        }
    }
}
Your cmpstr must be something like the code above.

Related

How to compare char type pointers using _Bool function?

I should check if 2 arrays are equal using exact function _Bool areEqual(char *str1,char *str2). My code terminates. What should I do in order to effectively use _Bool function to print true (if they are equal) and false( not equal).
My code:
#include <stdio.h>
#include <math.h>
#include <stdbool.h>
/*
 * Meant to report whether str1 and str2 hold the same text.
 * As written, neither pointer is ever dereferenced, so no characters are compared.
 */
_Bool areEqual(char* str1, char* str2) {
/* Compares the pointer itself with '\0' (a null pointer constant), not the character it points to. */
while (str1 != '\0') {
/* Compares the two addresses, not the characters stored at them. */
if (str1 == str2) {
str1++;
/* Assignment, not comparison: str1 becomes a null pointer and the condition
   is false, so the return below is never reached. */
if (str1 = '\0') {
return true;
}
}
else {
return false;
}
}
/* No return statement on the path that leaves the loop. */
}
int main() {
    /* One operand is a writable array, the other a pointer to a string literal
       with the same text; the result of the comparison is discarded. */
    char buffer[] = "abcdef";
    char *literal = "abcdef";

    (void)areEqual(buffer, literal);
    return 0;
}
Firstly, bool is a macro from <stdbool.h> that expands to _Bool, so the two are the same type.
Secondly, your function areEqual compares the pointers str1 and str2 themselves rather than the characters they point to, and it never advances str2. If you want to compare the full contents of the strings, you need to iterate over both:
/*
 * Reports whether s1 and s2 contain exactly the same characters.
 *
 * Returns true only if both strings have the same length and the same
 * character at every position; neither string is modified.
 */
_Bool areEqual(char *s1, char *s2)
{
    /* Walk both strings while they agree and s1 has not ended. */
    while (*s1 != '\0' && *s1 == *s2) {
        s1++;
        s2++;
    }
    /* Equal only if both stopped on their terminator; a length difference
       leaves a terminator on one side and a character on the other. */
    return *s1 == *s2;
}
Finaly to print the corresponding result you just need to check the return of your function
/* Print the outcome of the comparison. */
if (areEqual(str1, str2)) {
    printf("True");
} else {
    printf("False");
}

How to make strcmp function?

I want to make my own strcmp function, like the one in C.
/*
 * Compares str1 and str2 character by character, stopping at the end of
 * the shorter string.
 * Returns the difference of the first pair of mismatching characters
 * (subtracted as plain char), or 0 if no mismatch was found before either
 * string ended -- so a string and its proper prefix also yield 0.
 */
int my_cmp(const char* str1, const char* str2)
{
int index;
/* Loops only while BOTH strings still have characters at this index. */
for (index = 0; str1[index] != '\0' && str2[index] != '\0'; index++)
if (str1[index] != str2[index])
return (str1[index] - str2[index]);
return 0;
}
Am I right?
I know that not all the strings have the same length.
I'm not sure about condition of for statement.
Here is one of the Official implemention.
/*
 * Byte-wise string comparison.
 * Returns 0 when the strings are identical, otherwise -1 or +1 according to
 * the first differing characters, which are compared as unsigned char.
 */
int strcmp(const char *s1, const char *s2)
{
    while (*s1 == *s2) {
        /* Equal characters and both are the terminator: the strings match. */
        if (*s1 == '\0') {
            return 0;
        }
        s1++;
        s2++;
    }

    const unsigned char lhs = *(const unsigned char *)s1;
    const unsigned char rhs = *(const unsigned char *)s2;
    if (lhs < rhs) {
        return -1;
    }
    return +1;
}
Update:
Problems of your code:
Your code works fine for strings of the same length; in the other cases it gives a wrong result (a string and its proper prefix compare as equal).
For extended ASCII (values in the range 128~255) you compare plain char, which may be signed; such values then become negative, so you may get a result with the wrong sign.
fix version:
/*
 * Compares str1 and str2 the way strcmp orders them.
 *
 * Returns -1 if str1 sorts before str2, +1 if it sorts after, and 0 if the
 * strings are identical. Characters are compared as unsigned char, and a
 * string that is a proper prefix of the other sorts first.
 */
int my_cmp(const char* str1, const char* str2)
{
    const unsigned char *p1 = (const unsigned char *)str1;
    const unsigned char *p2 = (const unsigned char *)str2;
    int index;

    for (index = 0; p1[index] != '\0' && p2[index] != '\0'; index++) {
        if (p1[index] != p2[index]) {
            /* Compare the characters at the mismatching position,
               not the first characters of the strings. */
            return (p1[index] < p2[index]) ? -1 : +1;
        }
    }

    /* One or both strings ended: the one with characters left is greater. */
    if (p1[index] != '\0') {
        return 1;
    } else if (p2[index] != '\0') {
        return -1;
    }
    return 0;
}
the following code snippet shows you how you could implement an "strcmp" function:
/*
 * Compares s1 and s2 byte by byte as unsigned char.
 * Returns 1 if s1 is greater, -1 if s1 is smaller, 0 if the strings are equal.
 */
int myStrCmp (const char *s1, const char *s2) {
    const unsigned char *lhs = (const unsigned char *)s1;
    const unsigned char *rhs = (const unsigned char *)s2;

    for (; *lhs != '\0'; lhs++, rhs++) {
        if (*lhs == *rhs) {
            continue;
        }
        /* *lhs is non-zero here, so a terminator in rhs also lands in the "greater" case. */
        return (*lhs > *rhs) ? 1 : -1;
    }

    /* s1 is exhausted: equal only if s2 is exhausted as well. */
    return (*rhs == '\0') ? 0 : -1;
}
Am I right? I know that not all the strings have the same length. I'm not sure about condition of for statement.
You are almost right. Your if statement
if (str1[index] != str2[index])
return (str1[index] - str2[index]);
is basically correct (though the characters should be subtracted as unsigned chars), but the for loop itself
for (index = 0; str1[index] != '\0' && str2[index] != '\0'; index++)
is wrong. Specifically the condition:
str1[index] != '\0' && str2[index] != '\0'
This is wrong because it checks to make sure that both characters at the given index are not '\0', rather than either character. This can be fixed by replacing && with ||.
Here's how a seasoned C programmer might write the strcmp function (I wrote this :p (EDIT: #chux suggested an improvement)):
/*
 * Returns the difference between the first pair of differing characters,
 * taken as unsigned char, or 0 when the strings are identical.
 */
int strcmp(const char *s1, const char *s2) {
    /* Advance while s1 has not ended and both strings agree. */
    while (*s1 != '\0' && *s1 == *s2) {
        s1++;
        s2++;
    }
    const unsigned char lhs = (unsigned char)(*s1);
    const unsigned char rhs = (unsigned char)(*s2);
    return lhs - rhs;
}

Function To Match The Last Character Of A String

So this is a problem : Write the function strend(s,t), which returns 1 if the char t occurs at the end of the string s, and zero otherwise.
This is my code:
int strend(char*, char);
int main()
{
    /* Fixed sample word; the character to look for is read from standard input. */
    char word[6] = "Hello";

    printf("Enter a character: ");
    const char typed = getchar();

    const int n = strend(word, typed);
    printf("n = %d", n);
    return 0;
}
/*
 * Intended to return 1 when str2 is the last character of str1 and 0 otherwise.
 */
int strend(char* str1, char str2)
{
/* Advance until str1 points at the terminating '\0'. */
while(*str1 != '\0')
{
str1++;
}
/* str1 now points at the terminator, so this compares str2 with '\0'
   rather than with the last character of the string. */
if(*str1 == str2)
{
return 1;
}
else
{
return 0;
}
}
However the character matching does not perform as intended. Where the mistake?
Thanks.
You're comparing the character to the \0 string terminator.
/*
 * Returns 1 if str2 is the last character of the string str1, 0 otherwise.
 * An empty string has no last character and always yields 0.
 */
int strend(char* str1, char str2)
{
    char *tail = str1;

    if (*tail == '\0') {
        return 0;
    }

    /* Find the terminator; the character just before it is the last one. */
    while (*tail != '\0') {
        tail++;
    }

    return (tail[-1] == str2) ? 1 : 0;
}
I'll try my own explanation.
Suppose your while() loop has reached the last non-zero character of your string str1.
In this case, the line while( *str1 != '\0' ) is "asking" if this character is zero or not.
Since it is the character that you are looking for, logically it cannot be '\0'.
Then the comparison expressión is "true", and the increment str1++; is performed.
Now *str1 is the character '\0', and the immediate next iteration gives "false" when evaluating *str1 != '\0'.
Then the while() block is finished, and the program continues in the line if(*str1 == str2).
Here, the value *str1, which is '\0', is compared against str2, giving always the result "false".
However, the desired character is still in the immediate previous memory position of str1.
So, you can either decrement str1 and then compare, or compare str2 against *(str1 - 1), the character just before the terminator.
// Option 1: step back onto the last character, then compare
str1--;
if(*str1 == str2)
// Option 2: compare against the character just before the terminator, leaving str1 unchanged
if (*(str1 - 1) == str2)
Without any error checking, (you can do that) here is a one liner that will check that the last character is matched:
/* Returns 1 if str2 equals the last character of str1, 0 otherwise.
   No error checking: str1 is expected to be a non-empty string. */
int strend(char* str1, char str2)
{
    return (str2 == *(str1 + strlen(str1) - 1)) ? 1 : 0;
}
Or in a more readable form:
/*
 * Returns 1 if str2 is the last character of the string str1, 0 otherwise.
 * An empty string has no last character, so it yields 0 instead of
 * indexing before the start of the buffer.
 */
int strend(char* str1, char str2)
{
    const size_t len = strlen(str1);

    if (len == 0) {
        return 0;
    }
    return str1[len - 1] == str2;
}

Creating my own strcmp () function in C

I was assigned by my teacher to write my own strcmp() function in C. I did create my own version of said function, and I was hoping to get some feedback.
/*
 * Orders two strings by length first and, for equal lengths, by their
 * first differing character.
 * Returns StringOneIsLesser, StringOneIsGreater or StringsAreEqual.
 */
int CompareTwoStrings ( char *StringOne, char *StringTwo ) {
    const size_t LengthOne = strlen ( StringOne );
    const size_t LengthTwo = strlen ( StringTwo );

    // Strings of different length are ordered by their length alone.
    if ( LengthOne < LengthTwo ) {
        return ( StringOneIsLesser );
    }
    if ( LengthOne > LengthTwo ) {
        return ( StringOneIsGreater );
    }

    // Same length: walk both strings together until StringOne ends.
    const char *Left = StringOne;
    const char *Right = StringTwo;
    for ( ; *Left != '\0'; Left++, Right++ ) {
        if ( *Left < *Right ) {
            return ( StringOneIsLesser );
        }
        if ( *Left > *Right ) {
            return ( StringOneIsGreater );
        }
    }

    // No differing character was found.
    return ( StringsAreEqual );
}
StringOneIsLesser, StringOneIsGreater, StringsAreEqual are defined as const int with the respective values: -1, +1, 0.
Thing is, I'm not exactly sure if, for example, my StringOne has a lesser length than my StringTwo, that automatically means StringTwo is greater, because I don't know how strcmp() is particularly implemented. I need some of your feedback for that.
So much for such a simple task. I believe something simple as this would do:
/*
 * Returns the difference between the first differing characters of a and b,
 * taken as unsigned char values, or 0 when the strings are identical.
 */
int my_strcmp(const char *a, const char *b)
{
    /* Skip the common prefix, stopping at the end of a. */
    for (; *a != '\0' && *a == *b; ++a, ++b) {
    }

    const int lhs = (unsigned char)(*a);
    const int rhs = (unsigned char)(*b);
    return lhs - rhs;
}
strcmp compares alphabetically: so "aaa" < "b" even though "b" is shorter.
Because of this, you can skip the length check and just do the letter-by-letter comparison. If you reach the terminating NUL character of one string while both strings have been equal so far, then the shorter one is the lesser one.
Also: make StringsAreEqual == 0, not 1 for compatibility with standard sorting functions.
/*
 * Compares at most n characters of str1 and str2.
 *
 * Returns a negative value if str1 sorts before str2, a positive value if
 * it sorts after, and 0 if the first n characters (or the whole strings,
 * when they end earlier) are equal. Characters are compared as unsigned char.
 */
int mystrncmp(const char * str1, const char * str2, unsigned int n)
{
    while (n > 0) {
        const unsigned char c1 = (unsigned char)*str1++;
        const unsigned char c2 = (unsigned char)*str2++;

        if (c1 != c2) {
            /* Two comparisons instead of a subtraction: yields exactly -1 or 1. */
            return (c1 > c2) - (c1 < c2);
        }
        if (c1 == '\0') {
            /* Both strings ended before n characters were consumed. */
            break;
        }
        n--;
    }
    return 0;
}
strcmp() is fairly easy to code. The usual mis-codings issues include:
Parameter type
strcmp(s1,s2) uses const char * types, not char *. This allows the function to be called with pointers to const data. It conveys to the user the function's non-altering of data. It can help with optimization.
Sign-less compare
All str...() function perform as if char was unsigned char, even if char is signed. This readily affects the result when strings differ and a character outside the range [1...CHAR_MAX] is found.
Range
On select implementations, the result of unsigned char minus unsigned char can fall outside the int range. Using two compares, (a>b) - (a<b), rather than a-b avoids any such problem. Further: many compilers recognize that idiom and emit good code.
/*
 * Compares s1 and s2 as sequences of unsigned char.
 * Returns 1 if s1 is greater, -1 if s1 is smaller, 0 if the strings are equal.
 */
int my_strcmp(const char *s1, const char *s2) {
    // Every character is read through an `unsigned char` pointer
    const unsigned char *lhs = (const unsigned char *) s1;
    const unsigned char *rhs = (const unsigned char *) s2;

    // Skip the common prefix; stop at the terminator or the first difference
    for (; *lhs == *rhs && *lhs != '\0'; lhs++, rhs++) {
    }

    // Two comparisons rather than a subtraction, so the result cannot overflow
    const int greater = (*lhs > *rhs);
    const int smaller = (*lhs < *rhs);
    return greater - smaller;
}
Dinosaur computers
On machines that use a signed char and a non-2's-complement representation, the following can give a wrong result or trap at *s1 != '\0'. Such machines could have a negative zero, which does not indicate the end of a string yet still ends the loop. Using unsigned char * pointers solves that.
/* Illustrative fragment: the loop reads the characters through the plain
   `char` pointers s1 and s2. The snippet stops after the loop, with no
   return statement and no closing brace for the function. */
int my_strcmp(const char *s1, const char *s2) {
while (*s1 == *s2 && *s1 != '\0') { // Error!
s1++;
s2++;
}
Try this also for your better understanding:
#include <stdio.h>
#include <string.h>
/*
 * Reads two lines from standard input and reports how they compare.
 *
 * The strings are compared character by character (as unsigned char); the
 * first difference decides the order, and a string that is a proper prefix
 * of the other is the lesser one. Returns 0 after printing the verdict, or
 * 1 if a line could not be read.
 */
int main(void)
{
    char string1[20], string2[20];

    puts("enter the stirng one to compare");
    if (fgets(string1, sizeof(string1), stdin) == NULL) {
        return 1;
    }
    /* Cut the line at the newline kept by fgets, if there is one. */
    string1[strcspn(string1, "\n")] = '\0';

    puts("enter the stirng two to compare");
    if (fgets(string2, sizeof(string2), stdin) == NULL) {
        return 1;
    }
    string2[strcspn(string2, "\n")] = '\0';

    /* Skip the common prefix; stop at the first difference or at the end
       of string1. The terminator (0) is the smallest value, so different
       lengths need no separate handling. */
    size_t i = 0;
    while (string1[i] != '\0' && string1[i] == string2[i]) {
        i++;
    }
    const int count = (unsigned char)string1[i] - (unsigned char)string2[i];

    if (count == 0) {
        printf("strings are equal");
    } else if (count < 0) {
        printf("string1 is less than string2");
    } else {
        printf("string2 is less than string1");
    }
    return 0;
}
// Returns true when both pointers are non-null, str_len_v reports the same
// length for both strings, and no differing character is found before
// either string ends; returns false otherwise.
bool str_cmp(char* str1,char* str2)
{
    if (!str1 || !str2)
    {
        return false;
    }

    const int lhs_len = str_len_v(str1);
    const int rhs_len = str_len_v(str2);
    if (lhs_len != rhs_len)
    {
        return false;
    }

    int pos = 0;
    while (str1[pos] != '\0' && str2[pos] != '\0')
    {
        if (str1[pos] != str2[pos])
        {
            return false;
        }
        ++pos;
    }
    return true;
}

Checking if \n exists in String - C

I'm trying to compare two strings, and even though they look the same, I wasn't getting a match. Turns out one string contains \n.
So my question is, is a way to check if a string contains '\n'?
I'm using the strcmp function;
/* tempData carries a trailing space and newline that checkThis lacks. */
char *tempData = "Hello \n";
char *checkThis = "Hello";

/* strcmp returns 0 only for identical strings. */
if (0 == strcmp(tempData, checkThis)) {
    printf("Match");
}
You could strip the white-space before comparing, then you do not require a check for '\n'. But instead you can just compare the strings, assuming that is what you want to do.
This question has some answers on how to do that in C.
Create your own comparing function, that ignores the \n or any other char you pass in:
/*
 * Compares s1 and s2 while skipping every occurrence of `ignore` in both.
 *
 * Returns 0 if the strings are equal once the ignored character is removed,
 * -1 if s1 then sorts before s2, and 1 if it sorts after. Characters are
 * compared as unsigned char. Both strings are scanned only once.
 */
int strcmp_ignoring_char(const char* s1, const char* s2, const char ignore)
{
    for (;;)
    {
        /* Skip ignored characters on both sides. The terminator check keeps
           the scan inside the string even when `ignore` is '\0'. */
        while ( *s1 != '\0' && *s1 == ignore )
            s1++;
        while ( *s2 != '\0' && *s2 == ignore )
            s2++;

        const unsigned char c1 = (unsigned char)*s1;
        const unsigned char c2 = (unsigned char)*s2;

        if ( c1 != c2 )
        {
            /* Also covers one string ending first: its terminator (0) is
               smaller than any remaining character of the other string. */
            return c1 > c2 ? 1 : -1;
        }
        if ( c1 == '\0' )
            return 0;

        s1++;
        s2++;
    }
}
This way you won't scan the strings twice.
You could also create a variation that ignores a string, not a single char:
int strcmp_ignoring_char(const char* s1, const char* s2, const char* ignore)
Here is my attempt. I have tried to keep it MISRA-C compliant, save for the C99 features.
#include <stdint.h>
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
/*
 * Compares s1 and s2 while ignoring every white-space character (as
 * classified by isspace) in both strings.
 *
 * Returns 0 if the strings are equal once white space is removed, -1 if s1
 * then sorts before s2, and 1 if it sorts after. The result is built from
 * two comparisons, so it always fits in int8_t with the correct sign.
 */
int8_t strcmp_ignore_space (const uint8_t* s1, const uint8_t* s2)
{
  for (;;)
  {
    /* isspace('\0') is false, so both scans stop at the terminator. */
    while (isspace(*s1))
    {
      s1++;
    }
    while (isspace(*s2))
    {
      s2++;
    }

    /* Stop on the first difference or when both strings have ended. */
    if ((*s1 != *s2) || (*s1 == '\0'))
    {
      break;
    }

    s1++;
    s2++;
  }

  return (int8_t)( (*s1 > *s2) - (*s1 < *s2) );
}
int main()
{
  // two strings that differ only in the white-space characters scattered through them
  const int8_t outcome = strcmp_ignore_space(" He\vllo \n",
                                             "\r He\fll o ");

  if(outcome != 0)
  {
    printf("Different\n");
  }
  else
  {
    printf("Same\n");
  }
  return 0;
}

Resources