Check whether a given substring is present in the given string

Check whether a given substring is present in the given string - c

I should write a program in C to check whether a given substring is present in the given string. The code I wrote is below but it doesn't work. Can anyone tell me where the problem is?
#include <stdio.h>
#include <string.h>
int main(void)
{
char str[30]="the test string";
char sbstr[30]="test";
char strcp[30];
int len = strlen(str);
int i=0;
int p=0;
while(i<len)
{
while (str[i] != '\0' && str[i] != ' ')
{
strcp[i] = str[i];
++i;
}
strcp[i] = '\0';
p = strcmp(sbstr, strcp);
if (p==0)
{
printf("exist");
break;
}
++i;
}
}

For the array strcp
char strcp[30];
you need to support a separate index.
Something like
int j = 0;
while (str[i] != '\0' && str[i] != ' ')
{
strcp[j++] = str[i++];
}
strcp[j] = '\0';
Pay attention to that there is standard C function strstr that can be used to perform the task.

I know you've already accepted an answer, but here's a slightly more efficient way to do a substring comparison that does not involve making a copy of the candidate substring to begin with in each iteration.
char str[30]="the test string";
char sbstr[30]="test";
int len = strlen(str);
int sublen = strlen(sbstr);
int found = 0;
int i = 0; // starting index in str to start comparing on
while (!found && sublen <= len) {
found = 1;
// found = !strncmp(str+i, sbstr, sublen);
for (int j = 0; j < sublen; j++) {
if (str[i+j] != sbstr[j]) {
found = 0;
break;
}
}
if (!found) {
i++;
len--;
}
}
if (found) {
printf("Exists starting at index %d\n", i);
}
And if you really want to get hardcore, there are well known algorithms such as the Boyer–Moore string-search algorithm which can search faster by using a table-lookup scheme IIRC.

Related

Counting occurrences of words within an inputted string in c

I'm currently struggling with counting the occurrences of the words within an inputted string. I believe it is just my logic that is off but I've been scratching my head for a while and I've just hit a wall.
The problems I'm currently yet to solve are:
With longer inputs the ends of the string is sometimes cut off.
Incrementing the counter for each word when repeated
I know the code has things that may not be the most ideal way for it to work but I'm fairly new to C so any pointers are really helpful.
To sum it up I'm looking for pointers to help solve the issues I'm facing above
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#define MAX_WORDS 1000
int main(void) {
int i,j,isUnique,uniqueLen;
char word[MAX_WORDS];
char words[200][30];
char uniqueWords[200][30];
int count[200];
char *p = strtok(word, " ");
int index=0;
//read input until EOF is reached
scanf("%[^EOF]", word);
//initialize count array
for (i = 0; i < 200; i++) {
count[i] = 0;
}
//convert lower case letters to upper
for (i = 0; word[i] != '\0'; i++) {
if (word[i] >= 'a' && word[i] <= 'z') {
word[i] = word[i] - 32;
}
}
//Split work string into an array and save each token into the array words
p = strtok(word, " ,.;!\n");
while (p != NULL)
{
strcpy(words[index], p);
p = strtok(NULL, " ,.;!\n");
index++;
}
/*
Check each string in the array word for occurances within the uniqueWords array. If it is unique then
copy the string from word into the unique word array. Otherwise the counter for the repeated word is incremented.
*/
uniqueLen = 0;
for (i = 0; i < index; i++) {
isUnique = 1;
for (j = 0; j < index; j++) {
if (strcmp(uniqueWords[j],words[i])==0) {
isUnique = 0;
break;
}
else {
}
}
if (isUnique) {
strcpy(uniqueWords[uniqueLen], words[i]);
count[uniqueLen] += 1;
uniqueLen++;
}
else {
}
}
for (i = 0; i < uniqueLen; i++) {
printf("%s => %i\n", uniqueWords[i],count[i]);
}
}

This is the code i ended up using, this turned out to be mainly an issue with using the scanf function. Placing it in a while loop made it much easier to edit words as inputted.
Thankyou for all the help :)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
int main(void) {
// Create all variables
int i, len, isUnique, index;
char word[200];
char uniqueWords[200][30];
int count[200];
// Initialize the count array
for (i = 0; i < 200; i++) {
count[i] = 0;
}
// Set the value for index to 0
index = 0;
// Read all words inputted until the EOF marker is reached
while (scanf("%s", word) != EOF) {
/*
For each word being read if the characters within it are lowercase
then each are then incremented into being uppercase values.
*/
for (i = 0; word[i] != '\0'; i++) {
if (word[i] >= 'a' && word[i] <= 'z') {
word[i] = word[i] - 32;
}
}
/*
We use len to find the length of the word being read. This is then used
to access the final character of the word and remove it if it is not an
alphabetic character.
*/
len = strlen(word);
if (ispunct(word[len - 1]))
word[len - 1] = '\0';
/*
The next part removes the non alphabetic characters from within the words.
This happens by incrementing through each character of the word and by
using the isalpha and removing the characters if they are not alphabetic
characters.
*/
size_t pos = 0;
for (char *p = word; *p; ++p)
if (isalpha(*p))
word[pos++] = *p;
word[pos] = '\0';
/*
We set the isUnique value to 1 as upon comparing the arrays later we
change this value to 0 to show the word is not unique.
*/
isUnique = 1;
/*
For each word through the string we use a for loop when the counter i
is below the index and while the isUnique value is 1.
*/
for (i = 0; i < index && isUnique; i++)
{
/*
Using the strcmp function we are able to check if the word in
question is in the uniqueWords array. If it is found we then
change the isUnique value to 0 to show that the value is not
unique and prevent the loop happening again.
*/
if (strcmp(uniqueWords[i], word) == 0)
isUnique = 0;
}
/* If word is unique then add it to the uniqueWords list
and increment index. Otherwise increment occurrence
count of current word.
*/
if (isUnique)
{
strcpy(uniqueWords[index], word);
count[index]++;
index++;
}
else
{
count[i - 1]++;
}
}
/*
For each item in the uniqueWords list we iterate through the words
and print them out in the correct format with the word and the following count of them.
*/
for (i = 0; i < index; i++)
{
printf("%s => %d\n", uniqueWords[i], count[i]);
}
}

I don't know if you are facing some requirements, but for all it's limitations in terms of standard library functions, C does have one that would make your job much easier, strstr, e.g.:
Live demo
#include <stdio.h>
#include <string.h>
int main() {
const char str[] = "stringstringdstringdstringadasstringipoistring";
const char* substr = "string";
const char* orig = str;
const char* temp = substr;
int length = 0;
while(*temp++){length++;} // length of substr
int count = 0;
char *ret = strstr(orig, substr);
while (ret != NULL){
count++;
//check next occurence
ret = strstr(ret + length, substr);
}
printf("%d", count);
}
The output should be 6.
Regarding user3121023's comment, scanf("%999[^\n]", word); parses all characters until it finds a \n or it reaches the width limit, and I agree fgets ( word, sizeof word, stdin); is better.

remove specific characters from a string by traversing string only once

I was asked this question in an interview but I was not able to answer.
Question was: To remove a specific characters from a given string by traversing string only once.
e.g. Given string is: "aaabbcdabe"
remove all 'b'
output: ""aaacdae"
I made this logic but it was traversing string more than once:
for(int i=0; str[i]!='\0'; i++)
{
if(str[i] == 'b')
{
for(j=i; str[j]!='\0'; j++)
{
str[j] = str[j+1];
}
}
}
With this logic, string is getting traversed more than once, once in outer for loop and many times in shifting operation.
Is there any other way to do this?

Keep a pointer to the read location and a pointer to the write location. Each time the read-pointer is advanced, only write through the write-pointer if the character is not being removed. Advance the write-pointer only when a character is written:
#include <stdio.h>
void remove_chars(char *str, const char c);
int main(void)
{
char test_str[] = "aaabbcdabe";
puts(test_str);
remove_chars(test_str, 'b');
puts(test_str);
return 0;
}
void remove_chars(char *str, const char c)
{
char *write_ptr = str;
while (*str) {
if (*str != c) {
*write_ptr = *str;
++write_ptr;
}
++str;
}
*write_ptr = '\0';
}
Program output:
λ> ./a.out
aaabbcdabe
aaacdae

This should work. It's pretty short and sweet.
int newLen = 0;
int oldLen = strlen(str);
for(int i=0; i<oldLen; i++){
if(str[i] != 'b'){
str[newLen] = str[i];
newLen++;
}
}
str[newLen] = '\0';

Longest Substring Palindrome issue

I feel like I've got it almost down, but for some reason my second test is coming up with a shorter palindrome instead of the longest one. I've marked where I feel the error may be coming from, but at this point I'm kind of at a loss. Any direction would be appreciated!
#include <stdio.h>
#include <string.h>
/*
* Checks whether the characters from position first to position last of the string str form a palindrome.
* If it is palindrome it returns 1. Otherwise it returns 0.
*/
int isPalindrome(int first, int last, char *str)
{
int i;
for(i = first; i <= last; i++){
if(str[i] != str[last-i]){
return 0;
}
}
return 1;
}
/*
* Find and print the largest palindrome found in the string str. Uses isPalindrome as a helper function.
*/
void largestPalindrome(char *str)
{
int i, last, pStart, pEnd;
pStart = 0;
pEnd = 0;
int result;
for(i = 0; i < strlen(str); i++){
for(last = strlen(str); last >= i; last--){
result = isPalindrome(i, last, str);
//Possible error area
if(result == 1 && ((last-i)>(pEnd-pStart))){
pStart = i;
pEnd = last;
}
}
}
printf("Largest palindrome: ");
for(i = pStart; i <= pEnd; i++)
printf("%c", str[i]);
return;
}
/*
* Do not modify this code.
*/
int main(void)
{
int i = 0;
/* you can change these strings to other test cases but please change them back before submitting your code */
//str1 working correctly
char *str1 = "ABCBACDCBAAB";
char *str2 = "ABCBAHELLOHOWRACECARAREYOUIAMAIDOINEVERODDOREVENNGGOOD";
/* test easy example */
printf("Test String 1: %s\n",str1);
largestPalindrome(str1);
/* test hard example */
printf("\nTest String 2: %s\n",str2);
largestPalindrome(str2);
return 0;
}

Your code in isPalindrome doesn't work properly unless first is 0.
Consider isPalindrome(6, 10, "abcdefghhgX"):
i = 6;
last - i = 4;
comparing str[i] (aka str[6] aka 'g') with str[last-i] (aka str[4] aka 'e') is comparing data outside the range that is supposed to be under consideration.
It should be comparing with str[10] (or perhaps str[9] — depending on whether last is the index of the final character or one beyond the final character).
You need to revisit that code. Note, too, that your code will test each pair of characters twice where once is sufficient. I'd probably use two index variables, i and j, set to first and last. The loop would increment i and decrement j, and only continue while i is less than j.
for (int i = first, j = last; i < j; i++, j--)
{
if (str[i] != str[j])
return 0;
}
return 1;

In isPalindrome, replace the line if(str[i] != str[last-i]){ with if(str[i] != str[first+last-i]){.

Here's your problem:
for(i = first; i <= last; i++){
if(str[i] != str[last-i]){
return 0;
}
}
Should be:
for(i = first; i <= last; i++, last--){
if(str[i] != str[last]){
return 0;
}
}
Also, this:
for(last = strlen(str); last >= i; last--){
Should be:
for(last = strlen(str) - 1; last >= i; last--){

Less Code to Achieve this Effect?

I am working on some string manipulation functions, just out of my own interest. However, I may want to use these functions sometime in future code. I wrote the following to check if a substring exists within a string. I ran into a problem though. My program compares each char in both strings, however it had problems with out of order chars. It would take a long time to explain so I will just give an example:
if checking if "oobar" exists in the string "fooobar" my program would have trouble finding the location of the substrings because it would trip up on the first instances of the char 'o'
I developed a work around to this, but that's what it is a work around and not really a solid solution. So I was wondering if anybody could tell me how they would improve the following code (keep in mind I do NOT want to use any additional libraries):
int chksbstr(char *str, char *sbstr)
{
int i, sbstrlen, strlen, p = 0;
for(i = 0; sbstr[i] != '\0'; i++);
sbstrlen = i;
for(i = 0; str[i] != '\0'; i++);
strlen = i;
if(sbstrlen > strlen)
{
printf("\n**Error substring is larger than base string!");
return 2;
}
if(sbstrlen == strlen)
{
if(str == sbstr) return 0;
else return 1;
}
for(i = 0; i <= strlen; i++)
{
if(str[i] == sbstr[p]) p++;
else if(str[i] != str[i - 1]) p = 0;
if(p == sbstrlen) return 0;
}
return 1;
}

23 lines fewer:
if (strstr(baseString, subString) != NULL)
{
/* contains */
}

Recursion can do this better. Oh, and by the way, descriptive identifiers are way better. There's no reason to randomly remove all the vowels, and don't forget const.
int check_substring(const char* str, const char* to_go, const char* substr) {
if (*to_go == '\0') return 1; // Hit all
if (*str == '\0') return 0; // Ran out of string to check
if (*str == *to_go) return check_substring(str + 1, to_go + 1, substr);
else {
if (*str == *substr)
return check_substring(str + 1, substr + 1, substr);
else
return check_substring(str + 1, substr, substr);
}
}
int does_contain_substring(const char* str, const char* sbstr) {
return check_substring(str, sbstr, sbstr);
}

C program to remove repeated char from a string

I came across a interview question that asked to remove the repeated char from a given string, in-place.
So if the input was "hi there" the output expected was "hi ter". It was also told to consider only alphabetic repititions and all the
alphabets were lower case. I came up with the following program. I have comments to make my logic clear. But the program does not work as expectd for some inputs. If the input is "hii" it works, but if its "hi there" it fails. Please help.
#include <stdio.h>
int main()
{
char str[] = "programming is really cool"; // original string.
char hash[26] = {0}; // hash table.
int i,j; // loop counter.
// iterate through the input string char by char.
for(i=0,j=0;str[i];)
{
// if the char is not hashed.
if(!hash[str[i] - 'a'])
{
// hash it.
hash[str[i] - 'a'] = 1;
// copy the char at index i to index j.
str[j++] = str[i++];
}
else
{
// move to next char of the original string.
// do not increment j, so that later we can over-write the repeated char.
i++;
}
}
// add a null char.
str[j] = 0;
// print it.
printf("%s\n",str); // "progamin s ely c" expected.
return 0;
}

when str[i] is a non-alphabet, say a space and when you do:
hash[str[i] - 'a']
your program can blow.
ASCII value of space is 32 and that of a is 97 so you are effectively accessing array hash with a negative index.
To solve this you can ignore non-alphabets by doing :
if(! isalpha(str[i]) {
str[j++] = str[i++]; // copy the char.
continue; // ignore rest of the loop.
}

This is going to break on any space characters (or anything else outside the range 'a'..'z') because you are accessing beyond the bounds of your hash array.

void striprepeatedchars(char *str)
{
int seen[UCHAR_MAX + 1];
char *c, *n;
memset(seen, 0, sizeof(seen));
c = n = str;
while (*n != '\0') {
if (!isalpha(*n) || !seen[(unsigned char) *n]) {
*c = *n;
seen[(unsigned char) *n]++;
c++;
}
n++;
}
*c = '\0';
}

This is code golf, right?
d(s){char*i=s,*o=s;for(;*i;++i)!memchr(s,*i,o-s)?*o++=*i:0;*o=0;}

...
// iterate through the input string char by char.
for(i=0,j=0;str[i];)
{
if (str[i] == ' ')
{
str[j++] = str[i++];
continue;
}
// if the char is not hashed.
if(!hash[str[i] - 'a'])
{
...

#include <stdio.h>
#include <string.h>
int hash[26] = {0};
static int in_valid_range (char c);
static int get_hash_code (char c);
static char *
remove_repeated_char (char *s)
{
size_t len = strlen (s);
size_t i, j = 0;
for (i = 0; i < len; ++i)
{
if (in_valid_range (s[i]))
{
int h = get_hash_code (s[i]);
if (!hash[h])
{
s[j++] = s[i];
hash[h] = 1;
}
}
else
{
s[j++] = s[i];
}
}
s[j] = 0;
return s;
}
int
main (int argc, char **argv)
{
printf ("%s\n", remove_repeated_char (argv[1]));
return 0;
}
static int
in_valid_range (char c)
{
return (c >= 'a' && c <= 'z');
}
static int
get_hash_code (char c)
{
return (int) (c - 'a');
}

char *s;
int i = 0;
for (i = 0; s[i]; i++)
{
int j;
int gap = 0;
for (j = i + 1; s[j]; j++)
{
if (gap > 0)
s[j] = s[j + gap];
if (!s[j])
break;
while (s[i] == s[j])
{
s[j] = s[j + gap + 1];
gap++;
}
}
}

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Check whether a given substring is present in the given string - c

For the array strcp char strcp[30]; you need to support a separate index. Something like int j = 0; while (str[i] != '\0' && str[i] != ' ') { strcp[j++] = str[i++]; } strcp[j] = '\0'; Pay attention to that there is standard C function strstr that can be used to perform the task.

Related

Counting occurrences of words within an inputted string in c

remove specific characters from a string by traversing string only once

Longest Substring Palindrome issue

Less Code to Achieve this Effect?

C program to remove repeated char from a string

Categories

Resources