Related
Suppose I have the following string:
in the interior of the inside is an inner inn
and I want to search, say, for the occurences of "in" (how often "in" appears).
In my program, I've used strstr to do so, but it returns false positives. It will return:
- in the interior of the inside is an inner inn
- interior of the inside is an inner inn
- inside is an inner inn
- inner inn
- inn
Thus thinking "in" appears 5 times, which is obviously not true.
How should I proceed in order to search exclusively for the word "in"?
Try the following
#include <stdio.h>
#include <string.h>
#include <ctype.h>
int main(void)
{
char *s = "in the interior of the inside is an inner inn";
char *t = "in";
size_t n = strlen( t );
size_t count = 0;
char *p = s;
while ( ( p = strstr( p, t ) ) != NULL )
{
char *q = p + n;
if ( p == s || isblank( ( unsigned char ) *( p - 1 ) ) )
{
if ( *q == '\0' || isblank( ( unsigned char ) *q ) ) ++count;
}
p = q;
}
printf( "There are %zu string \"%s\"\n", count, t );
return 0;
}
The output is
There are 1 string "in"
You can also add a check for ispunct if the source string can contain puctuations.
Search for " in "; note the spaces. Then consider the edge cases of a sentence starting with "in " and ending with " in".
One more way to do it is:
Use strtok() on your whole sentence with space as delimiter.
So now you can check your token against "in"
Add a isdelimiter() to check the before and after result of strstr().
// Adjust as needed.
int isdelimiter(char ch) {
return (ch == ' ') || (ch == '\0');
}
int MatchAlex(const char *haystack, const char *needle) {
int match = 0;
const char *h = haystack;
const char *m;
size_t len = strlen(needle);
while ((m = strstr(h, needle)) != NULL) {
if ((m == haystack || isdelimiter(m[-1])) && isdelimiter(m[len])) {
// printf("'%s'",m);
match++;
h += len;
} else {
h++;
}
}
return match;
}
int main(void) {
printf("%d\n",
MatchAlex("in the interior of the inside is an inner inn xxin", "in"));
return 0;
}
I would like to check for certain characters in an array at certain positions.
The array starts with $$$$ then has eight characters then another $, eight more characters and finishes with $$$$. For example char my_array[50] = "$$$$01FF4C68$02543EFE$$$$";
I want to check that all the positions where there are supposed to be $ do have them.
I could split the array into the three parts that contain the characters and then test for them separately but is there a better way of doing this?
Why complicate things?
if (my_array[0] != '$'
|| my_array[1] != '$'
|| my_array[2] != '$'
|| my_array[3] != '$'
|| my_array[12] != '$'
|| my_array[21] != '$'
|| my_array[22] != '$'
|| my_array[23] != '$'
|| my_array[24] != '$')
{
printf("Wrong!\n");
}
Use strstr()
To check if the array begins with eight $ : strstr(my_array, "$$$$$$$$")
To check if the array ends with eight $ : strstr(my_array + 16, "$$$$$$$$")
The +16 is here to shift the pointer so the beginning of my_array + 16 will be the place were the $ are supposed to be.
You might want to use the strstr functinn to find the $$$....
yes there is, you might want to use Regular Expressions, Please read http://www.peope.net/old/regex.html
If you use POSIX-compatible platform and some more complex patterns are about to emerge in your code, you can take a look at regular expressions, e.g. PCRE
You could also avoid using strstr since the format is simple and fixed; until the example format holds::
bool ok = strlen(my_array) >= 25 /* just be sure there are at least all expected chars */ &&
strncmp(my_array, "$$$$", 4) == 0 &&
strncmp(my_array + 12, "$", 1) == 0 /* my_array[12] == '$' */&&
strncmp(my_array + 21, "$$$$", 4) == 0;
A long option without using the string.h library is, make 3 tests:
#include <stdio.h>
int firstTest( char a[] );
int secondTest( char a[] );
int thirdTest( char a[] );
int main (void)
{
int result;
char my_array[50] = "$$$$01FF4C68$02543EFE$$$$";
if( ( firstTest( my_array ) == 1 ) && ( secondTest( my_array ) == 1 ) && ( thirdTest( my_array ) == 1 ) ){
printf( "The string is valid.\n" );
result = 1;
}
else{
printf( "The string is invalid.\n" );
result = 0;
}
return 0;
}
int firstTest( char a[] )
{
int i;
for( i = 0; i < 4; i++ ){
if ( a[i] != '$' ){
return 0;
break;
}
return 1;
}
}
int secondTest( char a[] )
{
if( my_array[12] != '$' )
return 0;
else
return 1;
}
int thirdTest( char a[] )
{
int i;
for( i = 21; i < 25; i++ ){
if ( a[i] != '$' ){
return 0;
break;
}
return 1;
}
}
sscanf should do the work
char my_array[50] = "$$$$01FF4C68$02543EFE$$$$";
int n,m;
if( !sscanf(my_array,"$$$$%*8[0-9A-H]%n$%*8[0-9A-H]$$$$%n",&n,&m) && n==12 && m==25 )
puts("ok");
else
puts("not ok");
I was trying to search for a particular word in a matrix of characters through C but was unable to come to a fixed solution.
For ex:
Suppose I have to search for the word INTELLIGENT in a matrix of characters (3*9)
(Once you have picked a character from the matrix to form a sentence, you cannot pick it again to form the same sentence.There is a path from any cell to all its neighboring cells. A neighbor may share an edge or a corner.)
IIIINN.LI
....TTEGL
.....NELI
Output: YES (the word INTELLIGENT can be found)
Can anybody please give a solution to the above problem !!!!
Use a depth first search.
You can do this using a recursive algorthm. Find all the (unused) places containing the first letter then see if it is possible to find the rest of the word on the remaining board by starting from one of the adjacent squares.
#include <stdio.h>
char Matrix[3][9] = {
{ 'I','I','I','I','N','N','.','L','I'},
{ '.','.','.','.','T','T','E','G','L'},
{ '.','.','.','.',',','N','E','L','I'}
};
char Choice[3][9] = { { 0 }, { 0 }, { 0 } };
const char WORD[] = "INTELLIGENT";
const int Len = sizeof(WORD)-1;
int Path[sizeof(WORD)-1] = { 0 };
char get(int row, int col){
if(1 > col || col > 9) return '\0';
if(1 > row || row > 3) return '\0';
if(Choice[row-1][col-1] || Matrix[row-1][col-1] == '.')
return '\0';
else
return Matrix[row-1][col-1];
}
#define toLoc(r, c) (r)*10+(c)
#define getRow(L) L/10
#define getCol(L) L%10
int search(int loc, int level){
int r,c,x,y;
char ch;
if(level == Len) return 1;//find it
r = getRow(loc);
c = getCol(loc);
ch = get(r,c);
if(ch == 0 || ch != WORD[level]) return 0;
Path[level]=toLoc(r,c);
Choice[r-1][c-1] = 'v';//marking
for(x=-1;x<=1;++x){
for(y=-1;y<=1;++y){
if(search(toLoc(r+y,c+x), level + 1)) return 1;
}
}
Choice[r-1][c-1] = '\0';//reset
return 0;
}
int main(void){
int r,c,i;
for(r=1;r<=3;++r){
for(c=1;c<=9;++c){
if(search(toLoc(r,c), 0)){
printf("YES\nPath:");
for(i=0;i<Len;++i){
printf("(%d,%d)", getRow(Path[i]), getCol(Path[i]));
}
printf("\n");
return 0;
}
}
}
printf("NO\n");
return 0;
}
I think this is what you mean..... Though it seems simpler to what you currently have been offered, so I may have misunderstood the question.
I use Numpy to reshape an arbitrary array into a single
list of letters, then we create a mask of the search term and
a copy of the input list.
I tick off each letter to search for while updating the mask.
import numpy as np
import copy
def findInArray(I,Word):
M=[list(x) for x in I]
M=list(np.ravel(M))
print "Letters to start: %s"%"".join(M)
Mask=[False]*len(Word)
T = copy.copy(M)
for n,v in enumerate(Word):
try:
p=T.index(v)
except ValueError:
pass
else:
T[p]=''
Mask[n]=True
print "Letters left over: %s"%"".join(T)
if all(Mask):print "Found %s"%Word
else:print "%s not Found"%Word
print "\n"
return all(Mask)
I=["IIIINN.LI","....TTEGL",".....NELI"]
findInArray(I,"INTEL")
findInArray(I,"INTELLIGENT")
findInArray(I,"INTELLIGENCE")
Example output
Letters to start: IIIINN.LI....TTEGL.....NELI
Letters left over: IIIN.I....TGL.....NELI
Found INTEL
Letters to start: IIIINN.LI....TTEGL.....NELI
Letters left over: II.I.........NLI
Found INTELLIGENT
Letters to start: IIIINN.LI....TTEGL.....NELI
Letters left over: II.I....T.....NLI
INTELLIGENCE not Found
#include <stdio.h>
#define ROW 1
#define COL 11
char Matrix[ROW][COL] = { { 'I','N','T','E','L','L','I','G','E', 'N', 'T'} };
char Choice[ROW][COL] = { { 0 } };
const char WORD[] = "INTELLIGENT";
const int Len = sizeof(WORD)-1;
int Path[sizeof(WORD)-1] = { 0 };
char get(int row, int col){
if(1 > col || col > COL) return '\0';
if(1 > row || row > ROW) return '\0';
if(Choice[row-1][col-1] || Matrix[row-1][col-1] == '.')
return '\0';
else
return Matrix[row-1][col-1];
}
#define toLoc(r, c) (r)*16+(c)
#define getRow(L) L/16
#define getCol(L) L%16
int search(int loc, int level){
int r,c,x,y;
char ch;
if(level == Len) return 1;//find it
r = getRow(loc);
c = getCol(loc);
ch = get(r,c);
if(ch == 0 || ch != WORD[level]) return 0;
Path[level]=toLoc(r,c);
Choice[r-1][c-1] = 'v';//marking
for(x=-1;x<=1;++x){
for(y=-1;y<=1;++y){
if(search(toLoc(r+y,c+x), level + 1)) return 1;
}
}
Choice[r-1][c-1] = '\0';//reset
return 0;
}
int main(void){
int r,c,i;
for(r=1;r<=ROW;++r){
for(c=1;c<=COL;++c){
if(search(toLoc(r,c), 0)){
printf("YES\nPath:");
for(i=0;i<Len;++i){
printf("(%d,%d)", getRow(Path[i]), getCol(Path[i]));
}
printf("\n");
return 0;
}
}
}
printf("NO\n");
return 0;
}
Hey there!
I'm stuck on an ANSI C problem which I think should be pretty trivial (it is at least in any modern language :/).
The (temporary) goal of my script is to split a string (array of char) of 6 characters ("123:45") which represents a timestamp minutes:seconds (for audio files so it's ok to have 120 minutes) into just the minutes and just the seconds.
I tried several approaches - a general one with looking for the ":" and a hardcoded one just splitting the string by indices but none seem to work.
void _splitstr ( char *instr, int index, char *outstr ) {
char temp[3];
int i;
int strl = strlen ( instr );
if ( index == 0 ) {
for ( i = 0; i < 3; ++i ) {
if ( temp[i] != '\0' ) {
temp[i] = instr[i];
}
}
} else if ( index == 1 ) {
for ( i = 6; i > 3; i-- ) {
temp[i] = instr[i];
}
}
strcpy ( outstr, temp );
}
Another "funny" thing is that the string length of an char[3] is 6 or 9 and never actually 3. What's wrong with that?
How about using sscanf(). As simple as it can get.
char time[] = "123:45";
int minutes, seconds;
sscanf(time, "%d:%d", &minutes, &seconds);
This works best if you can be sure that time string syntax is always valid. Otherwise you must add check for that. On success, sscanf function returns the number of items succesfully read so it's pretty easy to detect errors too.
Working example: http://ideone.com/vVoBI
How about...
int seconds, minutes;
minutes = atoi(instr);
while(*instr != ':' && *++instr != '\0');
seconds = atoi(instr);
Should be pretty fast.
You have basically three options
change the input string (can't be a string literal)
copy data to output strings (input can be a literal)
transform sequences of characters to numbers
Changing the input string implies transforming "123:45" to "123\0" "45" with an embedded null.
Copying data implies managing storage for the copy.
Transforming sequences of characters implies using, for example, strtol.
You aren't putting a terminating null on your string in temp[], so when you do a strlen(temp), you are accessing arbitrary memory.
Using your known lengths, you can use something like this:
char temp[4];
if (index==0)
{
strncpy(temp, instr, 3);
temp[3] = 0;
}
else if (index==1)
{
strncpy(temp, instr+4, 2);
temp[2] = 0;
}
strcpy(outstr, temp);
But, I'll caution that I've skipped all sorts of checking for valid lengths in instr and outstr.
you can try something like that:
void break_string(const char* input, char* hours, char* minutes)
{
if(input == 0 || hours == 0 || minutes == 0)
return;
while(*input != ':')
*hours++ = *input++;
*hours = '\0';
++input;
while(*minutes++ = *input++);
return;
}
Here is the same function a bit simplified:
void break_string(const char* input, char* hours, char* minutes)
{
if(input == 0 || hours == 0 || minutes == 0)
return;
while(*input != ':')
{
*hours = *input;
++hours;
++input;
}
*hours = '\0';
++input; //ignore the ':' part
while(*input)
{
*minutes = *input;
++minutes;
++input;
}
*minutes = '\0';
return;
}
int main()
{
char* test = "123:45";
char* minutes = malloc( sizeof(char) * 12 );
char* hours = malloc( sizeof(char) * 12 );
break_string( test , hours , minutes );
printf("%s , %s \n" , hours , minutes );
//...
free( minutes );
free( hours ) ;
}
This?
char *mins, *secs;
mins = str;
while(*(++str) != ':');
str[0] = '\0';
secs = s + 1;
Here's one way, I have ignore the "index" argument above:
#include <stdio.h>
#include <string.h>
void _splitstr ( char *instr, char *outstr ) {
char temp[10];
char* end = strchr(instr, ':');
int i = 0;
if(end != 0) {
while(instr != end)
temp[i++] = *instr++;
temp[i] = '\0';
strcpy(outstr, temp);
} else {
outstr = '\0';
}
}
I was going through some Amazon interview questions on CareerCup.com, and I came across this interesting question which I haven't been able to figure out how to do. I have been thinking on this since 2 days. Either I am taking a way off approach, or its a genuinely hard function to write.
Question is as follows:
Write a function in C that can find if a string is a sub-string of another. Note that a mismatch of one character
should be ignored.
A mismatch can be an extra character: ’dog’ matches ‘xxxdoogyyyy’
A mismatch can be a missing character: ’dog’ matches ‘xxxdgyyyy’
A mismatch can be a different character: ’dog’ matches ‘xxxdigyyyy’
The return value wasn't mentioned in the question, so I assume the signature of the function can be something like this:
char * MatchWithTolerance(const char * str, const char * substr);
If there is a match with the given rules, return the pointer to the beginning of matched substring within the string. Else return null.
Bonus
If someone can also figure out a generic way of making the tolerance to n instead of 1, then that would be just brilliant.
In that case the signature would be:
char * MatchWithTolerance(const char * str, const char * substr, unsigned int tolerance = 1);
This seems to work, let me know if you find any errors and I'll try to fix them:
int findHelper(const char *str, const char *substr, int mustMatch = 0)
{
if ( *substr == '\0' )
return 1;
if ( *str == '\0' )
return 0;
if ( *str == *substr )
return findHelper(str + 1, substr + 1, mustMatch);
else
{
if ( mustMatch )
return 0;
if ( *(str + 1) == *substr )
return findHelper(str + 1, substr, 1);
else if ( *str == *(substr + 1) )
return findHelper(str, substr + 1, 1);
else if ( *(str + 1) == *(substr + 1) )
return findHelper(str + 1, substr + 1, 1);
else if ( *(substr + 1) == '\0' )
return 1;
else
return 0;
}
}
int find(const char *str, const char *substr)
{
int ok = 0;
while ( *str != '\0' )
ok |= findHelper(str++, substr, 0);
return ok;
}
int main()
{
printf("%d\n", find("xxxdoogyyyy", "dog"));
printf("%d\n", find("xxxdgyyyy", "dog"));
printf("%d\n", find("xxxdigyyyy", "dog"));
}
Basically, I make sure only one character can differ, and run the function that does this for every suffix of the haystack.
This is related to a classical problem of IT, referred to as Levenshtein distance.
See Wikibooks for a bunch of implementations in different languages.
This is slightly different than the earlier solution, but I was intrigued by the problem and wanted to give it a shot. Obviously optimize if desired, I just wanted a solution.
char *match(char *str, char *substr, int tolerance)
{
if (! *substr) return str;
if (! *str) return NULL;
while (*str)
{
char *str_p;
char *substr_p;
char *matches_missing;
char *matches_mismatched;
str_p = str;
substr_p = substr;
while (*str_p && *substr_p && *str_p == *substr_p)
{
str_p++;
substr_p++;
}
if (! *substr_p) return str;
if (! tolerance)
{
str++;
continue;
}
if (strlen(substr_p) <= tolerance) return str;
/* missed due to a missing letter */
matches_missing = match(str_p, substr_p + 1, tolerance - 1);
if (matches_missing == str_p) return str;
/* missed due to a mismatch of letters */
matches_mismatched = match(str_p + 1, substr_p + 1, tolerance - 1);
if (matches_mismatched == str_p + 1) return str;
str++;
}
return NULL;
}
Is the problem to do this efficiently?
The naive solution is to loop over every substring of size substr in str, from left to right, and return true if the current substring if only one of the characters is different in a comparison.
Let n = size of str
Let m = size of substr
There are O(n) substrings in str, and the matching step takes time O(m). Ergo, the naive solution runs in time
O(n*m)
With arbitary no. of tolerance levels.
Worked for all the test cases I could think of. Loosely based on |/|ad's solution.
#include<stdio.h>
#include<string.h>
report (int x, char* str, char* sstr, int[] t) {
if ( x )
printf( "%s is a substring of %s for a tolerance[%d]\n",sstr,str[i],t[i] );
else
printf ( "%s is NOT a substring of %s for a tolerance[%d]\n",sstr,str[i],t[i] );
}
int find_with_tolerance (char *str, char *sstr, int tol) {
if ( (*sstr) == '\0' ) //end of substring, and match
return 1;
if ( (*str) == '\0' ) //end of string
if ( tol >= strlen(sstr) ) //but tol saves the day
return 1;
else //there's nothing even the poor tol can do
return 0;
if ( *sstr == *str ) { //current char match, smooth
return find_with_tolerance ( str+1, sstr+1, tol );
} else {
if ( tol <= 0 ) //that's it. no more patience
return 0;
for(int i=1; i<=tol; i++) {
if ( *(str+i) == *sstr ) //insertioan of a foreign character
return find_with_tolerance ( str+i+1, sstr+1, tol-i );
if ( *str == *(sstr+i) ) //deal with dletion
return find_with_tolerance ( str+1, sstr+i+1, tol-i );
if ( *(str+i) == *(sstr+i) ) //deal with riplacement
return find_with_tolerance ( str+i+1, sstr+i+1, tol-i );
if ( *(sstr+i) == '\0' ) //substr ends, thanks to tol & this loop
return 1;
}
return 0; //when all fails
}
}
int find (char *str, char *sstr, int tol ) {
int w = 0;
while (*str!='\0')
w |= find_with_tolerance ( str++, sstr, tol );
return (w) ? 1 : 0;
}
int main() {
const int n=3; //no of test cases
char *sstr = "dog"; //the substr
char *str[n] = { "doox", //those cases
"xxxxxd",
"xxdogxx" };
int t[] = {1,1,0}; //tolerance levels for those cases
for(int i = 0; i < n; i++) {
report( find ( *(str+i), sstr, t[i] ), *(str+i), sstr, t[i] );
}
return 0;
}