Substring in C Programming - c

I just want to extract the particular word from the string.
My program is:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFFER_SIZE 100
int main() {
FILE *f;
char buffer[100];
char buf[100];
int count=0;
char res[100];
f=fopen("1JAC.pdb","rb");
while(fgets(buffer,BUFFER_SIZE,f))
{
if(strncmp(buffer,"ATOM",4)==0 && strncmp(buffer+13,"CA",2)==0 && strncmp(buffer+21,"A",1)==0)
{
strcpy(buf,buffer);
}
printf (buf);
Output of the program is
ATOM 1033 CA LEU A 133 33.480 94.428 72.166 1.00 16.93 C
I just want to extract the word "LEU" using substring. I tried something like this:
Substring(17,3,buf);
But it doesn't work...
Could someone please tell about the substring in C.

Memcpy seems to be best way to do this ...
memcpy( destBuff, sourceBuff + 17, 3 );
destBuff[ 3 ] = '\0';
Please remember to add the null terminators if needed (as I have done in the example).
Also this has been answered before, several times on Stack-overflow
(Get a substring of a char*)

//Use the following substring function,it will help you.
int main(int argc, char *argv[])
{
FILE *filepointer;
char string[1700];
filepointer=fopen("agg.txt", "r");
if (filepointer==NULL)
{
printf("Could not open data.txt!\n");
return 1;
}
while (fgets(string, sizeof(string), filepointer) != NULL)
{
char* temp=substring(string,17,3);/*here 17 is the start position and 3 is the length of the string to be extracted*/
}
return 0;
}
char *substring(char *string, int position, int length)
{
char *pointer;
int c;
pointer = (char*) malloc(length+1);
if (pointer == NULL)
{
printf("Unable to allocate memory.\n");
exit(1);
}
for (c = 0 ; c < length ; c++)
{
*(pointer+c) = *(string+position-1);
string++;
}
*(pointer+c) = '\0';
return pointer;
}

char out[4] = {0};
strncpy(out, buf+17, 3);

Related

Longest word in file

My program needs to print longest word which contains only letters from a file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
int checkString(const char s[]) {
unsigned char c;
while ((c = *s) && (isalpha(c) || isblank(c)))
++s;
return *s == '\0';
}
int main() {
char file_name[]="document.txt";
FILE *fp = fopen(file_name, "r");
char *largest = str;
int largest_len = 0;
while (fgets(file_name, 1000, fp) != NULL) {
char *temp = strtok(file_name, " ");
while (temp != NULL) {
if (strlen(temp) > largest_len) {
strcpy(largest, temp);
largest_len = strlen(largest);
}
temp = strtok(NULL, "\",.,1,2,4,5,6,7,8,9 ");
}
}
if(checkString(largest))
printf("%s", largest);
fclose(fp);
return 0;
}
In my code, if the largest word contains only letters it will be printed. How to modify this code to check next words if the largest doesn't contain only letters?
First of all, you cannot store the pointer to longest word like that. You re-use str for the next line and so the pointer is not likely to point to something useful.
Second, while strtok() appears simple, initially, I tend to apply a straightforward approach to a straightforward problem.
The problem is O(n) (where n is the length of the document). You just need to go through it character by character. Of course, since every line is ended by a \n, you can use the line based approach in this case.
So, instead of strtok, simply check each character, if it is a legal word character (an alphanumeric character, that is). You can easily do so with the standard library function isalpha() from header ctype.h.
Below is the program, copying the longest string into a dedicated buffer, using isalpha() and doing the line based reading of the file, just like the code in the original question did.
Of course, this code assumes, no line is ever longer than 999 characters.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <ctype.h>
static size_t gulp(const char* line, size_t istart, size_t len) {
size_t n = 0;
for (size_t i = istart; i < len; i++, n++) {
if (!isalpha(line[i])) {
break;
}
}
return n;
}
int main(int argc, const char * argv[]) {
FILE* f = fopen("document.txt","r");
char line[1000];
char longest_word[1000];
size_t longest_word_length = 0;
while (fgets(line, sizeof(line), f) != NULL) {
size_t i0 = 0;
size_t line_length = strlen(line);
while (i0 < line_length) {
if (isalpha(line[i0])) {
size_t n = gulp(line, i0, line_length);
if (n > longest_word_length) {
strncpy(longest_word, &line[i0], n);
longest_word[n] = '\0';
longest_word_length = n;
}
i0 = i0 + n;
} else {
i0++;
}
}
}
fclose(f);
f = NULL;
if (longest_word_length > 0) {
printf("longest word: %s (%lu characters)\n",
longest_word, longest_word_length);
}
return 0;
}
There are a number of problems here:
you use the same buffer (str) for two different uses: as a read buffer and to store the longest word. If you find the largest word in the first line, the word will be erased when reading the second line. Furthemore, if you find a rather long word at the beginning of a line, the strings pointed to by largest and temp could overlap which leads to undefined behaviour => use a different array or strdup (and free) for largest
you only use the space as possible separator. You should wonder whether you should add tab and/or punctuations
once you have got a word you should ensure that it only contains valid letters before testing its length and ignore it if for example it contains digits.
if a single line can be longer than 1000 characters, you should wrap the end of the current part before the beginning of the next one for the possible case where a long word would be splitted there.
For additional corner case processing, you should specify what to do if a word contains illegal characters but only at one side. For example if . is not used as a word delimiter, a word with an embedded . like "a.b" should be ignored, but a terminating . should only be stripped (like "example." should become "example"
I think the order you do things should be a bit different, here is an example
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
int isCandidate(char* word);
int main(int argc, char* argv[])
{
if (--argc == 0)
{
perror("not enough command line arguments, expecting a filename");
return -1;
}
++argv;
FILE* fp = fopen(*argv, "r");
if (fp == NULL)
{
perror(*argv);
return -1;
}
// get size of file
fseek(fp, 0L, SEEK_END);
long fileLength = ftell(fp);
if (fileLength < 1)
{
perror("file is empty");
return -1;
}
fseek(fp, 0L, SEEK_SET); // position file pointer at the beginning again
// allocate space for the whole file and then read it in
// for a text file it should be OK to do so since they
// normally are not that large.
char* buffer = malloc(fileLength+1);
if (fread(buffer, 1, fileLength, fp) != 0)
{
buffer[fileLength] = '\0'; // make sure the buffer ends with \0
}
else
{
perror("Failed reading into buffer");
return -1;
}
fclose(fp); // we are done with the file
const char filter[] = " \n\r";
char* longestWord = malloc(fileLength+1); // max length in theory
long unsigned int maxLength = 0;
for (char* token = strtok(buffer, filter); token != NULL; token = strtok(NULL, filter))
{
if (isCandidate(token))
{
if (strlen(token) > maxLength)
{
strcpy(longestWord, token);
maxLength = strlen(token);
}
}
}
printf("Longest word:'%s', len=%lu\n", longestWord, maxLength);
free(longestWord);
free(buffer);
}
int isCandidate(char* word)
{
if (word == NULL)
{
perror("invalid argument to isCandidate");
return 0;
}
for (char* ch = word; *ch; ++ch)
{
if (!isalpha(*ch)) return 0;
}
return 1;
}

Is there a way combine a specific number of chars from fgetc?

I have found some information about strcat and experimented with it but it doesn't work the way i expected for example :
char a = 'a', b = 'b';
strcat(a,b);
printf("%c", a);
this will produce an error "initialization of 'char' from 'char *' makes integer from pointer without a cast". Is there a way to unite chars until the wanted word is complete and store it in 1 variable? Or am i going completely wrong about this. The purpose of the code is to read an xml file and build a tree with the tags.
Any help is or advice is very much appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main() {
int count = 0;
char c, word;
FILE *file = fopen("example.xml", "r");
if (file == NULL) {
return 0;
}
do {
c = fgetc(file);
if (c == '<') {
count = 1;
}
if (c == '>') {
count = 0;
printf(">");
}
if (count == 1) {
printf("%c", c);
}
if (feof(file)){
break ;
}
} while(1);
fclose(file);
return(0);
}
I'm not sure exactly what you're trying to accomplish, but you could try something like the following, which will print every <tag>, i.e., every string in the file between <...>'s , and will also accumulate them in an array of strings called tags[]. And note that you'd might want to add checks that avoid going over the 99 chars/tag and 999 tags total. But if this isn't anything like what you're actually trying to do, maybe clarify the question.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main() {
int ntags=0, ichar=0,nchars=0;
char c='\000', tags[999][99];
FILE *file = fopen("example.xml","r");
if (file==NULL) return(0);
while((ichar=fgetc(file))!=EOF) {
c = (char)ichar;
if (nchars==0 && c!='<') continue;
tags[ntags][nchars++] = c;
if (c=='>') {
tags[ntags][nchars] = '\000';
printf("tag#%d = %s\n",ntags+1,tags[ntags]);
nchars=0; ntags++; }
}
/* do you now want to do anything with your tags[] ??? */
fclose(file);
return(0);
}
You are trying to use a function, those parameters are char *
char *strcat(char *dest, const char *src)
but you gave strcat a char but it wants a char*
int main()
{
char str1[20] = "this";
char str2[] = "is";
strcat(str1, str2);
printf("%s", str1);
return 0;
}
this is the way i thinkt you want it

How to split with multiple delimiters in C

I have this line of text:
32+-#3#2-#3#3
I need to separate numbers from each other. So basically the result would be like this:
3
2+-
3
2-
3
3
This is my code but it's not working properly because I have numbers with two digits:
#include <stdio.h>
#include <string.h>
int main(void) {
char string[50] = "32-#3#2-#3#3";
// Extract the first token
char *token = strtok(string, "#");
// loop through the string to extract all other tokens
while (token != NULL) {
printf(" %s\n", token); //printing each token
token = strtok(NULL, "#");
}
return 0;
}
You can't do it with strtok (alone), because there is no delimiter between the numbers you want to split. It's easier without strtok, just print what you want printed and add a separator unless a character which belongs to the token follows:
#include <stdio.h>
int main()
{
char string[] = "32+-#3#2-#3#3";
for (char *token = string; *token; ++token)
if ('0'<=*token && *token<='9' || *token=='+' || *token=='-')
{
putchar(*token);
if (token[1]!='+' && token[1]!='-') putchar('\n');
}
}
If you consider this too easy, you can use a regular expression to match the tokens:
#include <stdio.h>
#include <regex.h>
int main()
{
char *string = "32+-#3#2-#3#3";
regex_t reg;
regcomp(&reg, "[0-9][+-]*", 0);
regmatch_t match = {0};
while (regexec(&reg, string+=match.rm_eo, 1, &match, 0) == 0)
printf("%.*s\n", (int)(match.rm_eo-match.rm_so), string+match.rm_so);
}
There is a simple way to achieve this, but in C is a bit more complicated since we don't have vector as in C++ but I can suggest a pure C implementation which can be improved:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void split_ss(const char* src,const char* pattern, char** outvec, size_t* outsize)
{
const size_t pat_len = strlen(pattern);
char* begin = (char*) src;
const char* next = begin;
if ((begin = strstr((const char*)begin, pattern)) != 0x00) {
unsigned int size = begin - next;
*outvec = malloc(sizeof(char) * size);
memcpy(*outvec , next, size);
outvec++;
(*outsize)+=1;
split_ss(begin+pat_len, pattern, outvec, outsize);
} else {
unsigned int size = &src[strlen(src)-1] - next + 1;
*outvec = malloc(sizeof(char) * size);
memcpy(*outvec, next, size);
(*outsize) += 1;
}
}
int main()
{
char* outdata[64] = {0};
size_t size, i=0;
split_ss("32+-#3#2-#3#3", "#", outdata, &size);
for(i=0; i < size; i++) {
printf("[%s]\r\n", outdata[i]);
}
// make sure to free it
return 0;
}
strstr is used to split by string rather than a character. Also output is a poorman 2D array with out size to iterate it and don't forget to free it.
strtok() is not the right tool for you purpose... As a matter of fact strtok() is rarely the right tool for any purpose because of its tricky semantics and side effects.
A simple loop will do:
#include <stdio.h>
int main(void) {
char string[50] = "32+-#3#2-#3#3";
for (char *p = string; *p; p++) {
if (*p == '#')
continue;
putchar(*p);
while (p[1] == '+' || p[1] == '-')
putchar(*++p);
putchar('\n');
}
return 0;
}

I need to read a file until "over" is encountered [duplicate]

This question already has an answer here:
C: How to compare two strings? [duplicate]
(1 answer)
Closed 9 years ago.
I have trouble reading a file until a word is encountered, this is what I have done but I don't think the if statement allows strings to be written within them
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
int main()
{
int i;
char buffer[100];
FILE *fptr = fopen("testing.txt", "r");
if(fptr != NULL)
printf("file opened successfully\n");
else {
printf("file error occured\n");
printf("terminating program...\n");
return 0;
}
while (fgets(buffer, 100,fptr))
{
if(buffer != "over") {
printf("%s ", buffer);
}
else
return 0;
}
}
When you do
if(buffer != "over")
you compare two pointers, the pointer to buffer and the pointer to the string literal "over". Those pointers will never be the same.
To compare strings in C you have to use the strcmp function.
For comparing string you need to use strcmp() function. You can't do directly by if(buffer != "over"). It compares pointers instead of stings.
#include <stdio.h>
#include <string.h>
int isEndWith(const char *string, const char *word){
int len = strlen(string);
int lenw = strlen(word);
if(len >= lenw)
return strncmp(string + len - lenw, word, lenw)==0;//or use strcmp
else
return 0;
}
int main(void){
char str1[] = "how are you over i am busy over\n";
char str2[] = "how are you over i am busy over\n";
char *p;
if(p=strchr(str1, '\n'))
*p = '\0';//chomp \n
if(!isEndWith(str1, "over"))//check case end string
printf("%s", str1);
else {
int len = strlen(str1);
str1[len - 4] = '\0';// 4 : strlen("over");
printf("%s\n", str1);//print "how are you over i am busy \n"
}
if(p=strstr(str2, "over"))//check case contain string
*p = '\0';
printf("%s\n", str2);//print "how are you \n"
return 0;
}

How can I stop scanf-ing input after a certain character?

I'm working on a function that takes filepaths and dices them up into smaller sections.
For example, if the input parameter was "cd mypath/mystuff/stack/overflow/string", I want to be able to return "cd" "mypath", "mystuff", "stack", "overflow", and "string" in succession.
While I could simply continually use "getchar", appending the results to an ever-increasing string, stopping when getchar returns a '/', I feel like there must be a more elegant way to achieve the same functionality.
Any ideas?
You can use the char * strtok ( char * str, const char * delimiters ); using / as separator.
An example here:
http://www.cplusplus.com/reference/clibrary/cstring/strtok/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char* argv[]) {
if (argc != 2) {
fprintf(stderr, "Usage: %s path\n", argv[0]);
exit(EXIT_FAILURE);
}
char* saveptr = NULL;
for (char* str = argv[1]; ; str = NULL) {
char *token = strtok_r(str, "/", &saveptr);
if (token == NULL)
break;
printf("%s\n", token);
}
return 0;
}
Example
clang -Wall *.c && ./a.out mypath/mystuff/stack/overflow/string
mypath
mystuff
stack
overflow
string
Here's an example of how sscanf() can stop after a certain character.
sscanf("abcd/efgh", "%[^/]", &buf);
printf("%s\n", buf);
Should produce
abcd
EDIT: You could try something like this to advance sscanf() input. I have not tested this for various edge cases, but it should get the idea across.
char *str = "abcd/efgh/ijk/xyz";
while (sscanf(str, "%[^/]%n", &buf, &n)) {
printf("%s\n", buf);
str += n;
if (*str == '\0')
break;
++str;
}
should produce
abcd
efgh
ijk
xyz
Here is an example using regcomp, regexec. Compile and run it with the first arg being the character you are searching on, while the second arg is the string to search.
For example, a.out X abcXdefXghiXjkl will print abc def ghi jkl on separate lines.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <regex.h>
int
main(int argc, char *argv[]) {
int len;
char *cp;
char *token;
regex_t preg;
regmatch_t match;
if (regcomp(&preg, argv[1], REG_EXTENDED) != 0) {
return 0;
}
for (cp = argv[2]; *cp != '\0'; cp += len) {
len = (regexec(&preg, cp, 1, &match, 0) == 0) ? match.rm_eo : strlen(cp);
token = malloc(len);
strncpy(token, cp, len);
printf("%s\n", token);
}
return 0;
}

Resources