C Find all instances of a word - c

So I'm trying to find all instances of a word using this section of code
/*
 * Fragment: scans the stream `src` one character at a time, builds up the
 * current whitespace-delimited word, and records in `lines` the value of
 * `line` each time a completed word equals `find`.
 * `src`, `find`, `getLines` and `append` are declared outside this fragment.
 */
/* Measure the stream by seeking to its end, then rewind to the start. */
fseek(src, 0, SEEK_END);
int size = ftell(src);
fseek(src, 0, SEEK_SET);
/* `line` is never modified anywhere in this fragment. */
int line = 0;
int i = 0;
char ch = ' ';
/* NOTE(review): `word` has no initializer; it is first assigned only in the
 * "start of a new word" branch below, yet it is printed (and may be passed to
 * append) before that branch has necessarily run. */
char *word;
/* Starts as a non-space character, so the first character read is treated as
 * if it continued an existing word. */
char lastch = 'x';
int j = 0;
/* One slot per value returned by getLines(src); -1 marks an unused slot. */
int lines[getLines(src)];
for(i = 0; i < getLines(src); i++){
lines[i] = -1;
}
for(i = 0; i < size; i++){
/* fgetc returns int; the result is narrowed to char here. */
ch = fgetc(src);
printf("%s %c", word, '\n');
/* Transition non-space -> space: a word has just ended. */
if(!isspace(lastch) && isspace(ch)){
if(strcmp(word, find) == 0){
/* NOTE(review): when j == 0 this reads lines[-1], which is outside the array. */
if(lines[j - 1] != line){
lines[j] = line;
j++;
}
}
}else{
if(isspace(lastch) && isspace(ch)){
/* Run of whitespace: nothing to do (lastch is already whitespace). */
continue;
}else if(isspace(lastch) && !isspace(ch)){
/* First character of a new word. The value returned by append() is
 * discarded, so `word` still points at the "" literal afterwards. */
word = "";
append(word, ch);
}else if(!isspace(lastch) && !isspace(ch)){
/* Middle of a word; again the value returned by append() is discarded. */
append(word, ch);
}
}
lastch = ch;
}
Where as append() is
/*
 * Build a new string consisting of `s` followed by the single character `c`.
 *
 * `s` itself is never modified. The result is allocated with malloc() and is
 * owned by the caller, who must free() it. Returns NULL if the allocation
 * fails.
 *
 * The result is written straight into the heap buffer: no variable-length
 * array is placed on the stack (which could overflow for long inputs) and no
 * non-ISO strdup() is needed.
 */
char *append(const char *s, char c) {
    size_t len = strlen(s);
    char *out = malloc(len + 2);    /* old text + new char + terminator */

    if (out == NULL) {
        return NULL;
    }
    memcpy(out, s, len);
    out[len] = c;
    out[len + 1] = '\0';
    return out;
}
Could anyone explain to me why the String word does not get any characters added into it?
I'm pretty sure it has to do with the append function, thanks.

So after having read my comment, read my answer as well:
word = "";
append(word, ch);
doesn't do what you think it does. It does not append a character to word itself, instead it appends a character to a writable copy of word. You should really write
} else if(isspace(lastch) && !isspace(ch)) {
word = append("", ch);
} else if(!isspace(lastch) && !isspace(ch)) {
char *tmp = append(word, ch);
free(word);
word = tmp;
}
instead. Also, please use whitespace and indentation properly; as posted, your code is hard to read.

Related

Removing a newline from the middle of a string using getchar

Right now I have a string looking like this:
A sentence
with a newline.
I'm reading the string in via console input like so:
ch = getchar();
while (ch != '.') {
msg[i] = ch;
i++;
ch = getchar();
}
And, after reading it in, I remove the whitespace present by doing this in a function and applying to the msg char array:
/*
 * Delete every ' ' character from str, compacting the string in place.
 * Only the plain space is removed; other whitespace (tabs, newlines) stays.
 * Returns str.
 */
char *remove_white_spaces(char *str) {
    char *dst = str;

    for (const char *src = str; *src != '\0'; src++) {
        if (*src == ' ') {
            continue;
        }
        *dst++ = *src;
    }
    *dst = '\0';
    return str;
}
I've tried looping over it and stopping at \n but that leaves an output of "Asentence", as the string terminates as the \n is set to 0.
Whole main:
/*
 * Read characters from stdin up to (not including) the first '.', append a
 * '.', strip the blanks with remove_white_spaces() and print the result.
 *
 * Reading also stops at end of input or when msg is full, so the buffer can
 * never be overrun and a missing '.' cannot cause an endless loop.
 * Assumes MAX_MSG_LEN >= 1.
 */
int main(void) {
    char msg[MAX_MSG_LEN+1];
    int ch;     /* int, not char: getchar() returns EOF as an out-of-band int */
    int i = 0;

    ch = getchar();
    /* Leave room for the trailing '.' and the terminating '\0'. */
    while (ch != '.' && ch != EOF && i < MAX_MSG_LEN - 1) {
        msg[i] = (char)ch;
        i++;
        ch = getchar();
    }
    msg[i] = '.';
    msg[i + 1] = '\0';
    remove_white_spaces(msg);
    printf("%s\n", msg);
    return 0;
}
You can use the isspace function to test for and skip any/all whitespace characters, include the normal space and the newline character(s):
#include <ctype.h> // For definition of "isspace"
/*
 * Delete every whitespace character (as classified by isspace) from str,
 * compacting the string in place. Returns str.
 */
char *remove_white_spaces(char *str) {
    char *dst = str;

    for (const char *src = str; *src != '\0'; src++) {
        /* Cast first: isspace() must not be handed a negative char value. */
        if (isspace((unsigned char)*src)) {
            continue;
        }
        *dst++ = *src;
    }
    *dst = '\0';
    return str;
}
On the reason for casting the argument to isspace to an unsigned char, see this discussion.
Function removing and replacing any chars in the string.
toRemove - chars to remove
addSpace - replace with space
allowMultiple - allow multiple spaces when replacing several adjacent
characters
allowEdges - allow adding spaces at the front and at the end
/*
 * Remove, in place, every character of str that occurs in toRemove.
 *
 *   str           - string to edit; returned unchanged if NULL
 *   toRemove      - set of characters to drop; str is returned unchanged if NULL
 *   addSpace      - non-zero: put a ' ' where characters were removed
 *   allowMultiple - non-zero: one ' ' per removed character; zero: a run of
 *                   removed characters collapses to a single ' '
 *   allowEdges    - non-zero: spaces may be produced at the very start and
 *                   end; zero: no replacement space is written at the start
 *                   and trailing spaces are trimmed
 *
 * Returns str. The result is always NUL-terminated at the write position;
 * previously, with allowEdges == 0, the terminator was only written when the
 * result happened to end in a space, leaving stale characters otherwise.
 */
char *removeChars(char *str, const char *toRemove, const int addSpace, const int allowMultiple, int const allowEdges)
{
    char *rd = str, *wr = str;
    int replaced = 0;   /* was the last thing written a replacement space? */

    if (!str || !toRemove)
        return str;

    for (; *rd; rd++)
    {
        if (!strchr(toRemove, *rd))
        {
            *wr++ = *rd;
            replaced = 0;
            continue;
        }
        if (addSpace && (!replaced || allowMultiple) && (wr != str || allowEdges))
        {
            *wr++ = ' ';
            replaced = 1;
        }
    }

    if (!allowEdges)
    {
        /* Trim trailing spaces; as before, the first character is kept. */
        while (wr - str > 1 && wr[-1] == ' ')
            wr--;
    }
    *wr = '\0';
    return str;
}
/* Demo: clean a sample string with removeChars() and print it in backticks. */
int main(void)
{
    char sample[] = "%%%%%A sentence\n\n\nwith!##$%^a newline.%%%%%%%";
    const char *cleaned = removeChars(sample, "\n!##$%^", 1, 0, 0);

    printf("`%s`\n", cleaned);
}
Following the suggestion of @MarkBenningfield, I did the following: I checked for '\n' and simply replaced it with a space.
while (ch != '.') {
msg[i] = ch;
i++;
ch = getchar();
if (ch == '\n') {
ch = ' ';
}
}

How to parse bigger amount of words?

I have a program that receives a filename as input, saves the file contents into a 2D char array and then outputs the words. It works absolutely fine for about 400 words, but when I add more words, it crashes. Debugging showed that I am trying to access an unused address, and I don't understand how that is possible, considering that previous tests with fewer words were successful.
The question is: what am i missing here?
FILE: functions.c
#include "Lab10.h"
#include <stdio.h>
#include <malloc.h>
/*
 * Read the file named by `filename` and return an array of buffers, one per
 * run of letters (as classified by is_ch) found in the file.
 *
 * Pass 1 walks the file to count the runs and allocate a buffer for each;
 * pass 2 rewinds and copies the characters into those buffers.
 * Returns NULL, after printing a message, when an allocation fails.
 *
 * NOTE(review): the result of fopen() is not checked before fp is used.
 */
char** parser(char* filename) {
FILE* fp;
fp = fopen(filename, "r");
/* calloc(N, sizeof(char*) * N) reserves N blocks of N pointers each, i.e.
 * N * N pointer slots in total, all zeroed. */
char** str = (char**)calloc(N, sizeof(char*) * N);
if (!str)
{
printf("\n Allocation error");
return NULL;
}
/* fgetc() returns int; its result is stored in a char here. */
char ch;
/* `space` is the value of i at which the current letter run started;
 * `words` counts the buffers allocated so far. */
int space = 0, words = 0;
for (int i = 0; !feof(fp); i++) // Memory allocation
{
ch = fgetc(fp);
if (!is_ch(ch))
{
if (i != space)
{
/* NOTE(review): `words` is never compared with the number of slots in
 * `str`, so a file with more runs than slots writes past the array -
 * possibly the crash described for larger inputs; confirm against N.
 * The size arguments request (i - space) * (i - space) bytes. */
if (!(str[words] = (char*)calloc(i - space, sizeof(char) * (i - space))))
{
printf("\n Allocation error");
return NULL;
}
words++;
}
/* Skip the rest of this run of non-letters. */
while (!is_ch(ch) && !feof(fp))
{
ch = fgetc(fp);
i++;
}
/* Step back one character so the letter just read is seen again;
 * whence 1 is presumably SEEK_CUR. */
if(!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
i--;
space = i;
}
}
fseek(fp, 0, SEEK_SET);
for (int i = 0; i < words; i++) // Copying words into 2d array
{
/* Consume characters until a letter is read, then step back onto it. */
while (!is_ch(fgetc(fp)));
if (!feof(fp))
fseek(fp, -(int)sizeof(char), 1);
int j = 0;
/* The loop test looks at the character just stored, so the first
 * non-letter after the run is written into the buffer as well. */
do {
if (((fscanf(fp, "%c", &str[i][j])) != 1))
break;
j++;
} while (is_ch(str[i][j-1]) && !feof(fp));
}
/* fp is not closed before returning. */
return str;
}
/* Return 1 when ch is an unaccented Latin letter (A-Z or a-z), otherwise 0. */
int is_ch(char ch)
{
    if (ch >= 'A' && ch <= 'Z')
        return 1;
    if (ch >= 'a' && ch <= 'z')
        return 1;
    return 0;
}
FILE: main.c
#define _CRT_SECURE_NO_WARNINGS
#include "Lab10.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Ask for a file name on stdin, split that file into words with parser()
 * and print the words as a numbered list.
 *
 * Returns 0 on success and -1 when no file name could be read or parser()
 * reports a failure.
 */
int main() {
    char* filename = malloc(sizeof(char) * N);

    /* Check the allocation before scanf() writes through the pointer, and
     * require exactly one converted item: scanf() returns EOF (non-zero) at
     * end of input, which a plain `!scanf(...)` test lets through. */
    if (filename == NULL || scanf("%s", filename) != 1)
    {
        printf("\n Incorrect filename input");
        free(filename);
        return -1;
    }
    /* NOTE(review): "%s" has no field width, so a name of N or more
     * characters still overruns `filename`. */

    char** str = parser(filename);
    free(filename);
    if (str == NULL)    /* parser() has already printed its own message */
        return -1;

    printf("\n Contents of .txt file:");
    /* The list ends at the first NULL slot of the zero-initialised array. */
    for (int i = 0; str[i] != NULL; i++) {
        printf("\n\t%d) ", i+1);
        /* Each buffer is printed up to its first non-letter character. */
        for (int j = 0; is_ch(str[i][j]); j++) {
            printf("%c", str[i][j]);
        }
    }
    return 0;
}
This answer was posted as a reply to one of the comments below the question itself. I tried writing a readWord function, which receives a file pointer, reads one word and then returns a pointer to the resulting array - that eases the procedure, making it less complex. It works almost like fgets(), but it reads until a non-letter character instead of a newline.
readWord function itself:
/*
 * Read the next word (a maximal run of characters accepted by is_ch) from fp.
 *
 * Leading non-letter characters are skipped. The character that ends the
 * word is pushed back with ungetc(), so it is still unread when the function
 * returns. No seeking is done, so the stream does not have to be seekable.
 *
 * Returns a NUL-terminated string allocated with malloc() that the caller
 * must free(), or NULL when end of file or a NUL byte is reached before any
 * letter, or when memory cannot be allocated (a message is printed then).
 */
char* readWord(FILE* fp) {
    int ch;     /* int, not char, so that EOF stays distinct from every byte */

    /* Skip everything in front of the word. */
    do {
        ch = fgetc(fp);
        if (ch == EOF || ch == '\0')
            return NULL;
    } while (!is_ch((char)ch));

    size_t capacity = 16;
    size_t length = 0;
    char* word = malloc(capacity);
    if (!word)
    {
        printf("\n Allocation error.");
        return NULL;
    }

    /* ch holds the first letter; collect letters, growing the buffer as needed. */
    do {
        if (length + 1 == capacity)     /* always keep one byte for '\0' */
        {
            char* grown = realloc(word, capacity * 2);
            if (!grown)
            {
                printf("\n Allocation error.");
                free(word);
                return NULL;
            }
            word = grown;
            capacity *= 2;
        }
        word[length++] = (char)ch;
        ch = fgetc(fp);
    } while (ch != EOF && ch != '\0' && is_ch((char)ch));

    /* Hand the terminating character back so it is not lost. */
    if (ch != EOF && ch != '\0')
        ungetc(ch, fp);

    word[length] = '\0';
    return word;
}
That's how i use it in main():
FILE* fp = fopen("test.txt", "r");
char* word;
while ((word = readWord(fp)) != NULL)
{
for (int i = 0; word[i] != '\0'; i++)
printf("%c", word[i]);
printf(" ");
}
Is there anything I need to improve here? It works fine, but is it possible to somehow make it better?

How to split the string into substrings without using strtok?

In this code, the string is split by the difference of the space. I could do that through strtok but I didn't. I just want to know that how can split the strings by assigning tokens to them, like if I want to print the first token then it should print the first word from the string. Similarly, if I want to print the second word then it should print the second word after the first space occurred and so on.
/*
 * Read one line from stdin, split it at every ' ' and print each piece on
 * its own line. Consecutive spaces produce empty pieces.
 *
 * At most MAX_WORDS pieces are stored and each piece is cut to
 * MAX_WORD_LEN - 1 characters, so the words table cannot be overrun.
 */
int main(){
    enum { MAX_WORDS = 10, MAX_WORD_LEN = 10 };
    char inputString[100], words[MAX_WORDS][MAX_WORD_LEN];
    int wordIndex = 0, totalWords = 0;

    printf("Input a string: ");
    if (fgets(inputString, sizeof(inputString), stdin) == NULL) {
        return 1;   /* nothing was read; inputString holds no valid text */
    }
    /* fgets() keeps the newline; drop it so it is not part of the last word. */
    inputString[strcspn(inputString, "\n")] = '\0';

    /* <= on purpose: the terminating '\0' closes the final word. */
    size_t length = strlen(inputString);
    for (size_t indexCtr = 0; indexCtr <= length && totalWords < MAX_WORDS; indexCtr++) {
        char current = inputString[indexCtr];

        if (current == ' ' || current == '\0') {
            words[totalWords][wordIndex] = '\0';
            totalWords++;
            wordIndex = 0;
        } else if (wordIndex < MAX_WORD_LEN - 1) {
            words[totalWords][wordIndex] = current;
            wordIndex++;
        }
        /* else: the word is already full, the extra character is dropped */
    }

    printf("\nWords from the string are:\n");
    for (int w = 0; w < totalWords; w++) {
        printf("%s\n", words[w]);
    }
    return 0;
}
as an idea with strncpy(...):
char input[100] = " 1 2 3 4 5 "
"one two three four five";
char words[10][10] = { 0 };
size_t w_counter = 0;
for (size_t i = 0; i < strlen (input); i++) {
while (input[i] != '\0' && isspace (input[i])) {
i++;
}
char* start = &input[i];
while (input[i] != '\0' && !isspace (input[i])) {
i++;
}
strncpy (words[w_counter++],
start,
&input[i] - start);
}
//output
for (size_t i = 0; i < w_counter; i++){
puts (words[i]);
}

Reading In txt File and Extracting Elements from Input Problem

I have been attempting to fix this for hours. I am trying to read a Wiktionary word-frequency file called "stuff.txt", which has input of the form:
10000
5627187200 the
3395006400 of
2994418400 and
2595609600 to
1742063600 in
...
The 10000 represents the number of lines. I then read the file in using a function read_in_terms, and extract the number (weight) and the word (word) from each line. I have tested my get_word and get_weight functions separately and they seem to be working. However, when I read the file in, my get_word function does not return the word, although get_weight does return the number.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Return a newly allocated string holding s1 immediately followed by s2.
 * Neither argument is modified. The caller owns the result and must free()
 * it. Returns NULL if the allocation fails.
 */
char* concat(const char *s1, const char *s2) {
    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);
    char *result = malloc(len1 + len2 + 1);

    if (result == NULL) {
        return NULL;
    }
    memcpy(result, s1, len1);
    memcpy(result + len1, s2, len2 + 1);    /* +1 copies s2's terminator too */
    return result;
}
// from a string of the form " 29480398934 the" it returns number
// From a line of the form "   29480398934   the" return the leading number.
//
// Leading whitespace is skipped and parsing stops at the first character
// that cannot belong to the number, so the word after it is never examined.
// Returns 0 when string is NULL or does not start with a number.
//
// The previous loop read string[i - 1] at i == 0 and its stop condition could
// never be true, so it copied every non-space character of the line into a
// fixed 200-byte buffer before converting.
long get_weight(char *string) {
    if (string == NULL) {
        return 0;
    }
    return strtol(string, NULL, 10);
}
// from a string of the form " 453495834 word" it returns word
// From a line of the form "   453495834   word" return a copy of the word.
//
// The line is read as: optional blanks, the weight (a run of non-blank
// characters), one or more blanks or tabs, then the word, which runs to the
// end of the line. A trailing "\n" or "\r\n" is not part of the word.
//
// Returns a malloc()ed string that the caller must free(); it is empty when
// the line holds no word. Returns NULL if string is NULL or the allocation
// fails.
//
// Unlike the earlier version, a single space or a tab between weight and
// word is accepted, and the buffer always has room for the terminator.
char * get_word(char* string) {
    if (string == NULL) {
        return NULL;
    }

    const char *p = string;
    p += strspn(p, " \t");          // blanks in front of the weight
    p += strcspn(p, " \t\r\n");     // the weight itself
    p += strspn(p, " \t");          // separator between weight and word

    size_t length = strcspn(p, "\r\n");
    char *str = malloc(length + 1);
    if (str == NULL) {
        return NULL;
    }
    memcpy(str, p, length);
    str[length] = '\0';
    return str;
}
/*
 * Open `filename` inside the hard-coded Desktop directory, read the line
 * count from the first line, then print the weight and the word of each of
 * the following lines.
 *
 * pnterms is accepted for interface compatibility but is not used.
 * Reading stops early if the file has fewer lines than announced. Nothing is
 * printed except a diagnostic when the file cannot be opened.
 */
void read_in_terms(int *pnterms, char *filename) {
    (void)pnterms;

    char part1[] = "/Users/adammartinez/Desktop/";
    char* title = concat(part1,filename);
    if (title == NULL) {
        return;
    }

    FILE *fp = fopen(title, "r");
    if (fp == NULL) {
        perror(title);
        free(title);
        return;
    }
    free(title);    /* the path is only needed to open the file */

    char line[200];
    if (fgets(line, sizeof(line), fp) != NULL) {
        int num_lines = atoi(line);

        for (int i = 0; i < num_lines && fgets(line, sizeof(line), fp) != NULL; i++){
            printf("%ld \n", get_weight(line));

            char *word = get_word(line);    /* heap copy, released below */
            printf("%s \n", word != NULL ? word : "");
            free(word);
        }
    }
    fclose(fp);
}
/* Entry point: run read_in_terms() on "stuff.txt" with a term count of 1000. */
int main(void) {
    int terms = 1000;

    read_in_terms(&terms, "stuff.txt");
    return 0;
}
It seems to work fine on my computer. Please look at the output below and tell me if it's what you're looking for. I set part1 to "" and I changed the number of lines in stuff.txt to 5.
5627187200
the
3395006400
of
2994418400
and
2595609600
to
1742063600
in

Splitting a string to an array of strings

I'm trying to split a sentence the user inputs to an array of words so I can later manipulate the words separately as strings.
The code is compiling but prints only garbage after the user input.
I tried debugging but don't see the problem. Can someone help me fix it?
#include <stdio.h>
#include <string.h>
/*
 * Reads a line of text and tries to copy each space-separated word of it
 * into its own row of `rev`, then prints all rows.
 */
int main() {
char str[1000];
int i = 0;
char rev[1000][1000];
int r = 0;
puts("Enter text:");
/* gets() has no way to limit how much it writes into str. */
gets(str);
/* `length` counts the characters seen since the current word began. */
int k, length = 0;
printf_s("So the words are:\n");
while (str[i] != '\0') {
if (str[i] == ' ') {
/* A space ends the current word; k is the word's start position in str. */
k = i - length;
/* NOTE(review): k is also used as the column index of rev[r], so the word
 * is stored starting at column k rather than column 0, and no '\0' is
 * written after it. */
do {
rev[r][k] = (str[k]);
k++;
} while (str[k] != ' ');
printf(" ");
/* -1 so that the length++ below leaves length at 0 for the next word. */
length = (-1);
r++;
} else
if (str[i + 1] == '\0') {
/* Last character of the input: copy the final word the same way. */
k = i - length;
do {
rev[r][k] = (str[k]);
k++;
} while (str[k] != '\0');
length = 0;
r++;
}
length++;
i++;
}
/* NOTE(review): this inner `r` shadows the word count above, so all 1000
 * rows are printed, including rows that were never written. */
for (int r = 0; r < 1000; r++)
printf("%s ", rev[r]);
return 0;
}
fix like this
#include <stdio.h>
/*
 * Read one line of text, split it at spaces and newlines, and print the
 * words as a list of the form "a, b and c".
 */
int main(void) {
    char str[1000];
    char rev[1000][1000];   /* one row per word, filled from column 0 */

    puts("Enter text:");
    /* fgets() bounds the read; gets() was removed from the language in C11. */
    if (fgets(str, sizeof str, stdin) == NULL)
        return 1;           /* no input: str was never filled in */

    int r = 0;              /* words completed so far */
    int k = 0;              /* write position inside the current word */
    for (int i = 0; str[i] != '\0'; ++i){
        if (str[i] == ' ' || str[i] == '\n'){   /* delimiter */
            if (k != 0){    /* ignore delimiters that follow no word */
                rev[r++][k] = '\0';
                k = 0;
            }
        } else {
            rev[r][k++] = str[i];
        }
    }
    if (k != 0)             /* input ended without a final delimiter */
        rev[r++][k] = '\0';

    puts("So the words are:");
    for (int i = 0; i < r; i++){
        printf("%s", rev[i]);
        if (i < r - 2)
            printf(", ");
        else if (i == r - 2)
            printf(" and ");
    }
    return 0;
}
Replace your declaration
char rev[1000][1000];
with
char * rev[1000]; // We will need pointers only
int i = 0; // Index to previous array
and all your code after
puts( "Enter text:" );
with this:
fgets( str, 998, stdin ); // Safe way; don't use gets(str)
const char delim[] = ",; "; // Possible delimiters - comma, semicolon, space
char *word;
/* Get the first word */
word = strtok( str, delim );
rev[i++] = word;
/* Get the next words */
while( word != NULL )
{
word = strtok( NULL, delim );
rev[i++] = word;
}
/* Testing */
for (int r = 0; r < i - 1; r++)
printf( "%s\n", rev[r] );
return 0
}
As you can see, all the dirty work is done by the strtok() function ("string to tokens"), which walks through the words ("tokens") one after another, recognizing them as delimited by one or more characters from the string delim.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Return the number of ' ' characters in str (0 for NULL or ""). */
int count_spaces(char *str)
{
    if (str == NULL)
        return (0);

    int count = 0;
    for (int i = 0; str[i]; i++)
    {
        if (str[i] == ' ')
            count++;
    }
    return (count);
}

/*
 * Return how many characters follow position pos (inclusive) before the next
 * ' ' or the end of str. Returns 0 for NULL or "". pos must not lie beyond
 * the terminating '\0'.
 */
int count_char_from_pos(char *str, int pos)
{
    if (str == NULL || str[0] == '\0')
        return 0;

    int count = 0;
    for (int i = pos; str[i] && str[i] != ' '; i++)
        count++;
    return count;
}

/* Release a partially built, NULL-terminated word list. */
static void free_words(char **words)
{
    for (char **w = words; *w != NULL; w++)
        free(*w);
    free(words);
}

/*
 * Split str at every ' ' into a NULL-terminated array of newly allocated
 * strings. Consecutive spaces yield empty strings.
 *
 * Returns NULL (after printing a message) when str is NULL or empty or when
 * an allocation fails. The caller frees each string and then the array.
 */
char **get_words(char *str)
{
    if (str == NULL || str[0] == '\0')
    {
        printf("Bad string inputed");
        return NULL;
    }

    /* n spaces separate n + 1 words, and one more slot holds the closing
     * NULL. calloc() keeps unused slots NULL so a partial list can be freed. */
    char **dest = calloc((size_t)count_spaces(str) + 2, sizeof *dest);
    if (dest == NULL)
    {
        printf("Malloc failed\n");
        return NULL;
    }

    int j = 0, k = 0;   /* current word, write position inside it */
    dest[0] = malloc((size_t)count_char_from_pos(str, 0) + 1);
    if (dest[0] == NULL)
    {
        printf("Malloc failed\n");
        free(dest);
        return NULL;
    }

    for (int i = 0; str[i]; i++)
    {
        if (str[i] == ' ') {
            dest[j++][k] = '\0';
            /* Measure from the character AFTER the space: measuring at the
             * space itself always gives 0 and allocates a 1-byte buffer. */
            dest[j] = malloc((size_t)count_char_from_pos(str, i + 1) + 1);
            if (dest[j] == NULL)
            {
                printf("Malloc failed\n");
                free_words(dest);
                return NULL;
            }
            k = 0;
        }
        else {
            dest[j][k++] = str[i];
        }
    }
    dest[j][k] = '\0';
    dest[j + 1] = NULL;
    return dest;
}
/*
 * Read one line from stdin, echo it with and without its newline, split it
 * with get_words() and print one word per line.
 */
int main(void) {
    char *line = NULL;
    size_t n = 0;

    if (getline(&line, &n, stdin) == -1)
    {
        free(line);     /* getline() may have allocated even on failure */
        return 1;
    }
    printf("%s\n", line);
    /* Cut at the newline if there is one; the last line of input may lack
     * it, and then no character must be removed. */
    line[strcspn(line, "\n")] = 0;
    printf("%s\n", line);

    char **tab = get_words(line);
    if (tab == NULL)    /* empty line or allocation failure */
    {
        free(line);
        return 1;
    }
    for (int i = 0; tab[i]; i++)
    {
        printf("%s\n", tab[i]);
        free(tab[i]);
    }
    free(tab);
    free(line);
    return 0;
}
here is a long but fully working example
get the user input
then send it to get_words function. It will get the number of words, the number of characters for each words, allocate everything in memory and writes chars then return it. You get a char ** and prints it just tested it it works
If you wish to split a string into an array of strings, you should consider the strtok function from #include <string.h>. The strtok function will split the string on the given delimiter(s). In your case, that would be " ".
Using the strtok example from Tutorials Point:
#include <string.h>
#include <stdio.h>
/* Demo: split a fixed string at every '-' with strtok() and print the pieces. */
int main(){
    char str[80] = "This is - www.tutorialspoint.com - website"; /* text to split; strtok() modifies it */
    const char s[] = "-";                                        /* delimiter set */

    /* The first call is given the string; later calls pass NULL to continue
     * in the same string. A NULL result means no tokens are left. */
    for (char *token = strtok(str, s); token != NULL; token = strtok(NULL, s))
    {
        printf( " %s\n", token );
    }
    return(0);
}

Resources