I am trying to solve this challenge:
https://www.hackerrank.com/challenges/structuring-the-document/problem
Basically I have been given a locked stub of code with structs in it and I am supposed to parse a given text. This is an abridged version of my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_CHARACTERS 1005
#define MAX_PARAGRAPHS 5
#include <ctype.h>
/* A single word: the text it holds. */
struct word {
    char *data;
};

/* A sentence: an array of words. */
struct sentence {
    struct word *data;
    int word_count; /* number of words in the sentence */
};

/* A paragraph: an array of sentences. */
struct paragraph {
    struct sentence *data;
    int sentence_count; /* number of sentences in the paragraph */
};

/* A document: an array of paragraphs. */
struct document {
    struct paragraph *data;
    int paragraph_count; /* number of paragraphs in the document */
};
/*
 * Splits `text` into paragraphs, sentences and words and returns them as a
 * struct document. Words end at ' ' or '.'; a separator followed by '\n' (or
 * at the end of the text) ends a paragraph.
 *
 * The containers are sized from upper bounds (total separator counts), and
 * the *_count fields hold those bounds rather than the real element counts.
 */
struct document get_document(char* text) {
/* First pass: count every separator in the whole text. */
int spaces = 0, periods = 0, newlines = 0;
for(int i = 0; i < strlen(text); i++)
if(text[i] == ' ')
spaces++;
else if(text[i] == '.')
periods++;
else if(text[i] == '\n')
newlines++;
struct document doc;
doc.paragraph_count = newlines + 1;
doc.data = malloc((newlines + 1) * sizeof(struct paragraph));
/* Local scratch paragraphs; each gets room for periods + 1 sentences. */
struct paragraph para[doc.paragraph_count];
for(int i = 0; i < doc.paragraph_count; i++) {
para[i].sentence_count = periods + 1;
para[i].data = malloc((periods + 1) * sizeof(struct sentence));
}
/* Local scratch sentences; each gets room for spaces + 1 words. */
struct sentence sen[para[0].sentence_count];
for(int i = 0; i < para[0].sentence_count; i++) {
sen[i].word_count = spaces + 1;
sen[i].data = malloc((spaces + 1) * sizeof(struct word));
}
struct word word[spaces + periods + 1];
int start = 0, k = 0, wordsub = 0, sensub = 0, parasub = 0, docsub = 0, wordno = 0, parano = 0;
/* Second pass: cut a word at every ' ' or '.', then file it. */
for(int i = 0; i < strlen(text); i++) {
if(text[i] == ' ' || text[i] == '.') {
/* Copy text[start..i) into a fresh NUL-terminated buffer. */
word[wordsub].data = malloc((i - start) * sizeof(char) + 1);
for(int j = start; j < i; j++)
word[wordsub].data[k++] = text[j];
word[wordsub].data[k++] = '\0';
k = 0;
/* The next word starts after the separator, and after a following '\n' if any. */
if(i < strlen(text) - 1 && text[i + 1] == '\n')
start = i + 2;
else
start = i + 1;
/* Space: the word joins the current sentence. */
if(text[i] == ' ') {
sen[sensub].data[wordno++] = word[wordsub++]; //wordno can be 0 or 1
}
/* Period followed by a letter: last word of a sentence inside a paragraph. */
if(i != strlen(text) && isalpha(text[i + 1]) && text[i] == '.') {
sen[sensub].data[wordno++] = word[wordsub++];
wordno = 0;
para[parasub].data[parano++] = sen[sensub++];
}
/* Separator followed by '\n', or last character: end of paragraph. */
/* NOTE(review): sensub is post-incremented on the first line of this branch, */
/* so the sen[sensub] copied into the paragraph below is the NEXT sentence, */
/* whose word slots are still unfilled. parano is also reset to 0 first, so */
/* that copy lands in slot 0 and replaces any sentence stored earlier in this */
/* paragraph. This looks like why "hello" is not printed; confirm. */
if((i != strlen(text) && text[i + 1] == '\n') || i + 1 == strlen(text)) {
sen[sensub++].data[wordno++] = word[wordsub];
wordno = 0;
parano = 0;
para[parasub].data[parano++] = sen[sensub];
doc.data[docsub++] = para[parasub++];
}
}
}
printf("%s\n", para[0].data[0].data[0].data);// should print "hello"
return doc;
}
int main() {
struct document doc;
char * text = "hello world.\nhi.bye.\nwow.";
doc = get_document(text);
printf("%s\n", doc.data[0].data[0].data[0].data);//should also print "hello"
}
The problem is the print statements are not printing "hello". Also if I change the indices in the print statements I get a segmentation error.
Here:
word[wordsub].data[k++] = text[j];
you are accessing data member out of allocated memory.
The problem statement specifies that there are never two terminators after a word. There should also be one word at least.
So, the test phrase
"hello world.\nhi.bye.\nwow."
does not fit, but
"hello world\nhi.bye\nwow"
fits and you will have "hello" printed.
Besides, your algorithm is very complex while the code could be simpler. It was fun to try and I did it.
First, let's use some typedef to write less text!
/* W: a single word (its text). */
typedef struct word {
    char *data;
} W;

/* S: a sentence, i.e. an array of words. */
typedef struct sentence {
    W *data;
    int word_count; /* number of words in the sentence */
} S;

/* P: a paragraph, i.e. an array of sentences. */
typedef struct paragraph {
    S *data;
    int sentence_count; /* number of sentences in the paragraph */
} P;

/* DOC: a document, i.e. an array of paragraphs. */
typedef struct document {
    P *data;
    int paragraph_count; /* number of paragraphs in the document */
} DOC;
Then the function itself. The logic is simple, do all of the following for each char of text in sequence
in case we have any separator (' ', '.' or '\n') record the word
in case we have a separator ('.' or '\n') record the sentence
in case we have a separator ('\n') record a paragraph
The end of the string counts as the end of a paragraph.
Code
/*
 * Grows (or, when arr is NULL, creates) a heap array so that it can hold
 * `count` elements of `elem_size` bytes. Terminates the program with a
 * diagnostic if the allocation fails, so callers never see NULL and the
 * old block is never lost through a failed realloc.
 */
static void *doc_grow(void *arr, size_t count, size_t elem_size) {
    void *grown = realloc(arr, count * elem_size);
    if (grown == NULL) {
        perror("get_document");
        exit(EXIT_FAILURE);
    }
    return grown;
}

/*
 * Parses `text` into a document.
 *
 *   ' ', '.' or '\n'  ends a word
 *   '.' or '\n'       also ends the current sentence
 *   '\n'              also ends the current paragraph
 *
 * The end of the string is treated like a final '\n'. Two consecutive
 * separators therefore produce an empty word (and possibly an empty
 * sentence), exactly as written in the text.
 *
 * Every array and every word string in the returned document is heap
 * allocated and owned by the caller.
 */
struct document get_document(char* text) {
    DOC doc = { NULL, 0 }; // you're the doc, doc
    P parr = { NULL, 0 };
    S sarr = { NULL, 0 };
    size_t len = strlen(text);
    size_t wpos = 0;                       // start of the word being scanned

    for (size_t i = 0; i <= len; i++) {    // <= length! (to deal with \0)
        char c = (i == len) ? '\n' : text[i]; // end of string simulates end of paragraph
        if (c != '\n' && c != '.' && c != ' ')
            continue;

        // End of word, add it to the sentence
        size_t wlen = i - wpos;
        W word;
        word.data = doc_grow(NULL, wlen + 1, 1); // +1 for '\0'
        memcpy(word.data, text + wpos, wlen);
        word.data[wlen] = '\0';
        sarr.data = doc_grow(sarr.data, (size_t)sarr.word_count + 1, sizeof *sarr.data);
        sarr.data[sarr.word_count++] = word;
        wpos = i + 1;
        if (c == ' ')
            continue;

        // End of sentence, add it to the paragraph
        parr.data = doc_grow(parr.data, (size_t)parr.sentence_count + 1, sizeof *parr.data);
        parr.data[parr.sentence_count++] = sarr;
        sarr.data = NULL;                  // start a fresh sentence
        sarr.word_count = 0;
        if (c == '.')
            continue;

        // End of paragraph, add it to the document
        doc.data = doc_grow(doc.data, (size_t)doc.paragraph_count + 1, sizeof *doc.data);
        doc.data[doc.paragraph_count++] = parr;
        parr.data = NULL;                  // start a fresh paragraph
        parr.sentence_count = 0;
    }
    return doc;
}
Finally, to see if that's working, print all members (using a compliant text!)
/* Demo driver: parse a sample text and dump every paragraph, sentence and word. */
int main(int argc, char **argv) {
    char *text = "hello world\nhi.bye\nwow";
    DOC doc = get_document(text);

    int last_para = doc.paragraph_count - 1;
    for (int pi = 0; pi <= last_para; pi++) {
        const P *para = &doc.data[pi];
        int last_sent = para->sentence_count - 1;
        printf("Para %d / %d\n", pi, last_para);

        for (int si = 0; si <= last_sent; si++) {
            const S *sent = &para->data[si];
            int last_word = sent->word_count - 1;
            printf("Sent %d / %d\n", si, last_sent);

            for (int wi = 0; wi <= last_word; wi++) {
                printf("Word %d / %d: %s\n", wi, last_word, sent->data[wi].data);
            }
        }
    }
    return 0;
}
We could add a bit of code to avoid processing two consecutive separators (like a trailing '\n', or ' .').
Related
Using the code below it only reads one char and does not convert morse to letter. My idea was to create a string of one morse "letter" and put it in the convert function, however only 1 char is being read since I am only seeing a single 1 printed on the screen after the string itself is printed. The string only consists of '-' , '.' , ' '. I was wondering if anyone knows what the solution might be.
/*
 * Looks up the Morse string `word` in the table starting at `data` and
 * returns the letter of the first entry whose `morse` field compares equal.
 * Exactly 60 entries are scanned, as in the original loop.
 *
 * Returns '\0' when no entry matches. (The previous version overwrote the
 * result with '\0' just before returning, so it never reported a match.)
 */
char convertToLetter(M* data, char word[10]) {
    enum { TABLE_ENTRIES = 60 };

    for (int idx = 0; idx < TABLE_ENTRIES; idx++) {
        if (strcmp(word, data[idx].morse) == 0) {
            return data[idx].letter;
        }
    }
    return '\0';
}
/*
 * Reads Morse text from inFile. Consecutive non-empty lines are joined into
 * one message; an empty line ends the message, which is then printed and
 * decoded. Within a message a single space separates letters and a double
 * space separates words.
 *
 * Fixes relative to the first version:
 *  - symbols are copied FROM the message INTO `letter` (the reversed
 *    assignment wrote '\0' into the message, ending the loop after one char);
 *  - `i = i++` (undefined behavior) is a plain increment;
 *  - `letter` cannot be overrun and is cleared completely after each use;
 *  - at a word gap the letter is printed before the space, not after it;
 *  - a letter still pending at the end of the message is decoded too;
 *  - only an actual trailing newline is stripped from each line.
 * The numeric trace output (1/2/3) is kept as it was.
 */
int main(){
    //some code here for opening a file.
    char curSent[200];
    char letter[10] = "";   /* current Morse group, always NUL-terminated */
    int i = 0;
    char* fullString = malloc(1000);
    if (fullString == NULL) {
        return 1;
    }
    fullString[0] = '\0';
    while (fgets(curSent, sizeof curSent, inFile) != NULL) {
        if (curSent[0] != '\n') {
            curSent[strcspn(curSent, "\n")] = '\0';
            strcat_s(fullString,1000, curSent);
        }
        else {
            printf("%s", fullString);
            printf("\n\n");
            size_t len = strlen(fullString);
            size_t j = 0;
            while ((size_t)i < len) {
                if (fullString[i] != ' ') {
                    /* keep one byte for the terminator; drop excess symbols */
                    if (j < sizeof letter - 1) {
                        letter[j++] = fullString[i];
                    }
                    i++;
                    printf("%d \n", 1);
                }else if (fullString[i + 1] == ' ') {
                    /* double space: end of letter and end of word */
                    printf("%d", 2);
                    printf("%c ", convertToLetter(dictionary, letter));
                    memset(letter, 0, sizeof letter);
                    j = 0;
                    i = i + 2;
                }else {
                    /* single space: end of letter */
                    printf("%d", 3);
                    printf("%c", convertToLetter(dictionary, letter));
                    memset(letter, 0, sizeof letter);
                    j = 0;
                    i++;
                }
            }
            if (j > 0) {
                /* the message did not end with a space: decode the last letter */
                printf("%c", convertToLetter(dictionary, letter));
                memset(letter, 0, sizeof letter);
            }
            fullString[0] = '\0';
            i = 0;
        }
    }
    //printf("%s", fullString);
    free(fullString);
    getchar();
    return 0;
}
after a long time spent trying to debug this I've come for your help.
Basically in this exercise I'm trying to read the string "31|Name1;23|Name2;15|Name3" and store it in an array of struct s_perso where the | are marking the end of an age and the beginning of a name, and where the ; are marking the beginning of a new struct.
Here's the given ft_perso.h :
#include <string.h>
#ifndef FT__PERSO__H
#define FT__PERSO__H
/* One character record. */
typedef struct s_perso
{
    char    *name;          /* character name */
    float   life;
    int     age;            /* age in whole units */
    char    *profession;
}   t_perso;
#endif
We will only use the datas age and name from this struct s_perso.
Here's my code :
#include "ft_perso.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
** Counts the ';' separators in str.
** For a non-empty list such as "31|Name1;23|Name2;15|Name3" this is the
** index of the last record (2), i.e. the number of records minus one.
** Returns 0 for an empty or NULL string.
**
** The previous version tested `if (str[0])` the wrong way round, so the
** counting loop only ran for the empty string and the result was always 0.
*/
int	numberofstructs(char *str)
{
	int	i;
	int	length;

	i = 0;
	length = 0;
	if (str == NULL)
		return (0);
	while (str[i])
	{
		if (str[i] == ';')
			length += 1;
		i++;
	}
	return (length);
}
/*
** Returns how many characters lie between position i of str and the next
** field delimiter ('|' or ';') or the end of the string.
*/
int	get_data_length(char *str, int i)
{
	const char	*field;
	const char	*end;

	field = str + i;
	end = field;
	for (;;)
	{
		if (*end == '|' || *end == ';' || *end == '\0')
			break ;
		end++;
	}
	return ((int)(end - field));
}
/*
** Returns a freshly allocated, NUL-terminated copy of the field that starts
** at position i of str and runs up to the next '|', ';' or end of string.
** The caller owns the result and must free() it.
** Returns NULL if the allocation fails.
*/
char	*get_data(char *str, int i)
{
	int		len;
	char	*str2;

	len = get_data_length(str, i);
	str2 = malloc((size_t)len + 1);
	if (str2 == NULL)
		return (NULL);
	memcpy(str2, str + i, (size_t)len);
	str2[len] = '\0';
	return (str2);
}
/*
** Parses "age|name;age|name;..." into an array of t_perso pointers.
** '|' separates age from name, ';' starts the next record. Slot i + 1 is set
** to 0 after the last record.
*/
t_perso **ft_decrypt(char *str)
{
int i;
int j;
t_perso **textttt_perso;
i = 0;
j = 0;
/* Room for exactly one pointer (sizeof a pointer), i.e. only slot [0]. */
textttt_perso = (t_perso **)malloc(sizeof(t_perso **));
/* Slot [0] gets a block sized in POINTERS (sizeof(t_perso *)), not in t_perso */
/* structures, multiplied by whatever numberofstructs() returns. */
*textttt_perso = (t_perso *)malloc(sizeof(t_perso *) * numberofstructs(str));
while (j <= strlen(str) && str[j])
{
/* A ';' moves on to the next record. */
if (str[j] == ';')
{
i++;
j++;
}
/* NOTE(review): for i >= 1 this reads textttt_perso[i], which lies outside */
/* the one-pointer allocation above; consistent with the reported crash. */
/* The string returned by get_data() is handed to atoi() and never freed. */
textttt_perso[i]->age = atoi(get_data(str, j));
/* Skip the age field and the '|' that follows it. */
j = j + get_data_length(str, j) + 1;
textttt_perso[i]->name = get_data(str, j);
j = j + get_data_length(str, j);
}
/* Terminator slot; also outside the one-pointer allocation. */
textttt_perso[i+1] = 0;
return (textttt_perso);
}
int main(void)
{
int i;
t_perso **tab;
i = 0;
char str[29] = "31|Name1;23|Name2;15|Name3";
tab = ft_decrypt(str);
while(i <= numberofstructs(str))
{
printf("age = %d\n", tab[i]->age);
printf("age = %s\n", tab[i]->.name);
i++;
}
}
From my debugging, I get the segfault error on the second call (when i = 1 and we are working on the substring 23) instruction of t_perso **ft_decrypt(char *str) :
textttt_perso[i]->age = atoi(get_data(str, j));
My guess is that my allocation of memory either for the array of struct in itself or the number of arrays it can contain is wrong. I can't point my finger on the problem tho...
Thanks in advance for your help, have a nice day !
You never allocate space for an actual structure. In your example:
textttt_perso = (t_perso **)malloc(sizeof(t_perso **));
allocates space for one pointer and:
*textttt_perso = (t_perso *)malloc(sizeof(t_perso *) * numberofstructs(str));
allocates enough space for 3 pointers. At some point you need to allocate space for the actual structures.
You also have other issues. In numberofstructs(), the test if (str[0]) is true for every non-empty string, so the counting loop in the else branch never runs and length is always zero. Also in numberofstructs(), you count the semicolons. If there is data after the last semicolon, you would need to add 1 to length.
You have many other issues in this code that will show up if the data isn't perfect but here is an implementation of ft_decrypt that should work. Initial malloc should be to hold the array of pointers. Then the loop should allocate a structure for each array entry.
/*
 * Parses "age|name;age|name;..." into a heap-allocated array of pointers to
 * heap-allocated t_perso records. '|' separates age from name and ';'
 * separates records.
 *
 * The array always ends with a NULL pointer, so it can be walked without
 * knowing the record count. Fields that are not parsed stay zeroed.
 * The caller owns the array, every record and every name string.
 * Returns NULL if the array itself cannot be allocated; if a record cannot
 * be allocated, parsing stops and the records built so far are returned.
 *
 * The record count is derived here from the ';' separators, so the array
 * size does not depend on what numberofstructs() returns.
 */
t_perso** ft_decrypt(char* str)
{
    size_t capacity = 1;        /* records = separators + 1 */
    size_t count = 0;
    int j = 0;
    t_perso** textttt_perso;

    for (const char* p = str; *p != '\0'; p++)
    {
        if (*p == ';')
            capacity++;
    }
    /* one extra slot for the NULL terminator; calloc zeroes every slot */
    textttt_perso = calloc(capacity + 1, sizeof(*textttt_perso));
    if (textttt_perso == NULL)
        return (NULL);
    while (count < capacity && str[j] != '\0')
    {
        if (str[j] == ';')
        {
            j++;
            if (str[j] == '\0')     /* trailing ';': nothing follows */
                break;
        }
        t_perso* record = calloc(1, sizeof(*record));
        if (record == NULL)
            break;
        char* age_text = get_data(str, j);
        record->age = (age_text != NULL) ? atoi(age_text) : 0;
        free(age_text);             /* only the number is kept */
        j = j + get_data_length(str, j);
        if (str[j] == '|')          /* step over the separator only if present */
            j++;
        record->name = get_data(str, j);
        j = j + get_data_length(str, j);
        textttt_perso[count++] = record;
    }
    return (textttt_perso);
}
This is the challenge I'm trying to solve:
https://www.hackerrank.com/challenges/querying-the-document/
So far I have made progress but I am stuck at a Segmentation Fault and I can't figure out why.
The following is an abridged version of the whole source code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Attempts to split `text` into doc[paragraph][sentence][word].
 * Words end at ' ' or '.'; a separator followed by '\n' ends a paragraph.
 *
 * NOTE(review): para, senten and doc are local arrays, so the pointer
 * returned at the end refers to storage that no longer exists once the
 * function has returned.
 */
char**** get_document(char* text) {
/* First pass: count the separators to size the arrays below. */
int spaces = 0, periods = 0, newlines = 0;
for(int i = 0; i != strlen(text); i++) {
if(text[i] == ' ')
spaces++;
else if(text[i] == '.')
periods++;
else if(text[i] == '\n')
newlines++;
}
char** para[periods + 1]; // each paragraph stores that many sentences
char* senten[spaces + 1]; // each sentence stores that many words
char*** doc[newlines + 1]; // each document stores that many paragraphs
int start = 0, k = 0, m, p = 0, x = 0;
for(int i = 0; i != strlen(text); i++) {
if(text[i] == ' ' || text[i] == '.') { // space or period means there was a word before it
/* NOTE(review): i - start bytes are allocated, but i - start characters */
/* plus a '\0' are written below, one byte more than was allocated. */
senten[k] = (char*) malloc(sizeof(char) * (i - start)); // store each word
m = 0;
for(int j = start; j < i; )
senten[k][m++] = text[j++];
senten[k][m++] = '\0'; // append a '\0' to end the string
if(text[i + 1] == '\n') { // newline means that a new paragraph is starting
/* Every para[p] receives the address of the same local array `senten`, */
/* whose entries k..1 are then set to NULL by the loop below. */
para[p] = senten; // store pointer to sentence in para
while(k)
senten[k--] = NULL; // don't need now
start = i + 2;
p++;
}
else
start = i + 1;
k++;
}
if(i == strlen(text) - 1) { // end of file
/* The single doc entry points at the local array `para`. */
doc[x++] = para; // store pointer to paragraph in doc
while(p)
para[p--] = NULL; // don't need
}
}
return doc;
}
/* Demo driver: parse a two-paragraph sample and print its first word. */
int main()
{
    char *text = "Hello World.\nHi.Bye.";
    char ****doc = get_document(text);
    char *first_word = doc[0][0][0];

    printf("%s", first_word); // should print "Hello"
    return 0;
}
In short the program is supposed to take a char* and output a char****.
Example:
If char * text = "Hello World.\nHi.Bye.";
Then char **** doc = {{{"Hello","World"}},{{"Hi"},{"Bye"}}};
You return the local variable in get_document function:
char** para[periods + 1]; // each paragraph stores that many sentences
char* senten[spaces + 1];
char*** doc[newlines + 1];
...
return doc;
That is a bad idea: doc is a local array, so it ceases to exist as soon as get_document returns and the returned pointer is left dangling.
You should use:
char*** para = malloc((periods + 1) * sizeof(char **));
char** senten = malloc((spaces + 1) * sizeof(char *));
char**** doc = malloc((newlines + 1)*sizeof(char ***));
This is how I modified my code. Now it passes all the test cases.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/*
 * Splits `newtext` into doc[paragraph][sentence][word].
 *
 *  - a word ends at ' ' or '.';
 *  - a sentence ends at a '.' that is directly followed by a letter;
 *  - a paragraph (and its last sentence) ends at a separator that is directly
 *    followed by '\n' or that is the last character of the text.
 *
 * All sentences live in one shared array; doc[n] points at the first
 * sentence of paragraph n inside it. Neither the word arrays nor the
 * sentence array carry a terminator, so the caller must know the shape.
 * A trailing sentence that is never closed by one of the rules above is
 * discarded.
 *
 * Everything returned is heap allocated and owned by the caller. Returns NULL
 * if an allocation fails (memory already allocated for the partial result is
 * not released in that case).
 */
char**** get_document(char* newtext) {
    size_t len = strlen(newtext);
    size_t spaces = 0, periods = 0, newlines = 0;
    for (size_t i = 0; i < len; i++) {
        switch (newtext[i]) {
        case ' ':  spaces++;   break;
        case '.':  periods++;  break;
        case '\n': newlines++; break;
        default:   break;
        }
    }

    /* Every word and every sentence is closed by one ' ' or '.', so the
       number of those separators bounds both counts, even for sloppy text. */
    size_t max_items = spaces + periods + 1;
    char ***para = malloc(max_items * sizeof *para);
    char ****doc = malloc((newlines + 1) * sizeof *doc);
    if (para == NULL || doc == NULL) {
        free(para);
        free(doc);
        return NULL;
    }

    char **senten = NULL;       /* sentence being built; allocated on demand */
    size_t k = 0;               /* words stored in senten */
    size_t p = 0;               /* sentences stored in para */
    size_t f = 0;               /* paragraphs stored in doc */
    size_t para_start = 0;      /* index in para of the current paragraph */
    size_t start = 0;           /* first character of the current word */

    for (size_t i = 0; i < len; i++) {
        char c = newtext[i];
        if (c != ' ' && c != '.')
            continue;
        char next = newtext[i + 1];     /* at worst the terminating '\0' */

        /* space or period means there was a word before it */
        if (senten == NULL) {
            senten = malloc(max_items * sizeof *senten);
            if (senten == NULL)
                return NULL;
        }
        size_t wlen = i - start;
        char *word = malloc(wlen + 1);
        if (word == NULL)
            return NULL;
        memcpy(word, newtext + start, wlen);
        word[wlen] = '\0';
        senten[k++] = word;

        /* the next word starts after the separator and a following '\n' */
        start = (next == '\n') ? i + 2 : i + 1;

        int sentence_end = (c == '.' && isalpha((unsigned char)next));
        int paragraph_end = (next == '\n' || i + 1 == len);
        if (sentence_end || paragraph_end) {
            para[p++] = senten;
            senten = NULL;
            k = 0;
        }
        if (paragraph_end) {
            doc[f++] = &para[para_start];
            para_start = p;
        }
    }

    /* Drop a sentence that was started but never closed. */
    if (senten != NULL) {
        for (size_t w = 0; w < k; w++)
            free(senten[w]);
        free(senten);
    }
    return doc;
}
/* Demo driver: parse a three-paragraph sample and print five selected words. */
int main()
{
    /* {paragraph, sentence, word} of each word to print, in output order */
    static const int picks[][3] = {
        {0, 0, 0}, {0, 0, 1}, {1, 0, 0}, {1, 1, 0}, {2, 0, 0}
    };
    char *newtext = "Hello World.\nHi.Bye.\nWow.";
    char ****doc = get_document(newtext);

    for (size_t n = 0; n < sizeof picks / sizeof picks[0]; n++) {
        printf("%s\n", doc[picks[n][0]][picks[n][1]][picks[n][2]]);
    }
    return 0;
}
I have following function in c code
void analyze_text(char text[]) {
...
for (int i = 0; i < text_length || text[i] != '\0'; i++) {
...
}
}
In main function i would like to pass some string to it. If i do something like this
char text[4000] = "some text here";
analyze_text(text);
this is cool and do the goal, but i would like to have some user input present and I am not sure how to get char[] out of it. I tried following 2 and none of them seemed to work:
char text[4000];
scanf("%s",text);
analyze_text(text);
OR
char text[4000];
int c;
int count=0;
c = getchar();
count = 0;
while ((count < 4000) && (c != EOF)) {
text[count] = c;
++count;
c = getchar();
}
analyze_text(text);
I know that the first one should return pointer to char array, but second one should return char array itself, or not?
It's been about 10 years since I last worked with C/C++. Can anybody give me a hint, please?
update (whole function):
/*
 * Gathers statistics about `text`: the number of non-whitespace characters,
 * words and sentences, and how often each letter occurs (case-insensitive).
 * The letter counts are then sorted in ascending order. The reporting code is
 * elided in this listing.
 */
void analyze_text(char text[]) {
int printable_text_length = 0;
int text_length = strlen(text);
int word_count = 0;
int sentence_count = 0;
int in_sentence = 0;
int in_word = 0;
/* Occurrences per character code; only letters are counted, folded to lower case. */
int count[ASCII_SIZE] = { 0 };
/* Both halves of the condition turn false at the same index, because */
/* text_length is strlen(text). */
for (int i = 0; i < text_length || text[i] != '\0'; i++) {
int c = text[i];
if (!isspace(c)) {
printable_text_length++;
}
if (isalpha(c)) {
in_word = 1;
in_sentence = 1;
count[tolower(c)]++;
}
/* A word is closed at the last space of a run of spaces that follows a letter. */
if (text[i] == ' ' && text[i + 1] != ' ' && in_word==1) {
word_count++;
in_word = 0;
}
/* A sentence is closed at a '.' that follows at least one letter. */
if (text[i] == '.' && in_sentence==1) {
sentence_count++;
in_sentence = 0;
}
}
/* Count a word / sentence that runs to the end of the text without a closing separator. */
if (in_word == 1) { word_count++; }
if (in_sentence == 1) { sentence_count++; }
/* charIndexes[i] records which character code the count in slot i belongs to */
/* once the sort below has moved the counts around. Only 97..122 */
/* ('a'..'z' in ASCII) are initialised. */
char charIndexes[ASCII_SIZE];
for (int i = 97; i <= 122; i++) {
charIndexes[i] = i;
}
/* Exchange sort of count[97..122] into ascending order, applying the same */
/* swaps to charIndexes. */
for (int i=97; i <= 122; i++) {
for (int j = i + 1; j <= 122; j++) {
if (count[i] > count[j]) {
int temp = count[j];
count[j] = count[i];
count[i] = temp;
int temp2 = charIndexes[j];
charIndexes[j] = charIndexes[i];
charIndexes[i] = temp2;
}
}
}
...printf...
}
The issue with
char text[4000];
scanf("%s",text);
analyze_text(text);
is that scanf identifies space-separated chunks, so you'll only read the first one.
In order to read up to a whole line from the user, try fgets:
char text[4000];
fgets(text, 4000, stdin);
analyze_text(text);
You may want to check the return value of fgets for error detection.
You can use a dynamically allocated array of char and pass it to the function.
Here is the code
#include <stdio.h>
#include <stdlib.h>
/* Prints every character of text on its own line, stopping at the terminating '\0'. */
void analyze_text(char* text) {
    const char *cursor = text;

    while (*cursor != '\0') {
        printf("%c\n", *cursor);
        cursor++;
    }
}
/*
 * Reads one whitespace-delimited token from standard input into a heap
 * buffer and passes it to analyze_text().
 *
 * The conversion is limited to 3999 characters so that the token plus its
 * terminator always fits into the 4000-byte buffer. The buffer is released
 * before returning. Returns 1 if the buffer cannot be allocated.
 */
int main() {
    enum { TEXT_CAPACITY = 4000 };
    char* text = malloc(TEXT_CAPACITY);

    if (text == NULL) {
        return 1;
    }
    /* only analyze when a token was actually read */
    if (scanf("%3999s", text) == 1) {
        analyze_text(text);
    }
    free(text);
    return 0;
}
and here is the output with input = 'abhishek'
a
b
h
i
s
h
e
k
Remember that strlen on a dynamically allocated array gives the length of the string stored in it, not the size of the allocated array.
I'm trying to reverse the letters for words in a sentence. I am also trying to store these words in a new char array. At the moment I getting a runtime error, which for all my tweaking I can not solve. My approach is to create a new char array the same length as the sentence. Then loop through the sentence until I reach a ' ' character. Then loop backwards and add these characters to a word. Then add the word to the new Sentence. Any help would be much appreciated.
/*
 * Attempt to reverse the letters of each word of a sentence into a newly
 * allocated string.
 */
int main(void) {
char sentence [] = "this is a sentence";
/* NOTE(review): this buffer is never initialised, yet strcat() below */
/* appends to it as if it already held a string. */
char *newSentence = malloc(strlen(sentence)+1);
int i,j,start;
start = 0;
for(i = 0; i <= strlen(sentence); i++)
{
/* Only a space triggers a copy, so the last word (which ends at '\0') is never handled. */
if(sentence[i] == ' ')
{
char *word = malloc((i - start)+1);
/* NOTE(review): j is initialised with the CHARACTER sentence[i] (a space), */
/* not with the index i, and word[j] is written at the same index it is */
/* read from, so nothing is reversed. word is never NUL-terminated or freed. */
for(j = sentence[i]; j >= start; j--)
{
word[j] = sentence[j];
}
strcat(newSentence,word);
/* NOTE(review): start likewise receives a character value instead of the index i + 1. */
start =sentence[i +1];
}
}
printf("%s",newSentence);
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reverses the letters of every word of a fixed sentence, keeping the words
 * in their original order and the single spaces between them, then prints
 * the result.
 *
 * The reversed characters are written straight into the output buffer, so no
 * per-word temporary is needed. The output has exactly the length of the
 * input. The allocation is checked and released.
 */
int main(void) {
    char sentence [] = "this is a sentence";
    size_t len = strlen(sentence);
    char *newSentence = malloc(len + 1);
    size_t out = 0;      /* next free position in newSentence */
    size_t start = 0;    /* first character of the current word */

    if (newSentence == NULL)
        return 1;
    for (size_t i = 0; i <= len; i++)
    {
        if (sentence[i] != ' ' && sentence[i] != '\0')
            continue;
        /* the word is sentence[start..i): emit it back to front */
        for (size_t j = i; j > start; j--)
        {
            newSentence[out++] = sentence[j - 1];
        }
        if (sentence[i] == ' ')
            newSentence[out++] = ' ';
        start = i + 1;
    }
    newSentence[out] = '\0';
    printf("%s",newSentence);
    free(newSentence);
    return 0;
}
Logically, here:
j = sentence[i]
start =sentence[i +1];
start and j are index positions in the char array, you are trying to assign a char to them, which screws everything up.
should be:
j= i;
start = i +1;
if your algorithm is right.
Yet another variant of the same...
/*
 * Pointer-based variant: copies a fixed sentence into a new buffer with the
 * letters of each word reversed, then prints the result in brackets.
 */
int main(int argc, const char *argv[])
{
    char sentence [] = "this is a sentence";
    size_t len = strlen(sentence);
    char *newSentence = malloc(len + 1);
    char *out = newSentence;    /* write position */
    char *word = sentence;      /* start of the current word */

    for (;;)
    {
        /* the word ends at the next space, or at the end of the line */
        char *end = strchr(word, ' ');
        if (end == NULL)
            end = sentence + len;

        /* emit the word back to front */
        for (char *p = end; p > word; )
            *out++ = *--p;

        if (*end == '\0')
        {
            *out = 0;
            break;
        }
        /* copy the separator and continue behind it */
        *out++ = *end;
        word = end + 1;
    }
    printf("[%s]",newSentence);
    return 0;
}
Your program had a few bugs, which I've tried to remove in this program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reverses the letters of every word of a fixed sentence and prints the
 * words in their original order, each followed by one space.
 *
 * Every word (including the last) is followed by a space, so the output is
 * one character longer than the input; the buffer is sized for that plus the
 * terminator. Characters are written directly into the output, which avoids
 * both the strcat() onto an uninitialised buffer and the per-word temporary
 * that was one byte too small for "word + space + '\0'".
 */
int main(void) {
    char sentence [] = "this is a sentence";
    size_t len = strlen(sentence);
    char *newSentence = malloc(len + 2);
    size_t out = 0;      /* next free position in newSentence */
    size_t start = 0;    /* first character of the current word */

    if (newSentence == NULL)
        return 1;
    for (size_t i = 0;; i++)
    {
        if (sentence[i] == ' ' || sentence[i] == '\0') //sentence[i] == '\0' for the last word.
        {
            for (size_t j = i; j > start; j--)
            {
                newSentence[out++] = sentence[j - 1];
            }
            newSentence[out++] = ' '; //space after each word
            start = i + 1;
        }
        if (sentence[i] == '\0')
            break;
    }
    newSentence[out] = '\0';
    printf("%s\n",newSentence);
    free(newSentence);
    return 0;
}
Check live at http://ideone.com/Z9ogGk
strcat(newSentence,word);
newSentence has to be a string. And a string is a contiguous sequence of characters terminated by and including the first null character
EDIT: this answer has been downvoted 4 times for what is written above. If you think it is incorrect, please explain. Otherwise please remove your downvote.