This is the challenge I'm trying to solve:
https://www.hackerrank.com/challenges/querying-the-document/
So far I have made progress but I am stuck at a Segmentation Fault and I can't figure out why.
The following is an abridged version of the whole source code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Question exhibit: tries to split `text` into a document -> paragraph ->
 * sentence -> word hierarchy and hand it back as a char****.
 *
 * NOTE(review): the three arrays `para`, `senten` and `doc` are local
 * variable-length arrays, and the function returns `doc`. Their storage ends
 * when the function returns, so the caller receives a dangling pointer.
 */
char**** get_document(char* text) {
// First pass: count the separators that size the arrays declared below.
int spaces = 0, periods = 0, newlines = 0;
for(int i = 0; i != strlen(text); i++) {
if(text[i] == ' ')
spaces++;
else if(text[i] == '.')
periods++;
else if(text[i] == '\n')
newlines++;
}
// All three arrays have automatic storage (see the note at the top).
char** para[periods + 1]; // each paragraph stores that many sentences
char* senten[spaces + 1]; // each sentence stores that many words
char*** doc[newlines + 1]; // each document stores that many paragraphs
// start: index of the first character of the current word; k: next free
// slot in senten; p: next free slot in para; x: next free slot in doc.
int start = 0, k = 0, m, p = 0, x = 0;
for(int i = 0; i != strlen(text); i++) {
if(text[i] == ' ' || text[i] == '.') { // space or period means there was a word before it
// NOTE(review): only (i - start) bytes are requested, but the copy below
// writes (i - start) characters plus the '\0', i.e. one byte too many.
senten[k] = (char*) malloc(sizeof(char) * (i - start)); // store each word
m = 0;
for(int j = start; j < i; )
senten[k][m++] = text[j++];
senten[k][m++] = '\0'; // append a '\0' to end the string
if(text[i + 1] == '\n') { // newline means that a new paragraph is starting
// NOTE(review): every para[] entry receives the address of the same
// `senten` array; no per-sentence copy is made.
para[p] = senten; // store pointer to sentence in para
// Sets senten[k] down to senten[1] to NULL, including the word stored just
// above, and leaves k at 0 (the k++ below then makes it 1).
while(k)
senten[k--] = NULL; // don't need now
start = i + 2;
p++;
}
else
start = i + 1;
k++;
}
if(i == strlen(text) - 1) { // end of file
// Same pattern one level up: doc[] gets the address of the local `para`,
// then para[p] down to para[1] are cleared.
doc[x++] = para; // store pointer to paragraph in doc
while(p)
para[p--] = NULL; // don't need
}
}
// NOTE(review): returns the address of the local array `doc`.
return doc;
}
/* Driver: parse a two-paragraph sample and print its very first word. */
int main()
{
    char *sample = "Hello World.\nHi.Bye.";
    char ****parsed = get_document(sample);
    char *first_word = parsed[0][0][0];   /* expected to be "Hello" */

    printf("%s", first_word);
    return 0;
}
In short the program is supposed to take a char* and output a char****.
Example:
If char * text = "Hello World.\nHi.Bye.";
Then char **** doc = {{{"Hello","World"}},{{"Hi"},{"Bye"}}};
You return the local variable in get_document function:
char** para[periods + 1]; // each paragraph stores that many sentences
char* senten[spaces + 1];
char*** doc[newlines + 1];
...
return doc;
This is a bad idea: doc, para and senten are local arrays, so they stop existing as soon as get_document returns and the pointer you hand back to the caller is dangling.
You should use:
char*** para = malloc((periods + 1) * sizeof(char **));
char** senten = malloc((spaces + 1) * sizeof(char *));
char**** doc = malloc((newlines + 1)*sizeof(char ***));
This is how I modified my code. Now it passes all the test cases.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/*
 * Parse `newtext` into a document[paragraph][sentence][word] hierarchy.
 *
 * A word ends at ' ' or '.'. A sentence ends at a '.' that is directly
 * followed by a letter, or at any word separator that is directly followed by
 * '\n' or by the end of the text; the latter also ends the paragraph.
 *
 * Layout of the result: every paragraph entry doc[n] points into one shared
 * array of sentence pointers, at the first sentence of paragraph n. Each
 * sentence is its own array of heap-allocated, NUL-terminated words. No
 * counts or sentinels are stored, so the caller must know the valid indices.
 *
 * Returns NULL if an allocation fails; memory already attached to the partial
 * document is not released on that path. Text that trails the last separator
 * is not part of the result.
 */
char**** get_document(char* newtext) {
    /* Hoisted: the length does not change while parsing. */
    const size_t len = strlen(newtext);
    size_t spaces = 0, periods = 0, newlines = 0;

    for (size_t i = 0; i < len; i++) {
        if (newtext[i] == ' ')
            spaces++;
        else if (newtext[i] == '.')
            periods++;
        else if (newtext[i] == '\n')
            newlines++;
    }

    /*
     * Every word (and therefore every sentence) is closed by exactly one
     * ' ' or '.', so spaces + periods bounds both counts even for irregular
     * input such as ". " or " \n". calloc leaves unused slots as NULL.
     */
    const size_t max_words = spaces + periods;
    char ***para = calloc(max_words + 1, sizeof *para);
    char ****doc = calloc(newlines + 1, sizeof *doc);
    if (para == NULL || doc == NULL) {
        free(para);
        free(doc);
        return NULL;
    }

    char **senten = NULL;   /* sentence being built; allocated on first word */
    size_t start = 0;       /* index of the first character of the current word */
    size_t k = 0;           /* words stored in `senten` so far */
    size_t p = 0;           /* sentences stored in `para` so far */
    size_t f = 0;           /* paragraphs stored in `doc` so far */
    size_t pp = 0;          /* index in `para` where the current paragraph starts */

    for (size_t i = 0; i < len; i++) {
        const char c = newtext[i];
        if (c != ' ' && c != '.')
            continue;

        /* Allocate lazily so no spare sentence array is left over at the end. */
        if (senten == NULL) {
            senten = malloc((max_words + 1) * sizeof *senten);
            if (senten == NULL)
                return NULL;
        }

        const size_t word_len = i - start;
        char *word = malloc(word_len + 1);
        if (word == NULL)
            return NULL;
        memcpy(word, newtext + start, word_len);
        word[word_len] = '\0';
        senten[k++] = word;

        /* i < len, so index i + 1 is at worst the terminating '\0'. */
        const char next = newtext[i + 1];
        const int paragraph_ends = (next == '\n') || (i + 1 == len);

        /* Skip the newline too when the next paragraph starts right after it. */
        start = (next == '\n') ? i + 2 : i + 1;

        if (paragraph_ends || (c == '.' && isalpha((unsigned char)next))) {
            para[p++] = senten;
            senten = NULL;
            k = 0;
        }
        if (paragraph_ends) {
            doc[f++] = &para[pp];
            /* Next paragraph starts after the sentences actually stored. */
            pp = p;
        }
    }

    /* Words of an unfinished trailing sentence were never attached: drop them. */
    for (size_t j = 0; j < k; j++)
        free(senten[j]);
    free(senten);

    return doc;
}
/* Driver: parse a three-paragraph sample and print a few selected words. */
int main()
{
    char *sample = "Hello World.\nHi.Bye.\nWow.";
    char ****parsed = get_document(sample);

    /* {paragraph, sentence, word} index triples, in print order. */
    static const int picks[][3] = {
        {0, 0, 0}, {0, 0, 1}, {1, 0, 0}, {1, 1, 0}, {2, 0, 0}
    };
    const size_t pick_count = sizeof picks / sizeof picks[0];

    for (size_t n = 0; n < pick_count; n++)
        printf("%s\n", parsed[picks[n][0]][picks[n][1]][picks[n][2]]);

    return 0;
}
Related
I am trying to solve this challenge:
https://www.hackerrank.com/challenges/structuring-the-document/problem
Basically I have been given a locked stub of code with structs in it and I am supposed to parse a given text. This is an abridged version of my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_CHARACTERS 1005
#define MAX_PARAGRAPHS 5
#include <ctype.h>
/* A single word: `data` points at the word's characters. */
struct word {
char* data;
};
/* A sentence: an array of words together with its length. */
struct sentence {
struct word* data;
int word_count;//denotes number of words in a sentence
};
/* A paragraph: an array of sentences together with its length. */
struct paragraph {
struct sentence* data ;
int sentence_count;//denotes number of sentences in a paragraph
};
/* A document: an array of paragraphs together with its length. */
struct document {
struct paragraph* data;
int paragraph_count;//denotes number of paragraphs in a document
};
/*
 * Question exhibit: builds a `struct document` from `text` using scratch
 * arrays (`para`, `sen`, `word`) whose elements are copied by value into the
 * heap arrays hanging off `doc`.
 *
 * NOTE(review): every sentence_count / word_count is set to an upper bound
 * derived from the separator totals (periods + 1, spaces + 1), not to the
 * number of elements actually stored.
 */
struct document get_document(char* text) {
// First pass: count separators over the whole text.
int spaces = 0, periods = 0, newlines = 0;
for(int i = 0; i < strlen(text); i++)
if(text[i] == ' ')
spaces++;
else if(text[i] == '.')
periods++;
else if(text[i] == '\n')
newlines++;
struct document doc;
doc.paragraph_count = newlines + 1;
doc.data = malloc((newlines + 1) * sizeof(struct paragraph));
// Scratch paragraphs; each gets room for periods + 1 sentences.
struct paragraph para[doc.paragraph_count];
for(int i = 0; i < doc.paragraph_count; i++) {
para[i].sentence_count = periods + 1;
para[i].data = malloc((periods + 1) * sizeof(struct sentence));
}
// Scratch sentences; each gets room for spaces + 1 words.
struct sentence sen[para[0].sentence_count];
for(int i = 0; i < para[0].sentence_count; i++) {
sen[i].word_count = spaces + 1;
sen[i].data = malloc((spaces + 1) * sizeof(struct word));
}
// Scratch words.
struct word word[spaces + periods + 1];
// start: first character of the current word; k: write index inside a word;
// wordsub/sensub/parasub/docsub: cursors into word[]/sen[]/para[]/doc.data;
// wordno: word slot inside the current sentence; parano: sentence slot
// inside the current paragraph.
int start = 0, k = 0, wordsub = 0, sensub = 0, parasub = 0, docsub = 0, wordno = 0, parano = 0;
for(int i = 0; i < strlen(text); i++) {
if(text[i] == ' ' || text[i] == '.') {
// Copy text[start .. i-1] into a fresh buffer and terminate it.
word[wordsub].data = malloc((i - start) * sizeof(char) + 1);
for(int j = start; j < i; j++)
word[wordsub].data[k++] = text[j];
word[wordsub].data[k++] = '\0';
k = 0;
// Skip the newline as well when one directly follows this separator.
if(i < strlen(text) - 1 && text[i + 1] == '\n')
start = i + 2;
else
start = i + 1;
if(text[i] == ' ') {
sen[sensub].data[wordno++] = word[wordsub++]; //wordno can be 0 or 1
}
// '.' directly followed by a letter: close the sentence inside the paragraph.
if(i != strlen(text) && isalpha(text[i + 1]) && text[i] == '.') {
sen[sensub].data[wordno++] = word[wordsub++];
wordno = 0;
para[parasub].data[parano++] = sen[sensub++];
}
// Separator directly followed by '\n', or last character: close the paragraph.
if((i != strlen(text) && text[i + 1] == '\n') || i + 1 == strlen(text)) {
// NOTE(review): sensub is post-incremented here, so the sen[sensub] read two
// lines below is the NEXT scratch sentence, not the one just completed.
// wordsub is not advanced on this path.
sen[sensub++].data[wordno++] = word[wordsub];
wordno = 0;
// NOTE(review): parano is reset before the store, so the store always lands
// in slot 0 of the paragraph, replacing whatever was put there earlier.
parano = 0;
para[parasub].data[parano++] = sen[sensub];
doc.data[docsub++] = para[parasub++];
}
}
}
printf("%s\n", para[0].data[0].data[0].data);// should print "hello"
return doc;
}
int main() {
struct document doc;
char * text = "hello world.\nhi.bye.\nwow.";
doc = get_document(text);
printf("%s\n", doc.data[0].data[0].data[0].data);//should also print "hello"
}
The problem is the print statements are not printing "hello". Also if I change the indices in the print statements I get a segmentation error.
Here:
word[wordsub].data[k++] = text[j];
you are accessing data member out of allocated memory.
The problem statement specifies that there are never two terminators after a word. There should also be one word at least.
So, the test phrase
"hello world.\nhi.bye.\nwow."
does not fit, but
"hello world\nhi.bye\nwow"
fits and you will have "hello" printed.
Besides, your algorithm is very complex while the code could be simpler. It was fun to try and I did it.
First, let's use some typedef to write less text!
/* W: a single word; `data` points at its characters. */
typedef struct word {
char* data;
} W;
/* S: a sentence, i.e. an array of W plus its length. */
typedef struct sentence {
W* data;
int word_count;//denotes number of words in a sentence
} S;
/* P: a paragraph, i.e. an array of S plus its length. */
typedef struct paragraph {
S* data ;
int sentence_count;//denotes number of sentences in a paragraph
} P;
/* DOC: a document, i.e. an array of P plus its length. */
typedef struct document {
P* data;
int paragraph_count;//denotes number of paragraphs in a document
} DOC;
Then the function itself. The logic is simple, do all of the following for each char of text in sequence
in case we have any separator (' ', '.' or '\n') record the word
in case we have a separator ('.' or '\n') record the sentence
in case we have a separator ('\n') record a paragraph
The end of the string counts as the end of a paragraph.
Code
/*
 * Grow (or create, when `old` is NULL) an array to `count` elements of
 * `elem_size` bytes. Terminates the program on allocation failure, so the
 * caller never has to deal with a NULL array.
 */
static void *doc_grow(void *old, size_t count, size_t elem_size) {
    void *grown = realloc(old, count * elem_size);
    if (grown == NULL) {
        perror("get_document");
        exit(EXIT_FAILURE);
    }
    return grown;
}

/*
 * Split `text` into paragraphs ('\n'), sentences ('.') and words (' ').
 *
 * Each separator closes the current word; '.' and '\n' also close the current
 * sentence; '\n' also closes the current paragraph. The end of the string is
 * treated as a final '\n'.
 *
 * Empty pieces are skipped: two adjacent separators (for example ".\n", a
 * trailing '\n' or " .") no longer create an empty word, an empty sentence or
 * an empty paragraph. For an empty `text` the result is { NULL, 0 }.
 *
 * Every array and every word buffer in the result is heap-allocated and owned
 * by the caller. The counts in the returned structures are the exact numbers
 * of stored elements.
 */
struct document get_document(char* text) {
    DOC doc = { NULL, 0 };   /* paragraphs collected so far */
    P parr = { NULL, 0 };    /* sentences of the paragraph being built */
    S sarr = { NULL, 0 };    /* words of the sentence being built */
    size_t wpos = 0;         /* index of the first character of the current word */
    const size_t len = strlen(text);

    for (size_t i = 0; i <= len; i++) {   /* <= so the terminating '\0' is visited */
        char c = text[i];
        if (c == '\0')
            c = '\n';                     /* end of string closes the last paragraph */
        if (c != '\n' && c != '.' && c != ' ')
            continue;

        if (i > wpos) {
            /* Non-empty word: copy it out and append it to the sentence. */
            const size_t wlen = i - wpos;
            W word;
            word.data = doc_grow(NULL, wlen + 1, 1);
            memcpy(word.data, text + wpos, wlen);
            word.data[wlen] = '\0';
            sarr.data = doc_grow(sarr.data, (size_t)sarr.word_count + 1, sizeof *sarr.data);
            sarr.data[sarr.word_count++] = word;
        }
        wpos = i + 1;

        if ((c == '\n' || c == '.') && sarr.word_count > 0) {
            /* End of a non-empty sentence: hand its array over to the paragraph. */
            parr.data = doc_grow(parr.data, (size_t)parr.sentence_count + 1, sizeof *parr.data);
            parr.data[parr.sentence_count++] = sarr;
            sarr.data = NULL;
            sarr.word_count = 0;
        }

        if (c == '\n' && parr.sentence_count > 0) {
            /* End of a non-empty paragraph: hand its array over to the document. */
            doc.data = doc_grow(doc.data, (size_t)doc.paragraph_count + 1, sizeof *doc.data);
            doc.data[doc.paragraph_count++] = parr;
            parr.data = NULL;
            parr.sentence_count = 0;
        }
    }
    return doc;
}
Finally, to see if that's working, print all members (using a compliant text!)
/* Driver: parse a compliant sample and dump every paragraph, sentence and word. */
int main(int argc, char **argv) {
    char *sample = "hello world\nhi.bye\nwow";
    DOC parsed = get_document(sample);
    const int last_para = parsed.paragraph_count - 1;

    for (int pi = 0; pi <= last_para; pi++) {
        const P *para = &parsed.data[pi];
        const int last_sent = para->sentence_count - 1;
        printf("Para %d / %d\n", pi, last_para);

        for (int si = 0; si <= last_sent; si++) {
            const S *sent = &para->data[si];
            const int last_word = sent->word_count - 1;
            printf("Sent %d / %d\n", si, last_sent);

            for (int wi = 0; wi <= last_word; wi++)
                printf("Word %d / %d: %s\n", wi, last_word, sent->data[wi].data);
        }
    }
    return 0;
}
We could add a bit of code to avoid processing two consecutive separators (like a trailing '\n', or ' .').
I'm stuck with an unidentified Segmentation Fault.
My erroneous function receives a string text. It should convert it into a document and return it. A document is made of paragraphs(separated by '\n') which is made of sentences(separated by '.') which is made of words(separated by ' '). You may refer the complete problem statement here.
Here is the relevant part of my code:
/*
 * Question exhibit: builds document[p][s][w] one character at a time, growing
 * each level with realloc when a separator is seen.
 *
 * NOTE(review): realloc only enlarges an array; the element added at the new
 * last index is not initialised. Each separator branch below grows one level
 * but never allocates the new element's children, so the next ordinary
 * character passes an indeterminate pointer to realloc in the final branch.
 */
char**** get_document(char* text) {
// p/s/w: current paragraph, sentence and word index; c: characters stored in
// the current word so far.
int p = 0, s = 0, w = 0, c = 0;
char**** document;
// One slot at every level for the very first word.
document = malloc(sizeof(char***));
document[0] = malloc(sizeof(char**));
document[0][0] = malloc(sizeof(char*));
document[0][0][0] = malloc(sizeof(char));
while (*text)
{
if (*text == ' ')
{
c = 0;
++w;
// Makes room for word w, but document[p][s][w] itself stays unset.
document[p][s] = realloc(document[p][s], sizeof(char*) * (w + 1));
}
else if (*text == '.')
{
c = 0;
w = 0;
++s;
// Makes room for sentence s, but document[p][s] itself stays unset.
document[p] = realloc(document[p], sizeof(char**) * (s + 1));
}
else if (*text == '\n')
{
c = 0;
w = 0;
s = 0;
++p;
// Makes room for paragraph p, but document[p] itself stays unset.
document = realloc(document, sizeof(char***) * (p + 1));
}
else
{
// Ordinary character: grow the current word by one and keep it terminated.
++c;
document[p][s][w] = realloc(document[p][s][w], sizeof(char) * (c + 1));
document[p][s][w][c - 1] = *text;
document[p][s][w][c] = '\0';
}
++text;
}
return document;
}
After debugging, I came to know that the program crashes when
w = 1 at document[p][s][w][c - 1] = *text;
I have no idea why this is happening. I checked the values of p, s, w, and c before the execution of that statement, and if the realloc statements were executing properly.
But in vain!
What might be going wrong in my code?
You need to allocate memory for the new paragraphs, sentences and words. By reallocation you increased the actual dimension size, but the new element was a null pointer which caused the segfault.
char**** get_document(char* text) {
int p = 0, s = 0, w = 0, c = 0;
char**** document;
document = malloc(sizeof(char***));
document[0] = malloc(sizeof(char**));
document[0][0] = malloc(sizeof(char*));
document[0][0][0] = malloc(sizeof(char));
while (*text)
{
if (*text == ' ')
{
c = 0;
++w;
document[p][s] = realloc(document[p][s], sizeof(char**) * (w + 1));
document[p][s][w] = malloc(sizeof(char*));
}
else if (*text == '.')
{
c = 0;
w = 0;
++s;
document[p] = realloc(document[p], sizeof(char**) * (s + 1));
document[p][s] = malloc(sizeof(char**));
document[p][s][w] = malloc(sizeof(char*));
}
else if (*text == '\n')
{
c = 0;
w = 0;
s = 0;
++p;
document = realloc(document, sizeof(char****) * (p + 1));
document[p] = malloc(sizeof(char***));
document[p][s] = malloc(sizeof(char**));
document[p][s][w] = malloc(sizeof(char*));
}
else
{
++c;
document[p][s][w] = realloc(document[p][s][w], sizeof(char) * (c + 1));
document[p][s][w][c - 1] = *text;
document[p][s][w][c] = '\0';
}
++text;
}
return document;
}
Moreover, your printing method in the main does not work, because you did not save the spaces (you needn't anyways). So, I fixed it:
/*
 * Driver: re-walk the sample text and print it back, taking ordinary
 * characters from the parsed document and separators from the text itself.
 */
int main()
{
    char *text = "New word.No space before a sentence.\nThis is a new paragraph.";
    char ****doc = get_document(text);
    int para = 0, sent = 0, word = 0, pos = 0;

    for (const char *cursor = text; *cursor != '\0'; cursor++) {
        const char ch = *cursor;
        switch (ch) {
        case ' ':
            putchar(' ');
            pos = 0;
            ++word;
            break;
        case '.':
            putchar('.');
            pos = 0;
            word = 0;
            ++sent;
            break;
        case '\n':
            putchar('\n');
            pos = 0;
            word = 0;
            sent = 0;
            ++para;
            break;
        default:
            putchar(doc[para][sent][word][pos]);
            pos++;
            break;
        }
    }
    return 0;
}
The output seems correct:
New word.No space before a sentence.
This is a new paragraph.
I think you don't have to free the doc because you are returning from the main after it and Hackerrank will handle that if needed. But just note that you should take care of it otherwise.
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*
 * Question exhibit: reads characters into `str` and tries to copy each
 * space-terminated word into `arr`.
 *
 * NOTE(review): `arr` is declared as `char *`, so `arr[j]` is a single char,
 * yet it is assigned the pointer returned by malloc and its address is passed
 * to strcpy_s as a destination buffer. An array of strings would need `char **`.
 */
void main()
{
char str[1000], *arr;
// NOTE(review): `len` is never used.
int i = 0,len,counter=0,j=0;
arr = (char*)malloc((sizeof(char*) * 1000));
// NOTE(review): the loop condition reads str[i] before the body has stored
// anything there; `str` is not initialised.
for (i = 0; str[i] != '\0'; i++)
{
str[i] = getchar();
if (str[i] == ' ')
{
// Terminate the word in place, then try to copy it out.
str[i] = '\0';
arr[j] = malloc(sizeof(char)*counter);
strcpy_s(&arr[j], counter * sizeof(char), &str[i - counter]);//i dont know why but this line does me some problems
j++;
counter = 0;
}
counter++;
}
}
I am trying to create an array of strings, but strcpy_s is not letting me and I do not know why. Help will be much appreciated.
This error is because str[i - counter] is a char, not a pointer. You need to write &str[i - counter] or (str + i - counter).
See the signature of strcpy_s: errno_t strcpy_s(char *restrict dest, rsize_t destsz, const char *restrict src);
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*
 * Count the ' ' characters in `str`.
 * Returns 0 for a NULL or empty string.
 */
int count_spaces(char *str)
{
    int count = 0;

    if (str == NULL)
        return 0;
    for (size_t i = 0; str[i] != '\0'; i++) {
        if (str[i] == ' ')
            count++;
    }
    return count;
}

/*
 * Split `str` at every ' ' into a NULL-terminated array of newly allocated
 * strings.
 *
 * Each space is a delimiter in its own right, so leading, trailing or doubled
 * spaces yield empty strings; the result always has count_spaces(str) + 1
 * entries followed by a NULL sentinel.
 *
 * Returns NULL when `str` is NULL or empty, or when an allocation fails (in
 * which case nothing is leaked). The caller frees every entry and then the
 * array itself.
 */
char **get_strings(char *str)
{
    if (str == NULL || str[0] == '\0')
        return NULL;

    const size_t words = (size_t)count_spaces(str) + 1;

    /* +1 for the NULL sentinel; calloc already sets it (and every slot) to NULL. */
    char **dest = calloc(words + 1, sizeof *dest);
    if (dest == NULL)
        return NULL;

    const char *segment = str;
    for (size_t k = 0; k < words; k++) {
        const size_t len = strcspn(segment, " ");   /* length up to next ' ' or '\0' */

        dest[k] = malloc(len + 1);
        if (dest[k] == NULL) {
            while (k > 0)
                free(dest[--k]);
            free(dest);
            return NULL;
        }
        memcpy(dest[k], segment, len);
        dest[k][len] = '\0';

        segment += len;
        if (*segment == ' ')
            segment++;                              /* step over the delimiter */
    }
    return dest;
}
/*
 * Driver: split a sample sentence, print one word per line, and release
 * everything get_strings allocated.
 */
int main()
{
    char **dest = get_strings("this is a test for stackoverflow");

    /* get_strings returns NULL when it has nothing to hand back. */
    if (dest == NULL)
        return 1;

    for (size_t i = 0; dest[i] != NULL; i++) {
        printf("%s\n", dest[i]);
        free(dest[i]);
    }
    free(dest);
    return 0;
}
here is a quick (non-perfect) example to do it with char **
you give a string as parameter and the function will store each segment using space as delimiter in a char **
you can print it as i did in the main. Don't forget to free when you're done
Eg: input: char *str1 = "the are all is well";
char *str2 = "is who the";
output: common words in two given strings, return 2D array of strings.
#define SIZE 31
/*
 * Question exhibit: intended to return the words that occur in both `str1`
 * and `str2` as an array of strings.
 *
 * NOTE(review): `n` is declared without an initial value and is only ever
 * incremented, so every output[m][n] access uses an indeterminate index.
 */
char ** commonWords(char *str1, char *str2) {
int i,j=0,count1,count2,k=0,a,b,m=0,n;
// Word tables: one row per word of str1 / str2.
char str3[100][100], str4[100][100];
char **output;
// NOTE(review): allocated before the NULL check below, so this block is
// leaked when the function returns NULL.
output = (char **)malloc(SIZE*sizeof(char*));
if (str1 == NULL || str2 == NULL)
{
return NULL;
}
// Split str1 at spaces into the rows of str3.
for (i = 0; str1[i] != '\0'; i++)
{
if (str1[i] != ' ')
{
str3[j][k++] = str1[i];
}
else
{
str3[j][k++] = '\0';
j++;
k = 0;
}
}
str3[j][k++] = '\0';
// NOTE(review): when there was no space (j == 0) this yields a count of 0.
count1 = j > 0 ? j + 1 : j;
j = k = 0;
// Split str2 at spaces into the rows of str4.
for (i = 0; str2[i] != '\0'; i++)
{
if (str2[i] != ' ')
{
str4[j][k++] = str2[i];
}
else
{
str4[j][k++] = '\0';
j++;
k = 0;
}
}
str4[j][k++] = '\0';
count2 = j > 0 ? j + 1 : j;
// NOTE(review): k is not reset before the comparison loops; it still holds
// the position just past the terminator of the last str4 word.
for (i = 0; i < count1; i++)
{
for (j = 0; j < count2; j++)
{
// Compares single characters at column k (and k + 1, k + 2), not whole words.
if (str3[i][k] == str4[j][k])
{
// NOTE(review): `x == y == '\0'` parses as `(x == y) == '\0'`.
if (str3[i][k + 1] == str4[j][k + 1] && str3[i][k + 2] == str4[j][k + 2] == '\0')
{
a = i;
b = k;
while (str3[a][b] != '\0')
{
// NOTE(review): replaces `output` on every iteration with a block of
// SIZE * sizeof(char) bytes; the previous block is lost and output[m] is
// never pointed at any storage before it is written through.
output = (char **)malloc(SIZE*sizeof(char));
output[m][n] = str3[a][b];
n++;
b++;
}
output[m][n] = '\0';
}
else if (str3[i][k + 1] == str4[j][k + 1] && str3[i][k + 2] == str4[j][k + 2])
{
a = i;
b = k;
while (str3[a][b] != '\0')
{
// Same allocation pattern as in the branch above.
output = (char **)malloc(SIZE*sizeof(char));
output[m][n] = str3[a][b];
n++;
b++;
}
output[m][n] = '\0';
m++;
}
}
}
}
return output;
}
I am debugging this code in Visual Studio and the test fails. It shows "message: Exception code: C0000005", which means an access violation, i.e. an error related to memory access or allocation. Where did I go wrong?
You have the statement
output = (char **)malloc(SIZE*sizeof(char));
at two lines of your program.
You have to modify this statement in order to allocate memory for the double pointer output of type char**, and you also need to allocate memory for every element of output, like this:
int i;
output = (char **)malloc(SIZE*sizeof(char*));
for (i = 0; i < SIZE; i++)
output[i] = (char *)malloc(x*sizeof(char));
where x is the desired size.
Also check for NULL pointer return, for instance
if (output[i] == NULL)
....
I need to change the text in an array so that everywhere the character a appears, it is replaced by 123.
Example: for the given text ayasxka I should get either 12123k123 or 12323k123.
I almost got it to work, but instead of k between numbers, I got s, I mean, this is my result: 12123s123.
/*
 * Question exhibit: writes "123" over the output at every position where the
 * input has an 'a'; other characters are copied with a fixed offset.
 */
int main()
{
// NOTE(review): j is never used.
int i, j = 0;
char t[] = "ayasxka";
// NOTE(review): strlen(t) + 2 bytes are allocated, but only strlen(t) + 1 are
// cleared. For this 7-character input the last 'a' (i == 6) writes r[6..8],
// which fills the final allocated byte and leaves no room for a '\0'.
char *r = malloc(sizeof(char) * (strlen(t) + 2));
memset(r, '\0', (strlen(t) + 1));
for(i=0; t[i] != '\0'; i++)
{
if(t[i] == 'a')
{
// Output position equals input position, so successive expansions overlap.
r[i] = '1';
r[i+1] = '2';
r[i+2] = '3';
}
else
// Non-'a' characters land two positions to the right of their source
// index, where a later 'a' expansion may overwrite them.
r[i+2] = t[i];
}
printf("%s\n", r);
free(r);
return 0;
}
In your algorithm you should use:
/*
 * Replacement loop using a separate output cursor j. Statement order matters:
 * j is re-synchronised to i on every 'a', so successive expansions overlap.
 */
for(i=0; t[i] != '\0'; i++)
{
if(t[i] == 'a')
{
// Restart writing at the input position, then emit the three digits.
j = i;
r[j++] = '1';
r[j++] = '2';
r[j++] = '3';
}
// A non-'a' character is copied only when the output cursor sits exactly at
// its input position; otherwise it is skipped.
else if(j==i)
r[j++] = t[i];
}
// Terminate at the final cursor position.
r[j] = '\0';
and in your malloc you should add +1 character for the '\0', because strlen() doesn't count that, so
char *r = malloc(3 + strlen(t));
instead of
char *r = malloc(2 + strlen(t));
This will give you 12123k123
The problem is strlen(t) + 2 is not enough, consider the worst case i.e. the string is just made of only a characters, then it should be
char *r = malloc(3 * strlen(t) + 1);
and sizeof(char) == 1 is mandated by the C standard, by the way, so multiplying by it is unnecessary.
And you will need a counter for the position in the r string, say j
Try this
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/*
 * Expand every 'a' of the sample into "123" and print the result. The output
 * buffer is sized for the worst case in which every input character is an 'a'.
 */
int main()
{
    char t[] = "ayasxka";
    size_t length = strlen(t);

    /* Three output characters per input character, plus the terminator. */
    char *r = malloc(3 * length + 1);
    if (r == NULL)
        return -1;

    char *out = r;
    for (const char *in = t; *in != '\0'; in++) {
        if (*in != 'a') {
            *out++ = *in;
            continue;
        }
        *out++ = '1';
        *out++ = '2';
        *out++ = '3';
    }
    *out = '\0';

    printf("%s\n", r);
    free(r);
    return 0;
}
/*
 * In-place variant, scanning front to back. The buffer starts zero-filled, so
 * a zero byte marks a position that has not been written yet. Each 'a' writes
 * "123" starting at its own index, overwriting what an earlier 'a' put there;
 * any other character is stored only if its position is still untouched.
 */
int i;// j = 0;//unused `j`
char t[] = "ayasxka";
char *r = calloc(strlen(t) + 2 + 1, sizeof(char));//change size, +2: for last a, +1: for NUL
//memset(r, '\0', (strlen(t) + 1));//calloc initialize by 0
for(i=0; t[i] != '\0'; i++){
if(t[i] == 'a'){
r[i] = '1';
r[i+1] = '2';
r[i+2] = '3';
}
else if(r[i] == '\0'){//Not yet been set to the value
r[i] = t[i];
}
}
printf("%s\n", r);//12123k123
free(r);
/*
 * Same in-place variant, but scanning back to front. Because later positions
 * are written first, an 'a' nearer the start overwrites the digits written
 * for an 'a' nearer the end, which gives the other accepted output.
 */
char t[] = "ayasxka";
int i, len = strlen(t);
// Zero-filled buffer: +2 for the digits of a trailing 'a', +1 for the NUL.
char *r = calloc(len + 2 + 1, sizeof(char));
for(i=len-1; i>=0; --i){
if(t[i] == 'a'){
r[i] = '1';
r[i+1] = '2';
r[i+2] = '3';
}
// Copy other characters only into positions that are still zero.
else if(r[i] == '\0'){
r[i] = t[i];
}
}
printf("%s\n", r);//12323k123
free(r);