This block of code reads a dictionary file and stores it in a hashed array. This hashing array uses linked list collision resolution. But, for some incomprehensible reason, the reading stops in the middle. (i'm assuming some problem occurs when linked list is made.) Everything works fine when data is being stored in a empty hashed array element.
#define SIZE_OF_ARRAY 350
typedef struct {
char* key;
int status; // (+1) filled, (-1) deleted, 0 empty
LIST* list;
}HASHED_ARRAY;
void insertDictionary (HASHED_ARRAY hashed_array[])
{
//Local Declaration
FILE* data;
char word[30];
char* pWord;
int index;
int length;
int countWord = 0;
//Statement
if (!(data = fopen("dictionaryWords.txt", "r")))
{
printf("Error Opening File");
exit(1);
}
SetStatusToNew (hashed_array); //initialize all status to 'empty'
while(fscanf(data, "%s\n", word) != EOF)
{
length = strlen(word) + 1;
index = hashing_function(word);
if (hashed_array[index].status == 0)//empty
{
hashed_array[index].key = (char*) malloc(length * sizeof(char));//allocate word.
if(!hashed_array[index].key)//check error
{
printf("\nMemory Leak\n");
exit(1);
}
strcpy(hashed_array[index].key, word); //insert the data into hashed array.
hashed_array[index].status = 1;//change hashed array node to filled.
}
else
{
//collision resolution (linked list)
pWord = (char*) malloc(length * sizeof(char));
strcpy (pWord, word);
if (hashed_array[index].list == NULL) // <====== program doesn't enter
//this if statement although the list is NULL.
//So I'm assuming this is where the program stops reading.
{
hashed_array[index].list = createList(compare);
}
addNode(hashed_array[index].list, pWord);
}
countWord++;
//memory allocation for key
}
printStatLinkedList(hashed_array, countWord);
fclose(data);
return;
}
createList and addNode are both ADT function. Former takes a function pointer (compare is a function that I build inside the main function) as a parameter, and latter takes list name, and void type data as parameters. compare sorts linked list. Please spot me the problem .
Depending on where you declare the hashed_array you pass to this function, the contents of it may not be initialized. This means that all contents of all entries is random. This includes the list pointer.
You need to initialize this array properly first. The easiest way is to simple use memset:
memset(hashed_array, 0, sizeof(HASHED_ARRAY) * whatever_size_it_is);
This will set all members to zero, i.e. NULL for pointers.
Related
I just started learning C recently, and am having issues figuring out memory allocation. I have spent about the last 2~3 days in my extra time trying to figure this out, but have not found a solution yet. So first, I have two structs:
struct _list {
// arr is an array of string arrays
char **arr;
// recs tracks how many records are in the list
size_t recs;
// arrSizes records the size of each string array in arr
size_t *arrSizes;
};
typedef struct _list list_t;
and
struct _string {
char *string;
// size is used to store strlen
size_t size;
};
typedef struct _string string_t;
I initialize the above structs respectively in the following ways.
list_t:
list_t *NewList() {
list_t *List = NULL;
List = malloc(sizeof(*List));
if (List == NULL) {
fprintf(stderr, "Failed to allocate memory to list structure.\n");
return NULL;
}
List->arr = malloc(sizeof(List->arr));
if (List->arr == NULL) {
free(List);
fprintf(stderr, "Failed to allocate memory to list array.\n");
return NULL;
}
List->arrSizes = malloc(sizeof(List->arrSizes));
if (List->arr == NULL) {
free(List);
fprintf(stderr, "Failed to allocate memory to size array.\n");
return NULL;
}
List->recs = 0;
return List;
}
string_t:
// a string array read in by the program is passed with "char* record"
string_t *NewString(char *record)
{
string_t *String = NULL;
String = malloc(sizeof * String);
if (String == NULL) {
fprintf(stderr, "Failed to allocate memory to string structure.\n");
return NULL;
}
String->size = strlen(record) + 1;
String->string = malloc(String->size);
if (String->string == NULL) {
free(String);
fprintf(stderr, "Failed to allocate memory to string array.\n");
return NULL;
}
strcpy(String->string, record);
return String;
}
I read lines from a file and load them into a "matching results" buffer using something like the following code. Please ignore exits and the fact that I don't have null handling after struct initialization is complete; I will add something more useful later. Also, sorry about the length. I edited quite a bit to produce the smallest example I could think of that reproduces the issue.
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
// Check if File exists
void FileExists(FILE *FilePath) {
if (FilePath == NULL) {
fprintf(stderr, "Error: File not found.\n");
exit(1);
}
}
// Delete a string_t struct
int delString(string_t *Structure)
{
if (Structure != NULL) {
free(Structure->string);
free(Structure);
return 0;
}
return 1;
}
// Allocate memory for additional elements added to members of list_t struct
void AllocList(list_t *List, size_t StrLen)
{
char **ArrStrArr_tmp;
size_t *SizeArr_tmp;
char *StrArr_tmp;
ArrStrArr_tmp = realloc(*List->arr, sizeof(**ArrStrArr_tmp) * List->recs);
SizeArr_tmp = realloc(List->arrSizes, sizeof(*SizeArr_tmp) * List->recs);
StrArr_tmp = malloc(sizeof(*StrArr_tmp) * StrLen);
if ((ArrStrArr_tmp == NULL) || (SizeArr_tmp == NULL)
|| (StrArr_tmp == NULL)) {
fprintf(stderr, "Failed to allocate memory.\n");
exit(1);
}
else {
List->arr = ArrStrArr_tmp;
List->arrSizes = SizeArr_tmp;
(List->arr)[List->recs-1]= StrArr_tmp;
}
}
// Add a record to a buffer
int AddRecord(list_t *List, char *AppendRecord)
{
string_t *line = NewString(AppendRecord);
List->recs++;
AllocList(List, line->size);
(List->arr)[List->recs - 1] = line->string;
(List->arrSizes)[List->recs - 1] = line->size;
delString(line);
return 0;
}
// Sends entire string array to lowercase
void tolowerString(char *UpperString, size_t StrLen)
{
int i;
for (i = 0; i < (int)StrLen; i++) {
UpperString[i] = (char)tolower(UpperString[i]);
}
}
// Attempt to match string in lines from a file; lines with matches are read into a buffer
int main()
{
char line[80];
int PrintedLines = 0;
list_t *ResultList = NewList();
char *MyString = "theme";
char *Filename = "List.txt";
FILE *in = fopen(Filename, "r");
// Check if file exists
FileExists(in);
while (fscanf(in, "%79[^\n]\n", line) == 1)
{
char LookString[80];
strcpy(LookString, line);
LookString[strlen(LookString) - 1] = '\0';
// send lookstring to lowercase
tolowerString(LookString, strlen(LookString));
// add line to buffer ResultList if it contains MyString
if (strstr(LookString, MyString)) {
AddRecord(ResultList, line);
PrintedLines++;
}
}
// If PrintedLines is at zero after the while statement terminates, return in abnormal state
if (PrintedLines == 0) {
fprintf(stderr, "No matches found. Please check your input if you are sure there is a match.\n");
return 1;
}
fclose(in);
return 0;
}
When trying to read the 5th matching record into my buffer, my program crashes at this line in the AllocList function:
ArrStrArr_tmp = realloc(*List->arr, sizeof(**ArrStrArr_tmp) * List->recs);
I get the following message on the version I have posted above:
realloc(): invalid old size
aborted (core dumped)
My guess is that I'm running into an error after some default amount of memory from my initial malloc is used, but I have no clue what is actually causing this. In my actual code I'm printing all sorts of things (pointer sizes, etc.), but I still can't spot anything. What's strange is, before writing this post, I was actually seeing the error:
realloc(): invalid next size
aborted (core dumped)
But I can't reproduce it now for some reason...
I have also read that I should reallocating memory for my list_t struct whenever I add an element to one of it's members, but reallocating it actually doesn't change where or how this program crashes. In any case, I'm not sure how I should be reallocating memory for my struct. To clarify, my questions are:
What is causing this memory issue?
Should I be reallocating memory for my list struct, and how much should I be reallocating given that I'm adding an extra element to the arr and arrSizes members?
As the crash suggests, the line
ArrStrArr_tmp = realloc(*List->arr, sizeof(**ArrStrArr_tmp) * List->recs);
is wrong.
This have it read an uninitialized buffer allocated via malloc(), whose value is indeterminate.
The intension of this line is to re-allocate the array pointed at by List->arr, which is an array of char*.
Therefore, the line should be
ArrStrArr_tmp = realloc(List->arr, sizeof(*ArrStrArr_tmp) * List->recs);
just like the following line, which is re-allocating an array of size_t.
SizeArr_tmp = realloc(List->arrSizes, sizeof(*SizeArr_tmp) * List->recs);
Also I found 2 more points for improvement:
Firstly, the usage of some malloc() in the function NewList are not good.
The function is creating zero-element array, so you won't need space for List->arr and List->arrSizes.
Also note that realloc() accepts NULL as the buffer to re-allocate.
list_t *NewList() {
list_t *List = NULL;
List = malloc(sizeof(*List));
if (List == NULL) {
fprintf(stderr, "Failed to allocate memory to list structure.\n");
return NULL;
}
List->arr = NULL;
List->arrSizes = NULL;
List->recs = 0;
return List;
}
Secondly, you are copying pointer instead of string in AddRecord,
so you have problems of memory leak and potential use-after-free.
It seems the string should be copied:
(List->arr)[List->recs - 1] = line->string;
should be
strcpy((List->arr)[List->recs - 1], line->string);
I am trying to assign element hash table[TABLE_SIZE] with input text file. So I used strcpy function. But it didn't copy into the array.
I've tried malloc to make the array in order to write in it. But it didn't work so I made a element* type variable pointing the hash_table array.But still didn't work.
//this is the header file
typedef struct {
char key[100];
char data[100];
} element;
element hash_table[TABLE_SIZE];
// For caomparison count
int num_comparison;
// 파일에서 단어들을 읽어 해시테이블 구성
int build_dictionary(char *fname);
int build_dictionary(char *fname) {
int i = 0; // num of data
char key[100], data[200];
FILE *ifp;
//pointing to the hash_table array
element* hash_table_p = hash_table;
hash_table_p = (element*)malloc(sizeof(element));
//file opening error
if ((ifp = fopen(fname, "r")) == NULL) {
printf("No such file ! \n");
exit(1);
}
while (fscanf(ifp, "%s %s", key, data) == 2) {
// (key data) assigning to array
//i've tried this because hash_table[i].data didn't work
strcpy(hash_table_p->data, data);
strcpy(hash_table_p->key, key);
strcpy(hash_table[i].data, hash_table_p->data);
strcpy(hash_table[i].key, hash_table_p->key);
i++;
//checking if it is well done
printf(" %s %s \n", hash_table_p->key, hash_table_p->data);
printf(" %d %s %s \n",i , hash_table[i].data, hash_table[i].key );
}
fclose(ifp);
return(i);
}
//the input text file went as below
one 하나
two 둘
three 셋
four 넷
five 다섯
When i executed the build_dictionary function, only one with the hash_table_p strcpy was assigned well and the hash_table had nothing in it.
element* hash_table_p = hash_table;
hash_table_p = (element*)malloc(sizeof(element));
You are overwritting the first assignment (getting new room for hash_table_p when you call malloc), as far as I can see you already define the size for the table, so you don't need to reserve more space (delete the line with malloc) and just increment the position of the pointer on each iteration (as you are already doing).
so I'm having a little problem with my struct array not doing what its supposed to. I get no compiler warnings or errors when building the program.
int Array_Size=0;;
int Array_Index=0;
FILE *Writer;
struct WordElement
{
int Count;
char Word[50];
};
struct WordElement *StructPointer; //just a pointer to a structure
int Create_Array(int Size){
StructPointer = (struct WordElement *) malloc(Size * sizeof(StructPointer));
Array_Size = Size;
return 0;
}
int Add_To_Array(char Word[50]){
int Word_Found=0;
for(int i=0; i <= Array_Size && Word_Found!=1; i++)
{
if(strcmp(StructPointer[i].Word, Word)) // This should only run if the word exists in struct array
{
StructPointer[i].Count++;
Word_Found=1;
}
}
if(Word_Found==0) // if the above if statement doesnt evualate, this should run
{
strcpy(StructPointer[Array_Index].Word, Word); //copying the word passed by the main function to the struct array at a specific index
printf("WORD: %s\n", StructPointer[Array_Index].Word); // printing it just to make sure it got copied correctly
Array_Index++;
}
return 0;
}
int Print_All(char File_Name[50])
{
Writer = fopen(File_Name, "w");
printf("Printing starts now: \n");
for(int i=0; i < Array_Size; i++)
{
fprintf(Writer, "%s\t\t%d\n",StructPointer[i].Word, StructPointer[i].Count);
}
free(StructPointer);
return 0;
}
These functions get called from a different file, The Add_To_Array is called when the program reads a new word form the text file. That function is supposed to check if the word already exists in the struct array and if it does, it should just increment the counter. If it doesn't, then it adds it.
The Print_All function is called after all the words have been stored in the struct array. Its supposed to loop through them and print each word and their occurrence. In the text file, there are 2 of every words but my program outputs:
this 13762753
document -1772785369
contains 1129268256
two 6619253
of 5701679
every 5570645
word 3342389
doccontains 5374021
I don't know what to make of this as im really new to C programming... It's probably worth mentioning the if(Word_Foun==0) doesn't execute
StructPointer = malloc(Size * sizeof(*StructPointer));
This will be the correct allocation. Otherwise you will have erroneous behavior in your code. Also check the return value of malloc.
StructPointer = malloc(Size * sizeof(*StructPointer));
if(NULL == StructPointer){
perror("malloc failure");
exit(EXIT_FAILURE);
}
You are allocating for struct WordElement not a for a pointer to it. You already have a pointer to struct WordElement all that you needed was memory for a struct WordElement.
Also in the loop you are accessing array index out of bound
for(int i=0; i <= Array_Size && Word_Found!=1; i++)
^^^
It will be i < Array_Size.
In case match occurs you want to set the variable Word_found to 1.
if(strcmp(StructPointer[i].Word, Word) == 0){
/* it macthed */
}
Also Writer = fopen(File_Name, "w"); you should check the return value of fopen.
if(Writer == NULL){
fprintf(stderr,"Error in file opening");
exit(EXIT_FAILURE);
}
Also when you are increasing the Array_index place a check whether it might access the array index out of bound.
The more global variable you use for achieving a small task would make it more difficult to track down a bug. It is always problematic because the places from which data might change is scattered - making it difficult to manage.
First post, extremely limited in coding knowledge and new to C. Be gentle! I am at the point where "trying" different things is just confusing me more and more. I need someone's correct guidance!
This particular problem is from an online edX course I am attempting which ultimately when implemented correctly, checks a given word read in from a text file (the 'check' function) and compares it to each word read into (from the 'load' function) a linked list of structs.
I believe I have the load function implemented correctly as when I use gdb, as I am seeing what I anticipate as I step through it, but my question and my problem relates specifically to the check function. I still have a lot to implement to finish my code but while testing with gdb, I am not seeing values of the char* member of the struct correspond with what I anticipate I should see.
When using gdb and stepping through the 'check' function and trying to access the dword member of the struct nodes in the linked list I created in the load function, I anticipate I should see a string for the char* member. For instance, I anticipate the word "cat" assigned to current->dword , but am instead seeing in gdb when I test:
~(gdb) print current->dword
$13 = 0xbfffede2 "\004\b\214\365\372D\300\355\377\277"
My thoughts are that I'm still only accessing an address somehow and not the actual value, but I'm oblivious as to why this is. When the node is created in the load function, a value is assigned to the dword member correctly (at least as far as I can tell while stepping through the code in gdb) but doesn't seem to be accessed correctly in the check function. Any help for a newbie would be appreciated!
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dictionary.h"
typedef struct node
{
char* dword;
struct node* next;
}
node;
// keep track of #of words in dictionary loaded
int wordCounter = 0;
// create root for hash table
node* root[26];
// create cursor to keep place in creating, pointing, and traversing through nodes
node* current = NULL;
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
// size of word read into buffer
int wordSize = sizeof(word);
// prepare to make a new lowercase only word for comparison to lowercase only dictionary
char bufWord[wordSize];
// make it
for(int i = 0; i < wordSize; i++)
{
if (i == wordSize - 1)
{
bufWord[i] = '\0';
}
else
{
bufWord[i] = tolower(word[i]);
}
}
// hash word to achieve proper root node location
int hash = bufWord[0] - 97;
// point to the correct root node to begin traversing
current = root[hash];
// make sure there is even a word in hash table location
if(root[hash] == NULL)
{
return false;
}
else if(root[hash] != NULL)
{
// progress through the nodes until the last node's next pointer member is NULL
while(current != NULL)
{
// compare 1st letter only of current->dword[i] to bufWord[i] to save time
// if they don't match, return false
// if they do match then continue
\
char dictWord[wordSize];
// hold copy of struct member value to compare to dictWord
char* wordTemp = current->dword;
//
for(int i = 0; i < wordSize; i++)
{
dictWord[i] = wordTemp[i];
}
// do a spell check
if(strcmp(bufWord, dictWord) == 0)
{
return true;
}
else
{
// set current to the next node if any or NULL if it's already the last node in the list
current = current->next;
}
}
}
return false;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
// buffer for reading in dictionary words
char wordIn[LENGTH + 1];
// open the dictionary file
FILE* newDict = fopen(dictionary, "r");
for (int i = 0; i < 27; i++)
{
root[i] = NULL;
}
// while there are words to read
while(fscanf(newDict, "%s ", wordIn) > 0)
{
// keep track of #of words for constant time read in size function
wordCounter++;
// hash the first letter for the location in root
int hash = wordIn[0] - 97;
// malloc space for a new node
node* newNode = malloc(sizeof(node));
// error check
if (newNode == NULL)
{
return false;
}
// set value member of node to current word
newNode->dword = wordIn;
// first insertion into linked list if that root node has not been used yet
if(root[hash] == NULL)
{
// sets to NULL
newNode->next = root[hash];
// link it
root[hash] = newNode;
}
else if(root[hash] != NULL)
{
// starts at the root
node* current = root[hash];
// insert into new beginning of list
newNode->next = current;
root[hash] = newNode;
}
}
fclose(newDict);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordCounter;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
// TODO
return false;
}
The source of your problem is the line:
newNode->dword = wordIn;
wordIn is a local array in load. You are storing the address of wordIn in the dword of your nodes. When you return from load, those addresses are no valid any longer.
What you need to do is allocate memory for the string in wordIn, assign the allocated memory to newNode->dword and copy the contents of wordIn to newNode->dword.
If your platform provides the non-standard function strdup, you can change the above line to:
newNode->dword = strdup(wordIn);
If not, it is easily implemented:
char* strdup(char const* in)
{
char* r = malloc(strlen(in)+1);
strcpy(r, in);
return r;
}
The following program stores every word and then prints them with a number of occurrences.
Global typedef declaration:
typedef struct {
char * word;
int occ;
}
words;
words *data=NULL;
I have a problem with the search function. I've created a function returning int that looks like this: (max is the constantly updated size of an array of structures, that's why I call search function after EOF is reached.)
int search(char *word,int max)
{
int i;
for(i=0; i<max; i++)
{
if(!strcmp(data[i].word,word)) return i;
}
return -1;
}
But I noticed I'm supposed to write a search function having that prototype:
struct abc *find(char *word)
So I've created the following code:
struct words *findword(char *word)
{
struct words *ptr;
for (ptr = data; ptr != NULL; ptr++) { /* IS THE STOP CONDITION OK? */
if (strcmp(word, ptr->word) == 0)
return ptr;
}
return NULL;
}
And I receive many errors during compilation:
reverse.c: In function ‘findword’:
reverse.c:73: warning: assignment from incompatible pointer type
reverse.c:73: error: increment of pointer to unknown structure
reverse.c:73: error: arithmetic on pointer to an incomplete type
reverse.c:74: error: dereferencing pointer to incomplete type
reverse.c: In function ‘main’:
reverse.c:171: error: ‘which’ undeclared (first use in this function)
reverse.c:171: error: (Each undeclared identifier is reported only once
reverse.c:171: error: for each function it appears in.)
make: * [reverse.o] Error 1
which is an int variable assigned to the return of my firstly written search function.
The error with which is easily fixed, but I don't know how to replace that (solution working with my base search function):
data[which].occ++;
How to fix it so that it'll work with my new approach to search?
EDIT
main() added:
int main(int argc, char **argv)
{
char *word;
words *temp;
int c,i,num;
/*int which;*/
FILE *infile;
if(argc!=2) {}
if((infile=fopen(argv[1],"r"))==NULL) {}
num=0;
while(1)
{
c=fgetc(infile);
if(c==EOF) break;
if(!isalpha(c)) continue;
else ungetc(c,infile);
word=getword(infile);
word=convert(word);
/*which=search(word,num);*/
if(findword(word))
{
if(!(temp=realloc(data,sizeof(words)*(num+1))))
{}
else
data=temp;
data[num].word=strdup(word);
data[num].occ=1;
num++;
}
else
data[which].occ++;
free(word);
}
sort(num-1);
for(i=0;i<num;i++)
{}
free(data);
if(fclose(infile))
{}
return 0;
}
I've left {} for the irrelevant pieces of code eg. error handling.
EDIT2
The things I'm asking for above, are fixed. However, I get a seg fault now.
I'll give a link to the whole code, I don't want to put it in an edited post since it'd create a big mess. Seg fault is caused by lines 73 and 152 (strcmp is not working somehow). Hope that full code will be easier to understand.
FULL CODE
The problems are with your findword function, lets go through all the lines
struct words *ptr;
This is not what you ment to do. The typedef you used in defining the structure allows you to not need to write struct anymore. This is why you're getting the error: reverse.c:73: error: increment of pointer to unknown structure. What you want is just:
words *ptr;
Next, the loop:
for(ptr=data; //This is fine, you're assigning your local ptr to the global data. I assume that's something valid
ptr != NULL; //That could OK too... we can loop while ptr is not NULL
ptr++) //This line makes no sense...
You may want to look up how for loops work again, the point is you're incrementing something until it hits a condition. ptr++ will move where you're pointing too, so you'll no longer be pointing to your structure.
I need to see your main() function to understand what you're trying to accomplish, but based on the prototype you have to follow, I think the easiest solution would be something like:
void main()
{
// init your vars
bool more_words_to_add = true;
words *ptr = NULL;
int i;
// populate your array of words
while(more_words_to_add) {
for(i = 0; i<max; i++) {
if(ptr = findword("word")) //if we find the word
ptr->occ++; //increment the number of times we found it
else {
//I don't know what you want to do here, it's not clear what your goal is.
//Add the new word to your array of words and set occ to 1,
//then increment max because there's one more word in your array?
}
}
//get the next word to fine, or else set more_words_to_add = false to break
}
}
If this type of solution is what you're looking to do, then you can adjust your findwords function to be very simple:
struct words *findword(char *word)
{
words *ptr = data;
if (strcmp(word, ptr->word) == 0)
return ptr;
return NULL;
}
EDIT: For your new error I suspect the problem is with your memory allocation, see this short example of using your structure:
words *findword(char *word)
{
words *ptr = data;
if(strcmp(word, ptr->word) == 0)
return ptr;
return NULL;
}
int main(){
words *ptr;
data = realloc(data, sizeof(words));
data->word = "hello"; //DO NOT SKIP THESE LINES
data->occ = 0; //DO NOT SKIP THESE LINES
if(ptr = findword("hello")) {
ptr->occ++;
printf("I found %d %s's\n",ptr->occ, ptr->word);
}
}
mike#linux-4puc:~> ./a.out
I found 1 hello's
You can see here that you need to alloc some memory for the global structure then you can store data in it and pass pointers to it.
EDIT 2:
Your main() code does this:
if((ptr = findword(word)))
{
//do stuff
}
else
ptr->occ++;
That's not going to work because if findword() fails it returns NULL, so in the if check ptr is set to NULL, then in the else you're trying to deference NULL. If (and keep in mind I'm not really reading your logic so this is up to you) you really want to increment ptr->occ if a word is not found then you want this instead:
if(findword(word))
{
ptr = findword(word);
//do stuff
}
else
ptr->occ++; //increments the current ptr's occ, no new ptr was assigned.
for (ptr = data; ptr != NULL; ptr++) {
/* IS THE STOP CONDITION OK? */
No. Your pointer just keeps getting incremented. The only thing that would make it NULL in that code is integer overflow. You could look at what it points to, and see if that is NULL, IF you preset the data area to 0's:
#define NUM_WORDS 100
data = calloc(NUM_WORDS,sizeof(words));
Or
#define NUM_WORDS 100
int bytes = NUM_WORDS * sizeof(words);
data = malloc(bytes);
memset(data,0,bytes);
....
for (ptr = data; ptr->word != NULL; ptr++) {
If you don't want to preset the data area to 0 then you will have to pass the current amount of structs currently held in the data area to your function in order to know how much to loop.
There's no such thing as struct words in your program; there's an unnamed struct type, and a typedef words to that type. Either use struct words or words consistently.
You'll then need to replace
data[which].occ++;
with
result->occ++;
where result is the return value from your new search function.