Issue with infinite loop in radix tree implementation - c

I'm having trouble with my radix tree implementation. The idea is that I create the first node, then enter a number of binary numbers. The binary numbers determine whether a left node (0) or a right node (1) is created. Once I reach the end of the binary number, I set a node to "active".
Then I search through the tree to find an active node, and output the original binary numbers again by checking in which direction I had to go to reach the active node.
Here is the complete code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
enum { false, true };
typedef struct radixNode {
bool active;
struct radixNode * pnt;
struct radixNode * l;
struct radixNode * r;
} node;
void insert(node *root, char * B) {
printf("String: %s\n", B);
printf("1st: %c", B[0]);
printf("\n\n", B);
// digit is zero so we go left
if (B[0] == '0') {
printf("till here if");
// left child doesn't exist, create it
if (root->l == NULL) {
root->l = malloc(sizeof(node));
/* if the next index in the string does NOT contain a 1 or 0,
the current index is the last index and the node is activated */
if (B[1] == 1 || B[1] == 0)
root->l->active = false;
else
root->l->active = true;
root->l->pnt = root;
root->l->l = NULL;
root->l->r = NULL;
insert(root->l,B++); // B++ removes the first digit of the string
}
// left child exists, traverse
else {
insert(root->l,B++);
}
}
// digit is one, go right
else if (B[0] == '1') {
printf("first was 1\n");
// right child doesn't exist, create it
if (root->r == NULL) {
printf("if triggered\n");
root->r = malloc(sizeof(node));
/* if the next index in the string does NOT contain a 1 or 0,
the current index is the last index and the node is activated */
if (B[1] == 1 || B[1] == 0)
root->r->active = false;
else
root->r->active = true;
root->r->pnt = root;
root->r->l = NULL;
root->r->r = NULL;
insert(root->r,B++);
}
// left child exists, traverse
else {
printf("else triggered\n");
insert(root->r,B++);
}
}
}
node * printTreeMin(node *root) {
char C[10];
/* goes left until it can't, appends 0 to string
till it can't. if node is active, print the string */
while (root->l != NULL) {
C[strlen(C)] = '0';
if (root->active == true)
printf("%s\n",C);
root = root->l;
}
return root;
}
// prints the next smallest binary number in the tree, returns the node it printed
node * printNextSmallest(node * root) {
char C[10];
// if right child exists, go there and find lowest node (after if same deal as printTreeMin() )
if (root->r != NULL) {
C[strlen(C)] = '1';
if (root->active == true)
printf("%s\n",C);
root = root->r;
while (root->l != NULL) {
C[strlen(C)] = '0';
if (root->active == true)
printf("%s\n",C);
root = root->l;
}
return root;
}
node * temp = root->pnt;
while (temp != NULL && root == temp->r) {
root = temp;
temp = temp->pnt;
}
return temp;
}
void printRadixTree(node *root) {
root = printTreeMin(root);
while (printNextSmallest(root) != NULL)
root = printNextSmallest(root);
}
void test() {
node * tree = malloc(sizeof(node));
tree->l = NULL;
tree->r = NULL;
// a)
insert(tree,"101000");
insert(tree,"10100");
insert(tree,"10110");
insert(tree,"101");
insert(tree,"1111");
// b)
printRadixTree(tree);
}
int main() {
test();
}
Here is the output:
if triggered
String: 101000
1st: 1
first was 1
if triggered
String: 101000
1st: 1
first was 1
if triggered
String: 101000
1st: 1
(and continuing ad infinitum)
Clearly I have an issue within the insert() function's recursion but considering I remove the first char of the binary number string when doing the recurrence, I don't understand how it can run infinitely.

The reason for the infinite recursion is your choice of auto-increment operator. You want the prefix, not suffix form.
insert(..., B++)
increments the pointer (stripping the first character) after calling insert.
Instead the calls should be
insert (..., ++B)
You also have problems with your active flag, and this is your culprit
if (B[1] == 1 || B[1] == 0)
I think you meant
if (B[1] == '1' || B[1] == '0')
The first form is checking for a binary zero or one, rather than an ASCII character.
The result of this is that your active flag will probably be set incorrectly for most nodes. I expect that will then cause problems when traversing the tree. In fact, active will only be set to false when you are looking at the last '0' or '1' in the string (as B[1] at that point will be the terminating '\0').
Also, with recusive routines it is always a good idea to make the base case explicit, rather than implicit. Thus, one of the first blocks of code in insert should probably be
if (B[0] != '1' && B[0] != `0`)
return;
then you can replace the else if with a simple else
if (B[0] == '0')
{
// ... go left
}
else
{
// ... go right
}

Related

Binary Search Tree not sorting properly/not following my order

I am trying to create a Binary Search Tree (BST) for a really large txt file (around 150000 lines), but my BST is not sorting properly. My current theory is, when I fetch the key from the txt file, it doesn't register properly, making it fetch a random number from memory. Other than that, I have no idea whats wrong.
NOTE: the txt file has the following format (key on left, value on right)
0016718719 #:#-;QZL=!9v
0140100781 5:`ziuiCMMUC
0544371484 W{<_|b5Qd534
0672094320 QcvX=;[lpR("
0494074201 FB[?T5VHc7Oc
0317651971 K`9#Qn{#h]1z
0635368102 KGVm-?hX{Rv7
0107206064 =n1AsY32_.J9
0844660357 L4qL)x{>5e8H
0699014627 v/<4%"sJ4eHR
0786095462 G!cl'YMAL*#S
0067578317 6{"W,j2>#{p*
0730012647 rAi?q<X5NaKT
0715302988 ,8SrSw0rEEc&
0234601050 PRg$$:b|B0'x
0537081097 fgoDc05rc,n|
0226858124 OV##d6th'<us
1059497442 2,'n}YmK,s^i
0597822915 LhicQ#r<Yh\8
0742176394 g`XkLi.>}s+Q
0984120927 DyB:-u*}E&X)
0202768627 8(&zqlPV#DCb
0089402669 tv-vTkn"AIxt
1045610730 hOxZQ<"yyew`
0671297494 )r7gD;:9FHrq
0245267004 f0oO:/Zul0<"
0766946589 n/03!]3t0Lux
0521860458 _D+$,j#YT$cS
0891617938 t%gYiWV17Z/'
0566759626 r2A'PB'xhfw#
0221374897 e[-Nf"#<o9^p
0428608071 46S4!vZA.S&.
0755431241 mgE?2IewG!=g
0534588781 %P|b"_d'VF0S
0030447903 Q&Dow27tkc9+
0957065636 [pHMrM*q*ED7
0739800529 wR;u\Ct/-Vzo
0556668090 =|T.z]?.:DnC
0649777919 2}5M=.u'#1,L
0464018855 x+JImm6w/eG]
0460707117 lxY}\Cdn%!rs
0273053706 s9GmIAE."j|2
0596408906 %'1|R%3tI-Tz
0473143619 k,h&_7rT)?Nb
0922139211 [e0Q1].<Qb;[
0207160144 t!&lXR7`eW#n
0128147823 L,d'7]ZTvPDQ
0178779865 (&--sQ..)7d'
0531711943 4o'^xS6rK]yl
0429655621 eyd7UwKQ][%i
0566959905 k{)d*OH&w2P<
0472331841 DiZF(W"wO42H
0589473577 V0$9-X%YD_kD
0272100993 i%c&R{^#SM$#
0956804045 BtY'cQ){wR{{
0635780805 dWnP0sP2]Tu[
0874803681 swn\*HS08v<w
1027292189 w#E:LaCg(L(I
0592836099 ]&Q({r^(/H%0
0882899568 zb_4acX8E<2-
0542667063 n'xbSaoXArp6
0289624942 G5X#aqr7+*pb
0682188682 H^o)>1\4o5WV
0984355947 =Z{wmP'Z(#2r
0459720821 1vNg_4`3IUUJ
0563538441 uA>QKi]Z31#x
1032927818 $jReN<b/(e{E
0299897321 j=PAkNj#H(L^
0428967901 8lszH<!m\C`w
0668128293 SO("{Rm29l#Y
0354915591 2coM%<Iiwwn<
0672908146 r3VRE;Q3)zi>
0435139431 d_q_)mM"X]N-
0728369037 >X_!}vtc;G(M
0982520682 {h\5gbvzsqGZ
0396776915 $py=A?iNde7(
0511806860 #T+Y0HI9/U6K
0013335601 <$8f|iV\=/RD
0511264736 NFI-#xssP)F*
0727884351 5ZMcmA0[K3P2
0460487630 .D'h(f"LV]#x
0178037927 o3a&fO}="I.S
Here is my Main file:
#include "LAB3BST2.h"
#include <string.h>
#define HEIGHT_WRITTEN 1
#define FINDPARENTHELPER_WRITTEN 1
#define DELETE_WRITTEN 1
#define LOOKUP_written 1
int digit(char *key) {
int number = 0;//create a
while (*key != '\0') {//loop until the end of the string (number)
number = 10 * number + *key - '0';//(10*number) this represents moving the current value of key one up
//(*key - '0') the current char subtracted by '0' or the value of 48
// example: (char '1') - '0' == int 1. Reference ASCII chart to see hexadecimal logic
*key++;
}
return number;
}
int main(void) {
Node *n = NULL; // eliminates compiler warning
FILE *fp;
int c;
Tree *t = NULL;
char *pbuff = (char *)malloc(256);
char *p, *key, *pass;
int temp = 0;
long bst_node = 0;
fp = fopen("IDENTS.txt", "r");
if (!fp) {
printf("File Open Failed\n");
return 0;
}//initialize the head of the tree
while (1) {
p = fgets(pbuff, 256, fp);
if (p == NULL)
break; //memory not allocated, or end of file
while (*p == ' ')
p++; //if spaces, iterate through string
key = p;
p++;
while ((*p) >= 48 && (*p) <= 57)
p++;//if a digit character (47<p<58 or 0-9), iterate through key
*p = '\0';//null everything after the key (digits)
p++; //iterate onto the password
while (*p == ' ')
p++;//if spaces, iterate through string
pass = p;
p++;
while ((*p) != '\r' && (*p) != '\n') {
p++;
}// iterate until the end of the string ('\n')
*p = '\0';//null the rest, and reset "p"
temp = digit(key);
if (temp < 0) {
continue;
}
if (temp == 170696526) {
//nothing
}
if (t == NULL) {
t = initTree(temp, pass);
} else
insert(temp, pass, t->root);//WE NEED TO BE ABLE TO CREATE A PASS THAT DOES NOT CHANGE
bst_node++;
}
printf("\nBST NODES: %ld", bst_node);
fclose(fp);
/*
printf("Original Tree: \n");
printTree(t->root);
printf("\n\n");
if (HEIGHT_WRITTEN == 1) {
printf("Height of tree: %d\n\n", height(t->root));
}
*/
if (DELETE_WRITTEN == 1) {
FILE *fp_del;
fp_del = fopen("DELETES.txt", "r");
while (1) {
p = fgets(pbuff, 256, fp_del);
if (p == NULL)
break;
while (*p == ' ')
p++;
key = p;
p++;
while (*p != '\r' && *p != '\n') {
p++;
}
*p = '\0';
int k = withdraw(digit(key), t->root);
if (k)
bst_node--;
}
}
printf("\nNODES AFTER DELETES: %ld \n", bst_node);
if (!bst_check(t->root))
printf("NOT BST\n");
else
printf("IS A BST\n");
if (LOOKUP_written) {
FILE *fp_look;
fp_look = fopen("LOOKUPS.txt", "r");
int nnkey = 0;
while (1) {
p = fgets(pbuff, 256, fp_look);
if (p == NULL)
break;
while (*p == ' ')
p++;
key = p;
p++;
while (*p != '\r' && *p != '\n') {
p++;
}
*p = '\0';
nnkey = digit(key);
Node* k = find(nnkey, t->root);
if (!k) {
printf("ID: %13d PASSWORD: <NOT FOUND>\n", nnkey);
} else {
printf("ID: %13d PASSWORD: %s\n", nnkey, k->value);
}
}
}
return 0;
}//main()
Here is my function file
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "LAB3BST2.h"
Node *initNode(Key k, char *v)
// Allocate memory for new node and initialize fields.
// Returns pointer to node created.
{
Node *n = malloc(sizeof(Node));
// initialize node if memory obtained
if (n != NULL) {
n->key = k;
n->value = strdup(v);
n->leftChild = NULL;
n->rightChild = NULL;
}
return n;
}//initNode()
Tree *initTree(Key k, char *v)
// Set up new tree. Allocates memory for Tree structure, then
// calls initNode() to allocate first node.
{
Tree *t = malloc(sizeof(Tree));
if (t != NULL)
t->root = initNode(k, v);
return t;
}//initTree()
void printTreeExplanation(void)
// Prints hint to reader what to expect on screen
{
static int done = 0;
if (!done) {
printf("First time explanation of tree display:\n");
printf("Every node is displayed as a comma-separated pair within brackets:");
printf(" (kk,vv)\n");
printf("where kk is the key and vv is the value\n");
printf("A tree starts with a curly bracket { and ends with a curly bracket }.\n");
printf("An empty tree will be {}\n");
printf("A tree with no children will be { (kk,vv),{},{} }\n");
printf("If either subtree is populated, it will be shown using the same ");
printf("technique as described above\n");
printf("(Hint: Start at root - and then match up all the remaining\n");
printf("brackets, then interpret what those bracket pairs are telling\n");
printf("you.)\n============\n\n");
done = 1;
}
}//printTreeExplanation()
void printTree(Node *root)
// Print whole tree. We cannot make it look pretty graphically, so we add some
// characters to make it a little easier to understand. We also don't really
// know what the value field is - it is declared to be a void pointer - so we
// treat it as though it points to an integer.
{
// assume printTree magically knows the types in the tree node
printTreeExplanation();
// start of this tree
printf("{");
// values in the root node (assuming value is pointing to an integer)
printf("(%d,%s),", root->key, root->value);
// Now show left subtree or {} if there is no left subtree
if (root->leftChild != NULL)
printTree(root->leftChild);
else
printf("{}");
// Marker between left and right subtrees
printf(",");
// Now show right subtree or {} if there is no right subtree
if (root->rightChild != NULL)
printTree(root->rightChild);
else
printf("{}");
// Close display of this tree with closing curly bracket
printf("}");
}//printTree()
Node *find(Key k, Node *root)
{
// termination conditions - either true, search is ended
if ((root == NULL) || (root->key == k))
return root;
if (k > root->key) //traverse through the right subtree (larger)
return find(k, root->rightChild);
else //traverse through the right
return find(k, root->leftChild);
}//find()
int insert(Key k, char *v, Node *root)
{
int result = BST_FAIL;
// this if statement can only be true with first root (root of whole tree)
if (root == NULL) {
Node *n = initNode(k, v);
root = n;
return BST_SUCCESS;
}
if (root->key == k)
root->value = strdup(v);//replace password
else
if (k < root->key) {
// key value less than key value in root node - try to insert into left
// subtree, if it exists.
if (root->leftChild != NULL)
// there is a left subtree - insert it
result = insert(k, v, root->leftChild);
else {
// new Node becomes the left subtree
Node *n = initNode(k, v);
root->leftChild = n;
result = BST_SUCCESS;
}
} else
if (k > root->key) { // test actually redundant
// key is greater than this nodes key value, so value goes into right
// subtree, if it exists
if (root->rightChild != NULL)
// there is a right subtree - insert new node
result = insert(k, v, root->rightChild);
else {
// no right subtree - new node becomes right subtree
Node *n = initNode(k, v);
root->rightChild = n;
result = BST_SUCCESS;
}
}
return result;
}//insert()
int intmax(int a, int b) {
return (a >= b) ? a : b;
}//intmax()
int height(Node *root)
// Height definition:
// Height of an empty tree is -1. Height of a leaf node is 0. Height of other
// nodes is 1 more than larger height of node's two subtrees.
{
int nodeheight = -1;
int right, left;// default returned for empty tree
if (root != NULL) {
left = height(root->leftChild);
right = height(root->rightChild);
nodeheight = intmax(left, right);
}
return nodeheight;
}//height()
Node *findParentHelper(Key k, Node *root)
// Help find parent of node with key == k. Parameter root is node with
// at least one child (see findParent()).
{
if (root->leftChild != NULL) {
if (root->leftChild->key == k)
return root;
}
if (root->rightChild != NULL) {
if (root->rightChild->key == k)
return root;
}
if (k > root->key)
return findParentHelper(k, root->rightChild);
else
return findParentHelper(k, root->leftChild);
}//findparenthelper()
Node *findParent(Key k, Node *root)
// root
{
// Deal with special special cases which could only happen for root
// of whole tree
if (root == NULL)
return root;
// real root doesn't have parent so we make it parent of itself
if (root->key == k)
return root;
// root has no children
if ((root->leftChild == NULL) && (root->rightChild == NULL))
return NULL;
// Deal with cases where root has at least one child
return findParentHelper(k, root);
}//findParent()
Node *findMin(Node *root) {
if (root->leftChild == NULL)
return root;
return findMin(root->leftChild);
}
Node *findMax(Node *root) {
if (root->rightChild == NULL)
return root;
return findMax(root->rightChild);
}
int check(Node *p, Node *n) {
if (p->rightChild == n)
return 1; //1==right, 0==left
return 0;
}
void delete(Node *p, Node *n)
// Delete node pointed to by n.
// Parameters:
// n - points to node to be deleted
// p - points to parent of node to be deleted.
{
// Deletion has 3 cases - no subtrees, only left or right subtree, or both
// left and right subtrees.
if (p == n) { //if the root is the node to be deleted
Node *temp;
int key;
char *pass;
if (p->rightChild) {
temp = findMin(p->rightChild);
key = temp->key;
pass = strdup(temp->value);
delete(findParent(temp->key, n), temp);
p->key = key;
p->value = pass;
} else
if (p->leftChild) {
temp = findMax(p->leftChild);
key = temp->key;
pass = strdup(temp->value);
delete(findParent(temp->key, n), temp);
p->key = key;
p->value = pass;
}
return;
}
if (n->leftChild != NULL) { // there is left child
if (n->rightChild) { //if both
Node *temp = findMin(n->rightChild);
n->key = temp->key;
n->value = strdup(temp->value);
delete(findParent(temp->key, n), temp);//delete the min value found (which is a leaf on the left most right branch)
} else { //if only left
if (check(p, n)) {
p->rightChild = n->leftChild;
} else
p->leftChild = n->leftChild;
free(n);
}
} else
if (n->rightChild) { // there is only a right child
if (check(p, n)) {
p->rightChild = n->rightChild;
} else
p->leftChild = n->rightChild;
free(n);
} else {// no children
if (check(p, n)) {
p->rightChild = NULL;
} else
p->leftChild = NULL;
free(n);
}
}//delete()
int withdraw(Key k, Node *root)
// Withdraw does two things:
// return a copy of the node with key k (and value v)
// Delete the node with key k from the tree while ensuring the tree remains valid
{
Node *p, *m;
m = find(k, root);
if (m != NULL) {
// create a copy of the node with the same key and value
//n = initNode(m->key, m->value);
p = findParent(k, root);
// can delete the node
delete(p, m);
return 1;
}
return 0;
}//withdraw()
int bst_check(Node *root) {
if (root == NULL)
return 1; // if on a leaf (return back up to root) //170696526
if (root->leftChild != NULL && root->leftChild->key > root->key)
//if the left child exists and its key is greater than the root
return 0;
if (root->rightChild != NULL && root->rightChild->key < root->key)
// if the right child exists and is smaller than the root
return 0;
if (!bst_check(root->leftChild) || !bst_check(root->rightChild))
//if the check was unsuccessful for both the right and left subtrees
//also recursively checks the left and right child
return 0;
//if all pass, then the tree was a bst
return 1;
}
Here is my function file (.h file):
// LAB3_BST.H
// Header file to be used with code for ELEC278 Lab 3.
//
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
typedef int Key;
#define BST_FAIL 0 // return value when BST function fails
#define BST_SUCCESS 1 // return value when BST function succeeds
// Node in tree has key and pointer to value associated with key.
// Also contains structural components - two pointers to left and
// right subtrees.
typedef struct password {
char *word;
struct password *next;
} pnode;
typedef struct Node {
Key key;
char *value;
struct Node *leftChild, *rightChild;
} Node, pNode;
// Tree is basically pointer to top node in a tree.
typedef struct Tree {
Node *root;
} Tree;
Node *initNode(int k, char *v);
// Create new tree by creating new node with key = k and value = v
// and making it root
Tree *initTree(int k, char *v);
// Find node with key k in tree. Returns pointer to Node if found;
// Returns NULL if not found
Node *find(Key k, Node *root);
// Create new node with key=k, value=v and insert it into tree
// Returns 1 upon success, 0 failure
int insert(int k, char *v, Node *root);
// Print text representation of tree (starting at any Node)
void printTree(Node *root);
// Returns Maximum of two integer numbers
int intmax(int a, int b);
// Find parent of node n where n->key = k
// Returns pointer to parent node if found; Returns NULL if not found
Node *findParent(Key k, Node *root);
// 1. Make copy of node with key=k and returns it
// 2. Delete node with key=k from tree
// Return pointer of node created in 1; Returns NULL if no node
// with specified key value is found
int withdraw(Key k, Node *root);
// Return height of tree (height of specified root)
int height(Node *root);
// Helper function for findParent - see specification in lab
// instructions
Node *findParentHelper(Key k, Node *root);
// Delete node from tree while ensuring tree remains valid
void delete(Node *p, Node *n);
Node* inorder(Node *pn);
int bst_check(Node *root);
I dont know where to start.
There are some problems in function insert:
if the root argument is NULL, the new node is just stored into the argument pointer and BST_SUCCESS is returned. The caller's node variable is not updated. This function should take the address of the Node* as an argument. In your case, the tree is initialized as non empty, so this never occurs, but the tree will become empty after removing all elements and in this case, insert will always fail in spite of returning BST_SUCCESS.
if root->key == k, a new value is allocated for this duplicate key, but the previous value is not freed, hence there is a memory leak.
the test else if (k > root->key) is indeed redundant
Here is a modified and much simpler version:
int insert(Key k, const char *v, Node **np) {
Node *node = *np;
if (node == NULL) {
*np = initNode(k, v);
if (*np == NULL)
return BST_FAIL;
else
return BST_SUCCESS;
}
if (k == node->key) {
// node exists, replace password
char *str = strdup(v);
if (str == NULL) {
return BST_FAIL;
} else {
free(node->value);
node->value = str;
return BST_SUCCESS; // no new node, but insertion successful
}
}
if (k < node->key) {
// key value is less than key value in this node
// insert it into left subtree, creating it if needed.
return insert(k, v, &node->leftChild);
} else {
// key value is greater than key value in this node
// insert it into right subtree, creating it if needed.
return insert(k, v, &node->rightChild);
}
}
Here is a non recursive version:
int insert(Key k, const char *v, Node **np) {
while (*np) {
Node *node = *np;
if (k == node->key) {
// node exists, replace password
char *str = strdup(v);
if (str == NULL) {
return BST_FAIL;
} else {
free(node->value);
node->value = str;
return BST_SUCCESS; // no new node, but insertion successful
}
}
if (k < node->key) {
// key value is less than key value in this node
// insert it into left subtree, creating it if needed.
np = &root->leftChild;
} else {
// key value is greater than key value in this node
// insert it into right subtree, creating it if needed.
np = &root->rightChild;
}
}
*np = initNode(k, v);
if (*np == NULL)
return BST_FAIL;
else
return BST_SUCCESS;
}
Note however that neither of the above functions implement a balanced tree (BST). The tree needs rebalancing if the height of left and right child nodes' heights become too different.
This is not an answer but wanted to add a graph of the input data. I don't see anything out of order (i.e. non-reproducable):

How can I implement a check function to check the validity of the properties of a b-tree?

I have recently implemented a normal B-tree (without any variant) in C, but I would like to check if my implementation is valid i.e. if it does not violate the following properties:
Every node has at most m children.
Every non-leaf node (except root) has at least ⌈m/2⌉ child nodes.
The root has at least two children if it is not a leaf node.
A non-leaf node with k children contains k − 1 keys.
All leaves appear in the same level and carry no information.
Could help me with the implementation of this procedure giving me an example with some code in C or with some suggestions?
#include <stdio.h>
#include <stdlib.h>
#define TRUE 1
#define FALSE 0
#define EMPTY 0
#define NODE_ORDER 3 /*The degree of the tree.*/
#define NODE_POINTERS (NODE_ORDER*2)
#define NODE_KEYS NODE_POINTERS-1
typedef unsigned char bool;
typedef struct tree_node {
int key_array[NODE_KEYS];
struct tree_node *child_array[NODE_POINTERS];
unsigned int key_index;
bool leaf;
} node_t;
typedef struct {
node_t *node_pointer;
int key;
bool found;
unsigned int depth;
} result_t;
typedef struct {
node_t *root;
unsigned short order;
bool lock;
} btree_t;
static int BTreeGetLeftMax(node_t *T);
static int BTreeGetRightMin(node_t *T);
/* The AllocateNode operation allocate a b-tree node.And then set the node's
** properties to the defualt value :
** BTreeNode => K[i] = 0
** BTreeNode => child_array[i] = NULL
** BTreeNode => key_index = 0
** BTreeNode => isLeaf = 1;
*/
static node_t *create_node()
{
int i;
node_t *new_node = (node_t *)malloc(sizeof(node_t));
if(!new_node){
printf("Out of memory");
exit(0);
}
// Set Keys
for(i = 0;i < NODE_KEYS; i++){
new_node->key_array[i] = 0;
}
// Set ptr
for(i = 0;i < NODE_POINTERS; i++){
new_node->child_array[i] = NULL;
}
new_node->key_index = EMPTY;
new_node->leaf = TRUE;
return new_node;
}
/* The CreatBTree operation creates an empty b-tree by allocating a new root
** that has no keys and is a leaf node.Only the root node is permitted to
** have this properties.
*/
btree_t *create_btree()
{
btree_t *new_root = (btree_t *)malloc(sizeof(btree_t));
if(!new_root){
return NULL;
}
node_t *head = create_node();
if(!head){
return NULL;
}
new_root->order = NODE_ORDER;
new_root->root = head;
new_root->lock = FALSE;
return new_root;
}
static result_t *get_resultset()
{
result_t *ret = (result_t *)malloc(sizeof(result_t));
if(!ret){
printf("ERROR! Out of memory.");
exit(0);
}
ret->node_pointer = NULL;
ret->key = 0;
ret->found = FALSE;
ret->depth = 0;
return ret;
}
/* The BTreeSearch operation is to search X in T.Recursively traverse the tree
** from top to bottom.At each level, BTreeSearch choose the maximum key whose
** value is greater than or equal to the desired value X.If equal to the
** desired ,found.Otherwise continue to traverse.
*/
result_t *search(int key, node_t *node)
{
print_node(node);
int i = 0;
while((i < node->key_index) && (key > node->key_array[i])){
//printf("it %d is <= %d and key %d > than %d\n", i, node->key_index, key, node->key_array[i]);
i++;
}
//printf("end iterator: %d\n", i);
//printf("better: \n");
/*
int c = 0;
while((c < node->key_index) && (key > node->key_array[c])){
printf("it %d is <= %d and key %d > than %d\n", c, node->key_index, key, node->key_array[c]);
c++;
}
*/
// HACK /// may not be working
if(i == 6){
i--;
}
// Check if we found it
if((i <= node->key_index) && (key == node->key_array[i])){
result_t *result = get_resultset();
result->node_pointer = node;
result->key = i;
result->found = TRUE;
return result;
}
// Not found check leaf or child
if(node->leaf){
result_t *result = get_resultset();
result->node_pointer = node;
result->found = FALSE;
return result;
}else{
result_t *result = get_resultset();
return search(key, node->child_array[i]);
}
}
/* The split_child operation moves the median key of node child_array into
** its parent ptrParent where child_array is the ith child of ptrParent.
*/
static void split_child(node_t *parent_node, int i, node_t *child_array)
{
int j;
//Allocate a new node to store child_array's node.
node_t *new_node = create_node();
new_node->leaf = child_array->leaf;
new_node->key_index = NODE_ORDER-1;
//Move child_array's right half nodes to the new node.
for(j = 0;j < NODE_ORDER-1;j++){
new_node->key_array[j] = child_array->key_array[NODE_ORDER+j];
}
//If child_array is not leaf node,then move child_array's [child_array]s to the new
//node's [child_array]s.
if(child_array->leaf == 0){
for(j = 0;j < NODE_ORDER;j++){
new_node->child_array[j] = child_array->child_array[NODE_ORDER+j];
}
}
child_array->key_index = NODE_ORDER-1;
//Right shift ptrParent's [child_array] from index i
for(j = parent_node->key_index;j>=i;j--){
parent_node->child_array[j+1] = parent_node->child_array[j];
}
//Set ptrParent's ith child_array to the newNode.
parent_node->child_array[i] = new_node;
//Right shift ptrParent's Keys from index i-1
for(j = parent_node->key_index;j>=i;j--){
parent_node->key_array[j] = parent_node->key_array[j-1];
}
//Set ptrParent's [i-1]th Key to child_array's median [child_array]
parent_node->key_array[i-1] = child_array->key_array[NODE_ORDER-1];
//Increase ptrParent's Key number.
parent_node->key_index++;
}
/* The BTreeInsertNonFull operation insert X into a non-full node T.before
** execute this operation,guarantee T is not a full node.
*/
static void insert_nonfull(node_t *n, int key){
int i = n->key_index;
if(n->leaf){
// Shift until we fit
while(i>=1 && key<n->key_array[i-1]){
n->key_array[i] = n->key_array[i-1];
i--;
}
n->key_array[i] = key;
n->key_index++;
}else{
// Find the position i to insert.
while(i>=1 && key<n->key_array[i-1]){
i--;
}
//If T's ith child_array is full,split first.
if(n->child_array[i]->key_index == NODE_KEYS){
split_child(n, i+1, n->child_array[i]);
if(key > n->key_array[i]){
i++;
}
}
//Recursive insert.
insert_nonfull(n->child_array[i], key);
}
}
/* The BTreeInsert operation insert key into T.Before insert ,this operation
** check whether T's root node is full(root->key_index == 2*d -1) or not.If full,
** execute split_child to guarantee the parent never become full.And then
** execute BTreeInsertNonFull to insert X into a non-full node.
*/
node_t *insert(int key, btree_t *b)
{
if(!b->lock){
node_t *root = b->root;
if(root->key_index == NODE_KEYS){ //If node root is full,split it.
node_t *newNode = create_node();
b->root = newNode; //Set the new node to T's Root.
newNode->leaf = 0;
newNode->key_index = 0;
newNode->child_array[0] = root;
split_child(newNode, 1, root);//Root is 1th child of newNode.
insert_nonfull(newNode, key); //Insert X into non-full node.
}else{ //If not full,just insert X in T.
insert_nonfull(b->root, key);
}
}else{
printf("Tree is locked\n");
}
return b->root;
}
/* The merge_children operation merge the root->K[index] and its two child
** and then set chlid1 to the new root.
*/
static void merge_children(node_t *root, int index, node_t *child1, node_t *child2){
child1->key_index = NODE_KEYS;
int i;
//Move child2's key to child1's right half.
for(i=NODE_ORDER;i<NODE_KEYS;i++)
child1->key_array[i] = child2->key_array[i-NODE_ORDER];
child1->key_array[NODE_ORDER-1] = root->key_array[index]; //Shift root->K[index] down.
//If child2 is not a leaf node,must copy child2's [ptrchlid] to the new
//root(child1)'s [child_array].
if(0 == child2->leaf){
for(i=NODE_ORDER;i<NODE_POINTERS;i++)
child1->child_array[i] = child2->child_array[i-NODE_ORDER];
}
//Now update the root.
for(i=index+1;i<root->key_index;i++){
root->key_array[i-1] = root->key_array[i];
root->child_array[i] = root->child_array[i+1];
}
root->key_index--;
free(child2);
}
/* The BTreeBorrowFromLeft operation borrows a key from leftPtr.curPtr borrow
** a node from leftPtr.root->K[index] shift down to curPtr,shift leftPtr's
** right-max key up to root->K[index].
*/
static void BTreeBorrowFromLeft(node_t *root, int index, node_t *leftPtr, node_t *curPtr){
curPtr->key_index++;
int i;
for(i=curPtr->key_index-1;i>0;i--)
curPtr->key_array[i] = curPtr->key_array[i-1];
curPtr->key_array[0] = root->key_array[index];
root->key_array[index] = leftPtr->key_array[leftPtr->key_index-1];
if(0 == leftPtr->leaf)
for(i=curPtr->key_index;i>0;i--)
curPtr->child_array[i] = curPtr->child_array[i-1];
curPtr->child_array[0] = leftPtr->child_array[leftPtr->key_index];
leftPtr->key_index--;
}
/* The BTreeBorrowFromLeft operation borrows a key from rightPtr.curPtr borrow
** a node from rightPtr.root->K[index] shift down to curPtr,shift RightPtr's
** left-min key up to root->K[index].
*/
static void BTreeBorrowFromRight(node_t *root, int index, node_t *rightPtr, node_t *curPtr){
curPtr->key_index++;
curPtr->key_array[curPtr->key_index-1] = root->key_array[index];
root->key_array[index] = rightPtr->key_array[0];
int i;
for(i=0;i<rightPtr->key_index-1;i++)
rightPtr->key_array[i] = rightPtr->key_array[i+1];
if(0 == rightPtr->leaf){
curPtr->child_array[curPtr->key_index] = rightPtr->child_array[0];
for(i=0;i<rightPtr->key_index;i++)
rightPtr->child_array[i] = rightPtr->child_array[i+1];
}
rightPtr->key_index--;
}
/* The BTreeDeleteNoNone operation recursively delete X in root,handle both leaf
** and internal node:
** 1. If X in a leaf node,just delete it.
** 2. If X in a internal node P:
** a): If P's left neighbor -> prePtr has at least d keys,replace X with
** prePtr's right-max key and then recursively delete it.
** b): If P's right neighbor -> nexPtr has at least d keys,replace X with
** nexPtr's left-min key and then recursively delete it.
** c): If both of prePtr and nexPtr have d-1 keys,merge X and nexPtr into
** prePtr.Now prePtr have 2*d-1 keys,and then recursively delete X in
** prePtr.
** 3. If X not in a internal node P,X must in P->child_array[i] zone.If child_array[i]
** only has d-1 keys:
** a): If child_array[i]'s neighbor have at least d keys,borrow a key from
** child_array[i]'s neighbor.
** b): If both of child_array[i]'s left and right neighbor have d-1 keys,merge
** child_array[i] with one of its neighbor.
** finally,recursively delete X.
*/
static void BTreeDeleteNoNone(int X, node_t *root){
int i;
//Is root is a leaf node ,just delete it.
if(1 == root->leaf){
i=0;
while( (i<root->key_index) && (X>root->key_array[i])) //Find the index of X.
i++;
//If exists or not.
if(X == root->key_array[i]){
for(;i<root->key_index-1;i++)
root->key_array[i] = root->key_array[i+1];
root->key_index--;
}
else{
printf("Node not found.\n");
return ;
}
}
else{ //X is in a internal node.
i = 0;
node_t *prePtr = NULL, *nexPtr = NULL;
//Find the index;
while( (i<root->key_index) && (X>root->key_array[i]) )
i++;
if( (i<root->key_index) && (X == root->key_array[i]) ){ //Find it in this level.
prePtr = root->child_array[i];
nexPtr = root->child_array[i+1];
/*If prePtr at least have d keys,replace X by X's precursor in
*prePtr*/
if(prePtr->key_index > NODE_ORDER-1){
int aPrecursor = BTreeGetLeftMax(prePtr);
root->key_array[i] = aPrecursor;
//Recursively delete aPrecursor in prePtr.
BTreeDeleteNoNone(aPrecursor,prePtr);
}
else
if(nexPtr->key_index > NODE_ORDER-1){
/*If nexPtr at least have d keys,replace X by X's successor in
* nexPtr*/
int aSuccessor = BTreeGetRightMin(nexPtr);
root->key_array[i] = aSuccessor;
BTreeDeleteNoNone(aSuccessor,nexPtr);
}
else{
/*If both of root's two child have d-1 keys,then merge root->K[i]
* and prePtr nexPtr. Recursively delete X in the prePtr.*/
merge_children(root,i,prePtr,nexPtr);
BTreeDeleteNoNone(X,prePtr);
}
}
else{ //Not find in this level,delete it in the next level.
prePtr = root->child_array[i];
node_t *leftBro = NULL;
if(i<root->key_index)
nexPtr = root->child_array[i+1];
if(i>0)
leftBro = root->child_array[i-1];
/*root->child_array[i] need to borrow from or merge with his neighbor
* and then recursively delete. */
if(NODE_ORDER-1 == prePtr->key_index){
//If left-neighbor have at least d-1 keys,borrow.
if( (leftBro != NULL) && (leftBro->key_index > NODE_ORDER-1))
BTreeBorrowFromLeft(root,i-1,leftBro,prePtr);
else //Borrow from right-neighbor
if( (nexPtr != NULL) && (nexPtr->key_index > NODE_ORDER-1))
BTreeBorrowFromRight(root,i,nexPtr,prePtr);
//OR,merge with its neighbor.
else if(leftBro != NULL){
//Merge with left-neighbor
merge_children(root,i-1,leftBro,prePtr);
prePtr = leftBro;
}
else //Merge with right-neighbor
merge_children(root,i,prePtr,nexPtr);
}
/*Now prePtr at least have d keys,just recursively delete X in
* prePtr*/
BTreeDeleteNoNone(X,prePtr);
}
}
}
/*Get T's left-max key*/
static int BTreeGetLeftMax(node_t *T){
if(0 == T->leaf){
return BTreeGetLeftMax(T->child_array[T->key_index]);
}else{
return T->key_array[T->key_index-1];
}
}
/*Get T's right-min key*/
static int BTreeGetRightMin(node_t *T){
if(0 == T->leaf){
return BTreeGetRightMin(T->child_array[0]);
}else{
return T->key_array[0];
}
}
/* The BTreeDelete operation delete X from T up-to-down and no-backtrack.
** Before delete,check if it's necessary to merge the root and its children
** to reduce the tree's height.Execute BTreeDeleteNoNone to recursively delete
*/
node_t *delete(int key, btree_t *b)
{
if(!b->lock){
//if the root of T only have 1 key and both of T's two child have d-1
//key,then merge the children and the root. Guarantee not need to backtrack.
if(b->root->key_index == 1){
node_t *child1 = b->root->child_array[0];
node_t *child2 = b->root->child_array[1];
if((!child1) && (!child2)){
if((NODE_ORDER-1 == child1->key_index) && (NODE_ORDER-1 == child2->key_index)){
//Merge the children and set child1 to the new root.
merge_children(b->root, 0, child1, child2);
free(b->root);
BTreeDeleteNoNone(key, child1);
return child1;
}
}
}
BTreeDeleteNoNone(key, b->root);
}else{
printf("Tree is locked\n");
}
return b->root;
}
void tree_unlock(btree_t *r)
{
r->lock = FALSE;
}
bool tree_lock(btree_t *r)
{
if(r->lock){
return FALSE;
}
r->lock = TRUE;
return TRUE;
}
You have not shown any code, which makes it difficult to come up with code examples that could fit to your implementation. However, in principle you could take the following approaches:
Write unit-tests for your code. With the B-Tree that would mean to start with small trees (even with an empty tree), and use checks in your tests to verify the properties. You would then add more and more tests, specifically checking for bugs also in the "tricky" scenarios. There is a lot of general information about unit-testing available, you should be able to adapt it to your specific problem.
Add assertions to your code (read about the assert macro in C). Many of the properties you have mentioned could be checked directly within the code at appropriate places.
Certainly, there is more you could do, like, having the code reviewed by some colleague, or using some formal verification tools, but the abovementioned two approaches are good starting points.
UPDATE (after code was added):
Some more hints about how you could approach unit-testing. In principle, you should write your tests with the help of a so called test framework, which is a helper library to make writing tests easier. To explain the concept, however, I just use plain C or even pseudo-code.
Moreover, you would also put some declarations and/or definitions into a header file, like "btree.h". For the sake of example, however, I will just #include "btree.c" in the code examples below.
Create a file "btree-test.c" (the name is a proposal, you can name it as you like).
A first test would look a bit like:
#include "btree.c"
#include <assert.h>
void test_create_empty_btree() {
btree_t *actual_btree = create_btree();
// now, check that the created btree has all desired properties
// for example:
assert(actual_btree != NULL);
assert(actual_btree->order == NODE_ORDER);
assert(actual_btree->lock == FALSE);
assert(actual_btree->root->key_index == EMPTY);
assert(actual_btree->root->leaf == TRUE);
printf("PASSED: test_create_empty_btree");
}
The code above is just an example, I have not even tried compiling it. Note also that the test is not quite clean yet: there will be memory leaks, because the btree is not properly deleted at the end of the test, which would be better practice. It should, however, give you an idea how to start writing unit-tests.
A second test could then again create a btree, but in addition insert some data. In your tests you would then check that the btree has the expected form. And so on, adding more and more tests. It is good practice to have one function per test case...

Mergesort in C and I keep getting errors. Is my approach correct?

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// PURPOSE:
const int LINE_LEN = 256;
#define STOP_CMD "quit"
const int STOP_CMD_LEN = sizeof(STOP_CMD) - 1;
// PURPOSE: To hold a node in a linked list of integers.
struct Node {
int value_;
struct Node *nextPtr_;
};
// PURPOSE: To create and return a linked list of integer pairs.
struct Node *makeList() {
struct Node *list = NULL;
struct Node *end = NULL;
struct Node *next;
int value;
char line[LINE_LEN];
while (1) {
printf("Integer (or %s) to quit: ", STOP_CMD);
fgets(line, LINE_LEN, stdin);
if (strncmp(line, STOP_CMD, STOP_CMD_LEN) == 0)
break;
value = strtol(line,NULL,10);
next = (struct Node *)malloc(sizeof(struct Node));
if (list == NULL) {
list = next;
} else {
end->nextPtr_ = next;
}
end = next;
next->value_ = value;
next->nextPtr_ = NULL;
}
return (list);
}
// PURPOSE: To print the 'value_' values found in 'list'. No return value.
void print(const struct Node *list) {
const struct Node *run;
for (run = list; run != NULL; run = run->nextPtr_)
printf("%d\n", run->value_);
}
// PURPOSE: To sort the items in 'list' in ascending order according to
// their value for 'value_'. Returns sorted list.
struct Node *sort(struct Node *list) {
// Merge-sort is recursive, so there is/are base case(s):
// For merge-sort, the base cases are when 'list' has either 0 or 1
// node it it. If 'list' points to either 0 or 1 thing then return 'list':
// YOUR CODE HERE
if (list == NULL || list->nextPtr_ == NULL) {
return list;
}
// This is the beginning of the recursive case.
// We have to split 'list' into two lists of approximately the same length.
// A straight-forward way to do that is with loop and 3 pointer variables:
// (*) 'full' races through the list starting at 'list' 2 nodes at a time.
// The loop should stop when either 'full' lands on 'NULL', or the
// next of 'full' is 'NULL'.
// (*) 'half' starts at 'list' but goes through at 1/2 the speed by only
// getting the next (not the next-next like 'full'). When 'full'
// reaches the end then 'half' will point to the *beginning* of the
// second list. We need this node, but we *also* need the one just
// before 'half'. So that is why there is also . . .
// (*) 'prevHalf', whose job is to take the value 'half' just had.
// Just before advancing 'half' to its next, give 'prevHalf' the old
// value of 'half'.
struct Node *full;
struct Node *half;
struct Node *prevHalf = NULL;
// YOUR CODE HERE
full = list;
half = list;
while (full != NULL) {
full = full->nextPtr_;
if (full != NULL){
prevHalf = half;
half = half->nextPtr_;
}
}
// Here we separate both sublists:
prevHalf->nextPtr_ = NULL;
// Here we recursively sort both sublists:
struct Node *firstHalf = sort(list);
struct Node *secondHalf = sort(half);
struct Node *end = NULL;
// Here we merge the lists pointed to by 'firstHalf' and 'secondHalf'.
// Make 'list' point to the beginning of the combined list, and use 'end'
// to keep track of the end.
// Use a loop to while both 'firstHalf' and 'secondHalf' are both not yet
// at the end of their lists, then compare the values that both point to.
// If 'firstHalf' points to the lower value then put it at the end of the
// combined list. Else put 'secondHalf' at the end.
// You do not have to remove the node from the old list, but you *do* have
// to advance which ever pointer you used (either 'firstHalf' or
// 'secondHalf') to point to the next node in the list.
// When either 'firstHalf' or 'secondHalf' are 'NULL' then quit the loop:
list = NULL;
// YOUR CODE HERE
// if (firstHalf value_->secondHalf->value_) {
if (firstHalf->value_ & secondHalf->value_) {
list = firstHalf;
end = firstHalf;
firstHalf = firstHalf->nextPtr_;
} else {
list = secondHalf;
end = secondHalf;
secondHalf = secondHalf->nextPtr_;
}
while ((firstHalf != NULL) && (secondHalf != NULL)) {
if (firstHalf->value_ & secondHalf->value_) {
list->nextPtr_ = firstHalf;
end->nextPtr_ = firstHalf;
firstHalf = firstHalf->nextPtr_;
end = end->nextPtr_;
} else {
list->nextPtr_ = secondHalf;
end->nextPtr_ = secondHalf;
secondHalf = secondHalf->nextPtr_;
end = end->nextPtr_;
}
}
// Almost finished!
// You made it to the end of one list, but there still is the other one.
// Make the node pointed to by 'end' point to which ever list that you did
// *not* go all the way through.
// YOUR CODE HERE
while (firstHalf != NULL && secondHalf == NULL) {
list->nextPtr_ = firstHalf;
end->nextPtr_ = firstHalf;
firstHalf = firstHalf->nextPtr_;
end = end->nextPtr_;
}
while (firstHalf == NULL && secondHalf != NULL) {
list->nextPtr_ = secondHalf;
end->nextPtr_ = secondHalf;
secondHalf = secondHalf->nextPtr_;
end = end->nextPtr_;
}
return (list);
}
// PURPOSE: To do nothing if 'list' is NULL. Otherwise to 'free()' both
// 'nextPtr_' and 'namePtr_' for 'list', and all of 'nextPtr_' successors.
// No return value.
void release(struct Node *list) {
if (list == NULL)
return;
release(list->nextPtr_);
free(list);
}
// PURPOSE: To create, print, and 'free()' a linked list of the 'argc-1'
// items on 'argv[]', from 'argv[1]' to 'argv[argc-1]'. Returns
// 'EXIT_SUCCESS' to OS.
int main(int argc, char *argv[]) {
// I.
// II. :
struct Node *list;
list = makeList();
printf("Before sort:\n");
print(list);
list = sort(list);
printf("After sort:\n");
print(list);
release(list);
// III. Finished:
return (EXIT_SUCCESS);
}
The program asks the user for an input and then shows the user input before sorting it and displaying it
The expected output should be
$ ./mergeSort
Integer (or quit) to quit: 9
Integer (or quit) to quit: 8
Integer (or quit) to quit: 7
Integer (or quit) to quit: 1
Integer (or quit) to quit: 2
Integer (or quit) to quit: 3
Integer (or quit) to quit: 6
Integer (or quit) to quit: 4
Integer (or quit) to quit: 5
Integer (or quit) to quit: quit
Before sort:
9
8
7
1
2
3
6
4
5
After sort:
1
2
3
4
5
6
7
8
9
Will fixing these errors get the expected output? and if not, what can I do differently to generate the same output?
I do not know if my approach is correct or not. Any help will be appreciated.
Now after fixing some errors. I get this
Integer (or quit) to quit: 1
Integer (or quit) to quit: 2
Integer (or quit) to quit: 3
Integer (or quit) to quit: 4
Integer (or quit) to quit: 5
Integer (or quit) to quit: 6
Integer (or quit) to quit: 7
Integer (or quit) to quit: 8
Integer (or quit) to quit: 9
Integer (or quit) to quit: quit
Before sort:
1
2
3
4
5
6
7
8
9
After sort:
8
9
Why isn't it sorting it all the input starting?
There are some problems in your code:
in makelist() you should check the return value of fgets() to avoid an infinite loop upon unexpected end of file.
you should also check for malloc() failure to avoid undefined behavior.
your implementation of release() recurses as many levels as there are nodes in the list. Any sufficiently long list will cause a stack overflow. A non-recursive version is preferable.
the loop to split the list is incorrect: full does not skip twice as fast as half.
You should modify this way:
full = list;
half = list;
while (full != NULL) {
full = full->nextPtr_;
if (full != NULL) {
prevHalf = half;
half = half->nextPtr_;
full = full->nextPtr_;
}
}
when comparing node values, you must use <= instead of &:
if (firstHalf->value_ <= secondHalf->value_)
in the main merging loop, you should not modify list->nextPtr_, only end->nextPtr_.
the last merging loops are useless. You are supposed to just set end->nextPtr_ to leftHalf or rightHalf depending on which list is empty.
Here is a correct version:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// PURPOSE:
const int LINE_LEN = 256;
#define STOP_CMD "quit"
const int STOP_CMD_LEN = sizeof(STOP_CMD) - 1;
// PURPOSE: To hold a node in a linked list of integers.
struct Node {
int value_;
struct Node *nextPtr_;
};
// PURPOSE: To create and return a linked list of integer pairs.
struct Node *makeList(void) {
struct Node *list = NULL;
struct Node *end = NULL;
struct Node *next;
int value;
char line[LINE_LEN];
while (1) {
printf("Integer (or %s) to quit: ", STOP_CMD);
if (!fgets(line, LINE_LEN, stdin))
break;
if (strncmp(line, STOP_CMD, STOP_CMD_LEN) == 0)
break;
value = strtol(line, NULL, 10);
next = (struct Node *)malloc(sizeof(struct Node));
if (next == NULL) {
fprintf(stderr, "out of memory\n");
exit(EXIT_FAILURE);
}
next->value_ = value;
next->nextPtr_ = NULL;
if (list == NULL) {
list = next;
} else {
end->nextPtr_ = next;
}
end = next;
}
return list;
}
// PURPOSE: To print the 'value_' values found in 'list'. No return value.
void print(const struct Node *list) {
const struct Node *run;
for (run = list; run != NULL; run = run->nextPtr_)
printf("%d\n", run->value_);
}
// PURPOSE: To sort the items in 'list' in ascending order according to
// their value for 'value_'. Returns sorted list.
struct Node *sort(struct Node *list) {
// Merge-sort is recursive, so there is/are base case(s):
// For merge-sort, the base cases are when 'list' has either 0 or 1
// node it it. If 'list' points to either 0 or 1 thing then return 'list':
// YOUR CODE HERE
if (list == NULL || list->nextPtr_ == NULL) {
return list;
}
// This is the beginning of the recursive case.
// We have to split 'list' into two lists of approximately the same length.
// A straight-forward way to do that is with loop and 3 pointer variables:
// (*) 'full' races through the list starting at 'list' 2 nodes at a time.
// The loop should stop when either 'full' lands on 'NULL', or the
// next of 'full' is 'NULL'.
// (*) 'half' starts at 'list' but goes through at 1/2 the speed by only
// getting the next (not the next-next like 'full'). When 'full'
// reaches the end then 'half' will point to the *beginning* of the
// second list. We need this node, but we *also* need the one just
// before 'half'. So that is why there is also . . .
// (*) 'prevHalf', whose job is to take the value 'half' just had.
// Just before advancing 'half' to its next, give 'prevHalf' the old
// value of 'half'.
struct Node *full;
struct Node *half;
struct Node *prevHalf = list;
// YOUR CODE HERE
full = list;
half = list;
while (full != NULL) {
full = full->nextPtr_;
if (full != NULL) {
prevHalf = half;
half = half->nextPtr_;
full = full->nextPtr_;
}
}
// Here we separate both sublists:
prevHalf->nextPtr_ = NULL;
// Here we recursively sort both sublists:
struct Node *firstHalf = sort(list);
struct Node *secondHalf = sort(half);
struct Node *end = NULL;
// Here we merge the lists pointed to by 'firstHalf' and 'secondHalf'.
// Make 'list' point to the beginning of the combined list, and use 'end'
// to keep track of the end.
// Use a loop to while both 'firstHalf' and 'secondHalf' are both not yet
// at the end of their lists, then compare the values that both point to.
// If 'firstHalf' points to the lower value then put it at the end of the
// combined list. Else put 'secondHalf' at the end.
// You do not have to remove the node from the old list, but you *do* have
// to advance which ever pointer you used (either 'firstHalf' or
// 'secondHalf') to point to the next node in the list.
// When either 'firstHalf' or 'secondHalf' are 'NULL' then quit the loop:
list = NULL;
// YOUR CODE HERE
if (firstHalf->value_ <= secondHalf->value_) {
list = firstHalf;
end = firstHalf;
firstHalf = firstHalf->nextPtr_;
} else {
list = secondHalf;
end = secondHalf;
secondHalf = secondHalf->nextPtr_;
}
while ((firstHalf != NULL) && (secondHalf != NULL)) {
if (firstHalf->value_ <= secondHalf->value_) {
end->nextPtr_ = firstHalf;
end = end->nextPtr_;
firstHalf = firstHalf->nextPtr_;
} else {
end->nextPtr_ = secondHalf;
end = end->nextPtr_;
secondHalf = secondHalf->nextPtr_;
}
}
// Almost finished!
// You made it to the end of one list, but there still is the other one.
// Make the node pointed to by 'end' point to which ever list that you did
// *not* go all the way through.
// YOUR CODE HERE
if (firstHalf != NULL) {
end->nextPtr_ = firstHalf;
} else {
end->nextPtr_ = secondHalf;
}
return list;
}
// PURPOSE: To do nothing if 'list' is NULL. Otherwise to 'free()' both
// 'nextPtr_' and 'namePtr_' for 'list', and all of 'nextPtr_' successors.
// No return value.
void release(struct Node *list) {
while (list != NULL) {
struct Node *next = list->nextPtr_;
free(list);
list = next;
}
}
// PURPOSE: To create, print, and 'free()' a linked list of the 'argc-1'
// items on 'argv[]', from 'argv[1]' to 'argv[argc-1]'. Returns
// 'EXIT_SUCCESS' to OS.
int main(int argc, char *argv[]) {
// I.
// II. :
struct Node *list;
list = makeList();
printf("Before sort:\n");
print(list);
list = sort(list);
printf("After sort:\n");
print(list);
release(list);
// III. Finished:
return EXIT_SUCCESS;
}

Segmentation Fault in Trie implementation in C

I'm trying to implement a trie data structure to spell-check a given text file. Currently, it seems to work for a couple words in the file, then it reaches a seg fault. I tried debugging to find the culprit, but all I found was that the value of "letter" is retaining seemingly random negative values (it should be between 1 and 27, inclusive). Normally the seg fault issue appears almost instantly after i start the program, so I'm not sure why the issue is popping up in the middle of the program.
/**
* Implements a dictionary's functionality.
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
//create global root node
Trienode *root;
//create word counter for size() function
unsigned int wordcount = 0;
//creates an empty node
Trienode * newnode()
{
Trienode *nnode = NULL;
nnode = (Trienode *)malloc(sizeof(Trienode));
//initialize new node with null pointers and values
nnode -> parent = NULL;
for(int i = 0; i < 27; i++)
{
nnode -> children[i] = NULL;
}
return nnode;
}
void cleartrie(Trienode *head)
{
//if child node exists, free it, else continue with next iteration in for loop
if(head)
{
for(int i = 0; i < 27; i++)
{
cleartrie(head -> children[i]);
}
free(head);
head = NULL;
}
}
/**
* Returns true if word is in dictionary else false.
*/
bool check(const char *word)
{
int i = 0;
int letter;
Trienode *head = root;
while(word[i] != '\0')
{
if(isalpha(word[i]))
{
letter = word[i] - 'a';
}
else //it must be an apostrophe
{
letter = word[i] - 13;
}
if(!(head -> children[letter]))
{
return false;
}
else //a pointer must exist
{
head = head -> children[letter];
}
i++;
}
return true;
}
/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char *dictionary)
{
//open file
FILE *infile = fopen(dictionary, "r");
Trienode *parnode; //parent node
root = newnode();
Trienode *curnode = root; //current node
int letter = 0;
//while not end of file, read words
while(fgetc(infile) != EOF)
{
//while not end of word, read letters
for(;;)
{
int c;
//read current letter in file
c = fgetc(infile);
//convert input char to corresponding array location (a - z = 0-25, apostrophe = 26)
if(isalpha(c))
{
letter = c - 'a';
}
else if (c == '\'')
{
letter = c - 13;
}
//if end of string, exit loop
else if (c == '\0')
{
//end of word, so endofstring = true
wordcount++;
break;
}
//move to next letter if not either apostrophe or alphabetical
else
{
break;
}
//if pointer to letter of word doesn't exist, create new node
if(curnode -> children[letter] == NULL)
{
curnode -> children[letter] = newnode();
}
//child node is the new current node
parnode = curnode;
curnode = curnode -> children[letter];
curnode -> parent = parnode;
}
//return to root node
curnode = root;
}
fclose(infile);
return true;
}
/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return wordcount;
}
/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
cleartrie(root);
if (root == NULL)
{
return true;
}
return false;
}
Sorry about the wall of text, but most of it is just there for context (I hope). The seg fault error is occurring on the if(!(head -> children[letter])) line of the check helper function.
Thanks in advance!
I suspect that your test file may contain some uppercase letters. If this is the case, then subtracting 'a' in an attempt to remap your letters will result in a negative number, since 'A' < 'a'. Have a look at the ASCII Table. Converting the letters to lowercase first should solve your problem.

Troubles with Trie

So, I was trying to read a Trie, relatively a new data structure for me. And where ever I read, every node in the trie, would consist of an integer variable which would mark the end of an word, and would also consist of 26 pointers, each pointing to nodes in the lower level(assuming the words only contain small letter characters).
Now the problem I am facing is, where ever I see/read the implementation, they mark the node with a character. Like in this case:
http://community.topcoder.com/i/education/alg_tries.png
But the way I am understanding Trie, I believe that every edge should be marked as a character. Although, I know we don't have a data structure for the edges, just for the nodes. But wouldn't marking the edges be more correct?
Also, this is my algorithm for implementing insert. Please tell me if you find something wrong with it.
struct trie
{
int val;
trie* aplha[26];
}
trie* insert (trie *root, char *inp)
{
if (*input == '\0')
return root;
if (root == NULL)
{
root = (trie *) malloc(sizeof(trie));
int i = 0;
for (i=0;i<26;i++)
root->alpha[i] = NULL;
}
temp = *input - 'a';
root->alpha[temp] = insert (root->alpha[temp],input+1);
if (*(input+1)=='\0')
root->val = 1;
return root;
}
I am stumped as to how I could implement the delete. If you can, please help me with a delete algorithm.
Here is a small program that shows a way you can do it. There is no serious effort put into error handling though:
http://pastebin.com/84TiPrtL
I've slightly edited your trie_insert function and show a trie_delete function here. The struct Vec inside the pastebin code can be changed to a std::vector if you are using C++.
struct trie *trie_insert(struct trie *root, char *input)
{
int idx;
if (!input) {
return root;
}
if (root == NULL) {
root = (struct trie *)calloc(1, sizeof(struct trie));
}
if (*input == '\0') {
// leaves have root->val set to 1
root->val = 1;
} else {
// carry on insertion
idx = *input - 'a';
root->alpha[idx] = trie_insert(root->alpha[idx], input+1);
}
return root;
}
struct trie *trie_delete(struct trie *root, char *s)
{
int i, idx, reap = 0;
if (!root || !s) {
return root;
}
if (!*s && root->val) {
// delete this string, and mark node as deletable
root->val = 0;
reap = 1;
} else {
// more characters to insert, carry on
idx = *s - 'a';
if (root->alpha[idx]) {
root->alpha[idx] = trie_delete(root->alpha[idx], s+1);
if (!root->alpha[idx]) {
// child node deleted, set reap = 1
reap = 1;
}
}
}
// We can delete this if both:
// 1. reap is set to 1, which is only possible if either:
// a. we are now at the end of the string and root->val used
// to be 1, but is now set to 0
// b. the child node has been deleted
// 2. The string ending at the current node is not inside the trie,
// so root->val = 0
if (reap && !root->val) {
for (i = 0; i < NRALPHA; i++) {
if (root->alpha[i]) {
reap = 0;
break;
}
}
// no more children, delete this node
if (reap) {
trie_free(root);
root = NULL;
}
}
return root;
}

Resources