Recursing through a AST using DFS - c

I am trying to recurse through an annotated syntax tree.
My aim is to increment a counter once it sees a particular type of node.
Void *DFS(State *N, IrNode *node, int Counter)
{
Counter=0
if (node->irLeftChild == NULL &&
node->irRightChild == NULL &&
node->isVisited == false &&
node->type == kNewtonIrNodeType_Tidentifier)
{
Counter+=1
node->isVisited = true;
return ;
}
DFS(N, node->irLeftChild);
DFS(N, node->irRightChild);
}
Is there a better way to recurse through the tree?

I'm not sure what you're trying to do; but if you're trying to return a total count of entries that matched the criteria to the caller you probably want something like:
int DFS(State *N, IrNode *node) {
int Counter = 0;
if (node->irLeftChild == NULL && node->irRightChild == NULL && node->isVisited == false && node->type == kNewtonIrNodeType_Tidentifier) {
Counter += 1;
node->isVisited = true;
}
Counter += DFS(N, node->irLeftChild);
Counter += DFS(N, node->irRightChild);
return Counter;
}

Related

What's the most efficient way to keep track of n largest values?

I have a program that receives in input a large number of values and has to keep track of the n largest received. For example, let's say n is 3 and the input is 1,6,3,5,2 the output would have to be 6,5,3 (not necessarily in this order).
At the moment I'm using a min heap implemented via an array, but that isn't quite cutting it time-wise. Are there any alternatives I can look into?
The "(not necessarily in this order)" implies that you can have a sorted numerically output. As you only have to track values greater or equal than n, with a very large input of integer values, a wise way would be then to keep track of integer ranges instead of values.
It will heavily depend of the inputs. For hughes word integer inputs with a discrete uniform distribution, it would cost less memory and would be faster.
A simple pseudo code implementation would require for each input value to check it against a min heap ordered stack of ranges :
Range is
min as integer
max as integer
next as Range
duplicates as stack of integer
Range(pMin, pMax, pNext)
self.min = pMin
self.max = pMax
self.next = pNext
self.duplicates = empty stack of integer
Range heap_top = NULL
Range current_range = NULL
Range previous_range = NULL
boolean merge_flag
integer value
While read value from input
if value >= n Then
current_range = heap_top
previous_range = NULL
merge_flag = false
While current_range is not null
If value is current_range.min - 1 Then
current_range.min = value
merge_flag = true
Break
End If
If value is current_range.max + 1 Then
current_range.max = value
merge_flag = true
Break
End If
If value < current_range.min Then
current_range = NULL
Break
End If
If value is between current_range.min and current_range.max Then
# Here we track duplicates value
current_range.duplicates.push value
Break
End If
previous_range = current_range
current_range = current_range->next
End While
If current_range is not NULL Then
If merge_flag is true Then
If previous_range is not NULL and current_range.min - 1 is previous_range.max Then
# merge current range into previous one
previous_range.max = current_range.max
# Here we track duplicates value
previous_range.duplicates.pushall current_range.duplicates
previous_range.next = current_range.next
drop current_range
# If we need to keep a track of the range where belong the value
# current_range = previous_range
Else
If current_range.next is not NULL and current_range.max + 1 is
current_range.next.min Then
# merge next range into current one
# We use previous_range to point the next range
previous_range = current_range.next
current_range.max = previous_range.max
# Here we track duplicates value
current_range.duplicates.pushall previous_range.duplicates
current_range.next = previous_range.next
drop previous_range
End If
End If
End If
Else
If previous_range is NULL Then
current_range = new Range(value, value, heap_top)
heap_top = current_range
Else
current_range = new Range(value, value, previous_range.next)
previous_range.next = current_range
End If
End If
End If
End While
Less nodes implies less node traversal processing on the long run if the input is uniformly distributed. Less node traversal processing for each input value to process means a faster global processing, as we have then an algorithm approaching O(N) instead of O(N!) with N as the number of input values.
An example of C implementation of the the previous algorithm :
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
struct s_range {
unsigned int min;
unsigned int max;
struct s_range *next;
unsigned int *duplicates;
unsigned int duplicates_count;
};
struct s_range *new_range(unsigned int pMin,unsigned int pMax, struct s_range *pNext) {
struct s_range *lRange = malloc(sizeof(struct s_range));
if (lRange == NULL) {
perror("new_range: Failed to allocate");
return NULL;
}
lRange->min = pMin;
lRange->max = pMax;
lRange->next = pNext;
lRange->duplicates = NULL;
lRange->duplicates_count = 0;
return lRange;
}
void drop_range(struct s_range *pRange) {
if (pRange != NULL) {
if (pRange->duplicates != NULL) {
free(pRange->duplicates);
}
free(pRange);
}
}
void drop_allranges(struct s_range *pTopRange) {
struct s_range *lRange;
while (pTopRange != NULL) {
lRange = pTopRange->next;
drop_range(pTopRange);
pTopRange = lRange;
}
}
int push_duplicates(struct s_range *pRange, unsigned int pValue) {
unsigned int *lDuplicates;
if (pRange == NULL) {
return 2;
}
if (pRange->duplicates == NULL) {
lDuplicates = malloc(sizeof(unsigned int));
} else {
lDuplicates = realloc(pRange->duplicates, (pRange->duplicates_count + 1) * sizeof(unsigned int));
}
if (lDuplicates == NULL) {
perror("push_duplicates: failed to allocate...");
return 1;
}
lDuplicates[pRange->duplicates_count++] = pValue;
pRange->duplicates = lDuplicates;
return 0;
}
int pushall_duplicates(struct s_range *pRangeDst, struct s_range *pRangeSrc) {
unsigned int *lDuplicates;
if (pRangeDst == NULL || pRangeSrc == NULL) {
return 2;
}
if (pRangeSrc->duplicates == NULL) {
return 0;
}
if (pRangeDst->duplicates == NULL) {
lDuplicates = malloc(pRangeSrc->duplicates_count * sizeof(unsigned int));
} else {
lDuplicates = realloc(pRangeDst->duplicates, (pRangeDst->duplicates_count + pRangeSrc->duplicates_count) * sizeof(unsigned int));
}
if (lDuplicates == NULL) {
perror("pushall_duplicates: failed to allocate...");
return 1;
}
memcpy(&lDuplicates[pRangeDst->duplicates_count], pRangeSrc->duplicates, pRangeSrc->duplicates_count * sizeof(unsigned int));
pRangeDst->duplicates_count += pRangeSrc->duplicates_count;
pRangeDst->duplicates = lDuplicates;
return 0;
}
int main(int nbargs, char *argv[]) {
struct s_range *lHeapTop = NULL;
struct s_range *lCurrentRange;
struct s_range *lPreviousRange;
unsigned int lMergeFlag;
unsigned int lValue;
unsigned int lN = 3;
unsigned int lDispFlag = 0;
if (nbargs > 1) {
lN = atoi(argv[1]);
}
if (nbargs > 2) {
lDispFlag = atoi(argv[2]);
}
while(fread(&lValue, sizeof(unsigned int), 1, stdin) > 0) {
if (lValue >= lN) {
lCurrentRange = lHeapTop;
lPreviousRange = NULL;
lMergeFlag = 0;
while(lCurrentRange != NULL) {
if (lCurrentRange->min - 1 == lValue) {
lCurrentRange->min = lValue;
lMergeFlag = 1;
break;
}
if (lCurrentRange->max + 1 == lValue) {
lCurrentRange->max = lValue;
lMergeFlag = 1;
break;
}
if (lValue < lCurrentRange->min) {
lCurrentRange = NULL;
break;
}
if (lValue >= lCurrentRange->min && lValue <= lCurrentRange->max) {
if (push_duplicates(lCurrentRange, lValue) != 0) {
drop_allranges(lHeapTop);
return 1;
}
break;
}
lPreviousRange = lCurrentRange;
lCurrentRange = lCurrentRange->next;
}
if (lCurrentRange != NULL) {
if (lMergeFlag == 1) {
if (lPreviousRange != NULL && lCurrentRange->min - 1 == lPreviousRange->max) {
lPreviousRange->max = lCurrentRange->max;
if (pushall_duplicates(lPreviousRange, lCurrentRange) != 0) {
drop_allranges(lHeapTop);
return 1;
}
lPreviousRange->next = lCurrentRange->next;
drop_range(lCurrentRange);
} else {
if (lCurrentRange->next != NULL && lCurrentRange->max + 1 == lCurrentRange->next->min) {
lPreviousRange = lCurrentRange->next;
lCurrentRange->max = lPreviousRange->max;
if (pushall_duplicates(lCurrentRange, lPreviousRange) != 0) {
drop_allranges(lHeapTop);
return 1;
}
lCurrentRange->next = lPreviousRange->next;
drop_range(lPreviousRange);
}
}
}
} else {
if (lPreviousRange == NULL) {
lCurrentRange = new_range(lValue, lValue, lHeapTop);
if (lCurrentRange == NULL) {
drop_allranges(lHeapTop);
return 1;
}
lHeapTop = lCurrentRange;
} else {
lCurrentRange = new_range(lValue, lValue, lPreviousRange->next);
if (lCurrentRange == NULL) {
drop_allranges(lHeapTop);
return 1;
}
lPreviousRange->next = lCurrentRange;
}
}
}
}
// Check the results
if (lDispFlag == 1) {
lCurrentRange = lHeapTop;
while(lCurrentRange != NULL) {
printf("From %u to %u dup:", lCurrentRange->min, lCurrentRange->max);
for (unsigned int lInd = 0; lInd < lCurrentRange->duplicates_count; lInd++) {
printf(" %u", lCurrentRange->duplicates[lInd]);
}
printf("\n");
lCurrentRange = lCurrentRange->next;
}
}
// Cleaning
drop_allranges(lHeapTop);
return 0;
}
With a discrete uniform distribution set of 65 536 words, on a x64 Debian Buster (4670K CPU), this code (range executable) is three times faster than a classic min heap one (node executable):
bash:~$ awk 'BEGIN { for (idx=0;idx<65536;idx++) { v=rand()*256; v2=rand()*256; printf("%c%c%c%c",v,v2,0,0); }}' > data.bin
bash:~$ time cat data.bin | ./range 3
real 0m5.629s
user 0m5.516s
sys 0m0.031s
bash:~$ time cat data.bin | ./node 3
real 0m15.618s
user 0m15.328s
sys 0m0.016s

Cycle through an array in either direction based on a bool

I'm looking for a method of looping through some array in either direction based on some passed bool value, with the same functionality as:
void Transfer(bool *_payload, int _size, bool _isLSB)
{
if (_isLSB)
{
for (int i = _size - 1; i >= 0; i--)
{
digitalWrite(dataPin, _payload[i]);
}
}
else
{
for (int i = 0; i < _size; i++)
{
digitalWrite(dataPin, _payload[i]);
}
}
}
or
void Transfer(bool *_payload, int _size, bool _isLSB)
{
int _index = 0;
if (_isLSB) _index = _size - 1;
for (;;)
{
printf("%d",_payload[_index]);
if (_isLSB) _index--;
else _index++;
if (_isLSB && _index < 0) break;
if (!_isLSB && _index >= _size) break;
}
}
Other than creating a method that reverses the array, is there a nice simplification of this?
You can define the starting and ending point and the increment conditionally:
void Transfer(bool *_payload, int _size, bool _isLSB)
{
int increment = _isLSB ? -1 : 1;
int i = _isLSB ? _size : -1; // one before the area to scan
int end = _isLSB ? -1 : _size; // one past the area
while ((i += increment) != end) // incr/decr before testing
{
digitalWrite(dataPin, _payload[i]);
}
}
We do not know in advance which way the index will be changing (incrementing or decrementing), so we can't use less-than or greater-than in the loop condition. And after processing the last item the index will be modified once more, hence the stopping point is one past the area being processed.
Similarly we need the starting point one position before the scanned area, so that after incrementing (or decrementing) the index we process the valid, first item.
You can calculate the direction and the start/end position for the for loop depending on _isLSB
void Transfer(bool* _payload, int _size, bool _isLSB) {
int dir;
int start;
int end;
if(_isLSB) {
dir = -1;
start = _size-1;
end = -1;
}else {
dir = 1;
start = 0;
end = _size;
}
for(int i = start; i != end; i+=dir) {
digitalWrite(dataPin, _payload[i]);
}
}
What you could do for example, since in C true and false are expanded to integer values in reality, is to use said integer value for calculations.
In the following example I will extract the main problem from your question which is: Looping over a size in a direction depending on a boolean value
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
int main()
{
int size = 5;
bool condition = false;
// Option #1
printf("Option #1\n");
for (int i = (size - 1) * condition; (i >= 0 && condition) || (i < size && !condition); i += 1 * pow(-1, condition))
{
printf("%d", i);
}
// Option #2
printf("\nOption #2\n");
int i = (size - 1) * condition;
for (;;)
{
printf("%d", i);
i += 1 * pow(-1, condition);
if ((i < 0 && condition) || (i >= size && !condition))
break;
}
return 0;
}
With the main function giving the following output for condition = true
Option #1
01234
Option #2
01234
And the following output for condition = false
Option #1
43210
Option #2
43210

pass bool variable by reference in function argument

I am trying to include a boolean flag in the below function argument to see whether a value already exists in the current tree structure of values.
VLTreeNode *addNewNode(AVLTreeNode *currentNode, int k, int v, bool *ifExist)
{
if (currentNode == NULL)
{
return newAVLTreeNode(k, v);
}
if (k == currentNode->key && v == currentNode->value)
{
*ifExist = true;
return currentNode;
}
else if ((k == currentNode->key && v < currentNode->value) || k < currentNode->key)
{
currentNode->left = addNewNode(currentNode->left, k, v, &ifExist);
currentNode->left->parent = currentNode;
}
else if ((k == currentNode->key && v > currentNode->value) || k > currentNode->key)
{
currentNode->right = addNewNode(currentNode->right, k, v, &ifExist);
currentNode->right->parent = currentNode;
}
}
The call of this function would be like the following:
bool ifExist = false;
Tree->root = addNewNode(Tree->root, k, v, &ifExist);
if (ifExist)
{
T->size++;
}
and it does not work... could someone give me some hints what goes wrong in the code. Much appreciated.
In addNewNode, in your last two else if blocks, you're passing in &ifExist. But ifExist is a bool* already, should just pass in ifExist.
I'm not sure, but It's possible that the return statements only in the first if blocks is ok so long as those cover all possible base cases of your recursive function. [I was not thinking straight, the return statements are definitely necessary in all branches - I meant that setting ifExist to true is only required in the base cases...]
But yeah, the passing ifExist by reference (effectively now bool **) inside addNewNode is definitely not right.
EDIT: To clarify, in your outermost call to addNewNode where you have...
bool ifExist = false;
Tree->root = addNewNode(Tree->root, k, v, &ifExist);
...you do need to pass in by reference.
But inside addNewNode, your last two if else blocks should be
else if ((k == currentNode->key && v < currentNode->value) || k < currentNode->key)
{
currentNode->left = addNewNode(currentNode->left, k, v, ifExist);
currentNode->left->parent = currentNode;
}
else if ((k == currentNode->key && v > currentNode->value) || k > currentNode->key)
{
currentNode->right = addNewNode(currentNode->right, k, v, ifExist);
currentNode->right->parent = currentNode;
}
I would advise doing instead:
bool exists = false;
bool * ifExist = &exists;
Tree->root = addNewNode(Tree->root, k, v, ifExist);
That way, the way addNewNode arguments look is consistent everywhere... which will prevent future arcidents due to copy paste.

Need to decrement a counter in a recursive tree function, but only when I am moving "upwards" in the tree

I am writing a recursive function to find if there is a path from the root to a leaf that sums up to a certain number or not (user inputs the sum). Each time I move forward into a new recursive call, I increment the value of current_sum with the value of node->data. Current_sum is declared/initialized outside of the function. So this works fine to get the sum to the left-ermost leaf. However after that, the current_sum just keeps increasing, as I don't have an appropriate decrement operation to go with it. So if there does exist a path that adds up to a certain number in the righter branches, for example: 1 2 # # 3 # #, and I check for path sum = 4, (1+3), it would not get that. (If i check for sum=3 (1+2), it does get it.)
So I am looking for the correct place in my code to put the decrement operation. I was thinking something like: current_sum -= root->data. However I've tried putting it a lot of different places, but all of them seem to be wrong places. Either they disrupt the original tracker to get to even the very first leftermost leaf. Or they don't decrement at all (if I put it after the both the left/right recursive calls). I also do need it to keep decrementing while it goes UP but increment while it goes DOWN. Is there a way to write this in code, I am curious? Or, is this just a bad algorithm/approach?
I've seen other ways of solving this problem, such as https://www.geeksforgeeks.org/root-to-leaf-path-sum-equal-to-a-given-number/, which seem really nice, I just wanted to know if there was a way to resolve the one I started.
int current_sum = 0;
int sumPath(Node * root, int sum)
{
if (root == NULL)
{
return 0;
}
current_sum += root->data;
if ((root->left == NULL) && (root->right == NULL))
{
if (current_sum == sum)
{
return 1;
}
else
{
return 0;
}
}
int the_left = sumPath(root->left, sum);
int the_right = sumPath(root->right, sum);
////////////////////current_sum -= root->data; (?)
if (the_left>0)
{
return the_left;
}
else if (the_right>0)
{
return the_right;
}
return 0;
}
You may get invalid output, because of not sending current_sum as a parameter. Because current_sum needs to be updated for a particular stack-trace or function call, not for commonly for all the function calls. and this may give you an invalid state.
UPDATE
int isPossible(Node * root, int currentSum, int sum) {
if(!root) return 0;
currentSum += root.node;
// when you find the sum, and can't move further down
if(sum == currentSum && root->left == null && root->right == null) return 1;
int flag = 0;
// going down on left side
flag = isPossible(root->left, currentSum, sum);
// needs to check right side, only when you couldn't find sum on left
if(!flag)
flag = isPossible(root->right, currentSum, sum);
// return the state
return flag;
}
your code is fine, u just need to pass sum - current_sum in the recursive call. This is your code with some hinted modifications.
#include <stdio.h>
// remove global current_sum
struct Node {
char* name;
int data;
struct Node* left;
struct Node* right;
};
int sumPath(struct Node* root, int sum) {
if (root == NULL) {
return 0;
}
if ((root->left == NULL) && (root->right == NULL)) {
if (current_sum == sum) {
printf("%s ", root->name); // if the branch matches, print name
return 1;
} else {
return 0;
}
}
int the_left = sumPath(root->left, sum - root->data); // pass the subtracted sum
int the_right = sumPath(root->right, sum - root->data); // pass the subtracted sum
if (the_left > 0) {
printf("%s ", root->name); // if the branch matches, print name
return the_left;
} else if (the_right > 0) {
printf("%s ", root->name); // if the branch matches, print name
return the_right;
}
return 0;
}
int main() {
struct Node n1 = {.data = 1, .name = "n1"}; // n1
struct Node n2 = {.data = 1, .name = "n2"}; // ___|___
struct Node n3 = {.data = 1, .name = "n3"}; // | |
struct Node n4 = {.data = 1, .name = "n4"}; // n2 n4
// ___|
n1.left = &n2; // |
n1.right = &n4; // n3
n2.left = &n3; //
sumPath(&n1, 3); // no. of steps including the root
return 0;
}
// output
// n3 n2 n1

Counting the sign changes in a list in C

I have to get the number of sign changes in a list of doubles. For example if there is a list like that: "1, -1, -1, 1" there are 2 sign changes, between the adjacent elements. I tried it like this but for some reason the program crashes if I try to compile it:
int number_of_sign_changes(DoubleList* list) {
int changes = 0;
for (DoubleNode *n = list->first; n != NULL; n = n->next) {
if ((n->value >= 0) && (n->next->value < 0)) {
changes += 1;
} else if ((n->value < 0) && (n->next->value >= 0)) {
changes += 1;
}
}
return changes;
}
The loop definitely works. I tried it in other functions but here it won't work. Does anyone have an idea? Btw you could actually put the 2 if statements into 1 and I tried that aswell but same problem.
This is in essence a fence post problem. The number of possible sign changes is one less than the number of elements in the list, so as you are checking on each item, you can tell you are doing something wrong. The problem in fact occurs when checking the last item on the list - it attempts to check for a sign change to the 'next' item, and there isn't one, as n->next is NULL.
This can be fixed by a simple change to the terminating condition in the for loop as follows:
int number_of_sign_changes(DoubleList* list) {
int changes = 0;
for (DoubleNode *n = list->first; n != NULL && n->next != NULL; n = n->next) {
if ((n->value >= 0) && (n->next->value < 0)) {
changes += 1;
} else if ((n->value < 0) && (n->next->value >= 0)) {
changes += 1;
}
}
return changes;
}

Resources