I have a requirement where I have to create a new array by processing the given dynamic array. I have no idea what my new array size would be, but the max size is 2x the size of the old array.
Background:
Inside the function, it should iterate over each element in the array and based on the number it will add/delete either one or two elements into the new array.
Implementation:
How I implemented the above requirement was: I create a linked list and add one or two new nodes according to the check condition.
pseudocode:
/*
 * Allocate one detached list node (next = NULL, val uninitialized).
 * Aborts the whole program if the allocation fails, so callers never
 * see a NULL return.
 */
node_t *createNode() {
    node_t *node = malloc(sizeof *node);
    if (!node) {
        fputs("Error: Failed to allocate memory for temp.\n", stderr);
        exit(1);
    }
    node->next = NULL;
    return node;
}
/*
* Function: addNode
* -----------------
* add node to a existing linked list at end
*
* head: original linked list to add additional node at end
* newVal: value of the additional node
*
* return: new linked list that have an additional node
*/
/*
 * Function: addNode
 * -----------------
 * Append a node carrying newVal to the tail of the list.
 *
 * head:   list to extend (may be NULL for an empty list)
 * newVal: value stored in the appended node
 *
 * return: head of the extended list
 *
 * Note: walks the entire list on every call, so building a list of n
 * elements this way costs O(n^2) overall.
 */
node_t *addNode(node_t *head, double newVal) {
    node_t *node = createNode();
    node->val = newVal;

    /* Empty list: the new node becomes the head. */
    if (head == NULL)
        return node;

    /* Otherwise walk to the tail and hook the node on. */
    node_t *tail = head;
    while (tail->next != NULL)
        tail = tail->next;
    tail->next = node;
    return head;
}
/*
* Function: lastNodeDeletion
* --------------------------
* delete last node of the linked list
*
* head: linked list
*/
/*
 * Function: lastNodeDeletion
 * --------------------------
 * Delete (free) the last node of the linked list.
 *
 * head: linked list to shorten by one node
 *
 * NOTE(review): 'head' is passed by value, so the 'head = NULL' below only
 * changes the local copy. When the list contains exactly one node, the
 * caller's pointer is left dangling after free() -- the caller must not use
 * it afterwards. A node_t **head (or returning the new head) would fix this.
 * Also note this is O(n): the whole list is traversed to find the tail on
 * every call.
 */
void lastNodeDeletion(node_t *head) {
    node_t *toDelLast;
    node_t *preNode;
    if (head == NULL) {
        /* Message goes to stdout without a trailing newline. */
        printf("There is no element in the list.");
    } else {
        toDelLast = head;
        preNode = head;
        /* Traverse to the last node of the list */
        while (toDelLast->next != NULL) {
            preNode = toDelLast;
            toDelLast = toDelLast->next;
        }
        if (toDelLast == head) {
            /* If there is only one item in the list, remove it */
            /* (local assignment only -- see NOTE above) */
            head = NULL;
        } else {
            /* Disconnects the link of second last node with last node */
            preNode->next = NULL;
        }
        /* Delete the last node */
        free(toDelLast);
    }
}
/*
* Function: calculateLower
* ------------------------
* find the data set of lower tube curve
*
* reference: reference data curve
* tubeSize: data array specifying tube size that includes:
* tubeSize[0], x -- half width of rectangle
* tubeSize[1], y -- half height of rectangle
* tubeSize[2], baseX -- base of relative value is x direction
* tubeSize[3], baseY -- base of relative value is y direction
* tubeSize[4], ratio -- ratio y / x
*
* return : data set defining lower curve of the tube
*/
/*
 * Function: freeList
 * ------------------
 * Release every node of a linked list built with addNode().
 */
static void freeList(node_t *head) {
    while (head != NULL) {
        node_t *next = head->next;
        free(head);
        head = next;
    }
}

/*
 * Function: calculateLower
 * ------------------------
 * find the data set of lower tube curve
 *
 * reference: reference data curve
 * tubeSize: data array specifying tube size that includes:
 *   tubeSize[0], x -- half width of rectangle
 *   tubeSize[1], y -- half height of rectangle
 *   tubeSize[2], baseX -- base of relative value is x direction
 *   tubeSize[3], baseY -- base of relative value is y direction
 *   tubeSize[4], ratio -- ratio y / x
 *
 * return : data set defining lower curve of the tube
 */
struct data calculateLower(struct data reference, double *tubeSize) {
    int i;
    struct data ref_norm;   /* normalized copy of the reference curve */
    /*
     * struct data contains two pointers double *x, double *y, int n;
     */
    struct data lower;      /* result: lower tube curve */
    node_t *lx = NULL;      /* x coordinates of the lower curve, built as a list */
    node_t *ly = NULL;      /* y coordinates of the lower curve, built as a list */
    // ===== 1. add corner points of the rectangle =====
    double m0, m1; // slopes before and after point i of reference curve
    double s0, s1; // sign of slopes of reference curve: 1 - increasing, 0 - constant, -1 - decreasing
    double mx, my; // normalization factors (mean magnitude of x and y)
    int b;         // index of first point after leading duplicates
    double xLen;   // normalized half width of the tube rectangle
    double yLen;   // normalized half height of the tube rectangle
    // Normalize data.
    mx = fabs(mean(reference.x, reference.n));
    my = fabs(mean(reference.y, reference.n));
    ref_norm = normalizeData(reference, mx, my);
    /* Parenthesized conditions: robust whether equ() is a macro or a function. */
    if (equ(mx, 0.0)) {
        xLen = tubeSize[0];
    } else {
        xLen = tubeSize[0] / mx;
    }
    if (equ(my, 0.0)) {
        yLen = tubeSize[1];
    } else {
        yLen = tubeSize[1] / my;
    }
    // ----- 1.1 Start: rectangle with center (x,y) = (reference.x[0], reference.y[0]) -----
    // ignore identical point at the beginning
    b = 0;
    while ((b+1 < ref_norm.n) && equ(ref_norm.x[b], ref_norm.x[b+1]) && (equ(ref_norm.y[b], ref_norm.y[b+1])))
        b = b+1;
    // add down left point
    lx = addNode(lx, (ref_norm.x[b] - xLen));
    ly = addNode(ly, (ref_norm.y[b] - yLen));
    if (b+1 < ref_norm.n) {
        // slopes of reference curve (initialization)
        s0 = sign(ref_norm.y[b+1] - ref_norm.y[b]);
        if (!equ(ref_norm.x[b+1], ref_norm.x[b])) {
            m0 = (ref_norm.y[b+1] - ref_norm.y[b]) / (ref_norm.x[b+1] - ref_norm.x[b]);
        } else {
            // vertical segment: use a huge finite slope with the right sign
            m0 = (s0 > 0) ? 1e+15 : -1e+15;
        }
        if (equ(s0, 1)) {
            // add down right point
            lx = addNode(lx, (ref_norm.x[b] + xLen));
            ly = addNode(ly, (ref_norm.y[b] - yLen));
        }
        // ----- 1.2 Iteration: rectangle with center (x,y) = (reference.x[i], reference.y[i]) -----
        for (i = b+1; i < ref_norm.n-1; i++) {
            // ignore identical points
            if (equ(ref_norm.x[i], ref_norm.x[i+1]) && equ(ref_norm.y[i], ref_norm.y[i+1]))
                continue;
            // slopes of reference curve
            s1 = sign(ref_norm.y[i+1] - ref_norm.y[i]);
            if (!equ(ref_norm.x[i+1], ref_norm.x[i])) {
                m1 = (ref_norm.y[i+1] - ref_norm.y[i]) / (ref_norm.x[i+1] - ref_norm.x[i]);
            } else {
                m1 = (s1 > 0) ? (1e+15) : (-1e+15);
            }
            // add no point for equal slopes of reference curve
            if (!equ(m0, m1)) {
                if (!equ(s0, -1) && !equ(s1, -1)) {
                    // add down right point
                    lx = addNode(lx, (ref_norm.x[i] + xLen));
                    ly = addNode(ly, (ref_norm.y[i] - yLen));
                } else if (!equ(s0, 1) && !equ(s1, 1)) {
                    // add down left point
                    lx = addNode(lx, (ref_norm.x[i] - xLen));
                    ly = addNode(ly, (ref_norm.y[i] - yLen));
                } else if (equ(s0, -1) && equ(s1, 1)) {
                    // local minimum: add down left point then down right point
                    lx = addNode(lx, (ref_norm.x[i] - xLen));
                    ly = addNode(ly, (ref_norm.y[i] - yLen));
                    lx = addNode(lx, (ref_norm.x[i] + xLen));
                    ly = addNode(ly, (ref_norm.y[i] - yLen));
                } else if (equ(s0, 1) && equ(s1, -1)) {
                    // local maximum: add down right point then down left point
                    lx = addNode(lx, (ref_norm.x[i] + xLen));
                    ly = addNode(ly, (ref_norm.y[i] - yLen));
                    lx = addNode(lx, (ref_norm.x[i] - xLen));
                    ly = addNode(ly, (ref_norm.y[i] - yLen));
                }
                int len = listLen(ly);
                double lastY = getNth(ly, len-1);
                // remove the last added points in case of zero slope of tube curve
                if (equ((ref_norm.y[i+1] - yLen), lastY)) {
                    if (equ(s0 * s1, -1) && equ(getNth(ly, len-3), lastY)) {
                        // remove two points, if two points were added at last
                        // ((len-1) - 2 >= 0, because start point + two added points)
                        lastNodeDeletion(lx);
                        lastNodeDeletion(ly);
                        lastNodeDeletion(lx);
                        lastNodeDeletion(ly);
                    } else if (!equ(s0 * s1, -1) && equ(getNth(ly, len-2), lastY)) {
                        // remove one point, if one point was added at last
                        // ((len-1) - 1 >= 0, because start point + one added point)
                        lastNodeDeletion(lx);
                        lastNodeDeletion(ly);
                    }
                }
            }
            s0 = s1;
            m0 = m1;
        }
        // ----- 1.3. End: Rectangle with center (x,y) = (reference.x[reference.n - 1], reference.y[reference.n - 1]) -----
        if (equ(s0, -1)) {
            // add down left point
            lx = addNode(lx, (ref_norm.x[ref_norm.n-1] - xLen));
            ly = addNode(ly, (ref_norm.y[ref_norm.n-1] - yLen));
        }
    }
    // add down right point
    lx = addNode(lx, (ref_norm.x[ref_norm.n-1] + xLen));
    ly = addNode(ly, (ref_norm.y[ref_norm.n-1] - yLen));
    // ===== 2. Remove points and add intersection points in case of backward order =====
    int lisLen = listLen(ly);
    // BUG FIX: the original code malloc'ed tempLX/tempLY here and then
    // immediately overwrote both pointers with getListValues(), leaking the
    // freshly allocated buffers. getListValues() presumably returns a newly
    // allocated array of the node values -- TODO confirm its contract.
    double *tempLX = getListValues(lx);
    double *tempLY = getListValues(ly);
    if (tempLX == NULL || tempLY == NULL) {
        fputs("Error: Failed to allocate memory for tempLX.\n", stderr);
        exit(1);
    }
    // BUG FIX: the original never released the list nodes. The values have
    // been copied out into plain arrays, so the lists can be freed now.
    freeList(lx);
    freeList(ly);
    lower = removeLoop(tempLX, tempLY, lisLen, -1);
    // NOTE(review): if removeLoop() copies its inputs rather than taking
    // ownership of tempLX/tempLY, they should be freed here -- confirm its
    // contract before adding the free() calls.
    return denormalizeData(lower, mx, my);
}
It worked well for small examples. But when an array with 0.5 million values is passed as input to this function, the performance decreased drastically.
Performance decreases when the pointers belong to an array of 0.5 million data points.
Is there any alternative solution for the above problem, or is there any other modification that needs to be done to improve performance?
Thanks
I see nothing particularly inefficient about your linked-list code, but there are several reasons why a linked list approach in general might not be ideal performance-wise. Chief among those are
Dynamic memory allocation is fairly expensive on a per-call basis. With a linked list, the number of allocations performed by your code scales linearly with the problem size.
A linked list involves maintaining metadata, which takes up more memory and therefore reduces locality for accessing the data.
Per-node (or per-node-group) allocation is likely to inherently reduce data locality, as different allocated blocks are not certain to be adjacent to each other.
If you have a good, not-too-outrageous upper bound on the amount of memory you will need -- as it sounds like you do -- then I would strongly consider allocating the maximum you will need as a single chunk (just one malloc() call instead of many), using it as an array, and, optionally, shrinking the allocation with realloc() once you know how much you really needed.
Saving hundreds or thousands of malloc() calls is a sure win, though it's unclear how much of one. Likewise, having direct access to the data, with improved locality of access, is a sure win (of similarly uncertain magnitude). Also, even if you don't shrink the allocation at the end, you may well find out that you use less memory that way.
HOWEVER, as with any performance problem, you should test with a profiler to determine what parts of the program are taking the most time. Compilers are pretty good at producing fast code, and humans are pretty bad at identifying bottlenecks by code inspection. Do not risk spending hours or days making improvements to areas that don't really matter performance-wise. Find out first what parts you need to improve, then work on those.
Update:
On second look, I do see something dreadfully inefficient about your linked-list code: each time you add a node, you traverse the whole list so as to add it at the end. This makes building the list scale as O(n²). My comments about linked list usage in general notwithstanding, you could eliminate all that by either
adding new elements at the head instead of at the tail, or
maintaining a pointer to the tail of each list, so as to be able to add to the end without traversing the list.
Similar applies to function lastNodeDeletion(). Traversing the whole list to find the last node gets costly as the list gets long. It's not clear to me how many times your program ends up doing that, but if that turns out to be slowing you down (see previous comments about profiling), then you could consider not only tracking the list tail, but additionally making it a doubly-linked list, so as to be able to delete from the end without traversing the list. (But note that "deleting" from the end of an array just means updating your count of how many elements are in use, which takes constant time for deleting a tail of any length.) Alternatively, if by adding at the head of the list you can also change your requirement to deleting from the head, then that also would eliminate the need to traverse the whole list to perform deletions.
It worked well for small examples. But when an array with 0.5 million values is passed as input to this function, the performance decreased drastically.
That makes sense, memory allocation is an expensive process, it certainly affects the performance of a program, that becomes apparent as you use it more often, i.e. in an extensive loop.
Is there any alternative solution for the above problem or is there any other modification that needs to done to improve performance?
If you want to use heap memory there is no faster way, you must use memory allocation, I don't see how you would significantly improve the performance while still using memory allocation, though you could somewhat improve on it by allocating larger blocks at a time thus avoiding allocating in every iteration, or even allocating all the needed memory in one go if you are aware of the total needed size.
Using stack memory is definitely faster and would be a good alternative, but in your case, since you are working with large amounts of data, that may not be possible, the stack size is very limited.
This said, the best way to assert the inefficiencies of your program is to test it yourself, surely memory management is not the only thing you can optimize. You can also use tools that allow you to profile the execution time of the program, for instance, in Linux systems, Callgrind.
Extending on @John's answer: when you look at the mallocs (aka addNode())
calculateLower() {
addNode();
addNode();
for (b+1 ... ref_norm.n-1) {
addNode();
addNode();
}
addNode();
addNode();
}
There are two independent lists, each having at most two allocations at the beginning and the end, and at most two allocations per iteration. This makes at most 2 + (ref_norm.n - 1 - (b + 1)) + 2 allocations, per list.
Switching from a linked list to an array (plus an index/pointer to the current end) has two advantages
it reduces the number of allocations
it reduces the complexity of addNode() and lastNodeDeletion() from O(n) to O(1)
Here is my code for my gomoku AI. Currently my AI runs for over 5 seconds, but the time limit is 5 seconds. I am trying to improve the performance, so I tried move ordering, but it does not seem to work. I calculate the score first in the getChildStates(int player) function and then sort the vector into descending order. But it just does not work. Can somebody help me?
Also, my depth is two. A transposition table does not seem to help, so I haven't tried it.
/*
 * Depth-limited minimax with alpha-beta pruning.
 *
 * depth:            current ply (0 at the root; search horizon is fixed at 2)
 * state:            position being evaluated
 * maximizingPlayer: true when it is player 1's turn (maximize score)
 * alpha, beta:      current pruning window
 *
 * Side effect: at the root (depth == 0) records the chosen move in the
 * globals bestMoveX / bestMoveY.
 *
 * return: the minimax value of 'state'.
 */
int minimax(int depth, GameState state, bool maximizingPlayer, int alpha, int beta)
{
    // Leaf: use the static evaluation computed in getChildStates().
    if (depth == 2)
        return state.score;
    if (maximizingPlayer)
    {
        vector<GameState> children = state.getChildStates(1);
        // Best-first for the maximizer: highest-scoring children first,
        // so alpha rises quickly and later siblings get pruned.
        sort(children.begin(), children.end(), greaterA());
        int best = MIN;
        for (auto& value : children) {
            int val = minimax(depth + 1, value,
                              false, alpha, beta);
            int oldBest = best;
            best = max(best, val);
            alpha = max(alpha, best);
            // Remember the root move whenever the best value improves.
            if (depth == 0 && oldBest != best){
                bestMoveX = value.lastMove.x;
                bestMoveY = value.lastMove.y;
            }
            // Alpha Beta Pruning
            if (beta <= alpha)
                break;
        }
        return best;
    }
    else
    {
        vector<GameState> children = state.getChildStates(2);
        // BUG FIX: the minimizer must try its LOWEST-scoring children first
        // for alpha-beta to cut off early. The original sorted descending
        // here (same comparator as the maximizer), which is the WORST
        // ordering for this node type and is why move ordering "did not
        // work". Sorting via reverse iterators with the same comparator
        // yields ascending order.
        sort(children.rbegin(), children.rend(), greaterA());
        int best = MAX;
        for (auto& value : children) {
            int val = minimax(depth + 1, value,
                              true, alpha, beta);
            best = min(best, val);
            beta = min(beta, best);
            // Alpha Beta Pruning
            if (beta <= alpha)
                break;
        }
        return best;
    }
}
I won't recommend sorting the game states to prioritize the states thereby enabling force move as per set timeout. Even with alpha-beta pruning, the minimax tree may be just too big. For reference you can have a look in the GNU Chess at github. Here are some options to reduce the best move search time:
1) Reduce depth of search.
2) Weed out redundant moves from the possible moves.
3) Use multi-threading in the first ply to gain speed
4) Allow quiescence search mode, so that minimax tree branches could continue generating in the background when the human opponent is still thinking.
5) Instead of generating the minimax tree for every move, you can think about reusable minimax tree where you only pruned moves already made and only continue generating one ply every iteration ( instead of the whole tree, see this article ).
The code I wrote below is objection detection using size-invariant template matching. This technique was detailed in the following site: https://www.google.com/url?hl=en&q=http://www.pyimagesearch.com/2015/01/26/multi-scale-template-matching-using-python-opencv/&source=gmail&ust=1489552541603000&usg=AFQjCNHSfgL_RTy-o5SMyCmWELFbsfOOTw
The function works fine. However, when I am running too many iterations in the while loop, OpenCV Error: Insufficient Memory will occur.
I can't seem to figure out why I'm running into this error, as I'm releasing the matrix data that I'm creating with cvCreateMat in every iteration. The memory error occurs after running the loop several times, inside the function cvMatchTemplate. Am I missing another source of error? I am writing this code in C on the LCDK.
Function:
/* Multi-scale template matching: repeatedly shrink the search image and run
 * cvMatchTemplate, recording the best match location and scale per iteration.
 * Uses externals defined elsewhere: width_curr/height_curr, width/height,
 * template, source_close_edge_dist_8, max_all, max_all_point, max_all_ratio,
 * size. ('template' is a valid identifier in C, though not in C++.) */
double minVal = 0.0;
double maxVal = 0.0 ;
CvPoint minLoc;
CvPoint maxLoc;
double ratio = 1.0;   /* squared scale factor; sqrt(ratio) is the linear scale */
CvMat* mask2 = NULL;
//for containing the maximum values
CvMat* resized_source;
CvMat* result; //result stored for every shapes
while (1){
// All templates are sized around the same
/* Stop once the resized source would be no larger than the template. */
if(width_curr <= template->cols || height_curr <= template->rows)
break;
resized_source = cvCreateMat(height_curr,width_curr,CV_8UC1);
cvResize(source_close_edge_dist_8,resized_source,CV_INTER_LINEAR);
/* Result matrix has (src - tmpl + 1) rows/cols, per cvMatchTemplate docs. */
result = cvCreateMat((resized_source->rows)-(template->rows)+1, (resized_source->cols)-(template->cols)+1, CV_32FC1);
cvMatchTemplate(resized_source, template, result, CV_TM_CCOEFF);
//Detecting several objects
cvMinMaxLoc(result, &minVal, &maxVal, &minLoc, &maxLoc, mask2);
/* Record the best correlation, its location, and the linear scale used. */
*(max_all+*size) = maxVal;
(max_all_point+*size)->x = maxLoc.x;
(max_all_point+*size)->y = maxLoc.y;
*(max_all_ratio+*size) = sqrt(ratio);
*size = *size + 1;
// move on to next resizing
ratio -= 0.04;
width_curr = sqrt(ratio)*width;
height_curr = sqrt(ratio)*height;
minVal = 0.0;
maxVal =0.0 ;
/* NOTE(review): cvReleaseMat() already decrements/releases the matrix data;
 * calling cvReleaseData() first and then cvReleaseMat() on the same header
 * may release the data twice and corrupt the heap, which would explain the
 * later "Insufficient Memory" inside cvMatchTemplate -- confirm against the
 * OpenCV 1.x C API docs and try removing the cvReleaseData() calls. */
cvReleaseData(resized_source);
cvReleaseMat(&resized_source);
cvReleaseData(result);
cvReleaseMat(&result);
}
I work in Code Composer Studio Version: 6.0.1.00040
with the card LCDK C6748, but I think this is a more general question, relating to CCS generally.
I'm trying to implement LMS for cancelling acoustic echoes,
this is the skeleton of my .c file:
/*
 * Busy-wait until the codec ISR (interrupt4) raises 'flag', then consume it.
 * NOTE(review): 'flag' is declared elsewhere; it must be volatile, or the
 * compiler may hoist the load and spin forever -- confirm its declaration.
 */
void waitForInterrupt()
{
    while (!flag)
        ;                /* spin: ISR will set flag to 1 */
    flag = 0;            /* acknowledge; reached only once flag == 1 */
}
/*
 * Codec ISR: runs once per sample frame ('interrupt' is a TI compiler
 * extension). Captures the microphone sample, drives both output channels,
 * writes the packed frame to the codec, and raises 'flag' so the main
 * loop's waitForInterrupt() can run one adaptive-filter iteration.
 * NOTE(review): presumably fires at FS_8000_HZ set up in main -- confirm.
 */
interrupt void interrupt4(void)
{
// Inputs
inputRight_micSignal = (float)input_right_sample();
// Outputs
// Left channel carries the white-noise reference produced by the main loop.
outputLeft_referenceSignal= whiteNoiseSample;
// Right channel carries the echo-cancelled (error) signal.
codec_data.channel[RIGHT]= (uint16_t)outputRight_cleanedSound;
codec_data.channel[LEFT]= (uint16_t)outputLeft_referenceSignal;
output_sample(codec_data.uint);
// Signal the main loop that a fresh sample frame is available.
flag = 1;
}
/*
 * LMS adaptive filter main loop for acoustic echo cancellation.
 * w[] are the adaptive weights, x[] the reference-signal delay line
 * (both declared elsewhere, length ADAPTIVE_FILT_SIZE).
 * NOTE(review): if beta * error * x[i] ever overflows to +/-inf, the NaNs
 * observed in w[] would follow (inf - inf / inf * 0); check beta's size.
 */
void main()
{
// variable declarations
int i;
float filter_output;
// initialising filter coefficients
for (i=0 ; i<ADAPTIVE_FILT_SIZE ; i++) // initialise weights and delay line
{
w[i] = 0.0;
x[i] = 0.0;
}
// initialising the interrupt routine
L138_initialise_intr(FS_8000_HZ,ADC_GAIN_0DB,DAC_ATTEN_0DB,LCDK_MIC_INPUT);
while(1) // adaptive filtering routine
{
// Block until interrupt4() has delivered one sample frame.
waitForInterrupt();
whiteNoiseSample = getPrnFiltered();
for (i = ADAPTIVE_FILT_SIZE-1; i > 0; i--) // update delay line - TDL:merge later with w loop (still make sure no magic NaN's appear)
{
x[i] = x[i-1];
}
x[0] = outputLeft_referenceSignal; // input to adaptive filter
filter_output = 0; //reseting filter output
// compute adaptive filter output (dot product of weights and delay line)
for (i = 0; i < ADAPTIVE_FILT_SIZE; i++)
filter_output += (w[i]*x[i]);
outputRight_cleanedSound = inputRight_micSignal - filter_output; // compute error
// LMS weight update: w[i] += beta * error * reference
for (i = ADAPTIVE_FILT_SIZE-1; i >= 0; i--) // update weights and delay line
{
w[i] = w[i] + beta*outputRight_cleanedSound*x[i]; // w[i]+=beta*"error"*"reference"
}
/* NOTE(review): the closing braces of while(1) and main() are missing
 * from this excerpt -- the definition is truncated here. */
For some reason, when I put the arrays x[] and w[] in the "watch table"
and I suspend the running of the program (in order to examine the w[] coefficients after a while), I see that it is full of NaN's — while x[] contains "regular"
values.
when I put breakpoint inside the line where w[] is calculated:
w[i] = w[i] + beta*outputRight_cleanedSound*x[i]; // w[i]+=beta*"error"*"reference"
I see the flow goes there.
What could be the reason for the NaN's?
Is there a way to watch w[] in the "wach table"?
These three steps work for me:
1) First you need to make sure the variables are globally available (e.g. that they are not allocated on the stack).
2) You need to halt the processor before trying to read the variables. (In Debug view: Tools -> Debugger Options -> Auto Run and Launch Options).
3) Enable "halt the target before any debugger access" on the watch window and click the "auto-update" icon in the "Variables"-window.
I've uploaded a screenshot with red boxes around the stuff you need to touch.
See if that helps you :) Otherwise check out TI's Engineer2Engineer forum (E2E). In my experience the TI guys are quick to answer and I've gotten very competent help from them.
Tell me how it works for you :) ?
FWIW I'm using Code Composer Studio v.5.5.0.00077.
I've tried searching for anything similar about my issue on several websites, including this one, but none I've found so far are similar. After searching about the term 1.#QO, I found something about quiet NaN, but I'm new to C in general, so I don't really understand the issue .
I'm trying to take the x and y values of a joystick, and then use a formula for distance to find the distance between the joystick's position and the joystick's natural resting position (0,0).
If it matters, I'm trying to do this in RobotC.
#pragma config(Hubs, S1, HTMotor, none, none, none)
#pragma config(Sensor, S1, , sensorI2CMuxController)
#pragma config(Motor, mtr_S1_C1_1, DriveMotor, tmotorTetrix, openLoop)
//*!!Code automatically generated by 'ROBOTC' configuration wizard !!*//
#include "JoystickDriver.c"
int calculateDrivePower(int joyStickX, int joyStickY)
{
if (joyStickX != 0 & joyStickY != 0)
{
int joyDistance = (sqrt(pow((joyStickX - 0),2)+ pow((-joyStickY - 0),2)));
joyDistance = ((joyDistance/127)*100);
return (joyDistance);
}
else
{
return 0;
}
}
/*
 * RobotC entry task: poll the joystick forever and drive DriveMotor with a
 * power proportional to the stick's distance from center.
 * CLEANUP: removed the unused locals 'previousJoySlope' and 'TurnSpeed'.
 */
task main()
{
    while (true)
    {
        getJoystickSettings(joystick);
        // 10-count dead zone on both axes to ignore stick drift near center.
        if (abs(joystick.joy1_x1) > 10 || abs(joystick.joy1_y1) > 10)
        {
            writeDebugStreamLine("Test successful.");
            motor[DriveMotor] = calculateDrivePower(joystick.joy1_x1,joystick.joy1_y1);
        }
    }
}
If anyone could provide any insight, that'd be fantastic, thanks.
if (joyStickX != 0 & joyStickY != 0)
{
int joyDistance = (sqrt(pow((joyStickX - 0),2)+ pow((-joyStickY - 0),2)));
joyDistance = ((joyDistance/127)*100);
return (joyDistance);
}
The first issue that appears is the conditional within the if statement. Then you have a single & that most likely should be &&:
if (joyStickX != 0 && joyStickY != 0)
(note: likely should be is used above because you can provide a conditional using a logical & of the tests joystickx != 0, but in this case it provides the same result. In that case, I would suggest the more readable && be used)
The next part of the code is simply the vector distance between 0,0 and the present position of the joystick. Of the general form dist^2 = (x2-x1)^2 + (y2-y1)^2 in your case x1,y1 = 0,0. Taking the square root of both sides provides dist = sqrt((x2-x1)^2 + (y2-y1)^2), or in C notation:
dist = (sqrt(pow((joyStickX - 0),2)+ pow((-joyStickY - 0),2)));
Next you have a scaling applied of dist = dist * 100/127 which provides the final distance returned. Hopefully this will help you understand what the code is doing.