CPLEX MIP current node LP relaxation - c

During a MIP optimization with CPLEX C APIs is it possible to retrieve the linear relaxation (dual variables, reduced costs, etc ...) of the current node (i.e. every n nodes)?
I've registered a callback function (CPXsetsolvecallbackfunc) in order to be notified each time a new node is available. In the callback I use CPXgetcallbackinfo to retrieve the node information and CPXgetcallbacknodelp to retrieve the linear relaxation but unfortunately the procedure CPXsolution returns that no solution exists and the MIP optimization exits.
Here is a sample code realized starting from an IBM example where environment and problem are assumed to be properly initialized.
struct noderange {
int startnode;
int endnode;
};
typedef struct noderange NODERANGE;
NODERANGE nodeswritten;
nodeswritten.startnode = -1;
nodeswritten.endnode = 2100000000;
status = CPXsetsolvecallbackfunc(environment, usersolve, &nodeswritten);
if(status) {goto TERMINATE;}
status = CPXmipopt(environment, problem);
if(status) {goto TERMINATE;}
where the usersolve procedure is
static int CPXPUBLIC usersolve(CPXCENVptr env, void *cbdata, int wherefrom, void *cbhandle, int *useraction_p) {
int status = 0;
int nodecount;
static int count = 0;
CPXLPptr nodelp;
NODERANGE *nodeswritten;
*useraction_p = CPX_CALLBACK_DEFAULT;
nodeswritten = (NODERANGE *)cbhandle;
/* Find out what node is being processed */
status = CPXgetcallbackinfo(env, cbdata, wherefrom, CPX_CALLBACK_INFO_NODE_COUNT, &nodecount);
if (status) goto TERMINATE;
if (nodecount >= nodeswritten->startnode && nodecount <= nodeswritten->endnode) {
/* Get pointer to LP subproblem, then write a SAV file. */
status = CPXgetcallbacknodelp(env, cbdata, wherefrom, &nodelp);
if (status) goto TERMINATE;
int rows = CPXgetnumcols(env, nodelp);
int cols = CPXgetnumrows(env, nodelp);
int lpstat;
double objval;
double* x = (double*)malloc(sizeof(double) * CPXgetnumcols(env, nodelp));
double* dj = (double*)malloc(sizeof(double) * CPXgetnumcols(env, nodelp));
double* pi = (double*)malloc(sizeof(double) * CPXgetnumrows(env, nodelp));
double* slack = (double*)malloc(sizeof(double) * CPXgetnumrows(env, nodelp));
status = CPXsolution(env, nodelp, &lpstat, &objval, x, pi, slack, dj);
printf("Solutionstatus = %d\n", lpstat);
if (status) { goto TERMINATE; } // <--- HERE it returns no solution exists
free(x);
free(dj);
free(pi);
free(slack);
printf("[%d]\trows = %d cols = %d\t sol stat = %d\t z = %f\n", nodecount, rows, cols, lpstat, objval);
if (nodecount == nodeswritten->endnode) status = 1;
count++;
}
TERMINATE:
return (status);
}

I found out the problem, the callback is called before the subproblem has been solved so CPXsolution returns that no solution exists.

Related

Getting volume value from pulseaudio

I've written this code by looking at various examples: Python pulseaudio monitor, Pavumeter source, async playback example, and Pacat source.
I have successfully connected to a sink and am able to record it, but my problem is, I'm stuck at getting the volume value out. If I try printing value from the read function, I just get a bunch of random numbers at a second's interval.
Now I'm not asking for someone to finish writing the code for me, I'd just like some tips, help so that I could head towards the right direction. How do I retrieve the volume value?
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <pulse/pulseaudio.h>
static int latency = 20000; // start latency in micro seconds
static int sampleoffs = 0;
static short sampledata[300000];
static pa_buffer_attr bufattr;
static int underflows = 0;
static pa_sample_spec ss;
// This callback gets called when our context changes state. We really only
// care about when it's ready or if it has failed
void pa_state_cb(pa_context *c, void *userdata) {
pa_context_state_t state;
int *pa_ready = userdata;
state = pa_context_get_state(c);
switch (state) {
// These are just here for reference
case PA_CONTEXT_UNCONNECTED:
case PA_CONTEXT_CONNECTING:
case PA_CONTEXT_AUTHORIZING:
case PA_CONTEXT_SETTING_NAME:
default:
break;
case PA_CONTEXT_FAILED:
case PA_CONTEXT_TERMINATED:
*pa_ready = 2;
break;
case PA_CONTEXT_READY:
*pa_ready = 1;
break;
}
}
static void stream_read_cb(pa_stream *s, size_t length, void *userdata) {
const void *data;
pa_stream_peek(s, &data, &length);
data = (const unsigned char*) data;
printf("%u", data);
pa_stream_drop(s);
}
int main(int argc, char *argv[]) {
pa_mainloop *pa_ml;
pa_mainloop_api *pa_mlapi;
pa_context *pa_ctx;
pa_stream *recordstream;
int r;
int pa_ready = 0;
int retval = 0;
unsigned int a;
double amp;
int test = 0;
// Create a mainloop API and connection to the default server
pa_ml = pa_mainloop_new();
pa_mlapi = pa_mainloop_get_api(pa_ml);
pa_ctx = pa_context_new(pa_mlapi, "Simple PA test application");
pa_context_connect(pa_ctx, NULL, 0, NULL);
// This function defines a callback so the server will tell us it's state.
// Our callback will wait for the state to be ready. The callback will
// modify the variable to 1 so we know when we have a connection and it's
// ready.
// If there's an error, the callback will set pa_ready to 2
pa_context_set_state_callback(pa_ctx, pa_state_cb, &pa_ready);
// We can't do anything until PA is ready, so just iterate the mainloop
// and continue
while (pa_ready == 0) {
pa_mainloop_iterate(pa_ml, 1, NULL);
}
if (pa_ready == 2) {
retval = -1;
goto exit;
}
ss.rate = 44100;
ss.channels = 2;
ss.format = PA_SAMPLE_U8;
recordstream = pa_stream_new(pa_ctx, "Record", &ss, NULL);
if (!recordstream) {
printf("pa_stream_new failed\n");
}
pa_stream_set_read_callback(recordstream, stream_read_cb, NULL);
r = pa_stream_connect_record(recordstream, NULL, NULL, PA_STREAM_PEAK_DETECT);
if (r < 0) {
printf("pa_stream_connect_playback failed\n");
retval = -1;
goto exit;
}
// Run the mainloop until pa_mainloop_quit() is called
// (this example never calls it, so the mainloop runs forever).
// printf("%s", "Running Loop");
pa_mainloop_run(pa_ml, NULL);
exit:
// clean up and disconnect
pa_context_disconnect(pa_ctx);
pa_context_unref(pa_ctx);
pa_mainloop_free(pa_ml);
return retval;
}
Looking at the original question from UNIX.StackExchange, it looks like you're trying to create a VU meter. It can be done using an envelope detector. You have to read the input values and then average their rectified value. A simple envelope detector can be done as an exponential moving average filter.
float level = 0; // Init time
const float alpha = COEFFICIENT; // See below
...
// Inside sample loop
float input_signal = fabsf(get_current_sample());
level = level + alpha * (input_signal - level);
Here, alpha is the filter coefficient, which can be calculated as:
const float alpha = 1.0 - expf( (-2.0 * M_PI) / (TC * SAMPLE_RATE) );
Where TC is known as the "time constant" parameter, measured in seconds, which defines how fast you want to "follow" the signal. Setting it too short makes the VU meter very "bumpy" and setting it too long will miss transients in the signal. 10 mS is a good value to start from.

Linux DMA: Using the DMAengine for scatter-gather transactions

I try to use the DMAengine API from a custom kernel driver to perform a scatter-gather operation. I have a contiguous memory region as source and I want to copy its data in several distributed buffers through a scatterlist structure. The DMA controller is the PL330 one that supports the DMAengine API (see PL330 DMA controller).
My test code is the following:
In my driver header file (test_driver.h):
#ifndef __TEST_DRIVER_H__
#define __TEST_DRIVER_H__
#include <linux/platform_device.h>
#include <linux/device.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/of_dma.h>
#define SG_ENTRIES 3
#define BUF_SIZE 16
#define DEV_BUF 0x10000000
struct dma_block {
void * data;
int size;
};
struct dma_private_info {
struct sg_table sgt;
struct dma_block * blocks;
int nblocks;
int dma_started;
struct dma_chan * dma_chan;
struct dma_slave_config dma_config;
struct dma_async_tx_descriptor * dma_desc;
dma_cookie_t cookie;
};
struct test_platform_device {
struct platform_device * pdev;
struct dma_private_info dma_priv;
};
#define _get_devp(tdev) (&((tdev)->pdev->dev))
#define _get_dmapip(tdev) (&((tdev)->dma_priv))
int dma_stop(struct test_platform_device * tdev);
int dma_start(struct test_platform_device * tdev);
int dma_start_block(struct test_platform_device * tdev);
int dma_init(struct test_platform_device * tdev);
int dma_exit(struct test_platform_device * tdev);
#endif
In my source that contains the dma functions (dma_functions.c):
#include <linux/slab.h>
#include "test_driver.h"
#define BARE_RAM_BASE 0x10000000
#define BARE_RAM_SIZE 0x10000000
struct ram_bare {
uint32_t * __iomem map;
uint32_t base;
uint32_t size;
};
static void dma_sg_check(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
uint32_t * buf;
unsigned int bufsize;
int nwords;
int nbytes_word = sizeof(uint32_t);
int nblocks;
struct ram_bare ramb;
uint32_t * p;
int i;
int j;
ramb.map = ioremap(BARE_RAM_BASE,BARE_RAM_SIZE);
ramb.base = BARE_RAM_BASE;
ramb.size = BARE_RAM_SIZE;
dev_info(dev,"nblocks: %d \n",dma_priv->nblocks);
p = ramb.map;
nblocks = dma_priv->nblocks;
for( i = 0 ; i < nblocks ; i++ ) {
buf = (uint32_t *) dma_priv->blocks[i].data;
bufsize = dma_priv->blocks[i].size;
nwords = dma_priv->blocks[i].size/nbytes_word;
dev_info(dev,"block[%d],size %d: ",i,bufsize);
for ( j = 0 ; j < nwords; j++, p++) {
dev_info(dev,"DMA: 0x%x, RAM: 0x%x",buf[j],ioread32(p));
}
}
iounmap(ramb.map);
}
static int dma_sg_exit(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
int ret = 0;
int i;
for( i = 0 ; i < dma_priv->nblocks ; i++ ) {
kfree(dma_priv->blocks[i].data);
}
kfree(dma_priv->blocks);
sg_free_table(&(dma_priv->sgt));
return ret;
}
int dma_stop(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
int ret = 0;
dma_unmap_sg(dev,dma_priv->sgt.sgl,\
dma_priv->sgt.nents, DMA_FROM_DEVICE);
dma_sg_exit(tdev);
dma_priv->dma_started = 0;
return ret;
}
static void dma_callback(void * param)
{
enum dma_status dma_stat;
struct test_platform_device * tdev = (struct test_platform_device *) param;
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
dev_info(dev,"Checking the DMA state....\n");
dma_stat = dma_async_is_tx_complete(dma_priv->dma_chan,\
dma_priv->cookie, NULL, NULL);
if(dma_stat == DMA_COMPLETE) {
dev_info(dev,"DMA complete! \n");
dma_sg_check(tdev);
dma_stop(tdev);
} else if (unlikely(dma_stat == DMA_ERROR)) {
dev_info(dev,"DMA error! \n");
dma_stop(tdev);
}
}
static void dma_busy_loop(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
enum dma_status status;
int status_change = -1;
do {
status = dma_async_is_tx_complete(dma_priv->dma_chan, dma_priv->cookie, NULL, NULL);
switch(status) {
case DMA_COMPLETE:
if(status_change != 0)
dev_info(dev,"DMA status: COMPLETE\n");
status_change = 0;
break;
case DMA_PAUSED:
if (status_change != 1)
dev_info(dev,"DMA status: PAUSED\n");
status_change = 1;
break;
case DMA_IN_PROGRESS:
if(status_change != 2)
dev_info(dev,"DMA status: IN PROGRESS\n");
status_change = 2;
break;
case DMA_ERROR:
if (status_change != 3)
dev_info(dev,"DMA status: ERROR\n");
status_change = 3;
break;
default:
dev_info(dev,"DMA status: UNKNOWN\n");
status_change = -1;
break;
}
} while(status != DMA_COMPLETE);
dev_info(dev,"DMA transaction completed! \n");
}
static int dma_sg_init(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct scatterlist *sg;
int ret = 0;
int i;
ret = sg_alloc_table(&(dma_priv->sgt), SG_ENTRIES, GFP_ATOMIC);
if(ret)
goto out_mem2;
dma_priv->nblocks = SG_ENTRIES;
dma_priv->blocks = (struct dma_block *) kmalloc(dma_priv->nblocks\
*sizeof(struct dma_block), GFP_ATOMIC);
if(dma_priv->blocks == NULL)
goto out_mem1;
for( i = 0 ; i < dma_priv->nblocks ; i++ ) {
dma_priv->blocks[i].size = BUF_SIZE;
dma_priv->blocks[i].data = kmalloc(dma_priv->blocks[i].size, GFP_ATOMIC);
if(dma_priv->blocks[i].data == NULL)
goto out_mem3;
}
for_each_sg(dma_priv->sgt.sgl, sg, dma_priv->sgt.nents, i)
sg_set_buf(sg,dma_priv->blocks[i].data,dma_priv->blocks[i].size);
return ret;
out_mem3:
i--;
while(i >= 0)
kfree(dma_priv->blocks[i].data);
kfree(dma_priv->blocks);
out_mem2:
sg_free_table(&(dma_priv->sgt));
out_mem1:
ret = -ENOMEM;
return ret;
}
static int _dma_start(struct test_platform_device * tdev,int block)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
int ret = 0;
int sglen;
/* Step 1: Allocate and initialize the SG list */
dma_sg_init(tdev);
/* Step 2: Map the SG list */
sglen = dma_map_sg(dev,dma_priv->sgt.sgl,\
dma_priv->sgt.nents, DMA_FROM_DEVICE);
if(! sglen)
goto out2;
/* Step 3: Configure the DMA */
(dma_priv->dma_config).direction = DMA_DEV_TO_MEM;
(dma_priv->dma_config).src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
(dma_priv->dma_config).src_maxburst = 1;
(dma_priv->dma_config).src_addr = (dma_addr_t) DEV_BUF;
dmaengine_slave_config(dma_priv->dma_chan, \
&(dma_priv->dma_config));
/* Step 4: Prepare the SG descriptor */
dma_priv->dma_desc = dmaengine_prep_slave_sg(dma_priv->dma_chan, \
dma_priv->sgt.sgl, dma_priv->sgt.nents, DMA_DEV_TO_MEM, \
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (dma_priv->dma_desc == NULL) {
dev_err(dev,"DMA could not assign a descriptor! \n");
goto out1;
}
/* Step 5: Set the callback method */
(dma_priv->dma_desc)->callback = dma_callback;
(dma_priv->dma_desc)->callback_param = (void *) tdev;
/* Step 6: Put the DMA descriptor in the queue */
dma_priv->cookie = dmaengine_submit(dma_priv->dma_desc);
/* Step 7: Fires the DMA transaction */
dma_async_issue_pending(dma_priv->dma_chan);
dma_priv->dma_started = 1;
if(block)
dma_busy_loop(tdev);
return ret;
out1:
dma_stop(tdev);
out2:
ret = -1;
return ret;
}
int dma_start(struct test_platform_device * tdev) {
return _dma_start(tdev,0);
}
int dma_start_block(struct test_platform_device * tdev) {
return _dma_start(tdev,1);
}
int dma_init(struct test_platform_device * tdev)
{
int ret = 0;
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
dma_priv->dma_chan = dma_request_slave_channel(dev, \
"dma_chan0");
if (dma_priv->dma_chan == NULL) {
dev_err(dev,"DMA channel busy! \n");
ret = -1;
}
dma_priv->dma_started = 0;
return ret;
}
int dma_exit(struct test_platform_device * tdev)
{
int ret = 0;
struct dma_private_info * dma_priv = _get_dmapip(tdev);
if(dma_priv->dma_started) {
dmaengine_terminate_all(dma_priv->dma_chan);
dma_stop(tdev);
dma_priv->dma_started = 0;
}
if(dma_priv->dma_chan != NULL)
dma_release_channel(dma_priv->dma_chan);
return ret;
}
In my driver source file (test_driver.c):
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/version.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/interrupt.h>
#include "test_driver.h"
static int dma_block=0;
module_param_named(dma_block, dma_block, int, 0444);
static struct test_platform_device tdev;
static struct of_device_id test_of_match[] = {
{ .compatible = "custom,test-driver-1.0", },
{}
};
static int test_probe(struct platform_device *op)
{
int ret = 0;
struct device * dev = &(op->dev);
const struct of_device_id *match = of_match_device(test_of_match, &op->dev);
if (!match)
return -EINVAL;
tdev.pdev = op;
dma_init(&tdev);
if(dma_block)
ret = dma_start_block(&tdev);
else
ret = dma_start(&tdev);
if(ret) {
dev_err(dev,"Error to start DMA transaction! \n");
} else {
dev_info(dev,"DMA OK! \n");
}
return ret;
}
static int test_remove(struct platform_device *op)
{
dma_exit(&tdev);
return 0;
}
static struct platform_driver test_platform_driver = {
.probe = test_probe,
.remove = test_remove,
.driver = {
.name = "test-driver",
.owner = THIS_MODULE,
.of_match_table = test_of_match,
},
};
static int test_init(void)
{
platform_driver_register(&test_platform_driver);
return 0;
}
static void test_exit(void)
{
platform_driver_unregister(&test_platform_driver);
}
module_init(test_init);
module_exit(test_exit);
MODULE_AUTHOR("klyone");
MODULE_DESCRIPTION("DMA SG test module");
MODULE_LICENSE("GPL");
However, the DMA never calls my callback function and I do not have any idea why it happens. Maybe, I am misunderstanding something...
Could anyone help me?
Thanks in advance.
Caveat: I don't have a definitive solution for you, but merely some observations and suggestions on how to debug this [based on many years of experience writing/debugging linux device drivers].
I presume you believe the callback is not being done because you don't get any printk messages. But, the callback is the only place that has them. But, is the printk level set high enough to see the messages? I'd add a dev_info to your module init, to prove it prints as expected.
Also, you [probably] won't get a callback if dma_start doesn't work as expected, so I'd add some dev_info calls there, too (e.g. before and after the call in step 7). I also notice that not all calls in dma_start check error returns [may be fine or void return, just mentioning in case you missed one]
At this point, it should be noted that there are really two questions here: (1) Did your DMA request start successfully [and complete]? (2) Did you get a callback?
So, I'd split off some code from dma_complete into (e.g.) dma_test_done. The latter does the same checking but only prints the "complete" message. You can call this in a poll mode to verify DMA completion.
So, if you [eventually] get a completion, then the problem reduces to why you didn't get the callback. If, however, you don't [even] get a completion, that's an even more fundamental problem.
This reminds me. You didn't show any code that calls dma_start or how you wait for the completion. I presume that if your callback were working, it would issue a wakeup of some sort that the base level would wait on. Or, the callback would do the request deallocate/cleanup (i.e. more code you'd write)
At step 7, you're calling dma_async_issue_pending, which should call pl330_issue_pending. pl330_issue_pending will call pl330_tasklet.
pl330_tasklet is a tasklet function, but it can also be called directly [to kick off DMA when there are no active requests].
pl330_tasklet will loop on its "work" queue and move any completed items to its "completed" queue. It then tries to start new requests. It then loops on its completed queue and issues the callbacks.
pl330_tasklet grabs the callback pointer, but if it's null it is silently ignored. You've set a callback, but it might be good to verify that where you set the callback is the same place [or propagates to] the place where pl330_tasklet will fetch it from.
When you make the call, everything may be busy, so there are no completed requests, no room to start a new request, so nothing to complete. In that case, pl330_tasklet will be called again later.
So, when dma_async_issue_pending returns, nothing may have happened yet. This is quite probable for your case.
pl330_tasklet tries to start new DMA by calling fill_queue. It will check that a descriptor is not [already] busy by looking at status != BUSY. So, you may wish to verify that yours has the correct value. Otherwise, you'd never get a callback [or even any DMA start].
Then, fill_queue will try to start the request via pl330_submit_req. But, that can return an error (e.g. queue already full), so, again, things are deferred.
For reference, notice the following comment at the top of pl330_submit_req:
Submit a list of xfers after which the client wants notification.
Client is not notified after each xfer unit, just once after all
xfer units are done or some error occurs.
What I'd do is start hacking up pl330.c and add debug messages and cross-checking. If your system is such that pl330 is servicing many other requests, you might limit the debug messages by checking that the device's private data pointer matches yours.
In particular, you'd like to get a message when your request actually gets started, so you could add a debug message to the end of pl330_submit_req
Then, adding messages within pl330_tasklet for requests will help, too.
Those are two good starting points. But, don't be afraid to add more printk calls as needed. You may be surprised by what gets called [or doesn't get called] or in what order.
UPDATE:
If I install the kernel module with the blocking behaviour, everything is initialized well. However, the dma_busy_loop function shows that the DMA descriptor is always IN PROGESS and the DMA transaction never completes. For this reason, the callback function is not executed. What could be happening?
Did a little more research. Cookies are just sequence numbers that increment. For example, if you issue a request that gets broken up into [say] 10 separate scatter/gather operations [descriptors], each one gets a unique cookie value. The cookie return value is the latest/last of the bunch (e.g. 10).
When you're calling (1) dma_async_is_tx_complete, (2) it calls chan->device->device_tx_status, (3) which is pl330_tx_status, (4) which calls dma_cookie_status
Side note/tip: When I was tracking this down, I just kept flipping back and forth between dmaengine.h and pl330.c. It was like: Look at (1), it calls (2). Where is that set? In pl330.c, I presume. So, I grepped for the string and got the name of pl330's function (i.e. (3)). So, I go there, and see that it does (4). So ... Back to dmaengine.h ...
However, when you make the outer call, you're ignoring [setting to NULL] the last two arguments. These can be useful because they return the "last" and "used" cookies. So, even if you don't get full completion, these values could change and show partial progress.
One of them should eventually be >= to the "return" cookie value. (i.e.) The entire operation should be complete. So, this will help differentiate what may be happening.
Also, note that in dmaengine.h, right below dma_async_is_tx_complete, there is dma_async_is_complete. This function is what decides whether to return DMA_COMPLETE or DMA_IN_PROGRESS, based on the cookie value you pass and the "last" and "used" cookie values. It's passive, and not used in the code path [AFAICT], but it does show how to calculate completion yourself.

C multithread performance issue

I am writing a multi-threaded program to traverse an n x n matrix, where the elements in the main diagonal are processed in a parallel manner, as shown in the code below:
int main(int argc, char * argv[] )
{
/* VARIABLES INITIALIZATION HERE */
gettimeofday(&start_t, NULL); //start timing
for (int slice = 0; slice < 2 * n - 1; ++slice)
{
z = slice < n ? 0 : slice - n + 1;
int L = 0;
pthread_t threads[slice-z-z+1];
struct thread_data td[slice-z-z+1];
for (int j=z; j<=slice-z; ++j)
{
td[L].index= L;
printf("create:%d\n", L );
pthread_create(&threads[L],NULL,mult_thread,(void *)&td[L]);
L++;
}
for (int j=0; j<L; j++)
{
pthread_join(threads[j],NULL);
}
}
gettimeofday(&end_t, NULL);
printf("Total time taken by CPU: %ld \n", ( (end_t.tv_sec - start_t.tv_sec)*1000000 + end_t.tv_usec - start_t.tv_usec));
return (0);
}
void *mult_thread(void *t)
{
struct thread_data *my_data= (struct thread_data*) t;
/* SOME ADDITIONAL CODE LINES HERE */
printf("ThreadFunction:%d\n", (*my_data).index );
return (NULL);
}
The problem is that this multithreaded implementation gave me a very bad performance compared with the serial (naive) implementation.
Are there some adjustments that could be done to improve the performance of the multithreaded version ??
a thread pool may make it better.
define a new struct type as follow.
typedef struct {
struct thread_data * data;
int status; // 0: ready
// 1: adding data
// 2: data handling, 3: done
int next_free;
} thread_node;
init :
size_t thread_size = 8;
thread_node * nodes = (thread_node *)malloc(thread_size * sizeof(thread_node));
for(int i = 0 ; i < thread_size - 1 ; i++ ) {
nodes[i].next_free = i + 1;
nodes[i].status = 0 ;
}
nodes[thread_size - 1].next_free = -1;
int current_free_node = 0 ;
pthread_mutex_t mutex;
get thread :
int alloc() {
pthread_mutex_lock(&mutex);
int rt = current_free_node;
if(current_free_node != -1) {
current_free_node = nodes[current_free_node].next_free;
nodes[rt].status = 1;
}
pthread_mutex_unlock(&mutex);
return rt;
}
return thread :
void back(int idx) {
pthread_mutex_lock(&mutex);
nodes[idx].next_free = current_free_node;
current_free_node = idx;
nodes[idx].status = 0;
pthread_mutex_unlock(&mutex);
}
create the threads first, and use alloc() to try to get a idle thread, update the pointer.
don't use join to judge the status.
modify your mult_thread as a loop and after the job finished , just change your status to 3
for each loop in the thread , you may give it more work
I wish it will give you some help.
------------ UPDATED Apr. 23, 2015 -------------------
here is a example.
compile & run with command
$ g++ thread_pool.cc -o tp -pthread --std=c++
yu:thread_pool yu$ g++ tp.cc -o tp -pthread --std=c++11 && ./tp
1227135.147 1227176.546 1227217.944 1227259.340...
time cost 1 : 1068.339091 ms
1227135.147 1227176.546 1227217.944 1227259.340...
time cost 2 : 548.221607 ms
you may also remove timer and it can also compiled as a std c99 file.
In current , the thread size has been limited to 2. You may also adjust the parameter thread_size, and recompile & run again. More threads may give your some more advantage(in my pc, if I change the thread size to 4, the task will finish in 280ms), while too much thread number may not help you too much if you have no enough cpu thread.

audio delay making it work

I am trying to implement a simple audio delay in C.
i previously made a test delay program which operated on a printed sinewave and worked effectively.
I tried incorporating my delay as the process in the SFProcess - libsndfile- replacing the sinewave inputs with my audio 'data' input.
I nearly have it but instead of a clean sample delay I am getting all sorts of glitching and distortion.
Any ideas on how to correct this?
#include <stdio.h>
#include </usr/local/include/sndfile.h>//libsamplerate libsamplerate
//#include </usr/local/include/samplerate.h>
#define BUFFER_LEN 1024 //defines buffer length
#define MAX_CHANNELS 2 //defines max channels
static void process_data (double *data, double*circular,int count, int numchannels, int circular_pointer );
enum {DT_PROGNAME,ARG_INFILE,ARG_OUTFILE,ARG_NARGS, DT_VOL};
int main (int argc, const char * argv[])//Main
{
static double data [BUFFER_LEN]; // the buffer that carries the samples
double circular [44100] = {0}; // the circular buffer for the delay
for (int i = 0; i < 44100; i++) { circular[i] = 0; } // zero the circular buffer
int circular_pointer = 0; // where we currently are in the circular buffer
//float myvolume; // the volume entered by the user as optional 3rd argument
SNDFILE *infile, *outfile;
SF_INFO sfinfo;
int readcount;
const char *infilename = NULL;
const char *outfilename = NULL;
if(argc < ARG_NARGS) {
printf("usage: %s infile outfile\n",argv[DT_PROGNAME]);
return 1;
}
//if(argc > ARG_NARGS) {
//
// myvolume = argv[DT_VOL];
//};
infilename = argv[ARG_INFILE];
outfilename = argv[ARG_OUTFILE];
if (! (infile = sf_open (infilename, SFM_READ, &sfinfo)))
{printf ("Not able to open input file %s.\n", infilename) ;
puts (sf_strerror (NULL)) ;
return 1 ;
};
if (! (outfile = sf_open (outfilename, SFM_WRITE, &sfinfo)))
{ printf ("Not able to open output file %s.\n", outfilename) ;
puts (sf_strerror (NULL)) ;
return 1 ;
} ;
while ((readcount = sf_read_double (infile, data, BUFFER_LEN)))
{ process_data (data, circular, readcount, sfinfo.channels, circular_pointer) ;
sf_write_double (outfile, data, readcount) ;
};
sf_close (infile) ;
sf_close (outfile) ;
printf("the sample rate is %d\n", sfinfo.samplerate);
return 0;
}
static void process_data (double *data, double *circular, int count, int numchannels, int circular_pointer) {
//int j,k;
//float vol = 1;
int playhead;
int wraparound = 10000;
float delay = 1000; // delay time in samples
for (int ind = 0; ind < BUFFER_LEN; ind++){
circular_pointer = fmod(ind,wraparound); // wrap around pointer
circular[circular_pointer] = data[ind];
playhead = fmod(ind-delay, wraparound); // read the delayed signal
data[ind] = circular[playhead]; // output delayed signal
circular[ind] = data[ind]; // write the incoming signal
};
//volume
/*for (j=0; j<numchannels; j++) {
for (k=0; k<count; k++){
data[k] = data[k]*-vol;*/
//}printf ("the volume is %f", vol);
return;
}
There are a few issues with your code that are causing you to access out of your array bounds and to not read\write your circular buffer in the way intended.
I would suggest reading http://en.wikipedia.org/wiki/Circular_buffer to get a better understanding of circular buffers.
The main issues your code is suffering:
circular_pointer should be initialised to the delay amount (essentially the write head is starting at 0 so there is never any delay!)
playhead and circular_buffer are not updated between calls to process_data (circular_buffer is passed by value...)
playhead is reading from negative indices. The correct playhead calculation is
#define MAX_DELAY 44100
playhead++;
playhead = playhead%MAX_DELAY;
The second write to circular_buffer at the end of process_data is unnecessary and incorrect.
I would strongly suggest spending some time running your code in a debugger and closely watching what your playhead and circular_pointer are doing.
Mike
At least one problem is that you pass circular_pointer by value, not by reference. When you update it in the function, it's back to the same value next time you call the function.
I think you are on the right track, here, but if you want something that's structured a bit better, you might also want to checkout this answer:
how to add echo effect on audio file using objective-c
delay in sample can be put as 100 ms would be sufficient

Using MPI_Reduce in c

I am trying to write a program in c using the MPI library.
In my program I am solving TSP (but not with any special algorithm...).
My input parameters are int nCites, int xCoord[] and int yCoord[].
I am bundling them into Coordinates and using MPI_Bcast to make them available to all the threads.
My problem is this: after I've finished calculating the weight of all the paths in each thread, I want to reduce them into one single result, the best one. I've tried using MPI_Reduce, but something, and this is where I get confused, causes a segmentation fault (only in one of the threads, usually root).
This is the main code and structs:
typedef struct Coordinates_t {
int* x;
int* y;
int n;
} Coordinates;
typedef struct PathAndLength_t {
int* path;
int pathSize;
int length;
} PathAndLength;
void comparePaths(void* a, void* b, int* len, MPI_Datatype* dataType) {
...
}
int tsp_main(int nCites, int xCoord[], int yCoord[], int P[]){
int numOfProcs, rank;
if (MPI_Comm_size(MPI_COMM_WORLD, &numOfProcs))
throw "Error: MPI_Comm_size failed";
if (MPI_Comm_rank(MPI_COMM_WORLD, &rank))
throw "Error: MPI_Comm_rank failed";
Coordinates crds;
crds.x = xCoord;
crds.y = yCoord;
crds.n = nCites;
MPI_Datatype data;
createInDataType(&crds, &data);
if (MPI_Bcast(&crds, 1, data, 0, MPI_COMM_WORLD))
throw "Error: MPI_Comm_size failed";
...
PathAndLength* pal = (PathAndLength*)malloc(sizeof(PathAndLength));
pal->path = (int*)malloc(sizeof(int)*crds.n);
pal->length = min_length;
for (int i = 0; i < crds.n; ++i) {
(pal->path)[i] = min_path[i];
}
pal->pathSize = crds.n;
MPI_Datatype outDatatype;
MPI_Op op;
createOutDataType(pal, &outDatatype);
MPI_Op_create(&comparePaths, 1, &op);
PathAndLength* result = (PathAndLength*)malloc(sizeof(PathAndLength));
result->path = (int*)malloc(sizeof(int)*crds.n);
MPI_Reduce(pal, result, crds.n, outDatatype, op, 0, MPI_COMM_WORLD);
...
return result->length;
}
And these are the createOutDataType and createInDataType I use in my code:
void createInDataType(Coordinates* indata, MPI_Datatype* message_type_ptr) {
// Build a derived datatype
int block_lengths[3];
MPI_Aint displacements[3];
MPI_Aint addresses[4];
MPI_Datatype typelist[3];
// First specify the types
typelist[0] = typelist[1] = typelist[2] = MPI_INT;
// Specify the number of elements of each type
block_lengths[0] = block_lengths[1] = indata->n;
block_lengths[2] = 1;
// Calculate the displacements of the members relative to indata
MPI_Address(indata, &addresses[0]);
MPI_Address(indata->x, &addresses[1]);
MPI_Address(indata->y, &addresses[2]);
MPI_Address(&indata->n, &addresses[3]);
displacements[0] = addresses[1] - addresses[0];
displacements[1] = addresses[2] - addresses[0];
displacements[2] = addresses[3] - addresses[0];
// Create the derived type
MPI_Type_struct(3, block_lengths, displacements, typelist, message_type_ptr);
// Commit it so that it can be used
MPI_Type_commit(message_type_ptr);
}
void createOutDataType(PathAndLength* outdata, MPI_Datatype* message_type_ptr) {
// Build a derived datatype
int block_lengths[2];
MPI_Aint displacements[2];
MPI_Aint addresses[3];
MPI_Datatype typelist[2];
// First specify the types
typelist[0] = MPI_INT;
typelist[1] = MPI_INT;
// Specify the number of elements of each type
block_lengths[0] = outdata->pathSize;
block_lengths[1] = 1;
// Calculate the displacements of the members relative to outdata
MPI_Address(outdata, &addresses[0]);
MPI_Address(outdata->path, &addresses[1]);
MPI_Address(&outdata->length, &addresses[2]);
displacements[0] = addresses[1] - addresses[0];
displacements[1] = addresses[2] - addresses[0];
// Create the derived type
MPI_Type_struct(2, block_lengths, displacements, typelist, message_type_ptr);
// Commit it so that it can be used
MPI_Type_commit(message_type_ptr);
}
Sorry for including so much code, but I couldn't decide what, if any, was irrelevant...
Thank you.
PathAndLength* result = (PathAndLength*)malloc(sizeof(PathAndLength));
result->path = (int*)malloc(sizeof(int)*crds.n);
MPI_Reduce(pal, result, crds.n, outDatatype, op, 0, MPI_COMM_WORLD);
you are receiving crds.n*outDatatypes into result buffer of size sizeof(PathAndLength). You seem to have design flaw.

Resources