kprobe, function scheduling - processor lockup - Linux kernel - c

I've a function I wrote in order to run a given function on all processors. It works perfectly well in all cases except the following case:
When I try to use it within a kprobe that I registered.
Here's some code:
static DEFINE_MUTEX(entryMutex);
static struct kretprobe my_kprobe = {
.entry_handler = (kprobe_opcode_t *) NULL,
.handler = (kprobe_opcode_t *) process_entry_callback,
.maxactive = 1000,
.data_size = 0
};
static int driver_init(void)
{
my_kprobe.kp.addr = (kprobe_opcode_t*)kallsyms_lookup_name("sys_execve");
if ((ret = register_kretprobe(&my_kprobe)) < 0)
return -1;
return 0;
}
void foo(void* nothing)
{
printk("In foo\n");
}
static int process_entry_callback(struct kretprobe_instance* instance, struct pt_regs* regs)
{
mutex_lock(&entryMutex);
for(int i = 0; i < 4; ++i) // assumes there are 4 processors
run_func(foo, NULL, i);
mutex_unlock(&entryMutex);
return 0;
}
void run_func_wrap(struct function_data* data)
{
data->func(data->context);
wake_up_process(data->waiting_task);
*(data->condition) = TRUE;
}
void run_func(SCHEDULED_FUNC func, void *context, int processor)
{
struct function_data data;
struct task_struct* th;
BOOLEAN condition = FALSE;
wait_queue_head_t queue;
init_waitqueue_head(&queue);
data.func = func;
data.waiting_task = current;
data.context = context;
data.condition = &condition;
th = kthread_create(sched_func_wrap, &data, "th");
kthread_bind(th, processor);
wake_up_process(th);
wait_event(queue, condition);
}
F
After the call to 'run_func' in process_entry_callback I can no longer run any programs. Every time I start a new program it just stuck. After a while I get 'processor lockup' warning in the system log.
I suspect that it has something to do with the IRQ levels.
Any suggestions ?
EDIT:
It also happens when using the following function:
smp_call_function_single
which can be found in smp.c # the Linux kernel source code.
instead of my function:
run_func

Related

Esp32-(xQueueGenericReceive)- assert failed

I have been working on a project in which the analog values are sampled at a particular frequency and stored in an array. Then the value will be sent to user application ESP32 using BLE. But I got stuck in this error.
/home/runner/work/esp32-arduino-lib-builder/esp32-arduino-lib-builder/esp-idf/components/freertos/queue.c:1443
(xQueueGenericReceive)- assert failed! abort() was called at PC
0x4008e1d5 on core 1
Backtrace: 0x40091b38:0x3ffe0b20 0x40091d69:0x3ffe0b40
0x4008e1d5:0x3ffe0b60 0x400d1a2d:0x3ffe0ba0 0x4008e525:0x3ffe0be0
Rebooting... ets Jun 8 2016 00:22:57
rst:0xc (SW_CPU_RESET),boot:0x13 (SPI_FAST_FLASH_BOOT) configsip: 0,
SPIWP:0xee
clk_drv:0x00,q_drv:0x00,d_drv:0x00,cs0_drv:0x00,hd_drv:0x00,wp_drv:0x00
mode:DIO, clock div:1 load:0x3fff0018,len:4 load:0x3fff001c,len:1044
load:0x40078000,len:8896 load:0x40080400,len:5816 entry 0x400806ac
I am Using Esp32arduino and FreeRTOS for programming. The error is in the semaphore from the interrupt but I couldn't be able to find out exact solution. Please help me out guys.
#include <ArduinoJson.h>
#include <BLEDevice.h>
#include <BLEServer.h>
#include <BLEUtils.h>
#include <BLE2902.h>
#if CONFIG_FREERTOS_UNICORE
static const BaseType_t app_cpu = 0;
#else
static const BaseType_t app_cpu = 1;
#endif
//ADC Related Global Variables
static const uint16_t timer_divider = 80;
static const uint64_t timer_max_count = 1000;
static const int adc_pin = A0;
static const int BUF_SIZE = 1000;
static int buf[BUF_SIZE];
int Buff_Len = 0;
static int Read = 0;
static int Write = 0;
static int count = 0;
static float avg = 0;
int i = 0;
int BLE_flag = 0;
String cmd;
static hw_timer_t *timer = NULL;
static uint16_t val;
static int count1 = 0;
static SemaphoreHandle_t bin_sem = NULL;
static SemaphoreHandle_t bin_sem2 = NULL;
static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED;
//ADC Related Global Variables
//BLE Global Variable
char Reading[4];
BLEServer *pServer = NULL;
BLECharacteristic *pTxCharacteristic;
bool deviceConnected = false;
bool oldDeviceConnected = false;
//Declaration BLE necessary Classes
#define SERVICE_UUID "6E400001-B5A3-F393-E0A9-E50E24DCCA9E" // UART service UUID
#define CHARACTERISTIC_UUID_TX "6E400003-B5A3-F393-E0A9-E50E24DCCA9E"
class MyServerCallbacks:public BLEServerCallbacks
{
void onConnect (BLEServer * pServer)
{
deviceConnected = true;
};
void onDisconnect (BLEServer * pServer)
{
deviceConnected = false;
}
};
//BLE Global Variables
//Task Section
void IRAM_ATTR onTimer ()
{
//sampling
xSemaphoreGiveFromISR (bin_sem2, &task_woken);
if (task_woken)
{
portYIELD_FROM_ISR ();
}
}
void move_to_Queue (void *parameters)
{
while (1)
{
xSemaphoreTake (bin_sem2, portMAX_DELAY);
if (Buff_Len == BUF_SIZE || count1 > 2000)
{
Serial.println ("Buffer is full");
xSemaphoreGive (bin_sem);
}
else
{
// storing the instantaneous sample value to buffer
}
}
}
void BLE_Task (void *parameters)
{
while (1) {
xSemaphoreTake (bin_sem, portMAX_DELAY);
Serial.println ("BLE");
// sending the data\lu
delay (10); // bluetooth stack will go into congestion, if too many packets are sent
}
}
Serial.println ();
}
}
void setup ()
{
// put your setup code here, to run once:
Serial.begin (115200);
vTaskDelay (1000 / portTICK_PERIOD_MS);
//BLE Declarations
BLEDevice::init ("UART Service");
pServer = BLEDevice::createServer ();
pServer->setCallbacks (new MyServerCallbacks ());
BLEService *pService = pServer->createService (SERVICE_UUID);
pTxCharacteristic = pService->createCharacteristic (CHARACTERISTIC_UUID_TX,
BLECharacteristic::
PROPERTY_NOTIFY);
pTxCharacteristic->addDescriptor (new BLE2902 ());
pService->start ();
pServer->getAdvertising ()->start ();
Serial.println ("Waiting a client connection to notify...");
//BLE Declaration
//ADC Semaphore and Timer Declarations
bin_sem = xSemaphoreCreateBinary ();
bin_sem2 = xSemaphoreCreateBinary ();
if (bin_sem == NULL || bin_sem2 == NULL)
{
Serial.println ("Could not create semaphore");
ESP.restart ();
}
xTaskCreatePinnedToCore (move_to_Queue,
"move_to_Queue", 1024, NULL, 2, NULL, app_cpu);
xTaskCreatePinnedToCore (BLE_Task,
"BLE_Task", 2048, NULL, 2, NULL, app_cpu);
timer = timerBegin (0, timer_divider, true);
// Provide ISR to timer (timer, function, edge)
timerAttachInterrupt (timer, &onTimer, true);
// At what count should ISR trigger (timer, count, autoreload)
timerAlarmWrite (timer, timer_max_count, true);
// Allow ISR to trigger
timerAlarmEnable (timer);
vTaskDelete (NULL);
}
void loop ()
{
// put your main code here, to run repeatedly:
}
`
Whole code: https://pastebin.com/K8ppkG28
Thanks in advance guys

Linux kernel windows-like event implementation

We need to implement Windows-like kernel event for Linux. These functions are intended to behave as corresponding KeInitializeEvent, KeSetEvent, KeResetEvent, KePulseEvent, and KeWaitForSingleObject from Windows kernel. Synchronization event is called auto reset here, and Notification event is called manual reset. Here is the code:
Event.h
#define WAIT_FOREVER -1
#define event_init(event, manual_reset, initial_state) __event_init(event, manual_reset, initial_state)
#define event_set(event) __event_set(event)
#define event_reset(event) __event_reset(event)
#define event_pulse(event) __event_pulse(event)
#define event_wait(event, ms_timeout) __event_wait(event, (ms_timeout == WAIT_FOREVER) ? (WAIT_FOREVER) : ((ms_timeout * HZ) / 1000))
typedef struct _wait_t
{
atomic_t b;
wait_queue_head_t q;
struct list_head list;
} wait_t;
typedef struct _event_t
{
struct list_head Wait;
bool AutoReset;
bool State;
} event_t;
void __event_init_lib(void);
void __event_init(event_t *event, bool manual_reset, bool initial_state);
bool __event_set(event_t *event);
bool __event_reset(event_t *event);
bool __event_pulse(event_t *event);
status_t __event_wait(event_t *event, time_t timeout);
Event.c
wait_t g_stor[100];
spinlock_t g_lock;
void __event_init_lib(void)
{
wait_t *ptr;
for (int i = 0; i < ARRAY_SIZE(g_stor); ++i)
{
ptr = &g_stor[i];
atomic_set(&ptr->b, 2);
init_waitqueue_head(&ptr->q);
INIT_LIST_HEAD(&ptr->list);
}
spin_lock_init(&g_lock);
}
void __event_init(event_t *event, bool manual_reset, bool initial_state)
{
INIT_LIST_HEAD(&event->Wait);
event->State = initial_state;
event->AutoReset = !manual_reset;
}
status_t __event_wait(event_t *event, time_t timeout)
{
bool b;
wait_t *ptr;
status_t status;
spin_lock(&g_lock);
if (event->State)
{
if (event->AutoReset) event->State = false;
spin_unlock(&g_lock);
return s_success;
}
for (int i = 0; i < ARRAY_SIZE(g_stor); ++i)
{
ptr = &g_stor[i];
if (atomic_cmpxchg(&ptr->b, 2, 0) == 2) break;
}
list_add_tail(&ptr->list, &event->Wait); // note: we need to insert in the end of the list
spin_unlock(&g_lock);
if (timeout == WAIT_FOREVER) wait_event(ptr->q, b = (atomic_cmpxchg(&ptr->b, 1, 2) == 1));
else wait_event_timeout(ptr->q, b = (atomic_cmpxchg(&ptr->b, 1, 2) == 1), timeout);
if (b) status = s_success;
else status = s_timeout;
return status;
}
bool __event_set(event_t *event)
{
bool PrevState;
struct list_head *entry;
wait_t *Wait;
//if (!event->AutoReset && event->State) return true;
spin_lock(&g_lock);
PrevState = event->State;
event->State = true;
if (!PrevState && !list_empty(&event->Wait)) // check if we became signaled right now
// and we have waiters
{
if (event->AutoReset)
{
entry = event->Wait.next;
Wait = container_of(entry, wait_t, list);
atomic_set(&Wait->b, 1);
wake_up(&(Wait->q));
event->State = false;
list_del(entry);
}
else
{
entry = event->Wait.next;
while (entry != &event->Wait)
{
Wait = container_of(entry, wait_t, list);
atomic_set(&Wait->b, 1);
wake_up(&(Wait->q));
entry = entry->next;
list_del(entry->prev);
}
}
}
spin_unlock(&g_lock);
return PrevState;
}
bool __event_reset(event_t *event)
{
bool PrevState;
spin_lock(&g_lock);
PrevState = event->State;
event->State = false;
spin_unlock(&g_lock);
return PrevState;
}
bool __event_pulse(event_t *event)
{
bool PrevState;
struct list_head *entry;
wait_t *Wait;
spin_lock(&g_lock);
PrevState = event->State;
if (!PrevState && !list_empty(&event->Wait)) // check if we became signaled right now
// and we have waiters
{
if (event->AutoReset)
{
entry = event->Wait.next;
Wait = container_of(entry, wait_t, list);
atomic_set(&Wait->b, 1);
wake_up(&(Wait->q));
list_del(entry);
}
else
{
entry = event->Wait.next;
while (entry != &event->Wait)
{
Wait = container_of(entry, wait_t, list);
atomic_set(&Wait->b, 1);
wake_up(&(Wait->q));
entry = entry->next;
list_del(entry->prev);
}
}
}
event->State = false;
spin_unlock(&g_lock);
return PrevState;
}
I think each waiting thread needs its own condition variable, because if we have one condition variable and set it to true, new waiter may arrive and pass through wait_event unintentionally without even falling to sleep. So we need to maintain list of condition variables, hence to wake up correct thread we also need multiple wait queues. Also ReactOS source suggest that event maintains the list of waiters.
Since we can't use thread local storage variables in kernel mode (at least in no easy way) I decided to implement array of wait blocks. When we need to insert waiter in the list we loop this array in search for free wait block. This leads me to believe that we need to use single global lock as ReactOS does (dispatcher lock), not separate lock for each event object.
We need event object for our video camera driver ported from Windows. Everything seems to work fine, however frames per second sometimes drops from 14 fps to 10 (and image flickers). It led me to believe that there is something wrong with the implementation of event.
If you have some suggestions, please share. Thank you.

Linux DMA: Using the DMAengine for scatter-gather transactions

I try to use the DMAengine API from a custom kernel driver to perform a scatter-gather operation. I have a contiguous memory region as source and I want to copy its data in several distributed buffers through a scatterlist structure. The DMA controller is the PL330 one that supports the DMAengine API (see PL330 DMA controller).
My test code is the following:
In my driver header file (test_driver.h):
#ifndef __TEST_DRIVER_H__
#define __TEST_DRIVER_H__
#include <linux/platform_device.h>
#include <linux/device.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/of_dma.h>
#define SG_ENTRIES 3
#define BUF_SIZE 16
#define DEV_BUF 0x10000000
struct dma_block {
void * data;
int size;
};
struct dma_private_info {
struct sg_table sgt;
struct dma_block * blocks;
int nblocks;
int dma_started;
struct dma_chan * dma_chan;
struct dma_slave_config dma_config;
struct dma_async_tx_descriptor * dma_desc;
dma_cookie_t cookie;
};
struct test_platform_device {
struct platform_device * pdev;
struct dma_private_info dma_priv;
};
#define _get_devp(tdev) (&((tdev)->pdev->dev))
#define _get_dmapip(tdev) (&((tdev)->dma_priv))
int dma_stop(struct test_platform_device * tdev);
int dma_start(struct test_platform_device * tdev);
int dma_start_block(struct test_platform_device * tdev);
int dma_init(struct test_platform_device * tdev);
int dma_exit(struct test_platform_device * tdev);
#endif
In my source that contains the dma functions (dma_functions.c):
#include <linux/slab.h>
#include "test_driver.h"
#define BARE_RAM_BASE 0x10000000
#define BARE_RAM_SIZE 0x10000000
struct ram_bare {
uint32_t * __iomem map;
uint32_t base;
uint32_t size;
};
static void dma_sg_check(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
uint32_t * buf;
unsigned int bufsize;
int nwords;
int nbytes_word = sizeof(uint32_t);
int nblocks;
struct ram_bare ramb;
uint32_t * p;
int i;
int j;
ramb.map = ioremap(BARE_RAM_BASE,BARE_RAM_SIZE);
ramb.base = BARE_RAM_BASE;
ramb.size = BARE_RAM_SIZE;
dev_info(dev,"nblocks: %d \n",dma_priv->nblocks);
p = ramb.map;
nblocks = dma_priv->nblocks;
for( i = 0 ; i < nblocks ; i++ ) {
buf = (uint32_t *) dma_priv->blocks[i].data;
bufsize = dma_priv->blocks[i].size;
nwords = dma_priv->blocks[i].size/nbytes_word;
dev_info(dev,"block[%d],size %d: ",i,bufsize);
for ( j = 0 ; j < nwords; j++, p++) {
dev_info(dev,"DMA: 0x%x, RAM: 0x%x",buf[j],ioread32(p));
}
}
iounmap(ramb.map);
}
static int dma_sg_exit(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
int ret = 0;
int i;
for( i = 0 ; i < dma_priv->nblocks ; i++ ) {
kfree(dma_priv->blocks[i].data);
}
kfree(dma_priv->blocks);
sg_free_table(&(dma_priv->sgt));
return ret;
}
int dma_stop(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
int ret = 0;
dma_unmap_sg(dev,dma_priv->sgt.sgl,\
dma_priv->sgt.nents, DMA_FROM_DEVICE);
dma_sg_exit(tdev);
dma_priv->dma_started = 0;
return ret;
}
static void dma_callback(void * param)
{
enum dma_status dma_stat;
struct test_platform_device * tdev = (struct test_platform_device *) param;
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
dev_info(dev,"Checking the DMA state....\n");
dma_stat = dma_async_is_tx_complete(dma_priv->dma_chan,\
dma_priv->cookie, NULL, NULL);
if(dma_stat == DMA_COMPLETE) {
dev_info(dev,"DMA complete! \n");
dma_sg_check(tdev);
dma_stop(tdev);
} else if (unlikely(dma_stat == DMA_ERROR)) {
dev_info(dev,"DMA error! \n");
dma_stop(tdev);
}
}
static void dma_busy_loop(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
enum dma_status status;
int status_change = -1;
do {
status = dma_async_is_tx_complete(dma_priv->dma_chan, dma_priv->cookie, NULL, NULL);
switch(status) {
case DMA_COMPLETE:
if(status_change != 0)
dev_info(dev,"DMA status: COMPLETE\n");
status_change = 0;
break;
case DMA_PAUSED:
if (status_change != 1)
dev_info(dev,"DMA status: PAUSED\n");
status_change = 1;
break;
case DMA_IN_PROGRESS:
if(status_change != 2)
dev_info(dev,"DMA status: IN PROGRESS\n");
status_change = 2;
break;
case DMA_ERROR:
if (status_change != 3)
dev_info(dev,"DMA status: ERROR\n");
status_change = 3;
break;
default:
dev_info(dev,"DMA status: UNKNOWN\n");
status_change = -1;
break;
}
} while(status != DMA_COMPLETE);
dev_info(dev,"DMA transaction completed! \n");
}
static int dma_sg_init(struct test_platform_device * tdev)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct scatterlist *sg;
int ret = 0;
int i;
ret = sg_alloc_table(&(dma_priv->sgt), SG_ENTRIES, GFP_ATOMIC);
if(ret)
goto out_mem2;
dma_priv->nblocks = SG_ENTRIES;
dma_priv->blocks = (struct dma_block *) kmalloc(dma_priv->nblocks\
*sizeof(struct dma_block), GFP_ATOMIC);
if(dma_priv->blocks == NULL)
goto out_mem1;
for( i = 0 ; i < dma_priv->nblocks ; i++ ) {
dma_priv->blocks[i].size = BUF_SIZE;
dma_priv->blocks[i].data = kmalloc(dma_priv->blocks[i].size, GFP_ATOMIC);
if(dma_priv->blocks[i].data == NULL)
goto out_mem3;
}
for_each_sg(dma_priv->sgt.sgl, sg, dma_priv->sgt.nents, i)
sg_set_buf(sg,dma_priv->blocks[i].data,dma_priv->blocks[i].size);
return ret;
out_mem3:
i--;
while(i >= 0)
kfree(dma_priv->blocks[i].data);
kfree(dma_priv->blocks);
out_mem2:
sg_free_table(&(dma_priv->sgt));
out_mem1:
ret = -ENOMEM;
return ret;
}
static int _dma_start(struct test_platform_device * tdev,int block)
{
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
int ret = 0;
int sglen;
/* Step 1: Allocate and initialize the SG list */
dma_sg_init(tdev);
/* Step 2: Map the SG list */
sglen = dma_map_sg(dev,dma_priv->sgt.sgl,\
dma_priv->sgt.nents, DMA_FROM_DEVICE);
if(! sglen)
goto out2;
/* Step 3: Configure the DMA */
(dma_priv->dma_config).direction = DMA_DEV_TO_MEM;
(dma_priv->dma_config).src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
(dma_priv->dma_config).src_maxburst = 1;
(dma_priv->dma_config).src_addr = (dma_addr_t) DEV_BUF;
dmaengine_slave_config(dma_priv->dma_chan, \
&(dma_priv->dma_config));
/* Step 4: Prepare the SG descriptor */
dma_priv->dma_desc = dmaengine_prep_slave_sg(dma_priv->dma_chan, \
dma_priv->sgt.sgl, dma_priv->sgt.nents, DMA_DEV_TO_MEM, \
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (dma_priv->dma_desc == NULL) {
dev_err(dev,"DMA could not assign a descriptor! \n");
goto out1;
}
/* Step 5: Set the callback method */
(dma_priv->dma_desc)->callback = dma_callback;
(dma_priv->dma_desc)->callback_param = (void *) tdev;
/* Step 6: Put the DMA descriptor in the queue */
dma_priv->cookie = dmaengine_submit(dma_priv->dma_desc);
/* Step 7: Fires the DMA transaction */
dma_async_issue_pending(dma_priv->dma_chan);
dma_priv->dma_started = 1;
if(block)
dma_busy_loop(tdev);
return ret;
out1:
dma_stop(tdev);
out2:
ret = -1;
return ret;
}
int dma_start(struct test_platform_device * tdev) {
return _dma_start(tdev,0);
}
int dma_start_block(struct test_platform_device * tdev) {
return _dma_start(tdev,1);
}
int dma_init(struct test_platform_device * tdev)
{
int ret = 0;
struct dma_private_info * dma_priv = _get_dmapip(tdev);
struct device * dev = _get_devp(tdev);
dma_priv->dma_chan = dma_request_slave_channel(dev, \
"dma_chan0");
if (dma_priv->dma_chan == NULL) {
dev_err(dev,"DMA channel busy! \n");
ret = -1;
}
dma_priv->dma_started = 0;
return ret;
}
int dma_exit(struct test_platform_device * tdev)
{
int ret = 0;
struct dma_private_info * dma_priv = _get_dmapip(tdev);
if(dma_priv->dma_started) {
dmaengine_terminate_all(dma_priv->dma_chan);
dma_stop(tdev);
dma_priv->dma_started = 0;
}
if(dma_priv->dma_chan != NULL)
dma_release_channel(dma_priv->dma_chan);
return ret;
}
In my driver source file (test_driver.c):
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/version.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/interrupt.h>
#include "test_driver.h"
static int dma_block=0;
module_param_named(dma_block, dma_block, int, 0444);
static struct test_platform_device tdev;
static struct of_device_id test_of_match[] = {
{ .compatible = "custom,test-driver-1.0", },
{}
};
static int test_probe(struct platform_device *op)
{
int ret = 0;
struct device * dev = &(op->dev);
const struct of_device_id *match = of_match_device(test_of_match, &op->dev);
if (!match)
return -EINVAL;
tdev.pdev = op;
dma_init(&tdev);
if(dma_block)
ret = dma_start_block(&tdev);
else
ret = dma_start(&tdev);
if(ret) {
dev_err(dev,"Error to start DMA transaction! \n");
} else {
dev_info(dev,"DMA OK! \n");
}
return ret;
}
static int test_remove(struct platform_device *op)
{
dma_exit(&tdev);
return 0;
}
static struct platform_driver test_platform_driver = {
.probe = test_probe,
.remove = test_remove,
.driver = {
.name = "test-driver",
.owner = THIS_MODULE,
.of_match_table = test_of_match,
},
};
static int test_init(void)
{
platform_driver_register(&test_platform_driver);
return 0;
}
static void test_exit(void)
{
platform_driver_unregister(&test_platform_driver);
}
module_init(test_init);
module_exit(test_exit);
MODULE_AUTHOR("klyone");
MODULE_DESCRIPTION("DMA SG test module");
MODULE_LICENSE("GPL");
However, the DMA never calls my callback function and I do not have any idea why it happens. Maybe, I am misunderstanding something...
Could anyone help me?
Thanks in advance.
Caveat: I don't have a definitive solution for you, but merely some observations and suggestions on how to debug this [based on many years of experience writing/debugging linux device drivers].
I presume you believe the callback is not being done because you don't get any printk messages. But, the callback is the only place that has them. But, is the printk level set high enough to see the messages? I'd add a dev_info to your module init, to prove it prints as expected.
Also, you [probably] won't get a callback if dma_start doesn't work as expected, so I'd add some dev_info calls there, too (e.g. before and after the call in step 7). I also notice that not all calls in dma_start check error returns [may be fine or void return, just mentioning in case you missed one]
At this point, it should be noted that there are really two questions here: (1) Did your DMA request start successfully [and complete]? (2) Did you get a callback?
So, I'd split off some code from dma_complete into (e.g.) dma_test_done. The latter does the same checking but only prints the "complete" message. You can call this in a poll mode to verify DMA completion.
So, if you [eventually] get a completion, then the problem reduces to why you didn't get the callback. If, however, you don't [even] get a completion, that's an even more fundamental problem.
This reminds me. You didn't show any code that calls dma_start or how you wait for the completion. I presume that if your callback were working, it would issue a wakeup of some sort that the base level would wait on. Or, the callback would do the request deallocate/cleanup (i.e. more code you'd write)
At step 7, you're calling dma_async_issue_pending, which should call pl330_issue_pending. pl330_issue_pending will call pl330_tasklet.
pl330_tasklet is a tasklet function, but it can also be called directly [to kick off DMA when there are no active requests].
pl330_tasklet will loop on its "work" queue and move any completed items to its "completed" queue. It then tries to start new requests. It then loops on its completed queue and issues the callbacks.
pl330_tasklet grabs the callback pointer, but if it's null it is silently ignored. You've set a callback, but it might be good to verify that where you set the callback is the same place [or propagates to] the place where pl330_tasklet will fetch it from.
When you make the call, everything may be busy, so there are no completed requests, no room to start a new request, so nothing to complete. In that case, pl330_tasklet will be called again later.
So, when dma_async_issue_pending returns, nothing may have happened yet. This is quite probable for your case.
pl330_tasklet tries to start new DMA by calling fill_queue. It will check that a descriptor is not [already] busy by looking at status != BUSY. So, you may wish to verify that yours has the correct value. Otherwise, you'd never get a callback [or even any DMA start].
Then, fill_queue will try to start the request via pl330_submit_req. But, that can return an error (e.g. queue already full), so, again, things are deferred.
For reference, notice the following comment at the top of pl330_submit_req:
Submit a list of xfers after which the client wants notification.
Client is not notified after each xfer unit, just once after all
xfer units are done or some error occurs.
What I'd do is start hacking up pl330.c and add debug messages and cross-checking. If your system is such that pl330 is servicing many other requests, you might limit the debug messages by checking that the device's private data pointer matches yours.
In particular, you'd like to get a message when your request actually gets started, so you could add a debug message to the end of pl330_submit_req
Then, adding messages within pl330_tasklet for requests will help, too.
Those are two good starting points. But, don't be afraid to add more printk calls as needed. You may be surprised by what gets called [or doesn't get called] or in what order.
UPDATE:
If I install the kernel module with the blocking behaviour, everything is initialized well. However, the dma_busy_loop function shows that the DMA descriptor is always IN PROGESS and the DMA transaction never completes. For this reason, the callback function is not executed. What could be happening?
Did a little more research. Cookies are just sequence numbers that increment. For example, if you issue a request that gets broken up into [say] 10 separate scatter/gather operations [descriptors], each one gets a unique cookie value. The cookie return value is the latest/last of the bunch (e.g. 10).
When you're calling (1) dma_async_is_tx_complete, (2) it calls chan->device->device_tx_status, (3) which is pl330_tx_status, (4) which calls dma_cookie_status
Side note/tip: When I was tracking this down, I just kept flipping back and forth between dmaengine.h and pl330.c. It was like: Look at (1), it calls (2). Where is that set? In pl330.c, I presume. So, I grepped for the string and got the name of pl330's function (i.e. (3)). So, I go there, and see that it does (4). So ... Back to dmaengine.h ...
However, when you make the outer call, you're ignoring [setting to NULL] the last two arguments. These can be useful because they return the "last" and "used" cookies. So, even if you don't get full completion, these values could change and show partial progress.
One of them should eventually be >= to the "return" cookie value. (i.e.) The entire operation should be complete. So, this will help differentiate what may be happening.
Also, note that in dmaengine.h, right below dma_async_is_tx_complete, there is dma_async_is_complete. This function is what decides whether to return DMA_COMPLETE or DMA_IN_PROGRESS, based on the cookie value you pass and the "last" and "used" cookie values. It's passive, and not used in the code path [AFAICT], but it does show how to calculate completion yourself.

Writing a new system call

I have been trying to write a new system call(called sys_defclose) in the raspberry's kernel, but upon compiling i get this error:
arch/arm/kernel/built-in.o: In function `__sys_trace_return':
:(.text+0xd50): undefined reference to `sys_defclose'
i have modified the following file:
-include/linux/syscalls.h : where i put the prototype of my syscall
-arch/arm/include/asm/unistd.h : where i put the new raw of the syscall table:
#define __NR_sys_defclose (__NR_SYSCALL_BASE+380)
-arch/arm/kernel/calls.S : where i put:
CALL(sys_defclose)
-i put the source of sys_defclose in arch/arm/kernel and i have modified the makefile in the same directory with the new line
obj-y +=sys_defclose.o
the kernel version is 3.6 of raspberrypi.
can somebody explain me how to solve this error?
thanks
this is the implementation of my syscall
static struct task_struct* get_task_by_pid(pid_t pid)
{
return pid_task(find_pid_ns(pid, task_active_pid_ns(current)), PIDTYPE_PID);
}
static void close_files(struct files_struct * files)
{
int i, j;
struct fdtable *fdt;
j = 0;
rcu_read_lock();
fdt = files_fdtable(files);
rcu_read_unlock();
for (;;) {
unsigned long set;
i = j * BITS_PER_LONG;
if (i >= fdt->max_fds)
break;
set = fdt->open_fds[j++];
while (set) {
if (set & 1) {
struct file * file = xchg(&fdt->fd[i], NULL);
if (file) {
filp_close(file, files);
cond_resched();
}
}
i++;
set >>= 1;
}
}
}
asmlinkage long sys_defclose(pid_t pid)
{
struct task_struct *result = NULL;
rcu_read_lock();
result = get_task_by_pid(pid);
rcu_read_unlock();
close_files(result->files);
}
You should use SYSCALL_DEFINE* to define syscall (I think, this step you did wrong), then add your syscall into sys_call_table, which is architecture-dependent (arch/arm/kernel/calls.S for arm).
Change your sys_defclose to look like this:
SYSCALL_DEFINE1(defclose, pid_t, pid)
{
struct task_struct *result = NULL;
rcu_read_lock();
result = get_task_by_pid(pid);
rcu_read_unlock();
close_files(result->files);
}

User level thread

I am trying to create user level thread. Here is a sample of my code. Can any body help me what is the problem in this program.
#include<stdio.h>
#include<ucontext.h>
int thread_counter = 0;
int thread1, thread2;
int who_am_i;
struct TCB {
ucontext_t context;
void (* fun_ptr)();
};
struct TCB tcb[3];
char stack[2][8192];
//----------------------
int thread_create(void (*fun)()) {
static volatile int s;
thread_counter++;
s = 0;
getcontext(&tcb[thread_counter].context);
if(!s) {
tcb[thread_counter].context.uc_stack.ss_sp = stack[thread_counter];
tcb[thread_counter].context.uc_stack.ss_size = sizeof(stack[thread_counter]);
tcb[thread_counter].context.uc_link = &tcb[0].context;
tcb[thread_counter].fun_ptr = fun;
s = 1;
}
else {
tcb[who_am_i].fun_ptr();
}
return thread_counter;
}
void thread_yield(int next_thread) {
static volatile int switched;
switched = 0;
getcontext(&tcb[who_am_i].context);
if(!switched) {
switched = 1;
who_am_i = next_thread;
setcontext(&tcb[next_thread].context);
}
}
//----------------------
void f1() {
printf("start f1\n");
thread_yield(thread2);
printf("finish f1:\n");
}
void f2() {
printf("start f2\n");
thread_yield(thread1);
printf("finish f2\n");
}
//----------------------
int main() {
thread1 = thread_create(f1);
thread2 = thread_create(f2);
who_am_i = 0;
thread_yield(thread1);
return 0;
}
Thread is not switching properly. When I run it, it gives following output:
start f1
start f2
finish f2
Thank you
You have an undefined behavior situation.
In thread_create you increase thread_counter the first thing you do. So when you create the second thread thread_counter will be 2. Then you access stack[2] which will give you undefined behavior.
You also hardcode the uc_link member of the context to &tcb[0].context, which is never initialized due to your "premature" increment of thread_counter.
But the main problem is that you don't actually create a new context, you just get the context for the current thread. You should use makecontext for each thread instead.

Resources