I have created a small project (https://github.com/NHAS/wag) that uses XDP & eBPF to allow connections based on time over a wireguard VPN.
I have attached the XDP eBPF program to the wireguard TUN device, and am experiencing poor throughput (speedtest of down ~20 Mbps wireguard + eBPF, vs wireguard - eBPF ~100 Mbps). Additionally, pings to the wireguard server itself have inconsistent latency, and are dropped at a rate of 1 ICMP packet/~600 pings.
Please note that this occurs during unloaded periods. Where traffic will be less than 100 Mbps total.
The code below is loaded into the kernel with cilium.
// Kernel load
...
xdpLink, err = link.AttachXDP(link.XDPOptions{
Program: xdpObjects.XdpProgFunc,
Interface: iface.Index,
})
...
eBPF kernel:
// +build ignore
#include "bpf_endian.h"
#include "common.h"
char __license[] SEC("license") = "Dual MIT/GPL";
// One /24
#define MAX_MAP_ENTRIES 256
// Inner map is a LPM tri, so we use this as the key
struct ip4_trie_key
{
__u32 prefixlen; // first member must be u32
__u32 addr; // rest can are arbitrary
};
// Map of users (ipv4) to BOOTTIME uint64 timestamp denoting authorization status
struct bpf_map_def SEC("maps") sessions = {
.type = BPF_MAP_TYPE_HASH,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.map_flags = 0,
};
// Map of users (ipv4) to BOOTTIME uint64 timestamp denoting when the last packet was recieved
struct bpf_map_def SEC("maps") last_packet_time = {
.type = BPF_MAP_TYPE_HASH,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.map_flags = 0,
};
// A single variable in nano seconds
struct bpf_map_def SEC("maps") inactivity_timeout_minutes = {
.type = BPF_MAP_TYPE_ARRAY,
.max_entries = 1,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.map_flags = 0,
};
// Two tables of the same construction
// IP to LPM trie
struct bpf_map_def SEC("maps") mfa_table = {
.type = BPF_MAP_TYPE_HASH_OF_MAPS,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.map_flags = 0,
};
struct bpf_map_def SEC("maps") public_table = {
.type = BPF_MAP_TYPE_HASH_OF_MAPS,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.map_flags = 0,
};
/*
Attempt to parse the IPv4 source address from the packet.
Returns 0 if there is no IPv4 header field; otherwise returns non-zero.
*/
static int parse_ip_src_dst_addr(struct xdp_md *ctx, __u32 *ip_src_addr, __u32 *ip_dst_addr)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
// As this is being attached to a wireguard interface (tun device), we dont get layer 2 frames
// Just happy little ip packets
// Then parse the IP header.
struct iphdr *ip = data;
if ((void *)(ip + 1) > data_end)
{
return 0;
}
// We dont support ipv6
if (ip->version != 4)
{
return 0;
}
// Return the source IP address in network byte order.
*ip_src_addr = (__u32)(ip->saddr);
*ip_dst_addr = (__u32)(ip->daddr);
return 1;
}
static int conntrack(__u32 *src_ip, __u32 *dst_ip)
{
// Max lifetime of the session.
__u64 *session_expiry = bpf_map_lookup_elem(&sessions, src_ip);
if (!session_expiry)
{
return 0;
}
// The most recent time a valid packet was received from our a user src_ip
__u64 *lastpacket = bpf_map_lookup_elem(&last_packet_time, src_ip);
if (!lastpacket)
{
return 0;
}
// Our userland defined inactivity timeout
u32 index = 0;
__u64 *inactivity_timeout = bpf_map_lookup_elem(&inactivity_timeout_minutes, &index);
if (!inactivity_timeout)
{
return 0;
}
__u64 currentTime = bpf_ktime_get_boot_ns();
// The inner map must be a LPM trie
struct ip4_trie_key key = {
.prefixlen = 32,
.addr = *dst_ip,
};
// If the inactivity timeout is not disabled and users session has timed out
u8 isTimedOut = (*inactivity_timeout != __UINT64_MAX__ && ((currentTime - *lastpacket) >= *inactivity_timeout));
if (isTimedOut)
{
u64 locked = 0;
bpf_map_update_elem(&sessions, src_ip, &locked, BPF_EXIST);
}
// Order of preference is MFA -> Public, just in case someone adds multiple entries for the same route to make sure accidental exposure is less likely
// If the key is a match for the LPM in the public table
void *user_restricted_routes = bpf_map_lookup_elem(&mfa_table, src_ip);
if (user_restricted_routes)
{
if (bpf_map_lookup_elem(user_restricted_routes, &key) &&
// 0 indicates invalid session
*session_expiry != 0 &&
// If max session lifetime is disabled, or we are before the max lifetime of the session
(*session_expiry == __UINT64_MAX__ || *session_expiry > currentTime) &&
!isTimedOut)
{
// Doesnt matter if the value is not atomically set
*lastpacket = currentTime;
return 1;
}
}
void *user_public_routes = bpf_map_lookup_elem(&public_table, src_ip);
if (user_public_routes && bpf_map_lookup_elem(user_public_routes, &key))
{
// Only update the lastpacket time if we're not expired
if (!isTimedOut)
{
*lastpacket = currentTime;
}
return 1;
}
return 0;
}
SEC("xdp")
int xdp_prog_func(struct xdp_md *ctx)
{
__u32 src_ip, dst_ip;
if (!parse_ip_src_dst_addr(ctx, &src_ip, &dst_ip))
{
return XDP_DROP;
}
if (conntrack(&src_ip, &dst_ip) || conntrack(&dst_ip, &src_ip))
{
return XDP_PASS;
}
return XDP_DROP;
}
The questions I'm looking to answer are:
How do I profile which areas (if any) of the eBPF program are intensive?
Is this a processing time limit for XDP, or an optimal time to keep in mind?
Is my eBPF program sane?
Thanks.
For the BPF XDP hook, the most common sources of huge per-packet overhead are:
JIT compiler is disabled. You can check the value of /proc/sys/net/core/bpf_jit_enable for that.
The driver doesn't support XDP. You need to check this for the specific driver and kernel versions you are using.
As discussed in comments, you're in the second case. Your program is attached to the TUN device which doesn't support the XDP driver mode. That means your BPF program runs after the skb allocation and performance won't be much better than at the tc hook.
Related
Short Version
I want to write a Linux Driver for a custom USB device. Before writing the driver I used libusb-1.0 to test the device. With the following function call, I could read out a uin16_t value from the device:
status = libusb_control_transfer(handle, /* Device Handle */
0x80, /* bRequestType */
0x10, /* bRequest */
value, /* wValue */
0x0, /* wIndex */
((uint8_t *) &value), /* data */
2, /* wLength */
100); /* timeout */
After this call, I got a new value in the value variable.
Now I want to accomplish the same call in my Driver. I have tried the following in the probe function of my USB driver:
status = usb_control_msg(data->udev, usb_rcvctrlpipe(data->udev, 0), 0x10, USB_DIR_IN, 0, 0, (u8*) &my_data, 2, 100);
All I get is the return value -11 and on my device I don't see anything.
The only thing I am doing before this call, is calling data->udev = interface_to_usbdev(intf); to get the USB device from my interface.
Does anyone know, if I am missing something or if I am doing something wrong?
Long version
I want to learn how to write USB Drivers in Linux. As a DUT for which I can write a driver, I choose a Raspberry Pi Pico and the dev_lowlevel USB example. I adapt the code a little bit, so I can use a control transfer with bRequest 0x10 and bRequestType 0x0 (USB_DIR_OUT) to turn the Pico's onboard LED on or off and a control transfer with bRequest 0x10 and bRequestType 0x80 (USB_DIR_IN) to read back the current value of the LED.
With a user space program and the following code I can read out the value of the LED and turn it on or off:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <libusb-1.0/libusb.h>
#define VID 0x0000
#define DID 0x0001
int main(int argc, char **argv) {
int status, len;
libusb_device_handle *handle = NULL;
/* Init Libusb */
status = libusb_init(NULL);
if(status < 0) {
printf("Error init USB!\n");
return status;
}
handle = libusb_open_device_with_vid_pid(NULL, VID, DID);
if(!handle) {
printf("No device found with %04x:%04x\n", VID, DID);
libusb_exit(NULL);
return -1;
}
if(argc > 1)
value = atoi(argv[1]);
else {
/* Do control transfer */
status = libusb_control_transfer(handle, /* Device Handle */
0x80, /* bRequestType */
0x10, /* bRequest */
value, /* wValue */
0x0, /* wIndex */
((uint8_t *) &value), /* data */
2, /* wLength */
100); /* timeout */
if(status < 0) {
printf("Error during control transfer!\n");
libusb_close(handle);
libusb_exit(NULL);
return -1;
}
printf("Got: %d\n", value);
value = (value + 1) & 0x1;
}
/* Do control transfer */
status = libusb_control_transfer(handle, 0x0, 0x10, value, 0x0, NULL, 0, 100);
if(status < 0) {
printf("Error during control transfer!\n");
libusb_close(handle);
libusb_exit(NULL);
return -1;
}
libusb_close(handle);
libusb_exit(NULL);
return 0;
}
Now I want to control my device over a USB Driver. Here is what I got already:
#include <linux/module.h>
#include <linux/init.h>
#include <linux/usb.h>
#include <linux/slab.h>
/* Meta Information */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Johannes 4 GNU/Linux");
MODULE_DESCRIPTION("Driver for my custom RPi Pico USB device");
struct pico_usb {
struct usb_device *udev;
};
#define PICO_VID 0x0000
#define PICO_PID 0x0001
static struct usb_device_id pico_usb_table [] = {
{ USB_DEVICE(PICO_VID, PICO_PID) },
{},
};
MODULE_DEVICE_TABLE(usb, pico_usb_table);
static int pico_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) {
struct pico_usb *data;
int status;
int my_data;
printk("pico_usb_drv - Now I am in the Probe function!\n");
data = kzalloc(sizeof(struct pico_usb), GFP_KERNEL);
if(!data) {
printk("pico_usb_drv - Out of memory\n");
return -ENOMEM;
}
data->udev = interface_to_usbdev(intf);
usb_set_intfdata(intf, data);
/* Turn the LED on */
status = usb_control_msg(data->udev, usb_sndctrlpipe(data->udev, 0), 0x10, USB_DIR_OUT, 1, 0, 0, 0, 100);
/* Read LED state */
printk("pico_usb_drv - status USB_DIR_OUT: %d\n", status);
status = usb_control_msg(data->udev, usb_rcvctrlpipe(data->udev, 0), 0x10, USB_DIR_IN, 0, 0, (u8*) &my_data, 2, 100);
printk("pico_usb_drv - status USB_DIR_IN: %d\n", status);
return 0;
}
static void pico_usb_disconnect(struct usb_interface *intf) {
struct pico_usb *data;
printk("pico_usb_drv - Now I am in the Disconnect function!\n");
data = usb_get_intfdata(intf);
kfree(data);
}
static struct usb_driver pico_usb_driver = {
//.owner = THIS_MODULE,
.name = "pico_usb",
.id_table = pico_usb_table,
.probe = pico_usb_probe,
.disconnect = pico_usb_disconnect,
};
/**
* #brief This function is called, when the module is loaded into the kernel
*/
static int __init pico_usb_init(void) {
int result;
printk("pico_usb_drv - Registering the PICO USB device\n");
result = usb_register(&pico_usb_driver);
if(result) {
printk("pico_usb_drv - Error registering the PICO USB device\n");
return -result;
}
return 0;
}
/**
* #brief This function is called, when the module is removed from the kernel
*/
static void __exit pcio_usb_exit(void) {
printk("pico_usb_drv - Unregistering the PICO USB device\n");
usb_deregister(&pico_usb_driver);
}
module_init(pico_usb_init);
module_exit(pcio_usb_exit);
The first control message works and my LED is turned on. But the second control message doesn't do anything, but gives me the error code -11 back.
Does anyone know, if I am missing something or if I am doing something wrong?
Ok, I found the solution. Instead of usb_control_msg I use usb_control_msg_recv now and everything works just fine.
usb_control_msg_recv takes one more argument:
int usb_control_msg_recv(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *driver_data, __u16 size, int timeout, gfp_t memflags)
As I pass the pointer to a variable and don't want to allocate memory dynamically, I set the memflags argument to 0.
I want to write a simple test program to receive Ethernet frames using the ibverbs API.
The code below compiles and runs but never receives any packets. I'm using Mellanox ConnectX-3 hardware on Ubuntu 18.
Questions:
If, while running this RX program, I ping the Inifiniband interface from another machine, then ping receives responses. I would not expect that because the ping requests should be grabbed by the RX program and the Linux IP stack should not see them and therefore should not respond. What should happen?
Is there anything obvious wrong with my code?
Do I need a steering rule? If I remove the call of ibv_create_flow() should I just receive all packets the interface sees?
#include <infiniband/verbs.h>
#include <stdio.h>
#include <stdlib.h>
#define PORT_NUM 1
#define MAX_MSG_SIZE 1500 // The maximum size of each received packet.
#define RQ_NUM_DESC 512 // Max packets that can be received without processing.
// The MAC of the interface we are listening on.
#define DEST_MAC { 0x00, 0x0d, 0x3a, 0x47, 0x1c, 0x2e }
#define FATAL_ERROR(msg, ...) { fprintf(stderr, "ERROR: " msg "\n", ##__VA_ARGS__); exit(-1); }
int main() {
// Get the list of devices.
int num_devices = 0;
struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
if (!dev_list)
FATAL_ERROR("Failed to get IB devices list.");
// Choose the first device.
struct ibv_device *ib_dev = dev_list[0];
if (!ib_dev)
FATAL_ERROR("IB device not found.");
printf("Found %i Infiniband device(s).\n", num_devices);
printf("Using device '%s'.\n", ibv_get_device_name(ib_dev));
// Get the device context.
struct ibv_context *context = ibv_open_device(ib_dev);
if (!context)
FATAL_ERROR("Couldn't get context for device.");
// Allocate a protection domain (PD) that will group memory
// regions (MR) and rings.
struct ibv_pd *pd = ibv_alloc_pd(context);
if (!pd)
FATAL_ERROR("Couldn't allocate protection domain.");
// Create Complition Queue (CQ).
struct ibv_cq *cq = ibv_create_cq(context, RQ_NUM_DESC, NULL, NULL, 0);
if (!cq)
FATAL_ERROR("Couldn't create completion queue. errno = %d.", errno);
// Create Queue Pair (QP).
struct ibv_qp_init_attr qp_init_attr = {
.qp_context = NULL,
.send_cq = cq, // Report receive completion to CQ.
.recv_cq = cq,
.cap = {
.max_send_wr = 0, // No send ring.
.max_recv_wr = RQ_NUM_DESC, // Max num packets in ring.
.max_recv_sge = 1, // Only one pointer per descriptor.
},
.qp_type = IBV_QPT_RAW_PACKET, // Use Ethernet packets.
};
struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr);
if (!qp)
FATAL_ERROR("Couldn't create queue pair.");
// Initialize the QP (receive ring) and assign a port.
struct ibv_qp_attr qp_attr = { 0 };
qp_attr.qp_state = IBV_QPS_INIT;
qp_attr.port_num = PORT_NUM;
int qp_flags = IBV_QP_STATE | IBV_QP_PORT;
if (ibv_modify_qp(qp, &qp_attr, qp_flags) < 0)
FATAL_ERROR("Failed to initialize queue pair.");
// Move ring state to ready-to-receive. This is needed in
// order to be able to receive packets.
memset(&qp_attr, 0, sizeof(qp_attr));
qp_flags = IBV_QP_STATE;
qp_attr.qp_state = IBV_QPS_RTR;
if (ibv_modify_qp(qp, &qp_attr, qp_flags) < 0)
FATAL_ERROR("Failed to put queue pair into ready-to-receive state.");
// Allocate memory for packet buffer.
int buf_size = MAX_MSG_SIZE * RQ_NUM_DESC; // Maximum size of data to be accessed by hardware.
void *buf = malloc(buf_size);
if (!buf)
FATAL_ERROR("Couldn't allocate memory.");
// Register the user memory so it can be accessed by the HW directly.
struct ibv_mr *mr = ibv_reg_mr(pd, buf, buf_size, IBV_ACCESS_LOCAL_WRITE);
if (!mr)
FATAL_ERROR("Couldn't register memory region.");
// Create a scatter/gather entry.
struct ibv_sge sg_entry;
sg_entry.length = MAX_MSG_SIZE;
sg_entry.lkey = mr->lkey;
// Create a receive work request.
struct ibv_recv_wr wr;
wr.num_sge = 1;
wr.sg_list = &sg_entry;
wr.next = NULL;
// Post a load of receive work requests onto the receive queue.
struct ibv_recv_wr *bad_wr;
for (int n = 0; n < RQ_NUM_DESC; n++) {
// Each descriptor points to max MTU size buffer.
sg_entry.addr = (uint64_t)buf + MAX_MSG_SIZE * n;
// When a packet is received, a work completion will be created
// corresponding to this work request. It will contain this field.
wr.wr_id = n;
// Post the receive buffer to the ring.
int rv = ibv_post_recv(qp, &wr, &bad_wr);
if (rv != 0) {
FATAL_ERROR("Posting recv failed with error code %i.", rv);
}
}
// Create steering rule.
struct raw_eth_flow_attr {
struct ibv_flow_attr attr;
struct ibv_flow_spec_eth spec_eth;
} __attribute__((packed)) flow_attr = {
.attr = {
.comp_mask = 0,
.type = IBV_FLOW_ATTR_NORMAL,
.size = sizeof(flow_attr),
.priority = 0,
.num_of_specs = 1,
.port = PORT_NUM,
.flags = 0,
},
.spec_eth = {
.type = IBV_FLOW_SPEC_ETH,
.size = sizeof(struct ibv_flow_spec_eth),
.val = {
.dst_mac = DEST_MAC,
.src_mac = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
.ether_type = 0,
.vlan_tag = 0,
},
.mask = {
.dst_mac = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
.src_mac = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
.ether_type = 0,
.vlan_tag = 0,
}
}
};
// Register steering rule to intercept packet to DEST_MAC and place packet in
// ring pointed by qp.
struct ibv_flow *eth_flow = ibv_create_flow(qp, &flow_attr.attr);
if (!eth_flow)
FATAL_ERROR("Couldn't attach steering flow. Does DEST_MAC match that of the local NIC?");
printf("Receiving.\n");
while (1) {
// Wait for CQ event upon message received, and print a message
struct ibv_wc wc;
int msgs_completed = ibv_poll_cq(cq, 1, &wc);
if (msgs_completed > 0) {
printf("Message %ld received size %d\n", wc.wr_id, wc.byte_len);
sg_entry.addr = (uint64_t)buf + wc.wr_id * MAX_MSG_SIZE;
wr.wr_id = wc.wr_id;
// After processed need to post back the buffer.
int rv = ibv_post_recv(qp, &wr, &bad_wr);
if (rv != 0) {
FATAL_ERROR("Re-posting recv failed with error code %i.", rv);
}
}
else if (msgs_completed < 0) {
FATAL_ERROR("Polling error.");
}
}
}
Take a look at this example from Mellanox: https://community.mellanox.com/s/article/raw-ethernet-programming--basic-introduction---code-example
To receive everything the interface sees, you can use the experimental api #include <infiniband/verbs_exp.h>, then when creating the steering rule, use ibv_exp_flow_attr and set the type to IBV_EXP_FLOW_ATTR_SNIFFER.
Please refer to https://github.com/Mellanox/libvma/wiki/Architecture
VMA implements native RDMA verbs API. The native RDMA verbs have been extended into the Ethernet RDMA-capable NICs, enabling the packets to pass directly between the user application and the InfiniBand HCA or Ethernet NIC, bypassing the kernel and its TCP/UDP handling network stack.
We are missing interrupts in an linux embedded system having multi-core running at 1.25GHz.
Background:
kernel version: 2.6.32.27
we have user space processes which need real time performance.
They operate in a 1ms boundary.
That is to say within 1ms they are expected to complete a set of task, which at the max may take about 800uS.
We have a external component FPGA which provides, 1ms and 10ms interrupts to the multi-core processor through GPIO pins configured as edge triggerred interrupts.
These interrupts are handled in kernel driver.
The software architecture is in such a way that the user process, after completing its work will do an ioctl to the GPIO driver.
In this ioctl the driver will put the process to wakeup_interruptible state.
Whenever the next 1ms interrupt is received, the ISR will wakeup the process. This cycle repeats.
Both the 1ms and 10ms interrupts are routed to a single core of the processor using smp_affinity.
Problem:
Sometimes we find that some interrupts are missed.
(ie ISR itself doesnt get invoked).
After 12 to 20ms ISR's are hit normally.
This we are able to understand by profiling the duration between consecutive ISR calls, and having counters incremented first thing in the ISR.
This mostly happens during high system load at process level, and is random and hard to reproduce.
I have attached the skeletal code.
First I have to isolate whether it is a hardware or software problem. As it is a FPGA which is giving the interrupts, we dont have much doubt on the hardware.
Is this kernel freezing? It is the most likely case since the cpu cycles are incrementing.
Can it be a case of cpu freeze due to thermal conditions? If so, then the cpu cycles wouldn't have incremented in first place.
Any pointers to debug/isolate the root cause will be of great help
considering the kernel version we are working on and the profiling/debugging
facilities available in this kernel version.
skeletal code:
/* Build time Configuration */
/* Macros */
DECLARE_WAIT_QUEUE_HEAD(wait);
/** Structure Definitions */
/** Global Variables */
gpio_dev_t gpio1msDev, gpio10msDev;
GpioIntProfileSectorData_t GpioSigProfileData[MAX_GPIO_INT_CONSUMERS];
GpioIntProfileSectorData_t *ProfilePtrSector;
GpioIntProfileData_t GpioProfileData;
GpioIntProfileData_t *GpioIntProfilePtr;
CurrentTickProfile_t TimeStamp;
uint64_t ModuleInitDone = 0, FirstTimePIDWrite = 0;
uint64_t PrevCycle = 0, NowCycle = 0;
volatile uint64_t TenMsFlag, OneMsFlag;
uint64_t OneMsCounter;
uint64_t OneMsIsrTime, TenMsIsrTime;
uint64_t OneMsCounter, OneMsTime, TenMsTime, SyncStarted;
uint64_t Prev = 0, Now = 0, DiffTen = 0, DiffOne, SesSyncHappened;
static spinlock_t GpioSyncLock = SPIN_LOCK_UNLOCKED;
static spinlock_t IoctlSyncLock = SPIN_LOCK_UNLOCKED;
uint64_t EventPresent[MAX_GPIO_INT_CONSUMERS];
GpioEvent_t CurrentEvent = KERN_NO_EVENT;
TickSyncSes_t *SyncSesPtr = NULL;
/** Function Declarations */
ssize_t write_pid(struct file *filep, const char __user * buf, size_t count, loff_t * ppos);
long Gpio_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
static const struct file_operations my_fops = {
write:write_pid,
compat_ioctl:Gpio_compat_ioctl,
};
/**
* IOCTL function for GPIO interrupt module
*
* #return
*/
long Gpio_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
int len = 1, status = 0;
uint8_t Instance;
uint64_t *EventPtr;
GpioIntProfileSectorData_t *SectorProfilePtr, *DebugProfilePtr;
GpioEvent_t EventToGive = KERN_NO_EVENT;
pid_t CurrentPid = current->pid;
spin_lock(&IoctlSyncLock); // Take the spinlock
Instance = GetSector(CurrentPid);
SectorProfilePtr = &GpioSigProfileData[Instance];
EventPtr = &EventPresent[Instance];
spin_unlock(&IoctlSyncLock);
if (Instance <= MAX_GPIO_INT_CONSUMERS)
{
switch (cmd)
{
case IOCTL_WAIT_ON_EVENT:
if (*EventPtr)
{
/* Dont block here since this is a case where interrupt has happened
* before process calling the polling API */
*EventPtr = 0;
/* some profiling code */
}
else
{
status = wait_event_interruptible(wait, (*EventPtr == 1));
*EventPtr = 0;
}
/* profiling code */
TimeStamp.CurrentEvent = EventToGive;
len = copy_to_user((char *)arg, (char *)&TimeStamp, sizeof(CurrentTickProfile_t));
break;
default:
break;
}
}
else
{
return -EINVAL;
}
return 0;
}
/**
* Send signals to registered PID's.
*
* #return
*/
static void WakeupWaitQueue(GpioEvent_t Event)
{
int i;
/* some profile code */
CurrentEvent = Event;
// we dont wake up debug app hence "< MAX_GPIO_INT_CONSUMERS" is used in for loop
for (i = 0; i < MAX_GPIO_INT_CONSUMERS; i++)
{
EventPresent[i] = 1;
}
wake_up_interruptible(&wait);
}
/**
* 1ms Interrupt handler
*
* #return
*/
static irqreturn_t gpio_int_handler_1ms(int irq, void *irq_arg)
{
uint64_t reg_read, my_core_num;
unsigned long flags;
GpioEvent_t event = KERN_NO_EVENT;
/* code to clear the interrupt registers */
/************ profiling start************/
NowCycle = get_cpu_cycle();
GpioIntProfilePtr->TotalOneMsInterrupts++;
/* Check the max diff between consecutive interrupts */
if (PrevCycle)
{
DiffOne = NowCycle - PrevCycle;
if (DiffOne > GpioIntProfilePtr->OneMsMaxDiff)
GpioIntProfilePtr->OneMsMaxDiff = DiffOne;
}
PrevCycle = NowCycle;
TimeStamp.OneMsCount++; /* increment the counter */
/* Store the timestamp */
GpioIntProfilePtr->Gpio1msTimeStamp[GpioIntProfilePtr->IndexOne] = NowCycle;
TimeStamp.OneMsTimeStampAtIsr = NowCycle;
GpioIntProfilePtr->IndexOne++;
if (GpioIntProfilePtr->IndexOne == GPIO_PROFILE_ARRAY_SIZE)
GpioIntProfilePtr->IndexOne = 0;
/************ profiling end************/
/*
* Whenever 10ms Interrupt happens we send only one event to the upper layers.
* Hence it is necessary to sync between 1 & 10ms interrupts.
* There is a chance that sometimes 1ms can happen at first and sometimes 10ms.
*
*/
/******** Sync mechanism ***********/
spin_lock_irqsave(&GpioSyncLock, flags); // Take the spinlock
OneMsCounter++;
OneMsTime = NowCycle;
DiffOne = OneMsTime - TenMsTime;
if (DiffOne < MAX_OFFSET_BETWEEN_1_AND_10MS) //ten ms has happened first
{
if (OneMsCounter == 10)
{
event = KERN_BOTH_EVENT;
SyncStarted = 1;
}
else
{
if (SyncStarted)
{
if (OneMsCounter < 10)
{
GpioIntProfilePtr->TickSyncErrAt1msLess++;
}
else if (OneMsCounter > 10)
{
GpioIntProfilePtr->TickSyncErrAt1msMore++;
}
}
}
OneMsCounter = 0;
}
else
{
if (OneMsCounter < 10)
{
if (SyncStarted)
{
event = KERN_ONE_MS_EVENT;
}
}
else if (OneMsCounter > 10)
{
OneMsCounter = 0;
if (SyncStarted)
{
GpioIntProfilePtr->TickSyncErrAt1msMore++;
}
}
}
TimeStamp.SFN = OneMsCounter;
spin_unlock_irqrestore(&GpioSyncLock, flags);
/******** Sync mechanism ***********/
if(event != KERN_NO_EVENT)
WakeupWaitQueue(event);
OneMsIsrTime = get_cpu_cycle() - NowCycle;
if (GpioIntProfilePtr->Max1msIsrTime < OneMsIsrTime)
GpioIntProfilePtr->Max1msIsrTime = OneMsIsrTime;
return IRQ_HANDLED;
}
/**
* 10ms Interrupt handler
*
* #return
*/
static irqreturn_t gpio_int_handler_10ms(int irq, void *irq_arg)
{
uint64_t reg_read, my_core_num;
unsigned long flags;
GpioEvent_t event = KERN_NO_EVENT;
/* clear the interrupt */
/************ profiling start************/
GpioIntProfilePtr->TotalTenMsInterrupts++;
Now = get_cpu_cycle();
if (Prev)
{
DiffTen = Now - Prev;
if (DiffTen > GpioIntProfilePtr->TenMsMaxDiff)
GpioIntProfilePtr->TenMsMaxDiff = DiffTen;
}
Prev = Now;
TimeStamp.OneMsCount++; /* increment the counter */
TimeStamp.TenMsCount++;
GpioIntProfilePtr->Gpio10msTimeStamp[GpioIntProfilePtr->IndexTen] = Now;
TimeStamp.TenMsTimeStampAtIsr = Now;
//do_gettimeofday(&TimeOfDayAtIsr.TimeAt10MsIsr);
GpioIntProfilePtr->IndexTen++;
if (GpioIntProfilePtr->IndexTen == GPIO_PROFILE_ARRAY_SIZE)
GpioIntProfilePtr->IndexTen = 0;
/************ profiling end************/
/******** Sync mechanism ***********/
spin_lock_irqsave(&GpioSyncLock, flags);
TenMsTime = Now;
DiffTen = TenMsTime - OneMsTime;
if (DiffTen < MAX_OFFSET_BETWEEN_1_AND_10MS) //one ms has happened first
{
if (OneMsCounter == 10)
{
TimeStamp.OneMsTimeStampAtIsr = Now;
event = KERN_BOTH_EVENT;
SyncStarted = 1;
}
OneMsCounter = 0;
}
else
{
if (SyncStarted)
{
if (OneMsCounter < 9)
{
GpioIntProfilePtr->TickSyncErrAt10msLess++;
OneMsCounter = 0;
}
else if (OneMsCounter > 9)
{
GpioIntProfilePtr->TickSyncErrAt10msMore++;
OneMsCounter = 0;
}
}
else
{
if (OneMsCounter != 9)
OneMsCounter = 0;
}
}
TimeStamp.SFN = OneMsCounter;
spin_unlock_irqrestore(&GpioSyncLock, flags);
/******** Sync mechanism ***********/
if(event != KERN_NO_EVENT)
WakeupWaitQueue(event);
TenMsIsrTime = get_cpu_cycle() - Now;
if (GpioIntProfilePtr->Max10msIsrTime < TenMsIsrTime)
GpioIntProfilePtr->Max10msIsrTime = TenMsIsrTime;
return IRQ_HANDLED;
}
Reseting EventPresent after waiting the event in wait_event_interruptible()
EventPtr = &EventPresent[Instance];
...
status = wait_event_interruptible(wait, (*EventPtr == 1));
*EventPtr = 0;
looks suspicious.
If WakeupWaitQueue() will be executed concurrently, then setting of the event
for (i = 0; i < MAX_GPIO_INT_CONSUMERS; i++)
{
EventPresent[i] = 1;
}
wake_up_interruptible(&wait);
will be lost.
It is better to have two independent counters for raised events and for processed events:
uint64_t EventPresent[MAX_GPIO_INT_CONSUMERS]; // Number if raised events
uint64_t EventProcessed[MAX_GPIO_INT_CONSUMERS]; // Number of processed events
In that case condition could be a comparision of these counters:
Gpio_compat_ioctl()
{
...
EventPresentPtr = &EventPresent[Instance];
EventProcessedPtr = &EventProcessed[Instance];
...
status = wait_event_interruptible(wait, (*EventPresentPtr != *EventProcessedPtr));
(*EventProcessedPtr)++;
...
}
WakeupWaitQueue()
{
...
for (i = 0; i < MAX_GPIO_INT_CONSUMERS; i++)
{
EventPresent[i]++;
}
wake_up_interruptible(&wait);
}
This was not a kernel freeze.
We had a free core in the system which was running baremetal. We routed the 1ms interrupts to this baremetal core as well. When the issue occurs, we compared with the baremetal core profile info. In baremetal core ISRs were hit properly linear to the time elapsed.
By this we ruled out that there are not HW issues or thermal issues.
Next on close look of the code, we started suspecting whether spinlock is causing to miss the interrupts. To experiment, we changed the logic to run the ISRs without spinlock. Now we see that there are not missed interrupts.
So the issues seems solved, however with spinlock present also the system was working properly under normal load conditions. This issue arises only during very high CPU load. This is something for which i dont have an answer though ie only during high load condition, why calling spinlock makes the other interrupt to be missed.
I am trying to create a file with FatFs on USB flash, but my f_open call trying to read boot sector for first time file system mount hangs on this function.
DRESULT disk_read (
BYTE drv, /* Physical drive number (0) */
BYTE *buff, /* Pointer to the data buffer to store read data */
DWORD sector, /* Start sector number (LBA) */
BYTE count /* Sector count (1..255) */
)
{
BYTE status = USBH_MSC_OK;
if (drv || !count) return RES_PARERR;
if (Stat & STA_NOINIT) return RES_NOTRDY;
if(HCD_IsDeviceConnected(&USB_OTG_Core))
{
do
{
status = USBH_MSC_Read10(&USB_OTG_Core, buff,sector,512 * count);
USBH_MSC_HandleBOTXfer(&USB_OTG_Core ,&USB_Host);
if(!HCD_IsDeviceConnected(&USB_OTG_Core))
{
return RES_ERROR;
}
}
while(status == USBH_MSC_BUSY ); // Loop which create hanging state
}
if(status == USBH_MSC_OK)
return RES_OK;
return RES_ERROR;
}
The main problem is the loop which creates hanging state
while(status == USBH_MSC_BUSY );
So I do not know what to do to avoid this. Using debugger I discover that state is caused by parameter CmdStateMachine of structure USBH_MSC_BOTXferParam, type USBH_BOTXfer_TypeDef is equal CMD_UNINITIALIZED_STATE which actually cause miss up of switch statement of USBH_MSC_Read10 function.
/**
* #brief USBH_MSC_Read10
* Issue the read command to the device. Once the response received,
* it updates the status to upper layer
* #param dataBuffer : DataBuffer will contain the data to be read
* #param address : Address from which the data will be read
* #param nbOfbytes : NbOfbytes to be read
* #retval Status
*/
uint8_t USBH_MSC_Read10(USB_OTG_CORE_HANDLE *pdev,
uint8_t *dataBuffer,
uint32_t address,
uint32_t nbOfbytes)
{
uint8_t index;
static USBH_MSC_Status_TypeDef status = USBH_MSC_BUSY;
uint16_t nbOfPages;
status = USBH_MSC_BUSY;
if(HCD_IsDeviceConnected(pdev))
{
switch(USBH_MSC_BOTXferParam.CmdStateMachine)
{
case CMD_SEND_STATE:
/*Prepare the CBW and relevant field*/
USBH_MSC_CBWData.field.CBWTransferLength = nbOfbytes;
USBH_MSC_CBWData.field.CBWFlags = USB_EP_DIR_IN;
USBH_MSC_CBWData.field.CBWLength = CBW_LENGTH;
USBH_MSC_BOTXferParam.pRxTxBuff = dataBuffer;
for(index = CBW_CB_LENGTH; index != 0; index--)
{
USBH_MSC_CBWData.field.CBWCB[index] = 0x00;
}
USBH_MSC_CBWData.field.CBWCB[0] = OPCODE_READ10;
/*logical block address*/
USBH_MSC_CBWData.field.CBWCB[2] = (((uint8_t*)&address)[3]);
USBH_MSC_CBWData.field.CBWCB[3] = (((uint8_t*)&address)[2]);
USBH_MSC_CBWData.field.CBWCB[4] = (((uint8_t*)&address)[1]);
USBH_MSC_CBWData.field.CBWCB[5] = (((uint8_t*)&address)[0]);
/*USBH_MSC_PAGE_LENGTH = 512*/
nbOfPages = nbOfbytes/ USBH_MSC_PAGE_LENGTH;
/*Tranfer length */
USBH_MSC_CBWData.field.CBWCB[7] = (((uint8_t *)&nbOfPages)[1]) ;
USBH_MSC_CBWData.field.CBWCB[8] = (((uint8_t *)&nbOfPages)[0]) ;
USBH_MSC_BOTXferParam.BOTState = USBH_MSC_SEND_CBW;
/* Start the transfer, then let the state machine
manage the other transactions */
USBH_MSC_BOTXferParam.MSCState = USBH_MSC_BOT_USB_TRANSFERS;
USBH_MSC_BOTXferParam.BOTXferStatus = USBH_MSC_BUSY;
USBH_MSC_BOTXferParam.CmdStateMachine = CMD_WAIT_STATUS;
status = USBH_MSC_BUSY;
break;
case CMD_WAIT_STATUS:
if((USBH_MSC_BOTXferParam.BOTXferStatus == USBH_MSC_OK) && \
(HCD_IsDeviceConnected(pdev)))
{
/* Commands successfully sent and Response Received */
USBH_MSC_BOTXferParam.CmdStateMachine = CMD_SEND_STATE;
status = USBH_MSC_OK;
}
else if (( USBH_MSC_BOTXferParam.BOTXferStatus == USBH_MSC_FAIL ) && \
(HCD_IsDeviceConnected(pdev)))
{
/* Failure Mode */
USBH_MSC_BOTXferParam.CmdStateMachine = CMD_SEND_STATE;
}
else if ( USBH_MSC_BOTXferParam.BOTXferStatus == USBH_MSC_PHASE_ERROR )
{
/* Failure Mode */
USBH_MSC_BOTXferParam.CmdStateMachine = CMD_SEND_STATE;
status = USBH_MSC_PHASE_ERROR;
}
else
{
/* Wait for the Commands to get Completed */
/* NO Change in state Machine */
}
break;
default:
break;
}
}
return status;
}
Here is USBH_BOTXfer_TypeDef type declaration;
typedef struct _BOTXfer
{
uint8_t MSCState;
uint8_t MSCStateBkp;
uint8_t MSCStateCurrent;
uint8_t CmdStateMachine;
uint8_t BOTState;
uint8_t BOTStateBkp;
uint8_t* pRxTxBuff;
uint16_t DataLength;
uint8_t BOTXferErrorCount;
uint8_t BOTXferStatus;
} USBH_BOTXfer_TypeDef;
During the debug I discover that all fields of it is 0x00.
Here are my FatFs calls
int main(void)
{
FATFS Fat;
FIL file;
FRESULT fr;
RCC->AHB1ENR |= RCC_AHB1ENR_GPIODEN;
/* Enable SWO output */
DBGMCU->CR = 0x00000020;
GPIOD->MODER=0x55000000;
GPIOD->OTYPER = 0x00000000;
GPIOD->OSPEEDR = 0x00000001;
while(1)
{
if (!USB_MSC_IsInitialized())
{
USB_MSC_Initialize();
}
if (USB_MSC_IsConnected())
{
GPIOD->ODR = (1 << 15);
disk_initialize(0);
fr = f_mount(0, &Fat);
if(fr == FR_OK)
{
fr = f_open(&file,"0:DP_lab8.pdf",(FA_CREATE_ALWAYS | FA_WRITE));
if (fr == FR_OK)
{
f_close(&file);
}
f_mount(0, NULL);
}
}
else
{
GPIOD->ODR = (1 << 14);
}
USB_MSC_Main();
}
}
USB_MSC_IsConnected function is:
int USB_MSC_IsConnected(void)
{
if (g_USB_MSC_HostStatus == USB_DEV_NOT_SUPPORTED)
{
USB_MSC_Uninitialize();
}
return !(g_USB_MSC_HostStatus == USB_DEV_DETACHED ||
g_USB_MSC_HostStatus == USB_HOST_NO_INIT ||
g_USB_MSC_HostStatus == USB_DEV_NOT_SUPPORTED);
}
And device states are:
typedef enum
{
USB_HOST_NO_INIT = 0, /* USB interface not initialized */
USB_DEV_DETACHED, /* no device connected */
USB_SPEED_ERROR, /* unsupported USB speed */
USB_DEV_NOT_SUPPORTED, /* unsupported device */
USB_DEV_WRITE_PROTECT, /* device is write protected */
USB_OVER_CURRENT, /* overcurrent detected */
USB_DEV_CONNECTED /* device connected and ready */
} USB_HostStatus;
The value of g_USB_MSC_HostStatus is received by standard USB HOST user callbacks.
I think it is a bug in ST host library. I've hunted it down, as my usb host was unable to pass enumeration stage. After a fix the stack is Ok.
There is union _USB_Setup in usbh_def.h file in "STM32Cube/Repository/STM32Cube_FW_F7_V1.13.0/Middlewares/ST/STM32_USB_Host_Library/Core/Inc" (any chip, not only F7, any version, not only V1.13.0). It has uint16_t bmRequestType and uint16_t bRequest. These two fileds must be uint8_t as of USB specs. Fixing this issue made usb host go as needed. Enumeration stage passes ok, and all other stages as well.
I modified device tree file and enable spi using 4 GPIO pins, which support pinmux and switch from gpio to spi function.
But in Linux kernel code, how does the code know which spi bus/pins is used?
For example, I find a Linux kernel driver: max1111.c, which drives a spi ADC chip. But I checked its code, and don't find where the spi bus/pins is specified.
I paste max1111.c below.
/*
* max1111.c - +2.7V, Low-Power, Multichannel, Serial 8-bit ADCs
*
* Based on arch/arm/mach-pxa/corgi_ssp.c
*
* Copyright (C) 2004-2005 Richard Purdie
*
* Copyright (C) 2008 Marvell International Ltd.
* Eric Miao <eric.miao#marvell.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* publishhed by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/spi/spi.h>
#include <linux/slab.h>
enum chips { max1110, max1111, max1112, max1113 };
#define MAX1111_TX_BUF_SIZE 1
#define MAX1111_RX_BUF_SIZE 2
/* MAX1111 Commands */
#define MAX1111_CTRL_PD0 (1u << 0)
#define MAX1111_CTRL_PD1 (1u << 1)
#define MAX1111_CTRL_SGL (1u << 2)
#define MAX1111_CTRL_UNI (1u << 3)
#define MAX1110_CTRL_SEL_SH (4)
#define MAX1111_CTRL_SEL_SH (5) /* NOTE: bit 4 is ignored */
#define MAX1111_CTRL_STR (1u << 7)
struct max1111_data {
struct spi_device *spi;
struct device *hwmon_dev;
struct spi_message msg;
struct spi_transfer xfer[2];
uint8_t tx_buf[MAX1111_TX_BUF_SIZE];
uint8_t rx_buf[MAX1111_RX_BUF_SIZE];
struct mutex drvdata_lock;
/* protect msg, xfer and buffers from multiple access */
int sel_sh;
int lsb;
};
static int max1111_read(struct device *dev, int channel)
{
struct max1111_data *data = dev_get_drvdata(dev);
uint8_t v1, v2;
int err;
/* writing to drvdata struct is not thread safe, wait on mutex */
mutex_lock(&data->drvdata_lock);
data->tx_buf[0] = (channel << data->sel_sh) |
MAX1111_CTRL_PD0 | MAX1111_CTRL_PD1 |
MAX1111_CTRL_SGL | MAX1111_CTRL_UNI | MAX1111_CTRL_STR;
err = spi_sync(data->spi, &data->msg);
if (err < 0) {
dev_err(dev, "spi_sync failed with %d\n", err);
mutex_unlock(&data->drvdata_lock);
return err;
}
v1 = data->rx_buf[0];
v2 = data->rx_buf[1];
mutex_unlock(&data->drvdata_lock);
if ((v1 & 0xc0) || (v2 & 0x3f))
return -EINVAL;
return (v1 << 2) | (v2 >> 6);
}
#ifdef CONFIG_SHARPSL_PM
static struct max1111_data *the_max1111;
int max1111_read_channel(int channel)
{
return max1111_read(&the_max1111->spi->dev, channel);
}
EXPORT_SYMBOL(max1111_read_channel);
#endif
/*
* NOTE: SPI devices do not have a default 'name' attribute, which is
* likely to be used by hwmon applications to distinguish between
* different devices, explicitly add a name attribute here.
*/
static ssize_t show_name(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_spi_device(dev)->modalias);
}
static ssize_t show_adc(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct max1111_data *data = dev_get_drvdata(dev);
int channel = to_sensor_dev_attr(attr)->index;
int ret;
ret = max1111_read(dev, channel);
if (ret < 0)
return ret;
/*
* Assume the reference voltage to be 2.048V or 4.096V, with an 8-bit
* sample. The LSB weight is 8mV or 16mV depending on the chip type.
*/
return sprintf(buf, "%d\n", ret * data->lsb);
}
#define MAX1111_ADC_ATTR(_id) \
SENSOR_DEVICE_ATTR(in##_id##_input, S_IRUGO, show_adc, NULL, _id)
static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
static MAX1111_ADC_ATTR(0);
static MAX1111_ADC_ATTR(1);
static MAX1111_ADC_ATTR(2);
static MAX1111_ADC_ATTR(3);
static MAX1111_ADC_ATTR(4);
static MAX1111_ADC_ATTR(5);
static MAX1111_ADC_ATTR(6);
static MAX1111_ADC_ATTR(7);
static struct attribute *max1111_attributes[] = {
&dev_attr_name.attr,
&sensor_dev_attr_in0_input.dev_attr.attr,
&sensor_dev_attr_in1_input.dev_attr.attr,
&sensor_dev_attr_in2_input.dev_attr.attr,
&sensor_dev_attr_in3_input.dev_attr.attr,
NULL,
};
static const struct attribute_group max1111_attr_group = {
.attrs = max1111_attributes,
};
static struct attribute *max1110_attributes[] = {
&sensor_dev_attr_in4_input.dev_attr.attr,
&sensor_dev_attr_in5_input.dev_attr.attr,
&sensor_dev_attr_in6_input.dev_attr.attr,
&sensor_dev_attr_in7_input.dev_attr.attr,
NULL,
};
static const struct attribute_group max1110_attr_group = {
.attrs = max1110_attributes,
};
static int setup_transfer(struct max1111_data *data)
{
struct spi_message *m;
struct spi_transfer *x;
m = &data->msg;
x = &data->xfer[0];
spi_message_init(m);
x->tx_buf = &data->tx_buf[0];
x->len = MAX1111_TX_BUF_SIZE;
spi_message_add_tail(x, m);
x++;
x->rx_buf = &data->rx_buf[0];
x->len = MAX1111_RX_BUF_SIZE;
spi_message_add_tail(x, m);
return 0;
}
static int max1111_probe(struct spi_device *spi)
{
enum chips chip = spi_get_device_id(spi)->driver_data;
struct max1111_data *data;
int err;
spi->bits_per_word = 8;
spi->mode = SPI_MODE_0;
err = spi_setup(spi);
if (err < 0)
return err;
data = devm_kzalloc(&spi->dev, sizeof(struct max1111_data), GFP_KERNEL);
if (data == NULL) {
dev_err(&spi->dev, "failed to allocate memory\n");
return -ENOMEM;
}
switch (chip) {
case max1110:
data->lsb = 8;
data->sel_sh = MAX1110_CTRL_SEL_SH;
break;
case max1111:
data->lsb = 8;
data->sel_sh = MAX1111_CTRL_SEL_SH;
break;
case max1112:
data->lsb = 16;
data->sel_sh = MAX1110_CTRL_SEL_SH;
break;
case max1113:
data->lsb = 16;
data->sel_sh = MAX1111_CTRL_SEL_SH;
break;
}
err = setup_transfer(data);
if (err)
return err;
mutex_init(&data->drvdata_lock);
data->spi = spi;
spi_set_drvdata(spi, data);
err = sysfs_create_group(&spi->dev.kobj, &max1111_attr_group);
if (err) {
dev_err(&spi->dev, "failed to create attribute group\n");
return err;
}
if (chip == max1110 || chip == max1112) {
err = sysfs_create_group(&spi->dev.kobj, &max1110_attr_group);
if (err) {
dev_err(&spi->dev,
"failed to create extended attribute group\n");
goto err_remove;
}
}
data->hwmon_dev = hwmon_device_register(&spi->dev);
if (IS_ERR(data->hwmon_dev)) {
dev_err(&spi->dev, "failed to create hwmon device\n");
err = PTR_ERR(data->hwmon_dev);
goto err_remove;
}
#ifdef CONFIG_SHARPSL_PM
the_max1111 = data;
#endif
return 0;
err_remove:
sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group);
sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);
return err;
}
static int max1111_remove(struct spi_device *spi)
{
struct max1111_data *data = spi_get_drvdata(spi);
hwmon_device_unregister(data->hwmon_dev);
sysfs_remove_group(&spi->dev.kobj, &max1110_attr_group);
sysfs_remove_group(&spi->dev.kobj, &max1111_attr_group);
mutex_destroy(&data->drvdata_lock);
return 0;
}
static const struct spi_device_id max1111_ids[] = {
{ "max1110", max1110 },
{ "max1111", max1111 },
{ "max1112", max1112 },
{ "max1113", max1113 },
{ },
};
MODULE_DEVICE_TABLE(spi, max1111_ids);
static struct spi_driver max1111_driver = {
.driver = {
.name = "max1111",
.owner = THIS_MODULE,
},
.id_table = max1111_ids,
.probe = max1111_probe,
.remove = max1111_remove,
};
module_spi_driver(max1111_driver);
MODULE_AUTHOR("Eric Miao <eric.miao#marvell.com>");
MODULE_DESCRIPTION("MAX1110/MAX1111/MAX1112/MAX1113 ADC Driver");
MODULE_LICENSE("GPL");
SPI device driver (max1111 in your case) get pointer to underlying SPI-controller (struct spi_device *spi) during probe stage (max1111_probe). Driver should use it send requests to controller (using spi_sync, for example). Driver doesn't know about what PINS SPI-controller use.
What SPI-controller is passed to SPI device driver? SPI device, should be
declared in the DTS-file inside SPI-controller node. The controller initialized from SPI-controller node is passed to device probe.
SPI-controller can be hardware (specific to SoC), or SPI-GPIO. In case of hardware SPI, pins usually dedicated and specified in SoC TRM. In case of SPI-GPIO, GPIO names are specified inside DTS-properties of SPI-GPIO. The properties names are: gpio-sck, gpio-miso, gpio-mosi, num-chipselects and cs-gpios (list).
The device tree file where the SPI device(max1111) is listed as child will have the corresponding base address of SPI module as shown in below example, this base address is related to the SPI bus not device.
spix#ABCDXYZ {
compatible = "busdriver,variant";
/*This naming convention depends on the device tree*/
mosi = <gpioA> /*replace gpioA/B/C/D with your gpios*/
miso = <gpioB>
gpio-clk = <gpioC>
gpio-cs0 = <gpioD>
spi-max-frequency = <freq1>;
;
;
;
max1111#0 {
compatible = "max1111";
reg = <0>;
spi-max-frequency = <freq2>;
;
;
;
};
;
;
;
}
In the above device tree file you can see the device node "max1111" listed as child under the bus node "spix" whose register address range starts at address #ABCDXYZ. You need to refer userguide of MCU you are using to know this base address.
The driver file max1111.c, is the device driver file for one of the SPI devices sitting on the spix bus.
The driver max1111.c is agnostic to the SPI lines that it is sitting on & just enjoys the APIs (Ex: data transfer & chip select handling) provided by the SPI bus driver associated with "busdriver,variant". The SPI bus driver will take care of configuring/multiplexing the MCU pins into SPI mode (handled explicitly in machine/board specific initialisation code).
So all you have to do is to,
Identify which SPI bus max1111 device is using.
Then go to the device tree file corresponding to you board.
Add your device driver details (+ SPI parameters) as part of child to that bus node
include your max1111.c into the kernel biuld directory (mostly drivers/spi folder)
that is it.
Note: here the assumption is your machine is already having that particular SPI bus driver enabled.