I have created a small project (https://github.com/NHAS/wag) that uses XDP & eBPF to allow connections based on time over a wireguard VPN.
I have attached the XDP eBPF program to the wireguard TUN device, and am experiencing poor throughput (a speedtest download of ~20 Mbps with wireguard + eBPF, vs. ~100 Mbps with wireguard alone). Additionally, pings to the wireguard server itself have inconsistent latency, and are dropped at a rate of roughly 1 ICMP packet per 600 pings.
Please note that this occurs during unloaded periods, where traffic will be less than 100 Mbps total.
The code below is loaded into the kernel with cilium.
// Kernel load
...
xdpLink, err = link.AttachXDP(link.XDPOptions{
Program: xdpObjects.XdpProgFunc,
Interface: iface.Index,
})
...
eBPF kernel:
// +build ignore
#include "bpf_endian.h"
#include "common.h"
char __license[] SEC("license") = "Dual MIT/GPL";
// One /24
#define MAX_MAP_ENTRIES 256
// Key used to query the inner maps of mfa_table/public_table.
// The inner map is an LPM trie, so the key has to carry a prefix length.
struct ip4_trie_key
{
__u32 prefixlen; // first member must be u32
__u32 addr; // the rest of the key is arbitrary; here it is one IPv4 address
};
// Map of users (ipv4) to BOOTTIME uint64 timestamp denoting authorization status.
// conntrack() treats 0 as "no valid session" and __UINT64_MAX__ as "no maximum lifetime".
struct bpf_map_def SEC("maps") sessions = {
.type = BPF_MAP_TYPE_HASH,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.map_flags = 0,
};
// Map of users (ipv4) to BOOTTIME uint64 timestamp denoting when the last packet was received
struct bpf_map_def SEC("maps") last_packet_time = {
.type = BPF_MAP_TYPE_HASH,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.map_flags = 0,
};
// A single variable (index 0) holding the inactivity timeout.
// NOTE: despite the map's name, conntrack() compares this value against a difference of
// bpf_ktime_get_boot_ns() timestamps, so it is expressed in nanoseconds.
// __UINT64_MAX__ disables the inactivity timeout.
struct bpf_map_def SEC("maps") inactivity_timeout_minutes = {
.type = BPF_MAP_TYPE_ARRAY,
.max_entries = 1,
.key_size = sizeof(__u32),
.value_size = sizeof(__u64),
.map_flags = 0,
};
// Two tables of the same construction
// IP (user) to LPM trie of destination routes.
// mfa_table: routes that conntrack() only allows while the user's session is valid.
struct bpf_map_def SEC("maps") mfa_table = {
.type = BPF_MAP_TYPE_HASH_OF_MAPS,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.map_flags = 0,
};
// public_table: routes that conntrack() allows regardless of session state.
struct bpf_map_def SEC("maps") public_table = {
.type = BPF_MAP_TYPE_HASH_OF_MAPS,
.max_entries = MAX_MAP_ENTRIES,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.map_flags = 0,
};
/*
Read the IPv4 source and destination addresses from the start of the packet.

The packet is expected to begin directly with the IP header (no layer 2 frame),
because the program is attached to a wireguard (tun) interface.

On success both addresses are stored, unchanged from the header (network byte
order), into ip_src_addr / ip_dst_addr and 1 is returned.
Returns 0, leaving the outputs untouched, when the packet is too short to hold
an IPv4 header or when the IP version field is not 4.
*/
static int parse_ip_src_dst_addr(struct xdp_md *ctx, __u32 *ip_src_addr, __u32 *ip_dst_addr)
{
    void *pkt_begin = (void *)(long)ctx->data;
    void *pkt_limit = (void *)(long)ctx->data_end;
    struct iphdr *hdr = pkt_begin;

    // The whole fixed-size header must lie inside the packet before any field is read.
    if ((void *)(hdr + 1) > pkt_limit)
        return 0;

    // IPv6 (or anything else) is not supported.
    if (hdr->version != 4)
        return 0;

    *ip_src_addr = (__u32)(hdr->saddr);
    *ip_dst_addr = (__u32)(hdr->daddr);

    return 1;
}
/*
Decide whether traffic from user src_ip to dst_ip is allowed.

Returns 1 (allow) when dst_ip matches:
  - an entry in the user's MFA route trie AND the user's session is non-zero,
    not past its expiry and not inactive for longer than the timeout, or
  - an entry in the user's public route trie (no session required).
Returns 0 otherwise, including when src_ip has no entry in sessions or
last_packet_time, or when the inactivity timeout variable cannot be read.

Side effects: on inactivity timeout the user's sessions entry is overwritten
with 0; on an allowed packet that is not timed out, last_packet_time is
refreshed to the current boot time.
*/
static int conntrack(__u32 *src_ip, __u32 *dst_ip)
{
// Max lifetime of the session.
__u64 *session_expiry = bpf_map_lookup_elem(&sessions, src_ip);
if (!session_expiry)
{
return 0;
}
// The most recent time a valid packet was received from the user at src_ip
__u64 *lastpacket = bpf_map_lookup_elem(&last_packet_time, src_ip);
if (!lastpacket)
{
return 0;
}
// Our userland defined inactivity timeout
u32 index = 0;
__u64 *inactivity_timeout = bpf_map_lookup_elem(&inactivity_timeout_minutes, &index);
if (!inactivity_timeout)
{
return 0;
}
__u64 currentTime = bpf_ktime_get_boot_ns();
// The inner map must be a LPM trie; a /32 key asks for the longest prefix covering dst_ip
struct ip4_trie_key key = {
.prefixlen = 32,
.addr = *dst_ip,
};
// If the inactivity timeout is not disabled and the user's session has timed out
u8 isTimedOut = (*inactivity_timeout != __UINT64_MAX__ && ((currentTime - *lastpacket) >= *inactivity_timeout));
if (isTimedOut)
{
// Lock the session by writing 0 (the "invalid session" marker) into the sessions map
u64 locked = 0;
bpf_map_update_elem(&sessions, src_ip, &locked, BPF_EXIST);
}
// Order of preference is MFA -> Public, just in case someone adds multiple entries for the same route to make sure accidental exposure is less likely
// First: is the key a match for the LPM trie in the user's MFA table?
void *user_restricted_routes = bpf_map_lookup_elem(&mfa_table, src_ip);
if (user_restricted_routes)
{
if (bpf_map_lookup_elem(user_restricted_routes, &key) &&
// 0 indicates invalid session
*session_expiry != 0 &&
// If max session lifetime is disabled, or we are before the max lifetime of the session
(*session_expiry == __UINT64_MAX__ || *session_expiry > currentTime) &&
!isTimedOut)
{
// Doesnt matter if the value is not atomically set
*lastpacket = currentTime;
return 1;
}
}
// Second: is the key a match for the LPM trie in the user's public table?
void *user_public_routes = bpf_map_lookup_elem(&public_table, src_ip);
if (user_public_routes && bpf_map_lookup_elem(user_public_routes, &key))
{
// Only update the lastpacket time if we're not expired
if (!isTimedOut)
{
*lastpacket = currentTime;
}
return 1;
}
return 0;
}
/*
XDP entry point.

Drops anything that does not parse as IPv4. Otherwise the packet is passed when
conntrack() allows it in either direction: first treating the packet's source
as the user (user -> route), then treating its destination as the user
(route -> user reply traffic). The second check only runs if the first fails.
*/
SEC("xdp")
int xdp_prog_func(struct xdp_md *ctx)
{
    __u32 src_ip, dst_ip;

    if (!parse_ip_src_dst_addr(ctx, &src_ip, &dst_ip))
        return XDP_DROP;

    // Forward direction: the sender is the user.
    if (conntrack(&src_ip, &dst_ip))
        return XDP_PASS;

    // Reverse direction: the receiver is the user.
    return conntrack(&dst_ip, &src_ip) ? XDP_PASS : XDP_DROP;
}
The questions I'm looking to answer are:
How do I profile which areas (if any) of the eBPF program are intensive?
Is this a processing time limit for XDP, or an optimal time to keep in mind?
Is my eBPF program sane?
Thanks.
For the BPF XDP hook, the most common sources of huge per-packet overhead are:
JIT compiler is disabled. You can check the value of /proc/sys/net/core/bpf_jit_enable for that.
The driver doesn't support XDP. You need to check this for the specific driver and kernel versions you are using.
As discussed in comments, you're in the second case. Your program is attached to the TUN device which doesn't support the XDP driver mode. That means your BPF program runs after the skb allocation and performance won't be much better than at the tc hook.
Short Version
I want to write a Linux driver for a custom USB device. Before writing the driver I used libusb-1.0 to test the device. With the following function call, I could read out a uint16_t value from the device:
status = libusb_control_transfer(handle, /* Device Handle */
0x80, /* bRequestType */
0x10, /* bRequest */
value, /* wValue */
0x0, /* wIndex */
((uint8_t *) &value), /* data */
2, /* wLength */
100); /* timeout */
After this call, I got a new value in the value variable.
Now I want to accomplish the same call in my Driver. I have tried the following in the probe function of my USB driver:
status = usb_control_msg(data->udev, usb_rcvctrlpipe(data->udev, 0), 0x10, USB_DIR_IN, 0, 0, (u8*) &my_data, 2, 100);
All I get is the return value -11 and on my device I don't see anything.
The only thing I am doing before this call, is calling data->udev = interface_to_usbdev(intf); to get the USB device from my interface.
Does anyone know, if I am missing something or if I am doing something wrong?
Long version
I want to learn how to write USB Drivers in Linux. As a DUT for which I can write a driver, I choose a Raspberry Pi Pico and the dev_lowlevel USB example. I adapt the code a little bit, so I can use a control transfer with bRequest 0x10 and bRequestType 0x0 (USB_DIR_OUT) to turn the Pico's onboard LED on or off and a control transfer with bRequest 0x10 and bRequestType 0x80 (USB_DIR_IN) to read back the current value of the LED.
With a user space program and the following code I can read out the value of the LED and turn it on or off:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <libusb-1.0/libusb.h>
#define VID 0x0000
#define DID 0x0001
/*
 * Toggle or set the LED of the USB device VID:DID via vendor control transfers.
 *
 * Without arguments: reads the current 16-bit LED value with an IN control
 * transfer (bRequest 0x10), prints it, and writes back the toggled value.
 * With one argument: writes atoi(argv[1]) as the new value without reading.
 *
 * Returns 0 on success, the libusb error code if libusb_init() fails, and -1
 * if the device is not found or a control transfer fails.
 */
int main(int argc, char **argv) {
	int status;
	int rc = -1;
	/* Receive buffer for the IN transfer and wValue for the OUT transfer.
	 * (The original used this variable without ever declaring it.) */
	uint16_t value = 0;
	libusb_device_handle *handle = NULL;

	/* Init Libusb */
	status = libusb_init(NULL);
	if(status < 0) {
		printf("Error init USB!\n");
		return status;
	}

	handle = libusb_open_device_with_vid_pid(NULL, VID, DID);
	if(!handle) {
		printf("No device found with %04x:%04x\n", VID, DID);
		libusb_exit(NULL);
		return -1;
	}

	if(argc > 1) {
		value = (uint16_t) atoi(argv[1]);
	} else {
		/* Do control transfer: read the current LED value */
		status = libusb_control_transfer(handle,               /* Device Handle */
		                                 0x80,                 /* bRequestType */
		                                 0x10,                 /* bRequest */
		                                 0x0,                  /* wValue */
		                                 0x0,                  /* wIndex */
		                                 ((uint8_t *) &value), /* data */
		                                 sizeof(value),        /* wLength */
		                                 100);                 /* timeout */
		if(status < 0) {
			printf("Error during control transfer!\n");
			goto out;
		}
		printf("Got: %d\n", value);
		value = (value + 1) & 0x1;
	}

	/* Do control transfer: write the new LED value */
	status = libusb_control_transfer(handle, 0x0, 0x10, value, 0x0, NULL, 0, 100);
	if(status < 0) {
		printf("Error during control transfer!\n");
		goto out;
	}
	rc = 0;

out:
	/* Single cleanup path for every exit after the device was opened */
	libusb_close(handle);
	libusb_exit(NULL);
	return rc;
}
Now I want to control my device over a USB Driver. Here is what I got already:
#include <linux/module.h>
#include <linux/init.h>
#include <linux/usb.h>
#include <linux/slab.h>
/* Meta Information */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Johannes 4 GNU/Linux");
MODULE_DESCRIPTION("Driver for my custom RPi Pico USB device");
/* Per-interface driver state; allocated in probe and freed in disconnect */
struct pico_usb {
struct usb_device *udev; /* USB device the bound interface belongs to */
};
#define PICO_VID 0x0000
#define PICO_PID 0x0001
/* Devices this driver binds to; the empty entry terminates the list */
static struct usb_device_id pico_usb_table [] = {
{ USB_DEVICE(PICO_VID, PICO_PID) },
{},
};
MODULE_DEVICE_TABLE(usb, pico_usb_table);
static int pico_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) {
struct pico_usb *data;
int status;
int my_data;
printk("pico_usb_drv - Now I am in the Probe function!\n");
data = kzalloc(sizeof(struct pico_usb), GFP_KERNEL);
if(!data) {
printk("pico_usb_drv - Out of memory\n");
return -ENOMEM;
}
data->udev = interface_to_usbdev(intf);
usb_set_intfdata(intf, data);
/* Turn the LED on */
status = usb_control_msg(data->udev, usb_sndctrlpipe(data->udev, 0), 0x10, USB_DIR_OUT, 1, 0, 0, 0, 100);
/* Read LED state */
printk("pico_usb_drv - status USB_DIR_OUT: %d\n", status);
status = usb_control_msg(data->udev, usb_rcvctrlpipe(data->udev, 0), 0x10, USB_DIR_IN, 0, 0, (u8*) &my_data, 2, 100);
printk("pico_usb_drv - status USB_DIR_IN: %d\n", status);
return 0;
}
/*
 * Called when the bound interface goes away (device unplugged or driver
 * unbound). Releases the per-interface state that was stored with
 * usb_set_intfdata() in probe.
 */
static void pico_usb_disconnect(struct usb_interface *intf) {
	printk("pico_usb_drv - Now I am in the Disconnect function!\n");

	kfree(usb_get_intfdata(intf));
}
/* Driver description handed to usb_register()/usb_deregister() */
static struct usb_driver pico_usb_driver = {
//.owner = THIS_MODULE,
.name = "pico_usb",
.id_table = pico_usb_table, /* devices to bind to */
.probe = pico_usb_probe, /* called for each matching interface */
.disconnect = pico_usb_disconnect, /* called when that interface goes away */
};
/**
 * @brief This function is called, when the module is loaded into the kernel
 *
 * Registers the USB driver with the USB core.
 *
 * @return 0 on success, or the negative errno reported by usb_register()
 */
static int __init pico_usb_init(void) {
	int result;

	printk("pico_usb_drv - Registering the PICO USB device\n");
	result = usb_register(&pico_usb_driver);
	if(result) {
		printk("pico_usb_drv - Error registering the PICO USB device\n");
		/* usb_register() already returns a negative errno; negating it
		 * (as the original did) would hand a positive value to the
		 * module loader, which expects 0 or -Exxx. */
		return result;
	}
	return 0;
}
/**
* @brief This function is called, when the module is removed from the kernel
*
* Unregisters the USB driver from the USB core.
* NOTE(review): the name is spelled "pcio" (not "pico"); it is left as-is because
* module_exit() refers to it by this name.
*/
static void __exit pcio_usb_exit(void) {
printk("pico_usb_drv - Unregistering the PICO USB device\n");
usb_deregister(&pico_usb_driver);
}
module_init(pico_usb_init);
module_exit(pcio_usb_exit);
The first control message works and my LED is turned on. But the second control message doesn't do anything, but gives me the error code -11 back.
Does anyone know, if I am missing something or if I am doing something wrong?
Ok, I found the solution. Instead of usb_control_msg I use usb_control_msg_recv now and everything works just fine.
usb_control_msg_recv takes one more argument:
int usb_control_msg_recv(struct usb_device *dev, __u8 endpoint, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *driver_data, __u16 size, int timeout, gfp_t memflags)
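As I pass the pointer to a variable and don't want to allocate memory dynamically, I set the memflags argument to 0. (Note: usb_control_msg_recv does not need a DMA-capable buffer from the caller because it allocates its own internal buffer, and memflags is used for that allocation; in a context that may sleep, such as probe, GFP_KERNEL is the usual value to pass.)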
I ran into a situation where I am generating random data packets to be transmitted over the UART of a Raspberry Pi 3B+ with the Raspbian OS. I found that after 240 or so calls to rand(), an extra character is being generated in my data packet that could possibly cause a 1-byte buffer overrun. So if the data length of my packet is 48 bytes, 49 bytes will be seen on the wire by the receiving devices but not by the transmitting device. I have verified that 1 extra byte is being sent on the wire via memory analysis on my receiving device.
Has anyone run into a situation where if the write() syscall is interrupted while writing to the /dev/serial0 device socket, the raspberry pi will send an incorrect byte?
My best guess is that the pseudorandom number pool in Linux is being refreshed by the kernel during that system call. When I transmit an array of 48 bytes of fixed data, I don't get that spurious byte transmission, but I do get the spurious byte if I auto-generate the data.
The program I wrote has 2 pthreads. One for generating the packets and another for transmitting them. The transmitting thread at t=0 sleeps automatically because there is no data to send. The data generating thread will wake up the transmitting thread after 4 packets have been produced. I can regularly recreate this issue even if I sleep my transmitting thread for 1s after every transmission.
I tried to cut down on the unused code, but its still over 300 lines. It was a regular issue now its playing cat & mouse with me.
To Build:
gcc -pthread -g -o exp exp.c
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <termios.h>
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>
#define SPORT "//dev//serial0"
#define NETPAYLOADSZ 256
#define DATPOOLSZ 16
// One packet, accessible either through named header fields followed by the
// payload, or as the raw byte array that is handed to write().
typedef union __attribute__((packed)) PACKET
{
struct
{
uint8_t SID; // header byte 0 (DatGen writes 0x55) - presumably source ID, TODO confirm
uint8_t DID; // header byte 1 (DatGen writes 0xAA) - presumably destination ID, TODO confirm
uint8_t LEN; // number of bytes of packet[] to transmit (payload length + 7 in DatGen)
uint8_t SPT; // header byte 3 (DatGen writes 0xEE) - presumably source port, TODO confirm
uint8_t DPT; // header byte 4 (DatGen writes 0x22) - presumably destination port, TODO confirm
uint8_t TID; // header byte 5 (DatGen writes 0x77)
uint8_t DAT[249];//payload; the last transmitted byte, packet[LEN-1], is meant to be the checksum (DatGen currently writes 0xFF there)
};
uint8_t packet[NETPAYLOADSZ]; // raw view of the same bytes
}uPacket;
// Pointer table that is only zeroed in Init(); the threads use stDatPool below instead
uPacket *DataPool[DATPOOLSZ];
// Life-cycle state of a worker thread
enum THRDSTAT { eNOTRUNNING, eRUNNING, eSLEEPING, eSTOPPED };
// All state shared between the signal handler, DatGen and SerTx
struct SerRes
{
int sigint; // set to 1 by SIGHandler on the first SIGINT; both thread loops stop when it is non-zero
int serialFD; // file descriptor of the serial port opened in Init()
int datInPoolIdx; //written by GenDat, read by SerTx: next stDatPool slot to fill
int datOutPoolIdx;//written by SerTx: next stDatPool slot to transmit
enum THRDSTAT statusGen; // state of the DatGen thread
enum THRDSTAT statusTX; // state of the SerTx thread
uint8_t dataAvail; // number of filled, not yet transmitted slots; modified under sermut
pthread_mutex_t sermut; // guards the dataAvail increment/decrement
pthread_mutex_t genSleepmux; // mutex both threads pass to pthread_cond_wait()
pthread_mutex_t serTxSleepmux; // held by DatGen while signalling serTxSleep
pthread_cond_t genDatSleep; // DatGen waits here while the pool is full
pthread_cond_t serTxSleep; // SerTx waits here while the pool is empty
};
struct SerRes serres; // the single shared instance
uPacket stDatPool[DATPOOLSZ]; // ring of packet buffers, indexed by datInPoolIdx/datOutPoolIdx
/*
 * SIGINT handler.
 *
 * First signal: shortens the serial read timeout (VTIME = 1, VMIN = 0) so a
 * blocked read returns quickly, then sets serres.sigint so both worker loops
 * terminate.
 * Second signal: closes the serial port, marks both threads stopped and
 * terminates the process immediately.
 */
void SIGHandler(int num)
{
    struct termios options;

    (void)num;

    if(serres.sigint == 1)
    {
        close(serres.serialFD);
        serres.statusGen = eSTOPPED;
        serres.statusTX = eSTOPPED;
        /* exit() is not async-signal-safe (it runs atexit handlers and
         * flushes stdio, which can deadlock if the signal interrupted a
         * printf); _exit() is safe to call from a handler. */
        _exit(-1);
    }

    tcgetattr(serres.serialFD, &options);
    options.c_cc[VTIME] = 1; //read timeout of 100ms (VTIME is in tenths of a second)
    options.c_cc[VMIN] = 0;  //do not require any character before read() may return
    tcsetattr(serres.serialFD, TCSANOW, &options);
    serres.sigint = 1;
}
/*
 * Reset the shared state, create the synchronisation objects and open and
 * configure the serial port at serpath for raw 8-bit I/O at 115200 baud.
 *
 * Terminates the process with exit(-1) if the port cannot be opened or its
 * terminal attributes cannot be read or written.
 */
void Init(char *serpath)
{
    struct termios options;

    memset(&serres, 0, sizeof(struct SerRes));
    memset(&DataPool, 0, sizeof(uPacket*)*DATPOOLSZ);

    pthread_mutex_init(&serres.sermut, NULL);
    pthread_mutex_init(&serres.genSleepmux, NULL);
    /* serTxSleepmux is locked by DatGen, so it must be initialised as well */
    pthread_mutex_init(&serres.serTxSleepmux, NULL);
    pthread_cond_init(&serres.genDatSleep, NULL);
    pthread_cond_init(&serres.serTxSleep, NULL);

    serres.serialFD = open(serpath, O_RDWR | O_NOCTTY | O_NDELAY);
    if(serres.serialFD < 0)
    {
        printf("\nError no is: %d", errno);
        printf("\nError description: %s\n", strerror(errno));
        exit(-1);
    }

    /*
     * Order matters: read the current settings FIRST, then make them raw.
     * Calling cfmakeraw() before tcgetattr() (as the original did) lets
     * tcgetattr() overwrite the raw settings, so OPOST stays enabled and the
     * kernel inserts a 0x0D before every 0x0A in the transmitted data.
     */
    if(tcgetattr(serres.serialFD, &options) < 0)
    {
        printf("\nError no is: %d", errno);
        printf("\nError description: %s\n", strerror(errno));
        exit(-1);
    }
    cfmakeraw(&options);
    options.c_cflag = CS8 | CLOCAL | CREAD;
    options.c_lflag &= ~(ICANON | ECHO | ECHOE | ISIG );
    options.c_cc[VTIME] = 10; //read timeout of 1s (VTIME is in tenths of a second)
    options.c_cc[VMIN] = 1;   //receive at least 1 character
    cfsetspeed(&options, B115200); //set both input and output speed
    if(tcsetattr(serres.serialFD, TCSANOW, &options) < 0)
    {
        printf("\nError no is: %d", errno);
        printf("\nError description: %s\n", strerror(errno));
        exit(-1);
    }
}
/*
 * Close the serial port and destroy every mutex and condition variable held
 * in serres. Must only be called after both worker threads have finished.
 */
void Deinit(void)
{
    close(serres.serialFD);

    pthread_mutex_destroy(&serres.sermut);
    pthread_mutex_destroy(&serres.genSleepmux);
    pthread_mutex_destroy(&serres.serTxSleepmux);
    pthread_cond_destroy(&serres.genDatSleep);
    /* serTxSleep is created in Init() and was never released before */
    pthread_cond_destroy(&serres.serTxSleep);
}
/*
 * Producer thread: fills stDatPool slots with packets (fixed header, 0x30
 * pseudo-random payload bytes, 0xFF trailer) and counts them in
 * serres.dataAvail.
 *
 * Sleeps on genDatSleep while the pool is full, wakes SerTx once more than
 * three packets are available, and stops when SIGINT was received or SerTx
 * has stopped. Returns (via pthread_exit) a pointer to serres.sigint.
 */
void *DatGen(void *arg)
{
    uint8_t idx;
    pid_t tid = (pid_t)syscall(__NR_gettid);

    (void)arg;

    printf("\nDatGen - %d: Starting: \r\n", tid);
    serres.statusGen = eRUNNING;
    srand(0x089FFEE4);
    while(serres.sigint == 0 && serres.statusGen != eSTOPPED)
    {
        /* Per-packet checksum. NOTE: it is accumulated but not transmitted;
         * the trailer byte is currently the constant 0xFF. */
        uint8_t cksum = 0;

        //Sleep Condition
        pthread_mutex_lock(&serres.genSleepmux);
        if((serres.dataAvail == DATPOOLSZ))
        {
            printf("DatGen - %d: No more Data to Generate: Sleeping - %d\r\n", tid, serres.dataAvail );
            serres.statusGen = eSLEEPING;
            /* Guard against spurious wake up events. Also stop waiting once
             * SerTx has stopped: nobody would consume data or signal us
             * again, so waiting on a full pool would block forever. */
            while(serres.dataAvail == DATPOOLSZ && serres.statusTX != eSTOPPED)
            {
                pthread_cond_wait(&serres.genDatSleep, &serres.genSleepmux);
            }
            printf("Datgen - %d: ******** Wokeup, running\r\n\n", tid);
            if(serres.statusTX == eSTOPPED)
            {
                /* Release the mutex before leaving; the original broke out
                 * of the loop with genSleepmux still locked. */
                pthread_mutex_unlock(&serres.genSleepmux);
                break;
            }
            serres.statusGen = eRUNNING;
        }
        pthread_mutex_unlock(&serres.genSleepmux);

        //Time to wake up the SerTX thread? Maybe?
        if(serres.dataAvail > 3 && serres.statusTX == eSLEEPING)
        {
            pthread_mutex_lock(&serres.serTxSleepmux);
            pthread_cond_signal(&serres.serTxSleep);
            pthread_mutex_unlock(&serres.serTxSleepmux);
        }

        //Generate the Packets.
        idx = serres.datInPoolIdx;
        serres.datInPoolIdx = (serres.datInPoolIdx + 1) & (DATPOOLSZ - 1);
        assert(serres.datInPoolIdx < DATPOOLSZ);
        stDatPool[idx].SID = 0x55;
        stDatPool[idx].DID = 0xAA;
        stDatPool[idx].LEN = 0x30; //(rand() % 100);
        stDatPool[idx].SPT = 0xEE;
        stDatPool[idx].DPT = 0x22;
        stDatPool[idx].TID = 0x77;
        for(int i = 0; i < stDatPool[idx].LEN ; i++)
        {
            stDatPool[idx].DAT[i] = rand() % 100; //Only Write
            cksum += stDatPool[idx].DAT[i];
        }
        /* LEN becomes the total number of bytes to transmit:
         * 6 header bytes + payload + 1 trailer byte */
        stDatPool[idx].LEN += 7;
        cksum += stDatPool[idx].SID + stDatPool[idx].DID + stDatPool[idx].SPT + stDatPool[idx].LEN;
        cksum += stDatPool[idx].DPT + stDatPool[idx].TID;
        stDatPool[idx].packet[stDatPool[idx].LEN-1] = 0xFF;

        /********* Critical Shared Section *************/
        pthread_mutex_lock(&serres.sermut);
        serres.dataAvail++; //Touched by both threads...
        assert(serres.dataAvail < DATPOOLSZ+1);
        if(serres.dataAvail == DATPOOLSZ)
        {
            printf("Max Dat Reached\r\n");
        }
        pthread_mutex_unlock(&serres.sermut);
        /*************************************************/
    }
    serres.statusGen = eSTOPPED;
    pthread_exit(&serres.sigint);
}
#define LOOPCOUNT 8
/*
 * Consumer thread: takes packets from stDatPool in ring order and writes
 * them to the serial port, decrementing serres.dataAvail for each one.
 *
 * Sleeps on serTxSleep while no data is available, stops after LOOPCOUNT
 * packets or on SIGINT, and wakes DatGen on exit. Aborts via assert() if a
 * packet could not be written completely.
 * Returns (via pthread_exit) a pointer to serres.sigint.
 */
void *SerTx(void *arg)
{
    pid_t tid = (pid_t)syscall(__NR_gettid);
    uint8_t idx = 0;
    uint8_t count = 0;
    /* write() returns ssize_t. Storing it in a uint8_t (as the original did)
     * turns the error value -1 into 255 and truncates larger counts. */
    ssize_t bytesSent = 0;

    (void)arg;

    serres.statusTX = eRUNNING;
    while(serres.sigint == 0 && serres.statusTX != eSTOPPED && count < LOOPCOUNT)
    {
        //Sleep Condition
        pthread_mutex_lock(&serres.genSleepmux);
        if(serres.dataAvail < 1)
        {
            pthread_cond_signal(&serres.genDatSleep);
            printf("SerTx - %d: All Data Consumed\r\n", tid);
            serres.statusTX = eSLEEPING;
            while(serres.dataAvail < 1) //Guard against spurious wakeup events.
            {
                /* NOTE(review): DatGen signals serTxSleep while holding
                 * serTxSleepmux, but the wait here uses genSleepmux. */
                pthread_cond_wait(&serres.serTxSleep, &serres.genSleepmux);
            }
            serres.statusTX = eRUNNING;
            printf("SerTx - %d: ^^^^^^^ Woke up Running\r\n", tid);
        }
        pthread_mutex_unlock(&serres.genSleepmux);

        //Output
        idx = serres.datOutPoolIdx;
        serres.datOutPoolIdx = (serres.datOutPoolIdx + 1) & (DATPOOLSZ - 1);
        bytesSent = write(serres.serialFD, &stDatPool[idx].packet[0], stDatPool[idx].LEN); //only Read
        if(bytesSent != (ssize_t)stDatPool[idx].LEN) //error, or did we not send all the bytes?
        {
            printf("Pkt Len: %x\nBytesSent: %x\r\n", stDatPool[idx].LEN, (unsigned)bytesSent);
            assert(0);
        }
        printf("Consume: %x\r\n", stDatPool[idx].LEN);

        /********* Critical Shared Section *************/
        pthread_mutex_lock(&serres.sermut);
        serres.dataAvail--; //shared write
        assert(serres.dataAvail < DATPOOLSZ); //unsigned, so if it goes negative, it goes BIG!!
        pthread_mutex_unlock(&serres.sermut);
        /*************************************************/
        //usleep(1000000);
        //tcflush(serres.serialFD, TCIOFLUSH);
        count++;
    }
    serres.statusTX = eSTOPPED;
    pthread_cond_signal(&serres.genDatSleep);
    pthread_exit(&serres.sigint);
}
/*
 * Program entry point.
 *
 * pthread DatGen generates the data for SerTx.
 * pthread SerTx consumes the data generated by DatGen and sends it out the serial port.
 *
 * Opens the serial port, installs the SIGINT handler, starts both threads and
 * waits for them to finish. Returns 0 on normal completion and EXIT_FAILURE
 * if a thread could not be created.
 */
int main(int argc, char **argv)
{
    pthread_t datGen, serTx;
    pid_t pid = getpid();
    int err;

    (void)argc;
    (void)argv;

    Init(SPORT);
    signal(SIGINT, SIGHandler);

    err = pthread_create(&datGen, NULL, DatGen, NULL);
    if(err != 0)
    {
        fprintf(stderr, "pthread_create(DatGen) failed: %s\n", strerror(err));
        Deinit();
        return EXIT_FAILURE;
    }

    err = pthread_create(&serTx, NULL, SerTx, NULL);
    if(err != 0)
    {
        /* DatGen is already running and would block on a full pool forever
         * without a consumer, so end the whole process. */
        fprintf(stderr, "pthread_create(SerTx) failed: %s\n", strerror(err));
        exit(EXIT_FAILURE);
    }

    //Wait for all the threads to close
    pthread_join(datGen,NULL);
    pthread_join(serTx,NULL);

    Deinit();
    printf("\n>>>> End <<<<< %d\r\n", pid);
    return 0;
}
This is generated and subsequently transmitted in bulk
packet =
0x55, 0xaa, 0x37, 0xee, 0x22, 0x77, 0x4c, 0xbf, 0x8 , 0xad,
0xeb, 0xc9, 0xa, 0xb2, 0x1d, 0x45, 0x57, 0x48, 0xc0, 0xc1,
0xa3, 0x0, 0xb4, 0x73, 0x91, 0x8b, 0x28, 0x17, 0x3 , 0x40,
0x62, 0x48, 0x86, 0xc7, 0x9e, 0x60, 0xc2, 0xea, 0x20, 0xca,
0x98, 0x8c, 0x94, 0x22, 0xbe, 0x32, 0x67, 0x96, 0xf9, 0x28,
0xd7, 0x1d, 0xa7, 0x8c, 0xff
This is received by the device.
packet =
0x55, 0xaa, 0x37, 0xee, 0x22, 0x77, 0x4c, 0xbf, 0x8 , 0xad,
0xeb, 0xc9, 0xd, 0xa, 0xb2, 0x1d, 0x45, 0x57, 0x48, 0xc0,
0xc1, 0xa3, 0x0, 0xb4, 0x73, 0x91, 0x8b, 0x28, 0x17, 0x3,
0x40, 0x62, 0x48, 0x86, 0xc7, 0x9e, 0x60, 0xc2, 0xea, 0x20,
0xca, 0x98, 0x8c, 0x94, 0x22, 0xbe, 0x32, 0x67, 0x96, 0xf9,
0x28, 0xd7, 0x1d, 0xa7, 0x8c
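Byte 12 (0-indexed) is incorrect: 0x0D is received where 0x0A was sent, i.e. an extra 0x0D has been inserted immediately after 0xC9 and before the 0x0A. Every following byte is shifted by one position, so the last byte, 0xFF, is not correctly received by the receiver. I failed to mention that there is no flow control, and that is by design.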
Update ...
All credit goes to: Craig Estey
Ok, I think I have resolution. The problem I was having was not properly setting up the serial socket. I found that my call to:
cfmakeraw(&options);
was in the wrong location: it was called before tcgetattr(), which then overwrote the raw settings. Thus, the OPOST option in c_oflag was not cleared. As pointed out below in the answer update, whenever the kernel saw 0xA in my data, it automatically sent a 0xD before it. The corrected code no longer shows this behavior.
Secondly, it was pointed out that there was a race condition in my producer-consumer relationship. I don't necessarily agree with the analysis, but I looked at it anyway and changed how full and empty were defined in my program. I think I was originally going to go this route, but I was having issues with thread synchronization due to lost signals in pthreads....annoying...
pthread lost wake ups: Lost wakeups in pthreads
Final Code that appears to work:
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <termios.h>
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>
#define SPORT "//dev//serial0"
#define NETPAYLOADSZ 256
// One packet, accessible either through named header fields followed by the
// payload, or as a raw byte array.
typedef union __attribute__((packed)) PACKET
{
struct
{
uint8_t SID; // header byte 0 (DatGen writes 0x55) - presumably source ID, TODO confirm
uint8_t DID; // header byte 1 (DatGen writes 0xAA) - presumably destination ID, TODO confirm
uint8_t LEN; // total packet length in bytes (payload length + 7 in DatGen)
uint8_t SPT; // header byte 3 (DatGen writes 0xEE) - presumably source port, TODO confirm
uint8_t DPT; // header byte 4 (DatGen writes 0x22) - presumably destination port, TODO confirm
uint8_t TID; // header byte 5 (DatGen writes 0x77)
uint8_t DAT[249];//payload; the last packet byte, packet[LEN-1], is meant to be the checksum (DatGen currently writes 0xFF there)
};
uint8_t packet[NETPAYLOADSZ]; // raw view of the same bytes
}uPacket;
// Life-cycle state of a worker thread
enum THRDSTAT { eNOTRUNNING, eRUNNING, eSLEEPING, eSTOPPED };
// All state shared between the signal handler, main, DatGen and SerTx
struct SerRes
{
int sigint; // set to 1 by SIGHandler on the first SIGINT; both thread loops stop when it is non-zero
int serialFD; // file descriptor of the serial port opened in Init()
uint16_t datInPoolIdx; //written by GenDat, read by SerTx: next stDatPool slot to fill
uint16_t datOutPoolIdx;//written by SerTx: stDatPool slot being consumed
enum THRDSTAT statusGen; // state of the DatGen thread
enum THRDSTAT statusTX; // state of the SerTx thread
uint8_t genSig; // set by SerTx when it signals genDatSleep; lets DatGen tell a real wake-up from a spurious one
uint8_t txSig; // set by DatGen when it signals serTxSleep; same purpose for SerTx
uint8_t StartCond; // 1 after Init(); cleared by DatGen the first time it goes to sleep
uint8_t dataAvail; // number of produced, not yet consumed packets; modified under sermut
pthread_mutex_t sermut; // guards dataAvail
pthread_mutex_t genSleepmux; // guards the ring indices and is used with both condition variables
pthread_mutex_t serTxSleepmux; // declared but not used by the threads in this version
pthread_cond_t genDatSleep; // DatGen waits here while the ring is full
pthread_cond_t serTxSleep; // SerTx waits here while the ring is empty
};
#define DATPOOLSZ 16
struct SerRes serres; // the single shared instance
uPacket stDatPool[DATPOOLSZ]; // ring of packet buffers, indexed by datInPoolIdx/datOutPoolIdx
/*
 * SIGINT handler.
 *
 * First signal: shortens the serial read timeout (VTIME = 1, VMIN = 0) so a
 * blocked read returns quickly, then sets serres.sigint so both worker loops
 * terminate.
 * Second signal: closes the serial port, marks both threads stopped and
 * terminates the process immediately.
 */
void SIGHandler(int num)
{
    struct termios options;

    (void)num;

    if(serres.sigint == 1)
    {
        close(serres.serialFD);
        serres.statusGen = eSTOPPED;
        serres.statusTX = eSTOPPED;
        /* exit() is not async-signal-safe (it runs atexit handlers and
         * flushes stdio, which can deadlock if the signal interrupted a
         * printf); _exit() is safe to call from a handler. */
        _exit(-1);
    }

    tcgetattr(serres.serialFD, &options);
    options.c_cc[VTIME] = 1; //read timeout of 100ms (VTIME is in tenths of a second)
    options.c_cc[VMIN] = 0;  //do not require any character before read() may return
    tcsetattr(serres.serialFD, TCSANOW, &options);
    serres.sigint = 1;
}
/*
 * Reset the shared state and the packet pool, create the synchronisation
 * objects and open and configure the serial port at serpath for raw 8-bit
 * I/O at 115200 baud.
 *
 * Terminates the process with exit(-1) if the port cannot be opened or its
 * terminal attributes cannot be read or written.
 */
void Init(char *serpath)
{
    struct termios options;

    memset(&serres, 0, sizeof(struct SerRes));
    /* Clear the whole pool. The original size, sizeof(uPacket*)*DATPOOLSZ,
     * is the size of a pointer table and covers only part of the array. */
    memset(&stDatPool, 0, sizeof(stDatPool));
    //serres.datInPoolIdx = 1; //starting condition
    serres.StartCond = 1;

    pthread_mutex_init(&serres.sermut, NULL);
    pthread_mutex_init(&serres.genSleepmux, NULL);
    /* Deinit-time symmetry and portability: initialise every mutex in serres */
    pthread_mutex_init(&serres.serTxSleepmux, NULL);
    pthread_cond_init(&serres.genDatSleep, NULL);
    pthread_cond_init(&serres.serTxSleep, NULL);

    serres.serialFD = open(serpath, O_RDWR | O_NOCTTY | O_NDELAY);
    if(serres.serialFD < 0)
    {
        printf("\nError no is: %d", errno);
        printf("\nError description: %s\n", strerror(errno));
        exit(-1);
    }

    /* Read the current settings first, then make them raw; the reverse order
     * would let tcgetattr() undo cfmakeraw() and leave OPOST enabled. */
    if(tcgetattr(serres.serialFD, &options) < 0)
    {
        printf("\nError no is: %d", errno);
        printf("\nError description: %s\n", strerror(errno));
        exit(-1);
    }
    cfmakeraw(&options);
    options.c_cflag = CS8 | CLOCAL | CREAD;
    options.c_lflag &= ~(ICANON | ECHO | ECHOE | ISIG );
    options.c_oflag &= ~OPOST; //no output post-processing (e.g. no CR inserted before LF)
    options.c_cc[VTIME] = 10; //read timeout of 1s (VTIME is in tenths of a second)
    options.c_cc[VMIN] = 1;   //receive at least 1 character
    cfsetspeed(&options, B115200); //set both input and output speed
    if(tcsetattr(serres.serialFD, TCSANOW, &options) < 0)
    {
        printf("\nError no is: %d", errno);
        printf("\nError description: %s\n", strerror(errno));
        exit(-1);
    }
}
/*
 * Close the serial port and destroy every mutex and condition variable held
 * in serres. Must only be called after both worker threads have finished.
 */
void Deinit(void)
{
    close(serres.serialFD);

    pthread_mutex_destroy(&serres.sermut);
    pthread_mutex_destroy(&serres.genSleepmux);
    pthread_mutex_destroy(&serres.serTxSleepmux);
    pthread_cond_destroy(&serres.genDatSleep);
    /* serTxSleep is created in Init() and was never released before */
    pthread_cond_destroy(&serres.serTxSleep);
}
/*
 * Producer thread (diagnostic version): fills stDatPool slots with packets
 * whose payload is the incrementing sequence 0..0x2F, so that SerTx can
 * verify every byte.
 *
 * The ring is treated as full when datInPoolIdx == datOutPoolIdx; in that
 * case SerTx is woken (if asleep) and this thread waits on genDatSleep.
 * Stops on SIGINT or when SerTx has stopped.
 * Returns (via pthread_exit) a pointer to serres.sigint.
 */
void *DatGen(void *arg)
{
    uint8_t idx;
    uint8_t cksum = 0;
    pid_t tid = (pid_t)syscall(__NR_gettid);

    (void)arg;

    printf("\nDatGen - %d: Starting: \r\n", tid);
    serres.statusGen = eRUNNING;
    srand(0x089FFEE4);
    serres.datInPoolIdx = 1;// (serres.datInPoolIdx + 1) & (DATPOOLSZ - 1);
    while(serres.sigint == 0 && serres.statusGen != eSTOPPED)
    {
        //Sleep Condition
        pthread_mutex_lock(&serres.genSleepmux);
        if(serres.datInPoolIdx == serres.datOutPoolIdx) //full condition
        {
            printf("DatGen - %d: Sleeping - %d\r\n", tid, serres.dataAvail );
            serres.statusGen = eSLEEPING;
            serres.StartCond = 0;
            if(serres.statusTX == eSLEEPING)
            {
                serres.txSig = 1;
                pthread_cond_signal(&serres.serTxSleep);
            }
            while((serres.datInPoolIdx == serres.datOutPoolIdx) && (serres.genSig == 0)) //Guard against spurious wake up events
            {
                serres.genSig = 0;
                pthread_cond_wait(&serres.genDatSleep, &serres.genSleepmux);
            }
            serres.genSig = 0;
            printf("Datgen - %d: Wokeup\r\n", tid);
            if(serres.statusTX == eSTOPPED)
            {
                /* Release the mutex before leaving; the original broke out
                 * of the loop with genSleepmux still locked. */
                pthread_mutex_unlock(&serres.genSleepmux);
                break;
            }
            serres.statusGen = eRUNNING;
        }
        /* Claim the slot and advance the index while still holding the lock */
        idx = serres.datInPoolIdx;
        assert(idx < DATPOOLSZ);
        serres.datInPoolIdx = (serres.datInPoolIdx + 1) & (DATPOOLSZ - 1);
        pthread_mutex_unlock(&serres.genSleepmux);

        //Generate the Packets.
        stDatPool[idx].SID = 0x55;
        stDatPool[idx].DID = 0xAA;
        stDatPool[idx].LEN = 0x30; //(rand() % 100);
        stDatPool[idx].SPT = 0xEE;
        stDatPool[idx].DPT = 0x22;
        stDatPool[idx].TID = 0x77;
        for(int i = 0; i < stDatPool[idx].LEN ; i++)
        {
            stDatPool[idx].DAT[i] = i; //rand() % 100; //Only Write
            cksum += stDatPool[idx].DAT[i];
        }
        /* LEN becomes the total packet length:
         * 6 header bytes + payload + 1 trailer byte */
        stDatPool[idx].LEN += 7;
        cksum += stDatPool[idx].SID + stDatPool[idx].DID + stDatPool[idx].SPT + stDatPool[idx].LEN;
        cksum += stDatPool[idx].DPT + stDatPool[idx].TID;
        stDatPool[idx].packet[stDatPool[idx].LEN-1] = 0xFF;

        /********* Critical Shared Section *************/
        pthread_mutex_lock(&serres.sermut);
        serres.dataAvail++; //Touched by both threads...
        pthread_mutex_unlock(&serres.sermut);
        /*************************************************/
    }
    serres.statusGen = eSTOPPED;
    pthread_exit(&serres.sigint);
}
#define LOOPCOUNT 8
/*
 * Consumer thread (diagnostic version): instead of writing to the serial
 * port it verifies that every packet taken from stDatPool has the expected
 * header, the incrementing payload 0..LEN-8 and the 0xFF trailer, and aborts
 * via assert() on the first mismatch. The slot is cleared afterwards.
 *
 * The ring is treated as empty when, after advancing, datOutPoolIdx ==
 * datInPoolIdx; in that case DatGen is woken (if asleep) and this thread
 * waits on serTxSleep. Stops on SIGINT.
 * Returns (via pthread_exit) a pointer to serres.sigint.
 */
void *SerTx(void *arg)
{
    pid_t tid = (pid_t)syscall(__NR_gettid);
    uint8_t idx = 0;
    uint32_t count = 0;
    uint8_t bytesSent = 0; /* only used by the disabled write() path below */

    (void)arg;
    (void)bytesSent;

    serres.statusTX = eRUNNING;
    printf("SerTx - %d: Starting\r\n", tid);
    while(serres.sigint == 0 && serres.statusTX != eSTOPPED)// && count < LOOPCOUNT)
    {
        //Sleep Condition
        pthread_mutex_lock(&serres.genSleepmux);
        serres.datOutPoolIdx = (serres.datOutPoolIdx + 1) & (DATPOOLSZ -1);
        if((serres.datOutPoolIdx == serres.datInPoolIdx)) //Empty Condition, sleep on first start.
        {
            if(serres.statusGen == eSLEEPING)
            {
                printf("Wake GenDat\r\n");
                pthread_cond_signal(&serres.genDatSleep);
                serres.genSig = 1;
            }
            printf("SerTx - %d: Sleep\r\n", tid);
            serres.statusTX = eSLEEPING;
            while((serres.datOutPoolIdx == serres.datInPoolIdx) && serres.txSig == 0) //Guard against spurious wakeup events.
            {
                serres.txSig = 0;
                pthread_cond_wait(&serres.serTxSleep, &serres.genSleepmux);
            }
            serres.txSig = 0;
            serres.statusTX = eRUNNING;
            printf("SerTx - %d: Running\r\n", tid);
        }
        idx = serres.datOutPoolIdx;
        assert(idx < DATPOOLSZ);
        pthread_mutex_unlock(&serres.genSleepmux);

        //Output: verify the packet contents
        assert(stDatPool[idx].SID == 0x55);
        /* The original asserted "DID != 0xAA" inside "if (DID != 0xAA)",
         * which can never fail; the intended check is equality. */
        assert(stDatPool[idx].DID == 0xAA);
        assert(stDatPool[idx].LEN == 0x37); //(rand() % 100);
        assert(stDatPool[idx].SPT == 0xEE);
        assert(stDatPool[idx].DPT == 0x22);
        assert(stDatPool[idx].TID == 0x77);
        for(int i = 0; i < (stDatPool[idx].LEN-7); i++)
        {
            assert(stDatPool[idx].DAT[i] == i);
        }
        assert(stDatPool[idx].packet[stDatPool[idx].LEN-1] == 0xFF);
        /* bytesSent = write(serres.serialFD, &stDatPool[idx].packet[0], stDatPool[idx].LEN); //only Read
        if(stDatPool[idx].LEN != bytesSent) //did we not send all the bytes?
        {
        printf("Pkt Len: %x\nBytesSent: %x\r\n", stDatPool[idx].LEN, bytesSent);
        assert(0);
        }
        */
        //printf("Consume: %d\r\n", stDatPool[idx].LEN);

        /********* Critical Shared Section *************/
        pthread_mutex_lock(&serres.sermut);
        memset(&stDatPool[idx], 0, sizeof(stDatPool[idx]));
        serres.dataAvail--; //shared write
        pthread_mutex_unlock(&serres.sermut);
        /*************************************************/
        //usleep(1000000);
        //tcflush(serres.serialFD, TCIOFLUSH);
        count++;
    }
    serres.statusTX = eSTOPPED;
    pthread_cond_signal(&serres.genDatSleep);
    pthread_exit(&serres.sigint);
}
/*
 * Program entry point.
 *
 * pthread DatGen generates the data for SerTx.
 * pthread SerTx consumes the data generated by DatGen and sends it out the serial port.
 *
 * Opens the serial port, installs the SIGINT handler, starts both threads and
 * waits for them to finish. Returns 0 on normal completion and EXIT_FAILURE
 * if a thread could not be created.
 */
int main(int argc, char **argv)
{
    pthread_t datGen, serTx;
    pid_t pid = getpid();
    int err;

    (void)argc;
    (void)argv;

    Init(SPORT);
    signal(SIGINT, SIGHandler);

    err = pthread_create(&datGen, NULL, DatGen, NULL);
    if(err != 0)
    {
        fprintf(stderr, "pthread_create(DatGen) failed: %s\n", strerror(err));
        Deinit();
        return EXIT_FAILURE;
    }

    err = pthread_create(&serTx, NULL, SerTx, NULL);
    if(err != 0)
    {
        /* DatGen is already running and would block on a full ring forever
         * without a consumer, so end the whole process. */
        fprintf(stderr, "pthread_create(SerTx) failed: %s\n", strerror(err));
        exit(EXIT_FAILURE);
    }

    //Wait for all the threads to close
    pthread_join(datGen,NULL);
    /* The original spun on "while(serres.StartCond == 1);" here. Only DatGen
     * ever clears StartCond, and DatGen has already exited at this point, so
     * that loop was either a no-op or an endless busy-wait. */
    pthread_join(serTx,NULL);

    Deinit();
    printf("\n>>>> End <<<<< %d\r\n", pid);
    return 0;
}
Edit: Thank you for editing your question and posting your generated data and the corresponding actual data received at the remote device.
Your problem [and solution] is much simpler. See the UPDATE #2 section below.
You are accessing your ring queue index variables (e.g. datInPoolIdx and datOutPoolIdx) outside of a locked region.
I admire all the work you put into this. But, I think you've got a bit too much complexity.
You really only need to access the index variables under lock.
Loosely ...
Tx thread should sleep if the ring queue is empty:
enqidx == deqidx
Gen thread should sleep if the ring queue is full:
((enqidx + 1) % DATPOOLSIZ) == deqidx
The pthread_cond_wait/pthread_cond_signal conditions should be based on a comparison of these values using something like the above.
Here's some pseudo code, based loosely on what you have. It doesn't have any condition variables, but I think you'll get the idea of how to add that if needed.
These functions only do byte-at-a-time. But, there is a way to modify them, so they produce a length and number of contiguous bytes either free or available so you can use memcpy to move a bunch of bytes into/out of the queue in bulk.
// queue_wrap_idx -- step a ring queue index forward by one slot
// RETURNS: (idx + 1), wrapped back to 0 when it reaches DATPOOLSIZ
int
queue_wrap_idx(int idx)
{
    return (idx + 1) % DATPOOLSIZ;
}
// gen_avail -- check whether the generator may store another element
// The queue counts as full when advancing the enqueue index would make it
// equal to the dequeue index (one slot is always left unused).
// RETURNS: index of place to store (or -1=full)
int
gen_avail(void)
{
    int in;
    int out;

    // take a consistent snapshot of both indexes
    lock();
    in = datInPoolIdx;
    out = datOutPoolIdx;
    unlock();

    return (queue_wrap_idx(in) == out) ? -1 : in;
}
// gen_advance -- publish the element just stored by moving the enqueue
// index to the next slot (done under the lock)
void
gen_advance(void)
{
    lock();
    datInPoolIdx = queue_wrap_idx(datInPoolIdx);
    unlock();
}
// tx_avail -- check whether the transmitter has an element to take
// The queue counts as empty when both indexes are equal.
// RETURNS: index of place to dequeue (or -1=empty)
int
tx_avail(void)
{
    int in;
    int out;

    // take a consistent snapshot of both indexes
    lock();
    in = datInPoolIdx;
    out = datOutPoolIdx;
    unlock();

    return (out == in) ? -1 : out;
}
// tx_advance -- release the slot just consumed by moving the dequeue index
// forward one position (done entirely under the lock)
void
tx_advance(void)
{
    lock();
    datOutPoolIdx = queue_wrap_idx(datOutPoolIdx);
    unlock();
}
// gen_thread -- data generator thread
// Loops forever: whenever the queue has a free slot, stores one rand() value
// into it and then advances the enqueue index. Polls again when it is full.
void
gen_thread(void *ptr)
{
    for (;;) {
        int slot = gen_avail();

        // queue full -- poll again
        if (slot < 0)
            continue;

        DAT[slot] = rand();
        gen_advance();
    }
}
// tx_thread -- serial port transmit thread
// Loops forever: whenever the queue holds data, copies one byte out,
// advances the dequeue index, then writes that byte to serport.
void
tx_thread(void *ptr)
{
    for (;;) {
        int slot = tx_avail();

        // queue empty -- poll again
        if (slot < 0)
            continue;

        // copy the byte out before releasing the slot so the write below
        // works on a private copy
        char outbyte = DAT[slot];
        tx_advance();
        write(serport,&outbyte,1);
    }
}
UPDATE:
I can certainly change it, no biggie, but I don't think its the problem I'm experiencing.
You definitely have race conditions, as I've mentioned. Unfixed/latent race conditions appear very much like glitchy H/W. Until you fix these issues, you can't speculate further.
If I was overwriting the transmitted memory region via the GenDat thread then I would expect to see fragments on the receiving side. What I'm seeing is a single incorrect byte that is affecting my packaging of data.
Build a diagnostic mode [or two] ...
Just have GenThread send an incrementing stream of bytes (vs. rand/whatever). In this mode, skip the UART TX altogether. It is easy for the TxThread to notice a gap [because it should see the sequence 0x00-0xFF repeated indefinitely].
This will run the threads at a much faster rate and be much more likely to show up race conditions.
Remove all [debug] printf. They have locks that can perturb the timing so you're not measuring your [real] system, but the system with the printf. The printf calls are slow and mess things up.
You can add random nanosleep calls to stress the setup further. Be creative. Use the dark side of the force to create a test that is much worse than what the real system would experience.
You can have the GenThread deliberately send an out-of-sequence byte to verify that the TxThread can detect a gap.
When you've got all that working, run the diagnostic mode for a day or so to see what happens. In my experience, you'll see something usually within a few minutes to an hour. When I was doing testing, I'd run it overnight as an acceptance test.
It looks like the uart TX hardware is glitching, is it hardware? LOL yes!!(i'm software) but its most definitely software...somewhere.
Hmm ... Unlikely. I have direct [commercial/product] experience with UART TX on an RPi.
You might have inconsistent setup of RTS/CTS between sender system and receiver system. Or, the Tx/Rx clock frequencies / baud rates are slightly off.
The remote system might be overrunning the buffer (i.e.) it can't keep up with the [burst] data rate.
The receiver might be slow in processing the data. You should use hires timestamping (e.g. clock_gettime(CLOCK_MONOTONIC,...)) to mark byte arrival, etc.
What I do for that is have a "trace buffer" that records "event" type and timestamp. Unfortunately [for you ;-)], I use a ring queue to record these events [using stdatomic.h functions such as atomic_compare_exchange_strong] to add/remove the trace entries.
So, you'd need a solid multithread ring queue implementation to save the trace entries [herein, a chicken-and-the-egg problem]. If the receiver is something like FreeRTOS, you'll have to deal with cli/sti and other bare metal considerations.
UPDATE #2:
I wrote a small perl script to analyze and compare your data. This analysis could also be done by creating two files that have the two digit hex values, one per line. Here is the diff -u output:
--- tx.txt 2021-02-23 10:38:22.295135431 -0500
+++ rx.txt 2021-02-23 10:38:22.295135431 -0500
@@ -10,6 +10,7 @@
AD
EB
C9
+0D
0A
B2
1D
@@ -52,4 +53,3 @@
1D
A7
8C
-FF
The data received at the device is identical to the generated sequence except when the generated data byte is:
0A
The receiver gets:
0D 0A
The host kernel TTY layer is sending <CR><LF> when it sees <LF>.
This is because when you were setting up the termios parameters, you did not set up "raw" mode correctly.
Specifically, you didn't disable "implementation-defined output processing" [per the example in man termios].
That is, in Init, you need to add:
options.c_oflag &= ~OPOST;
I'm trying to modify the UDP payload I receive from the client to clone and redirect the packet in order to respond to it. Swapping the MAC and IP addresses is already done, as well as the cloning, but I don't know how to modify the UDP payload.
I want to send a payload with the following 3 bytes: 0x1a 0x31 0x0f back to the client, either by creating a new payload (better) and replacing it in the packet or by replacement. How can I do that?
Here is my code so far:
UPDATED BASED ON COMMENTS (Now only needs checksum recalculation):
/*
 * pingpong - tc program that tries to bounce an IPv4/UDP packet back out of
 * the interface it arrived on, carrying the payload 0x1a 0x31 0x0f.
 *
 * Any frame that is too short for Ethernet + IPv4 + UDP headers, is not
 * IPv4, or is not UDP is passed on with TC_ACT_UNSPEC. For matching packets
 * the MAC and IP addresses are swapped in place, the UDP ports are rewritten,
 * the packet is shrunk by one byte, three payload bytes are stored, the L4
 * checksum is patched, and the result is handed to bpf_clone_redirect().
 *
 * NOTE(review): port, srcport, payload_offset, datap and L4_CSUM_OFF are not
 * declared anywhere in this snippet - confirm where they come from.
 * NOTE(review): the function is declared int but no value is returned after
 * bpf_clone_redirect(); the tc verdict on that path is therefore undefined.
 * NOTE(review): nothing here rewrites udpdata->len or the IP total length
 * after the packet is shrunk - confirm whether they need updating.
 */
int pingpong(struct __sk_buff *skb)
{
void *data = (void *)(long)skb->data;
void *data_end = (void *)(long)skb->data_end;
struct ethhdr *eth = data;
struct iphdr *ip;
struct udphdr *udpdata;
// Bounds-check each header against data_end before it is dereferenced.
if ((void *)eth + sizeof(*eth) > data_end) {
return TC_ACT_UNSPEC;
}
ip = data + sizeof(*eth);
if ((void *)ip + sizeof(*ip) > data_end) {
return TC_ACT_UNSPEC;
}
// The UDP header is located at a fixed sizeof(*ip) past the IP header, so
// IP options are not accounted for.
udpdata = (void *)ip + sizeof(*ip);
if ((void *)udpdata + sizeof(*udpdata) > data_end) {
return TC_ACT_UNSPEC;
}
// Only IPv4 carrying UDP is handled; everything else is left untouched.
if (eth->h_proto != htons(ETH_P_IP)) {
return TC_ACT_UNSPEC;
}
if (ip->protocol != IPPROTO_UDP) {
return TC_ACT_UNSPEC;
}
unsigned int payload_size;
unsigned char *payload;
// Payload length is taken from the UDP length field (header excluded) and
// checked against the end of the packet data.
payload_size = ntohs(udpdata->len) - sizeof(*udpdata);
payload = (unsigned char *)udpdata + sizeof(*udpdata);
if ((void *)payload + payload_size > data_end) {
return TC_ACT_UNSPEC;
}
// 1. Swap the MACs
__u8 tmp_mac[ETH_ALEN];
memcpy(tmp_mac, eth->h_dest, ETH_ALEN);
memcpy(eth->h_dest, eth->h_source, ETH_ALEN);
memcpy(eth->h_source, tmp_mac, ETH_ALEN);
// 2. Swap the IPs
// (h_proto was already verified to be ETH_P_IP above, so this test always
// passes at this point.)
if (eth->h_proto == htons(ETH_P_IP)) {
__u32 tmp_ip = ip->saddr;
ip->saddr = ip->daddr;
ip->daddr = tmp_ip;
}
// 3. Swap the ports
// NOTE(review): the values come from port/srcport, which are not read from
// the packet in this snippet.
udpdata->source = port;
udpdata->dest = srcport;
// 4. Change the payload to be 0x1a 0x31 0x0f
// Shrink the packet by one byte; the helper's return value is not checked.
// NOTE(review): the direct packet pointers obtained above are not used
// again below, only offset-based helpers - presumably because they must be
// re-validated after the resize; confirm against the helper documentation.
bpf_skb_adjust_room(skb, -1, BPF_ADJ_ROOM_NET, 0);
uint8_t byte1 = 0x1a;
uint8_t byte2 = 0x31;
uint8_t byte3 = 0x0f;
// Each store overwrites ret, so only the result of the last store is kept
// (and it is never examined).
int ret = bpf_skb_store_bytes(skb, payload_offset, &byte1, sizeof(byte1), 0);
ret = bpf_skb_store_bytes(skb, payload_offset+1, &byte2, sizeof(byte2), 0);
ret = bpf_skb_store_bytes(skb, payload_offset+2, &byte3, sizeof(byte3), 0);
// Re-calculate the checksum
bpf_l4_csum_replace(skb, L4_CSUM_OFF, datap[0], byte1, 0);
bpf_l4_csum_replace(skb, L4_CSUM_OFF, datap[1], byte2, 0);
bpf_l4_csum_replace(skb, L4_CSUM_OFF, datap[2], byte3, 0);
// Not working!
// Final: Redirect to be sent
// The clone is sent to the same ifindex the packet is associated with.
bpf_clone_redirect(skb, skb->ifindex, 0);
}
If I just change the payload without removing the 1 byte using the adjust_room function, it is sent, but the last byte is 00. I want to remove that.
Any tips please? Thanks!
As suggested by @Qeole, you can use the following to change the size and content of your UDP payload:
// Remove 1 byte after IP header.
bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_NET, 0);
... re-check packet pointers or verifier will complain ...
// Need to rewrite the UDP header as the extra space was added before it.
*new_udphdr = *old_udphdr;
// Write payload.
udp_payload[0] = 0x1a
udp_payload[1] = 0x31
udp_payload[2] = 0x0f
I am trying to create a file with FatFs on a USB flash drive, but my f_open call, which tries to read the boot sector when the file system is mounted for the first time, hangs in this function.
/**
 * disk_read - FatFs disk I/O hook: read sectors from the USB mass-storage
 * device.
 *
 * @param drv     Physical drive number; only 0 is accepted.
 * @param buff    Buffer that receives the data (512 * count bytes requested).
 * @param sector  Start sector number (LBA).
 * @param count   Number of sectors to read; must be non-zero.
 *
 * @return RES_PARERR if drv is non-zero or count is zero,
 *         RES_NOTRDY if the drive status has STA_NOINIT set,
 *         RES_ERROR  if the device is not connected (on entry or during the
 *                    transfer) or the read finishes with a status other than
 *                    USBH_MSC_OK,
 *         RES_OK     when USBH_MSC_Read10 reports USBH_MSC_OK.
 *
 * NOTE: the polling loop has no timeout. It spins for as long as
 * USBH_MSC_Read10 keeps returning USBH_MSC_BUSY and the device stays
 * connected.
 */
DRESULT disk_read (
    BYTE drv,       /* Physical drive number (0) */
    BYTE *buff,     /* Pointer to the data buffer to store read data */
    DWORD sector,   /* Start sector number (LBA) */
    BYTE count      /* Sector count (1..255) */
)
{
    BYTE status;

    if (drv || !count) return RES_PARERR;
    if (Stat & STA_NOINIT) return RES_NOTRDY;

    /* No device means nothing can be read into buff, so report a failure
       (same result as a disconnect detected in the middle of a transfer)
       instead of falling through and reporting success. */
    if (!HCD_IsDeviceConnected(&USB_OTG_Core))
    {
        return RES_ERROR;
    }

    do
    {
        /* Kick / poll the READ(10) command, then run the BOT transfer
           state machine one step. */
        status = USBH_MSC_Read10(&USB_OTG_Core, buff, sector, 512 * count);
        USBH_MSC_HandleBOTXfer(&USB_OTG_Core, &USB_Host);

        if (!HCD_IsDeviceConnected(&USB_OTG_Core))
        {
            return RES_ERROR;
        }
    }
    while (status == USBH_MSC_BUSY);

    return (status == USBH_MSC_OK) ? RES_OK : RES_ERROR;
}
The main problem is the loop which creates hanging state
while(status == USBH_MSC_BUSY );
So I do not know how to avoid this. Using the debugger, I discovered that this state occurs because the CmdStateMachine field of USBH_MSC_BOTXferParam (a structure of type USBH_BOTXfer_TypeDef) is equal to CMD_UNINITIALIZED_STATE, so the switch statement in the USBH_MSC_Read10 function matches none of its cases.
/**
  * @brief  USBH_MSC_Read10
  *         Issue the read command to the device. Once the response is
  *         received, it updates the status to the upper layer.
  *
  *         The function is a polled state machine driven by
  *         USBH_MSC_BOTXferParam.CmdStateMachine and must be called
  *         repeatedly:
  *           - CMD_SEND_STATE:  builds the READ(10) command block, arms the
  *             BOT transfer and moves to CMD_WAIT_STATUS.
  *           - CMD_WAIT_STATUS: inspects BOTXferStatus and, on completion,
  *             failure or phase error, moves back to CMD_SEND_STATE.
  *           - any other state: nothing is done.
  *
  * @param  pdev       : USB OTG core handle, used for the connection check
  * @param  dataBuffer : DataBuffer will contain the data to be read
  * @param  address    : Address (logical block) from which the data will be read
  * @param  nbOfbytes  : Number of bytes to be read
  * @retval Status     : USBH_MSC_OK once the transfer has completed,
  *                      USBH_MSC_PHASE_ERROR on a phase error,
  *                      USBH_MSC_BUSY in every other case (command just
  *                      sent, still in progress, transfer failed and will be
  *                      re-issued on the next call, device not connected,
  *                      or state machine in an unhandled state).
  */
uint8_t USBH_MSC_Read10(USB_OTG_CORE_HANDLE *pdev,
                        uint8_t *dataBuffer,
                        uint32_t address,
                        uint32_t nbOfbytes)
{
  uint8_t index;
  /* Assigned on every call, so it does not need static storage. */
  USBH_MSC_Status_TypeDef status = USBH_MSC_BUSY;
  uint16_t nbOfPages;

  if(HCD_IsDeviceConnected(pdev))
  {
    switch(USBH_MSC_BOTXferParam.CmdStateMachine)
    {
    case CMD_SEND_STATE:
      /*Prepare the CBW and relevant field*/
      USBH_MSC_CBWData.field.CBWTransferLength = nbOfbytes;
      USBH_MSC_CBWData.field.CBWFlags = USB_EP_DIR_IN;
      USBH_MSC_CBWData.field.CBWLength = CBW_LENGTH;
      USBH_MSC_BOTXferParam.pRxTxBuff = dataBuffer;

      /* Clear the command block, indices 0 .. CBW_CB_LENGTH - 1.
         The previous loop ran from CBW_CB_LENGTH down to 1, i.e. it wrote
         one element past the end (assuming CBWCB holds CBW_CB_LENGTH bytes;
         its declaration is not visible here) and skipped element 0. */
      for(index = 0; index < CBW_CB_LENGTH; index++)
      {
        USBH_MSC_CBWData.field.CBWCB[index] = 0x00;
      }

      USBH_MSC_CBWData.field.CBWCB[0] = OPCODE_READ10;

      /*logical block address, most significant byte first.
        Shifts are used instead of byte-wise pointer access so the result
        does not depend on the host byte order. */
      USBH_MSC_CBWData.field.CBWCB[2] = (uint8_t)(address >> 24);
      USBH_MSC_CBWData.field.CBWCB[3] = (uint8_t)(address >> 16);
      USBH_MSC_CBWData.field.CBWCB[4] = (uint8_t)(address >> 8);
      USBH_MSC_CBWData.field.CBWCB[5] = (uint8_t)(address);

      /*USBH_MSC_PAGE_LENGTH = 512*/
      nbOfPages = nbOfbytes / USBH_MSC_PAGE_LENGTH;

      /*Transfer length in pages, most significant byte first */
      USBH_MSC_CBWData.field.CBWCB[7] = (uint8_t)(nbOfPages >> 8);
      USBH_MSC_CBWData.field.CBWCB[8] = (uint8_t)(nbOfPages);

      USBH_MSC_BOTXferParam.BOTState = USBH_MSC_SEND_CBW;
      /* Start the transfer, then let the state machine
         manage the other transactions */
      USBH_MSC_BOTXferParam.MSCState = USBH_MSC_BOT_USB_TRANSFERS;
      USBH_MSC_BOTXferParam.BOTXferStatus = USBH_MSC_BUSY;
      USBH_MSC_BOTXferParam.CmdStateMachine = CMD_WAIT_STATUS;

      status = USBH_MSC_BUSY;
      break;

    case CMD_WAIT_STATUS:
      if((USBH_MSC_BOTXferParam.BOTXferStatus == USBH_MSC_OK) && \
         (HCD_IsDeviceConnected(pdev)))
      {
        /* Commands successfully sent and Response Received */
        USBH_MSC_BOTXferParam.CmdStateMachine = CMD_SEND_STATE;
        status = USBH_MSC_OK;
      }
      else if (( USBH_MSC_BOTXferParam.BOTXferStatus == USBH_MSC_FAIL ) && \
               (HCD_IsDeviceConnected(pdev)))
      {
        /* Failure Mode: re-arm the command; status stays BUSY, so the
           next call sends the command again */
        USBH_MSC_BOTXferParam.CmdStateMachine = CMD_SEND_STATE;
      }
      else if ( USBH_MSC_BOTXferParam.BOTXferStatus == USBH_MSC_PHASE_ERROR )
      {
        /* Failure Mode */
        USBH_MSC_BOTXferParam.CmdStateMachine = CMD_SEND_STATE;
        status = USBH_MSC_PHASE_ERROR;
      }
      else
      {
        /* Wait for the Commands to get Completed */
        /* NO Change in state Machine */
      }
      break;

    default:
      /* Any other state (e.g. a never-initialised state machine) is left
         alone and reported as BUSY. */
      break;
    }
  }
  return status;
}
Here is USBH_BOTXfer_TypeDef type declaration;
/*
 * State shared between the MSC command layer and the BOT transfer state
 * machine. Field notes below describe only what USBH_MSC_Read10 is seen to
 * do with them; the remaining fields are used elsewhere in the library.
 */
typedef struct _BOTXfer
{
uint8_t MSCState;           /* set to USBH_MSC_BOT_USB_TRANSFERS when a read is started */
uint8_t MSCStateBkp;        /* presumably a saved copy of MSCState - TODO confirm */
uint8_t MSCStateCurrent;
uint8_t CmdStateMachine;    /* command phase: CMD_SEND_STATE / CMD_WAIT_STATUS; other values are ignored by USBH_MSC_Read10 */
uint8_t BOTState;           /* set to USBH_MSC_SEND_CBW when a read is started */
uint8_t BOTStateBkp;        /* presumably a saved copy of BOTState - TODO confirm */
uint8_t* pRxTxBuff;         /* data buffer of the current transfer (caller's buffer for a read) */
uint16_t DataLength;
uint8_t BOTXferErrorCount;
uint8_t BOTXferStatus;      /* transfer result polled in CMD_WAIT_STATUS: USBH_MSC_BUSY / _OK / _FAIL / _PHASE_ERROR */
} USBH_BOTXfer_TypeDef;
While debugging, I discovered that all of its fields are 0x00.
Here are my FatFs calls
/*
 * Program entry point.
 *
 * Enables the GPIOD clock, configures the debug and GPIOD registers, then
 * loops forever: makes sure the USB MSC host is initialised, and while a
 * device is connected mounts volume 0, creates/truncates "0:DP_lab8.pdf",
 * closes it and unmounts again. GPIOD bit 15 is driven when a device is
 * connected, bit 14 otherwise. USB_MSC_Main() is called once per iteration.
 */
int main(void)
{
    FATFS volume;
    FIL handle;
    FRESULT rc;

    RCC->AHB1ENR |= RCC_AHB1ENR_GPIODEN;

    /* Enable SWO output */
    DBGMCU->CR = 0x00000020;

    GPIOD->MODER = 0x55000000;
    GPIOD->OTYPER = 0x00000000;
    GPIOD->OSPEEDR = 0x00000001;

    for (;;)
    {
        if (!USB_MSC_IsInitialized())
        {
            USB_MSC_Initialize();
        }

        if (!USB_MSC_IsConnected())
        {
            GPIOD->ODR = (1 << 14);
        }
        else
        {
            GPIOD->ODR = (1 << 15);
            disk_initialize(0);

            rc = f_mount(0, &volume);
            if (rc == FR_OK)
            {
                rc = f_open(&handle, "0:DP_lab8.pdf", (FA_CREATE_ALWAYS | FA_WRITE));
                if (rc == FR_OK)
                {
                    f_close(&handle);
                }
                /* unmount the volume again whether or not the open worked */
                f_mount(0, NULL);
            }
        }

        /* run the USB host processing once per loop iteration */
        USB_MSC_Main();
    }
}
USB_MSC_IsConnected function is:
/*
 * Report whether a usable device is attached.
 *
 * If the host status is USB_DEV_NOT_SUPPORTED the MSC host is uninitialised
 * first. The status is then read again and the function returns 0 for
 * USB_DEV_DETACHED, USB_HOST_NO_INIT and USB_DEV_NOT_SUPPORTED, and 1 for
 * every other status.
 */
int USB_MSC_IsConnected(void)
{
    if (g_USB_MSC_HostStatus == USB_DEV_NOT_SUPPORTED)
    {
        USB_MSC_Uninitialize();
    }

    /* evaluated after the possible uninitialise above */
    switch (g_USB_MSC_HostStatus)
    {
    case USB_DEV_DETACHED:
    case USB_HOST_NO_INIT:
    case USB_DEV_NOT_SUPPORTED:
        return 0;
    default:
        return 1;
    }
}
And device states are:
/* Status values of the USB host / attached device. */
typedef enum
{
    USB_HOST_NO_INIT      = 0,  /* the USB interface has not been initialised */
    USB_DEV_DETACHED      = 1,  /* there is no device connected */
    USB_SPEED_ERROR       = 2,  /* the USB speed is unsupported */
    USB_DEV_NOT_SUPPORTED = 3,  /* the device is unsupported */
    USB_DEV_WRITE_PROTECT = 4,  /* the device is write protected */
    USB_OVER_CURRENT      = 5,  /* an overcurrent was detected */
    USB_DEV_CONNECTED     = 6   /* a device is connected and ready */
} USB_HostStatus;
The value of g_USB_MSC_HostStatus is received by standard USB HOST user callbacks.
I think it is a bug in ST host library. I've hunted it down, as my usb host was unable to pass enumeration stage. After a fix the stack is Ok.
There is a union _USB_Setup in the usbh_def.h file in "STM32Cube/Repository/STM32Cube_FW_F7_V1.13.0/Middlewares/ST/STM32_USB_Host_Library/Core/Inc" (any chip, not only F7; any version, not only V1.13.0). It declares uint16_t bmRequestType and uint16_t bRequest. According to the USB specification, these two fields must be uint8_t. Fixing this issue made the USB host work as needed: the enumeration stage passes OK, and so do all the other stages.