network device No buffer Space - c

Hello i created Network driver that uses the uart port to send and recive.
My driver works with some issues. I was able to ping but always afther a few pings i get
ping: sendmsg: No buffer space available driver
I checked the kernel logs but i could not see anything.
this is how i recive data:
struct stm32_port *stm32_port = netdev_priv(my_net);
struct sk_buff *skb;
unsigned char *dma_start;
dma_start = stm32_port->rx_buf + (RX_BUF_L - stm32_port->last_res);
print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1, dma_start, 16, true);
skb = dev_alloc_skb(dma_size + 2);
if (!skb) {
if (printk_ratelimit( ))
printk(KERN_NOTICE "snull rx: low on mem - packet dropped\n");
my_net->stats.rx_dropped++;
//goto error;
}
memcpy(skb_put(skb, dma_size), dma_start, dma_size);
/* Write metadata, and then pass to the receive level */
skb->dev = my_net;
skb->protocol = eth_type_trans(skb, my_net);
skb->ip_summed = CHECKSUM_NONE; // let the OS check the checksum
my_net->stats.rx_packets++;
my_net->stats.rx_bytes += dma_size;
netif_rx(skb);
port->icount.rx += dma_size;
stm32_port->last_res -= dma_size;
if (stm32_port->last_res == 0)
stm32_port->last_res = RX_BUF_L; //dma_count
Here is how i send my data:
struct stm32_port *lp = netdev_priv(ndev);
struct uart_port *port = &lp->port;
struct sk_buff *sk_buff;
struct dma_async_tx_descriptor *desc = NULL;
struct stm32_usart_offsets *ofs = &lp->info->ofs;
unsigned pktlen = skb->len;
dma_cookie_t cookie;
int ret = 0;
//print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, 16, true);
netif_stop_queue(ndev);
sk_buff = skb_get(skb);
if (ofs->icr == UNDEF_REG){
stm32_usart_clr_bits(port, ofs->isr, USART_SR_TC);
}else{
writel_relaxed(USART_ICR_TCCF, port->membase + ofs->icr);
}
memcpy(&lp->tx_buf[0], sk_buff->data, pktlen);
desc = dmaengine_prep_slave_single(lp->tx_ch,
lp->tx_dma_buf,
pktlen,
DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT);
if (!desc){
goto fallback_err;
}
cookie = dmaengine_submit(desc);
ret = dma_submit_error(cookie);
if (ret) {
/* dma no yet started, safe to free resources */
dmaengine_terminate_async(lp->tx_ch);
goto fallback_err;
}
/* Issue pending DMA TX requests */
dma_async_issue_pending(lp->tx_ch);
stm32_usart_set_bits(port, ofs->cr3, USART_CR3_DMAT);
/* rely on TXE irq (mask or unmask) for sending remaining data */
stm32_usart_tx_interrupt_disable(port);
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += pktlen;
fallback_err:
skb_tx_timestamp(skb);
dev_kfree_skb (skb);
netif_start_queue(ndev);
return NETDEV_TX_OK;

Thanks to #stark i found a solution i free the buffer now with the __kfree_skb() methode because like that the refcount will not be checked.

Related

Netfilter hook stateful connection packet filtering

I am writing a Netfilter hook and want to do a stateful analysis of incoming TCP packets, whether they belong to an existing connection or a new connection is starting.
This is my first try at writing code using Netfilter and after reading https://people.netfilter.org/pablo/docs/login.pdf I understand I need to check if a packet is categorized as a NEW or ESTABLISHED state. But I cannot find any documentation of how to write code for this.
static unsigned int hfunc(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) {
struct iphdr *iph;
struct udphdr *udph;
if (!skb)
return NF_ACCEPT;
iph = ip_hdr(skb);
if (iph->protocol == IPPROTO_TCP) {
/*
if packet SYN flag is enabled and state==NEW:
return NF_ACCEPT
else if SYN flag is disabled and state==NEW:
return NF_DROP
*/
}
return NF_ACCEPT
}
static int __init my_net_module_init(void) {
printk(KERN_INFO "Initializing my netfilter module\n");
// Allocating memory for hook structure.
my_nf_hook = (struct nf_hook_ops*) kzalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
// Constructing the structure
my_nf_hook->hook = (nf_hookfn*)hfunc; /* hook function */
my_nf_hook->hooknum = NF_INET_PRE_ROUTING; /* received packets */
my_nf_hook->pf = PF_INET; /* IPv4 */
my_nf_hook->priority = NF_IP_PRI_FIRST; /* max hook priority */
nf_register_net_hook(&init_net, my_nf_hook);
return 0;
}
static void __exit my_net_module_exit(void) {
nf_unregister_net_hook(&init_net, my_nf_hook);
kfree(my_nf_hook);
printk(KERN_INFO "Exiting my netfilter module\n");
}
module_init(my_net_module_init);
module_exit(my_net_module_exit);
Edit:
Added code snippet for registering hook in pre-routing.
Seems that in your hook you want to make a decision on packet based on conntrack(CT) info about the connection state - to block (drop) all the TCP packets which are in the middle of connection, i.e. packets both without SYN flag and without connection entry in CT.
So if you want to reap the benefits of CT, you have to let him work a bit.
Now your hook is in NF_INET_PRE_ROUTING with NF_IP_PRI_FIRST priority. Just look at the picture of Linux kernel packet flow. If we talk about pre-routing chain CT-handling is somewhere after RAW table (i.e. with a lower priority).
The list of priorities you can see here:
enum nf_ip_hook_priorities {
NF_IP_PRI_FIRST = INT_MIN,
NF_IP_PRI_CONNTRACK_DEFRAG = -400,
NF_IP_PRI_RAW = -300,
NF_IP_PRI_SELINUX_FIRST = -225,
NF_IP_PRI_CONNTRACK = -200,
NF_IP_PRI_MANGLE = -150,
NF_IP_PRI_NAT_DST = -100,
NF_IP_PRI_FILTER = 0,
NF_IP_PRI_SECURITY = 50,
NF_IP_PRI_NAT_SRC = 100,
NF_IP_PRI_SELINUX_LAST = 225,
NF_IP_PRI_CONNTRACK_HELPER = 300,
NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
NF_IP_PRI_LAST = INT_MAX,
};
Thus to stick in after CT (after nf_conntrack_in()) you must register your hook with priority lower than NF_IP_PRI_CONNTRACK (i.e. with greater number, e.g. -50).
So you do:
static struct nf_hook_ops hooks[] __read_mostly = {
{
.hook = hfunc,
.pf = PF_INET,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK + 150
},
// ...
};
// ...
int ret;
ret = nf_register_hooks(hooks, ARRAY_SIZE(hooks));
if (ret < 0)
// error
Then you should access the CT info from within your hook:
static unsigned int hfunc(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state) {
struct iphdr *iph;
iph = ip_hdr(skb);
if (iph->protocol == IPPROTO_TCP) {
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct tcphdr *tcph;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return NF_ACCEPT;
tcph = tcp_hdr(skb)
if (tcph->syn) { // && !tcph->ack ???
if (ctinfo == IP_CT_NEW)
return NF_ACCEPT;
} else {
if (ctinfo == IP_CT_NEW)
return NF_DROP;
}
}
return NF_ACCEPT
}
Also remember that CT must be involved in your Linux kernel network processing. There should be CT modules inserted into kernel and an appropriate iptables rule added.

Writing to page mapped dmas in kernel

I've been working on modifying the intel ixgbe kernel driver to function with my PCIe device (FPGA but that's not super important). The kernel and the PCIe device all negotiate quite well, configuration headers are passed along and communication seems to function. However attempting to write DMA_FROM_DEVICE I have a slight problem that I don't understand and I'm hoping for help.
rx_ring->desc = dma_alloc_coherent(dev, ///This function allocates dma space of size size for handle dma on device dev with flag GFP KERNEL
rx_ring->size,
&rx_ring->dma, ///This dma handle may be cast to unsigned integer of the same bus width and given to dev as the DMA base address
GFP_KERNEL);
page = dev_alloc_pages(0);
dma = dma_map_page(rx_ring->dev, page, 0, acc_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
//Writing to the PCI device the base address to place data into.
writel(q_vector->adapter->rx_ring[0]->dma >> 32, q_vector->adapter->hw_region2.hw_addr+0x08+ACC_PCI_IPCONT_DATA_OFFSET);
writel(q_vector->adapter->rx_ring[0]->dma & 0xFFFFFFFF, q_vector->adapter->hw_region2.hw_addr+0x0C+ACC_PCI_IPCONT_DATA_OFFSET);
//This will perfectly read data I place onto the PCIe bus.
rx_ring->desc->wb.upper.length
//This seems to read some garbage memory.
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
acc_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
unsigned char *va = page_address(page) + rx_buffer->page_offset;
memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
//Some code later
dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
new_buff->page_offset,
acc_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
I've tried to purge code down to just the points of interest but here's the brief run down. I allocate space for the dma creating the virtual and bus address via the dma_alloc_coherent function. I create a page of memory for the dma and map this page to the dma via the dev_alloc_pages and dma_map_page commands. I pass the dma bus address to my PCIe device so it can write to the proper offset via the writel commands (I know iowrite32 but this is on redhat).
From here there are 2 ways that the origonal ixgbe driver reads data from the PCIe bus. First it directly reads from the dma's allocated virtual address (desc), but this is only used for configuration information (in the driver I am working off of). The second method is via use page_address(page) to I believe get a virtual address for the page of memory. The problem is there is only garbage memory there.
So here is my confusion. Where is page pointing to and how do I place data into page via the PCI bus? I assumed that dma_map_page would sort of merge the 2 virtual addresses into 1 so my write into the dma's bus address would collide into the page but this doesn't seem to be the case. What base address should my PCI device be writing from to align into this page of memory?
I'm working on redhat, specifically Centos kernel version 3.10.0 which makes for some problems since redhat kernel is very different from base kernel but hopefully someone can help. Thank you for any pointers.
EDIT: Added dma_sync calls which I forgot to include in original post.
EDIT2: Added a more complete code base. As a note I'm still not including some of the struct definitions or top function calls (like probe for instance), but hopefully this will be a lot more complete. Sorry for how long it is.
//These functions are called during configuration
int acc_setup_rx_resources(struct acc_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
int orig_node = dev_to_node(dev);
int numa_node = -1;
int size;
size = sizeof(struct acc_rx_buffer) * rx_ring->count;
if (rx_ring->q_vector)
numa_node = rx_ring->q_vector->numa_node;
rx_ring->rx_buffer_info = vzalloc_node(size, numa_node);
if (!rx_ring->rx_buffer_info)
rx_ring->rx_buffer_info = vzalloc(size);
if (!rx_ring->rx_buffer_info)
goto err;
/* Round up to nearest 4K */
rx_ring->size = rx_ring->count * sizeof(union acc_adv_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
set_dev_node(dev, numa_node);
rx_ring->desc = dma_alloc_coherent(dev,
rx_ring->size,
&rx_ring->dma,
GFP_KERNEL);
set_dev_node(dev, orig_node);
if (!rx_ring->desc)
rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
if (!rx_ring->desc)
goto err;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
return 0;
err:
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
return -ENOMEM;
}
static bool acc_alloc_mapped_page(struct acc_ring *rx_ring,
struct acc_rx_buffer *bi)
{
struct page *page = bi->page;
dma_addr_t dma = bi->dma;
if (likely(page))
return true;
page = dev_alloc_pages(0);
if(unlikely(!page)){
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
/* map page for use */
dma = dma_map_page(rx_ring->dev, page, 0,
acc_rx_pg_size(rx_ring), DMA_FROM_DEVICE);
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, acc_rx_pg_order(rx_ring));
bi->page = NULL;
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
page_ref_add(page, USHRT_MAX - 1); //This seems to exist in redhat kernel but not 3.10 base kernel... keep?
return true;
}
void acc_alloc_rx_buffers(struct acc_ring *rx_ring, u16 cleaned_count)
{
union acc_adv_rx_desc *rx_desc;
struct acc_rx_buffer *bi;
u16 i = rx_ring->next_to_use;
printk(KERN_INFO "acc Attempting to allocate rx buffers\n");
/* nothing to do */
if (!cleaned_count)
return;
rx_desc = ACC_RX_DESC(rx_ring, i);
bi = &rx_ring->rx_buffer_info[i];
i -= rx_ring->count;
do {
if (!acc_alloc_mapped_page(rx_ring, bi)){
printk(KERN_INFO "acc Failed to allocate and map the page to dma\n");
break;
}
printk(KERN_INFO "acc happily allocated and mapped page to dma\n");
/*
* Refresh the desc even if buffer_addrs didn't change
* because each write-back erases this info.
*/
rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
rx_desc++;
bi++; ///Move to the next buffer
i++;
if (unlikely(!i)) {
rx_desc = ACC_RX_DESC(rx_ring, 0);
bi = rx_ring->rx_buffer_info;
i -= rx_ring->count;
}
/* clear the hdr_addr for the next_to_use descriptor */
rx_desc->read.hdr_addr = 0;
cleaned_count--;
} while (cleaned_count);
i += rx_ring->count;
if (rx_ring->next_to_use != i)
acc_release_rx_desc(rx_ring, i);
}
//This function is called via a napi_schedule command which fires when an MSI interrupt is thrown from my PCIe device (all works fine).
int acc_poll(struct napi_struct *napi, int budget)
{
struct acc_q_vector *q_vector =
container_of(napi, struct acc_q_vector, napi);
struct acc_adapter *adapter = q_vector->adapter;
struct acc_ring *ring;
int per_ring_budget;
bool clean_complete = true;
e_dev_info("Landed in acc_poll\n");
e_dev_info("Attempting to read register space 0x00=%x\t0x04=%x\n", \
readl(q_vector->adapter->hw.hw_addr), readl(q_vector->adapter->hw.hw_addr+0x04));
e_dev_info("Attempting to write to pci ctl\n");
e_dev_info("Target address %.8x%.8x\n",q_vector->adapter->rx_ring[0]->dma >> 32, q_vector->adapter->rx_ring[0]->dma & 0xFFFFFFFF);
e_dev_info("Attempted page address %.8x%.8x\n",virt_to_bus(page_address(q_vector->adapter->rx_ring[0]->rx_buffer_info[0].page)) >> 32, virt_to_bus(page_address(q_vector->adapter->rx_ring[0]->rx_buffer_info[0].page)) & 0xFFFFFFFF);
writeq(0x0000000000000001, q_vector->adapter->hw_region2.hw_addr+ACC_PCI_IPCONT_DATA_OFFSET); //These are supposed to be iowrite64 but it seems iowrite64 is different in redhat and only supports the copy function (to,from,size). yay redhat think different.
writel(q_vector->adapter->rx_ring[0]->dma >> 32, q_vector->adapter->hw_region2.hw_addr+0x08+ACC_PCI_IPCONT_DATA_OFFSET);
writel(q_vector->adapter->rx_ring[0]->dma & 0xFFFFFFFF, q_vector->adapter->hw_region2.hw_addr+0x0C+ACC_PCI_IPCONT_DATA_OFFSET);
writel(virt_to_bus(page_address(q_vector->adapter->rx_ring[0]->rx_buffer_info[0].page)) >> 32, q_vector->adapter->hw_region2.hw_addr+0x10+ACC_PCI_IPCONT_DATA_OFFSET);
writel(virt_to_bus(page_address(q_vector->adapter->rx_ring[0]->rx_buffer_info[0].page)) & 0xFFFFFFFF, q_vector->adapter->hw_region2.hw_addr+0x14+ACC_PCI_IPCONT_DATA_OFFSET);
writeq(0xFF00000000000000, q_vector->adapter->hw_region2.hw_addr+0x18+ACC_PCI_IPCONT_DATA_OFFSET);
writeq(0x0000000CC0000000, q_vector->adapter->hw_region2.hw_addr+0x20+ACC_PCI_IPCONT_DATA_OFFSET);
writeq(0x0000000CC0000000, q_vector->adapter->hw_region2.hw_addr+0x28+ACC_PCI_IPCONT_DATA_OFFSET);
writeq(0x0003344000005500, q_vector->adapter->hw_region2.hw_addr+0x30+ACC_PCI_IPCONT_DATA_OFFSET);
//Send the start command to the block
writeq(0x0000000000000001, q_vector->adapter->hw_region2.hw_addr);
acc_for_each_ring(ring, q_vector->tx)
clean_complete &= !!acc_clean_tx_irq(q_vector, ring);
if (q_vector->rx.count > 1)
per_ring_budget = max(budget/q_vector->rx.count, 1);
else
per_ring_budget = budget;
acc_for_each_ring(ring, q_vector->rx){
e_dev_info("Calling clean_rx_irq\n");
clean_complete &= acc_clean_rx_irq(q_vector, ring,
per_ring_budget);
}
/* If all work not completed, return budget and keep polling */
if (!clean_complete)
return budget;
e_dev_info("Clean complete\n");
/* all work done, exit the polling mode */
napi_complete(napi);
if (adapter->rx_itr_setting & 1)
acc_set_itr(q_vector);
if (!test_bit(__ACC_DOWN, &adapter->state))
acc_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));
e_dev_info("Exiting acc_poll\n");
return 0;
}
static bool acc_clean_rx_irq(struct acc_q_vector *q_vector,
struct acc_ring *rx_ring,
const int budget)
{
printk(KERN_INFO "acc Entered clean_rx_irq\n");
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = acc_desc_unused(rx_ring); /// First pass this is count-1 because ntc and ntu are 0 so this is 512-1=511
printk(KERN_INFO "acc RX irq Clean count = %d\n", cleaned_count);
do {
union acc_adv_rx_desc *rx_desc;
struct sk_buff *skb;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= ACC_RX_BUFFER_WRITE) { //When the clean count is >16 allocate some more buffers to get the clean count down. First pass this happens.
acc_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
rx_desc = ACC_RX_DESC(rx_ring, rx_ring->next_to_clean);
printk(KERN_INFO "acc inside RX do while, acquired description\n");
printk(KERN_INFO "acc Everything I can about the rx_ring desc (acc_rx_buffer). status_error=%d\t \
length=%d\n", rx_desc->wb.upper.status_error, rx_desc->wb.upper.length);
if (!acc_test_staterr(rx_desc, ACC_RXD_STAT_DD))
break;
printk(KERN_INFO "acc inside RX past status_error check\n");
/*
* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
* RXD_STAT_DD bit is set
*/
rmb();
/* retrieve a buffer from the ring */
skb = acc_fetch_rx_buffer(rx_ring, rx_desc);
/* exit if we failed to retrieve a buffer */
if (!skb)
break;
printk(KERN_INFO "acc successfully retrieved a buffer\n");
cleaned_count++;
/* place incomplete frames back on ring for completion */
if (acc_is_non_eop(rx_ring, rx_desc, skb))
continue;
/* verify the packet layout is correct */
if (acc_cleanup_headers(rx_ring, rx_desc, skb))
continue;
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
/* populate checksum, timestamp, VLAN, and protocol */
acc_process_skb_fields(rx_ring, rx_desc, skb);
acc_rx_skb(q_vector, skb); ///I believe this sends data to the kernel network stuff and then the generic OS
/* update budget accounting */
total_rx_packets++;
} while (likely(total_rx_packets < budget));
printk(KERN_INFO "acc rx irq exited the while loop\n");
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.packets += total_rx_packets;
rx_ring->stats.bytes += total_rx_bytes;
u64_stats_update_end(&rx_ring->syncp);
q_vector->rx.total_packets += total_rx_packets;
q_vector->rx.total_bytes += total_rx_bytes;
if (cleaned_count)
acc_alloc_rx_buffers(rx_ring, cleaned_count);
printk(KERN_INFO "acc rx irq returning happily\n");
return (total_rx_packets < budget);
}
static struct sk_buff *acc_fetch_rx_buffer(struct acc_ring *rx_ring,
union acc_adv_rx_desc *rx_desc)
{
struct acc_rx_buffer *rx_buffer;
struct sk_buff *skb;
struct page *page;
printk(KERN_INFO "acc Attempting to fetch rx buffer\n");
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
page = rx_buffer->page; //This page is set by I think acc_add_rx_frag... hard to tell. yes the page is created there and kind of linked to the dma via dma_map_page
prefetchw(page); ///Prefetch the page cacheline for writing
skb = rx_buffer->skb; ///This does the mapping between skb and dma page table I believe.
if (likely(!skb)) {
printk(KERN_INFO "acc attempting to allocate netdrv space for page.\n");
void *page_addr = page_address(page) + //get the virtual page address of this page.
rx_buffer->page_offset;
/* prefetch first cache line of first page */
prefetch(page_addr);
#if L1_CACHE_BYTES < 128
prefetch(page_addr + L1_CACHE_BYTES);
#endif
/* allocate a skb to store the frags */
skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
ACC_RX_HDR_SIZE);
if (unlikely(!skb)) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
return NULL;
}
/*
* we will be copying header into skb->data in
* pskb_may_pull so it is in our interest to prefetch
* it now to avoid a possible cache miss
*/
prefetchw(skb->data);
/*
* Delay unmapping of the first packet. It carries the
* header information, HW may still access the header
* after the writeback. Only unmap it when EOP is
* reached
*/
if (likely((rx_desc, ACC_RXD_STAT_EOP)))
goto dma_sync;
ACC_CB(skb)->dma = rx_buffer->dma;
} else {
if (acc_test_staterr(rx_desc, ACC_RXD_STAT_EOP))
acc_dma_sync_frag(rx_ring, skb);
dma_sync:
/* we are reusing so sync this buffer for CPU use */
printk(KERN_INFO "acc attempting to sync the dma and the device.\n");
dma_sync_single_range_for_cpu(rx_ring->dev, //Sync to the pci device, this dma buffer, at this page offset, this ring, for device to DMA transfer
rx_buffer->dma,
rx_buffer->page_offset,
acc_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
}
/* pull page into skb */
if (acc_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
//This is again temporary to try and create blockers around the problem.
return skb;
/* hand second half of page back to the ring */
acc_reuse_rx_page(rx_ring, rx_buffer);
} else if (ACC_CB(skb)->dma == rx_buffer->dma) {
/* the page has been released from the ring */
ACC_CB(skb)->page_released = true;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page(rx_ring->dev, rx_buffer->dma,
acc_rx_pg_size(rx_ring),
DMA_FROM_DEVICE);
}
/* clear contents of buffer_info */
rx_buffer->skb = NULL;
rx_buffer->dma = 0;
rx_buffer->page = NULL;
printk(KERN_INFO "acc returning from fetch_rx_buffer.\n");
return skb;
}
static bool acc_add_rx_frag(struct acc_ring *rx_ring,
struct acc_rx_buffer *rx_buffer,
union acc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
printk(KERN_INFO "acc Attempting to add rx_frag from page.\n");
struct page *page = rx_buffer->page;
unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
#if (PAGE_SIZE < 8192)
unsigned int truesize = acc_rx_bufsz(rx_ring);
#else
unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
unsigned int last_offset = acc_rx_pg_size(rx_ring) -
acc_rx_bufsz(rx_ring);
#endif
if ((size <= ACC_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
printk(KERN_INFO "acc Inside the size check.\n");
unsigned char *va = page_address(page) + rx_buffer->page_offset;
printk(KERN_INFO "page:%p\tpage_address:%p\tpage_offset:%d\n",page,page_address(page),rx_buffer->page_offset);
printk(KERN_INFO "acc First 4 bytes of string:%x %x %x %x\n",va[0],va[1],va[2],va[3]); //FIXME: I can now read this page table but there is still no meaningful data in it. (appear to be reading garbage)
printk(KERN_INFO "acc 32 bytes in:%x %x %x %x\n",va[32],va[33],va[34],va[35]);
return true;
memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
/* we can reuse buffer as-is, just make sure it is local */
if (likely(page_to_nid(page) == numa_node_id()))
return true;
/* this page cannot be reused so discard it */
put_page(page);
return false;
}
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
rx_buffer->page_offset, size, truesize);
/* avoid re-using remote pages */
if (unlikely(page_to_nid(page) != numa_node_id()))
return false;
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
if (unlikely(page_count(page) != 1))
return false;
/* flip page offset to other buffer */
rx_buffer->page_offset ^= truesize;
/*
* since we are the only owner of the page and we need to
* increment it, just set the value to 2 in order to avoid
* an unecessary locked operation
*/
atomic_set(&page->_count, 2);
#else
/* move offset up to the next cache line */
rx_buffer->page_offset += truesize;
if (rx_buffer->page_offset > last_offset)
return false;
/* bump ref count on page before it is given to the stack */
get_page(page);
#endif
return true;
}

Bogus TCP Header Length When Examining Packets in Tshark

I was trying to send a TCP SYN packet to a server on my machine on port 8000. Then, I wanted to check if the server responded with a SYN ACK. If this was the case, then I would send back a RST packet to abort the connection. However, when I sniff the SYN packet that I send out it tells me the TCP header has a bogus length of 0, which isn't the case. The sniffer I used was tshark, by the way. Here's my code:
In the main function, I run this:
FLAGS f = SYN;
tcp_scan("127.0.0.1",8000,f,0);
This function assembles the IP header:
struct iphdr* assemble_ip(char* dest,unsigned int proto) {
/* Assemble IP Layer */
struct iphdr* iph;
iph = malloc(sizeof(struct iphdr)); // allocate memory
if (iph == NULL) { // if the ip header is NULL
err();
return NULL;
}
srand((unsigned int)time(NULL)); // seed random number generator
/* Hardcoded values */
iph->version = 4; // the version
iph->tos = 0; // type of services
iph->ihl = 5; // internet header length
iph->id = htons(rand() % 65536); // random id
iph->ttl = rand() % 257; // ttl
iph->frag_off = 0; // fragment offset
if (iph->ttl < 64) iph->ttl += 64; // if TTL is not sufficient
iph->tot_len = htons(iph->ihl*4); // the internet header length
/* User defined values */
iph->saddr = inet_addr(client); // source address
iph->daddr = inet_addr(dest); // destination address
iph->protocol = proto; // protocol
iph->check = 0; // set to zero for later calculation
return iph;
}
This function assembles the TCP header:
struct tcphdr* assemble_tcp(unsigned int sport,unsigned int dport,FLAGS f) {
/* Assemble TCP layer */
struct tcphdr* tcph;
tcph = malloc(sizeof(struct tcphdr)); // allocate tcp header
if (tcph == NULL) { // if tcp is NULL
err();
return NULL;
}
bzero(tcph,sizeof(struct tcphdr));
srand((unsigned int)time(NULL)); // seed random number generator
/* Hardcoded values */
tcph->seq = htonl(rand() % 65001); // generate random sequence number
tcph->ack_seq = 0; // ack sequence should be 0
tcph->doff = 5; // set data offset
tcph->window = htons(rand() % 65536); // set window size
/* Increase values by random value above 64 */
if (ntohs(tcph->seq) < 64) tcph->seq += (rand() % 101 + 64);
if (ntohs(tcph->window) < 64) tcph->window += (rand() % 101 + 64);
/* User-defined values */
tcph->source = htons(sport); // source port
tcph->dest = htons(dport); // destination port
tcph = set_flags(tcph,f); // set the TCP flags
/* Set urgent ptr if URG flag is set*/
if (tcph->urg == 1) tcph->urg_ptr = 1;
else tcph->urg_ptr = 0;
tcph->check = 0; // set the checksum to 0 for other calculations
return tcph;
}
Also, I do compute the checksum of the headers. For my purposes, when calculating the checksum, the IP header is always 20 bytes long, since I'm not sending any data or options. That means that there are 10 16-bit words in the header. The TCP header is also going to be 20 bytes long since I didn't add any options or data. Here's the code:
unsigned short ip_checksum(struct iphdr* iph) {
/* Acquire IP checksum */
/*
Checksum for Internet Protocol:
One's complement of the one's complement sum of the 16 bit words in the header.
So we get the first 16 bits of the header then add it to the sum, and then
we get the next 16 bits, and add it, and so on.
...0100101010110101 -> "..." represents more bits
1111111111111111 -> this is 131071 in base 10
0000100101010110101 -> notice how the "..." bits are now 0's
*/
/* One's complement sum */
unsigned long long* ptr;
unsigned long long hdr;
unsigned short sum = 0;
unsigned long mask = 131071;
ptr = (unsigned long long*)iph; // cast structure
hdr = *ptr; // get hdr
for (int i = 0; i < 10; i++) { // 20 bytes -> 160 bits / 16 bits = 10 words
sum += (hdr & mask); // add to sum
hdr >>= 16; // shift the next 16 bits
}
sum = ~sum; // inverse
return sum;
}
TCP Checksum:
unsigned short tcp_checksum(struct tcphdr* tcph,struct iphdr* iph) {
/* Calculate TCP checksum */
struct pseudo_hdr* pseudo_hdr;
u_char* buffer;
u_char* segment;
u_char* pseudo_segment;
unsigned short sum = 0;
unsigned long mask = 131071;
unsigned long long* ptr;
unsigned long long hdr;
pseudo_hdr = malloc(sizeof(struct pseudo_hdr)); // allocate memory
buffer = malloc(32); // allocate for 32 bytes of information
if (pseudo_hdr == NULL || buffer == NULL) { // if memory wasn't allocated properly
err();
if (pseudo_hdr != NULL) free(pseudo_hdr);
if (buffer != NULL) free(buffer);
return 0;
}
pseudo_hdr->saddr = (unsigned long)iph->saddr; // we add the cast because the fields if of type u_int_32
pseudo_hdr->daddr = (unsigned long)iph->daddr; // same reason for adding the cast as above
memset(&pseudo_hdr->reserved,0,8); // set these 8 bits to 0
pseudo_hdr->proto = IPPROTO_TCP; // this will always be 6
pseudo_hdr->len = htons(tcph->doff*4); // length of tcp header
/* Place both headers into a buffer */
segment = (u_char*)tcph;
pseudo_segment = (u_char*)pseudo_hdr;
/* Concactenate */
strncat((char*)buffer,(char*)pseudo_segment,12); // first the pseudo header
strncat((char*)buffer,(char*)segment,20); // then the TCP segment
/* Calculate checksum just like IP checksum */
ptr = (unsigned long long*)buffer; // convert buffer
hdr = *ptr; // dereference for clarity in following clode
for (int i = 0; i < 16; i++) { // 32 bytes -> 256 bits / 16 bits = 16 words
sum += (hdr & mask); // apply mask to header and add to sum
hdr >>= 16; // shift the next 16 bits
}
sum = ~sum; // bitwise NOT operation
return sum;
};
Here's all of the functions I stated above put together:
int tcp_scan(char* ipaddr,unsigned int port,FLAGS f,unsigned int justsend) {
/* Do a TCP port scan */
u_char* buffer;
u_char recvbuf[65535];
u_char* ipbuf;
u_char* tcpbuf;
int s;
size_t bufsize;
size_t size;
struct sockaddr_in sa;
struct sockaddr_in recvstruct;
struct msghdr msg;
struct iovec iv[1];
struct iphdr* iph;
struct tcphdr* tcph;
FLAGS rst = RST;
srand((unsigned int)time(NULL)); // seed random number generator
bufsize = sizeof(struct tcphdr) + sizeof(struct iphdr); // store size in variable
buffer = malloc(bufsize); // allocate memory to buffer
iph = assemble_ip(ipaddr,IPPROTO_TCP); // set the ip address to provided and protocol as TCP
tcph = assemble_tcp(rand() % 65536,port,f); // set flag, source port as rand, and dest port as supplied port num
if (iph == NULL || tcph == NULL || buffer == NULL) { // if error occurs
err();
/* Deallocate memory to variables that still have it */
if (iph != NULL) free(iph);
if (tcph != NULL) free(tcph);
if (buffer != NULL) free(buffer);
return -1;
}
/* Now compute checksum */
iph->check = htons(ip_checksum(iph));
tcph->check = htons(tcp_checksum(tcph,iph));
/* Store headers in buffer */
ipbuf = (u_char*)iph;
tcpbuf = (u_char*)tcph;
/* Concactenate to buffer */
strncat((char*)buffer,(char*)tcpbuf,20); // copy only 20 bytes...this ensures that no extra bytes are catted
strncat((char*)buffer,(char*)ipbuf,20); // do same thing but with ip header
/* Create a socket */
s = create_socket(); // create a raw socket
if (s == -1) return -1; // if the socket wasn't able to be created
/* Clear memory */
bzero(&sa,sizeof(struct sockaddr_in));
bzero(&recvstruct,sizeof(struct sockaddr_in));
bzero(&msg,sizeof(struct msghdr));
bzero(&iv,sizeof(struct iovec));
/* For analyze_packet() */
sa.sin_family = AF_INET; // address family
sa.sin_addr.s_addr = inet_addr(ipaddr); // convert ip address
sa.sin_port = htons(port); // port number
/* For sendmsg() */
iv[0].iov_base = buffer;
iv[0].iov_len = bufsize;
msg.msg_name = &sa; // caller allocated buffer
msg.msg_namelen = sizeof(struct sockaddr_in); // specify size of buffer
msg.msg_iov = iv; // iov structure array
msg.msg_iovlen = 1; // the length of the array
msg.msg_control = NULL; // for ancillary data
msg.msg_controllen = 0; // sizeof ancillary data
if (sendmsg(s,&msg,0) == -1) {
err();
return -1;
}
printf("Sent\n");
if (justsend) return 0; // exit cleanly
bzero(&recvstruct,sizeof(struct sockaddr_in)); // clear structure
size = sizeof(struct sockaddr_in); // acquire size of recv structure
for(int i = 0; i < 100; i++) { // loop until we've received 100 packets
printf("Receiving\n");
bzero(recvbuf,65535); // clear memory
if (recvfrom(s,recvbuf,sizeof(recvbuf),0,(struct sockaddr*)&recvstruct,(socklen_t*)&size) == -1) { // recv
err();
return -1;
}
if (analyze_packet(recvbuf,sa,recvstruct) == 0) { // if packet is what we wanted
printf("\ttcp %d is open\n",port); // print out that port is opened
tcp_scan(ipaddr,port,rst,-1); // abort connection with RST flag
break;
}
}
return 0;
}
Alright, now that you've seen those, here's the 'tshark' command I used to sniff the packets:
sudo tshark -o tcp.check_checksum:TRUE # I also wanted to check the checksum value to make sure it was OK
Now here's the command to run the program:
sudo ./netmap enp0s3 # enp0s3 is the interface I'm sending packets on
After running both of these in separate terminals, tshark provides this output:
1 0.000000 10.0.2.15 -> 127.0.0.1 TCP 74 31280->8000 [<None>] Seq=1 Win=0, bogus TCP header length (0, must be 20)
Please note that the declarations for struct iphdr and struct tcphdr are located in the system header files <netinet/ip.h> and <netinet/tcp.h>, respectively.
I'm really lost as to how to solve this issue. In fact, I'm not certain what is causing the issue, in the first place. According to my knowledge there's no way to specify the length of the TCP header. Any help would be appreciated.
I think your problem is here
strncat((char*)buffer,(char*)tcpbuf,20); // copy only 20 bytes...this ensures that no extra bytes are catted
strncat((char*)buffer,(char*)ipbuf,20);
The headers aren't strings so you may only be copying part of each header. Try something like this;
memcpy((char*)buffer, (char*)tcpbuf, 20);
memcpy((char*)buffer+20, (char*)ipbuf, 20);

ENC28J60 Stops receiving

I'm currently using an stm32f405 and an ENC28J60 and lwip as tcp/ip stack. Everything runs fine at startup but after about a minute or so the ENC stops receiving packets. Transmitting keeps working fine. I've tried both polling it and using interrupts.
I'm using https://github.com/wolfgangr/enc28j60 to communicate to the ENC. And this is the code that handles incoming packets:
while (true) {
eventmask_t mask = chEvtWaitAnyTimeout(ALL_EVENTS, LWIP_PACKET_POLL_INTERVAL);
if(mask & ENC_INTERRUPT_ID)
{
/* Handle ENC28J60 interrupt */
ENC_IRQHandler(&encHandle);
/* Reenable interrupts */
ENC_EnableInterrupts(EIE_INTIE);
}
if (mask & PERIODIC_LINK_TIMER_ID)
{
bool current_link_status = ((encHandle.LinkStatus) & PHSTAT2_LSTAT) != 0;
if (current_link_status != prev_link_status) {
if (current_link_status) {
dhcp_start(&thisif);
}
else {
dhcp_stop(&thisif);
}
}
prev_link_status = current_link_status;
}
/* Check if new frames where received */
struct pbuf *p;
while ((p = low_level_input(&thisif)) != NULL) {
struct eth_hdr *ethhdr = p->payload;
switch (htons(ethhdr->type)) {
/* IP or ARP packet? */
case ETHTYPE_IP:
case ETHTYPE_ARP:
/* full packet send to tcpip_thread to process */
if (tcpip_input(p, &thisif) == ERR_OK)
break;
LWIP_DEBUGF(NETIF_DEBUG, ("ethernetif_input: IP input error\n"));
default:
pbuf_free(p);
}
}
}
Function low_level_input:
static struct pbuf *low_level_input(struct netif *netif) {
struct pbuf *p = NULL;
struct pbuf *q;
uint16_t len;
uint8_t *buffer;
uint32_t bufferoffset = 0;
if (!ENC_GetReceivedFrame(&encHandle)) {
return NULL;
}
/* Obtain the size of the packet and put it into the "len" variable. */
len = encHandle.RxFrameInfos.length;
buffer = (uint8_t *)encHandle.RxFrameInfos.buffer;
if (len > 0)
{
/* We allocate a pbuf chain of pbufs from the Lwip buffer pool */
p = pbuf_alloc(PBUF_RAW, len, PBUF_POOL);
}
if (p != NULL)
{
bufferoffset = 0;
for(q = p; q != NULL; q = q->next)
{
/* Copy data in pbuf */
memcpy( (uint8_t*)((uint8_t*)q->payload), (uint8_t*)((uint8_t*)buffer + bufferoffset), q->len);
bufferoffset = bufferoffset + q->len;
}
}
return p;
}
After a while the function ENC_GetReceivedFrame keeps returning false, even if I know for sure some packets should have been received.
I've debugged the function (found in enc28j60.c) and this line:
pktcnt = enc_rdbreg(handle, ENC_EPKTCNT);
pktcnt is always 0. I've looked at the SPI bus with a logic analyzer and the ENC truly anwsers 0. The SPI bus works fine.
Just before this happens some packets are received that are not flagged as RXSTAT_OK (look at line 1259 in enc28j60.c)
I've been at this for day's now, and truly have no ideas left.
I encountered a similar problem..
The EPKTCNT register was times to times decreased with no reason( without setting the ECON2_PKTDEC bit).
I noticed that when it happened it was after setting the ECON2_AUTOINC bit.
Not every time ECON2_AUTOINC was set but often.
I just set ECON2_AUTOINC at the initialization of the ENC28J60, no more during the reading process.
Since EPKTCNT stopped to decrease with no reason.
Hope it can help

Serial port ReadFile reads 0 bytes and returns true

I'm trying to read in data from a serial port in Windows 7 using the Windows API. When I try to read in data, the WaitCommEvent() fires just fine and the ReadFile() call returns 1 as the status, but no data is read in. In the the ReadFile documentation it says that:
When a synchronous read operation reaches the end of a file, ReadFile returns TRUE and sets *lpNumberOfBytesRead to zero.
However, I'm sure there are no EOT characters in the data being sent over the serial port.
I currently have two USB cables plugged into my computer and connected to each other. I know that they can send and receive data as I have tested them with Putty.
Why won't ReadFile() read in any data?
My code is below.
Header:
typedef struct uart_handle
{
uint8_t port_num;
char port_name[10];
uint32_t baud_rate;
uint8_t byte_size;
uint8_t stop;
uint8_t parity;
int32_t error;
HANDLE handle;
} uart_handle;
Main file:
uart_handle* serial_comm_init(uint8_t port_num, uint32_t baud_rate, uint8_t byte_size, uint8_t stop, uint8_t parity)
{
uart_handle* uart;
DCB uart_params = { 0 };
COMMTIMEOUTS timeouts = { 0 };
int status;
uart = (uart_handle*) malloc(1 * sizeof(uart_handle));
status = 0;
// Set port name
if (port_num > 9)
{
sprintf(uart->port_name, "\\\\.\\COM%d", port_num);
}
else
{
sprintf(uart->port_name, "COM%d", port_num);
}
// Set baud rate
uart->baud_rate = baud_rate;
// Set byte size
uart->byte_size = byte_size;
// Set stop bit
uart->stop = stop;
// Set parity
uart->parity = parity;
// Set up comm state
uart_params.DCBlength = sizeof(uart_params);
status = GetCommState(uart->handle, &uart_params);
uart_params.BaudRate = uart->baud_rate;
uart_params.ByteSize = uart->byte_size;
uart_params.StopBits = uart->stop;
uart_params.Parity = uart->parity;
SetCommState(uart->handle, &uart_params);
// Setup actual file handle
uart->handle = CreateFile(uart->port_name, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL);
if (uart->handle == INVALID_HANDLE_VALUE) {
printf("Error opening serial port %s.\n", uart->port_name);
free(uart);
return NULL;
}
else {
printf("Serial port %s opened successfully.\n", uart->port_name);
}
// Set timeouts
status = GetCommTimeouts(uart->handle, &timeouts);
timeouts.ReadIntervalTimeout = 50;
timeouts.ReadTotalTimeoutConstant = 50;
timeouts.ReadTotalTimeoutMultiplier = 10;
timeouts.WriteTotalTimeoutConstant = 50;
timeouts.WriteTotalTimeoutMultiplier = 10;
status = SetCommTimeouts(uart->handle, &timeouts);
if (status == 0) {
printf("Error setting comm timeouts: %d", GetLastError());
}
return uart;
}
int32_t serial_comm_read(void* handle, uint8_t* msg, uint32_t msg_size, uint32_t timeout_ms, uint32_t flag)
{
uart_handle* uart;
uint32_t num_bytes_read;
uint32_t event_mask;
int32_t status;
uart = (uart_handle*) handle;
num_bytes_read = 0;
event_mask = 0;
status = 0;
memset(msg, 0, msg_size);
// Register Event
status = SetCommMask(uart->handle, EV_RXCHAR);
// Wait for event
status = WaitCommEvent(uart->handle, &event_mask, NULL);
printf("Recieved characters.\n");
do {
status = ReadFile(uart->handle, msg, msg_size, &num_bytes_read, NULL);
printf("Status: %d\n", status);
printf("Num bytes read: %d\n", num_bytes_read);
printf("Message: %s\n", msg);
} while (num_bytes_read > 0);
printf("Read finished.\n");
return 0;
}
Output:
Serial port COM9 opened successfully.
Recieved characters.
Status: 1
Num bytes read: 0
Message:
Read finished.
The code shown calls GetCommState() on an uninitialised handle:
status = GetCommState(uart->handle, &uart_params);
provoking UB doing so. Its returned status is not tested.
Due to this uart_params probably contains BS no useful data.
Do yourself a favour: Always and ever check the return value on all relevant function calls (and let the code act accordingly)! Consider as "relevant" all those functions returning or changing data used afterwards.

Resources