I've recently been learning the TCP/IP implementation in the Linux kernel (version 4.4), and I got really confused about the accept queue. I know there is a queue in struct inet_connection_sock which is called the accept queue:
struct inet_connection_sock {
...
/* #icsk_accept_queue: FIFO of established children */
struct request_sock_queue icsk_accept_queue;
...
}
and there is a qlen member in it, I think it is used to indicate the length of the queue.
struct request_sock_queue {
...
/* length of the queue? */
atomic_t qlen;
...
};
Here is what I think I know: when a LISTEN socket receives a SYN packet, in function tcp_conn_request, inet_csk_reqsk_queue_hash_add is called to put the newly created NEW_SYN_RECV sock into ehash table (not icsk_accept_queue)
/*
 * Called with a listener `sk` and a pending request `req`: hands `req` and
 * its `timeout` to reqsk_queue_hash_req(), then calls
 * inet_csk_reqsk_queue_added() on the listener. Nothing is linked into the
 * listener's accept list here.
 */
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
unsigned long timeout)
{
reqsk_queue_hash_req(req, timeout); // add to ehash table
inet_csk_reqsk_queue_added(sk); // increment icsk_accept_queue.qlen
}
But in this function, inet_csk_reqsk_queue_added is called to increment icsk_accept_queue.qlen. My question is: why increment qlen when nothing is inserted into icsk_accept_queue? Is it not the length of icsk_accept_queue?
Also, tcp_conn_request called inet_csk_reqsk_queue_add to add a fast open sock(if enabled) to icsk_accept_queue:
/*
 * Appends `req`, with `child` attached to it, to the tail of the accept list
 * (rskq_accept_head/rskq_accept_tail) of listener `sk`.
 *
 * Returns `child` when it was queued, or NULL when `sk` is no longer in
 * TCP_LISTEN, in which case the child is handed to inet_child_forget().
 * The whole operation runs under queue->rskq_lock.
 */
struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock);
/* The listener state is checked under the lock, before anything is linked. */
if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_child_forget(sk, req, child);
child = NULL;
} else {
req->sk = child;
req->dl_next = NULL;
/* Singly linked FIFO: an empty list gets a new head, otherwise link after the tail. */
if (queue->rskq_accept_head == NULL)
queue->rskq_accept_head = req;
else
queue->rskq_accept_tail->dl_next = req;
queue->rskq_accept_tail = req;
sk_acceptq_added(sk); // increment sk.sk_ack_backlog
}
spin_unlock(&queue->rskq_lock);
return child;
}
But this function ended up incrementing sk.sk_ack_backlog (by calling sk_acceptq_added) instead of icsk_accept_queue.qlen. Why not?
Related
I am writing a Netfilter hook and want to do a stateful analysis of incoming TCP packets, whether they belong to an existing connection or a new connection is starting.
This is my first try at writing code using Netfilter and after reading https://people.netfilter.org/pablo/docs/login.pdf I understand I need to check if a packet is categorized as a NEW or ESTABLISHED state. But I cannot find any documentation of how to write code for this.
/*
 * Netfilter hook skeleton: inspects the IPv4 header of every packet and is
 * meant to filter TCP packets based on their SYN flag and connection state.
 * The state check itself is still pseudo-code, so every packet is accepted.
 *
 * Returns NF_ACCEPT in all paths for now.
 */
static unsigned int hfunc(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) {
    struct iphdr *iph;

    if (!skb)
        return NF_ACCEPT;

    iph = ip_hdr(skb);
    if (iph->protocol == IPPROTO_TCP) {
        /*
        if packet SYN flag is enabled and state==NEW:
        return NF_ACCEPT
        else if SYN flag is disabled and state==NEW:
        return NF_DROP
        */
    }
    return NF_ACCEPT;
}
/*
 * Module entry point: allocates the nf_hook_ops descriptor, fills it in so
 * that hfunc runs on IPv4 PRE_ROUTING with the highest priority, and
 * registers it in init_net.
 *
 * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated, or
 * the error reported by nf_register_net_hook(). Nothing stays allocated on
 * failure.
 */
static int __init my_net_module_init(void) {
    int err;

    printk(KERN_INFO "Initializing my netfilter module\n");

    // Allocating memory for hook structure.
    my_nf_hook = kzalloc(sizeof(*my_nf_hook), GFP_KERNEL);
    if (!my_nf_hook)
        return -ENOMEM;

    // Constructing the structure
    /* No cast: a signature mismatch with nf_hookfn should fail to compile. */
    my_nf_hook->hook = hfunc; /* hook function */
    my_nf_hook->hooknum = NF_INET_PRE_ROUTING; /* received packets */
    my_nf_hook->pf = PF_INET; /* IPv4 */
    my_nf_hook->priority = NF_IP_PRI_FIRST; /* max hook priority */

    err = nf_register_net_hook(&init_net, my_nf_hook);
    if (err) {
        /* Registration failed: the exit handler will not run, so clean up here. */
        kfree(my_nf_hook);
        my_nf_hook = NULL;
        return err;
    }
    return 0;
}
/*
 * Module exit point: unregisters the hook from init_net first, then frees
 * the descriptor, so the memory is released only after it has been
 * unregistered.
 */
static void __exit my_net_module_exit(void) {
nf_unregister_net_hook(&init_net, my_nf_hook);
kfree(my_nf_hook);
printk(KERN_INFO "Exiting my netfilter module\n");
}
module_init(my_net_module_init);
module_exit(my_net_module_exit);
Edit:
Added code snippet for registering hook in pre-routing.
Seems that in your hook you want to make a decision on packet based on conntrack(CT) info about the connection state - to block (drop) all the TCP packets which are in the middle of connection, i.e. packets both without SYN flag and without connection entry in CT.
So if you want to reap the benefits of CT, you have to let it do its work first.
Now your hook is in NF_INET_PRE_ROUTING with NF_IP_PRI_FIRST priority. Just look at the picture of Linux kernel packet flow. If we talk about pre-routing chain CT-handling is somewhere after RAW table (i.e. with a lower priority).
The list of priorities you can see here:
/*
 * Priority values for IPv4 netfilter hooks. As the surrounding text explains,
 * a numerically greater value means a lower priority, so a hook that must
 * see conntrack results needs a value greater than NF_IP_PRI_CONNTRACK.
 */
enum nf_ip_hook_priorities {
NF_IP_PRI_FIRST = INT_MIN,
NF_IP_PRI_CONNTRACK_DEFRAG = -400,
NF_IP_PRI_RAW = -300,
NF_IP_PRI_SELINUX_FIRST = -225,
NF_IP_PRI_CONNTRACK = -200, /* conntrack handling happens at this priority */
NF_IP_PRI_MANGLE = -150,
NF_IP_PRI_NAT_DST = -100,
NF_IP_PRI_FILTER = 0,
NF_IP_PRI_SECURITY = 50,
NF_IP_PRI_NAT_SRC = 100,
NF_IP_PRI_SELINUX_LAST = 225,
NF_IP_PRI_CONNTRACK_HELPER = 300,
NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX,
NF_IP_PRI_LAST = INT_MAX,
};
Thus to stick in after CT (after nf_conntrack_in()) you must register your hook with priority lower than NF_IP_PRI_CONNTRACK (i.e. with greater number, e.g. -50).
So you do:
/*
 * Hook table: runs hfunc on IPv4 PRE_ROUTING at priority
 * NF_IP_PRI_CONNTRACK + 150 (-200 + 150 = -50), i.e. numerically after
 * conntrack's own priority.
 */
static struct nf_hook_ops hooks[] __read_mostly = {
{
.hook = hfunc,
.pf = PF_INET,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK + 150
},
// ...
};
// ...
/*
 * Registration of the whole table.
 * NOTE(review): the question's module registers through
 * nf_register_net_hook(&init_net, ...); confirm that nf_register_hooks()
 * exists in the target kernel version.
 */
int ret;
ret = nf_register_hooks(hooks, ARRAY_SIZE(hooks));
if (ret < 0)
// error
Then you should access the CT info from within your hook:
static unsigned int hfunc(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state) {
struct iphdr *iph;
iph = ip_hdr(skb);
if (iph->protocol == IPPROTO_TCP) {
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct tcphdr *tcph;
ct = nf_ct_get(skb, &ctinfo);
if (!ct)
return NF_ACCEPT;
tcph = tcp_hdr(skb)
if (tcph->syn) { // && !tcph->ack ???
if (ctinfo == IP_CT_NEW)
return NF_ACCEPT;
} else {
if (ctinfo == IP_CT_NEW)
return NF_DROP;
}
}
return NF_ACCEPT
}
Also remember that CT must be involved in your Linux kernel network processing. There should be CT modules inserted into kernel and an appropriate iptables rule added.
I'm trying to find the PID from struct sock, so I used the following code:
/*
 * Looks up the task behind the peer pid recorded in `sk`.
 *
 * Returns the task obtained from get_pid_task() for sk->sk_peer_pid, or NULL
 * when `sk` is NULL or no peer pid is recorded on the socket.
 * NOTE(review): get_pid_task() is expected to return a referenced task that
 * the caller must release with put_task_struct() - confirm against the
 * kernel version in use.
 */
struct task_struct *get_task_from_sock(struct sock *sk)
{
    if (sk == NULL)
    {
        pr_info("sk is NULL");
        return NULL;
    }
    if (sk->sk_peer_pid == NULL)
    {
        pr_info("sk_peer_pid is NULL");
        return NULL;
    }
    return get_pid_task(sk->sk_peer_pid, PIDTYPE_PID);
}
But sk_peer_pid is always NULL. So I referred to the source code:
# grep sk_peer_pid -r
include/net/sock.h: * #sk_peer_pid: &struct pid for this socket's peer
include/net/sock.h: struct pid *sk_peer_pid;
net/core/sock.c: cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
net/core/sock.c: put_pid(sk->sk_peer_pid);
net/core/sock.c: sk->sk_peer_pid = NULL;
net/unix/af_unix.c: put_pid(sk->sk_peer_pid);
net/unix/af_unix.c: sk->sk_peer_pid = get_pid(task_tgid(current));
net/unix/af_unix.c: put_pid(sk->sk_peer_pid);
net/unix/af_unix.c: sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
So is it only available for Unix domain (local) sockets?
How can I find the owner of a struct sock? I've read this post already, it never mentions how to get PID from struct sock.
Does anyone know?
For a school project I made this program where clients (each a tcpsock_t) can make a TCP connection with a server and pass data through it. I poll the different connections to see if there is data to read.
I got a tcpsock.c with functions for the TCP sockets which is used as library, a connmgr.c as server and a client code (which is not relevant).
In the tcpsock.h i typedef the struct of the tcpsock_t
(other functions below)
typedef struct tcpsock tcpsock_t;
and in tcpsock.c I define the struct (and it should stay there; I cannot put it in the .h file)
/*
 * Private definition behind the opaque tcpsock_t handle. It lives in
 * tcpsock.c only, so other files can hold tcpsock_t pointers but cannot copy,
 * index or take sizeof the struct.
 */
struct tcpsock{
long cookie;     /* NOTE(review): purpose not visible here - presumably a validity marker; confirm */
int sd;          /* socket descriptor (what tcp_get_sd() reports) */
char * ip_addr;  /* IP address of the socket (what tcp_get_ip_addr() reports) */
int port;        /* port of the socket (what tcp_get_port() reports) */
};
In the connmgr.c i want to make an array of this tcpsock_t to store all my connected clients, but when i compile it gives always the same sort of errors:
error: invalid use of undefined type ‘struct tcpsock’
client[conn_counter] = *requestclient;
Can someone tell me what i'm doing wrong?
Thanks a lot!
Here is my server code :
#include "tcpsock.h"
#include "connmgr.h"
int conn_counter = 0;
/*
 * Intended to append `newclient` to the client table `array` and to count it
 * in conn_counter.
 * NOTE(review): as written this cannot work and does not compile:
 *  - `test` is not declared anywhere in view; `array` was presumably meant.
 *  - sizeof(get_size_tcpsock()) is the size of the function's int return
 *    type, not the value the function returns.
 *  - tcpsock_t is an incomplete type in this file, so array[...] = *newclient
 *    is rejected ("invalid use of undefined type"); a table of tcpsock_t *
 *    pointers avoids copying the struct.
 *  - `array` is passed by value, so the pointer returned by realloc never
 *    reaches the caller.
 */
void add_client(tcpsock_t *newclient, tcpsock_t * array){
conn_counter++;
array= realloc(test,sizeof(get_size_tcpsock())*conn_counter);
array[conn_counter-1]=*newclient; //here i have the error
}
/*
 * Registers `client` for POLLIN in slot `conn_counter` of the poll table.
 *
 * polllist: poll table to grow to conn_counter + 1 entries.
 * client:   newly accepted socket whose descriptor is polled.
 * server:   unused; kept so existing callers keep compiling.
 *
 * The table is left untouched if it cannot be grown.
 * NOTE(review): `polllist` is received by value, so when realloc moves the
 * table the caller's pointer is stale; the signature would have to change
 * (e.g. return the new pointer) to fix that.
 */
void add_poll(struct pollfd * polllist, tcpsock_t *client, tcpsock_t *server){
    struct pollfd *grown;
    int clientsd;

    (void)server;

    /* Parenthesised: room for conn_counter + 1 entries, not "n entries plus one byte". */
    grown = realloc(polllist, sizeof(struct pollfd) * (conn_counter + 1));
    if (grown == NULL)
        return;
    polllist = grown;

    /* Poll the client's descriptor; the listening socket already owns slot 0. */
    tcp_get_sd(client, &clientsd);
    polllist[conn_counter].fd = clientsd;
    polllist[conn_counter].events = POLLIN;
    polllist[conn_counter].revents = 0;
}
/*
 * Connection manager: listens on PORT, accepts clients and prints the sensor
 * readings (id, value, timestamp) they send.
 *
 * Bookkeeping:
 *   - poll_list[0] is the listening socket; poll_list[k + 1] belongs to
 *     clients[k]; conn_counter is the number of clients.
 *   - tcpsock_t is opaque in this file, so the client table stores the
 *     tcpsock_t pointers handed out by tcp_wait_for_connection() instead of
 *     struct copies.
 *   - Both tables are grown here rather than in a helper that receives the
 *     table pointer by value, because such a helper cannot hand a
 *     reallocated block back to its caller.
 *
 * The loop never terminates on its own; the code after it is kept for
 * completeness.
 */
int main(void){
    tcpsock_t *server;
    tcpsock_t **clients = NULL;
    struct pollfd *poll_list;
    sensor_data_t data;
    int bytes,result;

    poll_list = malloc(sizeof(struct pollfd));
    if(poll_list==NULL)exit(EXIT_FAILURE);
    if(tcp_passive_open(&server,PORT)!=TCP_NO_ERROR)exit(EXIT_FAILURE);

    int serversd;
    tcp_get_sd(server, &serversd);
    poll_list[0].fd = serversd;
    poll_list[0].events = POLLIN;
    poll_list[0].revents = 0;
    printf("start polling \n");

    while(1){
        int retval,i;
        retval = poll(poll_list,conn_counter+1,5000);
        if (retval>0){
            /* Clients accepted below were not part of this poll() call,
             * so only the first ready_clients slots carry valid revents. */
            int ready_clients = conn_counter;

            /* Bitwise test: is POLLIN set on the listening socket? */
            if(poll_list[0].revents & POLLIN){
                tcpsock_t *requestclient;
                /* assumes TCP_NO_ERROR is the success code here too, as for tcp_passive_open */
                if(tcp_wait_for_connection(server,&requestclient)==TCP_NO_ERROR){
                    tcpsock_t **more_clients;
                    struct pollfd *more_polls;

                    /* Grow each table through a temporary so a failed
                     * realloc never loses the existing block. */
                    more_clients = realloc(clients,sizeof(*clients)*(conn_counter+1));
                    if(more_clients!=NULL)clients = more_clients;
                    more_polls = realloc(poll_list,sizeof(struct pollfd)*(conn_counter+2));
                    if(more_polls!=NULL)poll_list = more_polls;

                    if(more_clients!=NULL && more_polls!=NULL){
                        int clientsd;
                        tcp_get_sd(requestclient, &clientsd);
                        clients[conn_counter] = requestclient;
                        poll_list[conn_counter+1].fd = clientsd;
                        poll_list[conn_counter+1].events = POLLIN;
                        poll_list[conn_counter+1].revents = 0;
                        conn_counter++;
                    } else {
                        /* No room to track this client: refuse it. */
                        tcp_close(&requestclient);
                    }
                }
            }

            for (i=0; i<ready_clients;i++){
                if(poll_list[i+1].revents & POLLIN){
                    bytes = sizeof(data.id);
                    result = tcp_receive(clients[i],(void *)&data.id,&bytes);
                    bytes = sizeof(data.value);
                    result = tcp_receive(clients[i],(void *)&data.value,&bytes);
                    bytes = sizeof(data.ts);
                    result =tcp_receive(clients[i],(void *)&data.ts,&bytes);
                    if ((result==TCP_NO_ERROR) && bytes){
                        printf("sensor id = %" PRIu16 " - temperature = %g - timestamp = %ld\n", data.id, data.value, (long int)data.ts);
                    }
                    fflush(stdout);
                }
                if (poll_list[i+1].revents & POLLHUP){
                    printf("client disconnected \n");
                    /* Disable the slot of the client that hung up;
                     * poll() ignores entries whose fd is negative. */
                    poll_list[i+1].fd=-1;
                    poll_list[i+1].events=0;
                    fflush(stdout);
                }
            }
        }
    }
    tcp_close(&server);
    return 0;
}
Here is the tcpsock.h, but the functions work fine.
/* Opaque socket handle: the struct is defined in tcpsock.c only, so users of
 * this header can hold tcpsock_t pointers but cannot copy or index tcpsock_t objects.
 */
typedef struct tcpsock tcpsock_t;
int get_size_tcpsock();
/* NOTE(review): presumably returns the size in bytes of struct tcpsock - confirm in tcpsock.c
 */
int tcp_passive_open(tcpsock_t ** socket, int port);
/* Creates a new socket and opens this socket in 'passive listening mode' (waiting for an active connection setup request)
 * The socket is bound to port number 'port' and to any active IP interface of the system
 */
int tcp_active_open(tcpsock_t ** socket, int remote_port, char * remote_ip);
/* Creates a new TCP socket and opens a TCP connection to the system with IP address 'remote_ip' on port 'remote_port'
 * The newly created socket is returned as '*socket'
 */
int tcp_close(tcpsock_t ** socket);
/* Closes the socket '*socket'
 * NOTE(review): whether '*socket' is freed and reset is not visible here - confirm in tcpsock.c
 */
int tcp_wait_for_connection(tcpsock_t * socket, tcpsock_t ** new_socket);
/* Puts the socket 'socket' in a blocking wait mode
 * Returns when an incoming TCP connection setup request is received
 * A newly created socket identifying the remote system that initiated the connection request is returned as '*new_socket'
 */
int tcp_send(tcpsock_t * socket, void * buffer, int * buf_size );
/* Initiates a send command on the socket 'socket' and tries to send the total '*buf_size' bytes of data in 'buffer' (recall that the function might block for a while)
 * The function sets '*buf_size' to the number of bytes that were really sent, which might be less than the initial '*buf_size'
 */
int tcp_get_ip_addr( tcpsock_t * socket, char ** ip_addr);
/* Set '*ip_addr' to the IP address of 'socket' (could be NULL if the IP address is not set)
 */
int tcp_get_port(tcpsock_t * socket, int * port);
/* Set '*port' to the port number of 'socket'
 */
int tcp_get_sd(tcpsock_t * socket, int * sd);
/* Set '*sd' to the socket descriptor of 'socket' (the value the server passes to poll())
 */
I have the following code excerpt (heavily redacted to remove unimportant details) which fails under a rare and particular set of circumstances.
/*
 * Event loop as posted in the question. Each epoll event carries a 64-bit
 * tag: the upper 32 bits are -1 for listening sockets, the lower 32 bits are
 * the file descriptor.
 */
struct epoll_event *events = calloc(MAXEVENTS+1, sizeof(struct epoll_event));
struct sockaddr_in in_addr;
socklen_t in_len = sizeof in_addr;
while(1)
{
int n = epoll_wait(efd,events, MAXEVENTS, -1);
/* The n events are a snapshot: closing an fd inside this loop does not
 * remove events already queued for it in events[]. */
for(int i=0; i<n; i++)
{
struct epoll_event *evI = &events[i];
uint64 u64 = evI->data.u64;
int type = u64>>32, fd=u64, fdx = fd;
if(type == -1)
{
/* Listening socket: accept until accept4() reports failure. */
/* NOTE(review): in_len is only initialised once before the loop; accept4()
 * treats it as a value-result argument - confirm whether it should be reset
 * before every call. */
while((fd = accept4(fdx, &in_addr, &in_len, SOCK_NONBLOCK|SOCK_CLOEXEC))>-1)
{
setNew_Connection(efd, fd);
storeAddrPort(fd, &in_addr, &in_len);
}
}
else
{
if(evI->events&(EPOLLERR|EPOLLHUP|EPOLLRDHUP)){closeConnection(fd);}
/* The two branches below are placeholders in the post, not compilable statements. */
if(evI->events&EPOLLOUT) //process out data stuff
else if(evI->events&EPOLLIN) //process in data stuff and possibly close a different connection.
}
}
}
Listening sockets are differentiated by -1 in the upper part of evI->data.u64
setNew_Connection does the usual accepting stuff like adding the new socket to epoll etc
EPOLLET is used.
Now it all works brilliantly except under the following circumstances it fails because events is only updated in the epoll_wait so a connection closure does not affect the n events until after returning to the top of the while(1) loop.
epoll_wait unblocks with 3 events queued in the events struct table.
First event (n=0), is incoming data after which code decides to close a connection (e.g. file descriptor 8) as it is no longer needed.
2nd event (n=1) is an incoming new connection. accept4 assigns fd:8 as it has recently become available. setNew_Connection adds it to the epoll list.
3rd event is incoming data for the connection closed in step 2. i.e. fd:8 but it is no longer valid as the original fd:8 connection was closed and the current fd:8 is for a different connection.
I hope I have explained the problem adequately. The issue is that queued events in the events table are not updated when a connection is closed until the code returns to epoll_wait. How can I code around this problem?
Orel gave me the answer but I thought I would post the complete code solution. Instead of
close(fd)
I use
shutdown(fd,SHUT_RDWR);
FDS[FDSL++] = fd;
The shutdown prevents any more data from being read or written but doesn't actually close the socket. FDS[FDSL++] = fd; stores the fd so that later, after the n events are done, it can be closed with while(FDSL)close(FDS[--FDSL]);
/*
 * Event loop with deferred close.
 *
 * Connections that must go away while a batch of epoll events is being
 * processed are only shut down and remembered in FDS[]; the descriptors are
 * closed after the whole batch is done. The kernel therefore cannot hand a
 * descriptor number out again (via accept4) while stale events for the old
 * connection are still queued in events[].
 *
 * Each event carries a 64-bit tag: upper 32 bits == -1 marks a listening
 * socket, the lower 32 bits are the file descriptor.
 * NOTE(review): FDS[] has MAXEVENTS slots; whoever stores into it must make
 * sure one batch never defers more closes than that.
 */
int FDS[MAXEVENTS],FDSL=0;
struct epoll_event *events = calloc(MAXEVENTS+1, sizeof(struct epoll_event));
struct sockaddr_in in_addr;
socklen_t in_len;
while(1)
{
    int n = epoll_wait(efd,events, MAXEVENTS, -1);
    for(int i=0; i<n; i++)
    {
        struct epoll_event *evI = &events[i];
        uint64 u64 = evI->data.u64;
        int type = u64>>32, fd=u64, fdx = fd;
        if(type == -1)
        {
            /* Listening socket: drain every pending connection. */
            for(;;)
            {
                /* addrlen is a value-result argument, so reset it before each call. */
                in_len = sizeof in_addr;
                fd = accept4(fdx, (struct sockaddr *)&in_addr, &in_len, SOCK_NONBLOCK|SOCK_CLOEXEC);
                if(fd < 0)
                    break;
                setNew_Connection(efd, fd);
                storeAddrPort(fd, &in_addr, &in_len);
            }
        }
        else
        {
            if(evI->events&(EPOLLERR|EPOLLHUP|EPOLLRDHUP)){closeConnection(fd);}
            if(evI->events&EPOLLOUT)
            {
                //process out data stuff
            }
            else if(evI->events&EPOLLIN)
            {
                //process in data stuff and possibly close a different connection.
            }
        }
    }
    /* Batch finished: it is now safe to really close the deferred descriptors. */
    while(FDSL)close(FDS[--FDSL]);
}
I want to create a concurrent server that works on multiple client requests, so I created a thread function to handle them. My problem is that I have a hash table, which is loaded with the contents of a file whenever the server starts, and I also have a socket descriptor and a file descriptor. How can I pass all of these to the thread function? Do I need a structure to store the arguments and pass it to the thread?
my code is like this :
/* One user record as stored in the hash table: a username/password pair. */
struct UserData
{
char *username;
char *password;
};
/*
 * Hash table of user records.
 * NOTE(review): `size` is presumably the number of slots in `table` - confirm
 * against the code that allocates it.
 */
struct HashTable
{
    int size;
    struct UserData *table;
};
int main()
{
struct HashTable *htable;
//socket sd to open socket in server
and a Fd file descriptor to write to file
// hash table loaded with contents and it is a structure
/*create thread using pthread*/
pthread_create(...,fun,..);
}
/*
 * Thread entry point handed to pthread_create(); `arg` is the single
 * void * argument passed through by it. The body is a placeholder in the post.
 */
void * fun(void *arg)
{
.............
}
How do I declare a structure to pass to the thread function, including arguments such as the socket descriptor (sd), the file descriptor (fd) and the hash table pointer? Should I use a mutex to lock when writing to the file (fd)?
pthread_create() takes a void * as its final argument, which gets passed to your thread entry function, fun() in your case. So you would just need to define a struct that contains all the fields you want to pass:
/* Bundle of everything a worker thread needs, passed to the thread entry
 * function through pthread_create()'s single void * argument. */
struct ThreadArg {
int sd; /* socket descriptor */
int fd; /* file descriptor */
struct HashTable *ht; /* hash table pointer, set from main()'s htable */
};
Then in your main() you fill it in and pass it to pthread_create():
...
struct ThreadArg *arg = malloc(sizeof(struct ThreadArg)); /* you should check for NULL */
arg->sd = sd;
arg->fd = fd;
arg->ht = htable;
pthread_create(..., fun, (void *)arg);
...
And in fun() you cast it back:
/*
 * Thread entry point.
 *
 * arg: heap-allocated struct ThreadArg prepared by the creating thread; this
 *      function takes ownership of it and frees it before returning.
 *
 * Returns NULL: the thread has no result to report.
 */
void *fun(void *arg) {
    struct ThreadArg *thArg = (struct ThreadArg *)arg;
    /* do whatever with thArg->sd, thArg->fd, etc. */
    /* ... */
    /* free the memory when done */
    free(arg);
    return NULL;
}