Mercurial > hg > nginx
diff src/event/quic/bpf/ngx_quic_reuseport_helper.c @ 8676:7df607cb2d11 quic
QUIC: ngx_quic_bpf module.
The quic kernel bpf helper inspects packet payload for DCID, extracts key
and routes the packet into socket matching the key.
Due to reuseport feature, each worker owns a personal socket, which is
identified by the same key, used to create DCID.
BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK.
The "ulimit -l" command may be used to setup proper limits, if maps
cannot be created with EPERM or updated with ETOOLONG.
author | Vladimir Homutov <vl@nginx.com> |
---|---|
date | Fri, 25 Dec 2020 15:01:15 +0300 |
parents | |
children | 1a489587e1c8 |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/src/event/quic/bpf/ngx_quic_reuseport_helper.c @@ -0,0 +1,140 @@ +#include <errno.h> +#include <linux/string.h> +#include <linux/udp.h> +#include <linux/bpf.h> +/* + * the bpf_helpers.h is not included into linux-headers, only available + * with kernel sources in "tools/lib/bpf/bpf_helpers.h" or in libbpf. + */ +#include <bpf/bpf_helpers.h> + + +#if !defined(SEC) +#define SEC(NAME) __attribute__((section(NAME), used)) +#endif + + +#if defined(LICENSE_GPL) + +/* + * To see debug: + * + * echo 1 > /sys/kernel/debug/tracing/events/bpf_trace/enable + * cat /sys/kernel/debug/tracing/trace_pipe + * echo 0 > /sys/kernel/debug/tracing/events/bpf_trace/enable + */ + +#define debugmsg(fmt, ...) \ +do { \ + char __buf[] = fmt; \ + bpf_trace_printk(__buf, sizeof(__buf), ##__VA_ARGS__); \ +} while (0) + +#else + +#define debugmsg(fmt, ...) + +#endif + +char _license[] SEC("license") = LICENSE; + +/*****************************************************************************/ + +#define NGX_QUIC_PKT_LONG 0x80 /* header form */ +#define NGX_QUIC_SERVER_CID_LEN 20 + + +#define advance_data(nbytes) \ + offset += nbytes; \ + if (start + offset > end) { \ + debugmsg("cannot read %ld bytes at offset %ld", nbytes, offset); \ + goto failed; \ + } \ + data = start + offset - 1; + + +#define ngx_quic_parse_uint64(p) \ + (((__u64)(p)[0] << 56) | \ + ((__u64)(p)[1] << 48) | \ + ((__u64)(p)[2] << 40) | \ + ((__u64)(p)[3] << 32) | \ + (p)[4] << 24 | \ + (p)[5] << 16 | \ + (p)[6] << 8 | \ + (p)[7]) + +/* + * actual map object is created by the "bpf" system call, + * all pointers to this variable are replaced by the bpf loader + */ +struct bpf_map_def SEC("maps") ngx_quic_sockmap; + + +SEC(PROGNAME) +int ngx_quic_select_socket_by_dcid(struct sk_reuseport_md *ctx) +{ + int rc; + __u64 key; + size_t len, offset; + unsigned char *start, *end, *data, *dcid; + + start = ctx->data; + end = (unsigned char *) ctx->data_end; + offset = 0; + + advance_data(sizeof(struct udphdr)); /* skip UDP header */ + advance_data(1); /* QUIC flags */ + + if (data[0] & NGX_QUIC_PKT_LONG) { + + advance_data(4); /* skip QUIC version */ + len = data[0]; /* read DCID length */ + + if (len < 8) { + /* it's useless to search for key in such short DCID */ + return SK_PASS; + } + + advance_data(1); /* skip DCID len */ + + } else { + len = NGX_QUIC_SERVER_CID_LEN; + } + + dcid = &data[1]; + advance_data(len); /* we expect the packet to have full DCID */ + + /* make verifier happy */ + if (dcid + sizeof(__u64) > end) { + goto failed; + } + + key = ngx_quic_parse_uint64(dcid); + + rc = bpf_sk_select_reuseport(ctx, &ngx_quic_sockmap, &key, 0); + + switch (rc) { + case 0: + debugmsg("nginx quic socket selected by key 0x%x", key); + return SK_PASS; + + /* kernel returns positive error numbers, errno.h defines positive */ + case -ENOENT: + debugmsg("nginx quic default route for key 0x%x", key); + /* let the default reuseport logic decide which socket to choose */ + return SK_PASS; + + default: + debugmsg("nginx quic bpf_sk_select_reuseport err: %d key 0x%x", + rc, key); + goto failed; + } + +failed: + /* + * SK_DROP will generate ICMP, but we may want to process "invalid" packet + * in userspace quic to investigate further and finally react properly + * (maybe ignore, maybe send something in response or close connection) + */ + return SK_PASS; +}