# HG changeset patch # User Vladimir Homutov # Date 1608897675 -10800 # Node ID 7df607cb2d11c56dab9707106cc70e2f573699b7 # Parent d3747ba486e79ef85222f045ee75050a983d4699 QUIC: ngx_quic_bpf module. The quic kernel bpf helper inspects packet payload for DCID, extracts key and routes the packet into socket matching the key. Due to reuseport feature, each worker owns a personal socket, which is identified by the same key, used to create DCID. BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK. The "ulimit -l" command may be used to setup proper limits, if maps cannot be created with EPERM or updated with ETOOLONG. diff --git a/auto/modules b/auto/modules --- a/auto/modules +++ b/auto/modules @@ -1345,6 +1345,22 @@ if [ $USE_OPENSSL$USE_OPENSSL_QUIC = YES ngx_module_order= . auto/module + + if [ $NGX_QUIC_BPF$BPF_FOUND$SO_COOKIE_FOUND = YESYESYES ]; then + ngx_module_type=CORE + ngx_module_name=ngx_quic_bpf_module + ngx_module_incs= + ngx_module_deps= + ngx_module_srcs="src/event/quic/ngx_event_quic_bpf.c \ + src/event/quic/ngx_event_quic_bpf_code.c" + ngx_module_libs= + ngx_module_link=YES + ngx_module_order= + + . auto/module + + have=NGX_QUIC_BPF . auto/have + fi fi diff --git a/auto/options b/auto/options --- a/auto/options +++ b/auto/options @@ -45,6 +45,8 @@ USE_THREADS=NO NGX_FILE_AIO=NO +NGX_QUIC_BPF=YES + HTTP=YES NGX_HTTP_LOG_PATH= @@ -170,6 +172,7 @@ NGX_GOOGLE_PERFTOOLS=NO NGX_CPP_TEST=NO BPF_FOUND=NO +SO_COOKIE_FOUND=NO NGX_LIBATOMIC=NO @@ -216,6 +219,8 @@ do --with-file-aio) NGX_FILE_AIO=YES ;; + --without-quic_bpf_module) NGX_QUIC_BPF=NO ;; + --with-ipv6) NGX_POST_CONF_MSG="$NGX_POST_CONF_MSG $0: warning: the \"--with-ipv6\" option is deprecated" @@ -450,6 +455,8 @@ cat << END --with-file-aio enable file AIO support + --without-quic_bpf_module disable ngx_quic_bpf_module + --with-http_ssl_module enable ngx_http_ssl_module --with-http_quic_module enable ngx_http_quic_module --with-http_v2_module enable ngx_http_v2_module diff --git a/auto/os/linux b/auto/os/linux --- a/auto/os/linux +++ b/auto/os/linux @@ -234,3 +234,41 @@ if [ $ngx_found = yes ]; then CORE_SRCS="$CORE_SRCS src/core/ngx_bpf.c" CORE_DEPS="$CORE_DEPS src/core/ngx_bpf.h" fi + + +# SO_COOKIE socket option + +ngx_feature="SO_COOKIE" +ngx_feature_name="NGX_HAVE_SO_COOKIE" +ngx_feature_run=no +ngx_feature_incs="#include + #include " +ngx_feature_path= +ngx_feature_libs= +ngx_feature_test="socklen_t optlen = sizeof(uint64_t); + uint64_t cookie; + getsockopt(0, SOL_SOCKET, SO_COOKIE, &cookie, &optlen)" +. auto/feature + +if [ $ngx_found = yes ]; then + SO_COOKIE_FOUND=YES + have=NGX_HAVE_SO_COOKIE . auto/have +fi + + +# ngx_quic_bpf module uses sockhash to select socket from reuseport group, +# support appeared in Linux-5.7: +# +# commit: 9fed9000c5c6cacfcaaa48aff74818072ae294cc +# bpf: Allow selecting reuseport socket from a SOCKMAP/SOCKHASH +# +if [ $NGX_QUIC_BPF$BPF_FOUND = YESYES ]; then + echo $ngx_n "checking for kernel with reuseport/BPF support...$ngx_c" + if [ $version -lt 329472 ]; then + echo " not found (at least 5.7 is required)" + NGX_QUIC_BPF=NO + else + echo " found" + fi +fi + diff --git a/src/core/nginx.c b/src/core/nginx.c --- a/src/core/nginx.c +++ b/src/core/nginx.c @@ -680,6 +680,9 @@ ngx_exec_new_binary(ngx_cycle_t *cycle, ls = cycle->listening.elts; for (i = 0; i < cycle->listening.nelts; i++) { + if (ls[i].ignore) { + continue; + } p = ngx_sprintf(p, "%ud;", ls[i].fd); } diff --git a/src/event/quic/bpf/bpfgen.sh b/src/event/quic/bpf/bpfgen.sh new file mode 100644 --- /dev/null +++ b/src/event/quic/bpf/bpfgen.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +export LANG=C + +set -e + +if [ $# -lt 1 ]; then + echo "Usage: PROGNAME=foo LICENSE=bar $0 " + exit 1 +fi + + +self=$0 +filename=$1 +funcname=$PROGNAME + +generate_head() +{ + cat << END +/* AUTO-GENERATED, DO NOT EDIT. */ + +#include +#include + +#include "ngx_bpf.h" + + +END +} + +generate_tail() +{ + cat << END + +ngx_bpf_program_t $PROGNAME = { + .relocs = bpf_reloc_prog_$funcname, + .nrelocs = sizeof(bpf_reloc_prog_$funcname) + / sizeof(bpf_reloc_prog_$funcname[0]), + .ins = bpf_insn_prog_$funcname, + .nins = sizeof(bpf_insn_prog_$funcname) + / sizeof(bpf_insn_prog_$funcname[0]), + .license = "$LICENSE", + .type = BPF_PROG_TYPE_SK_REUSEPORT, +}; + +END +} + +process_relocations() +{ + echo "static ngx_bpf_reloc_t bpf_reloc_prog_$funcname[] = {" + + objdump -r $filename | awk '{ + + if (enabled && $NF > 0) { + off = strtonum(sprintf("0x%s", $1)); + name = $3; + + printf(" { \"%s\", %d },\n", name, off/8); + } + + if ($1 == "OFFSET") { + enabled=1; + } +}' + echo "};" + echo +} + +process_section() +{ + echo "static struct bpf_insn bpf_insn_prog_$funcname[] = {" + echo " /* opcode dst src offset imm */" + + section_info=$(objdump -h $filename --section=$funcname | grep "1 $funcname") + + # dd doesn't know hex + length=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f3)) + offset=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f6)) + + for ins in $(dd if="$filename" bs=1 count=$length skip=$offset status=none | xxd -p -c 8) + do + opcode=0x${ins:0:2} + srcdst=0x${ins:2:2} + + # bytes are dumped in LE order + offset=0x${ins:6:2}${ins:4:2} # short + immedi=0x${ins:14:2}${ins:12:2}${ins:10:2}${ins:8:2} # int + + dst="$(($srcdst & 0xF))" + src="$(($srcdst & 0xF0))" + src="$(($src >> 4))" + + opcode=$(printf "0x%x" $opcode) + dst=$(printf "BPF_REG_%d" $dst) + src=$(printf "BPF_REG_%d" $src) + offset=$(printf "%d" $offset) + immedi=$(printf "0x%x" $immedi) + + printf " { %4s, %11s, %11s, (int16_t) %6s, %10s },\n" $opcode $dst $src $offset $immedi + done + +cat << END +}; + +END +} + +generate_head +process_relocations +process_section +generate_tail + diff --git a/src/event/quic/bpf/makefile b/src/event/quic/bpf/makefile new file mode 100644 --- /dev/null +++ b/src/event/quic/bpf/makefile @@ -0,0 +1,30 @@ +CFLAGS=-O2 -Wall + +LICENSE=BSD + +PROGNAME=ngx_quic_reuseport_helper +RESULT=ngx_event_quic_bpf_code +DEST=../$(RESULT).c + +all: $(RESULT) + +$(RESULT): $(PROGNAME).o + LICENSE=$(LICENSE) PROGNAME=$(PROGNAME) bash ./bpfgen.sh $< > $@ + +DEFS=-DPROGNAME=\"$(PROGNAME)\" \ + -DLICENSE_$(LICENSE) \ + -DLICENSE=\"$(LICENSE)\" \ + +$(PROGNAME).o: $(PROGNAME).c + clang $(CFLAGS) $(DEFS) -target bpf -c $< -o $@ + +install: $(RESULT) + cp $(RESULT) $(DEST) + +clean: + @rm -f $(RESULT) *.o + +debug: $(PROGNAME).o + llvm-objdump -S -no-show-raw-insn $< + +.DELETE_ON_ERROR: diff --git a/src/event/quic/bpf/ngx_quic_reuseport_helper.c b/src/event/quic/bpf/ngx_quic_reuseport_helper.c new file mode 100644 --- /dev/null +++ b/src/event/quic/bpf/ngx_quic_reuseport_helper.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include +/* + * the bpf_helpers.h is not included into linux-headers, only available + * with kernel sources in "tools/lib/bpf/bpf_helpers.h" or in libbpf. + */ +#include + + +#if !defined(SEC) +#define SEC(NAME) __attribute__((section(NAME), used)) +#endif + + +#if defined(LICENSE_GPL) + +/* + * To see debug: + * + * echo 1 > /sys/kernel/debug/tracing/events/bpf_trace/enable + * cat /sys/kernel/debug/tracing/trace_pipe + * echo 0 > /sys/kernel/debug/tracing/events/bpf_trace/enable + */ + +#define debugmsg(fmt, ...) \ +do { \ + char __buf[] = fmt; \ + bpf_trace_printk(__buf, sizeof(__buf), ##__VA_ARGS__); \ +} while (0) + +#else + +#define debugmsg(fmt, ...) + +#endif + +char _license[] SEC("license") = LICENSE; + +/*****************************************************************************/ + +#define NGX_QUIC_PKT_LONG 0x80 /* header form */ +#define NGX_QUIC_SERVER_CID_LEN 20 + + +#define advance_data(nbytes) \ + offset += nbytes; \ + if (start + offset > end) { \ + debugmsg("cannot read %ld bytes at offset %ld", nbytes, offset); \ + goto failed; \ + } \ + data = start + offset - 1; + + +#define ngx_quic_parse_uint64(p) \ + (((__u64)(p)[0] << 56) | \ + ((__u64)(p)[1] << 48) | \ + ((__u64)(p)[2] << 40) | \ + ((__u64)(p)[3] << 32) | \ + (p)[4] << 24 | \ + (p)[5] << 16 | \ + (p)[6] << 8 | \ + (p)[7]) + +/* + * actual map object is created by the "bpf" system call, + * all pointers to this variable are replaced by the bpf loader + */ +struct bpf_map_def SEC("maps") ngx_quic_sockmap; + + +SEC(PROGNAME) +int ngx_quic_select_socket_by_dcid(struct sk_reuseport_md *ctx) +{ + int rc; + __u64 key; + size_t len, offset; + unsigned char *start, *end, *data, *dcid; + + start = ctx->data; + end = (unsigned char *) ctx->data_end; + offset = 0; + + advance_data(sizeof(struct udphdr)); /* skip UDP header */ + advance_data(1); /* QUIC flags */ + + if (data[0] & NGX_QUIC_PKT_LONG) { + + advance_data(4); /* skip QUIC version */ + len = data[0]; /* read DCID length */ + + if (len < 8) { + /* it's useless to search for key in such short DCID */ + return SK_PASS; + } + + advance_data(1); /* skip DCID len */ + + } else { + len = NGX_QUIC_SERVER_CID_LEN; + } + + dcid = &data[1]; + advance_data(len); /* we expect the packet to have full DCID */ + + /* make verifier happy */ + if (dcid + sizeof(__u64) > end) { + goto failed; + } + + key = ngx_quic_parse_uint64(dcid); + + rc = bpf_sk_select_reuseport(ctx, &ngx_quic_sockmap, &key, 0); + + switch (rc) { + case 0: + debugmsg("nginx quic socket selected by key 0x%x", key); + return SK_PASS; + + /* kernel returns positive error numbers, errno.h defines positive */ + case -ENOENT: + debugmsg("nginx quic default route for key 0x%x", key); + /* let the default reuseport logic decide which socket to choose */ + return SK_PASS; + + default: + debugmsg("nginx quic bpf_sk_select_reuseport err: %d key 0x%x", + rc, key); + goto failed; + } + +failed: + /* + * SK_DROP will generate ICMP, but we may want to process "invalid" packet + * in userspace quic to investigate further and finally react properly + * (maybe ignore, maybe send something in response or close connection) + */ + return SK_PASS; +} diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c --- a/src/event/quic/ngx_event_quic.c +++ b/src/event/quic/ngx_event_quic.c @@ -232,6 +232,9 @@ static ngx_int_t ngx_quic_process_statel static ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c, ngx_quic_header_t *inpkt); static ngx_int_t ngx_quic_create_server_id(ngx_connection_t *c, u_char *id); +#if (NGX_QUIC_BPF) +static ngx_int_t ngx_quic_bpf_attach_id(ngx_connection_t *c, u_char *id); +#endif static ngx_int_t ngx_quic_send_retry(ngx_connection_t *c); static ngx_int_t ngx_quic_new_token(ngx_connection_t *c, ngx_str_t *token); static ngx_int_t ngx_quic_validate_token(ngx_connection_t *c, @@ -1297,6 +1300,14 @@ ngx_quic_create_server_id(ngx_connection return NGX_ERROR; } +#if (NGX_QUIC_BPF) + if (ngx_quic_bpf_attach_id(c, id) != NGX_OK) { + ngx_log_error(NGX_LOG_ERR, c->log, 0, + "quic bpf failed to generate socket key"); + /* ignore error, things still may work */ + } +#endif + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0, "quic create server id %*xs", (size_t) NGX_QUIC_SERVER_CID_LEN, id); @@ -1304,6 +1315,34 @@ ngx_quic_create_server_id(ngx_connection } +#if (NGX_QUIC_BPF) + +static ngx_int_t +ngx_quic_bpf_attach_id(ngx_connection_t *c, u_char *id) +{ + int fd; + uint64_t cookie; + socklen_t optlen; + + fd = c->listening->fd; + + optlen = sizeof(cookie); + + if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) { + ngx_log_error(NGX_LOG_ERR, c->log, ngx_socket_errno, + "quic getsockopt(SO_COOKIE) failed"); + + return NGX_ERROR; + } + + ngx_quic_dcid_encode_key(id, cookie); + + return NGX_OK; +} + +#endif + + static ngx_int_t ngx_quic_send_retry(ngx_connection_t *c) { diff --git a/src/event/quic/ngx_event_quic_bpf.c b/src/event/quic/ngx_event_quic_bpf.c new file mode 100644 --- /dev/null +++ b/src/event/quic/ngx_event_quic_bpf.c @@ -0,0 +1,649 @@ + +/* + * Copyright (C) Nginx, Inc. + */ + + +#include +#include + + +#define NGX_QUIC_BPF_VARNAME "NGINX_BPF_MAPS" +#define NGX_QUIC_BPF_VARSEP ';' +#define NGX_QUIC_BPF_ADDRSEP '#' + + +#define ngx_quic_bpf_get_conf(cycle) \ + (ngx_quic_bpf_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_quic_bpf_module) + +#define ngx_quic_bpf_get_old_conf(cycle) \ + cycle->old_cycle->conf_ctx ? ngx_quic_bpf_get_conf(cycle->old_cycle) \ + : NULL + +#define ngx_core_get_conf(cycle) \ + (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module) + + +typedef struct { + ngx_queue_t queue; + int map_fd; + + struct sockaddr *sockaddr; + socklen_t socklen; + ngx_uint_t unused; /* unsigned unused:1; */ +} ngx_quic_sock_group_t; + + +typedef struct { + ngx_flag_t enabled; + ngx_uint_t map_size; + ngx_queue_t groups; /* of ngx_quic_sock_group_t */ +} ngx_quic_bpf_conf_t; + + +static void *ngx_quic_bpf_create_conf(ngx_cycle_t *cycle); +static ngx_int_t ngx_quic_bpf_module_init(ngx_cycle_t *cycle); + +static void ngx_quic_bpf_cleanup(void *data); +static ngx_inline void ngx_quic_bpf_close(ngx_log_t *log, int fd, + const char *name); + +static ngx_quic_sock_group_t *ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, + ngx_listening_t *ls); +static ngx_quic_sock_group_t *ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, + struct sockaddr *sa, socklen_t socklen); +static ngx_quic_sock_group_t *ngx_quic_bpf_create_group(ngx_cycle_t *cycle, + ngx_listening_t *ls); +static ngx_quic_sock_group_t *ngx_quic_bpf_get_group(ngx_cycle_t *cycle, + ngx_listening_t *ls); +static ngx_int_t ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle, + ngx_listening_t *ls); +static uint64_t ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log); + +static ngx_int_t ngx_quic_bpf_export_maps(ngx_cycle_t *cycle); +static ngx_int_t ngx_quic_bpf_import_maps(ngx_cycle_t *cycle); + +extern ngx_bpf_program_t ngx_quic_reuseport_helper; + + +static ngx_command_t ngx_quic_bpf_commands[] = { + + { ngx_string("quic_bpf"), + NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_FLAG, + ngx_conf_set_flag_slot, + 0, + offsetof(ngx_quic_bpf_conf_t, enabled), + NULL }, + + ngx_null_command +}; + + +static ngx_core_module_t ngx_quic_bpf_module_ctx = { + ngx_string("quic_bpf"), + ngx_quic_bpf_create_conf, + NULL +}; + + +ngx_module_t ngx_quic_bpf_module = { + NGX_MODULE_V1, + &ngx_quic_bpf_module_ctx, /* module context */ + ngx_quic_bpf_commands, /* module directives */ + NGX_CORE_MODULE, /* module type */ + NULL, /* init master */ + ngx_quic_bpf_module_init, /* init module */ + NULL, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + NULL, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +static void * +ngx_quic_bpf_create_conf(ngx_cycle_t *cycle) +{ + ngx_quic_bpf_conf_t *bcf; + + bcf = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_bpf_conf_t)); + if (bcf == NULL) { + return NULL; + } + + bcf->enabled = NGX_CONF_UNSET; + bcf->map_size = NGX_CONF_UNSET_UINT; + + ngx_queue_init(&bcf->groups); + + return bcf; +} + + +static ngx_int_t +ngx_quic_bpf_module_init(ngx_cycle_t *cycle) +{ + ngx_uint_t i; + ngx_listening_t *ls; + ngx_core_conf_t *ccf; + ngx_pool_cleanup_t *cln; + ngx_quic_bpf_conf_t *bcf; + + ccf = ngx_core_get_conf(cycle); + bcf = ngx_quic_bpf_get_conf(cycle); + + ngx_conf_init_value(bcf->enabled, 0); + + bcf->map_size = ccf->worker_processes * 4; + + cln = ngx_pool_cleanup_add(cycle->pool, 0); + if (cln == NULL) { + goto failed; + } + + cln->data = bcf; + cln->handler = ngx_quic_bpf_cleanup; + + if (ngx_inherited && ngx_is_init_cycle(cycle->old_cycle)) { + if (ngx_quic_bpf_import_maps(cycle) != NGX_OK) { + goto failed; + } + } + + ls = cycle->listening.elts; + + for (i = 0; i < cycle->listening.nelts; i++) { + if (ls[i].quic && ls[i].reuseport) { + if (ngx_quic_bpf_group_add_socket(cycle, &ls[i]) != NGX_OK) { + goto failed; + } + } + } + + if (ngx_quic_bpf_export_maps(cycle) != NGX_OK) { + goto failed; + } + + return NGX_OK; + +failed: + + if (ngx_is_init_cycle(cycle->old_cycle)) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "ngx_quic_bpf_module failed to initialize, check limits"); + + /* refuse to start */ + return NGX_ERROR; + } + + /* + * returning error now will lead to master process exiting immediately + * leaving worker processes orphaned, what is really unexpected. + * Instead, just issue a not about failed initialization and try + * to cleanup a bit. Still program can be already loaded to kernel + * for some reuseport groups, and there is no way to revert, so + * behaviour may be inconsistent. + */ + + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "ngx_quic_bpf_module failed to initialize properly, ignored." + "please check limits and note that nginx state now " + "can be inconsistent and restart may be required"); + + return NGX_OK; +} + + +static void +ngx_quic_bpf_cleanup(void *data) +{ + ngx_quic_bpf_conf_t *bcf = (ngx_quic_bpf_conf_t *) data; + + ngx_queue_t *q; + ngx_quic_sock_group_t *grp; + + for (q = ngx_queue_head(&bcf->groups); + q != ngx_queue_sentinel(&bcf->groups); + q = ngx_queue_next(q)) + { + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + ngx_quic_bpf_close(ngx_cycle->log, grp->map_fd, "map"); + } +} + + +static ngx_inline void +ngx_quic_bpf_close(ngx_log_t *log, int fd, const char *name) +{ + if (close(fd) != -1) { + return; + } + + ngx_log_error(NGX_LOG_EMERG, log, ngx_errno, + "quic bpf close %s fd:%i failed", name, fd); +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, ngx_listening_t *ls) +{ + ngx_queue_t *q; + ngx_quic_sock_group_t *grp; + + for (q = ngx_queue_head(&bcf->groups); + q != ngx_queue_sentinel(&bcf->groups); + q = ngx_queue_next(q)) + { + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + if (ngx_cmp_sockaddr(ls->sockaddr, ls->socklen, + grp->sockaddr, grp->socklen, 1) + == NGX_OK) + { + return grp; + } + } + + return NULL; +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, struct sockaddr *sa, + socklen_t socklen) +{ + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + grp = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_sock_group_t)); + if (grp == NULL) { + return NULL; + } + + grp->socklen = socklen; + grp->sockaddr = ngx_palloc(cycle->pool, socklen); + if (grp->sockaddr == NULL) { + return NULL; + } + ngx_memcpy(grp->sockaddr, sa, socklen); + + ngx_queue_insert_tail(&bcf->groups, &grp->queue); + + return grp; +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_create_group(ngx_cycle_t *cycle, ngx_listening_t *ls) +{ + int progfd, failed, flags, rc; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + if (!bcf->enabled) { + return NULL; + } + + grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen); + if (grp == NULL) { + return NULL; + } + + grp->map_fd = ngx_bpf_map_create(cycle->log, BPF_MAP_TYPE_SOCKHASH, + sizeof(uint64_t), sizeof(uint64_t), + bcf->map_size, 0); + if (grp->map_fd == -1) { + goto failed; + } + + flags = fcntl(grp->map_fd, F_GETFD); + if (flags == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, errno, + "quic bpf getfd failed"); + goto failed; + } + + /* need to inherit map during binary upgrade after exec */ + flags &= ~FD_CLOEXEC; + + rc = fcntl(grp->map_fd, F_SETFD, flags); + if (rc == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, errno, + "quic bpf setfd failed"); + goto failed; + } + + ngx_bpf_program_link(&ngx_quic_reuseport_helper, + "ngx_quic_sockmap", grp->map_fd); + + progfd = ngx_bpf_load_program(cycle->log, &ngx_quic_reuseport_helper); + if (progfd < 0) { + goto failed; + } + + failed = 0; + + if (setsockopt(ls->fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, + &progfd, sizeof(int)) + == -1) + { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno, + "quic bpf setsockopt(SO_ATTACH_REUSEPORT_EBPF) failed"); + failed = 1; + } + + ngx_quic_bpf_close(cycle->log, progfd, "program"); + + if (failed) { + goto failed; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap created fd:%i", grp->map_fd); + return grp; + +failed: + + if (grp->map_fd != -1) { + ngx_quic_bpf_close(cycle->log, grp->map_fd, "map"); + } + + ngx_queue_remove(&grp->queue); + + return NULL; +} + + +static ngx_quic_sock_group_t * +ngx_quic_bpf_get_group(ngx_cycle_t *cycle, ngx_listening_t *ls) +{ + ngx_quic_bpf_conf_t *bcf, *old_bcf; + ngx_quic_sock_group_t *grp, *ogrp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + grp = ngx_quic_bpf_find_group(bcf, ls); + if (grp) { + return grp; + } + + old_bcf = ngx_quic_bpf_get_old_conf(cycle); + + if (old_bcf == NULL) { + return ngx_quic_bpf_create_group(cycle, ls); + } + + ogrp = ngx_quic_bpf_find_group(old_bcf, ls); + if (ogrp == NULL) { + return ngx_quic_bpf_create_group(cycle, ls); + } + + grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen); + if (grp == NULL) { + return NULL; + } + + grp->map_fd = dup(ogrp->map_fd); + if (grp->map_fd == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "quic bpf failed to duplicate bpf map descriptor"); + + ngx_queue_remove(&grp->queue); + + return NULL; + } + + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap fd duplicated old:%i new:%i", + ogrp->map_fd, grp->map_fd); + + return grp; +} + + +static ngx_int_t +ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle, ngx_listening_t *ls) +{ + uint64_t cookie; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + bcf = ngx_quic_bpf_get_conf(cycle); + + grp = ngx_quic_bpf_get_group(cycle, ls); + + if (grp == NULL) { + if (!bcf->enabled) { + return NGX_OK; + } + + return NGX_ERROR; + } + + grp->unused = 0; + + cookie = ngx_quic_bpf_socket_key(ls->fd, cycle->log); + if (cookie == (uint64_t) NGX_ERROR) { + return NGX_ERROR; + } + + /* map[cookie] = socket; for use in kernel helper */ + if (ngx_bpf_map_update(grp->map_fd, &cookie, &ls->fd, BPF_ANY) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "quic bpf failed to update socket map key=%xL", cookie); + return NGX_ERROR; + } + + ngx_log_debug4(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap fd:%d add socket:%d cookie:0x%xL worker:%d", + grp->map_fd, ls->fd, cookie, ls->worker); + + /* do not inherit this socket */ + ls->ignore = 1; + + return NGX_OK; +} + + +static uint64_t +ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log) +{ + uint64_t cookie; + socklen_t optlen; + + optlen = sizeof(cookie); + + if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) { + ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno, + "quic bpf getsockopt(SO_COOKIE) failed"); + + return (ngx_uint_t) NGX_ERROR; + } + + return cookie; +} + + +static ngx_int_t +ngx_quic_bpf_export_maps(ngx_cycle_t *cycle) +{ + u_char *p, *buf; + size_t len; + ngx_str_t *var; + ngx_queue_t *q; + ngx_core_conf_t *ccf; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + ccf = ngx_core_get_conf(cycle); + bcf = ngx_quic_bpf_get_conf(cycle); + + len = sizeof(NGX_QUIC_BPF_VARNAME) + 1; + + q = ngx_queue_head(&bcf->groups); + + while (q != ngx_queue_sentinel(&bcf->groups)) { + + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + q = ngx_queue_next(q); + + if (grp->unused) { + /* + * map was inherited, but it is not used in this configuration; + * do not pass such map further and drop the group to prevent + * interference with changes during reload + */ + + ngx_quic_bpf_close(cycle->log, grp->map_fd, "map"); + ngx_queue_remove(&grp->queue); + + continue; + } + + len += NGX_INT32_LEN + 1 + NGX_SOCKADDR_STRLEN + 1; + } + + len++; + + buf = ngx_palloc(cycle->pool, len); + if (buf == NULL) { + return NGX_ERROR; + } + + p = ngx_cpymem(buf, NGX_QUIC_BPF_VARNAME "=", + sizeof(NGX_QUIC_BPF_VARNAME)); + + for (q = ngx_queue_head(&bcf->groups); + q != ngx_queue_sentinel(&bcf->groups); + q = ngx_queue_next(q)) + { + grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue); + + p = ngx_sprintf(p, "%ud", grp->map_fd); + + *p++ = NGX_QUIC_BPF_ADDRSEP; + + p += ngx_sock_ntop(grp->sockaddr, grp->socklen, p, + NGX_SOCKADDR_STRLEN, 1); + + *p++ = NGX_QUIC_BPF_VARSEP; + } + + *p = '\0'; + + var = ngx_array_push(&ccf->env); + if (var == NULL) { + return NGX_ERROR; + } + + var->data = buf; + var->len = sizeof(NGX_QUIC_BPF_VARNAME) - 1; + + return NGX_OK; +} + + +static ngx_int_t +ngx_quic_bpf_import_maps(ngx_cycle_t *cycle) +{ + int s; + u_char *inherited, *p, *v; + ngx_uint_t in_fd; + ngx_addr_t tmp; + ngx_quic_bpf_conf_t *bcf; + ngx_quic_sock_group_t *grp; + + inherited = (u_char *) getenv(NGX_QUIC_BPF_VARNAME); + + if (inherited == NULL) { + return NGX_OK; + } + + bcf = ngx_quic_bpf_get_conf(cycle); + +#if (NGX_SUPPRESS_WARN) + s = -1; +#endif + + in_fd = 1; + + for (p = inherited, v = p; *p; p++) { + + switch (*p) { + + case NGX_QUIC_BPF_ADDRSEP: + + if (!in_fd) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited env"); + return NGX_ERROR; + } + in_fd = 0; + + s = ngx_atoi(v, p - v); + if (s == NGX_ERROR) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited map fd"); + return NGX_ERROR; + } + + v = p + 1; + break; + + case NGX_QUIC_BPF_VARSEP: + + if (in_fd) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited env"); + return NGX_ERROR; + } + in_fd = 1; + + grp = ngx_pcalloc(cycle->pool, + sizeof(ngx_quic_sock_group_t)); + if (grp == NULL) { + return NGX_ERROR; + } + + grp->map_fd = s; + + if (ngx_parse_addr_port(cycle->pool, &tmp, v, p - v) + != NGX_OK) + { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "quic bpf failed to parse inherited" + " address '%*s'", p - v , v); + + ngx_quic_bpf_close(cycle->log, s, "inherited map"); + + return NGX_ERROR; + } + + grp->sockaddr = tmp.sockaddr; + grp->socklen = tmp.socklen; + + grp->unused = 1; + + ngx_queue_insert_tail(&bcf->groups, &grp->queue); + + ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0, + "quic bpf sockmap inherited with " + "fd:%i address:%*s", + grp->map_fd, p - v, v); + v = p + 1; + break; + + default: + break; + } + } + + return NGX_OK; +} diff --git a/src/event/quic/ngx_event_quic_bpf_code.c b/src/event/quic/ngx_event_quic_bpf_code.c new file mode 100644 --- /dev/null +++ b/src/event/quic/ngx_event_quic_bpf_code.c @@ -0,0 +1,90 @@ +/* AUTO-GENERATED, DO NOT EDIT. */ + +#include +#include + +#include "ngx_bpf.h" + + +static ngx_bpf_reloc_t bpf_reloc_prog_ngx_quic_reuseport_helper[] = { + { "ngx_quic_sockmap", 56 }, +}; + +static struct bpf_insn bpf_insn_prog_ngx_quic_reuseport_helper[] = { + /* opcode dst src offset imm */ + { 0x79, BPF_REG_4, BPF_REG_1, (int16_t) 0, 0x0 }, + { 0x79, BPF_REG_3, BPF_REG_1, (int16_t) 8, 0x0 }, + { 0xbf, BPF_REG_2, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x7, BPF_REG_2, BPF_REG_0, (int16_t) 0, 0x8 }, + { 0x2d, BPF_REG_2, BPF_REG_3, (int16_t) 55, 0x0 }, + { 0xbf, BPF_REG_5, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x7, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0x9 }, + { 0x2d, BPF_REG_5, BPF_REG_3, (int16_t) 52, 0x0 }, + { 0xb7, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0x14 }, + { 0xb7, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x9 }, + { 0x71, BPF_REG_6, BPF_REG_2, (int16_t) 0, 0x0 }, + { 0x67, BPF_REG_6, BPF_REG_0, (int16_t) 0, 0x38 }, + { 0xc7, BPF_REG_6, BPF_REG_0, (int16_t) 0, 0x38 }, + { 0x65, BPF_REG_6, BPF_REG_0, (int16_t) 10, 0xffffffff }, + { 0xbf, BPF_REG_2, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x7, BPF_REG_2, BPF_REG_0, (int16_t) 0, 0xd }, + { 0x2d, BPF_REG_2, BPF_REG_3, (int16_t) 43, 0x0 }, + { 0xbf, BPF_REG_5, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x7, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0xe }, + { 0x2d, BPF_REG_5, BPF_REG_3, (int16_t) 40, 0x0 }, + { 0xb7, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0xe }, + { 0x71, BPF_REG_5, BPF_REG_4, (int16_t) 12, 0x0 }, + { 0xb7, BPF_REG_6, BPF_REG_0, (int16_t) 0, 0x8 }, + { 0x2d, BPF_REG_6, BPF_REG_5, (int16_t) 36, 0x0 }, + { 0xf, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0x0 }, + { 0xf, BPF_REG_4, BPF_REG_5, (int16_t) 0, 0x0 }, + { 0x2d, BPF_REG_4, BPF_REG_3, (int16_t) 33, 0x0 }, + { 0xbf, BPF_REG_4, BPF_REG_2, (int16_t) 0, 0x0 }, + { 0x7, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x9 }, + { 0x2d, BPF_REG_4, BPF_REG_3, (int16_t) 30, 0x0 }, + { 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 1, 0x0 }, + { 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x38 }, + { 0x71, BPF_REG_3, BPF_REG_2, (int16_t) 2, 0x0 }, + { 0x67, BPF_REG_3, BPF_REG_0, (int16_t) 0, 0x30 }, + { 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 3, 0x0 }, + { 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x28 }, + { 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 4, 0x0 }, + { 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x20 }, + { 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 5, 0x0 }, + { 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x38 }, + { 0xc7, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x20 }, + { 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 6, 0x0 }, + { 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x10 }, + { 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 7, 0x0 }, + { 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x8 }, + { 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 }, + { 0x71, BPF_REG_2, BPF_REG_2, (int16_t) 8, 0x0 }, + { 0x4f, BPF_REG_3, BPF_REG_2, (int16_t) 0, 0x0 }, + { 0x7b, BPF_REG_10, BPF_REG_3, (int16_t) 65528, 0x0 }, + { 0xbf, BPF_REG_3, BPF_REG_10, (int16_t) 0, 0x0 }, + { 0x7, BPF_REG_3, BPF_REG_0, (int16_t) 0, 0xfffffff8 }, + { 0x18, BPF_REG_2, BPF_REG_0, (int16_t) 0, 0x0 }, + { 0x0, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x0 }, + { 0xb7, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x0 }, + { 0x85, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x52 }, + { 0xb7, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x1 }, + { 0x95, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x0 }, +}; + + +ngx_bpf_program_t ngx_quic_reuseport_helper = { + .relocs = bpf_reloc_prog_ngx_quic_reuseport_helper, + .nrelocs = sizeof(bpf_reloc_prog_ngx_quic_reuseport_helper) + / sizeof(bpf_reloc_prog_ngx_quic_reuseport_helper[0]), + .ins = bpf_insn_prog_ngx_quic_reuseport_helper, + .nins = sizeof(bpf_insn_prog_ngx_quic_reuseport_helper) + / sizeof(bpf_insn_prog_ngx_quic_reuseport_helper[0]), + .license = "BSD", + .type = BPF_PROG_TYPE_SK_REUSEPORT, +}; + diff --git a/src/event/quic/ngx_event_quic_transport.c b/src/event/quic/ngx_event_quic_transport.c --- a/src/event/quic/ngx_event_quic_transport.c +++ b/src/event/quic/ngx_event_quic_transport.c @@ -43,6 +43,17 @@ #endif +#define ngx_quic_write_uint64(p, s) \ + ((p)[0] = (u_char) ((s) >> 56), \ + (p)[1] = (u_char) ((s) >> 48), \ + (p)[2] = (u_char) ((s) >> 40), \ + (p)[3] = (u_char) ((s) >> 32), \ + (p)[4] = (u_char) ((s) >> 24), \ + (p)[5] = (u_char) ((s) >> 16), \ + (p)[6] = (u_char) ((s) >> 8), \ + (p)[7] = (u_char) (s), \ + (p) + sizeof(uint64_t)) + #define ngx_quic_write_uint24(p, s) \ ((p)[0] = (u_char) ((s) >> 16), \ (p)[1] = (u_char) ((s) >> 8), \ @@ -1981,3 +1992,10 @@ ngx_quic_create_close(u_char *p, ngx_qui return p - start; } + + +void +ngx_quic_dcid_encode_key(u_char *dcid, uint64_t key) +{ + (void) ngx_quic_write_uint64(dcid, key); +} diff --git a/src/event/quic/ngx_event_quic_transport.h b/src/event/quic/ngx_event_quic_transport.h --- a/src/event/quic/ngx_event_quic_transport.h +++ b/src/event/quic/ngx_event_quic_transport.h @@ -353,4 +353,6 @@ ngx_int_t ngx_quic_parse_transport_param ssize_t ngx_quic_create_transport_params(u_char *p, u_char *end, ngx_quic_tp_t *tp, size_t *clen); +void ngx_quic_dcid_encode_key(u_char *dcid, uint64_t key); + #endif /* _NGX_EVENT_QUIC_WIRE_H_INCLUDED_ */