view src/event/quic/ngx_event_quic_bpf.c @ 8676:7df607cb2d11 quic

QUIC: ngx_quic_bpf module. The quic kernel bpf helper inspects packet payload for DCID, extracts key and routes the packet into socket matching the key. Due to reuseport feature, each worker owns a personal socket, which is identified by the same key, used to create DCID. BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK. The "ulimit -l" command may be used to setup proper limits, if maps cannot be created with EPERM or updated with ETOOLONG.
author Vladimir Homutov <vl@nginx.com>
date Fri, 25 Dec 2020 15:01:15 +0300
parents
children d4e02b3b734f
line wrap: on
line source


/*
 * Copyright (C) Nginx, Inc.
 */


#include <ngx_config.h>
#include <ngx_core.h>


#define NGX_QUIC_BPF_VARNAME  "NGINX_BPF_MAPS"
#define NGX_QUIC_BPF_VARSEP    ';'
#define NGX_QUIC_BPF_ADDRSEP   '#'


#define ngx_quic_bpf_get_conf(cycle)                                          \
    (ngx_quic_bpf_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_quic_bpf_module)

#define ngx_quic_bpf_get_old_conf(cycle)                                      \
    cycle->old_cycle->conf_ctx ? ngx_quic_bpf_get_conf(cycle->old_cycle)      \
                               : NULL

#define ngx_core_get_conf(cycle)                                              \
    (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module)


typedef struct {
    ngx_queue_t           queue;
    int                   map_fd;

    struct sockaddr      *sockaddr;
    socklen_t             socklen;
    ngx_uint_t            unused;     /* unsigned  unused:1; */
} ngx_quic_sock_group_t;


typedef struct {
    ngx_flag_t            enabled;
    ngx_uint_t            map_size;
    ngx_queue_t           groups;     /* of ngx_quic_sock_group_t */
} ngx_quic_bpf_conf_t;


static void *ngx_quic_bpf_create_conf(ngx_cycle_t *cycle);
static ngx_int_t ngx_quic_bpf_module_init(ngx_cycle_t *cycle);

static void ngx_quic_bpf_cleanup(void *data);
static ngx_inline void ngx_quic_bpf_close(ngx_log_t *log, int fd,
    const char *name);

static ngx_quic_sock_group_t *ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf,
    ngx_listening_t *ls);
static ngx_quic_sock_group_t *ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle,
    struct sockaddr *sa, socklen_t socklen);
static ngx_quic_sock_group_t *ngx_quic_bpf_create_group(ngx_cycle_t *cycle,
    ngx_listening_t *ls);
static ngx_quic_sock_group_t *ngx_quic_bpf_get_group(ngx_cycle_t *cycle,
    ngx_listening_t *ls);
static ngx_int_t ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle,
    ngx_listening_t *ls);
static uint64_t ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log);

static ngx_int_t ngx_quic_bpf_export_maps(ngx_cycle_t *cycle);
static ngx_int_t ngx_quic_bpf_import_maps(ngx_cycle_t *cycle);

extern ngx_bpf_program_t  ngx_quic_reuseport_helper;


static ngx_command_t  ngx_quic_bpf_commands[] = {

    { ngx_string("quic_bpf"),
      NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_FLAG,
      ngx_conf_set_flag_slot,
      0,
      offsetof(ngx_quic_bpf_conf_t, enabled),
      NULL },

      ngx_null_command
};


static ngx_core_module_t  ngx_quic_bpf_module_ctx = {
    ngx_string("quic_bpf"),
    ngx_quic_bpf_create_conf,
    NULL
};


ngx_module_t  ngx_quic_bpf_module = {
    NGX_MODULE_V1,
    &ngx_quic_bpf_module_ctx,              /* module context */
    ngx_quic_bpf_commands,                 /* module directives */
    NGX_CORE_MODULE,                       /* module type */
    NULL,                                  /* init master */
    ngx_quic_bpf_module_init,              /* init module */
    NULL,                                  /* init process */
    NULL,                                  /* init thread */
    NULL,                                  /* exit thread */
    NULL,                                  /* exit process */
    NULL,                                  /* exit master */
    NGX_MODULE_V1_PADDING
};


static void *
ngx_quic_bpf_create_conf(ngx_cycle_t *cycle)
{
    ngx_quic_bpf_conf_t  *bcf;

    bcf = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_bpf_conf_t));
    if (bcf == NULL) {
        return NULL;
    }

    bcf->enabled = NGX_CONF_UNSET;
    bcf->map_size = NGX_CONF_UNSET_UINT;

    ngx_queue_init(&bcf->groups);

    return bcf;
}


static ngx_int_t
ngx_quic_bpf_module_init(ngx_cycle_t *cycle)
{
    ngx_uint_t            i;
    ngx_listening_t      *ls;
    ngx_core_conf_t      *ccf;
    ngx_pool_cleanup_t   *cln;
    ngx_quic_bpf_conf_t  *bcf;

    ccf = ngx_core_get_conf(cycle);
    bcf = ngx_quic_bpf_get_conf(cycle);

    ngx_conf_init_value(bcf->enabled, 0);

    bcf->map_size = ccf->worker_processes * 4;

    cln = ngx_pool_cleanup_add(cycle->pool, 0);
    if (cln == NULL) {
        goto failed;
    }

    cln->data = bcf;
    cln->handler = ngx_quic_bpf_cleanup;

    if (ngx_inherited && ngx_is_init_cycle(cycle->old_cycle)) {
        if (ngx_quic_bpf_import_maps(cycle) != NGX_OK) {
            goto failed;
        }
    }

    ls = cycle->listening.elts;

    for (i = 0; i < cycle->listening.nelts; i++) {
        if (ls[i].quic && ls[i].reuseport) {
            if (ngx_quic_bpf_group_add_socket(cycle, &ls[i]) != NGX_OK) {
                goto failed;
            }
        }
    }

    if (ngx_quic_bpf_export_maps(cycle) != NGX_OK) {
        goto failed;
    }

    return NGX_OK;

failed:

    if (ngx_is_init_cycle(cycle->old_cycle)) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
                      "ngx_quic_bpf_module failed to initialize, check limits");

        /* refuse to start */
        return NGX_ERROR;
    }

    /*
     * returning error now will lead to master process exiting immediately
     * leaving worker processes orphaned, what is really unexpected.
     * Instead, just issue a not about failed initialization and try
     * to cleanup a bit. Still program can be already loaded to kernel
     * for some reuseport groups, and there is no way to revert, so
     * behaviour may be inconsistent.
     */

    ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
                  "ngx_quic_bpf_module failed to initialize properly, ignored."
                  "please check limits and note that nginx state now "
                  "can be inconsistent and restart may be required");

    return NGX_OK;
}


static void
ngx_quic_bpf_cleanup(void *data)
{
    ngx_quic_bpf_conf_t  *bcf = (ngx_quic_bpf_conf_t *) data;

    ngx_queue_t            *q;
    ngx_quic_sock_group_t  *grp;

    for (q = ngx_queue_head(&bcf->groups);
         q != ngx_queue_sentinel(&bcf->groups);
         q = ngx_queue_next(q))
    {
        grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

        ngx_quic_bpf_close(ngx_cycle->log, grp->map_fd, "map");
    }
}


static ngx_inline void
ngx_quic_bpf_close(ngx_log_t *log, int fd, const char *name)
{
    if (close(fd) != -1) {
        return;
    }

    ngx_log_error(NGX_LOG_EMERG, log, ngx_errno,
                  "quic bpf close %s fd:%i failed", name, fd);
}


static ngx_quic_sock_group_t *
ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, ngx_listening_t *ls)
{
    ngx_queue_t            *q;
    ngx_quic_sock_group_t  *grp;

    for (q = ngx_queue_head(&bcf->groups);
         q != ngx_queue_sentinel(&bcf->groups);
         q = ngx_queue_next(q))
    {
        grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

        if (ngx_cmp_sockaddr(ls->sockaddr, ls->socklen,
                             grp->sockaddr, grp->socklen, 1)
            == NGX_OK)
        {
            return grp;
        }
    }

    return NULL;
}


static ngx_quic_sock_group_t *
ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, struct sockaddr *sa,
    socklen_t socklen)
{
    ngx_quic_bpf_conf_t    *bcf;
    ngx_quic_sock_group_t  *grp;

    bcf = ngx_quic_bpf_get_conf(cycle);

    grp = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_sock_group_t));
    if (grp == NULL) {
        return NULL;
    }

    grp->socklen = socklen;
    grp->sockaddr = ngx_palloc(cycle->pool, socklen);
    if (grp->sockaddr == NULL) {
        return NULL;
    }
    ngx_memcpy(grp->sockaddr, sa, socklen);

    ngx_queue_insert_tail(&bcf->groups, &grp->queue);

    return grp;
}


static ngx_quic_sock_group_t *
ngx_quic_bpf_create_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
{
    int                     progfd, failed, flags, rc;
    ngx_quic_bpf_conf_t    *bcf;
    ngx_quic_sock_group_t  *grp;

    bcf = ngx_quic_bpf_get_conf(cycle);

    if (!bcf->enabled) {
        return NULL;
    }

    grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
    if (grp == NULL) {
        return NULL;
    }

    grp->map_fd = ngx_bpf_map_create(cycle->log, BPF_MAP_TYPE_SOCKHASH,
                                     sizeof(uint64_t), sizeof(uint64_t),
                                     bcf->map_size, 0);
    if (grp->map_fd == -1) {
        goto failed;
    }

    flags = fcntl(grp->map_fd, F_GETFD);
    if (flags == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
                      "quic bpf getfd failed");
        goto failed;
    }

    /* need to inherit map during binary upgrade after exec */
    flags &= ~FD_CLOEXEC;

    rc = fcntl(grp->map_fd, F_SETFD, flags);
    if (rc == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
                      "quic bpf setfd failed");
        goto failed;
    }

    ngx_bpf_program_link(&ngx_quic_reuseport_helper,
                         "ngx_quic_sockmap", grp->map_fd);

    progfd = ngx_bpf_load_program(cycle->log, &ngx_quic_reuseport_helper);
    if (progfd < 0) {
        goto failed;
    }

    failed = 0;

    if (setsockopt(ls->fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
                   &progfd, sizeof(int))
        == -1)
    {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno,
                      "quic bpf setsockopt(SO_ATTACH_REUSEPORT_EBPF) failed");
        failed = 1;
    }

    ngx_quic_bpf_close(cycle->log, progfd, "program");

    if (failed) {
        goto failed;
    }

    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                   "quic bpf sockmap created fd:%i", grp->map_fd);
    return grp;

failed:

    if (grp->map_fd != -1) {
        ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
    }

    ngx_queue_remove(&grp->queue);

    return NULL;
}


static ngx_quic_sock_group_t *
ngx_quic_bpf_get_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
{
    ngx_quic_bpf_conf_t    *bcf, *old_bcf;
    ngx_quic_sock_group_t  *grp, *ogrp;

    bcf = ngx_quic_bpf_get_conf(cycle);

    grp = ngx_quic_bpf_find_group(bcf, ls);
    if (grp) {
        return grp;
    }

    old_bcf = ngx_quic_bpf_get_old_conf(cycle);

    if (old_bcf == NULL) {
        return ngx_quic_bpf_create_group(cycle, ls);
    }

    ogrp = ngx_quic_bpf_find_group(old_bcf, ls);
    if (ogrp == NULL) {
        return ngx_quic_bpf_create_group(cycle, ls);
    }

    grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
    if (grp == NULL) {
        return NULL;
    }

    grp->map_fd = dup(ogrp->map_fd);
    if (grp->map_fd == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                      "quic bpf failed to duplicate bpf map descriptor");

        ngx_queue_remove(&grp->queue);

        return NULL;
    }

    ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
              "quic bpf sockmap fd duplicated old:%i new:%i",
              ogrp->map_fd, grp->map_fd);

    return grp;
}


static ngx_int_t
ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle,  ngx_listening_t *ls)
{
    uint64_t                cookie;
    ngx_quic_bpf_conf_t    *bcf;
    ngx_quic_sock_group_t  *grp;

    bcf = ngx_quic_bpf_get_conf(cycle);

    grp = ngx_quic_bpf_get_group(cycle, ls);

    if (grp == NULL) {
        if (!bcf->enabled) {
            return NGX_OK;
        }

        return NGX_ERROR;
    }

    grp->unused = 0;

    cookie = ngx_quic_bpf_socket_key(ls->fd, cycle->log);
    if (cookie == (uint64_t) NGX_ERROR) {
        return NGX_ERROR;
    }

    /* map[cookie] = socket; for use in kernel helper */
    if (ngx_bpf_map_update(grp->map_fd, &cookie, &ls->fd, BPF_ANY) == -1) {
        ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
                      "quic bpf failed to update socket map key=%xL", cookie);
        return NGX_ERROR;
    }

    ngx_log_debug4(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                  "quic bpf sockmap fd:%d add socket:%d cookie:0x%xL worker:%d",
                  grp->map_fd, ls->fd, cookie, ls->worker);

    /* do not inherit this socket */
    ls->ignore = 1;

    return NGX_OK;
}


static uint64_t
ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log)
{
    uint64_t   cookie;
    socklen_t  optlen;

    optlen = sizeof(cookie);

    if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) {
        ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
                      "quic bpf getsockopt(SO_COOKIE) failed");

        return (ngx_uint_t) NGX_ERROR;
    }

    return cookie;
}


static ngx_int_t
ngx_quic_bpf_export_maps(ngx_cycle_t *cycle)
{
    u_char                 *p, *buf;
    size_t                  len;
    ngx_str_t              *var;
    ngx_queue_t            *q;
    ngx_core_conf_t        *ccf;
    ngx_quic_bpf_conf_t    *bcf;
    ngx_quic_sock_group_t  *grp;

    ccf = ngx_core_get_conf(cycle);
    bcf = ngx_quic_bpf_get_conf(cycle);

    len = sizeof(NGX_QUIC_BPF_VARNAME) + 1;

    q = ngx_queue_head(&bcf->groups);

    while (q != ngx_queue_sentinel(&bcf->groups)) {

        grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

        q = ngx_queue_next(q);

        if (grp->unused) {
            /*
             * map was inherited, but it is not used in this configuration;
             * do not pass such map further and drop the group to prevent
             * interference with changes during reload
             */

            ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
            ngx_queue_remove(&grp->queue);

            continue;
        }

        len += NGX_INT32_LEN + 1 + NGX_SOCKADDR_STRLEN + 1;
    }

    len++;

    buf = ngx_palloc(cycle->pool, len);
    if (buf == NULL) {
        return NGX_ERROR;
    }

    p = ngx_cpymem(buf, NGX_QUIC_BPF_VARNAME "=",
                   sizeof(NGX_QUIC_BPF_VARNAME));

    for (q = ngx_queue_head(&bcf->groups);
         q != ngx_queue_sentinel(&bcf->groups);
         q = ngx_queue_next(q))
    {
        grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

        p = ngx_sprintf(p, "%ud", grp->map_fd);

        *p++ = NGX_QUIC_BPF_ADDRSEP;

        p += ngx_sock_ntop(grp->sockaddr, grp->socklen, p,
                           NGX_SOCKADDR_STRLEN, 1);

        *p++ = NGX_QUIC_BPF_VARSEP;
    }

    *p = '\0';

    var = ngx_array_push(&ccf->env);
    if (var == NULL) {
        return NGX_ERROR;
    }

    var->data = buf;
    var->len = sizeof(NGX_QUIC_BPF_VARNAME) - 1;

    return NGX_OK;
}


static ngx_int_t
ngx_quic_bpf_import_maps(ngx_cycle_t *cycle)
{
    int                     s;
    u_char                 *inherited, *p, *v;
    ngx_uint_t              in_fd;
    ngx_addr_t              tmp;
    ngx_quic_bpf_conf_t    *bcf;
    ngx_quic_sock_group_t  *grp;

    inherited = (u_char *) getenv(NGX_QUIC_BPF_VARNAME);

    if (inherited == NULL) {
        return NGX_OK;
    }

    bcf = ngx_quic_bpf_get_conf(cycle);

#if (NGX_SUPPRESS_WARN)
    s = -1;
#endif

    in_fd = 1;

    for (p = inherited, v = p; *p; p++) {

        switch (*p) {

        case NGX_QUIC_BPF_ADDRSEP:

            if (!in_fd) {
                ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
                              "quic bpf failed to parse inherited env");
                return NGX_ERROR;
            }
            in_fd = 0;

            s = ngx_atoi(v, p - v);
            if (s == NGX_ERROR) {
                ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
                              "quic bpf failed to parse inherited map fd");
                return NGX_ERROR;
            }

            v = p + 1;
            break;

        case NGX_QUIC_BPF_VARSEP:

            if (in_fd) {
                ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
                              "quic bpf failed to parse inherited env");
                return NGX_ERROR;
            }
            in_fd = 1;

            grp = ngx_pcalloc(cycle->pool,
                              sizeof(ngx_quic_sock_group_t));
            if (grp == NULL) {
                return NGX_ERROR;
            }

            grp->map_fd = s;

            if (ngx_parse_addr_port(cycle->pool, &tmp, v, p - v)
                != NGX_OK)
            {
                ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
                              "quic bpf failed to parse inherited"
                              " address '%*s'", p - v , v);

                ngx_quic_bpf_close(cycle->log, s, "inherited map");

                return NGX_ERROR;
            }

            grp->sockaddr = tmp.sockaddr;
            grp->socklen = tmp.socklen;

            grp->unused = 1;

            ngx_queue_insert_tail(&bcf->groups, &grp->queue);

            ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
                           "quic bpf sockmap inherited with "
                           "fd:%i address:%*s",
                           grp->map_fd, p - v, v);
            v = p + 1;
            break;

        default:
            break;
        }
    }

    return NGX_OK;
}