changeset 8811:bb5152ed045b quic

QUIC: added support for segmentation offloading. To improve output performance, UDP segmentation offloading is used if available. If there is a significant amount of data in an output queue and the path is validated, QUIC packets are not sent one by one; instead, they are collected in a buffer, which is then passed to the kernel in a single sendmsg() call using UDP GSO. This method greatly decreases the number of system calls and thus the system load.
author Vladimir Homutov <vl@nginx.com>
date Thu, 15 Jul 2021 14:22:00 +0300
parents 2dfd313f22f2
children e7a2d3914877
files auto/os/linux src/event/quic/ngx_event_quic_output.c src/os/unix/ngx_linux_config.h
diffstat 3 files changed, 262 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/auto/os/linux
+++ b/auto/os/linux
@@ -281,6 +281,27 @@ if [ $ngx_found = yes ]; then
 fi
 
 
+# UDP_SEGMENT socket option is used for segmentation offloading
+
+ngx_feature="UDP_SEGMENT"
+ngx_feature_name="NGX_HAVE_UDP_SEGMENT"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <stdint.h>
+                  #include <netinet/udp.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="socklen_t optlen = sizeof(int);
+              int val;
+              getsockopt(0, SOL_UDP, UDP_SEGMENT, &val, &optlen)"
+. auto/feature
+
+if [ $ngx_found = yes ]; then
+    UDP_SEGMENT_FOUND=YES
+    have=NGX_HAVE_UDP_SEGMENT . auto/have
+fi
+
+
 # ngx_quic_bpf module uses sockhash to select socket from reuseport group,
 # support appeared in Linux-5.7:
 #
--- a/src/event/quic/ngx_event_quic_output.c
+++ b/src/event/quic/ngx_event_quic_output.c
@@ -17,6 +17,9 @@
 #define NGX_QUIC_MAX_UDP_PAYLOAD_OUT   1252
 #define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6  1232
 
+#define NGX_QUIC_MAX_UDP_SEGMENT_BUF  65487 /* 65535 - IPv6 and UDP headers */
+#define NGX_QUIC_MAX_SEGMENTS            64 /* UDP_MAX_SEGMENTS */
+
 #define NGX_QUIC_RETRY_TOKEN_LIFETIME     3 /* seconds */
 #define NGX_QUIC_NEW_TOKEN_LIFETIME     600 /* seconds */
 #define NGX_QUIC_RETRY_BUFFER_SIZE      256
@@ -39,6 +42,16 @@
 
 static ngx_int_t ngx_quic_socket_output(ngx_connection_t *c,
     ngx_quic_socket_t *qsock);
+static ngx_int_t ngx_quic_create_datagrams(ngx_connection_t *c,
+    ngx_quic_socket_t *qsock);
+#if ((NGX_HAVE_UDP_SEGMENT) && (NGX_HAVE_MSGHDR_MSG_CONTROL))
+static ngx_uint_t ngx_quic_allow_segmentation(ngx_connection_t *c,
+    ngx_quic_socket_t *qsock);
+static ngx_int_t ngx_quic_create_segments(ngx_connection_t *c,
+    ngx_quic_socket_t *qsock);
+static ssize_t ngx_quic_send_segments(ngx_connection_t *c, u_char *buf,
+    size_t len, struct sockaddr *sockaddr, socklen_t socklen, size_t segment);
+#endif
 static ssize_t ngx_quic_output_packet(ngx_connection_t *c,
     ngx_quic_send_ctx_t *ctx, u_char *data, size_t max, size_t min,
     ngx_quic_socket_t *qsock);
@@ -84,16 +97,10 @@ ngx_quic_output(ngx_connection_t *c)
 static ngx_int_t
 ngx_quic_socket_output(ngx_connection_t *c, ngx_quic_socket_t *qsock)
 {
-    off_t                   max;
-    size_t                  len, min, in_flight;
-    ssize_t                 n;
-    u_char                 *p;
-    ngx_uint_t              i, pad;
-    ngx_quic_path_t        *path;
-    ngx_quic_send_ctx_t    *ctx;
+    size_t                  in_flight;
+    ngx_int_t               rc;
     ngx_quic_congestion_t  *cg;
     ngx_quic_connection_t  *qc;
-    static u_char           dst[NGX_QUIC_MAX_UDP_PAYLOAD_SIZE];
 
     c->log->action = "sending frames";
 
@@ -102,6 +109,43 @@ ngx_quic_socket_output(ngx_connection_t 
 
     in_flight = cg->in_flight;
 
+#if ((NGX_HAVE_UDP_SEGMENT) && (NGX_HAVE_MSGHDR_MSG_CONTROL))
+    if (ngx_quic_allow_segmentation(c, qsock)) {
+        rc = ngx_quic_create_segments(c, qsock);
+    } else
+#endif
+    {
+        rc = ngx_quic_create_datagrams(c, qsock);
+    }
+
+    if (rc != NGX_OK) {
+        return NGX_ERROR;
+    }
+
+    if (in_flight != cg->in_flight && !qc->send_timer_set && !qc->closing) {
+        qc->send_timer_set = 1;
+        ngx_add_timer(c->read, qc->tp.max_idle_timeout);
+    }
+
+    return NGX_OK;
+}
+
+
+static ngx_int_t
+ngx_quic_create_datagrams(ngx_connection_t *c, ngx_quic_socket_t *qsock)
+{
+    off_t                   max;
+    size_t                  len, min;
+    ssize_t                 n;
+    u_char                 *p;
+    ngx_uint_t              i, pad;
+    ngx_quic_path_t        *path;
+    ngx_quic_send_ctx_t    *ctx;
+    ngx_quic_connection_t  *qc;
+    static u_char           dst[NGX_QUIC_MAX_UDP_PAYLOAD_SIZE];
+
+    qc = ngx_quic_get_connection(c);
+
     path = qsock->path;
 
     for ( ;; ) {
@@ -153,16 +197,198 @@ ngx_quic_socket_output(ngx_connection_t 
         path->sent += len;
     }
 
-    if (in_flight != cg->in_flight && !qc->send_timer_set && !qc->closing) {
-        qc->send_timer_set = 1;
-        ngx_add_timer(c->read, qc->tp.max_idle_timeout);
+    return NGX_OK;
+}
+
+
+#if ((NGX_HAVE_UDP_SEGMENT) && (NGX_HAVE_MSGHDR_MSG_CONTROL))
+/* returns 1 if output may be batched with UDP segmentation offload, else 0 */
+static ngx_uint_t
+ngx_quic_allow_segmentation(ngx_connection_t *c, ngx_quic_socket_t *qsock)
+{
+    size_t                  bytes, len;
+    ngx_queue_t            *q;
+    ngx_quic_frame_t       *f;
+    ngx_quic_send_ctx_t    *ctx;
+    ngx_quic_connection_t  *qc;
+
+    if (qsock->path->state != NGX_QUIC_PATH_VALIDATED) {
+        /* don't even try to be faster on non-validated paths */
+        return 0;
+    }
+
+    qc = ngx_quic_get_connection(c);
+
+    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_initial);
+    if (!ngx_queue_empty(&ctx->frames)) {
+        return 0;    /* initial-level frames are still queued */
+    }
+
+    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_handshake);
+    if (!ngx_queue_empty(&ctx->frames)) {
+        return 0;    /* handshake-level frames are still queued */
+    }
+
+    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+
+    bytes = 0;       /* running total of queued application frame lengths */
+
+    /* packet size estimate: ctp.max_udp_payload_size capped by the buffer */
+    len = ngx_min(qc->ctp.max_udp_payload_size,
+                  NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+
+    for (q = ngx_queue_head(&ctx->frames);
+         q != ngx_queue_sentinel(&ctx->frames);
+         q = ngx_queue_next(q))
+    {
+        f = ngx_queue_data(q, ngx_quic_frame_t, queue);
+
+        bytes += f->len;
+
+        if (bytes > len * 3) {
+            /* require at least ~3 full packets to batch */
+            return 1;
+        }
     }
 
+    return 0;    /* the queue holds too little data to be worth batching */
+}
+
+/* packs application-level packets into a buffer and sends them in batches */
+static ngx_int_t
+ngx_quic_create_segments(ngx_connection_t *c, ngx_quic_socket_t *qsock)
+{
+    size_t                  len, segsize;
+    ssize_t                 n;
+    u_char                 *p, *end;
+    ngx_uint_t              nseg;
+    ngx_quic_send_ctx_t    *ctx;
+    ngx_quic_path_t        *path;
+    ngx_quic_connection_t  *qc;
+    static u_char           dst[NGX_QUIC_MAX_UDP_SEGMENT_BUF];
+
+    qc = ngx_quic_get_connection(c);
+    path = qsock->path;
+
+    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+
+    if (ngx_quic_generate_ack(c, ctx) != NGX_OK) {
+        return NGX_ERROR;
+    }
+
+    segsize = ngx_min(qc->ctp.max_udp_payload_size,
+                      NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+    p = dst;                   /* write position inside the batch buffer */
+    end = dst + sizeof(dst);
+
+    nseg = 0;                  /* packets placed in the current batch */
+
+    for ( ;; ) {
+        /* room for the next packet: a full segment or what is left in dst */
+        len = ngx_min(segsize, (size_t) (end - p));
+
+        if (len) {
+            /* len is passed as both max and min (pads to len? - confirm) */
+            n = ngx_quic_output_packet(c, ctx, p, len, len, qsock);
+            if (n == NGX_ERROR) {
+                return NGX_ERROR;
+            }
+
+            p += n;
+            nseg++;
+
+        } else {
+            n = 0;    /* no room for a packet; n == 0 triggers the flush below */
+        }
+
+        if (p == dst) {
+            break;    /* the buffer is empty, nothing (more) to send */
+        }
+
+        /* flush once no packet was added or the batch holds the maximum */
+        if (n == 0 || nseg == NGX_QUIC_MAX_SEGMENTS) {
+            n = ngx_quic_send_segments(c, dst, p - dst, path->sockaddr,
+                                       path->socklen, segsize);
+            if (n == NGX_ERROR) {
+                return NGX_ERROR;
+            }
+
+            path->sent += n;
+
+            p = dst;     /* start a new batch at the beginning of the buffer */
+            nseg = 0;
+        }
+    }
 
     return NGX_OK;
 }
 
 
+static ssize_t
+ngx_quic_send_segments(ngx_connection_t *c, u_char *buf, size_t len,
+    struct sockaddr *sockaddr, socklen_t socklen, size_t segment)
+{
+    size_t           clen;
+    ssize_t          n;
+    uint16_t        *valp;
+    struct iovec     iov;
+    struct msghdr    msg;
+    struct cmsghdr  *cmsg;
+
+#if defined(NGX_HAVE_ADDRINFO_CMSG)
+    char             msg_control[CMSG_SPACE(sizeof(uint16_t))
+                             + CMSG_SPACE(sizeof(ngx_addrinfo_t))];
+#else
+    char             msg_control[CMSG_SPACE(sizeof(uint16_t))];
+#endif
+    /* sends buf in one ngx_sendmsg() call with a UDP_SEGMENT control message */
+    ngx_memzero(&msg, sizeof(struct msghdr));
+    ngx_memzero(msg_control, sizeof(msg_control));
+
+    iov.iov_len = len;         /* the whole batch goes out as a single iovec */
+    iov.iov_base = buf;
+
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    msg.msg_name = sockaddr;
+    msg.msg_namelen = socklen;
+
+    msg.msg_control = msg_control;
+    msg.msg_controllen = sizeof(msg_control);  /* trimmed to clen below */
+
+    cmsg = CMSG_FIRSTHDR(&msg);
+
+    cmsg->cmsg_level = SOL_UDP;
+    cmsg->cmsg_type = UDP_SEGMENT;
+    cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+
+    clen = CMSG_SPACE(sizeof(uint16_t));   /* control bytes in use so far */
+
+    valp = (void *) CMSG_DATA(cmsg);
+    *valp = segment;                       /* size_t stored as uint16_t */
+
+#if defined(NGX_HAVE_ADDRINFO_CMSG)
+    if (c->listening && c->listening->wildcard && c->local_sockaddr) {
+        cmsg = CMSG_NXTHDR(&msg, cmsg);    /* optional second cmsg */
+        clen += ngx_set_srcaddr_cmsg(cmsg, c->local_sockaddr);
+    }
+#endif
+
+    msg.msg_controllen = clen;             /* only the part filled in above */
+
+    n = ngx_sendmsg(c, &msg, 0);
+    if (n == -1) {
+        return NGX_ERROR;
+    }
+
+    c->sent += n;
+
+    return n;                              /* value returned by ngx_sendmsg() */
+}
+
+#endif
+
+
+
 static ngx_uint_t
 ngx_quic_get_padding_level(ngx_connection_t *c)
 {
--- a/src/os/unix/ngx_linux_config.h
+++ b/src/os/unix/ngx_linux_config.h
@@ -103,6 +103,10 @@ typedef struct iocb  ngx_aiocb_t;
 #include <linux/capability.h>
 #endif
 
+#if (NGX_HAVE_UDP_SEGMENT)
+#include <netinet/udp.h>
+#endif
+
 
 #define NGX_LISTEN_BACKLOG        511