changeset 7583:efd71d49bde0

Events: available bytes calculation via ioctl(FIONREAD). This makes it possible to avoid looping for a long time while working with a fast enough peer when data are added to the socket buffer faster than we are able to read and process them (ticket #1431). This is basically what we already do on FreeBSD with kqueue, where information about the number of bytes in the socket buffer is returned by the kevent() call. With other event methods rev->available is now set to -1 when the socket is ready for reading. Later in ngx_recv() and ngx_recv_chain(), if full buffer is received, real number of bytes in the socket buffer is retrieved using ioctl(FIONREAD). Reading more than this number of bytes ensures that even with edge-triggered event methods the event will be triggered again, so it is safe to stop processing of the socket and switch to other connections. Using ioctl(FIONREAD) only after reading a full buffer is an optimization. With this approach we only call ioctl(FIONREAD) when there are at least two recv()/readv() calls.
author Maxim Dounin <mdounin@mdounin.ru>
date Thu, 17 Oct 2019 16:02:19 +0300
parents 70749256af79
children 9d2ad2fb4423
files auto/unix src/event/modules/ngx_devpoll_module.c src/event/modules/ngx_epoll_module.c src/event/modules/ngx_eventport_module.c src/event/modules/ngx_poll_module.c src/event/modules/ngx_select_module.c src/event/modules/ngx_win32_poll_module.c src/event/modules/ngx_win32_select_module.c src/event/ngx_event.h src/os/unix/ngx_readv_chain.c src/os/unix/ngx_recv.c src/os/unix/ngx_socket.h src/os/win32/ngx_socket.c src/os/win32/ngx_socket.h src/os/win32/ngx_win32_config.h src/os/win32/ngx_wsarecv.c src/os/win32/ngx_wsarecv_chain.c
diffstat 17 files changed, 193 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/auto/unix
+++ b/auto/unix
@@ -943,6 +943,18 @@ ngx_feature_test="int i = FIONBIO; print
 . auto/feature
 
 
+ngx_feature="ioctl(FIONREAD)"
+ngx_feature_name="NGX_HAVE_FIONREAD"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/ioctl.h>
+                  #include <stdio.h>
+                  $NGX_INCLUDE_SYS_FILIO_H"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="int i = FIONREAD; printf(\"%d\", i)"
+. auto/feature
+
+
 ngx_feature="struct tm.tm_gmtoff"
 ngx_feature_name="NGX_HAVE_GMTOFF"
 ngx_feature_run=no
--- a/src/event/modules/ngx_devpoll_module.c
+++ b/src/event/modules/ngx_devpoll_module.c
@@ -495,6 +495,7 @@ ngx_devpoll_process_events(ngx_cycle_t *
 
         if ((revents & POLLIN) && rev->active) {
             rev->ready = 1;
+            rev->available = -1;
 
             if (flags & NGX_POST_EVENTS) {
                 queue = rev->accept ? &ngx_posted_accept_events
--- a/src/event/modules/ngx_epoll_module.c
+++ b/src/event/modules/ngx_epoll_module.c
@@ -886,11 +886,10 @@ ngx_epoll_process_events(ngx_cycle_t *cy
             if (revents & EPOLLRDHUP) {
                 rev->pending_eof = 1;
             }
-
-            rev->available = 1;
 #endif
 
             rev->ready = 1;
+            rev->available = -1;
 
             if (flags & NGX_POST_EVENTS) {
                 queue = rev->accept ? &ngx_posted_accept_events
--- a/src/event/modules/ngx_eventport_module.c
+++ b/src/event/modules/ngx_eventport_module.c
@@ -559,6 +559,7 @@ ngx_eventport_process_events(ngx_cycle_t
 
             if (revents & POLLIN) {
                 rev->ready = 1;
+                rev->available = -1;
 
                 if (flags & NGX_POST_EVENTS) {
                     queue = rev->accept ? &ngx_posted_accept_events
--- a/src/event/modules/ngx_poll_module.c
+++ b/src/event/modules/ngx_poll_module.c
@@ -370,6 +370,7 @@ ngx_poll_process_events(ngx_cycle_t *cyc
 
             ev = c->read;
             ev->ready = 1;
+            ev->available = -1;
 
             queue = ev->accept ? &ngx_posted_accept_events
                                : &ngx_posted_events;
--- a/src/event/modules/ngx_select_module.c
+++ b/src/event/modules/ngx_select_module.c
@@ -330,6 +330,7 @@ ngx_select_process_events(ngx_cycle_t *c
 
         if (found) {
             ev->ready = 1;
+            ev->available = -1;
 
             queue = ev->accept ? &ngx_posted_accept_events
                                : &ngx_posted_events;
--- a/src/event/modules/ngx_win32_poll_module.c
+++ b/src/event/modules/ngx_win32_poll_module.c
@@ -380,6 +380,7 @@ ngx_poll_process_events(ngx_cycle_t *cyc
 
             ev = c->read;
             ev->ready = 1;
+            ev->available = -1;
 
             queue = ev->accept ? &ngx_posted_accept_events
                                : &ngx_posted_events;
--- a/src/event/modules/ngx_win32_select_module.c
+++ b/src/event/modules/ngx_win32_select_module.c
@@ -330,6 +330,7 @@ ngx_select_process_events(ngx_cycle_t *c
 
         if (found) {
             ev->ready = 1;
+            ev->available = -1;
 
             queue = ev->accept ? &ngx_posted_accept_events
                                : &ngx_posted_events;
--- a/src/event/ngx_event.h
+++ b/src/event/ngx_event.h
@@ -91,21 +91,14 @@ struct ngx_event_s {
      *   write:      available space in buffer when event is ready
      *               or lowat when event is set with NGX_LOWAT_EVENT flag
      *
-     * epoll with EPOLLRDHUP:
-     *   accept:     1 if accept many, 0 otherwise
-     *   read:       1 if there can be data to read, 0 otherwise
-     *
      * iocp: TODO
      *
      * otherwise:
      *   accept:     1 if accept many, 0 otherwise
+     *   read:       bytes to read when event is ready, -1 if not known
      */
 
-#if (NGX_HAVE_KQUEUE) || (NGX_HAVE_IOCP)
     int              available;
-#else
-    unsigned         available:1;
-#endif
 
     ngx_event_handler_pt  handler;
 
--- a/src/os/unix/ngx_readv_chain.c
+++ b/src/os/unix/ngx_readv_chain.c
@@ -60,7 +60,7 @@ ngx_readv_chain(ngx_connection_t *c, ngx
                        "readv: eof:%d, avail:%d",
                        rev->pending_eof, rev->available);
 
-        if (!rev->available && !rev->pending_eof) {
+        if (rev->available == 0 && !rev->pending_eof) {
             return NGX_AGAIN;
         }
     }
@@ -165,6 +165,40 @@ ngx_readv_chain(ngx_connection_t *c, ngx
 
 #endif
 
+#if (NGX_HAVE_FIONREAD)
+
+            if (rev->available >= 0) {
+                rev->available -= n;
+
+                /*
+                 * negative rev->available means some additional bytes
+                 * were received between kernel notification and readv(),
+                 * and therefore ev->ready can be safely reset even for
+                 * edge-triggered event methods
+                 */
+
+                if (rev->available < 0) {
+                    rev->available = 0;
+                    rev->ready = 0;
+                }
+
+                ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                               "readv: avail:%d", rev->available);
+
+            } else if (n == size) {
+
+                if (ngx_socket_nread(c->fd, &rev->available) == -1) {
+                    n = ngx_connection_error(c, ngx_socket_errno,
+                                             ngx_socket_nread_n " failed");
+                    break;
+                }
+
+                ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                               "readv: avail:%d", rev->available);
+            }
+
+#endif
+
 #if (NGX_HAVE_EPOLLRDHUP)
 
             if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
--- a/src/os/unix/ngx_recv.c
+++ b/src/os/unix/ngx_recv.c
@@ -57,7 +57,7 @@ ngx_unix_recv(ngx_connection_t *c, u_cha
                        "recv: eof:%d, avail:%d",
                        rev->pending_eof, rev->available);
 
-        if (!rev->available && !rev->pending_eof) {
+        if (rev->available == 0 && !rev->pending_eof) {
             rev->ready = 0;
             return NGX_AGAIN;
         }
@@ -116,6 +116,40 @@ ngx_unix_recv(ngx_connection_t *c, u_cha
 
 #endif
 
+#if (NGX_HAVE_FIONREAD)
+
+            if (rev->available >= 0) {
+                rev->available -= n;
+
+                /*
+                 * negative rev->available means some additional bytes
+                 * were received between kernel notification and recv(),
+                 * and therefore ev->ready can be safely reset even for
+                 * edge-triggered event methods
+                 */
+
+                if (rev->available < 0) {
+                    rev->available = 0;
+                    rev->ready = 0;
+                }
+
+                ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                               "recv: avail:%d", rev->available);
+
+            } else if ((size_t) n == size) {
+
+                if (ngx_socket_nread(c->fd, &rev->available) == -1) {
+                    n = ngx_connection_error(c, ngx_socket_errno,
+                                             ngx_socket_nread_n " failed");
+                    break;
+                }
+
+                ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                               "recv: avail:%d", rev->available);
+            }
+
+#endif
+
 #if (NGX_HAVE_EPOLLRDHUP)
 
             if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
--- a/src/os/unix/ngx_socket.h
+++ b/src/os/unix/ngx_socket.h
@@ -38,6 +38,13 @@ int ngx_blocking(ngx_socket_t s);
 
 #endif
 
+#if (NGX_HAVE_FIONREAD)
+
+#define ngx_socket_nread(s, n)  ioctl(s, FIONREAD, n)
+#define ngx_socket_nread_n      "ioctl(FIONREAD)"
+
+#endif
+
 int ngx_tcp_nopush(ngx_socket_t s);
 int ngx_tcp_push(ngx_socket_t s);
 
--- a/src/os/win32/ngx_socket.c
+++ b/src/os/win32/ngx_socket.c
@@ -28,6 +28,21 @@ ngx_blocking(ngx_socket_t s)
 
 
 int
+ngx_socket_nread(ngx_socket_t s, int *n)
+{
+    unsigned long  nread;
+
+    if (ioctlsocket(s, FIONREAD, &nread) == -1) {
+        return -1;
+    }
+
+    *n = nread;
+
+    return 0;
+}
+
+
+int
 ngx_tcp_push(ngx_socket_t s)
 {
     return 0;
--- a/src/os/win32/ngx_socket.h
+++ b/src/os/win32/ngx_socket.h
@@ -31,6 +31,9 @@ int ngx_blocking(ngx_socket_t s);
 #define ngx_nonblocking_n   "ioctlsocket(FIONBIO)"
 #define ngx_blocking_n      "ioctlsocket(!FIONBIO)"
 
+int ngx_socket_nread(ngx_socket_t s, int *n);
+#define ngx_socket_nread_n  "ioctlsocket(FIONREAD)"
+
 #define ngx_shutdown_socket    shutdown
 #define ngx_shutdown_socket_n  "shutdown()"
 
--- a/src/os/win32/ngx_win32_config.h
+++ b/src/os/win32/ngx_win32_config.h
@@ -273,6 +273,10 @@ typedef int                 sig_atomic_t
 #define NGX_HAVE_SO_SNDLOWAT         0
 #endif
 
+#ifndef NGX_HAVE_FIONREAD
+#define NGX_HAVE_FIONREAD            1
+#endif
+
 #define NGX_HAVE_GETADDRINFO         1
 
 #define ngx_random               rand
--- a/src/os/win32/ngx_wsarecv.c
+++ b/src/os/win32/ngx_wsarecv.c
@@ -51,6 +51,45 @@ ngx_wsarecv(ngx_connection_t *c, u_char 
         return n;
     }
 
+#if (NGX_HAVE_FIONREAD)
+
+    if (rev->available >= 0 && bytes > 0) {
+        rev->available -= bytes;
+
+        /*
+         * negative rev->available means some additional bytes
+         * were received between kernel notification and WSARecv(),
+         * and therefore ev->ready can be safely reset even for
+         * edge-triggered event methods
+         */
+
+        if (rev->available < 0) {
+            rev->available = 0;
+            rev->ready = 0;
+        }
+
+        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "WSARecv: avail:%d", rev->available);
+
+    } else if (bytes == size) {
+
+        if (ngx_socket_nread(c->fd, &rev->available) == -1) {
+            n = ngx_connection_error(c, ngx_socket_errno,
+                                     ngx_socket_nread_n " failed");
+
+            if (n == NGX_ERROR) {
+                rev->error = 1;
+            }
+
+            return n;
+        }
+
+        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "WSARecv: avail:%d", rev->available);
+    }
+
+#endif
+
     if (bytes < size) {
         rev->ready = 0;
     }
--- a/src/os/win32/ngx_wsarecv_chain.c
+++ b/src/os/win32/ngx_wsarecv_chain.c
@@ -94,6 +94,41 @@ ngx_wsarecv_chain(ngx_connection_t *c, n
         return NGX_ERROR;
     }
 
+#if (NGX_HAVE_FIONREAD)
+
+    if (rev->available >= 0 && bytes > 0) {
+        rev->available -= bytes;
+
+        /*
+         * negative rev->available means some additional bytes
+         * were received between kernel notification and WSARecv(),
+         * and therefore ev->ready can be safely reset even for
+         * edge-triggered event methods
+         */
+
+        if (rev->available < 0) {
+            rev->available = 0;
+            rev->ready = 0;
+        }
+
+        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "WSARecv: avail:%d", rev->available);
+
+    } else if (bytes == size) {
+
+        if (ngx_socket_nread(c->fd, &rev->available) == -1) {
+            rev->error = 1;
+            ngx_connection_error(c, ngx_socket_errno,
+                                 ngx_socket_nread_n " failed");
+            return NGX_ERROR;
+        }
+
+        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "WSARecv: avail:%d", rev->available);
+    }
+
+#endif
+
     if (bytes < size) {
         rev->ready = 0;
     }