changeset 6635:6acaa638fa07

Events: support for EPOLLEXCLUSIVE. This flag appeared in Linux 4.5 and is useful for avoiding the thundering herd problem. The current Linux kernel implementation walks the list of exclusive waiters and queues an event to each epfd until it finds the first waiter that has threads blocked on it via epoll_wait().
author Valentin Bartenev <vbart@nginx.com>
date Fri, 15 Jul 2016 15:18:57 +0300
parents 18f6120e3b7a
children ea284434db0f
files auto/modules auto/os/linux src/event/modules/ngx_epoll_module.c src/event/ngx_event.c src/event/ngx_event.h
diffstat 5 files changed, 59 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/auto/modules
+++ b/auto/modules
@@ -43,6 +43,7 @@ fi
 if [ $NGX_TEST_BUILD_EPOLL = YES ]; then
     have=NGX_HAVE_EPOLL . auto/have
     have=NGX_HAVE_EPOLLRDHUP . auto/have
+    have=NGX_HAVE_EPOLLEXCLUSIVE . auto/have
     have=NGX_HAVE_EVENTFD . auto/have
     have=NGX_TEST_BUILD_EPOLL . auto/have
     EVENT_MODULES="$EVENT_MODULES $EPOLL_MODULE"
--- a/auto/os/linux
+++ b/auto/os/linux
@@ -70,6 +70,22 @@ if [ $ngx_found = yes ]; then
                       ee.data.ptr = NULL;
                       epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ee)"
     . auto/feature
+
+
+    # EPOLLEXCLUSIVE appeared in Linux 4.5, glibc 2.24
+
+    ngx_feature="EPOLLEXCLUSIVE"
+    ngx_feature_name="NGX_HAVE_EPOLLEXCLUSIVE"
+    ngx_feature_run=no
+    ngx_feature_incs="#include <sys/epoll.h>"
+    ngx_feature_path=
+    ngx_feature_libs=
+    ngx_feature_test="int efd = 0, fd = 0;
+                      struct epoll_event ee;
+                      ee.events = EPOLLIN|EPOLLEXCLUSIVE;
+                      ee.data.ptr = NULL;
+                      epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ee)"
+    . auto/feature
 fi
 
 
--- a/src/event/modules/ngx_epoll_module.c
+++ b/src/event/modules/ngx_epoll_module.c
@@ -27,6 +27,7 @@
 
 #define EPOLLRDHUP     0x2000
 
+#define EPOLLEXCLUSIVE 0x10000000
 #define EPOLLONESHOT   0x40000000
 #define EPOLLET        0x80000000
 
@@ -610,6 +611,12 @@ ngx_epoll_add_event(ngx_event_t *ev, ngx
         op = EPOLL_CTL_ADD;
     }
 
+#if (NGX_HAVE_EPOLLEXCLUSIVE && NGX_HAVE_EPOLLRDHUP)
+    if (flags & NGX_EXCLUSIVE_EVENT) {
+        events &= ~EPOLLRDHUP;
+    }
+#endif
+
     ee.events = events | (uint32_t) flags;
     ee.data.ptr = (void *) ((uintptr_t) c | ev->instance);
 
--- a/src/event/ngx_event.c
+++ b/src/event/ngx_event.c
@@ -822,15 +822,38 @@ ngx_event_process_init(ngx_cycle_t *cycl
         rev->handler = (c->type == SOCK_STREAM) ? ngx_event_accept
                                                 : ngx_event_recvmsg;
 
-        if (ngx_use_accept_mutex
 #if (NGX_HAVE_REUSEPORT)
-            && !ls[i].reuseport
+
+        if (ls[i].reuseport) {
+            if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
+                return NGX_ERROR;
+            }
+
+            continue;
+        }
+
 #endif
-           )
-        {
+
+        if (ngx_use_accept_mutex) {
             continue;
         }
 
+#if (NGX_HAVE_EPOLLEXCLUSIVE)
+
+        if ((ngx_event_flags & NGX_USE_EPOLL_EVENT)
+            && ccf->worker_processes > 1)
+        {
+            if (ngx_add_event(rev, NGX_READ_EVENT, NGX_EXCLUSIVE_EVENT)
+                == NGX_ERROR)
+            {
+                return NGX_ERROR;
+            }
+
+            continue;
+        }
+
+#endif
+
         if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
             return NGX_ERROR;
         }
--- a/src/event/ngx_event.h
+++ b/src/event/ngx_event.h
@@ -367,6 +367,9 @@ extern ngx_uint_t            ngx_use_epo
 #define NGX_ONESHOT_EVENT  EPOLLONESHOT
 #endif
 
+#if (NGX_HAVE_EPOLLEXCLUSIVE)
+#define NGX_EXCLUSIVE_EVENT  EPOLLEXCLUSIVE
+#endif
 
 #elif (NGX_HAVE_POLL)
 
@@ -395,6 +398,11 @@ extern ngx_uint_t            ngx_use_epo
 #endif
 
 
+#if (NGX_TEST_BUILD_EPOLL)
+#define NGX_EXCLUSIVE_EVENT  0
+#endif
+
+
 #ifndef NGX_CLEAR_EVENT
 #define NGX_CLEAR_EVENT    0    /* dummy declaration */
 #endif