# HG changeset patch # User Maxim Dounin # Date 1571317339 -10800 # Node ID efd71d49bde025a9d7df4a5b48bf7ed9bd919c27 # Parent 70749256af794d615d07e2350a301bd5814e4b86 Events: available bytes calculation via ioctl(FIONREAD). This makes it possible to avoid looping for a long time while working with a fast enough peer when data are added to the socket buffer faster than we are able to read and process them (ticket #1431). This is basically what we already do on FreeBSD with kqueue, where information about the number of bytes in the socket buffer is returned by the kevent() call. With other event methods rev->available is now set to -1 when the socket is ready for reading. Later in ngx_recv() and ngx_recv_chain(), if full buffer is received, real number of bytes in the socket buffer is retrieved using ioctl(FIONREAD). Reading more than this number of bytes ensures that even with edge-triggered event methods the event will be triggered again, so it is safe to stop processing of the socket and switch to other connections. Using ioctl(FIONREAD) only after reading a full buffer is an optimization. With this approach we only call ioctl(FIONREAD) when there are at least two recv()/readv() calls. diff --git a/auto/unix b/auto/unix --- a/auto/unix +++ b/auto/unix @@ -943,6 +943,18 @@ ngx_feature_test="int i = FIONBIO; print . auto/feature +ngx_feature="ioctl(FIONREAD)" +ngx_feature_name="NGX_HAVE_FIONREAD" +ngx_feature_run=no +ngx_feature_incs="#include <sys/ioctl.h> + #include <stdio.h> + $NGX_INCLUDE_SYS_FILIO_H" +ngx_feature_path= +ngx_feature_libs= +ngx_feature_test="int i = FIONREAD; printf(\"%d\", i)" +. 
auto/feature + + ngx_feature="struct tm.tm_gmtoff" ngx_feature_name="NGX_HAVE_GMTOFF" ngx_feature_run=no diff --git a/src/event/modules/ngx_devpoll_module.c b/src/event/modules/ngx_devpoll_module.c --- a/src/event/modules/ngx_devpoll_module.c +++ b/src/event/modules/ngx_devpoll_module.c @@ -495,6 +495,7 @@ ngx_devpoll_process_events(ngx_cycle_t * if ((revents & POLLIN) && rev->active) { rev->ready = 1; + rev->available = -1; if (flags & NGX_POST_EVENTS) { queue = rev->accept ? &ngx_posted_accept_events diff --git a/src/event/modules/ngx_epoll_module.c b/src/event/modules/ngx_epoll_module.c --- a/src/event/modules/ngx_epoll_module.c +++ b/src/event/modules/ngx_epoll_module.c @@ -886,11 +886,10 @@ ngx_epoll_process_events(ngx_cycle_t *cy if (revents & EPOLLRDHUP) { rev->pending_eof = 1; } - - rev->available = 1; #endif rev->ready = 1; + rev->available = -1; if (flags & NGX_POST_EVENTS) { queue = rev->accept ? &ngx_posted_accept_events diff --git a/src/event/modules/ngx_eventport_module.c b/src/event/modules/ngx_eventport_module.c --- a/src/event/modules/ngx_eventport_module.c +++ b/src/event/modules/ngx_eventport_module.c @@ -559,6 +559,7 @@ ngx_eventport_process_events(ngx_cycle_t if (revents & POLLIN) { rev->ready = 1; + rev->available = -1; if (flags & NGX_POST_EVENTS) { queue = rev->accept ? &ngx_posted_accept_events diff --git a/src/event/modules/ngx_poll_module.c b/src/event/modules/ngx_poll_module.c --- a/src/event/modules/ngx_poll_module.c +++ b/src/event/modules/ngx_poll_module.c @@ -370,6 +370,7 @@ ngx_poll_process_events(ngx_cycle_t *cyc ev = c->read; ev->ready = 1; + ev->available = -1; queue = ev->accept ? 
&ngx_posted_accept_events : &ngx_posted_events; diff --git a/src/event/modules/ngx_select_module.c b/src/event/modules/ngx_select_module.c --- a/src/event/modules/ngx_select_module.c +++ b/src/event/modules/ngx_select_module.c @@ -330,6 +330,7 @@ ngx_select_process_events(ngx_cycle_t *c if (found) { ev->ready = 1; + ev->available = -1; queue = ev->accept ? &ngx_posted_accept_events : &ngx_posted_events; diff --git a/src/event/modules/ngx_win32_poll_module.c b/src/event/modules/ngx_win32_poll_module.c --- a/src/event/modules/ngx_win32_poll_module.c +++ b/src/event/modules/ngx_win32_poll_module.c @@ -380,6 +380,7 @@ ngx_poll_process_events(ngx_cycle_t *cyc ev = c->read; ev->ready = 1; + ev->available = -1; queue = ev->accept ? &ngx_posted_accept_events : &ngx_posted_events; diff --git a/src/event/modules/ngx_win32_select_module.c b/src/event/modules/ngx_win32_select_module.c --- a/src/event/modules/ngx_win32_select_module.c +++ b/src/event/modules/ngx_win32_select_module.c @@ -330,6 +330,7 @@ ngx_select_process_events(ngx_cycle_t *c if (found) { ev->ready = 1; + ev->available = -1; queue = ev->accept ? 
&ngx_posted_accept_events : &ngx_posted_events; diff --git a/src/event/ngx_event.h b/src/event/ngx_event.h --- a/src/event/ngx_event.h +++ b/src/event/ngx_event.h @@ -91,21 +91,14 @@ struct ngx_event_s { * write: available space in buffer when event is ready * or lowat when event is set with NGX_LOWAT_EVENT flag * - * epoll with EPOLLRDHUP: - * accept: 1 if accept many, 0 otherwise - * read: 1 if there can be data to read, 0 otherwise - * * iocp: TODO * * otherwise: * accept: 1 if accept many, 0 otherwise + * read: bytes to read when event is ready, -1 if not known */ -#if (NGX_HAVE_KQUEUE) || (NGX_HAVE_IOCP) int available; -#else - unsigned available:1; -#endif ngx_event_handler_pt handler; diff --git a/src/os/unix/ngx_readv_chain.c b/src/os/unix/ngx_readv_chain.c --- a/src/os/unix/ngx_readv_chain.c +++ b/src/os/unix/ngx_readv_chain.c @@ -60,7 +60,7 @@ ngx_readv_chain(ngx_connection_t *c, ngx "readv: eof:%d, avail:%d", rev->pending_eof, rev->available); - if (!rev->available && !rev->pending_eof) { + if (rev->available == 0 && !rev->pending_eof) { return NGX_AGAIN; } } @@ -165,6 +165,40 @@ ngx_readv_chain(ngx_connection_t *c, ngx #endif +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0) { + rev->available -= n; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and readv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "readv: avail:%d", rev->available); + + } else if (n == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + n = ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + break; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "readv: avail:%d", rev->available); + } + +#endif + #if (NGX_HAVE_EPOLLRDHUP) if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) diff --git 
a/src/os/unix/ngx_recv.c b/src/os/unix/ngx_recv.c --- a/src/os/unix/ngx_recv.c +++ b/src/os/unix/ngx_recv.c @@ -57,7 +57,7 @@ ngx_unix_recv(ngx_connection_t *c, u_cha "recv: eof:%d, avail:%d", rev->pending_eof, rev->available); - if (!rev->available && !rev->pending_eof) { + if (rev->available == 0 && !rev->pending_eof) { rev->ready = 0; return NGX_AGAIN; } @@ -116,6 +116,40 @@ ngx_unix_recv(ngx_connection_t *c, u_cha #endif +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0) { + rev->available -= n; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and recv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "recv: avail:%d", rev->available); + + } else if ((size_t) n == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + n = ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + break; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "recv: avail:%d", rev->available); + } + +#endif + #if (NGX_HAVE_EPOLLRDHUP) if ((ngx_event_flags & NGX_USE_EPOLL_EVENT) diff --git a/src/os/unix/ngx_socket.h b/src/os/unix/ngx_socket.h --- a/src/os/unix/ngx_socket.h +++ b/src/os/unix/ngx_socket.h @@ -38,6 +38,13 @@ int ngx_blocking(ngx_socket_t s); #endif +#if (NGX_HAVE_FIONREAD) + +#define ngx_socket_nread(s, n) ioctl(s, FIONREAD, n) +#define ngx_socket_nread_n "ioctl(FIONREAD)" + +#endif + int ngx_tcp_nopush(ngx_socket_t s); int ngx_tcp_push(ngx_socket_t s); diff --git a/src/os/win32/ngx_socket.c b/src/os/win32/ngx_socket.c --- a/src/os/win32/ngx_socket.c +++ b/src/os/win32/ngx_socket.c @@ -28,6 +28,21 @@ ngx_blocking(ngx_socket_t s) int +ngx_socket_nread(ngx_socket_t s, int *n) +{ + unsigned long nread; + + if (ioctlsocket(s, FIONREAD, &nread) == -1) { + return -1; + } + + *n = nread; + + return 0; +} + + 
+int ngx_tcp_push(ngx_socket_t s) { return 0; diff --git a/src/os/win32/ngx_socket.h b/src/os/win32/ngx_socket.h --- a/src/os/win32/ngx_socket.h +++ b/src/os/win32/ngx_socket.h @@ -31,6 +31,9 @@ int ngx_blocking(ngx_socket_t s); #define ngx_nonblocking_n "ioctlsocket(FIONBIO)" #define ngx_blocking_n "ioctlsocket(!FIONBIO)" +int ngx_socket_nread(ngx_socket_t s, int *n); +#define ngx_socket_nread_n "ioctlsocket(FIONREAD)" + #define ngx_shutdown_socket shutdown #define ngx_shutdown_socket_n "shutdown()" diff --git a/src/os/win32/ngx_win32_config.h b/src/os/win32/ngx_win32_config.h --- a/src/os/win32/ngx_win32_config.h +++ b/src/os/win32/ngx_win32_config.h @@ -273,6 +273,10 @@ typedef int sig_atomic_t #define NGX_HAVE_SO_SNDLOWAT 0 #endif +#ifndef NGX_HAVE_FIONREAD +#define NGX_HAVE_FIONREAD 1 +#endif + #define NGX_HAVE_GETADDRINFO 1 #define ngx_random rand diff --git a/src/os/win32/ngx_wsarecv.c b/src/os/win32/ngx_wsarecv.c --- a/src/os/win32/ngx_wsarecv.c +++ b/src/os/win32/ngx_wsarecv.c @@ -51,6 +51,45 @@ ngx_wsarecv(ngx_connection_t *c, u_char return n; } +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0 && bytes > 0) { + rev->available -= bytes; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and WSARecv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + + } else if (bytes == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + n = ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + + if (n == NGX_ERROR) { + rev->error = 1; + } + + return n; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + } + +#endif + if (bytes < size) { rev->ready = 0; } diff --git a/src/os/win32/ngx_wsarecv_chain.c 
b/src/os/win32/ngx_wsarecv_chain.c --- a/src/os/win32/ngx_wsarecv_chain.c +++ b/src/os/win32/ngx_wsarecv_chain.c @@ -94,6 +94,41 @@ ngx_wsarecv_chain(ngx_connection_t *c, n return NGX_ERROR; } +#if (NGX_HAVE_FIONREAD) + + if (rev->available >= 0 && bytes > 0) { + rev->available -= bytes; + + /* + * negative rev->available means some additional bytes + * were received between kernel notification and WSARecv(), + * and therefore ev->ready can be safely reset even for + * edge-triggered event methods + */ + + if (rev->available < 0) { + rev->available = 0; + rev->ready = 0; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + + } else if (bytes == size) { + + if (ngx_socket_nread(c->fd, &rev->available) == -1) { + rev->error = 1; + ngx_connection_error(c, ngx_socket_errno, + ngx_socket_nread_n " failed"); + return NGX_ERROR; + } + + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0, + "WSARecv: avail:%d", rev->available); + } + +#endif + if (bytes < size) { rev->ready = 0; }