Mercurial > hg > nginx
view src/http/ngx_http_upstream_round_robin.c @ 6536:f7849bfb6d21
Improved EPOLLRDHUP handling.
When it's known that the kernel supports EPOLLRDHUP, there is no need in
additional recv() call to get EOF or error when the flag is absent in the
event generated by the kernel. A special runtime test is done at startup
to detect if EPOLLRDHUP is actually supported by the kernel because
epoll_ctl() silently ignores unknown flags.
With this knowledge it's now possible to drop the "ready" flag for partial
read. Previously, the "ready" flag was kept until the recv() returned EOF
or error. In particular, this change allows the lingering close heuristics
(which relies on the "ready" flag state) to actually work on Linux, and not
wait for more data in most cases.
The "available" flag is now used in the read event with the semantics similar
to the corresponding counter in kqueue.
author | Valentin Bartenev <vbart@nginx.com> |
---|---|
date | Fri, 13 May 2016 17:19:23 +0300 |
parents | 2cd019520210 |
children | b3b7e33083ac |
line wrap: on
line source
/* * Copyright (C) Igor Sysoev * Copyright (C) Nginx, Inc. */ #include <ngx_config.h> #include <ngx_core.h> #include <ngx_http.h> #define ngx_http_upstream_tries(p) ((p)->number \ + ((p)->next ? (p)->next->number : 0)) static ngx_http_upstream_rr_peer_t *ngx_http_upstream_get_peer( ngx_http_upstream_rr_peer_data_t *rrp); #if (NGX_HTTP_SSL) static ngx_int_t ngx_http_upstream_empty_set_session(ngx_peer_connection_t *pc, void *data); static void ngx_http_upstream_empty_save_session(ngx_peer_connection_t *pc, void *data); #endif ngx_int_t ngx_http_upstream_init_round_robin(ngx_conf_t *cf, ngx_http_upstream_srv_conf_t *us) { ngx_url_t u; ngx_uint_t i, j, n, w; ngx_http_upstream_server_t *server; ngx_http_upstream_rr_peer_t *peer, **peerp; ngx_http_upstream_rr_peers_t *peers, *backup; us->peer.init = ngx_http_upstream_init_round_robin_peer; if (us->servers) { server = us->servers->elts; n = 0; w = 0; for (i = 0; i < us->servers->nelts; i++) { if (server[i].backup) { continue; } n += server[i].naddrs; w += server[i].naddrs * server[i].weight; } if (n == 0) { ngx_log_error(NGX_LOG_EMERG, cf->log, 0, "no servers in upstream \"%V\" in %s:%ui", &us->host, us->file_name, us->line); return NGX_ERROR; } peers = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peers_t)); if (peers == NULL) { return NGX_ERROR; } peer = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peer_t) * n); if (peer == NULL) { return NGX_ERROR; } peers->single = (n == 1); peers->number = n; peers->weighted = (w != n); peers->total_weight = w; peers->name = &us->host; n = 0; peerp = &peers->peer; for (i = 0; i < us->servers->nelts; i++) { if (server[i].backup) { continue; } for (j = 0; j < server[i].naddrs; j++) { peer[n].sockaddr = server[i].addrs[j].sockaddr; peer[n].socklen = server[i].addrs[j].socklen; peer[n].name = server[i].addrs[j].name; peer[n].weight = server[i].weight; peer[n].effective_weight = server[i].weight; peer[n].current_weight = 0; peer[n].max_fails = server[i].max_fails; peer[n].fail_timeout = server[i].fail_timeout; peer[n].down = server[i].down; peer[n].server = server[i].name; *peerp = &peer[n]; peerp = &peer[n].next; n++; } } us->peer.data = peers; /* backup servers */ n = 0; w = 0; for (i = 0; i < us->servers->nelts; i++) { if (!server[i].backup) { continue; } n += server[i].naddrs; w += server[i].naddrs * server[i].weight; } if (n == 0) { return NGX_OK; } backup = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peers_t)); if (backup == NULL) { return NGX_ERROR; } peer = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peer_t) * n); if (peer == NULL) { return NGX_ERROR; } peers->single = 0; backup->single = 0; backup->number = n; backup->weighted = (w != n); backup->total_weight = w; backup->name = &us->host; n = 0; peerp = &backup->peer; for (i = 0; i < us->servers->nelts; i++) { if (!server[i].backup) { continue; } for (j = 0; j < server[i].naddrs; j++) { peer[n].sockaddr = server[i].addrs[j].sockaddr; peer[n].socklen = server[i].addrs[j].socklen; peer[n].name = server[i].addrs[j].name; peer[n].weight = server[i].weight; peer[n].effective_weight = server[i].weight; peer[n].current_weight = 0; peer[n].max_fails = server[i].max_fails; peer[n].fail_timeout = server[i].fail_timeout; peer[n].down = server[i].down; peer[n].server = server[i].name; *peerp = &peer[n]; peerp = &peer[n].next; n++; } } peers->next = backup; return NGX_OK; } /* an upstream implicitly defined by proxy_pass, etc. */ if (us->port == 0) { ngx_log_error(NGX_LOG_EMERG, cf->log, 0, "no port in upstream \"%V\" in %s:%ui", &us->host, us->file_name, us->line); return NGX_ERROR; } ngx_memzero(&u, sizeof(ngx_url_t)); u.host = us->host; u.port = us->port; if (ngx_inet_resolve_host(cf->pool, &u) != NGX_OK) { if (u.err) { ngx_log_error(NGX_LOG_EMERG, cf->log, 0, "%s in upstream \"%V\" in %s:%ui", u.err, &us->host, us->file_name, us->line); } return NGX_ERROR; } n = u.naddrs; peers = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peers_t)); if (peers == NULL) { return NGX_ERROR; } peer = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peer_t) * n); if (peer == NULL) { return NGX_ERROR; } peers->single = (n == 1); peers->number = n; peers->weighted = 0; peers->total_weight = n; peers->name = &us->host; peerp = &peers->peer; for (i = 0; i < u.naddrs; i++) { peer[i].sockaddr = u.addrs[i].sockaddr; peer[i].socklen = u.addrs[i].socklen; peer[i].name = u.addrs[i].name; peer[i].weight = 1; peer[i].effective_weight = 1; peer[i].current_weight = 0; peer[i].max_fails = 1; peer[i].fail_timeout = 10; *peerp = &peer[i]; peerp = &peer[i].next; } us->peer.data = peers; /* implicitly defined upstream has no backup servers */ return NGX_OK; } ngx_int_t ngx_http_upstream_init_round_robin_peer(ngx_http_request_t *r, ngx_http_upstream_srv_conf_t *us) { ngx_uint_t n; ngx_http_upstream_rr_peer_data_t *rrp; rrp = r->upstream->peer.data; if (rrp == NULL) { rrp = ngx_palloc(r->pool, sizeof(ngx_http_upstream_rr_peer_data_t)); if (rrp == NULL) { return NGX_ERROR; } r->upstream->peer.data = rrp; } rrp->peers = us->peer.data; rrp->current = NULL; n = rrp->peers->number; if (rrp->peers->next && rrp->peers->next->number > n) { n = rrp->peers->next->number; } if (n <= 8 * sizeof(uintptr_t)) { rrp->tried = &rrp->data; rrp->data = 0; } else { n = (n + (8 * sizeof(uintptr_t) - 1)) / (8 * sizeof(uintptr_t)); rrp->tried = ngx_pcalloc(r->pool, n * sizeof(uintptr_t)); if (rrp->tried == NULL) { return NGX_ERROR; } } r->upstream->peer.get = ngx_http_upstream_get_round_robin_peer; r->upstream->peer.free = ngx_http_upstream_free_round_robin_peer; r->upstream->peer.tries = ngx_http_upstream_tries(rrp->peers); #if (NGX_HTTP_SSL) r->upstream->peer.set_session = ngx_http_upstream_set_round_robin_peer_session; r->upstream->peer.save_session = ngx_http_upstream_save_round_robin_peer_session; #endif return NGX_OK; } ngx_int_t ngx_http_upstream_create_round_robin_peer(ngx_http_request_t *r, ngx_http_upstream_resolved_t *ur) { u_char *p; size_t len; socklen_t socklen; ngx_uint_t i, n; struct sockaddr *sockaddr; ngx_http_upstream_rr_peer_t *peer, **peerp; ngx_http_upstream_rr_peers_t *peers; ngx_http_upstream_rr_peer_data_t *rrp; rrp = r->upstream->peer.data; if (rrp == NULL) { rrp = ngx_palloc(r->pool, sizeof(ngx_http_upstream_rr_peer_data_t)); if (rrp == NULL) { return NGX_ERROR; } r->upstream->peer.data = rrp; } peers = ngx_pcalloc(r->pool, sizeof(ngx_http_upstream_rr_peers_t)); if (peers == NULL) { return NGX_ERROR; } peer = ngx_pcalloc(r->pool, sizeof(ngx_http_upstream_rr_peer_t) * ur->naddrs); if (peer == NULL) { return NGX_ERROR; } peers->single = (ur->naddrs == 1); peers->number = ur->naddrs; peers->name = &ur->host; if (ur->sockaddr) { peer[0].sockaddr = ur->sockaddr; peer[0].socklen = ur->socklen; peer[0].name = ur->host; peer[0].weight = 1; peer[0].effective_weight = 1; peer[0].current_weight = 0; peer[0].max_fails = 1; peer[0].fail_timeout = 10; peers->peer = peer; } else { peerp = &peers->peer; for (i = 0; i < ur->naddrs; i++) { socklen = ur->addrs[i].socklen; sockaddr = ngx_palloc(r->pool, socklen); if (sockaddr == NULL) { return NGX_ERROR; } ngx_memcpy(sockaddr, ur->addrs[i].sockaddr, socklen); switch (sockaddr->sa_family) { #if (NGX_HAVE_INET6) case AF_INET6: ((struct sockaddr_in6 *) sockaddr)->sin6_port = htons(ur->port); break; #endif default: /* AF_INET */ ((struct sockaddr_in *) sockaddr)->sin_port = htons(ur->port); } p = ngx_pnalloc(r->pool, NGX_SOCKADDR_STRLEN); if (p == NULL) { return NGX_ERROR; } len = ngx_sock_ntop(sockaddr, socklen, p, NGX_SOCKADDR_STRLEN, 1); peer[i].sockaddr = sockaddr; peer[i].socklen = socklen; peer[i].name.len = len; peer[i].name.data = p; peer[i].weight = 1; peer[i].effective_weight = 1; peer[i].current_weight = 0; peer[i].max_fails = 1; peer[i].fail_timeout = 10; *peerp = &peer[i]; peerp = &peer[i].next; } } rrp->peers = peers; rrp->current = NULL; if (rrp->peers->number <= 8 * sizeof(uintptr_t)) { rrp->tried = &rrp->data; rrp->data = 0; } else { n = (rrp->peers->number + (8 * sizeof(uintptr_t) - 1)) / (8 * sizeof(uintptr_t)); rrp->tried = ngx_pcalloc(r->pool, n * sizeof(uintptr_t)); if (rrp->tried == NULL) { return NGX_ERROR; } } r->upstream->peer.get = ngx_http_upstream_get_round_robin_peer; r->upstream->peer.free = ngx_http_upstream_free_round_robin_peer; r->upstream->peer.tries = ngx_http_upstream_tries(rrp->peers); #if (NGX_HTTP_SSL) r->upstream->peer.set_session = ngx_http_upstream_empty_set_session; r->upstream->peer.save_session = ngx_http_upstream_empty_save_session; #endif return NGX_OK; } ngx_int_t ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data) { ngx_http_upstream_rr_peer_data_t *rrp = data; ngx_int_t rc; ngx_uint_t i, n; ngx_http_upstream_rr_peer_t *peer; ngx_http_upstream_rr_peers_t *peers; ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, "get rr peer, try: %ui", pc->tries); pc->cached = 0; pc->connection = NULL; peers = rrp->peers; ngx_http_upstream_rr_peers_wlock(peers); if (peers->single) { peer = peers->peer; if (peer->down) { goto failed; } rrp->current = peer; } else { /* there are several peers */ peer = ngx_http_upstream_get_peer(rrp); if (peer == NULL) { goto failed; } ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, "get rr peer, current: %p %i", peer, peer->current_weight); } pc->sockaddr = peer->sockaddr; pc->socklen = peer->socklen; pc->name = &peer->name; peer->conns++; ngx_http_upstream_rr_peers_unlock(peers); return NGX_OK; failed: if (peers->next) { ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, "backup servers"); rrp->peers = peers->next; n = (rrp->peers->number + (8 * sizeof(uintptr_t) - 1)) / (8 * sizeof(uintptr_t)); for (i = 0; i < n; i++) { rrp->tried[i] = 0; } ngx_http_upstream_rr_peers_unlock(peers); rc = ngx_http_upstream_get_round_robin_peer(pc, rrp); if (rc != NGX_BUSY) { return rc; } ngx_http_upstream_rr_peers_wlock(peers); } /* all peers failed, mark them as live for quick recovery */ for (peer = peers->peer; peer; peer = peer->next) { peer->fails = 0; } ngx_http_upstream_rr_peers_unlock(peers); pc->name = peers->name; return NGX_BUSY; } static ngx_http_upstream_rr_peer_t * ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp) { time_t now; uintptr_t m; ngx_int_t total; ngx_uint_t i, n, p; ngx_http_upstream_rr_peer_t *peer, *best; now = ngx_time(); best = NULL; total = 0; #if (NGX_SUPPRESS_WARN) p = 0; #endif for (peer = rrp->peers->peer, i = 0; peer; peer = peer->next, i++) { n = i / (8 * sizeof(uintptr_t)); m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t)); if (rrp->tried[n] & m) { continue; } if (peer->down) { continue; } if (peer->max_fails && peer->fails >= peer->max_fails && now - peer->checked <= peer->fail_timeout) { continue; } peer->current_weight += peer->effective_weight; total += peer->effective_weight; if (peer->effective_weight < peer->weight) { peer->effective_weight++; } if (best == NULL || peer->current_weight > best->current_weight) { best = peer; p = i; } } if (best == NULL) { return NULL; } rrp->current = best; n = p / (8 * sizeof(uintptr_t)); m = (uintptr_t) 1 << p % (8 * sizeof(uintptr_t)); rrp->tried[n] |= m; best->current_weight -= total; if (now - best->checked > best->fail_timeout) { best->checked = now; } return best; } void ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data, ngx_uint_t state) { ngx_http_upstream_rr_peer_data_t *rrp = data; time_t now; ngx_http_upstream_rr_peer_t *peer; ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, "free rr peer %ui %ui", pc->tries, state); /* TODO: NGX_PEER_KEEPALIVE */ peer = rrp->current; ngx_http_upstream_rr_peers_rlock(rrp->peers); ngx_http_upstream_rr_peer_lock(rrp->peers, peer); if (rrp->peers->single) { peer->conns--; ngx_http_upstream_rr_peer_unlock(rrp->peers, peer); ngx_http_upstream_rr_peers_unlock(rrp->peers); pc->tries = 0; return; } if (state & NGX_PEER_FAILED) { now = ngx_time(); peer->fails++; peer->accessed = now; peer->checked = now; if (peer->max_fails) { peer->effective_weight -= peer->weight / peer->max_fails; if (peer->fails >= peer->max_fails) { ngx_log_error(NGX_LOG_WARN, pc->log, 0, "upstream server temporarily disabled"); } } ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, "free rr peer failed: %p %i", peer, peer->effective_weight); if (peer->effective_weight < 0) { peer->effective_weight = 0; } } else { /* mark peer live if check passed */ if (peer->accessed < peer->checked) { peer->fails = 0; } } peer->conns--; ngx_http_upstream_rr_peer_unlock(rrp->peers, peer); ngx_http_upstream_rr_peers_unlock(rrp->peers); if (pc->tries) { pc->tries--; } } #if (NGX_HTTP_SSL) ngx_int_t ngx_http_upstream_set_round_robin_peer_session(ngx_peer_connection_t *pc, void *data) { ngx_http_upstream_rr_peer_data_t *rrp = data; ngx_int_t rc; ngx_ssl_session_t *ssl_session; ngx_http_upstream_rr_peer_t *peer; #if (NGX_HTTP_UPSTREAM_ZONE) int len; #if OPENSSL_VERSION_NUMBER >= 0x0090707fL const #endif u_char *p; ngx_http_upstream_rr_peers_t *peers; u_char buf[NGX_SSL_MAX_SESSION_SIZE]; #endif peer = rrp->current; #if (NGX_HTTP_UPSTREAM_ZONE) peers = rrp->peers; if (peers->shpool) { ngx_http_upstream_rr_peers_rlock(peers); ngx_http_upstream_rr_peer_lock(peers, peer); if (peer->ssl_session == NULL) { ngx_http_upstream_rr_peer_unlock(peers, peer); ngx_http_upstream_rr_peers_unlock(peers); return NGX_OK; } len = peer->ssl_session_len; ngx_memcpy(buf, peer->ssl_session, len); ngx_http_upstream_rr_peer_unlock(peers, peer); ngx_http_upstream_rr_peers_unlock(peers); p = buf; ssl_session = d2i_SSL_SESSION(NULL, &p, len); rc = ngx_ssl_set_session(pc->connection, ssl_session); ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, "set session: %p", ssl_session); ngx_ssl_free_session(ssl_session); return rc; } #endif ssl_session = peer->ssl_session; rc = ngx_ssl_set_session(pc->connection, ssl_session); ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, "set session: %p", ssl_session); return rc; } void ngx_http_upstream_save_round_robin_peer_session(ngx_peer_connection_t *pc, void *data) { ngx_http_upstream_rr_peer_data_t *rrp = data; ngx_ssl_session_t *old_ssl_session, *ssl_session; ngx_http_upstream_rr_peer_t *peer; #if (NGX_HTTP_UPSTREAM_ZONE) int len; u_char *p; ngx_http_upstream_rr_peers_t *peers; u_char buf[NGX_SSL_MAX_SESSION_SIZE]; #endif #if (NGX_HTTP_UPSTREAM_ZONE) peers = rrp->peers; if (peers->shpool) { ssl_session = SSL_get0_session(pc->connection->ssl->connection); if (ssl_session == NULL) { return; } ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, "save session: %p", ssl_session); len = i2d_SSL_SESSION(ssl_session, NULL); /* do not cache too big session */ if (len > NGX_SSL_MAX_SESSION_SIZE) { return; } p = buf; (void) i2d_SSL_SESSION(ssl_session, &p); peer = rrp->current; ngx_http_upstream_rr_peers_rlock(peers); ngx_http_upstream_rr_peer_lock(peers, peer); if (len > peer->ssl_session_len) { ngx_shmtx_lock(&peers->shpool->mutex); if (peer->ssl_session) { ngx_slab_free_locked(peers->shpool, peer->ssl_session); } peer->ssl_session = ngx_slab_alloc_locked(peers->shpool, len); ngx_shmtx_unlock(&peers->shpool->mutex); if (peer->ssl_session == NULL) { peer->ssl_session_len = 0; ngx_http_upstream_rr_peer_unlock(peers, peer); ngx_http_upstream_rr_peers_unlock(peers); return; } peer->ssl_session_len = len; } ngx_memcpy(peer->ssl_session, buf, len); ngx_http_upstream_rr_peer_unlock(peers, peer); ngx_http_upstream_rr_peers_unlock(peers); return; } #endif ssl_session = ngx_ssl_get_session(pc->connection); if (ssl_session == NULL) { return; } ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, "save session: %p", ssl_session); peer = rrp->current; old_ssl_session = peer->ssl_session; peer->ssl_session = ssl_session; if (old_ssl_session) { ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, "old session: %p", old_ssl_session); /* TODO: may block */ ngx_ssl_free_session(old_ssl_session); } } static ngx_int_t ngx_http_upstream_empty_set_session(ngx_peer_connection_t *pc, void *data) { return NGX_OK; } static void ngx_http_upstream_empty_save_session(ngx_peer_connection_t *pc, void *data) { return; } #endif