Mercurial > hg > nginx
comparison src/http/ngx_http_upstream_round_robin.c @ 4207:4fc91bae6f83
Better recheck of dead upstream servers.
Previously, nginx marked a backend as live again as soon as fail_timeout
(10s by default) had passed since the last failure. On the other hand, detecting
a dead backend takes up to 60s (proxy_connect_timeout) in the typical situation
where the "backend is down and doesn't respond to any packets". This resulted in
suboptimal behaviour in the above situation (up to 23% of requests were
directed to the dead backend with default settings).
A more detailed description of the problem may be found here (in Russian):
http://mailman.nginx.org/pipermail/nginx-ru/2011-August/042172.html
The fix is to allow only one request after fail_timeout passes, and to
mark the backend as "live" only if this request succeeds.
Note that with the new code a backend will not be marked "live" until the "check"
request is completed, and this may take a while in some specific workloads
(e.g. streaming). This is believed to be acceptable.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Wed, 12 Oct 2011 14:22:48 +0000 |
parents | 9d4cbb09ae8b |
children | d620f497c50f |
comparison
equal
deleted
inserted
replaced
4206:1a94a56a4e5d | 4207:4fc91bae6f83 |
---|---|
441 || peer->fails < peer->max_fails) | 441 || peer->fails < peer->max_fails) |
442 { | 442 { |
443 break; | 443 break; |
444 } | 444 } |
445 | 445 |
446 if (now - peer->accessed > peer->fail_timeout) { | 446 if (now - peer->checked > peer->fail_timeout) { |
447 peer->fails = 0; | 447 peer->checked = now; |
448 break; | 448 break; |
449 } | 449 } |
450 | 450 |
451 peer->current_weight = 0; | 451 peer->current_weight = 0; |
452 | 452 |
489 || peer->fails < peer->max_fails) | 489 || peer->fails < peer->max_fails) |
490 { | 490 { |
491 break; | 491 break; |
492 } | 492 } |
493 | 493 |
494 if (now - peer->accessed > peer->fail_timeout) { | 494 if (now - peer->checked > peer->fail_timeout) { |
495 peer->fails = 0; | 495 peer->checked = now; |
496 break; | 496 break; |
497 } | 497 } |
498 | 498 |
499 peer->current_weight = 0; | 499 peer->current_weight = 0; |
500 | 500 |
661 if (rrp->peers->single) { | 661 if (rrp->peers->single) { |
662 pc->tries = 0; | 662 pc->tries = 0; |
663 return; | 663 return; |
664 } | 664 } |
665 | 665 |
666 peer = &rrp->peers->peer[rrp->current]; | |
667 | |
666 if (state & NGX_PEER_FAILED) { | 668 if (state & NGX_PEER_FAILED) { |
667 now = ngx_time(); | 669 now = ngx_time(); |
668 | 670 |
669 peer = &rrp->peers->peer[rrp->current]; | |
670 | |
671 /* ngx_lock_mutex(rrp->peers->mutex); */ | 671 /* ngx_lock_mutex(rrp->peers->mutex); */ |
672 | 672 |
673 peer->fails++; | 673 peer->fails++; |
674 peer->accessed = now; | 674 peer->accessed = now; |
675 peer->checked = now; | |
675 | 676 |
676 if (peer->max_fails) { | 677 if (peer->max_fails) { |
677 peer->current_weight -= peer->weight / peer->max_fails; | 678 peer->current_weight -= peer->weight / peer->max_fails; |
678 } | 679 } |
679 | 680 |
684 if (peer->current_weight < 0) { | 685 if (peer->current_weight < 0) { |
685 peer->current_weight = 0; | 686 peer->current_weight = 0; |
686 } | 687 } |
687 | 688 |
688 /* ngx_unlock_mutex(rrp->peers->mutex); */ | 689 /* ngx_unlock_mutex(rrp->peers->mutex); */ |
690 | |
691 } else { | |
692 | |
693 /* mark peer live if check passed */ | |
694 | |
695 if (peer->accessed < peer->checked) { | |
696 peer->fails = 0; | |
697 } | |
689 } | 698 } |
690 | 699 |
691 rrp->current++; | 700 rrp->current++; |
692 | 701 |
693 if (rrp->current >= rrp->peers->number) { | 702 if (rrp->current >= rrp->peers->number) { |