Mercurial > hg > nginx
comparison src/http/ngx_http_upstream_round_robin.c @ 4667:d05ab8793a69 stable-1.2
Merge of r4622, r4623: balancing changes.
*) Upstream: smooth weighted round-robin balancing.
For edge-case weights such as { 5, 1, 1 } we now produce the sequence
{ a, a, b, a, c, a, a } instead of the { c, b, a, a, a, a, a } produced previously.
The algorithm is as follows: on each peer selection we increase the
current_weight of each eligible peer by its weight, select the peer with the
greatest current_weight, and reduce its current_weight by the total number of
weight points distributed among the peers.
With weights { 5, 1, 1 } this gives the following sequence of
current_weight values:
  a   b   c
  0   0   0  (initial state)
  5   1   1  (a selected)
 -2   1   1
  3   2   2  (a selected)
 -4   2   2
  1   3   3  (b selected)
  1  -4   3
  6  -3   4  (a selected)
 -1  -3   4
  4  -2   5  (c selected)
  4  -2  -2
  9  -1  -1  (a selected)
  2  -1  -1
  7   0   0  (a selected)
  0   0   0
To preserve weight reduction in case of failures, the effective_weight
variable was introduced; it usually matches the peer's weight, but is
temporarily reduced on peer failures.
This change also fixes a loop with backup servers and proxy_next_upstream
http_404 (ticket #47), and the skipping of alive upstreams in some cases when
there are multiple dead ones (ticket #64).
*) Upstream: fixed ip_hash rebalancing with the "down" flag.
Because the weight was set to 0 for down peers, the order of peers after sorting
wasn't the same as without the "down" flag (down peers ended up at the end),
resulting in rebalancing of clients on other servers. The only
rebalancing that should happen after adding "down" to a server is the one
for clients on that server.
The problem was introduced in r1377 (which fixed an endless loop by setting
the weight to 0 for down servers). The loop is no longer possible with the new
smooth algorithm, so preserving the original weight is safe.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Mon, 04 Jun 2012 11:21:58 +0000 |
parents | 1db899642518 |
children | 0141b4aec0e4 |
comparison
equal
deleted
inserted
replaced
4666:0bb016b1fd2d | 4667:d05ab8793a69 |
---|---|
10 #include <ngx_http.h> | 10 #include <ngx_http.h> |
11 | 11 |
12 | 12 |
13 static ngx_int_t ngx_http_upstream_cmp_servers(const void *one, | 13 static ngx_int_t ngx_http_upstream_cmp_servers(const void *one, |
14 const void *two); | 14 const void *two); |
15 static ngx_uint_t | 15 static ngx_http_upstream_rr_peer_t *ngx_http_upstream_get_peer( |
16 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers); | 16 ngx_http_upstream_rr_peer_data_t *rrp); |
17 | 17 |
18 #if (NGX_HTTP_SSL) | 18 #if (NGX_HTTP_SSL) |
19 | 19 |
20 static ngx_int_t ngx_http_upstream_empty_set_session(ngx_peer_connection_t *pc, | 20 static ngx_int_t ngx_http_upstream_empty_set_session(ngx_peer_connection_t *pc, |
21 void *data); | 21 void *data); |
78 peers->peer[n].socklen = server[i].addrs[j].socklen; | 78 peers->peer[n].socklen = server[i].addrs[j].socklen; |
79 peers->peer[n].name = server[i].addrs[j].name; | 79 peers->peer[n].name = server[i].addrs[j].name; |
80 peers->peer[n].max_fails = server[i].max_fails; | 80 peers->peer[n].max_fails = server[i].max_fails; |
81 peers->peer[n].fail_timeout = server[i].fail_timeout; | 81 peers->peer[n].fail_timeout = server[i].fail_timeout; |
82 peers->peer[n].down = server[i].down; | 82 peers->peer[n].down = server[i].down; |
83 peers->peer[n].weight = server[i].down ? 0 : server[i].weight; | 83 peers->peer[n].weight = server[i].weight; |
84 peers->peer[n].current_weight = peers->peer[n].weight; | 84 peers->peer[n].effective_weight = server[i].weight; |
85 peers->peer[n].current_weight = 0; | |
85 n++; | 86 n++; |
86 } | 87 } |
87 } | 88 } |
88 | 89 |
89 us->peer.data = peers; | 90 us->peer.data = peers; |
129 | 130 |
130 backup->peer[n].sockaddr = server[i].addrs[j].sockaddr; | 131 backup->peer[n].sockaddr = server[i].addrs[j].sockaddr; |
131 backup->peer[n].socklen = server[i].addrs[j].socklen; | 132 backup->peer[n].socklen = server[i].addrs[j].socklen; |
132 backup->peer[n].name = server[i].addrs[j].name; | 133 backup->peer[n].name = server[i].addrs[j].name; |
133 backup->peer[n].weight = server[i].weight; | 134 backup->peer[n].weight = server[i].weight; |
134 backup->peer[n].current_weight = server[i].weight; | 135 backup->peer[n].effective_weight = server[i].weight; |
136 backup->peer[n].current_weight = 0; | |
135 backup->peer[n].max_fails = server[i].max_fails; | 137 backup->peer[n].max_fails = server[i].max_fails; |
136 backup->peer[n].fail_timeout = server[i].fail_timeout; | 138 backup->peer[n].fail_timeout = server[i].fail_timeout; |
137 backup->peer[n].down = server[i].down; | 139 backup->peer[n].down = server[i].down; |
138 n++; | 140 n++; |
139 } | 141 } |
188 for (i = 0; i < u.naddrs; i++) { | 190 for (i = 0; i < u.naddrs; i++) { |
189 peers->peer[i].sockaddr = u.addrs[i].sockaddr; | 191 peers->peer[i].sockaddr = u.addrs[i].sockaddr; |
190 peers->peer[i].socklen = u.addrs[i].socklen; | 192 peers->peer[i].socklen = u.addrs[i].socklen; |
191 peers->peer[i].name = u.addrs[i].name; | 193 peers->peer[i].name = u.addrs[i].name; |
192 peers->peer[i].weight = 1; | 194 peers->peer[i].weight = 1; |
193 peers->peer[i].current_weight = 1; | 195 peers->peer[i].effective_weight = 1; |
196 peers->peer[i].current_weight = 0; | |
194 peers->peer[i].max_fails = 1; | 197 peers->peer[i].max_fails = 1; |
195 peers->peer[i].fail_timeout = 10; | 198 peers->peer[i].fail_timeout = 10; |
196 } | 199 } |
197 | 200 |
198 us->peer.data = peers; | 201 us->peer.data = peers; |
304 if (ur->sockaddr) { | 307 if (ur->sockaddr) { |
305 peers->peer[0].sockaddr = ur->sockaddr; | 308 peers->peer[0].sockaddr = ur->sockaddr; |
306 peers->peer[0].socklen = ur->socklen; | 309 peers->peer[0].socklen = ur->socklen; |
307 peers->peer[0].name = ur->host; | 310 peers->peer[0].name = ur->host; |
308 peers->peer[0].weight = 1; | 311 peers->peer[0].weight = 1; |
309 peers->peer[0].current_weight = 1; | 312 peers->peer[0].effective_weight = 1; |
313 peers->peer[0].current_weight = 0; | |
310 peers->peer[0].max_fails = 1; | 314 peers->peer[0].max_fails = 1; |
311 peers->peer[0].fail_timeout = 10; | 315 peers->peer[0].fail_timeout = 10; |
312 | 316 |
313 } else { | 317 } else { |
314 | 318 |
336 peers->peer[i].sockaddr = (struct sockaddr *) sin; | 340 peers->peer[i].sockaddr = (struct sockaddr *) sin; |
337 peers->peer[i].socklen = sizeof(struct sockaddr_in); | 341 peers->peer[i].socklen = sizeof(struct sockaddr_in); |
338 peers->peer[i].name.len = len; | 342 peers->peer[i].name.len = len; |
339 peers->peer[i].name.data = p; | 343 peers->peer[i].name.data = p; |
340 peers->peer[i].weight = 1; | 344 peers->peer[i].weight = 1; |
341 peers->peer[i].current_weight = 1; | 345 peers->peer[i].effective_weight = 1; |
346 peers->peer[i].current_weight = 0; | |
342 peers->peer[i].max_fails = 1; | 347 peers->peer[i].max_fails = 1; |
343 peers->peer[i].fail_timeout = 10; | 348 peers->peer[i].fail_timeout = 10; |
344 } | 349 } |
345 } | 350 } |
346 | 351 |
376 ngx_int_t | 381 ngx_int_t |
377 ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data) | 382 ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data) |
378 { | 383 { |
379 ngx_http_upstream_rr_peer_data_t *rrp = data; | 384 ngx_http_upstream_rr_peer_data_t *rrp = data; |
380 | 385 |
381 time_t now; | |
382 uintptr_t m; | |
383 ngx_int_t rc; | 386 ngx_int_t rc; |
384 ngx_uint_t i, n; | 387 ngx_uint_t i, n; |
385 ngx_connection_t *c; | 388 ngx_connection_t *c; |
386 ngx_http_upstream_rr_peer_t *peer; | 389 ngx_http_upstream_rr_peer_t *peer; |
387 ngx_http_upstream_rr_peers_t *peers; | 390 ngx_http_upstream_rr_peers_t *peers; |
388 | 391 |
389 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, | 392 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, |
390 "get rr peer, try: %ui", pc->tries); | 393 "get rr peer, try: %ui", pc->tries); |
391 | 394 |
392 now = ngx_time(); | |
393 | |
394 /* ngx_lock_mutex(rrp->peers->mutex); */ | 395 /* ngx_lock_mutex(rrp->peers->mutex); */ |
395 | 396 |
396 if (rrp->peers->last_cached) { | 397 if (rrp->peers->last_cached) { |
397 | 398 |
398 /* cached connection */ | 399 /* cached connection */ |
421 | 422 |
422 } else { | 423 } else { |
423 | 424 |
424 /* there are several peers */ | 425 /* there are several peers */ |
425 | 426 |
426 if (pc->tries == rrp->peers->number) { | 427 peer = ngx_http_upstream_get_peer(rrp); |
427 | 428 |
428 /* it's a first try - get a current peer */ | 429 if (peer == NULL) { |
429 | 430 goto failed; |
430 i = pc->tries; | 431 } |
431 | 432 |
432 for ( ;; ) { | 433 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, |
433 rrp->current = ngx_http_upstream_get_peer(rrp->peers); | 434 "get rr peer, current: %ui %i", |
434 | 435 rrp->current, peer->current_weight); |
435 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, | |
436 "get rr peer, current: %ui %i", | |
437 rrp->current, | |
438 rrp->peers->peer[rrp->current].current_weight); | |
439 | |
440 n = rrp->current / (8 * sizeof(uintptr_t)); | |
441 m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t)); | |
442 | |
443 if (!(rrp->tried[n] & m)) { | |
444 peer = &rrp->peers->peer[rrp->current]; | |
445 | |
446 if (!peer->down) { | |
447 | |
448 if (peer->max_fails == 0 | |
449 || peer->fails < peer->max_fails) | |
450 { | |
451 break; | |
452 } | |
453 | |
454 if (now - peer->checked > peer->fail_timeout) { | |
455 peer->checked = now; | |
456 break; | |
457 } | |
458 | |
459 peer->current_weight = 0; | |
460 | |
461 } else { | |
462 rrp->tried[n] |= m; | |
463 } | |
464 | |
465 pc->tries--; | |
466 } | |
467 | |
468 if (pc->tries == 0) { | |
469 goto failed; | |
470 } | |
471 | |
472 if (--i == 0) { | |
473 ngx_log_error(NGX_LOG_ALERT, pc->log, 0, | |
474 "round robin upstream stuck on %ui tries", | |
475 pc->tries); | |
476 goto failed; | |
477 } | |
478 } | |
479 | |
480 peer->current_weight--; | |
481 | |
482 } else { | |
483 | |
484 i = pc->tries; | |
485 | |
486 for ( ;; ) { | |
487 n = rrp->current / (8 * sizeof(uintptr_t)); | |
488 m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t)); | |
489 | |
490 if (!(rrp->tried[n] & m)) { | |
491 | |
492 peer = &rrp->peers->peer[rrp->current]; | |
493 | |
494 if (!peer->down) { | |
495 | |
496 if (peer->max_fails == 0 | |
497 || peer->fails < peer->max_fails) | |
498 { | |
499 break; | |
500 } | |
501 | |
502 if (now - peer->checked > peer->fail_timeout) { | |
503 peer->checked = now; | |
504 break; | |
505 } | |
506 | |
507 peer->current_weight = 0; | |
508 | |
509 } else { | |
510 rrp->tried[n] |= m; | |
511 } | |
512 | |
513 pc->tries--; | |
514 } | |
515 | |
516 rrp->current++; | |
517 | |
518 if (rrp->current >= rrp->peers->number) { | |
519 rrp->current = 0; | |
520 } | |
521 | |
522 if (pc->tries == 0) { | |
523 goto failed; | |
524 } | |
525 | |
526 if (--i == 0) { | |
527 ngx_log_error(NGX_LOG_ALERT, pc->log, 0, | |
528 "round robin upstream stuck on %ui tries", | |
529 pc->tries); | |
530 goto failed; | |
531 } | |
532 } | |
533 | |
534 peer->current_weight--; | |
535 } | |
536 | |
537 rrp->tried[n] |= m; | |
538 } | 436 } |
539 | 437 |
540 pc->sockaddr = peer->sockaddr; | 438 pc->sockaddr = peer->sockaddr; |
541 pc->socklen = peer->socklen; | 439 pc->socklen = peer->socklen; |
542 pc->name = &peer->name; | 440 pc->name = &peer->name; |
543 | 441 |
544 /* ngx_unlock_mutex(rrp->peers->mutex); */ | 442 /* ngx_unlock_mutex(rrp->peers->mutex); */ |
545 | 443 |
546 if (pc->tries == 1 && rrp->peers->next) { | 444 if (pc->tries == 1 && rrp->peers->next) { |
547 pc->tries += rrp->peers->next->number; | 445 pc->tries += rrp->peers->next->number; |
548 | |
549 n = rrp->peers->next->number / (8 * sizeof(uintptr_t)) + 1; | |
550 for (i = 0; i < n; i++) { | |
551 rrp->tried[i] = 0; | |
552 } | |
553 } | 446 } |
554 | 447 |
555 return NGX_OK; | 448 return NGX_OK; |
556 | 449 |
557 failed: | 450 failed: |
593 | 486 |
594 return NGX_BUSY; | 487 return NGX_BUSY; |
595 } | 488 } |
596 | 489 |
597 | 490 |
598 static ngx_uint_t | 491 static ngx_http_upstream_rr_peer_t * |
599 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers) | 492 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp) |
600 { | 493 { |
601 ngx_uint_t i, n, reset = 0; | 494 time_t now; |
602 ngx_http_upstream_rr_peer_t *peer; | 495 uintptr_t m; |
603 | 496 ngx_int_t total; |
604 peer = &peers->peer[0]; | 497 ngx_uint_t i, n; |
605 | 498 ngx_http_upstream_rr_peer_t *peer, *best; |
606 for ( ;; ) { | 499 |
607 | 500 now = ngx_time(); |
608 for (i = 0; i < peers->number; i++) { | 501 |
609 | 502 best = NULL; |
610 if (peer[i].current_weight <= 0) { | 503 total = 0; |
611 continue; | 504 |
612 } | 505 for (i = 0; i < rrp->peers->number; i++) { |
613 | 506 |
614 n = i; | 507 n = i / (8 * sizeof(uintptr_t)); |
615 | 508 m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t)); |
616 while (i < peers->number - 1) { | 509 |
617 | 510 if (rrp->tried[n] & m) { |
618 i++; | 511 continue; |
619 | 512 } |
620 if (peer[i].current_weight <= 0) { | 513 |
621 continue; | 514 peer = &rrp->peers->peer[i]; |
622 } | 515 |
623 | 516 if (peer->down) { |
624 if (peer[n].current_weight * 1000 / peer[i].current_weight | 517 continue; |
625 > peer[n].weight * 1000 / peer[i].weight) | 518 } |
626 { | 519 |
627 return n; | 520 if (peer->max_fails |
628 } | 521 && peer->fails >= peer->max_fails |
629 | 522 && now - peer->checked <= peer->fail_timeout) |
630 n = i; | 523 { |
631 } | 524 continue; |
632 | 525 } |
633 if (peer[i].current_weight > 0) { | 526 |
634 n = i; | 527 peer->current_weight += peer->effective_weight; |
635 } | 528 total += peer->effective_weight; |
636 | 529 |
637 return n; | 530 if (peer->effective_weight < peer->weight) { |
638 } | 531 peer->effective_weight++; |
639 | 532 } |
640 if (reset++) { | 533 |
641 return 0; | 534 if (best == NULL || peer->current_weight > best->current_weight) { |
642 } | 535 best = peer; |
643 | 536 } |
644 for (i = 0; i < peers->number; i++) { | 537 } |
645 peer[i].current_weight = peer[i].weight; | 538 |
646 } | 539 if (best == NULL) { |
647 } | 540 return NULL; |
541 } | |
542 | |
543 i = best - &rrp->peers->peer[0]; | |
544 | |
545 rrp->current = i; | |
546 | |
547 n = i / (8 * sizeof(uintptr_t)); | |
548 m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t)); | |
549 | |
550 rrp->tried[n] |= m; | |
551 | |
552 best->current_weight -= total; | |
553 best->checked = now; | |
554 | |
555 return best; | |
648 } | 556 } |
649 | 557 |
650 | 558 |
651 void | 559 void |
652 ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data, | 560 ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data, |
681 peer->fails++; | 589 peer->fails++; |
682 peer->accessed = now; | 590 peer->accessed = now; |
683 peer->checked = now; | 591 peer->checked = now; |
684 | 592 |
685 if (peer->max_fails) { | 593 if (peer->max_fails) { |
686 peer->current_weight -= peer->weight / peer->max_fails; | 594 peer->effective_weight -= peer->weight / peer->max_fails; |
687 } | 595 } |
688 | 596 |
689 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, | 597 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, |
690 "free rr peer failed: %ui %i", | 598 "free rr peer failed: %ui %i", |
691 rrp->current, peer->current_weight); | 599 rrp->current, peer->effective_weight); |
692 | 600 |
693 if (peer->current_weight < 0) { | 601 if (peer->effective_weight < 0) { |
694 peer->current_weight = 0; | 602 peer->effective_weight = 0; |
695 } | 603 } |
696 | 604 |
697 /* ngx_unlock_mutex(rrp->peers->mutex); */ | 605 /* ngx_unlock_mutex(rrp->peers->mutex); */ |
698 | 606 |
699 } else { | 607 } else { |
701 /* mark peer live if check passed */ | 609 /* mark peer live if check passed */ |
702 | 610 |
703 if (peer->accessed < peer->checked) { | 611 if (peer->accessed < peer->checked) { |
704 peer->fails = 0; | 612 peer->fails = 0; |
705 } | 613 } |
706 } | |
707 | |
708 rrp->current++; | |
709 | |
710 if (rrp->current >= rrp->peers->number) { | |
711 rrp->current = 0; | |
712 } | 614 } |
713 | 615 |
714 if (pc->tries) { | 616 if (pc->tries) { |
715 pc->tries--; | 617 pc->tries--; |
716 } | 618 } |