comparison src/http/ngx_http_upstream_round_robin.c @ 4667:d05ab8793a69 stable-1.2

Merge of r4622, r4623: balancing changes.

*) Upstream: smooth weighted round-robin balancing.

   For edge case weights like { 5, 1, 1 } we now produce
   { a, a, b, a, c, a, a } sequence instead of { c, b, a, a, a, a, a }
   produced previously.

   Algorithm is as follows: on each peer selection we increase
   current_weight of each eligible peer by its weight, select peer with
   greatest current_weight and reduce its current_weight by total number
   of weight points distributed among peers.

   In case of { 5, 1, 1 } weights this gives the following sequence of
   current_weight's:

        a   b   c
        0   0   0   (initial state)

        5   1   1   (a selected)
       -2   1   1

        3   2   2   (a selected)
       -4   2   2

        1   3   3   (b selected)
        1  -4   3

        6  -3   4   (a selected)
       -1  -3   4

        4  -2   5   (c selected)
        4  -2  -2

        9  -1  -1   (a selected)
        2  -1  -1

        7   0   0   (a selected)
        0   0   0

   To preserve weight reduction in case of failures the effective_weight
   variable was introduced, which usually matches peer's weight, but is
   reduced temporarily on peer failures.

   This change also fixes loop with backup servers and proxy_next_upstream
   http_404 (ticket #47), and skipping alive upstreams in some cases if
   there are multiple dead ones (ticket #64).

*) Upstream: fixed ip_hash rebalancing with the "down" flag.

   Due to weight being set to 0 for down peers, order of peers after
   sorting wasn't the same as without the "down" flag (with down peers at
   the end), resulting in client rebalancing for clients on other servers.
   The only rebalancing which should happen after adding "down" to a server
   is one for clients on the server.

   The problem was introduced in r1377 (which fixed endless loop by setting
   weight to 0 for down servers). The loop is no longer possible with new
   smooth algorithm, so preserving original weight is safe.
author Maxim Dounin <mdounin@mdounin.ru>
date Mon, 04 Jun 2012 11:21:58 +0000
parents 1db899642518
children 0141b4aec0e4
comparison
equal deleted inserted replaced
4666:0bb016b1fd2d 4667:d05ab8793a69
10 #include <ngx_http.h> 10 #include <ngx_http.h>
11 11
12 12
13 static ngx_int_t ngx_http_upstream_cmp_servers(const void *one, 13 static ngx_int_t ngx_http_upstream_cmp_servers(const void *one,
14 const void *two); 14 const void *two);
15 static ngx_uint_t 15 static ngx_http_upstream_rr_peer_t *ngx_http_upstream_get_peer(
16 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers); 16 ngx_http_upstream_rr_peer_data_t *rrp);
17 17
18 #if (NGX_HTTP_SSL) 18 #if (NGX_HTTP_SSL)
19 19
20 static ngx_int_t ngx_http_upstream_empty_set_session(ngx_peer_connection_t *pc, 20 static ngx_int_t ngx_http_upstream_empty_set_session(ngx_peer_connection_t *pc,
21 void *data); 21 void *data);
78 peers->peer[n].socklen = server[i].addrs[j].socklen; 78 peers->peer[n].socklen = server[i].addrs[j].socklen;
79 peers->peer[n].name = server[i].addrs[j].name; 79 peers->peer[n].name = server[i].addrs[j].name;
80 peers->peer[n].max_fails = server[i].max_fails; 80 peers->peer[n].max_fails = server[i].max_fails;
81 peers->peer[n].fail_timeout = server[i].fail_timeout; 81 peers->peer[n].fail_timeout = server[i].fail_timeout;
82 peers->peer[n].down = server[i].down; 82 peers->peer[n].down = server[i].down;
83 peers->peer[n].weight = server[i].down ? 0 : server[i].weight; 83 peers->peer[n].weight = server[i].weight;
84 peers->peer[n].current_weight = peers->peer[n].weight; 84 peers->peer[n].effective_weight = server[i].weight;
85 peers->peer[n].current_weight = 0;
85 n++; 86 n++;
86 } 87 }
87 } 88 }
88 89
89 us->peer.data = peers; 90 us->peer.data = peers;
129 130
130 backup->peer[n].sockaddr = server[i].addrs[j].sockaddr; 131 backup->peer[n].sockaddr = server[i].addrs[j].sockaddr;
131 backup->peer[n].socklen = server[i].addrs[j].socklen; 132 backup->peer[n].socklen = server[i].addrs[j].socklen;
132 backup->peer[n].name = server[i].addrs[j].name; 133 backup->peer[n].name = server[i].addrs[j].name;
133 backup->peer[n].weight = server[i].weight; 134 backup->peer[n].weight = server[i].weight;
134 backup->peer[n].current_weight = server[i].weight; 135 backup->peer[n].effective_weight = server[i].weight;
136 backup->peer[n].current_weight = 0;
135 backup->peer[n].max_fails = server[i].max_fails; 137 backup->peer[n].max_fails = server[i].max_fails;
136 backup->peer[n].fail_timeout = server[i].fail_timeout; 138 backup->peer[n].fail_timeout = server[i].fail_timeout;
137 backup->peer[n].down = server[i].down; 139 backup->peer[n].down = server[i].down;
138 n++; 140 n++;
139 } 141 }
188 for (i = 0; i < u.naddrs; i++) { 190 for (i = 0; i < u.naddrs; i++) {
189 peers->peer[i].sockaddr = u.addrs[i].sockaddr; 191 peers->peer[i].sockaddr = u.addrs[i].sockaddr;
190 peers->peer[i].socklen = u.addrs[i].socklen; 192 peers->peer[i].socklen = u.addrs[i].socklen;
191 peers->peer[i].name = u.addrs[i].name; 193 peers->peer[i].name = u.addrs[i].name;
192 peers->peer[i].weight = 1; 194 peers->peer[i].weight = 1;
193 peers->peer[i].current_weight = 1; 195 peers->peer[i].effective_weight = 1;
196 peers->peer[i].current_weight = 0;
194 peers->peer[i].max_fails = 1; 197 peers->peer[i].max_fails = 1;
195 peers->peer[i].fail_timeout = 10; 198 peers->peer[i].fail_timeout = 10;
196 } 199 }
197 200
198 us->peer.data = peers; 201 us->peer.data = peers;
304 if (ur->sockaddr) { 307 if (ur->sockaddr) {
305 peers->peer[0].sockaddr = ur->sockaddr; 308 peers->peer[0].sockaddr = ur->sockaddr;
306 peers->peer[0].socklen = ur->socklen; 309 peers->peer[0].socklen = ur->socklen;
307 peers->peer[0].name = ur->host; 310 peers->peer[0].name = ur->host;
308 peers->peer[0].weight = 1; 311 peers->peer[0].weight = 1;
309 peers->peer[0].current_weight = 1; 312 peers->peer[0].effective_weight = 1;
313 peers->peer[0].current_weight = 0;
310 peers->peer[0].max_fails = 1; 314 peers->peer[0].max_fails = 1;
311 peers->peer[0].fail_timeout = 10; 315 peers->peer[0].fail_timeout = 10;
312 316
313 } else { 317 } else {
314 318
336 peers->peer[i].sockaddr = (struct sockaddr *) sin; 340 peers->peer[i].sockaddr = (struct sockaddr *) sin;
337 peers->peer[i].socklen = sizeof(struct sockaddr_in); 341 peers->peer[i].socklen = sizeof(struct sockaddr_in);
338 peers->peer[i].name.len = len; 342 peers->peer[i].name.len = len;
339 peers->peer[i].name.data = p; 343 peers->peer[i].name.data = p;
340 peers->peer[i].weight = 1; 344 peers->peer[i].weight = 1;
341 peers->peer[i].current_weight = 1; 345 peers->peer[i].effective_weight = 1;
346 peers->peer[i].current_weight = 0;
342 peers->peer[i].max_fails = 1; 347 peers->peer[i].max_fails = 1;
343 peers->peer[i].fail_timeout = 10; 348 peers->peer[i].fail_timeout = 10;
344 } 349 }
345 } 350 }
346 351
376 ngx_int_t 381 ngx_int_t
377 ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data) 382 ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
378 { 383 {
379 ngx_http_upstream_rr_peer_data_t *rrp = data; 384 ngx_http_upstream_rr_peer_data_t *rrp = data;
380 385
381 time_t now;
382 uintptr_t m;
383 ngx_int_t rc; 386 ngx_int_t rc;
384 ngx_uint_t i, n; 387 ngx_uint_t i, n;
385 ngx_connection_t *c; 388 ngx_connection_t *c;
386 ngx_http_upstream_rr_peer_t *peer; 389 ngx_http_upstream_rr_peer_t *peer;
387 ngx_http_upstream_rr_peers_t *peers; 390 ngx_http_upstream_rr_peers_t *peers;
388 391
389 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0, 392 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0,
390 "get rr peer, try: %ui", pc->tries); 393 "get rr peer, try: %ui", pc->tries);
391 394
392 now = ngx_time();
393
394 /* ngx_lock_mutex(rrp->peers->mutex); */ 395 /* ngx_lock_mutex(rrp->peers->mutex); */
395 396
396 if (rrp->peers->last_cached) { 397 if (rrp->peers->last_cached) {
397 398
398 /* cached connection */ 399 /* cached connection */
421 422
422 } else { 423 } else {
423 424
424 /* there are several peers */ 425 /* there are several peers */
425 426
426 if (pc->tries == rrp->peers->number) { 427 peer = ngx_http_upstream_get_peer(rrp);
427 428
428 /* it's a first try - get a current peer */ 429 if (peer == NULL) {
429 430 goto failed;
430 i = pc->tries; 431 }
431 432
432 for ( ;; ) { 433 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
433 rrp->current = ngx_http_upstream_get_peer(rrp->peers); 434 "get rr peer, current: %ui %i",
434 435 rrp->current, peer->current_weight);
435 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
436 "get rr peer, current: %ui %i",
437 rrp->current,
438 rrp->peers->peer[rrp->current].current_weight);
439
440 n = rrp->current / (8 * sizeof(uintptr_t));
441 m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));
442
443 if (!(rrp->tried[n] & m)) {
444 peer = &rrp->peers->peer[rrp->current];
445
446 if (!peer->down) {
447
448 if (peer->max_fails == 0
449 || peer->fails < peer->max_fails)
450 {
451 break;
452 }
453
454 if (now - peer->checked > peer->fail_timeout) {
455 peer->checked = now;
456 break;
457 }
458
459 peer->current_weight = 0;
460
461 } else {
462 rrp->tried[n] |= m;
463 }
464
465 pc->tries--;
466 }
467
468 if (pc->tries == 0) {
469 goto failed;
470 }
471
472 if (--i == 0) {
473 ngx_log_error(NGX_LOG_ALERT, pc->log, 0,
474 "round robin upstream stuck on %ui tries",
475 pc->tries);
476 goto failed;
477 }
478 }
479
480 peer->current_weight--;
481
482 } else {
483
484 i = pc->tries;
485
486 for ( ;; ) {
487 n = rrp->current / (8 * sizeof(uintptr_t));
488 m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));
489
490 if (!(rrp->tried[n] & m)) {
491
492 peer = &rrp->peers->peer[rrp->current];
493
494 if (!peer->down) {
495
496 if (peer->max_fails == 0
497 || peer->fails < peer->max_fails)
498 {
499 break;
500 }
501
502 if (now - peer->checked > peer->fail_timeout) {
503 peer->checked = now;
504 break;
505 }
506
507 peer->current_weight = 0;
508
509 } else {
510 rrp->tried[n] |= m;
511 }
512
513 pc->tries--;
514 }
515
516 rrp->current++;
517
518 if (rrp->current >= rrp->peers->number) {
519 rrp->current = 0;
520 }
521
522 if (pc->tries == 0) {
523 goto failed;
524 }
525
526 if (--i == 0) {
527 ngx_log_error(NGX_LOG_ALERT, pc->log, 0,
528 "round robin upstream stuck on %ui tries",
529 pc->tries);
530 goto failed;
531 }
532 }
533
534 peer->current_weight--;
535 }
536
537 rrp->tried[n] |= m;
538 } 436 }
539 437
540 pc->sockaddr = peer->sockaddr; 438 pc->sockaddr = peer->sockaddr;
541 pc->socklen = peer->socklen; 439 pc->socklen = peer->socklen;
542 pc->name = &peer->name; 440 pc->name = &peer->name;
543 441
544 /* ngx_unlock_mutex(rrp->peers->mutex); */ 442 /* ngx_unlock_mutex(rrp->peers->mutex); */
545 443
546 if (pc->tries == 1 && rrp->peers->next) { 444 if (pc->tries == 1 && rrp->peers->next) {
547 pc->tries += rrp->peers->next->number; 445 pc->tries += rrp->peers->next->number;
548
549 n = rrp->peers->next->number / (8 * sizeof(uintptr_t)) + 1;
550 for (i = 0; i < n; i++) {
551 rrp->tried[i] = 0;
552 }
553 } 446 }
554 447
555 return NGX_OK; 448 return NGX_OK;
556 449
557 failed: 450 failed:
593 486
594 return NGX_BUSY; 487 return NGX_BUSY;
595 } 488 }
596 489
597 490
598 static ngx_uint_t 491 static ngx_http_upstream_rr_peer_t *
599 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers) 492 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp)
600 { 493 {
601 ngx_uint_t i, n, reset = 0; 494 time_t now;
602 ngx_http_upstream_rr_peer_t *peer; 495 uintptr_t m;
603 496 ngx_int_t total;
604 peer = &peers->peer[0]; 497 ngx_uint_t i, n;
605 498 ngx_http_upstream_rr_peer_t *peer, *best;
606 for ( ;; ) { 499
607 500 now = ngx_time();
608 for (i = 0; i < peers->number; i++) { 501
609 502 best = NULL;
610 if (peer[i].current_weight <= 0) { 503 total = 0;
611 continue; 504
612 } 505 for (i = 0; i < rrp->peers->number; i++) {
613 506
614 n = i; 507 n = i / (8 * sizeof(uintptr_t));
615 508 m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
616 while (i < peers->number - 1) { 509
617 510 if (rrp->tried[n] & m) {
618 i++; 511 continue;
619 512 }
620 if (peer[i].current_weight <= 0) { 513
621 continue; 514 peer = &rrp->peers->peer[i];
622 } 515
623 516 if (peer->down) {
624 if (peer[n].current_weight * 1000 / peer[i].current_weight 517 continue;
625 > peer[n].weight * 1000 / peer[i].weight) 518 }
626 { 519
627 return n; 520 if (peer->max_fails
628 } 521 && peer->fails >= peer->max_fails
629 522 && now - peer->checked <= peer->fail_timeout)
630 n = i; 523 {
631 } 524 continue;
632 525 }
633 if (peer[i].current_weight > 0) { 526
634 n = i; 527 peer->current_weight += peer->effective_weight;
635 } 528 total += peer->effective_weight;
636 529
637 return n; 530 if (peer->effective_weight < peer->weight) {
638 } 531 peer->effective_weight++;
639 532 }
640 if (reset++) { 533
641 return 0; 534 if (best == NULL || peer->current_weight > best->current_weight) {
642 } 535 best = peer;
643 536 }
644 for (i = 0; i < peers->number; i++) { 537 }
645 peer[i].current_weight = peer[i].weight; 538
646 } 539 if (best == NULL) {
647 } 540 return NULL;
541 }
542
543 i = best - &rrp->peers->peer[0];
544
545 rrp->current = i;
546
547 n = i / (8 * sizeof(uintptr_t));
548 m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
549
550 rrp->tried[n] |= m;
551
552 best->current_weight -= total;
553 best->checked = now;
554
555 return best;
648 } 556 }
649 557
650 558
651 void 559 void
652 ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data, 560 ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,
681 peer->fails++; 589 peer->fails++;
682 peer->accessed = now; 590 peer->accessed = now;
683 peer->checked = now; 591 peer->checked = now;
684 592
685 if (peer->max_fails) { 593 if (peer->max_fails) {
686 peer->current_weight -= peer->weight / peer->max_fails; 594 peer->effective_weight -= peer->weight / peer->max_fails;
687 } 595 }
688 596
689 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, 597 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
690 "free rr peer failed: %ui %i", 598 "free rr peer failed: %ui %i",
691 rrp->current, peer->current_weight); 599 rrp->current, peer->effective_weight);
692 600
693 if (peer->current_weight < 0) { 601 if (peer->effective_weight < 0) {
694 peer->current_weight = 0; 602 peer->effective_weight = 0;
695 } 603 }
696 604
697 /* ngx_unlock_mutex(rrp->peers->mutex); */ 605 /* ngx_unlock_mutex(rrp->peers->mutex); */
698 606
699 } else { 607 } else {
701 /* mark peer live if check passed */ 609 /* mark peer live if check passed */
702 610
703 if (peer->accessed < peer->checked) { 611 if (peer->accessed < peer->checked) {
704 peer->fails = 0; 612 peer->fails = 0;
705 } 613 }
706 }
707
708 rrp->current++;
709
710 if (rrp->current >= rrp->peers->number) {
711 rrp->current = 0;
712 } 614 }
713 615
714 if (pc->tries) { 616 if (pc->tries) {
715 pc->tries--; 617 pc->tries--;
716 } 618 }