comparison src/http/modules/ngx_http_gzip_filter_module.c @ 7155:29e9571b1989

Gzip: support for a zlib variant from Intel. The zlib variant from Intel, available from https://github.com/jtkukunas/zlib, uses a 64K hash instead of scaling it from the specified memory level, also uses 16-byte padding in one of the window-sized memory buffers, and can force window bits to 13 if the compression level is set to 1 and appropriate compile options are used. As a result, nginx complained with "gzip filter failed to use preallocated memory" alerts. This change improves deflate_state allocation detection by testing that items is 1 (deflate_state is the only allocation where items is 1). Additionally, on the first failure to use preallocated memory we now assume that we are working with Intel's modified zlib, and switch to using appropriate preallocations. If this does not help, we complain with the usual alerts. A previous version of this patch was published at http://mailman.nginx.org/pipermail/nginx/2014-July/044568.html. The zlib variant in question is used by default in ClearLinux from Intel, see http://mailman.nginx.org/pipermail/nginx-ru/2017-October/060421.html, http://mailman.nginx.org/pipermail/nginx-ru/2017-November/060544.html.
author Maxim Dounin <mdounin@mdounin.ru>
date Sat, 18 Nov 2017 04:03:27 +0300
parents ab5117642647
children ac5a741d39cf
comparison
equal deleted inserted replaced
7154:595a3de03e91 7155:29e9571b1989
55 unsigned redo:1; 55 unsigned redo:1;
56 unsigned done:1; 56 unsigned done:1;
57 unsigned nomem:1; 57 unsigned nomem:1;
58 unsigned gzheader:1; 58 unsigned gzheader:1;
59 unsigned buffering:1; 59 unsigned buffering:1;
60 unsigned intel:1;
60 61
61 size_t zin; 62 size_t zin;
62 size_t zout; 63 size_t zout;
63 64
64 uint32_t crc32; 65 uint32_t crc32;
231 static ngx_str_t ngx_http_gzip_ratio = ngx_string("gzip_ratio"); 232 static ngx_str_t ngx_http_gzip_ratio = ngx_string("gzip_ratio");
232 233
233 static ngx_http_output_header_filter_pt ngx_http_next_header_filter; 234 static ngx_http_output_header_filter_pt ngx_http_next_header_filter;
234 static ngx_http_output_body_filter_pt ngx_http_next_body_filter; 235 static ngx_http_output_body_filter_pt ngx_http_next_body_filter;
235 236
237 static ngx_uint_t ngx_http_gzip_assume_intel;
238
236 239
237 static ngx_int_t 240 static ngx_int_t
238 ngx_http_gzip_header_filter(ngx_http_request_t *r) 241 ngx_http_gzip_header_filter(ngx_http_request_t *r)
239 { 242 {
240 ngx_table_elt_t *h; 243 ngx_table_elt_t *h;
525 * 8K is for zlib deflate_state, it takes 528 * 8K is for zlib deflate_state, it takes
526 * *) 5816 bytes on i386 and sparc64 (32-bit mode) 529 * *) 5816 bytes on i386 and sparc64 (32-bit mode)
527 * *) 5920 bytes on amd64 and sparc64 530 * *) 5920 bytes on amd64 and sparc64
528 */ 531 */
529 532
530 ctx->allocated = 8192 + (1 << (wbits + 2)) + (1 << (memlevel + 9)); 533 if (!ngx_http_gzip_assume_intel) {
534 ctx->allocated = 8192 + (1 << (wbits + 2)) + (1 << (memlevel + 9));
535
536 } else {
537 /*
538 * A zlib variant from Intel, https://github.com/jtkukunas/zlib.
539 * It can force window bits to 13 for fast compression level,
540 * on processors with SSE 4.2 it uses 64K hash instead of scaling
541 * it from the specified memory level, and also introduces
542 * 16-byte padding in one out of the two window-sized buffers.
543 */
544
545 if (conf->level == 1) {
546 wbits = ngx_max(wbits, 13);
547 }
548
549 ctx->allocated = 8192 + 16 + (1 << (wbits + 2))
550 + (1 << (ngx_max(memlevel, 8) + 8))
551 + (1 << (memlevel + 8));
552 ctx->intel = 1;
553 }
531 } 554 }
532 555
533 556
534 static ngx_int_t 557 static ngx_int_t
535 ngx_http_gzip_filter_buffer(ngx_http_gzip_ctx_t *ctx, ngx_chain_t *in) 558 ngx_http_gzip_filter_buffer(ngx_http_gzip_ctx_t *ctx, ngx_chain_t *in)
1001 void *p; 1024 void *p;
1002 ngx_uint_t alloc; 1025 ngx_uint_t alloc;
1003 1026
1004 alloc = items * size; 1027 alloc = items * size;
1005 1028
1006 if (alloc % 512 != 0 && alloc < 8192) { 1029 if (items == 1 && alloc % 512 != 0 && alloc < 8192) {
1007 1030
1008 /* 1031 /*
1009 * The zlib deflate_state allocation, it takes about 6K, 1032 * The zlib deflate_state allocation, it takes about 6K,
1010 * we allocate 8K. Other allocations are divisible by 512. 1033 * we allocate 8K. Other allocations are divisible by 512.
1011 */ 1034 */
1023 items, size, alloc, p); 1046 items, size, alloc, p);
1024 1047
1025 return p; 1048 return p;
1026 } 1049 }
1027 1050
1028 ngx_log_error(NGX_LOG_ALERT, ctx->request->connection->log, 0, 1051 if (ctx->intel) {
1029 "gzip filter failed to use preallocated memory: %ud of %ui", 1052 ngx_log_error(NGX_LOG_ALERT, ctx->request->connection->log, 0,
1030 items * size, ctx->allocated); 1053 "gzip filter failed to use preallocated memory: "
1054 "%ud of %ui", items * size, ctx->allocated);
1055
1056 } else {
1057 ngx_http_gzip_assume_intel = 1;
1058 }
1031 1059
1032 p = ngx_palloc(ctx->request->pool, items * size); 1060 p = ngx_palloc(ctx->request->pool, items * size);
1033 1061
1034 return p; 1062 return p;
1035 } 1063 }