Mercurial > hg > nginx
view src/http/modules/ngx_http_charset_filter_module.c @ 7508:c30a20e06c21
Range filter: fixed duplicate last buffers.
In ngx_http_range_singlepart_body() special buffers where passed
unmodified, including ones after the end of the range. As such,
if the last buffer of a response was sent separately as a special
buffer, two buffers with b->last_buf set were present in the response.
In particular, this might result in a duplicate final chunk when using
chunked transfer encoding (normally range filter and chunked transfer
encoding are not used together, but this may happen if there are trailers
in the response). This also likely to cause problems in HTTP/2.
Fix is to skip all special buffers after we've sent the last part of
the range requested. These special buffers are not meaningful anyway,
since we set b->last_buf in the buffer with the last part of the range,
and everything is expected to be flushed due to it.
Additionally, ngx_http_next_body_filter() is now called even
if no buffers are to be passed to it. This ensures that various
write events are properly propagated through the filter chain. In
particular, this fixes test failures observed with the above change
and aio enabled.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Mon, 13 May 2019 22:44:49 +0300 |
parents | a72886067bbb |
children | f53146df9a47 |
line wrap: on
line source
/* * Copyright (C) Igor Sysoev * Copyright (C) Nginx, Inc. */ #include <ngx_config.h> #include <ngx_core.h> #include <ngx_http.h> #define NGX_HTTP_CHARSET_OFF -2 #define NGX_HTTP_NO_CHARSET -3 #define NGX_HTTP_CHARSET_VAR 0x10000 /* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */ #define NGX_UTF_LEN 4 #define NGX_HTML_ENTITY_LEN (sizeof("") - 1) typedef struct { u_char **tables; ngx_str_t name; unsigned length:16; unsigned utf8:1; } ngx_http_charset_t; typedef struct { ngx_int_t src; ngx_int_t dst; } ngx_http_charset_recode_t; typedef struct { ngx_int_t src; ngx_int_t dst; u_char *src2dst; u_char *dst2src; } ngx_http_charset_tables_t; typedef struct { ngx_array_t charsets; /* ngx_http_charset_t */ ngx_array_t tables; /* ngx_http_charset_tables_t */ ngx_array_t recodes; /* ngx_http_charset_recode_t */ } ngx_http_charset_main_conf_t; typedef struct { ngx_int_t charset; ngx_int_t source_charset; ngx_flag_t override_charset; ngx_hash_t types; ngx_array_t *types_keys; } ngx_http_charset_loc_conf_t; typedef struct { u_char *table; ngx_int_t charset; ngx_str_t charset_name; ngx_chain_t *busy; ngx_chain_t *free_bufs; ngx_chain_t *free_buffers; size_t saved_len; u_char saved[NGX_UTF_LEN]; unsigned length:16; unsigned from_utf8:1; unsigned to_utf8:1; } ngx_http_charset_ctx_t; typedef struct { ngx_http_charset_tables_t *table; ngx_http_charset_t *charset; ngx_uint_t characters; } ngx_http_charset_conf_ctx_t; static ngx_int_t ngx_http_destination_charset(ngx_http_request_t *r, ngx_str_t *name); static ngx_int_t ngx_http_main_request_charset(ngx_http_request_t *r, ngx_str_t *name); static ngx_int_t ngx_http_source_charset(ngx_http_request_t *r, ngx_str_t *name); static ngx_int_t ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name); static ngx_inline void ngx_http_set_charset(ngx_http_request_t *r, ngx_str_t *charset); static ngx_int_t ngx_http_charset_ctx(ngx_http_request_t *r, ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset); static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table); static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx); static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx); static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx); static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx, size_t size); static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf); static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf); static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf); static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name); static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf); static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf); static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child); static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf); static ngx_str_t ngx_http_charset_default_types[] = { ngx_string("text/html"), ngx_string("text/xml"), ngx_string("text/plain"), ngx_string("text/vnd.wap.wml"), ngx_string("application/javascript"), ngx_string("application/rss+xml"), ngx_null_string }; static ngx_command_t ngx_http_charset_filter_commands[] = { { ngx_string("charset"), NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1, ngx_http_set_charset_slot, NGX_HTTP_LOC_CONF_OFFSET, offsetof(ngx_http_charset_loc_conf_t, charset), NULL }, { ngx_string("source_charset"), NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1, ngx_http_set_charset_slot, NGX_HTTP_LOC_CONF_OFFSET, offsetof(ngx_http_charset_loc_conf_t, source_charset), NULL }, { ngx_string("override_charset"), NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG, ngx_conf_set_flag_slot, NGX_HTTP_LOC_CONF_OFFSET, offsetof(ngx_http_charset_loc_conf_t, override_charset), NULL }, { ngx_string("charset_types"), NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE, ngx_http_types_slot, NGX_HTTP_LOC_CONF_OFFSET, offsetof(ngx_http_charset_loc_conf_t, types_keys), &ngx_http_charset_default_types[0] }, { ngx_string("charset_map"), NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2, ngx_http_charset_map_block, NGX_HTTP_MAIN_CONF_OFFSET, 0, NULL }, ngx_null_command }; static ngx_http_module_t ngx_http_charset_filter_module_ctx = { NULL, /* preconfiguration */ ngx_http_charset_postconfiguration, /* postconfiguration */ ngx_http_charset_create_main_conf, /* create main configuration */ NULL, /* init main configuration */ NULL, /* create server configuration */ NULL, /* merge server configuration */ ngx_http_charset_create_loc_conf, /* create location configuration */ ngx_http_charset_merge_loc_conf /* merge location configuration */ }; ngx_module_t ngx_http_charset_filter_module = { NGX_MODULE_V1, &ngx_http_charset_filter_module_ctx, /* module context */ ngx_http_charset_filter_commands, /* module directives */ NGX_HTTP_MODULE, /* module type */ NULL, /* init master */ NULL, /* init module */ NULL, /* init process */ NULL, /* init thread */ NULL, /* exit thread */ NULL, /* exit process */ NULL, /* exit master */ NGX_MODULE_V1_PADDING }; static ngx_http_output_header_filter_pt ngx_http_next_header_filter; static ngx_http_output_body_filter_pt ngx_http_next_body_filter; static ngx_int_t ngx_http_charset_header_filter(ngx_http_request_t *r) { ngx_int_t charset, source_charset; ngx_str_t dst, src; ngx_http_charset_t *charsets; ngx_http_charset_main_conf_t *mcf; if (r == r->main) { charset = ngx_http_destination_charset(r, &dst); } else { charset = ngx_http_main_request_charset(r, &dst); } if (charset == NGX_ERROR) { return NGX_ERROR; } if (charset == NGX_DECLINED) { return ngx_http_next_header_filter(r); } /* charset: charset index or NGX_HTTP_NO_CHARSET */ source_charset = ngx_http_source_charset(r, &src); if (source_charset == NGX_ERROR) { return NGX_ERROR; } /* * source_charset: charset index, NGX_HTTP_NO_CHARSET, * or NGX_HTTP_CHARSET_OFF */ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "charset: \"%V\" > \"%V\"", &src, &dst); if (source_charset == NGX_HTTP_CHARSET_OFF) { ngx_http_set_charset(r, &dst); return ngx_http_next_header_filter(r); } if (charset == NGX_HTTP_NO_CHARSET || source_charset == NGX_HTTP_NO_CHARSET) { if (source_charset != charset || ngx_strncasecmp(dst.data, src.data, dst.len) != 0) { goto no_charset_map; } ngx_http_set_charset(r, &dst); return ngx_http_next_header_filter(r); } if (source_charset == charset) { r->headers_out.content_type.len = r->headers_out.content_type_len; ngx_http_set_charset(r, &dst); return ngx_http_next_header_filter(r); } /* source_charset != charset */ if (r->headers_out.content_encoding && r->headers_out.content_encoding->value.len) { return ngx_http_next_header_filter(r); } mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); charsets = mcf->charsets.elts; if (charsets[source_charset].tables == NULL || charsets[source_charset].tables[charset] == NULL) { goto no_charset_map; } r->headers_out.content_type.len = r->headers_out.content_type_len; ngx_http_set_charset(r, &dst); return ngx_http_charset_ctx(r, charsets, charset, source_charset); no_charset_map: ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, "no \"charset_map\" between the charsets \"%V\" and \"%V\"", &src, &dst); return ngx_http_next_header_filter(r); } static ngx_int_t ngx_http_destination_charset(ngx_http_request_t *r, ngx_str_t *name) { ngx_int_t charset; ngx_http_charset_t *charsets; ngx_http_variable_value_t *vv; ngx_http_charset_loc_conf_t *mlcf; ngx_http_charset_main_conf_t *mcf; if (r->headers_out.content_type.len == 0) { return NGX_DECLINED; } if (r->headers_out.override_charset && r->headers_out.override_charset->len) { *name = *r->headers_out.override_charset; charset = ngx_http_get_charset(r, name); if (charset != NGX_HTTP_NO_CHARSET) { return charset; } ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, "unknown charset \"%V\" to override", name); return NGX_DECLINED; } mlcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module); charset = mlcf->charset; if (charset == NGX_HTTP_CHARSET_OFF) { return NGX_DECLINED; } if (r->headers_out.charset.len) { if (mlcf->override_charset == 0) { return NGX_DECLINED; } } else { if (ngx_http_test_content_type(r, &mlcf->types) == NULL) { return NGX_DECLINED; } } if (charset < NGX_HTTP_CHARSET_VAR) { mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); charsets = mcf->charsets.elts; *name = charsets[charset].name; return charset; } vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR); if (vv == NULL || vv->not_found) { return NGX_ERROR; } name->len = vv->len; name->data = vv->data; return ngx_http_get_charset(r, name); } static ngx_int_t ngx_http_main_request_charset(ngx_http_request_t *r, ngx_str_t *src) { ngx_int_t charset; ngx_str_t *main_charset; ngx_http_charset_ctx_t *ctx; ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module); if (ctx) { *src = ctx->charset_name; return ctx->charset; } main_charset = &r->main->headers_out.charset; if (main_charset->len == 0) { return NGX_DECLINED; } ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t)); if (ctx == NULL) { return NGX_ERROR; } ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module); charset = ngx_http_get_charset(r, main_charset); ctx->charset = charset; ctx->charset_name = *main_charset; *src = *main_charset; return charset; } static ngx_int_t ngx_http_source_charset(ngx_http_request_t *r, ngx_str_t *name) { ngx_int_t charset; ngx_http_charset_t *charsets; ngx_http_variable_value_t *vv; ngx_http_charset_loc_conf_t *lcf; ngx_http_charset_main_conf_t *mcf; if (r->headers_out.charset.len) { *name = r->headers_out.charset; return ngx_http_get_charset(r, name); } lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module); charset = lcf->source_charset; if (charset == NGX_HTTP_CHARSET_OFF) { name->len = 0; return charset; } if (charset < NGX_HTTP_CHARSET_VAR) { mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); charsets = mcf->charsets.elts; *name = charsets[charset].name; return charset; } vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR); if (vv == NULL || vv->not_found) { return NGX_ERROR; } name->len = vv->len; name->data = vv->data; return ngx_http_get_charset(r, name); } static ngx_int_t ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name) { ngx_uint_t i, n; ngx_http_charset_t *charset; ngx_http_charset_main_conf_t *mcf; mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module); charset = mcf->charsets.elts; n = mcf->charsets.nelts; for (i = 0; i < n; i++) { if (charset[i].name.len != name->len) { continue; } if (ngx_strncasecmp(charset[i].name.data, name->data, name->len) == 0) { return i; } } return NGX_HTTP_NO_CHARSET; } static ngx_inline void ngx_http_set_charset(ngx_http_request_t *r, ngx_str_t *charset) { if (r != r->main) { return; } if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY) { /* * do not set charset for the redirect because NN 4.x * use this charset instead of the next page charset */ r->headers_out.charset.len = 0; return; } r->headers_out.charset = *charset; } static ngx_int_t ngx_http_charset_ctx(ngx_http_request_t *r, ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset) { ngx_http_charset_ctx_t *ctx; ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t)); if (ctx == NULL) { return NGX_ERROR; } ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module); ctx->table = charsets[source_charset].tables[charset]; ctx->charset = charset; ctx->charset_name = charsets[charset].name; ctx->length = charsets[charset].length; ctx->from_utf8 = charsets[source_charset].utf8; ctx->to_utf8 = charsets[charset].utf8; r->filter_need_in_memory = 1; if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) { ngx_http_clear_content_length(r); } else { r->filter_need_temporary = 1; } return ngx_http_next_header_filter(r); } static ngx_int_t ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in) { ngx_int_t rc; ngx_buf_t *b; ngx_chain_t *cl, *out, **ll; ngx_http_charset_ctx_t *ctx; ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module); if (ctx == NULL || ctx->table == NULL) { return ngx_http_next_body_filter(r, in); } if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) { out = NULL; ll = &out; for (cl = in; cl; cl = cl->next) { b = cl->buf; if (ngx_buf_size(b) == 0) { *ll = ngx_alloc_chain_link(r->pool); if (*ll == NULL) { return NGX_ERROR; } (*ll)->buf = b; (*ll)->next = NULL; ll = &(*ll)->next; continue; } if (ctx->to_utf8) { *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx); } else { *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx); } if (*ll == NULL) { return NGX_ERROR; } while (*ll) { ll = &(*ll)->next; } } rc = ngx_http_next_body_filter(r, out); if (out) { if (ctx->busy == NULL) { ctx->busy = out; } else { for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ } cl->next = out; } } while (ctx->busy) { cl = ctx->busy; b = cl->buf; if (ngx_buf_size(b) != 0) { break; } ctx->busy = cl->next; if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) { continue; } if (b->shadow) { b->shadow->pos = b->shadow->last; } if (b->pos) { cl->next = ctx->free_buffers; ctx->free_buffers = cl; continue; } cl->next = ctx->free_bufs; ctx->free_bufs = cl; } return rc; } for (cl = in; cl; cl = cl->next) { (void) ngx_http_charset_recode(cl->buf, ctx->table); } return ngx_http_next_body_filter(r, in); } static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table) { u_char *p, *last; last = b->last; for (p = b->pos; p < last; p++) { if (*p != table[*p]) { goto recode; } } return 0; recode: do { if (*p != table[*p]) { *p = table[*p]; } p++; } while (p < last); b->in_file = 0; return 1; } static ngx_chain_t * ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx) { size_t len, size; u_char c, *p, *src, *dst, *saved, **table; uint32_t n; ngx_buf_t *b; ngx_uint_t i; ngx_chain_t *out, *cl, **ll; src = buf->pos; if (ctx->saved_len == 0) { for ( /* void */ ; src < buf->last; src++) { if (*src < 0x80) { continue; } len = src - buf->pos; if (len > 512) { out = ngx_http_charset_get_buf(pool, ctx); if (out == NULL) { return NULL; } b = out->buf; b->temporary = buf->temporary; b->memory = buf->memory; b->mmap = buf->mmap; b->flush = buf->flush; b->pos = buf->pos; b->last = src; out->buf = b; out->next = NULL; size = buf->last - src; saved = src; n = ngx_utf8_decode(&saved, size); if (n == 0xfffffffe) { /* incomplete UTF-8 symbol */ ngx_memcpy(ctx->saved, src, size); ctx->saved_len = size; b->shadow = buf; return out; } } else { out = NULL; size = len + buf->last - src; src = buf->pos; } if (size < NGX_HTML_ENTITY_LEN) { size += NGX_HTML_ENTITY_LEN; } cl = ngx_http_charset_get_buffer(pool, ctx, size); if (cl == NULL) { return NULL; } if (out) { out->next = cl; } else { out = cl; } b = cl->buf; dst = b->pos; goto recode; } out = ngx_alloc_chain_link(pool); if (out == NULL) { return NULL; } out->buf = buf; out->next = NULL; return out; } /* process incomplete UTF sequence from previous buffer */ ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0, "http charset utf saved: %z", ctx->saved_len); p = src; for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) { ctx->saved[i] = *p++; if (p == buf->last) { break; } } saved = ctx->saved; n = ngx_utf8_decode(&saved, i); c = '\0'; if (n < 0x10000) { table = (u_char **) ctx->table; p = table[n >> 8]; if (p) { c = p[n & 0xff]; } } else if (n == 0xfffffffe) { /* incomplete UTF-8 symbol */ if (i < NGX_UTF_LEN) { out = ngx_http_charset_get_buf(pool, ctx); if (out == NULL) { return NULL; } b = out->buf; b->pos = buf->pos; b->last = buf->last; b->sync = 1; b->shadow = buf; ngx_memcpy(&ctx->saved[ctx->saved_len], src, i); ctx->saved_len += i; return out; } } size = buf->last - buf->pos; if (size < NGX_HTML_ENTITY_LEN) { size += NGX_HTML_ENTITY_LEN; } cl = ngx_http_charset_get_buffer(pool, ctx, size); if (cl == NULL) { return NULL; } out = cl; b = cl->buf; dst = b->pos; if (c) { *dst++ = c; } else if (n == 0xfffffffe) { *dst++ = '?'; ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0, "http charset invalid utf 0"); saved = &ctx->saved[NGX_UTF_LEN]; } else if (n > 0x10ffff) { *dst++ = '?'; ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0, "http charset invalid utf 1"); } else { dst = ngx_sprintf(dst, "&#%uD;", n); } src += (saved - ctx->saved) - ctx->saved_len; ctx->saved_len = 0; recode: ll = &cl->next; table = (u_char **) ctx->table; while (src < buf->last) { if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) { b->last = dst; size = buf->last - src + NGX_HTML_ENTITY_LEN; cl = ngx_http_charset_get_buffer(pool, ctx, size); if (cl == NULL) { return NULL; } *ll = cl; ll = &cl->next; b = cl->buf; dst = b->pos; } if (*src < 0x80) { *dst++ = *src++; continue; } len = buf->last - src; n = ngx_utf8_decode(&src, len); if (n < 0x10000) { p = table[n >> 8]; if (p) { c = p[n & 0xff]; if (c) { *dst++ = c; continue; } } dst = ngx_sprintf(dst, "&#%uD;", n); continue; } if (n == 0xfffffffe) { /* incomplete UTF-8 symbol */ ngx_memcpy(ctx->saved, src, len); ctx->saved_len = len; if (b->pos == dst) { b->sync = 1; b->temporary = 0; } break; } if (n > 0x10ffff) { *dst++ = '?'; ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0, "http charset invalid utf 2"); continue; } /* n > 0xffff */ dst = ngx_sprintf(dst, "&#%uD;", n); } b->last = dst; b->last_buf = buf->last_buf; b->last_in_chain = buf->last_in_chain; b->flush = buf->flush; b->shadow = buf; return out; } static ngx_chain_t * ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx) { size_t len, size; u_char *p, *src, *dst, *table; ngx_buf_t *b; ngx_chain_t *out, *cl, **ll; table = ctx->table; for (src = buf->pos; src < buf->last; src++) { if (table[*src * NGX_UTF_LEN] == '\1') { continue; } goto recode; } out = ngx_alloc_chain_link(pool); if (out == NULL) { return NULL; } out->buf = buf; out->next = NULL; return out; recode: /* * we assume that there are about half of characters to be recoded, * so we preallocate "size / 2 + size / 2 * ctx->length" */ len = src - buf->pos; if (len > 512) { out = ngx_http_charset_get_buf(pool, ctx); if (out == NULL) { return NULL; } b = out->buf; b->temporary = buf->temporary; b->memory = buf->memory; b->mmap = buf->mmap; b->flush = buf->flush; b->pos = buf->pos; b->last = src; out->buf = b; out->next = NULL; size = buf->last - src; size = size / 2 + size / 2 * ctx->length; } else { out = NULL; size = buf->last - src; size = len + size / 2 + size / 2 * ctx->length; src = buf->pos; } cl = ngx_http_charset_get_buffer(pool, ctx, size); if (cl == NULL) { return NULL; } if (out) { out->next = cl; } else { out = cl; } ll = &cl->next; b = cl->buf; dst = b->pos; while (src < buf->last) { p = &table[*src++ * NGX_UTF_LEN]; len = *p++; if ((size_t) (b->end - dst) < len) { b->last = dst; size = buf->last - src; size = len + size / 2 + size / 2 * ctx->length; cl = ngx_http_charset_get_buffer(pool, ctx, size); if (cl == NULL) { return NULL; } *ll = cl; ll = &cl->next; b = cl->buf; dst = b->pos; } while (len) { *dst++ = *p++; len--; } } b->last = dst; b->last_buf = buf->last_buf; b->last_in_chain = buf->last_in_chain; b->flush = buf->flush; b->shadow = buf; return out; } static ngx_chain_t * ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx) { ngx_chain_t *cl; cl = ctx->free_bufs; if (cl) { ctx->free_bufs = cl->next; cl->buf->shadow = NULL; cl->next = NULL; return cl; } cl = ngx_alloc_chain_link(pool); if (cl == NULL) { return NULL; } cl->buf = ngx_calloc_buf(pool); if (cl->buf == NULL) { return NULL; } cl->next = NULL; cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module; return cl; } static ngx_chain_t * ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx, size_t size) { ngx_buf_t *b; ngx_chain_t *cl, **ll; for (ll = &ctx->free_buffers, cl = ctx->free_buffers; cl; ll = &cl->next, cl = cl->next) { b = cl->buf; if ((size_t) (b->end - b->start) >= size) { *ll = cl->next; cl->next = NULL; b->pos = b->start; b->temporary = 1; b->shadow = NULL; return cl; } } cl = ngx_alloc_chain_link(pool); if (cl == NULL) { return NULL; } cl->buf = ngx_create_temp_buf(pool, size); if (cl->buf == NULL) { return NULL; } cl->next = NULL; cl->buf->temporary = 1; cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module; return cl; } static char * ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) { ngx_http_charset_main_conf_t *mcf = conf; char *rv; u_char *p, *dst2src, **pp; ngx_int_t src, dst; ngx_uint_t i, n; ngx_str_t *value; ngx_conf_t pvcf; ngx_http_charset_t *charset; ngx_http_charset_tables_t *table; ngx_http_charset_conf_ctx_t ctx; value = cf->args->elts; src = ngx_http_add_charset(&mcf->charsets, &value[1]); if (src == NGX_ERROR) { return NGX_CONF_ERROR; } dst = ngx_http_add_charset(&mcf->charsets, &value[2]); if (dst == NGX_ERROR) { return NGX_CONF_ERROR; } if (src == dst) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "\"charset_map\" between the same charsets " "\"%V\" and \"%V\"", &value[1], &value[2]); return NGX_CONF_ERROR; } table = mcf->tables.elts; for (i = 0; i < mcf->tables.nelts; i++) { if ((src == table->src && dst == table->dst) || (src == table->dst && dst == table->src)) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "duplicate \"charset_map\" between " "\"%V\" and \"%V\"", &value[1], &value[2]); return NGX_CONF_ERROR; } } table = ngx_array_push(&mcf->tables); if (table == NULL) { return NGX_CONF_ERROR; } table->src = src; table->dst = dst; if (ngx_strcasecmp(value[2].data, (u_char *) "utf-8") == 0) { table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN); if (table->src2dst == NULL) { return NGX_CONF_ERROR; } table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *)); if (table->dst2src == NULL) { return NGX_CONF_ERROR; } dst2src = ngx_pcalloc(cf->pool, 256); if (dst2src == NULL) { return NGX_CONF_ERROR; } pp = (u_char **) &table->dst2src[0]; pp[0] = dst2src; for (i = 0; i < 128; i++) { p = &table->src2dst[i * NGX_UTF_LEN]; p[0] = '\1'; p[1] = (u_char) i; dst2src[i] = (u_char) i; } for (/* void */; i < 256; i++) { p = &table->src2dst[i * NGX_UTF_LEN]; p[0] = '\1'; p[1] = '?'; } } else { table->src2dst = ngx_palloc(cf->pool, 256); if (table->src2dst == NULL) { return NGX_CONF_ERROR; } table->dst2src = ngx_palloc(cf->pool, 256); if (table->dst2src == NULL) { return NGX_CONF_ERROR; } for (i = 0; i < 128; i++) { table->src2dst[i] = (u_char) i; table->dst2src[i] = (u_char) i; } for (/* void */; i < 256; i++) { table->src2dst[i] = '?'; table->dst2src[i] = '?'; } } charset = mcf->charsets.elts; ctx.table = table; ctx.charset = &charset[dst]; ctx.characters = 0; pvcf = *cf; cf->ctx = &ctx; cf->handler = ngx_http_charset_map; cf->handler_conf = conf; rv = ngx_conf_parse(cf, NULL); *cf = pvcf; if (ctx.characters) { n = ctx.charset->length; ctx.charset->length /= ctx.characters; if (((n * 10) / ctx.characters) % 10 > 4) { ctx.charset->length++; } } return rv; } static char * ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf) { u_char *p, *dst2src, **pp; uint32_t n; ngx_int_t src, dst; ngx_str_t *value; ngx_uint_t i; ngx_http_charset_tables_t *table; ngx_http_charset_conf_ctx_t *ctx; if (cf->args->nelts != 2) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number"); return NGX_CONF_ERROR; } value = cf->args->elts; src = ngx_hextoi(value[0].data, value[0].len); if (src == NGX_ERROR || src > 255) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid value \"%V\"", &value[0]); return NGX_CONF_ERROR; } ctx = cf->ctx; table = ctx->table; if (ctx->charset->utf8) { p = &table->src2dst[src * NGX_UTF_LEN]; *p++ = (u_char) (value[1].len / 2); for (i = 0; i < value[1].len; i += 2) { dst = ngx_hextoi(&value[1].data[i], 2); if (dst == NGX_ERROR || dst > 255) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid value \"%V\"", &value[1]); return NGX_CONF_ERROR; } *p++ = (u_char) dst; } i /= 2; ctx->charset->length += i; ctx->characters++; p = &table->src2dst[src * NGX_UTF_LEN] + 1; n = ngx_utf8_decode(&p, i); if (n > 0xffff) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid value \"%V\"", &value[1]); return NGX_CONF_ERROR; } pp = (u_char **) &table->dst2src[0]; dst2src = pp[n >> 8]; if (dst2src == NULL) { dst2src = ngx_pcalloc(cf->pool, 256); if (dst2src == NULL) { return NGX_CONF_ERROR; } pp[n >> 8] = dst2src; } dst2src[n & 0xff] = (u_char) src; } else { dst = ngx_hextoi(value[1].data, value[1].len); if (dst == NGX_ERROR || dst > 255) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid value \"%V\"", &value[1]); return NGX_CONF_ERROR; } table->src2dst[src] = (u_char) dst; table->dst2src[dst] = (u_char) src; } return NGX_CONF_OK; } static char * ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) { char *p = conf; ngx_int_t *cp; ngx_str_t *value, var; ngx_http_charset_main_conf_t *mcf; cp = (ngx_int_t *) (p + cmd->offset); if (*cp != NGX_CONF_UNSET) { return "is duplicate"; } value = cf->args->elts; if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset) && ngx_strcmp(value[1].data, "off") == 0) { *cp = NGX_HTTP_CHARSET_OFF; return NGX_CONF_OK; } if (value[1].data[0] == '$') { var.len = value[1].len - 1; var.data = value[1].data + 1; *cp = ngx_http_get_variable_index(cf, &var); if (*cp == NGX_ERROR) { return NGX_CONF_ERROR; } *cp += NGX_HTTP_CHARSET_VAR; return NGX_CONF_OK; } mcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_charset_filter_module); *cp = ngx_http_add_charset(&mcf->charsets, &value[1]); if (*cp == NGX_ERROR) { return NGX_CONF_ERROR; } return NGX_CONF_OK; } static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name) { ngx_uint_t i; ngx_http_charset_t *c; c = charsets->elts; for (i = 0; i < charsets->nelts; i++) { if (name->len != c[i].name.len) { continue; } if (ngx_strcasecmp(name->data, c[i].name.data) == 0) { break; } } if (i < charsets->nelts) { return i; } c = ngx_array_push(charsets); if (c == NULL) { return NGX_ERROR; } c->tables = NULL; c->name = *name; c->length = 0; if (ngx_strcasecmp(name->data, (u_char *) "utf-8") == 0) { c->utf8 = 1; } else { c->utf8 = 0; } return i; } static void * ngx_http_charset_create_main_conf(ngx_conf_t *cf) { ngx_http_charset_main_conf_t *mcf; mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t)); if (mcf == NULL) { return NULL; } if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t)) != NGX_OK) { return NULL; } if (ngx_array_init(&mcf->tables, cf->pool, 1, sizeof(ngx_http_charset_tables_t)) != NGX_OK) { return NULL; } if (ngx_array_init(&mcf->recodes, cf->pool, 2, sizeof(ngx_http_charset_recode_t)) != NGX_OK) { return NULL; } return mcf; } static void * ngx_http_charset_create_loc_conf(ngx_conf_t *cf) { ngx_http_charset_loc_conf_t *lcf; lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t)); if (lcf == NULL) { return NULL; } /* * set by ngx_pcalloc(): * * lcf->types = { NULL }; * lcf->types_keys = NULL; */ lcf->charset = NGX_CONF_UNSET; lcf->source_charset = NGX_CONF_UNSET; lcf->override_charset = NGX_CONF_UNSET; return lcf; } static char * ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) { ngx_http_charset_loc_conf_t *prev = parent; ngx_http_charset_loc_conf_t *conf = child; ngx_uint_t i; ngx_http_charset_recode_t *recode; ngx_http_charset_main_conf_t *mcf; if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types, &prev->types_keys, &prev->types, ngx_http_charset_default_types) != NGX_OK) { return NGX_CONF_ERROR; } ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0); ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_CHARSET_OFF); ngx_conf_merge_value(conf->source_charset, prev->source_charset, NGX_HTTP_CHARSET_OFF); if (conf->charset == NGX_HTTP_CHARSET_OFF || conf->source_charset == NGX_HTTP_CHARSET_OFF || conf->charset == conf->source_charset) { return NGX_CONF_OK; } if (conf->source_charset >= NGX_HTTP_CHARSET_VAR || conf->charset >= NGX_HTTP_CHARSET_VAR) { return NGX_CONF_OK; } mcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_charset_filter_module); recode = mcf->recodes.elts; for (i = 0; i < mcf->recodes.nelts; i++) { if (conf->source_charset == recode[i].src && conf->charset == recode[i].dst) { return NGX_CONF_OK; } } recode = ngx_array_push(&mcf->recodes); if (recode == NULL) { return NGX_CONF_ERROR; } recode->src = conf->source_charset; recode->dst = conf->charset; return NGX_CONF_OK; } static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf) { u_char **src, **dst; ngx_int_t c; ngx_uint_t i, t; ngx_http_charset_t *charset; ngx_http_charset_recode_t *recode; ngx_http_charset_tables_t *tables; ngx_http_charset_main_conf_t *mcf; mcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_charset_filter_module); recode = mcf->recodes.elts; tables = mcf->tables.elts; charset = mcf->charsets.elts; for (i = 0; i < mcf->recodes.nelts; i++) { c = recode[i].src; for (t = 0; t < mcf->tables.nelts; t++) { if (c == tables[t].src && recode[i].dst == tables[t].dst) { goto next; } if (c == tables[t].dst && recode[i].dst == tables[t].src) { goto next; } } ngx_log_error(NGX_LOG_EMERG, cf->log, 0, "no \"charset_map\" between the charsets \"%V\" and \"%V\"", &charset[c].name, &charset[recode[i].dst].name); return NGX_ERROR; next: continue; } for (t = 0; t < mcf->tables.nelts; t++) { src = charset[tables[t].src].tables; if (src == NULL) { src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts); if (src == NULL) { return NGX_ERROR; } charset[tables[t].src].tables = src; } dst = charset[tables[t].dst].tables; if (dst == NULL) { dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts); if (dst == NULL) { return NGX_ERROR; } charset[tables[t].dst].tables = dst; } src[tables[t].dst] = tables[t].src2dst; dst[tables[t].src] = tables[t].dst2src; } ngx_http_next_header_filter = ngx_http_top_header_filter; ngx_http_top_header_filter = ngx_http_charset_header_filter; ngx_http_next_body_filter = ngx_http_top_body_filter; ngx_http_top_body_filter = ngx_http_charset_body_filter; return NGX_OK; }