50
|
1
|
|
2 /*
|
|
3 * Copyright (C) Igor Sysoev
|
|
4 */
|
|
5
|
|
6
|
|
7 #include <ngx_config.h>
|
|
8 #include <ngx_core.h>
|
|
9 #include <ngx_http.h>
|
|
10
|
|
11
|
206
|
/* returned by ngx_http_charset_get_charset() when a name is not registered */
#define NGX_HTTP_NO_CHARSET    (-2)

/* charset values at or above this are a variable index plus this offset */
#define NGX_HTTP_CHARSET_VAR   0x10000

/* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
#define NGX_UTF_LEN            4

/*
 * The longest text the "&#%uD;" format can produce for a code point that
 * the recoder lets through: values above 0x10ffff (1114111) are replaced
 * by '?', so "&#1114111;" is the worst case.  The output loop reserves
 * this many bytes before every character, therefore the value must never
 * be 0.
 */
#define NGX_HTML_ENTITY_LEN    (sizeof("&#1114111;") - 1)
|
78
|
19
|
|
20
|
50
|
21 typedef struct {
|
206
|
22 u_char **tables;
|
|
23 ngx_str_t name;
|
72
|
24
|
206
|
25 unsigned length:16;
|
|
26 unsigned utf8:1;
|
50
|
27 } ngx_http_charset_t;
|
|
28
|
|
29
|
|
30 typedef struct {
|
206
|
31 ngx_int_t src;
|
|
32 ngx_int_t dst;
|
78
|
33 } ngx_http_charset_recode_t;
|
|
34
|
|
35
|
|
36 typedef struct {
|
206
|
37 ngx_int_t src;
|
|
38 ngx_int_t dst;
|
|
39 u_char *src2dst;
|
|
40 u_char *dst2src;
|
50
|
41 } ngx_http_charset_tables_t;
|
|
42
|
|
43
|
|
44 typedef struct {
|
206
|
45 ngx_array_t charsets; /* ngx_http_charset_t */
|
|
46 ngx_array_t tables; /* ngx_http_charset_tables_t */
|
|
47 ngx_array_t recodes; /* ngx_http_charset_recode_t */
|
50
|
48 } ngx_http_charset_main_conf_t;
|
|
49
|
|
50
|
|
51 typedef struct {
|
206
|
52 ngx_int_t charset;
|
|
53 ngx_int_t source_charset;
|
|
54 ngx_flag_t override_charset;
|
394
|
55
|
|
56 ngx_hash_t types;
|
|
57 ngx_array_t *types_keys;
|
50
|
58 } ngx_http_charset_loc_conf_t;
|
|
59
|
|
60
|
|
61 typedef struct {
|
206
|
62 u_char *table;
|
|
63 ngx_int_t charset;
|
|
64
|
|
65 ngx_chain_t *busy;
|
|
66 ngx_chain_t *free_bufs;
|
|
67 ngx_chain_t *free_buffers;
|
|
68
|
|
69 size_t saved_len;
|
|
70 u_char saved[NGX_UTF_LEN];
|
|
71
|
|
72 unsigned length:16;
|
|
73 unsigned from_utf8:1;
|
|
74 unsigned to_utf8:1;
|
50
|
75 } ngx_http_charset_ctx_t;
|
|
76
|
|
77
|
206
|
78 typedef struct {
|
|
79 ngx_http_charset_tables_t *table;
|
|
80 ngx_http_charset_t *charset;
|
|
81 ngx_uint_t characters;
|
|
82 } ngx_http_charset_conf_ctx_t;
|
|
83
|
50
|
84
|
206
|
85 static ngx_int_t ngx_http_charset_get_charset(ngx_http_charset_t *charsets,
|
244
|
86 ngx_uint_t n, ngx_str_t *charset);
|
206
|
87 static ngx_int_t ngx_http_charset_set_charset(ngx_http_request_t *r,
|
|
88 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset);
|
|
89 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);
|
|
90 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool,
|
|
91 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
|
|
92 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool,
|
|
93 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
|
|
94
|
|
95 static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool,
|
|
96 ngx_http_charset_ctx_t *ctx);
|
|
97 static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool,
|
|
98 ngx_http_charset_ctx_t *ctx, size_t size);
|
|
99
|
|
100 static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
|
50
|
101 void *conf);
|
206
|
102 static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy,
|
|
103 void *conf);
|
50
|
104
|
|
105 static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
|
|
106 void *conf);
|
|
107 static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);
|
|
108
|
|
109 static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
|
|
110 static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
|
|
111 static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
|
|
112 void *parent, void *child);
|
78
|
113 static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);
|
50
|
114
|
|
115
|
394
|
116 ngx_str_t ngx_http_charset_default_types[] = {
|
|
117 ngx_string("text/html"),
|
|
118 ngx_string("text/css"),
|
|
119 ngx_string("text/xml"),
|
|
120 ngx_string("text/plain"),
|
|
121 ngx_string("text/vnd.wap.wml"),
|
|
122 ngx_string("application/x-javascript"),
|
|
123 ngx_string("application/rss+xml"),
|
|
124 ngx_null_string
|
|
125 };
|
|
126
|
|
127
|
50
|
128 static ngx_command_t ngx_http_charset_filter_commands[] = {
|
|
129
|
78
|
130 { ngx_string("charset"),
|
108
|
131 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
132 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
|
78
|
133 ngx_http_set_charset_slot,
|
|
134 NGX_HTTP_LOC_CONF_OFFSET,
|
|
135 offsetof(ngx_http_charset_loc_conf_t, charset),
|
|
136 NULL },
|
|
137
|
|
138 { ngx_string("source_charset"),
|
108
|
139 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
140 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
|
78
|
141 ngx_http_set_charset_slot,
|
|
142 NGX_HTTP_LOC_CONF_OFFSET,
|
|
143 offsetof(ngx_http_charset_loc_conf_t, source_charset),
|
|
144 NULL },
|
|
145
|
184
|
146 { ngx_string("override_charset"),
|
|
147 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
148 |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
|
|
149 ngx_conf_set_flag_slot,
|
|
150 NGX_HTTP_LOC_CONF_OFFSET,
|
|
151 offsetof(ngx_http_charset_loc_conf_t, override_charset),
|
|
152 NULL },
|
|
153
|
394
|
154 { ngx_string("charset_types"),
|
|
155 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE,
|
|
156 ngx_http_types_slot,
|
|
157 NGX_HTTP_LOC_CONF_OFFSET,
|
|
158 offsetof(ngx_http_charset_loc_conf_t, types_keys),
|
|
159 &ngx_http_charset_default_types[0] },
|
|
160
|
50
|
161 { ngx_string("charset_map"),
|
|
162 NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
|
206
|
163 ngx_http_charset_map_block,
|
50
|
164 NGX_HTTP_MAIN_CONF_OFFSET,
|
|
165 0,
|
|
166 NULL },
|
|
167
|
|
168 ngx_null_command
|
|
169 };
|
|
170
|
|
171
|
|
172 static ngx_http_module_t ngx_http_charset_filter_module_ctx = {
|
58
|
173 NULL, /* preconfiguration */
|
78
|
174 ngx_http_charset_postconfiguration, /* postconfiguration */
|
50
|
175
|
|
176 ngx_http_charset_create_main_conf, /* create main configuration */
|
78
|
177 NULL, /* init main configuration */
|
50
|
178
|
|
179 NULL, /* create server configuration */
|
|
180 NULL, /* merge server configuration */
|
|
181
|
|
182 ngx_http_charset_create_loc_conf, /* create location configuration */
|
|
183 ngx_http_charset_merge_loc_conf /* merge location configuration */
|
|
184 };
|
|
185
|
|
186
|
|
187 ngx_module_t ngx_http_charset_filter_module = {
|
58
|
188 NGX_MODULE_V1,
|
50
|
189 &ngx_http_charset_filter_module_ctx, /* module context */
|
|
190 ngx_http_charset_filter_commands, /* module directives */
|
|
191 NGX_HTTP_MODULE, /* module type */
|
90
|
192 NULL, /* init master */
|
230
|
193 NULL, /* init module */
|
90
|
194 NULL, /* init process */
|
|
195 NULL, /* init thread */
|
|
196 NULL, /* exit thread */
|
|
197 NULL, /* exit process */
|
|
198 NULL, /* exit master */
|
|
199 NGX_MODULE_V1_PADDING
|
50
|
200 };
|
|
201
|
|
202
|
|
203 static ngx_http_output_header_filter_pt ngx_http_next_header_filter;
|
|
204 static ngx_http_output_body_filter_pt ngx_http_next_body_filter;
|
|
205
|
|
206
|
|
207 static ngx_int_t
|
|
208 ngx_http_charset_header_filter(ngx_http_request_t *r)
|
|
209 {
|
184
|
210 ngx_int_t charset, source_charset;
|
320
|
211 ngx_str_t *mc, *from, *to, s;
|
206
|
212 ngx_uint_t n;
|
50
|
213 ngx_http_charset_t *charsets;
|
|
214 ngx_http_charset_ctx_t *ctx;
|
244
|
215 ngx_http_variable_value_t *vv;
|
202
|
216 ngx_http_charset_loc_conf_t *lcf, *mlcf;
|
50
|
217 ngx_http_charset_main_conf_t *mcf;
|
|
218
|
|
219 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
|
184
|
220
|
206
|
221 charsets = mcf->charsets.elts;
|
|
222 n = mcf->charsets.nelts;
|
184
|
223
|
206
|
224 /* destination charset */
|
202
|
225
|
|
226 if (r == r->main) {
|
206
|
227
|
382
|
228 if (r->headers_out.content_encoding
|
|
229 && r->headers_out.content_encoding->value.len)
|
|
230 {
|
|
231 return ngx_http_next_header_filter(r);
|
|
232 }
|
|
233
|
202
|
234 if (r->headers_out.content_type.len == 0) {
|
|
235 return ngx_http_next_header_filter(r);
|
|
236 }
|
50
|
237
|
206
|
238 if (r->headers_out.override_charset
|
|
239 && r->headers_out.override_charset->len)
|
202
|
240 {
|
206
|
241 charset = ngx_http_charset_get_charset(charsets, n,
|
244
|
242 r->headers_out.override_charset);
|
206
|
243
|
|
244 if (charset == NGX_HTTP_NO_CHARSET) {
|
210
|
245 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
|
206
|
246 "unknown charset \"%V\" to override",
|
208
|
247 r->headers_out.override_charset);
|
206
|
248
|
|
249 return ngx_http_next_header_filter(r);
|
|
250 }
|
|
251
|
|
252 } else {
|
|
253 mlcf = ngx_http_get_module_loc_conf(r,
|
|
254 ngx_http_charset_filter_module);
|
|
255 charset = mlcf->charset;
|
|
256
|
|
257 if (charset == NGX_HTTP_NO_CHARSET) {
|
|
258 return ngx_http_next_header_filter(r);
|
|
259 }
|
|
260
|
|
261 if (r->headers_out.charset.len) {
|
|
262 if (mlcf->override_charset == 0) {
|
|
263 return ngx_http_next_header_filter(r);
|
|
264 }
|
|
265
|
|
266 } else {
|
394
|
267 if (ngx_http_test_content_type(r, &mlcf->types) == NULL) {
|
206
|
268 return ngx_http_next_header_filter(r);
|
|
269 }
|
|
270 }
|
244
|
271
|
|
272 if (charset >= NGX_HTTP_CHARSET_VAR) {
|
|
273 vv = ngx_http_get_indexed_variable(r,
|
|
274 charset - NGX_HTTP_CHARSET_VAR);
|
|
275
|
250
|
276 if (vv == NULL || vv->not_found) {
|
|
277 return NGX_ERROR;
|
|
278 }
|
|
279
|
320
|
280 s.len = vv->len;
|
|
281 s.data = vv->data;
|
|
282
|
|
283 charset = ngx_http_charset_get_charset(charsets, n, &s);
|
244
|
284 }
|
202
|
285 }
|
|
286
|
|
287 } else {
|
206
|
288 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);
|
|
289
|
|
290 if (ctx == NULL) {
|
|
291
|
|
292 mc = &r->main->headers_out.charset;
|
|
293
|
|
294 if (mc->len == 0) {
|
|
295 return ngx_http_next_header_filter(r);
|
|
296 }
|
|
297
|
|
298 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
|
|
299 if (ctx == NULL) {
|
|
300 return NGX_ERROR;
|
|
301 }
|
|
302
|
|
303 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module);
|
202
|
304
|
244
|
305 charset = ngx_http_charset_get_charset(charsets, n, mc);
|
206
|
306
|
|
307 ctx->charset = charset;
|
|
308
|
210
|
309 } else {
|
|
310 charset = ctx->charset;
|
202
|
311 }
|
50
|
312 }
|
|
313
|
206
|
314 /* source charset */
|
184
|
315
|
206
|
316 if (r->headers_out.charset.len == 0) {
|
|
317 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
|
184
|
318
|
244
|
319 source_charset = lcf->source_charset;
|
|
320
|
|
321 if (source_charset >= NGX_HTTP_CHARSET_VAR) {
|
|
322 vv = ngx_http_get_indexed_variable(r,
|
|
323 source_charset - NGX_HTTP_CHARSET_VAR);
|
|
324
|
250
|
325 if (vv == NULL || vv->not_found) {
|
|
326 return NGX_ERROR;
|
|
327 }
|
|
328
|
320
|
329 s.len = vv->len;
|
|
330 s.data = vv->data;
|
|
331
|
|
332 source_charset = ngx_http_charset_get_charset(charsets, n, &s);
|
244
|
333 }
|
|
334
|
210
|
335 if (charset != NGX_HTTP_NO_CHARSET) {
|
|
336 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
|
244
|
337 source_charset);
|
210
|
338 }
|
|
339
|
244
|
340 if (source_charset == NGX_CONF_UNSET) {
|
210
|
341 return ngx_http_next_header_filter(r);
|
|
342 }
|
|
343
|
244
|
344 from = &charsets[source_charset].name;
|
212
|
345 to = &r->main->headers_out.charset;
|
210
|
346
|
212
|
347 goto no_charset_map;
|
206
|
348 }
|
184
|
349
|
206
|
350 source_charset = ngx_http_charset_get_charset(charsets, n,
|
244
|
351 &r->headers_out.charset);
|
184
|
352
|
210
|
353 if (charset == NGX_HTTP_NO_CHARSET
|
|
354 || source_charset == NGX_HTTP_NO_CHARSET)
|
|
355 {
|
|
356 if (charset != source_charset
|
|
357 || ngx_strcasecmp(r->main->headers_out.charset.data,
|
|
358 r->headers_out.charset.data)
|
|
359 != 0)
|
|
360 {
|
212
|
361 from = &r->headers_out.charset;
|
|
362 to = (charset == NGX_HTTP_NO_CHARSET) ?
|
|
363 &r->main->headers_out.charset:
|
|
364 &charsets[charset].name;
|
|
365
|
|
366 goto no_charset_map;
|
210
|
367 }
|
184
|
368
|
206
|
369 return ngx_http_next_header_filter(r);
|
|
370 }
|
184
|
371
|
206
|
372 if (source_charset != charset
|
|
373 && (charsets[source_charset].tables == NULL
|
|
374 || charsets[source_charset].tables[charset] == NULL))
|
|
375 {
|
212
|
376 from = &charsets[source_charset].name;
|
|
377 to = &charsets[charset].name;
|
184
|
378
|
212
|
379 goto no_charset_map;
|
50
|
380 }
|
|
381
|
206
|
382 r->headers_out.content_type.len = r->headers_out.content_type_len;
|
|
383
|
|
384 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
|
|
385 source_charset);
|
212
|
386
|
|
387 no_charset_map:
|
|
388
|
|
389 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
|
342
|
390 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
|
|
391 from, to);
|
212
|
392
|
|
393 return ngx_http_next_header_filter(r);
|
206
|
394 }
|
|
395
|
|
396
|
|
397 static ngx_int_t
|
|
398 ngx_http_charset_get_charset(ngx_http_charset_t *charsets, ngx_uint_t n,
|
244
|
399 ngx_str_t *charset)
|
206
|
400 {
|
|
401 ngx_uint_t i;
|
|
402
|
|
403 for (i = 0; i < n; i++) {
|
320
|
404 if (charsets[i].name.len != charset->len) {
|
244
|
405 continue;
|
|
406 }
|
|
407
|
320
|
408 if (ngx_strncasecmp(charsets[i].name.data, charset->data, charset->len)
|
|
409 == 0)
|
|
410 {
|
206
|
411 return i;
|
|
412 }
|
|
413 }
|
|
414
|
|
415 return NGX_HTTP_NO_CHARSET;
|
|
416 }
|
|
417
|
|
418
|
|
419 static ngx_int_t
|
|
420 ngx_http_charset_set_charset(ngx_http_request_t *r,
|
|
421 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset)
|
|
422 {
|
|
423 ngx_http_charset_ctx_t *ctx;
|
|
424
|
50
|
425 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
|
|
426 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
|
|
427 {
|
|
428 /*
|
184
|
429 * do not set charset for the redirect because NN 4.x
|
|
430 * use this charset instead of the next page charset
|
50
|
431 */
|
|
432
|
|
433 r->headers_out.charset.len = 0;
|
206
|
434
|
50
|
435 return ngx_http_next_header_filter(r);
|
|
436 }
|
|
437
|
184
|
438 r->headers_out.charset = charsets[charset].name;
|
|
439 r->utf8 = charsets[charset].utf8;
|
|
440
|
|
441 if (source_charset == NGX_CONF_UNSET || source_charset == charset) {
|
50
|
442 return ngx_http_next_header_filter(r);
|
|
443 }
|
|
444
|
|
445 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
|
|
446 if (ctx == NULL) {
|
|
447 return NGX_ERROR;
|
|
448 }
|
|
449
|
|
450 ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);
|
|
451
|
184
|
452 ctx->table = charsets[source_charset].tables[charset];
|
|
453 ctx->charset = charset;
|
206
|
454 ctx->length = charsets[charset].length;
|
|
455 ctx->from_utf8 = charsets[source_charset].utf8;
|
|
456 ctx->to_utf8 = charsets[charset].utf8;
|
|
457
|
212
|
458 r->filter_need_in_memory = 1;
|
|
459
|
206
|
460 if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) {
|
|
461 ngx_http_clear_content_length(r);
|
212
|
462
|
|
463 } else {
|
|
464 r->filter_need_temporary = 1;
|
206
|
465 }
|
50
|
466
|
|
467 return ngx_http_next_header_filter(r);
|
|
468 }
|
|
469
|
|
470
|
|
471 static ngx_int_t
|
|
472 ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
|
|
473 {
|
206
|
474 ngx_int_t rc;
|
|
475 ngx_buf_t *b;
|
|
476 ngx_chain_t *cl, *out, **ll;
|
184
|
477 ngx_http_charset_ctx_t *ctx;
|
50
|
478
|
|
479 ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);
|
|
480
|
184
|
481 if (ctx == NULL || ctx->table == NULL) {
|
50
|
482 return ngx_http_next_body_filter(r, in);
|
|
483 }
|
|
484
|
206
|
485 if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) {
|
|
486
|
|
487 out = NULL;
|
|
488 ll = &out;
|
|
489
|
|
490 for (cl = in; cl; cl = cl->next) {
|
|
491 b = cl->buf;
|
|
492
|
|
493 if (ngx_buf_size(b) == 0) {
|
214
|
494
|
|
495 *ll = ngx_alloc_chain_link(r->pool);
|
|
496 if (*ll == NULL) {
|
|
497 return NGX_ERROR;
|
|
498 }
|
|
499
|
|
500 (*ll)->buf = b;
|
|
501 (*ll)->next = NULL;
|
|
502
|
|
503 ll = &(*ll)->next;
|
|
504
|
206
|
505 continue;
|
|
506 }
|
|
507
|
|
508 if (ctx->to_utf8) {
|
|
509 *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx);
|
|
510
|
|
511 } else {
|
|
512 *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx);
|
|
513 }
|
|
514
|
|
515 if (*ll == NULL) {
|
|
516 return NGX_ERROR;
|
|
517 }
|
|
518
|
|
519 while (*ll) {
|
|
520 ll = &(*ll)->next;
|
|
521 }
|
|
522 }
|
|
523
|
|
524 rc = ngx_http_next_body_filter(r, out);
|
|
525
|
|
526 if (out) {
|
|
527 if (ctx->busy == NULL) {
|
|
528 ctx->busy = out;
|
|
529
|
|
530 } else {
|
|
531 for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ }
|
|
532 cl->next = out;
|
|
533 }
|
|
534 }
|
|
535
|
|
536 while (ctx->busy) {
|
|
537
|
|
538 cl = ctx->busy;
|
|
539 b = cl->buf;
|
|
540
|
|
541 if (ngx_buf_size(b) != 0) {
|
|
542 break;
|
|
543 }
|
|
544
|
|
545 #if (NGX_HAVE_WRITE_ZEROCOPY)
|
|
546 if (b->zerocopy_busy) {
|
|
547 break;
|
|
548 }
|
|
549 #endif
|
|
550
|
|
551 ctx->busy = cl->next;
|
|
552
|
|
553 if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) {
|
|
554 continue;
|
|
555 }
|
|
556
|
|
557 if (b->shadow) {
|
|
558 b->shadow->pos = b->shadow->last;
|
|
559 }
|
|
560
|
|
561 if (b->pos) {
|
|
562 cl->next = ctx->free_buffers;
|
|
563 ctx->free_buffers = cl;
|
|
564 continue;
|
|
565 }
|
|
566
|
|
567 cl->next = ctx->free_bufs;
|
|
568 ctx->free_bufs = cl;
|
|
569 }
|
|
570
|
|
571 return rc;
|
|
572 }
|
|
573
|
50
|
574 for (cl = in; cl; cl = cl->next) {
|
184
|
575 (void) ngx_http_charset_recode(cl->buf, ctx->table);
|
50
|
576 }
|
|
577
|
|
578 return ngx_http_next_body_filter(r, in);
|
|
579 }
|
|
580
|
|
581
|
|
582 static ngx_uint_t
|
184
|
583 ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
|
50
|
584 {
|
370
|
585 u_char *p, *last;
|
184
|
586
|
370
|
587 last = b->last;
|
50
|
588
|
370
|
589 for (p = b->pos; p < last; p++) {
|
|
590
|
|
591 if (*p != table[*p]) {
|
|
592 goto recode;
|
50
|
593 }
|
|
594 }
|
|
595
|
184
|
596 return 0;
|
370
|
597
|
|
598 recode:
|
|
599
|
|
600 do {
|
|
601 if (*p != table[*p]) {
|
|
602 *p = table[*p];
|
|
603 }
|
|
604
|
|
605 p++;
|
|
606
|
|
607 } while (p < last);
|
|
608
|
|
609 b->in_file = 0;
|
|
610
|
|
611 return 1;
|
50
|
612 }
|
|
613
|
|
614
|
206
|
615 static ngx_chain_t *
|
|
616 ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
|
|
617 ngx_http_charset_ctx_t *ctx)
|
|
618 {
|
|
619 size_t len, size;
|
|
620 u_char c, *p, *src, *dst, *saved, **table;
|
|
621 uint32_t n;
|
|
622 ngx_buf_t *b;
|
|
623 ngx_uint_t i;
|
|
624 ngx_chain_t *out, *cl, **ll;
|
|
625
|
|
626 src = buf->pos;
|
|
627
|
|
628 if (ctx->saved_len == 0) {
|
|
629
|
|
630 for ( /* void */ ; src < buf->last; src++) {
|
|
631
|
|
632 if (*src < 0x80) {
|
|
633 continue;
|
|
634 }
|
|
635
|
|
636 len = src - buf->pos;
|
|
637
|
|
638 if (len > 512) {
|
|
639 out = ngx_http_charset_get_buf(pool, ctx);
|
|
640 if (out == NULL) {
|
|
641 return NULL;
|
|
642 }
|
|
643
|
|
644 b = out->buf;
|
|
645
|
|
646 b->temporary = buf->temporary;
|
|
647 b->memory = buf->memory;
|
|
648 b->mmap = buf->mmap;
|
|
649 b->flush = buf->flush;
|
|
650
|
|
651 b->pos = buf->pos;
|
|
652 b->last = src;
|
|
653
|
|
654 out->buf = b;
|
|
655 out->next = NULL;
|
|
656
|
|
657 size = buf->last - src;
|
|
658
|
|
659 saved = src;
|
390
|
660 n = ngx_utf8_decode(&saved, size);
|
206
|
661
|
|
662 if (n == 0xfffffffe) {
|
|
663 /* incomplete UTF-8 symbol */
|
|
664
|
|
665 ngx_memcpy(ctx->saved, src, size);
|
|
666 ctx->saved_len = size;
|
|
667
|
|
668 b->shadow = buf;
|
|
669
|
|
670 return out;
|
|
671 }
|
|
672
|
|
673 } else {
|
|
674 out = NULL;
|
|
675 size = len + buf->last - src;
|
|
676 src = buf->pos;
|
|
677 }
|
|
678
|
|
679 if (size < NGX_HTML_ENTITY_LEN) {
|
|
680 size += NGX_HTML_ENTITY_LEN;
|
|
681 }
|
|
682
|
|
683 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
684 if (cl == NULL) {
|
|
685 return NULL;
|
|
686 }
|
|
687
|
|
688 if (out) {
|
|
689 out->next = cl;
|
|
690
|
|
691 } else {
|
|
692 out = cl;
|
|
693 }
|
|
694
|
|
695 b = cl->buf;
|
|
696 dst = b->pos;
|
|
697
|
|
698 goto recode;
|
|
699 }
|
|
700
|
|
701 out = ngx_alloc_chain_link(pool);
|
|
702 if (out == NULL) {
|
|
703 return NULL;
|
|
704 }
|
|
705
|
|
706 out->buf = buf;
|
|
707 out->next = NULL;
|
|
708
|
|
709 return out;
|
|
710 }
|
|
711
|
|
712 /* process incomplete UTF sequence from previous buffer */
|
|
713
|
|
714 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
715 "http charset utf saved: %z", ctx->saved_len);
|
|
716
|
|
717 p = src;
|
|
718
|
|
719 for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) {
|
|
720 ctx->saved[i] = *p++;
|
|
721
|
|
722 if (p == buf->last) {
|
|
723 break;
|
|
724 }
|
|
725 }
|
|
726
|
|
727 saved = ctx->saved;
|
390
|
728 n = ngx_utf8_decode(&saved, i);
|
206
|
729
|
|
730 c = '\0';
|
|
731
|
|
732 if (n < 0x10000) {
|
|
733 table = (u_char **) ctx->table;
|
|
734 p = table[n >> 8];
|
|
735
|
|
736 if (p) {
|
|
737 c = p[n & 0xff];
|
|
738 }
|
|
739
|
|
740 } else if (n == 0xfffffffe) {
|
|
741
|
|
742 /* incomplete UTF-8 symbol */
|
|
743
|
|
744 if (i < NGX_UTF_LEN) {
|
|
745 out = ngx_http_charset_get_buf(pool, ctx);
|
|
746 if (out == NULL) {
|
|
747 return NULL;
|
|
748 }
|
|
749
|
|
750 b = out->buf;
|
|
751
|
|
752 b->pos = buf->pos;
|
|
753 b->last = buf->last;
|
|
754 b->sync = 1;
|
|
755 b->shadow = buf;
|
|
756
|
|
757 ngx_memcpy(&ctx->saved[ctx->saved_len], src, i);
|
|
758 ctx->saved_len += i;
|
|
759
|
|
760 return out;
|
|
761 }
|
|
762 }
|
|
763
|
|
764 size = buf->last - buf->pos;
|
|
765
|
|
766 if (size < NGX_HTML_ENTITY_LEN) {
|
|
767 size += NGX_HTML_ENTITY_LEN;
|
|
768 }
|
|
769
|
|
770 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
771 if (cl == NULL) {
|
|
772 return NULL;
|
|
773 }
|
|
774
|
|
775 out = cl;
|
|
776
|
|
777 b = cl->buf;
|
|
778 dst = b->pos;
|
|
779
|
|
780 if (c) {
|
|
781 *dst++ = c;
|
|
782
|
|
783 } else if (n == 0xfffffffe) {
|
|
784 *dst++ = '?';
|
|
785
|
|
786 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
787 "http charset invalid utf 0");
|
|
788
|
|
789 saved = &ctx->saved[NGX_UTF_LEN];
|
|
790
|
|
791 } else if (n > 0x10ffff) {
|
|
792 *dst++ = '?';
|
|
793
|
|
794 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
795 "http charset invalid utf 1");
|
|
796
|
|
797 } else {
|
|
798 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
799 }
|
|
800
|
|
801 src += (saved - ctx->saved) - ctx->saved_len;
|
|
802 ctx->saved_len = 0;
|
|
803
|
|
804 recode:
|
|
805
|
|
806 ll = &cl->next;
|
|
807
|
|
808 table = (u_char **) ctx->table;
|
|
809
|
|
810 while (src < buf->last) {
|
|
811
|
|
812 if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) {
|
|
813 b->last = dst;
|
|
814
|
|
815 size = buf->last - src + NGX_HTML_ENTITY_LEN;
|
|
816
|
|
817 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
818 if (cl == NULL) {
|
|
819 return NULL;
|
|
820 }
|
|
821
|
|
822 *ll = cl;
|
|
823 ll = &cl->next;
|
|
824
|
|
825 b = cl->buf;
|
|
826 dst = b->pos;
|
|
827 }
|
|
828
|
|
829 if (*src < 0x80) {
|
|
830 *dst++ = *src++;
|
|
831 continue;
|
|
832 }
|
|
833
|
|
834 len = buf->last - src;
|
|
835
|
390
|
836 n = ngx_utf8_decode(&src, len);
|
206
|
837
|
|
838 if (n < 0x10000) {
|
|
839
|
|
840 p = table[n >> 8];
|
|
841
|
|
842 if (p) {
|
|
843 c = p[n & 0xff];
|
|
844
|
|
845 if (c) {
|
|
846 *dst++ = c;
|
|
847 continue;
|
|
848 }
|
|
849 }
|
|
850
|
|
851 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
852
|
|
853 continue;
|
|
854 }
|
|
855
|
|
856 if (n == 0xfffffffe) {
|
|
857 /* incomplete UTF-8 symbol */
|
|
858
|
|
859 ngx_memcpy(ctx->saved, src, len);
|
|
860 ctx->saved_len = len;
|
|
861
|
|
862 if (b->pos == dst) {
|
|
863 b->sync = 1;
|
|
864 b->temporary = 0;
|
|
865 }
|
|
866
|
|
867 break;
|
|
868 }
|
|
869
|
|
870 if (n > 0x10ffff) {
|
|
871 *dst++ = '?';
|
|
872
|
|
873 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
874 "http charset invalid utf 2");
|
|
875
|
|
876 continue;
|
|
877 }
|
|
878
|
|
879 /* n > 0xffff */
|
|
880
|
|
881 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
882 }
|
|
883
|
|
884 b->last = dst;
|
|
885
|
|
886 b->last_buf = buf->last_buf;
|
|
887 b->last_in_chain = buf->last_in_chain;
|
|
888 b->flush = buf->flush;
|
|
889
|
|
890 b->shadow = buf;
|
|
891
|
|
892 return out;
|
|
893 }
|
|
894
|
|
895
|
|
896 static ngx_chain_t *
|
|
897 ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
|
|
898 ngx_http_charset_ctx_t *ctx)
|
|
899 {
|
|
900 size_t len, size;
|
|
901 u_char *p, *src, *dst, *table;
|
|
902 ngx_buf_t *b;
|
|
903 ngx_chain_t *out, *cl, **ll;
|
|
904
|
|
905 table = ctx->table;
|
|
906
|
|
907 for (src = buf->pos; src < buf->last; src++) {
|
|
908 if (table[*src * NGX_UTF_LEN] == '\1') {
|
|
909 continue;
|
|
910 }
|
|
911
|
|
912 goto recode;
|
|
913 }
|
|
914
|
|
915 out = ngx_alloc_chain_link(pool);
|
|
916 if (out == NULL) {
|
|
917 return NULL;
|
|
918 }
|
|
919
|
|
920 out->buf = buf;
|
|
921 out->next = NULL;
|
|
922
|
|
923 return out;
|
|
924
|
|
925 recode:
|
|
926
|
|
927 /*
|
|
928 * we assume that there are about half of characters to be recoded,
|
|
929 * so we preallocate "size / 2 + size / 2 * ctx->length"
|
|
930 */
|
|
931
|
|
932 len = src - buf->pos;
|
|
933
|
|
934 if (len > 512) {
|
|
935 out = ngx_http_charset_get_buf(pool, ctx);
|
|
936 if (out == NULL) {
|
|
937 return NULL;
|
|
938 }
|
|
939
|
|
940 b = out->buf;
|
|
941
|
|
942 b->temporary = buf->temporary;
|
|
943 b->memory = buf->memory;
|
|
944 b->mmap = buf->mmap;
|
|
945 b->flush = buf->flush;
|
|
946
|
|
947 b->pos = buf->pos;
|
|
948 b->last = src;
|
|
949
|
|
950 out->buf = b;
|
|
951 out->next = NULL;
|
|
952
|
|
953 size = buf->last - src;
|
|
954 size = size / 2 + size / 2 * ctx->length;
|
|
955
|
|
956 } else {
|
|
957 out = NULL;
|
|
958
|
|
959 size = buf->last - src;
|
|
960 size = len + size / 2 + size / 2 * ctx->length;
|
|
961
|
|
962 src = buf->pos;
|
|
963 }
|
|
964
|
|
965 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
966 if (cl == NULL) {
|
|
967 return NULL;
|
|
968 }
|
|
969
|
|
970 if (out) {
|
|
971 out->next = cl;
|
|
972
|
|
973 } else {
|
|
974 out = cl;
|
|
975 }
|
|
976
|
|
977 ll = &cl->next;
|
|
978
|
|
979 b = cl->buf;
|
|
980 dst = b->pos;
|
|
981
|
|
982 while (src < buf->last) {
|
|
983
|
|
984 p = &table[*src++ * NGX_UTF_LEN];
|
|
985 len = *p++;
|
|
986
|
|
987 if ((size_t) (b->end - dst) < len) {
|
|
988 b->last = dst;
|
|
989
|
|
990 size = buf->last - src;
|
|
991 size = len + size / 2 + size / 2 * ctx->length;
|
|
992
|
|
993 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
994 if (cl == NULL) {
|
|
995 return NULL;
|
|
996 }
|
|
997
|
|
998 *ll = cl;
|
|
999 ll = &cl->next;
|
|
1000
|
|
1001 b = cl->buf;
|
|
1002 dst = b->pos;
|
|
1003 }
|
|
1004
|
|
1005 while (len) {
|
|
1006 *dst++ = *p++;
|
|
1007 len--;
|
|
1008 }
|
|
1009 }
|
|
1010
|
|
1011 b->last = dst;
|
|
1012
|
|
1013 b->last_buf = buf->last_buf;
|
|
1014 b->last_in_chain = buf->last_in_chain;
|
|
1015 b->flush = buf->flush;
|
|
1016
|
|
1017 b->shadow = buf;
|
|
1018
|
|
1019 return out;
|
|
1020 }
|
|
1021
|
|
1022
|
|
1023 static ngx_chain_t *
|
|
1024 ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx)
|
|
1025 {
|
|
1026 ngx_chain_t *cl;
|
|
1027
|
|
1028 cl = ctx->free_bufs;
|
|
1029
|
|
1030 if (cl) {
|
|
1031 ctx->free_bufs = cl->next;
|
|
1032
|
|
1033 cl->buf->shadow = NULL;
|
|
1034 cl->next = NULL;
|
|
1035
|
|
1036 return cl;
|
|
1037 }
|
|
1038
|
|
1039 cl = ngx_alloc_chain_link(pool);
|
|
1040 if (cl == NULL) {
|
|
1041 return NULL;
|
|
1042 }
|
|
1043
|
|
1044 cl->buf = ngx_calloc_buf(pool);
|
|
1045 if (cl->buf == NULL) {
|
|
1046 return NULL;
|
|
1047 }
|
|
1048
|
|
1049 cl->next = NULL;
|
|
1050
|
|
1051 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
|
|
1052
|
|
1053 return cl;
|
|
1054 }
|
|
1055
|
|
1056
|
|
1057 static ngx_chain_t *
|
|
1058 ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx,
|
|
1059 size_t size)
|
|
1060 {
|
|
1061 ngx_buf_t *b;
|
|
1062 ngx_chain_t *cl, **ll;
|
|
1063
|
|
1064 for (ll = &ctx->free_buffers, cl = ctx->free_buffers;
|
|
1065 cl;
|
|
1066 ll = &cl->next, cl = cl->next)
|
|
1067 {
|
|
1068 b = cl->buf;
|
|
1069
|
|
1070 if ((size_t) (b->end - b->start) >= size) {
|
|
1071 *ll = cl->next;
|
|
1072 cl->next = NULL;
|
|
1073
|
|
1074 b->pos = b->start;
|
|
1075 b->temporary = 1;
|
|
1076 b->shadow = NULL;
|
|
1077
|
|
1078 return cl;
|
|
1079 }
|
|
1080 }
|
|
1081
|
|
1082 cl = ngx_alloc_chain_link(pool);
|
|
1083 if (cl == NULL) {
|
|
1084 return NULL;
|
|
1085 }
|
|
1086
|
|
1087 cl->buf = ngx_create_temp_buf(pool, size);
|
|
1088 if (cl->buf == NULL) {
|
|
1089 return NULL;
|
|
1090 }
|
|
1091
|
|
1092 cl->next = NULL;
|
|
1093
|
|
1094 cl->buf->temporary = 1;
|
|
1095 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
|
|
1096
|
|
1097 return cl;
|
|
1098 }
|
|
1099
|
|
1100
|
50
|
1101 static char *
|
206
|
1102 ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
|
50
|
1103 {
|
|
1104 ngx_http_charset_main_conf_t *mcf = conf;
|
|
1105
|
206
|
1106 char *rv;
|
|
1107 u_char *p, *dst2src, **pp;
|
|
1108 ngx_int_t src, dst;
|
|
1109 ngx_uint_t i, n;
|
|
1110 ngx_str_t *value;
|
|
1111 ngx_conf_t pvcf;
|
|
1112 ngx_http_charset_t *charset;
|
|
1113 ngx_http_charset_tables_t *table;
|
|
1114 ngx_http_charset_conf_ctx_t ctx;
|
50
|
1115
|
|
1116 value = cf->args->elts;
|
|
1117
|
|
1118 src = ngx_http_add_charset(&mcf->charsets, &value[1]);
|
|
1119 if (src == NGX_ERROR) {
|
|
1120 return NGX_CONF_ERROR;
|
|
1121 }
|
|
1122
|
|
1123 dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
|
|
1124 if (dst == NGX_ERROR) {
|
|
1125 return NGX_CONF_ERROR;
|
|
1126 }
|
|
1127
|
|
1128 if (src == dst) {
|
|
1129 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1130 "\"charset_map\" between the same charsets "
|
|
1131 "\"%V\" and \"%V\"", &value[1], &value[2]);
|
|
1132 return NGX_CONF_ERROR;
|
|
1133 }
|
|
1134
|
|
1135 table = mcf->tables.elts;
|
|
1136 for (i = 0; i < mcf->tables.nelts; i++) {
|
|
1137 if ((src == table->src && dst == table->dst)
|
|
1138 || (src == table->dst && dst == table->src))
|
|
1139 {
|
|
1140 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1141 "duplicate \"charset_map\" between "
|
|
1142 "\"%V\" and \"%V\"", &value[1], &value[2]);
|
|
1143 return NGX_CONF_ERROR;
|
|
1144 }
|
|
1145 }
|
|
1146
|
|
1147 table = ngx_array_push(&mcf->tables);
|
|
1148 if (table == NULL) {
|
|
1149 return NGX_CONF_ERROR;
|
|
1150 }
|
|
1151
|
|
1152 table->src = src;
|
|
1153 table->dst = dst;
|
|
1154
|
286
|
1155 if (ngx_strcasecmp(value[2].data, (u_char *) "utf-8") == 0) {
|
206
|
1156 table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN);
|
|
1157 if (table->src2dst == NULL) {
|
|
1158 return NGX_CONF_ERROR;
|
|
1159 }
|
|
1160
|
|
1161 table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *));
|
|
1162 if (table->dst2src == NULL) {
|
|
1163 return NGX_CONF_ERROR;
|
|
1164 }
|
|
1165
|
|
1166 dst2src = ngx_pcalloc(cf->pool, 256);
|
|
1167 if (dst2src == NULL) {
|
|
1168 return NGX_CONF_ERROR;
|
|
1169 }
|
|
1170
|
|
1171 pp = (u_char **) &table->dst2src[0];
|
|
1172 pp[0] = dst2src;
|
|
1173
|
|
1174 for (i = 0; i < 128; i++) {
|
|
1175 p = &table->src2dst[i * NGX_UTF_LEN];
|
|
1176 p[0] = '\1';
|
|
1177 p[1] = (u_char) i;
|
|
1178 dst2src[i] = (u_char) i;
|
|
1179 }
|
50
|
1180
|
206
|
1181 for (/* void */; i < 256; i++) {
|
|
1182 p = &table->src2dst[i * NGX_UTF_LEN];
|
|
1183 p[0] = '\1';
|
|
1184 p[1] = '?';
|
|
1185 }
|
|
1186
|
|
1187 } else {
|
|
1188 table->src2dst = ngx_palloc(cf->pool, 256);
|
|
1189 if (table->src2dst == NULL) {
|
|
1190 return NGX_CONF_ERROR;
|
|
1191 }
|
|
1192
|
|
1193 table->dst2src = ngx_palloc(cf->pool, 256);
|
|
1194 if (table->dst2src == NULL) {
|
|
1195 return NGX_CONF_ERROR;
|
|
1196 }
|
|
1197
|
|
1198 for (i = 0; i < 128; i++) {
|
|
1199 table->src2dst[i] = (u_char) i;
|
|
1200 table->dst2src[i] = (u_char) i;
|
|
1201 }
|
|
1202
|
|
1203 for (/* void */; i < 256; i++) {
|
|
1204 table->src2dst[i] = '?';
|
|
1205 table->dst2src[i] = '?';
|
|
1206 }
|
50
|
1207 }
|
|
1208
|
206
|
1209 charset = mcf->charsets.elts;
|
50
|
1210
|
206
|
1211 ctx.table = table;
|
|
1212 ctx.charset = &charset[dst];
|
|
1213 ctx.characters = 0;
|
50
|
1214
|
|
1215 pvcf = *cf;
|
206
|
1216 cf->ctx = &ctx;
|
|
1217 cf->handler = ngx_http_charset_map;
|
50
|
1218 cf->handler_conf = conf;
|
|
1219
|
|
1220 rv = ngx_conf_parse(cf, NULL);
|
|
1221
|
|
1222 *cf = pvcf;
|
|
1223
|
206
|
1224 if (ctx.characters) {
|
|
1225 n = ctx.charset->length;
|
|
1226 ctx.charset->length /= ctx.characters;
|
|
1227
|
|
1228 if (((n * 10) / ctx.characters) % 10 > 4) {
|
|
1229 ctx.charset->length++;
|
|
1230 }
|
|
1231 }
|
|
1232
|
50
|
1233 return rv;
|
|
1234 }
|
|
1235
|
|
1236
|
|
1237 static char *
|
206
|
1238 ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
|
50
|
1239 {
|
206
|
1240 u_char *p, *dst2src, **pp;
|
|
1241 uint32_t n;
|
|
1242 ngx_int_t src, dst;
|
|
1243 ngx_str_t *value;
|
|
1244 ngx_uint_t i;
|
|
1245 ngx_http_charset_tables_t *table;
|
|
1246 ngx_http_charset_conf_ctx_t *ctx;
|
50
|
1247
|
|
1248 if (cf->args->nelts != 2) {
|
|
1249 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
|
|
1250 return NGX_CONF_ERROR;
|
|
1251 }
|
|
1252
|
|
1253 value = cf->args->elts;
|
|
1254
|
|
1255 src = ngx_hextoi(value[0].data, value[0].len);
|
|
1256 if (src == NGX_ERROR || src > 255) {
|
|
1257 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1258 "invalid value \"%V\"", &value[0]);
|
|
1259 return NGX_CONF_ERROR;
|
|
1260 }
|
|
1261
|
206
|
1262 ctx = cf->ctx;
|
|
1263 table = ctx->table;
|
|
1264
|
|
1265 if (ctx->charset->utf8) {
|
|
1266 p = &table->src2dst[src * NGX_UTF_LEN];
|
|
1267
|
|
1268 *p++ = (u_char) (value[1].len / 2);
|
|
1269
|
|
1270 for (i = 0; i < value[1].len; i += 2) {
|
|
1271 dst = ngx_hextoi(&value[1].data[i], 2);
|
|
1272 if (dst == NGX_ERROR || dst > 255) {
|
|
1273 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1274 "invalid value \"%V\"", &value[1]);
|
|
1275 return NGX_CONF_ERROR;
|
|
1276 }
|
|
1277
|
|
1278 *p++ = (u_char) dst;
|
|
1279 }
|
|
1280
|
|
1281 i /= 2;
|
|
1282
|
|
1283 ctx->charset->length += i;
|
|
1284 ctx->characters++;
|
|
1285
|
|
1286 p = &table->src2dst[src * NGX_UTF_LEN] + 1;
|
|
1287
|
390
|
1288 n = ngx_utf8_decode(&p, i);
|
206
|
1289
|
|
1290 if (n > 0xffff) {
|
|
1291 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1292 "invalid value \"%V\"", &value[1]);
|
|
1293 return NGX_CONF_ERROR;
|
|
1294 }
|
|
1295
|
|
1296 pp = (u_char **) &table->dst2src[0];
|
|
1297
|
|
1298 dst2src = pp[n >> 8];
|
|
1299
|
|
1300 if (dst2src == NULL) {
|
|
1301 dst2src = ngx_pcalloc(cf->pool, 256);
|
|
1302 if (dst2src == NULL) {
|
|
1303 return NGX_CONF_ERROR;
|
|
1304 }
|
|
1305
|
|
1306 pp[n >> 8] = dst2src;
|
|
1307 }
|
|
1308
|
|
1309 dst2src[n & 0xff] = (u_char) src;
|
|
1310
|
|
1311 } else {
|
|
1312 dst = ngx_hextoi(value[1].data, value[1].len);
|
|
1313 if (dst == NGX_ERROR || dst > 255) {
|
|
1314 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1315 "invalid value \"%V\"", &value[1]);
|
|
1316 return NGX_CONF_ERROR;
|
|
1317 }
|
|
1318
|
|
1319 table->src2dst[src] = (u_char) dst;
|
|
1320 table->dst2src[dst] = (u_char) src;
|
50
|
1321 }
|
|
1322
|
|
1323 return NGX_CONF_OK;
|
|
1324 }
|
|
1325
|
|
1326
|
|
1327 static char *
|
|
1328 ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
|
|
1329 {
|
|
1330 char *p = conf;
|
|
1331
|
|
1332 ngx_int_t *cp;
|
244
|
1333 ngx_str_t *value, var;
|
50
|
1334 ngx_http_charset_main_conf_t *mcf;
|
|
1335
|
|
1336 cp = (ngx_int_t *) (p + cmd->offset);
|
|
1337
|
|
1338 if (*cp != NGX_CONF_UNSET) {
|
|
1339 return "is duplicate";
|
|
1340 }
|
|
1341
|
78
|
1342 value = cf->args->elts;
|
|
1343
|
|
1344 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
|
|
1345 && ngx_strcmp(value[1].data, "off") == 0)
|
|
1346 {
|
|
1347 *cp = NGX_HTTP_NO_CHARSET;
|
|
1348 return NGX_CONF_OK;
|
|
1349 }
|
|
1350
|
244
|
1351
|
|
1352 if (value[1].data[0] == '$') {
|
|
1353 var.len = value[1].len - 1;
|
|
1354 var.data = value[1].data + 1;
|
|
1355
|
|
1356 *cp = ngx_http_get_variable_index(cf, &var);
|
|
1357
|
|
1358 if (*cp == NGX_ERROR) {
|
|
1359 return NGX_CONF_ERROR;
|
|
1360 }
|
|
1361
|
|
1362 *cp += NGX_HTTP_CHARSET_VAR;
|
|
1363
|
|
1364 return NGX_CONF_OK;
|
|
1365 }
|
|
1366
|
50
|
1367 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1368 ngx_http_charset_filter_module);
|
|
1369
|
|
1370 *cp = ngx_http_add_charset(&mcf->charsets, &value[1]);
|
|
1371 if (*cp == NGX_ERROR) {
|
|
1372 return NGX_CONF_ERROR;
|
|
1373 }
|
|
1374
|
|
1375 return NGX_CONF_OK;
|
|
1376 }
|
|
1377
|
|
1378
|
|
1379 static ngx_int_t
|
|
1380 ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
|
|
1381 {
|
|
1382 ngx_uint_t i;
|
|
1383 ngx_http_charset_t *c;
|
|
1384
|
|
1385 c = charsets->elts;
|
|
1386 for (i = 0; i < charsets->nelts; i++) {
|
|
1387 if (name->len != c[i].name.len) {
|
|
1388 continue;
|
|
1389 }
|
|
1390
|
|
1391 if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
|
|
1392 break;
|
|
1393 }
|
|
1394 }
|
|
1395
|
|
1396 if (i < charsets->nelts) {
|
|
1397 return i;
|
|
1398 }
|
|
1399
|
|
1400 c = ngx_array_push(charsets);
|
|
1401 if (c == NULL) {
|
|
1402 return NGX_ERROR;
|
|
1403 }
|
|
1404
|
|
1405 c->tables = NULL;
|
|
1406 c->name = *name;
|
206
|
1407 c->length = 0;
|
50
|
1408
|
286
|
1409 if (ngx_strcasecmp(name->data, (u_char *) "utf-8") == 0) {
|
72
|
1410 c->utf8 = 1;
|
216
|
1411
|
|
1412 } else {
|
|
1413 c->utf8 = 0;
|
72
|
1414 }
|
|
1415
|
50
|
1416 return i;
|
|
1417 }
|
|
1418
|
|
1419
|
|
1420 static void *
|
|
1421 ngx_http_charset_create_main_conf(ngx_conf_t *cf)
|
|
1422 {
|
|
1423 ngx_http_charset_main_conf_t *mcf;
|
|
1424
|
|
1425 mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
|
|
1426 if (mcf == NULL) {
|
|
1427 return NGX_CONF_ERROR;
|
|
1428 }
|
|
1429
|
|
1430 if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t))
|
78
|
1431 == NGX_ERROR)
|
50
|
1432 {
|
|
1433 return NGX_CONF_ERROR;
|
|
1434 }
|
|
1435
|
78
|
1436 if (ngx_array_init(&mcf->tables, cf->pool, 1,
|
50
|
1437 sizeof(ngx_http_charset_tables_t)) == NGX_ERROR)
|
|
1438 {
|
|
1439 return NGX_CONF_ERROR;
|
|
1440 }
|
|
1441
|
78
|
1442 if (ngx_array_init(&mcf->recodes, cf->pool, 2,
|
|
1443 sizeof(ngx_http_charset_recode_t)) == NGX_ERROR)
|
|
1444 {
|
|
1445 return NGX_CONF_ERROR;
|
|
1446 }
|
|
1447
|
50
|
1448 return mcf;
|
|
1449 }
|
|
1450
|
|
1451
|
|
1452 static void *
|
|
1453 ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
|
|
1454 {
|
|
1455 ngx_http_charset_loc_conf_t *lcf;
|
|
1456
|
|
1457 lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));
|
|
1458 if (lcf == NULL) {
|
|
1459 return NGX_CONF_ERROR;
|
|
1460 }
|
|
1461
|
394
|
1462 /*
|
|
1463 * set by ngx_pcalloc():
|
|
1464 *
|
|
1465 * lcf->types = { NULL };
|
|
1466 * lcf->types_keys = NULL;
|
|
1467 */
|
|
1468
|
78
|
1469 lcf->charset = NGX_CONF_UNSET;
|
50
|
1470 lcf->source_charset = NGX_CONF_UNSET;
|
184
|
1471 lcf->override_charset = NGX_CONF_UNSET;
|
50
|
1472
|
|
1473 return lcf;
|
|
1474 }
|
|
1475
|
|
1476
|
|
1477 static char *
|
|
1478 ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
|
|
1479 {
|
|
1480 ngx_http_charset_loc_conf_t *prev = parent;
|
|
1481 ngx_http_charset_loc_conf_t *conf = child;
|
|
1482
|
78
|
1483 ngx_uint_t i;
|
|
1484 ngx_http_charset_recode_t *recode;
|
|
1485 ngx_http_charset_main_conf_t *mcf;
|
50
|
1486
|
396
|
1487 if (ngx_http_merge_types(cf, conf->types_keys, &conf->types,
|
|
1488 prev->types_keys, &prev->types,
|
|
1489 ngx_http_charset_default_types)
|
|
1490 != NGX_OK)
|
|
1491 {
|
|
1492 return NGX_CONF_ERROR;
|
|
1493 }
|
|
1494
|
184
|
1495 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
|
78
|
1496 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_NO_CHARSET);
|
50
|
1497
|
|
1498 if (conf->source_charset == NGX_CONF_UNSET) {
|
|
1499 conf->source_charset = prev->source_charset;
|
|
1500 }
|
|
1501
|
78
|
1502 if (conf->charset == NGX_HTTP_NO_CHARSET
|
|
1503 || conf->source_charset == NGX_CONF_UNSET
|
|
1504 || conf->charset == conf->source_charset)
|
50
|
1505 {
|
78
|
1506 return NGX_CONF_OK;
|
50
|
1507 }
|
|
1508
|
342
|
1509 if (conf->source_charset >= NGX_HTTP_CHARSET_VAR
|
|
1510 || conf->charset >= NGX_HTTP_CHARSET_VAR)
|
|
1511 {
|
|
1512 return NGX_CONF_OK;
|
|
1513 }
|
|
1514
|
78
|
1515 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1516 ngx_http_charset_filter_module);
|
|
1517 recode = mcf->recodes.elts;
|
|
1518 for (i = 0; i < mcf->recodes.nelts; i++) {
|
|
1519 if (conf->source_charset == recode[i].src
|
|
1520 && conf->charset == recode[i].dst)
|
|
1521 {
|
|
1522 return NGX_CONF_OK;
|
|
1523 }
|
50
|
1524 }
|
|
1525
|
78
|
1526 recode = ngx_array_push(&mcf->recodes);
|
|
1527 if (recode == NULL) {
|
50
|
1528 return NGX_CONF_ERROR;
|
|
1529 }
|
|
1530
|
78
|
1531 recode->src = conf->source_charset;
|
|
1532 recode->dst = conf->charset;
|
|
1533
|
50
|
1534 return NGX_CONF_OK;
|
|
1535 }
|
78
|
1536
|
|
1537
|
|
1538 static ngx_int_t
|
|
1539 ngx_http_charset_postconfiguration(ngx_conf_t *cf)
|
|
1540 {
|
184
|
1541 u_char **src, **dst;
|
78
|
1542 ngx_int_t c;
|
|
1543 ngx_uint_t i, t;
|
|
1544 ngx_http_charset_t *charset;
|
|
1545 ngx_http_charset_recode_t *recode;
|
|
1546 ngx_http_charset_tables_t *tables;
|
|
1547 ngx_http_charset_main_conf_t *mcf;
|
|
1548
|
|
1549 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1550 ngx_http_charset_filter_module);
|
|
1551
|
|
1552 recode = mcf->recodes.elts;
|
|
1553 tables = mcf->tables.elts;
|
|
1554 charset = mcf->charsets.elts;
|
|
1555
|
|
1556 for (i = 0; i < mcf->recodes.nelts; i++) {
|
|
1557
|
|
1558 c = recode[i].src;
|
|
1559
|
|
1560 for (t = 0; t < mcf->tables.nelts; t++) {
|
|
1561
|
|
1562 if (c == tables[t].src && recode[i].dst == tables[t].dst) {
|
|
1563 goto next;
|
|
1564 }
|
|
1565
|
|
1566 if (c == tables[t].dst && recode[i].dst == tables[t].src) {
|
|
1567 goto next;
|
|
1568 }
|
|
1569 }
|
|
1570
|
|
1571 ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
|
342
|
1572 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
|
|
1573 &charset[c].name, &charset[recode[i].dst].name);
|
78
|
1574 return NGX_ERROR;
|
|
1575
|
|
1576 next:
|
|
1577 continue;
|
|
1578 }
|
|
1579
|
184
|
1580
|
|
1581 for (t = 0; t < mcf->tables.nelts; t++) {
|
|
1582
|
|
1583 src = charset[tables[t].src].tables;
|
|
1584
|
|
1585 if (src == NULL) {
|
|
1586 src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
|
|
1587 if (src == NULL) {
|
|
1588 return NGX_ERROR;
|
|
1589 }
|
|
1590
|
|
1591 charset[tables[t].src].tables = src;
|
|
1592 }
|
|
1593
|
|
1594 dst = charset[tables[t].dst].tables;
|
|
1595
|
|
1596 if (dst == NULL) {
|
|
1597 dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
|
|
1598 if (dst == NULL) {
|
|
1599 return NGX_ERROR;
|
|
1600 }
|
|
1601
|
|
1602 charset[tables[t].dst].tables = dst;
|
|
1603 }
|
|
1604
|
|
1605 src[tables[t].dst] = tables[t].src2dst;
|
|
1606 dst[tables[t].src] = tables[t].dst2src;
|
|
1607 }
|
|
1608
|
230
|
1609 ngx_http_next_header_filter = ngx_http_top_header_filter;
|
|
1610 ngx_http_top_header_filter = ngx_http_charset_header_filter;
|
|
1611
|
|
1612 ngx_http_next_body_filter = ngx_http_top_body_filter;
|
|
1613 ngx_http_top_body_filter = ngx_http_charset_body_filter;
|
|
1614
|
78
|
1615 return NGX_OK;
|
|
1616 }
|