50
|
1
|
|
2 /*
|
|
3 * Copyright (C) Igor Sysoev
|
644
|
4 * Copyright (C) Nginx, Inc.
|
50
|
5 */
|
|
6
|
|
7
|
|
8 #include <ngx_config.h>
|
|
9 #include <ngx_core.h>
|
|
10 #include <ngx_http.h>
|
|
11
|
|
12
|
496
|
/*
 * Special values stored in place of a charset index.  Values greater than
 * or equal to NGX_HTTP_CHARSET_VAR encode a variable index as
 * (NGX_HTTP_CHARSET_VAR + index).
 */
#define NGX_HTTP_CHARSET_OFF    -2
#define NGX_HTTP_NO_CHARSET     -3
#define NGX_HTTP_CHARSET_VAR    0x10000

/* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
#define NGX_UTF_LEN             4

/*
 * The longest text the "&#%uD;" format can produce: code points above
 * 0x10ffff are replaced with '?', so the largest number ever printed is
 * 1114111.  The recoder reserves this much headroom before each write,
 * therefore the value must not be smaller than the real maximum.
 */
#define NGX_HTML_ENTITY_LEN     (sizeof("&#1114111;") - 1)
|
78
|
21
|
|
22
|
50
|
23 typedef struct {
|
206
|
24 u_char **tables;
|
|
25 ngx_str_t name;
|
72
|
26
|
206
|
27 unsigned length:16;
|
|
28 unsigned utf8:1;
|
50
|
29 } ngx_http_charset_t;
|
|
30
|
|
31
|
|
32 typedef struct {
|
206
|
33 ngx_int_t src;
|
|
34 ngx_int_t dst;
|
78
|
35 } ngx_http_charset_recode_t;
|
|
36
|
|
37
|
|
38 typedef struct {
|
206
|
39 ngx_int_t src;
|
|
40 ngx_int_t dst;
|
|
41 u_char *src2dst;
|
|
42 u_char *dst2src;
|
50
|
43 } ngx_http_charset_tables_t;
|
|
44
|
|
45
|
|
46 typedef struct {
|
206
|
47 ngx_array_t charsets; /* ngx_http_charset_t */
|
|
48 ngx_array_t tables; /* ngx_http_charset_tables_t */
|
|
49 ngx_array_t recodes; /* ngx_http_charset_recode_t */
|
50
|
50 } ngx_http_charset_main_conf_t;
|
|
51
|
|
52
|
|
53 typedef struct {
|
206
|
54 ngx_int_t charset;
|
|
55 ngx_int_t source_charset;
|
|
56 ngx_flag_t override_charset;
|
394
|
57
|
|
58 ngx_hash_t types;
|
|
59 ngx_array_t *types_keys;
|
50
|
60 } ngx_http_charset_loc_conf_t;
|
|
61
|
|
62
|
|
63 typedef struct {
|
206
|
64 u_char *table;
|
|
65 ngx_int_t charset;
|
496
|
66 ngx_str_t charset_name;
|
206
|
67
|
|
68 ngx_chain_t *busy;
|
|
69 ngx_chain_t *free_bufs;
|
|
70 ngx_chain_t *free_buffers;
|
|
71
|
|
72 size_t saved_len;
|
|
73 u_char saved[NGX_UTF_LEN];
|
|
74
|
|
75 unsigned length:16;
|
|
76 unsigned from_utf8:1;
|
|
77 unsigned to_utf8:1;
|
50
|
78 } ngx_http_charset_ctx_t;
|
|
79
|
|
80
|
206
|
81 typedef struct {
|
|
82 ngx_http_charset_tables_t *table;
|
|
83 ngx_http_charset_t *charset;
|
|
84 ngx_uint_t characters;
|
|
85 } ngx_http_charset_conf_ctx_t;
|
|
86
|
50
|
87
|
496
|
88 static ngx_int_t ngx_http_destination_charset(ngx_http_request_t *r,
|
|
89 ngx_str_t *name);
|
|
90 static ngx_int_t ngx_http_main_request_charset(ngx_http_request_t *r,
|
|
91 ngx_str_t *name);
|
|
92 static ngx_int_t ngx_http_source_charset(ngx_http_request_t *r,
|
|
93 ngx_str_t *name);
|
|
94 static ngx_int_t ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name);
|
|
95 static ngx_inline void ngx_http_set_charset(ngx_http_request_t *r,
|
|
96 ngx_str_t *charset);
|
|
97 static ngx_int_t ngx_http_charset_ctx(ngx_http_request_t *r,
|
206
|
98 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset);
|
|
99 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);
|
|
100 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool,
|
|
101 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
|
|
102 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool,
|
|
103 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
|
|
104
|
|
105 static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool,
|
|
106 ngx_http_charset_ctx_t *ctx);
|
|
107 static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool,
|
|
108 ngx_http_charset_ctx_t *ctx, size_t size);
|
|
109
|
|
110 static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
|
50
|
111 void *conf);
|
206
|
112 static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy,
|
|
113 void *conf);
|
50
|
114
|
|
115 static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
|
|
116 void *conf);
|
|
117 static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);
|
|
118
|
|
119 static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
|
|
120 static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
|
|
121 static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
|
|
122 void *parent, void *child);
|
78
|
123 static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);
|
50
|
124
|
|
125
|
394
|
126 ngx_str_t ngx_http_charset_default_types[] = {
|
|
127 ngx_string("text/html"),
|
|
128 ngx_string("text/xml"),
|
|
129 ngx_string("text/plain"),
|
|
130 ngx_string("text/vnd.wap.wml"),
|
|
131 ngx_string("application/x-javascript"),
|
|
132 ngx_string("application/rss+xml"),
|
|
133 ngx_null_string
|
|
134 };
|
|
135
|
|
136
|
50
|
137 static ngx_command_t ngx_http_charset_filter_commands[] = {
|
|
138
|
78
|
139 { ngx_string("charset"),
|
108
|
140 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
141 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
|
78
|
142 ngx_http_set_charset_slot,
|
|
143 NGX_HTTP_LOC_CONF_OFFSET,
|
|
144 offsetof(ngx_http_charset_loc_conf_t, charset),
|
|
145 NULL },
|
|
146
|
|
147 { ngx_string("source_charset"),
|
108
|
148 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
149 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
|
78
|
150 ngx_http_set_charset_slot,
|
|
151 NGX_HTTP_LOC_CONF_OFFSET,
|
|
152 offsetof(ngx_http_charset_loc_conf_t, source_charset),
|
|
153 NULL },
|
|
154
|
184
|
155 { ngx_string("override_charset"),
|
|
156 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
157 |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
|
|
158 ngx_conf_set_flag_slot,
|
|
159 NGX_HTTP_LOC_CONF_OFFSET,
|
|
160 offsetof(ngx_http_charset_loc_conf_t, override_charset),
|
|
161 NULL },
|
|
162
|
394
|
163 { ngx_string("charset_types"),
|
|
164 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE,
|
|
165 ngx_http_types_slot,
|
|
166 NGX_HTTP_LOC_CONF_OFFSET,
|
|
167 offsetof(ngx_http_charset_loc_conf_t, types_keys),
|
|
168 &ngx_http_charset_default_types[0] },
|
|
169
|
50
|
170 { ngx_string("charset_map"),
|
|
171 NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
|
206
|
172 ngx_http_charset_map_block,
|
50
|
173 NGX_HTTP_MAIN_CONF_OFFSET,
|
|
174 0,
|
|
175 NULL },
|
|
176
|
|
177 ngx_null_command
|
|
178 };
|
|
179
|
|
180
|
|
181 static ngx_http_module_t ngx_http_charset_filter_module_ctx = {
|
58
|
182 NULL, /* preconfiguration */
|
78
|
183 ngx_http_charset_postconfiguration, /* postconfiguration */
|
50
|
184
|
|
185 ngx_http_charset_create_main_conf, /* create main configuration */
|
78
|
186 NULL, /* init main configuration */
|
50
|
187
|
|
188 NULL, /* create server configuration */
|
|
189 NULL, /* merge server configuration */
|
|
190
|
|
191 ngx_http_charset_create_loc_conf, /* create location configuration */
|
|
192 ngx_http_charset_merge_loc_conf /* merge location configuration */
|
|
193 };
|
|
194
|
|
195
|
|
196 ngx_module_t ngx_http_charset_filter_module = {
|
58
|
197 NGX_MODULE_V1,
|
50
|
198 &ngx_http_charset_filter_module_ctx, /* module context */
|
|
199 ngx_http_charset_filter_commands, /* module directives */
|
|
200 NGX_HTTP_MODULE, /* module type */
|
90
|
201 NULL, /* init master */
|
230
|
202 NULL, /* init module */
|
90
|
203 NULL, /* init process */
|
|
204 NULL, /* init thread */
|
|
205 NULL, /* exit thread */
|
|
206 NULL, /* exit process */
|
|
207 NULL, /* exit master */
|
|
208 NGX_MODULE_V1_PADDING
|
50
|
209 };
|
|
210
|
|
211
|
|
212 static ngx_http_output_header_filter_pt ngx_http_next_header_filter;
|
|
213 static ngx_http_output_body_filter_pt ngx_http_next_body_filter;
|
|
214
|
|
215
|
|
216 static ngx_int_t
|
|
217 ngx_http_charset_header_filter(ngx_http_request_t *r)
|
|
218 {
|
184
|
219 ngx_int_t charset, source_charset;
|
496
|
220 ngx_str_t dst, src;
|
50
|
221 ngx_http_charset_t *charsets;
|
|
222 ngx_http_charset_main_conf_t *mcf;
|
|
223
|
202
|
224 if (r == r->main) {
|
496
|
225 charset = ngx_http_destination_charset(r, &dst);
|
202
|
226
|
|
227 } else {
|
496
|
228 charset = ngx_http_main_request_charset(r, &dst);
|
|
229 }
|
206
|
230
|
496
|
231 if (charset == NGX_ERROR) {
|
|
232 return NGX_ERROR;
|
|
233 }
|
202
|
234
|
496
|
235 if (charset == NGX_DECLINED) {
|
|
236 return ngx_http_next_header_filter(r);
|
50
|
237 }
|
|
238
|
496
|
239 /* charset: charset index or NGX_HTTP_NO_CHARSET */
|
250
|
240
|
496
|
241 source_charset = ngx_http_source_charset(r, &src);
|
244
|
242
|
496
|
243 if (source_charset == NGX_ERROR) {
|
|
244 return NGX_ERROR;
|
206
|
245 }
|
184
|
246
|
496
|
247 /*
|
|
248 * source_charset: charset index, NGX_HTTP_NO_CHARSET,
|
|
249 * or NGX_HTTP_CHARSET_OFF
|
|
250 */
|
|
251
|
|
252 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
|
|
253 "charset: \"%V\" > \"%V\"", &src, &dst);
|
|
254
|
|
255 if (source_charset == NGX_HTTP_CHARSET_OFF) {
|
|
256 ngx_http_set_charset(r, &dst);
|
|
257
|
|
258 return ngx_http_next_header_filter(r);
|
|
259 }
|
184
|
260
|
210
|
261 if (charset == NGX_HTTP_NO_CHARSET
|
|
262 || source_charset == NGX_HTTP_NO_CHARSET)
|
|
263 {
|
496
|
264 if (source_charset != charset
|
|
265 || ngx_strncasecmp(dst.data, src.data, dst.len) != 0)
|
210
|
266 {
|
212
|
267 goto no_charset_map;
|
210
|
268 }
|
184
|
269
|
496
|
270 ngx_http_set_charset(r, &dst);
|
|
271
|
206
|
272 return ngx_http_next_header_filter(r);
|
|
273 }
|
184
|
274
|
496
|
275 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
|
|
276 charsets = mcf->charsets.elts;
|
|
277
|
206
|
278 if (source_charset != charset
|
|
279 && (charsets[source_charset].tables == NULL
|
|
280 || charsets[source_charset].tables[charset] == NULL))
|
|
281 {
|
212
|
282 goto no_charset_map;
|
50
|
283 }
|
|
284
|
206
|
285 r->headers_out.content_type.len = r->headers_out.content_type_len;
|
|
286
|
496
|
287 ngx_http_set_charset(r, &dst);
|
|
288
|
|
289 if (source_charset != charset) {
|
|
290 return ngx_http_charset_ctx(r, charsets, charset, source_charset);
|
|
291 }
|
|
292
|
|
293 return ngx_http_next_header_filter(r);
|
212
|
294
|
|
295 no_charset_map:
|
|
296
|
|
297 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
|
342
|
298 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
|
496
|
299 &src, &dst);
|
212
|
300
|
|
301 return ngx_http_next_header_filter(r);
|
206
|
302 }
|
|
303
|
|
304
|
|
305 static ngx_int_t
|
496
|
306 ngx_http_destination_charset(ngx_http_request_t *r, ngx_str_t *name)
|
|
307 {
|
|
308 ngx_int_t charset;
|
|
309 ngx_http_charset_t *charsets;
|
|
310 ngx_http_variable_value_t *vv;
|
|
311 ngx_http_charset_loc_conf_t *mlcf;
|
|
312 ngx_http_charset_main_conf_t *mcf;
|
|
313
|
|
314 if (!r->ignore_content_encoding
|
|
315 && r->headers_out.content_encoding
|
|
316 && r->headers_out.content_encoding->value.len)
|
|
317 {
|
|
318 return NGX_DECLINED;
|
|
319 }
|
|
320
|
|
321 if (r->headers_out.content_type.len == 0) {
|
|
322 return NGX_DECLINED;
|
|
323 }
|
|
324
|
|
325 if (r->headers_out.override_charset
|
|
326 && r->headers_out.override_charset->len)
|
|
327 {
|
|
328 *name = *r->headers_out.override_charset;
|
|
329
|
|
330 charset = ngx_http_get_charset(r, name);
|
|
331
|
|
332 if (charset != NGX_HTTP_NO_CHARSET) {
|
|
333 return charset;
|
|
334 }
|
|
335
|
|
336 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
|
|
337 "unknown charset \"%V\" to override", name);
|
|
338
|
|
339 return NGX_DECLINED;
|
|
340 }
|
|
341
|
|
342 mlcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
|
|
343 charset = mlcf->charset;
|
|
344
|
|
345 if (charset == NGX_HTTP_CHARSET_OFF) {
|
|
346 return NGX_DECLINED;
|
|
347 }
|
|
348
|
|
349 if (r->headers_out.charset.len) {
|
|
350 if (mlcf->override_charset == 0) {
|
|
351 return NGX_DECLINED;
|
|
352 }
|
|
353
|
|
354 } else {
|
|
355 if (ngx_http_test_content_type(r, &mlcf->types) == NULL) {
|
|
356 return NGX_DECLINED;
|
|
357 }
|
|
358 }
|
|
359
|
|
360 if (charset < NGX_HTTP_CHARSET_VAR) {
|
|
361 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
|
|
362 charsets = mcf->charsets.elts;
|
|
363 *name = charsets[charset].name;
|
|
364 return charset;
|
|
365 }
|
|
366
|
|
367 vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR);
|
|
368
|
|
369 if (vv == NULL || vv->not_found) {
|
|
370 return NGX_ERROR;
|
|
371 }
|
|
372
|
|
373 name->len = vv->len;
|
|
374 name->data = vv->data;
|
|
375
|
|
376 return ngx_http_get_charset(r, name);
|
|
377 }
|
|
378
|
|
379
|
|
380 static ngx_int_t
|
|
381 ngx_http_main_request_charset(ngx_http_request_t *r, ngx_str_t *src)
|
206
|
382 {
|
496
|
383 ngx_int_t charset;
|
|
384 ngx_str_t *main_charset;
|
|
385 ngx_http_charset_ctx_t *ctx;
|
|
386
|
|
387 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);
|
|
388
|
|
389 if (ctx) {
|
|
390 *src = ctx->charset_name;
|
|
391 return ctx->charset;
|
|
392 }
|
|
393
|
|
394 main_charset = &r->main->headers_out.charset;
|
|
395
|
|
396 if (main_charset->len == 0) {
|
|
397 return NGX_DECLINED;
|
|
398 }
|
|
399
|
|
400 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
|
|
401 if (ctx == NULL) {
|
|
402 return NGX_ERROR;
|
|
403 }
|
|
404
|
|
405 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module);
|
|
406
|
|
407 charset = ngx_http_get_charset(r, main_charset);
|
|
408
|
|
409 ctx->charset = charset;
|
|
410 ctx->charset_name = *main_charset;
|
|
411 *src = *main_charset;
|
|
412
|
|
413 return charset;
|
|
414 }
|
|
415
|
|
416
|
|
417 static ngx_int_t
|
|
418 ngx_http_source_charset(ngx_http_request_t *r, ngx_str_t *name)
|
|
419 {
|
|
420 ngx_int_t charset;
|
|
421 ngx_http_charset_t *charsets;
|
|
422 ngx_http_variable_value_t *vv;
|
|
423 ngx_http_charset_loc_conf_t *lcf;
|
|
424 ngx_http_charset_main_conf_t *mcf;
|
|
425
|
|
426 if (r->headers_out.charset.len) {
|
|
427 *name = r->headers_out.charset;
|
|
428 return ngx_http_get_charset(r, name);
|
|
429 }
|
|
430
|
|
431 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
|
|
432
|
|
433 charset = lcf->source_charset;
|
|
434
|
|
435 if (charset == NGX_HTTP_CHARSET_OFF) {
|
|
436 name->len = 0;
|
|
437 return charset;
|
|
438 }
|
|
439
|
|
440 if (charset < NGX_HTTP_CHARSET_VAR) {
|
|
441 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
|
|
442 charsets = mcf->charsets.elts;
|
|
443 *name = charsets[charset].name;
|
|
444 return charset;
|
|
445 }
|
|
446
|
|
447 vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR);
|
|
448
|
|
449 if (vv == NULL || vv->not_found) {
|
|
450 return NGX_ERROR;
|
|
451 }
|
|
452
|
|
453 name->len = vv->len;
|
|
454 name->data = vv->data;
|
|
455
|
|
456 return ngx_http_get_charset(r, name);
|
|
457 }
|
|
458
|
|
459
|
|
460 static ngx_int_t
|
|
461 ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name)
|
|
462 {
|
|
463 ngx_uint_t i, n;
|
|
464 ngx_http_charset_t *charset;
|
|
465 ngx_http_charset_main_conf_t *mcf;
|
|
466
|
|
467 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
|
|
468
|
|
469 charset = mcf->charsets.elts;
|
|
470 n = mcf->charsets.nelts;
|
206
|
471
|
|
472 for (i = 0; i < n; i++) {
|
496
|
473 if (charset[i].name.len != name->len) {
|
244
|
474 continue;
|
|
475 }
|
|
476
|
496
|
477 if (ngx_strncasecmp(charset[i].name.data, name->data, name->len) == 0) {
|
206
|
478 return i;
|
|
479 }
|
|
480 }
|
|
481
|
|
482 return NGX_HTTP_NO_CHARSET;
|
|
483 }
|
|
484
|
|
485
|
496
|
486 static ngx_inline void
|
|
487 ngx_http_set_charset(ngx_http_request_t *r, ngx_str_t *charset)
|
206
|
488 {
|
496
|
489 if (r != r->main) {
|
|
490 return;
|
|
491 }
|
206
|
492
|
50
|
493 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
|
|
494 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
|
|
495 {
|
|
496 /*
|
184
|
497 * do not set charset for the redirect because NN 4.x
|
|
498 * use this charset instead of the next page charset
|
50
|
499 */
|
|
500
|
|
501 r->headers_out.charset.len = 0;
|
496
|
502 return;
|
50
|
503 }
|
|
504
|
496
|
505 r->headers_out.charset = *charset;
|
|
506 }
|
|
507
|
184
|
508
|
496
|
509 static ngx_int_t
|
|
510 ngx_http_charset_ctx(ngx_http_request_t *r, ngx_http_charset_t *charsets,
|
|
511 ngx_int_t charset, ngx_int_t source_charset)
|
|
512 {
|
|
513 ngx_http_charset_ctx_t *ctx;
|
50
|
514
|
|
515 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
|
|
516 if (ctx == NULL) {
|
|
517 return NGX_ERROR;
|
|
518 }
|
|
519
|
|
520 ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);
|
|
521
|
184
|
522 ctx->table = charsets[source_charset].tables[charset];
|
|
523 ctx->charset = charset;
|
496
|
524 ctx->charset_name = charsets[charset].name;
|
206
|
525 ctx->length = charsets[charset].length;
|
|
526 ctx->from_utf8 = charsets[source_charset].utf8;
|
|
527 ctx->to_utf8 = charsets[charset].utf8;
|
|
528
|
212
|
529 r->filter_need_in_memory = 1;
|
|
530
|
206
|
531 if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) {
|
|
532 ngx_http_clear_content_length(r);
|
212
|
533
|
|
534 } else {
|
|
535 r->filter_need_temporary = 1;
|
206
|
536 }
|
50
|
537
|
|
538 return ngx_http_next_header_filter(r);
|
|
539 }
|
|
540
|
|
541
|
|
542 static ngx_int_t
|
|
543 ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
|
|
544 {
|
206
|
545 ngx_int_t rc;
|
|
546 ngx_buf_t *b;
|
|
547 ngx_chain_t *cl, *out, **ll;
|
184
|
548 ngx_http_charset_ctx_t *ctx;
|
50
|
549
|
|
550 ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);
|
|
551
|
184
|
552 if (ctx == NULL || ctx->table == NULL) {
|
50
|
553 return ngx_http_next_body_filter(r, in);
|
|
554 }
|
|
555
|
206
|
556 if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) {
|
|
557
|
|
558 out = NULL;
|
|
559 ll = &out;
|
|
560
|
|
561 for (cl = in; cl; cl = cl->next) {
|
|
562 b = cl->buf;
|
|
563
|
|
564 if (ngx_buf_size(b) == 0) {
|
214
|
565
|
|
566 *ll = ngx_alloc_chain_link(r->pool);
|
|
567 if (*ll == NULL) {
|
|
568 return NGX_ERROR;
|
|
569 }
|
|
570
|
|
571 (*ll)->buf = b;
|
|
572 (*ll)->next = NULL;
|
|
573
|
|
574 ll = &(*ll)->next;
|
|
575
|
206
|
576 continue;
|
|
577 }
|
|
578
|
|
579 if (ctx->to_utf8) {
|
|
580 *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx);
|
|
581
|
|
582 } else {
|
|
583 *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx);
|
|
584 }
|
|
585
|
|
586 if (*ll == NULL) {
|
|
587 return NGX_ERROR;
|
|
588 }
|
|
589
|
|
590 while (*ll) {
|
|
591 ll = &(*ll)->next;
|
|
592 }
|
|
593 }
|
|
594
|
|
595 rc = ngx_http_next_body_filter(r, out);
|
|
596
|
|
597 if (out) {
|
|
598 if (ctx->busy == NULL) {
|
|
599 ctx->busy = out;
|
|
600
|
|
601 } else {
|
|
602 for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ }
|
|
603 cl->next = out;
|
|
604 }
|
|
605 }
|
|
606
|
|
607 while (ctx->busy) {
|
|
608
|
|
609 cl = ctx->busy;
|
|
610 b = cl->buf;
|
|
611
|
|
612 if (ngx_buf_size(b) != 0) {
|
|
613 break;
|
|
614 }
|
|
615
|
|
616 ctx->busy = cl->next;
|
|
617
|
|
618 if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) {
|
|
619 continue;
|
|
620 }
|
|
621
|
|
622 if (b->shadow) {
|
|
623 b->shadow->pos = b->shadow->last;
|
|
624 }
|
|
625
|
|
626 if (b->pos) {
|
|
627 cl->next = ctx->free_buffers;
|
|
628 ctx->free_buffers = cl;
|
|
629 continue;
|
|
630 }
|
|
631
|
|
632 cl->next = ctx->free_bufs;
|
|
633 ctx->free_bufs = cl;
|
|
634 }
|
|
635
|
|
636 return rc;
|
|
637 }
|
|
638
|
50
|
639 for (cl = in; cl; cl = cl->next) {
|
184
|
640 (void) ngx_http_charset_recode(cl->buf, ctx->table);
|
50
|
641 }
|
|
642
|
|
643 return ngx_http_next_body_filter(r, in);
|
|
644 }
|
|
645
|
|
646
|
|
647 static ngx_uint_t
|
184
|
648 ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
|
50
|
649 {
|
370
|
650 u_char *p, *last;
|
184
|
651
|
370
|
652 last = b->last;
|
50
|
653
|
370
|
654 for (p = b->pos; p < last; p++) {
|
|
655
|
|
656 if (*p != table[*p]) {
|
|
657 goto recode;
|
50
|
658 }
|
|
659 }
|
|
660
|
184
|
661 return 0;
|
370
|
662
|
|
663 recode:
|
|
664
|
|
665 do {
|
|
666 if (*p != table[*p]) {
|
|
667 *p = table[*p];
|
|
668 }
|
|
669
|
|
670 p++;
|
|
671
|
|
672 } while (p < last);
|
|
673
|
|
674 b->in_file = 0;
|
|
675
|
|
676 return 1;
|
50
|
677 }
|
|
678
|
|
679
|
206
|
680 static ngx_chain_t *
|
|
681 ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
|
|
682 ngx_http_charset_ctx_t *ctx)
|
|
683 {
|
|
684 size_t len, size;
|
|
685 u_char c, *p, *src, *dst, *saved, **table;
|
|
686 uint32_t n;
|
|
687 ngx_buf_t *b;
|
|
688 ngx_uint_t i;
|
|
689 ngx_chain_t *out, *cl, **ll;
|
|
690
|
|
691 src = buf->pos;
|
|
692
|
|
693 if (ctx->saved_len == 0) {
|
|
694
|
|
695 for ( /* void */ ; src < buf->last; src++) {
|
|
696
|
|
697 if (*src < 0x80) {
|
|
698 continue;
|
|
699 }
|
|
700
|
|
701 len = src - buf->pos;
|
|
702
|
|
703 if (len > 512) {
|
|
704 out = ngx_http_charset_get_buf(pool, ctx);
|
|
705 if (out == NULL) {
|
|
706 return NULL;
|
|
707 }
|
|
708
|
|
709 b = out->buf;
|
|
710
|
|
711 b->temporary = buf->temporary;
|
|
712 b->memory = buf->memory;
|
|
713 b->mmap = buf->mmap;
|
|
714 b->flush = buf->flush;
|
|
715
|
|
716 b->pos = buf->pos;
|
|
717 b->last = src;
|
|
718
|
|
719 out->buf = b;
|
|
720 out->next = NULL;
|
|
721
|
|
722 size = buf->last - src;
|
|
723
|
|
724 saved = src;
|
390
|
725 n = ngx_utf8_decode(&saved, size);
|
206
|
726
|
|
727 if (n == 0xfffffffe) {
|
|
728 /* incomplete UTF-8 symbol */
|
|
729
|
|
730 ngx_memcpy(ctx->saved, src, size);
|
|
731 ctx->saved_len = size;
|
|
732
|
|
733 b->shadow = buf;
|
|
734
|
|
735 return out;
|
|
736 }
|
|
737
|
|
738 } else {
|
|
739 out = NULL;
|
|
740 size = len + buf->last - src;
|
|
741 src = buf->pos;
|
|
742 }
|
|
743
|
|
744 if (size < NGX_HTML_ENTITY_LEN) {
|
|
745 size += NGX_HTML_ENTITY_LEN;
|
|
746 }
|
|
747
|
|
748 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
749 if (cl == NULL) {
|
|
750 return NULL;
|
|
751 }
|
|
752
|
|
753 if (out) {
|
|
754 out->next = cl;
|
|
755
|
|
756 } else {
|
|
757 out = cl;
|
|
758 }
|
|
759
|
|
760 b = cl->buf;
|
|
761 dst = b->pos;
|
|
762
|
|
763 goto recode;
|
|
764 }
|
|
765
|
|
766 out = ngx_alloc_chain_link(pool);
|
|
767 if (out == NULL) {
|
|
768 return NULL;
|
|
769 }
|
|
770
|
|
771 out->buf = buf;
|
|
772 out->next = NULL;
|
|
773
|
|
774 return out;
|
|
775 }
|
|
776
|
|
777 /* process incomplete UTF sequence from previous buffer */
|
|
778
|
|
779 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
780 "http charset utf saved: %z", ctx->saved_len);
|
|
781
|
|
782 p = src;
|
|
783
|
|
784 for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) {
|
|
785 ctx->saved[i] = *p++;
|
|
786
|
|
787 if (p == buf->last) {
|
|
788 break;
|
|
789 }
|
|
790 }
|
|
791
|
|
792 saved = ctx->saved;
|
390
|
793 n = ngx_utf8_decode(&saved, i);
|
206
|
794
|
|
795 c = '\0';
|
|
796
|
|
797 if (n < 0x10000) {
|
|
798 table = (u_char **) ctx->table;
|
|
799 p = table[n >> 8];
|
|
800
|
|
801 if (p) {
|
|
802 c = p[n & 0xff];
|
|
803 }
|
|
804
|
|
805 } else if (n == 0xfffffffe) {
|
|
806
|
|
807 /* incomplete UTF-8 symbol */
|
|
808
|
|
809 if (i < NGX_UTF_LEN) {
|
|
810 out = ngx_http_charset_get_buf(pool, ctx);
|
|
811 if (out == NULL) {
|
|
812 return NULL;
|
|
813 }
|
|
814
|
|
815 b = out->buf;
|
|
816
|
|
817 b->pos = buf->pos;
|
|
818 b->last = buf->last;
|
|
819 b->sync = 1;
|
|
820 b->shadow = buf;
|
|
821
|
|
822 ngx_memcpy(&ctx->saved[ctx->saved_len], src, i);
|
|
823 ctx->saved_len += i;
|
|
824
|
|
825 return out;
|
|
826 }
|
|
827 }
|
|
828
|
|
829 size = buf->last - buf->pos;
|
|
830
|
|
831 if (size < NGX_HTML_ENTITY_LEN) {
|
|
832 size += NGX_HTML_ENTITY_LEN;
|
|
833 }
|
|
834
|
|
835 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
836 if (cl == NULL) {
|
|
837 return NULL;
|
|
838 }
|
|
839
|
|
840 out = cl;
|
|
841
|
|
842 b = cl->buf;
|
|
843 dst = b->pos;
|
|
844
|
|
845 if (c) {
|
|
846 *dst++ = c;
|
|
847
|
|
848 } else if (n == 0xfffffffe) {
|
|
849 *dst++ = '?';
|
|
850
|
|
851 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
852 "http charset invalid utf 0");
|
|
853
|
|
854 saved = &ctx->saved[NGX_UTF_LEN];
|
|
855
|
|
856 } else if (n > 0x10ffff) {
|
|
857 *dst++ = '?';
|
|
858
|
|
859 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
860 "http charset invalid utf 1");
|
|
861
|
|
862 } else {
|
|
863 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
864 }
|
|
865
|
|
866 src += (saved - ctx->saved) - ctx->saved_len;
|
|
867 ctx->saved_len = 0;
|
|
868
|
|
869 recode:
|
|
870
|
|
871 ll = &cl->next;
|
|
872
|
|
873 table = (u_char **) ctx->table;
|
|
874
|
|
875 while (src < buf->last) {
|
|
876
|
|
877 if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) {
|
|
878 b->last = dst;
|
|
879
|
|
880 size = buf->last - src + NGX_HTML_ENTITY_LEN;
|
|
881
|
|
882 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
883 if (cl == NULL) {
|
|
884 return NULL;
|
|
885 }
|
|
886
|
|
887 *ll = cl;
|
|
888 ll = &cl->next;
|
|
889
|
|
890 b = cl->buf;
|
|
891 dst = b->pos;
|
|
892 }
|
|
893
|
|
894 if (*src < 0x80) {
|
|
895 *dst++ = *src++;
|
|
896 continue;
|
|
897 }
|
|
898
|
|
899 len = buf->last - src;
|
|
900
|
390
|
901 n = ngx_utf8_decode(&src, len);
|
206
|
902
|
|
903 if (n < 0x10000) {
|
|
904
|
|
905 p = table[n >> 8];
|
|
906
|
|
907 if (p) {
|
|
908 c = p[n & 0xff];
|
|
909
|
|
910 if (c) {
|
|
911 *dst++ = c;
|
|
912 continue;
|
|
913 }
|
|
914 }
|
|
915
|
|
916 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
917
|
|
918 continue;
|
|
919 }
|
|
920
|
|
921 if (n == 0xfffffffe) {
|
|
922 /* incomplete UTF-8 symbol */
|
|
923
|
|
924 ngx_memcpy(ctx->saved, src, len);
|
|
925 ctx->saved_len = len;
|
|
926
|
|
927 if (b->pos == dst) {
|
|
928 b->sync = 1;
|
|
929 b->temporary = 0;
|
|
930 }
|
|
931
|
|
932 break;
|
|
933 }
|
|
934
|
|
935 if (n > 0x10ffff) {
|
|
936 *dst++ = '?';
|
|
937
|
|
938 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
939 "http charset invalid utf 2");
|
|
940
|
|
941 continue;
|
|
942 }
|
|
943
|
|
944 /* n > 0xffff */
|
|
945
|
|
946 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
947 }
|
|
948
|
|
949 b->last = dst;
|
|
950
|
|
951 b->last_buf = buf->last_buf;
|
|
952 b->last_in_chain = buf->last_in_chain;
|
|
953 b->flush = buf->flush;
|
|
954
|
|
955 b->shadow = buf;
|
|
956
|
|
957 return out;
|
|
958 }
|
|
959
|
|
960
|
|
961 static ngx_chain_t *
|
|
962 ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
|
|
963 ngx_http_charset_ctx_t *ctx)
|
|
964 {
|
|
965 size_t len, size;
|
|
966 u_char *p, *src, *dst, *table;
|
|
967 ngx_buf_t *b;
|
|
968 ngx_chain_t *out, *cl, **ll;
|
|
969
|
|
970 table = ctx->table;
|
|
971
|
|
972 for (src = buf->pos; src < buf->last; src++) {
|
|
973 if (table[*src * NGX_UTF_LEN] == '\1') {
|
|
974 continue;
|
|
975 }
|
|
976
|
|
977 goto recode;
|
|
978 }
|
|
979
|
|
980 out = ngx_alloc_chain_link(pool);
|
|
981 if (out == NULL) {
|
|
982 return NULL;
|
|
983 }
|
|
984
|
|
985 out->buf = buf;
|
|
986 out->next = NULL;
|
|
987
|
|
988 return out;
|
|
989
|
|
990 recode:
|
|
991
|
|
992 /*
|
|
993 * we assume that there are about half of characters to be recoded,
|
|
994 * so we preallocate "size / 2 + size / 2 * ctx->length"
|
|
995 */
|
|
996
|
|
997 len = src - buf->pos;
|
|
998
|
|
999 if (len > 512) {
|
|
1000 out = ngx_http_charset_get_buf(pool, ctx);
|
|
1001 if (out == NULL) {
|
|
1002 return NULL;
|
|
1003 }
|
|
1004
|
|
1005 b = out->buf;
|
|
1006
|
|
1007 b->temporary = buf->temporary;
|
|
1008 b->memory = buf->memory;
|
|
1009 b->mmap = buf->mmap;
|
|
1010 b->flush = buf->flush;
|
|
1011
|
|
1012 b->pos = buf->pos;
|
|
1013 b->last = src;
|
|
1014
|
|
1015 out->buf = b;
|
|
1016 out->next = NULL;
|
|
1017
|
|
1018 size = buf->last - src;
|
|
1019 size = size / 2 + size / 2 * ctx->length;
|
|
1020
|
|
1021 } else {
|
|
1022 out = NULL;
|
|
1023
|
|
1024 size = buf->last - src;
|
|
1025 size = len + size / 2 + size / 2 * ctx->length;
|
|
1026
|
|
1027 src = buf->pos;
|
|
1028 }
|
|
1029
|
|
1030 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
1031 if (cl == NULL) {
|
|
1032 return NULL;
|
|
1033 }
|
|
1034
|
|
1035 if (out) {
|
|
1036 out->next = cl;
|
|
1037
|
|
1038 } else {
|
|
1039 out = cl;
|
|
1040 }
|
|
1041
|
|
1042 ll = &cl->next;
|
|
1043
|
|
1044 b = cl->buf;
|
|
1045 dst = b->pos;
|
|
1046
|
|
1047 while (src < buf->last) {
|
|
1048
|
|
1049 p = &table[*src++ * NGX_UTF_LEN];
|
|
1050 len = *p++;
|
|
1051
|
|
1052 if ((size_t) (b->end - dst) < len) {
|
|
1053 b->last = dst;
|
|
1054
|
|
1055 size = buf->last - src;
|
|
1056 size = len + size / 2 + size / 2 * ctx->length;
|
|
1057
|
|
1058 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
1059 if (cl == NULL) {
|
|
1060 return NULL;
|
|
1061 }
|
|
1062
|
|
1063 *ll = cl;
|
|
1064 ll = &cl->next;
|
|
1065
|
|
1066 b = cl->buf;
|
|
1067 dst = b->pos;
|
|
1068 }
|
|
1069
|
|
1070 while (len) {
|
|
1071 *dst++ = *p++;
|
|
1072 len--;
|
|
1073 }
|
|
1074 }
|
|
1075
|
|
1076 b->last = dst;
|
|
1077
|
|
1078 b->last_buf = buf->last_buf;
|
|
1079 b->last_in_chain = buf->last_in_chain;
|
|
1080 b->flush = buf->flush;
|
|
1081
|
|
1082 b->shadow = buf;
|
|
1083
|
|
1084 return out;
|
|
1085 }
|
|
1086
|
|
1087
|
|
1088 static ngx_chain_t *
|
|
1089 ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx)
|
|
1090 {
|
|
1091 ngx_chain_t *cl;
|
|
1092
|
|
1093 cl = ctx->free_bufs;
|
|
1094
|
|
1095 if (cl) {
|
|
1096 ctx->free_bufs = cl->next;
|
|
1097
|
|
1098 cl->buf->shadow = NULL;
|
|
1099 cl->next = NULL;
|
|
1100
|
|
1101 return cl;
|
|
1102 }
|
|
1103
|
|
1104 cl = ngx_alloc_chain_link(pool);
|
|
1105 if (cl == NULL) {
|
|
1106 return NULL;
|
|
1107 }
|
|
1108
|
|
1109 cl->buf = ngx_calloc_buf(pool);
|
|
1110 if (cl->buf == NULL) {
|
|
1111 return NULL;
|
|
1112 }
|
|
1113
|
|
1114 cl->next = NULL;
|
|
1115
|
|
1116 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
|
|
1117
|
|
1118 return cl;
|
|
1119 }
|
|
1120
|
|
1121
|
|
1122 static ngx_chain_t *
|
|
1123 ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx,
|
|
1124 size_t size)
|
|
1125 {
|
|
1126 ngx_buf_t *b;
|
|
1127 ngx_chain_t *cl, **ll;
|
|
1128
|
|
1129 for (ll = &ctx->free_buffers, cl = ctx->free_buffers;
|
|
1130 cl;
|
|
1131 ll = &cl->next, cl = cl->next)
|
|
1132 {
|
|
1133 b = cl->buf;
|
|
1134
|
|
1135 if ((size_t) (b->end - b->start) >= size) {
|
|
1136 *ll = cl->next;
|
|
1137 cl->next = NULL;
|
|
1138
|
|
1139 b->pos = b->start;
|
|
1140 b->temporary = 1;
|
|
1141 b->shadow = NULL;
|
|
1142
|
|
1143 return cl;
|
|
1144 }
|
|
1145 }
|
|
1146
|
|
1147 cl = ngx_alloc_chain_link(pool);
|
|
1148 if (cl == NULL) {
|
|
1149 return NULL;
|
|
1150 }
|
|
1151
|
|
1152 cl->buf = ngx_create_temp_buf(pool, size);
|
|
1153 if (cl->buf == NULL) {
|
|
1154 return NULL;
|
|
1155 }
|
|
1156
|
|
1157 cl->next = NULL;
|
|
1158
|
|
1159 cl->buf->temporary = 1;
|
|
1160 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
|
|
1161
|
|
1162 return cl;
|
|
1163 }
|
|
1164
|
|
1165
|
50
|
1166 static char *
|
206
|
1167 ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
|
50
|
1168 {
|
|
1169 ngx_http_charset_main_conf_t *mcf = conf;
|
|
1170
|
206
|
1171 char *rv;
|
|
1172 u_char *p, *dst2src, **pp;
|
|
1173 ngx_int_t src, dst;
|
|
1174 ngx_uint_t i, n;
|
|
1175 ngx_str_t *value;
|
|
1176 ngx_conf_t pvcf;
|
|
1177 ngx_http_charset_t *charset;
|
|
1178 ngx_http_charset_tables_t *table;
|
|
1179 ngx_http_charset_conf_ctx_t ctx;
|
50
|
1180
|
|
1181 value = cf->args->elts;
|
|
1182
|
|
1183 src = ngx_http_add_charset(&mcf->charsets, &value[1]);
|
|
1184 if (src == NGX_ERROR) {
|
|
1185 return NGX_CONF_ERROR;
|
|
1186 }
|
|
1187
|
|
1188 dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
|
|
1189 if (dst == NGX_ERROR) {
|
|
1190 return NGX_CONF_ERROR;
|
|
1191 }
|
|
1192
|
|
1193 if (src == dst) {
|
|
1194 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1195 "\"charset_map\" between the same charsets "
|
|
1196 "\"%V\" and \"%V\"", &value[1], &value[2]);
|
|
1197 return NGX_CONF_ERROR;
|
|
1198 }
|
|
1199
|
|
1200 table = mcf->tables.elts;
|
|
1201 for (i = 0; i < mcf->tables.nelts; i++) {
|
|
1202 if ((src == table->src && dst == table->dst)
|
|
1203 || (src == table->dst && dst == table->src))
|
|
1204 {
|
|
1205 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1206 "duplicate \"charset_map\" between "
|
|
1207 "\"%V\" and \"%V\"", &value[1], &value[2]);
|
|
1208 return NGX_CONF_ERROR;
|
|
1209 }
|
|
1210 }
|
|
1211
|
|
1212 table = ngx_array_push(&mcf->tables);
|
|
1213 if (table == NULL) {
|
|
1214 return NGX_CONF_ERROR;
|
|
1215 }
|
|
1216
|
|
1217 table->src = src;
|
|
1218 table->dst = dst;
|
|
1219
|
286
|
1220 if (ngx_strcasecmp(value[2].data, (u_char *) "utf-8") == 0) {
|
206
|
1221 table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN);
|
|
1222 if (table->src2dst == NULL) {
|
|
1223 return NGX_CONF_ERROR;
|
|
1224 }
|
|
1225
|
|
1226 table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *));
|
|
1227 if (table->dst2src == NULL) {
|
|
1228 return NGX_CONF_ERROR;
|
|
1229 }
|
|
1230
|
|
1231 dst2src = ngx_pcalloc(cf->pool, 256);
|
|
1232 if (dst2src == NULL) {
|
|
1233 return NGX_CONF_ERROR;
|
|
1234 }
|
|
1235
|
|
1236 pp = (u_char **) &table->dst2src[0];
|
|
1237 pp[0] = dst2src;
|
|
1238
|
|
1239 for (i = 0; i < 128; i++) {
|
|
1240 p = &table->src2dst[i * NGX_UTF_LEN];
|
|
1241 p[0] = '\1';
|
|
1242 p[1] = (u_char) i;
|
|
1243 dst2src[i] = (u_char) i;
|
|
1244 }
|
50
|
1245
|
206
|
1246 for (/* void */; i < 256; i++) {
|
|
1247 p = &table->src2dst[i * NGX_UTF_LEN];
|
|
1248 p[0] = '\1';
|
|
1249 p[1] = '?';
|
|
1250 }
|
|
1251
|
|
1252 } else {
|
|
1253 table->src2dst = ngx_palloc(cf->pool, 256);
|
|
1254 if (table->src2dst == NULL) {
|
|
1255 return NGX_CONF_ERROR;
|
|
1256 }
|
|
1257
|
|
1258 table->dst2src = ngx_palloc(cf->pool, 256);
|
|
1259 if (table->dst2src == NULL) {
|
|
1260 return NGX_CONF_ERROR;
|
|
1261 }
|
|
1262
|
|
1263 for (i = 0; i < 128; i++) {
|
|
1264 table->src2dst[i] = (u_char) i;
|
|
1265 table->dst2src[i] = (u_char) i;
|
|
1266 }
|
|
1267
|
|
1268 for (/* void */; i < 256; i++) {
|
|
1269 table->src2dst[i] = '?';
|
|
1270 table->dst2src[i] = '?';
|
|
1271 }
|
50
|
1272 }
|
|
1273
|
206
|
1274 charset = mcf->charsets.elts;
|
50
|
1275
|
206
|
1276 ctx.table = table;
|
|
1277 ctx.charset = &charset[dst];
|
|
1278 ctx.characters = 0;
|
50
|
1279
|
|
1280 pvcf = *cf;
|
206
|
1281 cf->ctx = &ctx;
|
|
1282 cf->handler = ngx_http_charset_map;
|
50
|
1283 cf->handler_conf = conf;
|
|
1284
|
|
1285 rv = ngx_conf_parse(cf, NULL);
|
|
1286
|
|
1287 *cf = pvcf;
|
|
1288
|
206
|
1289 if (ctx.characters) {
|
|
1290 n = ctx.charset->length;
|
|
1291 ctx.charset->length /= ctx.characters;
|
|
1292
|
|
1293 if (((n * 10) / ctx.characters) % 10 > 4) {
|
|
1294 ctx.charset->length++;
|
|
1295 }
|
|
1296 }
|
|
1297
|
50
|
1298 return rv;
|
|
1299 }
|
|
1300
|
|
1301
|
|
1302 static char *
|
206
|
1303 ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
|
50
|
1304 {
|
206
|
1305 u_char *p, *dst2src, **pp;
|
|
1306 uint32_t n;
|
|
1307 ngx_int_t src, dst;
|
|
1308 ngx_str_t *value;
|
|
1309 ngx_uint_t i;
|
|
1310 ngx_http_charset_tables_t *table;
|
|
1311 ngx_http_charset_conf_ctx_t *ctx;
|
50
|
1312
|
|
1313 if (cf->args->nelts != 2) {
|
|
1314 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
|
|
1315 return NGX_CONF_ERROR;
|
|
1316 }
|
|
1317
|
|
1318 value = cf->args->elts;
|
|
1319
|
|
1320 src = ngx_hextoi(value[0].data, value[0].len);
|
|
1321 if (src == NGX_ERROR || src > 255) {
|
|
1322 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1323 "invalid value \"%V\"", &value[0]);
|
|
1324 return NGX_CONF_ERROR;
|
|
1325 }
|
|
1326
|
206
|
1327 ctx = cf->ctx;
|
|
1328 table = ctx->table;
|
|
1329
|
|
1330 if (ctx->charset->utf8) {
|
|
1331 p = &table->src2dst[src * NGX_UTF_LEN];
|
|
1332
|
|
1333 *p++ = (u_char) (value[1].len / 2);
|
|
1334
|
|
1335 for (i = 0; i < value[1].len; i += 2) {
|
|
1336 dst = ngx_hextoi(&value[1].data[i], 2);
|
|
1337 if (dst == NGX_ERROR || dst > 255) {
|
|
1338 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1339 "invalid value \"%V\"", &value[1]);
|
|
1340 return NGX_CONF_ERROR;
|
|
1341 }
|
|
1342
|
|
1343 *p++ = (u_char) dst;
|
|
1344 }
|
|
1345
|
|
1346 i /= 2;
|
|
1347
|
|
1348 ctx->charset->length += i;
|
|
1349 ctx->characters++;
|
|
1350
|
|
1351 p = &table->src2dst[src * NGX_UTF_LEN] + 1;
|
|
1352
|
390
|
1353 n = ngx_utf8_decode(&p, i);
|
206
|
1354
|
|
1355 if (n > 0xffff) {
|
|
1356 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1357 "invalid value \"%V\"", &value[1]);
|
|
1358 return NGX_CONF_ERROR;
|
|
1359 }
|
|
1360
|
|
1361 pp = (u_char **) &table->dst2src[0];
|
|
1362
|
|
1363 dst2src = pp[n >> 8];
|
|
1364
|
|
1365 if (dst2src == NULL) {
|
|
1366 dst2src = ngx_pcalloc(cf->pool, 256);
|
|
1367 if (dst2src == NULL) {
|
|
1368 return NGX_CONF_ERROR;
|
|
1369 }
|
|
1370
|
|
1371 pp[n >> 8] = dst2src;
|
|
1372 }
|
|
1373
|
|
1374 dst2src[n & 0xff] = (u_char) src;
|
|
1375
|
|
1376 } else {
|
|
1377 dst = ngx_hextoi(value[1].data, value[1].len);
|
|
1378 if (dst == NGX_ERROR || dst > 255) {
|
|
1379 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1380 "invalid value \"%V\"", &value[1]);
|
|
1381 return NGX_CONF_ERROR;
|
|
1382 }
|
|
1383
|
|
1384 table->src2dst[src] = (u_char) dst;
|
|
1385 table->dst2src[dst] = (u_char) src;
|
50
|
1386 }
|
|
1387
|
|
1388 return NGX_CONF_OK;
|
|
1389 }
|
|
1390
|
|
1391
|
|
1392 static char *
|
|
1393 ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
|
|
1394 {
|
|
1395 char *p = conf;
|
|
1396
|
|
1397 ngx_int_t *cp;
|
244
|
1398 ngx_str_t *value, var;
|
50
|
1399 ngx_http_charset_main_conf_t *mcf;
|
|
1400
|
|
1401 cp = (ngx_int_t *) (p + cmd->offset);
|
|
1402
|
|
1403 if (*cp != NGX_CONF_UNSET) {
|
|
1404 return "is duplicate";
|
|
1405 }
|
|
1406
|
78
|
1407 value = cf->args->elts;
|
|
1408
|
|
1409 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
|
|
1410 && ngx_strcmp(value[1].data, "off") == 0)
|
|
1411 {
|
496
|
1412 *cp = NGX_HTTP_CHARSET_OFF;
|
78
|
1413 return NGX_CONF_OK;
|
|
1414 }
|
|
1415
|
244
|
1416
|
|
1417 if (value[1].data[0] == '$') {
|
|
1418 var.len = value[1].len - 1;
|
|
1419 var.data = value[1].data + 1;
|
|
1420
|
|
1421 *cp = ngx_http_get_variable_index(cf, &var);
|
|
1422
|
|
1423 if (*cp == NGX_ERROR) {
|
|
1424 return NGX_CONF_ERROR;
|
|
1425 }
|
|
1426
|
|
1427 *cp += NGX_HTTP_CHARSET_VAR;
|
|
1428
|
|
1429 return NGX_CONF_OK;
|
|
1430 }
|
|
1431
|
50
|
1432 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1433 ngx_http_charset_filter_module);
|
|
1434
|
|
1435 *cp = ngx_http_add_charset(&mcf->charsets, &value[1]);
|
|
1436 if (*cp == NGX_ERROR) {
|
|
1437 return NGX_CONF_ERROR;
|
|
1438 }
|
|
1439
|
|
1440 return NGX_CONF_OK;
|
|
1441 }
|
|
1442
|
|
1443
|
|
1444 static ngx_int_t
|
|
1445 ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
|
|
1446 {
|
|
1447 ngx_uint_t i;
|
|
1448 ngx_http_charset_t *c;
|
|
1449
|
|
1450 c = charsets->elts;
|
|
1451 for (i = 0; i < charsets->nelts; i++) {
|
|
1452 if (name->len != c[i].name.len) {
|
|
1453 continue;
|
|
1454 }
|
|
1455
|
|
1456 if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
|
|
1457 break;
|
|
1458 }
|
|
1459 }
|
|
1460
|
|
1461 if (i < charsets->nelts) {
|
|
1462 return i;
|
|
1463 }
|
|
1464
|
|
1465 c = ngx_array_push(charsets);
|
|
1466 if (c == NULL) {
|
|
1467 return NGX_ERROR;
|
|
1468 }
|
|
1469
|
|
1470 c->tables = NULL;
|
|
1471 c->name = *name;
|
206
|
1472 c->length = 0;
|
50
|
1473
|
286
|
1474 if (ngx_strcasecmp(name->data, (u_char *) "utf-8") == 0) {
|
72
|
1475 c->utf8 = 1;
|
216
|
1476
|
|
1477 } else {
|
|
1478 c->utf8 = 0;
|
72
|
1479 }
|
|
1480
|
50
|
1481 return i;
|
|
1482 }
|
|
1483
|
|
1484
|
|
1485 static void *
|
|
1486 ngx_http_charset_create_main_conf(ngx_conf_t *cf)
|
|
1487 {
|
|
1488 ngx_http_charset_main_conf_t *mcf;
|
|
1489
|
|
1490 mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
|
|
1491 if (mcf == NULL) {
|
496
|
1492 return NULL;
|
50
|
1493 }
|
|
1494
|
|
1495 if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t))
|
454
|
1496 != NGX_OK)
|
50
|
1497 {
|
496
|
1498 return NULL;
|
50
|
1499 }
|
|
1500
|
78
|
1501 if (ngx_array_init(&mcf->tables, cf->pool, 1,
|
454
|
1502 sizeof(ngx_http_charset_tables_t))
|
|
1503 != NGX_OK)
|
50
|
1504 {
|
496
|
1505 return NULL;
|
50
|
1506 }
|
|
1507
|
78
|
1508 if (ngx_array_init(&mcf->recodes, cf->pool, 2,
|
454
|
1509 sizeof(ngx_http_charset_recode_t))
|
|
1510 != NGX_OK)
|
78
|
1511 {
|
496
|
1512 return NULL;
|
78
|
1513 }
|
|
1514
|
50
|
1515 return mcf;
|
|
1516 }
|
|
1517
|
|
1518
|
|
1519 static void *
|
|
1520 ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
|
|
1521 {
|
|
1522 ngx_http_charset_loc_conf_t *lcf;
|
|
1523
|
|
1524 lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));
|
|
1525 if (lcf == NULL) {
|
496
|
1526 return NULL;
|
50
|
1527 }
|
|
1528
|
394
|
1529 /*
|
|
1530 * set by ngx_pcalloc():
|
|
1531 *
|
|
1532 * lcf->types = { NULL };
|
|
1533 * lcf->types_keys = NULL;
|
|
1534 */
|
|
1535
|
78
|
1536 lcf->charset = NGX_CONF_UNSET;
|
50
|
1537 lcf->source_charset = NGX_CONF_UNSET;
|
184
|
1538 lcf->override_charset = NGX_CONF_UNSET;
|
50
|
1539
|
|
1540 return lcf;
|
|
1541 }
|
|
1542
|
|
1543
|
|
1544 static char *
|
|
1545 ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
|
|
1546 {
|
|
1547 ngx_http_charset_loc_conf_t *prev = parent;
|
|
1548 ngx_http_charset_loc_conf_t *conf = child;
|
|
1549
|
78
|
1550 ngx_uint_t i;
|
|
1551 ngx_http_charset_recode_t *recode;
|
|
1552 ngx_http_charset_main_conf_t *mcf;
|
50
|
1553
|
554
|
1554 if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types,
|
|
1555 &prev->types_keys, &prev->types,
|
396
|
1556 ngx_http_charset_default_types)
|
|
1557 != NGX_OK)
|
|
1558 {
|
|
1559 return NGX_CONF_ERROR;
|
|
1560 }
|
|
1561
|
184
|
1562 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
|
496
|
1563 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_CHARSET_OFF);
|
|
1564 ngx_conf_merge_value(conf->source_charset, prev->source_charset,
|
|
1565 NGX_HTTP_CHARSET_OFF);
|
50
|
1566
|
496
|
1567 if (conf->charset == NGX_HTTP_CHARSET_OFF
|
|
1568 || conf->source_charset == NGX_HTTP_CHARSET_OFF
|
78
|
1569 || conf->charset == conf->source_charset)
|
50
|
1570 {
|
78
|
1571 return NGX_CONF_OK;
|
50
|
1572 }
|
|
1573
|
342
|
1574 if (conf->source_charset >= NGX_HTTP_CHARSET_VAR
|
|
1575 || conf->charset >= NGX_HTTP_CHARSET_VAR)
|
|
1576 {
|
|
1577 return NGX_CONF_OK;
|
|
1578 }
|
|
1579
|
78
|
1580 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1581 ngx_http_charset_filter_module);
|
|
1582 recode = mcf->recodes.elts;
|
|
1583 for (i = 0; i < mcf->recodes.nelts; i++) {
|
|
1584 if (conf->source_charset == recode[i].src
|
|
1585 && conf->charset == recode[i].dst)
|
|
1586 {
|
|
1587 return NGX_CONF_OK;
|
|
1588 }
|
50
|
1589 }
|
|
1590
|
78
|
1591 recode = ngx_array_push(&mcf->recodes);
|
|
1592 if (recode == NULL) {
|
50
|
1593 return NGX_CONF_ERROR;
|
|
1594 }
|
|
1595
|
78
|
1596 recode->src = conf->source_charset;
|
|
1597 recode->dst = conf->charset;
|
|
1598
|
50
|
1599 return NGX_CONF_OK;
|
|
1600 }
|
78
|
1601
|
|
1602
|
|
1603 static ngx_int_t
|
|
1604 ngx_http_charset_postconfiguration(ngx_conf_t *cf)
|
|
1605 {
|
184
|
1606 u_char **src, **dst;
|
78
|
1607 ngx_int_t c;
|
|
1608 ngx_uint_t i, t;
|
|
1609 ngx_http_charset_t *charset;
|
|
1610 ngx_http_charset_recode_t *recode;
|
|
1611 ngx_http_charset_tables_t *tables;
|
|
1612 ngx_http_charset_main_conf_t *mcf;
|
|
1613
|
|
1614 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1615 ngx_http_charset_filter_module);
|
|
1616
|
|
1617 recode = mcf->recodes.elts;
|
|
1618 tables = mcf->tables.elts;
|
|
1619 charset = mcf->charsets.elts;
|
|
1620
|
|
1621 for (i = 0; i < mcf->recodes.nelts; i++) {
|
|
1622
|
|
1623 c = recode[i].src;
|
|
1624
|
|
1625 for (t = 0; t < mcf->tables.nelts; t++) {
|
|
1626
|
|
1627 if (c == tables[t].src && recode[i].dst == tables[t].dst) {
|
|
1628 goto next;
|
|
1629 }
|
|
1630
|
|
1631 if (c == tables[t].dst && recode[i].dst == tables[t].src) {
|
|
1632 goto next;
|
|
1633 }
|
|
1634 }
|
|
1635
|
|
1636 ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
|
342
|
1637 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
|
|
1638 &charset[c].name, &charset[recode[i].dst].name);
|
78
|
1639 return NGX_ERROR;
|
|
1640
|
|
1641 next:
|
|
1642 continue;
|
|
1643 }
|
|
1644
|
184
|
1645
|
|
1646 for (t = 0; t < mcf->tables.nelts; t++) {
|
|
1647
|
|
1648 src = charset[tables[t].src].tables;
|
|
1649
|
|
1650 if (src == NULL) {
|
|
1651 src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
|
|
1652 if (src == NULL) {
|
|
1653 return NGX_ERROR;
|
|
1654 }
|
|
1655
|
|
1656 charset[tables[t].src].tables = src;
|
|
1657 }
|
|
1658
|
|
1659 dst = charset[tables[t].dst].tables;
|
|
1660
|
|
1661 if (dst == NULL) {
|
|
1662 dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
|
|
1663 if (dst == NULL) {
|
|
1664 return NGX_ERROR;
|
|
1665 }
|
|
1666
|
|
1667 charset[tables[t].dst].tables = dst;
|
|
1668 }
|
|
1669
|
|
1670 src[tables[t].dst] = tables[t].src2dst;
|
|
1671 dst[tables[t].src] = tables[t].dst2src;
|
|
1672 }
|
|
1673
|
230
|
1674 ngx_http_next_header_filter = ngx_http_top_header_filter;
|
|
1675 ngx_http_top_header_filter = ngx_http_charset_header_filter;
|
|
1676
|
|
1677 ngx_http_next_body_filter = ngx_http_top_body_filter;
|
|
1678 ngx_http_top_body_filter = ngx_http_charset_body_filter;
|
|
1679
|
78
|
1680 return NGX_OK;
|
|
1681 }
|