50
|
1
|
|
2 /*
|
|
3 * Copyright (C) Igor Sysoev
|
|
4 */
|
|
5
|
|
6
|
|
7 #include <ngx_config.h>
|
|
8 #include <ngx_core.h>
|
|
9 #include <ngx_http.h>
|
|
10
|
|
11
|
206
|
/* "no charset" marker; parenthesized so it is safe inside any expression */
#define NGX_HTTP_NO_CHARSET    (-2)

/* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
#define NGX_UTF_LEN            4

/*
 * the room required for the longest numeric character reference that
 * the "&#%uD;" format may produce, i.e. the one for U+10FFFF
 */
#define NGX_HTML_ENTITY_LEN    (sizeof("&#1114111;") - 1)
|
78
|
18
|
|
19
|
50
|
20 typedef struct {
|
206
|
21 u_char **tables;
|
|
22 ngx_str_t name;
|
72
|
23
|
206
|
24 unsigned length:16;
|
|
25 unsigned utf8:1;
|
50
|
26 } ngx_http_charset_t;
|
|
27
|
|
28
|
|
29 typedef struct {
|
206
|
30 ngx_int_t src;
|
|
31 ngx_int_t dst;
|
78
|
32 } ngx_http_charset_recode_t;
|
|
33
|
|
34
|
|
35 typedef struct {
|
206
|
36 ngx_int_t src;
|
|
37 ngx_int_t dst;
|
|
38 u_char *src2dst;
|
|
39 u_char *dst2src;
|
50
|
40 } ngx_http_charset_tables_t;
|
|
41
|
|
42
|
|
43 typedef struct {
|
206
|
44 ngx_array_t charsets; /* ngx_http_charset_t */
|
|
45 ngx_array_t tables; /* ngx_http_charset_tables_t */
|
|
46 ngx_array_t recodes; /* ngx_http_charset_recode_t */
|
50
|
47 } ngx_http_charset_main_conf_t;
|
|
48
|
|
49
|
|
50 typedef struct {
|
206
|
51 ngx_int_t charset;
|
|
52 ngx_int_t source_charset;
|
|
53 ngx_flag_t override_charset;
|
50
|
54 } ngx_http_charset_loc_conf_t;
|
|
55
|
|
56
|
|
57 typedef struct {
|
206
|
58 u_char *table;
|
|
59 ngx_int_t charset;
|
|
60
|
|
61 ngx_chain_t *busy;
|
|
62 ngx_chain_t *free_bufs;
|
|
63 ngx_chain_t *free_buffers;
|
|
64
|
|
65 size_t saved_len;
|
|
66 u_char saved[NGX_UTF_LEN];
|
|
67
|
|
68 unsigned length:16;
|
|
69 unsigned from_utf8:1;
|
|
70 unsigned to_utf8:1;
|
50
|
71 } ngx_http_charset_ctx_t;
|
|
72
|
|
73
|
206
|
74 typedef struct {
|
|
75 ngx_http_charset_tables_t *table;
|
|
76 ngx_http_charset_t *charset;
|
|
77 ngx_uint_t characters;
|
|
78 } ngx_http_charset_conf_ctx_t;
|
|
79
|
50
|
80
|
206
|
81 static ngx_int_t ngx_http_charset_get_charset(ngx_http_charset_t *charsets,
|
|
82 ngx_uint_t n, u_char *charset);
|
|
83 static ngx_int_t ngx_http_charset_set_charset(ngx_http_request_t *r,
|
|
84 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset);
|
|
85 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);
|
|
86 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool,
|
|
87 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
|
|
88 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool,
|
|
89 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
|
|
90
|
|
91 static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool,
|
|
92 ngx_http_charset_ctx_t *ctx);
|
|
93 static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool,
|
|
94 ngx_http_charset_ctx_t *ctx, size_t size);
|
|
95
|
|
96 static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
|
50
|
97 void *conf);
|
206
|
98 static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy,
|
|
99 void *conf);
|
50
|
100
|
|
101 static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
|
|
102 void *conf);
|
|
103 static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);
|
|
104
|
|
105 static ngx_int_t ngx_http_charset_filter_init(ngx_cycle_t *cycle);
|
|
106
|
|
107 static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
|
|
108 static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
|
|
109 static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
|
|
110 void *parent, void *child);
|
78
|
111 static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);
|
50
|
112
|
|
113
|
|
114 static ngx_command_t ngx_http_charset_filter_commands[] = {
|
|
115
|
78
|
116 { ngx_string("charset"),
|
108
|
117 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
118 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
|
78
|
119 ngx_http_set_charset_slot,
|
|
120 NGX_HTTP_LOC_CONF_OFFSET,
|
|
121 offsetof(ngx_http_charset_loc_conf_t, charset),
|
|
122 NULL },
|
|
123
|
|
124 { ngx_string("source_charset"),
|
108
|
125 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
126 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
|
78
|
127 ngx_http_set_charset_slot,
|
|
128 NGX_HTTP_LOC_CONF_OFFSET,
|
|
129 offsetof(ngx_http_charset_loc_conf_t, source_charset),
|
|
130 NULL },
|
|
131
|
184
|
132 { ngx_string("override_charset"),
|
|
133 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
|
|
134 |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
|
|
135 ngx_conf_set_flag_slot,
|
|
136 NGX_HTTP_LOC_CONF_OFFSET,
|
|
137 offsetof(ngx_http_charset_loc_conf_t, override_charset),
|
|
138 NULL },
|
|
139
|
50
|
140 { ngx_string("charset_map"),
|
|
141 NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
|
206
|
142 ngx_http_charset_map_block,
|
50
|
143 NGX_HTTP_MAIN_CONF_OFFSET,
|
|
144 0,
|
|
145 NULL },
|
|
146
|
|
147 ngx_null_command
|
|
148 };
|
|
149
|
|
150
|
|
151 static ngx_http_module_t ngx_http_charset_filter_module_ctx = {
|
58
|
152 NULL, /* preconfiguration */
|
78
|
153 ngx_http_charset_postconfiguration, /* postconfiguration */
|
50
|
154
|
|
155 ngx_http_charset_create_main_conf, /* create main configuration */
|
78
|
156 NULL, /* init main configuration */
|
50
|
157
|
|
158 NULL, /* create server configuration */
|
|
159 NULL, /* merge server configuration */
|
|
160
|
|
161 ngx_http_charset_create_loc_conf, /* create location configuration */
|
|
162 ngx_http_charset_merge_loc_conf /* merge location configuration */
|
|
163 };
|
|
164
|
|
165
|
|
166 ngx_module_t ngx_http_charset_filter_module = {
|
58
|
167 NGX_MODULE_V1,
|
50
|
168 &ngx_http_charset_filter_module_ctx, /* module context */
|
|
169 ngx_http_charset_filter_commands, /* module directives */
|
|
170 NGX_HTTP_MODULE, /* module type */
|
90
|
171 NULL, /* init master */
|
50
|
172 ngx_http_charset_filter_init, /* init module */
|
90
|
173 NULL, /* init process */
|
|
174 NULL, /* init thread */
|
|
175 NULL, /* exit thread */
|
|
176 NULL, /* exit process */
|
|
177 NULL, /* exit master */
|
|
178 NGX_MODULE_V1_PADDING
|
50
|
179 };
|
|
180
|
|
181
|
|
182 static ngx_http_output_header_filter_pt ngx_http_next_header_filter;
|
|
183 static ngx_http_output_body_filter_pt ngx_http_next_body_filter;
|
|
184
|
|
185
|
|
186 static ngx_int_t
|
|
187 ngx_http_charset_header_filter(ngx_http_request_t *r)
|
|
188 {
|
206
|
189 u_char *ct;
|
184
|
190 ngx_int_t charset, source_charset;
|
206
|
191 ngx_str_t *mc;
|
|
192 ngx_uint_t n;
|
50
|
193 ngx_http_charset_t *charsets;
|
|
194 ngx_http_charset_ctx_t *ctx;
|
202
|
195 ngx_http_charset_loc_conf_t *lcf, *mlcf;
|
50
|
196 ngx_http_charset_main_conf_t *mcf;
|
|
197
|
|
198 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
|
184
|
199
|
206
|
200 charsets = mcf->charsets.elts;
|
|
201 n = mcf->charsets.nelts;
|
184
|
202
|
206
|
203 /* destination charset */
|
202
|
204
|
|
205 if (r == r->main) {
|
206
|
206
|
202
|
207 if (r->headers_out.content_type.len == 0) {
|
|
208 return ngx_http_next_header_filter(r);
|
|
209 }
|
50
|
210
|
206
|
211 if (r->headers_out.override_charset
|
|
212 && r->headers_out.override_charset->len)
|
202
|
213 {
|
206
|
214 charset = ngx_http_charset_get_charset(charsets, n,
|
|
215 r->headers_out.override_charset->data);
|
|
216
|
|
217 if (charset == NGX_HTTP_NO_CHARSET) {
|
|
218 ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
|
|
219 "unknown charset \"%V\" to override",
|
208
|
220 r->headers_out.override_charset);
|
206
|
221
|
|
222 return ngx_http_next_header_filter(r);
|
|
223 }
|
|
224
|
|
225 } else {
|
|
226 mlcf = ngx_http_get_module_loc_conf(r,
|
|
227 ngx_http_charset_filter_module);
|
|
228 charset = mlcf->charset;
|
|
229
|
|
230 if (charset == NGX_HTTP_NO_CHARSET) {
|
|
231 return ngx_http_next_header_filter(r);
|
|
232 }
|
|
233
|
|
234 if (r->headers_out.charset.len) {
|
|
235 if (mlcf->override_charset == 0) {
|
|
236 return ngx_http_next_header_filter(r);
|
|
237 }
|
|
238
|
|
239 } else {
|
|
240 ct = r->headers_out.content_type.data;
|
|
241
|
|
242 if (ngx_strncasecmp(ct, "text/", 5) != 0
|
|
243 && ngx_strncasecmp(ct, "application/x-javascript", 24) != 0)
|
|
244 {
|
|
245 return ngx_http_next_header_filter(r);
|
|
246 }
|
|
247 }
|
202
|
248 }
|
|
249
|
|
250 } else {
|
206
|
251 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);
|
|
252
|
|
253 if (ctx == NULL) {
|
|
254
|
|
255 mc = &r->main->headers_out.charset;
|
|
256
|
|
257 if (mc->len == 0) {
|
|
258 return ngx_http_next_header_filter(r);
|
|
259 }
|
|
260
|
|
261 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
|
|
262 if (ctx == NULL) {
|
|
263 return NGX_ERROR;
|
|
264 }
|
|
265
|
|
266 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module);
|
202
|
267
|
206
|
268 charset = ngx_http_charset_get_charset(charsets, n, mc->data);
|
|
269
|
|
270 ctx->charset = charset;
|
|
271
|
|
272 if (charset == NGX_HTTP_NO_CHARSET) {
|
|
273 ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
|
|
274 "unknown charset \"%V\" of main request", mc);
|
|
275
|
|
276 return ngx_http_next_header_filter(r);
|
|
277 }
|
|
278 }
|
|
279
|
|
280 charset = ctx->charset;
|
|
281
|
|
282 if (charset == NGX_HTTP_NO_CHARSET) {
|
|
283 return ngx_http_next_header_filter(r);
|
202
|
284 }
|
50
|
285 }
|
|
286
|
206
|
287 /* source charset */
|
184
|
288
|
206
|
289 if (r->headers_out.charset.len == 0) {
|
|
290 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
|
184
|
291
|
206
|
292 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
|
|
293 lcf->source_charset);
|
|
294 }
|
184
|
295
|
206
|
296 source_charset = ngx_http_charset_get_charset(charsets, n,
|
|
297 r->headers_out.charset.data);
|
184
|
298
|
206
|
299 if (source_charset == NGX_HTTP_NO_CHARSET) {
|
|
300 ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
|
|
301 "unknown source charset \"%V\"", &r->headers_out.charset);
|
184
|
302
|
206
|
303 return ngx_http_next_header_filter(r);
|
|
304 }
|
184
|
305
|
206
|
306 if (source_charset != charset
|
|
307 && (charsets[source_charset].tables == NULL
|
|
308 || charsets[source_charset].tables[charset] == NULL))
|
|
309 {
|
|
310 ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
|
|
311 "no \"charset_map\" between the charsets "
|
|
312 "\"%V\" and \"%V\"",
|
|
313 &charsets[source_charset].name, &charsets[charset].name);
|
184
|
314
|
50
|
315 return ngx_http_next_header_filter(r);
|
|
316 }
|
|
317
|
206
|
318 r->headers_out.content_type.len = r->headers_out.content_type_len;
|
|
319
|
|
320 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
|
|
321 source_charset);
|
|
322 }
|
|
323
|
|
324
|
|
325 static ngx_int_t
|
|
326 ngx_http_charset_get_charset(ngx_http_charset_t *charsets, ngx_uint_t n,
|
|
327 u_char *charset)
|
|
328 {
|
|
329 ngx_uint_t i;
|
|
330
|
|
331 for (i = 0; i < n; i++) {
|
|
332 if (ngx_strcasecmp(charsets[i].name.data, charset) == 0) {
|
|
333 return i;
|
|
334 }
|
|
335 }
|
|
336
|
|
337 return NGX_HTTP_NO_CHARSET;
|
|
338 }
|
|
339
|
|
340
|
|
341 static ngx_int_t
|
|
342 ngx_http_charset_set_charset(ngx_http_request_t *r,
|
|
343 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset)
|
|
344 {
|
|
345 ngx_http_charset_ctx_t *ctx;
|
|
346
|
50
|
347 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
|
|
348 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
|
|
349 {
|
|
350 /*
|
184
|
351 * do not set charset for the redirect because NN 4.x
|
|
352 * use this charset instead of the next page charset
|
50
|
353 */
|
|
354
|
|
355 r->headers_out.charset.len = 0;
|
206
|
356
|
50
|
357 return ngx_http_next_header_filter(r);
|
|
358 }
|
|
359
|
184
|
360 r->headers_out.charset = charsets[charset].name;
|
|
361 r->utf8 = charsets[charset].utf8;
|
|
362
|
|
363 if (source_charset == NGX_CONF_UNSET || source_charset == charset) {
|
50
|
364 return ngx_http_next_header_filter(r);
|
|
365 }
|
|
366
|
|
367 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
|
|
368 if (ctx == NULL) {
|
|
369 return NGX_ERROR;
|
|
370 }
|
|
371
|
|
372 ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);
|
|
373
|
184
|
374 ctx->table = charsets[source_charset].tables[charset];
|
|
375 ctx->charset = charset;
|
206
|
376 ctx->length = charsets[charset].length;
|
|
377 ctx->from_utf8 = charsets[source_charset].utf8;
|
|
378 ctx->to_utf8 = charsets[charset].utf8;
|
|
379
|
|
380 if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) {
|
|
381 ngx_http_clear_content_length(r);
|
|
382 }
|
50
|
383
|
|
384 r->filter_need_in_memory = 1;
|
|
385
|
|
386 return ngx_http_next_header_filter(r);
|
|
387 }
|
|
388
|
|
389
|
|
390 static ngx_int_t
|
|
391 ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
|
|
392 {
|
206
|
393 ngx_int_t rc;
|
|
394 ngx_buf_t *b;
|
|
395 ngx_chain_t *cl, *out, **ll;
|
184
|
396 ngx_http_charset_ctx_t *ctx;
|
50
|
397
|
|
398 ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);
|
|
399
|
184
|
400 if (ctx == NULL || ctx->table == NULL) {
|
50
|
401 return ngx_http_next_body_filter(r, in);
|
|
402 }
|
|
403
|
206
|
404 if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) {
|
|
405
|
|
406 out = NULL;
|
|
407 ll = &out;
|
|
408
|
|
409 for (cl = in; cl; cl = cl->next) {
|
|
410 b = cl->buf;
|
|
411
|
|
412 if (ngx_buf_size(b) == 0) {
|
|
413 continue;
|
|
414 }
|
|
415
|
|
416 if (ctx->to_utf8) {
|
|
417 *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx);
|
|
418
|
|
419 } else {
|
|
420 *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx);
|
|
421 }
|
|
422
|
|
423 if (*ll == NULL) {
|
|
424 return NGX_ERROR;
|
|
425 }
|
|
426
|
|
427 while (*ll) {
|
|
428 ll = &(*ll)->next;
|
|
429 }
|
|
430 }
|
|
431
|
|
432 rc = ngx_http_next_body_filter(r, out);
|
|
433
|
|
434 if (out) {
|
|
435 if (ctx->busy == NULL) {
|
|
436 ctx->busy = out;
|
|
437
|
|
438 } else {
|
|
439 for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ }
|
|
440 cl->next = out;
|
|
441 }
|
|
442 }
|
|
443
|
|
444 while (ctx->busy) {
|
|
445
|
|
446 cl = ctx->busy;
|
|
447 b = cl->buf;
|
|
448
|
|
449 if (ngx_buf_size(b) != 0) {
|
|
450 break;
|
|
451 }
|
|
452
|
|
453 #if (NGX_HAVE_WRITE_ZEROCOPY)
|
|
454 if (b->zerocopy_busy) {
|
|
455 break;
|
|
456 }
|
|
457 #endif
|
|
458
|
|
459 ctx->busy = cl->next;
|
|
460
|
|
461 if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) {
|
|
462 continue;
|
|
463 }
|
|
464
|
|
465 if (b->shadow) {
|
|
466 b->shadow->pos = b->shadow->last;
|
|
467 }
|
|
468
|
|
469 if (b->pos) {
|
|
470 cl->next = ctx->free_buffers;
|
|
471 ctx->free_buffers = cl;
|
|
472 continue;
|
|
473 }
|
|
474
|
|
475 cl->next = ctx->free_bufs;
|
|
476 ctx->free_bufs = cl;
|
|
477 }
|
|
478
|
|
479 return rc;
|
|
480 }
|
|
481
|
50
|
482 for (cl = in; cl; cl = cl->next) {
|
184
|
483 (void) ngx_http_charset_recode(cl->buf, ctx->table);
|
50
|
484 }
|
|
485
|
|
486 return ngx_http_next_body_filter(r, in);
|
|
487 }
|
|
488
|
|
489
|
|
490 static ngx_uint_t
|
184
|
491 ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
|
50
|
492 {
|
184
|
493 u_char *p;
|
50
|
494
|
|
495 for (p = b->pos; p < b->last; p++) {
|
184
|
496
|
|
497 if (*p == table[*p]) {
|
|
498 continue;
|
50
|
499 }
|
|
500
|
|
501 while (p < b->last) {
|
|
502 *p = table[*p];
|
|
503 p++;
|
|
504 }
|
|
505
|
|
506 b->in_file = 0;
|
184
|
507
|
|
508 return 1;
|
50
|
509 }
|
|
510
|
184
|
511 return 0;
|
50
|
512 }
|
|
513
|
|
514
|
206
|
515 static ngx_chain_t *
|
|
516 ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
|
|
517 ngx_http_charset_ctx_t *ctx)
|
|
518 {
|
|
519 size_t len, size;
|
|
520 u_char c, *p, *src, *dst, *saved, **table;
|
|
521 uint32_t n;
|
|
522 ngx_buf_t *b;
|
|
523 ngx_uint_t i;
|
|
524 ngx_chain_t *out, *cl, **ll;
|
|
525
|
|
526 src = buf->pos;
|
|
527
|
|
528 if (ctx->saved_len == 0) {
|
|
529
|
|
530 for ( /* void */ ; src < buf->last; src++) {
|
|
531
|
|
532 if (*src < 0x80) {
|
|
533 continue;
|
|
534 }
|
|
535
|
|
536 len = src - buf->pos;
|
|
537
|
|
538 if (len > 512) {
|
|
539 out = ngx_http_charset_get_buf(pool, ctx);
|
|
540 if (out == NULL) {
|
|
541 return NULL;
|
|
542 }
|
|
543
|
|
544 b = out->buf;
|
|
545
|
|
546 b->temporary = buf->temporary;
|
|
547 b->memory = buf->memory;
|
|
548 b->mmap = buf->mmap;
|
|
549 b->flush = buf->flush;
|
|
550
|
|
551 b->pos = buf->pos;
|
|
552 b->last = src;
|
|
553
|
|
554 out->buf = b;
|
|
555 out->next = NULL;
|
|
556
|
|
557 size = buf->last - src;
|
|
558
|
|
559 saved = src;
|
|
560 n = ngx_utf_decode(&saved, size);
|
|
561
|
|
562 if (n == 0xfffffffe) {
|
|
563 /* incomplete UTF-8 symbol */
|
|
564
|
|
565 ngx_memcpy(ctx->saved, src, size);
|
|
566 ctx->saved_len = size;
|
|
567
|
|
568 b->shadow = buf;
|
|
569
|
|
570 return out;
|
|
571 }
|
|
572
|
|
573 } else {
|
|
574 out = NULL;
|
|
575 size = len + buf->last - src;
|
|
576 src = buf->pos;
|
|
577 }
|
|
578
|
|
579 if (size < NGX_HTML_ENTITY_LEN) {
|
|
580 size += NGX_HTML_ENTITY_LEN;
|
|
581 }
|
|
582
|
|
583 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
584 if (cl == NULL) {
|
|
585 return NULL;
|
|
586 }
|
|
587
|
|
588 if (out) {
|
|
589 out->next = cl;
|
|
590
|
|
591 } else {
|
|
592 out = cl;
|
|
593 }
|
|
594
|
|
595 b = cl->buf;
|
|
596 dst = b->pos;
|
|
597
|
|
598 goto recode;
|
|
599 }
|
|
600
|
|
601 out = ngx_alloc_chain_link(pool);
|
|
602 if (out == NULL) {
|
|
603 return NULL;
|
|
604 }
|
|
605
|
|
606 out->buf = buf;
|
|
607 out->next = NULL;
|
|
608
|
|
609 return out;
|
|
610 }
|
|
611
|
|
612 /* process incomplete UTF sequence from previous buffer */
|
|
613
|
|
614 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
615 "http charset utf saved: %z", ctx->saved_len);
|
|
616
|
|
617 p = src;
|
|
618
|
|
619 for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) {
|
|
620 ctx->saved[i] = *p++;
|
|
621
|
|
622 if (p == buf->last) {
|
|
623 break;
|
|
624 }
|
|
625 }
|
|
626
|
|
627 saved = ctx->saved;
|
|
628 n = ngx_utf_decode(&saved, i);
|
|
629
|
|
630 c = '\0';
|
|
631
|
|
632 if (n < 0x10000) {
|
|
633 table = (u_char **) ctx->table;
|
|
634 p = table[n >> 8];
|
|
635
|
|
636 if (p) {
|
|
637 c = p[n & 0xff];
|
|
638 }
|
|
639
|
|
640 } else if (n == 0xfffffffe) {
|
|
641
|
|
642 /* incomplete UTF-8 symbol */
|
|
643
|
|
644 if (i < NGX_UTF_LEN) {
|
|
645 out = ngx_http_charset_get_buf(pool, ctx);
|
|
646 if (out == NULL) {
|
|
647 return NULL;
|
|
648 }
|
|
649
|
|
650 b = out->buf;
|
|
651
|
|
652 b->pos = buf->pos;
|
|
653 b->last = buf->last;
|
|
654 b->sync = 1;
|
|
655 b->shadow = buf;
|
|
656
|
|
657 ngx_memcpy(&ctx->saved[ctx->saved_len], src, i);
|
|
658 ctx->saved_len += i;
|
|
659
|
|
660 return out;
|
|
661 }
|
|
662 }
|
|
663
|
|
664 size = buf->last - buf->pos;
|
|
665
|
|
666 if (size < NGX_HTML_ENTITY_LEN) {
|
|
667 size += NGX_HTML_ENTITY_LEN;
|
|
668 }
|
|
669
|
|
670 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
671 if (cl == NULL) {
|
|
672 return NULL;
|
|
673 }
|
|
674
|
|
675 out = cl;
|
|
676
|
|
677 b = cl->buf;
|
|
678 dst = b->pos;
|
|
679
|
|
680 if (c) {
|
|
681 *dst++ = c;
|
|
682
|
|
683 } else if (n == 0xfffffffe) {
|
|
684 *dst++ = '?';
|
|
685
|
|
686 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
687 "http charset invalid utf 0");
|
|
688
|
|
689 saved = &ctx->saved[NGX_UTF_LEN];
|
|
690
|
|
691 } else if (n > 0x10ffff) {
|
|
692 *dst++ = '?';
|
|
693
|
|
694 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
695 "http charset invalid utf 1");
|
|
696
|
|
697 } else {
|
|
698 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
699 }
|
|
700
|
|
701 src += (saved - ctx->saved) - ctx->saved_len;
|
|
702 ctx->saved_len = 0;
|
|
703
|
|
704 recode:
|
|
705
|
|
706 ll = &cl->next;
|
|
707
|
|
708 table = (u_char **) ctx->table;
|
|
709
|
|
710 while (src < buf->last) {
|
|
711
|
|
712 if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) {
|
|
713 b->last = dst;
|
|
714
|
|
715 size = buf->last - src + NGX_HTML_ENTITY_LEN;
|
|
716
|
|
717 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
718 if (cl == NULL) {
|
|
719 return NULL;
|
|
720 }
|
|
721
|
|
722 *ll = cl;
|
|
723 ll = &cl->next;
|
|
724
|
|
725 b = cl->buf;
|
|
726 dst = b->pos;
|
|
727 }
|
|
728
|
|
729 if (*src < 0x80) {
|
|
730 *dst++ = *src++;
|
|
731 continue;
|
|
732 }
|
|
733
|
|
734 len = buf->last - src;
|
|
735
|
|
736 n = ngx_utf_decode(&src, len);
|
|
737
|
|
738 if (n < 0x10000) {
|
|
739
|
|
740 p = table[n >> 8];
|
|
741
|
|
742 if (p) {
|
|
743 c = p[n & 0xff];
|
|
744
|
|
745 if (c) {
|
|
746 *dst++ = c;
|
|
747 continue;
|
|
748 }
|
|
749 }
|
|
750
|
|
751 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
752
|
|
753 continue;
|
|
754 }
|
|
755
|
|
756 if (n == 0xfffffffe) {
|
|
757 /* incomplete UTF-8 symbol */
|
|
758
|
|
759 ngx_memcpy(ctx->saved, src, len);
|
|
760 ctx->saved_len = len;
|
|
761
|
|
762 if (b->pos == dst) {
|
|
763 b->sync = 1;
|
|
764 b->temporary = 0;
|
|
765 }
|
|
766
|
|
767 break;
|
|
768 }
|
|
769
|
|
770 if (n > 0x10ffff) {
|
|
771 *dst++ = '?';
|
|
772
|
|
773 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
|
|
774 "http charset invalid utf 2");
|
|
775
|
|
776 continue;
|
|
777 }
|
|
778
|
|
779 /* n > 0xffff */
|
|
780
|
|
781 dst = ngx_sprintf(dst, "&#%uD;", n);
|
|
782 }
|
|
783
|
|
784 b->last = dst;
|
|
785
|
|
786 b->last_buf = buf->last_buf;
|
|
787 b->last_in_chain = buf->last_in_chain;
|
|
788 b->flush = buf->flush;
|
|
789
|
|
790 b->shadow = buf;
|
|
791
|
|
792 return out;
|
|
793 }
|
|
794
|
|
795
|
|
796 static ngx_chain_t *
|
|
797 ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
|
|
798 ngx_http_charset_ctx_t *ctx)
|
|
799 {
|
|
800 size_t len, size;
|
|
801 u_char *p, *src, *dst, *table;
|
|
802 ngx_buf_t *b;
|
|
803 ngx_chain_t *out, *cl, **ll;
|
|
804
|
|
805 table = ctx->table;
|
|
806
|
|
807 for (src = buf->pos; src < buf->last; src++) {
|
|
808 if (table[*src * NGX_UTF_LEN] == '\1') {
|
|
809 continue;
|
|
810 }
|
|
811
|
|
812 goto recode;
|
|
813 }
|
|
814
|
|
815 out = ngx_alloc_chain_link(pool);
|
|
816 if (out == NULL) {
|
|
817 return NULL;
|
|
818 }
|
|
819
|
|
820 out->buf = buf;
|
|
821 out->next = NULL;
|
|
822
|
|
823 return out;
|
|
824
|
|
825 recode:
|
|
826
|
|
827 /*
|
|
828 * we assume that there are about half of characters to be recoded,
|
|
829 * so we preallocate "size / 2 + size / 2 * ctx->length"
|
|
830 */
|
|
831
|
|
832 len = src - buf->pos;
|
|
833
|
|
834 if (len > 512) {
|
|
835 out = ngx_http_charset_get_buf(pool, ctx);
|
|
836 if (out == NULL) {
|
|
837 return NULL;
|
|
838 }
|
|
839
|
|
840 b = out->buf;
|
|
841
|
|
842 b->temporary = buf->temporary;
|
|
843 b->memory = buf->memory;
|
|
844 b->mmap = buf->mmap;
|
|
845 b->flush = buf->flush;
|
|
846
|
|
847 b->pos = buf->pos;
|
|
848 b->last = src;
|
|
849
|
|
850 out->buf = b;
|
|
851 out->next = NULL;
|
|
852
|
|
853 size = buf->last - src;
|
|
854 size = size / 2 + size / 2 * ctx->length;
|
|
855
|
|
856 } else {
|
|
857 out = NULL;
|
|
858
|
|
859 size = buf->last - src;
|
|
860 size = len + size / 2 + size / 2 * ctx->length;
|
|
861
|
|
862 src = buf->pos;
|
|
863 }
|
|
864
|
|
865 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
866 if (cl == NULL) {
|
|
867 return NULL;
|
|
868 }
|
|
869
|
|
870 if (out) {
|
|
871 out->next = cl;
|
|
872
|
|
873 } else {
|
|
874 out = cl;
|
|
875 }
|
|
876
|
|
877 ll = &cl->next;
|
|
878
|
|
879 b = cl->buf;
|
|
880 dst = b->pos;
|
|
881
|
|
882 while (src < buf->last) {
|
|
883
|
|
884 p = &table[*src++ * NGX_UTF_LEN];
|
|
885 len = *p++;
|
|
886
|
|
887 if ((size_t) (b->end - dst) < len) {
|
|
888 b->last = dst;
|
|
889
|
|
890 size = buf->last - src;
|
|
891 size = len + size / 2 + size / 2 * ctx->length;
|
|
892
|
|
893 cl = ngx_http_charset_get_buffer(pool, ctx, size);
|
|
894 if (cl == NULL) {
|
|
895 return NULL;
|
|
896 }
|
|
897
|
|
898 *ll = cl;
|
|
899 ll = &cl->next;
|
|
900
|
|
901 b = cl->buf;
|
|
902 dst = b->pos;
|
|
903 }
|
|
904
|
|
905 while (len) {
|
|
906 *dst++ = *p++;
|
|
907 len--;
|
|
908 }
|
|
909 }
|
|
910
|
|
911 b->last = dst;
|
|
912
|
|
913 b->last_buf = buf->last_buf;
|
|
914 b->last_in_chain = buf->last_in_chain;
|
|
915 b->flush = buf->flush;
|
|
916
|
|
917 b->shadow = buf;
|
|
918
|
|
919 return out;
|
|
920 }
|
|
921
|
|
922
|
|
923 static ngx_chain_t *
|
|
924 ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx)
|
|
925 {
|
|
926 ngx_chain_t *cl;
|
|
927
|
|
928 cl = ctx->free_bufs;
|
|
929
|
|
930 if (cl) {
|
|
931 ctx->free_bufs = cl->next;
|
|
932
|
|
933 cl->buf->shadow = NULL;
|
|
934 cl->next = NULL;
|
|
935
|
|
936 return cl;
|
|
937 }
|
|
938
|
|
939 cl = ngx_alloc_chain_link(pool);
|
|
940 if (cl == NULL) {
|
|
941 return NULL;
|
|
942 }
|
|
943
|
|
944 cl->buf = ngx_calloc_buf(pool);
|
|
945 if (cl->buf == NULL) {
|
|
946 return NULL;
|
|
947 }
|
|
948
|
|
949 cl->next = NULL;
|
|
950
|
|
951 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
|
|
952
|
|
953 return cl;
|
|
954 }
|
|
955
|
|
956
|
|
957 static ngx_chain_t *
|
|
958 ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx,
|
|
959 size_t size)
|
|
960 {
|
|
961 ngx_buf_t *b;
|
|
962 ngx_chain_t *cl, **ll;
|
|
963
|
|
964 for (ll = &ctx->free_buffers, cl = ctx->free_buffers;
|
|
965 cl;
|
|
966 ll = &cl->next, cl = cl->next)
|
|
967 {
|
|
968 b = cl->buf;
|
|
969
|
|
970 if ((size_t) (b->end - b->start) >= size) {
|
|
971 *ll = cl->next;
|
|
972 cl->next = NULL;
|
|
973
|
|
974 b->pos = b->start;
|
|
975 b->temporary = 1;
|
|
976 b->shadow = NULL;
|
|
977
|
|
978 return cl;
|
|
979 }
|
|
980 }
|
|
981
|
|
982 cl = ngx_alloc_chain_link(pool);
|
|
983 if (cl == NULL) {
|
|
984 return NULL;
|
|
985 }
|
|
986
|
|
987 cl->buf = ngx_create_temp_buf(pool, size);
|
|
988 if (cl->buf == NULL) {
|
|
989 return NULL;
|
|
990 }
|
|
991
|
|
992 cl->next = NULL;
|
|
993
|
|
994 cl->buf->temporary = 1;
|
|
995 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
|
|
996
|
|
997 return cl;
|
|
998 }
|
|
999
|
|
1000
|
50
|
1001 static char *
|
206
|
1002 ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
|
50
|
1003 {
|
|
1004 ngx_http_charset_main_conf_t *mcf = conf;
|
|
1005
|
206
|
1006 char *rv;
|
|
1007 u_char *p, *dst2src, **pp;
|
|
1008 ngx_int_t src, dst;
|
|
1009 ngx_uint_t i, n;
|
|
1010 ngx_str_t *value;
|
|
1011 ngx_conf_t pvcf;
|
|
1012 ngx_http_charset_t *charset;
|
|
1013 ngx_http_charset_tables_t *table;
|
|
1014 ngx_http_charset_conf_ctx_t ctx;
|
50
|
1015
|
|
1016 value = cf->args->elts;
|
|
1017
|
|
1018 src = ngx_http_add_charset(&mcf->charsets, &value[1]);
|
|
1019 if (src == NGX_ERROR) {
|
|
1020 return NGX_CONF_ERROR;
|
|
1021 }
|
|
1022
|
|
1023 dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
|
|
1024 if (dst == NGX_ERROR) {
|
|
1025 return NGX_CONF_ERROR;
|
|
1026 }
|
|
1027
|
|
1028 if (src == dst) {
|
|
1029 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1030 "\"charset_map\" between the same charsets "
|
|
1031 "\"%V\" and \"%V\"", &value[1], &value[2]);
|
|
1032 return NGX_CONF_ERROR;
|
|
1033 }
|
|
1034
|
|
1035 table = mcf->tables.elts;
|
|
1036 for (i = 0; i < mcf->tables.nelts; i++) {
|
|
1037 if ((src == table->src && dst == table->dst)
|
|
1038 || (src == table->dst && dst == table->src))
|
|
1039 {
|
|
1040 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1041 "duplicate \"charset_map\" between "
|
|
1042 "\"%V\" and \"%V\"", &value[1], &value[2]);
|
|
1043 return NGX_CONF_ERROR;
|
|
1044 }
|
|
1045 }
|
|
1046
|
|
1047 table = ngx_array_push(&mcf->tables);
|
|
1048 if (table == NULL) {
|
|
1049 return NGX_CONF_ERROR;
|
|
1050 }
|
|
1051
|
|
1052 table->src = src;
|
|
1053 table->dst = dst;
|
|
1054
|
206
|
1055 if (ngx_strcasecmp(value[2].data, "utf-8") == 0) {
|
|
1056 table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN);
|
|
1057 if (table->src2dst == NULL) {
|
|
1058 return NGX_CONF_ERROR;
|
|
1059 }
|
|
1060
|
|
1061 table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *));
|
|
1062 if (table->dst2src == NULL) {
|
|
1063 return NGX_CONF_ERROR;
|
|
1064 }
|
|
1065
|
|
1066 dst2src = ngx_pcalloc(cf->pool, 256);
|
|
1067 if (dst2src == NULL) {
|
|
1068 return NGX_CONF_ERROR;
|
|
1069 }
|
|
1070
|
|
1071 pp = (u_char **) &table->dst2src[0];
|
|
1072 pp[0] = dst2src;
|
|
1073
|
|
1074 for (i = 0; i < 128; i++) {
|
|
1075 p = &table->src2dst[i * NGX_UTF_LEN];
|
|
1076 p[0] = '\1';
|
|
1077 p[1] = (u_char) i;
|
|
1078 dst2src[i] = (u_char) i;
|
|
1079 }
|
50
|
1080
|
206
|
1081 for (/* void */; i < 256; i++) {
|
|
1082 p = &table->src2dst[i * NGX_UTF_LEN];
|
|
1083 p[0] = '\1';
|
|
1084 p[1] = '?';
|
|
1085 }
|
|
1086
|
|
1087 } else {
|
|
1088 table->src2dst = ngx_palloc(cf->pool, 256);
|
|
1089 if (table->src2dst == NULL) {
|
|
1090 return NGX_CONF_ERROR;
|
|
1091 }
|
|
1092
|
|
1093 table->dst2src = ngx_palloc(cf->pool, 256);
|
|
1094 if (table->dst2src == NULL) {
|
|
1095 return NGX_CONF_ERROR;
|
|
1096 }
|
|
1097
|
|
1098 for (i = 0; i < 128; i++) {
|
|
1099 table->src2dst[i] = (u_char) i;
|
|
1100 table->dst2src[i] = (u_char) i;
|
|
1101 }
|
|
1102
|
|
1103 for (/* void */; i < 256; i++) {
|
|
1104 table->src2dst[i] = '?';
|
|
1105 table->dst2src[i] = '?';
|
|
1106 }
|
50
|
1107 }
|
|
1108
|
206
|
1109 charset = mcf->charsets.elts;
|
50
|
1110
|
206
|
1111 ctx.table = table;
|
|
1112 ctx.charset = &charset[dst];
|
|
1113 ctx.characters = 0;
|
50
|
1114
|
|
1115 pvcf = *cf;
|
206
|
1116 cf->ctx = &ctx;
|
|
1117 cf->handler = ngx_http_charset_map;
|
50
|
1118 cf->handler_conf = conf;
|
|
1119
|
|
1120 rv = ngx_conf_parse(cf, NULL);
|
|
1121
|
|
1122 *cf = pvcf;
|
|
1123
|
206
|
1124 if (ctx.characters) {
|
|
1125 n = ctx.charset->length;
|
|
1126 ctx.charset->length /= ctx.characters;
|
|
1127
|
|
1128 if (((n * 10) / ctx.characters) % 10 > 4) {
|
|
1129 ctx.charset->length++;
|
|
1130 }
|
|
1131 }
|
|
1132
|
50
|
1133 return rv;
|
|
1134 }
|
|
1135
|
|
1136
|
|
1137 static char *
|
206
|
1138 ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
|
50
|
1139 {
|
206
|
1140 u_char *p, *dst2src, **pp;
|
|
1141 uint32_t n;
|
|
1142 ngx_int_t src, dst;
|
|
1143 ngx_str_t *value;
|
|
1144 ngx_uint_t i;
|
|
1145 ngx_http_charset_tables_t *table;
|
|
1146 ngx_http_charset_conf_ctx_t *ctx;
|
50
|
1147
|
|
1148 if (cf->args->nelts != 2) {
|
|
1149 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
|
|
1150 return NGX_CONF_ERROR;
|
|
1151 }
|
|
1152
|
|
1153 value = cf->args->elts;
|
|
1154
|
|
1155 src = ngx_hextoi(value[0].data, value[0].len);
|
|
1156 if (src == NGX_ERROR || src > 255) {
|
|
1157 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1158 "invalid value \"%V\"", &value[0]);
|
|
1159 return NGX_CONF_ERROR;
|
|
1160 }
|
|
1161
|
206
|
1162 ctx = cf->ctx;
|
|
1163 table = ctx->table;
|
|
1164
|
|
1165 if (ctx->charset->utf8) {
|
|
1166 p = &table->src2dst[src * NGX_UTF_LEN];
|
|
1167
|
|
1168 *p++ = (u_char) (value[1].len / 2);
|
|
1169
|
|
1170 for (i = 0; i < value[1].len; i += 2) {
|
|
1171 dst = ngx_hextoi(&value[1].data[i], 2);
|
|
1172 if (dst == NGX_ERROR || dst > 255) {
|
|
1173 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1174 "invalid value \"%V\"", &value[1]);
|
|
1175 return NGX_CONF_ERROR;
|
|
1176 }
|
|
1177
|
|
1178 *p++ = (u_char) dst;
|
|
1179 }
|
|
1180
|
|
1181 i /= 2;
|
|
1182
|
|
1183 ctx->charset->length += i;
|
|
1184 ctx->characters++;
|
|
1185
|
|
1186 p = &table->src2dst[src * NGX_UTF_LEN] + 1;
|
|
1187
|
|
1188 n = ngx_utf_decode(&p, i);
|
|
1189
|
|
1190 if (n > 0xffff) {
|
|
1191 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1192 "invalid value \"%V\"", &value[1]);
|
|
1193 return NGX_CONF_ERROR;
|
|
1194 }
|
|
1195
|
|
1196 pp = (u_char **) &table->dst2src[0];
|
|
1197
|
|
1198 dst2src = pp[n >> 8];
|
|
1199
|
|
1200 if (dst2src == NULL) {
|
|
1201 dst2src = ngx_pcalloc(cf->pool, 256);
|
|
1202 if (dst2src == NULL) {
|
|
1203 return NGX_CONF_ERROR;
|
|
1204 }
|
|
1205
|
|
1206 pp[n >> 8] = dst2src;
|
|
1207 }
|
|
1208
|
|
1209 dst2src[n & 0xff] = (u_char) src;
|
|
1210
|
|
1211 } else {
|
|
1212 dst = ngx_hextoi(value[1].data, value[1].len);
|
|
1213 if (dst == NGX_ERROR || dst > 255) {
|
|
1214 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
|
|
1215 "invalid value \"%V\"", &value[1]);
|
|
1216 return NGX_CONF_ERROR;
|
|
1217 }
|
|
1218
|
|
1219 table->src2dst[src] = (u_char) dst;
|
|
1220 table->dst2src[dst] = (u_char) src;
|
50
|
1221 }
|
|
1222
|
|
1223 return NGX_CONF_OK;
|
|
1224 }
|
|
1225
|
|
1226
|
|
1227 static char *
|
|
1228 ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
|
|
1229 {
|
|
1230 char *p = conf;
|
|
1231
|
|
1232 ngx_int_t *cp;
|
|
1233 ngx_str_t *value;
|
|
1234 ngx_http_charset_main_conf_t *mcf;
|
|
1235
|
|
1236 cp = (ngx_int_t *) (p + cmd->offset);
|
|
1237
|
|
1238 if (*cp != NGX_CONF_UNSET) {
|
|
1239 return "is duplicate";
|
|
1240 }
|
|
1241
|
78
|
1242 value = cf->args->elts;
|
|
1243
|
|
1244 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
|
|
1245 && ngx_strcmp(value[1].data, "off") == 0)
|
|
1246 {
|
|
1247 *cp = NGX_HTTP_NO_CHARSET;
|
|
1248 return NGX_CONF_OK;
|
|
1249 }
|
|
1250
|
50
|
1251 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1252 ngx_http_charset_filter_module);
|
|
1253
|
|
1254 *cp = ngx_http_add_charset(&mcf->charsets, &value[1]);
|
|
1255 if (*cp == NGX_ERROR) {
|
|
1256 return NGX_CONF_ERROR;
|
|
1257 }
|
|
1258
|
|
1259 return NGX_CONF_OK;
|
|
1260 }
|
|
1261
|
|
1262
|
|
1263 static ngx_int_t
|
|
1264 ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
|
|
1265 {
|
|
1266 ngx_uint_t i;
|
|
1267 ngx_http_charset_t *c;
|
|
1268
|
|
1269 c = charsets->elts;
|
|
1270 for (i = 0; i < charsets->nelts; i++) {
|
|
1271 if (name->len != c[i].name.len) {
|
|
1272 continue;
|
|
1273 }
|
|
1274
|
|
1275 if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
|
|
1276 break;
|
|
1277 }
|
|
1278 }
|
|
1279
|
|
1280 if (i < charsets->nelts) {
|
|
1281 return i;
|
|
1282 }
|
|
1283
|
|
1284 c = ngx_array_push(charsets);
|
|
1285 if (c == NULL) {
|
|
1286 return NGX_ERROR;
|
|
1287 }
|
|
1288
|
|
1289 c->tables = NULL;
|
|
1290 c->name = *name;
|
206
|
1291 c->length = 0;
|
50
|
1292
|
72
|
1293 if (ngx_strcasecmp(name->data, "utf-8") == 0) {
|
|
1294 c->utf8 = 1;
|
|
1295 }
|
|
1296
|
50
|
1297 return i;
|
|
1298 }
|
|
1299
|
|
1300
|
|
1301 static ngx_int_t
|
|
1302 ngx_http_charset_filter_init(ngx_cycle_t *cycle)
|
|
1303 {
|
|
1304 ngx_http_next_header_filter = ngx_http_top_header_filter;
|
|
1305 ngx_http_top_header_filter = ngx_http_charset_header_filter;
|
|
1306
|
|
1307 ngx_http_next_body_filter = ngx_http_top_body_filter;
|
|
1308 ngx_http_top_body_filter = ngx_http_charset_body_filter;
|
|
1309
|
|
1310 return NGX_OK;
|
|
1311 }
|
|
1312
|
|
1313
|
|
1314 static void *
|
|
1315 ngx_http_charset_create_main_conf(ngx_conf_t *cf)
|
|
1316 {
|
|
1317 ngx_http_charset_main_conf_t *mcf;
|
|
1318
|
|
1319 mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
|
|
1320 if (mcf == NULL) {
|
|
1321 return NGX_CONF_ERROR;
|
|
1322 }
|
|
1323
|
|
1324 if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t))
|
78
|
1325 == NGX_ERROR)
|
50
|
1326 {
|
|
1327 return NGX_CONF_ERROR;
|
|
1328 }
|
|
1329
|
78
|
1330 if (ngx_array_init(&mcf->tables, cf->pool, 1,
|
50
|
1331 sizeof(ngx_http_charset_tables_t)) == NGX_ERROR)
|
|
1332 {
|
|
1333 return NGX_CONF_ERROR;
|
|
1334 }
|
|
1335
|
78
|
1336 if (ngx_array_init(&mcf->recodes, cf->pool, 2,
|
|
1337 sizeof(ngx_http_charset_recode_t)) == NGX_ERROR)
|
|
1338 {
|
|
1339 return NGX_CONF_ERROR;
|
|
1340 }
|
|
1341
|
50
|
1342 return mcf;
|
|
1343 }
|
|
1344
|
|
1345
|
|
1346 static void *
|
|
1347 ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
|
|
1348 {
|
|
1349 ngx_http_charset_loc_conf_t *lcf;
|
|
1350
|
|
1351 lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));
|
|
1352 if (lcf == NULL) {
|
|
1353 return NGX_CONF_ERROR;
|
|
1354 }
|
|
1355
|
78
|
1356 lcf->charset = NGX_CONF_UNSET;
|
50
|
1357 lcf->source_charset = NGX_CONF_UNSET;
|
184
|
1358 lcf->override_charset = NGX_CONF_UNSET;
|
50
|
1359
|
|
1360 return lcf;
|
|
1361 }
|
|
1362
|
|
1363
|
|
1364 static char *
|
|
1365 ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
|
|
1366 {
|
|
1367 ngx_http_charset_loc_conf_t *prev = parent;
|
|
1368 ngx_http_charset_loc_conf_t *conf = child;
|
|
1369
|
78
|
1370 ngx_uint_t i;
|
|
1371 ngx_http_charset_recode_t *recode;
|
|
1372 ngx_http_charset_main_conf_t *mcf;
|
50
|
1373
|
184
|
1374 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
|
78
|
1375 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_NO_CHARSET);
|
50
|
1376
|
|
1377 if (conf->source_charset == NGX_CONF_UNSET) {
|
|
1378 conf->source_charset = prev->source_charset;
|
|
1379 }
|
|
1380
|
78
|
1381 if (conf->charset == NGX_HTTP_NO_CHARSET
|
|
1382 || conf->source_charset == NGX_CONF_UNSET
|
|
1383 || conf->charset == conf->source_charset)
|
50
|
1384 {
|
78
|
1385 return NGX_CONF_OK;
|
50
|
1386 }
|
|
1387
|
78
|
1388 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1389 ngx_http_charset_filter_module);
|
|
1390 recode = mcf->recodes.elts;
|
|
1391 for (i = 0; i < mcf->recodes.nelts; i++) {
|
|
1392 if (conf->source_charset == recode[i].src
|
|
1393 && conf->charset == recode[i].dst)
|
|
1394 {
|
|
1395 return NGX_CONF_OK;
|
|
1396 }
|
50
|
1397 }
|
|
1398
|
78
|
1399 recode = ngx_array_push(&mcf->recodes);
|
|
1400 if (recode == NULL) {
|
50
|
1401 return NGX_CONF_ERROR;
|
|
1402 }
|
|
1403
|
78
|
1404 recode->src = conf->source_charset;
|
|
1405 recode->dst = conf->charset;
|
|
1406
|
50
|
1407 return NGX_CONF_OK;
|
|
1408 }
|
78
|
1409
|
|
1410
|
|
1411 static ngx_int_t
|
|
1412 ngx_http_charset_postconfiguration(ngx_conf_t *cf)
|
|
1413 {
|
184
|
1414 u_char **src, **dst;
|
78
|
1415 ngx_int_t c;
|
|
1416 ngx_uint_t i, t;
|
|
1417 ngx_http_charset_t *charset;
|
|
1418 ngx_http_charset_recode_t *recode;
|
|
1419 ngx_http_charset_tables_t *tables;
|
|
1420 ngx_http_charset_main_conf_t *mcf;
|
|
1421
|
|
1422 mcf = ngx_http_conf_get_module_main_conf(cf,
|
|
1423 ngx_http_charset_filter_module);
|
|
1424
|
|
1425 recode = mcf->recodes.elts;
|
|
1426 tables = mcf->tables.elts;
|
|
1427 charset = mcf->charsets.elts;
|
|
1428
|
|
1429 for (i = 0; i < mcf->recodes.nelts; i++) {
|
|
1430
|
|
1431 c = recode[i].src;
|
|
1432
|
|
1433 for (t = 0; t < mcf->tables.nelts; t++) {
|
|
1434
|
|
1435 if (c == tables[t].src && recode[i].dst == tables[t].dst) {
|
|
1436 goto next;
|
|
1437 }
|
|
1438
|
|
1439 if (c == tables[t].dst && recode[i].dst == tables[t].src) {
|
|
1440 goto next;
|
|
1441 }
|
|
1442 }
|
|
1443
|
|
1444 ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
|
|
1445 " no \"charset_map\" between the charsets "
|
|
1446 "\"%V\" and \"%V\"",
|
|
1447 &charset[c].name, &charset[recode[i].dst].name);
|
|
1448 return NGX_ERROR;
|
|
1449
|
|
1450 next:
|
|
1451 continue;
|
|
1452 }
|
|
1453
|
184
|
1454
|
|
1455 for (t = 0; t < mcf->tables.nelts; t++) {
|
|
1456
|
|
1457 src = charset[tables[t].src].tables;
|
|
1458
|
|
1459 if (src == NULL) {
|
|
1460 src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
|
|
1461 if (src == NULL) {
|
|
1462 return NGX_ERROR;
|
|
1463 }
|
|
1464
|
|
1465 charset[tables[t].src].tables = src;
|
|
1466 }
|
|
1467
|
|
1468 dst = charset[tables[t].dst].tables;
|
|
1469
|
|
1470 if (dst == NULL) {
|
|
1471 dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
|
|
1472 if (dst == NULL) {
|
|
1473 return NGX_ERROR;
|
|
1474 }
|
|
1475
|
|
1476 charset[tables[t].dst].tables = dst;
|
|
1477 }
|
|
1478
|
|
1479 src[tables[t].dst] = tables[t].src2dst;
|
|
1480 dst[tables[t].src] = tables[t].dst2src;
|
|
1481 }
|
|
1482
|
78
|
1483 return NGX_OK;
|
|
1484 }
|