comparison src/http/modules/ngx_http_charset_filter_module.c @ 2889:0bb8c54f4c45

refactor ngx_http_charset_header_filter()
author Igor Sysoev <igor@sysoev.ru>
date Mon, 25 May 2009 15:57:43 +0000
parents 3cd2790f4a9b
children a4302032b831
comparison
equal deleted inserted replaced
2888:512d164a8348 2889:0bb8c54f4c45
7 #include <ngx_config.h> 7 #include <ngx_config.h>
8 #include <ngx_core.h> 8 #include <ngx_core.h>
9 #include <ngx_http.h> 9 #include <ngx_http.h>
10 10
11 11
12 #define NGX_HTTP_NO_CHARSET -2 12 #define NGX_HTTP_CHARSET_OFF -2
13 #define NGX_HTTP_CHARSET_VAR 0x10000 13 #define NGX_HTTP_NO_CHARSET -3
14 #define NGX_HTTP_CHARSET_VAR 0x10000
14 15
15 /* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */ 16 /* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
16 #define NGX_UTF_LEN 4 17 #define NGX_UTF_LEN 4
17 18
18 #define NGX_HTML_ENTITY_LEN (sizeof("&#1114111;") - 1) 19 #define NGX_HTML_ENTITY_LEN (sizeof("&#1114111;") - 1)
59 60
60 61
61 typedef struct { 62 typedef struct {
62 u_char *table; 63 u_char *table;
63 ngx_int_t charset; 64 ngx_int_t charset;
65 ngx_str_t charset_name;
64 66
65 ngx_chain_t *busy; 67 ngx_chain_t *busy;
66 ngx_chain_t *free_bufs; 68 ngx_chain_t *free_bufs;
67 ngx_chain_t *free_buffers; 69 ngx_chain_t *free_buffers;
68 70
80 ngx_http_charset_t *charset; 82 ngx_http_charset_t *charset;
81 ngx_uint_t characters; 83 ngx_uint_t characters;
82 } ngx_http_charset_conf_ctx_t; 84 } ngx_http_charset_conf_ctx_t;
83 85
84 86
85 static ngx_int_t ngx_http_charset_get_charset(ngx_http_charset_t *charsets, 87 static ngx_int_t ngx_http_destination_charset(ngx_http_request_t *r,
86 ngx_uint_t n, ngx_str_t *charset); 88 ngx_str_t *name);
87 static ngx_int_t ngx_http_charset_set_charset(ngx_http_request_t *r, 89 static ngx_int_t ngx_http_main_request_charset(ngx_http_request_t *r,
90 ngx_str_t *name);
91 static ngx_int_t ngx_http_source_charset(ngx_http_request_t *r,
92 ngx_str_t *name);
93 static ngx_int_t ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name);
94 static ngx_inline void ngx_http_set_charset(ngx_http_request_t *r,
95 ngx_str_t *charset);
96 static ngx_int_t ngx_http_charset_ctx(ngx_http_request_t *r,
88 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset); 97 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset);
89 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table); 98 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);
90 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, 99 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool,
91 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx); 100 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
92 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, 101 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool,
205 214
206 static ngx_int_t 215 static ngx_int_t
207 ngx_http_charset_header_filter(ngx_http_request_t *r) 216 ngx_http_charset_header_filter(ngx_http_request_t *r)
208 { 217 {
209 ngx_int_t charset, source_charset; 218 ngx_int_t charset, source_charset;
210 ngx_str_t *mc, *from, *to, s; 219 ngx_str_t dst, src;
211 ngx_uint_t n;
212 ngx_http_charset_t *charsets; 220 ngx_http_charset_t *charsets;
213 ngx_http_charset_ctx_t *ctx;
214 ngx_http_variable_value_t *vv;
215 ngx_http_charset_loc_conf_t *lcf, *mlcf;
216 ngx_http_charset_main_conf_t *mcf; 221 ngx_http_charset_main_conf_t *mcf;
217 222
218 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
219
220 charsets = mcf->charsets.elts;
221 n = mcf->charsets.nelts;
222
223 /* destination charset */
224
225 if (r == r->main) { 223 if (r == r->main) {
226 224 charset = ngx_http_destination_charset(r, &dst);
227 if (!r->ignore_content_encoding
228 && r->headers_out.content_encoding
229 && r->headers_out.content_encoding->value.len)
230 {
231 return ngx_http_next_header_filter(r);
232 }
233
234 if (r->headers_out.content_type.len == 0) {
235 return ngx_http_next_header_filter(r);
236 }
237
238 if (r->headers_out.override_charset
239 && r->headers_out.override_charset->len)
240 {
241 charset = ngx_http_charset_get_charset(charsets, n,
242 r->headers_out.override_charset);
243
244 if (charset == NGX_HTTP_NO_CHARSET) {
245 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
246 "unknown charset \"%V\" to override",
247 r->headers_out.override_charset);
248
249 return ngx_http_next_header_filter(r);
250 }
251
252 } else {
253 mlcf = ngx_http_get_module_loc_conf(r,
254 ngx_http_charset_filter_module);
255 charset = mlcf->charset;
256
257 if (charset == NGX_HTTP_NO_CHARSET) {
258 return ngx_http_next_header_filter(r);
259 }
260
261 if (r->headers_out.charset.len) {
262 if (mlcf->override_charset == 0) {
263 return ngx_http_next_header_filter(r);
264 }
265
266 } else {
267 if (ngx_http_test_content_type(r, &mlcf->types) == NULL) {
268 return ngx_http_next_header_filter(r);
269 }
270 }
271
272 if (charset >= NGX_HTTP_CHARSET_VAR) {
273 vv = ngx_http_get_indexed_variable(r,
274 charset - NGX_HTTP_CHARSET_VAR);
275
276 if (vv == NULL || vv->not_found) {
277 return NGX_ERROR;
278 }
279
280 s.len = vv->len;
281 s.data = vv->data;
282
283 charset = ngx_http_charset_get_charset(charsets, n, &s);
284 }
285 }
286 225
287 } else { 226 } else {
288 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module); 227 charset = ngx_http_main_request_charset(r, &dst);
289 228 }
290 if (ctx == NULL) { 229
291 230 if (charset == NGX_ERROR) {
292 mc = &r->main->headers_out.charset; 231 return NGX_ERROR;
293 232 }
294 if (mc->len == 0) { 233
295 return ngx_http_next_header_filter(r); 234 if (charset == NGX_DECLINED) {
296 } 235 return ngx_http_next_header_filter(r);
297 236 }
298 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t)); 237
299 if (ctx == NULL) { 238 /* charset: charset index or NGX_HTTP_NO_CHARSET */
300 return NGX_ERROR; 239
301 } 240 source_charset = ngx_http_source_charset(r, &src);
302 241
303 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module); 242 if (source_charset == NGX_ERROR) {
304 243 return NGX_ERROR;
305 charset = ngx_http_charset_get_charset(charsets, n, mc); 244 }
306 245
307 ctx->charset = charset; 246 /*
308 247 * source_charset: charset index, NGX_HTTP_NO_CHARSET,
309 } else { 248 * or NGX_HTTP_CHARSET_OFF
310 charset = ctx->charset; 249 */
311 } 250
312 } 251 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
313 252 "charset: \"%V\" > \"%V\"", &src, &dst);
314 /* source charset */ 253
315 254 if (source_charset == NGX_HTTP_CHARSET_OFF) {
316 if (r->headers_out.charset.len == 0) { 255
317 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module); 256 if (r == r->main) {
318 257 ngx_http_set_charset(r, &dst);
319 source_charset = lcf->source_charset; 258 }
320 259
321 if (source_charset >= NGX_HTTP_CHARSET_VAR) { 260 return ngx_http_next_header_filter(r);
322 vv = ngx_http_get_indexed_variable(r, 261 }
323 source_charset - NGX_HTTP_CHARSET_VAR);
324
325 if (vv == NULL || vv->not_found) {
326 return NGX_ERROR;
327 }
328
329 s.len = vv->len;
330 s.data = vv->data;
331
332 source_charset = ngx_http_charset_get_charset(charsets, n, &s);
333 }
334
335 if (charset != NGX_HTTP_NO_CHARSET) {
336 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
337 source_charset);
338 }
339
340 if (source_charset == NGX_CONF_UNSET) {
341 return ngx_http_next_header_filter(r);
342 }
343
344 from = &charsets[source_charset].name;
345 to = &r->main->headers_out.charset;
346
347 goto no_charset_map;
348 }
349
350 source_charset = ngx_http_charset_get_charset(charsets, n,
351 &r->headers_out.charset);
352 262
353 if (charset == NGX_HTTP_NO_CHARSET 263 if (charset == NGX_HTTP_NO_CHARSET
354 || source_charset == NGX_HTTP_NO_CHARSET) 264 || source_charset == NGX_HTTP_NO_CHARSET)
355 { 265 {
356 if (charset != source_charset 266 if (source_charset != charset
357 || ngx_strcasecmp(r->main->headers_out.charset.data, 267 || ngx_strncasecmp(dst.data, src.data, dst.len) != 0)
358 r->headers_out.charset.data)
359 != 0)
360 { 268 {
361 from = &r->headers_out.charset;
362 to = (charset == NGX_HTTP_NO_CHARSET) ?
363 &r->main->headers_out.charset:
364 &charsets[charset].name;
365
366 goto no_charset_map; 269 goto no_charset_map;
367 } 270 }
368 271
272 ngx_http_set_charset(r, &dst);
273
369 return ngx_http_next_header_filter(r); 274 return ngx_http_next_header_filter(r);
370 } 275 }
276
277 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
278 charsets = mcf->charsets.elts;
371 279
372 if (source_charset != charset 280 if (source_charset != charset
373 && (charsets[source_charset].tables == NULL 281 && (charsets[source_charset].tables == NULL
374 || charsets[source_charset].tables[charset] == NULL)) 282 || charsets[source_charset].tables[charset] == NULL))
375 { 283 {
376 from = &charsets[source_charset].name;
377 to = &charsets[charset].name;
378
379 goto no_charset_map; 284 goto no_charset_map;
380 } 285 }
381 286
382 r->headers_out.content_type.len = r->headers_out.content_type_len; 287 r->headers_out.content_type.len = r->headers_out.content_type_len;
383 288
384 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset, 289 ngx_http_set_charset(r, &dst);
385 source_charset); 290
291 if (source_charset != charset) {
292 return ngx_http_charset_ctx(r, charsets, charset, source_charset);
293 }
294
295 return ngx_http_next_header_filter(r);
386 296
387 no_charset_map: 297 no_charset_map:
388 298
389 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0, 299 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
390 "no \"charset_map\" between the charsets \"%V\" and \"%V\"", 300 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
391 from, to); 301 &src, &dst);
392 302
393 return ngx_http_next_header_filter(r); 303 return ngx_http_next_header_filter(r);
394 } 304 }
395 305
396 306
397 static ngx_int_t 307 static ngx_int_t
398 ngx_http_charset_get_charset(ngx_http_charset_t *charsets, ngx_uint_t n, 308 ngx_http_destination_charset(ngx_http_request_t *r, ngx_str_t *name)
399 ngx_str_t *charset) 309 {
400 { 310 ngx_int_t charset;
401 ngx_uint_t i; 311 ngx_http_charset_t *charsets;
312 ngx_http_variable_value_t *vv;
313 ngx_http_charset_loc_conf_t *mlcf;
314 ngx_http_charset_main_conf_t *mcf;
315
316 if (!r->ignore_content_encoding
317 && r->headers_out.content_encoding
318 && r->headers_out.content_encoding->value.len)
319 {
320 return NGX_DECLINED;
321 }
322
323 if (r->headers_out.content_type.len == 0) {
324 return NGX_DECLINED;
325 }
326
327 if (r->headers_out.override_charset
328 && r->headers_out.override_charset->len)
329 {
330 *name = *r->headers_out.override_charset;
331
332 charset = ngx_http_get_charset(r, name);
333
334 if (charset != NGX_HTTP_NO_CHARSET) {
335 return charset;
336 }
337
338 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
339 "unknown charset \"%V\" to override", name);
340
341 return NGX_DECLINED;
342 }
343
344 mlcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
345 charset = mlcf->charset;
346
347 if (charset == NGX_HTTP_CHARSET_OFF) {
348 return NGX_DECLINED;
349 }
350
351 if (r->headers_out.charset.len) {
352 if (mlcf->override_charset == 0) {
353 return NGX_DECLINED;
354 }
355
356 } else {
357 if (ngx_http_test_content_type(r, &mlcf->types) == NULL) {
358 return NGX_DECLINED;
359 }
360 }
361
362 if (charset < NGX_HTTP_CHARSET_VAR) {
363 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
364 charsets = mcf->charsets.elts;
365 *name = charsets[charset].name;
366 return charset;
367 }
368
369 vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR);
370
371 if (vv == NULL || vv->not_found) {
372 return NGX_ERROR;
373 }
374
375 name->len = vv->len;
376 name->data = vv->data;
377
378 return ngx_http_get_charset(r, name);
379 }
380
381
382 static ngx_int_t
383 ngx_http_main_request_charset(ngx_http_request_t *r, ngx_str_t *src)
384 {
385 ngx_int_t charset;
386 ngx_str_t *main_charset;
387 ngx_http_charset_ctx_t *ctx;
388
389 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);
390
391 if (ctx) {
392 *src = ctx->charset_name;
393 return ctx->charset;
394 }
395
396 main_charset = &r->main->headers_out.charset;
397
398 if (main_charset->len == 0) {
399 return NGX_DECLINED;
400 }
401
402 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
403 if (ctx == NULL) {
404 return NGX_ERROR;
405 }
406
407 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module);
408
409 charset = ngx_http_get_charset(r, main_charset);
410
411 ctx->charset = charset;
412 ctx->charset_name = *main_charset;
413
414 return charset;
415 }
416
417
418 static ngx_int_t
419 ngx_http_source_charset(ngx_http_request_t *r, ngx_str_t *name)
420 {
421 ngx_int_t charset;
422 ngx_http_charset_t *charsets;
423 ngx_http_variable_value_t *vv;
424 ngx_http_charset_loc_conf_t *lcf;
425 ngx_http_charset_main_conf_t *mcf;
426
427 if (r->headers_out.charset.len) {
428 *name = r->headers_out.charset;
429 return ngx_http_get_charset(r, name);
430 }
431
432 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
433
434 charset = lcf->source_charset;
435
436 if (charset == NGX_HTTP_CHARSET_OFF) {
437 name->len = 0;
438 return charset;
439 }
440
441 if (charset < NGX_HTTP_CHARSET_VAR) {
442 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
443 charsets = mcf->charsets.elts;
444 *name = charsets[charset].name;
445 return charset;
446 }
447
448 vv = ngx_http_get_indexed_variable(r, charset - NGX_HTTP_CHARSET_VAR);
449
450 if (vv == NULL || vv->not_found) {
451 return NGX_ERROR;
452 }
453
454 name->len = vv->len;
455 name->data = vv->data;
456
457 return ngx_http_get_charset(r, name);
458 }
459
460
461 static ngx_int_t
462 ngx_http_get_charset(ngx_http_request_t *r, ngx_str_t *name)
463 {
464 ngx_uint_t i, n;
465 ngx_http_charset_t *charset;
466 ngx_http_charset_main_conf_t *mcf;
467
468 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
469
470 charset = mcf->charsets.elts;
471 n = mcf->charsets.nelts;
402 472
403 for (i = 0; i < n; i++) { 473 for (i = 0; i < n; i++) {
404 if (charsets[i].name.len != charset->len) { 474 if (charset[i].name.len != name->len) {
405 continue; 475 continue;
406 } 476 }
407 477
408 if (ngx_strncasecmp(charsets[i].name.data, charset->data, charset->len) 478 if (ngx_strncasecmp(charset[i].name.data, name->data, name->len) == 0) {
409 == 0)
410 {
411 return i; 479 return i;
412 } 480 }
413 } 481 }
414 482
415 return NGX_HTTP_NO_CHARSET; 483 return NGX_HTTP_NO_CHARSET;
416 } 484 }
417 485
418 486
419 static ngx_int_t 487 static ngx_inline void
420 ngx_http_charset_set_charset(ngx_http_request_t *r, 488 ngx_http_set_charset(ngx_http_request_t *r, ngx_str_t *charset)
421 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset) 489 {
422 {
423 ngx_http_charset_ctx_t *ctx;
424
425 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY 490 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
426 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY) 491 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
427 { 492 {
428 /* 493 /*
429 * do not set charset for the redirect because NN 4.x 494 * do not set charset for the redirect because NN 4.x
430 * use this charset instead of the next page charset 495 * use this charset instead of the next page charset
431 */ 496 */
432 497
433 r->headers_out.charset.len = 0; 498 r->headers_out.charset.len = 0;
434 499 return;
435 return ngx_http_next_header_filter(r); 500 }
436 } 501
437 502 r->headers_out.charset = *charset;
438 r->headers_out.charset = charsets[charset].name; 503 }
439 r->utf8 = charsets[charset].utf8; 504
440 505
441 if (source_charset == NGX_CONF_UNSET || source_charset == charset) { 506 static ngx_int_t
442 return ngx_http_next_header_filter(r); 507 ngx_http_charset_ctx(ngx_http_request_t *r, ngx_http_charset_t *charsets,
443 } 508 ngx_int_t charset, ngx_int_t source_charset)
509 {
510 ngx_http_charset_ctx_t *ctx;
444 511
445 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t)); 512 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
446 if (ctx == NULL) { 513 if (ctx == NULL) {
447 return NGX_ERROR; 514 return NGX_ERROR;
448 } 515 }
1336 value = cf->args->elts; 1403 value = cf->args->elts;
1337 1404
1338 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset) 1405 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
1339 && ngx_strcmp(value[1].data, "off") == 0) 1406 && ngx_strcmp(value[1].data, "off") == 0)
1340 { 1407 {
1341 *cp = NGX_HTTP_NO_CHARSET; 1408 *cp = NGX_HTTP_CHARSET_OFF;
1342 return NGX_CONF_OK; 1409 return NGX_CONF_OK;
1343 } 1410 }
1344 1411
1345 1412
1346 if (value[1].data[0] == '$') { 1413 if (value[1].data[0] == '$') {
1487 { 1554 {
1488 return NGX_CONF_ERROR; 1555 return NGX_CONF_ERROR;
1489 } 1556 }
1490 1557
1491 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0); 1558 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
1492 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_NO_CHARSET); 1559 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_CHARSET_OFF);
1493 1560 ngx_conf_merge_value(conf->source_charset, prev->source_charset,
1494 if (conf->source_charset == NGX_CONF_UNSET) { 1561 NGX_HTTP_CHARSET_OFF);
1495 conf->source_charset = prev->source_charset; 1562
1496 } 1563 if (conf->charset == NGX_HTTP_CHARSET_OFF
1497 1564 || conf->source_charset == NGX_HTTP_CHARSET_OFF
1498 if (conf->charset == NGX_HTTP_NO_CHARSET
1499 || conf->source_charset == NGX_CONF_UNSET
1500 || conf->charset == conf->source_charset) 1565 || conf->charset == conf->source_charset)
1501 { 1566 {
1502 return NGX_CONF_OK; 1567 return NGX_CONF_OK;
1503 } 1568 }
1504 1569