# HG changeset patch # User Igor Sysoev # Date 1232982595 0 # Node ID d7d3a72c07d872c4d8a43eb085d8912615c2304e # Parent 92488f6e80dba1ac15972eb988ab2812b8941b87 r2121, r2126 merge: *) fix utf-8 names in autoindex *) rename ngx_utf_...() to ngx_utf8_...() diff --git a/src/core/ngx_string.c b/src/core/ngx_string.c --- a/src/core/ngx_string.c +++ b/src/core/ngx_string.c @@ -952,16 +952,16 @@ ngx_decode_base64(ngx_str_t *dst, ngx_st /* - * ngx_utf_decode() decodes two and more bytes UTF sequences only + * ngx_utf8_decode() decodes two and more bytes UTF sequences only * the return values: * 0x80 - 0x10ffff valid character - * 0x10ffff - 0xfffffffd invalid sequence + * 0x110000 - 0xfffffffd invalid sequence * 0xfffffffe incomplete sequence * 0xffffffff error */ uint32_t -ngx_utf_decode(u_char **p, size_t n) +ngx_utf8_decode(u_char **p, size_t n) { size_t len; uint32_t u, i, valid; @@ -1018,31 +1018,26 @@ ngx_utf_decode(u_char **p, size_t n) size_t -ngx_utf_length(u_char *p, size_t n) +ngx_utf8_length(u_char *p, size_t n) { - u_char c; - size_t len; - ngx_uint_t i; + u_char c, *last; + size_t len; - for (len = 0, i = 0; i < n; len++, i++) { + last = p + n; - c = p[i]; + for (len = 0; p < last; len++) { + + c = *p; if (c < 0x80) { + p++; continue; } - if (c >= 0xc0) { - for (c <<= 1; c & 0x80; c <<= 1) { - i++; - } - - continue; + if (ngx_utf8_decode(&p, n) > 0x10ffff) { + /* invalid UTF-8 */ + return n; } - - /* invalid utf */ - - return n; } return len; @@ -1050,36 +1045,45 @@ ngx_utf_length(u_char *p, size_t n) u_char * -ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n) +ngx_utf8_cpystrn(u_char *dst, u_char *src, size_t n, size_t len) { - u_char c; + u_char c, *next; if (n == 0) { return dst; } - for ( /* void */ ; --n; dst++, src++) { + while (--n) { c = *src; *dst = c; if (c < 0x80) { - if (*dst != '\0') { + + if (c != '\0') { + dst++; + src++; + len--; + continue; } return dst; } - if (c >= 0xc0) { - for (c <<= 1; c & 0x80; c <<= 1) { - *++dst = *++src; - } + next = src; - continue; + if (ngx_utf8_decode(&next, len) > 0x10ffff) { + /* invalid UTF-8 */ + break; } - /* invalid utf */ + len--; + + while (src < next) { + *++dst = *++src; + len--; + } } *dst = '\0'; diff --git a/src/core/ngx_string.h b/src/core/ngx_string.h --- a/src/core/ngx_string.h +++ b/src/core/ngx_string.h @@ -151,9 +151,9 @@ u_char *ngx_hex_dump(u_char *dst, u_char void ngx_encode_base64(ngx_str_t *dst, ngx_str_t *src); ngx_int_t ngx_decode_base64(ngx_str_t *dst, ngx_str_t *src); -uint32_t ngx_utf_decode(u_char **p, size_t n); -size_t ngx_utf_length(u_char *p, size_t n); -u_char *ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n); +uint32_t ngx_utf8_decode(u_char **p, size_t n); +size_t ngx_utf8_length(u_char *p, size_t n); +u_char *ngx_utf8_cpystrn(u_char *dst, u_char *src, size_t n, size_t len); #define NGX_ESCAPE_URI 0 diff --git a/src/http/modules/ngx_http_autoindex_module.c b/src/http/modules/ngx_http_autoindex_module.c --- a/src/http/modules/ngx_http_autoindex_module.c +++ b/src/http/modules/ngx_http_autoindex_module.c @@ -135,7 +135,7 @@ ngx_http_autoindex_handler(ngx_http_requ { u_char *last, *filename, scale; off_t length; - size_t len, copy, allocated, root; + size_t len, utf_len, allocated, root; ngx_tm_t tm; ngx_err_t err; ngx_buf_t *b; @@ -329,7 +329,7 @@ ngx_http_autoindex_handler(ngx_http_requ NGX_ESCAPE_HTML); if (r->utf8) { - entry->utf_len = ngx_utf_length(entry->name.data, entry->name.len); + entry->utf_len = ngx_utf8_length(entry->name.data, entry->name.len); } else { entry->utf_len = len; } @@ -412,15 +412,16 @@ ngx_http_autoindex_handler(ngx_http_requ len = entry[i].utf_len; - if (entry[i].name.len - len) { + if (entry[i].name.len != len) { if (len > NGX_HTTP_AUTOINDEX_NAME_LEN) { - copy = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1; + utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1; } else { - copy = NGX_HTTP_AUTOINDEX_NAME_LEN + 1; + utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN + 1; } - b->last = ngx_utf_cpystrn(b->last, entry[i].name.data, copy); + b->last = ngx_utf8_cpystrn(b->last, entry[i].name.data, + utf_len, entry[i].name.len + 1); last = b->last; } else { diff --git a/src/http/modules/ngx_http_charset_filter_module.c b/src/http/modules/ngx_http_charset_filter_module.c --- a/src/http/modules/ngx_http_charset_filter_module.c +++ b/src/http/modules/ngx_http_charset_filter_module.c @@ -642,7 +642,7 @@ ngx_http_charset_recode_from_utf8(ngx_po size = buf->last - src; saved = src; - n = ngx_utf_decode(&saved, size); + n = ngx_utf8_decode(&saved, size); if (n == 0xfffffffe) { /* incomplete UTF-8 symbol */ @@ -710,7 +710,7 @@ ngx_http_charset_recode_from_utf8(ngx_po } saved = ctx->saved; - n = ngx_utf_decode(&saved, i); + n = ngx_utf8_decode(&saved, i); c = '\0'; @@ -818,7 +818,7 @@ recode: len = buf->last - src; - n = ngx_utf_decode(&src, len); + n = ngx_utf8_decode(&src, len); if (n < 0x10000) { @@ -1270,7 +1270,7 @@ ngx_http_charset_map(ngx_conf_t *cf, ngx p = &table->src2dst[src * NGX_UTF_LEN] + 1; - n = ngx_utf_decode(&p, i); + n = ngx_utf8_decode(&p, i); if (n > 0xffff) { ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,