changeset 2472:d7d3a72c07d8 stable-0.6

r2121, r2126 merge: *) fix utf-8 names in autoindex *) rename ngx_utf_...() to ngx_utf8_...()
author Igor Sysoev <igor@sysoev.ru>
date Mon, 26 Jan 2009 15:09:55 +0000
parents 92488f6e80db
children afba93b8bf06
files src/core/ngx_string.c src/core/ngx_string.h src/http/modules/ngx_http_autoindex_module.c src/http/modules/ngx_http_charset_filter_module.c
diffstat 4 files changed, 47 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/src/core/ngx_string.c
+++ b/src/core/ngx_string.c
@@ -952,16 +952,16 @@ ngx_decode_base64(ngx_str_t *dst, ngx_st
 
 
 /*
- * ngx_utf_decode() decodes two and more bytes UTF sequences only
+ * ngx_utf8_decode() decodes two and more bytes UTF sequences only
  * the return values:
  *    0x80 - 0x10ffff         valid character
- *    0x10ffff - 0xfffffffd   invalid sequence
+ *    0x110000 - 0xfffffffd   invalid sequence
  *    0xfffffffe              incomplete sequence
  *    0xffffffff              error
  */
 
 uint32_t
-ngx_utf_decode(u_char **p, size_t n)
+ngx_utf8_decode(u_char **p, size_t n)
 {
     size_t    len;
     uint32_t  u, i, valid;
@@ -1018,31 +1018,26 @@ ngx_utf_decode(u_char **p, size_t n)
 
 
 size_t
-ngx_utf_length(u_char *p, size_t n)
+ngx_utf8_length(u_char *p, size_t n)
 {
-    u_char      c;
-    size_t      len;
-    ngx_uint_t  i;
+    u_char  c, *last;
+    size_t  len;
 
-    for (len = 0, i = 0; i < n; len++, i++) {
+    last = p + n;
 
-        c = p[i];
+    for (len = 0; p < last; len++) {
+
+        c = *p;
 
         if (c < 0x80) {
+            p++;
             continue;
         }
 
-        if (c >= 0xc0) {
-            for (c <<= 1; c & 0x80; c <<= 1) {
-                i++;
-            }
-
-            continue;
+        if (ngx_utf8_decode(&p, n) > 0x10ffff) {
+            /* invalid UTF-8 */
+            return n;
         }
-
-        /* invalid utf */
-
-        return n;
     }
 
     return len;
@@ -1050,36 +1045,45 @@ ngx_utf_length(u_char *p, size_t n)
 
 
 u_char *
-ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n)
+ngx_utf8_cpystrn(u_char *dst, u_char *src, size_t n, size_t len)
 {
-    u_char  c;
+    u_char  c, *next;
 
     if (n == 0) {
         return dst;
     }
 
-    for ( /* void */ ; --n; dst++, src++) {
+    while (--n) {
 
         c = *src;
         *dst = c;
 
         if (c < 0x80) {
-            if (*dst != '\0') {
+
+            if (c != '\0') {
+                dst++;
+                src++;
+                len--;
+
                 continue;
             }
 
             return dst;
         }
 
-        if (c >= 0xc0) {
-            for (c <<= 1; c & 0x80; c <<= 1) {
-               *++dst = *++src;
-            }
+        next = src;
 
-            continue;
+        if (ngx_utf8_decode(&next, len) > 0x10ffff) {
+            /* invalid UTF-8 */
+            break;
         }
 
-        /* invalid utf */
+        len--;
+
+        while (src < next) {
+            *++dst = *++src;
+            len--;
+        }
     }
 
     *dst = '\0';
--- a/src/core/ngx_string.h
+++ b/src/core/ngx_string.h
@@ -151,9 +151,9 @@ u_char *ngx_hex_dump(u_char *dst, u_char
 void ngx_encode_base64(ngx_str_t *dst, ngx_str_t *src);
 ngx_int_t ngx_decode_base64(ngx_str_t *dst, ngx_str_t *src);
 
-uint32_t ngx_utf_decode(u_char **p, size_t n);
-size_t ngx_utf_length(u_char *p, size_t n);
-u_char *ngx_utf_cpystrn(u_char *dst, u_char *src, size_t n);
+uint32_t ngx_utf8_decode(u_char **p, size_t n);
+size_t ngx_utf8_length(u_char *p, size_t n);
+u_char *ngx_utf8_cpystrn(u_char *dst, u_char *src, size_t n, size_t len);
 
 
 #define NGX_ESCAPE_URI         0
--- a/src/http/modules/ngx_http_autoindex_module.c
+++ b/src/http/modules/ngx_http_autoindex_module.c
@@ -135,7 +135,7 @@ ngx_http_autoindex_handler(ngx_http_requ
 {
     u_char                         *last, *filename, scale;
     off_t                           length;
-    size_t                          len, copy, allocated, root;
+    size_t                          len, utf_len, allocated, root;
     ngx_tm_t                        tm;
     ngx_err_t                       err;
     ngx_buf_t                      *b;
@@ -329,7 +329,7 @@ ngx_http_autoindex_handler(ngx_http_requ
                                            NGX_ESCAPE_HTML);
 
         if (r->utf8) {
-            entry->utf_len = ngx_utf_length(entry->name.data, entry->name.len);
+            entry->utf_len = ngx_utf8_length(entry->name.data, entry->name.len);
         } else {
             entry->utf_len = len;
         }
@@ -412,15 +412,16 @@ ngx_http_autoindex_handler(ngx_http_requ
 
         len = entry[i].utf_len;
 
-        if (entry[i].name.len - len) {
+        if (entry[i].name.len != len) {
             if (len > NGX_HTTP_AUTOINDEX_NAME_LEN) {
-                copy = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1;
+                utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN - 3 + 1;
 
             } else {
-                copy = NGX_HTTP_AUTOINDEX_NAME_LEN + 1;
+                utf_len = NGX_HTTP_AUTOINDEX_NAME_LEN + 1;
             }
 
-            b->last = ngx_utf_cpystrn(b->last, entry[i].name.data, copy);
+            b->last = ngx_utf8_cpystrn(b->last, entry[i].name.data,
+                                       utf_len, entry[i].name.len + 1);
             last = b->last;
 
         } else {
--- a/src/http/modules/ngx_http_charset_filter_module.c
+++ b/src/http/modules/ngx_http_charset_filter_module.c
@@ -642,7 +642,7 @@ ngx_http_charset_recode_from_utf8(ngx_po
                 size = buf->last - src;
 
                 saved = src;
-                n = ngx_utf_decode(&saved, size);
+                n = ngx_utf8_decode(&saved, size);
 
                 if (n == 0xfffffffe) {
                     /* incomplete UTF-8 symbol */
@@ -710,7 +710,7 @@ ngx_http_charset_recode_from_utf8(ngx_po
     }
 
     saved = ctx->saved;
-    n = ngx_utf_decode(&saved, i);
+    n = ngx_utf8_decode(&saved, i);
 
     c = '\0';
 
@@ -818,7 +818,7 @@ recode:
 
         len = buf->last - src;
 
-        n = ngx_utf_decode(&src, len);
+        n = ngx_utf8_decode(&src, len);
 
         if (n < 0x10000) {
 
@@ -1270,7 +1270,7 @@ ngx_http_charset_map(ngx_conf_t *cf, ngx
 
         p = &table->src2dst[src * NGX_UTF_LEN] + 1;
 
-        n = ngx_utf_decode(&p, i);
+        n = ngx_utf8_decode(&p, i);
 
         if (n > 0xffff) {
             ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,