changeset 7880:dfd8dfb436e5

Core: escaping of chars not allowed in URIs per RFC 3986. Per RFC 3986 only the following characters are allowed in URIs unescaped: unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" And "%" can appear as a part of escaping itself. The following characters are not allowed and need to be escaped: %00-%1F, %7F-%FF, " ", """, "<", ">", "\", "^", "`", "{", "|", "}". Not escaping ">" is known to cause problems at least with MS Exchange (see http://nginx.org/pipermail/nginx-ru/2010-January/031261.html) and in Tomcat (ticket #2191). The patch adds escaping of the following chars in all URI parts: """, "<", ">", "\", "^", "`", "{", "|", "}". Note that comments are mostly preserved to outline important characters being escaped.
author Maxim Dounin <mdounin@mdounin.ru>
date Mon, 28 Jun 2021 18:01:11 +0300
parents b7407334c60d
children 52338ddf9e2f
files src/core/ngx_string.c
diffstat 1 files changed, 29 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/src/core/ngx_string.c
+++ b/src/core/ngx_string.c
@@ -1493,19 +1493,32 @@ ngx_escape_uri(u_char *dst, u_char *src,
     uint32_t       *escape;
     static u_char   hex[] = "0123456789ABCDEF";
 
-                    /* " ", "#", "%", "?", %00-%1F, %7F-%FF */
+    /*
+     * Per RFC 3986 only the following chars are allowed in URIs unescaped:
+     *
+     * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+     * gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+     * sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+     *               / "*" / "+" / "," / ";" / "="
+     *
+     * And "%" can appear as a part of escaping itself.  The following
+     * characters are not allowed and need to be escaped: %00-%1F, %7F-%FF,
+     * " ", """, "<", ">", "\", "^", "`", "{", "|", "}".
+     */
+
+                    /* " ", "#", "%", "?", not allowed */
 
     static uint32_t   uri[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x80000029, /* 1000 0000 0000 0000  0000 0000 0010 1001 */
+        0xd000002d, /* 1101 0000 0000 0000  0000 0000 0010 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1513,19 +1526,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", "#", "%", "&", "+", ";", "?", %00-%1F, %7F-%FF */
+                    /* " ", "#", "%", "&", "+", ";", "?", not allowed */
 
     static uint32_t   args[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x88000869, /* 1000 1000 0000 0000  0000 1000 0110 1001 */
+        0xd800086d, /* 1101 1000 0000 0000  0000 1000 0110 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1553,19 +1566,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", "#", """, "%", "'", %00-%1F, %7F-%FF */
+                    /* " ", "#", """, "%", "'", not allowed */
 
     static uint32_t   html[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x000000ad, /* 0000 0000 0000 0000  0000 0000 1010 1101 */
+        0x500000ad, /* 0101 0000 0000 0000  0000 0000 1010 1101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xb8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
@@ -1573,19 +1586,19 @@ ngx_escape_uri(u_char *dst, u_char *src,
         0xffffffff  /* 1111 1111 1111 1111  1111 1111 1111 1111 */
     };
 
-                    /* " ", """, "'", %00-%1F, %7F-%FF */
+                    /* " ", """, "'", not allowed */
 
     static uint32_t   refresh[] = {
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
 
                     /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
-        0x00000085, /* 0000 0000 0000 0000  0000 0000 1000 0101 */
+        0x50000085, /* 0101 0000 0000 0000  0000 0000 1000 0101 */
 
                     /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
-        0x00000000, /* 0000 0000 0000 0000  0000 0000 0000 0000 */
+        0x50000000, /* 0101 0000 0000 0000  0000 0000 0000 0000 */
 
                     /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
-        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */
+        0xd8000001, /* 1011 1000 0000 0000  0000 0000 0000 0001 */
 
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
         0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */