comparison src/core/ngx_string.c @ 7880:dfd8dfb436e5

Core: escaping of chars not allowed in URIs per RFC 3986.

Per RFC 3986 only the following characters are allowed in URIs unescaped:

    unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
    gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="

And "%" can appear as a part of escaping itself. The following characters are not allowed and need to be escaped: %00-%1F, %7F-%FF, " ", """, "<", ">", "\", "^", "`", "{", "|", "}".

Not escaping ">" is known to cause problems at least with MS Exchange (see http://nginx.org/pipermail/nginx-ru/2010-January/031261.html) and in Tomcat (ticket #2191).

The patch adds escaping of the following chars in all URI parts: """, "<", ">", "\", "^", "`", "{", "|", "}". Note that comments are mostly preserved to outline important characters being escaped.
author Maxim Dounin <mdounin@mdounin.ru>
date Mon, 28 Jun 2021 18:01:11 +0300
parents b7407334c60d
children a10210a45c8b
comparison
equal deleted inserted replaced
7879:b7407334c60d 7880:dfd8dfb436e5
1491 { 1491 {
1492 ngx_uint_t n; 1492 ngx_uint_t n;
1493 uint32_t *escape; 1493 uint32_t *escape;
1494 static u_char hex[] = "0123456789ABCDEF"; 1494 static u_char hex[] = "0123456789ABCDEF";
1495 1495
1496 /* " ", "#", "%", "?", %00-%1F, %7F-%FF */ 1496 /*
1497 * Per RFC 3986 only the following chars are allowed in URIs unescaped:
1498 *
1499 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1500 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1501 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1502 * / "*" / "+" / "," / ";" / "="
1503 *
1504 * And "%" can appear as a part of escaping itself. The following
1505 * characters are not allowed and need to be escaped: %00-%1F, %7F-%FF,
1506 * " ", """, "<", ">", "\", "^", "`", "{", "|", "}".
1507 */
1508
1509 /* " ", "#", "%", "?", not allowed */
1497 1510
1498 static uint32_t uri[] = { 1511 static uint32_t uri[] = {
1499 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1512 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1500 1513
1501 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ 1514 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
1502 0x80000029, /* 1000 0000 0000 0000 0000 0000 0010 1001 */ 1515 0xd000002d, /* 1101 0000 0000 0000 0000 0000 0010 1101 */
1503 1516
1504 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ 1517 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
1505 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ 1518 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */
1506 1519
1507 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ 1520 /* ~}| {zyx wvut srqp onml kjih gfed cba` */
1508 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ 1521 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */
1509 1522
1510 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1523 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1511 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1524 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1512 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1525 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1513 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1526 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1514 }; 1527 };
1515 1528
1516 /* " ", "#", "%", "&", "+", ";", "?", %00-%1F, %7F-%FF */ 1529 /* " ", "#", "%", "&", "+", ";", "?", not allowed */
1517 1530
1518 static uint32_t args[] = { 1531 static uint32_t args[] = {
1519 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1532 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1520 1533
1521 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ 1534 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
1522 0x88000869, /* 1000 1000 0000 0000 0000 1000 0110 1001 */ 1535 0xd800086d, /* 1101 1000 0000 0000 0000 1000 0110 1101 */
1523 1536
1524 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ 1537 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
1525 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ 1538 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */
1526 1539
1527 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ 1540 /* ~}| {zyx wvut srqp onml kjih gfed cba` */
1528 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ 1541 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */
1529 1542
1530 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1543 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1531 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1544 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1532 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1545 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1533 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1546 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1551 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1564 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1552 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1565 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1553 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1566 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1554 }; 1567 };
1555 1568
1556 /* " ", "#", """, "%", "'", %00-%1F, %7F-%FF */ 1569 /* " ", "#", """, "%", "'", not allowed */
1557 1570
1558 static uint32_t html[] = { 1571 static uint32_t html[] = {
1559 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1572 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1560 1573
1561 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ 1574 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
1562 0x000000ad, /* 0000 0000 0000 0000 0000 0000 1010 1101 */ 1575 0x500000ad, /* 0101 0000 0000 0000 0000 0000 1010 1101 */
1563 1576
1564 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ 1577 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
1565 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ 1578 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */
1566 1579
1567 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ 1580 /* ~}| {zyx wvut srqp onml kjih gfed cba` */
1568 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ 1581 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */
1569 1582
1570 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1583 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1571 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1584 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1572 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1585 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1573 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1586 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1574 }; 1587 };
1575 1588
1576 /* " ", """, "'", %00-%1F, %7F-%FF */ 1589 /* " ", """, "'", not allowed */
1577 1590
1578 static uint32_t refresh[] = { 1591 static uint32_t refresh[] = {
1579 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1592 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1580 1593
1581 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ 1594 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
1582 0x00000085, /* 0000 0000 0000 0000 0000 0000 1000 0101 */ 1595 0x50000085, /* 0101 0000 0000 0000 0000 0000 1000 0101 */
1583 1596
1584 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ 1597 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
1585 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ 1598 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */
1586 1599
1587 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ 1600 /* ~}| {zyx wvut srqp onml kjih gfed cba` */
1588 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ 1601 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */
1589 1602
1590 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1603 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1591 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1604 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1592 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1605 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
1593 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ 1606 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */