Mercurial > hg > nginx
comparison src/core/ngx_string.c @ 7880:dfd8dfb436e5
Core: escaping of chars not allowed in URIs per RFC 3986.
Per RFC 3986 only the following characters are allowed in URIs unescaped:
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
And "%" can appear as a part of escaping itself. The following
characters are not allowed and need to be escaped: %00-%1F, %7F-%FF,
" ", """, "<", ">", "\", "^", "`", "{", "|", "}".
Not escaping ">" is known to cause problems at least with MS Exchange (see
http://nginx.org/pipermail/nginx-ru/2010-January/031261.html) and with
Tomcat (ticket #2191).
The patch adds escaping of the following chars in all URI parts: """, "<",
">", "\", "^", "`", "{", "|", "}". Note that comments are mostly preserved
to outline important characters being escaped.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Mon, 28 Jun 2021 18:01:11 +0300 |
parents | b7407334c60d |
children | a10210a45c8b |
comparison
equal
deleted
inserted
replaced
7879:b7407334c60d | 7880:dfd8dfb436e5 |
---|---|
1491 { | 1491 { |
1492 ngx_uint_t n; | 1492 ngx_uint_t n; |
1493 uint32_t *escape; | 1493 uint32_t *escape; |
1494 static u_char hex[] = "0123456789ABCDEF"; | 1494 static u_char hex[] = "0123456789ABCDEF"; |
1495 | 1495 |
1496 /* " ", "#", "%", "?", %00-%1F, %7F-%FF */ | 1496 /* |
1497 * Per RFC 3986 only the following chars are allowed in URIs unescaped: | |
1498 * | |
1499 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | |
1500 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" | |
1501 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" | |
1502 * / "*" / "+" / "," / ";" / "=" | |
1503 * | |
1504 * And "%" can appear as a part of escaping itself. The following | |
1505 * characters are not allowed and need to be escaped: %00-%1F, %7F-%FF, | |
1506 * " ", """, "<", ">", "\", "^", "`", "{", "|", "}". | |
1507 */ | |
1508 | |
1509 /* " ", "#", "%", "?", not allowed */ | |
1497 | 1510 |
1498 static uint32_t uri[] = { | 1511 static uint32_t uri[] = { |
1499 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1512 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1500 | 1513 |
1501 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ | 1514 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ |
1502 0x80000029, /* 1000 0000 0000 0000 0000 0000 0010 1001 */ | 1515 0xd000002d, /* 1101 0000 0000 0000 0000 0000 0010 1101 */ |
1503 | 1516 |
1504 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ | 1517 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ |
1505 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ | 1518 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */ |
1506 | 1519 |
1507 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ | 1520 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ |
1508 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ | 1521 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */ |
1509 | 1522 |
1510 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1523 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1511 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1524 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1512 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1525 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1513 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1526 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1514 }; | 1527 }; |
1515 | 1528 |
1516 /* " ", "#", "%", "&", "+", ";", "?", %00-%1F, %7F-%FF */ | 1529 /* " ", "#", "%", "&", "+", ";", "?", not allowed */ |
1517 | 1530 |
1518 static uint32_t args[] = { | 1531 static uint32_t args[] = { |
1519 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1532 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1520 | 1533 |
1521 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ | 1534 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ |
1522 0x88000869, /* 1000 1000 0000 0000 0000 1000 0110 1001 */ | 1535 0xd800086d, /* 1101 1000 0000 0000 0000 1000 0110 1101 */ |
1523 | 1536 |
1524 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ | 1537 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ |
1525 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ | 1538 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */ |
1526 | 1539 |
1527 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ | 1540 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ |
1528 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ | 1541 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */ |
1529 | 1542 |
1530 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1543 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1531 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1544 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1532 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1545 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1533 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1546 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1551 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1564 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1552 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1565 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1553 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1566 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1554 }; | 1567 }; |
1555 | 1568 |
1556 /* " ", "#", """, "%", "'", %00-%1F, %7F-%FF */ | 1569 /* " ", "#", """, "%", "'", not allowed */ |
1557 | 1570 |
1558 static uint32_t html[] = { | 1571 static uint32_t html[] = { |
1559 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1572 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1560 | 1573 |
1561 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ | 1574 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ |
1562 0x000000ad, /* 0000 0000 0000 0000 0000 0000 1010 1101 */ | 1575 0x500000ad, /* 0101 0000 0000 0000 0000 0000 1010 1101 */ |
1563 | 1576 |
1564 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ | 1577 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ |
1565 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ | 1578 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */ |
1566 | 1579 |
1567 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ | 1580 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ |
1568 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ | 1581 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */ |
1569 | 1582 |
1570 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1583 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1571 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1584 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1572 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1585 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1573 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1586 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1574 }; | 1587 }; |
1575 | 1588 |
1576 /* " ", """, "'", %00-%1F, %7F-%FF */ | 1589 /* " ", """, "'", not allowed */ |
1577 | 1590 |
1578 static uint32_t refresh[] = { | 1591 static uint32_t refresh[] = { |
1579 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1592 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1580 | 1593 |
1581 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ | 1594 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ |
1582 0x00000085, /* 0000 0000 0000 0000 0000 0000 1000 0101 */ | 1595 0x50000085, /* 0101 0000 0000 0000 0000 0000 1000 0101 */ |
1583 | 1596 |
1584 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ | 1597 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ |
1585 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */ | 1598 0x50000000, /* 0101 0000 0000 0000 0000 0000 0000 0000 */ |
1586 | 1599 |
1587 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ | 1600 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ |
1588 0x80000000, /* 1000 0000 0000 0000 0000 0000 0000 0000 */ | 1601 0xb8000001, /* 1011 1000 0000 0000 0000 0000 0000 0001 */ |
1589 | 1602 |
1590 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1603 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1591 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1604 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1592 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1605 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |
1593 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | 1606 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ |