comparison src/core/ngx_string.c @ 8142:a10210a45c8b

Core: stricter UTF-8 handling in ngx_utf8_decode(). A UTF-8 octet sequence cannot start with a 11111xxx byte (0xf8 and above), see https://datatracker.ietf.org/doc/html/rfc3629#section-3. Previously, such bytes were accepted by ngx_utf8_decode() and misinterpreted as 11110xxx bytes (as in a 4-byte sequence). While unlikely, this can potentially cause issues. The fix is to explicitly reject such bytes in ngx_utf8_decode().
author Yugo Horie <u5.horie@gmail.com>
date Thu, 23 Feb 2023 08:09:50 +0900
parents dfd8dfb436e5
children
comparison
equal deleted inserted replaced
8141:2acb00b9b5ff 8142:a10210a45c8b
1362 size_t len; 1362 size_t len;
1363 uint32_t u, i, valid; 1363 uint32_t u, i, valid;
1364 1364
1365 u = **p; 1365 u = **p;
1366 1366
1367 if (u >= 0xf0) { 1367 if (u >= 0xf8) {
1368
1369 (*p)++;
1370 return 0xffffffff;
1371
1372 } else if (u >= 0xf0) {
1368 1373
1369 u &= 0x07; 1374 u &= 0x07;
1370 valid = 0xffff; 1375 valid = 0xffff;
1371 len = 3; 1376 len = 3;
1372 1377