Mercurial > hg > nginx
comparison src/os/win32/ngx_files.c @ 8130:b0a06c50c1b4
Win32: non-ASCII names support in autoindex (ticket #458).
Notably, ngx_open_dir() now supports opening directories with non-ASCII
characters, and directory entries returned by ngx_read_dir() are properly
converted to UTF-8.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Thu, 23 Feb 2023 20:49:39 +0300 |
parents | ccb5ff87ab3e |
children | 751f79bd802c |
comparison
equal
deleted
inserted
replaced
8129:3c4d81ea1338 | 8130:b0a06c50c1b4 |
---|---|
11 | 11 |
12 #define NGX_UTF16_BUFLEN 256 | 12 #define NGX_UTF16_BUFLEN 256 |
13 | 13 |
14 static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u, | 14 static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u, |
15 size_t len); | 15 size_t len); |
16 static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len); | 16 static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, |
17 size_t reserved); | |
18 static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, | |
19 size_t *allocated); | |
20 uint32_t ngx_utf16_decode(u_short **u, size_t n); | |
17 | 21 |
18 | 22 |
19 /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */ | 23 /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */ |
20 | 24 |
21 ngx_fd_t | 25 ngx_fd_t |
26 ngx_fd_t fd; | 30 ngx_fd_t fd; |
27 ngx_err_t err; | 31 ngx_err_t err; |
28 u_short utf16[NGX_UTF16_BUFLEN]; | 32 u_short utf16[NGX_UTF16_BUFLEN]; |
29 | 33 |
30 len = NGX_UTF16_BUFLEN; | 34 len = NGX_UTF16_BUFLEN; |
31 u = ngx_utf8_to_utf16(utf16, name, &len); | 35 u = ngx_utf8_to_utf16(utf16, name, &len, 0); |
32 | 36 |
33 if (u == NULL) { | 37 if (u == NULL) { |
34 return INVALID_HANDLE_VALUE; | 38 return INVALID_HANDLE_VALUE; |
35 } | 39 } |
36 | 40 |
267 WIN32_FILE_ATTRIBUTE_DATA fa; | 271 WIN32_FILE_ATTRIBUTE_DATA fa; |
268 u_short utf16[NGX_UTF16_BUFLEN]; | 272 u_short utf16[NGX_UTF16_BUFLEN]; |
269 | 273 |
270 len = NGX_UTF16_BUFLEN; | 274 len = NGX_UTF16_BUFLEN; |
271 | 275 |
272 u = ngx_utf8_to_utf16(utf16, file, &len); | 276 u = ngx_utf8_to_utf16(utf16, file, &len, 0); |
273 | 277 |
274 if (u == NULL) { | 278 if (u == NULL) { |
275 return NGX_FILE_ERROR; | 279 return NGX_FILE_ERROR; |
276 } | 280 } |
277 | 281 |
425 | 429 |
426 | 430 |
/*
 * ngx_open_dir(): side-by-side rows, previous revision in the left
 * column and new revision in the right column.
 *
 * New revision: converts name->data to UTF-16 with ngx_utf8_to_utf16(),
 * using the stack buffer utf16[] with two code units held back (len is
 * reduced by 2 and 2 is passed as "reserved"), validates the result
 * with ngx_win32_check_filename(), then writes '/', '*' and a
 * terminating zero starting at u[len - 1] and starts the search with
 * FindFirstFileW().
 *
 * Returns NGX_OK after setting dir->valid_info and dir->ready to 1,
 * dir->name to NULL and dir->allocated to 0.  Returns NGX_ERROR if the
 * conversion, the filename check or FindFirstFileW() fails.
 */
427 ngx_int_t | 431 ngx_int_t |
428 ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir) | 432 ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir) |
429 { | 433 { |
430 u_char *pattern, *p; | 434 size_t len; |
435 u_short *u, *p; | |
431 ngx_err_t err; | 436 ngx_err_t err; |
432 | 437 u_short utf16[NGX_UTF16_BUFLEN]; |
433 pattern = malloc(name->len + 3); | 438 |
434 if (pattern == NULL) { | 439 len = NGX_UTF16_BUFLEN - 2; |
440 u = ngx_utf8_to_utf16(utf16, name->data, &len, 2); | |
441 | |
442 if (u == NULL) { | |
435 return NGX_ERROR; | 443 return NGX_ERROR; |
436 } | 444 } |
437 | 445 |
438 p = ngx_cpymem(pattern, name->data, name->len); | 446 if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) { |
447 goto failed; | |
448 } | |
449 | |
450 p = &u[len - 1]; | |
439 | 451 |
440 *p++ = '/'; | 452 *p++ = '/'; |
441 *p++ = '*'; | 453 *p++ = '*'; |
442 *p = '\0'; | 454 *p = '\0'; |
443 | 455 |
444 dir->dir = FindFirstFile((const char *) pattern, &dir->finddata); | 456 dir->dir = FindFirstFileW(u, &dir->finddata); |
445 | 457 |
446 if (dir->dir == INVALID_HANDLE_VALUE) { | 458 if (dir->dir == INVALID_HANDLE_VALUE) { |
447 err = ngx_errno; | 459 goto failed; |
448 ngx_free(pattern); | 460 } |
449 ngx_set_errno(err); | 461 |
450 return NGX_ERROR; | 462 if (u != utf16) { |
451 } | 463 ngx_free(u); |
452 | 464 } |
453 ngx_free(pattern); | |
454 | 465 |
455 dir->valid_info = 1; | 466 dir->valid_info = 1; |
456 dir->ready = 1; | 467 dir->ready = 1; |
468 dir->name = NULL; | |
469 dir->allocated = 0; | |
457 | 470 |
458 return NGX_OK; | 471 return NGX_OK; |
472 | |
/*
 * New revision, error path: u differs from utf16 only when the
 * conversion returned a buffer other than the stack one; it is freed
 * here while ngx_errno is saved and restored around ngx_free().
 */
473 failed: | |
474 | |
475 if (u != utf16) { | |
476 err = ngx_errno; | |
477 ngx_free(u); | |
478 ngx_set_errno(err); | |
479 } | |
480 | |
481 return NGX_ERROR; | |
459 } | 482 } |
460 | 483 |
461 | 484 |
/*
 * ngx_read_dir(): side-by-side rows, previous revision in the left
 * column and new revision in the right column.
 *
 * New revision: when dir->ready is set (ngx_open_dir() sets it after
 * FindFirstFileW() has filled dir->finddata) the flag is cleared and
 * the entry already in dir->finddata is used; otherwise FindNextFileW()
 * fetches the next entry.  In both cases dir->finddata.cFileName is
 * then converted to UTF-8 by ngx_utf16_to_utf8(), which is offered
 * dir->name as a buffer of dir->allocated bytes.
 *
 * If the converter hands back a different buffer, the previous
 * dir->name (if any) is freed and dir->name / dir->allocated are
 * replaced.  dir->namelen is set to len - 1, since the length reported
 * by the converter counts the terminating NUL it writes.
 *
 * Returns NGX_OK on success, NGX_ERROR when FindNextFileW() returns 0
 * or the conversion fails.
 */
462 ngx_int_t | 485 ngx_int_t |
463 ngx_read_dir(ngx_dir_t *dir) | 486 ngx_read_dir(ngx_dir_t *dir) |
464 { | 487 { |
488 u_char *name; | |
489 size_t len, allocated; | |
490 | |
465 if (dir->ready) { | 491 if (dir->ready) { |
466 dir->ready = 0; | 492 dir->ready = 0; |
467 return NGX_OK; | 493 goto convert; |
468 } | 494 } |
469 | 495 |
470 if (FindNextFile(dir->dir, &dir->finddata) != 0) { | 496 if (FindNextFileW(dir->dir, &dir->finddata) != 0) { |
471 dir->type = 1; | 497 dir->type = 1; |
472 return NGX_OK; | 498 goto convert; |
473 } | 499 } |
474 | 500 |
475 return NGX_ERROR; | 501 return NGX_ERROR; |
502 | |
503 convert: | |
504 | |
505 name = dir->name; | |
506 len = dir->allocated; | |
507 | |
508 name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated); | |
509 | |
510 if (name == NULL) { | |
511 return NGX_ERROR; | |
512 } | |
513 | |
/* "allocated" is only read here, i.e. when a new buffer was returned */
514 if (name != dir->name) { | |
515 | |
516 if (dir->name) { | |
517 ngx_free(dir->name); | |
518 } | |
519 | |
520 dir->name = name; | |
521 dir->allocated = allocated; | |
522 } | |
523 | |
524 dir->namelen = len - 1; | |
525 | |
526 return NGX_OK; | |
476 } | 527 } |
477 | 528 |
478 | 529 |
479 ngx_int_t | 530 ngx_int_t |
480 ngx_close_dir(ngx_dir_t *dir) | 531 ngx_close_dir(ngx_dir_t *dir) |
481 { | 532 { |
533 if (dir->name) { | |
534 ngx_free(dir->name); | |
535 } | |
536 | |
482 if (FindClose(dir->dir) == 0) { | 537 if (FindClose(dir->dir) == 0) { |
483 return NGX_ERROR; | 538 return NGX_ERROR; |
484 } | 539 } |
485 | 540 |
486 return NGX_OK; | 541 return NGX_OK; |
814 return NGX_ERROR; | 869 return NGX_ERROR; |
815 } | 870 } |
816 | 871 |
817 | 872 |
818 static u_short * | 873 static u_short * |
819 ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len) | 874 ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved) |
820 { | 875 { |
821 u_char *p; | 876 u_char *p; |
822 u_short *u, *last; | 877 u_short *u, *last; |
823 uint32_t n; | 878 uint32_t n; |
824 | 879 |
863 *u++ = (u_short) n; | 918 *u++ = (u_short) n; |
864 } | 919 } |
865 | 920 |
866 /* the given buffer is not enough, allocate a new one */ | 921 /* the given buffer is not enough, allocate a new one */ |
867 | 922 |
868 u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short)); | 923 u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * sizeof(u_short)); |
869 if (u == NULL) { | 924 if (u == NULL) { |
870 return NULL; | 925 return NULL; |
871 } | 926 } |
872 | 927 |
873 ngx_memcpy(u, utf16, *len * 2); | 928 ngx_memcpy(u, utf16, *len * 2); |
908 *u++ = (u_short) n; | 963 *u++ = (u_short) n; |
909 } | 964 } |
910 | 965 |
911 /* unreachable */ | 966 /* unreachable */ |
912 } | 967 } |
968 | |
969 | |
/*
 * ngx_utf16_to_utf8() converts the zero-terminated UTF-16 string
 * "utf16" to UTF-8 (rows below show the new revision only).
 *
 * utf8       caller-supplied output buffer
 * len        in: size of "utf8" in bytes; out: number of bytes written,
 *            including the terminating NUL
 * allocated  if not NULL and a new buffer had to be allocated, receives
 *            the size of that buffer in bytes
 *
 * Returns "utf8" when the result fits into the supplied buffer.
 * Otherwise a buffer obtained with malloc() is returned, holding the
 * bytes converted so far plus the rest of the string; callers can tell
 * the two cases apart by comparing the result with "utf8", as
 * ngx_read_dir() does.  Returns NULL if malloc() fails, or if
 * ngx_utf16_decode() reports a value above 0x10ffff, in which case
 * errno is set to NGX_EILSEQ and any buffer allocated here is freed.
 */
970 static u_char * | |
971 ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t *allocated) | |
972 { | |
973 u_char *p, *last; | |
974 u_short *u, *j; | |
975 uint32_t n; | |
976 | |
977 u = utf16; | |
978 p = utf8; | |
979 last = utf8 + *len; | |
980 | |
/* first pass: convert into the caller's buffer while space remains */
981 while (p < last) { | |
982 | |
983 if (*u < 0x80) { | |
984 *p++ = (u_char) *u; | |
985 | |
/* the terminating zero has just been copied, so *len counts it */
986 if (*u == 0) { | |
987 *len = p - utf8; | |
988 return utf8; | |
989 } | |
990 | |
991 u++; | |
992 | |
993 continue; | |
994 } | |
995 | |
/*
 * non-ASCII code unit: leave the caller's buffer once four or fewer
 * bytes remain, recording how many bytes are already converted
 */
996 if (p >= last - 4) { | |
997 *len = p - utf8; | |
998 break; | |
999 } | |
1000 | |
1001 n = ngx_utf16_decode(&u, 2); | |
1002 | |
1003 if (n > 0x10ffff) { | |
1004 ngx_set_errno(NGX_EILSEQ); | |
1005 return NULL; | |
1006 } | |
1007 | |
/* four-byte UTF-8 sequence */
1008 if (n >= 0x10000) { | |
1009 *p++ = (u_char) (0xf0 + (n >> 18)); | |
1010 *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f)); | |
1011 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); | |
1012 *p++ = (u_char) (0x80 + (n & 0x3f)); | |
1013 continue; | |
1014 } | |
1015 | |
/* three-byte UTF-8 sequence */
1016 if (n >= 0x0800) { | |
1017 *p++ = (u_char) (0xe0 + (n >> 12)); | |
1018 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); | |
1019 *p++ = (u_char) (0x80 + (n & 0x3f)); | |
1020 continue; | |
1021 } | |
1022 | |
/* two-byte UTF-8 sequence */
1023 *p++ = (u_char) (0xc0 + (n >> 6)); | |
1024 *p++ = (u_char) (0x80 + (n & 0x3f)); | |
1025 } | |
1026 | |
1027 /* the given buffer is not enough, allocate a new one */ | |
1028 | |
/* advance j to the terminating zero so (j - utf16) is the length in code units */
1029 for (j = u; *j; j++) { /* void */ } | |
1030 | |
/* sized at four bytes per UTF-16 code unit plus one for the NUL */
1031 p = malloc((j - utf16) * 4 + 1); | |
1032 if (p == NULL) { | |
1033 return NULL; | |
1034 } | |
1035 | |
1036 if (allocated) { | |
1037 *allocated = (j - utf16) * 4 + 1; | |
1038 } | |
1039 | |
/* carry over what the first pass already produced, then continue from u */
1040 ngx_memcpy(p, utf8, *len); | |
1041 | |
1042 utf8 = p; | |
1043 p += *len; | |
1044 | |
/* second pass: same conversion, without bounds checks on the new buffer */
1045 for ( ;; ) { | |
1046 | |
1047 if (*u < 0x80) { | |
1048 *p++ = (u_char) *u; | |
1049 | |
1050 if (*u == 0) { | |
1051 *len = p - utf8; | |
1052 return utf8; | |
1053 } | |
1054 | |
1055 u++; | |
1056 | |
1057 continue; | |
1058 } | |
1059 | |
1060 n = ngx_utf16_decode(&u, 2); | |
1061 | |
/* utf8 now points at the buffer allocated above, so release it on error */
1062 if (n > 0x10ffff) { | |
1063 ngx_free(utf8); | |
1064 ngx_set_errno(NGX_EILSEQ); | |
1065 return NULL; | |
1066 } | |
1067 | |
1068 if (n >= 0x10000) { | |
1069 *p++ = (u_char) (0xf0 + (n >> 18)); | |
1070 *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f)); | |
1071 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); | |
1072 *p++ = (u_char) (0x80 + (n & 0x3f)); | |
1073 continue; | |
1074 } | |
1075 | |
1076 if (n >= 0x0800) { | |
1077 *p++ = (u_char) (0xe0 + (n >> 12)); | |
1078 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); | |
1079 *p++ = (u_char) (0x80 + (n & 0x3f)); | |
1080 continue; | |
1081 } | |
1082 | |
1083 *p++ = (u_char) (0xc0 + (n >> 6)); | |
1084 *p++ = (u_char) (0x80 + (n & 0x3f)); | |
1085 } | |
1086 | |
1087 /* unreachable */ | |
1088 } | |
1089 | |
1090 | |
1091 /* | |
1092 * ngx_utf16_decode() decodes one or two UTF-16 code units | |
1093 * the return values: | |
1094 * 0x80 - 0x10ffff valid character | |
1095 * 0x110000 - 0xfffffffd invalid sequence | |
1096 * 0xfffffffe incomplete sequence | |
1097 * 0xffffffff error | |
1098 */ | |
1099 | |
/*
 * u  in/out: points to the current code unit; advanced past every code
 *    unit consumed, except in the "incomplete sequence" case
 * n  number of code units the caller says are available at *u
 */
1100 uint32_t | |
1101 ngx_utf16_decode(u_short **u, size_t n) | |
1102 { | |
1103 uint32_t k, m; | |
1104 | |
1105 k = **u; | |
1106 | |
/* not a surrogate: the code unit is returned unchanged (values below 0x80 included) */
1107 if (k < 0xd800 || k > 0xdfff) { | |
1108 (*u)++; | |
1109 return k; | |
1110 } | |
1111 | |
/* 0xdc00-0xdfff in first position: consume it and report an error */
1112 if (k > 0xdbff) { | |
1113 (*u)++; | |
1114 return 0xffffffff; | |
1115 } | |
1116 | |
/* 0xd800-0xdbff needs a second code unit; *u is left untouched here */
1117 if (n < 2) { | |
1118 return 0xfffffffe; | |
1119 } | |
1120 | |
1121 (*u)++; | |
1122 | |
1123 m = *(*u)++; | |
1124 | |
/* second unit outside 0xdc00-0xdfff: error, both units already consumed */
1125 if (m < 0xdc00 || m > 0xdfff) { | |
1126 return 0xffffffff; | |
1127 | |
1128 } | |
1129 | |
/* combine the pair: 10 bits from each unit, offset by 0x10000 */
1130 return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00); | |
1131 } | |