comparison src/os/win32/ngx_files.c @ 8130:b0a06c50c1b4

Win32: non-ASCII names support in autoindex (ticket #458). Notably, ngx_open_dir() now supports opening directories with non-ASCII characters, and directory entries returned by ngx_read_dir() are properly converted to UTF-8.
author Maxim Dounin <mdounin@mdounin.ru>
date Thu, 23 Feb 2023 20:49:39 +0300
parents ccb5ff87ab3e
children 751f79bd802c
comparison
equal deleted inserted replaced
8129:3c4d81ea1338 8130:b0a06c50c1b4
11 11
12 #define NGX_UTF16_BUFLEN 256 12 #define NGX_UTF16_BUFLEN 256
13 13
14 static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u, 14 static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u,
15 size_t len); 15 size_t len);
16 static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len); 16 static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len,
17 size_t reserved);
18 static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len,
19 size_t *allocated);
20 uint32_t ngx_utf16_decode(u_short **u, size_t n);
17 21
18 22
19 /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */ 23 /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */
20 24
21 ngx_fd_t 25 ngx_fd_t
26 ngx_fd_t fd; 30 ngx_fd_t fd;
27 ngx_err_t err; 31 ngx_err_t err;
28 u_short utf16[NGX_UTF16_BUFLEN]; 32 u_short utf16[NGX_UTF16_BUFLEN];
29 33
30 len = NGX_UTF16_BUFLEN; 34 len = NGX_UTF16_BUFLEN;
31 u = ngx_utf8_to_utf16(utf16, name, &len); 35 u = ngx_utf8_to_utf16(utf16, name, &len, 0);
32 36
33 if (u == NULL) { 37 if (u == NULL) {
34 return INVALID_HANDLE_VALUE; 38 return INVALID_HANDLE_VALUE;
35 } 39 }
36 40
267 WIN32_FILE_ATTRIBUTE_DATA fa; 271 WIN32_FILE_ATTRIBUTE_DATA fa;
268 u_short utf16[NGX_UTF16_BUFLEN]; 272 u_short utf16[NGX_UTF16_BUFLEN];
269 273
270 len = NGX_UTF16_BUFLEN; 274 len = NGX_UTF16_BUFLEN;
271 275
272 u = ngx_utf8_to_utf16(utf16, file, &len); 276 u = ngx_utf8_to_utf16(utf16, file, &len, 0);
273 277
274 if (u == NULL) { 278 if (u == NULL) {
275 return NGX_FILE_ERROR; 279 return NGX_FILE_ERROR;
276 } 280 }
277 281
425 429
426 430
427 ngx_int_t 431 ngx_int_t
428 ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir) 432 ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir)
429 { 433 {
430 u_char *pattern, *p; 434 size_t len;
435 u_short *u, *p;
431 ngx_err_t err; 436 ngx_err_t err;
432 437 u_short utf16[NGX_UTF16_BUFLEN];
433 pattern = malloc(name->len + 3); 438
434 if (pattern == NULL) { 439 len = NGX_UTF16_BUFLEN - 2;
440 u = ngx_utf8_to_utf16(utf16, name->data, &len, 2);
441
442 if (u == NULL) {
435 return NGX_ERROR; 443 return NGX_ERROR;
436 } 444 }
437 445
438 p = ngx_cpymem(pattern, name->data, name->len); 446 if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) {
447 goto failed;
448 }
449
450 p = &u[len - 1];
439 451
440 *p++ = '/'; 452 *p++ = '/';
441 *p++ = '*'; 453 *p++ = '*';
442 *p = '\0'; 454 *p = '\0';
443 455
444 dir->dir = FindFirstFile((const char *) pattern, &dir->finddata); 456 dir->dir = FindFirstFileW(u, &dir->finddata);
445 457
446 if (dir->dir == INVALID_HANDLE_VALUE) { 458 if (dir->dir == INVALID_HANDLE_VALUE) {
447 err = ngx_errno; 459 goto failed;
448 ngx_free(pattern); 460 }
449 ngx_set_errno(err); 461
450 return NGX_ERROR; 462 if (u != utf16) {
451 } 463 ngx_free(u);
452 464 }
453 ngx_free(pattern);
454 465
455 dir->valid_info = 1; 466 dir->valid_info = 1;
456 dir->ready = 1; 467 dir->ready = 1;
468 dir->name = NULL;
469 dir->allocated = 0;
457 470
458 return NGX_OK; 471 return NGX_OK;
472
473 failed:
474
475 if (u != utf16) {
476 err = ngx_errno;
477 ngx_free(u);
478 ngx_set_errno(err);
479 }
480
481 return NGX_ERROR;
459 } 482 }
460 483
461 484
462 ngx_int_t 485 ngx_int_t
463 ngx_read_dir(ngx_dir_t *dir) 486 ngx_read_dir(ngx_dir_t *dir)
464 { 487 {
488 u_char *name;
489 size_t len, allocated;
490
465 if (dir->ready) { 491 if (dir->ready) {
466 dir->ready = 0; 492 dir->ready = 0;
467 return NGX_OK; 493 goto convert;
468 } 494 }
469 495
470 if (FindNextFile(dir->dir, &dir->finddata) != 0) { 496 if (FindNextFileW(dir->dir, &dir->finddata) != 0) {
471 dir->type = 1; 497 dir->type = 1;
472 return NGX_OK; 498 goto convert;
473 } 499 }
474 500
475 return NGX_ERROR; 501 return NGX_ERROR;
502
503 convert:
504
505 name = dir->name;
506 len = dir->allocated;
507
508 name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, &allocated);
509
510 if (name == NULL) {
511 return NGX_ERROR;
512 }
513
514 if (name != dir->name) {
515
516 if (dir->name) {
517 ngx_free(dir->name);
518 }
519
520 dir->name = name;
521 dir->allocated = allocated;
522 }
523
524 dir->namelen = len - 1;
525
526 return NGX_OK;
476 } 527 }
477 528
478 529
479 ngx_int_t 530 ngx_int_t
480 ngx_close_dir(ngx_dir_t *dir) 531 ngx_close_dir(ngx_dir_t *dir)
481 { 532 {
533 if (dir->name) {
534 ngx_free(dir->name);
535 }
536
482 if (FindClose(dir->dir) == 0) { 537 if (FindClose(dir->dir) == 0) {
483 return NGX_ERROR; 538 return NGX_ERROR;
484 } 539 }
485 540
486 return NGX_OK; 541 return NGX_OK;
814 return NGX_ERROR; 869 return NGX_ERROR;
815 } 870 }
816 871
817 872
818 static u_short * 873 static u_short *
819 ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len) 874 ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved)
820 { 875 {
821 u_char *p; 876 u_char *p;
822 u_short *u, *last; 877 u_short *u, *last;
823 uint32_t n; 878 uint32_t n;
824 879
863 *u++ = (u_short) n; 918 *u++ = (u_short) n;
864 } 919 }
865 920
866 /* the given buffer is not enough, allocate a new one */ 921 /* the given buffer is not enough, allocate a new one */
867 922
868 u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short)); 923 u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * sizeof(u_short));
869 if (u == NULL) { 924 if (u == NULL) {
870 return NULL; 925 return NULL;
871 } 926 }
872 927
873 ngx_memcpy(u, utf16, *len * 2); 928 ngx_memcpy(u, utf16, *len * 2);
908 *u++ = (u_short) n; 963 *u++ = (u_short) n;
909 } 964 }
910 965
911 /* unreachable */ 966 /* unreachable */
912 } 967 }
968
969
970 static u_char *
971 ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t *allocated)
972 {
973 u_char *p, *last;
974 u_short *u, *j;
975 uint32_t n;
976
977 u = utf16;
978 p = utf8;
979 last = utf8 + *len;
980
981 while (p < last) {
982
983 if (*u < 0x80) {
984 *p++ = (u_char) *u;
985
986 if (*u == 0) {
987 *len = p - utf8;
988 return utf8;
989 }
990
991 u++;
992
993 continue;
994 }
995
996 if (p >= last - 4) {
997 *len = p - utf8;
998 break;
999 }
1000
1001 n = ngx_utf16_decode(&u, 2);
1002
1003 if (n > 0x10ffff) {
1004 ngx_set_errno(NGX_EILSEQ);
1005 return NULL;
1006 }
1007
1008 if (n >= 0x10000) {
1009 *p++ = (u_char) (0xf0 + (n >> 18));
1010 *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
1011 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
1012 *p++ = (u_char) (0x80 + (n & 0x3f));
1013 continue;
1014 }
1015
1016 if (n >= 0x0800) {
1017 *p++ = (u_char) (0xe0 + (n >> 12));
1018 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
1019 *p++ = (u_char) (0x80 + (n & 0x3f));
1020 continue;
1021 }
1022
1023 *p++ = (u_char) (0xc0 + (n >> 6));
1024 *p++ = (u_char) (0x80 + (n & 0x3f));
1025 }
1026
1027 /* the given buffer is not enough, allocate a new one */
1028
1029 for (j = u; *j; j++) { /* void */ }
1030
1031 p = malloc((j - utf16) * 4 + 1);
1032 if (p == NULL) {
1033 return NULL;
1034 }
1035
1036 if (allocated) {
1037 *allocated = (j - utf16) * 4 + 1;
1038 }
1039
1040 ngx_memcpy(p, utf8, *len);
1041
1042 utf8 = p;
1043 p += *len;
1044
1045 for ( ;; ) {
1046
1047 if (*u < 0x80) {
1048 *p++ = (u_char) *u;
1049
1050 if (*u == 0) {
1051 *len = p - utf8;
1052 return utf8;
1053 }
1054
1055 u++;
1056
1057 continue;
1058 }
1059
1060 n = ngx_utf16_decode(&u, 2);
1061
1062 if (n > 0x10ffff) {
1063 ngx_free(utf8);
1064 ngx_set_errno(NGX_EILSEQ);
1065 return NULL;
1066 }
1067
1068 if (n >= 0x10000) {
1069 *p++ = (u_char) (0xf0 + (n >> 18));
1070 *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f));
1071 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
1072 *p++ = (u_char) (0x80 + (n & 0x3f));
1073 continue;
1074 }
1075
1076 if (n >= 0x0800) {
1077 *p++ = (u_char) (0xe0 + (n >> 12));
1078 *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f));
1079 *p++ = (u_char) (0x80 + (n & 0x3f));
1080 continue;
1081 }
1082
1083 *p++ = (u_char) (0xc0 + (n >> 6));
1084 *p++ = (u_char) (0x80 + (n & 0x3f));
1085 }
1086
1087 /* unreachable */
1088 }
1089
1090
1091 /*
1092 * ngx_utf16_decode() decodes one or two UTF-16 code units
1093 * the return values:
1094 * 0x80 - 0x10ffff valid character
1095 * 0x110000 - 0xfffffffd invalid sequence
1096 * 0xfffffffe incomplete sequence
1097 * 0xffffffff error
1098 */
1099
1100 uint32_t
1101 ngx_utf16_decode(u_short **u, size_t n)
1102 {
1103 uint32_t k, m;
1104
1105 k = **u;
1106
1107 if (k < 0xd800 || k > 0xdfff) {
1108 (*u)++;
1109 return k;
1110 }
1111
1112 if (k > 0xdbff) {
1113 (*u)++;
1114 return 0xffffffff;
1115 }
1116
1117 if (n < 2) {
1118 return 0xfffffffe;
1119 }
1120
1121 (*u)++;
1122
1123 m = *(*u)++;
1124
1125 if (m < 0xdc00 || m > 0xdfff) {
1126 return 0xffffffff;
1127
1128 }
1129
1130 return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00);
1131 }