comparison src/http/ngx_http_parse.c @ 254:f3ec44f4a53b NGINX_0_4_12

nginx 0.4.12 *) Feature: the ngx_http_perl_module supports the $r->variable method. *) Bugfix: if a big static file was included using SSI in a response, then the response might be transferred incompletely. *) Bugfix: nginx did not omit the "#fragment" part in the URI.
author Igor Sysoev <http://sysoev.ru>
date Tue, 31 Oct 2006 00:00:00 +0300
parents fbf2b2f66c9f
children 2e9c57a5e50a
comparison
equal deleted inserted replaced
253:b75231e1a353 254:f3ec44f4a53b
5 5
6 6
7 #include <ngx_config.h> 7 #include <ngx_config.h>
8 #include <ngx_core.h> 8 #include <ngx_core.h>
9 #include <ngx_http.h> 9 #include <ngx_http.h>
10
11
12 static uint32_t usual[] = {
13 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */
14
15 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
16 0x7fff37d6, /* 0111 1111 1111 1111 0011 0111 1101 0110 */
17
18 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
19 #if (NGX_WIN32)
20 0xefffffff, /* 1110 1111 1111 1111 1111 1111 1111 1111 */
21 #else
22 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
23 #endif
24
25 /* ~}| {zyx wvut srqp onml kjih gfed cba` */
26 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
27
28 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
29 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
30 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
31 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
32 };
10 33
11 34
12 /* gcc, icc, msvc and others compile these switches as a jump table */ 35 /* gcc, icc, msvc and others compile these switches as a jump table */
13 36
14 ngx_int_t 37 ngx_int_t
16 { 39 {
17 u_char c, ch, *p, *m; 40 u_char c, ch, *p, *m;
18 enum { 41 enum {
19 sw_start = 0, 42 sw_start = 0,
20 sw_method, 43 sw_method,
21 sw_space_after_method,
22 sw_spaces_before_uri, 44 sw_spaces_before_uri,
23 sw_schema, 45 sw_schema,
24 sw_schema_slash, 46 sw_schema_slash,
25 sw_schema_slash_slash, 47 sw_schema_slash_slash,
26 sw_host, 48 sw_host,
116 return NGX_HTTP_PARSE_INVALID_METHOD; 138 return NGX_HTTP_PARSE_INVALID_METHOD;
117 } 139 }
118 140
119 break; 141 break;
120 142
121 /* single space after method */
122 case sw_space_after_method:
123 switch (ch) {
124 case ' ':
125 state = sw_spaces_before_uri;
126 break;
127 default:
128 return NGX_HTTP_PARSE_INVALID_METHOD;
129 }
130 break;
131
132 /* space* before URI */ 143 /* space* before URI */
133 case sw_spaces_before_uri: 144 case sw_spaces_before_uri:
145
146 if (ch == '/' ){
147 r->uri_start = p;
148 state = sw_after_slash_in_uri;
149 break;
150 }
134 151
135 c = (u_char) (ch | 0x20); 152 c = (u_char) (ch | 0x20);
136 if (c >= 'a' && c <= 'z') { 153 if (c >= 'a' && c <= 'z') {
137 r->schema_start = p; 154 r->schema_start = p;
138 state = sw_schema; 155 state = sw_schema;
139 break; 156 break;
140 } 157 }
141 158
142 switch (ch) { 159 switch (ch) {
143 case '/':
144 r->uri_start = p;
145 state = sw_after_slash_in_uri;
146 break;
147 case ' ': 160 case ' ':
148 break; 161 break;
149 default: 162 default:
150 return NGX_HTTP_PARSE_INVALID_REQUEST; 163 return NGX_HTTP_PARSE_INVALID_REQUEST;
151 } 164 }
194 c = (u_char) (ch | 0x20); 207 c = (u_char) (ch | 0x20);
195 if (c >= 'a' && c <= 'z') { 208 if (c >= 'a' && c <= 'z') {
196 break; 209 break;
197 } 210 }
198 211
199 if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') 212 if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') {
200 {
201 break; 213 break;
202 } 214 }
203 215
204 switch (ch) { 216 switch (ch) {
205 case ':': 217 case ':':
233 break; 245 break;
234 246
235 /* check "/.", "//", "%", and "\" (Win32) in URI */ 247 /* check "/.", "//", "%", and "\" (Win32) in URI */
236 case sw_after_slash_in_uri: 248 case sw_after_slash_in_uri:
237 249
238 c = (u_char) (ch | 0x20); 250 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
239 if (c >= 'a' && c <= 'z') {
240 state = sw_check_uri;
241 break;
242 }
243
244 if (ch >= '0' && ch <= '9') {
245 state = sw_check_uri; 251 state = sw_check_uri;
246 break; 252 break;
247 } 253 }
248 254
249 switch (ch) { 255 switch (ch) {
280 #endif 286 #endif
281 case '?': 287 case '?':
282 r->args_start = p + 1; 288 r->args_start = p + 1;
283 state = sw_uri; 289 state = sw_uri;
284 break; 290 break;
291 case '#':
292 r->complex_uri = 1;
293 state = sw_uri;
294 break;
285 case '+': 295 case '+':
286 r->plus_in_uri = 1; 296 r->plus_in_uri = 1;
287 break; 297 break;
288 case '\0': 298 case '\0':
289 r->zero_in_uri = 1; 299 r->zero_in_uri = 1;
295 break; 305 break;
296 306
297 /* check "/", "%" and "\" (Win32) in URI */ 307 /* check "/", "%" and "\" (Win32) in URI */
298 case sw_check_uri: 308 case sw_check_uri:
299 309
300 c = (u_char) (ch | 0x20); 310 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
301 if (c >= 'a' && c <= 'z') {
302 break;
303 }
304
305 if (ch >= '0' && ch <= '9') {
306 break; 311 break;
307 } 312 }
308 313
309 switch (ch) { 314 switch (ch) {
310 case '/': 315 case '/':
339 break; 344 break;
340 case '?': 345 case '?':
341 r->args_start = p + 1; 346 r->args_start = p + 1;
342 state = sw_uri; 347 state = sw_uri;
343 break; 348 break;
349 case '#':
350 r->complex_uri = 1;
351 state = sw_uri;
352 break;
344 case '+': 353 case '+':
345 r->plus_in_uri = 1; 354 r->plus_in_uri = 1;
346 break; 355 break;
347 case '\0': 356 case '\0':
348 r->zero_in_uri = 1; 357 r->zero_in_uri = 1;
350 } 359 }
351 break; 360 break;
352 361
353 /* URI */ 362 /* URI */
354 case sw_uri: 363 case sw_uri:
364
365 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
366 break;
367 }
368
355 switch (ch) { 369 switch (ch) {
356 case ' ': 370 case ' ':
357 r->uri_end = p; 371 r->uri_end = p;
358 state = sw_http_09; 372 state = sw_http_09;
359 break; 373 break;
364 break; 378 break;
365 case LF: 379 case LF:
366 r->uri_end = p; 380 r->uri_end = p;
367 r->http_minor = 9; 381 r->http_minor = 9;
368 goto done; 382 goto done;
383 case '#':
384 r->complex_uri = 1;
385 break;
369 case '\0': 386 case '\0':
370 r->zero_in_uri = 1; 387 r->zero_in_uri = 1;
371 break; 388 break;
372 } 389 }
373 break; 390 break;
790 "s:%d in:'%Xd:%c', out:'%c'", state, ch, ch, *u); 807 "s:%d in:'%Xd:%c', out:'%c'", state, ch, ch, *u);
791 808
792 switch (state) { 809 switch (state) {
793 810
794 case sw_usual: 811 case sw_usual:
812
813 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
814 *u++ = ch;
815 ch = *p++;
816 break;
817 }
818
795 switch(ch) { 819 switch(ch) {
796 #if (NGX_WIN32) 820 #if (NGX_WIN32)
797 case '\\': 821 case '\\':
798 r->uri_ext = NULL; 822 r->uri_ext = NULL;
799 823
820 quoted_state = state; 844 quoted_state = state;
821 state = sw_quoted; 845 state = sw_quoted;
822 break; 846 break;
823 case '?': 847 case '?':
824 r->args_start = p; 848 r->args_start = p;
849 goto args;
850 case '#':
825 goto done; 851 goto done;
826 case '.': 852 case '.':
827 r->uri_ext = u + 1; 853 r->uri_ext = u + 1;
828 *u++ = ch; 854 *u++ = ch;
829 break; 855 break;
831 r->plus_in_uri = 1; 857 r->plus_in_uri = 1;
832 default: 858 default:
833 *u++ = ch; 859 *u++ = ch;
834 break; 860 break;
835 } 861 }
862
836 ch = *p++; 863 ch = *p++;
837 break; 864 break;
838 865
839 case sw_slash: 866 case sw_slash:
867
868 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
869 state = sw_usual;
870 *u++ = ch;
871 ch = *p++;
872 break;
873 }
874
840 switch(ch) { 875 switch(ch) {
841 #if (NGX_WIN32) 876 #if (NGX_WIN32)
842 case '\\': 877 case '\\':
843 #endif 878 #endif
844 case '/': 879 case '/':
851 quoted_state = state; 886 quoted_state = state;
852 state = sw_quoted; 887 state = sw_quoted;
853 break; 888 break;
854 case '?': 889 case '?':
855 r->args_start = p; 890 r->args_start = p;
891 goto args;
892 case '#':
856 goto done; 893 goto done;
857 case '+': 894 case '+':
858 r->plus_in_uri = 1; 895 r->plus_in_uri = 1;
859 default: 896 default:
860 state = sw_usual; 897 state = sw_usual;
861 *u++ = ch; 898 *u++ = ch;
862 break; 899 break;
863 } 900 }
901
864 ch = *p++; 902 ch = *p++;
865 break; 903 break;
866 904
867 case sw_dot: 905 case sw_dot:
906
907 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
908 state = sw_usual;
909 *u++ = ch;
910 ch = *p++;
911 break;
912 }
913
868 switch(ch) { 914 switch(ch) {
869 #if (NGX_WIN32) 915 #if (NGX_WIN32)
870 case '\\': 916 case '\\':
871 #endif 917 #endif
872 case '/': 918 case '/':
881 quoted_state = state; 927 quoted_state = state;
882 state = sw_quoted; 928 state = sw_quoted;
883 break; 929 break;
884 case '?': 930 case '?':
885 r->args_start = p; 931 r->args_start = p;
932 goto args;
933 case '#':
886 goto done; 934 goto done;
887 case '+': 935 case '+':
888 r->plus_in_uri = 1; 936 r->plus_in_uri = 1;
889 default: 937 default:
890 state = sw_usual; 938 state = sw_usual;
891 *u++ = ch; 939 *u++ = ch;
892 break; 940 break;
893 } 941 }
942
894 ch = *p++; 943 ch = *p++;
895 break; 944 break;
896 945
897 case sw_dot_dot: 946 case sw_dot_dot:
947
948 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
949 state = sw_usual;
950 *u++ = ch;
951 ch = *p++;
952 break;
953 }
954
898 switch(ch) { 955 switch(ch) {
899 #if (NGX_WIN32) 956 #if (NGX_WIN32)
900 case '\\': 957 case '\\':
901 #endif 958 #endif
902 case '/': 959 case '/':
913 quoted_state = state; 970 quoted_state = state;
914 state = sw_quoted; 971 state = sw_quoted;
915 break; 972 break;
916 case '?': 973 case '?':
917 r->args_start = p; 974 r->args_start = p;
975 goto args;
976 case '#':
918 goto done; 977 goto done;
919 #if (NGX_WIN32) 978 #if (NGX_WIN32)
920 case '.': 979 case '.':
921 state = sw_dot_dot_dot; 980 state = sw_dot_dot_dot;
922 *u++ = ch; 981 *u++ = ch;
927 default: 986 default:
928 state = sw_usual; 987 state = sw_usual;
929 *u++ = ch; 988 *u++ = ch;
930 break; 989 break;
931 } 990 }
991
932 ch = *p++; 992 ch = *p++;
933 break; 993 break;
934 994
935 #if (NGX_WIN32) 995 #if (NGX_WIN32)
936 case sw_dot_dot_dot: 996 case sw_dot_dot_dot:
997
998 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
999 state = sw_usual;
1000 *u++ = ch;
1001 ch = *p++;
1002 break;
1003 }
1004
937 switch(ch) { 1005 switch(ch) {
938 case '\\': 1006 case '\\':
939 case '/': 1007 case '/':
940 state = sw_slash; 1008 state = sw_slash;
941 u -= 5; 1009 u -= 5;
956 quoted_state = state; 1024 quoted_state = state;
957 state = sw_quoted; 1025 state = sw_quoted;
958 break; 1026 break;
959 case '?': 1027 case '?':
960 r->args_start = p; 1028 r->args_start = p;
1029 goto args;
1030 case '#':
961 goto done; 1031 goto done;
962 case '+': 1032 case '+':
963 r->plus_in_uri = 1; 1033 r->plus_in_uri = 1;
964 default: 1034 default:
965 state = sw_usual; 1035 state = sw_usual;
966 *u++ = ch; 1036 *u++ = ch;
967 break; 1037 break;
968 } 1038 }
1039
969 ch = *p++; 1040 ch = *p++;
970 break; 1041 break;
971 #endif 1042 #endif
972 1043
973 case sw_quoted: 1044 case sw_quoted:
999 *u++ = ch; 1070 *u++ = ch;
1000 ch = *p++; 1071 ch = *p++;
1001 break; 1072 break;
1002 } 1073 }
1003 1074
1004 if (ch == '\0') { 1075 if (ch == '#') {
1076 *u++ = ch;
1077 ch = *p++;
1078
1079 } else if (ch == '\0') {
1005 r->zero_in_uri = 1; 1080 r->zero_in_uri = 1;
1006 } 1081 }
1007 1082
1008 state = quoted_state; 1083 state = quoted_state;
1009 break; 1084 break;
1030 } 1105 }
1031 1106
1032 done: 1107 done:
1033 1108
1034 r->uri.len = u - r->uri.data; 1109 r->uri.len = u - r->uri.data;
1035 r->uri.data[r->uri.len] = '\0'; 1110
1111 if (r->uri_ext) {
1112 r->exten.len = u - r->uri_ext;
1113 r->exten.data = r->uri_ext;
1114 }
1115
1116 r->uri_ext = NULL;
1117
1118 return NGX_OK;
1119
1120 args:
1121
1122 while (p < r->uri_end) {
1123 if (*p++ != '#') {
1124 continue;
1125 }
1126
1127 r->args.len = p - 1 - r->args_start;
1128 r->args.data = r->args_start;
1129 r->args_start = NULL;
1130
1131 break;
1132 }
1133
1134 r->uri.len = u - r->uri.data;
1036 1135
1037 if (r->uri_ext) { 1136 if (r->uri_ext) {
1038 r->exten.len = u - r->uri_ext; 1137 r->exten.len = u - r->uri_ext;
1039 r->exten.data = r->uri_ext; 1138 r->exten.data = r->uri_ext;
1040 } 1139 }
1070 1169
1071 for ( /* void */ ; len; len--) { 1170 for ( /* void */ ; len; len--) {
1072 1171
1073 ch = *p++; 1172 ch = *p++;
1074 1173
1174 if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
1175 continue;
1176 }
1177
1075 if (ch == '?') { 1178 if (ch == '?') {
1076 args->len = len - 1; 1179 args->len = len - 1;
1077 args->data = p; 1180 args->data = p;
1078 uri->len -= len; 1181 uri->len -= len;
1079 1182
1083 if (ch == '\0') { 1186 if (ch == '\0') {
1084 *flags |= NGX_HTTP_ZERO_IN_URI; 1187 *flags |= NGX_HTTP_ZERO_IN_URI;
1085 continue; 1188 continue;
1086 } 1189 }
1087 1190
1088 if (ch != '/' 1191 if ((ch == '/'
1089 #if (NGX_WIN32) 1192 #if (NGX_WIN32)
1090 && ch != '\\' 1193 || ch == '\\'
1091 #endif 1194 #endif
1092 ) 1195 ) && len > 2)
1093 { 1196 {
1094 continue;
1095 }
1096
1097 if (len > 2) {
1098
1099 /* detect "/../" */ 1197 /* detect "/../" */
1100 1198
1101 if (p[0] == '.' && p[1] == '.' && p[2] == '/') { 1199 if (p[0] == '.' && p[1] == '.' && p[2] == '/') {
1102 goto unsafe; 1200 goto unsafe;
1103 } 1201 }