Mercurial > hg > nginx-vendor-0-5
comparison src/http/ngx_http_parse.c @ 254:f3ec44f4a53b NGINX_0_4_12
nginx 0.4.12
*) Feature: the ngx_http_perl_module supports the $r->variable method.
*) Bugfix: if a big static file was included using SSI in a response,
then the response might be transferred incompletely.
*) Bugfix: nginx did not omit the "#fragment" part of the URI.
author | Igor Sysoev <http://sysoev.ru> |
---|---|
date | Tue, 31 Oct 2006 00:00:00 +0300 |
parents | fbf2b2f66c9f |
children | 2e9c57a5e50a |
comparison
equal
deleted
inserted
replaced
253:b75231e1a353 | 254:f3ec44f4a53b |
---|---|
5 | 5 |
6 | 6 |
7 #include <ngx_config.h> | 7 #include <ngx_config.h> |
8 #include <ngx_core.h> | 8 #include <ngx_core.h> |
9 #include <ngx_http.h> | 9 #include <ngx_http.h> |
10 | |
11 | |
12 static uint32_t usual[] = { | |
13 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */ | |
14 | |
15 /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */ | |
16 0x7fff37d6, /* 0111 1111 1111 1111 0011 0111 1101 0110 */ | |
17 | |
18 /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ | |
19 #if (NGX_WIN32) | |
20 0xefffffff, /* 1110 1111 1111 1111 1111 1111 1111 1111 */ | |
21 #else | |
22 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | |
23 #endif | |
24 | |
25 /* ~}| {zyx wvut srqp onml kjih gfed cba` */ | |
26 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | |
27 | |
28 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | |
29 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | |
30 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | |
31 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ | |
32 }; | |
10 | 33 |
11 | 34 |
12 /* gcc, icc, msvc and others compile these switches as an jump table */ | 35 /* gcc, icc, msvc and others compile these switches as an jump table */ |
13 | 36 |
14 ngx_int_t | 37 ngx_int_t |
16 { | 39 { |
17 u_char c, ch, *p, *m; | 40 u_char c, ch, *p, *m; |
18 enum { | 41 enum { |
19 sw_start = 0, | 42 sw_start = 0, |
20 sw_method, | 43 sw_method, |
21 sw_space_after_method, | |
22 sw_spaces_before_uri, | 44 sw_spaces_before_uri, |
23 sw_schema, | 45 sw_schema, |
24 sw_schema_slash, | 46 sw_schema_slash, |
25 sw_schema_slash_slash, | 47 sw_schema_slash_slash, |
26 sw_host, | 48 sw_host, |
116 return NGX_HTTP_PARSE_INVALID_METHOD; | 138 return NGX_HTTP_PARSE_INVALID_METHOD; |
117 } | 139 } |
118 | 140 |
119 break; | 141 break; |
120 | 142 |
121 /* single space after method */ | |
122 case sw_space_after_method: | |
123 switch (ch) { | |
124 case ' ': | |
125 state = sw_spaces_before_uri; | |
126 break; | |
127 default: | |
128 return NGX_HTTP_PARSE_INVALID_METHOD; | |
129 } | |
130 break; | |
131 | |
132 /* space* before URI */ | 143 /* space* before URI */ |
133 case sw_spaces_before_uri: | 144 case sw_spaces_before_uri: |
145 | |
146 if (ch == '/' ){ | |
147 r->uri_start = p; | |
148 state = sw_after_slash_in_uri; | |
149 break; | |
150 } | |
134 | 151 |
135 c = (u_char) (ch | 0x20); | 152 c = (u_char) (ch | 0x20); |
136 if (c >= 'a' && c <= 'z') { | 153 if (c >= 'a' && c <= 'z') { |
137 r->schema_start = p; | 154 r->schema_start = p; |
138 state = sw_schema; | 155 state = sw_schema; |
139 break; | 156 break; |
140 } | 157 } |
141 | 158 |
142 switch (ch) { | 159 switch (ch) { |
143 case '/': | |
144 r->uri_start = p; | |
145 state = sw_after_slash_in_uri; | |
146 break; | |
147 case ' ': | 160 case ' ': |
148 break; | 161 break; |
149 default: | 162 default: |
150 return NGX_HTTP_PARSE_INVALID_REQUEST; | 163 return NGX_HTTP_PARSE_INVALID_REQUEST; |
151 } | 164 } |
194 c = (u_char) (ch | 0x20); | 207 c = (u_char) (ch | 0x20); |
195 if (c >= 'a' && c <= 'z') { | 208 if (c >= 'a' && c <= 'z') { |
196 break; | 209 break; |
197 } | 210 } |
198 | 211 |
199 if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') | 212 if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') { |
200 { | |
201 break; | 213 break; |
202 } | 214 } |
203 | 215 |
204 switch (ch) { | 216 switch (ch) { |
205 case ':': | 217 case ':': |
233 break; | 245 break; |
234 | 246 |
235 /* check "/.", "//", "%", and "\" (Win32) in URI */ | 247 /* check "/.", "//", "%", and "\" (Win32) in URI */ |
236 case sw_after_slash_in_uri: | 248 case sw_after_slash_in_uri: |
237 | 249 |
238 c = (u_char) (ch | 0x20); | 250 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { |
239 if (c >= 'a' && c <= 'z') { | |
240 state = sw_check_uri; | |
241 break; | |
242 } | |
243 | |
244 if (ch >= '0' && ch <= '9') { | |
245 state = sw_check_uri; | 251 state = sw_check_uri; |
246 break; | 252 break; |
247 } | 253 } |
248 | 254 |
249 switch (ch) { | 255 switch (ch) { |
280 #endif | 286 #endif |
281 case '?': | 287 case '?': |
282 r->args_start = p + 1; | 288 r->args_start = p + 1; |
283 state = sw_uri; | 289 state = sw_uri; |
284 break; | 290 break; |
291 case '#': | |
292 r->complex_uri = 1; | |
293 state = sw_uri; | |
294 break; | |
285 case '+': | 295 case '+': |
286 r->plus_in_uri = 1; | 296 r->plus_in_uri = 1; |
287 break; | 297 break; |
288 case '\0': | 298 case '\0': |
289 r->zero_in_uri = 1; | 299 r->zero_in_uri = 1; |
295 break; | 305 break; |
296 | 306 |
297 /* check "/", "%" and "\" (Win32) in URI */ | 307 /* check "/", "%" and "\" (Win32) in URI */ |
298 case sw_check_uri: | 308 case sw_check_uri: |
299 | 309 |
300 c = (u_char) (ch | 0x20); | 310 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { |
301 if (c >= 'a' && c <= 'z') { | |
302 break; | |
303 } | |
304 | |
305 if (ch >= '0' && ch <= '9') { | |
306 break; | 311 break; |
307 } | 312 } |
308 | 313 |
309 switch (ch) { | 314 switch (ch) { |
310 case '/': | 315 case '/': |
339 break; | 344 break; |
340 case '?': | 345 case '?': |
341 r->args_start = p + 1; | 346 r->args_start = p + 1; |
342 state = sw_uri; | 347 state = sw_uri; |
343 break; | 348 break; |
349 case '#': | |
350 r->complex_uri = 1; | |
351 state = sw_uri; | |
352 break; | |
344 case '+': | 353 case '+': |
345 r->plus_in_uri = 1; | 354 r->plus_in_uri = 1; |
346 break; | 355 break; |
347 case '\0': | 356 case '\0': |
348 r->zero_in_uri = 1; | 357 r->zero_in_uri = 1; |
350 } | 359 } |
351 break; | 360 break; |
352 | 361 |
353 /* URI */ | 362 /* URI */ |
354 case sw_uri: | 363 case sw_uri: |
364 | |
365 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
366 break; | |
367 } | |
368 | |
355 switch (ch) { | 369 switch (ch) { |
356 case ' ': | 370 case ' ': |
357 r->uri_end = p; | 371 r->uri_end = p; |
358 state = sw_http_09; | 372 state = sw_http_09; |
359 break; | 373 break; |
364 break; | 378 break; |
365 case LF: | 379 case LF: |
366 r->uri_end = p; | 380 r->uri_end = p; |
367 r->http_minor = 9; | 381 r->http_minor = 9; |
368 goto done; | 382 goto done; |
383 case '#': | |
384 r->complex_uri = 1; | |
385 break; | |
369 case '\0': | 386 case '\0': |
370 r->zero_in_uri = 1; | 387 r->zero_in_uri = 1; |
371 break; | 388 break; |
372 } | 389 } |
373 break; | 390 break; |
790 "s:%d in:'%Xd:%c', out:'%c'", state, ch, ch, *u); | 807 "s:%d in:'%Xd:%c', out:'%c'", state, ch, ch, *u); |
791 | 808 |
792 switch (state) { | 809 switch (state) { |
793 | 810 |
794 case sw_usual: | 811 case sw_usual: |
812 | |
813 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
814 *u++ = ch; | |
815 ch = *p++; | |
816 break; | |
817 } | |
818 | |
795 switch(ch) { | 819 switch(ch) { |
796 #if (NGX_WIN32) | 820 #if (NGX_WIN32) |
797 case '\\': | 821 case '\\': |
798 r->uri_ext = NULL; | 822 r->uri_ext = NULL; |
799 | 823 |
820 quoted_state = state; | 844 quoted_state = state; |
821 state = sw_quoted; | 845 state = sw_quoted; |
822 break; | 846 break; |
823 case '?': | 847 case '?': |
824 r->args_start = p; | 848 r->args_start = p; |
849 goto args; | |
850 case '#': | |
825 goto done; | 851 goto done; |
826 case '.': | 852 case '.': |
827 r->uri_ext = u + 1; | 853 r->uri_ext = u + 1; |
828 *u++ = ch; | 854 *u++ = ch; |
829 break; | 855 break; |
831 r->plus_in_uri = 1; | 857 r->plus_in_uri = 1; |
832 default: | 858 default: |
833 *u++ = ch; | 859 *u++ = ch; |
834 break; | 860 break; |
835 } | 861 } |
862 | |
836 ch = *p++; | 863 ch = *p++; |
837 break; | 864 break; |
838 | 865 |
839 case sw_slash: | 866 case sw_slash: |
867 | |
868 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
869 state = sw_usual; | |
870 *u++ = ch; | |
871 ch = *p++; | |
872 break; | |
873 } | |
874 | |
840 switch(ch) { | 875 switch(ch) { |
841 #if (NGX_WIN32) | 876 #if (NGX_WIN32) |
842 case '\\': | 877 case '\\': |
843 #endif | 878 #endif |
844 case '/': | 879 case '/': |
851 quoted_state = state; | 886 quoted_state = state; |
852 state = sw_quoted; | 887 state = sw_quoted; |
853 break; | 888 break; |
854 case '?': | 889 case '?': |
855 r->args_start = p; | 890 r->args_start = p; |
891 goto args; | |
892 case '#': | |
856 goto done; | 893 goto done; |
857 case '+': | 894 case '+': |
858 r->plus_in_uri = 1; | 895 r->plus_in_uri = 1; |
859 default: | 896 default: |
860 state = sw_usual; | 897 state = sw_usual; |
861 *u++ = ch; | 898 *u++ = ch; |
862 break; | 899 break; |
863 } | 900 } |
901 | |
864 ch = *p++; | 902 ch = *p++; |
865 break; | 903 break; |
866 | 904 |
867 case sw_dot: | 905 case sw_dot: |
906 | |
907 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
908 state = sw_usual; | |
909 *u++ = ch; | |
910 ch = *p++; | |
911 break; | |
912 } | |
913 | |
868 switch(ch) { | 914 switch(ch) { |
869 #if (NGX_WIN32) | 915 #if (NGX_WIN32) |
870 case '\\': | 916 case '\\': |
871 #endif | 917 #endif |
872 case '/': | 918 case '/': |
881 quoted_state = state; | 927 quoted_state = state; |
882 state = sw_quoted; | 928 state = sw_quoted; |
883 break; | 929 break; |
884 case '?': | 930 case '?': |
885 r->args_start = p; | 931 r->args_start = p; |
932 goto args; | |
933 case '#': | |
886 goto done; | 934 goto done; |
887 case '+': | 935 case '+': |
888 r->plus_in_uri = 1; | 936 r->plus_in_uri = 1; |
889 default: | 937 default: |
890 state = sw_usual; | 938 state = sw_usual; |
891 *u++ = ch; | 939 *u++ = ch; |
892 break; | 940 break; |
893 } | 941 } |
942 | |
894 ch = *p++; | 943 ch = *p++; |
895 break; | 944 break; |
896 | 945 |
897 case sw_dot_dot: | 946 case sw_dot_dot: |
947 | |
948 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
949 state = sw_usual; | |
950 *u++ = ch; | |
951 ch = *p++; | |
952 break; | |
953 } | |
954 | |
898 switch(ch) { | 955 switch(ch) { |
899 #if (NGX_WIN32) | 956 #if (NGX_WIN32) |
900 case '\\': | 957 case '\\': |
901 #endif | 958 #endif |
902 case '/': | 959 case '/': |
913 quoted_state = state; | 970 quoted_state = state; |
914 state = sw_quoted; | 971 state = sw_quoted; |
915 break; | 972 break; |
916 case '?': | 973 case '?': |
917 r->args_start = p; | 974 r->args_start = p; |
975 goto args; | |
976 case '#': | |
918 goto done; | 977 goto done; |
919 #if (NGX_WIN32) | 978 #if (NGX_WIN32) |
920 case '.': | 979 case '.': |
921 state = sw_dot_dot_dot; | 980 state = sw_dot_dot_dot; |
922 *u++ = ch; | 981 *u++ = ch; |
927 default: | 986 default: |
928 state = sw_usual; | 987 state = sw_usual; |
929 *u++ = ch; | 988 *u++ = ch; |
930 break; | 989 break; |
931 } | 990 } |
991 | |
932 ch = *p++; | 992 ch = *p++; |
933 break; | 993 break; |
934 | 994 |
935 #if (NGX_WIN32) | 995 #if (NGX_WIN32) |
936 case sw_dot_dot_dot: | 996 case sw_dot_dot_dot: |
997 | |
998 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
999 state = sw_usual; | |
1000 *u++ = ch; | |
1001 ch = *p++; | |
1002 break; | |
1003 } | |
1004 | |
937 switch(ch) { | 1005 switch(ch) { |
938 case '\\': | 1006 case '\\': |
939 case '/': | 1007 case '/': |
940 state = sw_slash; | 1008 state = sw_slash; |
941 u -= 5; | 1009 u -= 5; |
956 quoted_state = state; | 1024 quoted_state = state; |
957 state = sw_quoted; | 1025 state = sw_quoted; |
958 break; | 1026 break; |
959 case '?': | 1027 case '?': |
960 r->args_start = p; | 1028 r->args_start = p; |
1029 goto args; | |
1030 case '#': | |
961 goto done; | 1031 goto done; |
962 case '+': | 1032 case '+': |
963 r->plus_in_uri = 1; | 1033 r->plus_in_uri = 1; |
964 default: | 1034 default: |
965 state = sw_usual; | 1035 state = sw_usual; |
966 *u++ = ch; | 1036 *u++ = ch; |
967 break; | 1037 break; |
968 } | 1038 } |
1039 | |
969 ch = *p++; | 1040 ch = *p++; |
970 break; | 1041 break; |
971 #endif | 1042 #endif |
972 | 1043 |
973 case sw_quoted: | 1044 case sw_quoted: |
999 *u++ = ch; | 1070 *u++ = ch; |
1000 ch = *p++; | 1071 ch = *p++; |
1001 break; | 1072 break; |
1002 } | 1073 } |
1003 | 1074 |
1004 if (ch == '\0') { | 1075 if (ch == '#') { |
1076 *u++ = ch; | |
1077 ch = *p++; | |
1078 | |
1079 } else if (ch == '\0') { | |
1005 r->zero_in_uri = 1; | 1080 r->zero_in_uri = 1; |
1006 } | 1081 } |
1007 | 1082 |
1008 state = quoted_state; | 1083 state = quoted_state; |
1009 break; | 1084 break; |
1030 } | 1105 } |
1031 | 1106 |
1032 done: | 1107 done: |
1033 | 1108 |
1034 r->uri.len = u - r->uri.data; | 1109 r->uri.len = u - r->uri.data; |
1035 r->uri.data[r->uri.len] = '\0'; | 1110 |
1111 if (r->uri_ext) { | |
1112 r->exten.len = u - r->uri_ext; | |
1113 r->exten.data = r->uri_ext; | |
1114 } | |
1115 | |
1116 r->uri_ext = NULL; | |
1117 | |
1118 return NGX_OK; | |
1119 | |
1120 args: | |
1121 | |
1122 while (p < r->uri_end) { | |
1123 if (*p++ != '#') { | |
1124 continue; | |
1125 } | |
1126 | |
1127 r->args.len = p - 1 - r->args_start; | |
1128 r->args.data = r->args_start; | |
1129 r->args_start = NULL; | |
1130 | |
1131 break; | |
1132 } | |
1133 | |
1134 r->uri.len = u - r->uri.data; | |
1036 | 1135 |
1037 if (r->uri_ext) { | 1136 if (r->uri_ext) { |
1038 r->exten.len = u - r->uri_ext; | 1137 r->exten.len = u - r->uri_ext; |
1039 r->exten.data = r->uri_ext; | 1138 r->exten.data = r->uri_ext; |
1040 } | 1139 } |
1070 | 1169 |
1071 for ( /* void */ ; len; len--) { | 1170 for ( /* void */ ; len; len--) { |
1072 | 1171 |
1073 ch = *p++; | 1172 ch = *p++; |
1074 | 1173 |
1174 if (usual[ch >> 5] & (1 << (ch & 0x1f))) { | |
1175 continue; | |
1176 } | |
1177 | |
1075 if (ch == '?') { | 1178 if (ch == '?') { |
1076 args->len = len - 1; | 1179 args->len = len - 1; |
1077 args->data = p; | 1180 args->data = p; |
1078 uri->len -= len; | 1181 uri->len -= len; |
1079 | 1182 |
1083 if (ch == '\0') { | 1186 if (ch == '\0') { |
1084 *flags |= NGX_HTTP_ZERO_IN_URI; | 1187 *flags |= NGX_HTTP_ZERO_IN_URI; |
1085 continue; | 1188 continue; |
1086 } | 1189 } |
1087 | 1190 |
1088 if (ch != '/' | 1191 if ((ch == '/' |
1089 #if (NGX_WIN32) | 1192 #if (NGX_WIN32) |
1090 && ch != '\\' | 1193 || ch == '\\' |
1091 #endif | 1194 #endif |
1092 ) | 1195 ) && len > 2) |
1093 { | 1196 { |
1094 continue; | |
1095 } | |
1096 | |
1097 if (len > 2) { | |
1098 | |
1099 /* detect "/../" */ | 1197 /* detect "/../" */ |
1100 | 1198 |
1101 if (p[0] == '.' && p[1] == '.' && p[2] == '/') { | 1199 if (p[0] == '.' && p[1] == '.' && p[2] == '/') { |
1102 goto unsafe; | 1200 goto unsafe; |
1103 } | 1201 } |