Mercurial > hg > nginx
comparison src/core/ngx_regex.c @ 7981:0b5f12d5c531
PCRE2 library support.
The PCRE2 library is now used by default if found, instead of the
original PCRE library. If needed for some reason, this can be disabled
with the --without-pcre2 configure option.
To make it possible to specify paths to the library and include files
via --with-cc-opt / --with-ld-opt, the library is first tested without
any additional paths and options. If this fails, the pcre2-config script
is used.
As with the original PCRE library, it is now possible to build PCRE2
from source as part of nginx configure, by using the --with-pcre= option.
The configure script automatically detects whether PCRE or PCRE2 sources are provided.
Note that compiling PCRE2 10.33 and later requires inttypes.h. When
compiling on Windows with MSVC, inttypes.h is only available starting
with MSVC 2013. In older versions some replacement needs to be provided
("echo '#include <stdint.h>' > pcre2-10.xx/src/inttypes.h" is good enough
for MSVC 2010).
The interface on the nginx side remains unchanged.
author | Maxim Dounin <mdounin@mdounin.ru> |
---|---|
date | Sat, 25 Dec 2021 01:07:15 +0300 |
parents | 060bf88d2473 |
children | fbbb5ce52995 |
comparison
equal
deleted
inserted
replaced
7980:8007ea138d6a | 7981:0b5f12d5c531 |
---|---|
16 | 16 |
17 | 17 |
18 static ngx_inline void ngx_regex_malloc_init(ngx_pool_t *pool); | 18 static ngx_inline void ngx_regex_malloc_init(ngx_pool_t *pool); |
19 static ngx_inline void ngx_regex_malloc_done(void); | 19 static ngx_inline void ngx_regex_malloc_done(void); |
20 | 20 |
21 #if (NGX_PCRE2) | |
22 static void * ngx_libc_cdecl ngx_regex_malloc(size_t size, void *data); | |
23 static void ngx_libc_cdecl ngx_regex_free(void *p, void *data); | |
24 #else | |
21 static void * ngx_libc_cdecl ngx_regex_malloc(size_t size); | 25 static void * ngx_libc_cdecl ngx_regex_malloc(size_t size); |
22 static void ngx_libc_cdecl ngx_regex_free(void *p); | 26 static void ngx_libc_cdecl ngx_regex_free(void *p); |
27 #endif | |
23 static void ngx_regex_cleanup(void *data); | 28 static void ngx_regex_cleanup(void *data); |
24 | 29 |
25 static ngx_int_t ngx_regex_module_init(ngx_cycle_t *cycle); | 30 static ngx_int_t ngx_regex_module_init(ngx_cycle_t *cycle); |
26 | 31 |
27 static void *ngx_regex_create_conf(ngx_cycle_t *cycle); | 32 static void *ngx_regex_create_conf(ngx_cycle_t *cycle); |
65 NULL, /* exit master */ | 70 NULL, /* exit master */ |
66 NGX_MODULE_V1_PADDING | 71 NGX_MODULE_V1_PADDING |
67 }; | 72 }; |
68 | 73 |
69 | 74 |
70 static ngx_pool_t *ngx_regex_pool; | 75 static ngx_pool_t *ngx_regex_pool; |
71 static ngx_list_t *ngx_regex_studies; | 76 static ngx_list_t *ngx_regex_studies; |
77 static ngx_uint_t ngx_regex_direct_alloc; | |
78 | |
79 #if (NGX_PCRE2) | |
80 static pcre2_compile_context *ngx_regex_compile_context; | |
81 static pcre2_match_data *ngx_regex_match_data; | |
82 static ngx_uint_t ngx_regex_match_data_size; | |
83 #endif | |
72 | 84 |
73 | 85 |
74 void | 86 void |
75 ngx_regex_init(void) | 87 ngx_regex_init(void) |
76 { | 88 { |
89 #if !(NGX_PCRE2) | |
77 pcre_malloc = ngx_regex_malloc; | 90 pcre_malloc = ngx_regex_malloc; |
78 pcre_free = ngx_regex_free; | 91 pcre_free = ngx_regex_free; |
92 #endif | |
79 } | 93 } |
80 | 94 |
81 | 95 |
82 static ngx_inline void | 96 static ngx_inline void |
83 ngx_regex_malloc_init(ngx_pool_t *pool) | 97 ngx_regex_malloc_init(ngx_pool_t *pool) |
84 { | 98 { |
85 ngx_regex_pool = pool; | 99 ngx_regex_pool = pool; |
100 ngx_regex_direct_alloc = (pool == NULL) ? 1 : 0; | |
86 } | 101 } |
87 | 102 |
88 | 103 |
89 static ngx_inline void | 104 static ngx_inline void |
90 ngx_regex_malloc_done(void) | 105 ngx_regex_malloc_done(void) |
91 { | 106 { |
92 ngx_regex_pool = NULL; | 107 ngx_regex_pool = NULL; |
93 } | 108 ngx_regex_direct_alloc = 0; |
94 | 109 } |
110 | |
111 | |
112 #if (NGX_PCRE2) | |
113 | |
114 ngx_int_t | |
115 ngx_regex_compile(ngx_regex_compile_t *rc) | |
116 { | |
117 int n, errcode; | |
118 char *p; | |
119 u_char errstr[128]; | |
120 size_t erroff; | |
121 pcre2_code *re; | |
122 ngx_regex_elt_t *elt; | |
123 pcre2_general_context *gctx; | |
124 pcre2_compile_context *cctx; | |
125 | |
126 if (ngx_regex_compile_context == NULL) { | |
127 /* | |
128 * Allocate a compile context if not yet allocated. This uses | |
129 * direct allocations from heap, so the result can be cached | |
130 * even at runtime. | |
131 */ | |
132 | |
133 ngx_regex_malloc_init(NULL); | |
134 | |
135 gctx = pcre2_general_context_create(ngx_regex_malloc, ngx_regex_free, | |
136 NULL); | |
137 if (gctx == NULL) { | |
138 ngx_regex_malloc_done(); | |
139 goto nomem; | |
140 } | |
141 | |
142 cctx = pcre2_compile_context_create(gctx); | |
143 if (cctx == NULL) { | |
144 pcre2_general_context_free(gctx); | |
145 ngx_regex_malloc_done(); | |
146 goto nomem; | |
147 } | |
148 | |
149 ngx_regex_compile_context = cctx; | |
150 | |
151 pcre2_general_context_free(gctx); | |
152 ngx_regex_malloc_done(); | |
153 } | |
154 | |
155 ngx_regex_malloc_init(rc->pool); | |
156 | |
157 re = pcre2_compile(rc->pattern.data, rc->pattern.len, | |
158 (uint32_t) rc->options, &errcode, &erroff, | |
159 ngx_regex_compile_context); | |
160 | |
161 /* ensure that there is no current pool */ | |
162 ngx_regex_malloc_done(); | |
163 | |
164 if (re == NULL) { | |
165 pcre2_get_error_message(errcode, errstr, 128); | |
166 | |
167 if ((size_t) erroff == rc->pattern.len) { | |
168 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len, | |
169 "pcre2_compile() failed: %s in \"%V\"", | |
170 errstr, &rc->pattern) | |
171 - rc->err.data; | |
172 | |
173 } else { | |
174 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len, | |
175 "pcre2_compile() failed: %s in \"%V\" at \"%s\"", | |
176 errstr, &rc->pattern, rc->pattern.data + erroff) | |
177 - rc->err.data; | |
178 } | |
179 | |
180 return NGX_ERROR; | |
181 } | |
182 | |
183 rc->regex = re; | |
184 | |
185 /* do not study at runtime */ | |
186 | |
187 if (ngx_regex_studies != NULL) { | |
188 elt = ngx_list_push(ngx_regex_studies); | |
189 if (elt == NULL) { | |
190 goto nomem; | |
191 } | |
192 | |
193 elt->regex = rc->regex; | |
194 elt->name = rc->pattern.data; | |
195 } | |
196 | |
197 n = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &rc->captures); | |
198 if (n < 0) { | |
199 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_CAPTURECOUNT) failed: %d"; | |
200 goto failed; | |
201 } | |
202 | |
203 if (rc->captures == 0) { | |
204 return NGX_OK; | |
205 } | |
206 | |
207 n = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &rc->named_captures); | |
208 if (n < 0) { | |
209 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMECOUNT) failed: %d"; | |
210 goto failed; | |
211 } | |
212 | |
213 if (rc->named_captures == 0) { | |
214 return NGX_OK; | |
215 } | |
216 | |
217 n = pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &rc->name_size); | |
218 if (n < 0) { | |
219 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMEENTRYSIZE) failed: %d"; | |
220 goto failed; | |
221 } | |
222 | |
223 n = pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &rc->names); | |
224 if (n < 0) { | |
225 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMETABLE) failed: %d"; | |
226 goto failed; | |
227 } | |
228 | |
229 return NGX_OK; | |
230 | |
231 failed: | |
232 | |
233 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len, p, &rc->pattern, n) | |
234 - rc->err.data; | |
235 return NGX_ERROR; | |
236 | |
237 nomem: | |
238 | |
239 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len, | |
240 "regex \"%V\" compilation failed: no memory", | |
241 &rc->pattern) | |
242 - rc->err.data; | |
243 return NGX_ERROR; | |
244 } | |
245 | |
246 #else | |
95 | 247 |
96 ngx_int_t | 248 ngx_int_t |
97 ngx_regex_compile(ngx_regex_compile_t *rc) | 249 ngx_regex_compile(ngx_regex_compile_t *rc) |
98 { | 250 { |
99 int n, erroff; | 251 int n, erroff; |
193 &rc->pattern) | 345 &rc->pattern) |
194 - rc->err.data; | 346 - rc->err.data; |
195 return NGX_ERROR; | 347 return NGX_ERROR; |
196 } | 348 } |
197 | 349 |
350 #endif | |
351 | |
352 | |
353 #if (NGX_PCRE2) | |
354 | |
355 ngx_int_t | |
356 ngx_regex_exec(ngx_regex_t *re, ngx_str_t *s, int *captures, ngx_uint_t size) | |
357 { | |
358 size_t *ov; | |
359 ngx_int_t rc; | |
360 ngx_uint_t n, i; | |
361 | |
362 /* | |
363 * The pcre2_match() function might allocate memory for backtracking | |
364 * frames, typical allocations are from 40k and above. So the allocator | |
365 * is configured to do direct allocations from heap during matching. | |
366 */ | |
367 | |
368 ngx_regex_malloc_init(NULL); | |
369 | |
370 if (ngx_regex_match_data == NULL | |
371 || size > ngx_regex_match_data_size) | |
372 { | |
373 /* | |
374 * Allocate a match data if not yet allocated or smaller than | |
375 * needed. | |
376 */ | |
377 | |
378 if (ngx_regex_match_data) { | |
379 pcre2_match_data_free(ngx_regex_match_data); | |
380 } | |
381 | |
382 ngx_regex_match_data_size = size; | |
383 ngx_regex_match_data = pcre2_match_data_create(size / 3, NULL); | |
384 | |
385 if (ngx_regex_match_data == NULL) { | |
386 rc = PCRE2_ERROR_NOMEMORY; | |
387 goto failed; | |
388 } | |
389 } | |
390 | |
391 rc = pcre2_match(re, s->data, s->len, 0, 0, ngx_regex_match_data, NULL); | |
392 | |
393 if (rc < 0) { | |
394 goto failed; | |
395 } | |
396 | |
397 n = pcre2_get_ovector_count(ngx_regex_match_data); | |
398 ov = pcre2_get_ovector_pointer(ngx_regex_match_data); | |
399 | |
400 if (n > size / 3) { | |
401 n = size / 3; | |
402 } | |
403 | |
404 for (i = 0; i < n; i++) { | |
405 captures[i * 2] = ov[i * 2]; | |
406 captures[i * 2 + 1] = ov[i * 2 + 1]; | |
407 } | |
408 | |
409 failed: | |
410 | |
411 ngx_regex_malloc_done(); | |
412 | |
413 return rc; | |
414 } | |
415 | |
416 #endif | |
417 | |
198 | 418 |
199 ngx_int_t | 419 ngx_int_t |
200 ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log) | 420 ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log) |
201 { | 421 { |
202 ngx_int_t n; | 422 ngx_int_t n; |
227 | 447 |
228 return NGX_DECLINED; | 448 return NGX_DECLINED; |
229 } | 449 } |
230 | 450 |
231 | 451 |
452 #if (NGX_PCRE2) | |
453 | |
454 static void * ngx_libc_cdecl | |
455 ngx_regex_malloc(size_t size, void *data) | |
456 { | |
457 if (ngx_regex_pool) { | |
458 return ngx_palloc(ngx_regex_pool, size); | |
459 } | |
460 | |
461 if (ngx_regex_direct_alloc) { | |
462 return ngx_alloc(size, ngx_cycle->log); | |
463 } | |
464 | |
465 return NULL; | |
466 } | |
467 | |
468 | |
469 static void ngx_libc_cdecl | |
470 ngx_regex_free(void *p, void *data) | |
471 { | |
472 if (ngx_regex_direct_alloc) { | |
473 ngx_free(p); | |
474 } | |
475 | |
476 return; | |
477 } | |
478 | |
479 #else | |
480 | |
232 static void * ngx_libc_cdecl | 481 static void * ngx_libc_cdecl |
233 ngx_regex_malloc(size_t size) | 482 ngx_regex_malloc(size_t size) |
234 { | 483 { |
235 if (ngx_regex_pool) { | 484 if (ngx_regex_pool) { |
236 return ngx_palloc(ngx_regex_pool, size); | 485 return ngx_palloc(ngx_regex_pool, size); |
244 ngx_regex_free(void *p) | 493 ngx_regex_free(void *p) |
245 { | 494 { |
246 return; | 495 return; |
247 } | 496 } |
248 | 497 |
498 #endif | |
499 | |
249 | 500 |
250 static void | 501 static void |
251 ngx_regex_cleanup(void *data) | 502 ngx_regex_cleanup(void *data) |
252 { | 503 { |
253 #if (NGX_HAVE_PCRE_JIT) | 504 #if (NGX_PCRE2 || NGX_HAVE_PCRE_JIT) |
254 ngx_regex_conf_t *rcf = data; | 505 ngx_regex_conf_t *rcf = data; |
255 | 506 |
256 ngx_uint_t i; | 507 ngx_uint_t i; |
257 ngx_list_part_t *part; | 508 ngx_list_part_t *part; |
258 ngx_regex_elt_t *elts; | 509 ngx_regex_elt_t *elts; |
273 } | 524 } |
274 | 525 |
275 /* | 526 /* |
276 * The PCRE JIT compiler uses mmap for its executable codes, so we | 527 * The PCRE JIT compiler uses mmap for its executable codes, so we |
277 * have to explicitly call the pcre_free_study() function to free | 528 * have to explicitly call the pcre_free_study() function to free |
278 * this memory. | 529 * this memory. In PCRE2, we call the pcre2_code_free() function |
530 * for the same reason. | |
279 */ | 531 */ |
280 | 532 |
533 #if (NGX_PCRE2) | |
534 pcre2_code_free(elts[i].regex); | |
535 #else | |
281 if (elts[i].regex->extra != NULL) { | 536 if (elts[i].regex->extra != NULL) { |
282 pcre_free_study(elts[i].regex->extra); | 537 pcre_free_study(elts[i].regex->extra); |
283 } | 538 } |
539 #endif | |
284 } | 540 } |
285 #endif | 541 #endif |
286 | 542 |
287 /* | 543 /* |
288 * On configuration parsing errors ngx_regex_module_init() will not | 544 * On configuration parsing errors ngx_regex_module_init() will not |
289 * be called. Make sure ngx_regex_studies is properly cleared anyway. | 545 * be called. Make sure ngx_regex_studies is properly cleared anyway. |
290 */ | 546 */ |
291 | 547 |
292 ngx_regex_studies = NULL; | 548 ngx_regex_studies = NULL; |
549 | |
550 #if (NGX_PCRE2) | |
551 | |
552 /* | |
553 * Free compile context and match data. If needed at runtime by | |
554 * the new cycle, these will be re-allocated. | |
555 */ | |
556 | |
557 if (ngx_regex_compile_context) { | |
558 pcre2_compile_context_free(ngx_regex_compile_context); | |
559 ngx_regex_compile_context = NULL; | |
560 } | |
561 | |
562 if (ngx_regex_match_data) { | |
563 pcre2_match_data_free(ngx_regex_match_data); | |
564 ngx_regex_match_data = NULL; | |
565 ngx_regex_match_data_size = 0; | |
566 } | |
567 | |
568 #endif | |
293 } | 569 } |
294 | 570 |
295 | 571 |
296 static ngx_int_t | 572 static ngx_int_t |
297 ngx_regex_module_init(ngx_cycle_t *cycle) | 573 ngx_regex_module_init(ngx_cycle_t *cycle) |
298 { | 574 { |
299 int opt; | 575 int opt; |
576 #if !(NGX_PCRE2) | |
300 const char *errstr; | 577 const char *errstr; |
578 #endif | |
301 ngx_uint_t i; | 579 ngx_uint_t i; |
302 ngx_list_part_t *part; | 580 ngx_list_part_t *part; |
303 ngx_regex_elt_t *elts; | 581 ngx_regex_elt_t *elts; |
304 ngx_regex_conf_t *rcf; | 582 ngx_regex_conf_t *rcf; |
305 | 583 |
306 opt = 0; | 584 opt = 0; |
307 | 585 |
308 rcf = (ngx_regex_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_regex_module); | 586 rcf = (ngx_regex_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_regex_module); |
309 | 587 |
310 #if (NGX_HAVE_PCRE_JIT) | 588 #if (NGX_PCRE2 || NGX_HAVE_PCRE_JIT) |
589 | |
311 if (rcf->pcre_jit) { | 590 if (rcf->pcre_jit) { |
591 #if (NGX_PCRE2) | |
592 opt = 1; | |
593 #else | |
312 opt = PCRE_STUDY_JIT_COMPILE; | 594 opt = PCRE_STUDY_JIT_COMPILE; |
313 } | 595 #endif |
596 } | |
597 | |
314 #endif | 598 #endif |
315 | 599 |
316 ngx_regex_malloc_init(cycle->pool); | 600 ngx_regex_malloc_init(cycle->pool); |
317 | 601 |
318 part = &rcf->studies->part; | 602 part = &rcf->studies->part; |
327 | 611 |
328 part = part->next; | 612 part = part->next; |
329 elts = part->elts; | 613 elts = part->elts; |
330 i = 0; | 614 i = 0; |
331 } | 615 } |
616 | |
617 #if (NGX_PCRE2) | |
618 | |
619 if (opt) { | |
620 int n; | |
621 | |
622 n = pcre2_jit_compile(elts[i].regex, PCRE2_JIT_COMPLETE); | |
623 | |
624 if (n != 0) { | |
625 ngx_log_error(NGX_LOG_INFO, cycle->log, 0, | |
626 "pcre2_jit_compile() failed: %d in \"%s\", " | |
627 "ignored", | |
628 n, elts[i].name); | |
629 } | |
630 } | |
631 | |
632 #else | |
332 | 633 |
333 elts[i].regex->extra = pcre_study(elts[i].regex->code, opt, &errstr); | 634 elts[i].regex->extra = pcre_study(elts[i].regex->code, opt, &errstr); |
334 | 635 |
335 if (errstr != NULL) { | 636 if (errstr != NULL) { |
336 ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, | 637 ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, |
351 "JIT compiler does not support pattern: \"%s\"", | 652 "JIT compiler does not support pattern: \"%s\"", |
352 elts[i].name); | 653 elts[i].name); |
353 } | 654 } |
354 } | 655 } |
355 #endif | 656 #endif |
657 #endif | |
356 } | 658 } |
357 | 659 |
358 ngx_regex_malloc_done(); | 660 ngx_regex_malloc_done(); |
359 | 661 |
360 ngx_regex_studies = NULL; | 662 ngx_regex_studies = NULL; |
663 #if (NGX_PCRE2) | |
664 ngx_regex_compile_context = NULL; | |
665 #endif | |
361 | 666 |
362 return NGX_OK; | 667 return NGX_OK; |
363 } | 668 } |
364 | 669 |
365 | 670 |
413 | 718 |
414 if (*fp == 0) { | 719 if (*fp == 0) { |
415 return NGX_CONF_OK; | 720 return NGX_CONF_OK; |
416 } | 721 } |
417 | 722 |
418 #if (NGX_HAVE_PCRE_JIT) | 723 #if (NGX_PCRE2) |
724 { | |
725 int r; | |
726 uint32_t jit; | |
727 | |
728 jit = 0; | |
729 r = pcre2_config(PCRE2_CONFIG_JIT, &jit); | |
730 | |
731 if (r != 0 || jit != 1) { | |
732 ngx_conf_log_error(NGX_LOG_WARN, cf, 0, | |
733 "PCRE2 library does not support JIT"); | |
734 *fp = 0; | |
735 } | |
736 } | |
737 #elif (NGX_HAVE_PCRE_JIT) | |
419 { | 738 { |
420 int jit, r; | 739 int jit, r; |
421 | 740 |
422 jit = 0; | 741 jit = 0; |
423 r = pcre_config(PCRE_CONFIG_JIT, &jit); | 742 r = pcre_config(PCRE_CONFIG_JIT, &jit); |