comparison src/core/ngx_regex.c @ 7981:0b5f12d5c531

PCRE2 library support. The PCRE2 library is now used by default if found, instead of the original PCRE library. If needed for some reason, this can be disabled with the --without-pcre2 configure option. To make it possible to specify paths to the library and include files via --with-cc-opt / --with-ld-opt, the library is first tested without any additional paths and options. If this fails, the pcre2-config script is used. Similarly to the original PCRE library, it is now possible to build PCRE2 from sources with nginx configure, by using the --with-pcre= option. It automatically detects if PCRE or PCRE2 sources are provided. Note that compiling PCRE2 10.33 and later requires inttypes.h. When compiling on Windows with MSVC, inttypes.h is only available starting with MSVC 2013. In older versions some replacement needs to be provided ("echo '#include <stdint.h>' > pcre2-10.xx/src/inttypes.h" is good enough for MSVC 2010). The interface on nginx side remains unchanged.
author Maxim Dounin <mdounin@mdounin.ru>
date Sat, 25 Dec 2021 01:07:15 +0300
parents 060bf88d2473
children fbbb5ce52995
comparison
equal deleted inserted replaced
7980:8007ea138d6a 7981:0b5f12d5c531
16 16
17 17
18 static ngx_inline void ngx_regex_malloc_init(ngx_pool_t *pool); 18 static ngx_inline void ngx_regex_malloc_init(ngx_pool_t *pool);
19 static ngx_inline void ngx_regex_malloc_done(void); 19 static ngx_inline void ngx_regex_malloc_done(void);
20 20
21 #if (NGX_PCRE2)
22 static void * ngx_libc_cdecl ngx_regex_malloc(size_t size, void *data);
23 static void ngx_libc_cdecl ngx_regex_free(void *p, void *data);
24 #else
21 static void * ngx_libc_cdecl ngx_regex_malloc(size_t size); 25 static void * ngx_libc_cdecl ngx_regex_malloc(size_t size);
22 static void ngx_libc_cdecl ngx_regex_free(void *p); 26 static void ngx_libc_cdecl ngx_regex_free(void *p);
27 #endif
23 static void ngx_regex_cleanup(void *data); 28 static void ngx_regex_cleanup(void *data);
24 29
25 static ngx_int_t ngx_regex_module_init(ngx_cycle_t *cycle); 30 static ngx_int_t ngx_regex_module_init(ngx_cycle_t *cycle);
26 31
27 static void *ngx_regex_create_conf(ngx_cycle_t *cycle); 32 static void *ngx_regex_create_conf(ngx_cycle_t *cycle);
65 NULL, /* exit master */ 70 NULL, /* exit master */
66 NGX_MODULE_V1_PADDING 71 NGX_MODULE_V1_PADDING
67 }; 72 };
68 73
69 74
/*
 * Allocator state shared by the PCRE/PCRE2 memory callbacks.
 * ngx_regex_pool is the pool ngx_regex_malloc() allocates from while it
 * is non-NULL; it is set by ngx_regex_malloc_init() and cleared by
 * ngx_regex_malloc_done().
 */
70 static ngx_pool_t *ngx_regex_pool; 75 static ngx_pool_t *ngx_regex_pool;
/* While non-NULL, ngx_regex_compile() appends each compiled regex here. */
71 static ngx_list_t *ngx_regex_studies; 76 static ngx_list_t *ngx_regex_studies;
/*
 * Set to 1 by ngx_regex_malloc_init(NULL) and back to 0 by
 * ngx_regex_malloc_done().  The PCRE2 callbacks only call
 * ngx_alloc() / ngx_free() while it is non-zero.
 */
77 static ngx_uint_t ngx_regex_direct_alloc;
78
79 #if (NGX_PCRE2)
/*
 * Lazily created PCRE2 objects reused across calls: the compile context
 * is created on first use in ngx_regex_compile(); the match data block is
 * created or re-created in ngx_regex_exec() whenever a caller asks for more
 * room than ngx_regex_match_data_size (expressed in the same units as the
 * "size" argument of ngx_regex_exec()).
 */
80 static pcre2_compile_context *ngx_regex_compile_context;
81 static pcre2_match_data *ngx_regex_match_data;
82 static ngx_uint_t ngx_regex_match_data_size;
83 #endif
72 84
73 85
/*
 * Points the pcre_malloc / pcre_free hooks at the nginx allocator
 * callbacks when building against the original PCRE library.
 * With NGX_PCRE2 the body is compiled out; in that build the callbacks are
 * instead handed to pcre2_general_context_create() in ngx_regex_compile().
 */
74 void 86 void
75 ngx_regex_init(void) 87 ngx_regex_init(void)
76 { 88 {
89 #if !(NGX_PCRE2)
77 pcre_malloc = ngx_regex_malloc; 90 pcre_malloc = ngx_regex_malloc;
78 pcre_free = ngx_regex_free; 91 pcre_free = ngx_regex_free;
92 #endif
79 } 93 }
80 94
81 95
/*
 * Selects where subsequent ngx_regex_malloc() calls take memory from:
 * the given pool, or, when pool is NULL, direct heap allocation
 * (ngx_regex_direct_alloc becomes 1 in that case and 0 otherwise).
 * Pair every call with ngx_regex_malloc_done().
 */
82 static ngx_inline void 96 static ngx_inline void
83 ngx_regex_malloc_init(ngx_pool_t *pool) 97 ngx_regex_malloc_init(ngx_pool_t *pool)
84 { 98 {
85 ngx_regex_pool = pool; 99 ngx_regex_pool = pool;
100 ngx_regex_direct_alloc = (pool == NULL) ? 1 : 0;
86 } 101 }
87 102
88 103
/*
 * Resets the allocator state: no current pool and direct allocation
 * disabled.  In the PCRE2 build this means ngx_regex_malloc() returns
 * NULL and ngx_regex_free() does nothing until ngx_regex_malloc_init()
 * is called again.
 */
89 static ngx_inline void 104 static ngx_inline void
90 ngx_regex_malloc_done(void) 105 ngx_regex_malloc_done(void)
91 { 106 {
92 ngx_regex_pool = NULL; 107 ngx_regex_pool = NULL;
93 } 108 ngx_regex_direct_alloc = 0;
94 109 }
110
111
112 #if (NGX_PCRE2)
113
/*
 * PCRE2 variant of ngx_regex_compile().
 *
 * Compiles rc->pattern with rc->options while the allocator is pointed at
 * rc->pool (direct heap allocation if rc->pool is NULL) and stores the
 * result in rc->regex.  On success rc->captures is filled in, and, when the
 * pattern has named captures, rc->named_captures, rc->name_size and
 * rc->names as well.
 *
 * Returns NGX_OK on success.  On any failure returns NGX_ERROR after
 * formatting a message into rc->err.data and updating rc->err.len.
 */
114 ngx_int_t
115 ngx_regex_compile(ngx_regex_compile_t *rc)
116 {
117 int n, errcode;
118 char *p;
119 u_char errstr[128];
120 size_t erroff;
121 pcre2_code *re;
122 ngx_regex_elt_t *elt;
123 pcre2_general_context *gctx;
124 pcre2_compile_context *cctx;
125
126 if (ngx_regex_compile_context == NULL) {
127 /*
128 * Allocate a compile context if not yet allocated. This uses
129 * direct allocations from heap, so the result can be cached
130 * even at runtime.
131 */
132
133 ngx_regex_malloc_init(NULL);
134
135 gctx = pcre2_general_context_create(ngx_regex_malloc, ngx_regex_free,
136 NULL);
137 if (gctx == NULL) {
138 ngx_regex_malloc_done();
139 goto nomem;
140 }
141
142 cctx = pcre2_compile_context_create(gctx);
143 if (cctx == NULL) {
144 pcre2_general_context_free(gctx);
145 ngx_regex_malloc_done();
146 goto nomem;
147 }
148
149 ngx_regex_compile_context = cctx;
150
/*
 * The general context is only used to seed the compile context and is
 * released immediately.  NOTE(review): this presumably relies on
 * pcre2_compile_context_create() copying the allocator callbacks out of
 * gctx -- confirm against the PCRE2 API documentation.
 */
151 pcre2_general_context_free(gctx);
152 ngx_regex_malloc_done();
153 }
154
/* From here on allocations made by ngx_regex_malloc() come from rc->pool. */
155 ngx_regex_malloc_init(rc->pool);
156
157 re = pcre2_compile(rc->pattern.data, rc->pattern.len,
158 (uint32_t) rc->options, &errcode, &erroff,
159 ngx_regex_compile_context);
160
161 /* ensure that there is no current pool */
162 ngx_regex_malloc_done();
163
164 if (re == NULL) {
165 pcre2_get_error_message(errcode, errstr, 128);
166
/*
 * An error offset equal to the pattern length leaves no remaining pattern
 * text to quote, so the shorter message form is used in that case.
 */
167 if ((size_t) erroff == rc->pattern.len) {
168 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len,
169 "pcre2_compile() failed: %s in \"%V\"",
170 errstr, &rc->pattern)
171 - rc->err.data;
172
173 } else {
174 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len,
175 "pcre2_compile() failed: %s in \"%V\" at \"%s\"",
176 errstr, &rc->pattern, rc->pattern.data + erroff)
177 - rc->err.data;
178 }
179
180 return NGX_ERROR;
181 }
182
183 rc->regex = re;
184
185 /* do not study at runtime */
186
/*
 * Record the regex only while ngx_regex_studies is set.  Note that
 * rc->regex has already been assigned if the list push below fails.
 */
187 if (ngx_regex_studies != NULL) {
188 elt = ngx_list_push(ngx_regex_studies);
189 if (elt == NULL) {
190 goto nomem;
191 }
192
193 elt->regex = rc->regex;
194 elt->name = rc->pattern.data;
195 }
196
/*
 * Capture metadata is queried only as far as needed: the function returns
 * early when there are no captures, and again when none of them is named.
 */
197 n = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &rc->captures);
198 if (n < 0) {
199 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_CAPTURECOUNT) failed: %d";
200 goto failed;
201 }
202
203 if (rc->captures == 0) {
204 return NGX_OK;
205 }
206
207 n = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &rc->named_captures);
208 if (n < 0) {
209 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMECOUNT) failed: %d";
210 goto failed;
211 }
212
213 if (rc->named_captures == 0) {
214 return NGX_OK;
215 }
216
217 n = pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &rc->name_size);
218 if (n < 0) {
219 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMEENTRYSIZE) failed: %d";
220 goto failed;
221 }
222
223 n = pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &rc->names);
224 if (n < 0) {
225 p = "pcre2_pattern_info(\"%V\", PCRE2_INFO_NAMETABLE) failed: %d";
226 goto failed;
227 }
228
229 return NGX_OK;
230
/*
 * Shared error exit for pcre2_pattern_info() failures: p is a format string
 * taking the pattern (%V) and the returned error code (%d).
 */
231 failed:
232
233 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len, p, &rc->pattern, n)
234 - rc->err.data;
235 return NGX_ERROR;
236
/* Shared error exit for failed context creation or list push. */
237 nomem:
238
239 rc->err.len = ngx_snprintf(rc->err.data, rc->err.len,
240 "regex \"%V\" compilation failed: no memory",
241 &rc->pattern)
242 - rc->err.data;
243 return NGX_ERROR;
244 }
245
246 #else
95 247
96 ngx_int_t 248 ngx_int_t
97 ngx_regex_compile(ngx_regex_compile_t *rc) 249 ngx_regex_compile(ngx_regex_compile_t *rc)
98 { 250 {
99 int n, erroff; 251 int n, erroff;
193 &rc->pattern) 345 &rc->pattern)
194 - rc->err.data; 346 - rc->err.data;
195 return NGX_ERROR; 347 return NGX_ERROR;
196 } 348 }
197 349
350 #endif
351
352
353 #if (NGX_PCRE2)
354
/*
 * PCRE2 variant of ngx_regex_exec().
 *
 * Matches the string s against re using the cached match data block
 * (re-created when the caller needs more room than the cached one was
 * sized for) and, on a non-negative match result, copies at most size / 3
 * start/end offset pairs into captures[].
 *
 * Returns the pcre2_match() result as is; negative values are errors.
 * Returns PCRE2_ERROR_NOMEMORY if the match data block cannot be created.
 *
 * NOTE(review): size is presumably the element count of captures[] in the
 * legacy PCRE ovector convention (3 ints per group) -- confirm against the
 * callers; only the first 2 * (size / 3) entries are written here.
 */
355 ngx_int_t
356 ngx_regex_exec(ngx_regex_t *re, ngx_str_t *s, int *captures, ngx_uint_t size)
357 {
358 size_t *ov;
359 ngx_int_t rc;
360 ngx_uint_t n, i;
361
362 /*
363 * The pcre2_match() function might allocate memory for backtracking
364 * frames, typical allocations are from 40k and above. So the allocator
365 * is configured to do direct allocations from heap during matching.
366 */
367
368 ngx_regex_malloc_init(NULL);
369
370 if (ngx_regex_match_data == NULL
371 || size > ngx_regex_match_data_size)
372 {
373 /*
374 * Allocate a match data if not yet allocated or smaller than
375 * needed.
376 */
377
378 if (ngx_regex_match_data) {
379 pcre2_match_data_free(ngx_regex_match_data);
380 }
381
/*
 * The cached size is updated before the allocation result is known; a
 * failed allocation leaves ngx_regex_match_data NULL, which makes the next
 * call take this branch again.
 * NOTE(review): a NULL general context is passed here, so the match data is
 * presumably allocated with PCRE2's default allocator rather than
 * ngx_regex_malloc() -- confirm against the PCRE2 API documentation.
 */
382 ngx_regex_match_data_size = size;
383 ngx_regex_match_data = pcre2_match_data_create(size / 3, NULL);
384
385 if (ngx_regex_match_data == NULL) {
386 rc = PCRE2_ERROR_NOMEMORY;
387 goto failed;
388 }
389 }
390
391 rc = pcre2_match(re, s->data, s->len, 0, 0, ngx_regex_match_data, NULL);
392
393 if (rc < 0) {
394 goto failed;
395 }
396
397 n = pcre2_get_ovector_count(ngx_regex_match_data);
398 ov = pcre2_get_ovector_pointer(ngx_regex_match_data);
399
/*
 * The cached block may hold more pairs than this caller has room for, so
 * the copy is clamped to the caller's size / 3.
 */
400 if (n > size / 3) {
401 n = size / 3;
402 }
403
/*
 * Offsets are narrowed from size_t to int on assignment.
 * NOTE(review): unset groups presumably arrive as PCRE2_UNSET and are
 * expected to read back as -1 after the conversion -- confirm.
 */
404 for (i = 0; i < n; i++) {
405 captures[i * 2] = ov[i * 2];
406 captures[i * 2 + 1] = ov[i * 2 + 1];
407 }
408
/* Reached on success as well: always restore the allocator state. */
409 failed:
410
411 ngx_regex_malloc_done();
412
413 return rc;
414 }
415
416 #endif
417
198 418
199 ngx_int_t 419 ngx_int_t
200 ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log) 420 ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log)
201 { 421 {
202 ngx_int_t n; 422 ngx_int_t n;
227 447
228 return NGX_DECLINED; 448 return NGX_DECLINED;
229 } 449 }
230 450
231 451
452 #if (NGX_PCRE2)
453
/*
 * PCRE2 allocation callback.  Allocates size bytes from the current pool
 * if one was selected with ngx_regex_malloc_init(); otherwise from the
 * heap via ngx_alloc() when direct allocation is enabled.  Returns NULL
 * when neither mode is active (i.e. outside an init/done pair) or when
 * the underlying allocation fails.  The data argument is unused.
 */
454 static void * ngx_libc_cdecl
455 ngx_regex_malloc(size_t size, void *data)
456 {
457 if (ngx_regex_pool) {
458 return ngx_palloc(ngx_regex_pool, size);
459 }
460
461 if (ngx_regex_direct_alloc) {
462 return ngx_alloc(size, ngx_cycle->log);
463 }
464
465 return NULL;
466 }
467
468
/*
 * PCRE2 deallocation callback.  Calls ngx_free(p) only while direct
 * allocation is enabled; in every other state it is a no-op (memory taken
 * from a pool is not released individually here).  The data argument is
 * unused.
 *
 * NOTE(review): because of the guard below, any pcre2_*_free() call on an
 * object that was heap-allocated through ngx_regex_malloc() has to run
 * between ngx_regex_malloc_init(NULL) and ngx_regex_malloc_done(), or the
 * memory is silently leaked.  The pcre2_compile_context_free() call in
 * ngx_regex_cleanup() does not appear to be wrapped that way -- confirm.
 */
469 static void ngx_libc_cdecl
470 ngx_regex_free(void *p, void *data)
471 {
472 if (ngx_regex_direct_alloc) {
473 ngx_free(p);
474 }
475
476 return;
477 }
478
479 #else
480
232 static void * ngx_libc_cdecl 481 static void * ngx_libc_cdecl
233 ngx_regex_malloc(size_t size) 482 ngx_regex_malloc(size_t size)
234 { 483 {
235 if (ngx_regex_pool) { 484 if (ngx_regex_pool) {
236 return ngx_palloc(ngx_regex_pool, size); 485 return ngx_palloc(ngx_regex_pool, size);
244 ngx_regex_free(void *p) 493 ngx_regex_free(void *p)
245 { 494 {
246 return; 495 return;
247 } 496 }
248 497
498 #endif
499
249 500
250 static void 501 static void
251 ngx_regex_cleanup(void *data) 502 ngx_regex_cleanup(void *data)
252 { 503 {
253 #if (NGX_HAVE_PCRE_JIT) 504 #if (NGX_PCRE2 || NGX_HAVE_PCRE_JIT)
254 ngx_regex_conf_t *rcf = data; 505 ngx_regex_conf_t *rcf = data;
255 506
256 ngx_uint_t i; 507 ngx_uint_t i;
257 ngx_list_part_t *part; 508 ngx_list_part_t *part;
258 ngx_regex_elt_t *elts; 509 ngx_regex_elt_t *elts;
273 } 524 }
274 525
275 /* 526 /*
276 * The PCRE JIT compiler uses mmap for its executable codes, so we 527 * The PCRE JIT compiler uses mmap for its executable codes, so we
277 * have to explicitly call the pcre_free_study() function to free 528 * have to explicitly call the pcre_free_study() function to free
278 * this memory. 529 * this memory. In PCRE2, we call the pcre2_code_free() function
530 * for the same reason.
279 */ 531 */
280 532
533 #if (NGX_PCRE2)
534 pcre2_code_free(elts[i].regex);
535 #else
281 if (elts[i].regex->extra != NULL) { 536 if (elts[i].regex->extra != NULL) {
282 pcre_free_study(elts[i].regex->extra); 537 pcre_free_study(elts[i].regex->extra);
283 } 538 }
539 #endif
284 } 540 }
285 #endif 541 #endif
286 542
287 /* 543 /*
288 * On configuration parsing errors ngx_regex_module_init() will not 544 * On configuration parsing errors ngx_regex_module_init() will not
289 * be called. Make sure ngx_regex_studies is properly cleared anyway. 545 * be called. Make sure ngx_regex_studies is properly cleared anyway.
290 */ 546 */
291 547
292 ngx_regex_studies = NULL; 548 ngx_regex_studies = NULL;
549
550 #if (NGX_PCRE2)
551
552 /*
553 * Free compile context and match data. If needed at runtime by
554 * the new cycle, these will be re-allocated.
555 */
556
557 if (ngx_regex_compile_context) {
558 pcre2_compile_context_free(ngx_regex_compile_context);
559 ngx_regex_compile_context = NULL;
560 }
561
562 if (ngx_regex_match_data) {
563 pcre2_match_data_free(ngx_regex_match_data);
564 ngx_regex_match_data = NULL;
565 ngx_regex_match_data_size = 0;
566 }
567
568 #endif
293 } 569 }
294 570
295 571
296 static ngx_int_t 572 static ngx_int_t
297 ngx_regex_module_init(ngx_cycle_t *cycle) 573 ngx_regex_module_init(ngx_cycle_t *cycle)
298 { 574 {
299 int opt; 575 int opt;
576 #if !(NGX_PCRE2)
300 const char *errstr; 577 const char *errstr;
578 #endif
301 ngx_uint_t i; 579 ngx_uint_t i;
302 ngx_list_part_t *part; 580 ngx_list_part_t *part;
303 ngx_regex_elt_t *elts; 581 ngx_regex_elt_t *elts;
304 ngx_regex_conf_t *rcf; 582 ngx_regex_conf_t *rcf;
305 583
306 opt = 0; 584 opt = 0;
307 585
308 rcf = (ngx_regex_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_regex_module); 586 rcf = (ngx_regex_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_regex_module);
309 587
310 #if (NGX_HAVE_PCRE_JIT) 588 #if (NGX_PCRE2 || NGX_HAVE_PCRE_JIT)
589
311 if (rcf->pcre_jit) { 590 if (rcf->pcre_jit) {
591 #if (NGX_PCRE2)
592 opt = 1;
593 #else
312 opt = PCRE_STUDY_JIT_COMPILE; 594 opt = PCRE_STUDY_JIT_COMPILE;
313 } 595 #endif
596 }
597
314 #endif 598 #endif
315 599
316 ngx_regex_malloc_init(cycle->pool); 600 ngx_regex_malloc_init(cycle->pool);
317 601
318 part = &rcf->studies->part; 602 part = &rcf->studies->part;
327 611
328 part = part->next; 612 part = part->next;
329 elts = part->elts; 613 elts = part->elts;
330 i = 0; 614 i = 0;
331 } 615 }
616
617 #if (NGX_PCRE2)
618
619 if (opt) {
620 int n;
621
622 n = pcre2_jit_compile(elts[i].regex, PCRE2_JIT_COMPLETE);
623
624 if (n != 0) {
625 ngx_log_error(NGX_LOG_INFO, cycle->log, 0,
626 "pcre2_jit_compile() failed: %d in \"%s\", "
627 "ignored",
628 n, elts[i].name);
629 }
630 }
631
632 #else
332 633
333 elts[i].regex->extra = pcre_study(elts[i].regex->code, opt, &errstr); 634 elts[i].regex->extra = pcre_study(elts[i].regex->code, opt, &errstr);
334 635
335 if (errstr != NULL) { 636 if (errstr != NULL) {
336 ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, 637 ngx_log_error(NGX_LOG_ALERT, cycle->log, 0,
351 "JIT compiler does not support pattern: \"%s\"", 652 "JIT compiler does not support pattern: \"%s\"",
352 elts[i].name); 653 elts[i].name);
353 } 654 }
354 } 655 }
355 #endif 656 #endif
657 #endif
356 } 658 }
357 659
358 ngx_regex_malloc_done(); 660 ngx_regex_malloc_done();
359 661
360 ngx_regex_studies = NULL; 662 ngx_regex_studies = NULL;
663 #if (NGX_PCRE2)
664 ngx_regex_compile_context = NULL;
665 #endif
361 666
362 return NGX_OK; 667 return NGX_OK;
363 } 668 }
364 669
365 670
413 718
414 if (*fp == 0) { 719 if (*fp == 0) {
415 return NGX_CONF_OK; 720 return NGX_CONF_OK;
416 } 721 }
417 722
418 #if (NGX_HAVE_PCRE_JIT) 723 #if (NGX_PCRE2)
724 {
725 int r;
726 uint32_t jit;
727
728 jit = 0;
729 r = pcre2_config(PCRE2_CONFIG_JIT, &jit);
730
731 if (r != 0 || jit != 1) {
732 ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
733 "PCRE2 library does not support JIT");
734 *fp = 0;
735 }
736 }
737 #elif (NGX_HAVE_PCRE_JIT)
419 { 738 {
420 int jit, r; 739 int jit, r;
421 740
422 jit = 0; 741 jit = 0;
423 r = pcre_config(PCRE_CONFIG_JIT, &jit); 742 r = pcre_config(PCRE_CONFIG_JIT, &jit);