# HG changeset patch # User Dmitry Volyntsev # Date 1439822522 -10800 # Node ID b9447fc457b4a57c9ed947658c661429a435c798 # Parent bd55d75a1410502a9b4ac6fb44ec4528437b530a Sub filter: support of multiple strings to replace. diff --git a/src/http/modules/ngx_http_sub_filter_module.c b/src/http/modules/ngx_http_sub_filter_module.c --- a/src/http/modules/ngx_http_sub_filter_module.c +++ b/src/http/modules/ngx_http_sub_filter_module.c @@ -13,6 +13,20 @@ typedef struct { ngx_str_t match; ngx_http_complex_value_t value; +} ngx_http_sub_match_t; + + +typedef struct { + ngx_uint_t min_match_len; + ngx_uint_t max_match_len; + + u_char index[257]; + u_char shift[256]; +} ngx_http_sub_tables_t; + + +typedef struct { + ngx_http_sub_tables_t *tables; ngx_hash_t types; @@ -20,17 +34,11 @@ typedef struct { ngx_flag_t last_modified; ngx_array_t *types_keys; + ngx_array_t *matches; } ngx_http_sub_loc_conf_t; -typedef enum { - sub_start_state = 0, - sub_match_state, -} ngx_http_sub_state_e; - - typedef struct { - ngx_str_t match; ngx_str_t saved; ngx_str_t looked; @@ -48,12 +56,17 @@ typedef struct { ngx_chain_t *busy; ngx_chain_t *free; - ngx_str_t sub; + ngx_str_t *sub; + ngx_uint_t applied; - ngx_uint_t state; + ngx_int_t offset; + ngx_uint_t index; } ngx_http_sub_ctx_t; +static ngx_uint_t ngx_http_sub_cmp_index; + + static ngx_int_t ngx_http_sub_output(ngx_http_request_t *r, ngx_http_sub_ctx_t *ctx); static ngx_int_t ngx_http_sub_parse(ngx_http_request_t *r, @@ -64,6 +77,9 @@ static char * ngx_http_sub_filter(ngx_co static void *ngx_http_sub_create_conf(ngx_conf_t *cf); static char *ngx_http_sub_merge_conf(ngx_conf_t *cf, void *parent, void *child); +static void ngx_http_sub_init_tables(ngx_http_sub_tables_t *tables, + ngx_http_sub_match_t *match, ngx_uint_t n); +static ngx_int_t ngx_http_sub_cmp_matches(const void *one, const void *two); static ngx_int_t ngx_http_sub_filter_init(ngx_conf_t *cf); @@ -144,7 +160,7 @@ ngx_http_sub_header_filter(ngx_http_requ slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module); - if (slcf->match.len == 0 + if (slcf->matches == NULL || r->headers_out.content_length_n == 0 || ngx_http_test_content_type(r, &slcf->types) == NULL) { @@ -156,19 +172,19 @@ ngx_http_sub_header_filter(ngx_http_requ return NGX_ERROR; } - ctx->saved.data = ngx_pnalloc(r->pool, slcf->match.len); + ctx->saved.data = ngx_pnalloc(r->pool, slcf->tables->max_match_len - 1); if (ctx->saved.data == NULL) { return NGX_ERROR; } - ctx->looked.data = ngx_pnalloc(r->pool, slcf->match.len); + ctx->looked.data = ngx_pnalloc(r->pool, slcf->tables->max_match_len - 1); if (ctx->looked.data == NULL) { return NGX_ERROR; } ngx_http_set_ctx(r, ctx, ngx_http_sub_filter_module); - ctx->match = slcf->match; + ctx->offset = slcf->tables->min_match_len - 1; ctx->last_out = &ctx->out; r->filter_need_in_memory = 1; @@ -194,8 +210,10 @@ ngx_http_sub_body_filter(ngx_http_reques { ngx_int_t rc; ngx_buf_t *b; + ngx_str_t *sub; ngx_chain_t *cl; ngx_http_sub_ctx_t *ctx; + ngx_http_sub_match_t *match; ngx_http_sub_loc_conf_t *slcf; ctx = ngx_http_get_module_ctx(r, ngx_http_sub_filter_module); @@ -242,18 +260,10 @@ ngx_http_sub_body_filter(ngx_http_reques ctx->pos = ctx->buf->pos; } - if (ctx->state == sub_start_state) { - ctx->copy_start = ctx->pos; - ctx->copy_end = ctx->pos; - } - b = NULL; while (ctx->pos < ctx->buf->last) { - ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, - "saved: \"%V\" state: %d", &ctx->saved, ctx->state); - rc = ngx_http_sub_parse(r, ctx); ngx_log_debug4(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, @@ -320,20 +330,6 @@ ngx_http_sub_body_filter(ngx_http_reques ctx->last_out = &cl->next; } - if (ctx->state == sub_start_state) { - ctx->copy_start = ctx->pos; - ctx->copy_end = ctx->pos; - - } else { - ctx->copy_start = NULL; - ctx->copy_end = NULL; - } - - if (ctx->looked.len > (size_t) (ctx->pos - ctx->buf->pos)) { - ctx->saved.len = ctx->looked.len - (ctx->pos - ctx->buf->pos); - ngx_memcpy(ctx->saved.data, ctx->looked.data, ctx->saved.len); - } - if (rc == NGX_AGAIN) { continue; } @@ -352,19 +348,30 @@ ngx_http_sub_body_filter(ngx_http_reques slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module); - if (ctx->sub.data == NULL) { + if (ctx->sub == NULL) { + ctx->sub = ngx_pcalloc(r->pool, sizeof(ngx_str_t) + * slcf->matches->nelts); + if (ctx->sub == NULL) { + return NGX_ERROR; + } + } - if (ngx_http_complex_value(r, &slcf->value, &ctx->sub) + sub = &ctx->sub[ctx->index]; + + if (sub->data == NULL) { + match = slcf->matches->elts; + + if (ngx_http_complex_value(r, &match[ctx->index].value, sub) != NGX_OK) { return NGX_ERROR; } } - if (ctx->sub.len) { + if (sub->len) { b->memory = 1; - b->pos = ctx->sub.data; - b->last = ctx->sub.data + ctx->sub.len; + b->pos = sub->data; + b->last = sub->data + sub->len; } else { b->sync = 1; @@ -373,7 +380,8 @@ ngx_http_sub_body_filter(ngx_http_reques *ctx->last_out = cl; ctx->last_out = &cl->next; - ctx->once = slcf->once; + ctx->index = 0; + ctx->once = slcf->once && (++ctx->applied == slcf->matches->nelts); continue; } @@ -428,9 +436,6 @@ ngx_http_sub_body_filter(ngx_http_reques } ctx->buf = NULL; - - ctx->saved.len = ctx->looked.len; - ngx_memcpy(ctx->saved.data, ctx->looked.data, ctx->looked.len); } if (ctx->out == NULL && ctx->busy == NULL) { @@ -513,158 +518,142 @@ ngx_http_sub_output(ngx_http_request_t * static ngx_int_t ngx_http_sub_parse(ngx_http_request_t *r, ngx_http_sub_ctx_t *ctx) { - u_char *p, *last, *copy_end, ch, match; - size_t looked, i; - ngx_http_sub_state_e state; + u_char *p, *last, *pat, *pat_end, c; + ngx_str_t *m; + ngx_int_t offset, start, next, end, len, rc; + ngx_uint_t shift, i, j; + ngx_http_sub_match_t *match; + ngx_http_sub_tables_t *tables; + ngx_http_sub_loc_conf_t *slcf; + + slcf = ngx_http_get_module_loc_conf(r, ngx_http_sub_filter_module); + tables = slcf->tables; + + offset = ctx->offset; + end = ctx->buf->last - ctx->pos; if (ctx->once) { - ctx->copy_start = ctx->pos; - ctx->copy_end = ctx->buf->last; - ctx->pos = ctx->buf->last; - ctx->looked.len = 0; - - ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0, "once"); - - return NGX_AGAIN; + /* sets start and next to end */ + offset = end + (ngx_int_t) tables->min_match_len - 1; + goto again; } - state = ctx->state; - looked = ctx->looked.len; - last = ctx->buf->last; - copy_end = ctx->copy_end; - - for (p = ctx->pos; p < last; p++) { - - ch = *p; - ch = ngx_tolower(ch); - - if (state == sub_start_state) { + while (offset < end) { - /* the tight loop */ - - match = ctx->match.data[0]; - - for ( ;; ) { - if (ch == match) { - - if (ctx->match.len == 1) { - ctx->pos = p + 1; - ctx->copy_end = p; - - return NGX_OK; - } + c = offset < 0 ? ctx->looked.data[ctx->looked.len + offset] + : ctx->pos[offset]; - copy_end = p; - ctx->looked.data[0] = *p; - looked = 1; - state = sub_match_state; - - goto match_started; - } - - if (++p == last) { - break; - } + c = ngx_tolower(c); - ch = *p; - ch = ngx_tolower(ch); - } - - ctx->state = state; - ctx->pos = p; - ctx->looked.len = looked; - ctx->copy_end = p; - - if (ctx->copy_start == NULL) { - ctx->copy_start = ctx->buf->pos; - } - - return NGX_AGAIN; - - match_started: - + shift = tables->shift[c]; + if (shift > 0) { + offset += shift; continue; } - /* state == sub_match_state */ + /* a potential match */ + + start = offset - (ngx_int_t) tables->min_match_len + 1; + match = slcf->matches->elts; + + i = ngx_max(tables->index[c], ctx->index); + j = tables->index[c + 1]; - if (ch == ctx->match.data[looked]) { - ctx->looked.data[looked] = *p; - looked++; + while (i != j) { - if (looked == ctx->match.len) { + if (slcf->once && ctx->sub && ctx->sub[i].data) { + goto next; + } + + m = &match[i].match; - ctx->state = sub_start_state; - ctx->pos = p + 1; - ctx->looked.len = 0; - ctx->saved.len = 0; - ctx->copy_end = copy_end; + pat = m->data; + pat_end = m->data + m->len; + + if (start >= 0) { + p = ctx->pos + start; - if (ctx->copy_start == NULL && copy_end) { - ctx->copy_start = ctx->buf->pos; + } else { + last = ctx->looked.data + ctx->looked.len; + p = last + start; + + while (p < last && pat < pat_end) { + if (ngx_tolower(*p) != *pat) { + goto next; + } + + p++; + pat++; } - return NGX_OK; + p = ctx->pos; } - } else { - /* - * check if there is another partial match in previously - * matched substring to catch cases like "aab" in "aaab" - */ + while (p < ctx->buf->last && pat < pat_end) { + if (ngx_tolower(*p) != *pat) { + goto next; + } - ctx->looked.data[looked] = *p; - looked++; - - for (i = 1; i < looked; i++) { - if (ngx_strncasecmp(ctx->looked.data + i, - ctx->match.data, looked - i) - == 0) - { - break; - } + p++; + pat++; } - if (i < looked) { - if (ctx->saved.len > i) { - ctx->saved.len = i; - } + ctx->index = i; - if ((size_t) (p + 1 - ctx->buf->pos) >= looked - i) { - copy_end = p + 1 - (looked - i); - } - - ngx_memmove(ctx->looked.data, ctx->looked.data + i, looked - i); - looked = looked - i; - - } else { - copy_end = p; - looked = 0; - state = sub_start_state; + if (pat != pat_end) { + /* partial match */ + goto again; } - if (ctx->saved.len) { - p++; - goto out; - } + ctx->offset = offset + (ngx_int_t) m->len; + next = start + (ngx_int_t) m->len; + end = ngx_max(next, 0); + rc = NGX_OK; + + goto done; + + next: + + i++; } + + offset++; + ctx->index = 0; } - ctx->saved.len = 0; +again: -out: + ctx->offset = offset; + start = offset - (ngx_int_t) tables->min_match_len + 1; + next = start; + rc = NGX_AGAIN; - ctx->state = state; - ctx->pos = p; - ctx->looked.len = looked; +done: + + /* send [ - looked.len, start ] to client */ + + ctx->saved.len = ctx->looked.len + ngx_min(start, 0); + ngx_memcpy(ctx->saved.data, ctx->looked.data, ctx->saved.len); + + ctx->copy_start = ctx->pos; + ctx->copy_end = ctx->pos + ngx_max(start, 0); - ctx->copy_end = (state == sub_start_state) ? p : copy_end; + /* save [ next, end ] in looked */ + + len = ngx_min(next, 0); + p = ctx->looked.data; + p = ngx_movemem(p, p + ctx->looked.len + len, - len); - if (ctx->copy_start == NULL && ctx->copy_end) { - ctx->copy_start = ctx->buf->pos; - } + len = ngx_max(next, 0); + p = ngx_cpymem(p, ctx->pos + len, end - len); + ctx->looked.len = p - ctx->looked.data; - return NGX_AGAIN; + /* update position */ + + ctx->pos += end; + ctx->offset -= end; + + return rc; } @@ -674,23 +663,44 @@ ngx_http_sub_filter(ngx_conf_t *cf, ngx_ ngx_http_sub_loc_conf_t *slcf = conf; ngx_str_t *value; + ngx_http_sub_match_t *match; ngx_http_compile_complex_value_t ccv; - if (slcf->match.data) { - return "is duplicate"; - } - value = cf->args->elts; + if (value[1].len == 0) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "empty search pattern"); + return NGX_CONF_ERROR; + } + + if (slcf->matches == NULL) { + slcf->matches = ngx_array_create(cf->pool, 1, + sizeof(ngx_http_sub_match_t)); + if (slcf->matches == NULL) { + return NGX_CONF_ERROR; + } + } + + if (slcf->matches->nelts == 255) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "number of search patterns exceeds 255"); + return NGX_CONF_ERROR; + } + ngx_strlow(value[1].data, value[1].data, value[1].len); - slcf->match = value[1]; + match = ngx_array_push(slcf->matches); + if (match == NULL) { + return NGX_CONF_ERROR; + } + + match->match = value[1]; ngx_memzero(&ccv, sizeof(ngx_http_compile_complex_value_t)); ccv.cf = cf; ccv.value = &value[2]; - ccv.complex_value = &slcf->value; + ccv.complex_value = &match->value; if (ngx_http_compile_complex_value(&ccv) != NGX_OK) { return NGX_CONF_ERROR; @@ -713,9 +723,10 @@ ngx_http_sub_create_conf(ngx_conf_t *cf) /* * set by ngx_pcalloc(): * - * conf->match = { 0, NULL }; + * conf->tables = NULL; * conf->types = { NULL }; * conf->types_keys = NULL; + * conf->matches = NULL; */ slcf->once = NGX_CONF_UNSET; @@ -732,13 +743,8 @@ ngx_http_sub_merge_conf(ngx_conf_t *cf, ngx_http_sub_loc_conf_t *conf = child; ngx_conf_merge_value(conf->once, prev->once, 1); - ngx_conf_merge_str_value(conf->match, prev->match, ""); ngx_conf_merge_value(conf->last_modified, prev->last_modified, 0); - if (conf->value.value.data == NULL) { - conf->value = prev->value; - } - if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types, &prev->types_keys, &prev->types, ngx_http_html_default_types) @@ -747,10 +753,85 @@ ngx_http_sub_merge_conf(ngx_conf_t *cf, return NGX_CONF_ERROR; } + if (conf->matches == NULL) { + conf->matches = prev->matches; + conf->tables = prev->tables; + + } else { + conf->tables = ngx_palloc(cf->pool, sizeof(ngx_http_sub_tables_t)); + if (conf->tables == NULL) { + return NGX_CONF_ERROR; + } + + ngx_http_sub_init_tables(conf->tables, conf->matches->elts, + conf->matches->nelts); + } + return NGX_CONF_OK; } +static void +ngx_http_sub_init_tables(ngx_http_sub_tables_t *tables, + ngx_http_sub_match_t *match, ngx_uint_t n) +{ + u_char c; + ngx_uint_t i, j, min, max, ch; + + min = match[0].match.len; + max = match[0].match.len; + + for (i = 1; i < n; i++) { + min = ngx_min(min, match[i].match.len); + max = ngx_max(max, match[i].match.len); + } + + tables->min_match_len = min; + tables->max_match_len = max; + + ngx_http_sub_cmp_index = tables->min_match_len - 1; + ngx_sort(match, n, sizeof(ngx_http_sub_match_t), ngx_http_sub_cmp_matches); + + min = ngx_min(min, 255); + ngx_memset(tables->shift, min, 256); + + ch = 0; + + for (i = 0; i < n; i++) { + + for (j = 0; j < min; j++) { + c = match[i].match.data[tables->min_match_len - 1 - j]; + tables->shift[c] = ngx_min(tables->shift[c], (u_char) j); + } + + c = match[i].match.data[tables->min_match_len - 1]; + while (ch <= c) { + tables->index[ch++] = (u_char) i; + } + } + + while (ch < 257) { + tables->index[ch++] = (u_char) n; + } +} + + +static ngx_int_t +ngx_http_sub_cmp_matches(const void *one, const void *two) +{ + ngx_int_t c1, c2; + ngx_http_sub_match_t *first, *second; + + first = (ngx_http_sub_match_t *) one; + second = (ngx_http_sub_match_t *) two; + + c1 = first->match.data[ngx_http_sub_cmp_index]; + c2 = second->match.data[ngx_http_sub_cmp_index]; + + return c1 - c2; +} + + static ngx_int_t ngx_http_sub_filter_init(ngx_conf_t *cf) {