view src/http/modules/ngx_http_charset_filter_module.c @ 196:8759b346e431 NGINX_0_3_45

nginx 0.3.45 *) Feature: the "ssl_verify_client", "ssl_verify_depth", and "ssl_client_certificate" directives. *) Change: the $request_method variable now returns the main request method. *) Change: the ° symbol codes were changed in the koi-win conversion table. *) Feature: the euro and N symbols were added to the koi-win conversion table. *) Bugfix: if nginx distributed the requests among several backends and some backend failed, then requests intended for this backend were directed to one live backend only instead of being distributed among the rest.
author Igor Sysoev <http://sysoev.ru>
date Sat, 06 May 2006 00:00:00 +0400
parents 71ff1e2b484a
children ca5f86d94316
line wrap: on
line source


/*
 * Copyright (C) Igor Sysoev
 */


#include <ngx_config.h>
#include <ngx_core.h>
#include <ngx_http.h>


/*
 * Sentinel stored in ngx_http_charset_loc_conf_t.charset when the "charset"
 * directive is "off" or was never set at any level; the header filter
 * passes such responses through untouched.
 */
#define NGX_HTTP_NO_CHARSET  -2


/*
 * One known charset.  Entries live in ngx_http_charset_main_conf_t.charsets
 * and are referred to everywhere else by their index in that array.
 */
typedef struct {
    /*
     * NULL, or an array with one slot per known charset; slot [n] holds the
     * 256-byte table that recodes from this charset to charset n (filled
     * in by the postconfiguration handler), or NULL if there is no map
     */
    u_char     **tables;
    /* charset name as written in the configuration */
    ngx_str_t     name;

    /* set to 1 when the name is "utf-8" */
    ngx_uint_t    utf8;   /* unsigned     utf8:1; */
} ngx_http_charset_t;


/*
 * A (source charset, destination charset) pair requested by some location;
 * recorded while merging location configurations and validated against the
 * declared "charset_map" blocks in the postconfiguration handler.
 */
typedef struct {
    ngx_int_t     src;    /* index of the source charset */
    ngx_int_t     dst;    /* index of the destination charset */
} ngx_http_charset_recode_t;


/*
 * The pair of byte translation tables built from one "charset_map" block.
 */
typedef struct {
    ngx_int_t     src;        /* index of the first charset of the map */
    ngx_int_t     dst;        /* index of the second charset of the map */
    u_char       *src2dst;    /* 256-byte table: src byte -> dst byte */
    u_char       *dst2src;    /* 256-byte table: dst byte -> src byte */
} ngx_http_charset_tables_t;


/*
 * Module main configuration: every charset name seen in the configuration,
 * every "charset_map" table pair, and every recoding pair required by the
 * merged location configurations.
 */
typedef struct {
    ngx_array_t   charsets;               /* ngx_http_charset_t */
    ngx_array_t   tables;                 /* ngx_http_charset_tables_t */
    ngx_array_t   recodes;                /* ngx_http_charset_recode_t */
} ngx_http_charset_main_conf_t;


/*
 * Per-location settings of the "charset", "source_charset" and
 * "override_charset" directives.  All fields start as NGX_CONF_UNSET.
 */
typedef struct {
    /* index into the charsets array, or NGX_HTTP_NO_CHARSET */
    ngx_int_t     charset;
    /* index into the charsets array; may stay NGX_CONF_UNSET after merge */
    ngx_int_t     source_charset;
    /* "override_charset" flag; defaults to 0 on merge */
    ngx_flag_t    override_charset;
} ngx_http_charset_loc_conf_t;


/*
 * Per-request context created by the header filter.
 */
typedef struct {
    /* recode table applied by the body filter; NULL means no recoding */
    u_char       *table;
    /* index of the charset selected for the response */
    ngx_int_t     charset;
} ngx_http_charset_ctx_t;


/* in-place recoding of a single buffer */
static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);

/* "charset_map" block handler and the handler for the lines inside it */
static char *ngx_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
    void *conf);
static char *ngx_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf);

/* "charset" / "source_charset" handler and the charset registry helper */
static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
    void *conf);
static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);

/* installs the header and body filters */
static ngx_int_t ngx_http_charset_filter_init(ngx_cycle_t *cycle);

/* configuration life cycle callbacks */
static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
    void *parent, void *child);
static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);


/*
 * Configuration directives provided by the module.
 */
static ngx_command_t  ngx_http_charset_filter_commands[] = {

    /* charset <name>|off: charset to advertise in the response */
    { ngx_string("charset"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
                        |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
      ngx_http_set_charset_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_charset_loc_conf_t, charset),
      NULL },

    /* source_charset <name>: charset the response body is recoded from */
    { ngx_string("source_charset"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
                        |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
      ngx_http_set_charset_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_charset_loc_conf_t, source_charset),
      NULL },

    /*
     * override_charset on|off: when off, a known charset already present
     * in the main request's Content-Type is kept as is
     */
    { ngx_string("override_charset"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
                        |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
      ngx_conf_set_flag_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_charset_loc_conf_t, override_charset),
      NULL },

    /* charset_map <charset1> <charset2> { <hex> <hex>; ... } */
    { ngx_string("charset_map"),
      NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
      ngx_charset_map_block,
      NGX_HTTP_MAIN_CONF_OFFSET,
      0,
      NULL },

      ngx_null_command
};


/*
 * HTTP module context: the module has a main and a location configuration
 * (no server-level one) and validates the requested recodings once the
 * whole configuration has been read.
 */
static ngx_http_module_t  ngx_http_charset_filter_module_ctx = {
    NULL,                                  /* preconfiguration */
    ngx_http_charset_postconfiguration,    /* postconfiguration */

    ngx_http_charset_create_main_conf,     /* create main configuration */
    NULL,                                  /* init main configuration */

    NULL,                                  /* create server configuration */
    NULL,                                  /* merge server configuration */

    ngx_http_charset_create_loc_conf,      /* create location configuration */
    ngx_http_charset_merge_loc_conf        /* merge location configuration */
};


/*
 * Module definition; the only life cycle hook used is "init module", which
 * links the charset header and body filters into the filter chains.
 */
ngx_module_t  ngx_http_charset_filter_module = {
    NGX_MODULE_V1,
    &ngx_http_charset_filter_module_ctx,   /* module context */
    ngx_http_charset_filter_commands,      /* module directives */
    NGX_HTTP_MODULE,                       /* module type */
    NULL,                                  /* init master */
    ngx_http_charset_filter_init,          /* init module */
    NULL,                                  /* init process */
    NULL,                                  /* init thread */
    NULL,                                  /* exit thread */
    NULL,                                  /* exit process */
    NULL,                                  /* exit master */
    NGX_MODULE_V1_PADDING
};


/*
 * The filters that were at the top of the chains when this module was
 * initialized; the charset filters hand every request on to them.
 */
static ngx_http_output_header_filter_pt  ngx_http_next_header_filter;
static ngx_http_output_body_filter_pt    ngx_http_next_body_filter;


/*
 * Header filter: chooses the charset to advertise for the response and,
 * if the body has to be recoded, stores the recode table in the request
 * context for the body filter.
 *
 * The target charset is taken from the main request: from its module
 * context if one exists, otherwise from its location configuration.  Only
 * responses whose Content-Type starts with "text/" or
 * "application/x-javascript" are handled.
 *
 * Returns NGX_ERROR if the context cannot be allocated, otherwise the
 * result of the next header filter.
 */
static ngx_int_t
ngx_http_charset_header_filter(ngx_http_request_t *r)
{
    size_t                         len;
    u_char                        *p;
    ngx_int_t                      charset, source_charset;
    ngx_uint_t                     i;
    ngx_http_charset_t            *charsets;
    ngx_http_charset_ctx_t        *ctx;
    ngx_http_charset_loc_conf_t   *lcf;
    ngx_http_charset_main_conf_t  *mcf;

    mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);

    ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);

    if (ctx == NULL) {
        lcf = ngx_http_get_module_loc_conf(r->main,
                                           ngx_http_charset_filter_module);
        charset = lcf->charset;

        if (charset == NGX_HTTP_NO_CHARSET) {
            return ngx_http_next_header_filter(r);
        }

    } else {
        charset = ctx->charset;
    }

    if (r->headers_out.content_type.len == 0) {
        return ngx_http_next_header_filter(r);
    }

    if (ngx_strncasecmp(r->headers_out.content_type.data, "text/", 5) != 0
        && ngx_strncasecmp(r->headers_out.content_type.data,
                           "application/x-javascript", 24) != 0)
    {
        return ngx_http_next_header_filter(r);
    }

    charsets = mcf->charsets.elts;

    lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);

    len = 0;

    /*
     * look for a "charset=" parameter already present in the Content-Type;
     * NOTE(review): the scan stops at a zero byte, so it assumes that
     * content_type.data is null-terminated - confirm against the callers
     */

    for (p = r->headers_out.content_type.data; *p; p++) {
        if (*p == ';') {
            /* length of the Content-Type up to the last ';' seen so far */
            len = p - r->headers_out.content_type.data;
        }

        if (ngx_strncasecmp(p, "charset=", 8) != 0) {
            continue;
        }

        p += 8;

        for (i = 0; i < mcf->charsets.nelts; i++) {

            if (ngx_strcasecmp(p, charsets[i].name.data) == 0) {

                if (r == r->main && lcf->override_charset == 0) {

                    /*
                     * keep the charset of the main response and remember
                     * it in the context; ctx->table stays NULL, so the
                     * body is not recoded
                     */

                    ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
                    if (ctx == NULL) {
                        return NGX_ERROR;
                    }

                    ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);

                    ctx->charset = i;

                    return ngx_http_next_header_filter(r);
                }

                if (i != (ngx_uint_t) charset
                    && (charsets[i].tables == NULL
                        || charsets[i].tables[charset] == NULL))
                {
                    ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
                                  "no \"charset_map\" between the charsets "
                                  "\"%V\" and \"%V\"",
                                  &charsets[i].name, &charsets[charset].name);

                    return ngx_http_next_header_filter(r);
                }

                /* cut the Content-Type at the last ';' before "charset=" */
                r->headers_out.content_type.len = len;

                if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
                    || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
                {
                    /*
                     * do not set charset for the redirect because NN 4.x
                     * uses this charset instead of the next page charset
                     */

                    r->headers_out.charset.len = 0;
                    return ngx_http_next_header_filter(r);
                }

                source_charset = i;

                goto found;
            }
        }

        /* the Content-Type names a charset this module does not know */
        return ngx_http_next_header_filter(r);
    }

    if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
        || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
    {
        /*
         * do not set charset for the redirect because NN 4.x
         * use this charset instead of the next page charset
         */

        r->headers_out.charset.len = 0;
        return ngx_http_next_header_filter(r);
    }

    if (r->headers_out.charset.len) {
        return ngx_http_next_header_filter(r);
    }

    source_charset = lcf->source_charset;

    /*
     * the target charset comes from the main request while the source
     * charset comes from this request's location, so for a subrequest the
     * pair may never have been validated at configuration time; make sure
     * a recode table really exists before it is dereferenced below
     */

    if (source_charset != NGX_CONF_UNSET
        && source_charset != charset
        && (charsets[source_charset].tables == NULL
            || charsets[source_charset].tables[charset] == NULL))
    {
        ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
                      "no \"charset_map\" between the charsets "
                      "\"%V\" and \"%V\"",
                      &charsets[source_charset].name, &charsets[charset].name);

        return ngx_http_next_header_filter(r);
    }

found:

    r->headers_out.charset = charsets[charset].name;
    r->utf8 = charsets[charset].utf8;

    if (source_charset == NGX_CONF_UNSET || source_charset == charset) {
        /* nothing to recode, the charset is only advertised */
        return ngx_http_next_header_filter(r);
    }

    ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
    if (ctx == NULL) {
        return NGX_ERROR;
    }

    ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);

    ctx->table = charsets[source_charset].tables[charset];
    ctx->charset = charset;

    /* the body filter rewrites the bytes in place */
    r->filter_need_in_memory = 1;

    return ngx_http_next_header_filter(r);
}


/*
 * Body filter: if the header filter left a recode table in the request
 * context, recodes every buffer of the chain in place; the chain is then
 * passed to the next body filter in any case.
 */
static ngx_int_t
ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
{
    u_char                  *recode_table;
    ngx_chain_t             *link;
    ngx_http_charset_ctx_t  *ctx;

    ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);

    recode_table = (ctx != NULL) ? ctx->table : NULL;

    if (recode_table != NULL) {

        link = in;

        while (link != NULL) {
            /* whether the buffer was actually changed is of no interest */
            (void) ngx_http_charset_recode(link->buf, recode_table);
            link = link->next;
        }
    }

    return ngx_http_next_body_filter(r, in);
}


/*
 * Recodes the bytes between b->pos and b->last in place through the
 * 256-byte table.
 *
 * Returns 0 if every byte maps to itself (the buffer is left untouched),
 * otherwise 1; in the latter case b->in_file is cleared as well.
 */
static ngx_uint_t
ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
{
    u_char  *cur, *end;

    end = b->last;
    cur = b->pos;

    /* skip the leading run of bytes that the table leaves unchanged */

    while (cur < end && table[*cur] == *cur) {
        cur++;
    }

    if (cur >= end) {
        return 0;
    }

    /* from the first differing byte on, translate everything */

    for ( /* void */ ; cur < end; cur++) {
        *cur = table[*cur];
    }

    b->in_file = 0;

    return 1;
}


/*
 * Handler of the "charset_map <charset1> <charset2> { ... }" block.
 *
 * Registers both charsets, rejects a map between a charset and itself and
 * a second map between the same two charsets (in either direction), then
 * allocates the two 256-byte tables and parses the block body with
 * ngx_charset_map() as the per-line handler.
 *
 * Returns NGX_CONF_ERROR on any failure, otherwise the result of parsing
 * the block.
 */
static char *
ngx_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
    ngx_http_charset_main_conf_t  *mcf = conf;

    char                       *rv;
    ngx_int_t                   src, dst;
    ngx_uint_t                  i;
    ngx_str_t                  *value;
    ngx_conf_t                  pvcf;
    ngx_http_charset_tables_t  *table;

    value = cf->args->elts;

    src = ngx_http_add_charset(&mcf->charsets, &value[1]);
    if (src == NGX_ERROR) {
        return NGX_CONF_ERROR;
    }

    dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
    if (dst == NGX_ERROR) {
        return NGX_CONF_ERROR;
    }

    if (src == dst) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                           "\"charset_map\" between the same charsets "
                           "\"%V\" and \"%V\"", &value[1], &value[2]);
        return NGX_CONF_ERROR;
    }

    /* every already declared map has to be examined, not just the first */

    table = mcf->tables.elts;
    for (i = 0; i < mcf->tables.nelts; i++) {
        if ((src == table[i].src && dst == table[i].dst)
             || (src == table[i].dst && dst == table[i].src))
        {
            ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                               "duplicate \"charset_map\" between "
                               "\"%V\" and \"%V\"", &value[1], &value[2]);
            return NGX_CONF_ERROR;
        }
    }

    table = ngx_array_push(&mcf->tables);
    if (table == NULL) {
        return NGX_CONF_ERROR;
    }

    table->src = src;
    table->dst = dst;

    table->src2dst = ngx_palloc(cf->pool, 256);
    if (table->src2dst == NULL) {
        return NGX_CONF_ERROR;
    }

    table->dst2src = ngx_palloc(cf->pool, 256);
    if (table->dst2src == NULL) {
        return NGX_CONF_ERROR;
    }

    /* the lower half maps to itself by default ... */

    for (i = 0; i < 128; i++) {
        table->src2dst[i] = (u_char) i;
        table->dst2src[i] = (u_char) i;
    }

    /* ... and the upper half maps to '?' unless the block says otherwise */

    for (/* void */; i < 256; i++) {
        table->src2dst[i] = '?';
        table->dst2src[i] = '?';
    }

    /* parse the block body with a private handler, then restore cf */

    pvcf = *cf;
    cf->ctx = table;
    cf->handler = ngx_charset_map;
    cf->handler_conf = conf;

    rv = ngx_conf_parse(cf, NULL);

    *cf = pvcf;

    return rv;
}


/*
 * Handler of one "<hex> <hex>;" line inside a "charset_map" block.
 *
 * Both arguments are hexadecimal byte codes no greater than 255.  The pair
 * is stored in both directions in the tables passed through cf->ctx.
 *
 * Returns NGX_CONF_OK, or NGX_CONF_ERROR on a wrong argument count or an
 * invalid value.
 */
static char *
ngx_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
{
    ngx_int_t                   code[2];
    ngx_str_t                  *arg;
    ngx_uint_t                  n;
    ngx_http_charset_tables_t  *map;

    if (cf->args->nelts != 2) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
        return NGX_CONF_ERROR;
    }

    arg = cf->args->elts;

    /* code[0] is the source byte, code[1] is the destination byte */

    for (n = 0; n < 2; n++) {

        code[n] = ngx_hextoi(arg[n].data, arg[n].len);

        if (code[n] == NGX_ERROR || code[n] > 255) {
            ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                               "invalid value \"%V\"", &arg[n]);
            return NGX_CONF_ERROR;
        }
    }

    map = cf->ctx;

    map->src2dst[code[0]] = (u_char) code[1];
    map->dst2src[code[1]] = (u_char) code[0];

    return NGX_CONF_OK;
}


/*
 * Handler of the "charset" and "source_charset" directives.
 *
 * Stores the index of the named charset in the ngx_int_t field found at
 * cmd->offset inside the location configuration, registering the charset
 * if needed.  For the "charset" field only, the value "off" stores
 * NGX_HTTP_NO_CHARSET instead.
 *
 * Returns NGX_CONF_OK, "is duplicate" if the field is already set, or
 * NGX_CONF_ERROR if the charset cannot be registered.
 */
static char *
ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
    ngx_int_t                     *slot, index;
    ngx_str_t                     *args;
    ngx_http_charset_main_conf_t  *mcf;

    slot = (ngx_int_t *) ((char *) conf + cmd->offset);

    if (*slot != NGX_CONF_UNSET) {
        return "is duplicate";
    }

    args = cf->args->elts;

    if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
        && ngx_strcmp(args[1].data, "off") == 0)
    {
        *slot = NGX_HTTP_NO_CHARSET;
        return NGX_CONF_OK;
    }

    mcf = ngx_http_conf_get_module_main_conf(cf,
                                             ngx_http_charset_filter_module);

    index = ngx_http_add_charset(&mcf->charsets, &args[1]);

    /* the result is stored even if it is the error code */
    *slot = index;

    if (index == NGX_ERROR) {
        return NGX_CONF_ERROR;
    }

    return NGX_CONF_OK;
}


/*
 * Returns the index of the charset called "name" in the "charsets" array,
 * appending a new entry if the name (compared case-insensitively) is not
 * there yet.  Returns NGX_ERROR if the array cannot be extended.
 */
static ngx_int_t
ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
{
    ngx_uint_t           i;
    ngx_http_charset_t  *c;

    c = charsets->elts;
    for (i = 0; i < charsets->nelts; i++) {
        if (name->len != c[i].name.len) {
            continue;
        }

        if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
            break;
        }
    }

    if (i < charsets->nelts) {
        return i;
    }

    c = ngx_array_push(charsets);
    if (c == NULL) {
        return NGX_ERROR;
    }

    /*
     * every field is set explicitly, including utf8 for charsets other
     * than "utf-8": the header filter copies it into the request later
     */

    c->tables = NULL;
    c->name = *name;

    if (ngx_strcasecmp(name->data, "utf-8") == 0) {
        c->utf8 = 1;

    } else {
        c->utf8 = 0;
    }

    return i;
}


/*
 * Puts the charset filters on top of the header and body filter chains,
 * saving the previous top filters so that the charset filters can call
 * them.  Always returns NGX_OK.
 */
static ngx_int_t
ngx_http_charset_filter_init(ngx_cycle_t *cycle)
{
    /* body filter chain */
    ngx_http_next_body_filter = ngx_http_top_body_filter;
    ngx_http_top_body_filter = ngx_http_charset_body_filter;

    /* header filter chain */
    ngx_http_next_header_filter = ngx_http_top_header_filter;
    ngx_http_top_header_filter = ngx_http_charset_header_filter;

    return NGX_OK;
}


/*
 * Allocates the main configuration and initializes its three arrays
 * (charsets, tables, recodes).  Returns the configuration, or
 * NGX_CONF_ERROR if an allocation fails.
 */
static void *
ngx_http_charset_create_main_conf(ngx_conf_t *cf)
{
    ngx_http_charset_main_conf_t  *main_conf;

    main_conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
    if (main_conf == NULL) {
        return NGX_CONF_ERROR;
    }

    /* the arrays are initialized in order; the first failure stops */

    if (ngx_array_init(&main_conf->charsets, cf->pool, 2,
                       sizeof(ngx_http_charset_t)) == NGX_ERROR
        || ngx_array_init(&main_conf->tables, cf->pool, 1,
                          sizeof(ngx_http_charset_tables_t)) == NGX_ERROR
        || ngx_array_init(&main_conf->recodes, cf->pool, 2,
                          sizeof(ngx_http_charset_recode_t)) == NGX_ERROR)
    {
        return NGX_CONF_ERROR;
    }

    return main_conf;
}


/*
 * Allocates a location configuration with every field marked as not set.
 * Returns the configuration, or NGX_CONF_ERROR if the allocation fails.
 */
static void *
ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
{
    ngx_http_charset_loc_conf_t  *loc_conf;

    loc_conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));

    if (loc_conf != NULL) {
        /* the real values are settled when the levels are merged */
        loc_conf->override_charset = NGX_CONF_UNSET;
        loc_conf->source_charset = NGX_CONF_UNSET;
        loc_conf->charset = NGX_CONF_UNSET;

        return loc_conf;
    }

    return NGX_CONF_ERROR;
}


/*
 * Merges a location configuration with its parent.
 *
 * Defaults: "override_charset" is 0 and "charset" is NGX_HTTP_NO_CHARSET;
 * "source_charset" is inherited and may remain unset.  If the merged
 * location needs recoding (both charsets set and different), the pair is
 * added to the main configuration's "recodes" array unless already there.
 *
 * Returns NGX_CONF_OK, or NGX_CONF_ERROR if the array cannot be extended.
 */
static char *
ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
{
    ngx_http_charset_loc_conf_t *prev = parent;
    ngx_http_charset_loc_conf_t *conf = child;

    ngx_int_t                      from, to;
    ngx_uint_t                     left;
    ngx_http_charset_recode_t     *pair;
    ngx_http_charset_main_conf_t  *mcf;

    ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_NO_CHARSET);
    ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);

    if (conf->source_charset == NGX_CONF_UNSET) {
        conf->source_charset = prev->source_charset;
    }

    from = conf->source_charset;
    to = conf->charset;

    if (to == NGX_HTTP_NO_CHARSET || from == NGX_CONF_UNSET || to == from) {
        /* this location never recodes anything */
        return NGX_CONF_OK;
    }

    mcf = ngx_http_conf_get_module_main_conf(cf,
                                             ngx_http_charset_filter_module);

    /* is the pair already on the list ? */

    pair = mcf->recodes.elts;

    for (left = mcf->recodes.nelts; left != 0; left--, pair++) {
        if (pair->src == from && pair->dst == to) {
            return NGX_CONF_OK;
        }
    }

    pair = ngx_array_push(&mcf->recodes);
    if (pair == NULL) {
        return NGX_CONF_ERROR;
    }

    pair->src = from;
    pair->dst = to;

    return NGX_CONF_OK;
}


/*
 * Runs after the whole configuration has been read.
 *
 * First checks that each recoding pair collected from the locations is
 * covered by a "charset_map" (in either direction) and fails with an
 * error otherwise.  Then gives each charset taking part in a map an array
 * with one slot per known charset and stores the map's tables there, so
 * that charset[a].tables[b] recodes from a to b.
 *
 * Returns NGX_OK, or NGX_ERROR on a missing map or a failed allocation.
 */
static ngx_int_t
ngx_http_charset_postconfiguration(ngx_conf_t *cf)
{
    u_char                       **slots;
    ngx_int_t                      from, to;
    ngx_uint_t                     n, t, mapped;
    ngx_http_charset_t            *charset;
    ngx_http_charset_recode_t     *recode;
    ngx_http_charset_tables_t     *tables;
    ngx_http_charset_main_conf_t  *mcf;

    mcf = ngx_http_conf_get_module_main_conf(cf,
                                             ngx_http_charset_filter_module);

    charset = mcf->charsets.elts;
    tables = mcf->tables.elts;
    recode = mcf->recodes.elts;

    /* every requested recoding needs a map */

    for (n = 0; n < mcf->recodes.nelts; n++) {

        from = recode[n].src;
        to = recode[n].dst;

        mapped = 0;

        for (t = 0; t < mcf->tables.nelts; t++) {

            if ((from == tables[t].src && to == tables[t].dst)
                || (from == tables[t].dst && to == tables[t].src))
            {
                mapped = 1;
                break;
            }
        }

        if (!mapped) {
            ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
                          " no \"charset_map\" between the charsets "
                          "\"%V\" and \"%V\"",
                          &charset[from].name, &charset[to].name);
            return NGX_ERROR;
        }
    }

    /* attach the tables of every map to both of its charsets */

    for (t = 0; t < mcf->tables.nelts; t++) {

        from = tables[t].src;
        to = tables[t].dst;

        if (charset[from].tables == NULL) {
            slots = ngx_pcalloc(cf->pool,
                                sizeof(u_char *) * mcf->charsets.nelts);
            if (slots == NULL) {
                return NGX_ERROR;
            }

            charset[from].tables = slots;
        }

        if (charset[to].tables == NULL) {
            slots = ngx_pcalloc(cf->pool,
                                sizeof(u_char *) * mcf->charsets.nelts);
            if (slots == NULL) {
                return NGX_ERROR;
            }

            charset[to].tables = slots;
        }

        charset[from].tables[to] = tables[t].src2dst;
        charset[to].tables[from] = tables[t].dst2src;
    }

    return NGX_OK;
}