view src/http/modules/ngx_http_charset_filter_module.c @ 202:ca5f86d94316 NGINX_0_3_48

nginx 0.3.48 *) Change: now the ngx_http_charset_module works for subrequests, if the response has no "Content-Type" header line. *) Bugfix: if the "proxy_pass" directive has no URI part, then the "proxy_redirect default" directive add the unnecessary slash in start of the rewritten redirect. *) Bugfix: the internal redirect always transform client's HTTP method to GET, now the transformation is made for the "X-Accel-Redirect" redirects only and if the method is not HEAD; bug appeared in 0.3.42. *) Bugfix: the ngx_http_perl_module could not be built, if the perl was built with the threads support; bug appeared in 0.3.46.
author Igor Sysoev <http://sysoev.ru>
date Mon, 29 May 2006 00:00:00 +0400
parents 71ff1e2b484a
children 3866d57d9cfd
line wrap: on
line source


/*
 * Copyright (C) Igor Sysoev
 */


#include <ngx_config.h>
#include <ngx_core.h>
#include <ngx_http.h>


#define NGX_HTTP_NO_CHARSET  -2


typedef struct {
    u_char     **tables;
    ngx_str_t     name;

    ngx_uint_t    utf8;   /* unsigned     utf8:1; */
} ngx_http_charset_t;


typedef struct {
    ngx_int_t     src;
    ngx_int_t     dst;
} ngx_http_charset_recode_t;


typedef struct {
    ngx_int_t     src;
    ngx_int_t     dst;
    u_char       *src2dst;
    u_char       *dst2src;
} ngx_http_charset_tables_t;


typedef struct {
    ngx_array_t   charsets;               /* ngx_http_charset_t */
    ngx_array_t   tables;                 /* ngx_http_charset_tables_t */
    ngx_array_t   recodes;                /* ngx_http_charset_recode_t */
} ngx_http_charset_main_conf_t;


typedef struct {
    ngx_int_t     charset;
    ngx_int_t     source_charset;
    ngx_flag_t    override_charset;
} ngx_http_charset_loc_conf_t;


typedef struct {
    u_char       *table;
    ngx_int_t     charset;
} ngx_http_charset_ctx_t;


static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);

static char *ngx_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
    void *conf);
static char *ngx_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf);

static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
    void *conf);
static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);

static ngx_int_t ngx_http_charset_filter_init(ngx_cycle_t *cycle);

static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
    void *parent, void *child);
static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);


static ngx_command_t  ngx_http_charset_filter_commands[] = {

    { ngx_string("charset"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
                        |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
      ngx_http_set_charset_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_charset_loc_conf_t, charset),
      NULL },

    { ngx_string("source_charset"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
                        |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
      ngx_http_set_charset_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_charset_loc_conf_t, source_charset),
      NULL },

    { ngx_string("override_charset"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
                        |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
      ngx_conf_set_flag_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_charset_loc_conf_t, override_charset),
      NULL },

    { ngx_string("charset_map"),
      NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
      ngx_charset_map_block,
      NGX_HTTP_MAIN_CONF_OFFSET,
      0,
      NULL },

      ngx_null_command
};


static ngx_http_module_t  ngx_http_charset_filter_module_ctx = {
    NULL,                                  /* preconfiguration */
    ngx_http_charset_postconfiguration,    /* postconfiguration */

    ngx_http_charset_create_main_conf,     /* create main configuration */
    NULL,                                  /* init main configuration */

    NULL,                                  /* create server configuration */
    NULL,                                  /* merge server configuration */

    ngx_http_charset_create_loc_conf,      /* create location configuration */
    ngx_http_charset_merge_loc_conf        /* merge location configuration */
};


ngx_module_t  ngx_http_charset_filter_module = {
    NGX_MODULE_V1,
    &ngx_http_charset_filter_module_ctx,   /* module context */
    ngx_http_charset_filter_commands,      /* module directives */
    NGX_HTTP_MODULE,                       /* module type */
    NULL,                                  /* init master */
    ngx_http_charset_filter_init,          /* init module */
    NULL,                                  /* init process */
    NULL,                                  /* init thread */
    NULL,                                  /* exit thread */
    NULL,                                  /* exit process */
    NULL,                                  /* exit master */
    NGX_MODULE_V1_PADDING
};


static ngx_http_output_header_filter_pt  ngx_http_next_header_filter;
static ngx_http_output_body_filter_pt    ngx_http_next_body_filter;


static ngx_int_t
ngx_http_charset_header_filter(ngx_http_request_t *r)
{
    size_t                         len;
    u_char                        *p;
    ngx_int_t                      charset, source_charset;
    ngx_uint_t                     i;
    ngx_http_charset_t            *charsets;
    ngx_http_charset_ctx_t        *ctx;
    ngx_http_charset_loc_conf_t   *lcf, *mlcf;
    ngx_http_charset_main_conf_t  *mcf;

    mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);

    ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);

    if (ctx == NULL) {
        mlcf = ngx_http_get_module_loc_conf(r->main,
                                            ngx_http_charset_filter_module);
        charset = mlcf->charset;

        if (charset == NGX_HTTP_NO_CHARSET) {
            return ngx_http_next_header_filter(r);
        }

    } else {
        charset = ctx->charset;
    }

    charsets = mcf->charsets.elts;

    if (r == r->main) {
        if (r->headers_out.content_type.len == 0) {
            return ngx_http_next_header_filter(r);
        }

        if (ngx_strncasecmp(r->headers_out.content_type.data, "text/", 5) != 0
            && ngx_strncasecmp(r->headers_out.content_type.data,
                               "application/x-javascript", 24) != 0)
        {
            return ngx_http_next_header_filter(r);
        }

    } else {
        if (r->headers_out.content_type.len == 0) {
            mlcf = ngx_http_get_module_loc_conf(r->main,
                                                ngx_http_charset_filter_module);
            source_charset = mlcf->source_charset;

            goto found;
        }
    }

    lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);

    len = 0;

    for (p = r->headers_out.content_type.data; *p; p++) {
        if (*p == ';') {
            len = p - r->headers_out.content_type.data;
        }

        if (ngx_strncasecmp(p, "charset=", 8) != 0) {
            continue;
        }

        p += 8;

        for (i = 0; i < mcf->charsets.nelts; i++) {

            if (ngx_strcasecmp(p, charsets[i].name.data) == 0) {

                if (r == r->main && lcf->override_charset == 0) {
                    ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
                    if (ctx == NULL) {
                        return NGX_ERROR;
                    }

                    ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);

                    ctx->charset = i;

                    return ngx_http_next_header_filter(r);
                }

                if (i != (ngx_uint_t) charset
                    && (charsets[i].tables == NULL
                        || charsets[i].tables[charset] == NULL))
                {
                    ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
                                  "no \"charset_map\" between the charsets "
                                  "\"%V\" and \"%V\"",
                                  &charsets[i].name, &charsets[charset].name);

                    return ngx_http_next_header_filter(r);
                }

                r->headers_out.content_type.len = len;

                if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
                    || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
                {
                    /*
                     * do not set charset for the redirect because NN 4.x
                     * uses this charset instead of the next page charset
                     */

                    r->headers_out.charset.len = 0;
                    return ngx_http_next_header_filter(r);
                }

                source_charset = i;

                goto found;
            }
        }

        return ngx_http_next_header_filter(r);
    }

    if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
        || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
    {
        /*
         * do not set charset for the redirect because NN 4.x
         * use this charset instead of the next page charset
         */

        r->headers_out.charset.len = 0;
        return ngx_http_next_header_filter(r);
    }

    if (r->headers_out.charset.len) {
        return ngx_http_next_header_filter(r);
    }

    source_charset = lcf->source_charset;

found:

    r->headers_out.charset = charsets[charset].name;
    r->utf8 = charsets[charset].utf8;

    if (source_charset == NGX_CONF_UNSET || source_charset == charset) {
        return ngx_http_next_header_filter(r);
    }

    ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
    if (ctx == NULL) {
        return NGX_ERROR;
    }

    ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);

    ctx->table = charsets[source_charset].tables[charset];
    ctx->charset = charset;

    r->filter_need_in_memory = 1;

    return ngx_http_next_header_filter(r);
}


static ngx_int_t
ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
{
    ngx_chain_t             *cl;
    ngx_http_charset_ctx_t  *ctx;

    ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);

    if (ctx == NULL || ctx->table == NULL) {
        return ngx_http_next_body_filter(r, in);
    }

    for (cl = in; cl; cl = cl->next) {
        (void) ngx_http_charset_recode(cl->buf, ctx->table);
    }

    return ngx_http_next_body_filter(r, in);
}


static ngx_uint_t
ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
{
    u_char  *p;

    for (p = b->pos; p < b->last; p++) {

        if (*p == table[*p]) {
            continue;
        }

        while (p < b->last) {
            *p = table[*p];
            p++;
        }

        b->in_file = 0;

        return 1;
    }

    return 0;
}


static char *
ngx_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
    ngx_http_charset_main_conf_t  *mcf = conf;

    char                       *rv;
    ngx_int_t                   src, dst;
    ngx_uint_t                  i;
    ngx_str_t                  *value;
    ngx_conf_t                  pvcf;
    ngx_http_charset_tables_t  *table;

    value = cf->args->elts;

    src = ngx_http_add_charset(&mcf->charsets, &value[1]);
    if (src == NGX_ERROR) {
        return NGX_CONF_ERROR;
    }

    dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
    if (dst == NGX_ERROR) {
        return NGX_CONF_ERROR;
    }

    if (src == dst) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                           "\"charset_map\" between the same charsets "
                           "\"%V\" and \"%V\"", &value[1], &value[2]);
        return NGX_CONF_ERROR;
    }

    table = mcf->tables.elts;
    for (i = 0; i < mcf->tables.nelts; i++) {
        if ((src == table->src && dst == table->dst)
             || (src == table->dst && dst == table->src))
        {
            ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                               "duplicate \"charset_map\" between "
                               "\"%V\" and \"%V\"", &value[1], &value[2]);
            return NGX_CONF_ERROR;
        }
    }

    table = ngx_array_push(&mcf->tables);
    if (table == NULL) {
        return NGX_CONF_ERROR;
    }

    table->src = src;
    table->dst = dst;

    table->src2dst = ngx_palloc(cf->pool, 256);
    if (table->src2dst == NULL) {
        return NGX_CONF_ERROR;
    }

    table->dst2src = ngx_palloc(cf->pool, 256);
    if (table->dst2src == NULL) {
        return NGX_CONF_ERROR;
    }

    for (i = 0; i < 128; i++) {
        table->src2dst[i] = (u_char) i;
        table->dst2src[i] = (u_char) i;
    }

    for (/* void */; i < 256; i++) {
        table->src2dst[i] = '?';
        table->dst2src[i] = '?';
    }

    pvcf = *cf;
    cf->ctx = table;
    cf->handler = ngx_charset_map;
    cf->handler_conf = conf;

    rv = ngx_conf_parse(cf, NULL);

    *cf = pvcf;

    return rv;
}


static char *
ngx_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
{
    ngx_int_t                   src, dst;
    ngx_str_t                  *value;
    ngx_http_charset_tables_t  *table;

    if (cf->args->nelts != 2) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
        return NGX_CONF_ERROR;
    }

    value = cf->args->elts;

    src = ngx_hextoi(value[0].data, value[0].len);
    if (src == NGX_ERROR || src > 255) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                           "invalid value \"%V\"", &value[0]);
        return NGX_CONF_ERROR;
    }

    dst = ngx_hextoi(value[1].data, value[1].len);
    if (dst == NGX_ERROR || dst > 255) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                           "invalid value \"%V\"", &value[1]);
        return NGX_CONF_ERROR;
    }

    table = cf->ctx;

    table->src2dst[src] = (u_char) dst;
    table->dst2src[dst] = (u_char) src;

    return NGX_CONF_OK;
}


static char *
ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
    char  *p = conf;

    ngx_int_t                     *cp;
    ngx_str_t                     *value;
    ngx_http_charset_main_conf_t  *mcf;

    cp = (ngx_int_t *) (p + cmd->offset);

    if (*cp != NGX_CONF_UNSET) {
        return "is duplicate";
    }

    value = cf->args->elts;

    if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
        && ngx_strcmp(value[1].data, "off") == 0)
    {
        *cp = NGX_HTTP_NO_CHARSET;
        return NGX_CONF_OK;
    }

    mcf = ngx_http_conf_get_module_main_conf(cf,
                                             ngx_http_charset_filter_module);

    *cp = ngx_http_add_charset(&mcf->charsets, &value[1]);
    if (*cp == NGX_ERROR) {
        return NGX_CONF_ERROR;
    }

    return NGX_CONF_OK;
}


static ngx_int_t
ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
{
    ngx_uint_t           i;
    ngx_http_charset_t  *c;

    c = charsets->elts;
    for (i = 0; i < charsets->nelts; i++) {
        if (name->len != c[i].name.len) {
            continue;
        }

        if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
            break;
        }
    }

    if (i < charsets->nelts) {
        return i;
    }

    c = ngx_array_push(charsets);
    if (c == NULL) {
        return NGX_ERROR;
    }

    c->tables = NULL;
    c->name = *name;

    if (ngx_strcasecmp(name->data, "utf-8") == 0) {
        c->utf8 = 1;
    }

    return i;
}


static ngx_int_t
ngx_http_charset_filter_init(ngx_cycle_t *cycle)
{
    ngx_http_next_header_filter = ngx_http_top_header_filter;
    ngx_http_top_header_filter = ngx_http_charset_header_filter;

    ngx_http_next_body_filter = ngx_http_top_body_filter;
    ngx_http_top_body_filter = ngx_http_charset_body_filter;

    return NGX_OK;
}


static void *
ngx_http_charset_create_main_conf(ngx_conf_t *cf)
{
    ngx_http_charset_main_conf_t  *mcf;

    mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
    if (mcf == NULL) {
        return NGX_CONF_ERROR;
    }

    if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t))
        == NGX_ERROR)
    {
        return NGX_CONF_ERROR;
    }

    if (ngx_array_init(&mcf->tables, cf->pool, 1,
                       sizeof(ngx_http_charset_tables_t)) == NGX_ERROR)
    {
        return NGX_CONF_ERROR;
    }

    if (ngx_array_init(&mcf->recodes, cf->pool, 2,
                       sizeof(ngx_http_charset_recode_t)) == NGX_ERROR)
    {
        return NGX_CONF_ERROR;
    }

    return mcf;
}


static void *
ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
{
    ngx_http_charset_loc_conf_t  *lcf;

    lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));
    if (lcf == NULL) {
        return NGX_CONF_ERROR;
    }

    lcf->charset = NGX_CONF_UNSET;
    lcf->source_charset = NGX_CONF_UNSET;
    lcf->override_charset = NGX_CONF_UNSET;

    return lcf;
}


static char *
ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
{
    ngx_http_charset_loc_conf_t *prev = parent;
    ngx_http_charset_loc_conf_t *conf = child;

    ngx_uint_t                     i;
    ngx_http_charset_recode_t     *recode;
    ngx_http_charset_main_conf_t  *mcf;

    ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
    ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_NO_CHARSET);

    if (conf->source_charset == NGX_CONF_UNSET) {
        conf->source_charset = prev->source_charset;
    }

    if (conf->charset == NGX_HTTP_NO_CHARSET
        || conf->source_charset == NGX_CONF_UNSET
        || conf->charset == conf->source_charset)
    {
        return NGX_CONF_OK;
    }

    mcf = ngx_http_conf_get_module_main_conf(cf,
                                             ngx_http_charset_filter_module);
    recode = mcf->recodes.elts;
    for (i = 0; i < mcf->recodes.nelts; i++) {
        if (conf->source_charset == recode[i].src
            && conf->charset == recode[i].dst)
        {
            return NGX_CONF_OK;
        }
    }

    recode = ngx_array_push(&mcf->recodes);
    if (recode == NULL) {
        return NGX_CONF_ERROR;
    }

    recode->src = conf->source_charset;
    recode->dst = conf->charset;

    return NGX_CONF_OK;
}


static ngx_int_t
ngx_http_charset_postconfiguration(ngx_conf_t *cf)
{
    u_char                       **src, **dst;
    ngx_int_t                      c;
    ngx_uint_t                     i, t;
    ngx_http_charset_t            *charset;
    ngx_http_charset_recode_t     *recode;
    ngx_http_charset_tables_t     *tables;
    ngx_http_charset_main_conf_t  *mcf;

    mcf = ngx_http_conf_get_module_main_conf(cf,
                                             ngx_http_charset_filter_module);

    recode = mcf->recodes.elts;
    tables = mcf->tables.elts;
    charset = mcf->charsets.elts;

    for (i = 0; i < mcf->recodes.nelts; i++) {

        c = recode[i].src;

        for (t = 0; t < mcf->tables.nelts; t++) {

            if (c == tables[t].src && recode[i].dst == tables[t].dst) {
                goto next;
            }

            if (c == tables[t].dst && recode[i].dst == tables[t].src) {
                goto next;
            }
        }

        ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
                      " no \"charset_map\" between the charsets "
                      "\"%V\" and \"%V\"",
                      &charset[c].name, &charset[recode[i].dst].name);
        return NGX_ERROR;

    next:
        continue;
    }


    for (t = 0; t < mcf->tables.nelts; t++) {

        src = charset[tables[t].src].tables;

        if (src == NULL) {
            src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
            if (src == NULL) {
                return NGX_ERROR;
            }

            charset[tables[t].src].tables = src;
        }

        dst = charset[tables[t].dst].tables;

        if (dst == NULL) {
            dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
            if (dst == NULL) {
                return NGX_ERROR;
            }

            charset[tables[t].dst].tables = dst;
        }

        src[tables[t].dst] = tables[t].src2dst;
        dst[tables[t].src] = tables[t].dst2src;
    }

    return NGX_OK;
}