annotate contrib/unicode2nginx/unicode-to-nginx.pl @ 391:1d9bef53cd8e

Range filter: late_ranges functionality. Add one more filtering point after the postpone filter. This allows serving range-capable replies that involve subrequests. It's not as efficient as range filtering for static data (i.e. it doesn't save us from reading data from disk if some filter needs it in memory), but it may save some network bandwidth for us and for our users.
author Maxim Dounin <mdounin@mdounin.ru>
date Mon, 21 Jul 2008 05:33:01 +0400
parents fa32d59d9a15
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
216
fa32d59d9a15 nginx 0.3.55
Igor Sysoev <http://sysoev.ru>
parents:
diff changeset
#!/usr/bin/perl -w

# Convert unicode mappings to nginx configuration file format.

# You may find useful mappings in various places, including
# unicode.org official site:
#
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT

# Needs perl 5.6 or later.

# Written by Maxim Dounin, mdounin@rambler-co.ru

###############################################################################

require 5.006;

use strict;
use warnings;

# Read mapping lines from the files named on the command line (or from
# STDIN), and print one "<charset-byte> <utf8-bytes> ; <comment>" entry
# per recognized mapping.  Lines that are neither comments, blank, nor a
# recognized mapping are reported on STDERR and otherwise ignored.

while (<>) {
    # Skip comments and empty lines

    next if /^#/;
    next if /^\s*$/;
    chomp;

    # Convert mappings

    if (/^\s*0x(..)\s*0x(....)\s*(#.*)/) {
        # Mapping <from-code> <unicode-code> "#" <unicode-name>
        my ($cs_code, $un_code, $un_name) = ($1, $2, $3);

        # Produce UTF-8 sequence from character code;
        my $un_utf8 = utf8_hex($un_code);

        print " $cs_code $un_utf8 ; $un_name\n";

    } else {
        # No trailing newline on purpose: perl appends the input file
        # and line number, which helps to locate the offending line.
        warn "Unrecognized line: '$_'";
    }
}

###############################################################################

# utf8_hex(<hex-code-point>)
#
# Takes a Unicode code point written as a hexadecimal string (without
# the "0x" prefix, e.g. "0410") and returns the bytes of its UTF-8
# encoding as an uppercase hexadecimal string (e.g. "D090").
#
# The "U0" at the start of the unpack template forces the string built
# by pack("U") to be processed byte by byte in its UTF-8 encoded form.
# With a plain "C*" template, perl 5.10 and later unpack characters
# rather than bytes, so any code point above 0xFF is truncated to its
# low 8 bits and the resulting map is wrong.

sub utf8_hex {
    my ($hex_code) = @_;

    return join('',
        map { sprintf("%02X", $_) }
        unpack("U0C*", pack("U", hex($hex_code)))
    );
}

###############################################################################