comparison xslscript.pl @ 8:86721b270ebb

XSLScript: explicit whitespace handling.
author Maxim Dounin <mdounin@mdounin.ru>
date Tue, 25 Feb 2014 21:56:52 +0400
parents 9a283d72ddc3
children e93b6e98b817
comparison
equal deleted inserted replaced
7:9a283d72ddc3 8:86721b270ebb
36 36
37 my $grammar = <<'EOF'; 37 my $grammar = <<'EOF';
38 38
39 # XSLTScript grammar, reconstructed 39 # XSLTScript grammar, reconstructed
40 40
41 startrule : item(s) eofile 41 startrule : <skip:""> item(s) eofile
42 { $return = $item[1]; 1 } 42 { $return = $item{'item(s)'}; 1 }
43 43
44 item : "<!--" <commit> comment 44 item : "<!--" <commit> comment
45 | "!!" <commit> exclam_double 45 | "!!" <commit> exclam_double
46 | "!{" <commit> exclam_xpath 46 | "!{" <commit> exclam_xpath
47 | "!" name <commit> params 47 | "!" name <commit> params
48 { $return = [ 48 { $return = [
49 "X:call-template", "name", $item{name}, [], 49 "X:call-template", "name", $item{name}, [],
50 $item{params} 50 $item{params}
51 ]; 1 } 51 ]; 1 }
52 | "<%" <commit> instruction "%>" 52 | "<%" <commit> space instruction space "%>"
53 { $return = $item{instruction}; 1 } 53 { $return = $item{instruction}; 1 }
54 | "<" name attrs ">" <commit> item(s?) "</" name ">" 54 | "<" name attrs space ">" <commit> item(s?) "</" name ">"
55 { $return = [ "tag", $item{name}, $item{attrs}, $item[6] ]; 1 } 55 { $return = [ "tag", $item{name}, $item{attrs}, $item{'item(s?)'} ]; 1 }
56 | "<" <commit> name attrs "/" ">" 56 | "<" <commit> name attrs space "/>"
57 { $return = [ "tag", $item{name}, $item{attrs} ]; 1 } 57 { $return = [ "tag", $item{name}, $item{attrs} ]; 1 }
58 | "X:variable" <commit> xvariable 58 | "X:variable" space <commit> xvariable
59 | "X:var" <commit> xvariable 59 | "X:var" space <commit> xvariable
60 | "X:template" <commit> xtemplate 60 | "X:template" space <commit> xtemplate
61 | "X:if" <commit> xif 61 | "X:if" space <commit> xif
62 | "X:param" <commit> xparam 62 | "X:param" space <commit> xparam
63 | "X:for-each" <commit> xforeach 63 | "X:for-each" space <commit> xforeach
64 | "X:sort" <commit> xsort 64 | "X:sort" space <commit> xsort
65 | "X:when" <commit> xwhen 65 | "X:when" space <commit> xwhen
66 | "X:attribute" <commit> xattribute 66 | "X:attribute" space <commit> xattribute
67 | "X:output" <commit> xoutput 67 | "X:output" space <commit> xoutput
68 | "X:copy-of" <commit> xcopyof 68 | "X:copy-of" space <commit> xcopyof
69 | instruction <commit> attrs body 69 | instruction space <commit> attrs body
70 { $return = [ $item{instruction}, $item{attrs}, $item{body} ]; 1 } 70 { $return = [ $item{instruction}, $item{attrs}, $item{body} ]; 1 }
71 | space_notempty
71 | text 72 | text
72 | <error> 73 | <error>
73 74
74 # list of simple instructions 75 # list of simple instructions
75 76
100 # comments, <!-- ... --> 101 # comments, <!-- ... -->
101 # not sure if it's something to be interpreted specially 102 # not sure if it's something to be interpreted specially
102 # likely an artifact of our dump process 103 # likely an artifact of our dump process
103 104
104 comment : /((?!-->).)*/ms "-->" 105 comment : /((?!-->).)*/ms "-->"
105 { $return = ""; 1 } 106 { $return = "<!--" . $item[1] . "-->"; 1 }
106 107
107 # special chars: ', ", {, }, \ 108 # special chars: ', ", {, }, \
108 # if used in text, they needs to be escaped with backslash 109 # if used in text, they needs to be escaped with backslash
109 110
110 text : quoted | unreserved | "'" | "\"" | "{" 111 text : quoted | unreserved | "'" | "\"" | "{"
111 quoted : "\\" special 112 quoted : "\\" special
112 { $return = $item{special}; 1; } 113 { $return = $item{special}; 1; }
113 special : "'" | "\"" | "\\" | "{" | "}" 114 special : "'" | "\"" | "\\" | "{" | "}"
114 unreserved : /[^'"\\{}<\s]+\s*/ 115 unreserved : /[^'"\\{}<\s]+\s*/ms
116
117 # whitespace
118
119 space : /\s*/ms
120 space_notempty : /\s+/ms
115 121
116 # shortcuts: 122 # shortcuts:
117 # 123 #
118 # !! for X:apply-templates 124 # !! for X:apply-templates
119 # !{xpath-expression} for X:value-of select="xpath-expression"; 125 # !{xpath-expression} for X:value-of select="xpath-expression";
120 # !foo() for X:call-template name="foo" 126 # !foo() for X:call-template name="foo"
121 127
122 # !root (path = { !{ substring($DIRNAME, 2) } }) 128 # !root (path = { !{ substring($DIRNAME, 2) } })
123 # !root (path = "substring-after($path, '/')") 129 # !root (path = "substring-after($path, '/')")
124 130
125 exclam_double : value(?) params(?) attrs ";" 131 exclam_double : space value(?) params(?) attrs space ";"
126 { $return = [ 132 { $return = [
127 "X:apply-templates", "select", $item[1][0], $item{attrs}, 133 "X:apply-templates", "select", $item{'value(?)'}[0],
128 $item[2][0] 134 $item{attrs}, $item{'params(?)'}[0]
129 ]; 1 } 135 ]; 1 }
130 136
131 exclam_xpath : xpath "}" 137 exclam_xpath : xpath "}"
132 { $return = [ 138 { $return = [
133 "X:value-of", "select", $item{xpath}, [] 139 "X:value-of", "select", $item{xpath}, []
136 142
137 # instruction attributes 143 # instruction attributes
138 # name="value" 144 # name="value"
139 145
140 attrs : attr(s?) 146 attrs : attr(s?)
141 attr : name "=" value 147 attr : space name space "=" space value
142 { $return = $item{name} . "=" . $item{value}; } 148 { $return = $item{name} . "=" . $item{value}; }
143 name : /[a-z0-9_:-]+/i 149 name : /[a-z0-9_:-]+/i
144 value : /"[^"]*"/ 150 value : /"[^"]*"/
145 151
146 # template parameters 152 # template parameters
147 # ( bar="init", baz={markup} ) 153 # ( bar="init", baz={markup} )
148 154
149 params : "(" param(s? /,/) ")" 155 params : space "(" param(s? /\s*,\s*/) ")" space
150 { $return = $item[2]; 1 } 156 { $return = $item[3]; 1 }
151 param : name "=" value 157 param : space name space "=" space value space
152 { $return = [ 158 { $return = [
153 "X:with-param", 159 "X:with-param",
154 "select", $item{value}, 160 "select", $item{value},
155 "name", $item{name}, 161 "name", $item{name},
156 [] 162 []
157 ]; 1 } 163 ]; 1 }
158 | name "=" <commit> "{" item(s) "}" 164 | space name space "=" <commit> space "{" item(s) "}"
159 { $return = [ 165 { $return = [
160 "X:with-param", "name", $item{name}, [], 166 "X:with-param", "name", $item{name}, [],
161 $item[5] 167 $item{'item(s)'}
162 ]; 1 } 168 ]; 1 }
163 | name 169 | space name
164 { $return = [ 170 { $return = [
165 "X:param", "name", $item{name}, [] 171 "X:param", "name", $item{name}, []
166 ]; 1 } 172 ]; 1 }
167 173
168 # instruction body 174 # instruction body
169 # ";" for empty body, "{ ... }" otherwise 175 # ";" for empty body, "{ ... }" otherwise
170 176
171 body : ";" 177 body : space ";"
172 { $return = ""; } 178 { $return = ""; }
173 | "{" <commit> item(s?) "}" 179 | space "{" <commit> item(s?) "}" (space ";")(?)
174 { $return = $item[3]; 1 } 180 { $return = $item{'item(s?)'}; 1 }
175 181
176 # special handling of some instructions 182 # special handling of some instructions
177 # X:if attribute is test= 183 # X:if attribute is test=
178 184
179 xif : value body "else" <commit> body 185 xif : value body space "else" <commit> body
180 { $return = [ 186 { $return = [
181 "X:choose", [], [ 187 "X:choose", [], [
182 [ "X:when", "test", $item[1], [], $item[2] ], 188 [ "X:when", "test", $item[1], [], $item[2] ],
183 [ "X:otherwise", [], $item[5] ] 189 [ "X:otherwise", [], $item[6] ]
184 ] 190 ]
185 ]; 1 } 191 ]; 1 }
186 | value attrs body 192 | value attrs body
187 { $return = [ 193 { $return = [
188 "X:if", "test", $item{value}, $item{attrs}, $item{body}, 194 "X:if", "test", $item{value}, $item{attrs}, $item{body},
194 | <error> 200 | <error>
195 201
196 # X:template name(params) = "match" { 202 # X:template name(params) = "match" {
197 # X:template name( bar="init", baz={markup} ) = "match" mode="some" { 203 # X:template name( bar="init", baz={markup} ) = "match" mode="some" {
198 204
199 xtemplate : name(?) params(?) ( "=" value )(?) attrs body 205 xtemplate : name(?) params(?) space
200 { $return = [ 206 (space "=" space value)(?) attrs body
201 "X:template", "name", $item[1][0], "match", $item[3][0], 207 { $return = [
208 "X:template",
209 "name", $item{'name(?)'}[0],
210 "match", $item[4][0],
202 $item{attrs}, 211 $item{attrs},
203 [ ($item[2][0] ? @{$item[2][0]} : ()), @{$item{body}} ] 212 [ ($item[2][0] ? @{$item[2][0]} : ()), @{$item{body}} ]
204 ]; 1 } 213 ]; 1 }
205 214
206 # X:var LINK = "/article/@link"; 215 # X:var LINK = "/article/@link";
207 # X:var year = { ... } 216 # X:var year = { ... }
208 # semicolon is optional 217 # semicolon is optional
209 218
210 xvariable : name "=" value attrs body 219 xvariable : name space "=" space value attrs body
211 { $return = [ 220 { $return = [
212 "X:variable", 221 "X:variable",
213 "select", $item{value}, 222 "select", $item{value},
214 "name", $item{name}, 223 "name", $item{name},
215 $item{attrs}, $item{body} 224 $item{attrs}, $item{body}
216 ]; 1 } 225 ]; 1 }
217 | name "=" attrs body 226 | name space "=" space attrs body
218 { $return = [ 227 { $return = [
219 "X:variable", 228 "X:variable",
220 "name", $item{name}, 229 "name", $item{name},
221 $item{attrs}, $item{body} 230 $item{attrs}, $item{body}
222 ]; 1 } 231 ]; 1 }
223 | name "=" value 232 | name space "=" space value
224 { $return = [ 233 { $return = [
225 "X:variable", 234 "X:variable",
226 "select", $item{value}, 235 "select", $item{value},
227 "name", $item{name}, 236 "name", $item{name},
228 [] 237 []
229 ]; 1 } 238 ]; 1 }
230 | name "=" 239 | name space "="
231 { $return = [ 240 { $return = [
232 "X:variable", 241 "X:variable",
233 "name", $item{name}, 242 "name", $item{name},
234 [] 243 []
235 ]; 1 } 244 ]; 1 }
236 | <error> 245 | <error>
237 246
238 # X:param XML = "'../xml'"; 247 # X:param XML = "'../xml'";
239 # X:param YEAR; 248 # X:param YEAR;
240 249
241 xparam : name "=" value attrs body 250 xparam : name space "=" space value attrs body
242 { $return = [ 251 { $return = [
243 "X:param", 252 "X:param",
244 "select", $item{value}, 253 "select", $item{value},
245 "name", $item{name}, 254 "name", $item{name},
246 $item{attrs}, $item{body} 255 $item{attrs}, $item{body}
256 265
257 xforeach : value attrs body 266 xforeach : value attrs body
258 { $return = [ 267 { $return = [
259 "X:for-each", "select", $item{value}, $item{attrs}, $item{body} 268 "X:for-each", "select", $item{value}, $item{attrs}, $item{body}
260 ]; 1 } 269 ]; 1 }
261 | value attrs "," "X:sort" <commit> value attrs body 270 | value attrs space
271 "," space "X:sort" <commit> space value attrs body
262 { $return = [ 272 { $return = [
263 "X:for-each", "select", $item[1], $item[2], [ 273 "X:for-each", "select", $item[1], $item[2], [
264 [ "X:sort", "select", $item[6], $item[7] ], 274 [ "X:sort", "select", $item[9], $item[10] ],
265 @{$item{body}} 275 @{$item{body}}
266 ] 276 ]
267 ]; 1 } 277 ]; 1 }
268 278
269 # X:sort select 279 # X:sort select
329 $s .= $space . "(undef)" . "\n"; 339 $s .= $space . "(undef)" . "\n";
330 next; 340 next;
331 } 341 }
332 342
333 if (not ref($el) && defined $el) { 343 if (not ref($el) && defined $el) {
334 #$s .= $space . $el . "\n"; 344 if ($el =~ /^<!--(.*)-->$/s) {
345 my $comment = $1;
346 $comment =~ s/--/../sg;
347 $el = "<!--" . $comment . "-->";
348 }
349
335 $s .= $el; 350 $s .= $el;
336 next; 351 next;
337 } 352 }
338 353
339 die if ref($el) ne 'ARRAY'; 354 die if ref($el) ne 'ARRAY';
341 my $tag = $el->[0]; 356 my $tag = $el->[0];
342 357
343 if ($tag eq 'tag') { 358 if ($tag eq 'tag') {
344 my (undef, $name, $attrs, $body) = @{$el}; 359 my (undef, $name, $attrs, $body) = @{$el};
345 360
346 $s .= $space . "<" . join(" ", $name, @{$attrs}); 361 $s .= "<" . join(" ", $name, @{$attrs});
347 if ($body) { 362 if ($body) {
348 my $t = format_tree($body, $indent + 1); 363 $s .= ">" . format_tree($body, $indent + 1)
349 if ($t =~ /\n/) { 364 . "</$name>";
350 $s .= ">\n" . $t
351 . $space . "</$name>\n";
352 } else {
353 $s .= ">$t</$name>\n";
354 }
355 } else { 365 } else {
356 $s .= "/>\n"; 366 $s .= "/>";
357 } 367 }
358 368
359 next; 369 next;
360 } 370 }
361 371
380 } 390 }
381 391
382 my ($attrs, $body) = @a; 392 my ($attrs, $body) = @a;
383 $attrs = [ @{$attrs}, @attrs ]; 393 $attrs = [ @{$attrs}, @attrs ];
384 394
385 $s .= $space . "<" . join(" ", $name, @{$attrs}); 395 $s .= "<" . join(" ", $name, @{$attrs});
386 396
387 if ($body && scalar @{$body} > 0) { 397 if ($body && scalar @{$body} > 0) {
388 my $t = format_tree($body, $indent + 1); 398 $s .= ">" . format_tree($body, $indent + 1)
389 if ($t =~ /\n/) { 399 . "</$name>";
390 $s .= ">\n" . $t
391 . $space . "</$name>\n";
392 } else {
393 $s .= ">$t</$name>\n";
394 }
395 } else { 400 } else {
396 $s .= "/>\n"; 401 $s .= "/>";
397 } 402 }
398 403
399 next; 404 next;
400 } 405 }
401 406