diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2013-05-28 04:39:43 (JST) |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2013-05-28 04:54:16 (JST) |
commit | 8149be213f1c8f52b0dbe6c213f6073af57fa954 (patch) | |
tree | e4d0315f53022bb7335f782ad394d8e7602f1b52 /filters/html-converters/resources | |
parent | dcbc0438b2543a733858d62170f3110a89edbed6 (diff) | |
download | cgit-8149be213f1c8f52b0dbe6c213f6073af57fa954.zip cgit-8149be213f1c8f52b0dbe6c213f6073af57fa954.tar.gz |
filters: import more modern scripts
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'filters/html-converters/resources')
-rwxr-xr-x | filters/html-converters/resources/markdown.pl | 1731 | ||||
-rw-r--r-- | filters/html-converters/resources/rst-template.txt | 4 |
2 files changed, 1735 insertions, 0 deletions
diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl new file mode 100755 index 0000000..abec173 --- /dev/null +++ b/filters/html-converters/resources/markdown.pl | |||
@@ -0,0 +1,1731 @@ | |||
1 | #!/usr/bin/perl | ||
2 | |||
3 | # | ||
4 | # Markdown -- A text-to-HTML conversion tool for web writers | ||
5 | # | ||
6 | # Copyright (c) 2004 John Gruber | ||
7 | # <http://daringfireball.net/projects/markdown/> | ||
8 | # | ||
9 | |||
10 | |||
11 | package Markdown; | ||
12 | require 5.006_000; | ||
13 | use strict; | ||
14 | use warnings; | ||
15 | |||
16 | use Digest::MD5 qw(md5_hex); | ||
17 | use vars qw($VERSION); | ||
18 | $VERSION = '1.0.1'; | ||
19 | # Tue 14 Dec 2004 | ||
20 | |||
21 | ## NOTE: the next two statements are active in this copy; they are known to cause problems under Perl 5.6.1: | ||
22 | use utf8; | ||
23 | binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html | ||
24 | |||
25 | |||
26 | # | ||
27 | # Global default settings: | ||
28 | # | ||
29 | my $g_empty_element_suffix = " />"; # Change to ">" for HTML output | ||
30 | my $g_tab_width = 4; | ||
31 | |||
32 | |||
33 | # | ||
34 | # Globals: | ||
35 | # | ||
36 | |||
37 | # Regex to match balanced [brackets]. See Friedl's | ||
38 | # "Mastering Regular Expressions", 2nd Ed., pp. 328-331. | ||
39 | my $g_nested_brackets; | ||
40 | $g_nested_brackets = qr{ | ||
41 | (?> # Atomic matching | ||
42 | [^\[\]]+ # Anything other than brackets | ||
43 | | | ||
44 | \[ | ||
45 | (??{ $g_nested_brackets }) # Recursive set of nested brackets | ||
46 | \] | ||
47 | )* | ||
48 | }x; | ||
49 | |||
50 | |||
51 | # Table of hash values for escaped characters: | ||
52 | my %g_escape_table; | ||
53 | foreach my $char (split //, '\\`*_{}[]()>#+-.!') { | ||
54 | $g_escape_table{$char} = md5_hex($char); | ||
55 | } | ||
56 | |||
57 | |||
58 | # Global hashes, used by various utility routines | ||
59 | my %g_urls; | ||
60 | my %g_titles; | ||
61 | my %g_html_blocks; | ||
62 | |||
63 | # Used to track when we're inside an ordered or unordered list | ||
64 | # (see _ProcessListItems() for details): | ||
65 | my $g_list_level = 0; | ||
66 | |||
67 | |||
68 | #### Blosxom plug-in interface ########################################## | ||
69 | |||
70 | # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine | ||
71 | # which posts Markdown should process, using a "meta-markup: markdown" | ||
72 | # header. If it's set to 0 (the default), Markdown will process all | ||
73 | # entries. | ||
74 | my $g_blosxom_use_meta = 0; | ||
75 | |||
# Blosxom plug-in interface entry point; unconditionally returns a true value.
sub start {
    return 1;
}
# Blosxom per-story hook. Rewrites the story body in place (through
# $body_ref) by running it through Markdown(). When $g_blosxom_use_meta is
# set, the body is only converted if $meta::markup is defined and reads
# "markdown" (case-insensitive, surrounding whitespace allowed); otherwise
# every story is converted. Always returns 1.
sub story {
    my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

    # Default: process every entry. Only consult the meta header when the
    # meta plug-in integration has been switched on.
    my $should_convert = 1;
    if ($g_blosxom_use_meta) {
        $should_convert = defined($meta::markup)
                       && $meta::markup =~ /^\s*markdown\s*$/i;
    }

    $$body_ref = Markdown($$body_ref) if $should_convert;
    return 1;
}
87 | |||
88 | |||
89 | #### Movable Type plug-in interface ##################################### | ||
90 | eval {require MT}; # Test to see if we're running in MT. | ||
91 | unless ($@) { | ||
92 | require MT; | ||
93 | import MT; | ||
94 | require MT::Template::Context; | ||
95 | import MT::Template::Context; | ||
96 | |||
# Test to see if we're running >= MT 3.0, i.e. whether MT::Plugin can be
# loaded. The eval block returns an explicit true value on success so the
# outcome does not depend on the state of the global $@.
if (eval { require MT::Plugin; 1 }) {
    # The module is already loaded by the eval above; a second require
    # would be a no-op, so only the import remains.
    MT::Plugin->import;

    # Direct method-call syntax instead of the ambiguous indirect object
    # form ("new MT::Plugin(...)").
    my $plugin = MT::Plugin->new({
        name        => "Markdown",
        description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
        doc_link    => 'http://daringfireball.net/projects/markdown/'
    });
    MT->add_plugin( $plugin );
}
108 | |||
# Register the MarkdownOptions container tag. The handler records the
# lowercased 'output' argument (when given) in the context stash under
# 'markdown_output', then builds and returns the enclosed tokens. If the
# build yields undef, the builder's error string is reported through
# $ctx->error.
MT::Template::Context->add_container_tag(
    MarkdownOptions => sub {
        my ($ctx, $args) = @_;
        my $builder = $ctx->stash('builder');
        my $tokens  = $ctx->stash('tokens');

        my $requested = $args->{'output'};
        $ctx->stash('markdown_output', lc $requested) if defined $requested;

        my $str = $builder->build($ctx, $tokens);
        return $ctx->error($builder->errstr) unless defined $str;
        return $str;
    }
);
123 | |||
# Register the 'markdown' text filter. The formatter looks at the
# 'markdown_output' value stashed in the context (if a context is given):
#   - a value starting with "html" selects ">" as the empty-element suffix
#     and clears the stash entry;
#   - the value "raw" returns the text unprocessed and clears the stash entry;
#   - anything else selects the " />" suffix.
# Without a context the current suffix is left untouched and the text is
# processed by Markdown().
MT->add_text_filter(
    'markdown' => {
        label     => 'Markdown',
        docs      => 'http://daringfireball.net/projects/markdown/',
        on_format => sub {
            my ($text, $ctx) = @_;
            my $raw = 0;

            if (defined $ctx) {
                my $output = $ctx->stash('markdown_output');
                # An undefined setting behaves like an unrecognised one.
                my $mode = defined $output ? $output : '';

                if ($mode =~ m/^html/i) {
                    $g_empty_element_suffix = ">";
                    $ctx->stash('markdown_output', '');
                }
                elsif ($mode eq 'raw') {
                    $raw = 1;
                    $ctx->stash('markdown_output', '');
                }
                else {
                    $g_empty_element_suffix = " />";
                }
            }

            $text = Markdown($text) unless $raw;
            return $text;
        },
    }
);
150 | |||
151 | # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: | ||
# If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter.
# The SmartyPants handler is looked up in MT's global filter table; the
# combined filter is only registered when that lookup yields a true value.
my $smartypants;

{
    # The global filter hash is mentioned only here; silence "used once".
    no warnings "once";
    $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
}

if ($smartypants) {
    MT->add_text_filter('markdown_with_smartypants' => {
        label     => 'Markdown With SmartyPants',
        docs      => 'http://daringfireball.net/projects/markdown/',
        on_format => sub {
            my $text = shift;
            my $ctx  = shift;
            if (defined $ctx) {
                my $output = $ctx->stash('markdown_output');
                # Accept any value starting with "html" (e.g. "html4"),
                # exactly as the plain 'markdown' filter does, so both
                # filters react identically to the same setting.
                if (defined $output && $output =~ m/^html/i) {
                    $g_empty_element_suffix = ">";
                }
                else {
                    $g_empty_element_suffix = " />";
                }
            }
            # Markdown first, then SmartyPants on the resulting HTML.
            $text = Markdown($text);
            $text = $smartypants->($text, '1');
            return $text;
        },
    });
}
180 | } | ||
181 | else { | ||
182 | #### BBEdit/command-line text filter interface ########################## | ||
183 | # Needs to be hidden from MT (and Blosxom when running in static mode). | ||
184 | |||
185 | # We're only using $blosxom::version once; tell Perl not to warn us: | ||
186 | no warnings 'once'; | ||
187 | unless ( defined($blosxom::version) ) { | ||
188 | use warnings; | ||
189 | |||
190 | #### Check for command-line switches: ################# | ||
# Parse the command-line switches. Unknown options are passed through
# untouched ('pass_through'), so remaining arguments stay in @ARGV.
#   --version       print a version banner and exit
#   --shortversion  print only the version number and exit
#   --html4tags     emit ">" instead of " />" for empty elements
my %cli_opts;
use Getopt::Long;
Getopt::Long::Configure('pass_through');
GetOptions(\%cli_opts, qw(version shortversion html4tags));

if ($cli_opts{'version'}) {         # Version info
    print "\nThis is Markdown, version $VERSION.\n",
          "Copyright 2004 John Gruber\n",
          "http://daringfireball.net/projects/markdown/\n\n";
    exit 0;
}

if ($cli_opts{'shortversion'}) {    # Just the version number string.
    print $VERSION;
    exit 0;
}

# Use HTML tag style instead of XHTML
$g_empty_element_suffix = ">" if $cli_opts{'html4tags'};
212 | |||
213 | |||
214 | #### Process incoming text: ########################### | ||
# Read all input (files named in @ARGV, or STDIN) in one go: with the
# input record separator undefined, a single <> read returns everything.
my $text = do {
    local $/;
    <>;
};
220 | print <<'EOT'; | ||
221 | <style> | ||
222 | .markdown-body { | ||
223 | font-size: 14px; | ||
224 | line-height: 1.6; | ||
225 | overflow: hidden; | ||
226 | } | ||
227 | .markdown-body>*:first-child { | ||
228 | margin-top: 0 !important; | ||
229 | } | ||
230 | .markdown-body>*:last-child { | ||
231 | margin-bottom: 0 !important; | ||
232 | } | ||
233 | .markdown-body a.absent { | ||
234 | color: #c00; | ||
235 | } | ||
236 | .markdown-body a.anchor { | ||
237 | display: block; | ||
238 | padding-left: 30px; | ||
239 | margin-left: -30px; | ||
240 | cursor: pointer; | ||
241 | position: absolute; | ||
242 | top: 0; | ||
243 | left: 0; | ||
244 | bottom: 0; | ||
245 | } | ||
246 | .markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { | ||
247 | margin: 20px 0 10px; | ||
248 | padding: 0; | ||
249 | font-weight: bold; | ||
250 | -webkit-font-smoothing: antialiased; | ||
251 | cursor: text; | ||
252 | position: relative; | ||
253 | } | ||
254 | .markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { | ||
255 | display: none; | ||
256 | color: #000; | ||
257 | } | ||
258 | .markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { | ||
259 | text-decoration: none; | ||
260 | line-height: 1; | ||
261 | padding-left: 0; | ||
262 | margin-left: -22px; | ||
263 | top: 15%} | ||
264 | .markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { | ||
265 | display: inline-block; | ||
266 | } | ||
267 | .markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { | ||
268 | font-size: inherit; | ||
269 | } | ||
270 | .markdown-body h1 { | ||
271 | font-size: 28px; | ||
272 | color: #000; | ||
273 | } | ||
274 | .markdown-body h2 { | ||
275 | font-size: 24px; | ||
276 | border-bottom: 1px solid #ccc; | ||
277 | color: #000; | ||
278 | } | ||
279 | .markdown-body h3 { | ||
280 | font-size: 18px; | ||
281 | } | ||
282 | .markdown-body h4 { | ||
283 | font-size: 16px; | ||
284 | } | ||
285 | .markdown-body h5 { | ||
286 | font-size: 14px; | ||
287 | } | ||
288 | .markdown-body h6 { | ||
289 | color: #777; | ||
290 | font-size: 14px; | ||
291 | } | ||
292 | .markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { | ||
293 | margin: 15px 0; | ||
294 | } | ||
295 | .markdown-body hr { | ||
296 | background: transparent url("/dirty-shade.png") repeat-x 0 0; | ||
297 | border: 0 none; | ||
298 | color: #ccc; | ||
299 | height: 4px; | ||
300 | padding: 0; | ||
301 | } | ||
302 | .markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { | ||
303 | margin-top: 0; | ||
304 | padding-top: 0; | ||
305 | } | ||
306 | .markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { | ||
307 | margin-top: 0; | ||
308 | padding-top: 0; | ||
309 | } | ||
310 | .markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { | ||
311 | margin-top: 0; | ||
312 | } | ||
313 | .markdown-body li p.first { | ||
314 | display: inline-block; | ||
315 | } | ||
316 | .markdown-body ul, .markdown-body ol { | ||
317 | padding-left: 30px; | ||
318 | } | ||
319 | .markdown-body ul.no-list, .markdown-body ol.no-list { | ||
320 | list-style-type: none; | ||
321 | padding: 0; | ||
322 | } | ||
323 | .markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { | ||
324 | margin-top: 0px; | ||
325 | } | ||
326 | .markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { | ||
327 | margin-bottom: 0; | ||
328 | } | ||
329 | .markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { | ||
330 | margin-bottom: 0; | ||
331 | } | ||
332 | .markdown-body dl { | ||
333 | padding: 0; | ||
334 | } | ||
335 | .markdown-body dl dt { | ||
336 | font-size: 14px; | ||
337 | font-weight: bold; | ||
338 | font-style: italic; | ||
339 | padding: 0; | ||
340 | margin: 15px 0 5px; | ||
341 | } | ||
342 | .markdown-body dl dt:first-child { | ||
343 | padding: 0; | ||
344 | } | ||
345 | .markdown-body dl dt>:first-child { | ||
346 | margin-top: 0px; | ||
347 | } | ||
348 | .markdown-body dl dt>:last-child { | ||
349 | margin-bottom: 0px; | ||
350 | } | ||
351 | .markdown-body dl dd { | ||
352 | margin: 0 0 15px; | ||
353 | padding: 0 15px; | ||
354 | } | ||
355 | .markdown-body dl dd>:first-child { | ||
356 | margin-top: 0px; | ||
357 | } | ||
358 | .markdown-body dl dd>:last-child { | ||
359 | margin-bottom: 0px; | ||
360 | } | ||
361 | .markdown-body blockquote { | ||
362 | border-left: 4px solid #DDD; | ||
363 | padding: 0 15px; | ||
364 | color: #777; | ||
365 | } | ||
366 | .markdown-body blockquote>:first-child { | ||
367 | margin-top: 0px; | ||
368 | } | ||
369 | .markdown-body blockquote>:last-child { | ||
370 | margin-bottom: 0px; | ||
371 | } | ||
372 | .markdown-body table th { | ||
373 | font-weight: bold; | ||
374 | } | ||
375 | .markdown-body table th, .markdown-body table td { | ||
376 | border: 1px solid #ccc; | ||
377 | padding: 6px 13px; | ||
378 | } | ||
379 | .markdown-body table tr { | ||
380 | border-top: 1px solid #ccc; | ||
381 | background-color: #fff; | ||
382 | } | ||
383 | .markdown-body table tr:nth-child(2n) { | ||
384 | background-color: #f8f8f8; | ||
385 | } | ||
386 | .markdown-body img { | ||
387 | max-width: 100%; | ||
388 | -moz-box-sizing: border-box; | ||
389 | box-sizing: border-box; | ||
390 | } | ||
391 | .markdown-body span.frame { | ||
392 | display: block; | ||
393 | overflow: hidden; | ||
394 | } | ||
395 | .markdown-body span.frame>span { | ||
396 | border: 1px solid #ddd; | ||
397 | display: block; | ||
398 | float: left; | ||
399 | overflow: hidden; | ||
400 | margin: 13px 0 0; | ||
401 | padding: 7px; | ||
402 | width: auto; | ||
403 | } | ||
404 | .markdown-body span.frame span img { | ||
405 | display: block; | ||
406 | float: left; | ||
407 | } | ||
408 | .markdown-body span.frame span span { | ||
409 | clear: both; | ||
410 | color: #333; | ||
411 | display: block; | ||
412 | padding: 5px 0 0; | ||
413 | } | ||
414 | .markdown-body span.align-center { | ||
415 | display: block; | ||
416 | overflow: hidden; | ||
417 | clear: both; | ||
418 | } | ||
419 | .markdown-body span.align-center>span { | ||
420 | display: block; | ||
421 | overflow: hidden; | ||
422 | margin: 13px auto 0; | ||
423 | text-align: center; | ||
424 | } | ||
425 | .markdown-body span.align-center span img { | ||
426 | margin: 0 auto; | ||
427 | text-align: center; | ||
428 | } | ||
429 | .markdown-body span.align-right { | ||
430 | display: block; | ||
431 | overflow: hidden; | ||
432 | clear: both; | ||
433 | } | ||
434 | .markdown-body span.align-right>span { | ||
435 | display: block; | ||
436 | overflow: hidden; | ||
437 | margin: 13px 0 0; | ||
438 | text-align: right; | ||
439 | } | ||
440 | .markdown-body span.align-right span img { | ||
441 | margin: 0; | ||
442 | text-align: right; | ||
443 | } | ||
444 | .markdown-body span.float-left { | ||
445 | display: block; | ||
446 | margin-right: 13px; | ||
447 | overflow: hidden; | ||
448 | float: left; | ||
449 | } | ||
450 | .markdown-body span.float-left span { | ||
451 | margin: 13px 0 0; | ||
452 | } | ||
453 | .markdown-body span.float-right { | ||
454 | display: block; | ||
455 | margin-left: 13px; | ||
456 | overflow: hidden; | ||
457 | float: right; | ||
458 | } | ||
459 | .markdown-body span.float-right>span { | ||
460 | display: block; | ||
461 | overflow: hidden; | ||
462 | margin: 13px auto 0; | ||
463 | text-align: right; | ||
464 | } | ||
465 | .markdown-body code, .markdown-body tt { | ||
466 | margin: 0 2px; | ||
467 | padding: 0px 5px; | ||
468 | border: 1px solid #eaeaea; | ||
469 | background-color: #f8f8f8; | ||
470 | border-radius: 3px; | ||
471 | } | ||
472 | .markdown-body code { | ||
473 | white-space: nowrap; | ||
474 | } | ||
475 | .markdown-body pre>code { | ||
476 | margin: 0; | ||
477 | padding: 0; | ||
478 | white-space: pre; | ||
479 | border: none; | ||
480 | background: transparent; | ||
481 | } | ||
482 | .markdown-body .highlight pre, .markdown-body pre { | ||
483 | background-color: #f8f8f8; | ||
484 | border: 1px solid #ccc; | ||
485 | font-size: 13px; | ||
486 | line-height: 19px; | ||
487 | overflow: auto; | ||
488 | padding: 6px 10px; | ||
489 | border-radius: 3px; | ||
490 | } | ||
491 | .markdown-body pre code, .markdown-body pre tt { | ||
492 | margin: 0; | ||
493 | padding: 0; | ||
494 | background-color: transparent; | ||
495 | border: none; | ||
496 | } | ||
497 | </style> | ||
498 | EOT | ||
# Emit the converted document wrapped in the container element that the
# stylesheet printed above targets (.markdown-body).
print q{<div class='markdown-body'>};
print Markdown($text);
print q{</div>};
502 | } | ||
503 | } | ||
504 | |||
505 | |||
506 | |||
sub Markdown {
#
# Main entry point: takes the source text and returns the processed text
# followed by a newline. The order of the steps below is essential. Link
# and image substitutions need to happen before _EscapeSpecialChars(), so
# that any *'s or _'s in the <a> and <img> tags get encoded.
#
    my ($text) = @_;

    # Reset the per-document lookup tables. Without this, a page that
    # contains several articles (e.g. an index of the N most recent ones)
    # would see link definitions and HTML blocks leak between articles.
    %$_ = () for (\%g_urls, \%g_titles, \%g_html_blocks);

    # Standardize line endings: DOS (CR LF) and Mac (lone CR) become LF.
    $text =~ s{\r\n?}{\n}g;

    # Make sure the text ends with a couple of newlines.
    $text .= "\n" x 2;

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Blank out lines consisting only of spaces and tabs, so later patterns
    # can match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ \t]*\n+/ .
    $text =~ s/^[ \t]+$//mg;

    # Remaining stages, applied strictly in this order:
    #   1. turn block-level HTML blocks into hash entries
    #   2. strip link definitions, storing them in the global hashes
    #   3. run the block-level transformations
    #   4. restore the escaped special characters
    for my $stage (
        \&_HashHTMLBlocks,
        \&_StripLinkDefinitions,
        \&_RunBlockGamut,
        \&_UnescapeSpecialChars,
    ) {
        $text = $stage->($text);
    }

    return "$text\n";
}
553 | |||
554 | |||
sub _StripLinkDefinitions {
#
# Strips link definitions of the form
#
#     [id]: url "optional title"
#
# from the text. Each URL is stored in %g_urls and each title (when present)
# in %g_titles, both keyed by the lowercased id. Double quotes inside a title
# are stored as &quot;. Returns the text with the definitions removed.
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    # One definition is removed per iteration (no /g); the loop ends when
    # no definition is left.
    while ($text =~ s{
            ^[ ]{0,$less_than_tab}\[(.+)\]: # id = capture 1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(\S+?)>?          # url = capture 2
              [ \t]*
              \n?               # maybe one newline
              [ \t]*
            (?:
                (?<=\s)         # lookbehind for whitespace
                ["(]
                (.+?)           # title = capture 3
                [")]
                [ \t]*
            )?                  # title is optional
            (?:\n+|\Z)
        }
        {}mx) {
        # Copy the captures into lexicals before calling anything else.
        # Link IDs are case-insensitive, hence the lc.
        my ($id, $url, $title) = (lc $1, $2, $3);

        $g_urls{$id} = _EncodeAmpsAndAngles($url);

        # Test definedness rather than truth so a title of "0" is kept.
        if (defined $title) {
            # Encode double quotes as an entity. The previous substitution
            # replaced a quote with itself and therefore did nothing.
            $title =~ s/"/&quot;/g;
            $g_titles{$id} = $title;
        }
    }

    return $text;
}
592 | |||
593 | |||
594 | sub _HashHTMLBlocks { | ||
595 | my $text = shift; | ||
596 | my $less_than_tab = $g_tab_width - 1; | ||
597 | |||
598 | # Hashify HTML blocks: | ||
599 | # We only want to do this for block-level HTML tags, such as headers, | ||
600 | # lists, and tables. That's because we still want to wrap <p>s around | ||
601 | # "paragraphs" that are wrapped in non-block-level tags, such as anchors, | ||
602 | # phrase emphasis, and spans. The list of tags we're looking for is | ||
603 | # hard-coded: | ||
604 | my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; | ||
605 | my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; | ||
606 | |||
607 | # First, look for nested blocks, e.g.: | ||
608 | # <div> | ||
609 | # <div> | ||
610 | # tags for inner block must be indented. | ||
611 | # </div> | ||
612 | # </div> | ||
613 | # | ||
614 | # The outermost tags must start at the left margin for this to match, and | ||
615 | # the inner nested divs must be indented. | ||
616 | # We need to do this before the next, more liberal match, because the next | ||
617 | # match will start at the first `<div>` and stop at the first `</div>`. | ||
618 | $text =~ s{ | ||
619 | ( # save in $1 | ||
620 | ^ # start of line (with /m) | ||
621 | <($block_tags_a) # start tag = $2 | ||
622 | \b # word break | ||
623 | (.*\n)*? # any number of lines, minimally matching | ||
624 | </\2> # the matching end tag | ||
625 | [ \t]* # trailing spaces/tabs | ||
626 | (?=\n+|\Z) # followed by a newline or end of document | ||
627 | ) | ||
628 | }{ | ||
629 | my $key = md5_hex($1); | ||
630 | $g_html_blocks{$key} = $1; | ||
631 | "\n\n" . $key . "\n\n"; | ||
632 | }egmx; | ||
633 | |||
634 | |||
635 | # | ||
636 | # Now match more liberally, simply from `\n<tag>` to `</tag>\n` | ||
637 | # | ||
638 | $text =~ s{ | ||
639 | ( # save in $1 | ||
640 | ^ # start of line (with /m) | ||
641 | <($block_tags_b) # start tag = $2 | ||
642 | \b # word break | ||
643 | (.*\n)*? # any number of lines, minimally matching | ||
644 | .*</\2> # the matching end tag | ||
645 | [ \t]* # trailing spaces/tabs | ||
646 | (?=\n+|\Z) # followed by a newline or end of document | ||
647 | ) | ||
648 | }{ | ||
649 | my $key = md5_hex($1); | ||
650 | $g_html_blocks{$key} = $1; | ||
651 | "\n\n" . $key . "\n\n"; | ||
652 | }egmx; | ||
653 | # Special case just for <hr />. It was easier to make a special case than | ||
654 | # to make the other regex more complicated. | ||
655 | $text =~ s{ | ||
656 | (?: | ||
657 | (?<=\n\n) # Starting after a blank line | ||
658 | | # or | ||
659 | \A\n? # the beginning of the doc | ||
660 | ) | ||
661 | ( # save in $1 | ||
662 | [ ]{0,$less_than_tab} | ||
663 | <(hr) # start tag = $2 | ||
664 | \b # word break | ||
665 | ([^<>])*? # | ||
666 | /?> # the matching end tag | ||
667 | [ \t]* | ||
668 | (?=\n{2,}|\Z) # followed by a blank line or end of document | ||
669 | ) | ||
670 | }{ | ||
671 |