aboutsummaryrefslogtreecommitdiffstats
path: root/filters/html-converters
diff options
context:
space:
mode:
authorGravatar Jason A. Donenfeld <Jason@zx2c4.com>2013-05-28 04:39:43 (JST)
committerGravatar Jason A. Donenfeld <Jason@zx2c4.com>2013-05-28 04:54:16 (JST)
commit8149be213f1c8f52b0dbe6c213f6073af57fa954 (patch)
treee4d0315f53022bb7335f782ad394d8e7602f1b52 /filters/html-converters
parentdcbc0438b2543a733858d62170f3110a89edbed6 (diff)
downloadcgit-8149be213f1c8f52b0dbe6c213f6073af57fa954.zip
cgit-8149be213f1c8f52b0dbe6c213f6073af57fa954.tar.gz
filters: import more modern scripts
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'filters/html-converters')
-rwxr-xr-xfilters/html-converters/man2html5
-rwxr-xr-xfilters/html-converters/md2html2
-rwxr-xr-xfilters/html-converters/resources/markdown.pl1731
-rw-r--r--filters/html-converters/resources/rst-template.txt4
-rwxr-xr-xfilters/html-converters/rst2html2
-rwxr-xr-xfilters/html-converters/txt2html4
6 files changed, 1748 insertions, 0 deletions
diff --git a/filters/html-converters/man2html b/filters/html-converters/man2html
new file mode 100755
index 0000000..1b28437
--- /dev/null
+++ b/filters/html-converters/man2html
@@ -0,0 +1,5 @@
#!/bin/sh
# Render a man page read from standard input as an HTML fragment.
#
# groff produces a complete HTML document; the -P options hand -r and -l
# through to its HTML post-processor.  Every line carrying document-level
# markup (doctype, <html>, <head>, <meta>, <title>, <body>) is dropped, so
# that what remains can be embedded in an existing page, wrapped in a
# monospace <div>.
#
# "grep -E" replaces the obsolescent "egrep" spelling; the pattern and its
# matching behaviour are unchanged.
echo "<div style=\"font-family: monospace\">"
groff -mandoc -T html -P -r -P -l | grep -E -v '(<html>|<head>|<meta|<title>|</title>|</head>|<body>|</body>|</html>|<!DOCTYPE|"http://www.w3.org)'
echo "</div>"
5
diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html
new file mode 100755
index 0000000..5cab749
--- /dev/null
+++ b/filters/html-converters/md2html
@@ -0,0 +1,2 @@
#!/bin/sh
# Hand over to the bundled Markdown converter that lives in the
# "resources" directory next to this script.  No arguments are forwarded;
# the converter inherits this process's standard input and output.
script_dir=$(dirname "$0")
exec "$script_dir/resources/markdown.pl"
diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl
new file mode 100755
index 0000000..abec173
--- /dev/null
+++ b/filters/html-converters/resources/markdown.pl
@@ -0,0 +1,1731 @@
1#!/usr/bin/perl
2
3#
4# Markdown -- A text-to-HTML conversion tool for web writers
5#
6# Copyright (c) 2004 John Gruber
7# <http://daringfireball.net/projects/markdown/>
8#
9
10
11package Markdown;
12require 5.006_000;
13use strict;
14use warnings;
15
16use Digest::MD5 qw(md5_hex);
17use vars qw($VERSION);
18$VERSION = '1.0.1';
19# Tue 14 Dec 2004
20
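21## NOTE: this comment used to read "Disabled; causes problems under Perl 5.6.1",
## but the two statements below are active in this copy.
## NOTE(review): STDOUT gets a ":utf8" layer here, while the command-line path
## reads its input with a plain <> -- confirm that non-ASCII input is not
## double-encoded on output.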
22use utf8;
23binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
24
25
26#
27# Global default settings:
28#
29my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
30my $g_tab_width = 4;
31
32
33#
34# Globals:
35#
36
37# Regex to match balanced [brackets]. See Friedl's
38# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
39my $g_nested_brackets;
40$g_nested_brackets = qr{
41 (?> # Atomic matching
42 [^\[\]]+ # Anything other than brackets
43 |
44 \[
45 (??{ $g_nested_brackets }) # Recursive set of nested brackets
46 \]
47 )*
48}x;
49
50
51# Table of hash values for escaped characters:
52my %g_escape_table;
53foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
54 $g_escape_table{$char} = md5_hex($char);
55}
56
57
58# Global hashes, used by various utility routines
59my %g_urls;
60my %g_titles;
61my %g_html_blocks;
62
63# Used to track when we're inside an ordered or unordered list
64# (see _ProcessListItems() for details):
65my $g_list_level = 0;
66
67
68#### Blosxom plug-in interface ##########################################
69
70# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
71# which posts Markdown should process, using a "meta-markup: markdown"
72# header. If it's set to 0 (the default), Markdown will process all
73# entries.
74my $g_blosxom_use_meta = 0;
75
# Blosxom start hook: always reports success by returning 1.
sub start {
    return 1;
}
# Blosxom story hook.
#
# Rewrites the entry body (through $body_ref) with its Markdown rendering.
# When $g_blosxom_use_meta is set, only entries whose $meta::markup value is
# "markdown" (case-insensitive, surrounding whitespace allowed) are
# converted; otherwise every entry is.  Always returns 1.
sub story {
    my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

    my $wants_markdown =
           !$g_blosxom_use_meta
        || ( defined($meta::markup) && $meta::markup =~ /^\s*markdown\s*$/i );

    $$body_ref = Markdown($$body_ref) if $wants_markdown;

    return 1;
}
87
88
89#### Movable Type plug-in interface #####################################
# Probe for Movable Type.  If MT cannot be loaded, $@ is set and the whole
# registration below is skipped.
eval { require MT };
unless ($@) {
    require MT;
    MT->import;
    require MT::Template::Context;
    MT::Template::Context->import;

    # Register as a plugin object only when MT::Plugin can be loaded
    # (the original comment ties this to MT 3.0 or later).
    eval { require MT::Plugin };
    unless ($@) {
        require MT::Plugin;
        MT::Plugin->import;
        my $plugin = MT::Plugin->new({
            name        => "Markdown",
            description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
            doc_link    => 'http://daringfireball.net/projects/markdown/'
        });
        MT->add_plugin( $plugin );
    }

    # <MTMarkdownOptions output="..."> container tag: remembers the requested
    # output mode (lower-cased) in the context stash, then builds and returns
    # the enclosed template tokens.
    MT::Template::Context->add_container_tag(MarkdownOptions => sub {
        my ($ctx, $args) = @_;
        my $builder = $ctx->stash('builder');
        my $tokens  = $ctx->stash('tokens');

        $ctx->stash('markdown_output', lc $args->{'output'})
            if defined $args->{'output'};

        my $built = $builder->build($ctx, $tokens);
        return $ctx->error($builder->errstr) unless defined $built;
        return $built;
    });

    # Plain Markdown text filter.
    MT->add_text_filter('markdown' => {
        label     => 'Markdown',
        docs      => 'http://daringfireball.net/projects/markdown/',
        on_format => sub {
            my ($text, $ctx) = @_;
            my $raw = 0;

            if (defined $ctx) {
                my $output = $ctx->stash('markdown_output');
                my $mode   = defined $output ? $output : '';

                if ($mode =~ m/^html/i) {
                    # HTML-style empty elements; the stashed mode is consumed.
                    $g_empty_element_suffix = ">";
                    $ctx->stash('markdown_output', '');
                }
                elsif ($mode eq 'raw') {
                    # Pass the text through untouched; the mode is consumed.
                    $raw = 1;
                    $ctx->stash('markdown_output', '');
                }
                else {
                    $raw = 0;
                    $g_empty_element_suffix = " />";
                }
            }

            return $text if $raw;
            $text = Markdown($text);
            return $text;
        },
    });

    # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter.
    # The lookup is wrapped so that the single mention of the MT global does
    # not trigger a "used only once" warning.
    my $smartypants = do {
        no warnings "once";
        $MT::Template::Context::Global_filters{'smarty_pants'};
    };

    if ($smartypants) {
        MT->add_text_filter('markdown_with_smartypants' => {
            label     => 'Markdown With SmartyPants',
            docs      => 'http://daringfireball.net/projects/markdown/',
            on_format => sub {
                my ($text, $ctx) = @_;

                if (defined $ctx) {
                    my $output = $ctx->stash('markdown_output');
                    # This filter only recognises the exact mode 'html'.
                    my $html_mode = defined $output && $output eq 'html';
                    $g_empty_element_suffix = $html_mode ? ">" : " />";
                }

                my $html   = Markdown($text);
                my $result = $smartypants->($html, '1');
                return $result;
            },
        });
    }
}
181else {
182#### BBEdit/command-line text filter interface ##########################
183# Needs to be hidden from MT (and Blosxom when running in static mode).
184
185 # We're only using $blosxom::version once; tell Perl not to warn us:
186 no warnings 'once';
187 unless ( defined($blosxom::version) ) {
188 use warnings;
189
190 #### Check for command-line switches: #################
191 my %cli_opts;
192 use Getopt::Long;
193 Getopt::Long::Configure('pass_through');
194 GetOptions(\%cli_opts,
195 'version',
196 'shortversion',
197 'html4tags',
198 );
199 if ($cli_opts{'version'}) { # Version info
200 print "\nThis is Markdown, version $VERSION.\n";
201 print "Copyright 2004 John Gruber\n";
202 print "http://daringfireball.net/projects/markdown/\n\n";
203 exit 0;
204 }
205 if ($cli_opts{'shortversion'}) { # Just the version number string.
206 print $VERSION;
207 exit 0;
208 }
209 if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
210 $g_empty_element_suffix = ">";
211 }
212
213
214 #### Process incoming text: ###########################
215 my $text;
216 {
217 local $/; # Slurp the whole file
218 $text = <>;
219 }
220 print <<'EOT';
221<style>
222.markdown-body {
223 font-size: 14px;
224 line-height: 1.6;
225 overflow: hidden;
226}
227.markdown-body>*:first-child {
228 margin-top: 0 !important;
229}
230.markdown-body>*:last-child {
231 margin-bottom: 0 !important;
232}
233.markdown-body a.absent {
234 color: #c00;
235}
236.markdown-body a.anchor {
237 display: block;
238 padding-left: 30px;
239 margin-left: -30px;
240 cursor: pointer;
241 position: absolute;
242 top: 0;
243 left: 0;
244 bottom: 0;
245}
246.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 {
247 margin: 20px 0 10px;
248 padding: 0;
249 font-weight: bold;
250 -webkit-font-smoothing: antialiased;
251 cursor: text;
252 position: relative;
253}
254.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link {
255 display: none;
256 color: #000;
257}
258.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor {
259 text-decoration: none;
260 line-height: 1;
261 padding-left: 0;
262 margin-left: -22px;
263 top: 15%}
264.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link {
265 display: inline-block;
266}
267.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code {
268 font-size: inherit;
269}
270.markdown-body h1 {
271 font-size: 28px;
272 color: #000;
273}
274.markdown-body h2 {
275 font-size: 24px;
276 border-bottom: 1px solid #ccc;
277 color: #000;
278}
279.markdown-body h3 {
280 font-size: 18px;
281}
282.markdown-body h4 {
283 font-size: 16px;
284}
285.markdown-body h5 {
286 font-size: 14px;
287}
288.markdown-body h6 {
289 color: #777;
290 font-size: 14px;
291}
292.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre {
293 margin: 15px 0;
294}
295.markdown-body hr {
296 background: transparent url("/dirty-shade.png") repeat-x 0 0;
297 border: 0 none;
298 color: #ccc;
299 height: 4px;
300 padding: 0;
301}
302.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child {
303 margin-top: 0;
304 padding-top: 0;
305}
306.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 {
307 margin-top: 0;
308 padding-top: 0;
309}
310.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p {
311 margin-top: 0;
312}
313.markdown-body li p.first {
314 display: inline-block;
315}
316.markdown-body ul, .markdown-body ol {
317 padding-left: 30px;
318}
319.markdown-body ul.no-list, .markdown-body ol.no-list {
320 list-style-type: none;
321 padding: 0;
322}
323.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type {
324 margin-top: 0px;
325}
326.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type {
327 margin-bottom: 0;
328}
329.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul {
330 margin-bottom: 0;
331}
332.markdown-body dl {
333 padding: 0;
334}
335.markdown-body dl dt {
336 font-size: 14px;
337 font-weight: bold;
338 font-style: italic;
339 padding: 0;
340 margin: 15px 0 5px;
341}
342.markdown-body dl dt:first-child {
343 padding: 0;
344}
345.markdown-body dl dt>:first-child {
346 margin-top: 0px;
347}
348.markdown-body dl dt>:last-child {
349 margin-bottom: 0px;
350}
351.markdown-body dl dd {
352 margin: 0 0 15px;
353 padding: 0 15px;
354}
355.markdown-body dl dd>:first-child {
356 margin-top: 0px;
357}
358.markdown-body dl dd>:last-child {
359 margin-bottom: 0px;
360}
361.markdown-body blockquote {
362 border-left: 4px solid #DDD;
363 padding: 0 15px;
364 color: #777;
365}
366.markdown-body blockquote>:first-child {
367 margin-top: 0px;
368}
369.markdown-body blockquote>:last-child {
370 margin-bottom: 0px;
371}
372.markdown-body table th {
373 font-weight: bold;
374}
375.markdown-body table th, .markdown-body table td {
376 border: 1px solid #ccc;
377 padding: 6px 13px;
378}
379.markdown-body table tr {
380 border-top: 1px solid #ccc;
381 background-color: #fff;
382}
383.markdown-body table tr:nth-child(2n) {
384 background-color: #f8f8f8;
385}
386.markdown-body img {
387 max-width: 100%;
388 -moz-box-sizing: border-box;
389 box-sizing: border-box;
390}
391.markdown-body span.frame {
392 display: block;
393 overflow: hidden;
394}
395.markdown-body span.frame>span {
396 border: 1px solid #ddd;
397 display: block;
398 float: left;
399 overflow: hidden;
400 margin: 13px 0 0;
401 padding: 7px;
402 width: auto;
403}
404.markdown-body span.frame span img {
405 display: block;
406 float: left;
407}
408.markdown-body span.frame span span {
409 clear: both;
410 color: #333;
411 display: block;
412 padding: 5px 0 0;
413}
414.markdown-body span.align-center {
415 display: block;
416 overflow: hidden;
417 clear: both;
418}
419.markdown-body span.align-center>span {
420 display: block;
421 overflow: hidden;
422 margin: 13px auto 0;
423 text-align: center;
424}
425.markdown-body span.align-center span img {
426 margin: 0 auto;
427 text-align: center;
428}
429.markdown-body span.align-right {
430 display: block;
431 overflow: hidden;
432 clear: both;
433}
434.markdown-body span.align-right>span {
435 display: block;
436 overflow: hidden;
437 margin: 13px 0 0;
438 text-align: right;
439}
440.markdown-body span.align-right span img {
441 margin: 0;
442 text-align: right;
443}
444.markdown-body span.float-left {
445 display: block;
446 margin-right: 13px;
447 overflow: hidden;
448 float: left;
449}
450.markdown-body span.float-left span {
451 margin: 13px 0 0;
452}
453.markdown-body span.float-right {
454 display: block;
455 margin-left: 13px;
456 overflow: hidden;
457 float: right;
458}
459.markdown-body span.float-right>span {
460 display: block;
461 overflow: hidden;
462 margin: 13px auto 0;
463 text-align: right;
464}
465.markdown-body code, .markdown-body tt {
466 margin: 0 2px;
467 padding: 0px 5px;
468 border: 1px solid #eaeaea;
469 background-color: #f8f8f8;
470 border-radius: 3px;
471}
472.markdown-body code {
473 white-space: nowrap;
474}
475.markdown-body pre>code {
476 margin: 0;
477 padding: 0;
478 white-space: pre;
479 border: none;
480 background: transparent;
481}
482.markdown-body .highlight pre, .markdown-body pre {
483 background-color: #f8f8f8;
484 border: 1px solid #ccc;
485 font-size: 13px;
486 line-height: 19px;
487 overflow: auto;
488 padding: 6px 10px;
489 border-radius: 3px;
490}
491.markdown-body pre code, .markdown-body pre tt {
492 margin: 0;
493 padding: 0;
494 background-color: transparent;
495 border: none;
496}
497</style>
498EOT
499 print "<div class='markdown-body'>";
500 print Markdown($text);
501 print "</div>";
502 }
503}
504
505
506
sub Markdown {
#
# Main function: converts a Markdown document to HTML and returns it with
# one trailing newline appended.
#
# The order of the passes is essential. Link and image substitutions need
# to happen before _EscapeSpecialChars(), so that any *'s or _'s in the
# <a> and <img> tags get encoded.
#
    my $text = shift;

    # Start every run with empty lookup tables; otherwise definitions from
    # one article would leak into the next when several articles are
    # rendered by the same process (e.g. an index page).
    %$_ = () for (\%g_urls, \%g_titles, \%g_html_blocks);

    for ($text) {
        s{\r\n}{\n}g;       # DOS to Unix
        s{\r}{\n}g;         # Mac to Unix
        $_ .= "\n\n";       # guarantee a couple of trailing newlines
    }

    # Tabs become spaces before anything looks at indentation.
    $text = _Detab($text);

    # Blank out lines holding only spaces and tabs, so that later patterns
    # can match consecutive blank lines with a plain /\n+/.
    $text =~ s/^[ \t]+$//mg;

    # Remaining passes, strictly in this order: hash block-level HTML,
    # collect link definitions, run the block transformations, and finally
    # restore the escaped special characters.
    my @passes = (
        \&_HashHTMLBlocks,
        \&_StripLinkDefinitions,
        \&_RunBlockGamut,
        \&_UnescapeSpecialChars,
    );
    for my $pass (@passes) {
        $text = $pass->($text);
    }

    return $text . "\n";
}
553
554
sub _StripLinkDefinitions {
#
# Strips link definitions from text and records them in the global
# %g_urls and %g_titles hashes, keyed by the lower-cased link id.
#
# Takes the document text; returns it with every definition removed.
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    # One definition is removed per iteration, until none is left.
    while ($text =~ s{
                        ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
                          [ \t]*
                          \n?               # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>?          # url = $2
                          [ \t]*
                          \n?               # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s)         # lookbehind for whitespace
                            ["(]
                            (.+?)           # title = $3
                            [")]
                            [ \t]*
                        )?  # title is optional
                        (?:\n+|\Z)
                    }
                    {}mx) {
        # Copy the captures into lexicals before doing anything else, so
        # the rest of the loop body does not depend on $1..$3 staying intact.
        my ($id, $url, $title) = (lc($1), $2, $3);   # Link IDs are case-insensitive

        $g_urls{$id} = _EncodeAmpsAndAngles( $url );

        # Test for definedness rather than truth: a title such as "0" is a
        # legitimate title and must not be dropped.
        if (defined $title) {
            $title =~ s/"/&quot;/g;
            $g_titles{$id} = $title;
        }
    }

    return $text;
}
592
593
sub _HashHTMLBlocks {
#
# Replaces block-level raw HTML in the text with MD5 placeholders.
#
# Each matched block is stored in %g_html_blocks under the MD5 hex digest
# of its text, and the block itself is replaced by that digest surrounded
# by blank lines. Returns the modified text.
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Shared replacement step for all four substitutions below: remember
    # the block under its digest and hand back the placeholder.
    my $stash_block = sub {
        my ($block) = @_;
        my $key = md5_hex($block);
        $g_html_blocks{$key} = $block;
        return "\n\n" . $key . "\n\n";
    };

    # Only block-level tags (headers, lists, tables, ...) are hashed.
    # Paragraphs wrapped in non-block-level tags such as anchors, phrase
    # emphasis and spans must still get their <p> wrappers, so those tags
    # are deliberately absent from these hard-coded lists. The second list
    # is the first one without ins and del.
    my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
    my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;

    # Pass 1: nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    #
    # The outermost tags must sit at the left margin and the inner ones
    # must be indented. This has to run before the more liberal pass 2,
    # which would start at the first `<div>` and stop at the first `</div>`.
    $text =~ s{
                (                       # save in $1
                    ^                   # start of line (with /m)
                    <($block_tags_a)    # start tag = $2
                    \b                  # word break
                    (.*\n)*?            # any number of lines, minimally matching
                    </\2>               # the matching end tag
                    [ \t]*              # trailing spaces/tabs
                    (?=\n+|\Z)          # followed by a newline or end of document
                )
            }{ $stash_block->($1) }egmx;

    # Pass 2: match more liberally, simply from `\n<tag>` to `</tag>\n`.
    $text =~ s{
                (                       # save in $1
                    ^                   # start of line (with /m)
                    <($block_tags_b)    # start tag = $2
                    \b                  # word break
                    (.*\n)*?            # any number of lines, minimally matching
                    .*</\2>             # the matching end tag
                    [ \t]*              # trailing spaces/tabs
                    (?=\n+|\Z)          # followed by a newline or end of document
                )
            }{ $stash_block->($1) }egmx;

    # Pass 3: <hr /> gets its own pattern; that was easier than making
    # the patterns above more complicated.
    $text =~ s{
                (?:
                    (?<=\n\n)           # Starting after a blank line
                    |                   # or
                    \A\n?               # the beginning of the doc
                )
                (                       # save in $1
                    [ ]{0,$less_than_tab}
                    <(hr)               # start tag = $2
                    \b                  # word break
                    ([^<>])*?           #
                    /?>                 # the matching end tag
                    [ \t]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document
                )
            }{ $stash_block->($1) }egx;

    # Pass 4: standalone HTML comments.
    $text =~ s{
                (?:
                    (?<=\n\n)           # Starting after a blank line
                    |                   # or
                    \A\n?               # the beginning of the doc
                )
                (                       # save in $1
                    [ ]{0,$less_than_tab}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ \t]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document
                )
            }{ $stash_block->($1) }egx;

    return $text;
}
702
703
704sub _RunBlockGamut {
705#
706# These are all the transformations that form block-level
707# tags like paragraphs, headers, and list items.
708#
709 my $text = shift;
710
711 $text = _DoHeaders($text);
712
713