1 // Released under MIT license
2 // Copyright (c) 2009-2010 Dominic Baggott
3 // Copyright (c) 2009-2010 Ash Berlin
4 // Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com)
5 // Date: 2013-09-15T16:12Z
12 var MarkdownHelpers
= {};
14 // For Spidermonkey based engines
15 function mk_block_toSource() {
16 return "Markdown.mk_block( " +
17 uneval(this.toString()) +
19 uneval(this.trailing
) +
21 uneval(this.lineNumber
) +
26 function mk_block_inspect() {
27 var util
= require("util");
28 return "Markdown.mk_block( " +
29 util
.inspect(this.toString()) +
31 util
.inspect(this.trailing
) +
33 util
.inspect(this.lineNumber
) +
38 MarkdownHelpers
.mk_block = function(block
, trail
, line
) {
39 // Be helpful for default case in tests.
40 if ( arguments
.length
=== 1 )
43 // We actually need a String object, not a string primitive
45 var s
= new String(block
);
47 // To make it clear its not just a string
48 s
.inspect
= mk_block_inspect
;
49 s
.toSource
= mk_block_toSource
;
51 if ( line
!== undefined )
// Array test shared by the helpers. Prefer the engine's own Array.isArray;
// when it is absent, fall back to comparing the value's
// Object.prototype.toString tag against "[object Array]".
var isArray = Array.isArray;
if ( !isArray ) {
  isArray = function( obj ) {
    var tag = Object.prototype.toString.call( obj );
    return tag === "[object Array]";
  };
}
MarkdownHelpers.isArray = isArray;
62 // Don't mess with Array.prototype. It's not friendly
63 if ( Array
.prototype.forEach
) {
64 MarkdownHelpers
.forEach
= function forEach( arr
, cb
, thisp
) {
65 return arr
.forEach( cb
, thisp
);
69 MarkdownHelpers
.forEach
= function forEach(arr
, cb
, thisp
) {
70 for (var i
= 0; i
< arr
.length
; i
++)
71 cb
.call(thisp
|| arr
, arr
[i
], i
, arr
);
75 MarkdownHelpers
.isEmpty
= function isEmpty( obj
) {
76 for ( var key
in obj
) {
77 if ( hasOwnProperty
.call( obj
, key
) )
83 MarkdownHelpers
.extract_attr
= function extract_attr( jsonml
) {
84 return isArray(jsonml
)
86 && typeof jsonml
[ 1 ] === "object"
87 && !( isArray(jsonml
[ 1 ]) )
98 * Markdown processing in Javascript done right. We have very particular views
99 * on what constitutes 'right' which include:
101 * - produces well-formed HTML (this means that em and strong nesting is
104 * - has an intermediate representation to allow processing of parsed data (We
105 * in fact have two, both as [JsonML]: a markdown tree and an HTML tree).
107 * - is easily extensible to add new dialects without having to rewrite the
108 * entire parsing mechanics
110 * - has a good test suite
112 * This implementation fulfills all of these (except that the test suite could
113 * do with expanding to automatically run all the fixtures from other Markdown
116 * ##### Intermediate Representation
118 * *TODO* Talk about this :) It's JsonML, but document the node names we use.
120 * [JsonML]: http://jsonml.org/ "JSON Markup Language"
122 var Markdown = function(dialect
) {
123 switch (typeof dialect
) {
125 this.dialect
= Markdown
.dialects
.Gruber
;
128 this.dialect
= dialect
;
131 if ( dialect
in Markdown
.dialects
)
132 this.dialect
= Markdown
.dialects
[dialect
];
134 throw new Error("Unknown Markdown dialect '" + String(dialect
) + "'");
138 this.strong_state
= [];
139 this.debug_indent
= "";
145 * Namespace of built-in dialects.
147 Markdown
.dialects
= {};
152 // Imported functions
153 var mk_block
= Markdown
.mk_block
= MarkdownHelpers
.mk_block
,
154 isArray
= MarkdownHelpers
.isArray
;
157 * parse( markdown, [dialect] ) -> JsonML
158 * - markdown (String): markdown string to parse
159 * - dialect (String | Dialect): the dialect to use, defaults to gruber
161 * Parse `markdown` and return a markdown document as a Markdown.JsonML tree.
Markdown.parse = function( source, dialect ) {
  // `dialect` is handed to the constructor untouched; when it is undefined
  // the constructor is the one that picks the default.
  var parser = new Markdown( dialect );
  var tree = parser.toTree( source );
  return tree;
};
169 function count_lines( str
) {
172 while ( ( i
= str
.indexOf("\n", i
+ 1) ) !== -1 )
177 // Internal - split source into rough blocks
178 Markdown
.prototype.split_blocks
= function splitBlocks( input
) {
179 input
= input
.replace(/(\r\n|\n|\r)/g, "\n");
180 // [\s\S] matches _anything_ (newline or space)
181 // [^] is equivalent but doesn't work in IEs.
182 var re
= /([\s\S]+?)($|\n#|\n(?:\s*\n|$)+)/g,
188 if ( ( m
= /^(\s*\n)/.exec(input
) ) !== null ) {
189 // skip (but count) leading blank lines
190 line_no
+= count_lines( m
[0] );
191 re
.lastIndex
= m
[0].length
;
194 while ( ( m
= re
.exec(input
) ) !== null ) {
195 if (m
[2] === "\n#") {
199 blocks
.push( mk_block( m
[1], m
[2], line_no
) );
200 line_no
+= count_lines( m
[0] );
207 * Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ]
208 * - block (String): the block to process
209 * - next (Array): the following blocks
211 * Process `block` and return an array of JsonML nodes representing `block`.
213 * It does this by asking each block level function in the dialect to process
214 * the block until one can. Successful handling is indicated by returning an
215 * array (with zero or more JsonML nodes), failure by a false value.
217 * Blocks handlers are responsible for calling [[Markdown#processInline]]
218 * themselves as appropriate.
220 * If the blocks were split incorrectly or adjacent blocks need collapsing you
221 * can adjust `next` in place using shift/splice etc.
223 * If any of this default behaviour is not right for the dialect, you can
224 * define a `__call__` method on the dialect that will get invoked to handle
225 * the block processing.
227 Markdown
.prototype.processBlock
= function processBlock( block
, next
) {
228 var cbs
= this.dialect
.block
,
231 if ( "__call__" in cbs
)
232 return cbs
.__call__
.call(this, block
, next
);
234 for ( var i
= 0; i
< ord
.length
; i
++ ) {
235 //D:this.debug( "Testing", ord[i] );
236 var res
= cbs
[ ord
[i
] ].call( this, block
, next
);
238 //D:this.debug(" matched");
239 if ( !isArray(res
) || ( res
.length
> 0 && !( isArray(res
[0]) ) ) )
240 this.debug(ord
[i
], "didn't return a proper array");
241 //D:this.debug( "" );
246 // Uhoh! no match! Should we throw an error?
// Run the active dialect's inline rules over `block`.
// The block is converted with String() before being passed on, and the
// dialect's inline `__call__` entry point is invoked with this Markdown
// instance as its `this`.
Markdown.prototype.processInline = function processInline( block ) {
  var inlineRules = this.dialect.inline;
  return inlineRules.__call__.call( this, String( block ) );
};
255 * Markdown#toTree( source ) -> JsonML
256 * - source (String): markdown source to parse
258 * Parse `source` into a JsonML tree representing the markdown document.
260 // custom_root means set this.tree to `custom_root` and restore old value on return
261 Markdown
.prototype.toTree
= function toTree( source
, custom_root
) {
262 var blocks
= source
instanceof Array
? source
: this.split_blocks( source
);
264 // Make tree a member variable so its easier to mess with in extensions
265 var old_tree
= this.tree
;
267 this.tree
= custom_root
|| this.tree
|| [ "markdown" ];
270 while ( blocks
.length
) {
271 var b
= this.processBlock( blocks
.shift(), blocks
);
273 // Reference blocks and the like won't return any content
275 continue blocks_loop
;
277 this.tree
.push
.apply( this.tree
, b
);
283 this.tree
= old_tree
;
// Emit a debug message through whatever output channel the host provides.
// All arguments are forwarded, prefixed with the instance's current
// `debug_indent` string. Both channels are used when both exist.
Markdown.prototype.debug = function () {
  var args = Array.prototype.slice.call( arguments );
  args.unshift( this.debug_indent );

  // Shell-style hosts expose a global print().
  // NOTE(review): in browsers the global `print` is presumably window.print
  // (the print dialog) - confirm whether this branch should be guarded.
  if ( typeof print !== "undefined" )
    print.apply( print, args );

  // console.log must be invoked with `console` as its receiver; some engines
  // throw "Illegal invocation" when it is applied with a null `this`.
  if ( typeof console !== "undefined" && typeof console.log !== "undefined" )
    console.log.apply( console, args );
};
297 Markdown
.prototype.loop_re_over_block = function( re
, block
, cb
) {
298 // Dont use /g regexps with this
302 while ( b
.length
&& (m
= re
.exec(b
) ) !== null ) {
303 b
= b
.substr( m
[0].length
);
309 // Build default order from insertion order.
310 Markdown
.buildBlockOrder = function(d
) {
313 if ( i
=== "__order__" || i
=== "__call__" )
320 // Build patterns for inline matcher
321 Markdown
.buildInlinePatterns = function(d
) {
325 // __foo__ is reserved and not a pattern
326 if ( i
.match( /^__.*__$/) )
328 var l
= i
.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" )
329 .replace( /\n/, "\\n" );
330 patterns
.push( i
.length
=== 1 ? l
: "(?:" + l
+ ")" );
333 patterns
= patterns
.join("|");
334 d
.__patterns__
= patterns
;
335 //print("patterns:", uneval( patterns ) );
338 d
.__call__ = function(text
, pattern
) {
339 if ( pattern
!== undefined )
340 return fn
.call(this, text
, pattern
);
342 return fn
.call(this, text
, patterns
);
349 var extract_attr
= MarkdownHelpers
.extract_attr
;
352 * renderJsonML( jsonml[, options] ) -> String
353 * - jsonml (Array): JsonML array to render to XML
354 * - options (Object): options
356 * Converts the given JsonML into well-formed XML.
358 * The options currently understood are:
360 * - root (Boolean): whether or not the root node should be included in the
361 * output, or just its children. The default `false` is to not include the
364 Markdown
.renderJsonML = function( jsonml
, options
) {
365 options
= options
|| {};
366 // include the root element in the rendered output?
367 options
.root
= options
.root
|| false;
371 if ( options
.root
) {
372 content
.push( render_tree( jsonml
) );
375 jsonml
.shift(); // get rid of the tag
376 if ( jsonml
.length
&& typeof jsonml
[ 0 ] === "object" && !( jsonml
[ 0 ] instanceof Array
) )
377 jsonml
.shift(); // get rid of the attributes
379 while ( jsonml
.length
)
380 content
.push( render_tree( jsonml
.shift() ) );
383 return content
.join( "\n\n" );
388 * toHTMLTree( markdown, [dialect] ) -> JsonML
389 * toHTMLTree( md_tree ) -> JsonML
390 * - markdown (String): markdown string to parse
391 * - dialect (String | Dialect): the dialect to use, defaults to gruber
392 * - md_tree (Markdown.JsonML): parsed markdown tree
394 * Turn markdown into HTML, represented as a JsonML tree. If a string is given
395 * to this function, it is first parsed into a markdown tree by calling
398 Markdown
.toHTMLTree
= function toHTMLTree( input
, dialect
, options
) {
400 // convert string input to an MD tree
401 if ( typeof input
=== "string" )
402 input
= this.parse( input
, dialect
);
404 // Now convert the MD tree to an HTML tree
406 // remove references from the tree
407 var attrs
= extract_attr( input
),
410 if ( attrs
&& attrs
.references
)
411 refs
= attrs
.references
;
413 var html
= convert_tree_to_html( input
, refs
, options
);
414 merge_text_nodes( html
);
419 * toHTML( markdown, [dialect] ) -> String
420 * toHTML( md_tree ) -> String
421 * - markdown (String): markdown string to parse
422 * - md_tree (Markdown.JsonML): parsed markdown tree
424 * Take markdown (either as a string or as a JsonML tree) and run it through
425 * [[toHTMLTree]] then turn it into a well-formatted HTML fragment.
Markdown.toHTML = function toHTML( source, dialect, options ) {
  // Stage 1: build the HTML JsonML tree (parsing first if `source` is text).
  var htmlTree = this.toHTMLTree( source, dialect, options );

  // Stage 2: serialise that tree. `options` is not forwarded to the renderer.
  return this.renderJsonML( htmlTree );
};
// Escape the five characters that are significant in HTML text and in
// quoted attribute values, so the result can be embedded safely in markup.
//
// - text: value to escape; coerced with String() so non-string values
//   (e.g. numeric attribute values) do not throw.
// Returns the escaped string.
function escapeHTML( text ) {
  // "&" must be handled first, otherwise the ampersands introduced by the
  // later replacements would themselves be escaped again.
  return String( text )
    .replace( /&/g, "&amp;" )
    .replace( /</g, "&lt;" )
    .replace( />/g, "&gt;" )
    .replace( /"/g, "&quot;" )
    .replace( /'/g, "&#39;" );
}
442 function render_tree( jsonml ) {
444 if ( typeof jsonml === "string
" )
445 return escapeHTML( jsonml );
447 var tag = jsonml.shift(),
451 if ( jsonml.length && typeof jsonml[ 0 ] === "object
" && !( jsonml[ 0 ] instanceof Array ) )
452 attributes = jsonml.shift();
454 while ( jsonml.length )
455 content.push( render_tree( jsonml.shift() ) );
458 for ( var a in attributes )
459 tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"';
461 // be careful about adding whitespace here for inline elements
462 if ( tag === "img
" || tag === "br
" || tag === "hr
" )
463 return "<"+ tag + tag_attrs + "/>";
465 return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">";
468 function convert_tree_to_html( tree, references, options ) {
470 options = options || {};
473 var jsonml = tree.slice( 0 );
475 if ( typeof options.preprocessTreeNode === "function" )
476 jsonml = options.preprocessTreeNode(jsonml, references);
478 // Clone attributes if they exist
479 var attrs = extract_attr( jsonml );
483 jsonml[ 1 ][ i ] = attrs[ i ];
489 if ( typeof jsonml === "string
" )
493 switch ( jsonml[ 0 ] ) {
495 jsonml[ 0 ] = "h
" + jsonml[ 1 ].level;
496 delete jsonml[ 1 ].level;
511 jsonml[ 0 ] = "html
";
513 delete attrs.references;
518 var code = [ "code
" ];
519 code.push.apply( code, jsonml.splice( i, jsonml.length - i ) );
523 jsonml[ 0 ] = "code
";
526 jsonml[ 1 ].src = jsonml[ 1 ].href;
527 delete jsonml[ 1 ].href;
538 // grab this ref and clean up the attribute node
539 var ref = references[ attrs.ref ];
541 // if the reference exists, make the link
545 // add in the href and title, if present
546 attrs.href = ref.href;
548 attrs.title = ref.title;
550 // get rid of the unneeded original text
551 delete attrs.original;
553 // the reference doesn't exist, so revert to plain text
555 return attrs.original;
561 // grab this ref and clean up the attribute node
562 var ref = references[ attrs.ref ];
564 // if the reference exists, make the link
568 // add in the href and title, if present
569 attrs.src = ref.href;
571 attrs.title = ref.title;
573 // get rid of the unneeded original text
574 delete attrs.original;
576 // the reference doesn't exist, so revert to plain text
578 return attrs.original;
583 // convert all the children
586 // deal with the attribute node, if it exists
588 // if there are keys, skip over it
589 for ( var key in jsonml[ 1 ] ) {
593 // if there aren't, remove it
595 jsonml.splice( i, 1 );
598 for ( ; i < jsonml.length; ++i ) {
599 jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options );
606 // merges adjacent text nodes into a single node
607 function merge_text_nodes( jsonml ) {
608 // skip the tag name and attribute hash
609 var i = extract_attr( jsonml ) ? 2 : 1;
611 while ( i < jsonml.length ) {
612 // if it's a string check the next item too
613 if ( typeof jsonml[ i ] === "string
" ) {
614 if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string
" ) {
615 // merge the second string into the first and remove it
616 jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ];
622 // if it's not a string recurse
624 merge_text_nodes( jsonml[ i ] );
632 var DialectHelpers = {};
633 DialectHelpers.inline_until_char = function( text, want ) {
638 if ( text.charAt( consumed ) === want ) {
639 // Found the character we were looking for
641 return [ consumed, nodes ];
644 if ( consumed >= text.length ) {
645 // No closing char found. Abort.
649 var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ) );
650 consumed += res[ 0 ];
651 // Add any returned nodes.
652 nodes.push.apply( nodes, res.slice( 1 ) );
656 // Helper function to make sub-classing a dialect easier
657 DialectHelpers.subclassDialect = function( d ) {
659 Block.prototype = d.block;
661 Inline.prototype = d.inline;
663 return { block: new Block(), inline: new Inline() };
669 var forEach = MarkdownHelpers.forEach,
670 extract_attr = MarkdownHelpers.extract_attr,
671 mk_block = MarkdownHelpers.mk_block,
672 isEmpty = MarkdownHelpers.isEmpty,
673 inline_until_char = DialectHelpers.inline_until_char;
678 * The default dialect that follows the rules set out by John Gruber's
679 * markdown.pl as closely as possible. Well actually we follow the behaviour of
680 * that script which in some places is not exactly what the syntax web page
685 atxHeader: function atxHeader( block, next ) {
686 var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ );
691 var header = [ "header
", { level: m[ 1 ].length } ];
692 Array.prototype.push.apply(header, this.processInline(m[ 2 ]));
694 if ( m[0].length < block.length )
695 next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
700 setextHeader: function setextHeader( block, next ) {
701 var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ );
706 var level = ( m[ 2 ] === "=" ) ? 1 : 2,
707 header = [ "header
", { level : level }, m[ 1 ] ];
709 if ( m[0].length < block.length )
710 next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
715 code: function code( block, next ) {
718 // should be a code block followed by a paragraph. Fun
720 // There might also be adjacent code block to merge.
723 re = /^(?: {0,3}\t| {4})(.*)\n?/;
725 // 4 spaces + content
726 if ( !block.match( re ) )
731 // Now pull out the rest of the lines
732 var b = this.loop_re_over_block(
733 re, block.valueOf(), function( m ) { ret.push( m[1] ); } );
736 // Case alluded to in first comment. push it back on as a new block
737 next.unshift( mk_block(b, block.trailing) );
740 else if ( next.length ) {
741 // Check the next block - it might be code too
742 if ( !next[0].match( re ) )
745 // Pull how how many blanks lines follow - minus two to account for .join
746 ret.push ( block.trailing.replace(/[^\n]/g, "").substring(2) );
748 block = next.shift();
755 return [ [ "code_block
", ret.join("\n") ] ];
758 horizRule: function horizRule( block, next ) {
759 // this needs to find any hr in the block to handle abutting blocks
760 var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ );
765 var jsonml = [ [ "hr
" ] ];
767 // if there's a leading abutting block, process it
769 var contained = mk_block( m[ 1 ], "", block.lineNumber );
770 jsonml.unshift.apply( jsonml, this.toTree( contained, [] ) );
773 // if there's a trailing abutting block, stick it into next
775 next.unshift( mk_block( m[ 3 ], block.trailing, block.lineNumber + 1 ) );
780 // There are two types of lists. Tight and loose. Tight lists have no whitespace
781 // between the items (and result in text just in the <li>) and loose lists,
782 // which have an empty line between list items, resulting in (one or more)
783 // paragraphs inside the <li>.
785 // There are all sorts weird edge cases about the original markdown.pl's
786 // handling of lists:
788 // * Nested lists are supposed to be indented by four chars per level. But
789 // if they aren't, you can get a nested list by indenting by less than
790 // four so long as the indent doesn't match an indent of an existing list
791 // item in the 'nest stack'.
793 // * The type of the list (bullet or number) is controlled just by the
794 // first item at the indent. Subsequent changes are ignored unless they
795 // are for nested lists
797 lists: (function( ) {
798 // Use a closure to hide a few variables.
799 var any_list = "[*+-]|\\d
+\\.",
800 bullet_list = /[*+-]/,
801 // Capture leading indent as it matters for determining nested lists.
802 is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ),
803 indent_re = "(?: {0,3}\\t
| {4})";
805 // TODO: Cache this regexp for certain depths.
806 // Create a regexp suitable for matching an li for a given stack depth
807 function regex_for_depth( depth ) {
810 // m[1] = indent, m[2] = list_type
811 "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s
+)|" +
813 "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})"
816 function expand_tab( input ) {
817 return input.replace( / {0,3}\t/g, " " );
820 // Add inline content `inline` to `li`. inline comes from processInline
821 // so is an array of content
822 function add(li, loose, inline, nl) {
824 li.push( [ "para
" ].concat(inline) );
827 // Hmmm, should this be any block level element or just paras?
828 var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] === "para
"
832 // If there is already some content in this list, add the new line in
833 if ( nl && li.length > 1 )
836 for ( var i = 0; i < inline.length; i++ ) {
837 var what = inline[i],
838 is_str = typeof what === "string
";
839 if ( is_str && add_to.length > 1 && typeof add_to[add_to.length-1] === "string
" )
840 add_to[ add_to.length-1 ] += what;
846 // contained means have an indent greater than the current one. On
847 // *every* line in the block
848 function get_contained_blocks( depth, blocks ) {
850 var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n
?)*$" ),
851 replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm
"),
854 while ( blocks.length > 0 ) {
855 if ( re.exec( blocks[0] ) ) {
856 var b = blocks.shift(),
857 // Now remove that indent
858 x = b.replace( replace, "");
860 ret.push( mk_block( x, b.trailing, b.lineNumber ) );
868 // passed to stack.forEach to turn list items up the stack into paras
869 function paragraphify(s, i, stack) {
871 var last_li = list[list.length-1];
873 if ( last_li[1] instanceof Array && last_li[1][0] === "para
" )
875 if ( i + 1 === stack.length ) {
877 // Keep the same array, but replace the contents
878 last_li.push( ["para
"].concat( last_li.splice(1, last_li.length - 1) ) );
881 var sublist = last_li.pop();
882 last_li.push( ["para
"].concat( last_li.splice(1, last_li.length - 1) ), sublist );
886 // The matcher function
887 return function( block, next ) {
888 var m = block.match( is_list_re );
892 function make_list( m ) {
893 var list = bullet_list.exec( m[2] )
897 stack.push( { list: list, indent: m[1] } );
902 var stack = [], // Stack of lists for nesting.
903 list = make_list( m ),
906 ret = [ stack[0].list ],
909 // Loop to search over block looking for inner block elements and loose lists
912 // Split into lines preserving new lines at end of line
913 var lines = block.split( /(?=\n)/ );
915 // We have to grab all lines for a li and call processInline on them
916 // once as there are some inline things that can span lines.
917 var li_accumulate = "", nl = "";
919 // Loop over the lines in this block looking for tight lists.
921 for ( var line_no = 0; line_no < lines.length; line_no++ ) {
923 var l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; });
926 // TODO: really should cache this
927 var line_re = regex_for_depth( stack.length );
929 m = l.match( line_re );
930 //print( "line
:", uneval(l), "\nline match
:", uneval(m) );
932 // We have a list item
933 if ( m[1] !== undefined ) {
934 // Process the previous list item, if any
935 if ( li_accumulate.length ) {
936 add( last_li, loose, this.processInline( li_accumulate ), nl );
937 // Loose mode will have been dealt with. Reset it
942 m[1] = expand_tab( m[1] );
943 var wanted_depth = Math.floor(m[1].length/4)+1;
944 //print( "want
:", wanted_depth, "stack
:", stack.length);
945 if ( wanted_depth > stack.length ) {
946 // Deep enough for a nested list outright
947 //print ( "new nested list
" );
948 list = make_list( m );
949 last_li.push( list );
950 last_li = list[1] = [ "listitem
" ];
953 // We aren't deep enough to be strictly a new level. This is
954 // where Md.pl goes nuts. If the indent matches a level in the
955 // stack, put it there, else put it one deeper then the
956 // wanted_depth deserves.
958 for ( i = 0; i < stack.length; i++ ) {
959 if ( stack[ i ].indent !== m[1] )
962 list = stack[ i ].list;
963 stack.splice( i+1, stack.length - (i+1) );
969 //print("not found
. l
:", uneval(l));
971 if ( wanted_depth <= stack.length ) {
972 stack.splice(wanted_depth, stack.length - wanted_depth);
973 //print("Desired depth now
", wanted_depth, "stack
:", stack.length);
974 list = stack[wanted_depth-1].list;
975 //print("list
:", uneval(list) );
978 //print ("made
new stack
for messy indent
");
984 //print( uneval(list), "last
", list === stack[stack.length-1].list );
985 last_li = [ "listitem
" ];
987 } // end depth of shenegains
992 if ( l.length > m[0].length )
993 li_accumulate += nl + l.substr( m[0].length );
996 if ( li_accumulate.length ) {
997 add( last_li, loose, this.processInline( li_accumulate ), nl );
998 // Loose mode will have been dealt with. Reset it
1003 // Look at the next block - we might have a loose list. Or an extra
1004 // paragraph for the current li
1005 var contained = get_contained_blocks( stack.length, next );
1007 // Deal with code blocks or properly nested lists
1008 if ( contained.length > 0 ) {
1009 // Make sure all listitems up the stack are paragraphs
1010 forEach( stack, paragraphify, this);
1012 last_li.push.apply( last_li, this.toTree( contained, [] ) );
1015 var next_block = next[0] && next[0].valueOf() || "";
1017 if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) {
1018 block = next.shift();
1020 // Check for an HR following a list: features/lists/hr_abutting
1021 var hr = this.dialect.block.horizRule( block, next );
1024 ret.push.apply(ret, hr);
1028 // Make sure all listitems up the stack are paragraphs
1029 forEach( stack, paragraphify, this);
1032 continue loose_search;
1041 blockquote: function blockquote( block, next ) {
1042 if ( !block.match( /^>/m ) )
1047 // separate out the leading abutting block, if any. I.e. in this case:
1052 if ( block[ 0 ] !== ">" ) {
1053 var lines = block.split( /\n/ ),
1055 line_no = block.lineNumber;
1057 // keep shifting lines until you find a crotchet
1058 while ( lines.length && lines[ 0 ][ 0 ] !== ">" ) {
1059 prev.push( lines.shift() );
1063 var abutting = mk_block( prev.join( "\n" ), "\n", block.lineNumber );
1064 jsonml.push.apply( jsonml, this.processBlock( abutting, [] ) );
1065 // reassemble new block of just block quotes!
1066 block = mk_block( lines.join( "\n" ), block.trailing, line_no );
1070 // if the next block is also a blockquote merge it in
1071 while ( next.length && next[ 0 ][ 0 ] === ">" ) {
1072 var b = next.shift();
1073 block = mk_block( block + block.trailing + b, b.trailing, block.lineNumber );
1076 // Strip off the leading "> " and re-process as a block.
1077 var input = block.replace( /^> ?/gm, "" ),
1078 old_tree = this.tree,
1079 processedBlock = this.toTree( input, [ "blockquote
" ] ),
1080 attr = extract_attr( processedBlock );
1082 // If any link references were found get rid of them
1083 if ( attr && attr.references ) {
1084 delete attr.references;
1085 // And then remove the attribute object if it's empty
1086 if ( isEmpty( attr ) )
1087 processedBlock.splice( 1, 1 );
1090 jsonml.push( processedBlock );
1094 referenceDefn: function referenceDefn( block, next) {
1095 var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/;
1096 // interesting matches are [ , ref_id, url, , title, title ]
1098 if ( !block
.match(re
) )
1101 // make an attribute node if it doesn't exist
1102 if ( !extract_attr( this.tree
) )
1103 this.tree
.splice( 1, 0, {} );
1105 var attrs
= extract_attr( this.tree
);
1107 // make a references hash if it doesn't exist
1108 if ( attrs
.references
=== undefined )
1109 attrs
.references
= {};
1111 var b
= this.loop_re_over_block(re
, block
, function( m
) {
1113 if ( m
[2] && m
[2][0] === "<" && m
[2][m
[2].length
-1] === ">" )
1114 m
[2] = m
[2].substring( 1, m
[2].length
- 1 );
1116 var ref
= attrs
.references
[ m
[1].toLowerCase() ] = {
1120 if ( m
[4] !== undefined )
1122 else if ( m
[5] !== undefined )
1128 next
.unshift( mk_block( b
, block
.trailing
) );
1133 para
: function para( block
) {
1134 // everything's a para!
1135 return [ ["para"].concat( this.processInline( block
) ) ];
1141 __oneElement__
: function oneElement( text
, patterns_or_re
, previous_nodes
) {
1145 patterns_or_re
= patterns_or_re
|| this.dialect
.inline
.__patterns__
;
1146 var re
= new RegExp( "([\\s\\S]*?)(" + (patterns_or_re
.source
|| patterns_or_re
) + ")" );
1148 m
= re
.exec( text
);
1151 return [ text
.length
, text
];
1154 // Some un-interesting text matched. Return that first
1155 return [ m
[1].length
, m
[1] ];
1159 if ( m
[2] in this.dialect
.inline
) {
1160 res
= this.dialect
.inline
[ m
[2] ].call(
1162 text
.substr( m
.index
), m
, previous_nodes
|| [] );
1164 // Default for now to make dev easier. just slurp special and output it.
1165 res
= res
|| [ m
[2].length
, m
[2] ];
1169 __call__
: function inline( text
, patterns
) {
1175 //D:self.debug(" adding output", uneval(x));
1176 if ( typeof x
=== "string" && typeof out
[out
.length
-1] === "string" )
1177 out
[ out
.length
-1 ] += x
;
1182 while ( text
.length
> 0 ) {
1183 res
= this.dialect
.inline
.__oneElement__
.call(this, text
, patterns
, out
);
1184 text
= text
.substr( res
.shift() );
1191 // These characters are interesting elsewhere, so have rules for them so that
1192 // chunks of plain text blocks don't include them
1193 "]": function () {},
1194 "}": function () {},
1196 __escape__
: /^\\[\\`\*_{}\[\]()#\+.!\-]/,
1198 "\\": function escaped( text
) {
1199 // [ length of input processed, node/children to add... ]
1200 // Only esacape: \ ` * _ { } [ ] ( ) # * + - . !
1201 if ( this.dialect
.inline
.__escape__
.exec( text
) )
1202 return [ 2, text
.charAt( 1 ) ];
1208 "![": function image( text
) {
1210 // Unlike images, alt text is plain text only. no other elements are
1213 // ![Alt text](/path/to/img.jpg "Optional title")
1214 // 1 2 3 4 <--- captures
1215 var m
= text
.match( /^!\[(.*?)\][ \t]*\([ \t]*([^")]*?)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );
1218 if ( m
[2] && m
[2][0] === "<" && m
[2][m
[2].length
-1] === ">" )
1219 m
[2] = m
[2].substring( 1, m
[2].length
- 1 );
1221 m
[2] = this.dialect
.inline
.__call__
.call( this, m
[2], /\\/ )[0];
1223 var attrs
= { alt
: m
[1], href
: m
[2] || "" };
1224 if ( m
[4] !== undefined)
1227 return [ m
[0].length
, [ "img", attrs
] ];
1231 m
= text
.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ );
1234 // We can't check if the reference is known here as it likely wont be
1235 // found till after. Check it in md tree->hmtl tree conversion
1236 return [ m
[0].length
, [ "img_ref", { alt
: m
[1], ref
: m
[2].toLowerCase(), original
: m
[0] } ] ];
1239 // Just consume the '!['
1243 "[": function link( text
) {
1245 var orig
= String(text
);
1246 // Inline content is possible inside `link text`
1247 var res
= inline_until_char
.call( this, text
.substr(1), "]" );
1249 // No closing ']' found. Just consume the [
1253 var consumed
= 1 + res
[ 0 ],
1254 children
= res
[ 1 ],
1258 // At this point the first [...] has been parsed. See what follows to find
1259 // out which kind of link we are (reference or direct url)
1260 text
= text
.substr( consumed
);
1262 // [link text](/path/to/img.jpg "Optional title")
1263 // 1 2 3 <--- captures
1264 // This will capture up to the last paren in the block. We then pull
1265 // back based on if there a matching ones in the url
1266 // ([here](/url/(test))
1267 // The parens have to be balanced
1268 var m
= text
.match( /^\s*\([ \t]*([^"']*)(?:[ \t]+(["'])(.*?)\2)?[ \t]*\)/ );
1271 consumed
+= m
[0].length
;
1273 if ( url
&& url
[0] === "<" && url
[url
.length
-1] === ">" )
1274 url
= url
.substring( 1, url
.length
- 1 );
1276 // If there is a title we don't have to worry about parens in the url
1278 var open_parens
= 1; // One open that isn't in the capture
1279 for ( var len
= 0; len
< url
.length
; len
++ ) {
1280 switch ( url
[len
] ) {
1285 if ( --open_parens
=== 0) {
1286 consumed
-= url
.length
- len
;
1287 url
= url
.substring(0, len
);
1294 // Process escapes only
1295 url
= this.dialect
.inline
.__call__
.call( this, url
, /\\/ )[0];
1297 attrs
= { href
: url
|| "" };
1298 if ( m
[3] !== undefined)
1301 link
= [ "link", attrs
].concat( children
);
1302 return [ consumed
, link
];
1307 m
= text
.match( /^\s*\[(.*?)\]/ );
1311 consumed
+= m
[ 0 ].length
;
1313 // [links][] uses links as its reference
1314 attrs
= { ref
: ( m
[ 1 ] || String(children
) ).toLowerCase(), original
: orig
.substr( 0, consumed
) };
1316 link
= [ "link_ref", attrs
].concat( children
);
1318 // We can't check if the reference is known here as it likely wont be
1319 // found till after. Check it in md tree->hmtl tree conversion.
1320 // Store the original so that conversion can revert if the ref isn't found.
1321 return [ consumed
, link
];
1325 // Only if id is plain (no formatting.)
1326 if ( children
.length
=== 1 && typeof children
[0] === "string" ) {
1328 attrs
= { ref
: children
[0].toLowerCase(), original
: orig
.substr( 0, consumed
) };
1329 link
= [ "link_ref", attrs
, children
[0] ];
1330 return [ consumed
, link
];
1333 // Just consume the "["
1338 "<": function autoLink( text
) {
1341 if ( ( m
= text
.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) !== null ) {
1343 return [ m
[0].length
, [ "link", { href
: "mailto:" + m
[3] }, m
[3] ] ];
1344 else if ( m
[2] === "mailto" )
1345 return [ m
[0].length
, [ "link", { href
: m
[1] }, m
[1].substr("mailto:".length
) ] ];
1347 return [ m
[0].length
, [ "link", { href
: m
[1] }, m
[1] ] ];
1353 "`": function inlineCode( text
) {
1354 // Inline code block. as many backticks as you like to start it
1355 // Always skip over the opening ticks.
1356 var m
= text
.match( /(`+)(([\s\S]*?)\1)/ );
1359 return [ m
[1].length
+ m
[2].length
, [ "inlinecode", m
[3] ] ];
1361 // TODO: No matching end code found - warn!
1366 " \n": function lineBreak() {
1367 return [ 3, [ "linebreak" ] ];
// Meta Helper/generator method for em and strong handling
//
// Builds the inline handler for one emphasis marker.
//
//   tag - JsonML tag to emit ("strong" or "em")
//   md  - the literal marker text ("**", "__", "*" or "_")
//
// The returned handler must be called with `this` bound to an object that
// carries the `em_state` / `strong_state` arrays and a `processInline`
// method. It returns [ consumed_length, jsonml_node_or_string ].
function strong_em( tag, md ) {

  var state_slot = tag + "_state",
      other_slot = tag === "strong" ? "em_state" : "strong_state";

  // Sentinel returned when a closing marker is met. `len_after` is the number
  // of characters that were left in the text after the closing marker.
  function CloseTag( len ) {
    this.len_after = len;
    this.name = "close_" + md;
  }

  return function ( text ) {

    if ( this[state_slot][0] === md ) {
      // Most recent em is of this type
      //D:this.debug("closing", md);
      this[state_slot].shift();

      // "Consume" everything to go back to the recursion in the opening
      // branch below
      return [ text.length, new CloseTag( text.length - md.length ) ];
    }

    // Store a clone of the em/strong states
    var other = this[other_slot].slice(),
        state = this[state_slot].slice();

    this[state_slot].unshift( md );

    //D:this.debug_indent += "  ";

    // Recurse
    var res = this.processInline( text.substr( md.length ) );
    //D:this.debug_indent = this.debug_indent.substr(2);

    var last = res[res.length - 1];

    //D:this.debug("processInline from", tag + ": ", uneval( res ) );

    // Drop the head of our own state stack again; only the side effect
    // matters, the removed value is not needed.
    this[state_slot].shift();

    if ( last instanceof CloseTag ) {
      // We matched! Huzzah. Remove the sentinel from the children.
      res.pop();
      return [ text.length - last.len_after, [ tag ].concat( res ) ];
    }

    // Restore the state of the other kind. We might have mistakenly closed it.
    this[other_slot] = other;
    this[state_slot] = state;

    // We can't reuse the processed result as it could have wrong parsing contexts in it.
    return [ md.length, md ];
  }; // End returned function
}
// Register the emphasis handlers. The two-character markers are added
// before the one-character ones, keeping the property insertion order.
var emphasis_markers = [
      [ "**", "strong" ],
      [ "__", "strong" ],
      [ "*",  "em" ],
      [ "_",  "em" ]
    ],
    marker_idx;

for ( marker_idx = 0; marker_idx < emphasis_markers.length; marker_idx++ ) {
  Gruber.inline[ emphasis_markers[ marker_idx ][ 0 ] ] =
    strong_em( emphasis_markers[ marker_idx ][ 1 ], emphasis_markers[ marker_idx ][ 0 ] );
}

// Publish the dialect and build its block order / inline patterns.
Markdown.dialects.Gruber = Gruber;
Markdown.buildBlockOrder( Gruber.block );
Markdown.buildInlinePatterns( Gruber.inline );
// The Maruku dialect is derived from Gruber via DialectHelpers.subclassDialect.
var Maruku = DialectHelpers.subclassDialect( Gruber );

// Local shortcuts to the shared helpers used by the Maruku handlers below.
var extract_attr = MarkdownHelpers.extract_attr;
var forEach = MarkdownHelpers.forEach;
// Turns the text of a meta hash (the part between "{:" and "}") into an
// attribute object.
//
//   #foo     -> attr.id = "foo"
//   .foo     -> appended (space separated) to attr["class"]
//   key=val  -> attr[ key ] = val; only the first "=" separates key and
//               value, so values may themselves contain "="
//
// Parts that match none of these forms are ignored.
Maruku.processMetaHash = function processMetaHash( meta_string ) {
  var meta = split_meta_hash( meta_string ),
      attr = {};

  for ( var i = 0; i < meta.length; ++i ) {
    var part = meta[ i ];

    // id: #foo
    if ( /^#/.test( part ) )
      attr.id = part.substring( 1 );
    // class: .foo
    else if ( /^\./.test( part ) ) {
      // if class already exists, append the new one
      // (the leading "." is swapped for the separating space)
      if ( attr["class"] )
        attr["class"] = attr["class"] + part.replace( /./, " " );
      else
        attr["class"] = part.substring( 1 );
    }
    // attribute: foo=bar
    else if ( /\=/.test( part ) ) {
      // split at the first "=" only so the value is kept intact
      var eq = part.indexOf( "=" );
      attr[ part.substring( 0, eq ) ] = part.substring( eq + 1 );
    }
  }

  return attr;
};
// Splits the text of a meta hash into its space separated parts.
//
//  - spaces inside a quoted section ('...' or "...") do not split
//  - quote characters themselves are dropped
//  - a backslash keeps the following character whatever it is; a backslash
//    at the very end of the string has nothing to escape and is kept as is
//
// Always returns at least one (possibly empty) part; consecutive spaces
// produce empty parts.
function split_meta_hash( meta_string ) {
  var parts = [ "" ],
      in_quotes = false,
      len = meta_string.length,
      pos,
      letter;

  for ( pos = 0; pos < len; ++pos ) {
    letter = meta_string.charAt( pos );

    switch ( letter ) {
    case " " :
      // if we're in a quoted section, keep it
      if ( in_quotes )
        parts[ parts.length - 1 ] += letter;
      // otherwise make a new part
      else
        parts.push( "" );
      break;
    case "'" :
    case '"' :
      // reverse the quotes and move straight on
      in_quotes = !in_quotes;
      break;
    case "\\" :
      // step to the next letter to be used straight away.
      // it was escaped so we'll keep it whatever it is
      if ( pos + 1 < len )
        letter = meta_string.charAt( ++pos );
      /* falls through */
    default :
      parts[ parts.length - 1 ] += letter;
      break;
    }
  }

  return parts;
}
// Block handler for document level meta data: a leading block made up of
// `Key: Value` lines. Each pair is stored, with a lower-cased key, in the
// attribute hash of the tree root.
//
// Returns undefined when the block is not document meta data, otherwise an
// empty array (the block produces no content).
Maruku.block.document_meta = function document_meta( block ) {
  // we're only interested in the first block
  if ( block.lineNumber > 1 )
    return undefined;

  // document_meta blocks consist of one or more lines of `Key: Value\n`
  if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) )
    return undefined;

  // make an attribute node if it doesn't exist
  if ( !extract_attr( this.tree ) )
    this.tree.splice( 1, 0, {} );

  var pairs = block.split( /\n/ );

  // Indexed loop on purpose: for...in over an array would also visit any
  // enumerable property added to Array.prototype.
  for ( var p = 0; p < pairs.length; ++p ) {
    var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ),
        key = m[ 1 ].toLowerCase(),
        value = m[ 2 ];

    this.tree[ 1 ][ key ] = value;
  }

  // document_meta produces no content!
  return [];
};
// Block handler for a meta hash (`{: #id .class key=val }`) on the last
// line of a block.
//
//  - If the hash is the whole block, its attributes go onto the last node
//    already in the tree and an empty array is returned.
//  - Otherwise the rest of the block is processed first and the attributes
//    go onto the first node of that result, which is returned.
//
// Returns undefined when there is no meta hash, or when the node it should
// apply to is a plain string.
Maruku.block.block_meta = function block_meta( block ) {
  // check if the last line of the block is a meta hash
  var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ );
  if ( !m )
    return undefined;

  // process the meta hash
  var attr = this.dialect.processMetaHash( m[ 2 ] );

  // Copies the parsed attributes onto `target`, creating its attribute
  // hash first when it has none.
  function apply_meta( target ) {
    var hash = extract_attr( target );

    if ( !hash ) {
      hash = {};
      target.splice( 1, 0, hash );
    }

    for ( var a in attr )
      hash[ a ] = attr[ a ];
  }

  // if we matched ^ then we need to apply meta to the previous block
  if ( m[ 1 ] === "" ) {
    var node = this.tree[ this.tree.length - 1 ];

    // if the node is a string (rather than JsonML), bail
    if ( typeof node === "string" )
      return undefined;

    apply_meta( node );

    // return nothing so the meta hash is removed
    return [];
  }

  // pull the meta hash off the block and process what's left
  var result = this.processBlock( block.replace( /\n.*$/, "" ), [] );

  // attach the attributes to the block
  apply_meta( result[ 0 ] );

  return result;
};
// Block handler for "tight" definition lists: one or more term lines
// followed by one or more `: definition` entries, all in a single block.
// Directly following blocks of the same shape are pulled out of `next` and
// merged into the same list.
//
// Returns undefined when the block is not a definition list, otherwise
// [ [ "dl", [ "dt", ... ], [ "dd", ... ], ... ] ].
Maruku.block.definition_list = function definition_list( block, next ) {
  // one or more terms followed by one or more definitions, in a single block
  var tight = /^((?:[^\s:].*\n)+):\s+([\s\S]+)$/;

  if ( !block.match( tight ) )
    return undefined;

  var list = [ "dl" ],
      blocks = [ block ];

  // pull subsequent tight DL blocks out of `next`
  while ( next.length && tight.exec( next[ 0 ] ) )
    blocks.push( next.shift() );

  for ( var b = 0; b < blocks.length; ++b ) {
    var parts = blocks[ b ].match( tight ),
        terms = parts[ 1 ].replace( /\n$/, "" ).split( /\n/ ),
        defns = parts[ 2 ].split( /\n:\s+/ ),
        t,
        d;

    for ( t = 0; t < terms.length; ++t )
      list.push( [ "dt", terms[ t ] ] );

    for ( d = 0; d < defns.length; ++d ) {
      // run inline processing over the definition, with the whitespace
      // after its first line break removed
      var defn = defns[ d ].replace( /(\n)\s+/, "$1" );
      list.push( [ "dd" ].concat( this.processInline( defn ) ) );
    }
  }

  return [ list ];
};
// Block handler for pipe tables: a header line, a rule line of dashes and
// colons (which also sets the column alignment) and any number of body
// lines. Rows may be written with or without a leading pipe.
//
// Returns undefined when the block is not a table, otherwise
// [ [ "table", [ "thead", [ "tr", ... ] ], [ "tbody", ... ] ] ].
Maruku.block.table = function table ( block ) {

  // splits on unescaped instances of @ch. If @ch is not a character the result
  // can be unpredictable
  function split_on_unescaped( s, ch ) {
    ch = ch || '\\s';
    if ( ch.match(/^[\\|\[\]{}?*.+^$]$/) )
      ch = '\\' + ch;

    var pieces = [ ],
        splitter = new RegExp('^((?:\\\\.|[^\\\\' + ch + '])*)' + ch + '(.*)'),
        hit;

    while ( ( hit = s.match( splitter ) ) ) {
      pieces.push( hit[1] );
      s = hit[2];
    }
    pieces.push( s );
    return pieces;
  }

  var leading_pipe = /^ {0,3}\|(.+)\n {0,3}\|\s*([\-:]+[\-| :]*)\n((?:\s*\|.*(?:\n|$))*)(?=\n|$)/,
      // find at least an unescaped pipe in each line
      no_leading_pipe = /^ {0,3}(\S(?:\\.|[^\\|])*\|.*)\n {0,3}([\-:]+\s*\|[\-| :]*)\n((?:(?:\\.|[^\\|])*\|.*(?:\n|$))*)(?=\n|$)/,
      m = block.match( leading_pipe ),
      body;

  if ( m ) {
    // remove leading pipes in contents
    // (header and horizontal rule already have the leading pipe left out)
    body = m[3].replace(/^\s*\|/gm, '');
  }
  else {
    m = block.match( no_leading_pipe );
    if ( !m )
      return undefined;
    body = m[3];
  }

  var header_row = [ "tr" ],
      table_body = [ "tbody" ],
      col_attrs = [ ],
      c;

  // remove trailing pipes, then split on pipes
  // (no escaped pipes are allowed in horizontal rule)
  var rules = m[2].replace(/\|\s*$/, '').split('|');

  // process alignment: one attribute object per column
  for ( c = 0; c < rules.length; c++ ) {
    if ( rules[c].match(/^\s*-+:\s*$/) )
      col_attrs.push({align: "right"});
    else if ( rules[c].match(/^\s*:-+\s*$/) )
      col_attrs.push({align: "left"});
    else if ( rules[c].match(/^\s*:-+:\s*$/) )
      col_attrs.push({align: "center"});
    else
      col_attrs.push({});
  }

  // now for the header, avoid escaped pipes
  var heads = split_on_unescaped( m[1].replace(/\|\s*$/, ''), '|' );
  for ( c = 0; c < heads.length; c++ ) {
    header_row.push( ['th', col_attrs[c] || {}].concat(
      this.processInline( heads[c].trim() ) ) );
  }

  // now for body contents
  var rows = body.replace(/\|\s*$/mg, '').split('\n');
  for ( var r = 0; r < rows.length; r++ ) {
    var cells = split_on_unescaped( rows[r], '|' ),
        html_row = ['tr'];

    for ( c = 0; c < cells.length; c++ )
      html_row.push( ['td', col_attrs[c] || {}].concat( this.processInline( cells[c].trim() ) ) );

    table_body.push( html_row );
  }

  return [ [ "table", [ "thead", header_row ], table_body ] ];
};
// Inline handler for a meta hash (`{: #id .class key=val }`) that follows
// an inline element. The attributes are merged into the element that was
// emitted just before it (the last entry of `out`).
//
// Returns [ 2, "{:" ] (the marker as literal text) when there is no
// preceding element, when it is a plain string, or when the text is not a
// well-formed meta hash. Otherwise the whole hash is consumed and replaced
// by an empty string.
Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) {
  // get the preceding element, if any
  var before = out.length ? out[ out.length - 1 ] : undefined,
      m;

  if ( !out.length ||
       typeof before === "string" ||
       !( m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ ) ) ) {
    // nothing to decorate, or no match: false alarm
    return [ 2, "{:" ];
  }

  // attach the attributes to the preceding element
  var meta = this.dialect.processMetaHash( m[ 1 ] ),
      attr = extract_attr( before );

  // create the attribute hash if the element has none yet
  if ( !attr ) {
    attr = {};
    before.splice( 1, 0, attr );
  }

  for ( var k in meta )
    attr[ k ] = meta[ k ];

  // cut out the string and replace it with nothing
  return [ m[ 0 ].length, "" ];
};
// Publish the Maruku dialect, set its escape pattern (a backslash followed
// by one of the listed punctuation characters), then build its block order
// and inline patterns.
Markdown.dialects.Maruku = Maruku;
Maruku.inline.__escape__ = /^\\[\\`\*_{}\[\]()#\+.!\-|:]/;
Markdown.buildBlockOrder( Maruku.block );
Markdown.buildInlinePatterns( Maruku.inline );
// Include all our dependencies and expose the public API: the Markdown
// constructor itself plus shortcuts to its main entry points.
expose.Markdown = Markdown;

var exposed_api = [ "parse", "toHTML", "toHTMLTree", "renderJsonML" ],
    api_idx;

for ( api_idx = 0; api_idx < exposed_api.length; api_idx++ )
  expose[ exposed_api[ api_idx ] ] = Markdown[ exposed_api[ api_idx ] ];
1738 window
.markdown
= {};
1739 return window
.markdown
;