git.m6w6.name Git - mdref/mdref/blob - markdown.js

   1 // Released under MIT license
   2 // Copyright (c) 2009-2010 Dominic Baggott
   3 // Copyright (c) 2009-2010 Ash Berlin
   4 // Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com)
   5 // Date: 2013-09-15T16:12Z
   6
   7 (function(expose) {
   8
   9
  10
  11
  12   var MarkdownHelpers = {};
  13
  14   // For Spidermonkey based engines
  /**
   * `toSource` hook installed on blocks created by mk_block.
   *
   * Renders the block as a `Markdown.mk_block( text, trailing, lineNumber )`
   * expression, serialising each of the three parts with the engine-provided
   * `uneval`.
   **/
  function mk_block_toSource() {
    var text = uneval(this.toString()),
        trailing = uneval(this.trailing),
        line = uneval(this.lineNumber);

    return "Markdown.mk_block( " + text + ", " + trailing + ", " + line + " )";
  }
  24
  25   // node
  /**
   * `inspect` hook installed on blocks created by mk_block.
   *
   * Renders the block as a `Markdown.mk_block( text, trailing, lineNumber )`
   * expression, formatting each of the three parts with node's `util.inspect`.
   **/
  function mk_block_inspect() {
    var util = require("util"),
        parts = [
          util.inspect(this.toString()),
          util.inspect(this.trailing),
          util.inspect(this.lineNumber)
        ];

    return "Markdown.mk_block( " + parts.join(", ") + " )";
  }
  37
  /**
   *  mk_block( block[, trail[, line]] ) -> String object
   *  - block: the text of the block
   *  - trail: stored as `trailing`; defaults to "\n\n" only when the function
   *    is called with exactly one argument
   *  - line: stored as `lineNumber` unless it is undefined
   *
   *  Wraps `block` in a String object carrying the block metadata plus custom
   *  `inspect` / `toSource` printers.
   **/
  MarkdownHelpers.mk_block = function(block, trail, line) {
    // A String object (not a primitive) is needed so the properties stick
    /* jshint -W053 */
    var wrapped = new String(block);

    // Single-argument calls (handy in tests) get the default separator
    wrapped.trailing = arguments.length === 1 ? "\n\n" : trail;

    // Custom printers make it obvious this is not just a plain string
    wrapped.inspect = mk_block_inspect;
    wrapped.toSource = mk_block_toSource;

    if ( line !== undefined )
      wrapped.lineNumber = line;

    return wrapped;
  };
  56
  57
  // Use the native Array.isArray when present; otherwise compare the tag
  // reported by Object.prototype.toString.
  var isArray = MarkdownHelpers.isArray = Array.isArray || function(candidate) {
    var tag = Object.prototype.toString.call(candidate);
    return tag === "[object Array]";
  };
  61
  62   // Don't mess with Array.prototype. Its not friendly
  // Choose the iteration helper once: delegate to the native forEach when the
  // engine has one, otherwise walk the array by index.
  MarkdownHelpers.forEach = Array.prototype.forEach
    ? function forEach( arr, cb, thisp ) {
        return arr.forEach( cb, thisp );
      }
    : function forEach( arr, cb, thisp ) {
        var idx = 0;
        while ( idx < arr.length ) {
          cb.call( thisp || arr, arr[idx], idx, arr );
          idx++;
        }
      };
  74
  /**
   *  isEmpty( obj ) -> Boolean
   *  - obj (Object): object to examine
   *
   *  Returns true when `obj` has no enumerable own properties (inherited
   *  properties are ignored), false otherwise.
   **/
  MarkdownHelpers.isEmpty = function isEmpty( obj ) {
    // Reference the method explicitly instead of relying on a bare
    // `hasOwnProperty` identifier, which only resolves when the global object
    // happens to inherit from Object.prototype.
    var has_own = Object.prototype.hasOwnProperty;

    for ( var key in obj ) {
      if ( has_own.call( obj, key ) )
        return false;
    }
    return true;
  };
  82
  /**
   *  extract_attr( jsonml ) -> Object | undefined
   *  - jsonml: candidate JsonML node
   *
   *  Returns the second element of `jsonml` when the node is an array of at
   *  least two elements and that element has typeof "object" without being an
   *  array. In every other case the result is undefined.
   **/
  MarkdownHelpers.extract_attr = function extract_attr( jsonml ) {
    if ( !isArray(jsonml) || !( jsonml.length > 1 ) )
      return undefined;

    var candidate = jsonml[ 1 ];
    if ( typeof candidate !== "object" || isArray(candidate) )
      return undefined;

    return candidate;
  };
  91
  92
  93
  94
  95  /**
  96    *  class Markdown
  97    *
  98    *  Markdown processing in Javascript done right. We have very particular views
  99    *  on what constitutes 'right' which include:
 100    *
 101    *  - produces well-formed HTML (this means that em and strong nesting is
 102    *    important)
 103    *
 104    *  - has an intermediate representation to allow processing of parsed data (We
 105    *    in fact have two, both as [JsonML]: a markdown tree and an HTML tree).
 106    *
 107    *  - is easily extensible to add new dialects without having to rewrite the
 108    *    entire parsing mechanics
 109    *
 110    *  - has a good test suite
 111    *
 112    *  This implementation fulfills all of these (except that the test suite could
 113    *  do with expanding to automatically run all the fixtures from other Markdown
 114    *  implementations.)
 115    *
 116    *  ##### Intermediate Representation
 117    *
 118    *  *TODO* Talk about this :) Its JsonML, but document the node names we use.
 119    *
 120    *  [JsonML]: http://jsonml.org/ "JSON Markup Language"
 121    **/
 122   var Markdown = function(dialect) {
 123     switch (typeof dialect) {
 124     case "undefined":
 125       this.dialect = Markdown.dialects.Gruber;
 126       break;
 127     case "object":
 128       this.dialect = dialect;
 129       break;
 130     default:
 131       if ( dialect in Markdown.dialects )
 132         this.dialect = Markdown.dialects[dialect];
 133       else
 134         throw new Error("Unknown Markdown dialect '" + String(dialect) + "'");
 135       break;
 136     }
 137     this.em_state = [];
 138     this.strong_state = [];
 139     this.debug_indent = "";
 140   };
 141
 142   /**
 143    * Markdown.dialects
 144    *
 145    * Namespace of built-in dialects.
 146    **/
 147   Markdown.dialects = {};
 148
 149
 150
 151
 152   // Imported functions
 153   var mk_block = Markdown.mk_block = MarkdownHelpers.mk_block,
 154       isArray = MarkdownHelpers.isArray;
 155
 156   /**
 157    *  parse( markdown, [dialect] ) -> JsonML
 158    *  - markdown (String): markdown string to parse
 159    *  - dialect (String | Dialect): the dialect to use, defaults to gruber
 160    *
 161    *  Parse `markdown` and return a markdown document as a Markdown.JsonML tree.
 162    **/
 163   Markdown.parse = function( source, dialect ) {
 164     // dialect will default if undefined
 165     var md = new Markdown( dialect );
 166     return md.toTree( source );
 167   };
 168
 169   function count_lines( str ) {
 170     var n = 0,
 171         i = -1;
 172     while ( ( i = str.indexOf("\n", i + 1) ) !== -1 )
 173       n++;
 174     return n;
 175   }
 176
 177   // Internal - split source into rough blocks
 178   Markdown.prototype.split_blocks = function splitBlocks( input ) {
 179     input = input.replace(/(\r\n|\n|\r)/g, "\n");
 180     // [\s\S] matches _anything_ (newline or space)
 181     // [^] is equivalent but doesn't work in IEs.
 182     var re = /([\s\S]+?)($|\n#|\n(?:\s*\n|$)+)/g,
 183         blocks = [],
 184         m;
 185
 186     var line_no = 1;
 187
 188     if ( ( m = /^(\s*\n)/.exec(input) ) !== null ) {
 189       // skip (but count) leading blank lines
 190       line_no += count_lines( m[0] );
 191       re.lastIndex = m[0].length;
 192     }
 193
 194     while ( ( m = re.exec(input) ) !== null ) {
 195       if (m[2] === "\n#") {
 196         m[2] = "\n";
 197         re.lastIndex--;
 198       }
 199       blocks.push( mk_block( m[1], m[2], line_no ) );
 200       line_no += count_lines( m[0] );
 201     }
 202
 203     return blocks;
 204   };
 205
 206   /**
 207    *  Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ]
 208    *  - block (String): the block to process
 209    *  - next (Array): the following blocks
 210    *
 211    * Process `block` and return an array of JsonML nodes representing `block`.
 212    *
 213    * It does this by asking each block level function in the dialect to process
 214    * the block until one can. Successful handling is indicated by returning an
 215    * array (with zero or more JsonML nodes), failure by a false value.
 216    *
 217    * Blocks handlers are responsible for calling [[Markdown#processInline]]
 218    * themselves as appropriate.
 219    *
 220    * If the blocks were split incorrectly or adjacent blocks need collapsing you
 221    * can adjust `next` in place using shift/splice etc.
 222    *
 223    * If any of this default behaviour is not right for the dialect, you can
 224    * define a `__call__` method on the dialect that will get invoked to handle
 225    * the block processing.
 226    */
 227   Markdown.prototype.processBlock = function processBlock( block, next ) {
 228     var cbs = this.dialect.block,
 229         ord = cbs.__order__;
 230
 231     if ( "__call__" in cbs )
 232       return cbs.__call__.call(this, block, next);
 233
 234     for ( var i = 0; i < ord.length; i++ ) {
 235       //D:this.debug( "Testing", ord[i] );
 236       var res = cbs[ ord[i] ].call( this, block, next );
 237       if ( res ) {
 238         //D:this.debug("  matched");
 239         if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) ) )
 240           this.debug(ord[i], "didn't return a proper array");
 241         //D:this.debug( "" );
 242         return res;
 243       }
 244     }
 245
 246     // Uhoh! no match! Should we throw an error?
 247     return [];
 248   };
 249
 250   Markdown.prototype.processInline = function processInline( block ) {
 251     return this.dialect.inline.__call__.call( this, String( block ) );
 252   };
 253
 254   /**
 255    *  Markdown#toTree( source ) -> JsonML
 256    *  - source (String): markdown source to parse
 257    *
 258    *  Parse `source` into a JsonML tree representing the markdown document.
 259    **/
 260   // custom_tree means set this.tree to `custom_tree` and restore old value on return
 261   Markdown.prototype.toTree = function toTree( source, custom_root ) {
 262     var blocks = source instanceof Array ? source : this.split_blocks( source );
 263
 264     // Make tree a member variable so its easier to mess with in extensions
 265     var old_tree = this.tree;
 266     try {
 267       this.tree = custom_root || this.tree || [ "markdown" ];
 268
 269       blocks_loop:
 270       while ( blocks.length ) {
 271         var b = this.processBlock( blocks.shift(), blocks );
 272
 273         // Reference blocks and the like won't return any content
 274         if ( !b.length )
 275           continue blocks_loop;
 276
 277         this.tree.push.apply( this.tree, b );
 278       }
 279       return this.tree;
 280     }
 281     finally {
 282       if ( custom_root )
 283         this.tree = old_tree;
 284     }
 285   };
 286
 287   // Noop by default
 288   Markdown.prototype.debug = function () {
 289     var args = Array.prototype.slice.call( arguments);
 290     args.unshift(this.debug_indent);
 291     if ( typeof print !== "undefined" )
 292       print.apply( print, args );
 293     if ( typeof console !== "undefined" && typeof console.log !== "undefined" )
 294       console.log.apply( null, args );
 295   };
 296
 297   Markdown.prototype.loop_re_over_block = function( re, block, cb ) {
 298     // Dont use /g regexps with this
 299     var m,
 300         b = block.valueOf();
 301
 302     while ( b.length && (m = re.exec(b) ) !== null ) {
 303       b = b.substr( m[0].length );
 304       cb.call(this, m);
 305     }
 306     return b;
 307   };
 308
 309   // Build default order from insertion order.
 310   Markdown.buildBlockOrder = function(d) {
 311     var ord = [];
 312     for ( var i in d ) {
 313       if ( i === "__order__" || i === "__call__" )
 314         continue;
 315       ord.push( i );
 316     }
 317     d.__order__ = ord;
 318   };
 319
 320   // Build patterns for inline matcher
 321   Markdown.buildInlinePatterns = function(d) {
 322     var patterns = [];
 323
 324     for ( var i in d ) {
 325       // __foo__ is reserved and not a pattern
 326       if ( i.match( /^__.*__$/) )
 327         continue;
 328       var l = i.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" )
 329                .replace( /\n/, "\\n" );
 330       patterns.push( i.length === 1 ? l : "(?:" + l + ")" );
 331     }
 332
 333     patterns = patterns.join("|");
 334     d.__patterns__ = patterns;
 335     //print("patterns:", uneval( patterns ) );
 336
 337     var fn = d.__call__;
 338     d.__call__ = function(text, pattern) {
 339       if ( pattern !== undefined )
 340         return fn.call(this, text, pattern);
 341       else
 342         return fn.call(this, text, patterns);
 343     };
 344   };
 345
 346
 347
 348
 349   var extract_attr = MarkdownHelpers.extract_attr;
 350
 351   /**
 352    *  renderJsonML( jsonml[, options] ) -> String
 353    *  - jsonml (Array): JsonML array to render to XML
 354    *  - options (Object): options
 355    *
 356    *  Converts the given JsonML into well-formed XML.
 357    *
 358    *  The options currently understood are:
 359    *
 360    *  - root (Boolean): whether or not the root node should be included in the
 361    *    output, or just its children. The default `false` is to not include the
 362    *    root itself.
 363    */
 364   Markdown.renderJsonML = function( jsonml, options ) {
 365     options = options || {};
 366     // include the root element in the rendered output?
 367     options.root = options.root || false;
 368
 369     var content = [];
 370
 371     if ( options.root ) {
 372       content.push( render_tree( jsonml ) );
 373     }
 374     else {
 375       jsonml.shift(); // get rid of the tag
 376       if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) )
 377         jsonml.shift(); // get rid of the attributes
 378
 379       while ( jsonml.length )
 380         content.push( render_tree( jsonml.shift() ) );
 381     }
 382
 383     return content.join( "\n\n" );
 384   };
 385
 386
 387   /**
 388    *  toHTMLTree( markdown, [dialect] ) -> JsonML
 389    *  toHTMLTree( md_tree ) -> JsonML
 390    *  - markdown (String): markdown string to parse
 391    *  - dialect (String | Dialect): the dialect to use, defaults to gruber
 392    *  - md_tree (Markdown.JsonML): parsed markdown tree
 393    *
 394    *  Turn markdown into HTML, represented as a JsonML tree. If a string is given
 395    *  to this function, it is first parsed into a markdown tree by calling
 396    *  [[parse]].
 397    **/
 398   Markdown.toHTMLTree = function toHTMLTree( input, dialect , options ) {
 399
 400     // convert string input to an MD tree
 401     if ( typeof input === "string" )
 402       input = this.parse( input, dialect );
 403
 404     // Now convert the MD tree to an HTML tree
 405
 406     // remove references from the tree
 407     var attrs = extract_attr( input ),
 408         refs = {};
 409
 410     if ( attrs && attrs.references )
 411       refs = attrs.references;
 412
 413     var html = convert_tree_to_html( input, refs , options );
 414     merge_text_nodes( html );
 415     return html;
 416   };
 417
 418   /**
 419    *  toHTML( markdown, [dialect]  ) -> String
 420    *  toHTML( md_tree ) -> String
 421    *  - markdown (String): markdown string to parse
 422    *  - md_tree (Markdown.JsonML): parsed markdown tree
 423    *
 424    *  Take markdown (either as a string or as a JsonML tree) and run it through
 425    *  [[toHTMLTree]] then turn it into a well-formatted HTML fragment.
 426    **/
 427   Markdown.toHTML = function toHTML( source , dialect , options ) {
 428     var input = this.toHTMLTree( source , dialect , options );
 429
 430     return this.renderJsonML( input );
 431   };
 432
 433
 434   function escapeHTML( text ) {
 435     return text.replace( /&/g, "&amp;" )
 436                .replace( /</g, "&lt;" )
 437                .replace( />/g, "&gt;" )
 438                .replace( /"/g, "&quot;" )
 439                .replace( /'/g, "&#39;" );
 440   }
 441
 442   function render_tree( jsonml ) {
 443     // basic case
 444     if ( typeof jsonml === "string" )
 445       return escapeHTML( jsonml );
 446
 447     var tag = jsonml.shift(),
 448         attributes = {},
 449         content = [];
 450
 451     if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) )
 452       attributes = jsonml.shift();
 453
 454     while ( jsonml.length )
 455       content.push( render_tree( jsonml.shift() ) );
 456
 457     var tag_attrs = "";
 458     for ( var a in attributes )
 459       tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"';
 460
 461     // be careful about adding whitespace here for inline elements
 462     if ( tag === "img" || tag === "br" || tag === "hr" )
 463       return "<"+ tag + tag_attrs + "/>";
 464     else
 465       return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">";
 466   }
 467
 468   function convert_tree_to_html( tree, references, options ) {
 469     var i;
 470     options = options || {};
 471
 472     // shallow clone
 473     var jsonml = tree.slice( 0 );
 474
 475     if ( typeof options.preprocessTreeNode === "function" )
 476       jsonml = options.preprocessTreeNode(jsonml, references);
 477
 478     // Clone attributes if they exist
 479     var attrs = extract_attr( jsonml );
 480     if ( attrs ) {
 481       jsonml[ 1 ] = {};
 482       for ( i in attrs ) {
 483         jsonml[ 1 ][ i ] = attrs[ i ];
 484       }
 485       attrs = jsonml[ 1 ];
 486     }
 487
 488     // basic case
 489     if ( typeof jsonml === "string" )
 490       return jsonml;
 491
 492     // convert this node
 493     switch ( jsonml[ 0 ] ) {
 494     case "header":
 495       jsonml[ 0 ] = "h" + jsonml[ 1 ].level;
 496       delete jsonml[ 1 ].level;
 497       break;
 498     case "bulletlist":
 499       jsonml[ 0 ] = "ul";
 500       break;
 501     case "numberlist":
 502       jsonml[ 0 ] = "ol";
 503       break;
 504     case "listitem":
 505       jsonml[ 0 ] = "li";
 506       break;
 507     case "para":
 508       jsonml[ 0 ] = "p";
 509       break;
 510     case "markdown":
 511       jsonml[ 0 ] = "html";
 512       if ( attrs )
 513         delete attrs.references;
 514       break;
 515     case "code_block":
 516       jsonml[ 0 ] = "pre";
 517       i = attrs ? 2 : 1;
 518       var code = [ "code" ];
 519       code.push.apply( code, jsonml.splice( i, jsonml.length - i ) );
 520       jsonml[ i ] = code;
 521       break;
 522     case "inlinecode":
 523       jsonml[ 0 ] = "code";
 524       break;
 525     case "img":
 526       jsonml[ 1 ].src = jsonml[ 1 ].href;
 527       delete jsonml[ 1 ].href;
 528       break;
 529     case "linebreak":
 530       jsonml[ 0 ] = "br";
 531       break;
 532     case "link":
 533       jsonml[ 0 ] = "a";
 534       break;
 535     case "link_ref":
 536       jsonml[ 0 ] = "a";
 537
 538       // grab this ref and clean up the attribute node
 539       var ref = references[ attrs.ref ];
 540
 541       // if the reference exists, make the link
 542       if ( ref ) {
 543         delete attrs.ref;
 544
 545         // add in the href and title, if present
 546         attrs.href = ref.href;
 547         if ( ref.title )
 548           attrs.title = ref.title;
 549
 550         // get rid of the unneeded original text
 551         delete attrs.original;
 552       }
 553       // the reference doesn't exist, so revert to plain text
 554       else {
 555         return attrs.original;
 556       }
 557       break;
 558     case "img_ref":
 559       jsonml[ 0 ] = "img";
 560
 561       // grab this ref and clean up the attribute node
 562       var ref = references[ attrs.ref ];
 563
 564       // if the reference exists, make the link
 565       if ( ref ) {
 566         delete attrs.ref;
 567
 568         // add in the href and title, if present
 569         attrs.src = ref.href;
 570         if ( ref.title )
 571           attrs.title = ref.title;
 572
 573         // get rid of the unneeded original text
 574         delete attrs.original;
 575       }
 576       // the reference doesn't exist, so revert to plain text
 577       else {
 578         return attrs.original;
 579       }
 580       break;
 581     }
 582
 583     // convert all the children
 584     i = 1;
 585
 586     // deal with the attribute node, if it exists
 587     if ( attrs ) {
 588       // if there are keys, skip over it
 589       for ( var key in jsonml[ 1 ] ) {
 590         i = 2;
 591         break;
 592       }
 593       // if there aren't, remove it
 594       if ( i === 1 )
 595         jsonml.splice( i, 1 );
 596     }
 597
 598     for ( ; i < jsonml.length; ++i ) {
 599       jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options );
 600     }
 601
 602     return jsonml;
 603   }
 604
 605
 606   // merges adjacent text nodes into a single node
 607   function merge_text_nodes( jsonml ) {
 608     // skip the tag name and attribute hash
 609     var i = extract_attr( jsonml ) ? 2 : 1;
 610
 611     while ( i < jsonml.length ) {
 612       // if it's a string check the next item too
 613       if ( typeof jsonml[ i ] === "string" ) {
 614         if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) {
 615           // merge the second string into the first and remove it
 616           jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ];
 617         }
 618         else {
 619           ++i;
 620         }
 621       }
 622       // if it's not a string recurse
 623       else {
 624         merge_text_nodes( jsonml[ i ] );
 625         ++i;
 626       }
 627     }
 628   };
 629
 630
 631
 632   var DialectHelpers = {};
 633   DialectHelpers.inline_until_char = function( text, want ) {
 634     var consumed = 0,
 635         nodes = [];
 636
 637     while ( true ) {
 638       if ( text.charAt( consumed ) === want ) {
 639         // Found the character we were looking for
 640         consumed++;
 641         return [ consumed, nodes ];
 642       }
 643
 644       if ( consumed >= text.length ) {
 645         // No closing char found. Abort.
 646         return null;
 647       }
 648
 649       var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ) );
 650       consumed += res[ 0 ];
 651       // Add any returned nodes.
 652       nodes.push.apply( nodes, res.slice( 1 ) );
 653     }
 654   };
 655
 656   // Helper function to make sub-classing a dialect easier
 657   DialectHelpers.subclassDialect = function( d ) {
 658     function Block() {}
 659     Block.prototype = d.block;
 660     function Inline() {}
 661     Inline.prototype = d.inline;
 662
 663     return { block: new Block(), inline: new Inline() };
 664   };
 665
 666
 667
 668
 669   var forEach = MarkdownHelpers.forEach,
 670       extract_attr = MarkdownHelpers.extract_attr,
 671       mk_block = MarkdownHelpers.mk_block,
 672       isEmpty = MarkdownHelpers.isEmpty,
 673       inline_until_char = DialectHelpers.inline_until_char;
 674
 675   /**
 676    * Gruber dialect
 677    *
 678    * The default dialect that follows the rules set out by John Gruber's
 679    * markdown.pl as closely as possible. Well actually we follow the behaviour of
 680    * that script which in some places is not exactly what the syntax web page
 681    * says.
 682    **/
 683   var Gruber = {
 684     block: {
 685       atxHeader: function atxHeader( block, next ) {
 686         var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ );
 687
 688         if ( !m )
 689           return undefined;
 690
 691         var header = [ "header", { level: m[ 1 ].length } ];
 692         Array.prototype.push.apply(header, this.processInline(m[ 2 ]));
 693
 694         if ( m[0].length < block.length )
 695           next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
 696
 697         return [ header ];
 698       },
 699
 700       setextHeader: function setextHeader( block, next ) {
 701         var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ );
 702
 703         if ( !m )
 704           return undefined;
 705
 706         var level = ( m[ 2 ] === "=" ) ? 1 : 2,
 707             header = [ "header", { level : level }, m[ 1 ] ];
 708
 709         if ( m[0].length < block.length )
 710           next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );
 711
 712         return [ header ];
 713       },
 714
 715       code: function code( block, next ) {
 716         // |    Foo
 717         // |bar
 718         // should be a code block followed by a paragraph. Fun
 719         //
 720         // There might also be adjacent code block to merge.
 721
 722         var ret = [],
 723             re = /^(?: {0,3}\t| {4})(.*)\n?/;
 724
 725         // 4 spaces + content
 726         if ( !block.match( re ) )
 727           return undefined;
 728
 729         block_search:
 730         do {
 731           // Now pull out the rest of the lines
 732           var b = this.loop_re_over_block(
 733                     re, block.valueOf(), function( m ) { ret.push( m[1] ); } );
 734
 735           if ( b.length ) {
 736             // Case alluded to in first comment. push it back on as a new block
 737             next.unshift( mk_block(b, block.trailing) );
 738             break block_search;
 739           }
 740           else if ( next.length ) {
 741             // Check the next block - it might be code too
 742             if ( !next[0].match( re ) )
 743               break block_search;
 744
 745             // Pull how how many blanks lines follow - minus two to account for .join
 746             ret.push ( block.trailing.replace(/[^\n]/g, "").substring(2) );
 747
 748             block = next.shift();
 749           }
 750           else {
 751             break block_search;
 752           }
 753         } while ( true );
 754
 755         return [ [ "code_block", ret.join("\n") ] ];
 756       },
 757
 758       horizRule: function horizRule( block, next ) {
 759         // this needs to find any hr in the block to handle abutting blocks
 760         var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ );
 761
 762         if ( !m )
 763           return undefined;
 764
 765         var jsonml = [ [ "hr" ] ];
 766
 767         // if there's a leading abutting block, process it
 768         if ( m[ 1 ] ) {
 769           var contained = mk_block( m[ 1 ], "", block.lineNumber );
 770           jsonml.unshift.apply( jsonml, this.toTree( contained, [] ) );
 771         }
 772
 773         // if there's a trailing abutting block, stick it into next
 774         if ( m[ 3 ] )
 775           next.unshift( mk_block( m[ 3 ], block.trailing, block.lineNumber + 1 ) );
 776
 777         return jsonml;
 778       },
 779
 780       // There are two types of lists. Tight and loose. Tight lists have no whitespace
 781       // between the items (and result in text just in the <li>) and loose lists,
 782       // which have an empty line between list items, resulting in (one or more)
 783       // paragraphs inside the <li>.
 784       //
 785       // There are all sorts weird edge cases about the original markdown.pl's
 786       // handling of lists:
 787       //
 788       // * Nested lists are supposed to be indented by four chars per level. But
 789       //   if they aren't, you can get a nested list by indenting by less than
 790       //   four so long as the indent doesn't match an indent of an existing list
 791       //   item in the 'nest stack'.
 792       //
 793       // * The type of the list (bullet or number) is controlled just by the
 794       //    first item at the indent. Subsequent changes are ignored unless they
 795       //    are for nested lists
 796       //
 797       lists: (function( ) {
 798         // Use a closure to hide a few variables.
 799         var any_list = "[*+-]|\\d+\\.",
 800             bullet_list = /[*+-]/,
 801             // Capture leading indent as it matters for determining nested lists.
 802             is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ),
 803             indent_re = "(?: {0,3}\\t| {4})";
 804
 805         // TODO: Cache this regexp for certain depths.
 806         // Create a regexp suitable for matching an li for a given stack depth
 807         function regex_for_depth( depth ) {
 808
 809           return new RegExp(
 810             // m[1] = indent, m[2] = list_type
 811             "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" +
 812             // m[3] = cont
 813             "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})"
 814           );
 815         }
 816         function expand_tab( input ) {
 817           return input.replace( / {0,3}\t/g, "    " );
 818         }
 819
 820         // Add inline content `inline` to `li`. inline comes from processInline
 821         // so is an array of content
 822         function add(li, loose, inline, nl) {
 823           if ( loose ) {
 824             li.push( [ "para" ].concat(inline) );
 825             return;
 826           }
 827           // Hmmm, should this be any block level element or just paras?
 828           var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] === "para"
 829                      ? li[li.length -1]
 830                      : li;
 831
 832           // If there is already some content in this list, add the new line in
 833           if ( nl && li.length > 1 )
 834             inline.unshift(nl);
 835
 836           for ( var i = 0; i < inline.length; i++ ) {
 837             var what = inline[i],
 838                 is_str = typeof what === "string";
 839             if ( is_str && add_to.length > 1 && typeof add_to[add_to.length-1] === "string" )
 840               add_to[ add_to.length-1 ] += what;
 841             else
 842               add_to.push( what );
 843           }
 844         }
 845
 846         // contained means have an indent greater than the current one. On
 847         // *every* line in the block
 848         function get_contained_blocks( depth, blocks ) {
 849
 850           var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ),
 851               replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"),
 852               ret = [];
 853
 854           while ( blocks.length > 0 ) {
 855             if ( re.exec( blocks[0] ) ) {
 856               var b = blocks.shift(),
 857                   // Now remove that indent
 858                   x = b.replace( replace, "");
 859
 860               ret.push( mk_block( x, b.trailing, b.lineNumber ) );
 861             }
 862             else
 863               break;
 864           }
 865           return ret;
 866         }
 867
 868         // passed to stack.forEach to turn list items up the stack into paras
 869         function paragraphify(s, i, stack) {
 870           var list = s.list;
 871           var last_li = list[list.length-1];
 872
 873           if ( last_li[1] instanceof Array && last_li[1][0] === "para" )
 874             return;
 875           if ( i + 1 === stack.length ) {
 876             // Last stack frame
 877             // Keep the same array, but replace the contents
 878             last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ) );
 879           }
 880           else {
 881             var sublist = last_li.pop();
 882             last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ), sublist );
 883           }
 884         }
 885
 886         // The matcher function
 887         return function( block, next ) {
 888           var m = block.match( is_list_re );
 889           if ( !m )
 890             return undefined;
 891
 892           function make_list( m ) {
 893             var list = bullet_list.exec( m[2] )
 894                      ? ["bulletlist"]
 895                      : ["numberlist"];
 896
 897             stack.push( { list: list, indent: m[1] } );
 898             return list;
 899           }
 900
 901
 902           var stack = [], // Stack of lists for nesting.
 903               list = make_list( m ),
 904               last_li,
 905               loose = false,
 906               ret = [ stack[0].list ],
 907               i;
 908
 909           // Loop to search over block looking for inner block elements and loose lists
 910           loose_search:
 911           while ( true ) {
 912             // Split into lines preserving new lines at end of line
 913             var lines = block.split( /(?=\n)/ );
 914
 915             // We have to grab all lines for a li and call processInline on them
 916             // once as there are some inline things that can span lines.
 917             var li_accumulate = "", nl = "";
 918
 919             // Loop over the lines in this block looking for tight lists.
 920             tight_search:
 921             for ( var line_no = 0; line_no < lines.length; line_no++ ) {
 922               nl = "";
 923               var l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; });
 924
 925
 926               // TODO: really should cache this
 927               var line_re = regex_for_depth( stack.length );
 928
 929               m = l.match( line_re );
 930               //print( "line:", uneval(l), "\nline match:", uneval(m) );
 931
 932               // We have a list item
 933               if ( m[1] !== undefined ) {
 934                 // Process the previous list item, if any
 935                 if ( li_accumulate.length ) {
 936                   add( last_li, loose, this.processInline( li_accumulate ), nl );
 937                   // Loose mode will have been dealt with. Reset it
 938                   loose = false;
 939                   li_accumulate = "";
 940                 }
 941
 942                 m[1] = expand_tab( m[1] );
 943                 var wanted_depth = Math.floor(m[1].length/4)+1;
 944                 //print( "want:", wanted_depth, "stack:", stack.length);
 945                 if ( wanted_depth > stack.length ) {
 946                   // Deep enough for a nested list outright
 947                   //print ( "new nested list" );
 948                   list = make_list( m );
 949                   last_li.push( list );
 950                   last_li = list[1] = [ "listitem" ];
 951                 }
 952                 else {
 953                   // We aren't deep enough to be strictly a new level. This is
 954                   // where Md.pl goes nuts. If the indent matches a level in the
 955                   // stack, put it there, else put it one deeper then the
 956                   // wanted_depth deserves.
 957                   var found = false;
 958                   for ( i = 0; i < stack.length; i++ ) {
 959                     if ( stack[ i ].indent !== m[1] )
 960                       continue;
 961
 962                     list = stack[ i ].list;
 963                     stack.splice( i+1, stack.length - (i+1) );
 964                     found = true;
 965                     break;
 966                   }
 967
 968                   if (!found) {
 969                     //print("not found. l:", uneval(l));
 970                     wanted_depth++;
 971                     if ( wanted_depth <= stack.length ) {
 972                       stack.splice(wanted_depth, stack.length - wanted_depth);
 973                       //print("Desired depth now", wanted_depth, "stack:", stack.length);
 974                       list = stack[wanted_depth-1].list;
 975                       //print("list:", uneval(list) );
 976                     }
 977                     else {
 978                       //print ("made new stack for messy indent");
 979                       list = make_list(m);
 980                       last_li.push(list);
 981                     }
 982                   }
 983
 984                   //print( uneval(list), "last", list === stack[stack.length-1].list );
 985                   last_li = [ "listitem" ];
 986                   list.push(last_li);
 987                 } // end depth of shenegains
 988                 nl = "";
 989               }
 990
 991               // Add content
 992               if ( l.length > m[0].length )
 993                 li_accumulate += nl + l.substr( m[0].length );
 994             } // tight_search
 995
 996             if ( li_accumulate.length ) {
 997               add( last_li, loose, this.processInline( li_accumulate ), nl );
 998               // Loose mode will have been dealt with. Reset it
 999               loose = false;
1000               li_accumulate = "";
1001             }
1002
1003             // Look at the next block - we might have a loose list. Or an extra
1004             // paragraph for the current li
1005             var contained = get_contained_blocks( stack.length, next );
1006
1007             // Deal with code blocks or properly nested lists
1008             if ( contained.length > 0 ) {
1009               // Make sure all listitems up the stack are paragraphs
1010               forEach( stack, paragraphify, this);
1011
1012               last_li.push.apply( last_li, this.toTree( contained, [] ) );
1013             }
1014
1015             var next_block = next[0] && next[0].valueOf() || "";
1016
1017             if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) {
1018               block = next.shift();
1019
1020               // Check for an HR following a list: features/lists/hr_abutting
1021               var hr = this.dialect.block.horizRule( block, next );
1022
1023               if ( hr ) {
1024                 ret.push.apply(ret, hr);
1025                 break;
1026               }
1027
1028               // Make sure all listitems up the stack are paragraphs
1029               forEach( stack, paragraphify, this);
1030
1031               loose = true;
1032               continue loose_search;
1033             }
1034             break;
1035           } // loose_search
1036
1037           return ret;
1038         };
1039       })(),
1040
1041       blockquote: function blockquote( block, next ) {
1042         if ( !block.match( /^>/m ) )
1043           return undefined;
1044
1045         var jsonml = [];
1046
1047         // separate out the leading abutting block, if any. I.e. in this case:
1048         //
1049         //  a
1050         //  > b
1051         //
1052         if ( block[ 0 ] !== ">" ) {
1053           var lines = block.split( /\n/ ),
1054               prev = [],
1055               line_no = block.lineNumber;
1056
1057           // keep shifting lines until you find a crotchet
1058           while ( lines.length && lines[ 0 ][ 0 ] !== ">" ) {
1059             prev.push( lines.shift() );
1060             line_no++;
1061           }
1062
1063           var abutting = mk_block( prev.join( "\n" ), "\n", block.lineNumber );
1064           jsonml.push.apply( jsonml, this.processBlock( abutting, [] ) );
1065           // reassemble new block of just block quotes!
1066           block = mk_block( lines.join( "\n" ), block.trailing, line_no );
1067         }
1068
1069
1070         // if the next block is also a blockquote merge it in
1071         while ( next.length && next[ 0 ][ 0 ] === ">" ) {
1072           var b = next.shift();
1073           block = mk_block( block + block.trailing + b, b.trailing, block.lineNumber );
1074         }
1075
1076         // Strip off the leading "> " and re-process as a block.
1077         var input = block.replace( /^> ?/gm, "" ),
1078             old_tree = this.tree,
1079             processedBlock = this.toTree( input, [ "blockquote" ] ),
1080             attr = extract_attr( processedBlock );
1081
1082         // If any link references were found get rid of them
1083         if ( attr && attr.references ) {
1084           delete attr.references;
1085           // And then remove the attribute object if it's empty
1086           if ( isEmpty( attr ) )
1087             processedBlock.splice( 1, 1 );
1088         }
1089
1090         jsonml.push( processedBlock );
1091         return jsonml;
1092       },
1093
1094       referenceDefn: function referenceDefn( block, next) {
1095         var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/;
1096         // interesting matches are [ , ref_id, url, , title, title ]
1097
1098         if ( !block.match(re) )
1099           return undefined;
1100
1101         // make an attribute node if it doesn't exist
1102         if ( !extract_attr( this.tree ) )
1103           this.tree.splice( 1, 0, {} );
1104
1105         var attrs = extract_attr( this.tree );
1106
1107         // make a references hash if it doesn't exist
1108         if ( attrs.references === undefined )
1109           attrs.references = {};
1110
1111         var b = this.loop_re_over_block(re, block, function( m ) {
1112
1113           if ( m[2] && m[2][0] === "<" && m[2][m[2].length-1] === ">" )
1114             m[2] = m[2].substring( 1, m[2].length - 1 );
1115
1116           var ref = attrs.references[ m[1].toLowerCase() ] = {
1117             href: m[2]
1118           };
1119
1120           if ( m[4] !== undefined )
1121             ref.title = m[4];
1122           else if ( m[5] !== undefined )
1123             ref.title = m[5];
1124
1125         } );
1126
1127         if ( b.length )
1128           next.unshift( mk_block( b, block.trailing ) );
1129
1130         return [];
1131       },
1132
1133       para: function para( block ) {
1134         // everything's a para!
1135         return [ ["para"].concat( this.processInline( block ) ) ];
1136       }
1137     },
1138
1139     inline: {
1140
1141       __oneElement__: function oneElement( text, patterns_or_re, previous_nodes ) {
1142         var m,
1143             res;
1144
1145         patterns_or_re = patterns_or_re || this.dialect.inline.__patterns__;
1146         var re = new RegExp( "([\\s\\S]*?)(" + (patterns_or_re.source || patterns_or_re) + ")" );
1147
1148         m = re.exec( text );
1149         if (!m) {
1150           // Just boring text
1151           return [ text.length, text ];
1152         }
1153         else if ( m[1] ) {
1154           // Some un-interesting text matched. Return that first
1155           return [ m[1].length, m[1] ];
1156         }
1157
1158         var res;
1159         if ( m[2] in this.dialect.inline ) {
1160           res = this.dialect.inline[ m[2] ].call(
1161                     this,
1162                     text.substr( m.index ), m, previous_nodes || [] );
1163         }
1164         // Default for now to make dev easier. just slurp special and output it.
1165         res = res || [ m[2].length, m[2] ];
1166         return res;
1167       },
1168
1169       __call__: function inline( text, patterns ) {
1170
1171         var out = [],
1172             res;
1173
1174         function add(x) {
1175           //D:self.debug("  adding output", uneval(x));
1176           if ( typeof x === "string" && typeof out[out.length-1] === "string" )
1177             out[ out.length-1 ] += x;
1178           else
1179             out.push(x);
1180         }
1181
1182         while ( text.length > 0 ) {
1183           res = this.dialect.inline.__oneElement__.call(this, text, patterns, out );
1184           text = text.substr( res.shift() );
1185           forEach(res, add );
1186         }
1187
1188         return out;
1189       },
1190
1191       // These characters are intersting elsewhere, so have rules for them so that
1192       // chunks of plain text blocks don't include them
1193       "]": function () {},
1194       "}": function () {},
1195
1196       __escape__ : /^\\[\\`\*_{}\[\]()#\+.!\-]/,
1197
1198       "\\": function escaped( text ) {
1199         // [ length of input processed, node/children to add... ]
1200         // Only esacape: \ ` * _ { } [ ] ( ) # * + - . !
1201         if ( this.dialect.inline.__escape__.exec( text ) )
1202           return [ 2, text.charAt( 1 ) ];
1203         else
1204           // Not an esacpe
1205           return [ 1, "\\" ];
1206       },
1207
1208       "![": function image( text ) {
1209
1210         // Unlike images, alt text is plain text only. no other elements are
1211         // allowed in there
1212
1213         // ![Alt text](/path/to/img.jpg "Optional title")
1214         //      1          2            3       4         <--- captures
1215         var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*([^")]*?)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );
1216
1217         if ( m ) {
1218           if ( m[2] && m[2][0] === "<" && m[2][m[2].length-1] === ">" )
1219             m[2] = m[2].substring( 1, m[2].length - 1 );
1220
1221           m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];
1222
1223           var attrs = { alt: m[1], href: m[2] || "" };
1224           if ( m[4] !== undefined)
1225             attrs.title = m[4];
1226
1227           return [ m[0].length, [ "img", attrs ] ];
1228         }
1229
1230         // ![Alt text][id]
1231         m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ );
1232
1233         if ( m ) {
1234           // We can't check if the reference is known here as it likely wont be
1235           // found till after. Check it in md tree->hmtl tree conversion
1236           return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), original: m[0] } ] ];
1237         }
1238
1239         // Just consume the '!['
1240         return [ 2, "![" ];
1241       },
1242
1243       "[": function link( text ) {
1244
1245         var orig = String(text);
1246         // Inline content is possible inside `link text`
1247         var res = inline_until_char.call( this, text.substr(1), "]" );
1248
1249         // No closing ']' found. Just consume the [
1250         if ( !res )
1251           return [ 1, "[" ];
1252
1253         var consumed = 1 + res[ 0 ],
1254             children = res[ 1 ],
1255             link,
1256             attrs;
1257
1258         // At this point the first [...] has been parsed. See what follows to find
1259         // out which kind of link we are (reference or direct url)
1260         text = text.substr( consumed );
1261
1262         // [link text](/path/to/img.jpg "Optional title")
1263         //                 1            2       3         <--- captures
1264         // This will capture up to the last paren in the block. We then pull
1265         // back based on if there a matching ones in the url
1266         //    ([here](/url/(test))
1267         // The parens have to be balanced
1268         var m = text.match( /^\s*\([ \t]*([^"']*)(?:[ \t]+(["'])(.*?)\2)?[ \t]*\)/ );
1269         if ( m ) {
1270           var url = m[1];
1271           consumed += m[0].length;
1272
1273           if ( url && url[0] === "<" && url[url.length-1] === ">" )
1274             url = url.substring( 1, url.length - 1 );
1275
1276           // If there is a title we don't have to worry about parens in the url
1277           if ( !m[3] ) {
1278             var open_parens = 1; // One open that isn't in the capture
1279             for ( var len = 0; len < url.length; len++ ) {
1280               switch ( url[len] ) {
1281               case "(":
1282                 open_parens++;
1283                 break;
1284               case ")":
1285                 if ( --open_parens === 0) {
1286                   consumed -= url.length - len;
1287                   url = url.substring(0, len);
1288                 }
1289                 break;
1290               }
1291             }
1292           }
1293
1294           // Process escapes only
1295           url = this.dialect.inline.__call__.call( this, url, /\\/ )[0];
1296
1297           attrs = { href: url || "" };
1298           if ( m[3] !== undefined)
1299             attrs.title = m[3];
1300
1301           link = [ "link", attrs ].concat( children );
1302           return [ consumed, link ];
1303         }
1304
1305         // [Alt text][id]
1306         // [Alt text] [id]
1307         m = text.match( /^\s*\[(.*?)\]/ );
1308
1309         if ( m ) {
1310
1311           consumed += m[ 0 ].length;
1312
1313           // [links][] uses links as its reference
1314           attrs = { ref: ( m[ 1 ] || String(children) ).toLowerCase(),  original: orig.substr( 0, consumed ) };
1315
1316           link = [ "link_ref", attrs ].concat( children );
1317
1318           // We can't check if the reference is known here as it likely wont be
1319           // found till after. Check it in md tree->hmtl tree conversion.
1320           // Store the original so that conversion can revert if the ref isn't found.
1321           return [ consumed, link ];
1322         }
1323
1324         // [id]
1325         // Only if id is plain (no formatting.)
1326         if ( children.length === 1 && typeof children[0] === "string" ) {
1327
1328           attrs = { ref: children[0].toLowerCase(),  original: orig.substr( 0, consumed ) };
1329           link = [ "link_ref", attrs, children[0] ];
1330           return [ consumed, link ];
1331         }
1332
1333         // Just consume the "["
1334         return [ 1, "[" ];
1335       },
1336
1337
1338       "<": function autoLink( text ) {
1339         var m;
1340
1341         if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) !== null ) {
1342           if ( m[3] )
1343             return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ];
1344           else if ( m[2] === "mailto" )
1345             return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ];
1346           else
1347             return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ];
1348         }
1349
1350         return [ 1, "<" ];
1351       },
1352
1353       "`": function inlineCode( text ) {
1354         // Inline code block. as many backticks as you like to start it
1355         // Always skip over the opening ticks.
1356         var m = text.match( /(`+)(([\s\S]*?)\1)/ );
1357
1358         if ( m && m[2] )
1359           return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ];
1360         else {
1361           // TODO: No matching end code found - warn!
1362           return [ 1, "`" ];
1363         }
1364       },
1365
1366       "  \n": function lineBreak() {
1367         return [ 3, [ "linebreak" ] ];
1368       }
1369
1370     }
1371   };
1372
1373   // Meta Helper/generator method for em and strong handling
1374   function strong_em( tag, md ) {
1375
1376     var state_slot = tag + "_state",
1377         other_slot = tag === "strong" ? "em_state" : "strong_state";
1378
1379     function CloseTag(len) {
1380       this.len_after = len;
1381       this.name = "close_" + md;
1382     }
1383
1384     return function ( text ) {
1385
1386       if ( this[state_slot][0] === md ) {
1387         // Most recent em is of this type
1388         //D:this.debug("closing", md);
1389         this[state_slot].shift();
1390
1391         // "Consume" everything to go back to the recrusion in the else-block below
1392         return[ text.length, new CloseTag(text.length-md.length) ];
1393       }
1394       else {
1395         // Store a clone of the em/strong states
1396         var other = this[other_slot].slice(),
1397             state = this[state_slot].slice();
1398
1399         this[state_slot].unshift(md);
1400
1401         //D:this.debug_indent += "  ";
1402
1403         // Recurse
1404         var res = this.processInline( text.substr( md.length ) );
1405         //D:this.debug_indent = this.debug_indent.substr(2);
1406
1407         var last = res[res.length - 1];
1408
1409         //D:this.debug("processInline from", tag + ": ", uneval( res ) );
1410
1411         var check = this[state_slot].shift();
1412         if ( last instanceof CloseTag ) {
1413           res.pop();
1414           // We matched! Huzzah.
1415           var consumed = text.length - last.len_after;
1416           return [ consumed, [ tag ].concat(res) ];
1417         }
1418         else {
1419           // Restore the state of the other kind. We might have mistakenly closed it.
1420           this[other_slot] = other;
1421           this[state_slot] = state;
1422
1423           // We can't reuse the processed result as it could have wrong parsing contexts in it.
1424           return [ md.length, md ];
1425         }
1426       }
1427     }; // End returned function
1428   }
1429
1430   Gruber.inline["**"] = strong_em("strong", "**");
1431   Gruber.inline["__"] = strong_em("strong", "__");
1432   Gruber.inline["*"]  = strong_em("em", "*");
1433   Gruber.inline["_"]  = strong_em("em", "_");
1434
1435   Markdown.dialects.Gruber = Gruber;
1436   Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block );
1437   Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline );
1438
1439
1440
1441   var Maruku = DialectHelpers.subclassDialect( Gruber ),
1442       extract_attr = MarkdownHelpers.extract_attr,
1443       forEach = MarkdownHelpers.forEach;
1444
1445   Maruku.processMetaHash = function processMetaHash( meta_string ) {
1446     var meta = split_meta_hash( meta_string ),
1447         attr = {};
1448
1449     for ( var i = 0; i < meta.length; ++i ) {
1450       // id: #foo
1451       if ( /^#/.test( meta[ i ] ) )
1452         attr.id = meta[ i ].substring( 1 );
1453       // class: .foo
1454       else if ( /^\./.test( meta[ i ] ) ) {
1455         // if class already exists, append the new one
1456         if ( attr["class"] )
1457           attr["class"] = attr["class"] + meta[ i ].replace( /./, " " );
1458         else
1459           attr["class"] = meta[ i ].substring( 1 );
1460       }
1461       // attribute: foo=bar
1462       else if ( /\=/.test( meta[ i ] ) ) {
1463         var s = meta[ i ].split( /\=/ );
1464         attr[ s[ 0 ] ] = s[ 1 ];
1465       }
1466     }
1467
1468     return attr;
1469   };
1470
1471   function split_meta_hash( meta_string ) {
1472     var meta = meta_string.split( "" ),
1473         parts = [ "" ],
1474         in_quotes = false;
1475
1476     while ( meta.length ) {
1477       var letter = meta.shift();
1478       switch ( letter ) {
1479       case " " :
1480         // if we're in a quoted section, keep it
1481         if ( in_quotes )
1482           parts[ parts.length - 1 ] += letter;
1483         // otherwise make a new part
1484         else
1485           parts.push( "" );
1486         break;
1487       case "'" :
1488       case '"' :
1489         // reverse the quotes and move straight on
1490         in_quotes = !in_quotes;
1491         break;
1492       case "\\" :
1493         // shift off the next letter to be used straight away.
1494         // it was escaped so we'll keep it whatever it is
1495         letter = meta.shift();
1496         /* falls through */
1497       default :
1498         parts[ parts.length - 1 ] += letter;
1499         break;
1500       }
1501     }
1502
1503     return parts;
1504   }
1505
1506   Maruku.block.document_meta = function document_meta( block ) {
1507     // we're only interested in the first block
1508     if ( block.lineNumber > 1 )
1509       return undefined;
1510
1511     // document_meta blocks consist of one or more lines of `Key: Value\n`
1512     if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) )
1513       return undefined;
1514
1515     // make an attribute node if it doesn't exist
1516     if ( !extract_attr( this.tree ) )
1517       this.tree.splice( 1, 0, {} );
1518
1519     var pairs = block.split( /\n/ );
1520     for ( var p in pairs ) {
1521       var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ),
1522           key = m[ 1 ].toLowerCase(),
1523           value = m[ 2 ];
1524
1525       this.tree[ 1 ][ key ] = value;
1526     }
1527
1528     // document_meta produces no content!
1529     return [];
1530   };
1531
1532   Maruku.block.block_meta = function block_meta( block ) {
1533     // check if the last line of the block is an meta hash
1534     var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ );
1535     if ( !m )
1536       return undefined;
1537
1538     // process the meta hash
1539     var attr = this.dialect.processMetaHash( m[ 2 ] ),
1540         hash;
1541
1542     // if we matched ^ then we need to apply meta to the previous block
1543     if ( m[ 1 ] === "" ) {
1544       var node = this.tree[ this.tree.length - 1 ];
1545       hash = extract_attr( node );
1546
1547       // if the node is a string (rather than JsonML), bail
1548       if ( typeof node === "string" )
1549         return undefined;
1550
1551       // create the attribute hash if it doesn't exist
1552       if ( !hash ) {
1553         hash = {};
1554         node.splice( 1, 0, hash );
1555       }
1556
1557       // add the attributes in
1558       for ( var a in attr )
1559         hash[ a ] = attr[ a ];
1560
1561       // return nothing so the meta hash is removed
1562       return [];
1563     }
1564
1565     // pull the meta hash off the block and process what's left
1566     var b = block.replace( /\n.*$/, "" ),
1567         result = this.processBlock( b, [] );
1568
1569     // get or make the attributes hash
1570     hash = extract_attr( result[ 0 ] );
1571     if ( !hash ) {
1572       hash = {};
1573       result[ 0 ].splice( 1, 0, hash );
1574     }
1575
1576     // attach the attributes to the block
1577     for ( var a in attr )
1578       hash[ a ] = attr[ a ];
1579
1580     return result;
1581   };
1582
1583   Maruku.block.definition_list = function definition_list( block, next ) {
1584     // one or more terms followed by one or more definitions, in a single block
1585     var tight = /^((?:[^\s:].*\n)+):\s+([\s\S]+)$/,
1586         list = [ "dl" ],
1587         i, m;
1588
1589     // see if we're dealing with a tight or loose block
1590     if ( ( m = block.match( tight ) ) ) {
1591       // pull subsequent tight DL blocks out of `next`
1592       var blocks = [ block ];
1593       while ( next.length && tight.exec( next[ 0 ] ) )
1594         blocks.push( next.shift() );
1595
1596       for ( var b = 0; b < blocks.length; ++b ) {
1597         var m = blocks[ b ].match( tight ),
1598             terms = m[ 1 ].replace( /\n$/, "" ).split( /\n/ ),
1599             defns = m[ 2 ].split( /\n:\s+/ );
1600
1601         // print( uneval( m ) );
1602
1603         for ( i = 0; i < terms.length; ++i )
1604           list.push( [ "dt", terms[ i ] ] );
1605
1606         for ( i = 0; i < defns.length; ++i ) {
1607           // run inline processing over the definition
1608           list.push( [ "dd" ].concat( this.processInline( defns[ i ].replace( /(\n)\s+/, "$1" ) ) ) );
1609         }
1610       }
1611     }
1612     else {
1613       return undefined;
1614     }
1615
1616     return [ list ];
1617   };
1618
  // Note on the _split_on_unescaped helper defined inside table() below: it
  // splits on unescaped instances of @ch. If @ch is not a single character the
  // result can be unpredictable.
1621
1622   Maruku.block.table = function table ( block ) {
1623
1624     var _split_on_unescaped = function( s, ch ) {
1625       ch = ch || '\\s';
1626       if ( ch.match(/^[\\|\[\]{}?*.+^$]$/) )
1627         ch = '\\' + ch;
1628       var res = [ ],
1629           r = new RegExp('^((?:\\\\.|[^\\\\' + ch + '])*)' + ch + '(.*)'),
1630           m;
1631       while ( ( m = s.match( r ) ) ) {
1632         res.push( m[1] );
1633         s = m[2];
1634       }
1635       res.push(s);
1636       return res;
1637     };
1638
1639     var leading_pipe = /^ {0,3}\|(.+)\n {0,3}\|\s*([\-:]+[\-| :]*)\n((?:\s*\|.*(?:\n|$))*)(?=\n|$)/,
1640         // find at least an unescaped pipe in each line
1641         no_leading_pipe = /^ {0,3}(\S(?:\\.|[^\\|])*\|.*)\n {0,3}([\-:]+\s*\|[\-| :]*)\n((?:(?:\\.|[^\\|])*\|.*(?:\n|$))*)(?=\n|$)/,
1642         i,
1643         m;
1644     if ( ( m = block.match( leading_pipe ) ) ) {
1645       // remove leading pipes in contents
1646       // (header and horizontal rule already have the leading pipe left out)
1647       m[3] = m[3].replace(/^\s*\|/gm, '');
1648     } else if ( ! ( m = block.match( no_leading_pipe ) ) ) {
1649       return undefined;
1650     }
1651
1652     var table = [ "table", [ "thead", [ "tr" ] ], [ "tbody" ] ];
1653
1654     // remove trailing pipes, then split on pipes
1655     // (no escaped pipes are allowed in horizontal rule)
1656     m[2] = m[2].replace(/\|\s*$/, '').split('|');
1657
1658     // process alignment
1659     var html_attrs = [ ];
1660     forEach (m[2], function (s) {
1661       if (s.match(/^\s*-+:\s*$/))
1662         html_attrs.push({align: "right"});
1663       else if (s.match(/^\s*:-+\s*$/))
1664         html_attrs.push({align: "left"});
1665       else if (s.match(/^\s*:-+:\s*$/))
1666         html_attrs.push({align: "center"});
1667       else
1668         html_attrs.push({});
1669     });
1670
1671     // now for the header, avoid escaped pipes
1672     m[1] = _split_on_unescaped(m[1].replace(/\|\s*$/, ''), '|');
1673     for (i = 0; i < m[1].length; i++) {
1674       table[1][1].push(['th', html_attrs[i] || {}].concat(
1675         this.processInline(m[1][i].trim())));
1676     }
1677
1678     // now for body contents
1679     forEach (m[3].replace(/\|\s*$/mg, '').split('\n'), function (row) {
1680       var html_row = ['tr'];
1681       row = _split_on_unescaped(row, '|');
1682       for (i = 0; i < row.length; i++)
1683         html_row.push(['td', html_attrs[i] || {}].concat(this.processInline(row[i].trim())));
1684       table[2].push(html_row);
1685     }, this);
1686
1687     return [table];
1688   };
1689
1690   Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) {
1691     if ( !out.length )
1692       return [ 2, "{:" ];
1693
1694     // get the preceeding element
1695     var before = out[ out.length - 1 ];
1696
1697     if ( typeof before === "string" )
1698       return [ 2, "{:" ];
1699
1700     // match a meta hash
1701     var m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ );
1702
1703     // no match, false alarm
1704     if ( !m )
1705       return [ 2, "{:" ];
1706
1707     // attach the attributes to the preceeding element
1708     var meta = this.dialect.processMetaHash( m[ 1 ] ),
1709         attr = extract_attr( before );
1710
1711     if ( !attr ) {
1712       attr = {};
1713       before.splice( 1, 0, attr );
1714     }
1715
1716     for ( var k in meta )
1717       attr[ k ] = meta[ k ];
1718
1719     // cut out the string and replace it with nothing
1720     return [ m[ 0 ].length, "" ];
1721   };
1722
1723
1724   Markdown.dialects.Maruku = Maruku;
1725   Markdown.dialects.Maruku.inline.__escape__ = /^\\[\\`\*_{}\[\]()#\+.!\-|:]/;
1726   Markdown.buildBlockOrder ( Markdown.dialects.Maruku.block );
1727   Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline );
1728
1729
1730 // Include all our depndencies and;
1731   expose.Markdown = Markdown;
1732   expose.parse = Markdown.parse;
1733   expose.toHTML = Markdown.toHTML;
1734   expose.toHTMLTree = Markdown.toHTMLTree;
1735   expose.renderJsonML = Markdown.renderJsonML;
1736
1737 })(function() {
1738   window.markdown = {};
1739   return window.markdown;
1740 }());