/**
    Markdown parser implementation

    Copyright: © 2012-2019 Sönke Ludwig
    License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
    Authors: Sönke Ludwig
*/
module vibe.textfilter.markdown;

import vibe.core.log;
import vibe.textfilter.html;
import vibe.utils.string;

import std.algorithm : canFind, countUntil, min;
import std.array;
import std.format;
import std.range;
import std.string;

/*
    TODO:
        detect inline HTML tags
*/

version(MarkdownTest)
{
    /// Manual test driver: renders "test.txt" and logs the resulting HTML line by line.
    int main()
    {
        import std.file;
        setLogLevel(LogLevel.Trace);
        auto text = readText("test.txt");
        auto result = appender!string();
        filterMarkdown(result, text);
        foreach( ln; splitLines(result.data) )
            logInfo(ln);
        return 0;
    }
}

/** Returns a Markdown filtered HTML string.

    Params:
        str = Markdown source text
        flags = Parser flags; a temporary `MarkdownSettings` carrying these
            flags is created for the call
        settings = Parser settings; `null` selects default settings
*/
string filterMarkdown()(string str, MarkdownFlags flags)
@trusted { // scope class is not @safe for DMD 2.072
    scope settings = new MarkdownSettings;
    settings.flags = flags;
    return filterMarkdown(str, settings);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
    auto dst = appender!string();
    filterMarkdown(dst, str, settings);
    return dst.data;
}


/** Markdown filters the given string and writes the corresponding HTML to an output range.

    Params:
        dst = Output range receiving the generated HTML
        src = Markdown source text
        flags = Parser flags; a temporary `MarkdownSettings` carrying these
            flags is created for the call
        settings = Parser settings; `null` selects default settings
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
    scope settings = new MarkdownSettings;
    settings.flags = flags;
    filterMarkdown(dst, src, settings);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
    if (!settings) settings = new MarkdownSettings;

    // pipeline: split into lines -> collect link references -> classify
    // lines -> build the block tree -> emit HTML
    auto all_lines = splitLines(src);
    auto links = scanForReferences(all_lines);
    auto lines = parseLines(all_lines, settings);
    Block root_block;
    parseBlocks(root_block, lines, null, settings);
    writeBlock(dst, root_block, links, settings);
}

/**
    Returns the hierarchy of sections

    Only header blocks that are direct children of the document root are
    considered. A header is attached below the most recently added section
    chain as long as the last section there has a smaller heading level.
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
    import std.conv : to;

    if (!settings) settings = new MarkdownSettings;
    auto all_lines = splitLines(markdown_source);
    auto lines = parseLines(all_lines, settings);
    Block root_block;
    parseBlocks(root_block, lines, null, settings);
    Section root;

    foreach (ref sb; root_block.blocks) {
        if (sb.type == BlockType.header) {
            // descend along the last sub section while it is a higher-level
            // (numerically smaller) heading than the new one
            auto s = &root;
            while (true) {
                if (s.subSections.length == 0) break;
                if (s.subSections[$-1].headingLevel >= sb.headerLevel) break;
                s = &s.subSections[$-1];
            }
            s.subSections ~= Section(sb.headerLevel, sb.text[0], sb.text[0].asSlug.to!string);
        }
    }

    return root.subSections;
}

///
unittest {
    import std.conv : to;
    assert (getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth") ==
        [
            Section(2, " first", "first"),
            Section(2, " second", "second", [
                Section(3, " third", "third")
            ]),
            Section(1, " fourth", "fourth", [
                Section(3, " fifth", "fifth")
            ])
        ]
    );
}

/// Configuration of the Markdown parser and HTML writer.
final class MarkdownSettings {
    /// Controls the capabilities of the parser.
    MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

    /// Heading tags will start at this level.
    size_t headingBaseLevel = 1;

    /// Called for every link/image URL to perform arbitrary transformations.
    string delegate(string url_or_path, bool is_image) urlFilter;

    /// White list of URI schemas that can occur in link/image targets
    string[] allowedURISchemas = ["http", "https", "ftp", "mailto"];
}

/// Bit flags selecting optional parser features.
enum MarkdownFlags {
    /** Same as `vanillaMarkdown`
    */
    none = 0,

    /** Convert line breaks into hard line breaks in the output

        This option is useful when operating on text that may be formatted as
        plain text, without having Markdown in mind, while still improving
        the appearance of the text in many cases. A common example would be
        to format e-mails or newsgroup posts.
    */
    keepLineBreaks = 1<<0,

    /** Support fenced code blocks.
    */
    backtickCodeBlocks = 1<<1,

    /** Disable support for embedded HTML
    */
    noInlineHtml = 1<<2,
    //noLinks = 1<<3,
    //allowUnsafeHtml = 1<<4,

    /** Support table definitions

        The syntax is based on Markdown Extra and GitHub flavored Markdown.
    */
    tables = 1<<5,

    /** Support HTML attributes after links

        Links or images directly followed by `{ … }` allow regular HTML
        attributes to be added to the generated HTML element.
    */
    attributes = 1<<6,

    /** Recognize figure definitions

        Figures can be defined using a modified list syntax:

        ```
        - %%%
            This is the figure content

            - ###
                This is optional caption content
        ```

        Just like for lists, arbitrary blocks can be nested within figure and
        figure caption blocks. If only a single paragraph is present within a
        figure caption block, the paragraph text will be emitted without the
        surrounding `<p>` tags. The same is true for figure blocks that contain
        only a single paragraph and any number of additional figure caption
        blocks.
    */
    figures = 1<<7,

    /** Support only standard Markdown features

        Note that the parser is not fully CommonMark compliant at the moment,
        but this is the general idea behind this option.
    */
    vanillaMarkdown = none,

    /** Default set of flags suitable for use within an online forum
    */
    forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml|tables
}

/// A node of the outline returned by `getMarkdownOutline`.
struct Section {
    size_t headingLevel;
    string caption;
    string anchor;
    Section[] subSections;
}

private {
    // tag names that `parseHtmlBlockLine` accepts as HTML block tags
    immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
}

/// Kind of a single indentation step in front of a line.
private enum IndentType {
    white,
    quote
}

/// Classification of a source line as determined by `parseLines`.
private enum LineType {
    undefined,
    blank,
    plain,
    hline,
    atxHeader,
    setextHeader,
    tableSeparator,
    uList,
    oList,
    figure,
    figureCaption,
    htmlBlock,
    codeBlockDelimiter
}

/// A classified source line together with its indentation prefix.
private struct Line {
    LineType type;
    IndentType[] indent;
    string text;       // the full original line
    string unindented; // the line with all recognized indentation removed

    /** Returns `text` with the first `n` indentation steps removed.

        `n` must not exceed `indent.length`.
    */
    string unindent(size_t n)
    pure @safe {
        assert (n <= indent.length);
        string ln = text;
        foreach (i; 0 .. n) {
            final switch(indent[i]){
                case IndentType.white:
                    // a white step is either four spaces or a single tab
                    // (see parseLines)
                    if (ln[0] == ' ') ln = ln[4 .. $];
                    else ln = ln[1 .. $];
                    break;
                case IndentType.quote:
                    // drop the '>' and one optional following space
                    ln = ln.stripLeft()[1 .. $];
                    if (ln.startsWith(' '))
                        ln.popFront();
                    break;
            }
        }
        return ln;
    }
}

/** Splits off the indentation of each line and determines its `LineType`.

    A tab or a run of four spaces counts as one `IndentType.white` step, a
    `>` (optionally preceded by white space and followed by a single space)
    as one `IndentType.quote` step.
*/
private Line[] parseLines(string[] lines, scope MarkdownSettings settings)
pure @safe {
    Line[] ret;
    while( !lines.empty ){
        auto ln = lines.front;
        lines.popFront();

        Line lninfo;
        lninfo.text = ln;

        while (ln.length > 0) {
            if (ln[0] == '\t') {
                lninfo.indent ~= IndentType.white;
                ln.popFront();
            } else if (ln.startsWith("    ")) {
                // exactly the four characters consumed below must be spaces
                lninfo.indent ~= IndentType.white;
                ln.popFrontN(4);
            } else {
                if (ln.stripLeft().startsWith(">")) {
                    lninfo.indent ~= IndentType.quote;
                    ln = ln.stripLeft();
                    ln.popFront();
                    if (ln.startsWith(' '))
                        ln.popFront();
                } else break;
            }
        }
        lninfo.unindented = ln;

        // the order of the checks matters, the first match wins
        if ((settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln))
            lninfo.type = LineType.codeBlockDelimiter;
        else if(isAtxHeaderLine(ln)) lninfo.type = LineType.atxHeader;
        else if(isSetextHeaderLine(ln)) lninfo.type = LineType.setextHeader;
        else if((settings.flags & MarkdownFlags.tables) && isTableSeparatorLine(ln))
            lninfo.type = LineType.tableSeparator;
        else if(isHlineLine(ln)) lninfo.type = LineType.hline;
        else if(isOListLine(ln)) lninfo.type = LineType.oList;
        else if(isUListLine(ln)) {
            if (settings.flags & MarkdownFlags.figures) {
                auto suff = removeListPrefix(ln, LineType.uList);
                if (suff == "%%%") lninfo.type = LineType.figure;
                else if (suff == "###") lninfo.type = LineType.figureCaption;
                else lninfo.type = LineType.uList;
            } else lninfo.type = LineType.uList;
        } else if(isLineBlank(ln)) lninfo.type = LineType.blank;
        else if(!(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln))
            lninfo.type = LineType.htmlBlock;
        else lninfo.type = LineType.plain;

        ret ~= lninfo;
    }
    return ret;
}

unittest {
    import std.conv : to;
    auto s = new MarkdownSettings;
    s.flags = MarkdownFlags.forumDefault;
    // quote marker directly followed by the text
    auto lns = [">```D"];
    assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]);
    // one space after the quote marker is swallowed
    lns = ["> ```D"];
    assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]);
    // a second space is kept as part of the unindented text
    lns = [">  ```D"];
    assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], " ```D")]);
    // one swallowed space plus four spaces forming a white indent step
    lns = [">     ```D"];
    assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], lns[0], "```D")]);
    lns = [">test"];
    assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]);
    lns = ["> test"];
    assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]);
    lns = [">  test"];
    assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], " test")]);
    lns = [">     test"];
    assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote, IndentType.white], lns[0], "test")]);
}

/// Kind of a node in the parsed block tree.
private enum BlockType {
    plain,
    text,
    paragraph,
    header,
    table,
    oList,
    uList,
    listItem,
    code,
    quote,
    figure,
    figureCaption
}

/// Node of the parsed block tree.
private struct Block {
    BlockType type;
    Attribute[] attributes;
    string[] text;
    Block[] blocks;
    size_t headerLevel; // set for BlockType.header
    Alignment[] columns; // set for BlockType.table
}

/// A single HTML attribute (name/value pair).
private struct Attribute {
    string attribute;
    string value;
}

/// Table column alignment; `center` is the combination of `left` and `right`.
private enum Alignment {
    none = 0,
    left = 1<<0,
    right = 1<<1,
    center = left | right
}

/** Consumes lines from the front of `lines` and appends the resulting
    blocks to `root`.

    Parsing stops when `lines` is exhausted or when a line is found whose
    indentation does not start with `base_indent`.
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
    import std.conv : to;
    import std.algorithm.comparison : among;

    if (base_indent.length == 0) root.type = BlockType.text;
    else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote;

    while (!lines.empty) {
        auto ln = lines.front;

        if (ln.type == LineType.blank) {
            lines.popFront();
            continue;
        }

        if (ln.indent != base_indent) {
            // the line belongs to an outer block
            if (ln.indent.length < base_indent.length
                || ln.indent[0 .. base_indent.length] != base_indent)
            {
                return;
            }

            auto cindent = base_indent ~ IndentType.white;
            if (ln.indent == cindent) {
                // exactly one additional white indent: indented code block
                Block cblock;
                cblock.type = BlockType.code;
                while (!lines.empty && (lines.front.unindented.strip.empty
                    || lines.front.indent.length >= cindent.length
                    && lines.front.indent[0 .. cindent.length] == cindent))
                {
                    cblock.text ~= lines.front.indent.length >= cindent.length
                        ? lines.front.unindent(cindent.length) : "";
                    lines.popFront();
                }
                root.blocks ~= cblock;
            } else {
                // deeper/other indentation: recurse with one more indent step
                Block subblock;
                parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
                root.blocks ~= subblock;
            }
        } else {
            Block b;
            final switch (ln.type) {
                case LineType.undefined: assert (false);
                case LineType.blank: assert (false);
                case LineType.plain:
                    if (lines.length >= 2 && lines[1].type == LineType.setextHeader) {
                        // text line followed by an underline of '=' or '-'
                        auto setln = lines[1].unindented;
                        b.type = BlockType.header;
                        b.text = [ln.unindented];
                        if (settings.flags & MarkdownFlags.attributes)
                            parseAttributeString(skipAttributes(b.text[0]), b.attributes);
                        if (!b.attributes.canFind!(a => a.attribute == "id"))
                            b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string);
                        b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
                        lines.popFrontN(2);
                    } else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator
                        && ln.unindented.indexOf('|') >= 0)
                    {
                        // header row followed by a separator row
                        auto setln = lines[1].unindented;
                        b.type = BlockType.table;
                        b.text = [ln.unindented];
                        foreach (c; getTableColumns(setln)) {
                            Alignment a = Alignment.none;
                            if (c.startsWith(':')) a |= Alignment.left;
                            if (c.endsWith(':')) a |= Alignment.right;
                            b.columns ~= a;
                        }

                        lines.popFrontN(2);
                        while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) {
                            b.text ~= lines.front.unindented;
                            lines.popFront();
                        }
                    } else {
                        b.type = BlockType.paragraph;
                        b.text = skipText(lines, base_indent);
                    }
                    break;
                case LineType.hline:
                    b.type = BlockType.plain;
                    b.text = ["<hr>"];
                    lines.popFront();
                    break;
                case LineType.atxHeader:
                    b.type = BlockType.header;
                    string hl = ln.unindented;
                    b.headerLevel = 0;
                    while (hl.length > 0 && hl[0] == '#') {
                        b.headerLevel++;
                        hl = hl[1 .. $];
                    }

                    if (settings.flags & MarkdownFlags.attributes)
                        parseAttributeString(skipAttributes(hl), b.attributes);
                    if (!b.attributes.canFind!(a => a.attribute == "id"))
                        b.attributes ~= Attribute("id", asSlug(hl).to!string);

                    // strip closing '#' characters and trailing spaces
                    while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
                        hl = hl[0 .. $-1];
                    b.text = [hl];
                    lines.popFront();
                    break;
                case LineType.setextHeader:
                    // underline without a preceding text line: dropped
                    lines.popFront();
                    break;
                case LineType.tableSeparator:
                    // separator without a preceding header row: dropped
                    lines.popFront();
                    break;
                case LineType.figure:
                case LineType.figureCaption:
                    b.type = ln.type == LineType.figure
                        ? BlockType.figure : BlockType.figureCaption;

                    auto itemindent = base_indent ~ IndentType.white;
                    lines.popFront();
                    parseBlocks(b, lines, itemindent, settings);
                    break;
                case LineType.uList:
                case LineType.oList:
                    b.type = ln.type == LineType.uList ? BlockType.uList : BlockType.oList;

                    auto itemindent = base_indent ~ IndentType.white;
                    bool firstItem = true, paraMode = false;
                    while (!lines.empty && lines.front.type == ln.type
                        && lines.front.indent == base_indent)
                    {
                        Block itm;
                        itm.text = skipText(lines, itemindent);
                        itm.text[0] = removeListPrefix(itm.text[0], ln.type);

                        // emit <p>...</p> if there are blank lines between the items
                        if (firstItem && !lines.empty && lines.front.type == LineType.blank) {
                            lines.popFront();
                            if (!lines.empty && lines.front.type == ln.type)
                                paraMode = true;
                        }
                        firstItem = false;
                        if (paraMode) {
                            Block para;
                            para.type = BlockType.paragraph;
                            para.text = itm.text;
                            itm.blocks ~= para;
                            itm.text = null;
                        }

                        // parseBlocks may set itm.type, so assign the final
                        // type only afterwards
                        parseBlocks(itm, lines, itemindent, settings);
                        itm.type = BlockType.listItem;
                        b.blocks ~= itm;
                    }
                    break;
                case LineType.htmlBlock:
                    int nestlevel = 0;
                    auto starttag = parseHtmlBlockLine(ln.unindented);
                    if (!starttag.isHtmlBlock || !starttag.open)
                        break;

                    // copy lines verbatim until the start tag is balanced
                    b.type = BlockType.plain;
                    while (!lines.empty) {
                        if (lines.front.indent.length < base_indent.length)
                            break;
                        if (lines.front.indent[0 .. base_indent.length] != base_indent)
                            break;

                        auto str = lines.front.unindent(base_indent.length);
                        auto taginfo = parseHtmlBlockLine(str);
                        b.text ~= lines.front.unindent(base_indent.length);
                        lines.popFront();
                        if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName)
                            nestlevel += taginfo.open ? 1 : -1;
                        if (nestlevel <= 0) break;
                    }
                    break;
                case LineType.codeBlockDelimiter:
                    lines.popFront(); // TODO: get language from line
                    b.type = BlockType.code;
                    while (!lines.empty) {
                        if (lines.front.indent.length < base_indent.length)
                            break;
                        if (lines.front.indent[0 .. base_indent.length] != base_indent)
                            break;
                        if (lines.front.type == LineType.codeBlockDelimiter) {
                            lines.popFront();
                            break;
                        }
                        b.text ~= lines.front.unindent(base_indent.length);
                        lines.popFront();
                    }
                    break;
            }
            root.blocks ~= b;
        }
    }
}


/** Removes a run of text lines from the front of `lines` and returns them
    unindented.

    The first line is always taken; following lines are taken as long as they
    are of type `LineType.plain` and their indentation is compatible with
    `indent`.
*/
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
    static bool matchesIndent(IndentType[] indent, IndentType[] base_indent)
    {
        if (indent.length > base_indent.length) return false;
        if (indent != base_indent[0 .. indent.length]) return false;
        // find the innermost quote level of the base indentation
        sizediff_t qidx = -1;
        foreach_reverse (i, tp; base_indent)
            if (tp == IndentType.quote) {
                qidx = i;
                break;
            }
        if (qidx >= 0) {
            qidx = base_indent.length-1 - qidx;
            if( indent.length <= qidx ) return false;
        }
        return true;
    }

    // return value is used in variables that don't get bounds checks on the
    // first element, so we should return at least one
    if (lines.empty)
        return [""];

    string[] ret;

    while (true) {
        ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length));
        lines.popFront();

        if (lines.empty || !matchesIndent(lines.front.indent, indent)
            || lines.front.type != LineType.plain)
        {
            return ret;
        }
    }
}

/// private
/// Recursively writes the HTML for `block` and its child blocks to `dst`.
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
    final switch (block.type) {
        case BlockType.plain:
            // plain blocks are emitted verbatim
            foreach (ln; block.text) {
                put(dst, ln);
                put(dst, "\n");
            }
            foreach (b; block.blocks)
                writeBlock(dst, b, links, settings);
            break;
        case BlockType.text:
            writeMarkdownEscaped(dst, block, links, settings);
            foreach (b; block.blocks)
                writeBlock(dst, b, links, settings);
            break;
        case BlockType.paragraph:
            assert (block.blocks.length == 0);
            put(dst, "<p>");
            writeMarkdownEscaped(dst, block, links, settings);
            put(dst, "</p>\n");
            break;
        case BlockType.header:
            assert (block.blocks.length == 0);
            assert (block.text.length == 1);
            auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
            dst.writeTag(block.attributes, "h", hlvl);
            writeMarkdownEscaped(dst, block.text[0], links, settings);
            dst.formattedWrite("</h%s>\n", hlvl);
            break;
        case BlockType.table:
            import std.algorithm.iteration : splitter;

            // indexed by the Alignment value of the column
            static string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""];

            put(dst, "<table>\n");
            put(dst, "<tr>");
            size_t i = 0;
            foreach (col; block.text[0].getTableColumns()) {
                put(dst, "<th");
                put(dst, alstr[block.columns[i]]);
                put(dst, '>');
                dst.writeMarkdownEscaped(col, links, settings);
                put(dst, "</th>");
                // surplus cells reuse the alignment of the last column
                if (i + 1 < block.columns.length)
                    i++;
            }
            put(dst, "</tr>\n");
            foreach (ln; block.text[1 .. $]) {
                put(dst, "<tr>");
                i = 0;
                foreach (col; ln.getTableColumns()) {
                    put(dst, "<td");
                    put(dst, alstr[block.columns[i]]);
                    put(dst, '>');
                    dst.writeMarkdownEscaped(col, links, settings);
                    put(dst, "</td>");
                    if (i + 1 < block.columns.length)
                        i++;
                }
                put(dst, "</tr>\n");
            }
            put(dst, "</table>\n");
            break;
        case BlockType.oList:
            put(dst, "<ol>\n");
            foreach (b; block.blocks)
                writeBlock(dst, b, links, settings);
            put(dst, "</ol>\n");
            break;
        case BlockType.uList:
            put(dst, "<ul>\n");
            foreach (b; block.blocks)
                writeBlock(dst, b, links, settings);
            put(dst, "</ul>\n");
            break;
        case BlockType.listItem:
            put(dst, "<li>");
            writeMarkdownEscaped(dst, block, links, settings);
            foreach (b; block.blocks)
                writeBlock(dst, b, links, settings);
            put(dst, "</li>\n");
            break;
        case BlockType.code:
            assert (block.blocks.length == 0);
            put(dst, "<pre class=\"prettyprint\"><code>");
            foreach (ln; block.text) {
                filterHTMLEscape(dst, ln);
                put(dst, "\n");
            }
            put(dst, "</code></pre>\n");
            break;
        case BlockType.quote:
            put(dst, "<blockquote>");
            writeMarkdownEscaped(dst, block, links, settings);
            foreach (b; block.blocks)
                writeBlock(dst, b, links, settings);
            put(dst, "</blockquote>\n");
            break;
        case BlockType.figure:
            put(dst, "<figure>");
            // a lone non-caption child paragraph is written without <p> tags
            bool omit_para = block.blocks.count!(b => b.type != BlockType.figureCaption) == 1;
            foreach (b; block.blocks) {
                if (b.type == BlockType.paragraph && omit_para) {
                    writeMarkdownEscaped(dst, b, links, settings);
                } else writeBlock(dst, b, links, settings);
            }
            put(dst, "</figure>\n");
            break;
        case BlockType.figureCaption:
            put(dst, "<figcaption>");
            if (block.blocks.length == 1 && block.blocks[0].type == BlockType.paragraph) {
                writeMarkdownEscaped(dst, block.blocks[0], links, settings);
            } else {
                foreach (b; block.blocks)
                    writeBlock(dst, b, links, settings);
            }
            put(dst, "</figcaption>\n");
            break;
    }
}

/** Writes the text lines of `block` as inline Markdown.

    The lines are joined with `<br>` if `MarkdownFlags.keepLineBreaks` is set
    and with a newline otherwise.
*/
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
    auto lines = () @trusted { return cast(string[])block.text; } ();
    auto text = settings.flags & MarkdownFlags.keepLineBreaks ? lines.join("<br>") : lines.join("\n");
    writeMarkdownEscaped(dst, text, links, settings);
    if (lines.length) put(dst, "\n");
}

/// private
/// Writes a single string of inline Markdown (emphasis, code spans, links,
/// images, auto links, escapes) as HTML to `dst`.
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
    bool isAllowedURI(string lnk) {
        auto idx = lnk.indexOf('/');
        auto cidx = lnk.indexOf(':');
        // always allow local URIs
        if (cidx < 0 || idx >= 0 && cidx > idx) return true;
        return settings.allowedURISchemas.canFind(lnk[0 .. cidx]);
    }

    string filterLink(string lnk, bool is_image) {
        if (isAllowedURI(lnk))
            return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk;
        return "#"; // replace link with unknown schema with dummy URI
    }

    // two trailing spaces request a hard line break
    bool br = ln.endsWith("  ");
    while (ln.length > 0) {
        switch (ln[0]) {
            default:
                put(dst, ln[0]);
                ln = ln[1 .. $];
                break;
            case '\\':
                if (ln.length >= 2) {
                    switch (ln[1]) {
                        default:
                            put(dst, ln[0 .. 2]);
                            ln = ln[2 .. $];
                            break;
                        case '\'', '`', '*', '_', '{', '}', '[', ']',
                            '(', ')', '#', '+', '-', '.', '!':
                            // escaped special character: drop the backslash
                            put(dst, ln[1]);
                            ln = ln[2 .. $];
                            break;
                    }
                } else {
                    put(dst, ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '_':
            case '*':
                string text;
                if (auto em = parseEmphasis(ln, text)) {
                    put(dst, em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
                    put(dst, text);
                    put(dst, em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
                } else {
                    put(dst, ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '`':
                string code;
                if (parseInlineCode(ln, code)) {
                    put(dst, "<code class=\"prettyprint\">");
                    filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
                    put(dst, "</code>");
                } else {
                    put(dst, ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '[':
                Link link;
                Attribute[] attributes;
                if (parseLink(ln, link, linkrefs,
                    settings.flags & MarkdownFlags.attributes ? &attributes : null))
                {
                    attributes ~= Attribute("href", filterLink(link.url, false));
                    if (link.title.length)
                        attributes ~= Attribute("title", link.title);
                    dst.writeTag(attributes, "a");
                    writeMarkdownEscaped(dst, link.text, linkrefs, settings);
                    put(dst, "</a>");
                } else {
                    put(dst, ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '!':
                Link link;
                Attribute[] attributes;
                if (parseLink(ln, link, linkrefs,
                    settings.flags & MarkdownFlags.attributes ? &attributes : null))
                {
                    attributes ~= Attribute("src", filterLink(link.url, true));
                    attributes ~= Attribute("alt", link.text);
                    if (link.title.length)
                        attributes ~= Attribute("title", link.title);
                    dst.writeTag(attributes, "img");
                } else if( ln.length >= 2 ){
                    put(dst, ln[0 .. 2]);
                    ln = ln[2 .. $];
                } else {
                    put(dst, ln[0]);
                    ln = ln[1 .. $];
                }
                break;
            case '>':
                // neutralize the bracket when embedded HTML is disabled
                if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, "&gt;");
                else put(dst, ln[0]);
                ln = ln[1 .. $];
                break;
            case '<':
                string url;
                if (parseAutoLink(ln, url)) {
                    bool is_email = url.startsWith("mailto:");
                    put(dst, "<a href=\"");
                    if (is_email) filterHTMLAllEscape(dst, url);
                    else filterHTMLAttribEscape(dst, filterLink(url, false));
                    put(dst, "\">");
                    if (is_email) filterHTMLAllEscape(dst, url[7 .. $]);
                    else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
                    put(dst, "</a>");
                } else {
                    if (ln.startsWith("<br>")) {
                        // always support line breaks, since we embed them here ourselves!
                        put(dst, "<br/>");
                        ln = ln[4 .. $];
                    } else if(ln.startsWith("<br/>")) {
                        put(dst, "<br/>");
                        ln = ln[5 .. $];
                    } else {
                        // neutralize the bracket when embedded HTML is disabled
                        if (settings.flags & MarkdownFlags.noInlineHtml)
                            put(dst, "&lt;");
                        else put(dst, ln[0]);
                        ln = ln[1 .. $];
                    }
                }
                break;
        }
    }
    if (br) put(dst, "<br/>");
}

/// Writes an opening tag `<name...>`; `name_additions` are appended to the
/// tag name (e.g. the heading level).
private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions)
{
    writeTag(dst, cast(Attribute[])null, name, name_additions);
}

/// ditto, additionally writing `attributes` with escaped attribute values.
private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions)
{
    dst.formattedWrite("<%s", name);
    foreach (add; name_additions)
        dst.formattedWrite("%s", add);
    foreach (a; attributes) {
        dst.formattedWrite(" %s=\"", a.attribute);
        dst.filterHTMLAttribEscape(a.value);
        put(dst, '\"');
    }
    put(dst, '>');
}

/// Tests whether the line consists only of spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
    return allOf(ln, " \t");
}

/// Tests for a setext underline: a run of '=' or of '-', optionally
/// surrounded by white space.
private bool isSetextHeaderLine(string ln)
pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 1) return false;
    if (ln[0] == '=') {
        while (!ln.empty && ln.front == '=') ln.popFront();
        return allOf(ln, " \t");
    }
    if (ln[0] == '-') {
        while (!ln.empty && ln.front == '-') ln.popFront();
        return allOf(ln, " \t");
    }
    return false;
}

/// Tests for an ATX header: one to six '#' characters followed by a space.
private bool isAtxHeaderLine(string ln)
pure @safe {
    ln = stripLeft(ln);
    size_t i = 0;
    while (i < ln.length && ln[i] == '#') i++;
    if (i < 1 || i > 6 || i >= ln.length) return false;
    return ln[i] == ' ';
}

/// Tests for a table separator row: at least two '|' separated cells, each
/// made of three or more '-' with optional ':' alignment markers.
private bool isTableSeparatorLine(string ln)
pure @safe {
    import std.algorithm.iteration : splitter;

    ln = strip(ln);
    if (ln.startsWith("|")) ln = ln[1 .. $];
    if (ln.endsWith("|")) ln = ln[0 .. $-1];

    auto cols = ln.splitter('|');
    size_t cnt = 0;
    foreach (c; cols) {
        if (c.startsWith(':')) c = c[1 .. $];
        if (c.endsWith(':')) c = c[0 .. $-1];
        if (c.length < 3 || !c.allOf("-"))
            return false;
        cnt++;
    }
    return cnt >= 2;
}

/// Returns a range of the stripped cells of a table row; one leading and one
/// trailing '|' are ignored.
private auto getTableColumns(string line)
pure @safe nothrow {
    import std.algorithm.iteration : map, splitter;

    if (line.startsWith("|")) line = line[1 .. $];
    if (line.endsWith("|")) line = line[0 .. $-1];
    return line.splitter('|').map!(s => s.strip());
}

/// Returns the number of cells of a table row.
private size_t countTableColumns(string line)
pure @safe {
    return getTableColumns(line).count();
}

/// Tests for a horizontal rule: only spaces and at least three of one of the
/// characters '-', '*' or '_'.
private bool isHlineLine(string ln)
pure @safe {
    if (allOf(ln, " -") && count(ln, '-') >= 3) return true;
    if (allOf(ln, " *") && count(ln, '*') >= 3) return true;
    if (allOf(ln, " _") && count(ln, '_') >= 3) return true;
    return false;
}

/// Tests whether the first non-white character of the line is '>'.
private bool isQuoteLine(string ln)
pure @safe {
    return ln.stripLeft().startsWith(">");
}

/// Counts the leading '>' characters, ignoring white space between them.
private size_t getQuoteLevel(string ln)
pure @safe {
    size_t level = 0;
    ln = stripLeft(ln);
    while (ln.length > 0 && ln[0] == '>') {
        level++;
        ln = stripLeft(ln[1 .. $]);
    }
    return level;
}

/// Tests for an unordered list item: '*', '+' or '-' followed by a space or tab.
private bool isUListLine(string ln)
pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 2) return false;
    if (!canFind("*+-", ln[0])) return false;
    if (ln[1] != ' ' && ln[1] != '\t') return false;
    return true;
}

/// Tests for an ordered list item: digits, a '.', then a space or tab.
private bool isOListLine(string ln)
pure @safe {
    ln = stripLeft(ln);
    if (ln.length < 1) return false;
    if (ln[0] < '0' || ln[0] > '9') return false;
    ln = ln[1 .. $];
    while (ln.length > 0 && ln[0] >= '0' && ln[0] <= '9')
        ln = ln[1 .. $];
    if (ln.length < 2) return false;
    if (ln[0] != '.') return false;
    if (ln[1] != ' ' && ln[1] != '\t')
        return false;
    return true;
}

/// Strips the list marker of the given list line type and the white space
/// following it. `tp` must be `LineType.oList` or `LineType.uList`.
private string removeListPrefix(string str, LineType tp)
pure @safe {
    switch (tp) {
        default: assert (false);
        case LineType.oList: // skip bullets and output using normal escaping
            auto idx = str.indexOf('.');
            assert (idx > 0);
            return str[idx+1 .. $].stripLeft();
        case LineType.uList:
            return stripLeft(str.stripLeft()[1 .. $]);
    }
}


/** Analyzes a line that possibly consists of a single HTML block tag.

    `isHtmlBlock` is set only if the stripped line is a single opening or
    closing tag whose name is listed in `s_blockTags`. `tagName` and `open`
    may already be filled in when `isHtmlBlock` is `false`.
*/
private auto parseHtmlBlockLine(string ln)
pure @safe {
    struct HtmlBlockInfo {
        bool isHtmlBlock;
        string tagName;
        bool open;
    }

    HtmlBlockInfo ret;
    ret.isHtmlBlock = false;
    ret.open = true;

    ln = strip(ln);
    if (ln.length < 3) return ret;
    if (ln[0] != '<') return ret;
    if (ln[1] == '/') {
        ret.open = false;
        ln = ln[1 .. $];
    }
    import std.ascii : isAlpha;
    if (!isAlpha(ln[1])) return ret;
    ln = ln[1 .. $];
    size_t idx = 0;
    while (idx < ln.length && ln[idx] != ' ' && ln[idx] != '>')
        idx++;
    ret.tagName = ln[0 .. idx];
    ln = ln[idx .. $];

    // the closing '>' must be the last character of the line
    auto eidx = ln.indexOf('>');
    if (eidx < 0) return ret;
    if (eidx != ln.length-1) return ret;

    if (!s_blockTags.canFind(ret.tagName)) return ret;

    ret.isHtmlBlock = true;
    return ret;
}

/// Tests whether the line is an opening HTML block tag.
private bool isHtmlBlockLine(string ln)
pure @safe {
    auto bi = parseHtmlBlockLine(ln);
    return bi.isHtmlBlock && bi.open;
}

/// Tests whether the line is a closing HTML block tag.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
    auto bi = parseHtmlBlockLine(ln);
    return bi.isHtmlBlock && !bi.open;
}

/// Tests whether the line starts with "```" after optional white space.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
    return ln.stripLeft.startsWith("```");
}

/// Returns the tag name determined by `parseHtmlBlockLine`.
private string getHtmlTagName(string ln)
pure @safe {
    return parseHtmlBlockLine(ln).tagName;
}

/// Tests whether the line starts with a tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
    return ln.startsWith("\t") || ln.startsWith("    ");
}

/// Removes one indentation step (a tab or four spaces); the line must be
/// indented (see `isLineIndented`).
private string unindentLine(string ln)
pure @safe {
    if (ln.startsWith("\t")) return ln[1 .. $];
    if (ln.startsWith("    ")) return ln[4 .. $];
    assert (false);
}

/** Tries to parse an emphasis span at the start of `str`.

    On success, `text` receives the enclosed text, `str` is advanced past the
    closing marker and the marker length (1 to 3) is returned. Otherwise `0`
    is returned and `str` is left untouched.
*/
private int parseEmphasis(ref string str, ref string text)
pure @safe {
    string pstr = str;
    if (pstr.length < 3) return false;

    string ctag;
    if (pstr.startsWith("***")) ctag = "***";
    else if (pstr.startsWith("**")) ctag = "**";
    else if (pstr.startsWith("*")) ctag = "*";
    else if (pstr.startsWith("___")) ctag = "___";
    else if (pstr.startsWith("__")) ctag = "__";
    else if (pstr.startsWith("_")) ctag = "_";
    else return false;

    pstr = pstr[ctag.length .. $];

    // the span must be closed by the same marker and must not be empty
    auto cidx = () @trusted { return pstr.indexOf(ctag); }();
    if (cidx < 1) return false;

    text = pstr[0 .. cidx];

    str = pstr[cidx+ctag.length .. $];
    return cast(int)ctag.length;
}

/** Tries to parse a code span delimited by one or two backticks at the start
    of `str`.

    On success, `code` receives the enclosed text and `str` is advanced past
    the closing delimiter.
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
    string pstr = str;
    if (pstr.length < 3) return false;
    string ctag;
    if (pstr.startsWith("``")) ctag = "``";
    else if (pstr.startsWith("`")) ctag = "`";
    else return false;
    pstr = pstr[ctag.length .. $];

    auto cidx = () @trusted { return pstr.indexOf(ctag); }();
    if (cidx < 1) return false;

    code = pstr[0 .. cidx];
    str = pstr[cidx+ctag.length .. $];
    return true;
}

/** Tries to parse a link or image (`[text](url "title")`, `[text][refid]` or
    `[text][]`, optionally prefixed with `!`) at the start of `str`.

    On success, `dst` is filled in and `str` is advanced past the link. If
    `attributes` is not `null`, a `{...}` attribute string following an inline
    link as well as the attributes of a resolved reference are appended to it.
    Returns `false` if no link was recognized or a reference is unknown.
*/
private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes)
pure @safe {
    string pstr = str;
    if (pstr.length < 3) return false;
    // ignore img-link prefix
    if (pstr[0] == '!') pstr = pstr[1 .. $];

    // parse the text part [text]
    if (pstr[0] != '[') return false;
    auto cidx = pstr.matchBracket();
    if (cidx < 1) return false;
    string refid;
    dst.text = pstr[1 .. cidx];
    pstr = pstr[cidx+1 .. $];

    // parse either (link '['"title"']') or '[' ']'[refid]
    if (pstr.length < 2) return false;
    if (pstr[0] == '(') {
        cidx = pstr.matchBracket();
        if (cidx < 1) return false;
        auto inner = pstr[1 .. cidx];
        immutable qidx = inner.indexOf('"');
        import std.ascii : isWhite;
        if (qidx > 1 && inner[qidx - 1].isWhite()) {
            dst.url = inner[0 .. qidx].stripRight();
            // offset of the closing quote relative to the opening one
            immutable len = inner[qidx .. $].lastIndexOf('"');
            if (len == 0) return false;
            assert (len > 0);
            dst.title = inner[qidx + 1 .. qidx + len];
        } else {
            dst.url = inner.stripRight();
            dst.title = null;
        }
        if (dst.url.startsWith("<") && dst.url.endsWith(">"))
            dst.url = dst.url[1 .. $-1];
        pstr = pstr[cidx+1 .. $];

        if (attributes) {
            if (pstr.startsWith('{')) {
                auto idx = pstr.indexOf('}');
                if (idx > 0) {
                    parseAttributeString(pstr[1 .. idx], *attributes);
                    pstr = pstr[idx+1 .. $];
                }
            }
        }
    } else {
        if (pstr[0] == ' ') pstr = pstr[1 .. $];
        if (pstr[0] != '[') return false;
        pstr = pstr[1 .. $];
        cidx = pstr.indexOf(']');
        if (cidx < 0) return false;
        // an empty reference ("[text][]") uses the link text as the id
        if (cidx == 0) refid = dst.text;
        else refid = pstr[0 .. cidx];
        pstr = pstr[cidx+1 .. $];
    }

    if (refid.length > 0) {
        auto pr = toLower(refid) in linkrefs;
        if (!pr) {
            debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
            return false;
        }
        dst.url = pr.url;
        dst.title = pr.title;
        if (attributes) *attributes ~= pr.attributes;
    }

    str = pstr;
    return true;
}

@safe unittest
{
    static void testLink(string s, Link exp, in LinkRef[string] refs)
    {
        Link link;
        assert (parseLink(s, link, refs, null), s);
        assert (link == exp);
    }
    LinkRef[string] refs;
    refs["ref"] = LinkRef("ref", "target", "title");

    testLink(`[link](target)`, Link("link", "target"), null);
    testLink(`[link](target "title")`, Link("link", "target", "title"), null);
    testLink(`[link](target "title")`, Link("link", "target", "title"), null);
    testLink(`[link](target "title" )`, Link("link", "target", "title"), null);

    testLink(`[link](target)`, Link("link", "target"), null);
    testLink(`[link](target "title")`, Link("link", "target", "title"), null);

    testLink(`[link][ref]`, Link("link", "target", "title"), refs);
    testLink(`[ref][]`, Link("ref", "target", "title"), refs);

    testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
    testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

    testLink(`[link](/target with spaces
)`, Link("link", "/target with spaces"), null);
	testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

	// white space around the title must not end up in the URL or the title
	testLink(`[link](white-space "around title" )`, Link("link", "white-space", "around title"), null);
	testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null);

	// an empty title is accepted; a quote that directly follows the target
	// (no white space in between) is kept as part of the URL
	testLink(`[link](target "")`, Link("link", "target", ""), null);
	testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

	// angle brackets around the target are removed
	testLink(`[link](<target>)`, Link("link", "target"), null);

	// inputs that parseLink has to reject
	auto failing = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link link;
	foreach (s; failing)
		assert (!parseLink(s, link, refs, null), s);
}

@safe unittest { // attributes
	// Parses `s` and compares the resulting link, the unparsed remainder of
	// `s` and the collected attributes against the expected values. Attributes
	// are only requested from parseLink if `parse_atts` is set.
	void test(string s, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...)
	@safe {
		Link lnk;
		Attribute[] atts;
		parseLink(s, lnk, refs, parse_atts ?
() @trusted { return &atts; } () : null);
		assert (lnk == explnk);
		assert (s == exprem);
		assert (atts == expatts);
	}

	test("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", ""));
	test("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz"));

	auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])];
	test("[foo][bar]", refs, false, "", Link("foo", "url", "title"));
	test("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid"));
}

/** Tries to parse an automatic link of the form `<url>` or `<user@host>` at
	the start of `str`.

	The text between the angle brackets must not contain spaces or tabs and
	must contain at least one `@` or `:`. If it contains an `@` that is not
	preceded by a purely alphanumeric `scheme:` prefix, `"mailto:"` is
	prepended.

	Params:
		str = Input text; advanced past the closing `>` on success.
		url = Receives the link target on success.

	Returns:
		`true` if an automatic link was recognized. On failure, neither `str`
		nor `url` is modified.
*/
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	import std.algorithm.searching : all;
	import std.ascii : isAlphaNum;

	string pstr = str;
	if (pstr.length < 3) return false;
	if (pstr[0] != '<') return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if (cidx < 0) return false;

	// Work on a local copy so that the caller's `url` stays untouched when
	// one of the checks below rejects the candidate.
	string target = pstr[0 .. cidx];
	if (target.anyOf(" \t")) return false;
	auto atidx = target.indexOf('@');
	auto colonidx = target.indexOf(':');
	if (atidx < 0 && colonidx < 0) return false;

	// An address with '@' is an e-mail address unless it already starts with
	// an alphanumeric "scheme:" prefix located before the '@'.
	if (atidx >= 0 && (colonidx < 0 || colonidx > atidx ||
			!target[0 .. colonidx].all!(ch => ch.isAlphaNum)))
		target = "mailto:" ~ target;

	str = pstr[cidx+1 .. $];
	url = target;
	return true;
}

unittest {
	void test(bool expected, string str, string url)
	{
		string strcpy = str;
		string outurl;
		if (!expected) {
			assert (!parseAutoLink(strcpy, outurl));
			assert (outurl.length == 0);
			assert (strcpy == str);
		} else {
			assert (parseAutoLink(strcpy, outurl));
			assert (outurl == url);
			assert (strcpy.length == 0);
		}
	}

	test(true, "<http://foo/>", "http://foo/");
	test(false, "<http://foo/", null);
	test(true, "<mailto:foo@bar>", "mailto:foo@bar");
	test(true, "<foo@bar>", "mailto:foo@bar");
	test(true, "<proto:foo@bar>", "proto:foo@bar");
	test(true, "<proto:foo@bar:123>", "proto:foo@bar:123");
	test(true, "<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz");
	// rejected candidates must leave the output URL untouched
	test(false, "<foo bar:baz>", null);
	test(false, "<foobar>", null);
}

/** Splits a trailing `{...}` attribute block off a line.

	If the line, ignoring trailing white space, ends with `}` and contains a
	`{`, the text between the last `{` and the final `}` is returned and
	`line` is shortened to everything before that `{`. Otherwise `null` is
	returned and `line` is left untouched.
*/
private string skipAttributes(ref string line)
@safe pure {
	auto strs = line.stripRight;
	if (!strs.endsWith("}")) return null;

	auto idx = strs.lastIndexOf('{');
	if (idx < 0) return null;

	auto ret = strs[idx+1 .. $-1];
	line = strs[0 .. idx];
	return ret;
}

unittest {
	void test(string inp, string outp, string att)
	{
		auto ratt = skipAttributes(inp);
		assert (ratt == att, ratt);
		assert (inp == outp, inp);
	}

	test(" foo ", " foo ", null);
	test("foo {bar}", "foo ", "bar");
	test("foo {bar} ", "foo ", "bar");
	test("foo bar} ", "foo bar} ", null);
	test(" {bar} foo ", " {bar} foo ", null);
	test(" fo {o {bar} ", " fo {o ", "bar");
	test(" fo {o} {bar} ", " fo {o} ", "bar");
}

/** Parses a space separated attribute string into a list of attributes.

	Only the short forms `#id` and `.class` are handled; every other element
	is ignored. A later `#id` replaces the value of an existing "id" entry,
	while multiple `.class` elements are accumulated in a single "class"
	entry, separated by spaces.

	Params:
		attributes = The contents of an attribute block, without the braces.
		dst = Attribute list that gets updated/extended in place.
*/
private void parseAttributeString(string attributes, ref Attribute[] dst)
@safe pure {
	import std.algorithm.iteration : splitter;

	// TODO: handle custom attributes (requires a different approach than splitter)

	foreach (el; attributes.splitter(' ')) {
		el = el.strip;
		if (!el.length) continue;
		if (el[0] == '#') {
			auto idx = dst.countUntil!(a => a.attribute == "id");
			if (idx >= 0) dst[idx].value = el[1 .. $];
			else dst ~= Attribute("id", el[1 .. $]);
		} else if (el[0] == '.') {
			auto idx = dst.countUntil!(a => a.attribute == "class");
			if (idx >= 0) dst[idx].value ~= " " ~ el[1 .. $];
			else dst ~= Attribute("class", el[1 .. $]);
		}
	}
}

unittest {
	void test(string str, Attribute[] atts...)
{
		Attribute[] res;
		parseAttributeString(str, res);
		assert (res == atts, format("%s: %s", str, res));
	}

	test("");
	test(".foo", Attribute("class", "foo"));
	test("#foo", Attribute("id", "foo"));
	test("#foo #bar", Attribute("id", "bar"));
	test(".foo .bar", Attribute("class", "foo bar"));
	test("#foo #bar", Attribute("id", "bar"));
	test(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar"));
}

/** Collects all link reference definitions and removes them from `lines`.

	A reference definition is a non-indented line of one of the forms

	---
	[refid]: link "opt text"
	[refid]: <link> "opt text"
	---

	where the optional title may be written as `"opt text"`, `'opt text'` or
	`(opt text)`, and the line may end with a `{...}` attribute block.

	Params:
		lines = The source lines; replaced by a new array that contains only
			the lines that are not reference definitions.

	Returns:
		The detected references, keyed by the lower-cased reference ID.
*/
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	foreach (lnidx, ln; lines) {
		// line must not be indented
		if (isLineIndented(ln)) continue;
		ln = strip(ln);
		if (!ln.startsWith("[")) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if (idx < 0) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		// a trailing {...} block is split off before URL and title are parsed
		string attstr = ln.skipAttributes();

		string url;
		if (ln.startsWith("<")) {
			idx = ln.indexOf('>');
			if (idx < 0) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// The URL ends at the first space or tab, whichever comes first.
			immutable spidx = ln.indexOf(' ');
			immutable tabidx = ln.indexOf('\t');
			if (spidx < 0 || (tabidx >= 0 && tabidx < spidx)) idx = tabidx;
			else idx = spidx;

			if (idx < 0) {
				url = ln;
				ln = ln[$ .. $];
			} else {
				url = ln[0 .. idx];
				ln = ln[idx+1 .. $];
			}
		}
		// Strip both ends: removing an attribute block above can leave
		// trailing white space that would hide the closing title delimiter.
		ln = strip(ln);

		string title;
		if (ln.length >= 3) {
			if (ln[0] == '(' && ln[$-1] == ')'
				|| ln[0] == '\"' && ln[$-1] == '\"'
				|| ln[0] == '\'' && ln[$-1] == '\'' )
			{
				title = ln[1 .. $-1];
			}
		}

		LinkRef lref;
		lref.id = refid;
		lref.url = url;
		lref.title = title;
		parseAttributeString(attstr, lref.attributes);
		ret[toLower(refid)] = lref;
		reflines[lnidx] = true;

		debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach (i, ln; lines)
		if (i !in reflines)
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}

@safe unittest { // reference definitions: tab separators and trailing attributes
	auto lines = [
		"[a]: /url\t\"some title\"",
		"[b]: /url \"title\" {.cls}",
		"text"
	];
	auto refs = scanForReferences(lines);
	assert (lines == ["text"]);
	assert (refs["a"].url == "/url", refs["a"].url);
	assert (refs["a"].title == "some title", refs["a"].title);
	assert (refs["b"].url == "/url", refs["b"].url);
	assert (refs["b"].title == "title", refs["b"].title);
	assert (refs["b"].attributes == [Attribute("class", "cls")]);
}


/**
	Generates an identifier suitable for use within a URL.

	The resulting string will contain only ASCII lower case alphabetic or
	numeric characters, as well as dashes (-). Every sequence of
	non-alphanumeric characters will be replaced by a single dash. No dashes
	will be at either the front or the back of the result string.

	Params:
		text = Input range of `dchar` elements to convert.

	Returns:
		A lazily evaluated input range of `char`.
*/
auto asSlug(R)(R text)
	if (isInputRange!R && is(typeof(R.init.front) == dchar))
{
	static struct SlugRange {
		private {
			R _input;
			// set while a pending '-' still has to be emitted before the
			// next alphanumeric character of _input
			bool _dash;
		}

		this(R input)
		{
			_input = input;
			// leading non-alphanumeric characters never produce a dash
			skipNonAlphaNum();
		}

		@property bool empty() const { return _dash ? false : _input.empty; }
		@property char front() const {
			if (_dash) return '-';

			// _input.front is always an ASCII letter or digit here, so the
			// narrowing cast is lossless
			char r = cast(char)_input.front;
			if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A'));
			return r;
		}

		void popFront()
		{
			if (_dash) {
				_dash = false;
				return;
			}

			_input.popFront();
			auto na = skipNonAlphaNum();
			// only emit a dash if more alphanumeric characters follow
			if (na && !_input.empty)
				_dash = true;
		}

		// Advances _input to the next ASCII letter or digit and reports
		// whether any characters have been skipped.
		private bool skipNonAlphaNum()
		{
			bool have_skipped = false;
			while (!_input.empty) {
				switch (_input.front) {
					default:
						_input.popFront();
						have_skipped = true;
						break;
					case 'a': .. case 'z':
					case 'A': .. case 'Z':
					case '0': .. case '9':
						return have_skipped;
				}
			}
			return have_skipped;
		}
	}
	return SlugRange(text);
}

unittest {
	import std.algorithm : equal;
	assert ("".asSlug.equal(""));
	assert (".,-".asSlug.equal(""));
	assert ("abc".asSlug.equal("abc"));
	assert ("aBc123".asSlug.equal("abc123"));
	assert ("....aBc...123...".asSlug.equal("abc-123"));
}

/// A link reference definition as collected by `scanForReferences`.
private struct LinkRef {
	string id;
	string url;
	string title;
	Attribute[] attributes;
}

/// The components of a parsed link as filled in by `parseLink`.
private struct Link {
	string text;
	string url;
	string title;
}

@safe unittest { // alt and title attributes
	assert (filterMarkdown("![alt](http://example.org/image)")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
	assert (filterMarkdown("![alt](http://example.org/image \"Title\")")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}

@safe unittest { // complex links
	assert (filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and")
		== "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
	assert (filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
		== "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}

@safe unittest { // check CTFE-ability
	enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	assert (res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}

@safe unittest { // correct line breaks in restrictive mode
	auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert (res == "<p>hello<br/>world\n</p>\n",
res); 1586 } 1587 1588 /*@safe unittest { // code blocks and blockquotes 1589 assert (filterMarkdown("\tthis\n\tis\n\tcode") == 1590 "<pre><code>this\nis\ncode</code></pre>\n"); 1591 assert (filterMarkdown(" this\n is\n code") == 1592 "<pre><code>this\nis\ncode</code></pre>\n"); 1593 assert (filterMarkdown(" this\n is\n\tcode") == 1594 "<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n"); 1595 assert (filterMarkdown("\tthis\n\n\tcode") == 1596 "<pre><code>this\n\ncode</code></pre>\n"); 1597 assert (filterMarkdown("\t> this") == 1598 "<pre><code>> this</code></pre>\n"); 1599 assert (filterMarkdown("> this") == 1600 "<blockquote><pre><code>this</code></pre></blockquote>\n"); 1601 assert (filterMarkdown("> this\n is code") == 1602 "<blockquote><pre><code>this\nis code</code></pre></blockquote>\n"); 1603 }*/ 1604 1605 @safe unittest { 1606 assert (filterMarkdown("## Hello, World!") == "<h2 id=\"hello-world\"> Hello, World!</h2>\n", filterMarkdown("## Hello, World!")); 1607 } 1608 1609 @safe unittest { // tables 1610 assert (filterMarkdown("foo|bar\n---|---", MarkdownFlags.tables) 1611 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n</table>\n"); 1612 assert (filterMarkdown(" *foo* | bar \n---|---\n baz|bam", MarkdownFlags.tables) 1613 == "<table>\n<tr><th><em>foo</em></th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1614 assert (filterMarkdown("|foo|bar|\n---|---\n baz|bam", MarkdownFlags.tables) 1615 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1616 assert (filterMarkdown("foo|bar\n|---|---|\nbaz|bam", MarkdownFlags.tables) 1617 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1618 assert (filterMarkdown("foo|bar\n---|---\n|baz|bam|", MarkdownFlags.tables) 1619 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1620 assert (filterMarkdown("foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|", 
MarkdownFlags.tables) 1621 == "<table>\n<tr><th align=\"left\">foo</th><th align=\"right\">bar</th><th align=\"center\">baz</th></tr>\n" 1622 ~ "<tr><td align=\"left\">baz</td><td align=\"right\">bam</td><td align=\"center\">bap</td></tr>\n</table>\n"); 1623 assert (filterMarkdown(" |bar\n---|---", MarkdownFlags.tables) 1624 == "<table>\n<tr><th></th><th>bar</th></tr>\n</table>\n"); 1625 assert (filterMarkdown("foo|bar\n---|---\nbaz|", MarkdownFlags.tables) 1626 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td></tr>\n</table>\n"); 1627 } 1628 1629 @safe unittest { // issue #1527 - blank lines in code blocks 1630 assert (filterMarkdown(" foo\n\n bar\n") == 1631 "<pre class=\"prettyprint\"><code>foo\n\nbar\n</code></pre>\n"); 1632 } 1633 1634 @safe unittest { 1635 assert (filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault) == 1636 "<blockquote><pre class=\"prettyprint\"><code>test\n</code></pre>\n</blockquote>\n"); 1637 } 1638 1639 @safe unittest { // issue #1845 - malicious URI targets 1640 assert (filterMarkdown("[foo](javascript:foo) ![bar](javascript:bar) <javascript:baz>", MarkdownFlags.forumDefault) == 1641 "<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"bar\"> <a href=\"#\">javascript:baz</a>\n</p>\n"); 1642 assert (filterMarkdown("[foo][foo] ![foo][foo]\n[foo]: javascript:foo", MarkdownFlags.forumDefault) == 1643 "<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"foo\">\n</p>\n"); 1644 assert (filterMarkdown("[foo](javascript%3Abar)", MarkdownFlags.forumDefault) == 1645 "<p><a href=\"javascript%3Abar\">foo</a>\n</p>\n"); 1646 1647 // extra XSS regression tests 1648 assert (filterMarkdown("[<script></script>](bar)", MarkdownFlags.forumDefault) == 1649 "<p><a href=\"bar\"><script></script></a>\n</p>\n"); 1650 assert (filterMarkdown("[foo](\"><script></script><span foo=\")", MarkdownFlags.forumDefault) == 1651 "<p><a href=\""><script></script><span foo="\">foo</a>\n</p>\n"); 1652 assert (filterMarkdown("[foo](javascript:bar)", 
MarkdownFlags.forumDefault) == 1653 "<p><a href=\"javascript&#58;bar\">foo</a>\n</p>\n"); 1654 } 1655 1656 @safe unittest { // issue #2132 - table with more columns in body goes out of array bounds 1657 assert (filterMarkdown("| a | b |\n|--------|--------|\n| c | d | e |", MarkdownFlags.tables) == 1658 "<table>\n<tr><th>a</th><th>b</th></tr>\n<tr><td>c</td><td>d</td><td>e</td></tr>\n</table>\n"); 1659 } 1660 1661 @safe unittest { // lists 1662 assert (filterMarkdown("- foo\n- bar") == 1663 "<ul>\n<li>foo\n</li>\n<li>bar\n</li>\n</ul>\n"); 1664 assert (filterMarkdown("- foo\n\n- bar") == 1665 "<ul>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ul>\n"); 1666 assert (filterMarkdown("1. foo\n2. bar") == 1667 "<ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n"); 1668 assert (filterMarkdown("1. foo\n\n2. bar") == 1669 "<ol>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ol>\n"); 1670 } 1671 1672 @safe unittest { // figures 1673 assert (filterMarkdown("- %%%") == "<ul>\n<li>%%%\n</li>\n</ul>\n"); 1674 assert (filterMarkdown("- ###") == "<ul>\n<li>###\n</li>\n</ul>\n"); 1675 assert (filterMarkdown("- %%%", MarkdownFlags.figures) == "<figure></figure>\n"); 1676 assert (filterMarkdown("- ###", MarkdownFlags.figures) == "<figcaption></figcaption>\n"); 1677 assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar", MarkdownFlags.figures) == 1678 "<figure>foo\n<figcaption>bar\n</figcaption>\n</figure>\n"); 1679 assert (filterMarkdown("- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) == 1680 "<figure><p>foo\n</p>\n<p>bar\n</p>\n<figcaption>baz\n</figcaption>\n</figure>\n"); 1681 assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz", MarkdownFlags.figures) == 1682 "<figure>foo\n<figcaption><p>bar\n</p>\n<p>baz\n</p>\n</figcaption>\n</figure>\n"); 1683 assert (filterMarkdown("- %%%\n\t1. foo\n\t2. 
bar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) == 1684 "<figure><ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n<figcaption>baz\n</figcaption>\n</figure>\n"); 1685 assert (filterMarkdown("- foo\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n"); 1686 assert (filterMarkdown("- foo\n\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n"); 1687 } 1688 1689 @safe unittest { // HTML entities 1690 assert(filterMarkdown(" ") == "<p> \n</p>\n"); 1691 assert(filterMarkdown("* *") == "<p><em> </em>\n</p>\n"); 1692 assert(filterMarkdown("` `") == "<p><code class=\"prettyprint\">&nbsp;</code>\n</p>\n"); 1693 }