/**
	Markdown parser implementation

	Copyright: © 2012-2019 Sönke Ludwig
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module vibe.textfilter.markdown;

import vibe.core.log;
import vibe.textfilter.html;
import vibe.utils.string;

import std.algorithm : canFind, countUntil, min;
import std.array;
import std.format;
import std.range;
import std.string;

/*
	TODO:
		detect inline HTML tags
*/

version(MarkdownTest)
{
	/// Stand-alone test driver: converts "test.txt" and logs the resulting HTML line by line.
	int main()
	{
		import std.file;
		setLogLevel(LogLevel.Trace);
		auto text = readText("test.txt");
		auto result = appender!string();
		filterMarkdown(result, text);
		foreach( ln; splitLines(result.data) )
			logInfo(ln);
		return 0;
	}
}

/** Returns a Markdown filtered HTML string.

	Params:
		str = The Markdown source text
		flags = Flags controlling the parser capabilities
		settings = Full settings object; a default constructed one is used if `null`
*/
string filterMarkdown()(string str, MarkdownFlags flags)
@trusted { // scope class is not @safe for DMD 2.072
	scope settings = new MarkdownSettings;
	settings.flags = flags;
	return filterMarkdown(str, settings);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	auto dst = appender!string();
	filterMarkdown(dst, str, settings);
	return dst.data;
}


/** Markdown filters the given string and writes the corresponding HTML to an output range.

	Params:
		dst = Output range that receives the generated HTML
		src = The Markdown source text
		flags = Flags controlling the parser capabilities
		settings = Full settings object; a default constructed one is used if `null`
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope settings = new MarkdownSettings;
	settings.flags = flags;
	filterMarkdown(dst, src, settings);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	if (!settings) settings = new MarkdownSettings;

	// pipeline: split into lines -> collect link references -> classify
	// lines -> build the block tree -> serialize the tree as HTML
	auto all_lines = splitLines(src);
	auto links = scanForReferences(all_lines);
	auto lines = parseLines(all_lines, settings);
	Block root_block;
	parseBlocks(root_block, lines, null, settings);
	writeBlock(dst, root_block, links, settings);
}

/**
	Returns the hierarchy of sections

	Only header blocks at the top level of the document are considered. A
	header becomes a sub section of the most recent header that has a
	numerically lower heading level.
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
	import std.conv : to;

	if (!settings) settings = new MarkdownSettings;
	auto all_lines = splitLines(markdown_source);
	auto lines = parseLines(all_lines, settings);
	Block root_block;
	parseBlocks(root_block, lines, null, settings);
	Section root;

	foreach (ref sb; root_block.blocks) {
		if (sb.type == BlockType.header) {
			// descend along the last section of each level until a section
			// of the same or a deeper heading level is found
			auto s = &root;
			while (true) {
				if (s.subSections.length == 0) break;
				if (s.subSections[$-1].headingLevel >= sb.headerLevel) break;
				s = &s.subSections[$-1];
			}
			s.subSections ~= Section(sb.headerLevel, sb.text[0], sb.text[0].asSlug.to!string);
		}
	}

	return root.subSections;
}

///
unittest {
	import std.conv : to;
	assert (getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth") ==
		[
			Section(2, " first", "first"),
			Section(2, " second", "second", [
				Section(3, " third", "third")
			]),
			Section(1, " fourth", "fourth", [
				Section(3, " fifth", "fifth")
			])
		]
	);
}

/** Settings that control the Markdown to HTML conversion.
*/
final class MarkdownSettings {
	/// Controls the capabilities of the parser.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/// Heading tags will start at this level.
	size_t headingBaseLevel = 1;

	/// Called for every link/image URL to perform arbitrary transformations.
	string delegate(string url_or_path, bool is_image) urlFilter;

	/// White list of URI schemas that can occur in link/image targets
	string[] allowedURISchemas = ["http", "https", "ftp", "mailto"];
}

/** Bit flags that enable or disable individual parser features.
*/
enum MarkdownFlags {
	/** Same as `vanillaMarkdown`
	*/
	none = 0,

	/** Convert line breaks into hard line breaks in the output

		This option is useful when operating on text that may be formatted as
		plain text, without having Markdown in mind, while still improving
		the appearance of the text in many cases. A common example would be
		to format e-mails or newsgroup posts.
	*/
	keepLineBreaks = 1<<0,

	/** Support fenced code blocks.
	*/
	backtickCodeBlocks = 1<<1,

	/** Disable support for embedded HTML
	*/
	noInlineHtml = 1<<2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,

	/** Support table definitions

		The syntax is based on Markdown Extra and GitHub flavored Markdown.
	*/
	tables = 1<<5,

	/** Support HTML attributes after links

		Links or images directly followed by `{ … }` allow regular HTML
		attributes to be added to the generated HTML element.
	*/
	attributes = 1<<6,

	/** Recognize figure definitions

		Figures can be defined using a modified list syntax:

		```
		- %%%
			This is the figure content

			- ###
				This is optional caption content
		```

		Just like for lists, arbitrary blocks can be nested within figure and
		figure caption blocks. If only a single paragraph is present within a
		figure caption block, the paragraph text will be emitted without the
		surrounding `<p>` tags. The same is true for figure blocks that contain
		only a single paragraph and any number of additional figure caption
		blocks.
	*/
	figures = 1<<7,

	/** Support only standard Markdown features

		Note that the parser is not fully CommonMark compliant at the moment,
		but this is the general idea behind this option.
	*/
	vanillaMarkdown = none,

	/** Default set of flags suitable for use within an online forum
	*/
	forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml|tables
}

/** A single entry of the document outline returned by `getMarkdownOutline`.
*/
struct Section {
	/// Heading level as written in the source (number of `#` characters, or 1/2 for setext headers)
	size_t headingLevel;
	/// Text of the heading
	string caption;
	/// Slug generated from the heading text
	string anchor;
	/// Nested sections
	Section[] subSections;
}

private {
	// HTML tags that are recognized as the start of an HTML block
	immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
}

/// Kind of a single indentation step of a line
private enum IndentType {
	white, /// a tab or four spaces
	quote  /// a `>` block quote marker
}

/// Classification of a source line, based on its unindented contents
private enum LineType {
	undefined,
	blank,
	plain,
	hline,
	atxHeader,
	setextHeader,
	tableSeparator,
	uList,
	oList,
	figure,
	figureCaption,
	htmlBlock,
	codeBlockDelimiter
}

/// A classified source line
private struct Line {
	LineType type;
	IndentType[] indent; /// sequence of indentation steps found at the line start
	string text;         /// the original, unmodified line
	string unindented;   /// the line with all indentation steps removed

	/** Returns the original line with the first `n` indentation steps removed.

		`n` must not exceed `indent.length`.
	*/
	string unindent(size_t n)
	pure @safe {
		assert (n <= indent.length);
		string ln = text;
		foreach (i; 0 .. n) {
			final switch(indent[i]){
				case IndentType.white:
					// a white step is either four spaces or a single tab
					// (see parseLines)
					if (ln[0] == ' ') ln = ln[4 .. $];
					else ln = ln[1 .. $];
					break;
				case IndentType.quote:
					// remove the '>' and a single optional space after it
					ln = ln.stripLeft()[1 .. $];
					if (ln.startsWith(' '))
						ln.popFront();
					break;
			}
		}
		return ln;
	}
}

/** Determines the indentation and the type of each source line.

	Each leading tab or group of four spaces counts as one `IndentType.white`
	step and each `>` (followed by one optional space) counts as one
	`IndentType.quote` step.
*/
private Line[] parseLines(string[] lines, scope MarkdownSettings settings)
pure @safe {
	Line[] ret;
	while( !lines.empty ){
		auto ln = lines.front;
		lines.popFront();

		Line lninfo;
		lninfo.text = ln;

		while (ln.length > 0) {
			if (ln[0] == '\t') {
				lninfo.indent ~= IndentType.white;
				ln.popFront();
			} else if (ln.startsWith("    ")) {
				// the test has to match the four characters removed here,
				// otherwise line contents would get lost
				lninfo.indent ~= IndentType.white;
				ln.popFrontN(4);
			} else {
				if (ln.stripLeft().startsWith(">")) {
					lninfo.indent ~= IndentType.quote;
					ln = ln.stripLeft();
					ln.popFront();
					if (ln.startsWith(' '))
						ln.popFront();
				} else break;
			}
		}
		lninfo.unindented = ln;

		// the order of the checks matters, as some of the line patterns overlap
		if ((settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln))
			lninfo.type = LineType.codeBlockDelimiter;
		else if(isAtxHeaderLine(ln)) lninfo.type = LineType.atxHeader;
		else if(isSetextHeaderLine(ln)) lninfo.type = LineType.setextHeader;
		else if((settings.flags & MarkdownFlags.tables) && isTableSeparatorLine(ln))
			lninfo.type = LineType.tableSeparator;
		else if(isHlineLine(ln)) lninfo.type = LineType.hline;
		else if(isOListLine(ln)) lninfo.type = LineType.oList;
		else if(isUListLine(ln)) {
			if (settings.flags & MarkdownFlags.figures) {
				auto suff = removeListPrefix(ln, LineType.uList);
				if (suff == "%%%") lninfo.type = LineType.figure;
				else if (suff == "###") lninfo.type = LineType.figureCaption;
				else lninfo.type = LineType.uList;
			} else lninfo.type = LineType.uList;
		} else if(isLineBlank(ln)) lninfo.type = LineType.blank;
		else if(!(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln))
			lninfo.type = LineType.htmlBlock;
		else lninfo.type = LineType.plain;

		ret ~= lninfo;
	}
	return ret;
}

unittest {
	import std.conv : to;
	auto s = new MarkdownSettings;
	s.flags = MarkdownFlags.forumDefault;
	// no space, one space, two spaces and five spaces after the quote marker
	auto lns = [">```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]);
	lns = ["> ```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]);
	lns = [">  ```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], " ```D")]);
	lns = [">     ```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], lns[0], "```D")]);
	lns = [">test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]);
	lns = ["> test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]);
	lns = [">  test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], " test")]);
	lns = [">     test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote, IndentType.white], lns[0], "test")]);
}

/// Type of a node in the parsed block tree
private enum BlockType {
	plain,
	text,
	paragraph,
	header,
	table,
	oList,
	uList,
	listItem,
	code,
	quote,
	figure,
	figureCaption
}

/// Node of the parsed block tree
private struct Block {
	BlockType type;
	Attribute[] attributes;
	string[] text;
	Block[] blocks;
	size_t headerLevel;
	Alignment[] columns;
}

/// HTML attribute name/value pair
private struct Attribute {
	string attribute;
	string value;
}

/// Table column alignment
private enum Alignment {
	none = 0,
	left = 1<<0,
	right = 1<<1,
	center = left | right
}

/** Consumes lines from the front of `lines` and appends the resulting blocks to `root`.

	Parsing stops as soon as a line is encountered whose indentation does not
	start with `base_indent`. All consumed lines are removed from `lines`.
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	import std.conv : to;
	import std.algorithm.comparison : among;

	if (base_indent.length == 0) root.type = BlockType.text;
	else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote;

	while (!lines.empty) {
		auto ln = lines.front;

		if (ln.type == LineType.blank) {
			lines.popFront();
			continue;
		}

		if (ln.indent != base_indent) {
			// a line that is not nested within the current block ends it
			if (ln.indent.length < base_indent.length
				|| ln.indent[0 .. base_indent.length] != base_indent)
			{
				return;
			}

			auto cindent = base_indent ~ IndentType.white;
			if (ln.indent == cindent) {
				// exactly one additional white indentation step starts an
				// indented code block
				Block cblock;
				cblock.type = BlockType.code;
				while (!lines.empty && (lines.front.unindented.strip.empty
					|| lines.front.indent.length >= cindent.length
					&& lines.front.indent[0 .. cindent.length] == cindent))
				{
					cblock.text ~= lines.front.indent.length >= cindent.length
						? lines.front.unindent(cindent.length) : "";
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			final switch (ln.type) {
				case LineType.undefined: assert (false);
				case LineType.blank: assert (false);
				case LineType.plain:
					if (lines.length >= 2 && lines[1].type == LineType.setextHeader) {
						auto setln = lines[1].unindented;
						b.type = BlockType.header;
						b.text = [ln.unindented];
						if (settings.flags & MarkdownFlags.attributes)
							parseAttributeString(skipAttributes(b.text[0]), b.attributes);
						if (!b.attributes.canFind!(a => a.attribute == "id"))
							b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string);
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator
						&& ln.unindented.indexOf('|') >= 0)
					{
						auto setln = lines[1].unindented;
						b.type = BlockType.table;
						b.text = [ln.unindented];
						foreach (c; getTableColumns(setln)) {
							Alignment a = Alignment.none;
							if (c.startsWith(':')) a |= Alignment.left;
							if (c.endsWith(':')) a |= Alignment.right;
							b.columns ~= a;
						}

						lines.popFrontN(2);
						while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) {
							b.text ~= lines.front.unindented;
							lines.popFront();
						}
					} else {
						b.type = BlockType.paragraph;
						b.text = skipText(lines, base_indent);
					}
					break;
				case LineType.hline:
					b.type = BlockType.plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.atxHeader:
					b.type = BlockType.header;
					string hl = ln.unindented;
					b.headerLevel = 0;
					while (hl.length > 0 && hl[0] == '#') {
						b.headerLevel++;
						hl = hl[1 .. $];
					}

					if (settings.flags & MarkdownFlags.attributes)
						parseAttributeString(skipAttributes(hl), b.attributes);
					if (!b.attributes.canFind!(a => a.attribute == "id"))
						b.attributes ~= Attribute("id", asSlug(hl).to!string);

					// remove the optional closing sequence of '#' characters
					while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.setextHeader:
					// stray underline without a preceding text line
					lines.popFront();
					break;
				case LineType.tableSeparator:
					// stray separator without a preceding table header line
					lines.popFront();
					break;
				case LineType.figure:
				case LineType.figureCaption:
					b.type = ln.type == LineType.figure
						? BlockType.figure : BlockType.figureCaption;

					auto itemindent = base_indent ~ IndentType.white;
					lines.popFront();
					parseBlocks(b, lines, itemindent, settings);
					break;
				case LineType.uList:
				case LineType.oList:
					b.type = ln.type == LineType.uList ? BlockType.uList : BlockType.oList;

					auto itemindent = base_indent ~ IndentType.white;
					bool paraMode = false;

					// look ahead to determine whether the list is in paragraph
					// mode (one or multiple <p></p> nested within each item)
					bool couldBeParaMode = false;
					foreach (pln; lines[1 .. $]) {
						if (pln.type == LineType.blank) {
							couldBeParaMode = true;
							continue;
						}
						if (!pln.indent.startsWith(base_indent)) break;
						if (pln.indent == base_indent) {
							if (pln.type == ln.type)
								paraMode = couldBeParaMode;
							break;
						}
					}

					while (!lines.empty && lines.front.type == ln.type
						&& lines.front.indent == base_indent)
					{
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						if (paraMode) {
							Block para;
							para.type = BlockType.paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.listItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.htmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					if (!starttag.isHtmlBlock || !starttag.open)
						break;

					b.type = BlockType.plain;
					// copy lines verbatim until the tag that started the
					// block has been closed again
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
						if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName)
							nestlevel += taginfo.open ? 1 : -1;
						if (nestlevel <= 0) break;
					}
					break;
				case LineType.codeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.code;
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;
						if (lines.front.type == LineType.codeBlockDelimiter) {
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}


/** Consumes the first line of `lines`, plus all directly following plain
	lines that match `indent`, and returns their (unindented) text.

	Always returns at least one element.
*/
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	static bool matchesIndent(IndentType[] indent, IndentType[] base_indent)
	{
		if (indent.length > base_indent.length) return false;
		if (indent != base_indent[0 .. indent.length]) return false;
		sizediff_t qidx = -1;
		foreach_reverse (i, tp; base_indent)
			if (tp == IndentType.quote) {
				qidx = i;
				break;
			}
		if (qidx >= 0) {
			qidx = base_indent.length-1 - qidx;
			if( indent.length <= qidx ) return false;
		}
		return true;
	}

	// return value is used in variables that don't get bounds checks on the
	// first element, so we should return at least one
	if (lines.empty)
		return [""];

	string[] ret;

	while (true) {
		ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length));
		lines.popFront();

		if (lines.empty || !matchesIndent(lines.front.indent, indent)
			|| lines.front.type != LineType.plain)
		{
			return ret;
		}
	}
}

/// private
/// Recursively writes the HTML representation of `block` to `dst`.
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	final switch (block.type) {
		case BlockType.plain:
			// plain blocks contain verbatim HTML
			foreach (ln; block.text) {
				put(dst, ln);
				put(dst, "\n");
			}
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.text:
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.paragraph:
			assert (block.blocks.length == 0);
			put(dst, "<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			put(dst, "</p>\n");
			break;
		case BlockType.header:
			assert (block.blocks.length == 0);
			assert (block.text.length == 1);
			auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			dst.writeTag(block.attributes, "h", hlvl);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", hlvl);
			break;
		case BlockType.table:
			import std.algorithm.iteration : splitter;

			static string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""];

			put(dst, "<table>\n");
			put(dst, "<tr>");
			size_t i = 0;
			foreach (col; block.text[0].getTableColumns()) {
				put(dst, "<th");
				put(dst, alstr[block.columns[i]]);
				put(dst, '>');
				dst.writeMarkdownEscaped(col, links, settings);
				put(dst, "</th>");
				// surplus cells reuse the alignment of the last column
				if (i + 1 < block.columns.length)
					i++;
			}
			put(dst, "</tr>\n");
			foreach (ln; block.text[1 .. $]) {
				put(dst, "<tr>");
				i = 0;
				foreach (col; ln.getTableColumns()) {
					put(dst, "<td");
					put(dst, alstr[block.columns[i]]);
					put(dst, '>');
					dst.writeMarkdownEscaped(col, links, settings);
					put(dst, "</td>");
					if (i + 1 < block.columns.length)
						i++;
				}
				put(dst, "</tr>\n");
			}
			put(dst, "</table>\n");
			break;
		case BlockType.oList:
			put(dst, "<ol>\n");
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</ol>\n");
			break;
		case BlockType.uList:
			put(dst, "<ul>\n");
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</ul>\n");
			break;
		case BlockType.listItem:
			put(dst, "<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</li>\n");
			break;
		case BlockType.code:
			assert (block.blocks.length == 0);
			put(dst, "<pre class=\"prettyprint\"><code>");
			foreach (ln; block.text) {
				filterHTMLEscape(dst, ln);
				put(dst, "\n");
			}
			put(dst, "</code></pre>\n");
			break;
		case BlockType.quote:
			put(dst, "<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</blockquote>\n");
			break;
		case BlockType.figure:
			put(dst, "<figure>");
			// a single non-caption paragraph is written without <p> tags
			bool omit_para = block.blocks.count!(b => b.type != BlockType.figureCaption) == 1;
			foreach (b; block.blocks) {
				if (b.type == BlockType.paragraph && omit_para) {
					writeMarkdownEscaped(dst, b, links, settings);
				} else writeBlock(dst, b, links, settings);
			}
			put(dst, "</figure>\n");
			break;
		case BlockType.figureCaption:
			put(dst, "<figcaption>");
			if (block.blocks.length == 1 && block.blocks[0].type == BlockType.paragraph) {
				writeMarkdownEscaped(dst, block.blocks[0], links, settings);
			} else {
				foreach (b; block.blocks)
					writeBlock(dst, b, links, settings);
			}
			put(dst, "</figcaption>\n");
			break;
	}
}

/// Writes the text lines of `block` with all inline Markdown elements converted to HTML.
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	auto lines = () @trusted { return cast(string[])block.text; } ();
	auto text = settings.flags & MarkdownFlags.keepLineBreaks ? lines.join("<br>") : lines.join("\n");
	writeMarkdownEscaped(dst, text, links, settings);
	if (lines.length) put(dst, "\n");
}

/// private
/** Writes a piece of text with all inline Markdown elements (emphasis, inline
	code, links, images, auto links) converted to HTML.

	If `MarkdownFlags.noInlineHtml` is set, `<` and `>` characters that are not
	part of a recognized construct are written as `&lt;` and `&gt;`.
*/
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	bool isAllowedURI(string lnk) {
		auto idx = lnk.indexOf('/');
		auto cidx = lnk.indexOf(':');
		// always allow local URIs
		if (cidx < 0 || idx >= 0 && cidx > idx) return true;
		return settings.allowedURISchemas.canFind(lnk[0 .. cidx]);
	}

	string filterLink(string lnk, bool is_image) {
		if (isAllowedURI(lnk))
			return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk;
		return "#"; // replace link with unknown schema with dummy URI
	}

	// Writes text that bypasses the inline parser. With noInlineHtml set, the
	// angle brackets are escaped in the same way as in the main loop below,
	// so that such text cannot be used to inject raw HTML tags.
	void putVerbatim(string txt) {
		if (!(settings.flags & MarkdownFlags.noInlineHtml)) {
			put(dst, txt);
			return;
		}
		foreach (char ch; txt) {
			if (ch == '<') put(dst, "&lt;");
			else if (ch == '>') put(dst, "&gt;");
			else put(dst, ch);
		}
	}

	// two trailing spaces request a hard line break
	bool br = ln.endsWith("  ");
	while (ln.length > 0) {
		switch (ln[0]) {
			default:
				put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '\\':
				if (ln.length >= 2) {
					switch (ln[1]) {
						default:
							put(dst, ln[0 .. 2]);
							ln = ln[2 .. $];
							break;
						case '\'', '`', '*', '_', '{', '}', '[', ']',
							'(', ')', '#', '+', '-', '.', '!':
							put(dst, ln[1]);
							ln = ln[2 .. $];
							break;
					}
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '_':
			case '*':
				string text;
				if (auto em = parseEmphasis(ln, text)) {
					put(dst, em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
					putVerbatim(text);
					put(dst, em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '`':
				string code;
				if (parseInlineCode(ln, code)) {
					put(dst, "<code class=\"prettyprint\">");
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</code>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '[':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("href", filterLink(link.url, false));
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "a");
					writeMarkdownEscaped(dst, link.text, linkrefs, settings);
					put(dst, "</a>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '!':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("src", filterLink(link.url, true));
					attributes ~= Attribute("alt", link.text);
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "img");
				} else if( ln.length >= 2 ){
					put(dst, ln[0 .. 2]);
					ln = ln[2 .. $];
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '>':
				if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, "&gt;");
				else put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if (parseAutoLink(ln, url)) {
					bool is_email = url.startsWith("mailto:");
					put(dst, "<a href=\"");
					if (is_email) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, filterLink(url, false));
					put(dst, "\">");
					if (is_email) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</a>");
				} else {
					if (ln.startsWith("<br>")) {
						// always support line breaks, since we embed them here ourselves!
						put(dst, "<br/>");
						ln = ln[4 .. $];
					} else if(ln.startsWith("<br/>")) {
						put(dst, "<br/>");
						ln = ln[5 .. $];
					} else {
						if (settings.flags & MarkdownFlags.noInlineHtml)
							put(dst, "&lt;");
						else put(dst, ln[0]);
						ln = ln[1 .. $];
					}
				}
				break;
		}
	}
	if (br) put(dst, "<br/>");
}

/// Writes an opening HTML tag without attributes; `name_additions` are appended to the tag name.
private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions)
{
	writeTag(dst, cast(Attribute[])null, name, name_additions);
}

/// Writes an opening HTML tag with the given attributes; attribute values are HTML escaped.
private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions)
{
	dst.formattedWrite("<%s", name);
	foreach (add; name_additions)
		dst.formattedWrite("%s", add);
	foreach (a; attributes) {
		dst.formattedWrite(" %s=\"", a.attribute);
		dst.filterHTMLAttribEscape(a.value);
		put(dst, '\"');
	}
	put(dst, '>');
}

/// Tests if the line consists only of spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
	return allOf(ln, " \t");
}

/// Tests if the line is a setext header underline (a run of `=` or `-` characters).
private bool isSetextHeaderLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if (ln.length < 1) return false;
	if (ln[0] == '=') {
		while (!ln.empty && ln.front == '=') ln.popFront();
		return allOf(ln, " \t");
	}
	if (ln[0] == '-') {
		while (!ln.empty && ln.front == '-') ln.popFront();
		return allOf(ln, " \t");
	}
	return false;
}

/// Tests if the line starts with one to six `#` characters followed by a space.
private bool isAtxHeaderLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	size_t i = 0;
	while (i < ln.length && ln[i] == '#') i++;
	if (i < 1 || i > 6 || i >= ln.length) return false;
	return ln[i] == ' ';
}

/** Tests if the line separates a table header from the table body.

	At least two columns are required, each consisting of at least three `-`
	characters, optionally prefixed and/or suffixed by a `:`.
*/
private bool isTableSeparatorLine(string ln)
pure @safe {
	import std.algorithm.iteration : splitter;

	ln = strip(ln);
	if (ln.startsWith("|")) ln = ln[1 .. $];
	if (ln.endsWith("|")) ln = ln[0 .. $-1];

	auto cols = ln.splitter('|');
	size_t cnt = 0;
	foreach (c; cols) {
		c = c.strip();
		if (c.startsWith(':')) c = c[1 .. $];
		if (c.endsWith(':')) c = c[0 .. $-1];
		if (c.length < 3 || !c.allOf("-"))
			return false;
		cnt++;
	}
	return cnt >= 2;
}

unittest {
	assert(isTableSeparatorLine("|----|---|"));
	assert(isTableSeparatorLine("|:----:|---|"));
	assert(isTableSeparatorLine("---|----"));
	assert(isTableSeparatorLine("| --- | :---- |"));
	assert(!isTableSeparatorLine("| ---- |"));
	assert(!isTableSeparatorLine("| -- | -- |"));
	assert(!isTableSeparatorLine("| --- - | ---- |"));
}

/// Returns a range of the stripped cell texts of a table line.
private auto getTableColumns(string line)
pure @safe nothrow {
	import std.algorithm.iteration : map, splitter;

	if (line.startsWith("|")) line = line[1 .. $];
	if (line.endsWith("|")) line = line[0 .. $-1];
	return line.splitter('|').map!(s => s.strip());
}

/// Returns the number of cells of a table line.
private size_t countTableColumns(string line)
pure @safe {
	return getTableColumns(line).count();
}

/// Tests if the line is a horizontal rule (at least three `-`, `*` or `_`, optionally separated by spaces).
private bool isHlineLine(string ln)
pure @safe {
	if (allOf(ln, " -") && count(ln, '-') >= 3) return true;
	if (allOf(ln, " *") && count(ln, '*') >= 3) return true;
	if (allOf(ln, " _") && count(ln, '_') >= 3) return true;
	return false;
}

/// Tests if the first non-white character of the line is a `>`.
private bool isQuoteLine(string ln)
pure @safe {
	return ln.stripLeft().startsWith(">");
}

/// Counts the number of leading `>` markers (which may be separated by white space).
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t level = 0;
	ln = stripLeft(ln);
	while (ln.length > 0 && ln[0] == '>') {
		level++;
		ln = stripLeft(ln[1 .. $]);
	}
	return level;
}

/// Tests if the line starts with `*`, `+` or `-` followed by a space or tab.
private bool isUListLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if (ln.length < 2) return false;
	if (!canFind("*+-", ln[0])) return false;
	if (ln[1] != ' ' && ln[1] != '\t') return false;
	return true;
}

/// Tests if the line starts with a decimal number followed by a `.` and a space or tab.
private bool isOListLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if (ln.length < 1) return false;
	if (ln[0] < '0' || ln[0] > '9') return false;
	ln = ln[1 .. $];
	while (ln.length > 0 && ln[0] >= '0' && ln[0] <= '9')
		ln = ln[1 .. $];
	if (ln.length < 2) return false;
	if (ln[0] != '.') return false;
	if (ln[1] != ' ' && ln[1] != '\t')
		return false;
	return true;
}

/** Removes the list marker from the first line of a list item.

	`tp` must be either `LineType.oList` or `LineType.uList`.
*/
private string removeListPrefix(string str, LineType tp)
pure @safe {
	switch (tp) {
		default: assert (false);
		case LineType.oList: // skip bullets and output using normal escaping
			auto idx = str.indexOf('.');
			assert (idx > 0);
			return str[idx+1 .. $].stripLeft();
		case LineType.uList:
			return stripLeft(str.stripLeft()[1 .. $]);
	}
}


/** Analyzes a line that may consist of a single HTML block level tag.

	The returned struct reports whether the line is an opening or closing tag
	for one of the tag names in `s_blockTags`.
*/
private auto parseHtmlBlockLine(string ln)
pure @safe {
	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	HtmlBlockInfo ret;
	ret.isHtmlBlock = false;
	ret.open = true;

	ln = strip(ln);
	if (ln.length < 3) return ret;
	if (ln[0] != '<') return ret;
	if (ln[1] == '/') {
		ret.open = false;
		ln = ln[1 .. $];
	}
	import std.ascii : isAlpha;
	if (!isAlpha(ln[1])) return ret;
	ln = ln[1 .. $];
	size_t idx = 0;
	while (idx < ln.length && ln[idx] != ' ' && ln[idx] != '>')
		idx++;
	ret.tagName = ln[0 .. idx];
	ln = ln[idx .. $];

	// the tag has to be the only thing on the line
	auto eidx = ln.indexOf('>');
	if (eidx < 0) return ret;
	if (eidx != ln.length-1) return ret;

	if (!s_blockTags.canFind(ret.tagName)) return ret;

	ret.isHtmlBlock = true;
	return ret;
}

/// Tests if the line is an opening HTML block tag.
private bool isHtmlBlockLine(string ln)
pure @safe {
	auto bi = parseHtmlBlockLine(ln);
	return bi.isHtmlBlock && bi.open;
}

/// Tests if the line is a closing HTML block tag.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	auto bi = parseHtmlBlockLine(ln);
	return bi.isHtmlBlock && !bi.open;
}

/// Tests if the line starts (after optional white space) with three backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	return ln.stripLeft.startsWith("```");
}

/// Returns the tag name determined by `parseHtmlBlockLine`.
private string getHtmlTagName(string ln)
pure @safe {
	return parseHtmlBlockLine(ln).tagName;
}

/// Tests if the line starts with a tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	return ln.startsWith("\t") || ln.startsWith("    ");
}

/// Removes one indentation step (a tab or four spaces); the line must be indented.
private string unindentLine(string ln)
pure @safe {
	if (ln.startsWith("\t")) return ln[1 .. $];
	if (ln.startsWith("    ")) return ln[4 .. $];
	assert (false);
}

/** Tries to parse an emphasized span at the start of `str`.

	On success, `text` is set to the contents of the span and `str` is
	advanced past the closing delimiter.

	Returns: The length of the delimiter (1 to 3) or 0 if there is no
		emphasized span at the start of `str`.
*/
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	string pstr = str;
	if (pstr.length < 3) return false;

	string ctag;
	if (pstr.startsWith("***")) ctag = "***";
	else if (pstr.startsWith("**")) ctag = "**";
	else if (pstr.startsWith("*")) ctag = "*";
	else if (pstr.startsWith("___")) ctag = "___";
	else if (pstr.startsWith("__")) ctag = "__";
	else if (pstr.startsWith("_")) ctag = "_";
	else return false;

	pstr = pstr[ctag.length .. $];

	auto cidx = () @trusted { return pstr.indexOf(ctag); }();
	if (cidx < 1) return false;

	text = pstr[0 .. cidx];

	str = pstr[cidx+ctag.length .. $];
	return cast(int)ctag.length;
}

/** Tries to parse an inline code span at the start of `str`.

	On success, `code` is set to the contents of the span and `str` is
	advanced past the closing delimiter.
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	string pstr = str;
	if (pstr.length < 3) return false;
	string ctag;
	if (pstr.startsWith("``")) ctag = "``";
	else if (pstr.startsWith("`")) ctag = "`";
	else return false;
	pstr = pstr[ctag.length .. $];

	auto cidx = () @trusted { return pstr.indexOf(ctag); }();
	if (cidx < 1) return false;

	code = pstr[0 .. cidx];
	str = pstr[cidx+ctag.length .. $];
	return true;
}

/** Tries to parse a link or an image at the start of `str`.

	Both, the inline form `[text](url "title")` and the reference form
	`[text][refid]` are supported. On success, `dst` is filled with the link
	information and `str` is advanced past the link. If `attributes` is not
	`null`, attributes from a `{ … }` suffix or from the link reference are
	appended to it.
*/
private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes)
pure @safe {
	string pstr = str;
	if (pstr.length < 3) return false;
	// ignore img-link prefix
	if (pstr[0] == '!') pstr = pstr[1 .. $];

	// parse the text part [text]
	if (pstr[0] != '[') return false;
	auto cidx = pstr.matchBracket();
	if (cidx < 1) return false;
	string refid;
	dst.text = pstr[1 .. cidx];
	pstr = pstr[cidx+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if (pstr.length < 2) return false;
	if (pstr[0] == '(') {
		cidx = pstr.matchBracket();
		if (cidx < 1) return false;
		auto inner = pstr[1 .. cidx];
		immutable qidx = inner.indexOf('"');
		import std.ascii : isWhite;
		if (qidx > 1 && inner[qidx - 1].isWhite()) {
			dst.url = inner[0 .. qidx].stripRight();
			immutable len = inner[qidx .. $].lastIndexOf('"');
			if (len == 0) return false;
			assert (len > 0);
			dst.title = inner[qidx + 1 .. qidx + len];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}
		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		pstr = pstr[cidx+1 .. $];

		if (attributes) {
			if (pstr.startsWith('{')) {
				auto idx = pstr.indexOf('}');
				if (idx > 0) {
					parseAttributeString(pstr[1 .. idx], *attributes);
					pstr = pstr[idx+1 .. $];
				}
			}
		}
	} else {
		if (pstr[0] == ' ') pstr = pstr[1 .. $];
		if (pstr[0] != '[') return false;
		pstr = pstr[1 .. $];
		cidx = pstr.indexOf(']');
		if (cidx < 0) return false;
		// an empty reference ("[text][]") uses the link text as the ID
		if (cidx == 0) refid = dst.text;
		else refid = pstr[0 .. cidx];
		pstr = pstr[cidx+1 .. $];
	}

	if (refid.length > 0) {
		auto pr = toLower(refid) in linkrefs;
		if (!pr) {
			debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = pr.url;
		dst.title = pr.title;
		if (attributes) *attributes ~= pr.attributes;
	}

	str = pstr;
	return true;
}

@safe unittest
{
	static void testLink(string s, Link exp, in LinkRef[string] refs)
	{
		Link link;
		assert (parseLink(s, link, refs, null), s);
		assert (link == exp);
	}
	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title" )`, Link("link", "target", "title"), null);

	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);

	testLink(`[link][ref]`, Link("link", "target", "title"), refs);
	testLink(`[ref][]`, Link("ref", "target", "title"), refs);

	testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
	testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

	testLink(`[link](/target with spaces 
)`, Link("link", "/target with spaces"), null); 1259 testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null); 1260 1261 testLink(`[link](white-space "around title" )`, Link("link", "white-space", "around title"), null); 1262 testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null); 1263 1264 testLink(`[link](target "")`, Link("link", "target", ""), null); 1265 testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null); 1266 1267 testLink(`[link](<target>)`, Link("link", "target"), null); 1268 1269 auto failing = [ 1270 `text`, `[link](target`, `[link]target)`, `[link]`, 1271 `[link(target)`, `link](target)`, `[link] (target)`, 1272 `[link][noref]`, `[noref][]` 1273 ]; 1274 Link link; 1275 foreach (s; failing) 1276 assert (!parseLink(s, link, refs, null), s); 1277 } 1278 1279 @safe unittest { // attributes 1280 void test(string s, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...) 1281 @safe { 1282 Link lnk; 1283 Attribute[] atts; 1284 parseLink(s, lnk, refs, parse_atts ? 
() @trusted { return &atts; } () : null); 1285 assert (lnk == explnk); 1286 assert (s == exprem); 1287 assert (atts == expatts); 1288 } 1289 1290 test("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", "")); 1291 test("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz")); 1292 1293 auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])]; 1294 test("[foo][bar]", refs, false, "", Link("foo", "url", "title")); 1295 test("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid")); 1296 } 1297 1298 private bool parseAutoLink(ref string str, ref string url) 1299 pure @safe { 1300 import std.algorithm.searching : all; 1301 import std.ascii : isAlphaNum; 1302 1303 string pstr = str; 1304 if (pstr.length < 3) return false; 1305 if (pstr[0] != '<') return false; 1306 pstr = pstr[1 .. $]; 1307 auto cidx = pstr.indexOf('>'); 1308 if (cidx < 0) return false; 1309 1310 url = pstr[0 .. cidx]; 1311 if (url.anyOf(" \t")) return false; 1312 auto atidx = url.indexOf('@'); 1313 auto colonidx = url.indexOf(':'); 1314 if (atidx < 0 && colonidx < 0) return false; 1315 1316 str = pstr[cidx+1 .. $]; 1317 if (atidx < 0) return true; 1318 if (colonidx < 0 || colonidx > atidx || 1319 !url[0 .. 
colonidx].all!(ch => ch.isAlphaNum)) 1320 url = "mailto:" ~ url; 1321 return true; 1322 } 1323 1324 unittest { 1325 void test(bool expected, string str, string url) 1326 { 1327 string strcpy = str; 1328 string outurl; 1329 if (!expected) { 1330 assert (!parseAutoLink(strcpy, outurl)); 1331 assert (outurl.length == 0); 1332 assert (strcpy == str); 1333 } else { 1334 assert (parseAutoLink(strcpy, outurl)); 1335 assert (outurl == url); 1336 assert (strcpy.length == 0); 1337 } 1338 } 1339 1340 test(true, "<http://foo/>", "http://foo/"); 1341 test(false, "<http://foo/", null); 1342 test(true, "<mailto:foo@bar>", "mailto:foo@bar"); 1343 test(true, "<foo@bar>", "mailto:foo@bar"); 1344 test(true, "<proto:foo@bar>", "proto:foo@bar"); 1345 test(true, "<proto:foo@bar:123>", "proto:foo@bar:123"); 1346 test(true, "<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz"); 1347 } 1348 1349 private string skipAttributes(ref string line) 1350 @safe pure { 1351 auto strs = line.stripRight; 1352 if (!strs.endsWith("}")) return null; 1353 1354 auto idx = strs.lastIndexOf('{'); 1355 if (idx < 0) return null; 1356 1357 auto ret = strs[idx+1 .. $-1]; 1358 line = strs[0 .. 
idx]; 1359 return ret; 1360 } 1361 1362 unittest { 1363 void test(string inp, string outp, string att) 1364 { 1365 auto ratt = skipAttributes(inp); 1366 assert (ratt == att, ratt); 1367 assert (inp == outp, inp); 1368 } 1369 1370 test(" foo ", " foo ", null); 1371 test("foo {bar}", "foo ", "bar"); 1372 test("foo {bar} ", "foo ", "bar"); 1373 test("foo bar} ", "foo bar} ", null); 1374 test(" {bar} foo ", " {bar} foo ", null); 1375 test(" fo {o {bar} ", " fo {o ", "bar"); 1376 test(" fo {o} {bar} ", " fo {o} ", "bar"); 1377 } 1378 1379 private void parseAttributeString(string attributes, ref Attribute[] dst) 1380 @safe pure { 1381 import std.algorithm.iteration : splitter; 1382 1383 // TODO: handle custom attributes (requires a different approach than splitter) 1384 1385 foreach (el; attributes.splitter(' ')) { 1386 el = el.strip; 1387 if (!el.length) continue; 1388 if (el[0] == '#') { 1389 auto idx = dst.countUntil!(a => a.attribute == "id"); 1390 if (idx >= 0) dst[idx].value = el[1 .. $]; 1391 else dst ~= Attribute("id", el[1 .. $]); 1392 } else if (el[0] == '.') { 1393 auto idx = dst.countUntil!(a => a.attribute == "class"); 1394 if (idx >= 0) dst[idx].value ~= " " ~ el[1 .. $]; 1395 else dst ~= Attribute("class", el[1 .. $]); 1396 } 1397 } 1398 } 1399 1400 unittest { 1401 void test(string str, Attribute[] atts...) 
1402 { 1403 Attribute[] res; 1404 parseAttributeString(str, res); 1405 assert (res == atts, format("%s: %s", str, res)); 1406 } 1407 1408 test(""); 1409 test(".foo", Attribute("class", "foo")); 1410 test("#foo", Attribute("id", "foo")); 1411 test("#foo #bar", Attribute("id", "bar")); 1412 test(".foo .bar", Attribute("class", "foo bar")); 1413 test("#foo #bar", Attribute("id", "bar")); 1414 test(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar")); 1415 } 1416 1417 private LinkRef[string] scanForReferences(ref string[] lines) 1418 pure @safe { 1419 LinkRef[string] ret; 1420 bool[size_t] reflines; 1421 1422 // search for reference definitions: 1423 // [refid] link "opt text" 1424 // [refid] <link> "opt text" 1425 // "opt text", 'opt text', (opt text) 1426 // line must not be indented 1427 foreach (lnidx, ln; lines) { 1428 if (isLineIndented(ln)) continue; 1429 ln = strip(ln); 1430 if (!ln.startsWith("[")) continue; 1431 ln = ln[1 .. $]; 1432 1433 auto idx = () @trusted { return ln.indexOf("]:"); }(); 1434 if (idx < 0) continue; 1435 string refid = ln[0 .. idx]; 1436 ln = stripLeft(ln[idx+2 .. $]); 1437 1438 string attstr = ln.skipAttributes(); 1439 1440 string url; 1441 if (ln.startsWith("<")) { 1442 idx = ln.indexOf('>'); 1443 if (idx < 0) continue; 1444 url = ln[1 .. idx]; 1445 ln = ln[idx+1 .. $]; 1446 } else { 1447 idx = ln.indexOf(' '); 1448 if (idx > 0) { 1449 url = ln[0 .. idx]; 1450 ln = ln[idx+1 .. $]; 1451 } else { 1452 idx = ln.indexOf('\t'); 1453 if (idx < 0) { 1454 url = ln; 1455 ln = ln[$ .. $]; 1456 } else { 1457 url = ln[0 .. idx]; 1458 ln = ln[idx+1 .. $]; 1459 } 1460 } 1461 } 1462 ln = stripLeft(ln); 1463 1464 string title; 1465 if (ln.length >= 3) { 1466 if (ln[0] == '(' && ln[$-1] == ')' 1467 || ln[0] == '\"' && ln[$-1] == '\"' 1468 || ln[0] == '\'' && ln[$-1] == '\'' ) 1469 { 1470 title = ln[1 .. 
$-1]; 1471 } 1472 } 1473 1474 LinkRef lref; 1475 lref.id = refid; 1476 lref.url = url; 1477 lref.title = title; 1478 parseAttributeString(attstr, lref.attributes); 1479 ret[toLower(refid)] = lref; 1480 reflines[lnidx] = true; 1481 1482 debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1); 1483 } 1484 1485 // remove all lines containing references 1486 auto nonreflines = appender!(string[])(); 1487 nonreflines.reserve(lines.length); 1488 foreach (i, ln; lines) 1489 if (i !in reflines) 1490 nonreflines.put(ln); 1491 lines = nonreflines.data(); 1492 1493 return ret; 1494 } 1495 1496 1497 /** 1498 Generates an identifier suitable to use as within a URL. 1499 1500 The resulting string will contain only ASCII lower case alphabetic or 1501 numeric characters, as well as dashes (-). Every sequence of 1502 non-alphanumeric characters will be replaced by a single dash. No dashes 1503 will be at either the front or the back of the result string. 1504 */ 1505 auto asSlug(R)(R text) 1506 if (isInputRange!R && is(typeof(R.init.front) == dchar)) 1507 { 1508 static struct SlugRange { 1509 private { 1510 R _input; 1511 bool _dash; 1512 } 1513 1514 this(R input) 1515 { 1516 _input = input; 1517 skipNonAlphaNum(); 1518 } 1519 1520 @property bool empty() const { return _dash ? false : _input.empty; } 1521 @property char front() const { 1522 if (_dash) return '-'; 1523 1524 char r = cast(char)_input.front; 1525 if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A')); 1526 return r; 1527 } 1528 1529 void popFront() 1530 { 1531 if (_dash) { 1532 _dash = false; 1533 return; 1534 } 1535 1536 _input.popFront(); 1537 auto na = skipNonAlphaNum(); 1538 if (na && !_input.empty) 1539 _dash = true; 1540 } 1541 1542 private bool skipNonAlphaNum() 1543 { 1544 bool have_skipped = false; 1545 while (!_input.empty) { 1546 switch (_input.front) { 1547 default: 1548 _input.popFront(); 1549 have_skipped = true; 1550 break; 1551 case 'a': .. case 'z': 1552 case 'A': .. 
case 'Z': 1553 case '0': .. case '9': 1554 return have_skipped; 1555 } 1556 } 1557 return have_skipped; 1558 } 1559 } 1560 return SlugRange(text); 1561 } 1562 1563 unittest { 1564 import std.algorithm : equal; 1565 assert ("".asSlug.equal("")); 1566 assert (".,-".asSlug.equal("")); 1567 assert ("abc".asSlug.equal("abc")); 1568 assert ("aBc123".asSlug.equal("abc123")); 1569 assert ("....aBc...123...".asSlug.equal("abc-123")); 1570 } 1571 1572 private struct LinkRef { 1573 string id; 1574 string url; 1575 string title; 1576 Attribute[] attributes; 1577 } 1578 1579 private struct Link { 1580 string text; 1581 string url; 1582 string title; 1583 } 1584 1585 @safe unittest { // alt and title attributes 1586 assert (filterMarkdown("![alt](http://example.org/image)") 1587 == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n"); 1588 assert (filterMarkdown("![alt](http://example.org/image \"Title\")") 1589 == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n"); 1590 } 1591 1592 @safe unittest { // complex links 1593 assert (filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and") 1594 == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n"); 1595 assert (filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)") 1596 == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n"); 1597 } 1598 1599 @safe unittest { // check CTFE-ability 1600 enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar"); 1601 assert (res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res); 1602 } 1603 1604 @safe unittest { // correct line breaks in restrictive mode 1605 auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault); 1606 assert (res == "<p>hello<br/>world\n</p>\n", 
res); 1607 } 1608 1609 /*@safe unittest { // code blocks and blockquotes 1610 assert (filterMarkdown("\tthis\n\tis\n\tcode") == 1611 "<pre><code>this\nis\ncode</code></pre>\n"); 1612 assert (filterMarkdown(" this\n is\n code") == 1613 "<pre><code>this\nis\ncode</code></pre>\n"); 1614 assert (filterMarkdown(" this\n is\n\tcode") == 1615 "<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n"); 1616 assert (filterMarkdown("\tthis\n\n\tcode") == 1617 "<pre><code>this\n\ncode</code></pre>\n"); 1618 assert (filterMarkdown("\t> this") == 1619 "<pre><code>> this</code></pre>\n"); 1620 assert (filterMarkdown("> this") == 1621 "<blockquote><pre><code>this</code></pre></blockquote>\n"); 1622 assert (filterMarkdown("> this\n is code") == 1623 "<blockquote><pre><code>this\nis code</code></pre></blockquote>\n"); 1624 }*/ 1625 1626 @safe unittest { 1627 assert (filterMarkdown("## Hello, World!") == "<h2 id=\"hello-world\"> Hello, World!</h2>\n", filterMarkdown("## Hello, World!")); 1628 } 1629 1630 @safe unittest { // tables 1631 assert (filterMarkdown("foo|bar\n---|---", MarkdownFlags.tables) 1632 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n</table>\n"); 1633 assert (filterMarkdown(" *foo* | bar \n---|---\n baz|bam", MarkdownFlags.tables) 1634 == "<table>\n<tr><th><em>foo</em></th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1635 assert (filterMarkdown("|foo|bar|\n---|---\n baz|bam", MarkdownFlags.tables) 1636 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1637 assert (filterMarkdown("foo|bar\n|---|---|\nbaz|bam", MarkdownFlags.tables) 1638 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1639 assert (filterMarkdown("foo|bar\n---|---\n|baz|bam|", MarkdownFlags.tables) 1640 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"); 1641 assert (filterMarkdown("foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|", 
MarkdownFlags.tables) 1642 == "<table>\n<tr><th align=\"left\">foo</th><th align=\"right\">bar</th><th align=\"center\">baz</th></tr>\n" 1643 ~ "<tr><td align=\"left\">baz</td><td align=\"right\">bam</td><td align=\"center\">bap</td></tr>\n</table>\n"); 1644 assert (filterMarkdown(" |bar\n---|---", MarkdownFlags.tables) 1645 == "<table>\n<tr><th></th><th>bar</th></tr>\n</table>\n"); 1646 assert (filterMarkdown("foo|bar\n---|---\nbaz|", MarkdownFlags.tables) 1647 == "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td></tr>\n</table>\n"); 1648 } 1649 1650 @safe unittest { // issue #1527 - blank lines in code blocks 1651 assert (filterMarkdown(" foo\n\n bar\n") == 1652 "<pre class=\"prettyprint\"><code>foo\n\nbar\n</code></pre>\n"); 1653 } 1654 1655 @safe unittest { 1656 assert (filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault) == 1657 "<blockquote><pre class=\"prettyprint\"><code>test\n</code></pre>\n</blockquote>\n"); 1658 } 1659 1660 @safe unittest { // issue #1845 - malicious URI targets 1661 assert (filterMarkdown("[foo](javascript:foo) ![bar](javascript:bar) <javascript:baz>", MarkdownFlags.forumDefault) == 1662 "<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"bar\"> <a href=\"#\">javascript:baz</a>\n</p>\n"); 1663 assert (filterMarkdown("[foo][foo] ![foo][foo]\n[foo]: javascript:foo", MarkdownFlags.forumDefault) == 1664 "<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"foo\">\n</p>\n"); 1665 assert (filterMarkdown("[foo](javascript%3Abar)", MarkdownFlags.forumDefault) == 1666 "<p><a href=\"javascript%3Abar\">foo</a>\n</p>\n"); 1667 1668 // extra XSS regression tests 1669 assert (filterMarkdown("[<script></script>](bar)", MarkdownFlags.forumDefault) == 1670 "<p><a href=\"bar\"><script></script></a>\n</p>\n"); 1671 assert (filterMarkdown("[foo](\"><script></script><span foo=\")", MarkdownFlags.forumDefault) == 1672 "<p><a href=\""><script></script><span foo="\">foo</a>\n</p>\n"); 1673 assert (filterMarkdown("[foo](javascript:bar)", 
MarkdownFlags.forumDefault) == 1674 "<p><a href=\"javascript&#58;bar\">foo</a>\n</p>\n"); 1675 } 1676 1677 @safe unittest { // issue #2132 - table with more columns in body goes out of array bounds 1678 assert (filterMarkdown("| a | b |\n|--------|--------|\n| c | d | e |", MarkdownFlags.tables) == 1679 "<table>\n<tr><th>a</th><th>b</th></tr>\n<tr><td>c</td><td>d</td><td>e</td></tr>\n</table>\n"); 1680 } 1681 1682 @safe unittest { // lists 1683 assert (filterMarkdown("- foo\n- bar") == 1684 "<ul>\n<li>foo\n</li>\n<li>bar\n</li>\n</ul>\n"); 1685 assert (filterMarkdown("- foo\n\n- bar") == 1686 "<ul>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ul>\n"); 1687 assert (filterMarkdown("1. foo\n2. bar") == 1688 "<ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n"); 1689 assert (filterMarkdown("1. foo\n\n2. bar") == 1690 "<ol>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ol>\n"); 1691 assert (filterMarkdown("1. foo\n\n\tbar\n\n2. bar\n\n\tbaz\n\n") == 1692 "<ol>\n<li><p>foo\n</p>\n<p>bar\n</p>\n</li>\n<li><p>bar\n</p>\n<p>baz\n</p>\n</li>\n</ol>\n"); 1693 } 1694 1695 @safe unittest { // figures 1696 assert (filterMarkdown("- %%%") == "<ul>\n<li>%%%\n</li>\n</ul>\n"); 1697 assert (filterMarkdown("- ###") == "<ul>\n<li>###\n</li>\n</ul>\n"); 1698 assert (filterMarkdown("- %%%", MarkdownFlags.figures) == "<figure></figure>\n"); 1699 assert (filterMarkdown("- ###", MarkdownFlags.figures) == "<figcaption></figcaption>\n"); 1700 assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar", MarkdownFlags.figures) == 1701 "<figure>foo\n<figcaption>bar\n</figcaption>\n</figure>\n"); 1702 assert (filterMarkdown("- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) == 1703 "<figure><p>foo\n</p>\n<p>bar\n</p>\n<figcaption>baz\n</figcaption>\n</figure>\n"); 1704 assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz", MarkdownFlags.figures) == 1705 "<figure>foo\n<figcaption><p>bar\n</p>\n<p>baz\n</p>\n</figcaption>\n</figure>\n"); 1706 assert 
(filterMarkdown("- %%%\n\t1. foo\n\t2. bar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) == 1707 "<figure><ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n<figcaption>baz\n</figcaption>\n</figure>\n"); 1708 assert (filterMarkdown("- foo\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n"); 1709 assert (filterMarkdown("- foo\n\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n"); 1710 } 1711 1712 @safe unittest { // HTML entities 1713 assert(filterMarkdown(" ") == "<p> \n</p>\n"); 1714 assert(filterMarkdown("* *") == "<p><em> </em>\n</p>\n"); 1715 assert(filterMarkdown("` `") == "<p><code class=\"prettyprint\">&nbsp;</code>\n</p>\n"); 1716 }