1 /** 2 Markdown parser implementation 3 4 Copyright: © 2012-2019 Sönke Ludwig 5 License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file. 6 Authors: Sönke Ludwig 7 */ 8 module vibe.textfilter.markdown; 9 10 import vibe.core.log; 11 import vibe.textfilter.html; 12 import vibe.utils.string; 13 14 import std.algorithm : canFind, countUntil, min; 15 import std.array; 16 import std.format; 17 import std.range; 18 import std.string; 19 20 /* 21 TODO: 22 detect inline HTML tags 23 */ 24 25 version(MarkdownTest) 26 { 27 int main() 28 { 29 import std.file; 30 setLogLevel(LogLevel.Trace); 31 auto text = readText("test.txt"); 32 auto result = appender!string(); 33 filterMarkdown(result, text); 34 foreach( ln; splitLines(result.data) ) 35 logInfo(ln); 36 return 0; 37 } 38 } 39 40 /** Returns a Markdown filtered HTML string. 41 */ 42 string filterMarkdown()(string str, MarkdownFlags flags) 43 @trusted { // scope class is not @safe for DMD 2.072 44 scope settings = new MarkdownSettings; 45 settings.flags = flags; 46 return filterMarkdown(str, settings); 47 } 48 /// ditto 49 string filterMarkdown()(string str, scope MarkdownSettings settings = null) 50 @trusted { // Appender not @safe as of 2.065 51 auto dst = appender!string(); 52 filterMarkdown(dst, str, settings); 53 return dst.data; 54 } 55 56 57 /** Markdown filters the given string and writes the corresponding HTML to an output range. 
58 */ 59 void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags) 60 { 61 scope settings = new MarkdownSettings; 62 settings.flags = flags; 63 filterMarkdown(dst, src, settings); 64 } 65 /// ditto 66 void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null) 67 { 68 if (!settings) settings = new MarkdownSettings; 69 70 auto all_lines = splitLines(src); 71 auto links = scanForReferences(all_lines); 72 auto lines = parseLines(all_lines, settings); 73 Block root_block; 74 parseBlocks(root_block, lines, null, settings); 75 writeBlock(dst, root_block, links, settings); 76 } 77 78 /** 79 Returns the hierarchy of sections 80 */ 81 Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null) 82 { 83 import std.conv : to; 84 85 if (!settings) settings = new MarkdownSettings; 86 auto all_lines = splitLines(markdown_source); 87 auto lines = parseLines(all_lines, settings); 88 Block root_block; 89 parseBlocks(root_block, lines, null, settings); 90 Section root; 91 92 foreach (ref sb; root_block.blocks) { 93 if (sb.type == BlockType.header) { 94 auto s = &root; 95 while (true) { 96 if (s.subSections.length == 0) break; 97 if (s.subSections[$-1].headingLevel >= sb.headerLevel) break; 98 s = &s.subSections[$-1]; 99 } 100 s.subSections ~= Section(sb.headerLevel, sb.text[0], sb.text[0].asSlug.to!string); 101 } 102 } 103 104 return root.subSections; 105 } 106 107 /// 108 unittest { 109 import std.conv : to; 110 assert (getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth") == 111 [ 112 Section(2, " first", "first"), 113 Section(2, " second", "second", [ 114 Section(3, " third", "third") 115 ]), 116 Section(1, " fourth", "fourth", [ 117 Section(3, " fifth", "fifth") 118 ]) 119 ] 120 ); 121 } 122 123 final class MarkdownSettings { 124 /// Controls the capabilities of the parser. 125 MarkdownFlags flags = MarkdownFlags.vanillaMarkdown; 126 127 /// Heading tags will start at this level. 
128 size_t headingBaseLevel = 1; 129 130 /// Called for every link/image URL to perform arbitrary transformations. 131 string delegate(string url_or_path, bool is_image) urlFilter; 132 133 /// White list of URI schemas that can occur in link/image targets 134 string[] allowedURISchemas = ["http", "https", "ftp", "mailto"]; 135 } 136 137 enum MarkdownFlags { 138 /** Same as `vanillaMarkdown` 139 */ 140 none = 0, 141 142 /** Convert line breaks into hard line breaks in the output 143 144 This option is useful when operating on text that may be formatted as 145 plain text, without having Markdown in mind, while still improving 146 the appearance of the text in many cases. A common example would be 147 to format e-mails or newsgroup posts. 148 */ 149 keepLineBreaks = 1<<0, 150 151 /** Support fenced code blocks. 152 */ 153 backtickCodeBlocks = 1<<1, 154 155 /** Disable support for embedded HTML 156 */ 157 noInlineHtml = 1<<2, 158 //noLinks = 1<<3, 159 //allowUnsafeHtml = 1<<4, 160 161 /** Support table definitions 162 163 The syntax is based on Markdown Extra and GitHub flavored Markdown. 164 */ 165 tables = 1<<5, 166 167 /** Support HTML attributes after links 168 169 Links or images directly followed by `{ … }` allow regular HTML 170 attributes to added to the generated HTML element. 171 */ 172 attributes = 1<<6, 173 174 /** Recognize figure definitions 175 176 Figures can be defined using a modified list syntax: 177 178 ``` 179 - %%% 180 This is the figure content 181 182 - ### 183 This is optional caption content 184 ``` 185 186 Just like for lists, arbitrary blocks can be nested within figure and 187 figure caption blocks. If only a single paragraph is present within a 188 figure caption block, the paragraph text will be emitted without the 189 surrounding `<p>` tags. The same is true for figure blocks that contain 190 only a single paragraph and any number of additional figure caption 191 blocks. 
192 */ 193 figures = 1<<7, 194 195 /** Support only standard Markdown features 196 197 Note that the parser is not fully CommonMark compliant at the moment, 198 but this is the general idea behind this option. 199 */ 200 vanillaMarkdown = none, 201 202 /** Default set of flags suitable for use within an online forum 203 */ 204 forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml|tables 205 } 206 207 struct Section { 208 size_t headingLevel; 209 string caption; 210 string anchor; 211 Section[] subSections; 212 } 213 214 private { 215 immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"]; 216 } 217 218 private enum IndentType { 219 white, 220 quote 221 } 222 223 private enum LineType { 224 undefined, 225 blank, 226 plain, 227 hline, 228 atxHeader, 229 setextHeader, 230 tableSeparator, 231 uList, 232 oList, 233 figure, 234 figureCaption, 235 htmlBlock, 236 codeBlockDelimiter 237 } 238 239 private struct Line { 240 LineType type; 241 IndentType[] indent; 242 string text; 243 string unindented; 244 245 string unindent(size_t n) 246 pure @safe { 247 assert (n <= indent.length); 248 string ln = text; 249 foreach (i; 0 .. n) { 250 final switch(indent[i]){ 251 case IndentType.white: 252 if (ln[0] == ' ') ln = ln[4 .. $]; 253 else ln = ln[1 .. $]; 254 break; 255 case IndentType.quote: 256 ln = ln.stripLeft()[1 .. 
$]; 257 if (ln.startsWith(' ')) 258 ln.popFront(); 259 break; 260 } 261 } 262 return ln; 263 } 264 } 265 266 private Line[] parseLines(string[] lines, scope MarkdownSettings settings) 267 pure @safe { 268 Line[] ret; 269 while( !lines.empty ){ 270 auto ln = lines.front; 271 lines.popFront(); 272 273 Line lninfo; 274 lninfo.text = ln; 275 276 while (ln.length > 0) { 277 if (ln[0] == '\t') { 278 lninfo.indent ~= IndentType.white; 279 ln.popFront(); 280 } else if (ln.startsWith(" ")) { 281 lninfo.indent ~= IndentType.white; 282 ln.popFrontN(4); 283 } else { 284 if (ln.stripLeft().startsWith(">")) { 285 lninfo.indent ~= IndentType.quote; 286 ln = ln.stripLeft(); 287 ln.popFront(); 288 if (ln.startsWith(' ')) 289 ln.popFront(); 290 } else break; 291 } 292 } 293 lninfo.unindented = ln; 294 295 if ((settings.flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln)) 296 lninfo.type = LineType.codeBlockDelimiter; 297 else if(isAtxHeaderLine(ln)) lninfo.type = LineType.atxHeader; 298 else if(isSetextHeaderLine(ln)) lninfo.type = LineType.setextHeader; 299 else if((settings.flags & MarkdownFlags.tables) && isTableSeparatorLine(ln)) 300 lninfo.type = LineType.tableSeparator; 301 else if(isHlineLine(ln)) lninfo.type = LineType.hline; 302 else if(isOListLine(ln)) lninfo.type = LineType.oList; 303 else if(isUListLine(ln)) { 304 if (settings.flags & MarkdownFlags.figures) { 305 auto suff = removeListPrefix(ln, LineType.uList); 306 if (suff == "%%%") lninfo.type = LineType.figure; 307 else if (suff == "###") lninfo.type = LineType.figureCaption; 308 else lninfo.type = LineType.uList; 309 } else lninfo.type = LineType.uList; 310 } else if(isLineBlank(ln)) lninfo.type = LineType.blank; 311 else if(!(settings.flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln)) 312 lninfo.type = LineType.htmlBlock; 313 else lninfo.type = LineType.plain; 314 315 ret ~= lninfo; 316 } 317 return ret; 318 } 319 320 unittest { 321 import std.conv : to; 322 auto s = new MarkdownSettings; 
323 s.flags = MarkdownFlags.forumDefault; 324 auto lns = [">```D"]; 325 assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]); 326 lns = ["> ```D"]; 327 assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]); 328 lns = ["> ```D"]; 329 assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], " ```D")]); 330 lns = ["> ```D"]; 331 assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], lns[0], "```D")]); 332 lns = [">test"]; 333 assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]); 334 lns = ["> test"]; 335 assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]); 336 lns = ["> test"]; 337 assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], " test")]); 338 lns = ["> test"]; 339 assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote, IndentType.white], lns[0], "test")]); 340 } 341 342 private enum BlockType { 343 plain, 344 text, 345 paragraph, 346 header, 347 table, 348 oList, 349 uList, 350 listItem, 351 code, 352 quote, 353 figure, 354 figureCaption 355 } 356 357 private struct Block { 358 BlockType type; 359 Attribute[] attributes; 360 string[] text; 361 Block[] blocks; 362 size_t headerLevel; 363 Alignment[] columns; 364 } 365 366 private struct Attribute { 367 string attribute; 368 string value; 369 } 370 371 private enum Alignment { 372 none = 0, 373 left = 1<<0, 374 right = 1<<1, 375 center = left | right 376 } 377 378 private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings) 379 pure @safe { 380 import std.conv : to; 381 import std.algorithm.comparison : among; 382 383 if (base_indent.length == 0) root.type = BlockType.text; 384 else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote; 385 386 
while (!lines.empty) { 387 auto ln = lines.front; 388 389 if (ln.type == LineType.blank) { 390 lines.popFront(); 391 continue; 392 } 393 394 if (ln.indent != base_indent) { 395 if (ln.indent.length < base_indent.length 396 || ln.indent[0 .. base_indent.length] != base_indent) 397 { 398 return; 399 } 400 401 auto cindent = base_indent ~ IndentType.white; 402 if (ln.indent == cindent) { 403 Block cblock; 404 cblock.type = BlockType.code; 405 while (!lines.empty && (lines.front.unindented.strip.empty 406 || lines.front.indent.length >= cindent.length 407 && lines.front.indent[0 .. cindent.length] == cindent)) 408 { 409 cblock.text ~= lines.front.indent.length >= cindent.length 410 ? lines.front.unindent(cindent.length) : ""; 411 lines.popFront(); 412 } 413 root.blocks ~= cblock; 414 } else { 415 Block subblock; 416 parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings); 417 root.blocks ~= subblock; 418 } 419 } else { 420 Block b; 421 final switch (ln.type) { 422 case LineType.undefined: assert (false); 423 case LineType.blank: assert (false); 424 case LineType.plain: 425 if (lines.length >= 2 && lines[1].type == LineType.setextHeader) { 426 auto setln = lines[1].unindented; 427 b.type = BlockType.header; 428 b.text = [ln.unindented]; 429 if (settings.flags & MarkdownFlags.attributes) 430 parseAttributeString(skipAttributes(b.text[0]), b.attributes); 431 if (!b.attributes.canFind!(a => a.attribute == "id")) 432 b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string); 433 b.headerLevel = setln.strip()[0] == '=' ? 
1 : 2; 434 lines.popFrontN(2); 435 } else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator 436 && ln.unindented.indexOf('|') >= 0) 437 { 438 auto setln = lines[1].unindented; 439 b.type = BlockType.table; 440 b.text = [ln.unindented]; 441 foreach (c; getTableColumns(setln)) { 442 Alignment a = Alignment.none; 443 if (c.startsWith(':')) a |= Alignment.left; 444 if (c.endsWith(':')) a |= Alignment.right; 445 b.columns ~= a; 446 } 447 448 lines.popFrontN(2); 449 while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) { 450 b.text ~= lines.front.unindented; 451 lines.popFront(); 452 } 453 } else { 454 b.type = BlockType.paragraph; 455 b.text = skipText(lines, base_indent); 456 } 457 break; 458 case LineType.hline: 459 b.type = BlockType.plain; 460 b.text = ["<hr>"]; 461 lines.popFront(); 462 break; 463 case LineType.atxHeader: 464 b.type = BlockType.header; 465 string hl = ln.unindented; 466 b.headerLevel = 0; 467 while (hl.length > 0 && hl[0] == '#') { 468 b.headerLevel++; 469 hl = hl[1 .. $]; 470 } 471 472 if (settings.flags & MarkdownFlags.attributes) 473 parseAttributeString(skipAttributes(hl), b.attributes); 474 if (!b.attributes.canFind!(a => a.attribute == "id")) 475 b.attributes ~= Attribute("id", asSlug(hl).to!string); 476 477 while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' ')) 478 hl = hl[0 .. $-1]; 479 b.text = [hl]; 480 lines.popFront(); 481 break; 482 case LineType.setextHeader: 483 lines.popFront(); 484 break; 485 case LineType.tableSeparator: 486 lines.popFront(); 487 break; 488 case LineType.figure: 489 case LineType.figureCaption: 490 b.type = ln.type == LineType.figure 491 ? BlockType.figure : BlockType.figureCaption; 492 493 auto itemindent = base_indent ~ IndentType.white; 494 lines.popFront(); 495 parseBlocks(b, lines, itemindent, settings); 496 break; 497 case LineType.uList: 498 case LineType.oList: 499 b.type = ln.type == LineType.uList ? 
BlockType.uList : BlockType.oList; 500 501 auto itemindent = base_indent ~ IndentType.white; 502 bool firstItem = true, paraMode = false; 503 while (!lines.empty && lines.front.type == ln.type 504 && lines.front.indent == base_indent) 505 { 506 Block itm; 507 itm.text = skipText(lines, itemindent); 508 itm.text[0] = removeListPrefix(itm.text[0], ln.type); 509 510 // emit <p>...</p> if there are blank lines between the items 511 if (firstItem && !lines.empty && lines.front.type == LineType.blank) { 512 lines.popFront(); 513 if (!lines.empty && lines.front.type == ln.type) 514 paraMode = true; 515 } 516 firstItem = false; 517 if (paraMode) { 518 Block para; 519 para.type = BlockType.paragraph; 520 para.text = itm.text; 521 itm.blocks ~= para; 522 itm.text = null; 523 } 524 525 parseBlocks(itm, lines, itemindent, settings); 526 itm.type = BlockType.listItem; 527 b.blocks ~= itm; 528 } 529 break; 530 case LineType.htmlBlock: 531 int nestlevel = 0; 532 auto starttag = parseHtmlBlockLine(ln.unindented); 533 if (!starttag.isHtmlBlock || !starttag.open) 534 break; 535 536 b.type = BlockType.plain; 537 while (!lines.empty) { 538 if (lines.front.indent.length < base_indent.length) 539 break; 540 if (lines.front.indent[0 .. base_indent.length] != base_indent) 541 break; 542 543 auto str = lines.front.unindent(base_indent.length); 544 auto taginfo = parseHtmlBlockLine(str); 545 b.text ~= lines.front.unindent(base_indent.length); 546 lines.popFront(); 547 if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName) 548 nestlevel += taginfo.open ? 1 : -1; 549 if (nestlevel <= 0) break; 550 } 551 break; 552 case LineType.codeBlockDelimiter: 553 lines.popFront(); // TODO: get language from line 554 b.type = BlockType.code; 555 while (!lines.empty) { 556 if (lines.front.indent.length < base_indent.length) 557 break; 558 if (lines.front.indent[0 .. 
base_indent.length] != base_indent) 559 break; 560 if (lines.front.type == LineType.codeBlockDelimiter) { 561 lines.popFront(); 562 break; 563 } 564 b.text ~= lines.front.unindent(base_indent.length); 565 lines.popFront(); 566 } 567 break; 568 } 569 root.blocks ~= b; 570 } 571 } 572 } 573 574 575 private string[] skipText(ref Line[] lines, IndentType[] indent) 576 pure @safe { 577 static bool matchesIndent(IndentType[] indent, IndentType[] base_indent) 578 { 579 if (indent.length > base_indent.length) return false; 580 if (indent != base_indent[0 .. indent.length]) return false; 581 sizediff_t qidx = -1; 582 foreach_reverse (i, tp; base_indent) 583 if (tp == IndentType.quote) { 584 qidx = i; 585 break; 586 } 587 if (qidx >= 0) { 588 qidx = base_indent.length-1 - qidx; 589 if( indent.length <= qidx ) return false; 590 } 591 return true; 592 } 593 594 // return value is used in variables that don't get bounds checks on the 595 // first element, so we should return at least one 596 if (lines.empty) 597 return [""]; 598 599 string[] ret; 600 601 while (true) { 602 ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length)); 603 lines.popFront(); 604 605 if (lines.empty || !matchesIndent(lines.front.indent, indent) 606 || lines.front.type != LineType.plain) 607 { 608 return ret; 609 } 610 } 611 } 612 613 /// private 614 private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings) 615 { 616 final switch (block.type) { 617 case BlockType.plain: 618 foreach (ln; block.text) { 619 put(dst, ln); 620 put(dst, "\n"); 621 } 622 foreach (b; block.blocks) 623 writeBlock(dst, b, links, settings); 624 break; 625 case BlockType.text: 626 writeMarkdownEscaped(dst, block, links, settings); 627 foreach (b; block.blocks) 628 writeBlock(dst, b, links, settings); 629 break; 630 case BlockType.paragraph: 631 assert (block.blocks.length == 0); 632 put(dst, "<p>"); 633 writeMarkdownEscaped(dst, block, links, settings); 
634 put(dst, "</p>\n"); 635 break; 636 case BlockType.header: 637 assert (block.blocks.length == 0); 638 assert (block.text.length == 1); 639 auto hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0); 640 dst.writeTag(block.attributes, "h", hlvl); 641 writeMarkdownEscaped(dst, block.text[0], links, settings); 642 dst.formattedWrite("</h%s>\n", hlvl); 643 break; 644 case BlockType.table: 645 import std.algorithm.iteration : splitter; 646 647 static string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""]; 648 649 put(dst, "<table>\n"); 650 put(dst, "<tr>"); 651 size_t i = 0; 652 foreach (col; block.text[0].getTableColumns()) { 653 put(dst, "<th"); 654 put(dst, alstr[block.columns[i]]); 655 put(dst, '>'); 656 dst.writeMarkdownEscaped(col, links, settings); 657 put(dst, "</th>"); 658 if (i + 1 < block.columns.length) 659 i++; 660 } 661 put(dst, "</tr>\n"); 662 foreach (ln; block.text[1 .. $]) { 663 put(dst, "<tr>"); 664 i = 0; 665 foreach (col; ln.getTableColumns()) { 666 put(dst, "<td"); 667 put(dst, alstr[block.columns[i]]); 668 put(dst, '>'); 669 dst.writeMarkdownEscaped(col, links, settings); 670 put(dst, "</td>"); 671 if (i + 1 < block.columns.length) 672 i++; 673 } 674 put(dst, "</tr>\n"); 675 } 676 put(dst, "</table>\n"); 677 break; 678 case BlockType.oList: 679 put(dst, "<ol>\n"); 680 foreach (b; block.blocks) 681 writeBlock(dst, b, links, settings); 682 put(dst, "</ol>\n"); 683 break; 684 case BlockType.uList: 685 put(dst, "<ul>\n"); 686 foreach (b; block.blocks) 687 writeBlock(dst, b, links, settings); 688 put(dst, "</ul>\n"); 689 break; 690 case BlockType.listItem: 691 put(dst, "<li>"); 692 writeMarkdownEscaped(dst, block, links, settings); 693 foreach (b; block.blocks) 694 writeBlock(dst, b, links, settings); 695 put(dst, "</li>\n"); 696 break; 697 case BlockType.code: 698 assert (block.blocks.length == 0); 699 put(dst, "<pre class=\"prettyprint\"><code>"); 700 foreach (ln; block.text) { 701 
filterHTMLEscape(dst, ln); 702 put(dst, "\n"); 703 } 704 put(dst, "</code></pre>\n"); 705 break; 706 case BlockType.quote: 707 put(dst, "<blockquote>"); 708 writeMarkdownEscaped(dst, block, links, settings); 709 foreach (b; block.blocks) 710 writeBlock(dst, b, links, settings); 711 put(dst, "</blockquote>\n"); 712 break; 713 case BlockType.figure: 714 put(dst, "<figure>"); 715 bool omit_para = block.blocks.count!(b => b.type != BlockType.figureCaption) == 1; 716 foreach (b; block.blocks) { 717 if (b.type == BlockType.paragraph && omit_para) { 718 writeMarkdownEscaped(dst, b, links, settings); 719 } else writeBlock(dst, b, links, settings); 720 } 721 put(dst, "</figure>\n"); 722 break; 723 case BlockType.figureCaption: 724 put(dst, "<figcaption>"); 725 if (block.blocks.length == 1 && block.blocks[0].type == BlockType.paragraph) { 726 writeMarkdownEscaped(dst, block.blocks[0], links, settings); 727 } else { 728 foreach (b; block.blocks) 729 writeBlock(dst, b, links, settings); 730 } 731 put(dst, "</figcaption>\n"); 732 break; 733 } 734 } 735 736 private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings) 737 { 738 auto lines = () @trusted { return cast(string[])block.text; } (); 739 auto text = settings.flags & MarkdownFlags.keepLineBreaks ? lines.join("<br>") : lines.join("\n"); 740 writeMarkdownEscaped(dst, text, links, settings); 741 if (lines.length) put(dst, "\n"); 742 } 743 744 /// private 745 private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings) 746 { 747 bool isAllowedURI(string lnk) { 748 auto idx = lnk.indexOf('/'); 749 auto cidx = lnk.indexOf(':'); 750 // always allow local URIs 751 if (cidx < 0 || idx >= 0 && cidx > idx) return true; 752 return settings.allowedURISchemas.canFind(lnk[0 .. cidx]); 753 } 754 755 string filterLink(string lnk, bool is_image) { 756 if (isAllowedURI(lnk)) 757 return settings.urlFilter ? 
settings.urlFilter(lnk, is_image) : lnk; 758 return "#"; // replace link with unknown schema with dummy URI 759 } 760 761 bool br = ln.endsWith(" "); 762 while (ln.length > 0) { 763 switch (ln[0]) { 764 default: 765 put(dst, ln[0]); 766 ln = ln[1 .. $]; 767 break; 768 case '\\': 769 if (ln.length >= 2) { 770 switch (ln[1]) { 771 default: 772 put(dst, ln[0 .. 2]); 773 ln = ln[2 .. $]; 774 break; 775 case '\'', '`', '*', '_', '{', '}', '[', ']', 776 '(', ')', '#', '+', '-', '.', '!': 777 put(dst, ln[1]); 778 ln = ln[2 .. $]; 779 break; 780 } 781 } else { 782 put(dst, ln[0]); 783 ln = ln[1 .. $]; 784 } 785 break; 786 case '_': 787 case '*': 788 string text; 789 if (auto em = parseEmphasis(ln, text)) { 790 put(dst, em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>"); 791 put(dst, text); 792 put(dst, em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>"); 793 } else { 794 put(dst, ln[0]); 795 ln = ln[1 .. $]; 796 } 797 break; 798 case '`': 799 string code; 800 if (parseInlineCode(ln, code)) { 801 put(dst, "<code class=\"prettyprint\">"); 802 filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal); 803 put(dst, "</code>"); 804 } else { 805 put(dst, ln[0]); 806 ln = ln[1 .. $]; 807 } 808 break; 809 case '[': 810 Link link; 811 Attribute[] attributes; 812 if (parseLink(ln, link, linkrefs, 813 settings.flags & MarkdownFlags.attributes ? &attributes : null)) 814 { 815 attributes ~= Attribute("href", filterLink(link.url, false)); 816 if (link.title.length) 817 attributes ~= Attribute("title", link.title); 818 dst.writeTag(attributes, "a"); 819 writeMarkdownEscaped(dst, link.text, linkrefs, settings); 820 put(dst, "</a>"); 821 } else { 822 put(dst, ln[0]); 823 ln = ln[1 .. $]; 824 } 825 break; 826 case '!': 827 Link link; 828 Attribute[] attributes; 829 if (parseLink(ln, link, linkrefs, 830 settings.flags & MarkdownFlags.attributes ? 
&attributes : null)) 831 { 832 attributes ~= Attribute("src", filterLink(link.url, true)); 833 attributes ~= Attribute("alt", link.text); 834 if (link.title.length) 835 attributes ~= Attribute("title", link.title); 836 dst.writeTag(attributes, "img"); 837 } else if( ln.length >= 2 ){ 838 put(dst, ln[0 .. 2]); 839 ln = ln[2 .. $]; 840 } else { 841 put(dst, ln[0]); 842 ln = ln[1 .. $]; 843 } 844 break; 845 case '>': 846 if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, ">"); 847 else put(dst, ln[0]); 848 ln = ln[1 .. $]; 849 break; 850 case '<': 851 string url; 852 if (parseAutoLink(ln, url)) { 853 bool is_email = url.startsWith("mailto:"); 854 put(dst, "<a href=\""); 855 if (is_email) filterHTMLAllEscape(dst, url); 856 else filterHTMLAttribEscape(dst, filterLink(url, false)); 857 put(dst, "\">"); 858 if (is_email) filterHTMLAllEscape(dst, url[7 .. $]); 859 else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal); 860 put(dst, "</a>"); 861 } else { 862 if (ln.startsWith("<br>")) { 863 // always support line breaks, since we embed them here ourselves! 864 put(dst, "<br/>"); 865 ln = ln[4 .. $]; 866 } else if(ln.startsWith("<br/>")) { 867 put(dst, "<br/>"); 868 ln = ln[5 .. $]; 869 } else { 870 if (settings.flags & MarkdownFlags.noInlineHtml) 871 put(dst, "<"); 872 else put(dst, ln[0]); 873 ln = ln[1 .. 
$]; 874 } 875 } 876 break; 877 } 878 } 879 if (br) put(dst, "<br/>"); 880 } 881 882 private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions) 883 { 884 writeTag(dst, cast(Attribute[])null, name, name_additions); 885 } 886 887 private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions) 888 { 889 dst.formattedWrite("<%s", name); 890 foreach (add; name_additions) 891 dst.formattedWrite("%s", add); 892 foreach (a; attributes) { 893 dst.formattedWrite(" %s=\"", a.attribute); 894 dst.filterHTMLAttribEscape(a.value); 895 put(dst, '\"'); 896 } 897 put(dst, '>'); 898 } 899 900 private bool isLineBlank(string ln) 901 pure @safe { 902 return allOf(ln, " \t"); 903 } 904 905 private bool isSetextHeaderLine(string ln) 906 pure @safe { 907 ln = stripLeft(ln); 908 if (ln.length < 1) return false; 909 if (ln[0] == '=') { 910 while (!ln.empty && ln.front == '=') ln.popFront(); 911 return allOf(ln, " \t"); 912 } 913 if (ln[0] == '-') { 914 while (!ln.empty && ln.front == '-') ln.popFront(); 915 return allOf(ln, " \t"); 916 } 917 return false; 918 } 919 920 private bool isAtxHeaderLine(string ln) 921 pure @safe { 922 ln = stripLeft(ln); 923 size_t i = 0; 924 while (i < ln.length && ln[i] == '#') i++; 925 if (i < 1 || i > 6 || i >= ln.length) return false; 926 return ln[i] == ' '; 927 } 928 929 private bool isTableSeparatorLine(string ln) 930 pure @safe { 931 import std.algorithm.iteration : splitter; 932 933 ln = strip(ln); 934 if (ln.startsWith("|")) ln = ln[1 .. $]; 935 if (ln.endsWith("|")) ln = ln[0 .. $-1]; 936 937 auto cols = ln.splitter('|'); 938 size_t cnt = 0; 939 foreach (c; cols) { 940 c = c.strip(); 941 if (c.startsWith(':')) c = c[1 .. $]; 942 if (c.endsWith(':')) c = c[0 .. 
$-1]; 943 if (c.length < 3 || !c.allOf("-")) 944 return false; 945 cnt++; 946 } 947 return cnt >= 2; 948 } 949 950 unittest { 951 assert(isTableSeparatorLine("|----|---|")); 952 assert(isTableSeparatorLine("|:----:|---|")); 953 assert(isTableSeparatorLine("---|----")); 954 assert(isTableSeparatorLine("| --- | :---- |")); 955 assert(!isTableSeparatorLine("| ---- |")); 956 assert(!isTableSeparatorLine("| -- | -- |")); 957 assert(!isTableSeparatorLine("| --- - | ---- |")); 958 } 959 960 private auto getTableColumns(string line) 961 pure @safe nothrow { 962 import std.algorithm.iteration : map, splitter; 963 964 if (line.startsWith("|")) line = line[1 .. $]; 965 if (line.endsWith("|")) line = line[0 .. $-1]; 966 return line.splitter('|').map!(s => s.strip()); 967 } 968 969 private size_t countTableColumns(string line) 970 pure @safe { 971 return getTableColumns(line).count(); 972 } 973 974 private bool isHlineLine(string ln) 975 pure @safe { 976 if (allOf(ln, " -") && count(ln, '-') >= 3) return true; 977 if (allOf(ln, " *") && count(ln, '*') >= 3) return true; 978 if (allOf(ln, " _") && count(ln, '_') >= 3) return true; 979 return false; 980 } 981 982 private bool isQuoteLine(string ln) 983 pure @safe { 984 return ln.stripLeft().startsWith(">"); 985 } 986 987 private size_t getQuoteLevel(string ln) 988 pure @safe { 989 size_t level = 0; 990 ln = stripLeft(ln); 991 while (ln.length > 0 && ln[0] == '>') { 992 level++; 993 ln = stripLeft(ln[1 .. $]); 994 } 995 return level; 996 } 997 998 private bool isUListLine(string ln) 999 pure @safe { 1000 ln = stripLeft(ln); 1001 if (ln.length < 2) return false; 1002 if (!canFind("*+-", ln[0])) return false; 1003 if (ln[1] != ' ' && ln[1] != '\t') return false; 1004 return true; 1005 } 1006 1007 private bool isOListLine(string ln) 1008 pure @safe { 1009 ln = stripLeft(ln); 1010 if (ln.length < 1) return false; 1011 if (ln[0] < '0' || ln[0] > '9') return false; 1012 ln = ln[1 .. 
$]; 1013 while (ln.length > 0 && ln[0] >= '0' && ln[0] <= '9') 1014 ln = ln[1 .. $]; 1015 if (ln.length < 2) return false; 1016 if (ln[0] != '.') return false; 1017 if (ln[1] != ' ' && ln[1] != '\t') 1018 return false; 1019 return true; 1020 } 1021 1022 private string removeListPrefix(string str, LineType tp) 1023 pure @safe { 1024 switch (tp) { 1025 default: assert (false); 1026 case LineType.oList: // skip bullets and output using normal escaping 1027 auto idx = str.indexOf('.'); 1028 assert (idx > 0); 1029 return str[idx+1 .. $].stripLeft(); 1030 case LineType.uList: 1031 return stripLeft(str.stripLeft()[1 .. $]); 1032 } 1033 } 1034 1035 1036 private auto parseHtmlBlockLine(string ln) 1037 pure @safe { 1038 struct HtmlBlockInfo { 1039 bool isHtmlBlock; 1040 string tagName; 1041 bool open; 1042 } 1043 1044 HtmlBlockInfo ret; 1045 ret.isHtmlBlock = false; 1046 ret.open = true; 1047 1048 ln = strip(ln); 1049 if (ln.length < 3) return ret; 1050 if (ln[0] != '<') return ret; 1051 if (ln[1] == '/') { 1052 ret.open = false; 1053 ln = ln[1 .. $]; 1054 } 1055 import std.ascii : isAlpha; 1056 if (!isAlpha(ln[1])) return ret; 1057 ln = ln[1 .. $]; 1058 size_t idx = 0; 1059 while (idx < ln.length && ln[idx] != ' ' && ln[idx] != '>') 1060 idx++; 1061 ret.tagName = ln[0 .. idx]; 1062 ln = ln[idx .. 
$]; 1063 1064 auto eidx = ln.indexOf('>'); 1065 if (eidx < 0) return ret; 1066 if (eidx != ln.length-1) return ret; 1067 1068 if (!s_blockTags.canFind(ret.tagName)) return ret; 1069 1070 ret.isHtmlBlock = true; 1071 return ret; 1072 } 1073 1074 private bool isHtmlBlockLine(string ln) 1075 pure @safe { 1076 auto bi = parseHtmlBlockLine(ln); 1077 return bi.isHtmlBlock && bi.open; 1078 } 1079 1080 private bool isHtmlBlockCloseLine(string ln) 1081 pure @safe { 1082 auto bi = parseHtmlBlockLine(ln); 1083 return bi.isHtmlBlock && !bi.open; 1084 } 1085 1086 private bool isCodeBlockDelimiter(string ln) 1087 pure @safe { 1088 return ln.stripLeft.startsWith("```"); 1089 } 1090 1091 private string getHtmlTagName(string ln) 1092 pure @safe { 1093 return parseHtmlBlockLine(ln).tagName; 1094 } 1095 1096 private bool isLineIndented(string ln) 1097 pure @safe { 1098 return ln.startsWith("\t") || ln.startsWith(" "); 1099 } 1100 1101 private string unindentLine(string ln) 1102 pure @safe { 1103 if (ln.startsWith("\t")) return ln[1 .. $]; 1104 if (ln.startsWith(" ")) return ln[4 .. $]; 1105 assert (false); 1106 } 1107 1108 private int parseEmphasis(ref string str, ref string text) 1109 pure @safe { 1110 string pstr = str; 1111 if (pstr.length < 3) return false; 1112 1113 string ctag; 1114 if (pstr.startsWith("***")) ctag = "***"; 1115 else if (pstr.startsWith("**")) ctag = "**"; 1116 else if (pstr.startsWith("*")) ctag = "*"; 1117 else if (pstr.startsWith("___")) ctag = "___"; 1118 else if (pstr.startsWith("__")) ctag = "__"; 1119 else if (pstr.startsWith("_")) ctag = "_"; 1120 else return false; 1121 1122 pstr = pstr[ctag.length .. $]; 1123 1124 auto cidx = () @trusted { return pstr.indexOf(ctag); }(); 1125 if (cidx < 1) return false; 1126 1127 text = pstr[0 .. cidx]; 1128 1129 str = pstr[cidx+ctag.length .. 
$];
	return cast(int)ctag.length;
}

/**
	Tries to parse an inline code span at the start of `str`.

	The span is delimited by either one or two backticks; the closing delimiter
	must be the same as the opening one and at least one character has to sit
	between them.

	Params:
		str = Input; on success it is advanced past the closing delimiter,
			otherwise it is left untouched.
		code = Receives the text between the delimiters on success.

	Returns:
		`true` if a code span was found.
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	if (str.length < 3) return false;

	// the double backtick has to be tested before the single one
	string delim;
	if (str.startsWith("``")) {
		delim = "``";
	} else if (str.startsWith("`")) {
		delim = "`";
	} else {
		return false;
	}

	string rest = str[delim.length .. $];

	// a position of 0 would be an empty span, a negative one an unclosed span
	auto closing = () @trusted { return rest.indexOf(delim); }();
	if (closing < 1) return false;

	code = rest[0 .. closing];
	str = rest[closing+delim.length .. $];
	return true;
}

/**
	Tries to parse an inline link or image at the start of `str`.

	Both the inline form `[text](url "title")` and the reference form
	`[text][refid]` / `[text][]` are handled; an optional leading '!' (image
	prefix) is skipped.

	Params:
		str = Input; only advanced past the link when `true` is returned.
		dst = Receives text, URL and title. Note that its fields may already
			have been written to when `false` is returned.
		linkrefs = Reference definitions, looked up by lower-cased id.
		attributes = Optional destination for attributes; may be `null`.

	Returns:
		`true` if a link was parsed.
*/
private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes)
pure @safe {
	string pstr = str;
	if (pstr.length < 3) return false;
	// ignore img-link prefix
	if (pstr[0] == '!') pstr = pstr[1 .. $];

	// parse the text part [text]
	if (pstr[0] != '[') return false;
	auto cidx = pstr.matchBracket();
	if (cidx < 1) return false;
	string refid;
	dst.text = pstr[1 .. cidx];
	pstr = pstr[cidx+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if (pstr.length < 2) return false;
	if (pstr[0] == '(') {
		cidx = pstr.matchBracket();
		if (cidx < 1) return false;
		auto inner = pstr[1 .. cidx];
		// a title only starts at a '"' that is preceded by white space
		immutable qidx = inner.indexOf('"');
		import std.ascii : isWhite;
		if (qidx > 1 && inner[qidx - 1].isWhite()) {
			dst.url = inner[0 .. qidx].stripRight();
			// len is the offset of the last '"' relative to the opening one;
			// 0 means that the opening quote is never closed
			immutable len = inner[qidx .. $].lastIndexOf('"');
			if (len == 0) return false;
			assert (len > 0);
			dst.title = inner[qidx + 1 .. qidx + len];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}
		// <url> may be used to enclose the target
		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		pstr = pstr[cidx+1 .. 
$];

		// optional {...} attribute block directly following the inline link;
		// only consumed if the caller passed a destination for attributes
		if (attributes) {
			if (pstr.startsWith('{')) {
				auto idx = pstr.indexOf('}');
				if (idx > 0) {
					parseAttributeString(pstr[1 .. idx], *attributes);
					pstr = pstr[idx+1 .. $];
				}
			}
		}
	} else {
		// reference style link: [text][refid], [text] [refid] or [text][]
		if (pstr[0] == ' ') pstr = pstr[1 .. $];
		if (pstr[0] != '[') return false;
		pstr = pstr[1 .. $];
		cidx = pstr.indexOf(']');
		if (cidx < 0) return false;
		// an empty pair of brackets means that the link text is the reference id
		if (cidx == 0) refid = dst.text;
		else refid = pstr[0 .. cidx];
		pstr = pstr[cidx+1 .. $];
	}

	if (refid.length > 0) {
		// reference ids are looked up in lower case
		auto pr = toLower(refid) in linkrefs;
		if (!pr) {
			debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = pr.url;
		dst.title = pr.title;
		if (attributes) *attributes ~= pr.attributes;
	}

	// the caller's string is only advanced once the whole link has been parsed
	str = pstr;
	return true;
}

// parseLink: inline links, reference links, titles and malformed input
@safe unittest
{
	// asserts that s parses as a link and yields exactly exp
	static void testLink(string s, Link exp, in LinkRef[string] refs)
	{
		Link link;
		assert (parseLink(s, link, refs, null), s);
		assert (link == exp);
	}
	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	// inline links with and without title
	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title" )`, Link("link", "target", "title"), null);

	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);

	// reference links, explicit and implicit id
	testLink(`[link][ref]`, Link("link", "target", "title"), refs);
	testLink(`[ref][]`, Link("ref", "target", "title"), refs);

	// nested brackets within the link text
	testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
	testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

	// NOTE(review): the next literal contains a line break in this copy of
	// the file - presumably an extraction artifact; confirm against upstream
	testLink(`[link](/target with spaces 
)`, Link("link", "/target with spaces"), null);
	testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

	testLink(`[link](white-space "around title" )`, Link("link", "white-space", "around title"), null);
	testLink(`[link](tabs "around title" )`, Link("link", "tabs", "around title"), null);

	// empty title and a quote that does not start a title
	testLink(`[link](target "")`, Link("link", "target", ""), null);
	testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

	// target enclosed in angle brackets
	testLink(`[link](<target>)`, Link("link", "target"), null);

	// none of these may be accepted as a link
	auto failing = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link link;
	foreach (s; failing)
		assert (!parseLink(s, link, refs, null), s);
}

@safe unittest { // attributes
	// parses s and checks the unparsed remainder, the link and the attributes;
	// attributes are only requested from parseLink if parse_atts is set
	void test(string s, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...)
	@safe {
		Link lnk;
		Attribute[] atts;
		parseLink(s, lnk, refs, parse_atts ? 
() @trusted { return &atts; } () : null);
		assert (lnk == explnk);
		assert (s == exprem);
		assert (atts == expatts);
	}

	// without an attribute destination the {...} block is left in the input
	test("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", ""));
	test("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz"));

	// attributes stored with a reference definition are passed on
	auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])];
	test("[foo][bar]", refs, false, "", Link("foo", "url", "title"));
	test("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid"));
}

/**
	Tries to parse an automatic link of the form `<target>` at the start of
	`str`.

	The target must not contain spaces or tabs and has to contain at least one
	'@' or ':'. If it contains an '@' and does not start with an alphanumeric
	scheme followed by ':' in front of the '@', "mailto:" is prepended.

	Params:
		str = Input; advanced past the closing '>' on success.
		url = Receives the link target. Note that it is already assigned before
			the target is validated, so it may be modified even if `false` is
			returned.

	Returns:
		`true` if an automatic link was parsed.
*/
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	import std.algorithm.searching : all;
	import std.ascii : isAlphaNum;

	string pstr = str;
	if (pstr.length < 3) return false;
	if (pstr[0] != '<') return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if (cidx < 0) return false;

	url = pstr[0 .. cidx];
	// anyOf is not defined in this part of the file - presumably it reports
	// whether any of the given characters occurs in url; verify
	if (url.anyOf(" \t")) return false;
	auto atidx = url.indexOf('@');
	auto colonidx = url.indexOf(':');
	if (atidx < 0 && colonidx < 0) return false;

	str = pstr[cidx+1 .. $];
	// no '@': a plain URL that is used as is
	if (atidx < 0) return true;
	// '@' present: treat as an e-mail address unless an alphanumeric scheme
	// followed by ':' precedes the '@'
	if (colonidx < 0 || colonidx > atidx ||
		!url[0 .. 
colonidx].all!(ch => ch.isAlphaNum))
		url = "mailto:" ~ url;
	return true;
}

// parseAutoLink: URLs, e-mail addresses and unterminated input
unittest {
	// on failure neither the input nor the output URL may have been changed,
	// on success the whole input has to be consumed
	void test(bool expected, string str, string url)
	{
		string strcpy = str;
		string outurl;
		if (!expected) {
			assert (!parseAutoLink(strcpy, outurl));
			assert (outurl.length == 0);
			assert (strcpy == str);
		} else {
			assert (parseAutoLink(strcpy, outurl));
			assert (outurl == url);
			assert (strcpy.length == 0);
		}
	}

	test(true, "<http://foo/>", "http://foo/");
	test(false, "<http://foo/", null);
	test(true, "<mailto:foo@bar>", "mailto:foo@bar");
	test(true, "<foo@bar>", "mailto:foo@bar");
	test(true, "<proto:foo@bar>", "proto:foo@bar");
	test(true, "<proto:foo@bar:123>", "proto:foo@bar:123");
	// the part before the ':' is not alphanumeric, so this is an e-mail address
	test(true, "<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz");
}

/**
	Splits a trailing `{...}` attribute block off a line.

	Trailing white space is ignored when looking for the closing '}'. The block
	starts at the last '{' of the line.

	Params:
		line = The line to inspect; if a block is found, it is shortened to the
			text in front of the '{', otherwise it is left untouched.

	Returns:
		The text between the braces, or `null` if the line does not end with
		an attribute block.
*/
private string skipAttributes(ref string line)
@safe pure {
	auto strs = line.stripRight;
	if (!strs.endsWith("}")) return null;

	auto idx = strs.lastIndexOf('{');
	if (idx < 0) return null;

	auto ret = strs[idx+1 .. $-1];
	line = strs[0 .. 
idx];
	return ret;
}

// skipAttributes: only a {...} block at the very end of the line is removed
unittest {
	void test(string inp, string outp, string att)
	{
		auto ratt = skipAttributes(inp);
		assert (ratt == att, ratt);
		assert (inp == outp, inp);
	}

	test(" foo ", " foo ", null);
	test("foo {bar}", "foo ", "bar");
	test("foo {bar} ", "foo ", "bar");
	test("foo bar} ", "foo bar} ", null);
	test(" {bar} foo ", " {bar} foo ", null);
	test(" fo {o {bar} ", " fo {o ", "bar");
	test(" fo {o} {bar} ", " fo {o} ", "bar");
}

/**
	Parses the contents of a `{...}` attribute block.

	The string is split into white space separated tokens. A token of the form
	`#name` sets the "id" attribute, replacing a previous id; a token of the
	form `.name` adds a class, appending to an existing "class" attribute with
	a space as separator. All other tokens are ignored.

	Params:
		attributes = Text between the braces; may be empty or `null`.
		dst = Attribute list that is updated in place. Existing "id" and
			"class" entries are modified rather than duplicated.
*/
private void parseAttributeString(string attributes, ref Attribute[] dst)
@safe pure {
	import std.algorithm.iteration : splitter;
	import std.ascii : isWhite;

	// TODO: handle custom attributes (requires a different approach than splitter)

	// Split on any ASCII white space rather than just ' ', so that tab
	// separated tokens do not end up merged into a single id/class value.
	// Consecutive separators yield empty tokens, which are skipped below.
	foreach (el; attributes.splitter!(ch => isWhite(ch))) {
		el = el.strip;
		if (!el.length) continue;
		if (el[0] == '#') {
			// there can only be one id - the last one wins
			auto idx = dst.countUntil!(a => a.attribute == "id");
			if (idx >= 0) dst[idx].value = el[1 .. $];
			else dst ~= Attribute("id", el[1 .. $]);
		} else if (el[0] == '.') {
			// classes accumulate within a single "class" attribute
			auto idx = dst.countUntil!(a => a.attribute == "class");
			if (idx >= 0) dst[idx].value ~= " " ~ el[1 .. $];
			else dst ~= Attribute("class", el[1 .. $]);
		}
	}
}

// parseAttributeString: ids replace each other, classes accumulate
unittest {
	void test(string str, Attribute[] atts...) 
{
		Attribute[] res;
		parseAttributeString(str, res);
		assert (res == atts, format("%s: %s", str, res));
	}

	test("");
	test(".foo", Attribute("class", "foo"));
	test("#foo", Attribute("id", "foo"));
	// a later id replaces an earlier one
	test("#foo #bar", Attribute("id", "bar"));
	// classes are joined with a space
	test(".foo .bar", Attribute("class", "foo bar"));
	test("#foo #bar", Attribute("id", "bar"));
	// the order of the result follows the first occurrence of each attribute
	test(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar"));
}

/**
	Collects all link reference definitions and removes them from the input.

	Params:
		lines = All lines of the document; replaced by a new array that
			contains only the lines that are not reference definitions.

	Returns:
		The reference definitions found, keyed by lower-cased reference id.
*/
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	// indices of all lines that were recognized as reference definitions
	bool[size_t] reflines;

	// search for reference definitions:
	// [refid]: link "opt text"
	// [refid]: <link> "opt text"
	// the title may be written as "opt text", 'opt text' or (opt text)
	// line must not be indented
	foreach (lnidx, ln; lines) {
		if (isLineIndented(ln)) continue;
		ln = strip(ln);
		if (!ln.startsWith("[")) continue;
		ln = ln[1 .. $];

		// the reference id extends up to the first "]:"
		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if (idx < 0) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		// a trailing {...} block is split off before URL and title are parsed
		string attstr = ln.skipAttributes();

		string url;
		if (ln.startsWith("<")) {
			// <link>: the URL is everything between the angle brackets
			idx = ln.indexOf('>');
			if (idx < 0) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// otherwise the URL ends at the first space, or else at the first
			// tab, or else at the end of the line
			idx = ln.indexOf(' ');
			if (idx > 0) {
				url = ln[0 .. idx];
				ln = ln[idx+1 .. $];
			} else {
				idx = ln.indexOf('\t');
				if (idx < 0) {
					url = ln;
					ln = ln[$ .. $];
				} else {
					url = ln[0 .. idx];
					ln = ln[idx+1 .. $];
				}
			}
		}
		ln = stripLeft(ln);

		// whatever is left is the title, if it is properly delimited
		string title;
		if (ln.length >= 3) {
			if (ln[0] == '(' && ln[$-1] == ')'
				|| ln[0] == '\"' && ln[$-1] == '\"'
				|| ln[0] == '\'' && ln[$-1] == '\'' )
			{
				title = ln[1 .. 
$-1];
			}
		}

		LinkRef lref;
		lref.id = refid;
		lref.url = url;
		lref.title = title;
		parseAttributeString(attstr, lref.attributes);
		// keyed by lower-cased id; a later definition overwrites an earlier one
		ret[toLower(refid)] = lref;
		reflines[lnidx] = true;

		debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach (i, ln; lines)
		if (i !in reflines)
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}


/**
	Generates an identifier suitable for use within a URL.

	The resulting string will contain only ASCII lower case alphabetic or
	numeric characters, as well as dashes (-). Every sequence of
	non-alphanumeric characters will be replaced by a single dash. No dashes
	will be at either the front or the back of the result string.

	The result is a lazily evaluated range of `char` that wraps the input.
*/
auto asSlug(R)(R text)
	if (isInputRange!R && is(typeof(R.init.front) == dchar))
{
	static struct SlugRange {
		private {
			R _input;
			// set while a '-' still has to be emitted before the next character
			bool _dash;
		}

		this(R input)
		{
			_input = input;
			// skip leading non-alphanumerics without emitting a dash
			skipNonAlphaNum();
		}

		@property bool empty() const { return _dash ? false : _input.empty; }
		@property char front() const {
			if (_dash) return '-';

			// skipNonAlphaNum only ever stops at ASCII letters and digits
			char r = cast(char)_input.front;
			// convert upper case letters to lower case
			if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A'));
			return r;
		}

		void popFront()
		{
			// the pending dash is a virtual element, the input stays as it is
			if (_dash) {
				_dash = false;
				return;
			}

			_input.popFront();
			auto na = skipNonAlphaNum();
			// a dash is only emitted if more characters follow, so the result
			// never ends with a dash
			if (na && !_input.empty)
				_dash = true;
		}

		// advances _input to the next ASCII letter or digit and returns
		// whether any characters were skipped
		private bool skipNonAlphaNum()
		{
			bool have_skipped = false;
			while (!_input.empty) {
				switch (_input.front) {
					default:
						_input.popFront();
						have_skipped = true;
						break;
					case 'a': .. case 'z':
					case 'A': .. 
case 'Z':
					case '0': .. case '9':
						return have_skipped;
				}
			}
			return have_skipped;
		}
	}
	return SlugRange(text);
}

// asSlug: case folding, dash insertion and trimming at both ends
unittest {
	import std.algorithm : equal;
	assert ("".asSlug.equal(""));
	assert (".,-".asSlug.equal(""));
	assert ("abc".asSlug.equal("abc"));
	assert ("aBc123".asSlug.equal("abc123"));
	assert ("....aBc...123...".asSlug.equal("abc-123"));
}

/// A link reference definition as collected by `scanForReferences`.
private struct LinkRef {
	/// Reference id as written in the definition
	string id;
	/// Link target
	string url;
	/// Title text, without its delimiters
	string title;
	/// Attributes parsed from a trailing `{...}` block
	Attribute[] attributes;
}

/// The parts of a link or image as filled in by `parseLink`.
private struct Link {
	/// Text between the square brackets
	string text;
	/// Link target
	string url;
	/// Title text, without the quotes
	string title;
}

@safe unittest { // alt and title attributes
	assert (filterMarkdown("![alt](http://example.org/image)")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
	assert (filterMarkdown("![alt](http://example.org/image \"Title\")")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}

@safe unittest { // complex links
	// link text spanning a line break and a target in angle brackets
	assert (filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and")
		== "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
	// image nested within a link
	assert (filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
		== "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}

@safe unittest { // check CTFE-ability
	// "enum" forces the whole filter to be evaluated at compile time
	enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	assert (res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}

@safe unittest { // correct line breaks in restrictive mode
	auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert (res == "<p>hello<br/>world\n</p>\n", 
res);
}

// NOTE: the following unittest is commented out in the source
/*@safe unittest { // code blocks and blockquotes
	assert (filterMarkdown("\tthis\n\tis\n\tcode") ==
		"<pre><code>this\nis\ncode</code></pre>\n");
	assert (filterMarkdown(" this\n is\n code") ==
		"<pre><code>this\nis\ncode</code></pre>\n");
	assert (filterMarkdown(" this\n is\n\tcode") ==
		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
	assert (filterMarkdown("\tthis\n\n\tcode") ==
		"<pre><code>this\n\ncode</code></pre>\n");
	assert (filterMarkdown("\t> this") ==
		"<pre><code>> this</code></pre>\n");
	assert (filterMarkdown("> this") ==
		"<blockquote><pre><code>this</code></pre></blockquote>\n");
	assert (filterMarkdown("> this\n is code") ==
		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
}*/

// headings get an id that is derived from the heading text
@safe unittest {
	assert (filterMarkdown("## Hello, World!") == "<h2 id=\"hello-world\"> Hello, World!</h2>\n", filterMarkdown("## Hello, World!"));
}

@safe unittest { // tables
	// header row only
	assert (filterMarkdown("foo|bar\n---|---", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n</table>\n");
	// inline markup and surrounding white space within cells
	assert (filterMarkdown(" *foo* | bar \n---|---\n baz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th><em>foo</em></th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	// optional leading/trailing pipes in the header, separator or body row
	assert (filterMarkdown("|foo|bar|\n---|---\n baz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	assert (filterMarkdown("foo|bar\n|---|---|\nbaz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	assert (filterMarkdown("foo|bar\n---|---\n|baz|bam|", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	// column alignment markers in the separator row
	assert (filterMarkdown("foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|", 
MarkdownFlags.tables)
		== "<table>\n<tr><th align=\"left\">foo</th><th align=\"right\">bar</th><th align=\"center\">baz</th></tr>\n"
		~ "<tr><td align=\"left\">baz</td><td align=\"right\">bam</td><td align=\"center\">bap</td></tr>\n</table>\n");
	// empty header cell
	assert (filterMarkdown(" |bar\n---|---", MarkdownFlags.tables)
		== "<table>\n<tr><th></th><th>bar</th></tr>\n</table>\n");
	// body row with fewer cells than the header
	assert (filterMarkdown("foo|bar\n---|---\nbaz|", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td></tr>\n</table>\n");
}

@safe unittest { // issue #1527 - blank lines in code blocks
	// an indented code block requires four spaces (or a tab) of indentation
	assert (filterMarkdown("    foo\n\n    bar\n") ==
		"<pre class=\"prettyprint\"><code>foo\n\nbar\n</code></pre>\n");
}

// fenced code block nested in a block quote, using CRLF line endings
@safe unittest {
	assert (filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault) ==
		"<blockquote><pre class=\"prettyprint\"><code>test\n</code></pre>\n</blockquote>\n");
}

@safe unittest { // issue #1845 - malicious URI targets
	// javascript: targets are replaced by "#" for links, images and auto links
	assert (filterMarkdown("[foo](javascript:foo) ![bar](javascript:bar) <javascript:baz>", MarkdownFlags.forumDefault) ==
		"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"bar\"> <a href=\"#\">javascript:baz</a>\n</p>\n");
	assert (filterMarkdown("[foo][foo] ![foo][foo]\n[foo]: javascript:foo", MarkdownFlags.forumDefault) ==
		"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"foo\">\n</p>\n");
	// a percent-encoded colon does not form a scheme and is passed through
	assert (filterMarkdown("[foo](javascript%3Abar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"javascript%3Abar\">foo</a>\n</p>\n");

	// extra XSS regression tests
	// markup within the link text must come out HTML-escaped
	assert (filterMarkdown("[<script></script>](bar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"bar\">&lt;script&gt;&lt;/script&gt;</a>\n</p>\n");
	// quotes and markup within the target must not break out of the attribute
	assert (filterMarkdown("[foo](\"><script></script><span foo=\")", MarkdownFlags.forumDefault) ==
		"<p><a href=\"&quot;&gt;&lt;script&gt;&lt;/script&gt;&lt;span foo=&quot;\">foo</a>\n</p>\n");
	// NOTE(review): the input below has a plain ':' while the expected output
	// that follows contains the entity "&#58;" - the input was presumably
	// "javascript&#58;bar" before extraction; confirm against upstream
	assert (filterMarkdown("[foo](javascript:bar)", 
MarkdownFlags.forumDefault) ==
		"<p><a href=\"javascript&#58;bar\">foo</a>\n</p>\n");
	// NOTE(review): the expectation above contains the entity "&#58;" although
	// the input of this assertion has a plain ':' - this looks like an
	// entity-decoding artifact of this copy; confirm against upstream
}

@safe unittest { // issue #2132 - table with more columns in body goes out of array bounds
	assert (filterMarkdown("| a | b |\n|--------|--------|\n| c | d | e |", MarkdownFlags.tables) ==
		"<table>\n<tr><th>a</th><th>b</th></tr>\n<tr><td>c</td><td>d</td><td>e</td></tr>\n</table>\n");
}

@safe unittest { // lists
	// items that are separated by a blank line get wrapped in paragraphs
	assert (filterMarkdown("- foo\n- bar") ==
		"<ul>\n<li>foo\n</li>\n<li>bar\n</li>\n</ul>\n");
	assert (filterMarkdown("- foo\n\n- bar") ==
		"<ul>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ul>\n");
	assert (filterMarkdown("1. foo\n2. bar") ==
		"<ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n");
	assert (filterMarkdown("1. foo\n\n2. bar") ==
		"<ol>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ol>\n");
}

@safe unittest { // figures
	// without MarkdownFlags.figures the markers are ordinary list items
	assert (filterMarkdown("- %%%") == "<ul>\n<li>%%%\n</li>\n</ul>\n");
	assert (filterMarkdown("- ###") == "<ul>\n<li>###\n</li>\n</ul>\n");
	// with the flag set, "- %%%" opens a figure and "- ###" a figure caption
	assert (filterMarkdown("- %%%", MarkdownFlags.figures) == "<figure></figure>\n");
	assert (filterMarkdown("- ###", MarkdownFlags.figures) == "<figcaption></figcaption>\n");
	// indented content, including a nested caption, becomes the figure body
	assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar", MarkdownFlags.figures) ==
		"<figure>foo\n<figcaption>bar\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure><p>foo\n</p>\n<p>bar\n</p>\n<figcaption>baz\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure>foo\n<figcaption><p>bar\n</p>\n<p>baz\n</p>\n</figcaption>\n</figure>\n");
	// NOTE(review): the next input literal contains a line break in this copy
	// of the file - presumably an extraction artifact; confirm against upstream
	assert (filterMarkdown("- %%%\n\t1. foo\n\t2. 
bar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure><ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n<figcaption>baz\n</figcaption>\n</figure>\n");
	// a figure marker that follows regular list items ends the list
	assert (filterMarkdown("- foo\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n");
	assert (filterMarkdown("- foo\n\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n");
}

@safe unittest { // HTML entities
	// NOTE(review): the inputs below contain a blank where the last expected
	// output contains "&nbsp;" - the entities in this test look like they were
	// decoded during extraction; confirm the literals against upstream
	assert(filterMarkdown(" ") == "<p> \n</p>\n");
	assert(filterMarkdown("* *") == "<p><em> </em>\n</p>\n");
	assert(filterMarkdown("` `") == "<p><code class=\"prettyprint\">&nbsp;</code>\n</p>\n");
}