vibe.textfilter.markdown source code

1 /**
2 	Markdown parser implementation
3 
4 	Copyright: © 2012-2019 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.textfilter.markdown;
9 
10 import vibe.core.log;
11 import vibe.textfilter.html;
12 import vibe.utils.string;
13 
14 import std.algorithm : canFind, countUntil, min;
15 import std.array;
16 import std.format;
17 import std.range;
18 import std.string;
19 
20 /*
21 	TODO:
22 		detect inline HTML tags
23 */
24 
version(MarkdownTest)
{
	/// Stand-alone test driver: renders the contents of `test.txt` and logs
	/// the generated HTML line by line.
	int main()
	{
		import std.file;
		setLogLevel(LogLevel.Trace);
		auto text = readText("test.txt");
		auto result = appender!string();
		filterMarkdown(result, text);
		foreach( ln; splitLines(result.data) ) {
			// The first argument of logInfo is a format string, so the
			// rendered HTML must be passed as an argument - otherwise any
			// '%' in the output would be interpreted as a format specifier.
			logInfo("%s", ln);
		}
		return 0;
	}
}
39 
/** Converts Markdown source text to HTML and returns the result as a string.
*/
string filterMarkdown()(string str, MarkdownFlags flags)
@trusted { // scope class is not @safe for DMD 2.072
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;

	auto html = appender!string();
	filterMarkdown(html, str, cfg);
	return html.data;
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	auto html = appender!string();
	html.filterMarkdown(str, settings);
	return html.data;
}
55 
56 
/** Converts Markdown source text to HTML and writes the result to an output range.
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	dst.filterMarkdown(src, cfg);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	// fall back to the default configuration if the caller supplied none
	if (settings is null) settings = new MarkdownSettings;

	auto source_lines = src.splitLines();
	// link references are collected before the lines get classified
	auto references = scanForReferences(source_lines);
	auto classified = parseLines(source_lines, settings);

	Block document;
	parseBlocks(document, classified, null, settings);
	writeBlock(dst, document, references, settings);
}
77 
/**
	Extracts the heading structure of a Markdown document.

	Only header blocks at the top level of the document are considered. Each
	heading becomes a sub section of the closest preceding heading chain that
	has a strictly lower heading level.

	Returns:
		The top-level sections, each carrying its nested sub sections.
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
	import std.conv : to;

	if (settings is null) settings = new MarkdownSettings;

	auto classified = parseLines(splitLines(markdown_source), settings);
	Block document;
	parseBlocks(document, classified, null, settings);

	Section outline;
	foreach (ref blk; document.blocks) {
		if (blk.type != BlockType.header) continue;

		// walk down the most recently added sections for as long as they
		// are of a lower level than the new heading
		auto parent = &outline;
		while (parent.subSections.length > 0
			&& parent.subSections[$-1].headingLevel < blk.headerLevel)
		{
			parent = &parent.subSections[$-1];
		}

		auto caption = blk.text[0];
		parent.subSections ~= Section(blk.headerLevel, caption, caption.asSlug.to!string);
	}

	return outline.subSections;
}
106 
///
unittest {
	auto outline = getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth");
	auto expected = [
		Section(2, " first", "first"),
		Section(2, " second", "second", [
			Section(3, " third", "third")
		]),
		Section(1, " fourth", "fourth", [
			Section(3, " fifth", "fifth")
		])
	];
	assert (outline == expected);
}
122 
/** Bundles the options that control Markdown processing.
*/
final class MarkdownSettings {
	/// Set of optional parser features to enable.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/// Heading level that is used for first-level headings; all other
	/// heading levels are shifted by the same amount.
	size_t headingBaseLevel = 1;

	/// Optional hook that receives every accepted link/image URL and returns
	/// the URL that is actually written to the output.
	string delegate(string url_or_path, bool is_image) urlFilter;

	/// White list of URI schemes that may occur in link/image targets
	string[] allowedURISchemas = ["http", "https", "ftp", "mailto"];
}
136 
/** Bit flags that enable optional parser features.
*/
enum MarkdownFlags {
	/** No optional features enabled; identical to `vanillaMarkdown`
	*/
	none = 0,

	/** Turn line breaks of the input into hard line breaks

		This is useful for text that was possibly written as plain text
		without Markdown in mind, such as e-mails or newsgroup posts, and
		still improves the appearance of such text in many cases.
	*/
	keepLineBreaks = 1<<0,

	/** Enable fenced code blocks.
	*/
	backtickCodeBlocks = 1<<1,

	/** Turn off support for embedded HTML
	*/
	noInlineHtml = 1<<2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,

	/** Enable table definitions

		The syntax follows Markdown Extra and GitHub flavored Markdown.
	*/
	tables = 1<<5,

	/** Enable HTML attributes after links

		A link or image that is directly followed by `{ … }` can carry
		regular HTML attributes that get added to the generated HTML element.
	*/
	attributes = 1<<6,

	/** Enable figure definitions

		Figures are written using a modified list syntax:

		```
		- %%%
			This is the figure content

			- ###
				This is optional caption content
		```

		As with lists, figure and figure caption blocks may contain arbitrary
		nested blocks. A figure caption block that consists of a single
		paragraph is emitted without the surrounding `<p>` tags. The same
		applies to figure blocks that contain a single paragraph plus any
		number of figure caption blocks.
	*/
	figures = 1<<7,

	/** Only standard Markdown features

		The parser is not fully CommonMark compliant at the moment, but this
		is the general idea behind this option.
	*/
	vanillaMarkdown = none,

	/** Default flag combination suitable for an online forum
	*/
	forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml|tables
}
206 
/// A single heading of a document outline, as returned by `getMarkdownOutline`.
struct Section {
	/// Level of the heading
	size_t headingLevel;
	/// Heading text
	string caption;
	/// Slug generated from the heading text
	string anchor;
	/// Headings nested below this one
	Section[] subSections;
}
213 
private {
	// names of the HTML tags that are recognized as the start/end of an HTML block
	immutable s_blockTags = [
		"div", "ol", "p", "pre", "section", "table", "ul"
	];
}
217 
// Kind of a single indentation level of a line
private enum IndentType {
	white, // one tab or four spaces
	quote  // a '>' quote marker
}
222 
// Classification of a single source line, as assigned by parseLines()
private enum LineType {
	undefined,
	blank,
	plain,
	hline,
	atxHeader,
	setextHeader,
	tableSeparator,
	uList,
	oList,
	figure,
	figureCaption,
	htmlBlock,
	codeBlockDelimiter
}
238 
// A source line together with its classification and indentation levels
private struct Line {
	LineType type;
	IndentType[] indent;
	string text;        // the full, unmodified line
	string unindented;  // the line with all indentation levels removed

	// Returns `text` with the first `n` indentation levels removed.
	string unindent(size_t n)
	pure @safe {
		assert (n <= indent.length);
		string rest = text;
		for (size_t level = 0; level < n; level++) {
			final switch (indent[level]) {
				case IndentType.white:
					// a white level is either four spaces or a single tab
					rest = rest[0] == ' ' ? rest[4 .. $] : rest[1 .. $];
					break;
				case IndentType.quote:
					// drop the '>' marker and one optional following space
					rest = rest.stripLeft()[1 .. $];
					if (rest.startsWith(' '))
						rest = rest[1 .. $];
					break;
			}
		}
		return rest;
	}
}
265 
// Splits off the indentation levels of each line and classifies the remainder.
private Line[] parseLines(string[] lines, scope MarkdownSettings settings)
pure @safe {
	// Determines the type of a line from its unindented text.
	static LineType classify(string ln, MarkdownFlags flags)
	pure @safe {
		if ((flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(ln))
			return LineType.codeBlockDelimiter;
		if (isAtxHeaderLine(ln)) return LineType.atxHeader;
		if (isSetextHeaderLine(ln)) return LineType.setextHeader;
		if ((flags & MarkdownFlags.tables) && isTableSeparatorLine(ln))
			return LineType.tableSeparator;
		if (isHlineLine(ln)) return LineType.hline;
		if (isOListLine(ln)) return LineType.oList;
		if (isUListLine(ln)) {
			if (!(flags & MarkdownFlags.figures)) return LineType.uList;
			// figures are written as list items with a special marker
			switch (removeListPrefix(ln, LineType.uList)) {
				case "%%%": return LineType.figure;
				case "###": return LineType.figureCaption;
				default: return LineType.uList;
			}
		}
		if (isLineBlank(ln)) return LineType.blank;
		if (!(flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(ln))
			return LineType.htmlBlock;
		return LineType.plain;
	}

	Line[] result;
	foreach (raw; lines) {
		Line info;
		info.text = raw;

		// peel off one indentation level per iteration
		auto rest = raw;
		while (rest.length > 0) {
			if (rest[0] == '\t') {
				info.indent ~= IndentType.white;
				rest = rest[1 .. $];
				continue;
			}
			if (rest.startsWith("    ")) {
				info.indent ~= IndentType.white;
				rest = rest[4 .. $];
				continue;
			}

			auto trimmed = rest.stripLeft();
			if (!trimmed.startsWith(">")) break;

			// quote marker, optionally followed by a single space
			info.indent ~= IndentType.quote;
			rest = trimmed[1 .. $];
			if (rest.startsWith(' '))
				rest = rest[1 .. $];
		}
		info.unindented = rest;
		info.type = classify(rest, settings.flags);

		result ~= info;
	}
	return result;
}
319 
unittest {
	static struct Case {
		string input;
		LineType type;
		IndentType[] indent;
		string unindented;
	}

	auto s = new MarkdownSettings;
	s.flags = MarkdownFlags.forumDefault;

	auto cases = [
		Case(">```D", LineType.codeBlockDelimiter, [IndentType.quote], "```D"),
		Case("> ```D", LineType.codeBlockDelimiter, [IndentType.quote], "```D"),
		Case(">    ```D", LineType.codeBlockDelimiter, [IndentType.quote], "   ```D"),
		Case(">     ```D", LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], "```D"),
		Case(">test", LineType.plain, [IndentType.quote], "test"),
		Case("> test", LineType.plain, [IndentType.quote], "test"),
		Case(">    test", LineType.plain, [IndentType.quote], "   test"),
		Case(">     test", LineType.plain, [IndentType.quote, IndentType.white], "test")
	];

	foreach (c; cases)
		assert (parseLines([c.input], s) == [Line(c.type, c.indent, c.input, c.unindented)]);
}
341 
// Kind of a node in the parsed block tree; selects how writeBlock() renders it
private enum BlockType {
	plain,
	text,
	paragraph,
	header,
	table,
	oList,
	uList,
	listItem,
	code,
	quote,
	figure,
	figureCaption
}
356 
// Node of the parsed block tree
private struct Block {
	BlockType type;
	Attribute[] attributes; // HTML attributes (written for header blocks)
	string[] text;          // text lines that belong directly to this block
	Block[] blocks;         // nested blocks
	size_t headerLevel;     // only used for header blocks
	Alignment[] columns;    // column alignments, only used for table blocks
}
365 
// Name/value pair of an HTML attribute
private struct Attribute {
	string attribute;
	string value;
}
370 
// Alignment of a table column; `left` and `right` are bits that combine to `center`
private enum Alignment {
	none = 0,
	left = 1<<0,
	right = 1<<1,
	center = left | right
}
377 
/*
	Groups classified lines into a tree of blocks.

	Lines are consumed from the front of `lines` for as long as their
	indentation starts with `base_indent`; the resulting blocks are appended
	to `root.blocks`. The function returns as soon as it encounters a
	non-blank line whose indentation does not start with `base_indent`,
	leaving that line in `lines`.

	Params:
		root = Block that receives the parsed child blocks
		lines = Remaining classified lines; consumed lines are removed
		base_indent = Indentation levels shared by all lines of `root`
		settings = Parser settings (only `flags` is read here)
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	import std.conv : to;

	if (base_indent.length == 0) root.type = BlockType.text;
	else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote;

	while (!lines.empty) {
		auto ln = lines.front;

		if (ln.type == LineType.blank) {
			lines.popFront();
			continue;
		}

		if (ln.indent != base_indent) {
			// a line outside of the current indentation ends this block
			if (ln.indent.length < base_indent.length
				|| ln.indent[0 .. base_indent.length] != base_indent)
			{
				return;
			}

			auto cindent = base_indent ~ IndentType.white;
			if (ln.indent == cindent) {
				// exactly one additional white indentation level: code block
				Block cblock;
				cblock.type = BlockType.code;
				while (!lines.empty && (lines.front.unindented.strip.empty
					|| lines.front.indent.length >= cindent.length
					&& lines.front.indent[0 .. cindent.length] == cindent))
				{
					cblock.text ~= lines.front.indent.length >= cindent.length
						? lines.front.unindent(cindent.length) : "";
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				// any other deeper indentation is parsed as a nested block
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			final switch (ln.type) {
				case LineType.undefined: assert (false);
				case LineType.blank: assert (false);
				case LineType.plain:
					if (lines.length >= 2 && lines[1].type == LineType.setextHeader) {
						// text line followed by an underline: setext header
						auto setln = lines[1].unindented;
						b.type = BlockType.header;
						b.text = [ln.unindented];
						if (settings.flags & MarkdownFlags.attributes)
							parseAttributeString(skipAttributes(b.text[0]), b.attributes);
						if (!b.attributes.canFind!(a => a.attribute == "id"))
							b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string);
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator
						&& ln.unindented.indexOf('|') >= 0)
					{
						// heading row followed by a separator line: table
						auto setln = lines[1].unindented;
						b.type = BlockType.table;
						b.text = [ln.unindented];
						foreach (c; getTableColumns(setln)) {
							Alignment a = Alignment.none;
							if (c.startsWith(':')) a |= Alignment.left;
							if (c.endsWith(':')) a |= Alignment.right;
							b.columns ~= a;
						}

						lines.popFrontN(2);
						while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) {
							b.text ~= lines.front.unindented;
							lines.popFront();
						}
					} else {
						b.type = BlockType.paragraph;
						b.text = skipText(lines, base_indent);
					}
					break;
				case LineType.hline:
					b.type = BlockType.plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.atxHeader:
					b.type = BlockType.header;
					// isAtxHeaderLine() tolerates leading white space, so it
					// has to be removed before counting the '#' characters -
					// otherwise the header level would stay at zero
					string hl = ln.unindented.stripLeft();
					b.headerLevel = 0;
					while (hl.length > 0 && hl[0] == '#') {
						b.headerLevel++;
						hl = hl[1 .. $];
					}

					if (settings.flags & MarkdownFlags.attributes)
						parseAttributeString(skipAttributes(hl), b.attributes);
					if (!b.attributes.canFind!(a => a.attribute == "id"))
						b.attributes ~= Attribute("id", asSlug(hl).to!string);

					// remove the optional closing '#' sequence
					while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.setextHeader:
					// underline without a preceding text line
					lines.popFront();
					break;
				case LineType.tableSeparator:
					// separator line without a preceding heading row
					lines.popFront();
					break;
				case LineType.figure:
				case LineType.figureCaption:
					b.type = ln.type == LineType.figure
						? BlockType.figure : BlockType.figureCaption;

					auto itemindent = base_indent ~ IndentType.white;
					lines.popFront();
					parseBlocks(b, lines, itemindent, settings);
					break;
				case LineType.uList:
				case LineType.oList:
					b.type = ln.type == LineType.uList ? BlockType.uList : BlockType.oList;

					auto itemindent = base_indent ~ IndentType.white;
					bool firstItem = true, paraMode = false;
					while (!lines.empty && lines.front.type == ln.type
						&& lines.front.indent == base_indent)
					{
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						// emit <p>...</p> if there are blank lines between the items
						if (firstItem && !lines.empty && lines.front.type == LineType.blank) {
							lines.popFront();
							if (!lines.empty && lines.front.type == ln.type)
								paraMode = true;
						}
						firstItem = false;
						if (paraMode) {
							Block para;
							para.type = BlockType.paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.listItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.htmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					if (!starttag.isHtmlBlock || !starttag.open)
						break;

					b.type = BlockType.plain;
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= str;
						lines.popFront();
						// track nesting of tags with the same name as the start tag
						if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName)
							nestlevel += taginfo.open ? 1 : -1;
						if (nestlevel <= 0) break;
					}
					break;
				case LineType.codeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.code;
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;
						if (lines.front.type == LineType.codeBlockDelimiter) {
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}
573 
574 
// Consumes the front line plus all directly following plain lines that
// continue it and returns their text, unindented by up to `indent.length`
// levels. Always returns at least one element.
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	// Checks whether a line with indentation `indent` continues text that
	// was started at `base_indent`.
	static bool matchesIndent(IndentType[] indent, IndentType[] base_indent)
	{
		if (indent.length > base_indent.length) return false;
		if (indent != base_indent[0 .. indent.length]) return false;

		// count the levels that follow the innermost quote level (if any)
		bool has_quote = false;
		size_t trailing = 0;
		foreach_reverse (tp; base_indent) {
			if (tp == IndentType.quote) {
				has_quote = true;
				break;
			}
			trailing++;
		}

		if (has_quote && indent.length <= trailing) return false;
		return true;
	}

	// return value is used in variables that don't get bounds checks on the
	// first element, so we should return at least one
	if (lines.empty)
		return [""];

	string[] collected;
	do {
		auto current = lines.front;
		collected ~= current.unindent(min(indent.length, current.indent.length));
		lines.popFront();
	} while (!lines.empty
		&& matchesIndent(lines.front.indent, indent)
		&& lines.front.type == LineType.plain);

	return collected;
}
612 
/// private
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	// Writes a single table row. `cell_tag` is the element name used for the
	// cells ("th" or "td").
	static void writeTableRow(ref R dst, string row, string cell_tag,
		const(Alignment)[] columns, LinkRef[string] links, scope MarkdownSettings settings)
	{
		static string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""];

		put(dst, "<tr>");
		size_t col_idx = 0;
		foreach (cell; row.getTableColumns()) {
			put(dst, "<");
			put(dst, cell_tag);
			put(dst, alstr[columns[col_idx]]);
			put(dst, '>');
			dst.writeMarkdownEscaped(cell, links, settings);
			put(dst, "</");
			put(dst, cell_tag);
			put(dst, ">");
			// cells beyond the declared columns reuse the last alignment
			if (col_idx + 1 < columns.length)
				col_idx++;
		}
		put(dst, "</tr>\n");
	}

	final switch (block.type) {
		case BlockType.plain:
			// plain blocks are written verbatim, one line per entry
			foreach (raw_line; block.text) {
				put(dst, raw_line);
				put(dst, "\n");
			}
			foreach (ref child; block.blocks)
				writeBlock(dst, child, links, settings);
			break;
		case BlockType.text:
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (ref child; block.blocks)
				writeBlock(dst, child, links, settings);
			break;
		case BlockType.paragraph:
			assert (block.blocks.length == 0);
			put(dst, "<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			put(dst, "</p>\n");
			break;
		case BlockType.header:
			assert (block.blocks.length == 0);
			assert (block.text.length == 1);
			// shift the level according to the configured base level
			auto level = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			dst.writeTag(block.attributes, "h", level);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", level);
			break;
		case BlockType.table:
			put(dst, "<table>\n");
			// the first text line holds the heading row
			writeTableRow(dst, block.text[0], "th", block.columns, links, settings);
			foreach (row; block.text[1 .. $])
				writeTableRow(dst, row, "td", block.columns, links, settings);
			put(dst, "</table>\n");
			break;
		case BlockType.oList:
			put(dst, "<ol>\n");
			foreach (ref child; block.blocks)
				writeBlock(dst, child, links, settings);
			put(dst, "</ol>\n");
			break;
		case BlockType.uList:
			put(dst, "<ul>\n");
			foreach (ref child; block.blocks)
				writeBlock(dst, child, links, settings);
			put(dst, "</ul>\n");
			break;
		case BlockType.listItem:
			put(dst, "<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (ref child; block.blocks)
				writeBlock(dst, child, links, settings);
			put(dst, "</li>\n");
			break;
		case BlockType.code:
			assert (block.blocks.length == 0);
			put(dst, "<pre class=\"prettyprint\"><code>");
			foreach (code_line; block.text) {
				filterHTMLEscape(dst, code_line);
				put(dst, "\n");
			}
			put(dst, "</code></pre>\n");
			break;
		case BlockType.quote:
			put(dst, "<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (ref child; block.blocks)
				writeBlock(dst, child, links, settings);
			put(dst, "</blockquote>\n");
			break;
		case BlockType.figure:
			put(dst, "<figure>");
			// the <p> tags are left out if there is only a single block
			// apart from the captions
			const content_count = block.blocks.count!(b => b.type != BlockType.figureCaption);
			const omit_para = content_count == 1;
			foreach (ref child; block.blocks) {
				if (child.type == BlockType.paragraph && omit_para)
					writeMarkdownEscaped(dst, child, links, settings);
				else
					writeBlock(dst, child, links, settings);
			}
			put(dst, "</figure>\n");
			break;
		case BlockType.figureCaption:
			put(dst, "<figcaption>");
			const single_para = block.blocks.length == 1
				&& block.blocks[0].type == BlockType.paragraph;
			if (single_para) {
				// a lone paragraph is written without the <p> tags
				writeMarkdownEscaped(dst, block.blocks[0], links, settings);
			} else {
				foreach (ref child; block.blocks)
					writeBlock(dst, child, links, settings);
			}
			put(dst, "</figcaption>\n");
			break;
	}
}
735 
// Writes the text lines of a block as inline Markdown. The lines are joined
// using "<br>" if line breaks are to be kept and using "\n" otherwise.
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	auto text_lines = () @trusted { return cast(string[])block.text; } ();

	string separator = "\n";
	if (settings.flags & MarkdownFlags.keepLineBreaks)
		separator = "<br>";

	writeMarkdownEscaped(dst, text_lines.join(separator), links, settings);
	if (text_lines.length > 0) put(dst, "\n");
}
743 
/// private
// Writes a piece of inline Markdown text as HTML.
//
// Handles backslash escapes, emphasis, inline code, links, images, auto links
// and line breaks. All other characters are written unmodified, except for
// '<' and '>', which get escaped if `MarkdownFlags.noInlineHtml` is set.
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	// Determines whether a link target is local or uses a white-listed schema.
	bool isAllowedURI(string lnk) {
		import std.uni : icmp;

		auto idx = lnk.indexOf('/');
		auto cidx = lnk.indexOf(':');
		// always allow local URIs
		if (cidx < 0 || idx >= 0 && cidx > idx) return true;
		// URI schemes are case-insensitive (RFC 3986, section 3.1), so
		// "HTTP://..." has to be treated the same as "http://..."
		auto schema = lnk[0 .. cidx];
		foreach (allowed; settings.allowedURISchemas)
			if (icmp(allowed, schema) == 0)
				return true;
		return false;
	}

	// Applies the schema white list and the user supplied URL filter.
	string filterLink(string lnk, bool is_image) {
		if (isAllowedURI(lnk))
			return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk;
		return "#"; // replace link with unknown schema with dummy URI
	}

	// two trailing spaces request a hard line break
	bool br = ln.endsWith("  ");
	while (ln.length > 0) {
		switch (ln[0]) {
			default:
				put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '\\':
				if (ln.length >= 2) {
					switch (ln[1]) {
						default:
							// not an escapable character - keep the backslash
							put(dst, ln[0 .. 2]);
							ln = ln[2 .. $];
							break;
						case '\'', '`', '*', '_', '{', '}', '[', ']',
							'(', ')', '#', '+', '-', '.', '!':
							put(dst, ln[1]);
							ln = ln[2 .. $];
							break;
					}
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '_':
			case '*':
				string text;
				if (auto em = parseEmphasis(ln, text)) {
					put(dst, em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
					put(dst, text);
					put(dst, em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '`':
				string code;
				if (parseInlineCode(ln, code)) {
					put(dst, "<code class=\"prettyprint\">");
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</code>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '[':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("href", filterLink(link.url, false));
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "a");
					writeMarkdownEscaped(dst, link.text, linkrefs, settings);
					put(dst, "</a>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '!':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("src", filterLink(link.url, true));
					attributes ~= Attribute("alt", link.text);
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "img");
				} else if( ln.length >= 2 ){
					put(dst, ln[0 .. 2]);
					ln = ln[2 .. $];
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '>':
				if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, "&gt;");
				else put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if (parseAutoLink(ln, url)) {
					bool is_email = url.startsWith("mailto:");
					put(dst, "<a href=\"");
					if (is_email) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, filterLink(url, false));
					put(dst, "\">");
					// the "mailto:" prefix is not part of the visible text
					if (is_email) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</a>");
				} else {
					if (ln.startsWith("<br>")) {
						// always support line breaks, since we embed them here ourselves!
						put(dst, "<br/>");
						ln = ln[4 .. $];
					} else if(ln.startsWith("<br/>")) {
						put(dst, "<br/>");
						ln = ln[5 .. $];
					} else {
						if (settings.flags & MarkdownFlags.noInlineHtml)
							put(dst, "&lt;");
						else put(dst, ln[0]);
						ln = ln[1 .. $];
					}
				}
				break;
		}
	}
	if (br) put(dst, "<br/>");
}
881 
// Writes an opening tag without attributes; see the overload that takes attributes.
private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions)
{
	const(Attribute)[] no_attributes = null;
	writeTag(dst, no_attributes, name, name_additions);
}
886 
// Writes an opening tag. The tag name is `name` directly followed by all
// `name_additions`; attribute values are escaped for use within double quotes.
private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions)
{
	put(dst, '<');
	put(dst, name);
	foreach (suffix; name_additions)
		dst.formattedWrite("%s", suffix);

	foreach (ref attr; attributes) {
		put(dst, ' ');
		put(dst, attr.attribute);
		put(dst, "=\"");
		dst.filterHTMLAttribEscape(attr.value);
		put(dst, '\"');
	}

	put(dst, '>');
}
899 
// Tests whether a line consists solely of spaces and tabs (or is empty).
private bool isLineBlank(string ln)
pure @safe {
	return ln.allOf(" \t");
}
904 
// Tests whether a line is a setext header underline: a run of '=' or a run
// of '-' characters, optionally surrounded by white space.
private bool isSetextHeaderLine(string ln)
pure @safe {
	auto rest = stripLeft(ln);
	if (rest.length < 1) return false;

	const marker = rest[0];
	if (marker != '=' && marker != '-') return false;

	// skip the run of marker characters
	size_t run = 0;
	while (run < rest.length && rest[run] == marker) run++;

	return allOf(rest[run .. $], " \t");
}
919 
// Tests whether a line starts an ATX header: one to six '#' characters
// followed by a space. Leading white space is ignored.
private bool isAtxHeaderLine(string ln)
pure @safe {
	auto s = stripLeft(ln);

	size_t hashes = 0;
	foreach (char ch; s) {
		if (ch != '#') break;
		hashes++;
	}

	if (hashes == 0 || hashes > 6) return false;
	return hashes < s.length && s[hashes] == ' ';
}
928 
// Tests whether a line separates a table heading from the table body. Such
// a line has at least two '|' separated cells, each made up of three or more
// '-' characters with an optional ':' on either side.
private bool isTableSeparatorLine(string ln)
pure @safe {
	import std.algorithm.iteration : splitter;

	auto row = strip(ln);
	// the outer pipes are optional
	if (row.startsWith("|")) row = row[1 .. $];
	if (row.endsWith("|")) row = row[0 .. $-1];

	size_t cell_count = 0;
	foreach (cell; row.splitter('|')) {
		auto dashes = cell.strip();
		// alignment markers
		if (dashes.startsWith(':')) dashes = dashes[1 .. $];
		if (dashes.endsWith(':')) dashes = dashes[0 .. $-1];

		const valid = dashes.length >= 3 && dashes.allOf("-");
		if (!valid) return false;
		cell_count++;
	}
	return cell_count >= 2;
}
949 
unittest {
	foreach (valid; ["|----|---|", "|:----:|---|", "---|----", "| --- | :---- |"])
		assert(isTableSeparatorLine(valid));

	foreach (invalid; ["| ---- |", "| -- | -- |", "| --- - | ---- |"])
		assert(!isTableSeparatorLine(invalid));
}
959 
// Splits a table row into its cells and returns them as a lazy range of
// strings with surrounding white space removed.
//
// Spaces and tabs around the row are removed before the optional outer
// pipes get dropped. This matches isTableSeparatorLine() and avoids a bogus
// empty cell for rows such as "| a | b |  ".
private auto getTableColumns(string line)
pure @safe nothrow {
	import std.algorithm.iteration : map, splitter;

	// trimmed manually, because std.string.strip is not nothrow
	while (line.length > 0 && (line[0] == ' ' || line[0] == '\t'))
		line = line[1 .. $];
	while (line.length > 0 && (line[$-1] == ' ' || line[$-1] == '\t'))
		line = line[0 .. $-1];

	if (line.startsWith("|")) line = line[1 .. $];
	if (line.endsWith("|")) line = line[0 .. $-1];
	return line.splitter('|').map!(s => s.strip());
}
968 
// Returns the number of cells of a table row.
private size_t countTableColumns(string line)
pure @safe {
	auto cells = getTableColumns(line);
	return cells.walkLength;
}
973 
// Tests whether a line is a horizontal rule: at least three '-', '*' or '_'
// characters of the same kind, with nothing but spaces in between.
private bool isHlineLine(string ln)
pure @safe {
	static bool isRuleOf(string ln, string allowed, char marker)
	pure @safe {
		return allOf(ln, allowed) && count(ln, marker) >= 3;
	}

	return isRuleOf(ln, " -", '-')
		|| isRuleOf(ln, " *", '*')
		|| isRuleOf(ln, " _", '_');
}
981 
// Tests whether the first non-white-space character of a line is '>'.
private bool isQuoteLine(string ln)
pure @safe {
	auto trimmed = stripLeft(ln);
	return trimmed.length > 0 && trimmed[0] == '>';
}
986 
// Counts the leading '>' markers of a line; white space before and between
// the markers is skipped.
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t depth = 0;
	for (auto rest = stripLeft(ln); rest.length > 0 && rest[0] == '>'; rest = stripLeft(rest[1 .. $]))
		depth++;
	return depth;
}
997 
// Tests whether a line starts an unordered list item: a '*', '+' or '-'
// bullet followed by a space or tab. Leading white space is ignored.
private bool isUListLine(string ln)
pure @safe {
	auto s = stripLeft(ln);
	if (s.length < 2) return false;

	const is_bullet = s[0] == '*' || s[0] == '+' || s[0] == '-';
	const has_gap = s[1] == ' ' || s[1] == '\t';
	return is_bullet && has_gap;
}
1006 
// Tests whether a line starts an ordered list item: one or more decimal
// digits, a '.', and a space or tab. Leading white space is ignored.
private bool isOListLine(string ln)
pure @safe {
	auto s = stripLeft(ln);

	// length of the leading digit run
	size_t digits = 0;
	while (digits < s.length && s[digits] >= '0' && s[digits] <= '9')
		digits++;
	if (digits == 0) return false;

	// the dot and the following blank both have to be present
	if (s.length - digits < 2) return false;
	if (s[digits] != '.') return false;
	return s[digits+1] == ' ' || s[digits+1] == '\t';
}
1021 
// Removes the list marker of a list item line and returns the remaining
// text without leading white space. `tp` must be `LineType.oList` or
// `LineType.uList`.
private string removeListPrefix(string str, LineType tp)
pure @safe {
	if (tp == LineType.uList) {
		// drop the single bullet character
		auto after_bullet = str.stripLeft()[1 .. $];
		return stripLeft(after_bullet);
	}

	if (tp == LineType.oList) { // skip bullets and output using normal escaping
		const dot = str.indexOf('.');
		assert (dot > 0);
		return str[dot+1 .. $].stripLeft();
	}

	assert (false);
}
1034 
1035 
/** Analyzes a line that may consist of a single HTML block tag.

	The returned struct has three fields:
	$(UL
		$(LI `isHtmlBlock` - set only if the stripped line is exactly one tag
			(its first `>` is the last character) whose name is listed in
			`s_blockTags`)
		$(LI `tagName` - the characters following `<` or `</` up to the first
			space or `>`; filled in as soon as the tag starts with a letter,
			even if the line is later rejected)
		$(LI `open` - `false` if the tag starts with `</`, `true` otherwise)
	)
*/
private auto parseHtmlBlockLine(string ln)
pure @safe {
	import std.ascii : isAlpha;

	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	HtmlBlockInfo info;
	info.isHtmlBlock = false;
	info.open = true;

	auto rest = strip(ln);
	if (rest.length < 3 || rest[0] != '<') return info;

	// drop the '<' of a closing tag so that rest[1] is the first name character
	if (rest[1] == '/') {
		info.open = false;
		rest = rest[1 .. $];
	}
	if (!isAlpha(rest[1])) return info;
	rest = rest[1 .. $];

	// the tag name ends at the first space or at the closing bracket
	size_t namelen = 0;
	while (namelen < rest.length && rest[namelen] != ' ' && rest[namelen] != '>')
		namelen++;
	info.tagName = rest[0 .. namelen];
	rest = rest[namelen .. $];

	// nothing may follow the first '>'
	immutable closeidx = rest.indexOf('>');
	if (closeidx < 0 || closeidx != rest.length-1) return info;

	info.isHtmlBlock = s_blockTags.canFind(info.tagName);
	return info;
}
1073 
/// Tells whether `ln` is recognized by `parseHtmlBlockLine` as an opening HTML block tag.
private bool isHtmlBlockLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return info.open;
}
1079 
/// Tells whether `ln` is recognized by `parseHtmlBlockLine` as a closing HTML block tag.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return !info.open;
}
1085 
/// Tells whether `ln`, ignoring leading white space, begins with three backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	auto content = stripLeft(ln);
	return content.length >= 3 && content[0 .. 3] == "```";
}
1090 
/// Returns the `tagName` field that `parseHtmlBlockLine` reports for `ln`.
private string getHtmlTagName(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	return info.tagName;
}
1095 
/// Tells whether `ln` begins with a tab character or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	if (ln.length == 0) return false;
	if (ln[0] == '\t') return true;
	return ln.length >= 4 && ln[0 .. 4] == "    ";
}
1100 
/** Removes one level of indentation from a line.

	One level is either a single leading tab or four leading spaces. Passing
	a line that starts with neither is a programming error.
*/
private string unindentLine(string ln)
pure @safe {
	if (ln.length >= 1 && ln[0] == '\t') return ln[1 .. $];
	if (ln.length >= 4 && ln[0 .. 4] == "    ") return ln[4 .. $];
	assert (false);
}
1107 
/** Tries to parse an emphasized span at the beginning of `str`.

	The span is delimited on both sides by the same run of one to three `*`
	or `_` characters and must enclose at least one character.

	Params:
		str = Input text; on success it is advanced past the closing delimiter,
			on failure it is left untouched
		text = Receives the text between the delimiters on success

	Returns: The length of the delimiter (1 to 3), or 0 if `str` does not
		start with an emphasized span.
*/
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	if (str.length < 3) return 0;

	// longest delimiters first, so that "***" is not mistaken for "*"
	static immutable string[6] delimiters = ["***", "**", "*", "___", "__", "_"];

	string ctag;
	foreach (delim; delimiters) {
		if (str.startsWith(delim)) {
			ctag = delim;
			break;
		}
	}
	if (ctag.length == 0) return 0;

	auto rest = str[ctag.length .. $];

	// the closing delimiter must not directly follow the opening one
	auto cidx = () @trusted { return rest.indexOf(ctag); }();
	if (cidx < 1) return 0;

	text = rest[0 .. cidx];
	str = rest[cidx+ctag.length .. $];
	return cast(int)ctag.length;
}
1132 
/** Tries to parse an inline code span at the beginning of `str`.

	The span is delimited on both sides by either one or two backticks and
	must enclose at least one character.

	Params:
		str = Input text; on success it is advanced past the closing delimiter,
			on failure it is left untouched
		code = Receives the raw text between the delimiters on success

	Returns: `true` if a code span was parsed.
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	if (str.length < 3) return false;
	if (str[0] != '`') return false;

	// a double backtick takes precedence over a single one
	string ctag = str[1] == '`' ? "``" : "`";
	auto rest = str[ctag.length .. $];

	auto cidx = () @trusted { return rest.indexOf(ctag); }();
	if (cidx < 1) return false;

	code = rest[0 .. cidx];
	str = rest[cidx+ctag.length .. $];
	return true;
}
1150 
/** Tries to parse a link or image at the beginning of `str`.

	Two forms are understood, each optionally prefixed with `!`:
	$(UL
		$(LI inline: `[text](url "title")`, where the URL may be wrapped in
			`<...>` and, if `attributes` is non-null, a `{...}` attribute
			block may directly follow)
		$(LI reference: `[text][refid]` or `[text][]`, with an optional single
			space between the two bracket pairs; an empty `refid` means that
			`text` is used as the reference id)
	)

	Params:
		str = Input text; advanced past the link only on success
		dst = Receives text, URL and title. Note that fields may already have
			been written when the function returns `false`.
		linkrefs = Known reference definitions, keyed by lower case id
		attributes = Optional destination for attributes of the link

	Returns: `true` if a link was parsed.
*/
private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes)
pure @safe {
	import std.ascii : isWhite;

	if (str.length < 3) return false;

	// ignore img-link prefix
	string rest = str[0] == '!' ? str[1 .. $] : str;

	// parse the text part [text]
	if (rest[0] != '[') return false;
	auto close = rest.matchBracket();
	if (close < 1) return false;
	dst.text = rest[1 .. close];
	rest = rest[close+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if (rest.length < 2) return false;

	string refid;
	if (rest[0] != '(') {
		// reference form
		if (rest[0] == ' ') rest = rest[1 .. $];
		if (rest[0] != '[') return false;
		rest = rest[1 .. $];
		close = rest.indexOf(']');
		if (close < 0) return false;
		refid = close == 0 ? dst.text : rest[0 .. close];
		rest = rest[close+1 .. $];
	} else {
		// inline form
		close = rest.matchBracket();
		if (close < 1) return false;
		auto inner = rest[1 .. close];

		// a title starts at the first quote, but only if white space precedes it
		immutable quote = inner.indexOf('"');
		if (quote > 1 && inner[quote - 1].isWhite()) {
			dst.url = inner[0 .. quote].stripRight();
			immutable last = inner[quote .. $].lastIndexOf('"');
			if (last == 0) return false; // no closing quote
			assert (last > 0);
			dst.title = inner[quote + 1 .. quote + last];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}

		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		rest = rest[close+1 .. $];

		if (attributes !is null && rest.startsWith('{')) {
			immutable end = rest.indexOf('}');
			if (end > 0) {
				parseAttributeString(rest[1 .. end], *attributes);
				rest = rest[end+1 .. $];
			}
		}
	}

	// resolve the reference, if any
	if (refid.length > 0) {
		auto pr = toLower(refid) in linkrefs;
		if (!pr) {
			debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = pr.url;
		dst.title = pr.title;
		if (attributes) *attributes ~= pr.attributes;
	}

	str = rest;
	return true;
}
1222 
// Exercises parseLink() without attribute parsing: inline and reference
// links, nested brackets in the link text, white space handling around URL
// and title, and a list of inputs that must be rejected.
@safe unittest
{
	// parses `s` and requires both success and the expected result
	static void testLink(string s, Link exp, in LinkRef[string] refs)
	{
		Link link;
		assert (parseLink(s, link, refs, null), s);
		assert (link == exp);
	}
	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	// inline links with and without title
	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target  "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title"  )`, Link("link", "target", "title"), null);

	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);

	// reference links, explicit and implicit id
	testLink(`[link][ref]`, Link("link", "target", "title"), refs);
	testLink(`[ref][]`, Link("ref", "target", "title"), refs);

	// balanced brackets inside of the link text
	testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
	testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

	// spaces within the URL are kept, trailing ones are removed
	testLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
	testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

	testLink(`[link](white-space  "around title" )`, Link("link", "white-space", "around title"), null);
	testLink(`[link](tabs	"around title"	)`, Link("link", "tabs", "around title"), null);

	// an empty title is accepted; a quote without preceding white space belongs to the URL
	testLink(`[link](target "")`, Link("link", "target", ""), null);
	testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

	// angle brackets around the URL are removed
	testLink(`[link](<target>)`, Link("link", "target"), null);

	// malformed links and unknown references must be rejected
	auto failing = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link link;
	foreach (s; failing)
		assert (!parseLink(s, link, refs, null), s);
}
1268 
@safe unittest { // attributes
	// Parses `s` with parseLink() and checks the remaining input (`exprem`),
	// the resulting link and the collected attributes. Attribute parsing is
	// only enabled if `parse_atts` is set.
	void test(string s, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...)
	@safe {
		Link lnk;
		Attribute[] atts;
		parseLink(s, lnk, refs, parse_atts ? () @trusted { return &atts; } () : null);
		assert (lnk == explnk);
		assert (s == exprem);
		assert (atts == expatts);
	}

	// without an attribute destination the {...} block is left in the input
	test("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", ""));
	test("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz"));

	// attributes of a reference definition are passed on to the link
	auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])];
	test("[foo][bar]", refs, false, "", Link("foo", "url", "title"));
	test("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid"));
}
1287 
/** Tries to parse an automatic link of the form `<url>` at the beginning of `str`.

	The text between the angle brackets must not contain spaces or tabs and
	must contain at least one of `@` and `:`. If it contains an `@` that is
	not preceded by a purely alphanumeric scheme followed by `:`, it is
	treated as an e-mail address and prefixed with `mailto:`.

	Params:
		str = Input text; advanced past the closing `>` on success
		url = Receives the link target on success

	Returns: `true` if an automatic link was parsed. On failure neither `str`
		nor `url` is modified.
*/
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	import std.algorithm.searching : all;
	import std.ascii : isAlphaNum;

	string pstr = str;
	if (pstr.length < 3) return false;
	if (pstr[0] != '<') return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if (cidx < 0) return false;

	// work on a local so that `url` is only written once the link is accepted
	string target = pstr[0 .. cidx];
	if (target.anyOf(" \t")) return false;
	auto atidx = target.indexOf('@');
	auto colonidx = target.indexOf(':');
	if (atidx < 0 && colonidx < 0) return false;

	// an '@' without a valid "scheme:" in front of it denotes an e-mail address
	if (atidx >= 0) {
		if (colonidx < 0 || colonidx > atidx ||
			!target[0 .. colonidx].all!(ch => ch.isAlphaNum))
				target = "mailto:" ~ target;
	}

	str = pstr[cidx+1 .. $];
	url = target;
	return true;
}
1313 
// Exercises parseAutoLink() with URLs and e-mail addresses.
unittest {
	// On failure neither the input nor the output URL may have been touched,
	// on success the whole input must have been consumed.
	void test(bool expected, string str, string url)
	{
		string strcpy = str;
		string outurl;
		if (!expected) {
			assert (!parseAutoLink(strcpy, outurl));
			assert (outurl.length == 0);
			assert (strcpy == str);
		} else {
			assert (parseAutoLink(strcpy, outurl));
			assert (outurl == url);
			assert (strcpy.length == 0);
		}
	}

	test(true, "<http://foo/>", "http://foo/");
	test(false, "<http://foo/", null);
	test(true, "<mailto:foo@bar>", "mailto:foo@bar");
	// a bare address gets the "mailto:" scheme prepended
	test(true, "<foo@bar>", "mailto:foo@bar");
	test(true, "<proto:foo@bar>", "proto:foo@bar");
	test(true, "<proto:foo@bar:123>", "proto:foo@bar:123");
	// the colon is part of a quoted local part here, not a scheme separator
	test(true, "<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz");
}
1338 
/** Removes a trailing `{...}` attribute block from a line.

	The block must be the last thing on the line, apart from trailing white
	space. It starts at the last `{` of the line.

	Params:
		line = The line to process; on success it is shortened to the text in
			front of the opening brace, otherwise it is left untouched

	Returns: The text between the braces, or `null` if the line does not end
		with an attribute block.
*/
private string skipAttributes(ref string line)
@safe pure {
	auto trimmed = line.stripRight;
	if (trimmed.length == 0 || trimmed[$-1] != '}') return null;

	immutable open = trimmed.lastIndexOf('{');
	if (open < 0) return null;

	line = trimmed[0 .. open];
	return trimmed[open+1 .. $-1];
}
1351 
// Exercises skipAttributes(): only a {...} block at the very end of the line
// (ignoring trailing white space) is removed and returned.
unittest {
	// `outp` is the expected remainder of the line, `att` the expected return value
	void test(string inp, string outp, string att)
	{
		auto ratt = skipAttributes(inp);
		assert (ratt == att, ratt);
		assert (inp == outp, inp);
	}

	test(" foo ", " foo ", null);
	test("foo {bar}", "foo ", "bar");
	test("foo {bar}  ", "foo ", "bar");
	// unbalanced or non-trailing blocks leave the line untouched
	test("foo bar} ", "foo bar} ", null);
	test(" {bar} foo ", " {bar} foo ", null);
	// only the last block is taken
	test(" fo {o {bar} ", " fo {o ", "bar");
	test(" fo {o} {bar} ", " fo {o} ", "bar");
}
1368 
/** Parses the contents of a `{...}` attribute block.

	The string is split at spaces. An element of the form `#name` sets the
	"id" attribute, replacing any previous value. An element of the form
	`.name` sets the "class" attribute or, if one already exists, appends the
	name to it, separated by a space. All other elements are ignored.

	Params:
		attributes = The text between the braces
		dst = Attribute list that gets updated or extended
*/
private void parseAttributeString(string attributes, ref Attribute[] dst)
@safe pure {
	import std.algorithm.iteration : splitter;

	// TODO: handle custom attributes (requires a different approach than splitter)

	foreach (token; attributes.splitter(' ')) {
		token = token.strip;
		if (token.length == 0) continue;

		// map the prefix character to the attribute it denotes
		string name;
		bool accumulate;
		if (token[0] == '#') {
			name = "id";
			accumulate = false;
		} else if (token[0] == '.') {
			name = "class";
			accumulate = true;
		} else continue;

		string value = token[1 .. $];
		auto pos = dst.countUntil!(a => a.attribute == name);
		if (pos < 0) dst ~= Attribute(name, value);
		else if (accumulate) dst[pos].value ~= " " ~ value;
		else dst[pos].value = value;
	}
}
1389 
// Exercises parseAttributeString(): "#x" sets the id (the last one wins),
// ".x" accumulates class names.
unittest {
	// parses `str` into an empty list and compares against `atts`
	void test(string str, Attribute[] atts...)
	{
		Attribute[] res;
		parseAttributeString(str, res);
		assert (res == atts, format("%s: %s", str, res));
	}

	test("");
	test(".foo", Attribute("class", "foo"));
	test("#foo", Attribute("id", "foo"));
	test("#foo #bar", Attribute("id", "bar"));
	test(".foo .bar", Attribute("class", "foo bar"));
	test("#foo #bar", Attribute("id", "bar"));
	// attributes keep the order of their first occurrence
	test(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar"));
}
1406 
/** Collects all link reference definitions and removes their lines.

	A reference definition is a line that is not indented and has one of the
	following forms, where the title and the trailing attribute block are
	optional:

	$(UL
		$(LI `[refid]: url "title" {attributes}`)
		$(LI `[refid]: <url> "title" {attributes}`)
	)

	The title may be written as `"title"`, `'title'` or `(title)`. URL and
	title may be separated by spaces or tabs.

	Params:
		lines = All lines of the document; replaced by an array that contains
			only the lines that are not reference definitions

	Returns: The detected references, keyed by their lower case id. If an id
		is defined more than once, the last definition wins.
*/
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	foreach (lnidx, ln; lines) {
		// reference definitions must not be indented
		if (isLineIndented(ln)) continue;
		ln = strip(ln);
		if (!ln.startsWith("[")) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if (idx < 0) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		string attstr = ln.skipAttributes();

		string url;
		if (ln.startsWith("<")) {
			idx = ln.indexOf('>');
			if (idx < 0) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// the URL ends at the first space or tab, whichever comes first
			size_t urlend = 0;
			while (urlend < ln.length && ln[urlend] != ' ' && ln[urlend] != '\t')
				urlend++;
			url = ln[0 .. urlend];
			ln = ln[urlend .. $];
		}

		// skipAttributes() keeps the white space in front of a removed
		// attribute block, so both ends need to be stripped before the
		// closing delimiter of the title can be checked
		ln = strip(ln);

		string title;
		if (ln.length >= 3) {
			if (ln[0] == '(' && ln[$-1] == ')'
				|| ln[0] == '\"' && ln[$-1] == '\"'
				|| ln[0] == '\'' && ln[$-1] == '\'' )
			{
				title = ln[1 .. $-1];
			}
		}

		LinkRef lref;
		lref.id = refid;
		lref.url = url;
		lref.title = title;
		parseAttributeString(attstr, lref.attributes);
		ret[toLower(refid)] = lref;
		reflines[lnidx] = true;

		debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach (i, ln; lines)
		if (i !in reflines)
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}
1485 
1486 
/**
	Generates an identifier suitable for use within a URL.

	The resulting range contains only lower case ASCII letters, ASCII digits
	and dashes (-). Upper case ASCII letters are converted to lower case and
	every sequence of other characters is replaced by a single dash. Such
	sequences at the front or at the back of the input are dropped, so that
	the result never starts or ends with a dash.

	Params:
		text = An input range of `dchar`, such as a `string`

	Returns: A lazily evaluated input range of `char`.
*/
auto asSlug(R)(R text)
	if (isInputRange!R && is(typeof(R.init.front) == dchar))
{
	static struct SlugRange {
		private {
			R _input;
			bool _dash; // a separating dash has to be emitted before the next input character
		}

		this(R input)
		{
			_input = input;
			// no dash is generated for leading separators
			skipNonAlphaNum();
		}

		@property bool empty() const { return !_dash && _input.empty; }

		@property char front()
		const {
			if (_dash) return '-';

			immutable char ch = cast(char)_input.front;
			immutable bool upper = ch >= 'A' && ch <= 'Z';
			return upper ? cast(char)(ch + ('a' - 'A')) : ch;
		}

		void popFront()
		{
			if (_dash) {
				_dash = false;
				return;
			}

			_input.popFront();
			// separators at the very end do not produce a dash
			if (skipNonAlphaNum() && !_input.empty)
				_dash = true;
		}

		// Advances the input to the next ASCII letter or digit and reports
		// whether anything had to be skipped.
		private bool skipNonAlphaNum()
		{
			bool have_skipped = false;
			for (; !_input.empty; _input.popFront()) {
				immutable dchar c = _input.front;
				immutable bool keep = (c >= 'a' && c <= 'z')
					|| (c >= 'A' && c <= 'Z')
					|| (c >= '0' && c <= '9');
				if (keep) break;
				have_skipped = true;
			}
			return have_skipped;
		}
	}
	return SlugRange(text);
}
1552 
// Exercises asSlug(): case folding, collapsing of separator runs into a
// single dash and removal of leading/trailing separators.
unittest {
	import std.algorithm : equal;
	assert ("".asSlug.equal(""));
	assert (".,-".asSlug.equal(""));
	assert ("abc".asSlug.equal("abc"));
	assert ("aBc123".asSlug.equal("abc123"));
	assert ("....aBc...123...".asSlug.equal("abc-123"));
}
1561 
/// A link reference definition as collected by `scanForReferences`.
private struct LinkRef {
	/// The reference id as written in the definition (original case)
	string id;
	/// The link target
	string url;
	/// The optional title; empty if none was given
	string title;
	/// Attributes parsed from a trailing `{...}` block of the definition
	Attribute[] attributes;
}
1568 
/// The components of a link or image as filled in by `parseLink`.
private struct Link {
	/// The text between the first pair of square brackets
	string text;
	/// The link target
	string url;
	/// The optional title
	string title;
}
1574 
@safe unittest { // alt and title attributes
	// the bracketed text of an image becomes its "alt" attribute
	assert (filterMarkdown("![alt](http://example.org/image)")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
	// a quoted title is emitted as the "title" attribute
	assert (filterMarkdown("![alt](http://example.org/image \"Title\")")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
1581 
@safe unittest { // complex links
	// link text spanning two lines and a URL wrapped in angle brackets
	assert (filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and")
		== "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
	// an image used as the text of a link
	assert (filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
		== "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1588 
@safe unittest { // check CTFE-ability
	// `enum` forces the filter to run at compile time; the input covers a
	// heading and a reference style link together with its definition
	enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	assert (res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}
1593 
@safe unittest { // correct line breaks in restrictive mode
	// with MarkdownFlags.forumDefault a single newline produces a <br/>
	auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert (res == "<p>hello<br/>world\n</p>\n", res);
}
1598 
1599 /*@safe unittest { // code blocks and blockquotes
1600 	assert (filterMarkdown("\tthis\n\tis\n\tcode") ==
1601 		"<pre><code>this\nis\ncode</code></pre>\n");
1602 	assert (filterMarkdown("    this\n    is\n    code") ==
1603 		"<pre><code>this\nis\ncode</code></pre>\n");
1604 	assert (filterMarkdown("    this\n    is\n\tcode") ==
1605 		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1606 	assert (filterMarkdown("\tthis\n\n\tcode") ==
1607 		"<pre><code>this\n\ncode</code></pre>\n");
1608 	assert (filterMarkdown("\t> this") ==
1609 		"<pre><code>&gt; this</code></pre>\n");
1610 	assert (filterMarkdown(">     this") ==
1611 		"<blockquote><pre><code>this</code></pre></blockquote>\n");
1612 	assert (filterMarkdown(">     this\n    is code") ==
1613 		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1614 }*/
1615 
@safe unittest { // heading ids
	// the heading gets an "id" attribute in slug form ("hello-world")
	assert (filterMarkdown("## Hello, World!") == "<h2 id=\"hello-world\"> Hello, World!</h2>\n", filterMarkdown("## Hello, World!"));
}
1619 
@safe unittest { // tables
	// header only
	assert (filterMarkdown("foo|bar\n---|---", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n</table>\n");
	// inline formatting within cells, cell contents are stripped
	assert (filterMarkdown(" *foo* | bar \n---|---\n baz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th><em>foo</em></th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	// the outer pipes are optional on the header, separator and body rows
	assert (filterMarkdown("|foo|bar|\n---|---\n baz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	assert (filterMarkdown("foo|bar\n|---|---|\nbaz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	assert (filterMarkdown("foo|bar\n---|---\n|baz|bam|", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	// colons in the separator row select the column alignment
	assert (filterMarkdown("foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|", MarkdownFlags.tables)
		== "<table>\n<tr><th align=\"left\">foo</th><th align=\"right\">bar</th><th align=\"center\">baz</th></tr>\n"
		~ "<tr><td align=\"left\">baz</td><td align=\"right\">bam</td><td align=\"center\">bap</td></tr>\n</table>\n");
	// empty header cell
	assert (filterMarkdown(" |bar\n---|---", MarkdownFlags.tables)
		== "<table>\n<tr><th></th><th>bar</th></tr>\n</table>\n");
	// body row with fewer cells than the header
	assert (filterMarkdown("foo|bar\n---|---\nbaz|", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td></tr>\n</table>\n");
}
1639 
@safe unittest { // issue #1527 - blank lines in code blocks
	// the blank line must not split the indented code block in two
	assert (filterMarkdown("    foo\n\n    bar\n") ==
		"<pre class=\"prettyprint\"><code>foo\n\nbar\n</code></pre>\n");
}
1644 
@safe unittest { // fenced code block within a block quote
	// the input uses Windows style (\r\n) line endings
	assert (filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault) ==
		"<blockquote><pre class=\"prettyprint\"><code>test\n</code></pre>\n</blockquote>\n");
}
1649 
@safe unittest { // issue #1845 - malicious URI targets
	// with MarkdownFlags.forumDefault, "javascript:" targets of links, images
	// and automatic links are replaced by "#"
	assert (filterMarkdown("[foo](javascript:foo) ![bar](javascript:bar) <javascript:baz>", MarkdownFlags.forumDefault) ==
		"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"bar\"> <a href=\"#\">javascript:baz</a>\n</p>\n");
	// the same applies to targets that come from reference definitions
	assert (filterMarkdown("[foo][foo] ![foo][foo]\n[foo]: javascript:foo", MarkdownFlags.forumDefault) ==
		"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"foo\">\n</p>\n");
	// a percent encoded colon is passed through unchanged
	assert (filterMarkdown("[foo](javascript%3Abar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"javascript%3Abar\">foo</a>\n</p>\n");

	// extra XSS regression tests
	// HTML within link text and URL must come out escaped
	assert (filterMarkdown("[<script></script>](bar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"bar\">&lt;script&gt;&lt;/script&gt;</a>\n</p>\n");
	assert (filterMarkdown("[foo](\"><script></script><span foo=\")", MarkdownFlags.forumDefault) ==
		"<p><a href=\"&quot;&gt;&lt;script&gt;&lt;/script&gt;&lt;span foo=&quot;\">foo</a>\n</p>\n");
	// the ampersand of a character reference within the URL gets escaped
	assert (filterMarkdown("[foo](javascript&#58;bar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"javascript&amp;#58;bar\">foo</a>\n</p>\n");
}
1666 
@safe unittest { // issue #2132 - table with more columns in body goes out of array bounds
	// the third cell of the body row has no corresponding header column
	assert (filterMarkdown("| a | b |\n|--------|--------|\n|   c    | d  | e |", MarkdownFlags.tables) ==
		"<table>\n<tr><th>a</th><th>b</th></tr>\n<tr><td>c</td><td>d</td><td>e</td></tr>\n</table>\n");
}
1671 
@safe unittest { // lists
	// items separated by a blank line get their contents wrapped in <p>
	assert (filterMarkdown("- foo\n- bar") ==
		"<ul>\n<li>foo\n</li>\n<li>bar\n</li>\n</ul>\n");
	assert (filterMarkdown("- foo\n\n- bar") ==
		"<ul>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ul>\n");
	assert (filterMarkdown("1. foo\n2. bar") ==
		"<ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n");
	assert (filterMarkdown("1. foo\n\n2. bar") ==
		"<ol>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ol>\n");
}
1682 
@safe unittest { // figures
	// without MarkdownFlags.figures the markers are ordinary list item text
	assert (filterMarkdown("- %%%") == "<ul>\n<li>%%%\n</li>\n</ul>\n");
	assert (filterMarkdown("- ###") == "<ul>\n<li>###\n</li>\n</ul>\n");
	// with the flag, "- %%%" opens a <figure> and "- ###" a <figcaption>
	assert (filterMarkdown("- %%%", MarkdownFlags.figures) == "<figure></figure>\n");
	assert (filterMarkdown("- ###", MarkdownFlags.figures) == "<figcaption></figcaption>\n");
	// indented content below the marker becomes the content of the element
	assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar", MarkdownFlags.figures) ==
		"<figure>foo\n<figcaption>bar\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure><p>foo\n</p>\n<p>bar\n</p>\n<figcaption>baz\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure>foo\n<figcaption><p>bar\n</p>\n<p>baz\n</p>\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\t1. foo\n\t2. bar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure><ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n<figcaption>baz\n</figcaption>\n</figure>\n");
	// a figure marker terminates a preceding list
	assert (filterMarkdown("- foo\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n");
	assert (filterMarkdown("- foo\n\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n");
}
1699 
@safe unittest { // HTML entities
	// entities pass through unchanged in normal text and within emphasis
	assert(filterMarkdown("&nbsp;") == "<p>&nbsp;\n</p>\n");
	assert(filterMarkdown("*&nbsp;*") == "<p><em>&nbsp;</em>\n</p>\n");
	// within inline code the ampersand gets escaped
	assert(filterMarkdown("`&nbsp;`") == "<p><code class=\"prettyprint\">&amp;nbsp;</code>\n</p>\n");
}