vibe.textfilter.markdown source code

1 /**
2 	Markdown parser implementation
3 
4 	Copyright: © 2012-2019 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.textfilter.markdown;
9 
10 import vibe.core.log;
11 import vibe.textfilter.html;
12 import vibe.utils.string;
13 
14 import std.algorithm : canFind, countUntil, min;
15 import std.array;
16 import std.format;
17 import std.range;
18 import std.string;
19 
20 /*
21 	TODO:
22 		detect inline HTML tags
23 */
24 
version(MarkdownTest)
{
	/// Test driver: renders the contents of `test.txt` and logs the resulting
	/// HTML line by line.
	int main()
	{
		import std.file;
		setLogLevel(LogLevel.Trace);
		auto source = readText("test.txt");
		auto html = appender!string();
		filterMarkdown(html, source);
		foreach (line; html.data.splitLines())
			logInfo(line);
		return 0;
	}
}
39 
/** Returns a Markdown filtered HTML string.

	Params:
		str = The Markdown source text
		flags = Parser flags to use; all other settings keep their defaults
*/
string filterMarkdown()(string str, MarkdownFlags flags)
@trusted { // scope class is not @safe for DMD 2.072
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	return filterMarkdown(str, cfg);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	// render into an in-memory buffer and hand out its contents
	auto html = appender!string();
	filterMarkdown(html, str, settings);
	return html.data;
}
55 
56 
/** Markdown filters the given string and writes the corresponding HTML to an output range.

	Params:
		dst = Output range that receives the generated HTML
		src = The Markdown source text
		flags = Parser flags to use; all other settings keep their defaults
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	scope cfg = new MarkdownSettings;
	cfg.flags = flags;
	filterMarkdown(dst, src, cfg);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	// fall back to default settings if the caller didn't supply any
	if (settings is null) settings = new MarkdownSettings;

	auto source_lines = src.splitLines();
	// the reference scan has to see the source lines before they are parsed
	auto link_refs = scanForReferences(source_lines);
	auto parsed_lines = parseLines(source_lines, settings);

	Block document;
	parseBlocks(document, parsed_lines, null, settings);
	writeBlock(dst, document, link_refs, settings);
}
77 
/**
	Returns the hierarchy of sections

	Only headings at the top level of the document are considered. Each
	heading becomes a `Section` that is nested below the most recent heading
	with a lower heading level.

	The `anchor` of each section is the value of the heading's `id` attribute,
	i.e. the same value that is written to the generated HTML, so that links
	built from the outline always match the rendered document - also when the
	`id` was supplied explicitly through `MarkdownFlags.attributes`.

	Params:
		markdown_source = The Markdown source text
		settings = Optional parser settings; defaults are used if `null`
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
	import std.conv : to;

	if (!settings) settings = new MarkdownSettings;
	auto all_lines = splitLines(markdown_source);
	auto lines = parseLines(all_lines, settings);
	Block root_block;
	parseBlocks(root_block, lines, null, settings);
	Section root;

	foreach (ref sb; root_block.blocks) {
		if (sb.type != BlockType.header) continue;

		// descend along the last sections for as long as they are of a
		// lower heading level than the new heading
		auto s = &root;
		while (s.subSections.length > 0
			&& s.subSections[$-1].headingLevel < sb.headerLevel)
		{
			s = &s.subSections[$-1];
		}

		// prefer the id attached to the heading block over a recomputed slug
		string anchor;
		bool have_id = false;
		foreach (ref attr; sb.attributes) {
			if (attr.attribute == "id") {
				anchor = attr.value;
				have_id = true;
				break;
			}
		}
		if (!have_id) anchor = sb.text[0].asSlug.to!string;

		s.subSections ~= Section(sb.headerLevel, sb.text[0], anchor);
	}

	return root.subSections;
}
106 
///
unittest {
	import std.conv : to;
	// "second" adopts the deeper "third" heading, and the level 1 heading
	// "fourth" starts a new top-level section that adopts "fifth".
	// Note that the captions keep the space that follows the '#' characters.
	assert (getMarkdownOutline("## first\n## second\n### third\n# fourth\n### fifth") ==
		[
			Section(2, " first", "first"),
			Section(2, " second", "second", [
				Section(3, " third", "third")
			]),
			Section(1, " fourth", "fourth", [
				Section(3, " fifth", "fifth")
			])
		]
	);
}
122 
/** Settings that control how Markdown source is parsed and rendered.

	An instance can be passed to `filterMarkdown` and `getMarkdownOutline`.
	If none is passed, those functions create an instance with the default
	values defined here.
*/
final class MarkdownSettings {
	/// Controls the capabilities of the parser.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/// Heading tags will start at this level.
	size_t headingBaseLevel = 1;

	/// Called for every link/image URL to perform arbitrary transformations.
	string delegate(string url_or_path, bool is_image) urlFilter;

	/// White list of URI schemas that can occur in link/image targets
	string[] allowedURISchemas = ["http", "https", "ftp", "mailto"];
}
136 
/** Bit flags that select the features supported by the Markdown parser.

	Individual flags can be combined using the `|` operator.
*/
enum MarkdownFlags {
	/** Same as `vanillaMarkdown`
	*/
	none = 0,

	/** Convert line breaks into hard line breaks in the output

		This option is useful when operating on text that may be formatted as
		plain text, without having Markdown in mind, while still improving
		the appearance of the text in many cases. A common example would be
		to format e-mails or newsgroup posts.
	*/
	keepLineBreaks = 1<<0,

	/** Support fenced code blocks.
	*/
	backtickCodeBlocks = 1<<1,

	/** Disable support for embedded HTML
	*/
	noInlineHtml = 1<<2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,

	/** Support table definitions

		The syntax is based on Markdown Extra and GitHub flavored Markdown.
	*/
	tables = 1<<5,

	/** Support HTML attributes after links

		Links or images directly followed by `{ … }` allow regular HTML
		attributes to be added to the generated HTML element.
	*/
	attributes = 1<<6,

	/** Recognize figure definitions

		Figures can be defined using a modified list syntax:

		```
		- %%%
			This is the figure content

			- ###
				This is optional caption content
		```

		Just like for lists, arbitrary blocks can be nested within figure and
		figure caption blocks. If only a single paragraph is present within a
		figure caption block, the paragraph text will be emitted without the
		surrounding `<p>` tags. The same is true for figure blocks that contain
		only a single paragraph and any number of additional figure caption
		blocks.
	*/
	figures = 1<<7,

	/** Support only standard Markdown features

		Note that the parser is not fully CommonMark compliant at the moment,
		but this is the general idea behind this option.
	*/
	vanillaMarkdown = none,

	/** Default set of flags suitable for use within an online forum
	*/
	forumDefault = keepLineBreaks|backtickCodeBlocks|noInlineHtml|tables
}
206 
/** A single entry of the document outline returned by `getMarkdownOutline`.
*/
struct Section {
	/// Level of the heading that starts the section
	size_t headingLevel;
	/// Text of the heading
	string caption;
	/// Slug derived from the heading, usable as a link anchor
	string anchor;
	/// Sections nested below this one
	Section[] subSections;
}
213 
private {
	// Names of the HTML tags that are recognized as HTML blocks
	// (checked by parseHtmlBlockLine).
	immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
}
217 
// Kind of a single indentation level at the start of a line.
private enum IndentType {
	white, // a tab character or four spaces
	quote  // a '>' quote marker, optionally followed by a single space
}
222 
// Classification of a single source line, as assigned by parseLines.
private enum LineType {
	undefined, // initial value, never assigned by parseLines
	blank,
	plain,
	hline,
	atxHeader,
	setextHeader, // underline row made of '=' or '-' characters
	tableSeparator, // only assigned if MarkdownFlags.tables is set
	uList,
	oList,
	figure, // "%%%" list item, only if MarkdownFlags.figures is set
	figureCaption, // "###" list item, only if MarkdownFlags.figures is set
	htmlBlock, // not assigned if MarkdownFlags.noInlineHtml is set
	codeBlockDelimiter // only assigned if MarkdownFlags.backtickCodeBlocks is set
}
238 
// A source line together with its classification and indentation levels.
private struct Line {
	LineType type;
	IndentType[] indent; // indentation levels, outermost first
	string text; // the unmodified source line
	string unindented; // the line with all indentation levels removed

	// Returns `text` with the first `n` indentation levels removed.
	string unindent(size_t n)
	pure @safe {
		assert (n <= indent.length);
		string rest = text;
		foreach (level; 0 .. n) {
			final switch (indent[level]) {
				case IndentType.white:
					// a white space level is either four spaces or one tab
					rest = rest[0] == ' ' ? rest[4 .. $] : rest[1 .. $];
					break;
				case IndentType.quote:
					// drop the '>' marker and a single space following it
					rest = rest.stripLeft()[1 .. $];
					if (rest.startsWith(' '))
						rest = rest[1 .. $];
					break;
			}
		}
		return rest;
	}
}
265 
// Determines the indentation levels and the type of each source line.
//
// Returns one `Line` per element of `lines`, in the same order.
private Line[] parseLines(string[] lines, scope MarkdownSettings settings)
pure @safe {
	Line[] result;

	foreach (raw; lines) {
		Line info;
		info.text = raw;

		// strip the indentation levels one by one, recording their kind
		string rest = raw;
		while (rest.length > 0) {
			if (rest[0] == '\t') {
				info.indent ~= IndentType.white;
				rest = rest[1 .. $];
				continue;
			}
			if (rest.startsWith("    ")) {
				info.indent ~= IndentType.white;
				rest = rest[4 .. $];
				continue;
			}

			auto trimmed = rest.stripLeft();
			if (!trimmed.startsWith(">")) break;

			info.indent ~= IndentType.quote;
			rest = trimmed[1 .. $];
			if (rest.startsWith(' '))
				rest = rest[1 .. $];
		}
		info.unindented = rest;

		// classify the remaining text - the order of the checks matters
		const flags = settings.flags;
		LineType type;
		if ((flags & MarkdownFlags.backtickCodeBlocks) && isCodeBlockDelimiter(rest)) {
			type = LineType.codeBlockDelimiter;
		} else if (isAtxHeaderLine(rest)) {
			type = LineType.atxHeader;
		} else if (isSetextHeaderLine(rest)) {
			type = LineType.setextHeader;
		} else if ((flags & MarkdownFlags.tables) && isTableSeparatorLine(rest)) {
			type = LineType.tableSeparator;
		} else if (isHlineLine(rest)) {
			type = LineType.hline;
		} else if (isOListLine(rest)) {
			type = LineType.oList;
		} else if (isUListLine(rest)) {
			type = LineType.uList;
			// figures reuse the list syntax with special item texts
			if (flags & MarkdownFlags.figures) {
				switch (removeListPrefix(rest, LineType.uList)) {
					case "%%%": type = LineType.figure; break;
					case "###": type = LineType.figureCaption; break;
					default: break;
				}
			}
		} else if (isLineBlank(rest)) {
			type = LineType.blank;
		} else if (!(flags & MarkdownFlags.noInlineHtml) && isHtmlBlockLine(rest)) {
			type = LineType.htmlBlock;
		} else {
			type = LineType.plain;
		}
		info.type = type;

		result ~= info;
	}

	return result;
}
319 
// Checks how parseLines splits quoted lines into indentation levels: a single
// space after '>' belongs to the quote marker, up to three further spaces stay
// part of the text and four further spaces form an additional indentation
// level.
unittest {
	import std.conv : to;
	auto s = new MarkdownSettings;
	s.flags = MarkdownFlags.forumDefault;
	auto lns = [">```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]);
	lns = ["> ```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "```D")]);
	lns = [">    ```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote], lns[0], "   ```D")]);
	lns = [">     ```D"];
	assert (parseLines(lns, s) == [Line(LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], lns[0], "```D")]);
	lns = [">test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]);
	lns = ["> test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "test")]);
	lns = [">    test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote], lns[0], "   test")]);
	lns = [">     test"];
	assert (parseLines(lns, s) == [Line(LineType.plain, [IndentType.quote, IndentType.white], lns[0], "test")]);
}
341 
// Kind of a node in the block tree built by parseBlocks; determines how
// writeBlock renders the node.
private enum BlockType {
	plain, // text lines are written verbatim, without escaping
	text,
	paragraph,
	header,
	table,
	oList,
	uList,
	listItem,
	code,
	quote,
	figure,
	figureCaption
}
356 
// A node of the block tree built by parseBlocks.
private struct Block {
	BlockType type;
	Attribute[] attributes; // HTML attributes, filled in for headers
	string[] text; // text lines that belong directly to this block
	Block[] blocks; // nested blocks
	size_t headerLevel; // set for BlockType.header
	Alignment[] columns; // column alignments, set for BlockType.table
}
365 
// A single HTML attribute as written by writeTag.
private struct Attribute {
	string attribute; // attribute name
	string value; // unescaped value, writeTag escapes it on output
}
370 
// Alignment of a table column. The values are bit flags, where `center`
// is the combination of `left` and `right`.
private enum Alignment {
	none = 0,
	left = 1<<0,
	right = 1<<1,
	center = left | right
}
377 
/** Builds the block tree for a sequence of classified lines.

	Lines are consumed from the front of `lines` and the resulting blocks are
	appended to `root.blocks`. The function returns as soon as all lines are
	consumed or a line is found whose indentation does not start with
	`base_indent`; that line and everything following it is left in `lines`.

	Params:
		root = Block that receives the parsed blocks. Its type is set to `text`
			if `base_indent` is empty and to `quote` if the innermost
			indentation level is a quote.
		lines = The remaining lines; advanced past all consumed lines
		base_indent = Indentation levels shared by all lines of this block
		settings = Parser settings
*/
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	import std.conv : to;
	import std.algorithm.comparison : among;

	if (base_indent.length == 0) root.type = BlockType.text;
	else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote;

	while (!lines.empty) {
		auto ln = lines.front;

		if (ln.type == LineType.blank) {
			lines.popFront();
			continue;
		}

		if (ln.indent != base_indent) {
			// lines that are not part of this block end it
			if (ln.indent.length < base_indent.length
				|| ln.indent[0 .. base_indent.length] != base_indent)
			{
				return;
			}

			auto cindent = base_indent ~ IndentType.white;
			if (ln.indent == cindent) {
				// exactly one additional white space level: indented code block
				Block cblock;
				cblock.type = BlockType.code;
				while (!lines.empty && (lines.front.unindented.strip.empty
					|| lines.front.indent.length >= cindent.length
					&& lines.front.indent[0 .. cindent.length] == cindent))
				{
					cblock.text ~= lines.front.indent.length >= cindent.length
						? lines.front.unindent(cindent.length) : "";
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				// anything else forms a nested block with one more level
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			final switch (ln.type) {
				case LineType.undefined: assert (false);
				case LineType.blank: assert (false);
				case LineType.plain:
					if (lines.length >= 2 && lines[1].type == LineType.setextHeader) {
						// text line followed by an underline: setext heading
						auto setln = lines[1].unindented;
						b.type = BlockType.header;
						b.text = [ln.unindented];
						if (settings.flags & MarkdownFlags.attributes)
							parseAttributeString(skipAttributes(b.text[0]), b.attributes);
						if (!b.attributes.canFind!(a => a.attribute == "id"))
							b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string);
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator
						&& ln.unindented.indexOf('|') >= 0)
					{
						// header row followed by a separator row: table
						auto setln = lines[1].unindented;
						b.type = BlockType.table;
						b.text = [ln.unindented];
						foreach (c; getTableColumns(setln)) {
							Alignment a = Alignment.none;
							if (c.startsWith(':')) a |= Alignment.left;
							if (c.endsWith(':')) a |= Alignment.right;
							b.columns ~= a;
						}

						lines.popFrontN(2);
						// all following lines containing a '|' are table rows
						while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) {
							b.text ~= lines.front.unindented;
							lines.popFront();
						}
					} else {
						b.type = BlockType.paragraph;
						b.text = skipText(lines, base_indent);
					}
					break;
				case LineType.hline:
					b.type = BlockType.plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.atxHeader:
					b.type = BlockType.header;
					string hl = ln.unindented;
					b.headerLevel = 0;
					while (hl.length > 0 && hl[0] == '#') {
						b.headerLevel++;
						hl = hl[1 .. $];
					}

					if (settings.flags & MarkdownFlags.attributes)
						parseAttributeString(skipAttributes(hl), b.attributes);
					if (!b.attributes.canFind!(a => a.attribute == "id"))
						b.attributes ~= Attribute("id", asSlug(hl).to!string);

					// remove the optional closing sequence of '#' characters
					while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.setextHeader:
					// An underline that doesn't follow a text line is not a
					// heading. A run of dashes is classified as a setext
					// underline before the horizontal rule check is reached,
					// so emit the rule here instead of dropping the line.
					if (isHlineLine(ln.unindented)) {
						b.type = BlockType.plain;
						b.text = ["<hr>"];
					}
					lines.popFront();
					break;
				case LineType.tableSeparator:
					// separator row without a preceding header row: ignored
					lines.popFront();
					break;
				case LineType.figure:
				case LineType.figureCaption:
					b.type = ln.type == LineType.figure
						? BlockType.figure : BlockType.figureCaption;

					auto itemindent = base_indent ~ IndentType.white;
					lines.popFront();
					parseBlocks(b, lines, itemindent, settings);
					break;
				case LineType.uList:
				case LineType.oList:
					b.type = ln.type == LineType.uList ? BlockType.uList : BlockType.oList;

					auto itemindent = base_indent ~ IndentType.white;
					bool firstItem = true, paraMode = false;
					while (!lines.empty && lines.front.type == ln.type
						&& lines.front.indent == base_indent)
					{
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						// emit <p>...</p> if there are blank lines between the items
						if (firstItem && !lines.empty && lines.front.type == LineType.blank) {
							lines.popFront();
							if (!lines.empty && lines.front.type == ln.type)
								paraMode = true;
						}
						firstItem = false;
						if (paraMode) {
							Block para;
							para.type = BlockType.paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.listItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.htmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					if (!starttag.isHtmlBlock || !starttag.open)
						break;

					b.type = BlockType.plain;
					// copy lines verbatim until the start tag has been closed
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= str;
						lines.popFront();
						if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName)
							nestlevel += taginfo.open ? 1 : -1;
						if (nestlevel <= 0) break;
					}
					break;
				case LineType.codeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.code;
					// everything up to the closing delimiter is code
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;
						if (lines.front.type == LineType.codeBlockDelimiter) {
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}
573 
574 
// Consumes the first line of `lines` plus all plain lines following it that
// continue the same text block, and returns their unindented texts.
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	// Determines if a line with indentation `line_indent` continues a text
	// block that is indented by `block_indent`.
	static bool continuesBlock(IndentType[] line_indent, IndentType[] block_indent)
	{
		if (line_indent.length > block_indent.length
			|| line_indent != block_indent[0 .. line_indent.length])
		{
			return false;
		}

		// count the indentation levels that follow the innermost quote level
		size_t after_quote = 0;
		bool has_quote = false;
		foreach_reverse (tp; block_indent) {
			if (tp == IndentType.quote) {
				has_quote = true;
				break;
			}
			after_quote++;
		}

		return !has_quote || line_indent.length > after_quote;
	}

	// return value is used in variables that don't get bounds checks on the
	// first element, so we should return at least one
	if (lines.empty)
		return [""];

	string[] collected;
	do {
		const depth = min(indent.length, lines.front.indent.length);
		collected ~= lines.front.unindent(depth);
		lines.popFront();
	} while (!lines.empty
		&& lines.front.type == LineType.plain
		&& continuesBlock(lines.front.indent, indent));

	return collected;
}
612 
/// private
// Writes the HTML for `block` and, recursively, for all of its nested blocks.
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	// writes all nested blocks in document order
	void writeChildren()
	{
		foreach (ref child; block.blocks)
			writeBlock(dst, child, links, settings);
	}

	final switch (block.type) {
		case BlockType.plain:
			// plain blocks are passed through without any escaping
			foreach (line; block.text) {
				put(dst, line);
				put(dst, "\n");
			}
			writeChildren();
			break;
		case BlockType.text:
			writeMarkdownEscaped(dst, block, links, settings);
			writeChildren();
			break;
		case BlockType.paragraph:
			assert (block.blocks.length == 0);
			put(dst, "<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			put(dst, "</p>\n");
			break;
		case BlockType.header:
			assert (block.blocks.length == 0);
			assert (block.text.length == 1);
			auto level = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			dst.writeTag(block.attributes, "h", level);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", level);
			break;
		case BlockType.table:
			import std.algorithm.iteration : splitter;

			static string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""];

			// Writes a single table row. Cells beyond the number of defined
			// columns use the alignment of the last column.
			void writeRow(string row, string cell_open, string cell_close)
			{
				put(dst, "<tr>");
				size_t col = 0;
				foreach (cell; row.getTableColumns()) {
					put(dst, cell_open);
					put(dst, alstr[block.columns[col]]);
					put(dst, '>');
					dst.writeMarkdownEscaped(cell, links, settings);
					put(dst, cell_close);
					if (col + 1 < block.columns.length)
						col++;
				}
				put(dst, "</tr>\n");
			}

			put(dst, "<table>\n");
			writeRow(block.text[0], "<th", "</th>");
			foreach (row; block.text[1 .. $])
				writeRow(row, "<td", "</td>");
			put(dst, "</table>\n");
			break;
		case BlockType.oList:
			put(dst, "<ol>\n");
			writeChildren();
			put(dst, "</ol>\n");
			break;
		case BlockType.uList:
			put(dst, "<ul>\n");
			writeChildren();
			put(dst, "</ul>\n");
			break;
		case BlockType.listItem:
			put(dst, "<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			writeChildren();
			put(dst, "</li>\n");
			break;
		case BlockType.code:
			assert (block.blocks.length == 0);
			put(dst, "<pre class=\"prettyprint\"><code>");
			foreach (line; block.text) {
				filterHTMLEscape(dst, line);
				put(dst, "\n");
			}
			put(dst, "</code></pre>\n");
			break;
		case BlockType.quote:
			put(dst, "<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			writeChildren();
			put(dst, "</blockquote>\n");
			break;
		case BlockType.figure:
			put(dst, "<figure>");
			// a lone paragraph (besides captions) is written without <p> tags
			const content_count = block.blocks.count!(sub => sub.type != BlockType.figureCaption);
			const bool omit_para = content_count == 1;
			foreach (ref child; block.blocks) {
				if (omit_para && child.type == BlockType.paragraph)
					writeMarkdownEscaped(dst, child, links, settings);
				else
					writeBlock(dst, child, links, settings);
			}
			put(dst, "</figure>\n");
			break;
		case BlockType.figureCaption:
			put(dst, "<figcaption>");
			// a lone paragraph is written without <p> tags
			if (block.blocks.length == 1 && block.blocks[0].type == BlockType.paragraph)
				writeMarkdownEscaped(dst, block.blocks[0], links, settings);
			else
				writeChildren();
			put(dst, "</figcaption>\n");
			break;
	}
}
735 
// Writes the text lines of `block` with inline Markdown applied. The lines
// are joined using "<br>" if `keepLineBreaks` is set and using a line feed
// otherwise. A trailing line feed is written if the block has any text lines.
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	auto text_lines = () @trusted { return cast(string[])block.text; } ();
	const keep_breaks = (settings.flags & MarkdownFlags.keepLineBreaks) != 0;
	auto joined = text_lines.join(keep_breaks ? "<br>" : "\n");
	writeMarkdownEscaped(dst, joined, links, settings);
	if (text_lines.length > 0) put(dst, "\n");
}
743 
/// private
/*
	Writes a piece of text with all inline Markdown (escapes, emphasis, inline
	code, links, images and auto links) converted to HTML.

	If the text ends with two spaces, a `<br/>` is appended to the output.

	Params:
		dst = Output range that receives the HTML
		ln = The text to convert
		linkrefs = Link references, passed on to `parseLink`
		settings = Parser settings (flags, URL filter and allowed URI schemas)
*/
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	bool isAllowedURI(string lnk) {
		auto idx = lnk.indexOf('/');
		auto cidx = lnk.indexOf(':');
		// always allow local URIs
		if (cidx < 0 || idx >= 0 && cidx > idx) return true;
		return settings.allowedURISchemas.canFind(lnk[0 .. cidx]);
	}

	string filterLink(string lnk, bool is_image) {
		if (isAllowedURI(lnk))
			return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk;
		return "#"; // replace link with unknown schema with dummy URI
	}

	// Writes text that is not processed any further. If inline HTML is
	// disabled, angle brackets are escaped in the same way as for regular
	// text, so that no HTML tags can be smuggled in through this path.
	void putLiteral(string text) {
		if (!(settings.flags & MarkdownFlags.noInlineHtml)) {
			put(dst, text);
			return;
		}

		size_t start = 0;
		foreach (i, char ch; text) {
			if (ch != '<' && ch != '>') continue;
			if (i > start) put(dst, text[start .. i]);
			put(dst, ch == '<' ? "&lt;" : "&gt;");
			start = i + 1;
		}
		if (start < text.length) put(dst, text[start .. $]);
	}

	bool br = ln.endsWith("  ");
	while (ln.length > 0) {
		switch (ln[0]) {
			default:
				put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '\\':
				if (ln.length >= 2) {
					switch (ln[1]) {
						default:
							// not an escape sequence: keep the backslash
							putLiteral(ln[0 .. 2]);
							ln = ln[2 .. $];
							break;
						case '\\', '\'', '`', '*', '_', '{', '}', '[', ']',
							'(', ')', '#', '+', '-', '.', '!':
							put(dst, ln[1]);
							ln = ln[2 .. $];
							break;
					}
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '_':
			case '*':
				string text;
				if (auto em = parseEmphasis(ln, text)) {
					put(dst, em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
					putLiteral(text);
					put(dst, em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '`':
				string code;
				if (parseInlineCode(ln, code)) {
					put(dst, "<code class=\"prettyprint\">");
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</code>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '[':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("href", filterLink(link.url, false));
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "a");
					writeMarkdownEscaped(dst, link.text, linkrefs, settings);
					put(dst, "</a>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '!':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("src", filterLink(link.url, true));
					attributes ~= Attribute("alt", link.text);
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "img");
				} else if( ln.length >= 2 ){
					// not an image: write the '!' and the following character
					putLiteral(ln[0 .. 2]);
					ln = ln[2 .. $];
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '>':
				if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, "&gt;");
				else put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if (parseAutoLink(ln, url)) {
					bool is_email = url.startsWith("mailto:");
					put(dst, "<a href=\"");
					if (is_email) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, filterLink(url, false));
					put(dst, "\">");
					if (is_email) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</a>");
				} else {
					if (ln.startsWith("<br>")) {
						// always support line breaks, since we embed them here ourselves!
						put(dst, "<br/>");
						ln = ln[4 .. $];
					} else if(ln.startsWith("<br/>")) {
						put(dst, "<br/>");
						ln = ln[5 .. $];
					} else {
						if (settings.flags & MarkdownFlags.noInlineHtml)
							put(dst, "&lt;");
						else put(dst, ln[0]);
						ln = ln[1 .. $];
					}
				}
				break;
		}
	}
	if (br) put(dst, "<br/>");
}
881 
// Writes an opening HTML tag without attributes. The tag name is made up of
// `name`, directly followed by the formatted `name_additions`.
private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions)
{
	Attribute[] no_attributes = null;
	writeTag(dst, no_attributes, name, name_additions);
}
886 
// Writes an opening HTML tag with the given attributes. The tag name is made
// up of `name`, directly followed by the formatted `name_additions`. The
// attribute values are escaped, the attribute names are written as they are.
private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions)
{
	dst.formattedWrite("<%s", name);
	foreach (suffix; name_additions)
		dst.formattedWrite("%s", suffix);

	foreach (attr; attributes) {
		dst.formattedWrite(" %s=\"", attr.attribute);
		dst.filterHTMLAttribEscape(attr.value);
		put(dst, '\"');
	}

	put(dst, '>');
}
899 
// Determines if a line consists solely of spaces and tabs (or is empty,
// assuming `allOf` accepts the empty string - TODO confirm).
private bool isLineBlank(string ln)
pure @safe {
	return ln.allOf(" \t");
}
904 
// Determines if a line is a setext heading underline: optional leading white
// space, a run of either '=' or '-' characters, then only spaces and tabs.
private bool isSetextHeaderLine(string ln)
pure @safe {
	const text = stripLeft(ln);
	if (text.length == 0) return false;

	const marker = text[0];
	if (marker != '=' && marker != '-') return false;

	// skip the run of marker characters
	size_t run = 1;
	while (run < text.length && text[run] == marker) run++;

	return allOf(text[run .. $], " \t");
}
919 
// Determines if a line is an ATX heading: optional leading white space,
// one to six '#' characters and a space directly after them.
private bool isAtxHeaderLine(string ln)
pure @safe {
	const text = ln.stripLeft();

	size_t level = 0;
	while (level < text.length && text[level] == '#') level++;

	const valid_level = level >= 1 && level <= 6;
	return valid_level && level < text.length && text[level] == ' ';
}
928 
// Determines if a line is the separator row between a table's header and its
// body.
//
// A separator row consists of at least two cells separated by '|', where each
// cell is made up of at least three '-' characters with an optional ':' at
// either end. Leading and trailing '|' characters are optional, and the cells
// may be surrounded by white space (e.g. "| --- | :---: |"), matching the
// way in which getTableColumns strips the cells.
private bool isTableSeparatorLine(string ln)
pure @safe {
	import std.algorithm.iteration : splitter;

	ln = strip(ln);
	if (ln.startsWith("|")) ln = ln[1 .. $];
	if (ln.endsWith("|")) ln = ln[0 .. $-1];

	size_t cnt = 0;
	foreach (c; ln.splitter('|')) {
		// cells are commonly padded with white space
		c = c.strip();
		if (c.startsWith(':')) c = c[1 .. $];
		if (c.endsWith(':')) c = c[0 .. $-1];
		if (c.length < 3) return false;
		foreach (char ch; c)
			if (ch != '-') return false;
		cnt++;
	}
	return cnt >= 2;
}
948 
// Splits a table row into its cells. A single leading and a single trailing
// '|' are ignored; each cell is stripped of surrounding white space.
// Returns a lazily evaluated range of strings.
private auto getTableColumns(string line)
pure @safe nothrow {
	import std.algorithm.iteration : map, splitter;

	if (line.length > 0 && line[0] == '|')
		line = line[1 .. $];
	if (line.length > 0 && line[$-1] == '|')
		line = line[0 .. $-1];

	return line.splitter('|').map!(cell => cell.strip());
}
957 
// Returns the number of cells in a table row (see getTableColumns).
private size_t countTableColumns(string line)
pure @safe {
	auto cells = getTableColumns(line);
	return cells.walkLength;
}
962 
// Determines if a line is a horizontal rule: at least three '-', '*' or '_'
// characters of the same kind, optionally mixed with spaces.
private bool isHlineLine(string ln)
pure @safe {
	// each entry is the set of allowed characters, with the marker last
	static immutable string[3] charsets = [" -", " *", " _"];

	foreach (set; charsets) {
		if (allOf(ln, set) && count(ln, set[1]) >= 3)
			return true;
	}
	return false;
}
970 
// Determines if the first non-white space character of a line is a '>'.
private bool isQuoteLine(string ln)
pure @safe {
	const text = stripLeft(ln);
	return text.length > 0 && text[0] == '>';
}
975 
// Counts the '>' characters at the start of a line, ignoring any white space
// before and between them.
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t depth = 0;
	for (auto rest = stripLeft(ln); rest.length > 0 && rest[0] == '>'; rest = stripLeft(rest[1 .. $]))
		depth++;
	return depth;
}
986 
// Determines if a line starts an unordered list item: optional leading white
// space, one of '*', '+' or '-', followed by a space or a tab.
private bool isUListLine(string ln)
pure @safe {
	const text = stripLeft(ln);
	if (text.length < 2) return false;

	const bullet = text[0];
	const is_bullet = bullet == '*' || bullet == '+' || bullet == '-';
	return is_bullet && (text[1] == ' ' || text[1] == '\t');
}
995 
// Determines if a line starts an ordered list item: optional leading white
// space, one or more decimal digits, a '.', followed by a space or a tab.
private bool isOListLine(string ln)
pure @safe {
	const text = stripLeft(ln);

	// length of the leading run of digits
	size_t digits = 0;
	while (digits < text.length && text[digits] >= '0' && text[digits] <= '9')
		digits++;
	if (digits == 0) return false;

	const rest = text[digits .. $];
	if (rest.length < 2 || rest[0] != '.') return false;
	return rest[1] == ' ' || rest[1] == '\t';
}
1010 
// Removes the list marker from the first line of a list item and returns the
// remaining text without leading white space. `tp` must be either
// `LineType.uList` or `LineType.oList`.
private string removeListPrefix(string str, LineType tp)
pure @safe {
	if (tp == LineType.uList) {
		// drop the bullet character
		auto item = str.stripLeft()[1 .. $];
		return item.stripLeft();
	}

	if (tp == LineType.oList) { // skip bullets and output using normal escaping
		const dot = str.indexOf('.');
		assert (dot > 0);
		return str[dot + 1 .. $].stripLeft();
	}

	assert (false);
}
1023 
1024 
// Analyzes a line that may consist of a single HTML block tag.
//
// `isHtmlBlock` is set if the stripped line is a single opening or closing
// tag whose name is listed in `s_blockTags` and whose only '>' is the last
// character. `open` is false if the tag starts with "</". `tagName` is filled
// in as soon as a tag name could be extracted, even if `isHtmlBlock` stays
// false.
private auto parseHtmlBlockLine(string ln)
pure @safe {
	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	import std.ascii : isAlpha;

	HtmlBlockInfo info;
	info.isHtmlBlock = false;
	info.open = true;

	const text = strip(ln);
	if (text.length < 3 || text[0] != '<') return info;

	// position of the first character of the tag name
	size_t name_start = 1;
	if (text[1] == '/') {
		info.open = false;
		name_start = 2;
	}
	if (!isAlpha(text[name_start])) return info;

	// the name extends up to the first space or '>'
	size_t name_end = name_start;
	while (name_end < text.length && text[name_end] != ' ' && text[name_end] != '>')
		name_end++;
	info.tagName = text[name_start .. name_end];

	// the tag has to be closed by the last character of the line
	const rest = text[name_end .. $];
	const close_idx = rest.indexOf('>');
	if (close_idx < 0) return info;
	if (close_idx + 1 != rest.length) return info;

	info.isHtmlBlock = s_blockTags.canFind(info.tagName);
	return info;
}
1062 
/// Tests whether `ln` is the opening tag of an HTML block.
private bool isHtmlBlockLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return info.open;
}
1068 
/// Tests whether `ln` is the closing tag of an HTML block.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return !info.open;
}
1074 
/// Tests whether `ln` starts, after optional leading whitespace, with three
/// backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	auto trimmed = stripLeft(ln);
	return trimmed.length >= 3 && trimmed[0 .. 3] == "```";
}
1079 
/// Returns the tag name that `parseHtmlBlockLine` extracts from `ln`.
private string getHtmlTagName(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	return info.tagName;
}
1084 
/// Tests whether `ln` begins with one tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	if (ln.length >= 1 && ln[0] == '\t') return true;
	return ln.length >= 4 && ln[0 .. 4] == "    ";
}
1089 
/// Removes one level of indentation (a single tab or four spaces) from `ln`.
/// The line must be indented; otherwise an assertion fails.
private string unindentLine(string ln)
pure @safe {
	if (ln.length >= 1 && ln[0] == '\t') return ln[1 .. $];
	if (ln.length >= 4 && ln[0 .. 4] == "    ") return ln[4 .. $];
	assert (false);
}
1096 
/// Tries to parse an emphasized span at the start of `str`.
///
/// Recognized delimiters are one to three '*' or one to three '_'; the
/// closing delimiter must be the same string as the opening one and the
/// enclosed text must not be empty.
///
/// Returns: the delimiter length (1, 2 or 3) on success, in which case `text`
/// receives the enclosed text and `str` is advanced past the closing
/// delimiter. Returns 0 and leaves both arguments untouched otherwise.
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	if (str.length < 3) return 0;

	// ordered so that the longest delimiter of each kind is tried first
	static immutable string[6] delimiters = ["***", "**", "*", "___", "__", "_"];

	string ctag;
	foreach (candidate; delimiters) {
		if (str.startsWith(candidate)) {
			ctag = candidate;
			break;
		}
	}
	if (ctag.length == 0) return 0;

	string rest = str[ctag.length .. $];

	auto closeIdx = () @trusted { return rest.indexOf(ctag); }();
	if (closeIdx < 1) return 0;

	text = rest[0 .. closeIdx];
	str = rest[closeIdx+ctag.length .. $];
	return cast(int)ctag.length;
}
1121 
/// Tries to parse an inline code span at the start of `str`.
///
/// The span is delimited by either single or double backticks; the closing
/// delimiter must match the opening one and the code must not be empty.
///
/// Returns: true on success, in which case `code` receives the raw span
/// contents and `str` is advanced past the closing delimiter. Returns false
/// and leaves both arguments untouched otherwise.
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	if (str.length < 3) return false;
	if (str[0] != '`') return false;

	// the length check above guarantees that str[1] exists
	string ctag = str[1] == '`' ? "``" : "`";
	string rest = str[ctag.length .. $];

	auto closeIdx = () @trusted { return rest.indexOf(ctag); }();
	if (closeIdx < 1) return false;

	code = rest[0 .. closeIdx];
	str = rest[closeIdx+ctag.length .. $];
	return true;
}
1139 
/// Tries to parse a link or image at the start of `str`.
///
/// Two forms are handled: the inline form `[text](url "title")`, optionally
/// followed by an attribute block `{...}` when `attributes` is non-null, and
/// the reference form `[text][refid]` / `[text][]`, which is resolved through
/// `linkrefs` using the lower-cased reference id. A leading '!' (image
/// prefix) is skipped.
///
/// Returns: true on success, with `dst` filled in and `str` advanced past the
/// link. On failure `str` is left untouched, but `dst` may already have been
/// partially written.
private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes)
pure @safe {
	import std.ascii : isWhite;

	string rest = str;
	if (rest.length < 3) return false;

	// ignore img-link prefix
	if (rest[0] == '!') rest = rest[1 .. $];

	// the text part: [text]
	if (rest[0] != '[') return false;
	// NOTE(review): matchBracket presumably yields the index of the matching
	// closing bracket, or a value < 1 if there is none - confirm
	auto textEnd = rest.matchBracket();
	if (textEnd < 1) return false;
	dst.text = rest[1 .. textEnd];
	rest = rest[textEnd+1 .. $];

	// what follows is either (url "title") or [refid]
	if (rest.length < 2) return false;

	string refid;
	if (rest[0] == '(') {
		auto targetEnd = rest.matchBracket();
		if (targetEnd < 1) return false;
		auto inner = rest[1 .. targetEnd];

		// a title only counts if its opening quote is preceded by white space
		immutable quote = inner.indexOf('"');
		immutable hasTitle = quote > 1 && inner[quote - 1].isWhite();
		if (hasTitle) {
			dst.url = inner[0 .. quote].stripRight();
			immutable span = inner[quote .. $].lastIndexOf('"');
			if (span == 0) return false; // no closing quote
			assert (span > 0);
			dst.title = inner[quote + 1 .. quote + span];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}

		// unwrap <url>
		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		rest = rest[targetEnd+1 .. $];

		// optional attribute block directly after the closing parenthesis
		if (attributes && rest.startsWith('{')) {
			auto attEnd = rest.indexOf('}');
			if (attEnd > 0) {
				parseAttributeString(rest[1 .. attEnd], *attributes);
				rest = rest[attEnd+1 .. $];
			}
		}
	} else {
		// a single space is allowed between the two bracket pairs
		if (rest[0] == ' ') rest = rest[1 .. $];
		if (rest[0] != '[') return false;
		rest = rest[1 .. $];

		auto refEnd = rest.indexOf(']');
		if (refEnd < 0) return false;
		// an empty id means that the link text doubles as reference id
		refid = refEnd == 0 ? dst.text : rest[0 .. refEnd];
		rest = rest[refEnd+1 .. $];
	}

	if (refid.length > 0) {
		auto pr = toLower(refid) in linkrefs;
		if (!pr) {
			debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = pr.url;
		dst.title = pr.title;
		if (attributes) *attributes ~= pr.attributes;
	}

	str = rest;
	return true;
}
1211 
@safe unittest
{
	// reference table used by the reference style cases
	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	// parses `src` and verifies that exactly the link `expected` comes out
	static void expectLink(string src, Link expected, in LinkRef[string] known)
	{
		Link parsed;
		immutable ok = parseLink(src, parsed, known, null);
		assert (ok, src);
		assert (parsed == expected);
	}

	static struct Case {
		string src;
		Link expected;
	}

	// inline links - resolved without a reference table
	auto inlineCases = [
		Case(`[link](target)`, Link("link", "target")),
		Case(`[link](target "title")`, Link("link", "target", "title")),
		Case(`[link](target  "title")`, Link("link", "target", "title")),
		Case(`[link](target "title"  )`, Link("link", "target", "title")),

		Case(`[link](target)`, Link("link", "target")),
		Case(`[link](target "title")`, Link("link", "target", "title")),

		Case(`[link[with brackets]](target)`, Link("link[with brackets]", "target")),

		Case(`[link](/target with spaces )`, Link("link", "/target with spaces")),
		Case(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title")),

		Case(`[link](white-space  "around title" )`, Link("link", "white-space", "around title")),
		Case("[link](tabs\t\"around title\"\t)", Link("link", "tabs", "around title")),

		Case(`[link](target "")`, Link("link", "target", "")),
		Case(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", "")),

		Case(`[link](<target>)`, Link("link", "target")),
	];
	foreach (c; inlineCases)
		expectLink(c.src, c.expected, null);

	// reference style links - resolved through `refs`
	auto refCases = [
		Case(`[link][ref]`, Link("link", "target", "title")),
		Case(`[ref][]`, Link("ref", "target", "title")),
		Case(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title")),
	];
	foreach (c; refCases)
		expectLink(c.src, c.expected, refs);

	// inputs that must be rejected
	auto rejected = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link scratch;
	foreach (src; rejected)
		assert (!parseLink(src, scratch, refs, null), src);
}
1257 
@safe unittest { // attributes
	// Parses `src`, optionally collecting attributes, and verifies the
	// resulting link, the unconsumed remainder and the collected attributes.
	void check(string src, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...)
	@safe {
		Link parsed;
		Attribute[] collected;

		Attribute[]* target = null;
		if (parse_atts) target = () @trusted { return &collected; } ();

		parseLink(src, parsed, refs, target);
		assert (parsed == explnk);
		assert (src == exprem);
		assert (collected == expatts);
	}

	// inline attribute block: left in the input unless attributes are requested
	check("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", ""));
	check("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz"));

	// attributes attached to a reference definition
	auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])];
	check("[foo][bar]", refs, false, "", Link("foo", "url", "title"));
	check("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid"));
}
1276 
/// Tries to parse an automatic link of the form `<url>` or `<address@host>`
/// at the start of `str`.
///
/// The bracketed text must not contain spaces or tabs and must contain a ':'
/// or an '@'. If it contains an '@' that is not preceded by an alphanumeric
/// scheme followed by ':', it is treated as an e-mail address and prefixed
/// with "mailto:".
///
/// Returns: true on success, in which case `url` receives the link target and
/// `str` is advanced past the closing '>'. On failure both `str` and `url`
/// are left untouched.
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	import std.algorithm.searching : all;
	import std.ascii : isAlphaNum;

	string pstr = str;
	if (pstr.length < 3) return false;
	if (pstr[0] != '<') return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if (cidx < 0) return false;

	// Work on a local copy so that the caller's `url` is only written once
	// the input has been fully validated.
	string candidate = pstr[0 .. cidx];
	if (candidate.indexOf(' ') >= 0 || candidate.indexOf('\t') >= 0) return false;
	auto atidx = candidate.indexOf('@');
	auto colonidx = candidate.indexOf(':');
	if (atidx < 0 && colonidx < 0) return false;

	// An '@' without a preceding "scheme:" marks a bare e-mail address.
	if (atidx >= 0 && (colonidx < 0 || colonidx > atidx ||
		!candidate[0 .. colonidx].all!(ch => ch.isAlphaNum)))
			candidate = "mailto:" ~ candidate;

	str = pstr[cidx+1 .. $];
	url = candidate;
	return true;
}
1302 
unittest {
	// the input must be recognized, consumed completely and yield `expectedUrl`
	void accept(string input, string expectedUrl)
	{
		string remainder = input;
		string parsedUrl;
		assert (parseAutoLink(remainder, parsedUrl));
		assert (parsedUrl == expectedUrl);
		assert (remainder.length == 0);
	}

	// the input must be rejected without touching either argument
	void reject(string input)
	{
		string remainder = input;
		string parsedUrl;
		assert (!parseAutoLink(remainder, parsedUrl));
		assert (parsedUrl.length == 0);
		assert (remainder == input);
	}

	accept("<http://foo/>", "http://foo/");
	reject("<http://foo/");
	accept("<mailto:foo@bar>", "mailto:foo@bar");
	accept("<foo@bar>", "mailto:foo@bar");
	accept("<proto:foo@bar>", "proto:foo@bar");
	accept("<proto:foo@bar:123>", "proto:foo@bar:123");
	accept("<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz");
}
1327 
/// Splits a trailing attribute block `{...}` off `line`.
///
/// Returns: the text between the last '{' and a '}' that ends the line
/// (ignoring trailing whitespace); `line` is then shortened to everything in
/// front of that '{'. If there is no such block, `null` is returned and
/// `line` is left untouched.
private string skipAttributes(ref string line)
@safe pure {
	auto trimmed = line.stripRight;
	if (trimmed.length == 0 || trimmed[$-1] != '}') return null;

	immutable open = trimmed.lastIndexOf('{');
	if (open < 0) return null;

	line = trimmed[0 .. open];
	return trimmed[open+1 .. $-1];
}
1340 
unittest {
	// each row: input line, expected remaining line, expected attribute string
	static immutable string[3][] cases = [
		[" foo ", " foo ", null],
		["foo {bar}", "foo ", "bar"],
		["foo {bar}  ", "foo ", "bar"],
		["foo bar} ", "foo bar} ", null],
		[" {bar} foo ", " {bar} foo ", null],
		[" fo {o {bar} ", " fo {o ", "bar"],
		[" fo {o} {bar} ", " fo {o} ", "bar"],
	];

	foreach (c; cases) {
		string line = c[0];
		auto atts = skipAttributes(line);
		assert (atts == c[2], atts);
		assert (line == c[1], line);
	}
}
1357 
/// Parses a space separated attribute string such as `#id .cls1 .cls2` and
/// merges the result into `dst`.
///
/// `#name` sets the "id" attribute, replacing an existing value; `.name`
/// appends to the "class" attribute, separating entries with a space. Tokens
/// with any other first character are ignored.
private void parseAttributeString(string attributes, ref Attribute[] dst)
@safe pure {
	import std.algorithm.iteration : splitter;

	// TODO: handle custom attributes (requires a different approach than splitter)

	foreach (token; attributes.splitter(' ')) {
		token = token.strip;
		if (token.length == 0) continue;

		string value = token[1 .. $];
		switch (token[0]) {
			case '#': {
				auto pos = dst.countUntil!(a => a.attribute == "id");
				if (pos < 0) dst ~= Attribute("id", value);
				else dst[pos].value = value;
				break;
			}
			case '.': {
				auto pos = dst.countUntil!(a => a.attribute == "class");
				if (pos < 0) dst ~= Attribute("class", value);
				else dst[pos].value ~= " " ~ value;
				break;
			}
			default:
				break;
		}
	}
}
1378 
unittest {
	// parses `src` into an empty list and compares it against `expected`
	void check(string src, Attribute[] expected...)
	{
		Attribute[] parsed;
		parseAttributeString(src, parsed);
		assert (parsed == expected, format("%s: %s", src, parsed));
	}

	// nothing to parse
	check("");

	// single entries
	check(".foo", Attribute("class", "foo"));
	check("#foo", Attribute("id", "foo"));

	// a later id replaces an earlier one, classes accumulate
	check("#foo #bar", Attribute("id", "bar"));
	check(".foo .bar", Attribute("class", "foo bar"));
	check("#foo #bar", Attribute("id", "bar"));

	// mixed entries keep the order of first appearance
	check(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar"));
}
1395 
/// Collects all link reference definitions from `lines` and removes the
/// defining lines from the array.
///
/// A definition has the form `[refid]: url "optional title" {attributes}`,
/// where the URL may be wrapped in `<...>`, the title may be enclosed in
/// double quotes, single quotes or parentheses, and the line must not be
/// indented.
///
/// Returns: the definitions, keyed by the lower-cased reference id.
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	// search for reference definitions:
	//   [refid] link "opt text"
	//   [refid] <link> "opt text"
	//   "opt text", 'opt text', (opt text)
	//   line must not be indented
	foreach (lnidx, ln; lines) {
		if (isLineIndented(ln)) continue;
		ln = strip(ln);
		if (!ln.startsWith("[")) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if (idx < 0) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		// a trailing {...} block is split off before the URL/title are parsed
		string attstr = ln.skipAttributes();

		string url;
		if (ln.startsWith("<")) {
			idx = ln.indexOf('>');
			if (idx < 0) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// The URL ends at the first space OR tab, whichever comes first.
			// Looking for a space first and falling back to a tab only when
			// no space exists would split `url<TAB>"a title"` inside the
			// title.
			size_t urlend = 0;
			while (urlend < ln.length && ln[urlend] != ' ' && ln[urlend] != '\t')
				urlend++;
			url = ln[0 .. urlend];
			ln = ln[urlend .. $];
		}
		ln = stripLeft(ln);

		string title;
		if (ln.length >= 3) {
			if (ln[0] == '(' && ln[$-1] == ')'
				|| ln[0] == '\"' && ln[$-1] == '\"'
				|| ln[0] == '\'' && ln[$-1] == '\'' )
			{
				title = ln[1 .. $-1];
			}
		}

		LinkRef lref;
		lref.id = refid;
		lref.url = url;
		lref.title = title;
		parseAttributeString(attstr, lref.attributes);
		ret[toLower(refid)] = lref;
		reflines[lnidx] = true;

		debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach (i, ln; lines)
		if (i !in reflines)
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}
1474 
1475 
/**
	Generates an identifier suitable for use within a URL.

	The resulting range yields only ASCII lower case alphabetic or numeric
	characters, as well as dashes (-). Every sequence of non-alphanumeric
	characters is replaced by a single dash, and no dash is emitted at
	either the front or the back of the result.
*/
auto asSlug(R)(R text)
	if (isInputRange!R && is(typeof(R.init.front) == dchar))
{
	static struct SlugRange {
		private {
			R _source;
			bool _pendingDash; // a separator dash has to be emitted next
		}

		this(R input)
		{
			_source = input;
			// leading separators never produce a dash
			dropSeparators();
		}

		@property bool empty() const { return !_pendingDash && _source.empty; }

		@property char front() const {
			if (_pendingDash) return '-';

			// only ASCII alphanumerics can be at the front at this point
			immutable ch = cast(char)_source.front;
			return (ch >= 'A' && ch <= 'Z') ? cast(char)(ch + ('a' - 'A')) : ch;
		}

		void popFront()
		{
			if (_pendingDash) {
				_pendingDash = false;
				return;
			}

			_source.popFront();
			// emit a dash only if separators were skipped and more text follows
			immutable skipped = dropSeparators();
			_pendingDash = skipped && !_source.empty;
		}

		// Advances to the next ASCII alphanumeric character (or the end of
		// the input) and reports whether anything was skipped on the way.
		private bool dropSeparators()
		{
			bool skipped = false;
			for (; !_source.empty; _source.popFront()) {
				immutable c = _source.front;
				immutable keep = (c >= 'a' && c <= 'z')
					|| (c >= 'A' && c <= 'Z')
					|| (c >= '0' && c <= '9');
				if (keep) break;
				skipped = true;
			}
			return skipped;
		}
	}
	return SlugRange(text);
}
1541 
unittest {
	import std.algorithm : equal;

	// each row: input text, expected slug
	static immutable string[2][] cases = [
		["", ""],
		[".,-", ""],
		["abc", "abc"],
		["aBc123", "abc123"],
		["....aBc...123...", "abc-123"],
	];

	foreach (c; cases)
		assert (c[0].asSlug.equal(c[1]));
}
1550 
/// A link reference definition (`[id]: url "title" {attributes}`).
private struct LinkRef {
	/// Reference id as written in the definition
	string id;
	/// Link target
	string url;
	/// Optional link title
	string title;
	/// Attributes given in a trailing attribute block
	Attribute[] attributes;
}
1557 
/// The components of a parsed link or image.
private struct Link {
	/// Text between the square brackets
	string text;
	/// Link target
	string url;
	/// Optional link title
	string title;
}
1563 
@safe unittest { // alt and title attributes
	// image without a title
	immutable plain = filterMarkdown("![alt](http://example.org/image)");
	assert (plain == "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");

	// image with a title
	immutable titled = filterMarkdown("![alt](http://example.org/image \"Title\")");
	assert (titled == "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
1570 
@safe unittest { // complex links
	// link text spanning two lines, target wrapped in <...>
	immutable multiline = filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and");
	assert (multiline == "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");

	// image nested inside of a link
	immutable nested = filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)");
	assert (nested == "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1577 
@safe unittest { // check CTFE-ability
	// initializing an enum forces the filter to run at compile time
	enum html = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	enum expected = "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n";
	assert (html == expected, html);
}
1582 
@safe unittest { // correct line breaks in restrictive mode
	immutable html = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert (html == "<p>hello<br/>world\n</p>\n", html);
}
1587 
1588 /*@safe unittest { // code blocks and blockquotes
1589 	assert (filterMarkdown("\tthis\n\tis\n\tcode") ==
1590 		"<pre><code>this\nis\ncode</code></pre>\n");
1591 	assert (filterMarkdown("    this\n    is\n    code") ==
1592 		"<pre><code>this\nis\ncode</code></pre>\n");
1593 	assert (filterMarkdown("    this\n    is\n\tcode") ==
1594 		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1595 	assert (filterMarkdown("\tthis\n\n\tcode") ==
1596 		"<pre><code>this\n\ncode</code></pre>\n");
1597 	assert (filterMarkdown("\t> this") ==
1598 		"<pre><code>&gt; this</code></pre>\n");
1599 	assert (filterMarkdown(">     this") ==
1600 		"<blockquote><pre><code>this</code></pre></blockquote>\n");
1601 	assert (filterMarkdown(">     this\n    is code") ==
1602 		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1603 }*/
1604 
@safe unittest { // heading with a generated id
	immutable html = filterMarkdown("## Hello, World!");
	assert (html == "<h2 id=\"hello-world\"> Hello, World!</h2>\n", html);
}
1608 
@safe unittest { // tables
	// each row: Markdown input, expected HTML output
	static immutable string[2][] cases = [
		["foo|bar\n---|---",
			"<table>\n<tr><th>foo</th><th>bar</th></tr>\n</table>\n"],
		[" *foo* | bar \n---|---\n baz|bam",
			"<table>\n<tr><th><em>foo</em></th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"],
		["|foo|bar|\n---|---\n baz|bam",
			"<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"],
		["foo|bar\n|---|---|\nbaz|bam",
			"<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"],
		["foo|bar\n---|---\n|baz|bam|",
			"<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n"],
		["foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|",
			"<table>\n<tr><th align=\"left\">foo</th><th align=\"right\">bar</th><th align=\"center\">baz</th></tr>\n"
			~ "<tr><td align=\"left\">baz</td><td align=\"right\">bam</td><td align=\"center\">bap</td></tr>\n</table>\n"],
		[" |bar\n---|---",
			"<table>\n<tr><th></th><th>bar</th></tr>\n</table>\n"],
		["foo|bar\n---|---\nbaz|",
			"<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td></tr>\n</table>\n"],
	];

	foreach (c; cases)
		assert (filterMarkdown(c[0], MarkdownFlags.tables) == c[1], c[0]);
}
1628 
@safe unittest { // issue #1527 - blank lines in code blocks
	immutable html = filterMarkdown("    foo\n\n    bar\n");
	assert (html == "<pre class=\"prettyprint\"><code>foo\n\nbar\n</code></pre>\n");
}
1633 
@safe unittest { // fenced code block inside of a block quote, CRLF line endings
	immutable html = filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault);
	assert (html == "<blockquote><pre class=\"prettyprint\"><code>test\n</code></pre>\n</blockquote>\n");
}
1638 
@safe unittest { // issue #1845 - malicious URI targets
	// each row: Markdown input, expected HTML output
	static immutable string[2][] cases = [
		["[foo](javascript:foo) ![bar](javascript:bar) <javascript:baz>",
			"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"bar\"> <a href=\"#\">javascript:baz</a>\n</p>\n"],
		["[foo][foo] ![foo][foo]\n[foo]: javascript:foo",
			"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"foo\">\n</p>\n"],
		["[foo](javascript%3Abar)",
			"<p><a href=\"javascript%3Abar\">foo</a>\n</p>\n"],

		// extra XSS regression tests
		["[<script></script>](bar)",
			"<p><a href=\"bar\">&lt;script&gt;&lt;/script&gt;</a>\n</p>\n"],
		["[foo](\"><script></script><span foo=\")",
			"<p><a href=\"&quot;&gt;&lt;script&gt;&lt;/script&gt;&lt;span foo=&quot;\">foo</a>\n</p>\n"],
		["[foo](javascript&#58;bar)",
			"<p><a href=\"javascript&amp;#58;bar\">foo</a>\n</p>\n"],
	];

	foreach (c; cases)
		assert (filterMarkdown(c[0], MarkdownFlags.forumDefault) == c[1], c[0]);
}
1655 
@safe unittest { // issue #2132 - table with more columns in body goes out of array bounds
	immutable html = filterMarkdown("| a | b |\n|--------|--------|\n|   c    | d  | e |", MarkdownFlags.tables);
	assert (html == "<table>\n<tr><th>a</th><th>b</th></tr>\n<tr><td>c</td><td>d</td><td>e</td></tr>\n</table>\n");
}
1660 
@safe unittest { // lists
	// each row: Markdown input, expected HTML output
	static immutable string[2][] cases = [
		["- foo\n- bar",
			"<ul>\n<li>foo\n</li>\n<li>bar\n</li>\n</ul>\n"],
		["- foo\n\n- bar",
			"<ul>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ul>\n"],
		["1. foo\n2. bar",
			"<ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n"],
		["1. foo\n\n2. bar",
			"<ol>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ol>\n"],
	];

	foreach (c; cases)
		assert (filterMarkdown(c[0]) == c[1], c[0]);
}
1671 
@safe unittest { // figures
	// without the figures flag the markers are ordinary list items
	static immutable string[2][] plainCases = [
		["- %%%", "<ul>\n<li>%%%\n</li>\n</ul>\n"],
		["- ###", "<ul>\n<li>###\n</li>\n</ul>\n"],
	];
	foreach (c; plainCases)
		assert (filterMarkdown(c[0]) == c[1], c[0]);

	// with the figures flag they produce <figure>/<figcaption> elements
	static immutable string[2][] figureCases = [
		["- %%%", "<figure></figure>\n"],
		["- ###", "<figcaption></figcaption>\n"],
		["- %%%\n\tfoo\n\n\t- ###\n\t\tbar",
			"<figure>foo\n<figcaption>bar\n</figcaption>\n</figure>\n"],
		["- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz",
			"<figure><p>foo\n</p>\n<p>bar\n</p>\n<figcaption>baz\n</figcaption>\n</figure>\n"],
		["- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz",
			"<figure>foo\n<figcaption><p>bar\n</p>\n<p>baz\n</p>\n</figcaption>\n</figure>\n"],
		["- %%%\n\t1. foo\n\t2. bar\n\n\t- ###\n\t\tbaz",
			"<figure><ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n<figcaption>baz\n</figcaption>\n</figure>\n"],
		["- foo\n- %%%", "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n"],
		["- foo\n\n- %%%", "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n"],
	];
	foreach (c; figureCases)
		assert (filterMarkdown(c[0], MarkdownFlags.figures) == c[1], c[0]);
}
1688 
@safe unittest { // HTML entities
	// each row: Markdown input, expected HTML output; entities pass through
	// in text and emphasis, but are escaped inside of inline code
	static immutable string[2][] cases = [
		["&nbsp;", "<p>&nbsp;\n</p>\n"],
		["*&nbsp;*", "<p><em>&nbsp;</em>\n</p>\n"],
		["`&nbsp;`", "<p><code class=\"prettyprint\">&amp;nbsp;</code>\n</p>\n"],
	];

	foreach (c; cases)
		assert (filterMarkdown(c[0]) == c[1], c[0]);
}