1 /**
2 	Markdown parser implementation
3 
4 	Copyright: © 2012-2019 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.textfilter.markdown;
9 
10 import vibe.core.log;
11 import vibe.textfilter.html;
12 import vibe.utils.string;
13 
14 import std.algorithm : canFind, countUntil, min;
15 import std.array;
16 import std.format;
17 import std.range;
18 import std.string;
19 
20 /*
21 	TODO:
22 		detect inline HTML tags
23 */
24 
version(MarkdownTest)
{
	/** Stand-alone test driver, only compiled with the `MarkdownTest` version.

		Reads `test.txt` from the working directory, converts it with
		`filterMarkdown` and logs the generated HTML line by line.

		Returns: Always `0`.
	*/
	int main()
	{
		import std.file;
		setLogLevel(LogLevel.Trace);
		auto text = readText("test.txt");
		auto result = appender!string();
		filterMarkdown(result, text);
		// The generated HTML is arbitrary text and may contain '%', so it
		// is passed as a format argument instead of as the format string.
		foreach (ln; splitLines(result.data))
			logInfo("%s", ln);
		return 0;
	}
}
39 
/** Converts Markdown source text to HTML and returns the result as a string.

	Params:
		str = The Markdown source text
		flags = Parser feature flags; all other settings keep their defaults
		settings = Optional parser settings; defaults are used if `null` is
			passed

	Returns: The generated HTML
*/
string filterMarkdown()(string str, MarkdownFlags flags)
@trusted { // scope class is not @safe for DMD 2.072
	// wrap the flags in a temporary settings object and forward
	scope flag_settings = new MarkdownSettings;
	flag_settings.flags = flags;
	return filterMarkdown(str, flag_settings);
}
/// ditto
string filterMarkdown()(string str, scope MarkdownSettings settings = null)
@trusted { // Appender not @safe as of 2.065
	// render into an in-memory buffer using the output range overload
	auto html = appender!string();
	filterMarkdown(html, str, settings);
	return html.data;
}
55 
56 
/** Converts Markdown source text to HTML and writes it to an output range.

	Params:
		dst = Output range receiving the generated HTML
		src = The Markdown source text
		flags = Parser feature flags; all other settings keep their defaults
		settings = Optional parser settings; a default constructed
			`MarkdownSettings` instance is used if `null` is passed
*/
void filterMarkdown(R)(ref R dst, string src, MarkdownFlags flags)
{
	// wrap the flags in a temporary settings object and forward
	scope flag_settings = new MarkdownSettings;
	flag_settings.flags = flags;
	filterMarkdown(dst, src, flag_settings);
}
/// ditto
void filterMarkdown(R)(ref R dst, string src, scope MarkdownSettings settings = null)
{
	if (settings is null) settings = new MarkdownSettings;

	// The statement order matters: the reference scan gets the line array
	// before the lines are classified (scanForReferences is defined outside
	// of this section).
	auto source_lines = splitLines(src);
	auto link_refs = scanForReferences(source_lines);
	auto classified = parseLines(source_lines, settings);

	// build the block tree and serialize it as HTML
	Block document;
	parseBlocks(document, classified, null, settings);
	writeBlock(dst, document, link_refs, settings);
}
77 
/**
	Returns the hierarchy of sections

	Only headers at the top level of the document are considered. A header
	becomes a sub section of the most recently added section as long as that
	section has a smaller heading level; otherwise it is appended as a
	sibling.

	Params:
		markdown_source = The Markdown source text
		settings = Optional parser settings; defaults are used if `null` is
			passed

	Returns:
		The top-level sections. Each section's `anchor` equals the `id`
		attribute that is emitted for the corresponding heading element.
*/
Section[] getMarkdownOutline(string markdown_source, scope MarkdownSettings settings = null)
{
	import std.conv : to;

	if (!settings) settings = new MarkdownSettings;
	auto all_lines = splitLines(markdown_source);
	auto lines = parseLines(all_lines, settings);
	Block root_block;
	parseBlocks(root_block, lines, null, settings);
	Section root;

	foreach (ref sb; root_block.blocks) {
		if (sb.type != BlockType.header) continue;

		// descend along the last section of each level while it is a
		// proper parent (smaller heading level) of the new header
		auto s = &root;
		while (s.subSections.length > 0
			&& s.subSections[$-1].headingLevel < sb.headerLevel)
		{
			s = &s.subSections[$-1];
		}

		// parseBlocks stores the id that ends up in the HTML (explicit or
		// generated) as an attribute - use it so that the anchor always
		// matches the rendered heading
		string anchor;
		bool have_anchor = false;
		foreach (ref attr; sb.attributes) {
			if (attr.attribute == "id") {
				anchor = attr.value;
				have_anchor = true;
				break;
			}
		}
		if (!have_anchor) anchor = sb.text[0].asSlug.to!string;

		s.subSections ~= Section(sb.headerLevel, sb.text[0], anchor);
	}

	return root.subSections;
}
106 
///
unittest {
	immutable source = "## first\n## second\n### third\n# fourth\n### fifth";

	// note that captions keep the whitespace that follows the '#' marks
	auto expected = [
		Section(2, " first", "first"),
		Section(2, " second", "second", [
			Section(3, " third", "third")
		]),
		Section(1, " fourth", "fourth", [
			Section(3, " fifth", "fifth")
		])
	];

	assert (getMarkdownOutline(source) == expected);
}
122 
/** Bundles the options that control Markdown parsing and HTML generation.
*/
final class MarkdownSettings {
	/// Feature flags that control the capabilities of the parser.
	MarkdownFlags flags = MarkdownFlags.vanillaMarkdown;

	/// Heading level that is emitted for a level one Markdown heading.
	size_t headingBaseLevel = 1;

	/// Optional callback that can rewrite link/image URLs before they are written.
	string delegate(string url_or_path, bool is_image) urlFilter;

	/// White list of URI schemas that may occur in link/image targets.
	string[] allowedURISchemas = ["http", "https", "ftp", "mailto"];
}
136 
/** Bit flags that enable optional features of the Markdown parser.
*/
enum MarkdownFlags {
	/** Same as `vanillaMarkdown`
	*/
	none = 0,

	/** Convert line breaks into hard line breaks in the output

		This option is useful when operating on text that may be formatted as
		plain text, without having Markdown in mind, while still improving
		the appearance of the text in many cases. A common example would be
		to format e-mails or newsgroup posts.
	*/
	keepLineBreaks = 1 << 0,

	/** Support fenced code blocks.
	*/
	backtickCodeBlocks = 1 << 1,

	/** Disable support for embedded HTML
	*/
	noInlineHtml = 1 << 2,
	//noLinks = 1<<3,
	//allowUnsafeHtml = 1<<4,

	/** Support table definitions

		The syntax is based on Markdown Extra and GitHub flavored Markdown.
	*/
	tables = 1 << 5,

	/** Support HTML attributes after links

		Links or images directly followed by `{ … }` allow regular HTML
		attributes to be added to the generated HTML element.
	*/
	attributes = 1 << 6,

	/** Recognize figure definitions

		Figures can be defined using a modified list syntax:

		```
		- %%%
			This is the figure content

			- ###
				This is optional caption content
		```

		Just like for lists, arbitrary blocks can be nested within figure and
		figure caption blocks. If only a single paragraph is present within a
		figure caption block, the paragraph text will be emitted without the
		surrounding `<p>` tags. The same is true for figure blocks that contain
		only a single paragraph and any number of additional figure caption
		blocks.
	*/
	figures = 1 << 7,

	/** Support only standard Markdown features

		Note that the parser is not fully CommonMark compliant at the moment,
		but this is the general idea behind this option.
	*/
	vanillaMarkdown = none,

	/** Default set of flags suitable for use within an online forum
	*/
	forumDefault = keepLineBreaks | backtickCodeBlocks | noInlineHtml | tables
}
206 
/** A single entry of the document outline returned by `getMarkdownOutline`.
*/
struct Section {
	/// Level of the heading that starts the section
	size_t headingLevel;
	/// Text of the heading
	string caption;
	/// Anchor (slug) associated with the heading
	string anchor;
	/// Sections nested below this one
	Section[] subSections;
}
213 
// Names of the HTML tags that are recognized as the start/end of an HTML block
private immutable s_blockTags = ["div", "ol", "p", "pre", "section", "table", "ul"];
217 
// Kind of a single indentation level at the start of a line
private enum IndentType {
	white, // a tab character or four spaces
	quote  // a '>' block quote marker
}
222 
// Classification of a source line, as assigned by parseLines
private enum LineType {
	undefined,          // not classified (initial value)
	blank,              // nothing but spaces/tabs
	plain,              // regular text
	hline,              // horizontal rule
	atxHeader,          // header introduced by '#' characters
	setextHeader,       // '=' or '-' underline of a header
	tableSeparator,     // separator row between table header and body
	uList,              // unordered list item
	oList,              // ordered list item
	figure,             // "%%%" figure marker
	figureCaption,      // "###" figure caption marker
	htmlBlock,          // opening tag of an HTML block
	codeBlockDelimiter  // "```" fence of a code block
}
238 
// A source line together with its indentation levels and classification
private struct Line {
	LineType type;        // classification of the line
	IndentType[] indent;  // indentation levels found at the start of the line
	string text;          // the complete, unmodified line
	string unindented;    // the line with all indentation levels removed

	// Returns `text` with the first `n` indentation levels removed.
	string unindent(size_t n)
	pure @safe {
		assert (n <= indent.length);
		string rest = text;
		foreach (kind; indent[0 .. n]) {
			final switch (kind) {
				case IndentType.white:
					// a level is either four spaces or a single tab
					rest = rest[0] == ' ' ? rest[4 .. $] : rest[1 .. $];
					break;
				case IndentType.quote:
					// skip up to and including the '>' and one optional space
					rest = rest.stripLeft()[1 .. $];
					if (rest.startsWith(' '))
						rest.popFront();
					break;
			}
		}
		return rest;
	}
}
265 
// Determines indentation and type of each source line.
//
// Every line is stripped of its leading indentation levels (tab, four
// spaces or a '>' quote marker) and the remainder is classified. The order
// of the checks defines which type wins if a line matches several of them.
private Line[] parseLines(string[] lines, scope MarkdownSettings settings)
pure @safe {
	immutable bool fenced_code = (settings.flags & MarkdownFlags.backtickCodeBlocks) != 0;
	immutable bool with_tables = (settings.flags & MarkdownFlags.tables) != 0;
	immutable bool with_figures = (settings.flags & MarkdownFlags.figures) != 0;
	immutable bool with_html = (settings.flags & MarkdownFlags.noInlineHtml) == 0;

	Line[] ret;
	foreach (raw; lines) {
		Line lninfo;
		lninfo.text = raw;

		// peel off one indentation level per iteration
		string ln = raw;
		for (;;) {
			if (ln.length == 0) break;

			if (ln[0] == '\t') {
				lninfo.indent ~= IndentType.white;
				ln.popFront();
				continue;
			}

			if (ln.startsWith("    ")) {
				lninfo.indent ~= IndentType.white;
				ln.popFrontN(4);
				continue;
			}

			auto stripped = ln.stripLeft();
			if (!stripped.startsWith(">")) break;

			// quote marker, optionally followed by a single space
			lninfo.indent ~= IndentType.quote;
			ln = stripped;
			ln.popFront();
			if (ln.startsWith(' '))
				ln.popFront();
		}
		lninfo.unindented = ln;

		LineType tp;
		if (fenced_code && isCodeBlockDelimiter(ln)) tp = LineType.codeBlockDelimiter;
		else if (isAtxHeaderLine(ln)) tp = LineType.atxHeader;
		else if (isSetextHeaderLine(ln)) tp = LineType.setextHeader;
		else if (with_tables && isTableSeparatorLine(ln)) tp = LineType.tableSeparator;
		else if (isHlineLine(ln)) tp = LineType.hline;
		else if (isOListLine(ln)) tp = LineType.oList;
		else if (isUListLine(ln)) {
			tp = LineType.uList;
			if (with_figures) {
				// figures reuse the list syntax with a special item text
				auto suff = removeListPrefix(ln, LineType.uList);
				if (suff == "%%%") tp = LineType.figure;
				else if (suff == "###") tp = LineType.figureCaption;
			}
		}
		else if (isLineBlank(ln)) tp = LineType.blank;
		else if (with_html && isHtmlBlockLine(ln)) tp = LineType.htmlBlock;
		else tp = LineType.plain;
		lninfo.type = tp;

		ret ~= lninfo;
	}
	return ret;
}
319 
unittest {
	auto s = new MarkdownSettings;
	s.flags = MarkdownFlags.forumDefault;

	static struct Case {
		string input;
		LineType type;
		IndentType[] indent;
		string unindented;
	}

	// up to three spaces after the (space padded) quote marker are kept as
	// text, four or more form an additional indentation level
	auto cases = [
		Case(">```D", LineType.codeBlockDelimiter, [IndentType.quote], "```D"),
		Case("> ```D", LineType.codeBlockDelimiter, [IndentType.quote], "```D"),
		Case(">    ```D", LineType.codeBlockDelimiter, [IndentType.quote], "   ```D"),
		Case(">     ```D", LineType.codeBlockDelimiter, [IndentType.quote, IndentType.white], "```D"),
		Case(">test", LineType.plain, [IndentType.quote], "test"),
		Case("> test", LineType.plain, [IndentType.quote], "test"),
		Case(">    test", LineType.plain, [IndentType.quote], "   test"),
		Case(">     test", LineType.plain, [IndentType.quote, IndentType.white], "test")
	];

	foreach (c; cases)
		assert (parseLines([c.input], s) == [Line(c.type, c.indent, c.input, c.unindented)]);
}
341 
// Kind of a node in the block tree that is built by parseBlocks
private enum BlockType {
	plain,         // text lines that are written verbatim
	text,          // inline Markdown text without a surrounding tag
	paragraph,     // <p>
	header,        // <hN>
	table,         // <table>
	oList,         // <ol>
	uList,         // <ul>
	listItem,      // <li>
	code,          // <pre><code>
	quote,         // <blockquote>
	figure,        // <figure>
	figureCaption  // <figcaption>
}
356 
// Node of the block tree that is built by parseBlocks
private struct Block {
	BlockType type;          // kind of block
	Attribute[] attributes;  // HTML attributes of the generated element
	string[] text;           // text lines that directly belong to the block
	Block[] blocks;          // nested child blocks
	size_t headerLevel;      // heading level, set for header blocks
	Alignment[] columns;     // column alignments, set for table blocks
}
365 
// Name/value pair of a single HTML attribute
private struct Attribute {
	string attribute;  // attribute name
	string value;      // unescaped attribute value
}
370 
// Alignment of a table column; `center` is the combination of both sides
private enum Alignment {
	none = 0,
	left = 1 << 0,
	right = 1 << 1,
	center = left | right
}
377 
// Builds the block tree for all lines that share the indentation `base_indent`.
//
// Lines are consumed from the front of `lines` and the resulting blocks are
// appended to `root.blocks`. The function returns as soon as a line is found
// whose indentation does not start with `base_indent`, or when no lines are
// left. Deeper indented lines are turned into code blocks or are handled by
// a recursive call.
//
// Params:
//   root = Block that receives the parsed child blocks; its type is set to
//     `text` for an empty `base_indent` and to `quote` if the innermost
//     indentation level is a quote
//   lines = Classified source lines; consumed lines are removed
//   base_indent = Indentation levels of the block that is being parsed
//   settings = Parser settings (only `flags` is read here)
private void parseBlocks(ref Block root, ref Line[] lines, IndentType[] base_indent, scope MarkdownSettings settings)
pure @safe {
	import std.conv : to;

	if (base_indent.length == 0) root.type = BlockType.text;
	else if (base_indent[$-1] == IndentType.quote) root.type = BlockType.quote;

	while (!lines.empty) {
		auto ln = lines.front;

		if (ln.type == LineType.blank) {
			lines.popFront();
			continue;
		}

		if (ln.indent != base_indent) {
			// the line belongs to an outer block
			if (ln.indent.length < base_indent.length
				|| ln.indent[0 .. base_indent.length] != base_indent)
			{
				return;
			}

			auto cindent = base_indent ~ IndentType.white;
			if (ln.indent == cindent) {
				// exactly one additional level of white space: indented
				// code block, including any blank lines in between
				Block cblock;
				cblock.type = BlockType.code;
				while (!lines.empty && (lines.front.unindented.strip.empty
					|| lines.front.indent.length >= cindent.length
					&& lines.front.indent[0 .. cindent.length] == cindent))
				{
					cblock.text ~= lines.front.indent.length >= cindent.length
						? lines.front.unindent(cindent.length) : "";
					lines.popFront();
				}
				root.blocks ~= cblock;
			} else {
				// any other nesting (e.g. quotes) is parsed recursively
				Block subblock;
				parseBlocks(subblock, lines, ln.indent[0 .. base_indent.length+1], settings);
				root.blocks ~= subblock;
			}
		} else {
			Block b;
			final switch (ln.type) {
				case LineType.undefined: assert (false);
				case LineType.blank: assert (false);
				case LineType.plain:
					if (lines.length >= 2 && lines[1].type == LineType.setextHeader) {
						// text line followed by an '=' or '-' underline
						auto setln = lines[1].unindented;
						b.type = BlockType.header;
						b.text = [ln.unindented];
						if (settings.flags & MarkdownFlags.attributes)
							parseAttributeString(skipAttributes(b.text[0]), b.attributes);
						if (!b.attributes.canFind!(a => a.attribute == "id"))
							b.attributes ~= Attribute("id", asSlug(b.text[0]).to!string);
						b.headerLevel = setln.strip()[0] == '=' ? 1 : 2;
						lines.popFrontN(2);
					} else if (lines.length >= 2 && lines[1].type == LineType.tableSeparator
						&& ln.unindented.indexOf('|') >= 0)
					{
						// table header row followed by the separator row
						auto setln = lines[1].unindented;
						b.type = BlockType.table;
						b.text = [ln.unindented];
						foreach (c; getTableColumns(setln)) {
							Alignment a = Alignment.none;
							if (c.startsWith(':')) a |= Alignment.left;
							if (c.endsWith(':')) a |= Alignment.right;
							b.columns ~= a;
						}

						lines.popFrontN(2);
						// all directly following lines with a '|' are body rows
						while (!lines.empty && lines[0].unindented.indexOf('|') >= 0) {
							b.text ~= lines.front.unindented;
							lines.popFront();
						}
					} else {
						b.type = BlockType.paragraph;
						b.text = skipText(lines, base_indent);
					}
					break;
				case LineType.hline:
					b.type = BlockType.plain;
					b.text = ["<hr>"];
					lines.popFront();
					break;
				case LineType.atxHeader:
					b.type = BlockType.header;
					string hl = ln.unindented;
					b.headerLevel = 0;
					while (hl.length > 0 && hl[0] == '#') {
						b.headerLevel++;
						hl = hl[1 .. $];
					}

					if (settings.flags & MarkdownFlags.attributes)
						parseAttributeString(skipAttributes(hl), b.attributes);
					if (!b.attributes.canFind!(a => a.attribute == "id"))
						b.attributes ~= Attribute("id", asSlug(hl).to!string);

					// remove the optional closing sequence of '#' characters
					while (hl.length > 0 && (hl[$-1] == '#' || hl[$-1] == ' '))
						hl = hl[0 .. $-1];
					b.text = [hl];
					lines.popFront();
					break;
				case LineType.setextHeader:
					// An underline that does not follow a text line. Lines
					// such as "---" are classified as setext underline before
					// the horizontal rule check is made in parseLines, so
					// they have to be emitted as a rule here instead of
					// being dropped.
					if (isHlineLine(ln.unindented)) {
						b.type = BlockType.plain;
						b.text = ["<hr>"];
					}
					lines.popFront();
					break;
				case LineType.tableSeparator:
					// separator without a header row - ignored
					lines.popFront();
					break;
				case LineType.figure:
				case LineType.figureCaption:
					b.type = ln.type == LineType.figure
						? BlockType.figure : BlockType.figureCaption;

					// the contents are the blocks indented by one more level
					auto itemindent = base_indent ~ IndentType.white;
					lines.popFront();
					parseBlocks(b, lines, itemindent, settings);
					break;
				case LineType.uList:
				case LineType.oList:
					b.type = ln.type == LineType.uList ? BlockType.uList : BlockType.oList;

					auto itemindent = base_indent ~ IndentType.white;
					bool paraMode = false;

					// look ahead to determine whether the list is in paragraph
					// mode (one or multiple <p></p> nested within each item)
					bool couldBeParaMode = false;
					foreach (pln; lines[1 .. $]) {
						if (pln.type == LineType.blank) {
							couldBeParaMode = true;
							continue;
						}
						if (!pln.indent.startsWith(base_indent)) break;
						if (pln.indent == base_indent) {
							if (pln.type == ln.type)
								paraMode = couldBeParaMode;
							break;
						}
					}

					while (!lines.empty && lines.front.type == ln.type
						&& lines.front.indent == base_indent)
					{
						Block itm;
						itm.text = skipText(lines, itemindent);
						itm.text[0] = removeListPrefix(itm.text[0], ln.type);

						if (paraMode) {
							// wrap the item text in a nested paragraph
							Block para;
							para.type = BlockType.paragraph;
							para.text = itm.text;
							itm.blocks ~= para;
							itm.text = null;
						}

						// parseBlocks may change the type, so set it afterwards
						parseBlocks(itm, lines, itemindent, settings);
						itm.type = BlockType.listItem;
						b.blocks ~= itm;
					}
					break;
				case LineType.htmlBlock:
					int nestlevel = 0;
					auto starttag = parseHtmlBlockLine(ln.unindented);
					// NOTE: not expected to trigger, as parseLines assigns
					// this line type only to opening block tags
					if (!starttag.isHtmlBlock || !starttag.open)
						break;

					// copy all lines verbatim up to the matching closing tag
					b.type = BlockType.plain;
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;

						auto str = lines.front.unindent(base_indent.length);
						auto taginfo = parseHtmlBlockLine(str);
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
						if (taginfo.isHtmlBlock && taginfo.tagName == starttag.tagName)
							nestlevel += taginfo.open ? 1 : -1;
						if (nestlevel <= 0) break;
					}
					break;
				case LineType.codeBlockDelimiter:
					lines.popFront(); // TODO: get language from line
					b.type = BlockType.code;
					// collect everything up to the closing fence
					while (!lines.empty) {
						if (lines.front.indent.length < base_indent.length)
							break;
						if (lines.front.indent[0 .. base_indent.length] != base_indent)
							break;
						if (lines.front.type == LineType.codeBlockDelimiter) {
							lines.popFront();
							break;
						}
						b.text ~= lines.front.unindent(base_indent.length);
						lines.popFront();
					}
					break;
			}
			root.blocks ~= b;
		}
	}
}
583 
584 
// Consumes the first line of `lines` plus all directly following plain text
// lines whose indentation is compatible with `indent`.
//
// Returns the consumed lines with up to `indent.length` indentation levels
// removed. The result always contains at least one element.
private string[] skipText(ref Line[] lines, IndentType[] indent)
pure @safe {
	// Tests whether a continuation line with indentation `indent` may still
	// belong to a text block that is indented by `base_indent`.
	static bool matchesIndent(IndentType[] indent, IndentType[] base_indent)
	{
		// the line's indentation must be a prefix of the base indentation
		if (indent.length > base_indent.length) return false;
		if (indent != base_indent[0 .. indent.length]) return false;

		// find the innermost quote level of the base indentation
		sizediff_t last_quote = -1;
		foreach_reverse (idx, kind; base_indent) {
			if (kind == IndentType.quote) {
				last_quote = idx;
				break;
			}
		}
		if (last_quote < 0) return true;

		// number of levels that follow the innermost quote level
		sizediff_t trailing = base_indent.length-1 - last_quote;
		return !(indent.length <= trailing);
	}

	// return value is used in variables that don't get bounds checks on the
	// first element, so we should return at least one
	if (lines.empty)
		return [""];

	string[] ret;
	do {
		ret ~= lines.front.unindent(min(indent.length, lines.front.indent.length));
		lines.popFront();
	} while (!lines.empty && matchesIndent(lines.front.indent, indent)
		&& lines.front.type == LineType.plain);

	return ret;
}
622 
/// private
/*
	Writes the HTML representation of a block, including all nested blocks.

	Params:
		dst = Output range receiving the HTML
		block = The block to write
		links = Link reference definitions, passed on to the inline writer
		settings = Parser settings
*/
private void writeBlock(R)(ref R dst, ref const Block block, LinkRef[string] links, scope MarkdownSettings settings)
{
	// HTML "align" attribute text, indexed by the numeric `Alignment` value
	static immutable string[Alignment.max+1] alstr = ["", " align=\"left\"", " align=\"right\"", " align=\"center\""];

	// Writes a single table row using the given cell tag ("th" or "td").
	void writeTableRow(string row, string cell_tag)
	{
		put(dst, "<tr>");
		size_t i = 0;
		foreach (col; row.getTableColumns()) {
			string al = alstr[block.columns[i]];
			put(dst, '<');
			put(dst, cell_tag);
			put(dst, al);
			put(dst, '>');
			dst.writeMarkdownEscaped(col, links, settings);
			put(dst, "</");
			put(dst, cell_tag);
			put(dst, '>');
			// surplus cells reuse the alignment of the last defined column
			if (i + 1 < block.columns.length)
				i++;
		}
		put(dst, "</tr>\n");
	}

	final switch (block.type) {
		case BlockType.plain:
			// verbatim output without any escaping
			foreach (ln; block.text) {
				put(dst, ln);
				put(dst, "\n");
			}
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.text:
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			break;
		case BlockType.paragraph:
			assert (block.blocks.length == 0);
			put(dst, "<p>");
			writeMarkdownEscaped(dst, block, links, settings);
			put(dst, "</p>\n");
			break;
		case BlockType.header:
			assert (block.blocks.length == 0);
			assert (block.text.length == 1);
			size_t hlvl = block.headerLevel + (settings ? settings.headingBaseLevel-1 : 0);
			// HTML only defines <h1> through <h6>
			if (hlvl > 6) hlvl = 6;
			dst.writeTag(block.attributes, "h", hlvl);
			writeMarkdownEscaped(dst, block.text[0], links, settings);
			dst.formattedWrite("</h%s>\n", hlvl);
			break;
		case BlockType.table:
			// the first text line is the header row, the rest are body rows
			put(dst, "<table>\n");
			writeTableRow(block.text[0], "th");
			foreach (ln; block.text[1 .. $])
				writeTableRow(ln, "td");
			put(dst, "</table>\n");
			break;
		case BlockType.oList:
			put(dst, "<ol>\n");
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</ol>\n");
			break;
		case BlockType.uList:
			put(dst, "<ul>\n");
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</ul>\n");
			break;
		case BlockType.listItem:
			put(dst, "<li>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</li>\n");
			break;
		case BlockType.code:
			assert (block.blocks.length == 0);
			put(dst, "<pre class=\"prettyprint\"><code>");
			foreach (ln; block.text) {
				filterHTMLEscape(dst, ln);
				put(dst, "\n");
			}
			put(dst, "</code></pre>\n");
			break;
		case BlockType.quote:
			put(dst, "<blockquote>");
			writeMarkdownEscaped(dst, block, links, settings);
			foreach (b; block.blocks)
				writeBlock(dst, b, links, settings);
			put(dst, "</blockquote>\n");
			break;
		case BlockType.figure:
			put(dst, "<figure>");
			// a single content block next to the captions is written
			// without <p> tags if it is a paragraph
			bool omit_para = block.blocks.count!(b => b.type != BlockType.figureCaption) == 1;
			foreach (b; block.blocks) {
				if (b.type == BlockType.paragraph && omit_para) {
					writeMarkdownEscaped(dst, b, links, settings);
				} else writeBlock(dst, b, links, settings);
			}
			put(dst, "</figure>\n");
			break;
		case BlockType.figureCaption:
			put(dst, "<figcaption>");
			// a caption made of a single paragraph is written without <p> tags
			if (block.blocks.length == 1 && block.blocks[0].type == BlockType.paragraph) {
				writeMarkdownEscaped(dst, block.blocks[0], links, settings);
			} else {
				foreach (b; block.blocks)
					writeBlock(dst, b, links, settings);
			}
			put(dst, "</figcaption>\n");
			break;
	}
}
745 
// Writes the text lines of a block as inline Markdown.
//
// The lines are joined using "<br>" if `keepLineBreaks` is set and using a
// line feed otherwise. A trailing line feed is written for non-empty blocks.
private void writeMarkdownEscaped(R)(ref R dst, ref const Block block, in LinkRef[string] links, scope MarkdownSettings settings)
{
	// strip the const qualifier of the array, the strings are immutable anyway
	auto lines = () @trusted { return cast(string[])block.text; } ();

	string separator = settings.flags & MarkdownFlags.keepLineBreaks ? "<br>" : "\n";
	writeMarkdownEscaped(dst, lines.join(separator), links, settings);

	if (lines.length > 0) put(dst, "\n");
}
753 
/// private
/*
	Writes a run of inline Markdown text as HTML.

	Handles backslash escapes, emphasis, inline code, links, images and
	automatic links. All other characters are copied unmodified, except for
	'<' and '>', which are written as entities if `noInlineHtml` is set.
	Two trailing spaces result in a "<br/>" at the end of the output.

	Params:
		dst = Output range receiving the HTML
		ln = The inline Markdown text
		linkrefs = Link reference definitions used to resolve reference links
		settings = Parser settings (flags, URL filter and allowed URI schemas)
*/
private void writeMarkdownEscaped(R)(ref R dst, string ln, in LinkRef[string] linkrefs, scope MarkdownSettings settings)
{
	// With inline HTML disabled, no raw '<' or '>' taken from the input may
	// reach the output.
	immutable bool escape_html = (settings.flags & MarkdownFlags.noInlineHtml) != 0;

	bool isAllowedURI(string lnk) {
		auto idx = lnk.indexOf('/');
		auto cidx = lnk.indexOf(':');
		// always allow local URIs
		if (cidx < 0 || idx >= 0 && cidx > idx) return true;
		return settings.allowedURISchemas.canFind(lnk[0 .. cidx]);
	}

	string filterLink(string lnk, bool is_image) {
		if (isAllowedURI(lnk))
			return settings.urlFilter ? settings.urlFilter(lnk, is_image) : lnk;
		return "#"; // replace link with unknown schema with dummy URI
	}

	bool br = ln.endsWith("  ");
	while (ln.length > 0) {
		switch (ln[0]) {
			default:
				put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '\\':
				if (ln.length >= 2) {
					switch (ln[1]) {
						default:
							// not an escape sequence: keep both characters
							put(dst, ln[0 .. 2]);
							ln = ln[2 .. $];
							break;
						case '<', '>':
							if (escape_html) {
								// Emit only the backslash, so that the
								// bracket goes through the regular '<'/'>'
								// handling instead of being copied raw.
								put(dst, ln[0]);
								ln = ln[1 .. $];
							} else {
								put(dst, ln[0 .. 2]);
								ln = ln[2 .. $];
							}
							break;
						case '\'', '`', '*', '_', '{', '}', '[', ']',
							'(', ')', '#', '+', '-', '.', '!':
							// escaped special character: drop the backslash
							put(dst, ln[1]);
							ln = ln[2 .. $];
							break;
					}
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '_':
			case '*':
				string text;
				if (auto em = parseEmphasis(ln, text)) {
					put(dst, em == 1 ? "<em>" : em == 2 ? "<strong>" : "<strong><em>");
					if (escape_html) {
						// The emphasized text is written without further
						// processing, so brackets have to be escaped here.
						foreach (char ch; text) {
							if (ch == '<') put(dst, "&lt;");
							else if (ch == '>') put(dst, "&gt;");
							else put(dst, ch);
						}
					} else put(dst, text);
					put(dst, em == 1 ? "</em>" : em == 2 ? "</strong>": "</em></strong>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '`':
				string code;
				if (parseInlineCode(ln, code)) {
					put(dst, "<code class=\"prettyprint\">");
					filterHTMLEscape(dst, code, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</code>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '[':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("href", filterLink(link.url, false));
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "a");
					writeMarkdownEscaped(dst, link.text, linkrefs, settings);
					put(dst, "</a>");
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '!':
				Link link;
				Attribute[] attributes;
				if (parseLink(ln, link, linkrefs,
					settings.flags & MarkdownFlags.attributes ? &attributes : null))
				{
					attributes ~= Attribute("src", filterLink(link.url, true));
					attributes ~= Attribute("alt", link.text);
					if (link.title.length)
						attributes ~= Attribute("title", link.title);
					dst.writeTag(attributes, "img");
				} else if (ln.length >= 2
					&& !(escape_html && (ln[1] == '<' || ln[1] == '>')))
				{
					// not an image: copy the '!' together with the next
					// character - unless that is a bracket that must be
					// escaped by the regular '<'/'>' handling
					put(dst, ln[0 .. 2]);
					ln = ln[2 .. $];
				} else {
					put(dst, ln[0]);
					ln = ln[1 .. $];
				}
				break;
			case '>':
				if (settings.flags & MarkdownFlags.noInlineHtml) put(dst, "&gt;");
				else put(dst, ln[0]);
				ln = ln[1 .. $];
				break;
			case '<':
				string url;
				if (parseAutoLink(ln, url)) {
					bool is_email = url.startsWith("mailto:");
					put(dst, "<a href=\"");
					if (is_email) filterHTMLAllEscape(dst, url);
					else filterHTMLAttribEscape(dst, filterLink(url, false));
					put(dst, "\">");
					if (is_email) filterHTMLAllEscape(dst, url[7 .. $]);
					else filterHTMLEscape(dst, url, HTMLEscapeFlags.escapeMinimal);
					put(dst, "</a>");
				} else {
					if (ln.startsWith("<br>")) {
						// always support line breaks, since we embed them here ourselves!
						put(dst, "<br/>");
						ln = ln[4 .. $];
					} else if(ln.startsWith("<br/>")) {
						put(dst, "<br/>");
						ln = ln[5 .. $];
					} else {
						if (settings.flags & MarkdownFlags.noInlineHtml)
							put(dst, "&lt;");
						else put(dst, ln[0]);
						ln = ln[1 .. $];
					}
				}
				break;
		}
	}
	if (br) put(dst, "<br/>");
}
891 
// Writes an opening tag without attributes. The tag name is `name`, followed
// by the string representation of each of `name_additions`.
private void writeTag(R, ARGS...)(ref R dst, string name, ARGS name_additions)
{
	const(Attribute)[] no_attributes = null;
	writeTag(dst, no_attributes, name, name_additions);
}
896 
// Writes an opening tag with attributes. The tag name is `name`, followed by
// the string representation of each of `name_additions`. Attribute values
// are escaped, attribute names are written as they are.
private void writeTag(R, ARGS...)(ref R dst, scope const(Attribute)[] attributes, string name, ARGS name_additions)
{
	put(dst, '<');
	put(dst, name);
	foreach (suffix; name_additions)
		dst.formattedWrite("%s", suffix);

	foreach (attr; attributes) {
		put(dst, ' ');
		put(dst, attr.attribute);
		put(dst, "=\"");
		dst.filterHTMLAttribEscape(attr.value);
		put(dst, '"');
	}

	put(dst, '>');
}
909 
// Tests whether a line consists of nothing but spaces and tabs.
private bool isLineBlank(string ln)
pure @safe {
	enum blank_chars = " \t";
	return ln.allOf(blank_chars);
}
914 
// Tests whether a line is the underline of a setext style header: a run of
// '=' or '-' characters with optional white space before and after it.
private bool isSetextHeaderLine(string ln)
pure @safe {
	ln = stripLeft(ln);
	if (ln.length < 1) return false;

	immutable char marker = ln[0];
	if (marker != '=' && marker != '-') return false;

	// skip the run of marker characters, only white space may follow
	while (!ln.empty && ln.front == marker)
		ln.popFront();
	return allOf(ln, " \t");
}
929 
// Tests whether a line starts with one to six '#' characters that are
// followed by a space (leading white space is ignored).
private bool isAtxHeaderLine(string ln)
pure @safe {
	auto rest = stripLeft(ln);

	// count the leading '#' characters
	size_t level = 0;
	foreach (char ch; rest) {
		if (ch != '#') break;
		level++;
	}

	return level >= 1 && level <= 6
		&& level < rest.length && rest[level] == ' ';
}
938 
// Tests whether a line separates the header row of a table from its body.
//
// The line must consist of at least two '|' separated cells that each
// contain three or more '-' characters, optionally preceded and/or followed
// by a ':'. Leading and trailing '|' characters are optional.
private bool isTableSeparatorLine(string ln)
pure @safe {
	import std.algorithm.iteration : splitter;

	auto row = strip(ln);
	if (row.startsWith("|")) row = row[1 .. $];
	if (row.endsWith("|")) row = row[0 .. $-1];

	size_t cnt = 0;
	foreach (cell; row.splitter('|')) {
		// remove white space and the optional alignment colons
		auto dashes = cell.strip();
		if (dashes.startsWith(':')) dashes = dashes[1 .. $];
		if (dashes.endsWith(':')) dashes = dashes[0 .. $-1];

		if (dashes.length < 3 || !dashes.allOf("-"))
			return false;
		cnt++;
	}
	return cnt >= 2;
}
959 
unittest {
	// at least two columns with three or more dashes each
	foreach (valid; ["|----|---|", "|:----:|---|", "---|----", "| --- | :---- |"])
		assert(isTableSeparatorLine(valid), valid);

	foreach (invalid; ["| ---- |", "| -- | -- |", "| --- - | ---- |"])
		assert(!isTableSeparatorLine(invalid), invalid);
}
969 
// Returns a lazy range of the white space stripped cell texts of a table
// row. A single leading and a single trailing '|' are ignored.
private auto getTableColumns(string line)
pure @safe nothrow {
	import std.algorithm.iteration : map, splitter;

	immutable size_t lead = line.startsWith("|") ? 1 : 0;
	line = line[lead .. $];
	immutable size_t trail = line.endsWith("|") ? 1 : 0;
	line = line[0 .. $ - trail];

	return line.splitter('|').map!(cell => cell.strip());
}
978 
// Returns the number of cells in a table row.
private size_t countTableColumns(string line)
pure @safe {
	auto columns = getTableColumns(line);
	return columns.walkLength;
}
983 
// Tests whether a line is a horizontal rule: three or more '-', '*' or '_'
// characters of the same kind with nothing but spaces around or in between.
private bool isHlineLine(string ln)
pure @safe {
	// each entry holds the space plus one of the possible rule characters
	static immutable string[3] charsets = [" -", " *", " _"];

	foreach (set; charsets) {
		if (allOf(ln, set) && count(ln, set[1]) >= 3)
			return true;
	}
	return false;
}
991 
// Tests whether the first non-white character of a line is a '>'.
private bool isQuoteLine(string ln)
pure @safe {
	auto content = stripLeft(ln);
	return content.startsWith(">");
}
996 
// Counts the '>' markers at the start of a line. White space before and
// between the markers is ignored.
private size_t getQuoteLevel(string ln)
pure @safe {
	size_t level = 0;
	for (ln = stripLeft(ln); ln.length > 0 && ln[0] == '>'; ln = stripLeft(ln[1 .. $]))
		level++;
	return level;
}
1007 
// Tests whether a line starts an unordered list item: a '*', '+' or '-'
// followed by a space or a tab (leading white space is ignored).
private bool isUListLine(string ln)
pure @safe {
	auto item = stripLeft(ln);
	if (item.length < 2) return false;

	switch (item[0]) {
		default: return false;
		case '*', '+', '-':
			return item[1] == ' ' || item[1] == '\t';
	}
}
1016 
// Tests whether a line starts an ordered list item: one or more digits
// followed by a '.' and a space or a tab (leading white space is ignored).
private bool isOListLine(string ln)
pure @safe {
	auto item = stripLeft(ln);

	// length of the leading run of digits
	size_t digits = 0;
	while (digits < item.length && item[digits] >= '0' && item[digits] <= '9')
		digits++;
	if (digits == 0) return false;

	auto rest = item[digits .. $];
	if (rest.length < 2 || rest[0] != '.') return false;
	return rest[1] == ' ' || rest[1] == '\t';
}
1031 
/** Removes the list marker from the beginning of a list item line.

	For `LineType.oList`, everything up to and including the first dot is
	dropped; for `LineType.uList`, leading white space and the single bullet
	character are dropped. In both cases, white space following the marker is
	removed, too. Any other line type is a programming error.
*/
private string removeListPrefix(string str, LineType tp)
pure @safe {
	if (tp == LineType.oList) {
		// skip bullets and output using normal escaping
		auto dot = str.indexOf('.');
		assert (dot > 0);
		return stripLeft(str[dot+1 .. $]);
	} else if (tp == LineType.uList) {
		auto item = stripLeft(str);
		return stripLeft(item[1 .. $]);
	}
	assert (false);
}
1044 
1045 
/** Analyzes a line for being a single HTML block tag.

	Returns a struct with three fields: `isHtmlBlock` is set only if the
	stripped line consists of exactly one opening or closing tag whose name is
	listed in `s_blockTags`; `open` is `false` for a closing tag (`</...>`);
	`tagName` holds the characters following `<` or `</` up to the first space
	or `>`. Note that `tagName` and `open` may be filled in even when
	`isHtmlBlock` ends up `false`.
*/
private auto parseHtmlBlockLine(string ln)
pure @safe {
	import std.ascii : isAlpha;

	struct HtmlBlockInfo {
		bool isHtmlBlock;
		string tagName;
		bool open;
	}

	HtmlBlockInfo info;
	info.isHtmlBlock = false;
	info.open = true;

	auto rest = strip(ln);
	if (rest.length < 3 || rest[0] != '<') return info;

	// for a closing tag, drop the '<' so that the '/' takes its place at index 0
	if (rest[1] == '/') {
		info.open = false;
		rest = rest[1 .. $];
	}

	if (!isAlpha(rest[1])) return info;
	rest = rest[1 .. $];

	// the tag name extends up to the first space or '>'
	size_t namelen = 0;
	while (namelen < rest.length && rest[namelen] != ' ' && rest[namelen] != '>')
		namelen++;
	info.tagName = rest[0 .. namelen];
	rest = rest[namelen .. $];

	// the first '>' must be the very last character of the line
	auto closeidx = rest.indexOf('>');
	if (closeidx < 0 || closeidx != rest.length-1) return info;

	info.isHtmlBlock = s_blockTags.canFind(info.tagName);
	return info;
}
1083 
/// Returns `true` if `ln` is recognized by `parseHtmlBlockLine` as an opening block tag.
private bool isHtmlBlockLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return info.open;
}
1089 
/// Returns `true` if `ln` is recognized by `parseHtmlBlockLine` as a closing block tag.
private bool isHtmlBlockCloseLine(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	if (!info.isHtmlBlock) return false;
	return !info.open;
}
1095 
/// Returns `true` if `ln`, after skipping leading white space, starts with three backticks.
private bool isCodeBlockDelimiter(string ln)
pure @safe {
	auto trimmed = stripLeft(ln);
	return trimmed.length >= 3 && trimmed[0 .. 3] == "```";
}
1100 
/// Returns the `tagName` field that `parseHtmlBlockLine` extracts from `ln`.
private string getHtmlTagName(string ln)
pure @safe {
	auto info = parseHtmlBlockLine(ln);
	return info.tagName;
}
1105 
/// Returns `true` if `ln` begins with a tab or with four spaces.
private bool isLineIndented(string ln)
pure @safe {
	if (ln.length == 0) return false;
	if (ln[0] == '\t') return true;
	return ln.length >= 4 && ln[0 .. 4] == "    ";
}
1110 
/** Removes one level of indentation (a single tab or four spaces).

	The line must start with one of the two indentation forms; anything else
	is treated as a programming error.
*/
private string unindentLine(string ln)
pure @safe {
	size_t skip;
	if (ln.startsWith("\t")) skip = 1;
	else if (ln.startsWith("    ")) skip = 4;
	else assert (false);
	return ln[skip .. $];
}
1117 
/** Tries to parse an emphasized span at the start of `str`.

	Recognized delimiters are one to three `*` or one to three `_`, with the
	longest matching one being used. The same delimiter has to occur again
	after at least one character of content.

	Params:
		str = Input; on success advanced past the closing delimiter, otherwise
			left unchanged
		text = On success receives the text between the two delimiters

	Returns:
		The length of the delimiter (1 to 3) on success, 0 otherwise.
*/
private int parseEmphasis(ref string str, ref string text)
pure @safe {
	// ordered so that the longest delimiter of each kind is tried first
	static immutable string[6] delimiters = ["***", "**", "*", "___", "__", "_"];

	if (str.length < 3) return 0;

	string marker;
	foreach (string candidate; delimiters) {
		if (str.startsWith(candidate)) {
			marker = candidate;
			break;
		}
	}
	if (marker.length == 0) return 0;

	string rest = str[marker.length .. $];

	// an index of 0 would mean an empty span, which is rejected as well
	auto closeidx = () @trusted { return rest.indexOf(marker); }();
	if (closeidx < 1) return 0;

	text = rest[0 .. closeidx];
	str = rest[closeidx+marker.length .. $];
	return cast(int)marker.length;
}
1142 
/** Tries to parse an inline code span at the start of `str`.

	The span is delimited by either single or double backticks, where double
	backticks take precedence. The closing delimiter must be preceded by at
	least one character of content.

	Params:
		str = Input; on success advanced past the closing delimiter, otherwise
			left unchanged
		code = On success receives the raw text between the delimiters

	Returns:
		`true` if a code span was parsed.
*/
private bool parseInlineCode(ref string str, ref string code)
pure @safe {
	// the length check guarantees that str[1] exists
	if (str.length < 3 || str[0] != '`') return false;

	string fence = str[1] == '`' ? "``" : "`";
	string rest = str[fence.length .. $];

	auto closeidx = () @trusted { return rest.indexOf(fence); }();
	if (closeidx < 1) return false;

	code = rest[0 .. closeidx];
	str = rest[closeidx+fence.length .. $];
	return true;
}
1160 
/** Tries to parse an inline or reference style link at the start of `str`.

	Accepted forms are `[text](url)`, `[text](url "title")`, `[text][refid]`
	and `[text][]` (where the text itself is used as the reference ID), each
	optionally preceded by `!` (image prefix, which is skipped here). If
	`attributes` is non-null, an inline link may additionally be followed by
	an attribute block in curly braces.

	Params:
		str = Input; only advanced past the link when `true` is returned
		dst = Receives text, URL and title. Note that fields of `dst` may
			already have been overwritten when `false` is returned.
		linkrefs = Reference definitions, looked up using the lower-cased ID
		attributes = Optional destination for attributes; attributes from an
			inline `{...}` block and those of a matched reference get appended

	Returns:
		`true` if a link was recognized.
*/
private bool parseLink(ref string str, ref Link dst, scope const(LinkRef[string]) linkrefs, scope Attribute[]* attributes)
pure @safe {
	string pstr = str;
	if (pstr.length < 3) return false;
	// ignore img-link prefix
	if (pstr[0] == '!') pstr = pstr[1 .. $];

	// parse the text part [text]
	if (pstr[0] != '[') return false;
	auto cidx = pstr.matchBracket();
	if (cidx < 1) return false;
	string refid;
	dst.text = pstr[1 .. cidx];
	pstr = pstr[cidx+1 .. $];

	// parse either (link '['"title"']') or '[' ']'[refid]
	if (pstr.length < 2) return false;
	if (pstr[0] == '(') {
		cidx = pstr.matchBracket();
		if (cidx < 1) return false;
		auto inner = pstr[1 .. cidx];
		immutable qidx = inner.indexOf('"');
		import std.ascii : isWhite;
		// a quote only starts a title if it is preceded by white space and
		// there is at least one URL character in front of that
		if (qidx > 1 && inner[qidx - 1].isWhite()) {
			dst.url = inner[0 .. qidx].stripRight();
			// position of the closing quote, relative to the opening one
			immutable len = inner[qidx .. $].lastIndexOf('"');
			// 0 means that the opening quote is the only one
			if (len == 0) return false;
			assert (len > 0);
			dst.title = inner[qidx + 1 .. qidx + len];
		} else {
			dst.url = inner.stripRight();
			dst.title = null;
		}
		// remove the optional angle brackets around the URL
		if (dst.url.startsWith("<") && dst.url.endsWith(">"))
			dst.url = dst.url[1 .. $-1];
		pstr = pstr[cidx+1 .. $];

		// optional attribute block directly following the closing parenthesis
		if (attributes) {
			if (pstr.startsWith('{')) {
				auto idx = pstr.indexOf('}');
				if (idx > 0) {
					parseAttributeString(pstr[1 .. idx], *attributes);
					pstr = pstr[idx+1 .. $];
				}
			}
		}
	} else {
		// reference style link; a single space is allowed in front of the ID part
		// (pstr has at least two characters here, so it stays non-empty)
		if (pstr[0] == ' ') pstr = pstr[1 .. $];
		if (pstr[0] != '[') return false;
		pstr = pstr[1 .. $];
		cidx = pstr.indexOf(']');
		if (cidx < 0) return false;
		// an empty ID ("[text][]") means that the link text is the ID
		if (cidx == 0) refid = dst.text;
		else refid = pstr[0 .. cidx];
		pstr = pstr[cidx+1 .. $];
	}

	// resolve the reference, if any
	if (refid.length > 0) {
		auto pr = toLower(refid) in linkrefs;
		if (!pr) {
			debug if (!__ctfe) logDebug("[LINK REF NOT FOUND: '%s'", refid);
			return false;
		}
		dst.url = pr.url;
		dst.title = pr.title;
		if (attributes) *attributes ~= pr.attributes;
	}

	// commit the consumed input only after everything succeeded
	str = pstr;
	return true;
}
1232 
// Checks parseLink with attribute parsing disabled (null attributes pointer).
@safe unittest
{
	// asserts that `s` parses successfully and results in exactly `exp`
	static void testLink(string s, Link exp, in LinkRef[string] refs)
	{
		Link link;
		assert (parseLink(s, link, refs, null), s);
		assert (link == exp);
	}
	LinkRef[string] refs;
	refs["ref"] = LinkRef("ref", "target", "title");

	// inline links with and without title, with varying white space
	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target  "title")`, Link("link", "target", "title"), null);
	testLink(`[link](target "title"  )`, Link("link", "target", "title"), null);

	testLink(`[link](target)`, Link("link", "target"), null);
	testLink(`[link](target "title")`, Link("link", "target", "title"), null);

	// reference links, explicit and implicit ID
	testLink(`[link][ref]`, Link("link", "target", "title"), refs);
	testLink(`[ref][]`, Link("ref", "target", "title"), refs);

	// nested brackets within the link text
	testLink(`[link[with brackets]](target)`, Link("link[with brackets]", "target"), null);
	testLink(`[link[with brackets]][ref]`, Link("link[with brackets]", "target", "title"), refs);

	// spaces within the URL are kept, trailing ones are removed
	testLink(`[link](/target with spaces )`, Link("link", "/target with spaces"), null);
	testLink(`[link](/target with spaces "title")`, Link("link", "/target with spaces", "title"), null);

	testLink(`[link](white-space  "around title" )`, Link("link", "white-space", "around title"), null);
	testLink(`[link](tabs	"around title"	)`, Link("link", "tabs", "around title"), null);

	// empty title, and quotes that are not preceded by white space (part of the URL)
	testLink(`[link](target "")`, Link("link", "target", ""), null);
	testLink(`[link](target-no-title"foo" )`, Link("link", "target-no-title\"foo\"", ""), null);

	// angle brackets around the URL get removed
	testLink(`[link](<target>)`, Link("link", "target"), null);

	// inputs that must be rejected
	auto failing = [
		`text`, `[link](target`, `[link]target)`, `[link]`,
		`[link(target)`, `link](target)`, `[link] (target)`,
		`[link][noref]`, `[noref][]`
	];
	Link link;
	foreach (s; failing)
		assert (!parseLink(s, link, refs, null), s);
}
1278 
@safe unittest { // attributes
	// Parses `s` and compares the remaining input, the link and the collected
	// attributes. `parse_atts` selects whether an attribute destination is
	// passed to parseLink at all.
	void test(string s, LinkRef[string] refs, bool parse_atts, string exprem, Link explnk, Attribute[] expatts...)
	@safe {
		Link lnk;
		Attribute[] atts;
		parseLink(s, lnk, refs, parse_atts ? () @trusted { return &atts; } () : null);
		assert (lnk == explnk);
		assert (s == exprem);
		assert (atts == expatts);
	}

	// without a destination, the attribute block is left in the input
	test("[foo](bar){.baz}", null, false, "{.baz}", Link("foo", "bar", ""));
	test("[foo](bar){.baz}", null, true, "", Link("foo", "bar", ""), Attribute("class", "baz"));

	// attributes stored with a reference are only reported if requested
	auto refs = ["bar": LinkRef("bar", "url", "title", [Attribute("id", "hid")])];
	test("[foo][bar]", refs, false, "", Link("foo", "url", "title"));
	test("[foo][bar]", refs, true, "", Link("foo", "url", "title"), Attribute("id", "hid"));
}
1297 
/** Tries to parse an automatic link of the form `<url>` or `<user@host>`.

	The text between the angle brackets must not contain spaces or tabs and
	has to contain at least one `:` or `@`. If it contains an `@` that is not
	preceded by a purely alphanumeric scheme (`scheme:`), it is treated as an
	e-mail address and gets prefixed with `mailto:`.

	Params:
		str = Input; advanced past the closing `>` on success, left unchanged
			otherwise
		url = Receives the link target on success. It is left untouched
			if `false` is returned.

	Returns:
		`true` if an automatic link was recognized.
*/
private bool parseAutoLink(ref string str, ref string url)
pure @safe {
	import std.algorithm.searching : all;
	import std.ascii : isAlphaNum;

	string pstr = str;
	if (pstr.length < 3) return false;
	if (pstr[0] != '<') return false;
	pstr = pstr[1 .. $];
	auto cidx = pstr.indexOf('>');
	if (cidx < 0) return false;

	// work on a local copy, so that `url` is not clobbered for rejected input
	string target = pstr[0 .. cidx];
	if (target.canFind(' ') || target.canFind('\t')) return false;
	auto atidx = target.indexOf('@');
	auto colonidx = target.indexOf(':');
	if (atidx < 0 && colonidx < 0) return false;

	// an '@' without a leading alphanumeric "scheme:" denotes an e-mail address
	if (atidx >= 0) {
		if (colonidx < 0 || colonidx > atidx ||
			!target[0 .. colonidx].all!(ch => ch.isAlphaNum))
				target = "mailto:" ~ target;
	}

	str = pstr[cidx+1 .. $];
	url = target;
	return true;
}
1323 
// Checks parseAutoLink for plain URLs, e-mail addresses and unterminated input.
unittest {
	// On expected failure, neither the input nor the output string may have
	// been modified; on success the whole input must have been consumed.
	void test(bool expected, string str, string url)
	{
		string strcpy = str;
		string outurl;
		if (!expected) {
			assert (!parseAutoLink(strcpy, outurl));
			assert (outurl.length == 0);
			assert (strcpy == str);
		} else {
			assert (parseAutoLink(strcpy, outurl));
			assert (outurl == url);
			assert (strcpy.length == 0);
		}
	}

	test(true, "<http://foo/>", "http://foo/");
	test(false, "<http://foo/", null);
	test(true, "<mailto:foo@bar>", "mailto:foo@bar");
	// bare addresses get a "mailto:" prefix
	test(true, "<foo@bar>", "mailto:foo@bar");
	test(true, "<proto:foo@bar>", "proto:foo@bar");
	test(true, "<proto:foo@bar:123>", "proto:foo@bar:123");
	// the colon is part of a quoted local part here, not a scheme separator
	test(true, "<\"foo:bar\"@baz>", "mailto:\"foo:bar\"@baz");
}
1348 
/** Splits off a trailing attribute block (`{...}`) from a line.

	The block must be the last thing on the line, apart from trailing white
	space, and is delimited by the last `{` of the line.

	Params:
		line = The line to process; on success it is shortened to the part in
			front of the opening brace, otherwise it is left unchanged

	Returns:
		The text between the braces, or `null` if there is no such block.
*/
private string skipAttributes(ref string line)
@safe pure {
	auto trimmed = stripRight(line);
	if (trimmed.length == 0 || trimmed[$-1] != '}') return null;

	auto open = trimmed.lastIndexOf('{');
	if (open < 0) return null;

	line = trimmed[0 .. open];
	return trimmed[open+1 .. $-1];
}
1361 
// Checks skipAttributes: only a brace block at the very end of the line is split off.
unittest {
	// `outp` is the expected remainder of the line, `att` the expected block contents
	void test(string inp, string outp, string att)
	{
		auto ratt = skipAttributes(inp);
		assert (ratt == att, ratt);
		assert (inp == outp, inp);
	}

	test(" foo ", " foo ", null);
	test("foo {bar}", "foo ", "bar");
	// trailing white space after the block is permitted
	test("foo {bar}  ", "foo ", "bar");
	test("foo bar} ", "foo bar} ", null);
	test(" {bar} foo ", " {bar} foo ", null);
	// the last opening brace determines the start of the block
	test(" fo {o {bar} ", " fo {o ", "bar");
	test(" fo {o} {bar} ", " fo {o} ", "bar");
}
1378 
/** Parses the contents of an attribute block and merges them into `dst`.

	The string is split at spaces. Items of the form `#name` set the `id`
	attribute (a later ID replaces an earlier one), items of the form `.name`
	add to the `class` attribute (multiple classes get joined using a space).
	All other items are ignored.

	Params:
		attributes = Contents of the attribute block, without the braces
		dst = Attribute list to update; existing "id"/"class" entries are
			modified in place, missing ones are appended
*/
private void parseAttributeString(string attributes, ref Attribute[] dst)
@safe pure {
	import std.algorithm.iteration : splitter;

	// TODO: handle custom attributes (requires a different approach than splitter)

	foreach (item; attributes.splitter(' ')) {
		item = item.strip;
		if (!item.length) continue;

		string name;
		bool accumulate;
		switch (item[0]) {
			case '#': name = "id"; accumulate = false; break;
			case '.': name = "class"; accumulate = true; break;
			default: continue;
		}

		auto value = item[1 .. $];
		auto pos = dst.countUntil!(a => a.attribute == name);
		if (pos < 0) dst ~= Attribute(name, value);
		else if (accumulate) dst[pos].value ~= " " ~ value;
		else dst[pos].value = value;
	}
}
1399 
// Checks parseAttributeString: IDs replace each other, classes accumulate.
unittest {
	// parses `str` into an empty list and compares against the expected attributes
	void test(string str, Attribute[] atts...)
	{
		Attribute[] res;
		parseAttributeString(str, res);
		assert (res == atts, format("%s: %s", str, res));
	}

	test("");
	test(".foo", Attribute("class", "foo"));
	test("#foo", Attribute("id", "foo"));
	// the last ID wins
	test("#foo #bar", Attribute("id", "bar"));
	// classes are joined with a space
	test(".foo .bar", Attribute("class", "foo bar"));
	test("#foo #bar", Attribute("id", "bar"));
	// attributes keep the order of their first occurrence
	test(".foo #bar .baz", Attribute("class", "foo baz"), Attribute("id", "bar"));
}
1416 
/** Extracts all link reference definitions from the given lines.

	A reference definition is a non-indented line of one of the forms

	---
	[refid]: link "opt text"
	[refid]: <link> "opt text"
	---

	where the optional title may be written as `"opt text"`, `'opt text'` or
	`(opt text)`, and the whole definition may be followed by an attribute
	block (`{#id .class}`). The link and the title may be separated by spaces
	and/or tabs.

	Params:
		lines = The lines of the document. All lines that were recognized as
			a reference definition are removed from this array.

	Returns:
		The detected references, indexed by their lower-cased ID.
*/
private LinkRef[string] scanForReferences(ref string[] lines)
pure @safe {
	LinkRef[string] ret;
	bool[size_t] reflines;

	foreach (lnidx, ln; lines) {
		// line must not be indented
		if (isLineIndented(ln)) continue;
		ln = strip(ln);
		if (!ln.startsWith("[")) continue;
		ln = ln[1 .. $];

		auto idx = () @trusted { return ln.indexOf("]:"); }();
		if (idx < 0) continue;
		string refid = ln[0 .. idx];
		ln = stripLeft(ln[idx+2 .. $]);

		string attstr = ln.skipAttributes();

		string url;
		if (ln.startsWith("<")) {
			idx = ln.indexOf('>');
			if (idx < 0) continue;
			url = ln[1 .. idx];
			ln = ln[idx+1 .. $];
		} else {
			// The URL ends at the first space or tab, whichever comes first.
			// Searching for a space first would make a tab separated title
			// containing spaces become a part of the URL.
			size_t urlend = 0;
			while (urlend < ln.length && ln[urlend] != ' ' && ln[urlend] != '\t')
				urlend++;
			url = ln[0 .. urlend];
			ln = ln[urlend .. $];
		}
		// Strip both sides: skipAttributes() keeps the white space in front
		// of the attribute block, which would otherwise hide the closing
		// delimiter of the title.
		ln = strip(ln);

		string title;
		if (ln.length >= 3) {
			if (ln[0] == '(' && ln[$-1] == ')'
				|| ln[0] == '\"' && ln[$-1] == '\"'
				|| ln[0] == '\'' && ln[$-1] == '\'' )
			{
				title = ln[1 .. $-1];
			}
		}

		LinkRef lref;
		lref.id = refid;
		lref.url = url;
		lref.title = title;
		parseAttributeString(attstr, lref.attributes);
		ret[toLower(refid)] = lref;
		reflines[lnidx] = true;

		debug if (!__ctfe) logTrace("[detected ref on line %d]", lnidx+1);
	}

	// remove all lines containing references
	auto nonreflines = appender!(string[])();
	nonreflines.reserve(lines.length);
	foreach (i, ln; lines)
		if (i !in reflines)
			nonreflines.put(ln);
	lines = nonreflines.data();

	return ret;
}
1495 
1496 
1497 /**
1498 	Generates an identifier suitable to use as within a URL.
1499 
1500 	The resulting string will contain only ASCII lower case alphabetic or
1501 	numeric characters, as well as dashes (-). Every sequence of
1502 	non-alphanumeric characters will be replaced by a single dash. No dashes
1503 	will be at either the front or the back of the result string.
1504 */
1505 auto asSlug(R)(R text)
1506 	if (isInputRange!R && is(typeof(R.init.front) == dchar))
1507 {
1508 	static struct SlugRange {
1509 		private {
1510 			R _input;
1511 			bool _dash;
1512 		}
1513 
1514 		this(R input)
1515 		{
1516 			_input = input;
1517 			skipNonAlphaNum();
1518 		}
1519 
1520 		@property bool empty() const { return _dash ? false : _input.empty; }
1521 		@property char front() const {
1522 			if (_dash) return '-';
1523 
1524 			char r = cast(char)_input.front;
1525 			if (r >= 'A' && r <= 'Z') return cast(char)(r + ('a' - 'A'));
1526 			return r;
1527 		}
1528 
1529 		void popFront()
1530 		{
1531 			if (_dash) {
1532 				_dash = false;
1533 				return;
1534 			}
1535 
1536 			_input.popFront();
1537 			auto na = skipNonAlphaNum();
1538 			if (na && !_input.empty)
1539 				_dash = true;
1540 		}
1541 
1542 		private bool skipNonAlphaNum()
1543 		{
1544 			bool have_skipped = false;
1545 			while (!_input.empty) {
1546 				switch (_input.front) {
1547 					default:
1548 						_input.popFront();
1549 						have_skipped = true;
1550 						break;
1551 					case 'a': .. case 'z':
1552 					case 'A': .. case 'Z':
1553 					case '0': .. case '9':
1554 						return have_skipped;
1555 				}
1556 			}
1557 			return have_skipped;
1558 		}
1559 	}
1560 	return SlugRange(text);
1561 }
1562 
// Checks asSlug: lower-casing, collapsing of separators and trimming at both ends.
unittest {
	import std.algorithm : equal;
	assert ("".asSlug.equal(""));
	// input without any alphanumeric characters yields an empty slug
	assert (".,-".asSlug.equal(""));
	assert ("abc".asSlug.equal("abc"));
	assert ("aBc123".asSlug.equal("abc123"));
	// inner separator runs become a single dash, outer ones are dropped
	assert ("....aBc...123...".asSlug.equal("abc-123"));
}
1571 
/// A link reference definition (`[id]: url "title" {attributes}`), as collected by `scanForReferences`.
private struct LinkRef {
	string id; /// The reference ID as written in the definition (lookup keys are lower-cased separately)
	string url; /// The link target
	string title; /// The optional title, empty if none was given
	Attribute[] attributes; /// Attributes parsed from a trailing `{...}` block
}
1578 
/// The result of parsing a single link using `parseLink`.
private struct Link {
	string text; /// The raw text between the square brackets
	string url; /// The link target
	string title; /// The optional title
}
1584 
@safe unittest { // alt and title attributes
	// the bracketed text of an image becomes its "alt" attribute
	assert (filterMarkdown("![alt](http://example.org/image)")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\">\n</p>\n");
	// a quoted title is output as the "title" attribute
	assert (filterMarkdown("![alt](http://example.org/image \"Title\")")
		== "<p><img src=\"http://example.org/image\" alt=\"alt\" title=\"Title\">\n</p>\n");
}
1591 
@safe unittest { // complex links
	// link text spanning two lines and a URL enclosed in angle brackets
	assert (filterMarkdown("their [install\ninstructions](<http://www.brew.sh>) and")
		== "<p>their <a href=\"http://www.brew.sh\">install\ninstructions</a> and\n</p>\n");
	// an image nested within the text of a link
	assert (filterMarkdown("[![Build Status](https://travis-ci.org/rejectedsoftware/vibe.d.png)](https://travis-ci.org/rejectedsoftware/vibe.d)")
		== "<p><a href=\"https://travis-ci.org/rejectedsoftware/vibe.d\"><img src=\"https://travis-ci.org/rejectedsoftware/vibe.d.png\" alt=\"Build Status\"></a>\n</p>\n");
}
1598 
@safe unittest { // check CTFE-ability
	// using an enum forces the filter to be evaluated at compile time
	enum res = filterMarkdown("### some markdown\n[foo][]\n[foo]: /bar");
	assert (res == "<h3 id=\"some-markdown\"> some markdown</h3>\n<p><a href=\"/bar\">foo</a>\n</p>\n", res);
}
1603 
@safe unittest { // correct line breaks in restrictive mode
	// with MarkdownFlags.forumDefault, a single newline is output as <br/>
	auto res = filterMarkdown("hello\nworld", MarkdownFlags.forumDefault);
	assert (res == "<p>hello<br/>world\n</p>\n", res);
}
1608 
1609 /*@safe unittest { // code blocks and blockquotes
1610 	assert (filterMarkdown("\tthis\n\tis\n\tcode") ==
1611 		"<pre><code>this\nis\ncode</code></pre>\n");
1612 	assert (filterMarkdown("    this\n    is\n    code") ==
1613 		"<pre><code>this\nis\ncode</code></pre>\n");
1614 	assert (filterMarkdown("    this\n    is\n\tcode") ==
1615 		"<pre><code>this\nis</code></pre>\n<pre><code>code</code></pre>\n");
1616 	assert (filterMarkdown("\tthis\n\n\tcode") ==
1617 		"<pre><code>this\n\ncode</code></pre>\n");
1618 	assert (filterMarkdown("\t> this") ==
1619 		"<pre><code>&gt; this</code></pre>\n");
1620 	assert (filterMarkdown(">     this") ==
1621 		"<blockquote><pre><code>this</code></pre></blockquote>\n");
1622 	assert (filterMarkdown(">     this\n    is code") ==
1623 		"<blockquote><pre><code>this\nis code</code></pre></blockquote>\n");
1624 }*/
1625 
@safe unittest { // heading IDs
	// the heading gets a slug-style "id" attribute derived from its text
	assert (filterMarkdown("## Hello, World!") == "<h2 id=\"hello-world\"> Hello, World!</h2>\n", filterMarkdown("## Hello, World!"));
}
1629 
@safe unittest { // tables
	// header row only
	assert (filterMarkdown("foo|bar\n---|---", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n</table>\n");
	// inline markup and white space within cells
	assert (filterMarkdown(" *foo* | bar \n---|---\n baz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th><em>foo</em></th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	// optional outer pipes on the header, separator and body rows
	assert (filterMarkdown("|foo|bar|\n---|---\n baz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	assert (filterMarkdown("foo|bar\n|---|---|\nbaz|bam", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	assert (filterMarkdown("foo|bar\n---|---\n|baz|bam|", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td><td>bam</td></tr>\n</table>\n");
	// column alignment specified using colons in the separator row
	assert (filterMarkdown("foo|bar|baz\n:---|---:|:---:\n|baz|bam|bap|", MarkdownFlags.tables)
		== "<table>\n<tr><th align=\"left\">foo</th><th align=\"right\">bar</th><th align=\"center\">baz</th></tr>\n"
		~ "<tr><td align=\"left\">baz</td><td align=\"right\">bam</td><td align=\"center\">bap</td></tr>\n</table>\n");
	// empty header cell
	assert (filterMarkdown(" |bar\n---|---", MarkdownFlags.tables)
		== "<table>\n<tr><th></th><th>bar</th></tr>\n</table>\n");
	// body row with fewer cells than the header
	assert (filterMarkdown("foo|bar\n---|---\nbaz|", MarkdownFlags.tables)
		== "<table>\n<tr><th>foo</th><th>bar</th></tr>\n<tr><td>baz</td></tr>\n</table>\n");
}
1649 
@safe unittest { // issue #1527 - blank lines in code blocks
	// the blank line between the two indented lines stays part of a single code block
	assert (filterMarkdown("    foo\n\n    bar\n") ==
		"<pre class=\"prettyprint\"><code>foo\n\nbar\n</code></pre>\n");
}
1654 
@safe unittest { // fenced code block within a block quote, using CRLF line endings
	assert (filterMarkdown("> ```\r\n> test\r\n> ```", MarkdownFlags.forumDefault) ==
		"<blockquote><pre class=\"prettyprint\"><code>test\n</code></pre>\n</blockquote>\n");
}
1659 
@safe unittest { // issue #1845 - malicious URI targets
	// with forumDefault, "javascript:" targets are replaced by "#" for inline
	// links, images and automatic links...
	assert (filterMarkdown("[foo](javascript:foo) ![bar](javascript:bar) <javascript:baz>", MarkdownFlags.forumDefault) ==
		"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"bar\"> <a href=\"#\">javascript:baz</a>\n</p>\n");
	// ...as well as for reference style links
	assert (filterMarkdown("[foo][foo] ![foo][foo]\n[foo]: javascript:foo", MarkdownFlags.forumDefault) ==
		"<p><a href=\"#\">foo</a> <img src=\"#\" alt=\"foo\">\n</p>\n");
	// a percent-encoded colon is passed through unchanged
	assert (filterMarkdown("[foo](javascript%3Abar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"javascript%3Abar\">foo</a>\n</p>\n");

	// extra XSS regression tests
	// HTML within the link text and within the URL gets escaped
	assert (filterMarkdown("[<script></script>](bar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"bar\">&lt;script&gt;&lt;/script&gt;</a>\n</p>\n");
	assert (filterMarkdown("[foo](\"><script></script><span foo=\")", MarkdownFlags.forumDefault) ==
		"<p><a href=\"&quot;&gt;&lt;script&gt;&lt;/script&gt;&lt;span foo=&quot;\">foo</a>\n</p>\n");
	// the ampersand of a character reference within the URL gets escaped
	assert (filterMarkdown("[foo](javascript&#58;bar)", MarkdownFlags.forumDefault) ==
		"<p><a href=\"javascript&amp;#58;bar\">foo</a>\n</p>\n");
}
1676 
@safe unittest { // issue #2132 - table with more columns in body goes out of array bounds
	// the surplus third cell of the body row is still output
	assert (filterMarkdown("| a | b |\n|--------|--------|\n|   c    | d  | e |", MarkdownFlags.tables) ==
		"<table>\n<tr><th>a</th><th>b</th></tr>\n<tr><td>c</td><td>d</td><td>e</td></tr>\n</table>\n");
}
1681 
@safe unittest { // lists
	// items on directly adjacent lines are output without paragraphs
	assert (filterMarkdown("- foo\n- bar") ==
		"<ul>\n<li>foo\n</li>\n<li>bar\n</li>\n</ul>\n");
	// items separated by a blank line get wrapped in paragraphs
	assert (filterMarkdown("- foo\n\n- bar") ==
		"<ul>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ul>\n");
	// the same for ordered lists
	assert (filterMarkdown("1. foo\n2. bar") ==
		"<ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n");
	assert (filterMarkdown("1. foo\n\n2. bar") ==
		"<ol>\n<li><p>foo\n</p>\n</li>\n<li><p>bar\n</p>\n</li>\n</ol>\n");
	// indented paragraphs following an item belong to that item
	assert (filterMarkdown("1. foo\n\n\tbar\n\n2. bar\n\n\tbaz\n\n") ==
		"<ol>\n<li><p>foo\n</p>\n<p>bar\n</p>\n</li>\n<li><p>bar\n</p>\n<p>baz\n</p>\n</li>\n</ol>\n");
}
1694 
@safe unittest { // figures
	// without MarkdownFlags.figures, the markers are ordinary list item text
	assert (filterMarkdown("- %%%") == "<ul>\n<li>%%%\n</li>\n</ul>\n");
	assert (filterMarkdown("- ###") == "<ul>\n<li>###\n</li>\n</ul>\n");
	// with the flag set, "- %%%" yields a figure and "- ###" a figure caption
	assert (filterMarkdown("- %%%", MarkdownFlags.figures) == "<figure></figure>\n");
	assert (filterMarkdown("- ###", MarkdownFlags.figures) == "<figcaption></figcaption>\n");
	// indented contents become the contents of the figure/caption
	assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar", MarkdownFlags.figures) ==
		"<figure>foo\n<figcaption>bar\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\tfoo\n\n\tbar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure><p>foo\n</p>\n<p>bar\n</p>\n<figcaption>baz\n</figcaption>\n</figure>\n");
	assert (filterMarkdown("- %%%\n\tfoo\n\n\t- ###\n\t\tbar\n\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure>foo\n<figcaption><p>bar\n</p>\n<p>baz\n</p>\n</figcaption>\n</figure>\n");
	// a regular list nested within a figure
	assert (filterMarkdown("- %%%\n\t1. foo\n\t2. bar\n\n\t- ###\n\t\tbaz", MarkdownFlags.figures) ==
		"<figure><ol>\n<li>foo\n</li>\n<li>bar\n</li>\n</ol>\n<figcaption>baz\n</figcaption>\n</figure>\n");
	// a figure marker following a regular list item ends that list
	assert (filterMarkdown("- foo\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n");
	assert (filterMarkdown("- foo\n\n- %%%", MarkdownFlags.figures) == "<ul>\n<li>foo\n</li>\n</ul>\n<figure></figure>\n");
}
1711 
@safe unittest { // HTML entities
	// entities are passed through in normal and in emphasized text...
	assert(filterMarkdown("&nbsp;") == "<p>&nbsp;\n</p>\n");
	assert(filterMarkdown("*&nbsp;*") == "<p><em>&nbsp;</em>\n</p>\n");
	// ...but get escaped within inline code
	assert(filterMarkdown("`&nbsp;`") == "<p><code class=\"prettyprint\">&amp;nbsp;</code>\n</p>\n");
}