1 /**
2 	HTML character entity escaping.
3 
4 	TODO: Make things @safe once Appender is.
5 
6 	Copyright: © 2012-2014 Sönke Ludwig
7 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
8 	Authors: Sönke Ludwig
9 */
10 module vibe.textfilter.html;
11 
12 import std.array;
13 import std.conv;
14 import std.range;
15 
16 
/**
	Returns the HTML escaped version of a given string.

	The input is run through `filterHTMLEscape` with its default flags and the
	collected output is returned as a new string.

	Params:
		str = input range holding the text to escape

	Returns:
		The escaped text.
*/
string htmlEscape(R)(R str) @trusted
	if (isInputRange!R)
{
	// Regular run-time path: collect the output in a Phobos appender.
	if (!__ctfe) {
		auto sink = appender!string();
		filterHTMLEscape(sink, str);
		return sink.data;
	}

	// Appender is a performance/memory hog in CTFE, so use the
	// simple concatenating sink there instead.
	StringAppender ctSink;
	filterHTMLEscape(ctSink, str);
	return ctSink.data;
}
32 
///
unittest {
	// Angle brackets become entities; the double quotes are left untouched.
	string input = `"Hello", <World>!`;
	string expected = `"Hello", &lt;World&gt;!`;
	assert(htmlEscape(input) == expected);
}
37 
38 
/**
	Writes the HTML escaped version of a given string to an output range.

	Every element of `str` is forwarded, together with `flags`, to the
	single-character overload of `filterHTMLEscape`.

	Params:
		dst = output range that receives the escaped text
		str = input range to escape (iterated until it is empty)
		flags = escaping options; defaults to `HTMLEscapeFlags.escapeNewline`
*/
void filterHTMLEscape(R, S)(ref R dst, S str, HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline)
	if (isOutputRange!(R, dchar) && isInputRange!S)
{
	while (!str.empty) {
		filterHTMLEscape(dst, str.front, flags);
		str.popFront();
	}
}
47 
48 
/**
	Returns the HTML escaped version of a given string, suitable for attribute values.

	In addition to the regular escaping, double and single quotes are escaped,
	because the work is delegated to `filterHTMLAttribEscape`.

	Params:
		str = input range holding the text to escape

	Returns:
		The escaped text.
*/
string htmlAttribEscape(R)(R str) @trusted
	if (isInputRange!R)
{
	// Regular run-time path: collect the output in a Phobos appender.
	if (!__ctfe) {
		auto sink = appender!string();
		filterHTMLAttribEscape(sink, str);
		return sink.data;
	}

	// Appender is a performance/memory hog in CTFE, so use the
	// simple concatenating sink there instead.
	StringAppender ctSink;
	filterHTMLAttribEscape(ctSink, str);
	return ctSink.data;
}
64 
///
unittest {
	// Unlike htmlEscape, the double quotes are turned into entities as well.
	string input = `"Hello", <World>!`;
	string expected = `&quot;Hello&quot;, &lt;World&gt;!`;
	assert(htmlAttribEscape(input) == expected);
}
69 
70 
/**
	Writes the HTML escaped version of a given string to an output range,
	additionally escaping quote characters.

	Each element of `str` is passed to the single-character overload of
	`filterHTMLEscape` with the `escapeNewline` and `escapeQuotes` flags set.

	Params:
		dst = output range that receives the escaped text
		str = input range to escape (iterated until it is empty)
*/
void filterHTMLAttribEscape(R, S)(ref R dst, S str)
	if (isOutputRange!(R, dchar) && isInputRange!S)
{
	// Flag combination used for attribute values.
	enum attribFlags = HTMLEscapeFlags.escapeNewline|HTMLEscapeFlags.escapeQuotes;

	while (!str.empty) {
		filterHTMLEscape(dst, str.front, attribFlags);
		str.popFront();
	}
}
79 
80 
/**
	Returns the HTML escaped version of a given string (escapes every character).

	The work is delegated to `filterHTMLAllEscape`; the collected output is
	returned as a new string.

	Params:
		str = input range holding the text to escape

	Returns:
		The escaped text.
*/
string htmlAllEscape(R)(R str) @trusted
	if (isInputRange!R)
{
	// Regular run-time path: collect the output in a Phobos appender.
	if (!__ctfe) {
		auto sink = appender!string();
		filterHTMLAllEscape(sink, str);
		return sink.data;
	}

	// Appender is a performance/memory hog in CTFE, so use the
	// simple concatenating sink there instead.
	StringAppender ctSink;
	filterHTMLAllEscape(ctSink, str);
	return ctSink.data;
}
96 
///
unittest {
	// Every character, including plain letters, becomes a decimal character reference.
	string input = "Hello!";
	string expected = "&#72;&#101;&#108;&#108;&#111;&#33;";
	assert(htmlAllEscape(input) == expected);
}
101 
102 
/**
	Writes the HTML escaped version of a given string to an output range
	(escapes every character).

	Each element of `str` is written as `&#N;`, where `N` is the decimal
	value of the element converted to `uint`.

	Params:
		dst = output range that receives the escaped text
		str = input range to escape (iterated until it is empty)
*/
void filterHTMLAllEscape(R, S)(ref R dst, S str)
	if (isOutputRange!(R, dchar) && isInputRange!S)
{
	while (!str.empty) {
		put(dst, "&#");
		immutable code = cast(uint)str.front;
		put(dst, code.to!string);
		put(dst, ';');
		str.popFront();
	}
}
114 
115 
/**
	Minimally escapes a text so that no HTML tags appear in it.

	The text is escaped with `HTMLEscapeFlags.escapeMinimal`, i.e. only `<`,
	`>` and `&` are replaced by entities; quotes and line breaks are passed
	through unchanged.

	Like the other escaping helpers in this module, a plain concatenating
	sink is used during CTFE instead of `Appender`.

	Params:
		str = input range holding the text to escape

	Returns:
		The escaped text.
*/
string htmlEscapeMin(R)(R str) @trusted
	if (isInputRange!R)
{
	if (__ctfe) { // appender is a performance/memory hog in ctfe
		StringAppender dst;
		filterHTMLEscape(dst, str, HTMLEscapeFlags.escapeMinimal);
		return dst.data;
	} else {
		auto dst = appender!string();
		filterHTMLEscape(dst, str, HTMLEscapeFlags.escapeMinimal);
		return dst.data;
	}
}
127 
128 
/**
	Writes the HTML escaped version of a character to an output range.

	`<`, `>` and `&` are always replaced by `&lt;`, `&gt;` and `&amp;`.
	The remaining behavior depends on `flags`:

	$(UL
		$(LI `escapeQuotes`: `"` is written as `&quot;` and `'` as `&#39;`)
		$(LI `escapeNewline`: `\r` and `\n` are written as decimal character references)
		$(LI `escapeUnknown`: any character that is not explicitly listed in
			this function is written as a decimal character reference)
	)

	ASCII letters, digits and a fixed set of punctuation characters are always
	written unchanged, regardless of `flags`.

	Params:
		dst = output range that receives the escaped text
		ch = character to escape
		flags = escaping options; defaults to `HTMLEscapeFlags.escapeNewline`
*/
void filterHTMLEscape(R)(ref R dst, dchar ch, HTMLEscapeFlags flags = HTMLEscapeFlags.escapeNewline )
{
	switch (ch) {
		default:
			if (flags & HTMLEscapeFlags.escapeUnknown) {
				put(dst, "&#");
				put(dst, to!string(cast(uint)ch));
				put(dst, ';');
			} else put(dst, ch);
			break;
		case '"':
			if (flags & HTMLEscapeFlags.escapeQuotes) put(dst, "&quot;");
			else put(dst, '"');
			break;
		case '\'':
			if (flags & HTMLEscapeFlags.escapeQuotes) put(dst, "&#39;");
			else put(dst, '\'');
			break;
		case '\r', '\n':
			if (flags & HTMLEscapeFlags.escapeNewline) {
				put(dst, "&#");
				put(dst, to!string(cast(uint)ch));
				put(dst, ';');
			} else put(dst, ch);
			break;
		case 'a': .. case 'z': goto case;
		case 'A': .. case 'Z': goto case;
		case '0': .. case '9': goto case;
		case ' ', '\t', '-', '_', '.', ':', ',', ';',
			 '#', '+', '*', '?', '=', '(', ')', '/', '!',
			 '%' , '{', '}', '[', ']', '`', '$', '^', '~':
			// All characters in this group are ASCII, so narrowing to a
			// single UTF-8 code unit is lossless.
			put(dst, cast(char)ch);
			break;
		case '\u00B4':
			// The acute accent is passed through like the group above, but it
			// is not ASCII: truncating it to `char` would hand the output
			// range a lone 0xB4 code unit, which is not valid UTF-8 on its
			// own. Emit it as a full code point instead.
			put(dst, ch);
			break;
		case '<': put(dst, "&lt;"); break;
		case '>': put(dst, "&gt;"); break;
		case '&': put(dst, "&amp;"); break;
	}
}
170 
171 
/**
	Bit flags that control which characters `filterHTMLEscape` escapes.

	The non-zero members occupy distinct bits and can be combined with `|`.
*/
enum HTMLEscapeFlags {
	/// No optional escaping; only `<`, `>` and `&` are replaced.
	escapeMinimal = 0,
	/// Escape `"` as `&quot;` and `'` as `&#39;`.
	escapeQuotes = 0b001,
	/// Escape `\r` and `\n` as decimal character references.
	escapeNewline = 0b010,
	/// Escape all characters not explicitly handled as decimal character references.
	escapeUnknown = 0b100
}
178 
/**
	Minimal output range that builds a `string` by plain concatenation.

	Used by the escaping helpers in place of `Appender` during CTFE.
*/
private struct StringAppender {
@safe:

	/// The text collected so far.
	string data;

	/// Appends a whole string.
	void put(string s)
	{
		data ~= s;
	}

	/// Appends a single UTF-8 code unit.
	void put(char ch)
	{
		data ~= ch;
	}

	/// Appends a code point, encoded as UTF-8.
	void put(dchar ch)
	{
		import std.utf : encode;

		char[4] buf;
		immutable len = encode(buf, ch);
		data ~= buf[0 .. len];
	}
}