1 /**
2 	URL-encoding implementation
3 
4 	Copyright: © 2012-2015 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Jan Krüger, Sönke Ludwig
7 */
8 module vibe.textfilter.urlencode;
9 
10 import vibe.utils.string;
11 
12 import std.algorithm;
13 import std.array;
14 import std.conv;
15 import std.exception;
16 import std.format;
17 
18 
/** Returns the URL encoded version of a given string.

	ASCII letters, digits, '-', '_', '.', '~' and every character listed in
	`allowed_chars` are kept as they are; all other bytes are written as
	`%XX` escape sequences with upper case hexadecimal digits.

	Params:
		str = the string to encode
		allowed_chars = additional characters that are passed through unescaped

	Returns:
		The encoded string. As long as nothing had to be escaped, the result
		is a slice of `str` itself; memory is only allocated once the output
		starts to differ from the input.
*/
T[] urlEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
	alias Sink = StringSliceAppender!(T[]);

	// the sink keeps referring to `str` for as long as the output matches it
	auto sink = Sink(str);
	sink.filterURLEncode(str, allowed_chars);
	return sink.data;
}
29 
@safe unittest {
	// input that needs no escaping has to come back as the very same slice
	string plain = "hello-world";
	auto encoded = plain.urlEncode();
	assert(encoded.ptr == plain.ptr);
}
34 
/// Tells whether every character of `str` is an ASCII hexadecimal digit
/// (0-9, A-F or a-f). An empty string yields `true`.
private auto isCorrectHexNum(const(char)[] str)
@safe {
	foreach (char digit; str) {
		immutable is_decimal = digit >= '0' && digit <= '9';
		immutable is_upper_hex = digit >= 'A' && digit <= 'F';
		immutable is_lower_hex = digit >= 'a' && digit <= 'f';
		if (!(is_decimal || is_upper_hex || is_lower_hex))
			return false;
	}
	return true;
}
49 
/** Checks whether a given string has valid URL encoding.

	ASCII letters, digits, '-', '.', '_' and '~' are always accepted. A '%'
	has to be followed by two hexadecimal digits. Any other character is
	accepted unless it is contained in `reserved_chars`.

	Params:
		str = the string to check
		reserved_chars = characters that must not occur unescaped

	Returns:
		`true` if no malformed escape sequence and no reserved character
		was found, `false` otherwise.
*/
bool isURLEncoded(const(char)[] str, const(char)[] reserved_chars = null)
@safe {
	size_t pos = 0;
	while (pos < str.length) {
		const ch = str[pos];

		if (ch == '%') {
			// an escape sequence consists of the '%' plus two more characters
			if (str.length - pos < 3)
				return false;
			if (!isCorrectHexNum(str[pos+1 .. pos+3]))
				return false;
			pos += 3;
			continue;
		}

		immutable unreserved =
			(ch >= '0' && ch <= '9') ||
			(ch >= 'A' && ch <= 'Z') ||
			(ch >= 'a' && ch <= 'z') ||
			ch == '-' || ch == '.' || ch == '_' || ch == '~';

		// only characters outside of the unreserved set are matched against the reserved list
		if (!unreserved && reserved_chars.canFind(ch))
			return false;

		pos++;
	}
	return true;
}
79 
@safe unittest {
	// unreserved text and complete escape sequences (upper or lower case hex) are valid
	assert("hello-world".isURLEncoded());
	assert("he%2F%af".isURLEncoded());

	// a character listed as reserved must not appear literally
	assert(!"hello world".isURLEncoded(" "));

	// truncated escape sequence and non-hex digit
	assert(!"he%f".isURLEncoded());
	assert(!"he%fx".isURLEncoded());
}
87 
/** Returns the decoded version of a given URL encoded string.

	Every `%XX` sequence is replaced by the byte it denotes; all other
	characters, including '+', are copied unchanged.

	Params:
		str = the URL encoded string

	Returns:
		The decoded string. If `str` contains no '%' at all, `str` itself
		is returned.

	Throws:
		An exception if a '%' is not followed by two hexadecimal digits.
*/
T[] urlDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
	if (str.anyOf("%")) {
		alias Sink = StringSliceAppender!(T[]);
		auto sink = Sink(str);
		sink.filterURLDecode(str);
		return sink.data;
	}

	// no escape sequences present - nothing to do
	return str;
}
97 
/** Returns the form encoded version of a given string.

	Form encoding works like normal URL encoding, with the exception that
	each space is written as a plus character instead of an escape sequence.

	Note that newlines should always be represented as \r\n sequences
	according to the HTTP standard.

	Params:
		str = the string to encode
		allowed_chars = additional characters that are passed through unescaped

	Returns:
		The encoded string; a slice of `str` as long as the output does not
		differ from the input.
*/
T[] formEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
	alias Sink = StringSliceAppender!(T[]);
	enum use_form_encoding = true; // space -> '+'

	auto sink = Sink(str);
	sink.filterURLEncode(str, allowed_chars, use_form_encoding);
	return sink.data;
}
112 
/** Returns the decoded version of a form encoded string.

	Form encoding works like normal URL encoding, with the exception that
	spaces are represented by plus characters, so each '+' is turned back
	into a space here.

	Params:
		str = the form encoded string

	Returns:
		The decoded string. If `str` contains neither '%' nor '+', `str`
		itself is returned.

	Throws:
		An exception if a '%' is not followed by two hexadecimal digits.
*/
T[] formDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
	if (str.anyOf("%+")) {
		alias Sink = StringSliceAppender!(T[]);
		enum use_form_encoding = true; // '+' -> space

		auto sink = Sink(str);
		sink.filterURLDecode(str, use_form_encoding);
		return sink.data;
	}

	// neither escape sequences nor plus signs - nothing to do
	return str;
}
125 
/** Writes the URL encoded version of the given string to an output range.

	ASCII letters, digits, '-', '_', '.' and '~' are copied verbatim, as is
	every character contained in `allowed_chars`. All remaining bytes are
	written as `%XX` with two upper case hexadecimal digits.

	Params:
		dst = output range receiving the encoded characters
		str = the string to encode
		allowed_chars = additional characters that are copied unescaped
		form_encoding = if `true`, a space is written as '+' (this takes
			precedence over `allowed_chars`)
*/
void filterURLEncode(R)(ref R dst, const(char)[] str,
                        const(char)[] allowed_chars = null,
                        bool form_encoding = false)
{
	foreach (idx; 0 .. str.length) {
		const ch = str[idx];

		immutable unreserved =
			(ch >= 'A' && ch <= 'Z') ||
			(ch >= 'a' && ch <= 'z') ||
			(ch >= '0' && ch <= '9') ||
			ch == '-' || ch == '_' || ch == '.' || ch == '~';

		if (unreserved) {
			dst.put(ch);
		} else if (ch == ' ' && form_encoding) {
			dst.put('+');
		} else if (allowed_chars.canFind(ch)) {
			dst.put(ch);
		} else {
			// hand copyable/assignable ranges to formattedWrite directly,
			// everything else is passed by pointer
			static if (is(typeof({ R a, b; b = a; })))
				formattedWrite(dst, "%%%02X", ch);
			else
				formattedWrite(() @trusted { return &dst; } (), "%%%02X", ch);
		}
	}
}
159 
160 
/** Writes the decoded version of the given URL encoded string to an output range.

	Each `%XX` sequence is replaced by the byte with the hexadecimal value
	`XX`. All other characters are copied unchanged, except for '+' when
	`form_encoding` is set.

	Params:
		dst = output range receiving the decoded characters
		str = the encoded string
		form_encoding = if `true`, each '+' is written as a space

	Throws:
		An exception if a '%' is not followed by two hexadecimal digits.
*/
void filterURLDecode(R)(ref R dst, const(char)[] str, bool form_encoding = false)
{
	auto rest = str;
	while (rest.length > 0) {
		if (rest[0] == '%') {
			enforce(rest.length >= 3, "invalid percent encoding");
			auto digits = rest[1 .. 3];
			auto decoded = cast(char)parse!int(digits, 16);
			// parse consumes what it could read - left-overs mean a non-hex character
			enforce(digits.length == 0, "invalid percent encoding");
			dst.put(decoded);
			rest = rest[3 .. $];
			continue;
		}

		if (rest[0] == '+' && form_encoding)
			dst.put(' ');
		else
			dst.put(rest[0]);
		rest = rest[1 .. $];
	}
}
189 
190 
@safe unittest
{
	// urlEncode: control characters, blanks and punctuation get escaped
	assert("\r\n".urlEncode() == "%0D%0A"); // github #65
	assert("This-is~a_test".urlEncode() == "This-is~a_test");
	assert("This is a test".urlEncode() == "This%20is%20a%20test");
	assert("This{is}test".urlEncode() == "This%7Bis%7Dtest");
	assert("%".urlEncode() == "%25");
	assert("!".urlEncode() == "%21");

	// formEncode: blanks become '+', explicitly allowed characters pass through
	assert("This is a test".formEncode() == "This+is+a+test");
	assert("this/test".formEncode("/") == "this/test");
	assert("this/test".formEncode() == "this%2Ftest");

	// decoding accepts both hex digit cases; '+' is only special for form decoding
	assert("%0D%0a".urlDecode() == "\r\n");
	assert("%c2%aE".urlDecode() == "®");
	assert("This+is%20a+test".urlDecode() == "This+is a+test");
	assert("This+is%20a+test".formDecode() == "This is a test");

	// round trip of a string containing a multi-byte UTF-8 character
	string original = "This~is a-test!\r\nHello, Wörld.. ";
	string encoded = original.urlEncode();
	assert(encoded == "This~is%20a-test%21%0D%0AHello%2C%20W%C3%B6rld..%20");
	assert(original.urlEncode().urlDecode() == original);
}
212 
213 
/** Output range that avoids allocating while the written characters
	reproduce the source string it was constructed with.

	As long as every character put matches the next character of the source,
	only a prefix length is advanced and `data` yields a slice of the source.
	The first character that differs (or goes beyond the end of the source)
	makes it copy the matched prefix into an `Appender`, which collects all
	output from then on.
*/
private struct StringSliceAppender(S) {
	private {
		Appender!S m_appender;  // collects the output once it differs from the source
		S m_source;             // non-empty only while the output still matches the source
		size_t m_prefixLength;  // number of leading source characters matched so far
	}

	this(S source)
	{
		m_source = source;
		// an empty source cannot be matched against, so appending starts right away
		if (!source.length)
			m_appender = appender!S();
	}

	@disable this(this);

	/// Appends a single UTF-8 code unit.
	void put(char ch)
	{
		if (!m_source.length) {
			m_appender.put(ch);
			return;
		}

		immutable still_matching =
			m_prefixLength < m_source.length && m_source[m_prefixLength] == ch;
		if (still_matching) {
			m_prefixLength++;
			return;
		}

		// first difference: switch over to a separate buffer seeded with the matched prefix
		m_appender = appender!S();
		m_appender.put(m_source[0 .. m_prefixLength]);
		m_appender.put(ch);
		m_source = S.init;
	}

	/// Appends a whole string.
	void put(S s)
	{
		if (!m_source.length) {
			m_appender.put(s);
			return;
		}

		// still in matching mode - compare character by character
		foreach (char unit; s)
			put(unit);
	}

	/// Appends a code point after encoding it to UTF-8.
	void put(dchar ch)
	{
		import std.encoding : encode;
		char[6] buf;
		static if (__VERSION__ < 2072)
			auto len = () @trusted { return encode(ch, buf[]); } ();
		else
			auto len = encode(ch, buf[]);
		foreach (char unit; buf[0 .. len]) put(unit);
	}

	/// The output written so far - a slice of the source while it still matches.
	@property S data()
	{
		if (m_source.length)
			return m_source[0 .. m_prefixLength];
		return m_appender.data;
	}
}
269 
// Exercises StringSliceAppender against the source string "foo": output that
// matches the source must stay a slice of it (same pointer), and the first
// difference must produce the correct combined result.
@safe unittest {
	string s = "foo";
	// matching the source piece by piece, then appending past its end
	auto a = StringSliceAppender!string(s);
	a.put("f"); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put('o'); assert(a.data == "fo"); assert(a.data.ptr is s.ptr);
	a.put('o'); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
	a.put('ä'); assert(a.data == "fooä");

	// a string that matches the rest of the source and then continues beyond it
	a = StringSliceAppender!string(s);
	a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put("oobar"); assert(a.data == "foobar");

	// matching via the dchar overload, followed by a differing char
	a = StringSliceAppender!string(s);
	a.put(cast(dchar)'f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put('b'); assert(a.data == "fb");

	// matching char followed by a differing string
	a = StringSliceAppender!string(s);
	a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put("b"); assert(a.data == "fb");

	// matching char followed by a differing multi-byte string
	a = StringSliceAppender!string(s);
	a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put("ä"); assert(a.data == "fä");

	// the very first put already differs: string
	a = StringSliceAppender!string(s);
	a.put("bar"); assert(a.data == "bar");

	// the very first put already differs: char
	a = StringSliceAppender!string(s);
	a.put('b'); assert(a.data == "b");

	// the very first put already differs: non-ASCII character
	a = StringSliceAppender!string(s);
	a.put('ä'); assert(a.data == "ä");

	// the whole source matched in one put, then a string is appended
	a = StringSliceAppender!string(s);
	a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
	a.put("bar"); assert(a.data == "foobar");

	// the whole source matched in one put, then a char is appended
	a = StringSliceAppender!string(s);
	a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
	a.put('b'); assert(a.data == "foob");
}