1 /**
2 	URL-encoding implementation
3 
4 	Copyright: © 2012-2015 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Jan Krüger, Sönke Ludwig
7 */
8 module vibe.textfilter.urlencode;
9 
10 import vibe.utils.string;
11 
12 import std.algorithm;
13 import std.array;
14 import std.conv;
15 import std.exception;
16 import std.format;
17 import std.range;
18 
19 
/** URL-encodes a string.

	The actual encoding is performed by `filterURLEncode`; its output is
	collected by a `StringSliceAppender` that is seeded with `str`.

	Params:
		str = The text to encode.
		allowed_chars = Additional characters that are emitted verbatim
			instead of being percent-encoded.

	Returns:
		The URL encoded version of `str`. When no character needs to be
		escaped, the result is a slice of `str` itself rather than a copy.
*/
T[] urlEncode(T)(T[] str, const(char)[] allowed_chars = null)
	if (is(T[] : const(char)[]))
{
	auto sink = StringSliceAppender!(T[])(str);
	sink.filterURLEncode(str, allowed_chars);
	return sink.data;
}
30 
@safe unittest {
	// input that needs no escaping must come back as the very same slice
	string input = "hello-world";
	auto encoded = urlEncode(input);
	assert(encoded.ptr == input.ptr);
}
35 
/** Checks whether a string consists solely of ASCII hexadecimal digits.

	Params:
		str = The characters to check (`0-9`, `a-f` and `A-F` are accepted).

	Returns:
		`true` if every code unit of `str` is a hexadecimal digit (this
		includes the empty string), `false` otherwise.
*/
private bool isCorrectHexNum(const(char)[] str)
@safe pure nothrow @nogc {
	import std.ascii : isHexDigit;

	// Iterate over raw code units: no UTF decoding is needed (or wanted)
	// here, which also keeps the function `nothrow`.
	foreach (char c; str) {
		if (!isHexDigit(c))
			return false;
	}
	return true;
}
50 
/** Checks whether a given string has valid URL encoding.

	ASCII letters and digits as well as `-`, `.`, `_` and `~` are always
	accepted. A `%` must be followed by exactly two hexadecimal digits. Any
	other character is accepted unless it is listed in `reserved_chars`.

	Params:
		str = The string to validate.
		reserved_chars = Characters that must not occur in unescaped form.

	Returns:
		`true` if `str` passes the checks above, `false` otherwise.
*/
bool isURLEncoded(const(char)[] str, const(char)[] reserved_chars = null)
@safe nothrow {
	import std.string : representation;

	size_t pos = 0;
	while (pos < str.length) {
		const ch = str[pos];

		if (ch == '%') {
			// an escape sequence occupies three characters: '%' plus two hex digits
			if (str.length - pos < 3)
				return false;
			if (!isCorrectHexNum(str[pos + 1 .. pos + 3]))
				return false;
			pos += 3;
			continue;
		}

		immutable bool unreserved = isAsciiAlphaNum(ch)
			|| ch == '-' || ch == '.' || ch == '_' || ch == '~';

		// compare on the byte level to avoid UTF decoding of reserved_chars
		if (!unreserved && reserved_chars.representation.canFind(ch))
			return false;

		pos++;
	}
	return true;
}
82 
@safe nothrow unittest {
	// properly encoded inputs
	foreach (ok; ["hello-world", "he%2F%af"])
		assert(isURLEncoded(ok));

	// a reserved character that was left unescaped
	assert(!isURLEncoded("hello world", " "));

	// truncated or non-hexadecimal escape sequences
	foreach (bad; ["he%f", "he%fx"])
		assert(!isURLEncoded(bad));
}
90 
/** Returns the decoded version of a given URL encoded string.

	Params:
		str = The URL encoded text.

	Returns:
		The decoded text. If `str` contains no `%` character it is returned
		unmodified, without going through the decoder.

	Throws:
		Whatever `filterURLDecode` throws for malformed percent escapes.
*/
T[] urlDecode(T)(T[] str)
	if (is(T[] : const(char)[]))
{
	// only run the decoder if there is at least one escape sequence
	if (str.anyOf("%")) {
		auto sink = StringSliceAppender!(T[])(str);
		sink.filterURLDecode(str);
		return sink.data;
	}
	return str;
}
100 
/** Returns the form encoded version of a given string.

	Form encoding works like normal URL encoding, with the exception that
	space characters are written as plus characters.

	Note that newlines should always be represented as \r\n sequences
	according to the HTTP standard.

	Params:
		str = The text to encode.
		allowed_chars = Additional characters that are emitted verbatim
			instead of being percent-encoded.

	Returns:
		The form encoded version of `str`.
*/
T[] formEncode(T)(T[] str, const(char)[] allowed_chars = null)
	if (is(T[] : const(char)[]))
{
	auto sink = StringSliceAppender!(T[])(str);
	// the final argument switches filterURLEncode to form encoding
	sink.filterURLEncode(str, allowed_chars, true);
	return sink.data;
}
115 
/** Returns the decoded version of a form encoded string.

	Form encoding works like normal URL encoding, with the exception that
	space characters are written as plus characters.

	Params:
		str = The form encoded text.

	Returns:
		The decoded text. If `str` contains neither `%` nor `+` it is
		returned unmodified, without going through the decoder.

	Throws:
		Whatever `filterURLDecode` throws for malformed percent escapes.
*/
T[] formDecode(T)(T[] str)
	if (is(T[] : const(char)[]))
{
	// only run the decoder if there is anything to translate
	if (str.anyOf("%+")) {
		auto sink = StringSliceAppender!(T[])(str);
		sink.filterURLDecode(str, true);
		return sink.data;
	}
	return str;
}
128 
/** Writes the URL encoded version of the given string to an output range.

	ASCII letters and digits as well as `-`, `_`, `.` and `~` are copied
	verbatim. Every other character is written as `%XX` (two upper case
	hexadecimal digits), unless it is listed in `allowed_chars`.

	Params:
		dst = Output range receiving the encoded characters.
		str = The text to encode; it is processed one `char` at a time.
		allowed_chars = Additional characters to copy verbatim.
		form_encoding = If `true`, a space is written as `+`. This takes
			precedence over `allowed_chars`.
*/
void filterURLEncode(R)(ref R dst, const(char)[] str,
                        const(char)[] allowed_chars = null,
                        bool form_encoding = false)
{
	foreach (const(char) ch; str) {
		immutable bool unreserved = isAsciiAlphaNum(ch)
			|| ch == '-' || ch == '_' || ch == '.' || ch == '~';

		if (unreserved) {
			put(dst, ch);
		} else if (ch == ' ' && form_encoding) {
			put(dst, '+');
		} else if (allowed_chars.canFind(ch)) {
			put(dst, ch);
		} else {
			// Hand the range itself to formattedWrite if R is assignable,
			// otherwise pass a pointer to it.
			static if (is(typeof({ R a, b; b = a; })))
				formattedWrite(dst, "%%%02X", ch);
			else
				formattedWrite(() @trusted { return &dst; } (), "%%%02X", ch);
		}
	}
}
161 
162 
/** Writes the decoded version of the given URL encoded string to an output range.

	Each `%XX` sequence is replaced by the character with the hexadecimal
	code `XX`. All other characters are copied verbatim, except that `+` is
	turned into a space when `form_encoding` is set.

	Params:
		dst = Output range receiving the decoded characters.
		str = The encoded text.
		form_encoding = If `true`, `+` is decoded as a space character.

	Throws:
		An `Exception` with the message "invalid percent encoding" if a `%`
		is not followed by two characters or if those are not fully
		consumed as a hexadecimal number. `parse` may raise its own
		exception when it cannot convert the digits.
*/
void filterURLDecode(R)(ref R dst, const(char)[] str, bool form_encoding = false)
{
	for (auto rest = str; rest.length > 0; ) {
		const ch = rest[0];

		if (ch == '%') {
			enforce(rest.length >= 3, "invalid percent encoding");
			auto digits = rest[1 .. 3];
			immutable value = parse!int(digits, 16);
			// parse() advances `digits`; anything left over was not a hex digit
			enforce(digits.length == 0, "invalid percent encoding");
			put(dst, cast(char)value);
			rest = rest[3 .. $];
		} else {
			if (ch == '+' && form_encoding)
				put(dst, ' ');
			else
				put(dst, ch);
			rest = rest[1 .. $];
		}
	}
}
191 
192 
@safe unittest
{
	// urlEncode: [input, expected output]
	foreach (c; [
		["\r\n", "%0D%0A"], // github #65
		["This-is~a_test", "This-is~a_test"],
		["This is a test", "This%20is%20a%20test"],
		["This{is}test", "This%7Bis%7Dtest"],
		["%", "%25"],
		["!", "%21"],
	])
		assert(urlEncode(c[0]) == c[1], c[0]);

	// formEncode: spaces become '+', allowed characters are kept
	assert(formEncode("This is a test") == "This+is+a+test");
	assert(formEncode("this/test", "/") == "this/test");
	assert(formEncode("this/test") == "this%2Ftest");

	// urlDecode: [input, expected output]
	foreach (c; [
		["%0D%0a", "\r\n"],
		["%c2%aE", "®"],
		["This+is%20a+test", "This+is a+test"],
	])
		assert(urlDecode(c[0]) == c[1], c[0]);

	// formDecode additionally translates '+' to a space
	assert(formDecode("This+is%20a+test") == "This is a test");

	// encoding followed by decoding yields the original text
	string original = "This~is a-test!\r\nHello, Wörld.. ";
	string encoded = urlEncode(original);
	assert(encoded == "This~is%20a-test%21%0D%0AHello%2C%20W%C3%B6rld..%20");
	assert(urlDecode(urlEncode(original)) == original);
}
214 
// for issue https://github.com/vibe-d/vibe.d/issues/2541
// The filters must work with an output range that only accepts string slices.
@safe unittest
{
    static struct LimitedRange
    {
        char[] buf;
        void put(const(char)[] data) {
            .put(buf, data);
        }
    }

    char[100] encodeStorage;
    char[100] decodeStorage;

    // nothing to escape
    auto sink = LimitedRange(encodeStorage[]);
    sink.filterURLEncode("This-is~a_test");
    auto written = encodeStorage[0 .. $ - sink.buf.length];
    assert(written == "This-is~a_test");

    // spaces get percent-encoded
    sink = LimitedRange(encodeStorage[]);
    sink.filterURLEncode("This is a test");
    written = encodeStorage[0 .. $ - sink.buf.length];
    assert(written == "This%20is%20a%20test");

    // decode the previous result into the second buffer
    sink = LimitedRange(decodeStorage[]);
    sink.filterURLDecode(written);
    written = decodeStorage[0 .. $ - sink.buf.length];
    assert(written == "This is a test");
}
243 
244 
/** Output range that avoids copying for as long as the written characters
	repeat the beginning of a given source string.

	While every character put so far matches the source, only the length of
	the matching prefix is recorded and `data` yields a slice of the source.
	On the first mismatch the prefix is copied to an `Appender` and all
	further output is appended there.
*/
private struct StringSliceAppender(S) {
	private {
		Appender!S m_appender;
		S m_source;
		size_t m_prefixLength;
	}

	/// Creates an appender that tracks `source`; an empty source means
	/// that all output goes to the internal `Appender` right away.
	this(S source)
	{
		m_source = source;
		if (source.length == 0)
			m_appender = appender!S();
	}

	@disable this(this);

	/// Writes a single code unit.
	void put(char ch)
	{
		if (m_source.length == 0) {
			m_appender.put(ch);
			return;
		}

		immutable bool extendsPrefix =
			m_prefixLength < m_source.length && m_source[m_prefixLength] == ch;
		if (extendsPrefix) {
			++m_prefixLength;
			return;
		}

		// First deviation from the source: copy what matched so far and
		// stop tracking the source (an empty m_source marks append mode).
		m_appender = appender!S();
		m_appender.put(m_source[0 .. m_prefixLength]);
		m_appender.put(ch);
		m_source = S.init;
	}

	/// Writes a whole string.
	void put(S s)
	{
		if (m_source.length == 0) {
			m_appender.put(s);
			return;
		}

		foreach (char ch; s)
			put(ch);
	}

	/// Writes a code point by encoding it and putting each resulting `char`.
	void put(dchar ch)
	{
		import std.encoding : encode;
		char[6] chars;
		immutable n = encode(ch, chars[]);
		foreach (i; 0 .. n)
			put(chars[i]);
	}

	/// The text written so far - either a prefix slice of the source or
	/// the contents of the internal `Appender`.
	@property S data()
	{
		if (m_source.length)
			return m_source[0 .. m_prefixLength];
		return m_appender.data;
	}
}
297 
@safe unittest {
	string s = "foo";

	// matching the whole source piece by piece, then appending a code point
	{
		auto a = StringSliceAppender!string(s);
		a.put("f"); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
		a.put('o'); assert(a.data == "fo"); assert(a.data.ptr is s.ptr);
		a.put('o'); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
		a.put('ä'); assert(a.data == "fooä");
	}

	// a string that runs past the end of the source
	{
		auto a = StringSliceAppender!string(s);
		a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
		a.put("oobar"); assert(a.data == "foobar");
	}

	// dchar overload matching the prefix, then a mismatching char
	{
		auto a = StringSliceAppender!string(s);
		a.put(cast(dchar)'f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
		a.put('b'); assert(a.data == "fb");
	}

	// mismatching string after a matching char
	{
		auto a = StringSliceAppender!string(s);
		a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
		a.put("b"); assert(a.data == "fb");
	}

	// mismatching non-ASCII string after a matching char
	{
		auto a = StringSliceAppender!string(s);
		a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
		a.put("ä"); assert(a.data == "fä");
	}

	// mismatch on the very first write (string)
	{
		auto a = StringSliceAppender!string(s);
		a.put("bar"); assert(a.data == "bar");
	}

	// mismatch on the very first write (char)
	{
		auto a = StringSliceAppender!string(s);
		a.put('b'); assert(a.data == "b");
	}

	// mismatch on the very first write (non-ASCII code point)
	{
		auto a = StringSliceAppender!string(s);
		a.put('ä'); assert(a.data == "ä");
	}

	// complete source in one go, followed by another string
	{
		auto a = StringSliceAppender!string(s);
		a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
		a.put("bar"); assert(a.data == "foobar");
	}

	// complete source in one go, followed by a single char
	{
		auto a = StringSliceAppender!string(s);
		a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
		a.put('b'); assert(a.data == "foob");
	}
}
339 
/// Tests whether `ch` is an ASCII letter (`A-Z`, `a-z`) or an ASCII digit (`0-9`).
private static bool isAsciiAlphaNum(char ch)
@safe nothrow pure @nogc {
	immutable uint code = ch;

	// Clearing bit 5 maps 'a'..'z' onto 'A'..'Z'. The subtraction is done
	// on unsigned values, so anything below the start of a range wraps
	// around to a large number and fails the single upper bound check.
	immutable bool isLetter = (code & 0xDF) - 'A' < 26;
	immutable bool isDigit = code - '0' < 10;

	return isLetter || isDigit;
}
344 
unittest {
	// first and last character of each accepted range
	foreach (char c; "AZaz09")
		assert(isAsciiAlphaNum(c));

	// direct neighbours of those ranges in the ASCII table
	foreach (char c; "@[`{/:")
		assert(!isAsciiAlphaNum(c));
}