1 /**
2 URL-encoding implementation
3
4 Copyright: © 2012-2015 Sönke Ludwig
5 License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 Authors: Jan Krüger, Sönke Ludwig
7 */
8 module vibe.textfilter.urlencode;
9
10 import vibe.utils.string;
11
12 import std.algorithm;
13 import std.array;
14 import std.conv;
15 import std.exception;
16 import std.format;
17 import std.range;
18
19
/** Returns the URL encoded version of a given string.

	ASCII letters and digits, the characters `-`, `_`, `.`, `~` and any
	character listed in `allowed_chars` are copied verbatim. Every other
	character is written as a percent escape.

	Params:
		str = the text to encode
		allowed_chars = additional characters that are passed through unescaped

	Returns:
		`str` itself (no allocation) if the encoded text is identical to the
		input, otherwise a newly allocated string holding the encoded text.
*/
T[] urlEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
	// The slice appender only allocates once the output deviates from `str`.
	auto sink = StringSliceAppender!(T[])(str);
	filterURLEncode(sink, str, allowed_chars);
	return sink.data;
}
30
@safe unittest {
	// Input made up solely of unreserved characters must be handed back as
	// the very same slice instead of a copy.
	string input = "hello-world";
	auto encoded = input.urlEncode();
	assert(encoded.ptr == input.ptr);
}
35
/// Tests whether every character of `str` is a hexadecimal digit
/// (`0`-`9`, `A`-`F` or `a`-`f`). An empty string yields `true`.
private auto isCorrectHexNum(const(char)[] str)
@safe {
	foreach (char digit; str) {
		immutable isDecimal = digit >= '0' && digit <= '9';
		immutable isUpperHex = digit >= 'A' && digit <= 'F';
		immutable isLowerHex = digit >= 'a' && digit <= 'f';

		if (!(isDecimal || isUpperHex || isLowerHex))
			return false;
	}
	return true;
}
50
/** Checks whether a given string has valid URL encoding.

	Params:
		str = the string to check
		reserved_chars = characters that are rejected when they occur
			unescaped; ASCII letters, digits and `-`, `.`, `_`, `~` are
			always accepted

	Returns:
		`false` if a `%` is not followed by two hexadecimal digits or if an
		unescaped character from `reserved_chars` is found, `true` otherwise.
*/
bool isURLEncoded(const(char)[] str, const(char)[] reserved_chars = null)
@safe nothrow {
	import std.string : representation;

	// Compare raw code units so that the search never has to decode UTF-8.
	auto reserved = reserved_chars.representation;

	size_t pos = 0;
	while (pos < str.length) {
		const ch = str[pos];

		if (ch == '%') {
			// An escape consists of the percent sign plus exactly two hex digits.
			if (str.length - pos < 3)
				return false;
			if (!isCorrectHexNum(str[pos + 1 .. pos + 3]))
				return false;
			pos += 3;
			continue;
		}

		immutable alwaysAllowed = isAsciiAlphaNum(ch)
			|| ch == '-' || ch == '.' || ch == '_' || ch == '~';
		if (!alwaysAllowed && reserved.canFind(ch))
			return false;

		pos++;
	}
	return true;
}
82
@safe nothrow unittest {
	// Unreserved characters and complete escapes (either hex case) pass.
	foreach (valid; ["hello-world", "he%2F%af"])
		assert(isURLEncoded(valid));

	// A reserved character that shows up unescaped is rejected.
	assert(!isURLEncoded("hello world", " "));

	// Truncated or non-hexadecimal escapes are rejected.
	foreach (broken; ["he%f", "he%fx"])
		assert(!isURLEncoded(broken));
}
90
/** Returns the decoded version of a given URL encoded string.

	If `str` contains no `%` character it is returned as is, without being
	processed any further.
*/
T[] urlDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
	if (str.anyOf("%")) {
		auto sink = StringSliceAppender!(T[])(str);
		filterURLDecode(sink, str);
		return sink.data;
	}

	// Nothing to decode.
	return str;
}
100
/** Returns the form encoded version of a given string.

	Form encoding works like normal URL encoding, with the exception that
	each space is written as a plus character.

	Note that newlines should always be represented as \r\n sequences
	according to the HTTP standard.

	Params:
		str = the text to encode
		allowed_chars = additional characters that are passed through unescaped
*/
T[] formEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
	auto sink = StringSliceAppender!(T[])(str);
	// The trailing `true` selects form encoding (space becomes '+').
	filterURLEncode(sink, str, allowed_chars, true);
	return sink.data;
}
115
/** Returns the decoded version of a form encoded string.

	Form encoding works like normal URL encoding, with the exception that
	spaces are represented by plus characters.

	If `str` contains neither `%` nor `+` it is returned as is, without
	being processed any further.
*/
T[] formDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
	if (str.anyOf("%+")) {
		auto sink = StringSliceAppender!(T[])(str);
		// The trailing `true` selects form decoding ('+' becomes a space).
		filterURLDecode(sink, str, true);
		return sink.data;
	}

	// Nothing to decode.
	return str;
}
128
/** Writes the URL encoded version of the given string to an output range.

	ASCII letters and digits as well as `-`, `_`, `.` and `~` are copied
	verbatim. All other code units are written as `%XX` escapes with upper
	case hexadecimal digits, unless they are listed in `allowed_chars`.

	Params:
		dst = output range that receives the encoded text
		str = the text to encode, processed one UTF-8 code unit at a time
		allowed_chars = additional characters to pass through unescaped;
			matched byte-wise against the code units of `str`
		form_encoding = if `true`, a space is written as `+` instead of
			being percent-escaped (or passed through via `allowed_chars`)
*/
void filterURLEncode(R)(ref R dst, const(char)[] str,
	const(char)[] allowed_chars = null,
	bool form_encoding = false)
{
	import std.string : representation;

	// `str` is walked code unit by code unit, so the allow-list has to be
	// searched as raw bytes as well (the same way isURLEncoded treats its
	// reserved characters). Searching the narrow string directly would treat
	// a non-ASCII byte as a code point and yield half-escaped sequences.
	auto allowed = allowed_chars.representation;

	while (str.length > 0) {
		if (isAsciiAlphaNum(str[0])) {
			put(dst, str[0]);
		} else switch (str[0]) {
			default:
				if (allowed.canFind(str[0])) put(dst, str[0]);
				else {
					// formattedWrite takes its writer by value; ranges that
					// cannot be copied are passed by pointer instead.
					static if (is(typeof({ R a, b; b = a; })))
						formattedWrite(dst, "%%%02X", str[0]);
					else
						formattedWrite(() @trusted { return &dst; } (), "%%%02X", str[0]);
				}
				break;
			case ' ':
				if (form_encoding) {
					put(dst, '+');
					break;
				}
				// Outside of form encoding a space is handled like any other character.
				goto default;
			case '-': case '_': case '.': case '~':
				put(dst, str[0]);
				break;
		}
		str = str[1 .. $];
	}
}
161
162
/** Writes the decoded version of the given URL encoded string to an output range.

	Params:
		dst = output range that receives the decoded text
		str = the encoded text
		form_encoding = if `true`, each `+` is decoded to a space

	Throws:
		An exception with the message "invalid percent encoding" if a `%` is
		followed by fewer than two characters or if the two characters are
		not both consumed as hexadecimal digits. The conversion error raised
		by `parse` is propagated if no digit can be parsed at all.
*/
void filterURLDecode(R)(ref R dst, const(char)[] str, bool form_encoding = false)
{
	auto remaining = str;

	while (remaining.length > 0) {
		const ch = remaining[0];

		if (ch == '%') {
			enforce(remaining.length >= 3, "invalid percent encoding");
			auto digits = remaining[1 .. 3];
			// parse advances `digits` past everything it consumed
			char decoded = cast(char)parse!int(digits, 16);
			enforce(digits.length == 0, "invalid percent encoding");
			put(dst, decoded);
			remaining = remaining[3 .. $];
			continue;
		}

		if (ch == '+' && form_encoding)
			put(dst, ' ');
		else
			put(dst, ch);
		remaining = remaining[1 .. $];
	}
}
191
192
@safe unittest
{
	// Input/expected pairs for plain URL encoding; the first pair covers github #65.
	string[2][] encodeCases = [
		["\r\n", "%0D%0A"],
		["This-is~a_test", "This-is~a_test"],
		["This is a test", "This%20is%20a%20test"],
		["This{is}test", "This%7Bis%7Dtest"],
		["%", "%25"],
		["!", "%21"],
	];
	foreach (pair; encodeCases)
		assert(urlEncode(pair[0]) == pair[1]);

	// Form encoding turns spaces into '+' and honors the allow-list.
	assert(formEncode("This is a test") == "This+is+a+test");
	assert(formEncode("this/test", "/") == "this/test");
	assert(formEncode("this/test") == "this%2Ftest");

	// Input/expected pairs for plain URL decoding; '+' stays untouched here.
	string[2][] decodeCases = [
		["%0D%0a", "\r\n"],
		["%c2%aE", "®"],
		["This+is%20a+test", "This+is a+test"],
	];
	foreach (pair; decodeCases)
		assert(urlDecode(pair[0]) == pair[1]);

	// Form decoding additionally maps '+' to a space.
	assert(formDecode("This+is%20a+test") == "This is a test");

	// Encoding followed by decoding must reproduce the original text.
	string original = "This~is a-test!\r\nHello, Wörld.. ";
	string encoded = urlEncode(original);
	assert(encoded == "This~is%20a-test%21%0D%0AHello%2C%20W%C3%B6rld..%20");
	assert(urlDecode(urlEncode(original)) == original);
}
214
// for issue https://github.com/vibe-d/vibe.d/issues/2541
@safe unittest
{
	// Output range that only accepts whole slices and writes them into a
	// fixed buffer; `buf` always refers to the part that is still unwritten.
	static struct LimitedRange
	{
		char[] buf;
		void put(const(char)[] data) {
			.put(buf, data);
		}
	}

	char[100] encodeStorage;
	char[100] decodeStorage;

	// Text that needs no escaping.
	auto sink = LimitedRange(encodeStorage[]);
	sink.filterURLEncode("This-is~a_test");
	auto written = encodeStorage[0 .. $ - sink.buf.length];
	assert(written == "This-is~a_test");

	// Text that contains characters to escape.
	sink = LimitedRange(encodeStorage[]);
	sink.filterURLEncode("This is a test");
	written = encodeStorage[0 .. $ - sink.buf.length];
	assert(written == "This%20is%20a%20test");

	// Decode the encoded text from the first buffer into the second one.
	sink = LimitedRange(decodeStorage[]);
	sink.filterURLDecode(written);
	written = decodeStorage[0 .. $ - sink.buf.length];
	assert(written == "This is a test");
}
243
244
/** Output range that avoids allocating while the written text equals a
	prefix of a given source string.

	As long as each written character matches the next character of the
	source, only the length of the matching prefix is tracked and `data`
	returns a slice of the source. On the first mismatch the matched prefix
	is copied into an `Appender`, which receives all output from then on.
*/
private struct StringSliceAppender(S) {
	private {
		Appender!S m_appender;
		S m_source;
		size_t m_prefixLength;
	}

	/// Starts tracking `source`; an empty source goes to the appender right away.
	this(S source)
	{
		m_source = source;
		if (m_source.length == 0)
			m_appender = appender!S();
	}

	@disable this(this);

	/// Appends a single code unit.
	void put(char ch)
	{
		// An empty source means that output already goes to the appender.
		if (m_source.length == 0) {
			m_appender.put(ch);
			return;
		}

		immutable matchesSource = m_prefixLength < m_source.length
			&& m_source[m_prefixLength] == ch;
		if (matchesSource) {
			m_prefixLength++;
			return;
		}

		// First deviation: copy what matched so far and stop tracking the source.
		m_appender = appender!S();
		m_appender.put(m_source[0 .. m_prefixLength]);
		m_appender.put(ch);
		m_source = S.init;
	}

	/// Appends a whole string.
	void put(S s)
	{
		if (m_source.length == 0) {
			m_appender.put(s);
			return;
		}

		// Still tracking the source, so compare code unit by code unit.
		foreach (char ch; s)
			put(ch);
	}

	/// Appends a code point by encoding it and appending the resulting code units.
	void put(dchar ch)
	{
		import std.encoding : encode;
		char[6] units;
		auto count = encode(ch, units[]);
		foreach (char unit; units[0 .. count]) put(unit);
	}

	/// The text written so far: a slice of the source while it still matches,
	/// the appender contents otherwise.
	@property S data()
	{
		if (m_source.length)
			return m_source[0 .. m_prefixLength];
		return m_appender.data;
	}
}
297
@safe unittest {
	string src = "foo";

	// Matching output stays a slice of the source until something extra is appended.
	{
		auto app = StringSliceAppender!string(src);
		app.put("f"); assert(app.data == "f"); assert(app.data.ptr is src.ptr);
		app.put('o'); assert(app.data == "fo"); assert(app.data.ptr is src.ptr);
		app.put('o'); assert(app.data == "foo"); assert(app.data.ptr is src.ptr);
		app.put('ä'); assert(app.data == "fooä");
	}

	// A string that runs past the end of the source.
	{
		auto app = StringSliceAppender!string(src);
		app.put('f'); assert(app.data == "f"); assert(app.data.ptr is src.ptr);
		app.put("oobar"); assert(app.data == "foobar");
	}

	// A matching dchar followed by a deviating char.
	{
		auto app = StringSliceAppender!string(src);
		app.put(cast(dchar)'f'); assert(app.data == "f"); assert(app.data.ptr is src.ptr);
		app.put('b'); assert(app.data == "fb");
	}

	// A matching char followed by a deviating ASCII string.
	{
		auto app = StringSliceAppender!string(src);
		app.put('f'); assert(app.data == "f"); assert(app.data.ptr is src.ptr);
		app.put("b"); assert(app.data == "fb");
	}

	// A matching char followed by a deviating non-ASCII string.
	{
		auto app = StringSliceAppender!string(src);
		app.put('f'); assert(app.data == "f"); assert(app.data.ptr is src.ptr);
		app.put("ä"); assert(app.data == "fä");
	}

	// Deviation on the very first put: string, char and non-ASCII character.
	{
		auto app = StringSliceAppender!string(src);
		app.put("bar"); assert(app.data == "bar");
	}
	{
		auto app = StringSliceAppender!string(src);
		app.put('b'); assert(app.data == "b");
	}
	{
		auto app = StringSliceAppender!string(src);
		app.put('ä'); assert(app.data == "ä");
	}

	// The complete source matched, then a string is appended.
	{
		auto app = StringSliceAppender!string(src);
		app.put("foo"); assert(app.data == "foo"); assert(app.data.ptr is src.ptr);
		app.put("bar"); assert(app.data == "foobar");
	}

	// The complete source matched, then a char is appended.
	{
		auto app = StringSliceAppender!string(src);
		app.put("foo"); assert(app.data == "foo"); assert(app.data.ptr is src.ptr);
		app.put('b'); assert(app.data == "foob");
	}
}
339
/// Tests whether `ch` is an ASCII letter (`A`-`Z`, `a`-`z`) or an ASCII
/// digit (`0`-`9`). Code units outside of the ASCII range yield `false`.
private static bool isAsciiAlphaNum(char ch)
@safe nothrow pure @nogc {
	if (ch >= '0' && ch <= '9')
		return true;
	if (ch >= 'A' && ch <= 'Z')
		return true;
	return ch >= 'a' && ch <= 'z';
}
344
unittest {
	// First and last character of each accepted range.
	foreach (char accepted; "AZaz09")
		assert(isAsciiAlphaNum(accepted));

	// Characters directly before and after each accepted range.
	foreach (char rejected; "@[`{/:")
		assert(!isAsciiAlphaNum(rejected));
}