/**
    URL-encoding implementation

    Copyright: © 2012-2015 Sönke Ludwig
    License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
    Authors: Jan Krüger, Sönke Ludwig
*/
module vibe.textfilter.urlencode;

import vibe.utils.string;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.format;
import std.range;


/** Returns the URL encoded version of a given string.

    ASCII letters, ASCII digits and the characters `-`, `_`, `.` and `~` are
    copied verbatim, as is every byte listed in `allowed_chars`. Every other
    byte is written as `%XX` with two upper-case hexadecimal digits.

    Params:
        str = the string to encode
        allowed_chars = additional bytes that are passed through unencoded

    Returns:
        The encoded string. If nothing had to be escaped, the returned slice
        is `str` itself (no allocation takes place); otherwise the result is
        newly allocated.
*/
T[] urlEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
    auto dst = StringSliceAppender!(T[])(str);
    filterURLEncode(dst, str, allowed_chars);
    return dst.data;
}

@safe unittest {
    // nothing to escape: the input slice itself is returned
    string s = "hello-world";
    assert(s.urlEncode().ptr == s.ptr);
}

@safe unittest {
    // allowed_chars is matched byte by byte against the input
    assert(urlEncode("ö", "ö") == "ö");
    // the raw byte 0xE2 must not be mistaken for the code point U+00E2 ("â")
    assert(urlEncode("€", "â") == "%E2%82%AC");
}

/** Tests whether `str` consists solely of hexadecimal digits (`0-9`, `A-F`, `a-f`).

    An empty string yields `true`, since no offending character is found.
*/
private auto isCorrectHexNum(const(char)[] str)
@safe {
    foreach (char c; str) {
        switch(c) {
            case '0': .. case '9':
            case 'A': .. case 'F':
            case 'a': .. case 'f':
                break;
            default:
                return false;
        }
    }
    return true;
}

/** Checks whether a given string has valid URL encoding.

    The check fails if a `%` is not followed by two hexadecimal digits, or if
    the string contains a byte listed in `reserved_chars` (other than ASCII
    letters/digits and `-`, `.`, `_`, `~`, `%`, which are handled before the
    reserved set is consulted). Every other byte is accepted.

    Params:
        str = the string to check
        reserved_chars = bytes that must not appear unencoded in `str`

    Returns:
        `true` if no violation was found, `false` otherwise.
*/
bool isURLEncoded(const(char)[] str, const(char)[] reserved_chars = null)
@safe nothrow {
    import std.string : representation;

    for (size_t i = 0; i < str.length; i++) {
        if (isAsciiAlphaNum(str[i]))
            continue;

        switch (str[i]) {
            case '-':
            case '.':
            case '_':
            case '~':
                break;
            case '%':
                // the escape needs two more characters after the '%'
                if (i + 2 >= str.length)
                    return false;
                if (!isCorrectHexNum(str[i+1 .. i+3]))
                    return false;
                // skip the two hex digits; the loop increment skips the '%'
                i += 2;
                break;
            default:
                // compare bytes with bytes to avoid UTF-8 auto-decoding
                if (reserved_chars.representation.canFind(str[i]))
                    return false;
                break;
        }
    }
    return true;
}

@safe nothrow unittest {
    assert(isURLEncoded("hello-world"));
    assert(isURLEncoded("he%2F%af"));
    assert(!isURLEncoded("hello world", " "));
    assert(!isURLEncoded("he%f"));
    assert(!isURLEncoded("he%fx"));
}

/** Returns the decoded version of a given URL encoded string.

    Params:
        str = the URL encoded string

    Returns:
        The decoded string. `str` itself is returned when the `anyOf` check
        for `%` fails, i.e. (presumably) when it contains no percent sign.

    Throws:
        See `filterURLDecode`.
*/
T[] urlDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
    // NOTE(review): anyOf comes from vibe.utils.string; it is assumed to test
    // whether str contains any of the given characters
    if (!str.anyOf("%")) return str;
    auto dst = StringSliceAppender!(T[])(str);
    filterURLDecode(dst, str);
    return dst.data;
}

/** Returns the form encoded version of a given string.

    Form encoding is the same as normal URL encoding, except that
    spaces are replaced by plus characters.

    Note that newlines should always be represented as \r\n sequences
    according to the HTTP standard.

    Params:
        str = the string to encode
        allowed_chars = additional bytes that are passed through unencoded

    Returns:
        The encoded string; `str` itself if nothing had to be changed.
*/
T[] formEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
    auto dst = StringSliceAppender!(T[])(str);
    filterURLEncode(dst, str, allowed_chars, true);
    return dst.data;
}

/** Returns the decoded version of a form encoded string.

    Form encoding is the same as normal URL encoding, except that
    spaces are replaced by plus characters.

    Params:
        str = the form encoded string

    Returns:
        The decoded string. `str` itself is returned when the `anyOf` check
        for `%` and `+` fails.

    Throws:
        See `filterURLDecode`.
*/
T[] formDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
    if (!str.anyOf("%+")) return str;
    auto dst = StringSliceAppender!(T[])(str);
    filterURLDecode(dst, str, true);
    return dst.data;
}

/** Writes the URL encoded version of the given string to an output range.

    ASCII letters, ASCII digits and `-`, `_`, `.`, `~` are copied verbatim.
    A space is written as `+` if `form_encoding` is set. Any other byte is
    copied verbatim if it occurs in `allowed_chars` and written as `%XX`
    (upper-case hex) otherwise.

    `allowed_chars` is interpreted as a set of bytes (UTF-8 code units): an
    input byte is allowed exactly if the same byte occurs in `allowed_chars`.

    Params:
        dst = output range receiving the encoded characters
        str = the string to encode
        allowed_chars = additional bytes that are passed through unencoded
        form_encoding = encode spaces as `+` instead of `%20`
*/
void filterURLEncode(R)(ref R dst, const(char)[] str,
                        const(char)[] allowed_chars = null,
                        bool form_encoding = false)
{
    import std.string : representation;

    // Match code units against code units. Searching the char[] directly
    // would promote the input byte to a code point (byte 0xE2 would match
    // "â", U+00E2) instead of comparing it with the bytes of allowed_chars.
    auto allowed_bytes = allowed_chars.representation;

    while (str.length > 0) {
        if (isAsciiAlphaNum(str[0])) {
            put(dst, str[0]);
        } else switch (str[0]) {
            default:
                if (allowed_bytes.canFind(str[0])) put(dst, str[0]);
                else {
                    // formattedWrite needs an lvalue writer; non-copyable
                    // ranges are handed over by pointer instead
                    static if (is(typeof({ R a, b; b = a; })))
                        formattedWrite(dst, "%%%02X", str[0]);
                    else
                        formattedWrite(() @trusted { return &dst; } (), "%%%02X", str[0]);
                }
                break;
            case ' ':
                if (form_encoding) {
                    put(dst, '+');
                    break;
                }
                goto default;
            case '-': case '_': case '.': case '~':
                put(dst, str[0]);
                break;
        }
        str = str[1 .. $];
    }
}


/** Writes the decoded version of the given URL encoded string to an output range.

    Each `%XX` sequence is replaced by the byte with hexadecimal value `XX`.
    If `form_encoding` is set, `+` is replaced by a space. All other
    characters are copied verbatim.

    Params:
        dst = output range receiving the decoded characters
        str = the URL encoded string
        form_encoding = decode `+` as a space

    Throws:
        `Exception` with the message "invalid percent encoding" if a `%` is
        followed by fewer than two characters or if the two characters are
        not fully consumed as a hexadecimal number. `std.conv.parse` itself
        throws if it cannot convert the first character.
*/
void filterURLDecode(R)(ref R dst, const(char)[] str, bool form_encoding = false)
{
    while( str.length > 0 ) {
        switch(str[0]) {
            case '%':
                enforce(str.length >= 3, "invalid percent encoding");
                auto hex = str[1..3];
                // parse advances hex past the digits it consumed
                auto c = cast(char)parse!int(hex, 16);
                // anything left over means one of the two was no hex digit
                enforce(hex.length == 0, "invalid percent encoding");
                put(dst, c);
                str = str[3 .. $];
                break;
            case '+':
                if (form_encoding) {
                    put(dst, ' ');
                    str = str[1 .. $];
                    break;
                }
                goto default;
            default:
                put(dst, str[0]);
                str = str[1 .. $];
                break;
        }
    }
}


@safe unittest
{
    assert(urlEncode("\r\n") == "%0D%0A"); // github #65
    assert(urlEncode("This-is~a_test") == "This-is~a_test");
    assert(urlEncode("This is a test") == "This%20is%20a%20test");
    assert(urlEncode("This{is}test") == "This%7Bis%7Dtest");
    assert(formEncode("This is a test") == "This+is+a+test");
    assert(formEncode("this/test", "/") == "this/test");
    assert(formEncode("this/test") == "this%2Ftest");
    assert(urlEncode("%") == "%25");
    assert(urlEncode("!") == "%21");
    assert(urlDecode("%0D%0a") == "\r\n");
    assert(urlDecode("%c2%aE") == "®");
    assert(urlDecode("This+is%20a+test") == "This+is a+test");
    assert(formDecode("This+is%20a+test") == "This is a test");

    string a = "This~is a-test!\r\nHello, Wörld.. ";
    string aenc = urlEncode(a);
    assert(aenc == "This~is%20a-test%21%0D%0AHello%2C%20W%C3%B6rld..%20");
    assert(urlDecode(urlEncode(a)) == a);
}

// for issue https://github.com/vibe-d/vibe.d/issues/2541
@safe unittest
{
    static struct LimitedRange
    {
        char[] buf;
        void put(const(char)[] data) {
            .put(buf, data);
        }
    }

    char[100] buf1;
    char[100] buf2;
    auto r = LimitedRange(buf1[]);
    r.filterURLEncode("This-is~a_test");
    auto result = buf1[0 .. buf1.length - r.buf.length];
    assert(result == "This-is~a_test");

    r = LimitedRange(buf1[]);
    r.filterURLEncode("This is a test");
    result = buf1[0 .. buf1.length - r.buf.length];
    assert(result == "This%20is%20a%20test");

    r = LimitedRange(buf2[]);
    r.filterURLDecode(result);
    result = buf2[0 .. buf2.length - r.buf.length];
    assert(result == "This is a test");
}


/** Output range that avoids allocating as long as the data written to it
    is a prefix of a given source string.

    While every character put matches the next character of the source, only
    a prefix length is advanced and `data` returns a slice of the source. On
    the first mismatch, the matched prefix is copied to an `Appender`, which
    receives all output from then on.
*/
private struct StringSliceAppender(S) {
    private {
        Appender!S m_appender;
        // non-empty only while the output is still a prefix of the source
        S m_source;
        size_t m_prefixLength;
    }

    /// Sets up the appender to compare its output against `source`.
    this(S source)
    {
        m_source = source;
        // an empty source cannot be sliced; use the appender right away
        if (m_source.length == 0)
            m_appender = appender!S();
    }

    @disable this(this);

    /// Appends a single code unit.
    void put(char ch)
    {
        if (m_source.length) {
            if (m_prefixLength < m_source.length && m_source[m_prefixLength] == ch) {
                m_prefixLength++;
                return;
            }

            // first divergence from the source: switch to the appender
            m_appender = appender!S();
            m_appender.put(m_source[0 .. m_prefixLength]);
            m_appender.put(ch);
            m_source = S.init;
        } else m_appender.put(ch);
    }

    /// Appends a string.
    void put(S s)
    {
        if (m_source.length) {
            // character-wise, so that the prefix comparison is applied
            foreach (char ch; s)
                put(ch);
        } else m_appender.put(s);
    }

    /// Appends a code point, encoded via `std.encoding.encode`.
    void put(dchar ch)
    {
        import std.encoding : encode;
        char[6] chars;
        auto n = encode(ch, chars[]);
        foreach (char c; chars[0 .. n]) put(c);
    }

    /** Returns everything written so far: a slice of the source while the
        output is still a prefix of it, the appender's contents otherwise.
    */
    @property S data()
    {
        return m_source.length ? m_source[0 .. m_prefixLength] : m_appender.data;
    }
}

@safe unittest {
    string s = "foo";
    auto a = StringSliceAppender!string(s);
    a.put("f"); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
    a.put('o'); assert(a.data == "fo"); assert(a.data.ptr is s.ptr);
    a.put('o'); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
    a.put('ä'); assert(a.data == "fooä");

    a = StringSliceAppender!string(s);
    a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
    a.put("oobar"); assert(a.data == "foobar");

    a = StringSliceAppender!string(s);
    a.put(cast(dchar)'f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
    a.put('b'); assert(a.data == "fb");

    a = StringSliceAppender!string(s);
    a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
    a.put("b"); assert(a.data == "fb");

    a = StringSliceAppender!string(s);
    a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
    a.put("ä"); assert(a.data == "fä");

    a = StringSliceAppender!string(s);
    a.put("bar"); assert(a.data == "bar");

    a = StringSliceAppender!string(s);
    a.put('b'); assert(a.data == "b");

    a = StringSliceAppender!string(s);
    a.put('ä'); assert(a.data == "ä");

    a = StringSliceAppender!string(s);
    a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
    a.put("bar"); assert(a.data == "foobar");

    a = StringSliceAppender!string(s);
    a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
    a.put('b'); assert(a.data == "foob");
}

/** Tests whether `ch` is an ASCII letter (`A-Z`, `a-z`) or digit (`0-9`).

    Masking with 0xDF clears bit 5 and thereby maps `a-z` onto `A-Z`. The
    unsigned subtractions wrap around for values below the range start, so a
    single comparison per range suffices.
*/
private static bool isAsciiAlphaNum(char ch)
@safe nothrow pure @nogc {
    return (uint(ch) & 0xDF) - 0x41 < 26 || uint(ch) - '0' <= 9;
}

unittest {
    assert(!isAsciiAlphaNum('@'));
    assert(isAsciiAlphaNum('A'));
    assert(isAsciiAlphaNum('Z'));
    assert(!isAsciiAlphaNum('['));
    assert(!isAsciiAlphaNum('`'));
    assert(isAsciiAlphaNum('a'));
    assert(isAsciiAlphaNum('z'));
    assert(!isAsciiAlphaNum('{'));
    assert(!isAsciiAlphaNum('/'));
    assert(isAsciiAlphaNum('0'));
    assert(isAsciiAlphaNum('9'));
    assert(!isAsciiAlphaNum(':'));
}