/**
	URL-encoding implementation

	Copyright: © 2012-2015 RejectedSoftware e.K.
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Jan Krüger, Sönke Ludwig
*/
module vibe.textfilter.urlencode;

import vibe.utils.string;

import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.format;


/** Returns the URL encoded version of a given string.

	ASCII letters, digits and the characters '-', '_', '.' and '~' as well as
	any character listed in `allowed_chars` are passed through unchanged. Every
	other byte is written as '%' followed by two uppercase hexadecimal digits.

	Params:
		str = The string to encode
		allowed_chars = Additional characters that are left unencoded

	Returns:
		The encoded string. As long as the output is identical to a prefix of
		`str`, no memory is allocated; in particular, if nothing had to be
		encoded, the returned array is a slice of `str` itself.
*/
T[] urlEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
	auto dst = StringSliceAppender!(T[])(str);
	filterURLEncode(dst, str, allowed_chars);
	return dst.data;
}

@safe unittest {
	string s = "hello-world";
	assert(s.urlEncode().ptr == s.ptr);
}

/// Returns `true` if every character of `str` is a hexadecimal digit (either case).
private bool isCorrectHexNum(const(char)[] str)
@safe {
	foreach (char c; str) {
		switch(c) {
			case '0': .. case '9':
			case 'A': .. case 'F':
			case 'a': .. case 'f':
				break;
			default:
				return false;
		}
	}
	return true;
}

/** Checks whether a given string has valid URL encoding.

	ASCII letters, digits, '-', '.', '_' and '~' are always accepted. A '%' is
	accepted only if it is followed by exactly two hexadecimal digits. Any other
	character is rejected only if it is contained in `reserved_chars`.

	Params:
		str = The string to check
		reserved_chars = Characters that must not occur unencoded

	Returns:
		`true` if `str` passes the checks described above, `false` otherwise.
*/
bool isURLEncoded(const(char)[] str, const(char)[] reserved_chars = null)
@safe {
	for (size_t i = 0; i < str.length; i++) {
		switch (str[i]) {
			case '-':
			case '.':
			case '0': .. case '9':
			case 'A': .. case 'Z':
			case '_':
			case 'a': .. case 'z':
			case '~':
				break;
			case '%':
				// two more characters must follow the percent sign
				if (i + 2 >= str.length)
					return false;
				if (!isCorrectHexNum(str[i+1 .. i+3]))
					return false;
				// skip the two hex digits that were just validated
				i += 2;
				break;
			default:
				if (reserved_chars.canFind(str[i]))
					return false;
				break;
		}
	}
	return true;
}

@safe unittest {
	assert(isURLEncoded("hello-world"));
	assert(isURLEncoded("he%2F%af"));
	assert(!isURLEncoded("hello world", " "));
	assert(!isURLEncoded("he%f"));
	assert(!isURLEncoded("he%fx"));
}

/** Returns the decoded version of a given URL encoded string.

	Params:
		str = The URL encoded string

	Returns:
		The decoded string. If `str` contains no '%' character, `str` itself is
		returned without any processing.

	Throws:
		`Exception` if a '%' is not followed by two hexadecimal digits.
*/
T[] urlDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
	if (!str.anyOf("%")) return str;
	auto dst = StringSliceAppender!(T[])(str);
	filterURLDecode(dst, str);
	return dst.data;
}

/** Returns the form encoded version of a given string.

	Form encoding is the same as normal URL encoding, except that
	spaces are replaced by plus characters.

	Note that newlines should always be represented as \r\n sequences
	according to the HTTP standard.

	Params:
		str = The string to encode
		allowed_chars = Additional characters that are left unencoded

	Returns:
		The encoded string. As long as the output is identical to a prefix of
		`str`, no memory is allocated and a slice of `str` is returned.
*/
T[] formEncode(T)(T[] str, const(char)[] allowed_chars = null) if (is(T[] : const(char)[]))
{
	auto dst = StringSliceAppender!(T[])(str);
	filterURLEncode(dst, str, allowed_chars, true);
	return dst.data;
}

/** Returns the decoded version of a form encoded string.

	Form encoding is the same as normal URL encoding, except that
	spaces are replaced by plus characters.

	Params:
		str = The form encoded string

	Returns:
		The decoded string. If `str` contains neither '%' nor '+', `str` itself
		is returned without any processing.

	Throws:
		`Exception` if a '%' is not followed by two hexadecimal digits.
*/
T[] formDecode(T)(T[] str) if (is(T[] : const(char)[]))
{
	if (!str.anyOf("%+")) return str;
	auto dst = StringSliceAppender!(T[])(str);
	filterURLDecode(dst, str, true);
	return dst.data;
}

/** Writes the URL encoded version of the given string to an output range.

	The input is processed one `char` at a time. ASCII letters, digits and the
	characters '-', '_', '.' and '~' are copied verbatim, as is any character
	found in `allowed_chars`. If `form_encoding` is set, a space is written as
	'+'. Everything else is written as '%' followed by two uppercase
	hexadecimal digits.

	Params:
		dst = Output range receiving the encoded characters through `put`
		str = The string to encode
		allowed_chars = Additional characters that are copied unencoded
		form_encoding = If `true`, spaces are written as '+'
*/
void filterURLEncode(R)(ref R dst, const(char)[] str,
                        const(char)[] allowed_chars = null,
                        bool form_encoding = false)
{
	static immutable hex_digits = "0123456789ABCDEF";

	while (str.length > 0) {
		switch (str[0]) {
			default:
				if (allowed_chars.canFind(str[0])) dst.put(str[0]);
				else {
					// Same output as formatting with "%%%02X", but without
					// going through the format machinery (and without having
					// to take the address of a non-copyable output range).
					const uint code = str[0];
					dst.put('%');
					dst.put(hex_digits[code >> 4]);
					dst.put(hex_digits[code & 0x0F]);
				}
				break;
			case ' ':
				if (form_encoding) {
					dst.put('+');
					break;
				}
				goto default;
			case 'A': .. case 'Z':
			case 'a': .. case 'z':
			case '0': .. case '9':
			case '-': case '_': case '.': case '~':
				dst.put(str[0]);
				break;
		}
		str = str[1 .. $];
	}
}


/** Writes the decoded version of the given URL encoded string to an output range.

	Each "%XX" sequence is replaced by the `char` with hexadecimal value XX.
	If `form_encoding` is set, '+' is replaced by a space. All other characters
	are copied unchanged.

	Params:
		dst = Output range receiving the decoded characters through `put`
		str = The encoded string
		form_encoding = If `true`, '+' is decoded to a space

	Throws:
		`Exception` if a '%' is not followed by two hexadecimal digits.
*/
void filterURLDecode(R)(ref R dst, const(char)[] str, bool form_encoding = false)
{
	while( str.length > 0 ) {
		switch(str[0]) {
			case '%':
				enforce(str.length >= 3, "invalid percent encoding");
				auto hex = str[1..3];
				auto c = cast(char)parse!int(hex, 16);
				// parse() consumes what it could convert; anything left over
				// means that one of the two characters was not a hex digit
				enforce(hex.length == 0, "invalid percent encoding");
				dst.put(c);
				str = str[3 .. $];
				break;
			case '+':
				if (form_encoding) {
					dst.put(' ');
					str = str[1 .. $];
					break;
				}
				goto default;
			default:
				dst.put(str[0]);
				str = str[1 .. $];
				break;
		}
	}
}


@safe unittest
{
	assert(urlEncode("\r\n") == "%0D%0A"); // github #65
	assert(urlEncode("This-is~a_test") == "This-is~a_test");
	assert(urlEncode("This is a test") == "This%20is%20a%20test");
	assert(urlEncode("This{is}test") == "This%7Bis%7Dtest");
	assert(formEncode("This is a test") == "This+is+a+test");
	assert(formEncode("this/test", "/") == "this/test");
	assert(formEncode("this/test") == "this%2Ftest");
	assert(urlEncode("%") == "%25");
	assert(urlEncode("!") == "%21");
	assert(urlDecode("%0D%0a") == "\r\n");
	assert(urlDecode("%c2%aE") == "®");
	assert(urlDecode("This+is%20a+test") == "This+is a+test");
	assert(formDecode("This+is%20a+test") == "This is a test");

	string a = "This~is a-test!\r\nHello, Wörld.. ";
	string aenc = urlEncode(a);
	assert(aenc == "This~is%20a-test%21%0D%0AHello%2C%20W%C3%B6rld..%20");
	assert(urlDecode(urlEncode(a)) == a);
}


/** Output range that avoids allocating while its output matches a source string.

	As long as the characters written are identical to the beginning of the
	source string, only the length of the matching prefix is tracked and `data`
	returns a slice of the source. On the first mismatch, the prefix matched so
	far is copied into an `Appender` and all further output is appended there.
*/
private struct StringSliceAppender(S) {
	private {
		Appender!S m_appender;
		S m_source;            // empty once the appender has taken over
		size_t m_prefixLength; // number of leading chars of m_source written so far
	}

	/// Starts in prefix-tracking mode, unless `source` is empty.
	this(S source)
	{
		m_source = source;
		if (m_source.length == 0)
			m_appender = appender!S();
	}

	@disable this(this);

	/// Writes a single code unit.
	void put(char ch)
	{
		if (m_source.length) {
			// still a prefix of the source: just extend the slice
			if (m_prefixLength < m_source.length && m_source[m_prefixLength] == ch) {
				m_prefixLength++;
				return;
			}

			// first mismatch: copy the prefix and continue with the appender
			m_appender = appender!S();
			m_appender.put(m_source[0 .. m_prefixLength]);
			m_appender.put(ch);
			m_source = S.init;
		} else m_appender.put(ch);
	}

	/// Writes a whole string, char by char while still in prefix-tracking mode.
	void put(S s)
	{
		if (m_source.length) {
			foreach (char ch; s)
				put(ch);
		} else m_appender.put(s);
	}

	/// Encodes `ch` using `std.encoding.encode` and writes the resulting code units.
	void put(dchar ch)
	{
		import std.encoding : encode;
		char[6] chars;
		static if (__VERSION__ < 2072)
			auto n = () @trusted { return encode(ch, chars[]); } ();
		else
			auto n = encode(ch, chars[]);
		foreach (char c; chars[0 .. n]) put(c);
	}

	/// The output written so far: a slice of the source while in prefix-tracking mode, the appender contents otherwise.
	@property S data()
	{
		return m_source.length ? m_source[0 .. m_prefixLength] : m_appender.data;
	}
}

@safe unittest {
	string s = "foo";
	auto a = StringSliceAppender!string(s);
	a.put("f"); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put('o'); assert(a.data == "fo"); assert(a.data.ptr is s.ptr);
	a.put('o'); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
	a.put('ä'); assert(a.data == "fooä");

	a = StringSliceAppender!string(s);
	a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put("oobar"); assert(a.data == "foobar");

	a = StringSliceAppender!string(s);
	a.put(cast(dchar)'f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put('b'); assert(a.data == "fb");

	a = StringSliceAppender!string(s);
	a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put("b"); assert(a.data == "fb");

	a = StringSliceAppender!string(s);
	a.put('f'); assert(a.data == "f"); assert(a.data.ptr is s.ptr);
	a.put("ä"); assert(a.data == "fä");

	a = StringSliceAppender!string(s);
	a.put("bar"); assert(a.data == "bar");

	a = StringSliceAppender!string(s);
	a.put('b'); assert(a.data == "b");

	a = StringSliceAppender!string(s);
	a.put('ä'); assert(a.data == "ä");

	a = StringSliceAppender!string(s);
	a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
	a.put("bar"); assert(a.data == "foobar");

	a = StringSliceAppender!string(s);
	a.put("foo"); assert(a.data == "foo"); assert(a.data.ptr is s.ptr);
	a.put('b'); assert(a.data == "foob");
}