/**
	URL parsing routines.

	Copyright: © 2012-2017 RejectedSoftware e.K.
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module vibe.inet.url;

public import vibe.core.path;

import vibe.textfilter.urlencode;
import vibe.utils.string;

import std.array;
import std.conv;
import std.exception;
import std.string;
import std.traits : isInstanceOf;


/**
	Represents a URL decomposed into its components.

	The path is stored in URL encoded form; all other components are stored
	verbatim as they were parsed or assigned.
*/
struct URL {
@safe:
	private {
		string m_schema;
		string m_pathString; // always URL encoded (checked by the invariant)
		string m_host;
		ushort m_port;       // 0 means "no explicit port"
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
	}

	/// Constructs a new URL object from its components.
	this(string schema, string host, ushort port, InetPath path)
	{
		m_schema = schema;
		m_host = host;
		m_port = port;
		version (Have_vibe_core) m_pathString = path.toString();
		else m_pathString = urlEncode(path.toString(), "/");
	}
	/// ditto
	this(string schema, InetPath path)
	{
		this(schema, null, 0, path);
	}

	version (Have_vibe_core) {
		/// ditto
		this(string schema, string host, ushort port, PosixPath path)
		{
			this(schema, host, port, cast(InetPath)path);
		}
		/// ditto
		this(string schema, PosixPath path)
		{
			this(schema, null, 0, cast(InetPath)path);
		}
		/// ditto
		this(string schema, string host, ushort port, WindowsPath path)
		{
			this(schema, host, port, cast(InetPath)path);
		}
		/// ditto
		this(string schema, WindowsPath path)
		{
			this(schema, null, 0, cast(InetPath)path);
		}
	}

	/** Constructs a URL from its string representation.

		A string starting with '/' is treated as a local URI (path, query
		string and anchor only). Otherwise the string must start with a
		schema followed by ':'.

		Throws:
			An `Exception` if the string is empty, has no schema, is missing
			the "//" required by the schema, has an empty user name, an empty
			port or an empty host where one is required, or if the path is
			not properly URL encoded. A malformed port number makes
			`to!ushort` throw.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.indexOfCT(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (isDoubleSlashSchema(m_schema)) {
				// proto://server/path style
				enforce(str.startsWith("//"), "URL must start with proto://...");
				requires_host = true;
				str = str[2 .. $];
			}

			// "file" URLs carry no authority part; everything after "//" is the path
			if (schema != "file") {
				// the authority part ends at the first '/' (or at the end of the string)
				auto si = str.indexOfCT('/');
				if( si < 0 ) si = str.length;

				// optional "user[:password]@" prefix
				auto ai = str[0 .. si].indexOfCT('@');
				sizediff_t hs = 0;
				if( ai >= 0 ){
					hs = ai+1;
					auto ci = str[0 .. ai].indexOfCT(':');
					if( ci >= 0 ){
						m_username = str[0 .. ci];
						m_password = str[ci+1 .. ai];
					} else m_username = str[0 .. ai];
					enforce(m_username.length > 0, "Empty user name in URL.");
				}

				m_host = str[hs .. si];

				// Stores the number following the first ':' of src in m_port
				// and returns the index of that ':' as reported by indexOfCT.
				auto findPort ( string src )
				{
					auto pi = src.indexOfCT(':');
					if(pi > 0) {
						enforce(pi < src.length-1, "Empty port in URL.");
						m_port = to!ushort(src[pi+1..$]);
					}
					return pi;
				}


				auto ip6 = m_host.indexOfCT('[');
				if (ip6 == 0) { // [ must be first char
					auto pe = m_host.indexOfCT(']');
					if (pe > 0) {
						// only look for the port after the closing bracket,
						// as the address itself contains colons
						findPort(m_host[pe..$]);
						m_host = m_host[1 .. pe];
					}
				}
				else {
					auto pi = findPort(m_host);
					if(pi > 0) {
						m_host = m_host[0 .. pi];
					}
				}

				enforce(!requires_host || m_schema == "file" || m_host.length > 0,
					"Empty server name in URL.");
				str = str[si .. $];
			}
		}

		this.localURI = str;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}
	/// ditto
	static URL fromString(string url_string)
	{
		return URL(url_string);
	}

	invariant()
	{
		assert(isURLEncoded(m_pathString), "Wrong URL encoding of '"~m_pathString~"'");
	}

	/// The schema/protocol part of the URL
	@property string schema() const { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The url encoded path part of the URL
	@property string pathString() const { return m_pathString; }

	/** Set the path part of the URL. It should be properly encoded.

		Throws: An `Exception` if `s` is not properly URL encoded.
	*/
	@property void pathString(string s)
	{
		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
		m_pathString = s;
	}

	/// The path part of the URL
	@property InetPath path() const {
		version (Have_vibe_core)
			return InetPath(m_pathString);
		else
			return Path(urlDecode(m_pathString));
	}
	version (Have_vibe_core) {
		/// ditto
		@property void path(Path)(Path p)
			if (isInstanceOf!(GenericPath, Path))
		{
			m_pathString = (cast(InetPath)p).toString();
		}
	} else {
		/// ditto
		@property void path(Path p)
		{
			m_pathString = p.toString().urlEncode("/");
		}
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/// The port part of the URL (optional)
	@property ushort port() const { return m_port; }
	/// ditto
	@property void port(ushort v) { m_port = v; }

	/// Get the default port for the given schema or 0
	static ushort defaultPort(string schema) {
		switch(schema){
			default:
			case "file": return 0;
			case "http": return 80;
			case "https": return 443;
			case "ftp": return 21;
			case "spdy": return 443;
			case "sftp": return 22;
		}
	}
	/// ditto
	ushort defaultPort() const {
		return defaultPort(m_schema);
	}

	/// The user name part of the URL (optional)
	@property string username() const { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const { return m_anchor; }

	/// The path part plus query string and anchor
	@property string localURI()
	const {
		auto str = appender!string();
		// m_pathString is already encoded
		str.put(m_pathString);
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/** ditto

		Assigning replaces the path, query string and anchor at once; a
		missing query string or anchor resets the respective field to `null`.
	*/
	@property void localURI(string str)
	{
		// split off the anchor first, so that a '?' inside of it is not
		// mistaken for the start of the query string
		auto ai = str.indexOfCT('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		} else m_anchor = null;

		auto qi = str.indexOfCT('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		} else m_queryString = null;

		this.pathString = str;
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property URL parentURL() const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		ret.port = port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/** Converts this URL object to its string representation.

		The password separator ':' is only written if a password is set, so
		that a URL with just a user name round-trips unchanged. Hosts that
		contain a ':' are written in brackets (IPv6 literal notation) and the
		port is only written if it is non-zero.
	*/
	string toString()
	const {
		import std.format;
		auto dst = appender!string();
		dst.put(schema);
		dst.put(":");
		if (isDoubleSlashSchema(schema))
			dst.put("//");
		if (m_username.length || m_password.length) {
			dst.put(username);
			if (m_password.length) {
				dst.put(':');
				dst.put(password);
			}
			dst.put('@');
		}

		import std.algorithm : canFind;
		auto ipv6 = host.canFind(":");

		if ( ipv6 ) dst.put('[');
		dst.put(host);
		if ( ipv6 ) dst.put(']');

		if( m_port > 0 ) formattedWrite(dst, ":%d", m_port);
		dst.put(localURI);
		return dst.data;
	}

	/** Tests whether this URL has the same schema and host as `rhs` and a
		path that starts with the path of `rhs`.
	*/
	bool startsWith(const URL rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		version (Have_vibe_core)
			return this.path.bySegment.startsWith(rhs.path.bySegment);
		else return this.path.startsWith(rhs.path);
	}

	/// Returns a new URL with `rhs` appended to the path (schema, host and port are kept).
	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// ditto
	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// Appends `rhs` to the path of this URL in place.
	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	/// ditto
	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }

	/** Tests two URLs for equality using '=='.

		Only schema, host and path string are compared.
	*/
	bool opEquals(ref const URL rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		if( m_pathString != rhs.m_pathString ) return false;
		return true;
	}
	/// ditto
	bool opEquals(const URL other) const { return opEquals(other); }

	/** Orders URLs by schema, then host, then path string.

		Returns: A negative value, zero or a positive value if this URL sorts
			before, equal to or after `rhs`, using the same fields as
			`opEquals`.
	*/
	int opCmp(ref const URL rhs) const {
		if( m_schema != rhs.m_schema ) return m_schema.cmp(rhs.m_schema);
		if( m_host != rhs.m_host ) return m_host.cmp(rhs.m_host);
		if( m_pathString != rhs.m_pathString ) return cmp(m_pathString, rhs.m_pathString);
		// all compared fields are equal
		return 0;
	}
}

/// True for the path types accepted by the `~` and `~=` operators of `URL`.
private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);

/// Determines whether URLs of the given schema use the "schema://" notation.
private bool isDoubleSlashSchema(string schema)
@safe nothrow @nogc {
	switch (schema) {
		case "ftp", "http", "https", "http+unix", "https+unix":
		case "spdy", "sftp", "ws", "wss", "file", "redis", "tcp":
			return true;
		default:
			return false;
	}
}

unittest { // IPv6
	auto urlstr = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
	auto url = URL.parse(urlstr);
	assert(url.schema == "http", url.schema);
	assert(url.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", url.host);
	assert(url.port == 8091);
	assert(url.path == InetPath("/abc"), url.path.toString());
	assert(url.toString == urlstr);

	url.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
	urlstr = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
	assert(url.toString == urlstr);
}


unittest {
	auto urlstr = "https://www.example.net/index.html";
	auto url = URL.parse(urlstr);
	assert(url.schema == "https", url.schema);
	assert(url.host == "www.example.net", url.host);
	assert(url.path == InetPath("/index.html"), url.path.toString());
	assert(url.toString == urlstr);

	urlstr = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
	url = URL.parse(urlstr);
	assert(url.schema == "http", url.schema);
	assert(url.username == "jo.doe", url.username);
	assert(url.password == "password", url.password);
	assert(url.port == 4711, to!string(url.port));
	assert(url.host == "sub.www.example.net", url.host);
	assert(url.path.toString() == "/sub2/index.html", url.path.toString());
	assert(url.queryString == "query", url.queryString);
	assert(url.anchor == "anchor", url.anchor);
	assert(url.toString == urlstr);
}

unittest { // issue #1044
	URL url = URL.parse("http://example.com/p?query#anchor");
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "query");
	assert(url.anchor == "anchor");
	assert(url.pathString == "/p");
	url.localURI = "/q";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "");
	assert(url.anchor == "");
	assert(url.pathString == "/q");
	url.localURI = "/q?query";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "query");
	assert(url.anchor == "");
	assert(url.pathString == "/q");
	url.localURI = "/q#anchor";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "");
	assert(url.anchor == "anchor");
	assert(url.pathString == "/q");
}

//websocket unittest
unittest {
	URL url = URL("ws://127.0.0.1:8080/echo");
	assert(url.host == "127.0.0.1");
	assert(url.port == 8080);
	assert(url.localURI == "/echo");
}

unittest {
	auto p = PosixPath("/foo bar/boo oom/");
	URL url = URL("http", "example.com", 0, p); // constructor test
	assert(url.path == cast(InetPath)p);
	url.path = p;
	assert(url.path == cast(InetPath)p); // path assignment test
	assert(url.pathString == "/foo%20bar/boo%20oom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");
	url.pathString = "/foo%20bar/boo%2foom/";
	assert(url.pathString == "/foo%20bar/boo%2foom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
}

unittest {
	auto url = URL("http://example.com/some%2bpath");
	assert((cast(PosixPath)url.path).toString() == "/some+path", url.path.toString());
}

unittest {
	assert(URL("file:///test").pathString == "/test");
	assert(URL("file:///test").path.toString() == "/test");
	assert(URL("file://test").pathString == "test");
	assert(URL("file://test").path.toString() == "test");
	assert(URL("file://./test").pathString == "./test");
	assert(URL("file://./test").path.toString() == "./test");
}

unittest { // issue #1318
	try {
		URL("http://something/inval%id");
		assert(false, "Expected to throw an exception.");
	} catch (Exception e) {}
}

unittest {
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "http+unix");
	assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").host == "%2Fvar%2Frun%2Fdocker.sock");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").pathString == "");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json").pathString == "/container/json");
	auto url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
	assert(URL(url.toString()) == url);
}

unittest {
	import vibe.data.serialization;
	static assert(isStringSerializable!URL);
}

unittest { // issue #1732
	auto url = URL("tcp://0.0.0.0:1234");
	url.port = 4321;
	assert(url.toString == "tcp://0.0.0.0:4321", url.toString);
}

unittest { // opCmp must be consistent with opEquals
	auto a = URL("http://example.com/a");
	auto a2 = URL("http://example.com/a");
	auto b = URL("http://example.com/b");
	assert(a == a2);
	assert(a.opCmp(a2) == 0);
	assert(!(a < a2) && !(a > a2));
	assert(a < b);
	assert(b > a);
}

unittest { // user name without password round-trips unchanged
	auto urlstr = "http://user@example.com/";
	auto url = URL(urlstr);
	assert(url.username == "user", url.username);
	assert(url.password == "", url.password);
	assert(url.toString == urlstr, url.toString);
}