/**
	URL parsing routines.

	Copyright: © 2012-2017 Sönke Ludwig
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module vibe.inet.url;

public import vibe.core.path;

import vibe.textfilter.urlencode;
import vibe.utils.string;

import std.array;
import std.algorithm;
import std.conv;
import std.exception;
import std.string;
import std.traits : isInstanceOf;
import std.ascii : isAlpha, isASCII, toLower;
import std.uri: decode, encode;

import core.checkedint : addu;


/** Parses a user-provided URL with relaxed rules.

	Unlike `URL.parse`, this allows the URL to use special characters as part of
	the host name and path, automatically employing puny code or percent-encoding
	to convert this to a valid URL.

	If `url` does not appear to carry a schema (see the heuristic below) and
	`default_schema` is not empty, `default_schema ~ "://"` is prepended before
	parsing. The result is always returned in normalized form.

	Params:
		url = String representation of the URL
		default_schema = If `url` does not contain a schema name, this schema
			is used instead. A browser might use "http" or "https", for
			example. Pass an empty string to disable this behavior.

	Throws: An `Exception` if the (possibly prefixed) string cannot be parsed.
*/
URL parseUserURL(string url, string default_schema)
{
	if (default_schema.length && lacksSchema(url))
		url = default_schema ~ "://" ~ url;

	return URL(url, false).normalized;
}

/*	Heuristically determines whether a user-typed URL is missing its schema.

	Only the part in front of the first '/', '?' or '#' is inspected. The URL is
	considered schema-less if that part contains no colon at all
	("example.com/foo"), or if the colon is followed exclusively by digits
	("example.com:8080"), in which case it is taken to separate host and port.
	Anything else ("http://...", "file:/...", "mailto:a@b") is left untouched,
	as are empty strings and strings starting with a slash.
*/
private bool lacksSchema(string url)
{
	import std.ascii : isDigit;

	if (url.length == 0 || url[0] == '/') return false;

	// a colon can only delimit a schema if it precedes path, query and anchor
	auto end = url.indexOfAny("/?#");
	auto head = end < 0 ? url : url[0 .. end];

	auto ci = head.indexOf(':');
	if (ci < 0) return true;

	auto tail = head[ci+1 .. $];
	return ci > 0 && tail.length > 0 && tail.representation.all!(ch => isDigit(ch));
}

unittest {
	// special characters in path
	auto url = parseUserURL("http://example.com/hello-🌍", "foo");
	assert(url.pathString == "/hello-%F0%9F%8C%8D");
	url = parseUserURL("http://example.com/안녕하세요-세계", "foo");
	assert(url.pathString == "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84");
	// special characters in host name
	url = parseUserURL("http://hello-🌍.com/", "foo");
	assert(url.host == "xn--hello--8k34e.com");
	url = parseUserURL("http://hello-🌍.com:8080/", "foo");
	assert(url.host == "xn--hello--8k34e.com");
	url = parseUserURL("http://i-❤-이모티콘.io", "foo");
	assert(url.host == "xn--i---5r6aq903fubqabumj4g.io");
	url = parseUserURL("https://hello🌍.i-❤-이모티콘.com", "foo");
	assert(url.host == "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com");
}

unittest { // default schema handling
	assert(lacksSchema("example.com/foo/bar"));
	assert(lacksSchema("example.com:1234"));
	assert(lacksSchema("example.com/?u=http://other"));
	assert(!lacksSchema("http://example.com"));
	assert(!lacksSchema("file:/foo"));
	assert(!lacksSchema("mailto:jo@example.com"));
	assert(!lacksSchema("/foo"));
	assert(!lacksSchema(""));

	assert(parseUserURL("example.com/foo/bar", "sftp") == URL("sftp://example.com/foo/bar"));
	assert(parseUserURL("example.com:1234", "https") == URL("https://example.com:1234/"));
	assert(parseUserURL("http://example.com/foo", "sftp").schema == "http");
}


/**
	Represents a URL decomposed into its components.
*/
struct URL {
@safe:
	private {
		string m_schema;
		InetPath m_path;
		string m_host;
		ushort m_port; // 0 means "not explicitly specified"
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
	}

	/// Constructs a new URL object from its components.
	this(string schema, string host, ushort port, InetPath path) pure nothrow
	in {
		assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
		assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
	}
	do {
		m_schema = schema;
		m_host = host;
		m_port = port;
		m_path = path;
	}
	/// ditto
	this(string schema, InetPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, PosixPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, PosixPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, WindowsPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, WindowsPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}

	/** Constructs a "file:" URL from a native file system path.

		Note that the path must be absolute. On Windows, both, paths starting
		with a drive letter and UNC paths are supported.

		Throws: An `Exception` if `path` is not absolute.
	*/
	this(WindowsPath path) pure
	{
		import std.algorithm.iteration : map;
		import std.range : chain, only;

		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		// treat UNC paths properly
		if (path.startsWith(WindowsPath(`\\`))) {
			static if (is(InetPath.Segment2)) {
				auto segs = path.bySegment2;
			} else {
				auto segs = path.bySegment;
			}
			// skip the two leading segments of the `\\` prefix; the next one is the server name
			segs.popFront();
			segs.popFront();
			auto host = segs.front.name;
			segs.popFront();

			// rebuild an absolute path from the remaining segments
			InetPath ip;
			static if (is(InetPath.Segment2)) {
				ip = InetPath(only(InetPath.Segment2.fromTrustedString("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment2)s)));
			} else {
				ip = InetPath(only(InetPath.Segment("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment)s)));
			}

			this("file", host, 0, ip);
		} else this("file", null, 0, cast(InetPath)path); // drive letter paths have no host
	}
	/// ditto
	this(PosixPath path) pure
	{
		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		this("file", null, 0, cast(InetPath)path);
	}

	/** Constructs a URL from its string representation.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		this(url_string, true);
	}

	/*	Parses `url_string` into its components.

		If `encoded` is `false`, the host name labels are puny encoded and the
		local part (path, query, anchor) is percent-encoded using
		`std.uri.encode` before being stored; otherwise both are taken as-is.
	*/
	private this(string url_string, bool encoded)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.indexOf(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			enforce(m_schema[0].isAlpha,
					"Schema must start with an alphabetical char, found: " ~
					m_schema[0]);
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (str.startsWith("//")) {
				// proto://server/path style
				requires_host = true;
				str = str[2 .. $];
			}

			// the authority part ends at the first slash (or at the end of the string)
			auto si = str.indexOf('/');
			if( si < 0 ) si = str.length;
			auto ai = str[0 .. si].indexOf('@');
			sizediff_t hs = 0;
			if( ai >= 0 ){
				hs = ai+1;
				auto ci = str[0 .. ai].indexOf(':');
				if( ci >= 0 ){
					m_username = str[0 .. ci];
					m_password = str[ci+1 .. ai];
				} else m_username = str[0 .. ai];
				enforce(m_username.length > 0, "Empty user name in URL.");
			}

			m_host = str[hs .. si];

			// stores the port found after the first ':' of `src` and returns the colon's index
			auto findPort ( string src )
			{
				auto pi = src.indexOf(':');
				if(pi > 0) {
					enforce(pi < src.length-1, "Empty port in URL.");
					m_port = to!ushort(src[pi+1..$]);
				}
				return pi;
			}


			auto ip6 = m_host.indexOf('[');
			if (ip6 == 0) { // [ must be first char
				auto pe = m_host.indexOf(']');
				if (pe > 0) {
					// only look for a port after the closing bracket
					findPort(m_host[pe..$]);
					m_host = m_host[1 .. pe];
				}
			}
			else {
				auto pi = findPort(m_host);
				if(pi > 0) {
					m_host = m_host[0 .. pi];
				}
				if (!encoded)
					m_host = m_host.splitter('.').map!(punyEncode).join('.');
			}

			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
					"Empty server name in URL.");
			str = str[si .. $];
		}

		this.localURI = (encoded) ? str : str.encode;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}
	/// ditto
	static URL fromString(string url_string)
	{
		return URL(url_string);
	}

	/// The schema/protocol part of the URL
	@property string schema() const nothrow { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The url encoded path part of the URL
	@property string pathString() const nothrow { return m_path.toString; }

	/// Set the path part of the URL. It should be properly encoded.
	@property void pathString(string s)
	{
		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
		m_path = InetPath(s);
	}

	/// The path part of the URL
	@property InetPath path() const nothrow { return m_path; }
	/// ditto
	@property void path(InetPath p)
	nothrow {
		m_path = p;
	}
	/// ditto
	@property void path(Path)(Path p)
		if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
	{
		m_path = cast(InetPath)p;
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const pure nothrow { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/// The port part of the URL (optional). Falls back to the schema's default port if none was set.
	@property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
	/// ditto
	@property port(ushort v) nothrow { m_port = v; }

	/// Get the default port for the given schema or 0
	static ushort defaultPort(string schema)
	nothrow {
		import core.atomic : atomicLoad;
		import std.uni : toLower;

		string lowerschema;

		try
			lowerschema = schema.toLower();
		catch (Exception e)
			assert(false, e.msg);

		if (auto set = atomicLoad(map_commonInternetSchemas))
			if (set.contains(lowerschema))
				return set.get(lowerschema);

		return 0;
	}
	/// ditto
	ushort defaultPort()
	const nothrow {
		return defaultPort(m_schema);
	}

	/// The user name part of the URL (optional)
	@property string username() const nothrow { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const nothrow { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const nothrow { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const nothrow { return m_anchor; }

	/// The path part plus query string and anchor
	@property string localURI()
	const nothrow {
		auto str = appender!string();
		str.put(m_path.toString);
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		// the anchor is split off first, so a '?' inside of it is not taken as the query separator
		auto ai = str.indexOf('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		} else m_anchor = null;

		auto qi = str.indexOf('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		} else m_queryString = null;

		this.pathString = str;
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property URL parentURL()
	const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		// copy the raw field: the `port` getter would turn "no explicit port"
		// into an explicit default port that then shows up in `toString`
		ret.m_port = m_port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/// Converts this URL object to its string representation.
	string toString()
	const nothrow {
		auto dst = appender!string();
		try this.toString(dst);
		catch (Exception e) assert(false, e.msg);
		return dst.data;
	}

	/// Ditto
	void toString(OutputRange) (ref OutputRange dst) const {
		import std.format;
		dst.put(schema);
		dst.put(":");
		if (isCommonInternetSchema(schema))
			dst.put("//");
		if (m_username.length || m_password.length) {
			dst.put(username);
			if (m_password.length)
			{
				dst.put(':');
				dst.put(password);
			}
			dst.put('@');
		}

		// a host containing a colon is written in brackets (IPv6 literal)
		import std.algorithm : canFind;
		auto ipv6 = host.canFind(":");

		if ( ipv6 ) dst.put('[');
		dst.put(host);
		if ( ipv6 ) dst.put(']');

		// only an explicitly set port is written
		if (m_port > 0)
			formattedWrite(dst, ":%d", m_port);

		dst.put(localURI);
	}

	/** Converts a "file" URL back to a native file system path.

		Throws: An `Exception` if the schema is not "file".
	*/
	NativePath toNativePath()
	const {
		import std.algorithm.iteration : map;
		import std.range : dropOne;

		enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");

		version (Windows) {
			// a host name is mapped to a UNC path prefix
			if (this.host.length) {
				static if (is(NativePath.Segment2)) {
					auto p = NativePath(this.path
						.bySegment2
						.dropOne
						.map!(s => cast(WindowsPath.Segment2)s)
					);
				} else {
					auto p = NativePath(this.path
						.bySegment
						.dropOne
						.map!(s => cast(WindowsPath.Segment)s)
					);
				}
				return NativePath.fromTrustedString(`\\`~this.host) ~ p;
			}
		}

		return cast(NativePath)this.path;
	}

	/** Decode percent encoded triplets for unreserved or convert to uppercase

		Throws: An `Exception` if a '%' is not followed by two hexadecimal digits.
	*/
	private string normalize_percent_encoding(scope const(char)[] input)
	{
		auto normalized = appender!string;
		normalized.reserve(input.length);

		for (size_t i = 0; i < input.length; i++)
		{
			const char c = input[i];
			if (c == '%')
			{
				// query string and anchor are not validated on assignment, so
				// a truncated triplet is an input error rather than a program bug
				enforce(input.length >= i + 3, "Invalid percent encoding");

				char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
				switch (conv)
				{
					case 'A': .. case 'Z':
					case 'a': .. case 'z':
					case '0': .. case '9':
					case '-': case '.': case '_': case '~':
						normalized ~= conv; // Decode unreserved
						break;
					default:
						normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
						break;
				}

				i += 2; // skip the two hex digits
			}
			else
				normalized ~= c;
		}

		return normalized.data;
	}

	/**
	  * Normalize the content of this `URL` in place
	  *
	  * Normalization can be used to create a more consistent and human-friendly
	  * string representation of the `URL`.
	  * The list of transformations applied in the process of normalization is as follows:
			- Converting schema and host to lowercase
			- Removing port if it is the default port for schema
			- Removing dot segments in path
			- Converting percent-encoded triplets to uppercase
			- Adding slash when path is empty
			- Adding slash to path when path represents a directory
			- Decoding percent encoded triplets for unreserved characters
				A-Z a-z 0-9 - . _ ~

		Params:
			isDirectory = Path of the URL represents a directory, if one is
				not already present, a trailing slash will be appended when `true`

		Throws: An `Exception` if query string, anchor or path contain an
			invalid percent encoded triplet.
	*/
	void normalize(bool isDirectory = false)
	{
		import std.uni : toLower;

		// Lowercase host and schema
		this.m_schema = this.m_schema.toLower();
		this.m_host = this.m_host.toLower();

		// Remove default port
		if (this.m_port == URL.defaultPort(this.m_schema))
			this.m_port = 0;

		// Normalize percent encoding, decode unreserved or uppercase hex
		this.m_queryString = normalize_percent_encoding(this.m_queryString);
		this.m_anchor = normalize_percent_encoding(this.m_anchor);

		// Normalize path (first remove dot segments then normalize path segments)
		this.m_path = InetPath(this.m_path.normalized.bySegment2.map!(
				n => InetPath.Segment2.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
			).array);

		// Add trailing slash to empty path
		if (this.m_path.empty || isDirectory)
			this.m_path.endsWithSlash = true;
	}

	/** Returns the normalized form of the URL.

		See `normalize` for a full description.
	*/
	URL normalized()
	const {
		URL ret = this;
		ret.normalize();
		return ret;
	}

	/** Tests whether schema and host equal those of `rhs` and whether the path
		of `rhs` is a segment-wise prefix of this URL's path.
	*/
	bool startsWith(const URL rhs)
	const nothrow {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		static if (is(InetPath.Segment2))
			return this.path.bySegment2.startsWith(rhs.path.bySegment2);
		else return this.path.bySegment.startsWith(rhs.path.bySegment);
	}

	// Path concatenation; only schema, host and port are carried over to the result of `~`
	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	static if (is(InetPath.Segment2)) {
		URL opBinary(string OP, Path)(Path.Segment2 rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
		void opOpAssign(string OP, Path)(Path.Segment2 rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	}

	/** Tests two URLs for equality using '=='.

		Only schema, host, path and the explicitly set port are compared.
	*/
	bool opEquals(ref const URL rhs)
	const nothrow {
		if (m_schema != rhs.m_schema) return false;
		if (m_host != rhs.m_host) return false;
		if (m_path != rhs.m_path) return false;
		if (m_port != rhs.m_port) return false;
		return true;
	}
	/// ditto
	bool opEquals(const URL other) const nothrow { return opEquals(other); }

	/** Orders URLs by schema, host, path and port, in that order.

		Returns 0 exactly for those URLs that `opEquals` treats as equal.
	*/
	int opCmp(ref const URL rhs) const nothrow {
		if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
		if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
		if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
		if (m_port != rhs.m_port) return m_port < rhs.m_port ? -1 : 1;
		return 0;
	}
}

unittest { // ordering must be consistent with equality
	auto a = URL("http://example.com/a");
	auto b = URL("http://example.com/a");
	assert(a == b);
	assert(a.opCmp(b) == 0);
	assert(!(a < b) && !(a > b));

	auto c = URL("http://example.com:81/a");
	auto d = URL("http://example.com:82/a");
	assert(c != d);
	assert(c < d && d > c);

	auto e = URL("http://example.com/b");
	assert(a < e && e > a);
}

unittest { // parentURL must not invent an explicit port
	auto p = URL("http://example.com/a/b").parentURL;
	assert(p.port == 80);
	assert(p.toString() == "http://example.com" ~ p.pathString, p.toString());

	p = URL("http://example.com:8080/a/b").parentURL;
	assert(p.port == 8080);
	assert(p.toString() == "http://example.com:8080" ~ p.pathString, p.toString());
}

unittest { // invalid percent encoding in the query is reported as an exception
	import std.exception : assertThrown;
	assertThrown!Exception(URL("http://example.com/?a=%").normalized);
	assertThrown!Exception(URL("http://example.com/#%4").normalized);
}

/** Determines whether `schema` is non-empty and consists only of ASCII
	letters, digits, '+', '.' and '-'.
*/
bool isValidSchema(string schema)
@safe pure nothrow {
	if (schema.length < 1) return false;

	foreach (char ch; schema) {
		switch (ch) {
			default: return false;
			case 'a': .. case 'z': break;
			case 'A': .. case 'Z': break;
			case '0': .. case '9': break;
			case '+', '.', '-': break;
		}
	}

	return true;
}

unittest {
	assert(isValidSchema("http+ssh"));
	assert(isValidSchema("http"));
	assert(!isValidSchema("http/ssh"));
	assert(isValidSchema("HTtp"));
}


/** Determines whether `name` is a syntactically valid host name.

	The name must be 1 to 255 bytes long and consist of dot separated labels of
	1 to 63 bytes each, made up of ASCII letters, digits and '-', where a label
	must not start with '-'.
*/
bool isValidHostName(string name)
@safe pure nothrow {
	import std.algorithm.iteration : splitter;
	import std.string : representation;

	// According to RFC 1034
	if (name.length < 1) return false;
	if (name.length > 255) return false;
	// iterate over raw bytes to avoid UTF decoding (which is not nothrow)
	foreach (seg; name.representation.splitter('.')) {
		if (seg.length < 1) return false;
		if (seg.length > 63) return false;
		if (seg[0] == '-') return false;

		foreach (char ch; seg) {
			switch (ch) {
				default: return false;
				case 'a': .. case 'z': break;
				case 'A': .. case 'Z': break;
				case '0': .. case '9': break;
				case '-': break;
			}
		}
	}
	return true;
}

unittest {
	assert(isValidHostName("foo"));
	assert(isValidHostName("foo-"));
	assert(isValidHostName("foo.bar"));
	assert(isValidHostName("foo.bar-baz"));
	assert(isValidHostName("foo1"));
	assert(!isValidHostName("-foo"));
}


// True for the three path types that can be appended to a URL
private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);

// Registry of double-slash style schemas and their default ports; replaced
// as a whole (never mutated in place) by `registerCommonInternetSchema`
private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;

shared static this() {
	auto initial_schemas = new SchemaDefaultPortMap;
	initial_schemas.add("file", 0);
	initial_schemas.add("tcp", 0);
	initial_schemas.add("ftp", 21);
	initial_schemas.add("sftp", 22);
	initial_schemas.add("http", 80);
	initial_schemas.add("https", 443);
	initial_schemas.add("http+unix", 80);
	initial_schemas.add("https+unix", 443);
	initial_schemas.add("spdy", 443);
	initial_schemas.add("ws", 80);
	initial_schemas.add("wss", 443);
	initial_schemas.add("redis", 6379);
	initial_schemas.add("rtsp", 554);
	initial_schemas.add("rtsps", 322);

	map_commonInternetSchemas = cast(immutable)initial_schemas;
}

deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
	registerCommonInternetSchema(schema, 0);
}

/** Adds the name of a schema to be treated as double-slash style.
690 691 Params: 692 schema = Name of the schema 693 port = Default port for the schema 694 695 See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1 696 */ 697 void registerCommonInternetSchema(string schema, ushort port) 698 @trusted nothrow { 699 import core.atomic : atomicLoad, cas; 700 import std.uni : toLower; 701 702 string lowerschema; 703 try { 704 lowerschema = schema.toLower(); 705 } catch (Exception e) { 706 assert(false, e.msg); 707 } 708 709 assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported"); 710 711 while (true) { 712 auto olds = atomicLoad(map_commonInternetSchemas); 713 auto news = olds ? olds.dup : new SchemaDefaultPortMap; 714 news.add(lowerschema, port); 715 static if (__VERSION__ < 2094) { 716 // work around bogus shared violation error on earlier versions of Druntime 717 if (cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)olds, cast(shared(SchemaDefaultPortMap)*)news)) 718 break; 719 } else { 720 if (cas(&map_commonInternetSchemas, olds, cast(immutable)news)) 721 break; 722 } 723 } 724 } 725 726 727 /** Determines whether an URL schema is double-slash based. 728 729 Double slash based schemas are of the form `schema://[host]/<path>` 730 and are parsed differently compared to generic schemas, which are simply 731 parsed as `schema:<path>`. 732 733 Built-in recognized double-slash schemas: ftp, http, https, 734 http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp, 735 rtsp, rtsps 736 737 See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1 738 */ 739 bool isCommonInternetSchema(string schema) 740 @safe nothrow @nogc { 741 import core.atomic : atomicLoad; 742 char[128] buffer; 743 744 if (schema.length >= 128) return false; 745 746 foreach (ix, char c; schema) 747 { 748 if (!isASCII(c)) return false; 749 buffer[ix] = toLower(c); 750 } 751 752 scope lowerschema = buffer[0 .. 
schema.length]; 753 754 return () @trusted { 755 auto set = atomicLoad(map_commonInternetSchemas); 756 return set ? set.contains(cast(string) lowerschema) : false; 757 } (); 758 } 759 760 unittest { 761 assert(isCommonInternetSchema("http")); 762 assert(isCommonInternetSchema("HTtP")); 763 assert(URL.defaultPort("http") == 80); 764 assert(!isCommonInternetSchema("foobar")); 765 registerCommonInternetSchema("fooBar", 2522); 766 assert(isCommonInternetSchema("foobar")); 767 assert(isCommonInternetSchema("fOObAR")); 768 assert(URL.defaultPort("foobar") == 2522); 769 assert(URL.defaultPort("fOObar") == 2522); 770 771 assert(URL.defaultPort("unregistered") == 0); 772 } 773 774 775 private struct SchemaDefaultPortMap { 776 ushort[string] m_data; 777 778 void add(string str, ushort port) @safe nothrow { m_data[str] = port; } 779 bool contains(string str) const @safe nothrow @nogc { return !!(str in m_data); } 780 ushort get(string str) const @safe nothrow { return m_data[str]; } 781 SchemaDefaultPortMap* dup() const @safe nothrow { 782 auto ret = new SchemaDefaultPortMap; 783 foreach (s; m_data.byKeyValue) ret.add(s.key, s.value); 784 return ret; 785 } 786 } 787 788 // Puny encoding 789 private { 790 /** Bootstring parameters for Punycode 791 These parameters are designed for Unicode 792 793 See also: RFC 3492 Section 5 794 */ 795 enum uint base = 36; 796 enum uint tmin = 1; 797 enum uint tmax = 26; 798 enum uint skew = 38; 799 enum uint damp = 700; 800 enum uint initial_bias = 72; 801 enum uint initial_n = 128; 802 803 /* Bias adaptation 804 805 See also: RFC 3492 Section 6.1 806 */ 807 uint punyAdapt (uint pdelta, int numpoints, bool firsttime) 808 @safe @nogc nothrow pure { 809 uint delta = firsttime ? 
pdelta / damp : pdelta / 2; 810 delta += delta / numpoints; 811 uint k = 0; 812 813 while (delta > ((base - tmin) * tmax) / 2) 814 { 815 delta /= (base - tmin); 816 k += base; 817 } 818 819 return k + (((base - tmin + 1) * delta) / (delta + skew)); 820 } 821 822 /* Converts puny digit-codes to code point 823 824 See also: RFC 3492 Section 5 825 */ 826 dchar punyDigitToCP (uint digit) 827 @safe @nogc nothrow pure { 828 return cast(dchar) (digit + 22 + 75 * (digit < 26)); 829 } 830 831 /* Encodes `input` with puny encoding 832 833 If input is all characters below `initial_n` 834 input is returned as is. 835 836 See also: RFC 3492 Section 6.3 837 */ 838 string punyEncode (in string input) 839 @safe { 840 uint n = initial_n; 841 uint delta = 0; 842 uint bias = initial_bias; 843 uint h; 844 uint b; 845 dchar m = dchar.max; // minchar 846 bool delta_overflow; 847 848 uint input_len = 0; 849 auto output = appender!string(); 850 851 output.put("xn--"); 852 853 foreach (dchar cp; input) 854 { 855 if (cp <= initial_n) 856 { 857 output.put(cast(char) cp); 858 h += 1; 859 } 860 // Count length of input as code points, `input.length` counts bytes 861 input_len += 1; 862 } 863 864 b = h; 865 if (b == input_len) 866 return input; // No need to puny encode 867 868 if (b > 0) 869 output.put('-'); 870 871 while (h < input_len) 872 { 873 m = dchar.max; 874 foreach (dchar cp; input) 875 { 876 if (n <= cp && cp < m) 877 m = cp; 878 } 879 880 assert(m != dchar.max, "Punyencoding failed, cannot find code point"); 881 882 delta = addu(delta, ((m - n) * (h + 1)), delta_overflow); 883 assert(!delta_overflow, "Punyencoding failed, delta overflow"); 884 885 n = m; 886 887 foreach (dchar cp; input) 888 { 889 if (cp < n) 890 delta += 1; 891 892 if (cp == n) 893 { 894 uint q = delta; 895 uint k = base; 896 897 while (true) 898 { 899 uint t; 900 if (k <= bias /* + tmin */) 901 t = tmin; 902 else if (k >= bias + tmax) 903 t = tmax; 904 else 905 t = k - bias; 906 907 if (q < t) break; 908 909 
output.put(punyDigitToCP(t + ((q - t) % (base - t)))); 910 q = (q - t) / (base - t); 911 k += base; 912 } 913 output.put(punyDigitToCP(q)); 914 bias = punyAdapt(delta, h + 1, h == b); 915 delta = 0; 916 h += 1; 917 } 918 } 919 delta += 1; 920 n += 1; 921 } 922 923 return output.data; 924 } 925 } 926 927 unittest { // IPv6 928 auto urlstr = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc"; 929 auto url = URL.parse(urlstr); 930 assert(url.schema == "http", url.schema); 931 assert(url.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", url.host); 932 assert(url.port == 8091); 933 assert(url.path == InetPath("/abc"), url.path.toString()); 934 assert(url.toString == urlstr); 935 936 url.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc"; 937 urlstr = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc"; 938 assert(url.toString == urlstr); 939 } 940 941 942 unittest { 943 auto urlstr = "https://www.example.net/index.html"; 944 auto url = URL.parse(urlstr); 945 assert(url.schema == "https", url.schema); 946 assert(url.host == "www.example.net", url.host); 947 assert(url.path == InetPath("/index.html"), url.path.toString()); 948 assert(url.port == 443); 949 assert(url.toString == urlstr); 950 951 urlstr = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor"; 952 url = URL.parse(urlstr); 953 assert(url.schema == "http", url.schema); 954 assert(url.username == "jo.doe", url.username); 955 assert(url.password == "password", url.password); 956 assert(url.port == 4711, to!string(url.port)); 957 assert(url.host == "sub.www.example.net", url.host); 958 assert(url.path.toString() == "/sub2/index.html", url.path.toString()); 959 assert(url.queryString == "query", url.queryString); 960 assert(url.anchor == "anchor", url.anchor); 961 assert(url.toString == urlstr); 962 } 963 964 unittest { // issue #1044 965 URL url = URL.parse("http://example.com/p?query#anchor"); 966 assert(url.schema == "http"); 967 assert(url.host == "example.com"); 968 
assert(url.port == 80); 969 assert(url.queryString == "query"); 970 assert(url.anchor == "anchor"); 971 assert(url.pathString == "/p"); 972 url.localURI = "/q"; 973 assert(url.schema == "http"); 974 assert(url.host == "example.com"); 975 assert(url.queryString == ""); 976 assert(url.anchor == ""); 977 assert(url.pathString == "/q"); 978 url.localURI = "/q?query"; 979 assert(url.schema == "http"); 980 assert(url.host == "example.com"); 981 assert(url.queryString == "query"); 982 assert(url.anchor == ""); 983 assert(url.pathString == "/q"); 984 url.localURI = "/q#anchor"; 985 assert(url.schema == "http"); 986 assert(url.host == "example.com"); 987 assert(url.queryString == ""); 988 assert(url.anchor == "anchor"); 989 assert(url.pathString == "/q"); 990 } 991 992 //websocket unittest 993 unittest { 994 URL url = URL("ws://127.0.0.1:8080/echo"); 995 assert(url.host == "127.0.0.1"); 996 assert(url.port == 8080); 997 assert(url.localURI == "/echo"); 998 } 999 1000 //rtsp unittest 1001 unittest { 1002 URL url = URL("rtsp://127.0.0.1:554/echo"); 1003 assert(url.host == "127.0.0.1"); 1004 assert(url.port == 554); 1005 assert(url.localURI == "/echo"); 1006 } 1007 1008 unittest { 1009 auto p = PosixPath("/foo bar/boo oom/"); 1010 URL url = URL("http", "example.com", 0, p); // constructor test 1011 assert(url.path == cast(InetPath)p); 1012 url.path = p; 1013 assert(url.path == cast(InetPath)p); // path assignement test 1014 assert(url.pathString == "/foo%20bar/boo%20oom/"); 1015 assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/"); 1016 url.pathString = "/foo%20bar/boo%2foom/"; 1017 assert(url.pathString == "/foo%20bar/boo%2foom/"); 1018 assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/"); 1019 } 1020 1021 unittest { 1022 URL url = URL("http://user:password@example.com"); 1023 assert(url.toString() == "http://user:password@example.com"); 1024 1025 url = URL("http://user@example.com"); 1026 assert(url.toString() == "http://user@example.com"); 1027 
} 1028 1029 unittest { 1030 auto url = URL("http://example.com/some%2bpath"); 1031 assert((cast(PosixPath)url.path).toString() == "/some+path", url.path.toString()); 1032 } 1033 1034 unittest { 1035 assert(URL("file:///test").pathString == "/test"); 1036 assert(URL("file:///test").port == 0); 1037 assert(URL("file:///test").path.toString() == "/test"); 1038 assert(URL("file://test").host == "test"); 1039 assert(URL("file://test").pathString() == ""); 1040 assert(URL("file://./test").host == "."); 1041 assert(URL("file://./test").pathString == "/test"); 1042 assert(URL("file://./test").path.toString() == "/test"); 1043 } 1044 1045 unittest { // issue #1318 1046 try { 1047 URL("http://something/inval%id"); 1048 assert(false, "Expected to throw an exception."); 1049 } catch (Exception e) {} 1050 } 1051 1052 unittest { 1053 assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "http+unix"); 1054 assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix"); 1055 assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").host == "%2Fvar%2Frun%2Fdocker.sock"); 1056 assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").pathString == ""); 1057 assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json").pathString == "/container/json"); 1058 auto url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json"); 1059 assert(URL(url.toString()) == url); 1060 } 1061 1062 unittest { 1063 import vibe.data.serialization; 1064 static assert(isStringSerializable!URL); 1065 } 1066 1067 unittest { // issue #1732 1068 auto url = URL("tcp://0.0.0.0:1234"); 1069 url.port = 4321; 1070 assert(url.toString == "tcp://0.0.0.0:4321", url.toString); 1071 } 1072 1073 unittest { // host name role in file:// URLs 1074 auto url = URL.parse("file:///foo/bar"); 1075 assert(url.host == ""); 1076 assert(url.path == InetPath("/foo/bar")); 1077 assert(url.toString() == "file:///foo/bar"); 1078 1079 url = URL.parse("file://foo/bar/baz"); 1080 assert(url.host == "foo"); 1081 
assert(url.path == InetPath("/bar/baz")); 1082 assert(url.toString() == "file://foo/bar/baz"); 1083 } 1084 1085 unittest { // native path <-> URL conversion 1086 import std.exception : assertThrown; 1087 1088 auto url = URL(NativePath("/foo/bar")); 1089 assert(url.schema == "file"); 1090 assert(url.host == ""); 1091 assert(url.path == InetPath("/foo/bar")); 1092 assert(url.toNativePath == NativePath("/foo/bar")); 1093 1094 assertThrown(URL("http://example.org/").toNativePath); 1095 assertThrown(URL(NativePath("foo/bar"))); 1096 } 1097 1098 unittest { // URL Normalization 1099 auto url = URL.parse("http://example.com/foo%2a"); 1100 assert(url.normalized.toString() == "http://example.com/foo%2A"); 1101 1102 url = URL.parse("HTTP://User@Example.COM/Foo"); 1103 assert(url.normalized.toString() == "http://User@example.com/Foo"); 1104 1105 url = URL.parse("http://example.com/%7Efoo"); 1106 assert(url.normalized.toString() == "http://example.com/~foo"); 1107 1108 url = URL.parse("http://example.com/foo/./bar/baz/../qux"); 1109 assert(url.normalized.toString() == "http://example.com/foo/bar/qux"); 1110 1111 url = URL.parse("http://example.com"); 1112 assert(url.normalized.toString() == "http://example.com/"); 1113 1114 url = URL.parse("http://example.com:80/"); 1115 assert(url.normalized.toString() == "http://example.com/"); 1116 1117 url = URL.parse("hTTPs://examPLe.COM:443/my/path"); 1118 assert(url.normalized.toString() == "https://example.com/my/path"); 1119 1120 url = URL.parse("http://example.com/foo"); 1121 url.normalize(true); 1122 assert(url.toString() == "http://example.com/foo/"); 1123 } 1124 1125 version (Windows) unittest { // Windows drive letter paths 1126 auto url = URL(WindowsPath(`C:\foo`)); 1127 assert(url.schema == "file"); 1128 assert(url.host == ""); 1129 assert(url.path == InetPath("/C:/foo")); 1130 auto p = url.toNativePath; 1131 p.normalize(); 1132 assert(p == WindowsPath(`C:\foo`)); 1133 } 1134 1135 version (Windows) unittest { // UNC paths 1136 
auto url = URL(WindowsPath(`\\server\share\path`)); 1137 assert(url.schema == "file"); 1138 assert(url.host == "server"); 1139 assert(url.path == InetPath("/share/path")); 1140 1141 auto p = url.toNativePath; 1142 p.normalize(); // convert slash to backslash if necessary 1143 assert(p == WindowsPath(`\\server\share\path`)); 1144 }