/**
	URL parsing routines.

	Copyright: © 2012-2017 Sönke Ludwig
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module vibe.inet.url;

public import vibe.core.path;

import vibe.textfilter.urlencode;
import vibe.utils.string;

import std.array;
import std.algorithm;
import std.conv;
import std.exception;
import std.string;
import std.traits : isInstanceOf;
import std.ascii : isAlpha, isASCII, toLower;
import std.uri: decode, encode;

import core.checkedint : addu;


/** Parses a user-provided URL with relaxed rules.

	Unlike `URL.parse`, this allows the URL to use special characters as part of
	the host name and path, automatically employing puny code or percent-encoding
	to convert this to a valid URL.

	The result is returned in normalized form (see `URL.normalize`).

	Params:
		url = String representation of the URL
		default_schema = If `url` does not contain a schema name, the URL parser
			may choose to use this schema instead. A browser might use "http" or
			"https", for example.
*/
URL parseUserURL(string url, string default_schema)
{
	// Only prepend the default schema if the input neither is a local URI
	// (leading slash) nor already contains a "schema://" prefix.
	if (default_schema.length && !url.startsWith("/") && !url.canFind("://"))
		url = default_schema ~ "://" ~ url;

	return URL(url, false).normalized;
}

unittest {
	// special characters in path
	auto url = parseUserURL("http://example.com/hello-🌍", "foo");
	assert(url.pathString == "/hello-%F0%9F%8C%8D");
	url = parseUserURL("http://example.com/안녕하세요-세계", "foo");
	assert(url.pathString == "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84");
	// special characters in host name
	url = parseUserURL("http://hello-🌍.com/", "foo");
	assert(url.host == "xn--hello--8k34e.com");
	url = parseUserURL("http://hello-🌍.com:8080/", "foo");
	assert(url.host == "xn--hello--8k34e.com");
	url = parseUserURL("http://i-❤-이모티콘.io", "foo");
	assert(url.host == "xn--i---5r6aq903fubqabumj4g.io");
	url = parseUserURL("https://hello🌍.i-❤-이모티콘.com", "foo");
	assert(url.host == "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com");
	// default schema addition
	assert(parseUserURL("example.com/foo/bar", "sftp") == URL("sftp://example.com/foo/bar"));
	assert(parseUserURL("example.com:1234", "https") == URL("https://example.com:1234/"));
}


/**
	Represents a URL decomposed into its components.
*/
struct URL {
@safe:
	private {
		string m_schema;
		InetPath m_path;
		string m_host;
		ushort m_port; // 0 means "no explicit port"
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
	}

	/// Constructs a new URL object from its components.
	this(string schema, string host, ushort port, InetPath path) pure nothrow
	in {
		assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
		assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
	}
	do {
		m_schema = schema;
		m_host = host;
		m_port = port;
		m_path = path;
	}
	/// ditto
	this(string schema, InetPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, PosixPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, PosixPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, WindowsPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, WindowsPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}

	/** Constructs a "file:" URL from a native file system path.

		Note that the path must be absolute. On Windows, both, paths starting
		with a drive letter and UNC paths are supported.

		Throws:
			An `Exception` if `path` is not absolute.
	*/
	this(WindowsPath path) pure
	{
		import std.algorithm.iteration : map;
		import std.range : chain, only, repeat;

		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		// treat UNC paths properly
		if (path.startsWith(WindowsPath(`\\`))) {
			static if (is(InetPath.Segment2)) {
				auto segs = path.bySegment2;
			} else {
				auto segs = path.bySegment;
			}
			// skip the two leading segments of the `\\` prefix; the segment
			// that follows is used as the URL host
			segs.popFront();
			segs.popFront();
			auto host = segs.front.name;
			segs.popFront();

			// rebuild an absolute path from the remaining segments
			InetPath ip;
			static if (is(InetPath.Segment2)) {
				ip = InetPath(only(InetPath.Segment2.fromTrustedString("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment2)s)));
			} else {
				ip = InetPath(only(InetPath.Segment("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment)s)));
			}

			this("file", host, 0, ip);
		}
		// Non-UNC paths have no host part. Pass `null` explicitly instead of
		// `host`, which in this scope would resolve to the `host` property of
		// the not yet constructed object.
		else this("file", null, 0, cast(InetPath)path);
	}
	/// ditto
	this(PosixPath path) pure
	{
		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		this("file", null, 0, cast(InetPath)path);
	}

	/** Constructs a URL from its string representation.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		this(url_string, true);
	}

	/* Parses `url_string` into its components.

		If `encoded` is `false`, the input is treated as raw user input: the
		labels of the host name get puny encoded and the local part gets
		percent-encoded before it is stored.
	*/
	private this(string url_string, bool encoded)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		// a leading slash denotes a local URI without schema/host part
		if( str[0] != '/' ){
			auto idx = str.indexOf(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			enforce(m_schema[0].isAlpha,
					"Schema must start with an alphabetical char, found: " ~
					m_schema[0]);
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (str.startsWith("//")) {
				// proto://server/path style
				requires_host = true;
				str = str[2 .. $];
			}

			// si: end of the authority part, ai: position of the user info separator
			auto si = str.indexOf('/');
			if( si < 0 ) si = str.length;
			auto ai = str[0 .. si].indexOf('@');
			sizediff_t hs = 0;
			if( ai >= 0 ){
				hs = ai+1;
				auto ci = str[0 .. ai].indexOf(':');
				if( ci >= 0 ){
					m_username = str[0 .. ci];
					m_password = str[ci+1 .. ai];
				} else m_username = str[0 .. ai];
				enforce(m_username.length > 0, "Empty user name in URL.");
			}

			m_host = str[hs .. si];

			// Extracts the port following the first ':' in `src` (if any) into
			// `m_port` and returns the index of that ':'.
			auto findPort ( string src )
			{
				auto pi = src.indexOf(':');
				if(pi > 0) {
					enforce(pi < src.length-1, "Empty port in URL.");
					m_port = to!ushort(src[pi+1..$]);
				}
				return pi;
			}


			auto ip6 = m_host.indexOf('[');
			if (ip6 == 0) { // [ must be first char
				auto pe = m_host.indexOf(']');
				if (pe > 0) {
					// only look for the port after the closing bracket, the
					// address itself contains colons
					findPort(m_host[pe..$]);
					m_host = m_host[1 .. pe];
				}
			}
			else {
				auto pi = findPort(m_host);
				if(pi > 0) {
					m_host = m_host[0 .. pi];
				}
				if (!encoded)
					m_host = m_host.splitter('.').map!(punyEncode).join('.');
			}

			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
					"Empty server name in URL.");
			str = str[si .. $];
		}

		this.localURI = (encoded) ? str : str.encode;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}
	/// ditto
	static URL fromString(string url_string)
	{
		return URL(url_string);
	}

	/// The schema/protocol part of the URL
	@property string schema() const nothrow { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The url encoded path part of the URL
	@property string pathString() const nothrow { return m_path.toString; }

	/// Set the path part of the URL. It should be properly encoded.
	@property void pathString(string s)
	{
		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
		m_path = InetPath(s);
	}

	/// The path part of the URL
	@property InetPath path() const nothrow { return m_path; }
	/// ditto
	@property void path(InetPath p)
	nothrow {
		m_path = p;
	}
	/// ditto
	@property void path(Path)(Path p)
		if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
	{
		m_path = cast(InetPath)p;
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const pure nothrow { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/// The port part of the URL (optional)
	@property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
	/// ditto
	@property port(ushort v) nothrow { m_port = v; }

	/// Get the default port for the given schema or 0
	static ushort defaultPort(string schema)
	nothrow {
		import core.atomic : atomicLoad;
		import std.uni : toLower;

		string lowerschema;

		try
			lowerschema = schema.toLower();
		catch (Exception e)
			assert(false, e.msg);

		if (auto set = atomicLoad(map_commonInternetSchemas))
			if (set.contains(lowerschema))
				return set.get(lowerschema);

		return 0;
	}
	/// ditto
	ushort defaultPort()
	const nothrow {
		return defaultPort(m_schema);
	}

	/// The user name part of the URL (optional)
	@property string username() const nothrow { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const nothrow { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const nothrow { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const nothrow { return m_anchor; }

	/// The path part plus query string and anchor
	@property string localURI()
	const nothrow {
		auto str = appender!string();
		str.put(m_path.toString);
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		// split off the anchor first, as it may itself contain a '?'
		auto ai = str.indexOf('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		} else m_anchor = null;

		auto qi = str.indexOf('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		} else m_queryString = null;

		this.pathString = str;
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property URL parentURL()
	const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		ret.port = port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/// Converts this URL object to its string representation.
	string toString()
	const nothrow {
		auto dst = appender!string();
		try this.toString(dst);
		catch (Exception e) assert(false, e.msg);
		return dst.data;
	}

	/// Ditto
	void toString(OutputRange) (ref OutputRange dst) const {
		import std.format;
		dst.put(schema);
		dst.put(":");
		if (isCommonInternetSchema(schema))
			dst.put("//");
		if (m_username.length || m_password.length) {
			dst.put(username);
			if (m_password.length)
			{
				dst.put(':');
				dst.put(password);
			}
			dst.put('@');
		}

		import std.algorithm : canFind;
		// a host containing a colon is written in brackets (IPv6 literal)
		auto ipv6 = host.canFind(":");

		if ( ipv6 ) dst.put('[');
		dst.put(host);
		if ( ipv6 ) dst.put(']');

		// only an explicitly set port is written
		if (m_port > 0)
			formattedWrite(dst, ":%d", m_port);

		dst.put(localURI);
	}

	/** Converts a "file" URL back to a native file system path.

		Throws:
			An `Exception` if the schema of this URL is not "file".
	*/
	NativePath toNativePath()
	const {
		import std.algorithm.iteration : map;
		import std.range : dropOne;

		enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");

		version (Windows) {
			// a non-empty host is mapped back to a UNC path
			if (this.host.length) {
				static if (is(NativePath.Segment2)) {
					auto p = NativePath(this.path
						.bySegment2
						.dropOne
						.map!(s => cast(WindowsPath.Segment2)s)
					);
				} else {
					auto p = NativePath(this.path
						.bySegment
						.dropOne
						.map!(s => cast(WindowsPath.Segment)s)
					);
				}
				return NativePath.fromTrustedString(`\\`~this.host) ~ p;
			}
		}

		return cast(NativePath)this.path;
	}

	/** Decode percent encoded triplets for unreserved or convert to uppercase

		Throws:
			An `Exception` if a `%` is not followed by two more characters, or
			a conversion exception if these are not valid hexadecimal digits.
	*/
	private string normalize_percent_encoding(scope const(char)[] input)
	{
		auto normalized = appender!string;
		normalized.reserve(input.length);

		for (size_t i = 0; i < input.length; i++)
		{
			const char c = input[i];
			if (c == '%')
			{
				// Query string and anchor are stored without validation, so a
				// truncated triplet is an input error rather than a logic
				// error - report it with an exception instead of an assertion.
				enforce(input.length >= i + 3, "Invalid percent encoding");

				char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
				switch (conv)
				{
					case 'A': .. case 'Z':
					case 'a': .. case 'z':
					case '0': .. case '9':
					case '-': case '.': case '_': case '~':
						normalized ~= conv; // Decode unreserved
						break;
					default:
						normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
						break;
				}

				// skip the two hex digits that have just been consumed
				i += 2;
			}
			else
				normalized ~= c;
		}

		return normalized.data;
	}

	/**
	  * Normalize the content of this `URL` in place
	  *
	  * Normalization can be used to create a more consistent and human-friendly
	  * string representation of the `URL`.
	  * The list of transformations applied in the process of normalization is as follows:
			- Converting schema and host to lowercase
			- Removing port if it is the default port for schema
			- Removing dot segments in path
			- Converting percent-encoded triplets to uppercase
			- Adding slash when path is empty
			- Adding slash to path when path represents a directory
			- Decoding percent encoded triplets for unreserved characters
				A-Z a-z 0-9 - . _ ~

		Params:
			isDirectory = Path of the URL represents a directory, if one is
				not already present, a trailing slash will be appended when `true`

		Throws:
			An `Exception` if the query string, anchor or path contains an
			invalid percent-encoded triplet.
	*/
	void normalize(bool isDirectory = false)
	{
		import std.uni : toLower;

		// Lowercase host and schema
		this.m_schema = this.m_schema.toLower();
		this.m_host = this.m_host.toLower();

		// Remove default port
		if (this.m_port == URL.defaultPort(this.m_schema))
			this.m_port = 0;

		// Normalize percent encoding, decode unreserved or uppercase hex
		this.m_queryString = normalize_percent_encoding(this.m_queryString);
		this.m_anchor = normalize_percent_encoding(this.m_anchor);

		// Normalize path (first remove dot segments then normalize path segments)
		this.m_path = InetPath(this.m_path.normalized.bySegment2.map!(
				n => InetPath.Segment2.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
			).array);

		// Add trailing slash to empty path
		if (this.m_path.empty || isDirectory)
			this.m_path.endsWithSlash = true;
	}

	/** Returns the normalized form of the URL.

		See `normalize` for a full description.
	*/
	URL normalized()
	const {
		URL ret = this;
		ret.normalize();
		return ret;
	}

	/** Tests whether this URL starts with `rhs`.

		Schema and host must be equal and the path segments of `rhs` must be a
		prefix of the path segments of this URL. All other components are
		currently ignored.
	*/
	bool startsWith(const URL rhs)
	const nothrow {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		static if (is(InetPath.Segment2))
			return this.path.bySegment2.startsWith(rhs.path.bySegment2);
		else return this.path.bySegment.startsWith(rhs.path.bySegment);
	}

	/// Appends a path or path segment to the path of the URL (`~` and `~=`).
	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// ditto
	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// ditto
	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	/// ditto
	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	static if (is(InetPath.Segment2)) {
		/// ditto
		URL opBinary(string OP, Path)(Path.Segment2 rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
		/// ditto
		void opOpAssign(string OP, Path)(Path.Segment2 rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	}

	/// Tests two URLs for equality using '=='.
	bool opEquals(ref const URL rhs)
	const nothrow {
		if (m_schema != rhs.m_schema) return false;
		if (m_host != rhs.m_host) return false;
		if (m_path != rhs.m_path) return false;
		if (m_port != rhs.m_port) return false;
		return true;
	}
	/// ditto
	bool opEquals(const URL other) const nothrow { return opEquals(other); }

	/** Orders two URLs by schema, host, path and port (in this order).

		The same components as in `opEquals` are taken into account, so that
		the result is `0` for URLs that compare equal using `==`.
	*/
	int opCmp(ref const URL rhs) const nothrow {
		if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
		if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
		if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
		if (m_port != rhs.m_port) return m_port < rhs.m_port ? -1 : 1;
		return 0;
	}
}

/** Checks whether `schema` is a syntactically valid schema name.

	A valid name is non-empty and consists only of ASCII letters, digits and
	the characters `+`, `.` and `-`.
*/
bool isValidSchema(string schema)
@safe pure nothrow {
	if (schema.length < 1) return false;

	foreach (char ch; schema) {
		switch (ch) {
			default: return false;
			case 'a': .. case 'z': break;
			case 'A': .. case 'Z': break;
			case '0': .. case '9': break;
			case '+', '.', '-': break;
		}
	}

	return true;
}

unittest {
	assert(isValidSchema("http+ssh"));
	assert(isValidSchema("http"));
	assert(!isValidSchema("http/ssh"));
	assert(isValidSchema("HTtp"));
}


/** Checks whether `name` is a syntactically valid host name.

	The name must be between 1 and 255 characters long. Each of its dot
	separated labels must be between 1 and 63 characters long, must consist
	only of ASCII letters, digits and `-`, and must not start with `-`.
*/
bool isValidHostName(string name)
@safe pure nothrow {
	import std.algorithm.iteration : splitter;
	import std.string : representation;

	// According to RFC 1034
	if (name.length < 1) return false;
	if (name.length > 255) return false;
	foreach (seg; name.representation.splitter('.')) {
		if (seg.length < 1) return false;
		if (seg.length > 63) return false;
		if (seg[0] == '-') return false;

		foreach (char ch; seg) {
			switch (ch) {
				default: return false;
				case 'a': .. case 'z': break;
				case 'A': .. case 'Z': break;
				case '0': .. case '9': break;
				case '-': break;
			}
		}
	}
	return true;
}

unittest {
	assert(isValidHostName("foo"));
	assert(isValidHostName("foo-"));
	assert(isValidHostName("foo.bar"));
	assert(isValidHostName("foo.bar-baz"));
	assert(isValidHostName("foo1"));
	assert(!isValidHostName("-foo"));
}


private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);

// Registry of double-slash style schemas and their default ports. The map is
// never modified in place; registering a schema swaps in an extended copy.
private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;

shared static this() {
	auto initial_schemas = new SchemaDefaultPortMap;
	initial_schemas.add("file", 0);
	initial_schemas.add("tcp", 0);
	initial_schemas.add("ftp", 21);
	initial_schemas.add("sftp", 22);
	initial_schemas.add("http", 80);
	initial_schemas.add("https", 443);
	initial_schemas.add("http+unix", 80);
	initial_schemas.add("https+unix", 443);
	initial_schemas.add("spdy", 443);
	initial_schemas.add("ws", 80);
	initial_schemas.add("wss", 443);
	initial_schemas.add("redis", 6379);
	initial_schemas.add("rtsp", 554);
	initial_schemas.add("rtsps", 322);

	map_commonInternetSchemas = cast(immutable)initial_schemas;
}

deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
	registerCommonInternetSchema(schema, 0);
}

/** Adds the name of a schema to be treated as double-slash style.

	Params:
		schema = Name of the schema
		port = Default port for the schema

	See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
	import core.atomic : atomicLoad, cas;
	import std.uni : toLower;

	string lowerschema;
	try {
		lowerschema = schema.toLower();
	} catch (Exception e) {
		assert(false, e.msg);
	}

	assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported");

	// Build an extended copy of the current map and try to swap it in; start
	// over if the map pointer has been replaced in the meantime.
	while (true) {
		auto olds = atomicLoad(map_commonInternetSchemas);
		auto news = olds ? olds.dup : new SchemaDefaultPortMap;
		news.add(lowerschema, port);
		static if (__VERSION__ < 2094) {
			// work around bogus shared violation error on earlier versions of Druntime
			if (cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)olds, cast(shared(SchemaDefaultPortMap)*)news))
				break;
		} else {
			if (cas(&map_commonInternetSchemas, olds, cast(immutable)news))
				break;
		}
	}
}


/** Determines whether an URL schema is double-slash based.

	Double slash based schemas are of the form `schema://[host]/<path>`
	and are parsed differently compared to generic schemas, which are simply
	parsed as `schema:<path>`.

	Built-in recognized double-slash schemas: ftp, http, https,
	http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp,
	rtsp, rtsps

	See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1
*/
bool isCommonInternetSchema(string schema)
@safe nothrow @nogc {
	import core.atomic : atomicLoad;
	// the schema is lowercased into a stack buffer to stay @nogc
	char[128] buffer;

	if (schema.length >= 128) return false;

	foreach (ix, char c; schema)
	{
		if (!isASCII(c)) return false;
		buffer[ix] = toLower(c);
	}

	scope lowerschema = buffer[0 .. schema.length];

	return () @trusted {
		auto set = atomicLoad(map_commonInternetSchemas);
		return set ? set.contains(cast(string) lowerschema) : false;
	} ();
}

unittest {
	assert(isCommonInternetSchema("http"));
	assert(isCommonInternetSchema("HTtP"));
	assert(URL.defaultPort("http") == 80);
	assert(!isCommonInternetSchema("foobar"));
	registerCommonInternetSchema("fooBar", 2522);
	assert(isCommonInternetSchema("foobar"));
	assert(isCommonInternetSchema("fOObAR"));
	assert(URL.defaultPort("foobar") == 2522);
	assert(URL.defaultPort("fOObar") == 2522);

	assert(URL.defaultPort("unregistered") == 0);
}


// Maps schema names to their default port.
private struct SchemaDefaultPortMap {
	ushort[string] m_data;

	void add(string str, ushort port) @safe nothrow { m_data[str] = port; }
	bool contains(string str) const @safe nothrow @nogc { return !!(str in m_data); }
	ushort get(string str) const @safe nothrow { return m_data[str]; }
	SchemaDefaultPortMap* dup() const @safe nothrow {
		auto ret = new SchemaDefaultPortMap;
		foreach (s; m_data.byKeyValue) ret.add(s.key, s.value);
		return ret;
	}
}

// Puny encoding
private {
	/** Bootstring parameters for Punycode
		These parameters are designed for Unicode

		See also: RFC 3492 Section 5
	*/
	enum uint base = 36;
	enum uint tmin = 1;
	enum uint tmax = 26;
	enum uint skew = 38;
	enum uint damp = 700;
	enum uint initial_bias = 72;
	enum uint initial_n = 128;

	/*	Bias adaptation

		See also: RFC 3492 Section 6.1
	*/
	uint punyAdapt (uint pdelta, int numpoints, bool firsttime)
	@safe @nogc nothrow pure {
		uint delta = firsttime ? pdelta / damp : pdelta / 2;
		delta += delta / numpoints;
		uint k = 0;

		while (delta > ((base - tmin) * tmax) / 2)
		{
			delta /= (base - tmin);
			k += base;
		}

		return k + (((base - tmin + 1) * delta) / (delta + skew));
	}

	/*	Converts puny digit-codes to code point

		Digits 0 to 25 map to 'a' to 'z', digits 26 to 35 map to '0' to '9'.

		See also: RFC 3492 Section 5
	*/
	dchar punyDigitToCP (uint digit)
	@safe @nogc nothrow pure {
		return cast(dchar) (digit + 22 + 75 * (digit < 26));
	}

	/*	Encodes `input` with puny encoding

		If input is all characters below `initial_n`
		input is returned as is.

		See also: RFC 3492 Section 6.3
	*/
	string punyEncode (in string input)
	@safe {
		uint n = initial_n;
		uint delta = 0;
		uint bias = initial_bias;
		uint h; // number of code points that have been handled
		uint b; // number of basic code points
		dchar m = dchar.max; // minchar
		bool delta_overflow;

		uint input_len = 0;
		auto output = appender!string();

		output.put("xn--");

		foreach (dchar cp; input)
		{
			// Basic code points are strictly below `initial_n`. U+0080 is not
			// one of them and would not fit into a single `char`.
			if (cp < initial_n)
			{
				output.put(cast(char) cp);
				h += 1;
			}
			// Count length of input as code points, `input.length` counts bytes
			input_len += 1;
		}

		b = h;
		if (b == input_len)
			return input; // No need to puny encode

		if (b > 0)
			output.put('-');

		while (h < input_len)
		{
			// find the smallest code point that has not been handled yet
			m = dchar.max;
			foreach (dchar cp; input)
			{
				if (n <= cp && cp < m)
					m = cp;
			}

			assert(m != dchar.max, "Punyencoding failed, cannot find code point");

			delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
			assert(!delta_overflow, "Punyencoding failed, delta overflow");

			n = m;

			foreach (dchar cp; input)
			{
				if (cp < n)
					delta += 1;

				if (cp == n)
				{
					// emit delta as a generalized variable-length integer
					uint q = delta;
					uint k = base;

					while (true)
					{
						uint t;
						if (k <= bias /* + tmin */)
							t = tmin;
						else if (k >= bias + tmax)
							t = tmax;
						else
							t = k - bias;

						if (q < t) break;

						output.put(punyDigitToCP(t + ((q - t) % (base - t))));
						q = (q - t) / (base - t);
						k += base;
					}
					output.put(punyDigitToCP(q));
					bias = punyAdapt(delta, h + 1, h == b);
					delta = 0;
					h += 1;
				}
			}
			delta += 1;
			n += 1;
		}

		return output.data;
	}
}

unittest { // puny encoding
	assert(punyEncode("example") == "example");
	assert(punyEncode("bücher") == "xn--bcher-kva");
	// U+0080 is the first non-basic code point and must get encoded
	assert(punyEncode("a\u0080") == "xn--a-ba");
}

unittest { // IPv6
	auto urlstr = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
	auto url = URL.parse(urlstr);
	assert(url.schema == "http", url.schema);
	assert(url.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", url.host);
	assert(url.port == 8091);
	assert(url.path == InetPath("/abc"), url.path.toString());
	assert(url.toString == urlstr);

	url.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
	urlstr = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
	assert(url.toString == urlstr);
}


unittest {
	auto urlstr = "https://www.example.net/index.html";
	auto url = URL.parse(urlstr);
	assert(url.schema == "https", url.schema);
	assert(url.host == "www.example.net", url.host);
	assert(url.path == InetPath("/index.html"), url.path.toString());
	assert(url.port == 443);
	assert(url.toString == urlstr);

	urlstr = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
	url = URL.parse(urlstr);
	assert(url.schema == "http", url.schema);
	assert(url.username == "jo.doe", url.username);
	assert(url.password == "password", url.password);
	assert(url.port == 4711, to!string(url.port));
	assert(url.host == "sub.www.example.net", url.host);
	assert(url.path.toString() == "/sub2/index.html", url.path.toString());
	assert(url.queryString == "query", url.queryString);
	assert(url.anchor == "anchor", url.anchor);
	assert(url.toString == urlstr);
}

unittest { // issue #1044
	URL url = URL.parse("http://example.com/p?query#anchor");
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.port == 80);
	assert(url.queryString == "query");
	assert(url.anchor == "anchor");
	assert(url.pathString == "/p");
	url.localURI = "/q";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "");
	assert(url.anchor == "");
	assert(url.pathString == "/q");
	url.localURI = "/q?query";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "query");
	assert(url.anchor == "");
	assert(url.pathString == "/q");
	url.localURI = "/q#anchor";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "");
	assert(url.anchor == "anchor");
	assert(url.pathString == "/q");
}

//websocket unittest
unittest {
	URL url = URL("ws://127.0.0.1:8080/echo");
	assert(url.host == "127.0.0.1");
	assert(url.port == 8080);
	assert(url.localURI == "/echo");
}

//rtsp unittest
unittest {
	URL url = URL("rtsp://127.0.0.1:554/echo");
	assert(url.host == "127.0.0.1");
	assert(url.port == 554);
	assert(url.localURI == "/echo");
}

unittest {
	auto p = PosixPath("/foo bar/boo oom/");
	URL url = URL("http", "example.com", 0, p); // constructor test
	assert(url.path == cast(InetPath)p);
	url.path = p;
	assert(url.path == cast(InetPath)p);					   // path assignment test
	assert(url.pathString == "/foo%20bar/boo%20oom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");
	url.pathString = "/foo%20bar/boo%2foom/";
	assert(url.pathString == "/foo%20bar/boo%2foom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
}

unittest {
	URL url = URL("http://user:password@example.com");
	assert(url.toString() == "http://user:password@example.com");

	url = URL("http://user@example.com");
	assert(url.toString() == "http://user@example.com");
}

unittest {
	auto url = URL("http://example.com/some%2bpath");
	assert((cast(PosixPath)url.path).toString() == "/some+path", url.path.toString());
}

unittest {
	assert(URL("file:///test").pathString == "/test");
	assert(URL("file:///test").port == 0);
	assert(URL("file:///test").path.toString() == "/test");
	assert(URL("file://test").host == "test");
	assert(URL("file://test").pathString() == "");
	assert(URL("file://./test").host == ".");
	assert(URL("file://./test").pathString == "/test");
	assert(URL("file://./test").path.toString() == "/test");
}

unittest { // issue #1318
	try {
		URL("http://something/inval%id");
		assert(false, "Expected to throw an exception.");
	} catch (Exception e) {}
}

unittest {
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "http+unix");
	assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").host == "%2Fvar%2Frun%2Fdocker.sock");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").pathString == "");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json").pathString == "/container/json");
	auto url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
	assert(URL(url.toString()) == url);
}

unittest {
	import vibe.data.serialization;
	static assert(isStringSerializable!URL);
}

unittest { // issue #1732
	auto url = URL("tcp://0.0.0.0:1234");
	url.port = 4321;
	assert(url.toString == "tcp://0.0.0.0:4321", url.toString);
}

unittest { // host name role in file:// URLs
	auto url = URL.parse("file:///foo/bar");
	assert(url.host == "");
	assert(url.path == InetPath("/foo/bar"));
	assert(url.toString() == "file:///foo/bar");

	url = URL.parse("file://foo/bar/baz");
	assert(url.host == "foo");
	assert(url.path == InetPath("/bar/baz"));
	assert(url.toString() == "file://foo/bar/baz");
}

unittest { // native path <-> URL conversion
	import std.exception : assertThrown;

	auto url = URL(NativePath("/foo/bar"));
	assert(url.schema == "file");
	assert(url.host == "");
	assert(url.path == InetPath("/foo/bar"));
	assert(url.toNativePath == NativePath("/foo/bar"));

	assertThrown(URL("http://example.org/").toNativePath);
	assertThrown(URL(NativePath("foo/bar")));
}

unittest { // URL Normalization
	auto url = URL.parse("http://example.com/foo%2a");
	assert(url.normalized.toString() == "http://example.com/foo%2A");

	url = URL.parse("HTTP://User@Example.COM/Foo");
	assert(url.normalized.toString() == "http://User@example.com/Foo");

	url = URL.parse("http://example.com/%7Efoo");
	assert(url.normalized.toString() == "http://example.com/~foo");

	url = URL.parse("http://example.com/foo/./bar/baz/../qux");
	assert(url.normalized.toString() == "http://example.com/foo/bar/qux");

	url = URL.parse("http://example.com");
	assert(url.normalized.toString() == "http://example.com/");

	url = URL.parse("http://example.com:80/");
	assert(url.normalized.toString() == "http://example.com/");

	url = URL.parse("hTTPs://examPLe.COM:443/my/path");
	assert(url.normalized.toString() == "https://example.com/my/path");

	url = URL.parse("http://example.com/foo");
	url.normalize(true);
	assert(url.toString() == "http://example.com/foo/");
}

unittest { // invalid percent encoding is reported as an exception
	import std.exception : assertThrown;

	assertThrown(URL.parse("http://example.com/?q=%").normalized);
	assertThrown(URL.parse("http://example.com/#frag%4").normalized);
}

unittest { // ordering is consistent with equality
	auto a = URL("http://example.com/a");
	auto b = URL("http://example.com/a");
	auto c = URL("http://example.com/b");
	auto d = URL("http://example.com:8080/a");

	assert(a == b);
	assert(a.opCmp(b) == 0);
	assert(a < c && c > a);
	assert(a != d);
	assert(a < d && d > a);
}

version (Windows) unittest { // Windows drive letter paths
	auto url = URL(WindowsPath(`C:\foo`));
	assert(url.schema == "file");
	assert(url.host == "");
	assert(url.path == InetPath("/C:/foo"));
	auto p = url.toNativePath;
	p.normalize();
	assert(p == WindowsPath(`C:\foo`));
}

version (Windows) unittest { // UNC paths
	auto url = URL(WindowsPath(`\\server\share\path`));
	assert(url.schema == "file");
	assert(url.host == "server");
	assert(url.path == InetPath("/share/path"));

	auto p = url.toNativePath;
	p.normalize(); // convert slash to backslash if necessary
	assert(p == WindowsPath(`\\server\share\path`));
}