1 /**
2 	URL parsing routines.
3 
4 	Copyright: © 2012-2017 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.inet.url;
9 
10 public import vibe.core.path;
11 
12 import vibe.textfilter.urlencode;
13 import vibe.utils.string;
14 
15 import std.array;
16 import std.algorithm;
17 import std.conv;
18 import std.exception;
19 import std.string;
20 import std.traits : isInstanceOf;
21 import std.ascii : isAlpha, isASCII, toLower;
22 import std.uri: decode, encode;
23 
24 import core.checkedint : addu;
25 
26 
/** Parses a user-provided URL with relaxed rules.

	Unlike `URL.parse`, this allows the URL to use special characters as part of
	the host name and path, automatically employing puny code or percent-encoding
	to convert this to a valid URL. The result is returned in normalized form
	(see `URL.normalize`).

	Params:
		url = String representation of the URL
		default_schema = If `url` does not contain a schema name (i.e. it
			neither starts with a slash nor contains "://"), this schema is
			prepended. A browser might use "http" or "https", for example.
			Pass an empty string to disable this behavior.

	Returns:
		The parsed and normalized URL.

	Throws:
		An `Exception` if the (possibly schema-prefixed) string cannot be
		parsed as a URL.
*/
URL parseUserURL(string url, string default_schema)
{
	// A leading slash denotes a schema-less local URI, which the URL parser
	// accepts as-is. Everything else without a "://" separator is treated as
	// "host[:port][/path]" and gets the default schema prepended.
	if (default_schema.length && !url.startsWith("/") && !url.canFind("://"))
		url = default_schema ~ "://" ~ url;

	return URL(url, false).normalized;
}
43 
44 unittest {
45 	// special characters in path
46 	auto url = parseUserURL("http://example.com/hello-🌍", "foo");
47 	assert(url.pathString == "/hello-%F0%9F%8C%8D");
48 	url = parseUserURL("http://example.com/안녕하세요-세계", "foo");
49 	assert(url.pathString == "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84");
50 	// special characters in host name
51 	url = parseUserURL("http://hello-🌍.com/", "foo");
52 	assert(url.host == "xn--hello--8k34e.com");
53 	url = parseUserURL("http://hello-🌍.com:8080/", "foo");
54 	assert(url.host == "xn--hello--8k34e.com");
55 	url = parseUserURL("http://i-❤-이모티콘.io", "foo");
56 	assert(url.host == "xn--i---5r6aq903fubqabumj4g.io");
57 	url = parseUserURL("https://hello🌍.i-❤-이모티콘.com", "foo");
58 	assert(url.host == "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com");
59 }
60 
61 
62 /**
63 	Represents a URL decomposed into its components.
64 */
65 struct URL {
66 @safe:
67 	private {
68 		string m_schema;
69 		InetPath m_path;
70 		string m_host;
71 		ushort m_port;
72 		string m_username;
73 		string m_password;
74 		string m_queryString;
75 		string m_anchor;
76 	}
77 
78 	/// Constructs a new URL object from its components.
79 	this(string schema, string host, ushort port, InetPath path) pure nothrow
80 	in {
81 		assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
82 		assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
83 	}
84 	do {
85 		m_schema = schema;
86 		m_host = host;
87 		m_port = port;
88 		m_path = path;
89 	}
90 	/// ditto
91 	this(string schema, InetPath path) pure nothrow
92 	in { assert(isValidSchema(schema)); }
93 	do {
94 		this(schema, null, 0, path);
95 	}
96 	/// ditto
97 	this(string schema, string host, ushort port, PosixPath path) pure nothrow
98 	in {
99 		assert(isValidSchema(schema));
100 		assert(host.length == 0 || isValidHostName(host));
101 	}
102 	do {
103 		InetPath ip;
104 		try ip = cast(InetPath)path;
105 		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
106 		this(schema, host, port, ip);
107 	}
108 	/// ditto
109 	this(string schema, PosixPath path) pure nothrow
110 	in { assert(isValidSchema(schema)); }
111 	do {
112 		this(schema, null, 0, path);
113 	}
114 	/// ditto
115 	this(string schema, string host, ushort port, WindowsPath path) pure nothrow
116 	in {
117 		assert(isValidSchema(schema));
118 		assert(host.length == 0 || isValidHostName(host));
119 	}
120 	do {
121 		InetPath ip;
122 		try ip = cast(InetPath)path;
123 		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
124 		this(schema, host, port, ip);
125 	}
126 	/// ditto
127 	this(string schema, WindowsPath path) pure nothrow
128 	in { assert(isValidSchema(schema)); }
129 	do {
130 		this(schema, null, 0, path);
131 	}
132 
133 	/** Constructs a "file:" URL from a native file system path.
134 
135 		Note that the path must be absolute. On Windows, both, paths starting
136 		with a drive letter and UNC paths are supported.
137 	*/
138 	this(WindowsPath path) pure
139 	{
140 		import std.algorithm.iteration : map;
141 		import std.range : chain, only, repeat;
142 
143 		enforce(path.absolute, "Only absolute paths can be converted to a URL.");
144 
145 		// treat UNC paths properly
146 		if (path.startsWith(WindowsPath(`\\`))) {
147 			static if (is(InetPath.Segment2)) {
148 				auto segs = path.bySegment2;
149 			} else {
150 				auto segs = path.bySegment;
151 			}
152 			segs.popFront();
153 			segs.popFront();
154 			auto host = segs.front.name;
155 			segs.popFront();
156 
157 			InetPath ip;
158 			static if (is(InetPath.Segment2)) {
159 				ip = InetPath(only(InetPath.Segment2.fromTrustedString("", '/'))
160 					.chain(segs.map!(s => cast(InetPath.Segment2)s)));
161 			} else {
162 				ip = InetPath(only(InetPath.Segment("", '/'))
163 					.chain(segs.map!(s => cast(InetPath.Segment)s)));
164 			}
165 
166 			this("file", host, 0, ip);
167 		} else this("file", host, 0, cast(InetPath)path);
168 	}
169 	/// ditto
170 	this(PosixPath path) pure
171 	{
172 		enforce(path.absolute, "Only absolute paths can be converted to a URL.");
173 
174 		this("file", null, 0, cast(InetPath)path);
175 	}
176 
177 	/** Constructs a URL from its string representation.
178 
179 		TODO: additional validation required (e.g. valid host and user names and port)
180 	*/
181 	this(string url_string)
182 	{
183 		this(url_string, true);
184 	}
185 
186 	private this(string url_string, bool encoded)
187 	{
188 		auto str = url_string;
189 		enforce(str.length > 0, "Empty URL.");
190 		if( str[0] != '/' ){
191 			auto idx = str.indexOf(':');
192 			enforce(idx > 0, "No schema in URL:"~str);
193 			m_schema = str[0 .. idx];
194 			enforce(m_schema[0].isAlpha,
195 					"Schema must start with an alphabetical char, found: " ~
196 					m_schema[0]);
197 			str = str[idx+1 .. $];
198 			bool requires_host = false;
199 
200 			if (str.startsWith("//")) {
201 				// proto://server/path style
202 				requires_host = true;
203 				str = str[2 .. $];
204 			}
205 
206 			auto si = str.indexOf('/');
207 			if( si < 0 ) si = str.length;
208 			auto ai = str[0 .. si].indexOf('@');
209 			sizediff_t hs = 0;
210 			if( ai >= 0 ){
211 				hs = ai+1;
212 				auto ci = str[0 .. ai].indexOf(':');
213 				if( ci >= 0 ){
214 					m_username = str[0 .. ci];
215 					m_password = str[ci+1 .. ai];
216 				} else m_username = str[0 .. ai];
217 				enforce(m_username.length > 0, "Empty user name in URL.");
218 			}
219 
220 			m_host = str[hs .. si];
221 
222 			auto findPort ( string src )
223 			{
224 				auto pi = src.indexOf(':');
225 				if(pi > 0) {
226 					enforce(pi < src.length-1, "Empty port in URL.");
227 					m_port = to!ushort(src[pi+1..$]);
228 				}
229 				return pi;
230 			}
231 
232 
233 			auto ip6 = m_host.indexOf('[');
234 			if (ip6 == 0) { // [ must be first char
235 				auto pe = m_host.indexOf(']');
236 				if (pe > 0) {
237 					findPort(m_host[pe..$]);
238 					m_host = m_host[1 .. pe];
239 				}
240 			}
241 			else {
242 				auto pi = findPort(m_host);
243 				if(pi > 0) {
244 					m_host = m_host[0 .. pi];
245 				}
246 				if (!encoded)
247 					m_host = m_host.splitter('.').map!(punyEncode).join('.');
248 			}
249 
250 			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
251 					"Empty server name in URL.");
252 			str = str[si .. $];
253 		}
254 
255 		this.localURI = (encoded) ? str : str.encode;
256 	}
257 	/// ditto
258 	static URL parse(string url_string)
259 	{
260 		return URL(url_string);
261 	}
262 	/// ditto
263 	static URL fromString(string url_string)
264 	{
265 		return URL(url_string);
266 	}
267 
268 	/// The schema/protocol part of the URL
269 	@property string schema() const nothrow { return m_schema; }
270 	/// ditto
271 	@property void schema(string v) { m_schema = v; }
272 
273 	/// The url encoded path part of the URL
274 	@property string pathString() const nothrow { return m_path.toString; }
275 
276 	/// Set the path part of the URL. It should be properly encoded.
277 	@property void pathString(string s)
278 	{
279 		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
280 		m_path = InetPath(s);
281 	}
282 
283 	/// The path part of the URL
284 	@property InetPath path() const nothrow { return m_path; }
285 	/// ditto
286 	@property void path(InetPath p)
287 	nothrow {
288 		m_path = p;
289 	}
290 	/// ditto
291 	@property void path(Path)(Path p)
292 		if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
293 	{
294 		m_path = cast(InetPath)p;
295 	}
296 
297 	/// The host part of the URL (depends on the schema)
298 	@property string host() const pure nothrow { return m_host; }
299 	/// ditto
300 	@property void host(string v) { m_host = v; }
301 
302 	/// The port part of the URL (optional)
303 	@property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
304 	/// ditto
305 	@property port(ushort v) nothrow { m_port = v; }
306 
307 	/// Get the default port for the given schema or 0
308 	static ushort defaultPort(string schema)
309 	nothrow {
310 		import core.atomic : atomicLoad;
311 		import std.uni : toLower;
312 
313 		string lowerschema;
314 
315 		try
316 			lowerschema = schema.toLower();
317 		catch (Exception e)
318 			assert(false, e.msg);
319 		
320 		if (auto set = atomicLoad(map_commonInternetSchemas))
321 			if (set.contains(lowerschema))
322 				return set.get(lowerschema);
323 
324 		return 0;
325 	}
326 	/// ditto
327 	ushort defaultPort()
328 	const nothrow {
329 		return defaultPort(m_schema);
330 	}
331 
332 	/// The user name part of the URL (optional)
333 	@property string username() const nothrow { return m_username; }
334 	/// ditto
335 	@property void username(string v) { m_username = v; }
336 
337 	/// The password part of the URL (optional)
338 	@property string password() const nothrow { return m_password; }
339 	/// ditto
340 	@property void password(string v) { m_password = v; }
341 
342 	/// The query string part of the URL (optional)
343 	@property string queryString() const nothrow { return m_queryString; }
344 	/// ditto
345 	@property void queryString(string v) { m_queryString = v; }
346 
347 	/// The anchor part of the URL (optional)
348 	@property string anchor() const nothrow { return m_anchor; }
349 
350 	/// The path part plus query string and anchor
351 	@property string localURI()
352 	const nothrow {
353 		auto str = appender!string();
354 		str.put(m_path.toString);
355 		if( queryString.length ) {
356 			str.put("?");
357 			str.put(queryString);
358 		}
359 		if( anchor.length ) {
360 			str.put("#");
361 			str.put(anchor);
362 		}
363 		return str.data;
364 	}
365 	/// ditto
366 	@property void localURI(string str)
367 	{
368 		auto ai = str.indexOf('#');
369 		if( ai >= 0 ){
370 			m_anchor = str[ai+1 .. $];
371 			str = str[0 .. ai];
372 		} else m_anchor = null;
373 
374 		auto qi = str.indexOf('?');
375 		if( qi >= 0 ){
376 			m_queryString = str[qi+1 .. $];
377 			str = str[0 .. qi];
378 		} else m_queryString = null;
379 
380 		this.pathString = str;
381 	}
382 
383 	/// The URL to the parent path with query string and anchor stripped.
384 	@property URL parentURL()
385 	const {
386 		URL ret;
387 		ret.schema = schema;
388 		ret.host = host;
389 		ret.port = port;
390 		ret.username = username;
391 		ret.password = password;
392 		ret.path = path.parentPath;
393 		return ret;
394 	}
395 
396 	/// Converts this URL object to its string representation.
397 	string toString()
398 	const nothrow {
399 		auto dst = appender!string();
400 		try this.toString(dst);
401 		catch (Exception e) assert(false, e.msg);
402 		return dst.data;
403 	}
404 
405 	/// Ditto
406 	void toString(OutputRange) (ref OutputRange dst) const {
407 		import std.format;
408 		dst.put(schema);
409 		dst.put(":");
410 		if (isCommonInternetSchema(schema))
411 			dst.put("//");
412 		if (m_username.length || m_password.length) {
413 			dst.put(username);
414 			if (m_password.length)
415 			{
416 				dst.put(':');
417 				dst.put(password);
418 			}
419 			dst.put('@');
420 		}
421 
422 		import std.algorithm : canFind;
423 		auto ipv6 = host.canFind(":");
424 
425 		if ( ipv6 ) dst.put('[');
426 		dst.put(host);
427 		if ( ipv6 ) dst.put(']');
428 
429 		if (m_port > 0)
430 			formattedWrite(dst, ":%d", m_port);
431 
432 		dst.put(localURI);
433 	}
434 
435 	/** Converts a "file" URL back to a native file system path.
436 	*/
437 	NativePath toNativePath()
438 	const {
439 		import std.algorithm.iteration : map;
440 		import std.range : dropOne;
441 
442 		enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");
443 
444 		version (Windows) {
445 			if (this.host.length) {
446 				static if (is(NativePath.Segment2)) {
447 					auto p = NativePath(this.path
448 							.bySegment2
449 							.dropOne
450 							.map!(s => cast(WindowsPath.Segment2)s)
451 						);
452 				} else {
453 					auto p = NativePath(this.path
454 							.bySegment
455 							.dropOne
456 							.map!(s => cast(WindowsPath.Segment)s)
457 						);
458 				}
459 				return NativePath.fromTrustedString(`\\`~this.host) ~ p;
460 			}
461 		}
462 
463 		return cast(NativePath)this.path;
464 	}
465 
466 	/// Decode percent encoded triplets for unreserved or convert to uppercase
467 	private string normalize_percent_encoding(scope const(char)[] input)
468 	{
469 		auto normalized = appender!string;
470 		normalized.reserve(input.length);
471 
472 		for (size_t i = 0; i < input.length; i++)
473 		{
474 			const char c = input[i];
475 			if (c == '%')
476 			{
477 				if (input.length < i + 3)
478 					assert(false, "Invalid percent encoding");
479 				
480 				char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
481 				switch (conv)
482 				{
483 					case 'A': .. case 'Z':
484 					case 'a': .. case 'z':
485 					case '0': .. case '9':
486 					case '-': case '.': case '_': case '~':
487 						normalized ~= conv; // Decode unreserved
488 						break;
489 					default:
490 						normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
491 						break;
492 				}
493 
494 				i += 2;
495 			}
496 			else
497 				normalized ~= c;
498 		}
499 
500 		return normalized.data;
501 	}
502 
503 	/**
504 	  * Normalize the content of this `URL` in place
505 	  *
506 	  * Normalization can be used to create a more consistent and human-friendly
507 	  * string representation of the `URL`.
508 	  * The list of transformations applied in the process of normalization is as follows:
509 			- Converting schema and host to lowercase
510 			- Removing port if it is the default port for schema
511 			- Removing dot segments in path
512 			- Converting percent-encoded triplets to uppercase
513 			- Adding slash when path is empty
514 			- Adding slash to path when path represents a directory
515 			- Decoding percent encoded triplets for unreserved characters
516 				A-Z a-z 0-9 - . _ ~ 
517 
518 		Params:
519 			isDirectory = Path of the URL represents a directory, if one is 
520 			not already present, a trailing slash will be appended when `true`
521 	*/
522 	void normalize(bool isDirectory = false)
523 	{
524 		import std.uni : toLower;
525 		
526 		// Lowercase host and schema
527 		this.m_schema = this.m_schema.toLower();
528 		this.m_host = this.m_host.toLower();
529 
530 		// Remove default port
531 		if (this.m_port == URL.defaultPort(this.m_schema))
532 			this.m_port = 0;
533 
534 		// Normalize percent encoding, decode unreserved or uppercase hex
535 		this.m_queryString = normalize_percent_encoding(this.m_queryString);
536 		this.m_anchor = normalize_percent_encoding(this.m_anchor);
537 
538 		// Normalize path (first remove dot segments then normalize path segments)
539 		this.m_path = InetPath(this.m_path.normalized.bySegment2.map!(
540 				n => InetPath.Segment2.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
541 			).array);
542 
543 		// Add trailing slash to empty path
544 		if (this.m_path.empty || isDirectory)
545 			this.m_path.endsWithSlash = true;		
546 	}
547 
548 	/** Returns the normalized form of the URL.
549 
550 		See `normalize` for a full description.
551 	*/
552 	URL normalized()
553 	const {
554 		URL ret = this;
555 		ret.normalize();
556 		return ret;
557 	}
558 
559 	bool startsWith(const URL rhs)
560 	const nothrow {
561 		if( m_schema != rhs.m_schema ) return false;
562 		if( m_host != rhs.m_host ) return false;
563 		// FIXME: also consider user, port, querystring, anchor etc
564 		static if (is(InetPath.Segment2))
565 			return this.path.bySegment2.startsWith(rhs.path.bySegment2);
566 		else return this.path.bySegment.startsWith(rhs.path.bySegment);
567 	}
568 
569 	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
570 	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
571 	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
572 	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
573 	static if (is(InetPath.Segment2)) {
574 		URL opBinary(string OP, Path)(Path.Segment2 rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
575 		void opOpAssign(string OP, Path)(Path.Segment2 rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
576 	}
577 
578 	/// Tests two URLs for equality using '=='.
579 	bool opEquals(ref const URL rhs)
580 	const nothrow {
581 		if (m_schema != rhs.m_schema) return false;
582 		if (m_host != rhs.m_host) return false;
583 		if (m_path != rhs.m_path) return false;
584 		if (m_port != rhs.m_port) return false;
585 		return true;
586 	}
587 	/// ditto
588 	bool opEquals(const URL other) const nothrow { return opEquals(other); }
589 
590 	int opCmp(ref const URL rhs) const nothrow {
591 		if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
592 		if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
593 		if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
594 		return true;
595 	}
596 }
597 
/** Checks whether a string is a valid URL schema name.

	A schema name is valid if it is not empty and consists only of ASCII
	letters, ASCII digits and the characters '+', '.' and '-'.
*/
bool isValidSchema(string schema)
@safe pure nothrow {
	if (!schema.length)
		return false;

	foreach (char ch; schema) {
		const bool is_letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		const bool is_digit = ch >= '0' && ch <= '9';
		const bool is_special = ch == '+' || ch == '.' || ch == '-';

		if (!(is_letter || is_digit || is_special))
			return false;
	}

	return true;
}
614 
615 unittest {
616 	assert(isValidSchema("http+ssh"));
617 	assert(isValidSchema("http"));
618 	assert(!isValidSchema("http/ssh"));
619 	assert(isValidSchema("HTtp"));
620 }
621 
622 
/** Checks whether a string is a valid host name.

	The name must be between 1 and 255 bytes long. Each of its dot separated
	labels must be between 1 and 63 bytes long, must not start with '-' and
	may consist only of ASCII letters, ASCII digits and '-'.
*/
bool isValidHostName(string name)
@safe pure nothrow {
	import std.algorithm.iteration : splitter;
	import std.string : representation;

	// According to RFC 1034
	if (name.length < 1 || name.length > 255)
		return false;

	foreach (label; name.representation.splitter('.')) {
		if (label.length < 1 || label.length > 63)
			return false;
		if (label[0] == '-')
			return false;

		foreach (ubyte b; label) {
			const bool is_letter = (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z');
			const bool is_digit = b >= '0' && b <= '9';

			if (!is_letter && !is_digit && b != '-')
				return false;
		}
	}

	return true;
}
648 
649 unittest {
650 	assert(isValidHostName("foo"));
651 	assert(isValidHostName("foo-"));
652 	assert(isValidHostName("foo.bar"));
653 	assert(isValidHostName("foo.bar-baz"));
654 	assert(isValidHostName("foo1"));
655 	assert(!isValidHostName("-foo"));
656 }
657 
658 
659 private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);
660 
661 private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;
662 
// Populates the table of common internet schemas with the built-in schema
// names and their default ports (0 where no default port is defined).
shared static this() {
	static struct BuiltinSchema {
		string name;
		ushort port;
	}

	static immutable BuiltinSchema[] builtin_schemas = [
		BuiltinSchema("file", 0),
		BuiltinSchema("tcp", 0),
		BuiltinSchema("ftp", 21),
		BuiltinSchema("sftp", 22),
		BuiltinSchema("http", 80),
		BuiltinSchema("https", 443),
		BuiltinSchema("http+unix", 80),
		BuiltinSchema("https+unix", 443),
		BuiltinSchema("spdy", 443),
		BuiltinSchema("ws", 80),
		BuiltinSchema("wss", 443),
		BuiltinSchema("redis", 6379),
		BuiltinSchema("rtsp", 554),
		BuiltinSchema("rtsps", 322),
	];

	auto initial_schemas = new SchemaDefaultPortMap;
	foreach (entry; builtin_schemas)
		initial_schemas.add(entry.name, entry.port);

	map_commonInternetSchemas = cast(immutable)initial_schemas;
}
682 
/// Registers `schema` as a double-slash style schema with a default port of 0.
deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
	registerCommonInternetSchema(schema, 0);
}

/** Adds the name of a schema to be treated as double-slash style.

	The schema name is stored in lower case. The registration is performed on
	a copy of the current schema table, which then replaces the current table
	using a compare-and-swap operation that is retried until it succeeds.

	Params:
		schema = Name of the schema
		port = Default port for the schema

	See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
	import core.atomic : atomicLoad, cas;
	import std.uni : toLower;

	string lowerschema;
	try lowerschema = schema.toLower();
	catch (Exception e) assert(false, e.msg);

	assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported");

	bool published;
	do {
		auto current = atomicLoad(map_commonInternetSchemas);
		auto updated = current ? current.dup : new SchemaDefaultPortMap;
		updated.add(lowerschema, port);

		static if (__VERSION__ < 2094) {
			// work around bogus shared violation error on earlier versions of Druntime
			published = cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)current, cast(shared(SchemaDefaultPortMap)*)updated);
		} else {
			published = cas(&map_commonInternetSchemas, current, cast(immutable)updated);
		}
	} while (!published);
}
725 
726 
/** Determines whether an URL schema is double-slash based.

	Double slash based schemas are of the form `schema://[host]/<path>`
	and are parsed differently compared to generic schemas, which are simply
	parsed as `schema:<path>`.

	The lookup is case insensitive. Schema names that are 128 or more
	characters long or that contain non-ASCII characters always yield `false`.

	Built-in recognized double-slash schemas: ftp, http, https,
	http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp,
	rtsp, rtsps

	See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1
*/
bool isCommonInternetSchema(string schema)
@safe nothrow @nogc {
	import core.atomic : atomicLoad;

	// fixed size stack buffer for the lower case name, avoids GC allocations
	char[128] lowered;
	if (schema.length >= lowered.length)
		return false;

	foreach (idx, char c; schema) {
		if (!isASCII(c))
			return false;
		lowered[idx] = toLower(c);
	}

	scope key = lowered[0 .. schema.length];

	return () @trusted {
		auto registry = atomicLoad(map_commonInternetSchemas);
		return registry !is null && registry.contains(cast(string) key);
	} ();
}
759 
760 unittest {
761 	assert(isCommonInternetSchema("http"));
762 	assert(isCommonInternetSchema("HTtP"));
763 	assert(URL.defaultPort("http") == 80);
764 	assert(!isCommonInternetSchema("foobar"));
765 	registerCommonInternetSchema("fooBar", 2522);
766 	assert(isCommonInternetSchema("foobar"));
767 	assert(isCommonInternetSchema("fOObAR"));
768 	assert(URL.defaultPort("foobar") == 2522);
769 	assert(URL.defaultPort("fOObar") == 2522);
770 
771 	assert(URL.defaultPort("unregistered") == 0);
772 }
773 
774 
// Maps schema names to their default port numbers.
private struct SchemaDefaultPortMap {
	ushort[string] m_data;

	// Inserts `str` with the given default port, replacing an existing entry.
	void add(string str, ushort port)
	@safe nothrow {
		m_data[str] = port;
	}

	// Tests whether an entry for `str` exists.
	bool contains(string str)
	const @safe nothrow @nogc {
		return (str in m_data) !is null;
	}

	// Returns the port stored for `str`, which must be present in the map.
	ushort get(string str)
	const @safe nothrow {
		return m_data[str];
	}

	// Creates an independent copy of the map on the heap.
	SchemaDefaultPortMap* dup()
	const @safe nothrow {
		auto copy = new SchemaDefaultPortMap;
		foreach (entry; m_data.byKeyValue)
			copy.add(entry.key, entry.value);
		return copy;
	}
}
787 
// Puny encoding
private {
	/** Bootstring parameters for Punycode
		These parameters are designed for Unicode

		See also: RFC 3492 Section 5
	*/
	enum uint base = 36;
	enum uint tmin = 1;
	enum uint tmax = 26;
	enum uint skew = 38;
	enum uint damp = 700;
	enum uint initial_bias = 72;
	enum uint initial_n = 128;

	/*	Bias adaptation

		Computes the bias to use for the next delta.

		Params:
			pdelta = The delta that has just been encoded
			numpoints = Number of code points handled so far, including the
				current one (must not be zero)
			firsttime = `true` for the very first delta of a string

		See also: RFC 3492 Section 6.1
	*/
	uint punyAdapt (uint pdelta, int numpoints, bool firsttime)
	@safe @nogc nothrow pure {
		uint delta = firsttime ? pdelta / damp : pdelta / 2;
		delta += delta / numpoints;
		uint k = 0;

		while (delta > ((base - tmin) * tmax) / 2)
		{
			delta /= (base - tmin);
			k += base;
		}

		return k + (((base - tmin + 1) * delta) / (delta + skew));
	}

	/*	Converts puny digit-codes to code point

		Digits 0 to 25 map to 'a' to 'z' and digits 26 to 35 map to '0' to '9'.

		See also: RFC 3492 Section 5
	*/
	dchar punyDigitToCP (uint digit)
	@safe @nogc nothrow pure {
		return cast(dchar) (digit + 22 + 75 * (digit < 26));
	}

	/*	Encodes `input` with puny encoding

		If input consists only of basic code points (those below `initial_n`,
		i.e. ASCII), input is returned as is. Otherwise the result is the
		encoded string, prefixed with "xn--".

		See also: RFC 3492 Section 6.3
	*/
	string punyEncode (in string input)
	@safe {
		uint n = initial_n;
		uint delta = 0;
		uint bias = initial_bias;
		uint h;
		uint b;
		dchar m = dchar.max; // minchar
		bool delta_overflow;

		uint input_len = 0;
		auto output = appender!string();

		output.put("xn--");

		foreach (dchar cp; input)
		{
			// Basic code points are strictly below `initial_n`. U+0080 itself
			// is not ASCII and has to go through the encoding loop below;
			// copying it verbatim would emit an invalid single byte.
			if (cp < initial_n)
			{
				output.put(cast(char) cp);
				h += 1;
			}
			// Count length of input as code points, `input.length` counts bytes
			input_len += 1;
		}

		b = h;
		if (b == input_len)
			return input; // No need to puny encode

		// Delimiter between the basic code points and the encoded part
		if (b > 0)
			output.put('-');

		while (h < input_len)
		{
			// Find the smallest code point that has not been handled yet
			m = dchar.max;
			foreach (dchar cp; input)
			{
				if (n <= cp && cp < m)
					m = cp;
			}

			assert(m != dchar.max, "Punyencoding failed, cannot find code point");

			delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
			assert(!delta_overflow, "Punyencoding failed, delta overflow");

			n = m;

			foreach (dchar cp; input)
			{
				if (cp < n)
					delta += 1;

				if (cp == n)
				{
					// Emit delta as a generalized variable-length integer
					uint q = delta;
					uint k = base;

					while (true)
					{
						uint t;
						if (k <= bias /* + tmin */)
							t = tmin;
						else if (k >=  bias + tmax)
							t = tmax;
						else
							t = k - bias;

						if (q < t) break;

						output.put(punyDigitToCP(t + ((q - t) % (base - t))));
						q = (q - t) / (base - t);
						k += base;
					}
					output.put(punyDigitToCP(q));
					bias = punyAdapt(delta, h + 1, h == b);
					delta = 0;
					h += 1;
				}
			}
			delta += 1;
			n += 1;
		}

		return output.data;
	}
}
926 
927 unittest { // IPv6
928 	auto urlstr = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
929 	auto url = URL.parse(urlstr);
930 	assert(url.schema == "http", url.schema);
931 	assert(url.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", url.host);
932 	assert(url.port == 8091);
933 	assert(url.path == InetPath("/abc"), url.path.toString());
934 	assert(url.toString == urlstr);
935 
936 	url.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
937 	urlstr = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
938 	assert(url.toString == urlstr);
939 }
940 
941 
942 unittest {
943 	auto urlstr = "https://www.example.net/index.html";
944 	auto url = URL.parse(urlstr);
945 	assert(url.schema == "https", url.schema);
946 	assert(url.host == "www.example.net", url.host);
947 	assert(url.path == InetPath("/index.html"), url.path.toString());
948 	assert(url.port == 443);
949 	assert(url.toString == urlstr);
950 
951 	urlstr = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
952 	url = URL.parse(urlstr);
953 	assert(url.schema == "http", url.schema);
954 	assert(url.username == "jo.doe", url.username);
955 	assert(url.password == "password", url.password);
956 	assert(url.port == 4711, to!string(url.port));
957 	assert(url.host == "sub.www.example.net", url.host);
958 	assert(url.path.toString() == "/sub2/index.html", url.path.toString());
959 	assert(url.queryString == "query", url.queryString);
960 	assert(url.anchor == "anchor", url.anchor);
961 	assert(url.toString == urlstr);
962 }
963 
964 unittest { // issue #1044
965 	URL url = URL.parse("http://example.com/p?query#anchor");
966 	assert(url.schema == "http");
967 	assert(url.host == "example.com");
968 	assert(url.port == 80);
969 	assert(url.queryString == "query");
970 	assert(url.anchor == "anchor");
971 	assert(url.pathString == "/p");
972 	url.localURI = "/q";
973 	assert(url.schema == "http");
974 	assert(url.host == "example.com");
975 	assert(url.queryString == "");
976 	assert(url.anchor == "");
977 	assert(url.pathString == "/q");
978 	url.localURI = "/q?query";
979 	assert(url.schema == "http");
980 	assert(url.host == "example.com");
981 	assert(url.queryString == "query");
982 	assert(url.anchor == "");
983 	assert(url.pathString == "/q");
984 	url.localURI = "/q#anchor";
985 	assert(url.schema == "http");
986 	assert(url.host == "example.com");
987 	assert(url.queryString == "");
988 	assert(url.anchor == "anchor");
989 	assert(url.pathString == "/q");
990 }
991 
992 //websocket unittest
993 unittest {
994 	URL url = URL("ws://127.0.0.1:8080/echo");
995 	assert(url.host == "127.0.0.1");
996 	assert(url.port == 8080);
997 	assert(url.localURI == "/echo");
998 }
999 
1000 //rtsp unittest
1001 unittest {
1002 	URL url = URL("rtsp://127.0.0.1:554/echo");
1003 	assert(url.host == "127.0.0.1");
1004 	assert(url.port == 554);
1005 	assert(url.localURI == "/echo");
1006 }
1007 
1008 unittest {
1009 	auto p = PosixPath("/foo bar/boo oom/");
1010 	URL url = URL("http", "example.com", 0, p); // constructor test
1011 	assert(url.path == cast(InetPath)p);
1012 	url.path = p;
1013 	assert(url.path == cast(InetPath)p);					   // path assignement test
1014 	assert(url.pathString == "/foo%20bar/boo%20oom/");
1015 	assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");
1016 	url.pathString = "/foo%20bar/boo%2foom/";
1017 	assert(url.pathString == "/foo%20bar/boo%2foom/");
1018 	assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
1019 }
1020 
1021 unittest {
1022 	URL url = URL("http://user:password@example.com");
1023 	assert(url.toString() == "http://user:password@example.com");
1024 
1025 	url = URL("http://user@example.com");
1026 	assert(url.toString() == "http://user@example.com");
1027 }
1028 
1029 unittest {
1030 	auto url = URL("http://example.com/some%2bpath");
1031 	assert((cast(PosixPath)url.path).toString() == "/some+path", url.path.toString());
1032 }
1033 
1034 unittest {
1035 	assert(URL("file:///test").pathString == "/test");
1036 	assert(URL("file:///test").port == 0);
1037 	assert(URL("file:///test").path.toString() == "/test");
1038 	assert(URL("file://test").host == "test");
1039 	assert(URL("file://test").pathString() == "");
1040 	assert(URL("file://./test").host == ".");
1041 	assert(URL("file://./test").pathString == "/test");
1042 	assert(URL("file://./test").path.toString() == "/test");
1043 }
1044 
1045 unittest { // issue #1318
1046 	try {
1047 		URL("http://something/inval%id");
1048 		assert(false, "Expected to throw an exception.");
1049 	} catch (Exception e) {}
1050 }
1051 
1052 unittest {
1053 	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "http+unix");
1054 	assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix");
1055 	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").host == "%2Fvar%2Frun%2Fdocker.sock");
1056 	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").pathString == "");
1057 	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json").pathString == "/container/json");
1058 	auto url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
1059 	assert(URL(url.toString()) == url);
1060 }
1061 
1062 unittest {
1063 	import vibe.data.serialization;
1064 	static assert(isStringSerializable!URL);
1065 }
1066 
1067 unittest { // issue #1732
1068 	auto url = URL("tcp://0.0.0.0:1234");
1069 	url.port = 4321;
1070 	assert(url.toString == "tcp://0.0.0.0:4321", url.toString);
1071 }
1072 
1073 unittest { // host name role in file:// URLs
1074 	auto url = URL.parse("file:///foo/bar");
1075 	assert(url.host == "");
1076 	assert(url.path == InetPath("/foo/bar"));
1077 	assert(url.toString() == "file:///foo/bar");
1078 
1079 	url = URL.parse("file://foo/bar/baz");
1080 	assert(url.host == "foo");
1081 	assert(url.path == InetPath("/bar/baz"));
1082 	assert(url.toString() == "file://foo/bar/baz");
1083 }
1084 
1085 unittest { // native path <-> URL conversion
1086 	import std.exception : assertThrown;
1087 
1088 	auto url = URL(NativePath("/foo/bar"));
1089 	assert(url.schema == "file");
1090 	assert(url.host == "");
1091 	assert(url.path == InetPath("/foo/bar"));
1092 	assert(url.toNativePath == NativePath("/foo/bar"));
1093 
1094 	assertThrown(URL("http://example.org/").toNativePath);
1095 	assertThrown(URL(NativePath("foo/bar")));
1096 }
1097 
1098 unittest { // URL Normalization
1099 	auto url = URL.parse("http://example.com/foo%2a");
1100 	assert(url.normalized.toString() == "http://example.com/foo%2A");
1101 
1102 	url = URL.parse("HTTP://User@Example.COM/Foo");
1103 	assert(url.normalized.toString() == "http://User@example.com/Foo");
1104 	
1105 	url = URL.parse("http://example.com/%7Efoo");
1106 	assert(url.normalized.toString() == "http://example.com/~foo");
1107 	
1108 	url = URL.parse("http://example.com/foo/./bar/baz/../qux");
1109 	assert(url.normalized.toString() == "http://example.com/foo/bar/qux");
1110 	
1111 	url = URL.parse("http://example.com");
1112 	assert(url.normalized.toString() == "http://example.com/");
1113 	
1114 	url = URL.parse("http://example.com:80/");
1115 	assert(url.normalized.toString() == "http://example.com/");
1116 
1117 	url = URL.parse("hTTPs://examPLe.COM:443/my/path");
1118 	assert(url.normalized.toString() == "https://example.com/my/path");
1119 
1120 	url = URL.parse("http://example.com/foo");
1121 	url.normalize(true);
1122 	assert(url.toString() == "http://example.com/foo/");
1123 }
1124 
1125 version (Windows) unittest { // Windows drive letter paths
1126 	auto url = URL(WindowsPath(`C:\foo`));
1127 	assert(url.schema == "file");
1128 	assert(url.host == "");
1129 	assert(url.path == InetPath("/C:/foo"));
1130 	auto p = url.toNativePath;
1131 	p.normalize();
1132 	assert(p == WindowsPath(`C:\foo`));
1133 }
1134 
1135 version (Windows) unittest { // UNC paths
1136 	auto url = URL(WindowsPath(`\\server\share\path`));
1137 	assert(url.schema == "file");
1138 	assert(url.host == "server");
1139 	assert(url.path == InetPath("/share/path"));
1140 
1141 	auto p = url.toNativePath;
1142 	p.normalize(); // convert slash to backslash if necessary
1143 	assert(p == WindowsPath(`\\server\share\path`));
1144 }