1 /**
2 	URL parsing routines.
3 
4 	Copyright: © 2012-2017 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.inet.url;
9 
10 public import vibe.core.path;
11 
12 import vibe.textfilter.urlencode;
13 import vibe.utils.string;
14 
15 import std.array;
16 import std.algorithm;
17 import std.conv;
18 import std.exception;
19 import std.string;
20 import std.traits : isInstanceOf;
21 import std.ascii : isAlpha, isASCII, toLower;
22 import std.uri: encode;
23 
24 import core.checkedint : addu;
25 
26 
/**
	Represents a URL decomposed into its components.

	The stored components are the schema, an optional user name and password,
	the host, an optional explicit port, the path, the query string and the
	anchor. Instances can be built from the individual components, from a
	file system path (yielding a "file" URL) or by parsing a URL string.
*/
struct URL {
@safe:
	private {
		string m_schema;      // schema/protocol name, without the trailing ':'
		InetPath m_path;      // path component
		string m_host;        // host name; IPv6 literals are stored without the enclosing brackets
		ushort m_port;        // explicitly set port, 0 if none was specified
		string m_username;    // user name (optional)
		string m_password;    // password (optional)
		string m_queryString; // text following '?', without the '?' itself
		string m_anchor;      // text following '#', without the '#' itself
	}

	/** Constructs a new URL object from its components.

		User name, password, query string and anchor are left empty.

		Params:
			schema = Schema name, must satisfy `isValidSchema`
			host = Host name, either empty or satisfying `isValidHostName`
			port = Explicit port, or 0 for "no explicit port"
			path = Path component of the URL
	*/
	this(string schema, string host, ushort port, InetPath path) pure nothrow
	in {
		assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
		assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
	}
	do {
		m_schema = schema;
		m_host = host;
		m_port = port;
		m_path = path;
	}
	/// ditto
	this(string schema, InetPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, PosixPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, PosixPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, WindowsPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, WindowsPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}

	/** Constructs a "file:" URL from a native file system path.

		Note that the path must be absolute. On Windows, both, paths starting
		with a drive letter and UNC paths are supported.

		For UNC paths, the server name becomes the host of the URL and the
		remaining segments form the path. All other paths produce a URL with
		an empty host.

		Throws: An exception if `path` is not absolute.
	*/
	this(WindowsPath path) pure
	{
		import std.algorithm.iteration : map;
		import std.range : chain, only, repeat;

		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		// treat UNC paths properly
		if (path.startsWith(WindowsPath(`\\`))) {
			static if (is(InetPath.Segment2)) {
				auto segs = path.bySegment2;
			} else {
				auto segs = path.bySegment;
			}
			// skip the leading segments that make up the `\\` prefix; the
			// segment that follows is used as the host (server) name
			segs.popFront();
			segs.popFront();
			auto host = segs.front.name;
			segs.popFront();

			// rebuild the path from an empty root segment followed by the
			// segments that remain after the server name
			InetPath ip;
			static if (is(InetPath.Segment2)) {
				ip = InetPath(only(InetPath.Segment2.fromTrustedString("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment2)s)));
			} else {
				ip = InetPath(only(InetPath.Segment("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment)s)));
			}

			this("file", host, 0, ip);
		} else this("file", null, 0, cast(InetPath)path); // non-UNC paths have no host
	}
	/// ditto
	this(PosixPath path) pure
	{
		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		this("file", null, 0, cast(InetPath)path);
	}

	/** Constructs a URL from its string representation.

		Strings starting with '/' are treated as a local URI (path, query
		string and anchor only). Otherwise the text up to the first ':' is
		taken as the schema. For schemas registered as common internet
		schemas, `//` must follow and a host is required (except for "file").

		Params:
			url_string = The string to parse
			encoded = If `true`, the string is expected to be already
				encoded. If `false`, the host labels are puny encoded and
				the local part is passed through `std.uri.encode`.

		Throws: An exception if the string is empty, has no or an invalid
			schema, an empty user name, an empty or non-numeric port, an
			empty required host, or a wrongly encoded path.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string, bool encoded = true)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.indexOf(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			enforce(m_schema[0].isAlpha,
					"Schema must start with an alphabetical char, found: " ~
					m_schema[0]);
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (isCommonInternetSchema(m_schema)) {
				// proto://server/path style
				enforce(str.startsWith("//"), "URL must start with proto://...");
				requires_host = true;
				str = str[2 .. $];
			}

			// the authority part ends at the first '/' (or at the end of the string)
			auto si = str.indexOf('/');
			if( si < 0 ) si = str.length;
			// optional "user[:password]@" prefix
			auto ai = str[0 .. si].indexOf('@');
			sizediff_t hs = 0;
			if( ai >= 0 ){
				hs = ai+1;
				auto ci = str[0 .. ai].indexOf(':');
				if( ci >= 0 ){
					m_username = str[0 .. ci];
					m_password = str[ci+1 .. ai];
				} else m_username = str[0 .. ai];
				enforce(m_username.length > 0, "Empty user name in URL.");
			}

			m_host = str[hs .. si];

			// Stores the port found after the first ':' in `src` (if any) and
			// returns the index of that ':' (negative if there is none).
			auto findPort ( string src )
			{
				auto pi = src.indexOf(':');
				if(pi > 0) {
					enforce(pi < src.length-1, "Empty port in URL.");
					m_port = to!ushort(src[pi+1..$]);
				}
				return pi;
			}


			auto ip6 = m_host.indexOf('[');
			if (ip6 == 0) { // [ must be first char
				auto pe = m_host.indexOf(']');
				if (pe > 0) {
					// the port can only follow the closing bracket; the
					// brackets themselves are not stored as part of the host
					findPort(m_host[pe..$]);
					m_host = m_host[1 .. pe];
				}
			}
			else {
				auto pi = findPort(m_host);
				if(pi > 0) {
					m_host = m_host[0 .. pi];
				}
				if (!encoded)
					m_host = m_host.splitter('.').map!(punyEncode).join('.');
			}

			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
					"Empty server name in URL.");
			str = str[si .. $];
		}

		this.localURI = (encoded) ? str : str.encode;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}
	
	/**
	* Parse a 'plain' string into an `URL`.
	*
	* Unlike `URL.parse`, this expects its argument to be a 
	* plain url (not percent-encoded nor punyencoded), and thus can contain
	* non-ASCII characters as well as reserved ones (e.g. a space).
	*
	* Params:
	*   url_string = A plaintext URL, which will be percent-encoded in the result.
	*/
	static URL parsePlain(string url_string)
	{
		return URL(url_string, false);
	}
	/// Parses an already encoded URL string (same as `URL.parse`).
	static URL fromString(string url_string)
	{
		return URL(url_string);
	}

	/// The schema/protocol part of the URL
	@property string schema() const nothrow { return m_schema; }
	/// ditto (the value is stored without validation)
	@property void schema(string v) { m_schema = v; }

	/// The url encoded path part of the URL
	@property string pathString() const nothrow { return m_path.toString; }

	/** Set the path part of the URL. It should be properly encoded.

		Throws: An exception if `s` is not a valid URL encoded string.
	*/
	@property void pathString(string s)
	{
		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
		m_path = InetPath(s);
	}

	/// The path part of the URL
	@property InetPath path() const nothrow { return m_path; }
	/// ditto
	@property void path(InetPath p)
	nothrow {
		m_path = p;
	}
	/// ditto (any other `GenericPath` instance is converted to `InetPath`)
	@property void path(Path)(Path p)
		if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
	{
		m_path = cast(InetPath)p;
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const pure nothrow { return m_host; }
	/// ditto (the value is stored without validation)
	@property void host(string v) { m_host = v; }

	/** The port part of the URL (optional)

		The getter returns the explicitly set port if there is one and
		otherwise falls back to `defaultPort` for the current schema.
	*/
	@property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
	/// ditto
	@property port(ushort v) nothrow { m_port = v; }

	/** Get the default port for the given schema or 0

		The schema is looked up in lower case in the set of registered
		common internet schemas.
	*/
	static ushort defaultPort(string schema)
	nothrow {
		import core.atomic : atomicLoad;
		import std.uni : toLower;

		string lowerschema;

		try
			lowerschema = schema.toLower();
		catch (Exception e)
			assert(false, e.msg);
		
		if (auto set = atomicLoad(map_commonInternetSchemas))
			if (set.contains(lowerschema))
				return set.get(lowerschema);

		return 0;
	}
	/// ditto
	ushort defaultPort()
	const nothrow {
		return defaultPort(m_schema);
	}

	/// The user name part of the URL (optional)
	@property string username() const nothrow { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const nothrow { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const nothrow { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const nothrow { return m_anchor; }

	/** The path part plus query string and anchor

		The getter emits `?query` and `#anchor` only if the respective part
		is non-empty. The setter splits the string at the first '#' and then
		at the first '?' and resets query string/anchor if they are absent.

		Throws: The setter throws if the path part is not properly URL
			encoded (see `pathString`).
	*/
	@property string localURI()
	const nothrow {
		auto str = appender!string();
		str.put(m_path.toString);
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		// the anchor is split off first, so a '?' inside of it is not
		// mistaken for the start of the query string
		auto ai = str.indexOf('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		} else m_anchor = null;

		auto qi = str.indexOf('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		} else m_queryString = null;

		this.pathString = str;
	}

	/** The URL to the parent path with query string and anchor stripped.

		Note that the port is copied through the `port` getter, so a URL
		without an explicit port yields a parent URL that carries the
		schema's default port (if there is one) as explicit port.
	*/
	@property URL parentURL()
	const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		ret.port = port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/// Converts this URL object to its string representation.
	string toString()
	const nothrow {
		auto dst = appender!string();
		try this.toString(dst);
		catch (Exception e) assert(false, e.msg);
		return dst.data;
	}

	/** Writes the string representation of this URL to an output range.

		The explicit port is only written if it is non-zero. Hosts
		containing a ':' are written enclosed in square brackets.
	*/
	void toString(OutputRange) (ref OutputRange dst) const {
		import std.format;
		dst.put(schema);
		dst.put(":");
		if (isCommonInternetSchema(schema))
			dst.put("//");
		if (m_username.length || m_password.length) {
			dst.put(username);
			if (m_password.length)
			{
				dst.put(':');
				dst.put(password);
			}
			dst.put('@');
		}

		import std.algorithm : canFind;
		// a ':' within the host is taken as the sign of an IPv6 literal
		auto ipv6 = host.canFind(":");

		if ( ipv6 ) dst.put('[');
		dst.put(host);
		if ( ipv6 ) dst.put(']');

		if (m_port > 0)
			formattedWrite(dst, ":%d", m_port);

		dst.put(localURI);
	}

	/** Converts a "file" URL back to a native file system path.

		On Windows, a URL with a non-empty host is converted to a UNC path
		with the host as the server name.

		Throws: An exception if the schema is not "file".
	*/
	NativePath toNativePath()
	const {
		import std.algorithm.iteration : map;
		import std.range : dropOne;

		enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");

		version (Windows) {
			if (this.host.length) {
				static if (is(NativePath.Segment2)) {
					auto p = NativePath(this.path
							.bySegment2
							.dropOne
							.map!(s => cast(WindowsPath.Segment2)s)
						);
				} else {
					auto p = NativePath(this.path
							.bySegment
							.dropOne
							.map!(s => cast(WindowsPath.Segment)s)
						);
				}
				return NativePath.fromTrustedString(`\\`~this.host) ~ p;
			}
		}

		return cast(NativePath)this.path;
	}

	/** Decode percent encoded triplets for unreserved or convert to uppercase

		Triplets that encode one of `A-Z a-z 0-9 - . _ ~` are replaced by the
		character itself, all other triplets are kept with upper case hex
		digits. Everything else is copied unchanged.

		Throws: An exception if a '%' is not followed by two characters, or
			if those characters cannot be parsed as a hexadecimal number.
	*/
	private string normalize_percent_encoding(scope const(char)[] input)
	{
		auto normalized = appender!string;
		normalized.reserve(input.length);

		for (size_t i = 0; i < input.length; i++)
		{
			const char c = input[i];
			if (c == '%')
			{
				// Query string and anchor are not validated when parsing, so a
				// truncated triplet is an input error rather than a program bug
				enforce(input.length >= i + 3, "Invalid percent encoding");
				
				char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
				switch (conv)
				{
					case 'A': .. case 'Z':
					case 'a': .. case 'z':
					case '0': .. case '9':
					case '-': case '.': case '_': case '~':
						normalized ~= conv; // Decode unreserved
						break;
					default:
						normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
						break;
				}

				// skip the two hex digits that were just consumed
				i += 2;
			}
			else
				normalized ~= c;
		}

		return normalized.data;
	}

	/**
	  * Normalize the content of this `URL` in place
	  *
	  * Normalization can be used to create a more consistent and human-friendly
	  * string representation of the `URL`.
	  * The list of transformations applied in the process of normalization is as follows:
			- Converting schema and host to lowercase
			- Removing port if it is the default port for schema
			- Removing dot segments in path
			- Converting percent-encoded triplets to uppercase
			- Adding slash when path is empty
			- Adding slash to path when path represents a directory
			- Decoding percent encoded triplets for unreserved characters
				A-Z a-z 0-9 - . _ ~ 

		Params:
			isDirectory = Path of the URL represents a directory, if one is 
			not already present, a trailing slash will be appended when `true`

		Throws: An exception if the query string, the anchor or a path
			segment contains an invalid percent encoded triplet.
	*/
	void normalize(bool isDirectory = false)
	{
		import std.uni : toLower;
		
		// Lowercase host and schema
		this.m_schema = this.m_schema.toLower();
		this.m_host = this.m_host.toLower();

		// Remove default port
		if (this.m_port == URL.defaultPort(this.m_schema))
			this.m_port = 0;

		// Normalize percent encoding, decode unreserved or uppercase hex
		this.m_queryString = normalize_percent_encoding(this.m_queryString);
		this.m_anchor = normalize_percent_encoding(this.m_anchor);

		// Normalize path (first remove dot segments then normalize path segments)
		this.m_path = InetPath(this.m_path.normalized.bySegment2.map!(
				n => InetPath.Segment2.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
			).array);

		// Add trailing slash to empty path
		if (this.m_path.empty || isDirectory)
			this.m_path.endsWithSlash = true;		
	}

	/** Returns the normalized form of the URL.

		The copy is normalized with `isDirectory` left at its default of
		`false`. See `normalize` for a full description.
	*/
	URL normalized()
	const {
		URL ret = this;
		ret.normalize();
		return ret;
	}

	/** Tests whether this URL starts with `rhs`.

		Schema and host must be equal and the path segments of `rhs` must be
		a prefix of the path segments of this URL. All other components are
		currently ignored.
	*/
	bool startsWith(const URL rhs)
	const nothrow {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		static if (is(InetPath.Segment2))
			return this.path.bySegment2.startsWith(rhs.path.bySegment2);
		else return this.path.bySegment.startsWith(rhs.path.bySegment);
	}

	// Path concatenation: `url ~ path` and `url ~ segment` build a new URL from
	// schema, host, explicit port and the extended path (user name, password,
	// query string and anchor are not carried over); `url ~= ...` extends the
	// path of this URL in place.
	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	static if (is(InetPath.Segment2)) {
		URL opBinary(string OP, Path)(Path.Segment2 rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
		void opOpAssign(string OP, Path)(Path.Segment2 rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	}

	/** Tests two URLs for equality using '=='.

		Only schema, host, path and the explicitly set port are compared.
	*/
	bool opEquals(ref const URL rhs)
	const nothrow {
		if (m_schema != rhs.m_schema) return false;
		if (m_host != rhs.m_host) return false;
		if (m_path != rhs.m_path) return false;
		if (m_port != rhs.m_port) return false;
		return true;
	}
	/// ditto
	bool opEquals(const URL other) const nothrow { return opEquals(other); } // `other` is an lvalue here and binds to the `ref` overload

	/** Orders URLs by schema, host, path string and explicit port.

		Returns: A negative value if this URL sorts before `rhs`, a positive
			value if it sorts after `rhs`, and 0 if all compared components
			are equal.
	*/
	int opCmp(ref const URL rhs) const nothrow {
		if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
		if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
		if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
		// the port takes part in opEquals, so it has to take part here, too
		if (m_port != rhs.m_port) return m_port < rhs.m_port ? -1 : 1;
		return 0;
	}
}
573 
/** Checks whether a string can be used as a URL schema name.

	A valid schema is non-empty and consists only of ASCII letters, ASCII
	digits and the characters '+', '.' and '-'.
*/
bool isValidSchema(string schema)
@safe pure nothrow {
	// true for every character that may appear in a schema name
	static bool isSchemaChar(char ch)
	@safe pure nothrow @nogc {
		return (ch >= 'a' && ch <= 'z')
			|| (ch >= 'A' && ch <= 'Z')
			|| (ch >= '0' && ch <= '9')
			|| ch == '+' || ch == '.' || ch == '-';
	}

	if (schema.length == 0) return false;

	foreach (char ch; schema)
		if (!isSchemaChar(ch))
			return false;

	return true;
}
590 
unittest {
	// letters of either case and '+' are accepted, '/' is rejected
	assert(isValidSchema("http+ssh"));
	assert(isValidSchema("http"));
	assert(!isValidSchema("http/ssh"));
	assert(isValidSchema("HTtp"));
}
597 
598 
/** Checks whether a string is a valid host name.

	The name must be 1 to 255 bytes long and consist of '.' separated labels.
	Each label must be 1 to 63 bytes long, must not start with '-' and may
	only contain ASCII letters, ASCII digits and '-'.
*/
bool isValidHostName(string name)
@safe pure nothrow {
	// According to RFC 1034
	if (name.length < 1 || name.length > 255) return false;

	// length of the label that is currently being scanned
	size_t label_length = 0;

	foreach (char ch; name) {
		if (ch == '.') {
			// a dot terminates the current label, which must not be empty
			if (label_length == 0) return false;
			label_length = 0;
			continue;
		}

		immutable is_letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		immutable is_digit = ch >= '0' && ch <= '9';

		if (ch == '-') {
			// a label must not start with a dash
			if (label_length == 0) return false;
		} else if (!is_letter && !is_digit) {
			return false;
		}

		label_length++;
		if (label_length > 63) return false;
	}

	// the final label (after the last dot, if any) must not be empty either
	return label_length > 0;
}
624 
unittest {
	// a dash is allowed anywhere in a label except in the first position
	assert(isValidHostName("foo"));
	assert(isValidHostName("foo-"));
	assert(isValidHostName("foo.bar"));
	assert(isValidHostName("foo.bar-baz"));
	assert(isValidHostName("foo1"));
	assert(!isValidHostName("-foo"));
}
633 
634 
// Evaluates to `true` if `P` is one of `InetPath`, `PosixPath` or `WindowsPath`.
private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);

// Registry of the double-slash style schemas and their default ports. It is
// read using `atomicLoad` and replaced as a whole using `cas` when a new
// schema gets registered.
private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;
638 
// Populates the schema registry with the built-in double-slash style schemas
// and their default ports (0 meaning "no default port").
shared static this() {
	static struct BuiltinSchema {
		string name;
		ushort port;
	}

	static immutable BuiltinSchema[] builtin_schemas = [
		BuiltinSchema("file", 0),
		BuiltinSchema("tcp", 0),
		BuiltinSchema("ftp", 21),
		BuiltinSchema("sftp", 22),
		BuiltinSchema("http", 80),
		BuiltinSchema("https", 443),
		BuiltinSchema("http+unix", 80),
		BuiltinSchema("https+unix", 443),
		BuiltinSchema("spdy", 443),
		BuiltinSchema("ws", 80),
		BuiltinSchema("wss", 443),
		BuiltinSchema("redis", 6379),
		BuiltinSchema("rtsp", 554),
		BuiltinSchema("rtsps", 322),
	];

	auto registry = new SchemaDefaultPortMap;
	foreach (entry; builtin_schemas)
		registry.add(entry.name, entry.port);

	map_commonInternetSchemas = cast(immutable)registry;
}
658 
/// Registers `schema` as double-slash style schema with a default port of 0.
deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
	enum ushort no_default_port = 0;
	registerCommonInternetSchema(schema, no_default_port);
}
664 
/** Adds the name of a schema to be treated as double-slash style.

	The schema name is stored in lower case. The registry is never modified
	in place: a copy that contains the new entry is created and then swapped
	in, retrying if the registry was replaced in the meantime.

	Params:
		schema = Name of the schema
		port = Default port for the schema

	See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
	import core.atomic : atomicLoad, cas;
	import std.uni : toLower;

	string key;
	try key = schema.toLower();
	catch (Exception e) assert(false, e.msg);

	assert(key.length < 128, "Only schemas with less than 128 characters are supported");

	bool published = false;
	do {
		// build an extended copy of the registry that is currently installed
		auto current = atomicLoad(map_commonInternetSchemas);
		auto updated = current ? current.dup : new SchemaDefaultPortMap;
		updated.add(key, port);

		// install the copy, unless the registry pointer has changed since it was loaded
		static if (__VERSION__ < 2094) {
			// work around bogus shared violation error on earlier versions of Druntime
			published = cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)current, cast(shared(SchemaDefaultPortMap)*)updated);
		} else {
			published = cas(&map_commonInternetSchemas, current, cast(immutable)updated);
		}
	} while (!published);
}
701 
702 
/** Determines whether an URL schema is double-slash based.

	Double slash based schemas are of the form `schema://[host]/<path>`
	and are parsed differently compared to generic schemas, which are simply
	parsed as `schema:<path>`.

	The lookup is case insensitive. Schema names of 128 or more characters,
	as well as names containing non-ASCII characters, always yield `false`.

	Built-in recognized double-slash schemas: ftp, http, https,
	http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp,
	rtsp, rtsps

	See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1
*/
bool isCommonInternetSchema(string schema)
@safe nothrow @nogc {
	import core.atomic : atomicLoad;

	enum max_length = 128;
	if (schema.length >= max_length) return false;

	// lower case copy of the schema, kept on the stack to avoid allocating
	char[max_length] lowered;
	for (size_t i = 0; i < schema.length; i++) {
		char c = schema[i];
		if (!isASCII(c)) return false;
		lowered[i] = toLower(c);
	}

	scope key = lowered[0 .. schema.length];

	return () @trusted {
		if (auto registry = atomicLoad(map_commonInternetSchemas))
			return registry.contains(cast(string) key);
		return false;
	} ();
}
735 
unittest {
	// built-in schemas are matched case insensitively
	assert(isCommonInternetSchema("http"));
	assert(isCommonInternetSchema("HTtP"));
	assert(URL.defaultPort("http") == 80);
	// a custom schema is only recognized after it has been registered
	assert(!isCommonInternetSchema("foobar"));
	registerCommonInternetSchema("fooBar", 2522);
	assert(isCommonInternetSchema("foobar"));
	assert(isCommonInternetSchema("fOObAR"));
	assert(URL.defaultPort("foobar") == 2522);
	assert(URL.defaultPort("fOObar") == 2522);

	// unknown schemas have a default port of 0
	assert(URL.defaultPort("unregistered") == 0);
}
749 
750 
// Maps schema names to their default port.
private struct SchemaDefaultPortMap {
	ushort[string] m_data;

	// Inserts `str` with the given port, replacing any existing entry.
	void add(string str, ushort port)
	@safe nothrow {
		m_data[str] = port;
	}

	// Returns `true` if an entry for `str` exists.
	bool contains(string str)
	const @safe nothrow @nogc {
		return (str in m_data) !is null;
	}

	// Returns the port stored for `str`, which must be present in the map.
	ushort get(string str)
	const @safe nothrow {
		return m_data[str];
	}

	// Creates a newly allocated map that holds the same entries as this one.
	SchemaDefaultPortMap* dup()
	const @safe nothrow {
		auto copy = new SchemaDefaultPortMap;
		foreach (entry; m_data.byKeyValue)
			copy.m_data[entry.key] = entry.value;
		return copy;
	}
}
763 
// Puny encoding
private {
	/** Bootstring parameters for Punycode
		These parameters are designed for Unicode

		See also: RFC 3492 Section 5
	*/
	enum uint base = 36;
	enum uint tmin = 1;
	enum uint tmax = 26;
	enum uint skew = 38;
	enum uint damp = 700;
	enum uint initial_bias = 72;
	enum uint initial_n = 128;

	/*	Bias adaptation

		Computes the bias to use for the next delta from the delta that was
		just encoded.

		Params:
			pdelta = The delta that was just encoded
			numpoints = Number of code points handled so far, including the
				current one
			firsttime = `true` for the first delta that gets encoded

		See also: RFC 3492 Section 6.1
	*/
	uint punyAdapt (uint pdelta, int numpoints, bool firsttime)
	@safe @nogc nothrow pure {
		uint delta = firsttime ? pdelta / damp : pdelta / 2;
		delta += delta / numpoints;
		uint k = 0;

		while (delta > ((base - tmin) * tmax) / 2)
		{
			delta /= (base - tmin);
			k += base;
		}

		return k + (((base - tmin + 1) * delta) / (delta + skew));
	}

	/*	Converts puny digit-codes to code point

		Digits 0 to 25 map to 'a' to 'z', digits 26 to 35 map to '0' to '9'.

		See also: RFC 3492 Section 5
	*/
	dchar punyDigitToCP (uint digit)
	@safe @nogc nothrow pure {
		// digit + 97 ('a') for digit < 26, digit + 22 ('0' - 26) otherwise
		return cast(dchar) (digit + 22 + 75 * (digit < 26));
	}

	/*	Encodes `input` with puny encoding
		
		If input is all characters below `initial_n`
		input is returned as is. Otherwise the result is the encoded form
		prefixed with "xn--".

		See also: RFC 3492 Section 6.3
	*/
	string punyEncode (in string input)
	@safe {
		uint n = initial_n;
		uint delta = 0;
		uint bias = initial_bias;
		uint h; // number of code points that have been handled so far
		uint b; // number of basic code points in the input
		dchar m = dchar.max; // minchar
		bool delta_overflow;
		
		uint input_len = 0;
		auto output = appender!string();
		
		output.put("xn--");

		foreach (dchar cp; input)
		{
			// Basic code points are those strictly below `initial_n`. U+0080
			// itself is not ASCII and has to go through the encoding loop.
			if (cp < initial_n)
			{
				output.put(cast(char) cp);
				h += 1;
			}
			// Count length of input as code points, `input.length` counts bytes
			input_len += 1;
		}

		b = h;
		if (b == input_len)
			return input; // No need to puny encode

		// the delimiter is only written if there are basic code points
		if (b > 0)
			output.put('-');

		while (h < input_len)
		{
			// find the smallest code point that has not been handled yet
			m = dchar.max;
			foreach (dchar cp; input)
			{
				if (n <= cp && cp < m)
					m = cp;
			}

			assert(m != dchar.max, "Punyencoding failed, cannot find code point");

			delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
			assert(!delta_overflow, "Punyencoding failed, delta overflow");

			n = m;

			foreach (dchar cp; input)
			{
				if (cp < n)
					delta += 1;

				if (cp == n)
				{
					// emit `delta` as a generalized variable-length integer
					uint q = delta;
					uint k = base;

					while (true)
					{
						uint t;
						if (k <= bias /* + tmin */)
							t = tmin;
						else if (k >=  bias + tmax)
							t = tmax;
						else
							t = k - bias;

						if (q < t) break;

						output.put(punyDigitToCP(t + ((q - t) % (base - t))));
						q = (q - t) / (base - t);
						k += base;
					}
					output.put(punyDigitToCP(q));
					bias = punyAdapt(delta, h + 1, h == b);
					delta = 0;
					h += 1;
				}
			}
			delta += 1;
			n += 1;
		}

		return output.data;
	}
}
902 
unittest { // IPv6
	// the host is stored without brackets, toString adds them back
	auto urlstr = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
	auto url = URL.parse(urlstr);
	assert(url.schema == "http", url.schema);
	assert(url.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", url.host);
	assert(url.port == 8091);
	assert(url.path == InetPath("/abc"), url.path.toString());
	assert(url.toString == urlstr);

	// a host containing ':' that is assigned later gets bracketed as well
	url.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
	urlstr = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
	assert(url.toString == urlstr);
}
916 
917 
unittest {
	// minimal URL: the port falls back to the default port of the schema
	auto urlstr = "https://www.example.net/index.html";
	auto url = URL.parse(urlstr);
	assert(url.schema == "https", url.schema);
	assert(url.host == "www.example.net", url.host);
	assert(url.path == InetPath("/index.html"), url.path.toString());
	assert(url.port == 443);
	assert(url.toString == urlstr);

	// URL with all components present
	urlstr = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
	url = URL.parse(urlstr);
	assert(url.schema == "http", url.schema);
	assert(url.username == "jo.doe", url.username);
	assert(url.password == "password", url.password);
	assert(url.port == 4711, to!string(url.port));
	assert(url.host == "sub.www.example.net", url.host);
	assert(url.path.toString() == "/sub2/index.html", url.path.toString());
	assert(url.queryString == "query", url.queryString);
	assert(url.anchor == "anchor", url.anchor);
	assert(url.toString == urlstr);
}
939 
unittest { // issue #1044
	// assigning localURI must reset query string and anchor if they are absent
	URL url = URL.parse("http://example.com/p?query#anchor");
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.port == 80);
	assert(url.queryString == "query");
	assert(url.anchor == "anchor");
	assert(url.pathString == "/p");
	// path only
	url.localURI = "/q";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "");
	assert(url.anchor == "");
	assert(url.pathString == "/q");
	// path and query string
	url.localURI = "/q?query";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "query");
	assert(url.anchor == "");
	assert(url.pathString == "/q");
	// path and anchor
	url.localURI = "/q#anchor";
	assert(url.schema == "http");
	assert(url.host == "example.com");
	assert(url.queryString == "");
	assert(url.anchor == "anchor");
	assert(url.pathString == "/q");
}
967 
// WebSocket URL: numeric host with an explicit port
unittest {
	URL url = URL("ws://127.0.0.1:8080/echo");
	assert(url.host == "127.0.0.1");
	assert(url.port == 8080);
	assert(url.localURI == "/echo");
}
975 
// RTSP URL: numeric host with an explicit port
unittest {
	URL url = URL("rtsp://127.0.0.1:554/echo");
	assert(url.host == "127.0.0.1");
	assert(url.port == 554);
	assert(url.localURI == "/echo");
}
983 
unittest {
	// spaces in a PosixPath show up percent encoded in the URL
	auto p = PosixPath("/foo bar/boo oom/");
	URL url = URL("http", "example.com", 0, p); // constructor test
	assert(url.path == cast(InetPath)p);
	url.path = p;
	assert(url.path == cast(InetPath)p);					   // path assignment test
	assert(url.pathString == "/foo%20bar/boo%20oom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");
	// an encoded slash within a segment is kept as is
	url.pathString = "/foo%20bar/boo%2foom/";
	assert(url.pathString == "/foo%20bar/boo%2foom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
}
996 
unittest {
	// user name and password survive a parse/toString round trip
	URL url = URL("http://user:password@example.com");
	assert(url.toString() == "http://user:password@example.com");

	// user name without password
	url = URL("http://user@example.com");
	assert(url.toString() == "http://user@example.com");
}
1004 
unittest {
	// "%2b" is decoded to '+' when converting the path to a PosixPath
	auto url = URL("http://example.com/some%2bpath");
	assert((cast(PosixPath)url.path).toString() == "/some+path", url.path.toString());
}
1009 
unittest {
	// plain (unencoded) input: the path gets percent encoded ...
	auto url = URL("http://example.com/hello-🌍", false);
	assert(url.pathString == "/hello-%F0%9F%8C%8D");
	url = URL.parsePlain("http://example.com/안녕하세요-세계");
	assert(url.pathString == "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84");
	// ... and each host label gets puny encoded, with or without a port
	url = URL.parsePlain("http://hello-🌍.com/");
	assert(url.host == "xn--hello--8k34e.com");
	url = URL.parsePlain("http://hello-🌍.com:8080/");
	assert(url.host == "xn--hello--8k34e.com");
	url = URL.parsePlain("http://i-❤-이모티콘.io");
	assert(url.host == "xn--i---5r6aq903fubqabumj4g.io");
	url = URL.parsePlain("https://hello🌍.i-❤-이모티콘.com");
	assert(url.host == "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com");
}
1024 
unittest {
	// "file" URLs may have an empty host and have no default port
	assert(URL("file:///test").pathString == "/test");
	assert(URL("file:///test").port == 0);
	assert(URL("file:///test").path.toString() == "/test");
	// whatever follows "//" up to the next slash is the host
	assert(URL("file://test").host == "test");
	assert(URL("file://test").pathString() == "");
	assert(URL("file://./test").host == ".");
	assert(URL("file://./test").pathString == "/test");
	assert(URL("file://./test").path.toString() == "/test");
}
1035 
unittest { // issue #1318
	// an invalid percent encoding in the path must result in an exception
	try {
		URL("http://something/inval%id");
		assert(false, "Expected to throw an exception.");
	} catch (Exception e) {}
}
1042 
unittest {
	// "http+unix"/"https+unix": the percent encoded socket path is kept as host
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "http+unix");
	assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").host == "%2Fvar%2Frun%2Fdocker.sock");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock").pathString == "");
	assert(URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json").pathString == "/container/json");
	// toString followed by parsing yields an equal URL
	auto url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
	assert(URL(url.toString()) == url);
}
1052 
unittest {
	// URL must be usable as a string serializable type
	import vibe.data.serialization;
	static assert(isStringSerializable!URL);
}
1057 
unittest { // issue #1732
	// a port assigned after parsing must show up in the string representation
	auto url = URL("tcp://0.0.0.0:1234");
	url.port = 4321;
	assert(url.toString == "tcp://0.0.0.0:4321", url.toString);
}
1063 
unittest { // host name role in file:// URLs
	// three slashes: empty host, everything is part of the path
	auto url = URL.parse("file:///foo/bar");
	assert(url.host == "");
	assert(url.path == InetPath("/foo/bar"));
	assert(url.toString() == "file:///foo/bar");

	// two slashes: the first component is the host
	url = URL.parse("file://foo/bar/baz");
	assert(url.host == "foo");
	assert(url.path == InetPath("/bar/baz"));
	assert(url.toString() == "file://foo/bar/baz");
}
1075 
unittest { // native path <-> URL conversion
	import std.exception : assertThrown;

	// an absolute native path converts to a "file" URL and back
	auto url = URL(NativePath("/foo/bar"));
	assert(url.schema == "file");
	assert(url.host == "");
	assert(url.path == InetPath("/foo/bar"));
	assert(url.toNativePath == NativePath("/foo/bar"));

	// non-"file" URLs and relative paths are rejected
	assertThrown(URL("http://example.org/").toNativePath);
	assertThrown(URL(NativePath("foo/bar")));
}
1088 
unittest { // URL Normalization
	// percent encoded triplets are converted to upper case
	auto url = URL.parse("http://example.com/foo%2a");
	assert(url.normalized.toString() == "http://example.com/foo%2A");

	// schema and host are lower cased, user name and path keep their case
	url = URL.parse("HTTP://User@Example.COM/Foo");
	assert(url.normalized.toString() == "http://User@example.com/Foo");
	
	// triplets encoding unreserved characters are decoded
	url = URL.parse("http://example.com/%7Efoo");
	assert(url.normalized.toString() == "http://example.com/~foo");
	
	// dot segments are removed
	url = URL.parse("http://example.com/foo/./bar/baz/../qux");
	assert(url.normalized.toString() == "http://example.com/foo/bar/qux");
	
	// an empty path becomes "/"
	url = URL.parse("http://example.com");
	assert(url.normalized.toString() == "http://example.com/");
	
	// the default port of the schema is removed
	url = URL.parse("http://example.com:80/");
	assert(url.normalized.toString() == "http://example.com/");

	url = URL.parse("hTTPs://examPLe.COM:443/my/path");
	assert(url.normalized.toString() == "https://example.com/my/path");

	// directories get a trailing slash appended
	url = URL.parse("http://example.com/foo");
	url.normalize(true);
	assert(url.toString() == "http://example.com/foo/");
}
1115 
version (Windows) unittest { // Windows drive letter paths
	// the drive letter becomes the first path segment, the host stays empty
	auto url = URL(WindowsPath(`C:\foo`));
	assert(url.schema == "file");
	assert(url.host == "");
	assert(url.path == InetPath("/C:/foo"));
	auto p = url.toNativePath;
	p.normalize();
	assert(p == WindowsPath(`C:\foo`));
}
1125 
version (Windows) unittest { // UNC paths
	// the server name becomes the host, the remaining segments form the path
	auto url = URL(WindowsPath(`\\server\share\path`));
	assert(url.schema == "file");
	assert(url.host == "server");
	assert(url.path == InetPath("/share/path"));

	auto p = url.toNativePath;
	p.normalize(); // convert slash to backslash if necessary
	assert(p == WindowsPath(`\\server\share\path`));
}