1 /**
2 	URL parsing routines.
3 
4 	Copyright: © 2012-2017 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.inet.url;
9 
10 public import vibe.core.path;
11 
12 import vibe.textfilter.urlencode;
13 import vibe.utils.string;
14 
15 import std.array;
16 import std.algorithm;
17 import std.conv;
18 import std.exception;
19 import std.string;
20 import std.traits : isInstanceOf;
21 import std.ascii : isAlpha, isASCII, toLower;
22 import std.uri: decode, encode;
23 
24 import core.checkedint : addu;
25 
26 
/** Parses a URL as typed by a user, applying relaxed rules.

	In contrast to `URL.parse`, special characters are accepted in the host
	name and in the path. They are converted to puny code or percent-encoded
	form respectively, and the result is returned in normalized form.

	Params:
		url = String representation of the URL
		default_schema = Schema to prepend (followed by "://") if it is
			non-empty and `url` neither starts with "/" nor contains "://".
			A browser might use "http" or "https", for example.

	Returns:
		The parsed and normalized URL.
*/
URL parseUserURL(string url, string default_schema)
{
	immutable needs_schema = default_schema.length > 0
		&& !url.startsWith("/")
		&& !url.canFind("://");

	auto full_url = needs_schema ? default_schema ~ "://" ~ url : url;

	// `false` selects the non-encoded parsing mode of the private constructor
	auto parsed = URL(full_url, false);
	return parsed.normalized;
}
46 
unittest {
	// All explicit-schema cases pass "foo" as the (unused) default schema.
	static URL relaxed(string s) { return parseUserURL(s, "foo"); }

	// special characters in path
	assert(relaxed("http://example.com/hello-🌍").pathString == "/hello-%F0%9F%8C%8D");
	assert(relaxed("http://example.com/안녕하세요-세계").pathString
		== "/%EC%95%88%EB%85%95%ED%95%98%EC%84%B8%EC%9A%94-%EC%84%B8%EA%B3%84");

	// special characters in host name
	assert(relaxed("http://hello-🌍.com/").host == "xn--hello--8k34e.com");
	assert(relaxed("http://hello-🌍.com:8080/").host == "xn--hello--8k34e.com");
	assert(relaxed("http://i-❤-이모티콘.io").host == "xn--i---5r6aq903fubqabumj4g.io");
	assert(relaxed("https://hello🌍.i-❤-이모티콘.com").host
		== "xn--hello-oe93d.xn--i---5r6aq903fubqabumj4g.com");

	// default schema addition
	auto with_sftp = parseUserURL("example.com/foo/bar", "sftp");
	assert(with_sftp == URL("sftp://example.com/foo/bar"));
	auto with_https = parseUserURL("example.com:1234", "https");
	assert(with_https == URL("https://example.com:1234/"));
}
66 
67 
/**
	Represents a URL decomposed into its components.

	The stored components are schema, user name, password, host, port, path,
	query string and anchor. Note that `opEquals` and `opCmp` only take the
	schema, host, path and port into account.
*/
struct URL {
@safe:
	private {
		string m_schema;
		InetPath m_path;
		string m_host;
		ushort m_port; // 0 means "no explicit port", see `port`
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
	}

	/** Constructs a new URL object from its components.

		Params:
			schema = Schema name, must satisfy `isValidSchema`
			host = Host name, either empty or satisfying `isValidHostName`
			port = Explicit port, or 0 for none
			path = Path part of the URL
	*/
	this(string schema, string host, ushort port, InetPath path) pure nothrow
	in {
		assert(isValidSchema(schema), "Invalid URL schema name: " ~ schema);
		assert(host.length == 0 || isValidHostName(host), "Invalid URL host name: " ~ host);
	}
	do {
		m_schema = schema;
		m_host = host;
		m_port = port;
		m_path = path;
	}
	/// ditto
	this(string schema, InetPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, PosixPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, PosixPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}
	/// ditto
	this(string schema, string host, ushort port, WindowsPath path) pure nothrow
	in {
		assert(isValidSchema(schema));
		assert(host.length == 0 || isValidHostName(host));
	}
	do {
		InetPath ip;
		try ip = cast(InetPath)path;
		catch (Exception e) assert(false, e.msg); // InetPath should be able to capture all paths
		this(schema, host, port, ip);
	}
	/// ditto
	this(string schema, WindowsPath path) pure nothrow
	in { assert(isValidSchema(schema)); }
	do {
		this(schema, null, 0, path);
	}

	/** Constructs a "file:" URL from a native file system path.

		Note that the path must be absolute. On Windows, both, paths starting
		with a drive letter and UNC paths are supported.

		Throws: `Exception` if `path` is not absolute.
	*/
	this(WindowsPath path) pure
	{
		import std.algorithm.iteration : map;
		import std.range : chain, only;

		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		// treat UNC paths properly
		if (path.startsWith(WindowsPath(`\\`))) {
			static if (is(InetPath.Segment2)) {
				auto segs = path.bySegment2;
			} else {
				auto segs = path.bySegment;
			}
			// skip the two leading segments of the UNC prefix; the segment
			// that follows is used as the host name of the URL
			segs.popFront();
			segs.popFront();
			auto host = segs.front.name;
			segs.popFront();

			// rebuild the remaining segments as an absolute path
			InetPath ip;
			static if (is(InetPath.Segment2)) {
				ip = InetPath(only(InetPath.Segment2.fromTrustedString("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment2)s)));
			} else {
				ip = InetPath(only(InetPath.Segment("", '/'))
					.chain(segs.map!(s => cast(InetPath.Segment)s)));
			}

			this("file", host, 0, ip);
		} else this("file", null, 0, cast(InetPath)path); // non-UNC paths carry no host
	}
	/// ditto
	this(PosixPath path) pure
	{
		enforce(path.absolute, "Only absolute paths can be converted to a URL.");

		this("file", null, 0, cast(InetPath)path);
	}

	/** Constructs a URL from its string representation.

		The string is expected to be in URL encoded form. A string starting
		with '/' is treated as a local URI, consisting only of path, query
		string and anchor.

		Throws: `Exception` if the string is empty, contains no schema, has
			an empty user name or port, lacks a host where one is required,
			or if the path part is not properly URL encoded.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		this(url_string, true);
	}

	// Shared parsing routine. If `encoded` is `false`, the host labels are
	// puny encoded and the local URI is percent-encoded before being stored.
	private this(string url_string, bool encoded)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.indexOf(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			enforce(m_schema[0].isAlpha,
					"Schema must start with an alphabetical char, found: " ~
					m_schema[0]);
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (str.startsWith("//")) {
				// proto://server/path style
				requires_host = true;
				str = str[2 .. $];
			}

			// The authority component is terminated by the first '/', '?'
			// or '#' (RFC 3986, section 3.2). Looking for '/' alone would
			// make a query string or anchor that directly follows the host
			// (e.g. "http://example.com?a=b") part of the host name.
			auto si = str.indexOfAny("/?#");
			if( si < 0 ) si = str.length;
			auto ai = str[0 .. si].indexOf('@');
			sizediff_t hs = 0;
			if( ai >= 0 ){
				hs = ai+1;
				auto ci = str[0 .. ai].indexOf(':');
				if( ci >= 0 ){
					m_username = str[0 .. ci];
					m_password = str[ci+1 .. ai];
				} else m_username = str[0 .. ai];
				enforce(m_username.length > 0, "Empty user name in URL.");
			}

			m_host = str[hs .. si];

			// Stores the port that follows the first ':' of `src` (if the
			// colon is not the first character) and returns the colon index.
			auto findPort ( string src )
			{
				auto pi = src.indexOf(':');
				if(pi > 0) {
					enforce(pi < src.length-1, "Empty port in URL.");
					m_port = to!ushort(src[pi+1..$]);
				}
				return pi;
			}


			auto ip6 = m_host.indexOf('[');
			if (ip6 == 0) { // [ must be first char
				auto pe = m_host.indexOf(']');
				if (pe > 0) {
					// only look for the port after the closing bracket, the
					// address itself contains colons
					findPort(m_host[pe..$]);
					m_host = m_host[1 .. pe];
				}
			}
			else {
				auto pi = findPort(m_host);
				if(pi > 0) {
					m_host = m_host[0 .. pi];
				}
				if (!encoded)
					m_host = m_host.splitter('.').map!(punyEncode).join('.');
			}

			enforce(!requires_host || m_schema == "file" || m_host.length > 0,
					"Empty server name in URL.");
			str = str[si .. $];
		}

		this.localURI = (encoded) ? str : str.encode;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}
	/// ditto
	static URL fromString(string url_string)
	{
		return URL(url_string);
	}

	/// The schema/protocol part of the URL
	@property string schema() const nothrow { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The url encoded path part of the URL
	@property string pathString() const nothrow { return m_path.toString; }

	/** Set the path part of the URL. It should be properly encoded.

		Throws: `Exception` if `s` is not properly URL encoded.
	*/
	@property void pathString(string s)
	{
		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
		m_path = InetPath(s);
	}

	/// The path part of the URL
	@property InetPath path() const nothrow { return m_path; }
	/// ditto
	@property void path(InetPath p)
	nothrow {
		m_path = p;
	}
	/// ditto
	@property void path(Path)(Path p)
		if (isInstanceOf!(GenericPath, Path) && !is(Path == InetPath))
	{
		m_path = cast(InetPath)p;
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const pure nothrow { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/** The port part of the URL (optional)

		If no explicit port is set, the getter returns the default port of
		the schema (see `defaultPort`).
	*/
	@property ushort port() const nothrow { return m_port ? m_port : defaultPort(m_schema); }
	/// ditto
	@property port(ushort v) nothrow { m_port = v; }

	/// Get the default port for the given schema or 0
	static ushort defaultPort(string schema)
	nothrow {
		import core.atomic : atomicLoad;
		import std.uni : toLower;

		string lowerschema;

		// schemas are registered in lower case, so look them up that way
		try
			lowerschema = schema.toLower();
		catch (Exception e)
			assert(false, e.msg);

		if (auto set = atomicLoad(map_commonInternetSchemas))
			if (set.contains(lowerschema))
				return set.get(lowerschema);

		return 0;
	}
	/// ditto
	ushort defaultPort()
	const nothrow {
		return defaultPort(m_schema);
	}

	/// The user name part of the URL (optional)
	@property string username() const nothrow { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const nothrow { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const nothrow { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const nothrow { return m_anchor; }

	/// The path part plus query string and anchor
	@property string localURI()
	const nothrow {
		auto str = appender!string();
		str.put(m_path.toString);
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		// The anchor is split off first, so that a '?' inside of the anchor
		// is not mistaken for the start of the query string.
		auto ai = str.indexOf('#');
		if( ai >= 0 ){
			m_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		} else m_anchor = null;

		auto qi = str.indexOf('?');
		if( qi >= 0 ){
			m_queryString = str[qi+1 .. $];
			str = str[0 .. qi];
		} else m_queryString = null;

		this.pathString = str;
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property URL parentURL()
	const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		// Copy the stored port rather than the effective one returned by
		// `port`: the latter substitutes the schema's default port, which
		// would add an explicit port to the parent of a URL that has none.
		ret.port = m_port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/// Converts this URL object to its string representation.
	string toString()
	const nothrow {
		auto dst = appender!string();
		try this.toString(dst);
		catch (Exception e) assert(false, e.msg);
		return dst.data;
	}

	/// Ditto
	void toString(OutputRange) (ref OutputRange dst) const {
		import std.format;
		dst.put(schema);
		dst.put(":");
		if (isCommonInternetSchema(schema))
			dst.put("//");
		if (m_username.length || m_password.length) {
			dst.put(username);
			if (m_password.length)
			{
				dst.put(':');
				dst.put(password);
			}
			dst.put('@');
		}

		import std.algorithm : canFind;
		// a host containing ':' is written in brackets (IPv6 literal)
		auto ipv6 = host.canFind(":");

		if ( ipv6 ) dst.put('[');
		dst.put(host);
		if ( ipv6 ) dst.put(']');

		// only an explicitly set port is written
		if (m_port > 0)
			formattedWrite(dst, ":%d", m_port);

		dst.put(localURI);
	}

	/** Converts a "file" URL back to a native file system path.

		Throws: `Exception` if the schema is not "file".
	*/
	NativePath toNativePath()
	const {
		import std.algorithm.iteration : map;
		import std.range : dropOne;

		enforce(this.schema == "file", "Only file:// URLs can be converted to a native path.");

		version (Windows) {
			// a host name turns the URL into a UNC path
			if (this.host.length) {
				static if (is(NativePath.Segment2)) {
					auto p = NativePath(this.path
							.bySegment2
							.dropOne
							.map!(s => cast(WindowsPath.Segment2)s)
						);
				} else {
					auto p = NativePath(this.path
							.bySegment
							.dropOne
							.map!(s => cast(WindowsPath.Segment)s)
						);
				}
				return NativePath.fromTrustedString(`\\`~this.host) ~ p;
			}
		}

		return cast(NativePath)this.path;
	}

	/** Decode percent encoded triplets for unreserved or convert to uppercase

		Throws: `Exception` if a '%' is not followed by two characters, or
			if those characters are not valid hexadecimal digits.
	*/
	private string normalize_percent_encoding(scope const(char)[] input)
	{
		auto normalized = appender!string;
		normalized.reserve(input.length);

		for (size_t i = 0; i < input.length; i++)
		{
			const char c = input[i];
			if (c == '%')
			{
				// Query string and anchor are stored without validation, so
				// a truncated triplet is an input error that must be
				// reported to the caller instead of halting the program.
				enforce(input.length - i >= 3, "Invalid percent encoding");

				char conv = cast(char) input[i + 1 .. i + 3].to!ubyte(16);
				switch (conv)
				{
					case 'A': .. case 'Z':
					case 'a': .. case 'z':
					case '0': .. case '9':
					case '-': case '.': case '_': case '~':
						normalized ~= conv; // Decode unreserved
						break;
					default:
						normalized ~= input[i .. i + 3].toUpper(); // Uppercase HEX
						break;
				}

				i += 2; // skip the two hex digits that were just consumed
			}
			else
				normalized ~= c;
		}

		return normalized.data;
	}

	/** Normalizes the content of this `URL` in place.

		Normalization can be used to create a more consistent and
		human-friendly string representation of the `URL`. The following
		transformations are applied:

		- Converting schema and host to lowercase
		- Removing port if it is the default port for schema
		- Removing dot segments in path
		- Converting percent-encoded triplets to uppercase
		- Adding slash when path is empty
		- Adding slash to path when path represents a directory
		- Decoding percent encoded triplets for unreserved characters
		  A-Z a-z 0-9 - . _ ~

		Params:
			isDirectory = Path of the URL represents a directory, if one is
				not already present, a trailing slash will be appended when
				`true`

		Throws: `Exception` if the path, query string or anchor contains an
			invalid percent encoded triplet.
	*/
	void normalize(bool isDirectory = false)
	{
		import std.uni : toLower;

		// Lowercase host and schema
		this.m_schema = this.m_schema.toLower();
		this.m_host = this.m_host.toLower();

		// Remove default port
		if (this.m_port == URL.defaultPort(this.m_schema))
			this.m_port = 0;

		// Normalize percent encoding, decode unreserved or uppercase hex
		this.m_queryString = normalize_percent_encoding(this.m_queryString);
		this.m_anchor = normalize_percent_encoding(this.m_anchor);

		// Normalize path (first remove dot segments then normalize path segments)
		this.m_path = InetPath(this.m_path.normalized.bySegment2.map!(
				n => InetPath.Segment2.fromTrustedEncodedString(normalize_percent_encoding(n.encodedName))
			).array);

		// Add trailing slash to empty path
		if (this.m_path.empty || isDirectory)
			this.m_path.endsWithSlash = true;
	}

	/** Returns the normalized form of the URL.

		See `normalize` for a full description.
	*/
	URL normalized()
	const {
		URL ret = this;
		ret.normalize();
		return ret;
	}

	/** Tests whether this URL starts with `rhs`.

		Schema and host must be equal and the path segments of `rhs` must be
		a prefix of the path segments of this URL.
	*/
	bool startsWith(const URL rhs)
	const nothrow {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		static if (is(InetPath.Segment2))
			return this.path.bySegment2.startsWith(rhs.path.bySegment2);
		else return this.path.bySegment.startsWith(rhs.path.bySegment);
	}

	/// Returns a URL with `rhs` appended to the path; only schema, host and port are carried over.
	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// ditto
	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// Appends `rhs` to the path of this URL.
	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	/// ditto
	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	static if (is(InetPath.Segment2) && !is(InetPath.Segment2 == InetPath.Segment)) {
		/// Returns a URL with `rhs` appended to the path; only schema, host and port are carried over.
		URL opBinary(string OP, Path)(Path.Segment2 rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
		/// Appends `rhs` to the path of this URL.
		void opOpAssign(string OP, Path)(Path.Segment2 rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	}

	/** Tests two URLs for equality using '=='.

		Only schema, host, path and the explicitly stored port are compared.
	*/
	bool opEquals(ref const URL rhs)
	const nothrow {
		if (m_schema != rhs.m_schema) return false;
		if (m_host != rhs.m_host) return false;
		if (m_path != rhs.m_path) return false;
		if (m_port != rhs.m_port) return false;
		return true;
	}
	/// ditto
	bool opEquals(const URL other) const nothrow { return opEquals(other); }

	/** Orders two URLs by schema, host, path string and port.

		Returns: A negative value if this URL sorts before `rhs`, a positive
			value if it sorts after `rhs` and 0 if none of the compared
			components differ.
	*/
	int opCmp(ref const URL rhs) const nothrow {
		if (m_schema != rhs.m_schema) return m_schema.cmp(rhs.m_schema);
		if (m_host != rhs.m_host) return m_host.cmp(rhs.m_host);
		if (m_path != rhs.m_path) return cmp(m_path.toString, rhs.m_path.toString);
		// The port takes part in `opEquals`, so it has to be ordered here, too
		if (m_port != rhs.m_port) return m_port < rhs.m_port ? -1 : 1;
		return 0;
	}
}
603 
/** Tests whether `schema` only consists of characters allowed in a URL schema.

	A valid schema is non-empty and made up of ASCII letters, digits and the
	characters '+', '.' and '-'.
*/
bool isValidSchema(string schema)
@safe pure nothrow {
	static bool isSchemaChar(char ch) @safe pure nothrow @nogc {
		return (ch >= 'a' && ch <= 'z')
			|| (ch >= 'A' && ch <= 'Z')
			|| (ch >= '0' && ch <= '9')
			|| ch == '+' || ch == '.' || ch == '-';
	}

	if (schema.length == 0) return false;

	// iterate by code unit, so that any non-ASCII byte is rejected
	foreach (char ch; schema)
		if (!isSchemaChar(ch))
			return false;

	return true;
}
620 
unittest {
	// accepted: letters in any case and '+'
	foreach (accepted; ["http+ssh", "http", "HTtp"])
		assert(isValidSchema(accepted));

	// rejected: '/' is not a schema character
	assert(!isValidSchema("http/ssh"));
}
627 
628 
/** Tests whether `name` is a syntactically valid host name.

	The name must be 1 to 255 bytes long and consist of dot separated labels
	of 1 to 63 bytes each. A label may contain ASCII letters, digits and '-',
	but must not start with '-'.
*/
bool isValidHostName(string name)
@safe pure nothrow {
	import std.algorithm.iteration : splitter;
	import std.string : representation;

	// According to RFC 1034
	enum max_name_length = 255;
	enum max_label_length = 63;

	if (name.length == 0 || name.length > max_name_length)
		return false;

	foreach (label; name.representation.splitter('.')) {
		if (label.length == 0 || label.length > max_label_length)
			return false;
		if (label[0] == '-')
			return false;

		foreach (char ch; label) {
			const allowed = (ch >= 'a' && ch <= 'z')
				|| (ch >= 'A' && ch <= 'Z')
				|| (ch >= '0' && ch <= '9')
				|| ch == '-';
			if (!allowed) return false;
		}
	}
	return true;
}
654 
unittest {
	// a trailing '-' is accepted, only a leading one is rejected
	foreach (accepted; ["foo", "foo-", "foo.bar", "foo.bar-baz", "foo1"])
		assert(isValidHostName(accepted));

	assert(!isValidHostName("-foo"));
}
663 
664 
// True if `P` is one of the path types accepted by the `~` and `~=` operators of `URL`.
private enum isAnyPath(P) = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);

// Table of the registered double-slash schemas and their default ports.
// It is read using `atomicLoad` and replaced as a whole using `cas` by
// `registerCommonInternetSchema`.
private shared immutable(SchemaDefaultPortMap)* map_commonInternetSchemas;
668 
// Fills the schema table with the built-in double-slash schemas.
shared static this() {
	// schema name paired with its default port (0 = none)
	static struct Builtin { string schema; ushort port; }

	static immutable Builtin[] builtins = [
		Builtin("file", 0),
		Builtin("tcp", 0),
		Builtin("ftp", 21),
		Builtin("sftp", 22),
		Builtin("http", 80),
		Builtin("https", 443),
		Builtin("http+unix", 80),
		Builtin("https+unix", 443),
		Builtin("spdy", 443),
		Builtin("ws", 80),
		Builtin("wss", 443),
		Builtin("redis", 6379),
		Builtin("rtsp", 554),
		Builtin("rtsps", 322),
	];

	auto table = new SchemaDefaultPortMap;
	foreach (entry; builtins)
		table.add(entry.schema, entry.port);

	map_commonInternetSchemas = cast(immutable)table;
}
688 
deprecated("Use the overload that accepts a `ushort port` as second argument")
void registerCommonInternetSchema(string schema)
{
	// Schemas registered through this overload get no default port.
	enum ushort no_default_port = 0;
	registerCommonInternetSchema(schema, no_default_port);
}
694 
/** Adds the name of a schema to be treated as double-slash style.

	The schema name is stored in lower case. Registering a name that is
	already present replaces its default port.

	Params:
		schema = Name of the schema, must be shorter than 128 characters
		port = Default port for the schema

	See_also: `isCommonInternetSchema`, RFC 1738 Section 3.1
*/
void registerCommonInternetSchema(string schema, ushort port)
@trusted nothrow {
	import core.atomic : atomicLoad, cas;
	import std.uni : toLower;

	string lowerschema;
	try {
		lowerschema = schema.toLower();
	} catch (Exception e) {
		assert(false, e.msg);
	}

	// `isCommonInternetSchema` rejects anything of 128 characters or more
	assert(lowerschema.length < 128, "Only schemas with less than 128 characters are supported");

	// The published table is never modified. Instead, a modified copy is
	// built and swapped in using compare-and-swap; if the table pointer has
	// changed in the meantime, the copy is rebuilt from the current table.
	while (true) {
		auto olds = atomicLoad(map_commonInternetSchemas);
		auto news = olds ? olds.dup : new SchemaDefaultPortMap;
		news.add(lowerschema, port);
		static if (__VERSION__ < 2094) {
			// work around bogus shared violation error on earlier versions of Druntime
			if (cas(cast(shared(SchemaDefaultPortMap*)*)&map_commonInternetSchemas, cast(shared(SchemaDefaultPortMap)*)olds, cast(shared(SchemaDefaultPortMap)*)news))
				break;
		} else {
			if (cas(&map_commonInternetSchemas, olds, cast(immutable)news))
				break;
		}
	}
}
731 
732 
/** Determines whether an URL schema is double-slash based.

	Double slash based schemas are of the form `schema://[host]/<path>`
	and are parsed differently compared to generic schemas, which are simply
	parsed as `schema:<path>`.

	The lookup is case insensitive. Schema names that contain non-ASCII
	characters or that are 128 characters or longer are never recognized.

	Built-in recognized double-slash schemas: ftp, http, https,
	http+unix, https+unix, spdy, sftp, ws, wss, file, redis, tcp,
	rtsp, rtsps

	See_also: `registerCommonInternetSchema`, RFC 1738 Section 3.1
*/
bool isCommonInternetSchema(string schema)
@safe nothrow @nogc {
	import core.atomic : atomicLoad;

	enum max_length = 128;
	if (schema.length >= max_length) return false;

	// lower-case into a stack buffer to stay @nogc
	char[max_length] lowered;
	foreach (i, char ch; schema)
	{
		if (!isASCII(ch)) return false;
		lowered[i] = toLower(ch);
	}

	scope key = lowered[0 .. schema.length];

	return () @trusted {
		auto registered = atomicLoad(map_commonInternetSchemas);
		if (!registered) return false;
		return registered.contains(cast(string) key);
	} ();
}
765 
unittest {
	alias portOf = URL.defaultPort;

	// built-in schema, looked up case insensitively
	assert(isCommonInternetSchema("http"));
	assert(isCommonInternetSchema("HTtP"));
	assert(portOf("http") == 80);

	// a custom schema is only known after its registration
	assert(!isCommonInternetSchema("foobar"));
	registerCommonInternetSchema("fooBar", 2522);
	foreach (spelling; ["foobar", "fOObAR"])
		assert(isCommonInternetSchema(spelling));
	foreach (spelling; ["foobar", "fOObar"])
		assert(portOf(spelling) == 2522);

	assert(portOf("unregistered") == 0);
}
779 
780 
// Maps schema names to their default ports.
private struct SchemaDefaultPortMap {
	ushort[string] m_data;

	/// Inserts `str`, or replaces its port if it is already present.
	void add(string str, ushort port)
	@safe nothrow {
		m_data[str] = port;
	}

	/// Tests whether `str` has been added.
	bool contains(string str)
	const @safe nothrow @nogc {
		return (str in m_data) !is null;
	}

	/// Returns the port stored for `str`, which must have been added before.
	ushort get(string str)
	const @safe nothrow {
		return m_data[str];
	}

	/// Creates a newly allocated, independent copy of the map.
	SchemaDefaultPortMap* dup()
	const @safe nothrow {
		auto copy = new SchemaDefaultPortMap;
		foreach (entry; m_data.byKeyValue)
			copy.m_data[entry.key] = entry.value;
		return copy;
	}
}
793 
// Puny encoding
private {
	/** Bootstring parameters for Punycode
		These parameters are designed for Unicode

		See also: RFC 3492 Section 5
	*/
	enum uint base = 36;
	enum uint tmin = 1;
	enum uint tmax = 26;
	enum uint skew = 38;
	enum uint damp = 700;
	enum uint initial_bias = 72;
	enum uint initial_n = 128;

	/*	Bias adaptation

		Computes the bias to use for the next delta.

		Params:
			pdelta = The delta that has just been encoded
			numpoints = Number of code points handled so far, including the
				one that was just encoded
			firsttime = `true` for the first delta of a string

		See also: RFC 3492 Section 6.1
	*/
	uint punyAdapt (uint pdelta, int numpoints, bool firsttime)
	@safe @nogc nothrow pure {
		uint delta = firsttime ? pdelta / damp : pdelta / 2;
		delta += delta / numpoints;
		uint k = 0;

		while (delta > ((base - tmin) * tmax) / 2)
		{
			delta /= (base - tmin);
			k += base;
		}

		return k + (((base - tmin + 1) * delta) / (delta + skew));
	}

	/*	Converts puny digit-codes to code point

		Digits 0 to 25 map to 'a' to 'z', digits 26 to 35 map to '0' to '9'.

		See also: RFC 3492 Section 5
	*/
	dchar punyDigitToCP (uint digit)
	@safe @nogc nothrow pure {
		return cast(dchar) (digit + 22 + 75 * (digit < 26));
	}

	/*	Encodes `input` with puny encoding

		If input is all characters below `initial_n`
		input is returned as is. Otherwise, the result is prefixed
		with "xn--".

		See also: RFC 3492 Section 6.3
	*/
	string punyEncode (in string input)
	@safe {
		uint n = initial_n;
		uint delta = 0;
		uint bias = initial_bias;
		uint h; // number of code points that have been handled
		uint b; // number of basic code points
		dchar m = dchar.max; // minchar
		bool delta_overflow;

		uint input_len = 0;
		auto output = appender!string();

		output.put("xn--");

		foreach (dchar cp; input)
		{
			// Basic code points are those strictly below `initial_n`.
			// U+0080 itself is not ASCII and has to be encoded like any
			// other non-basic code point.
			if (cp < initial_n)
			{
				output.put(cast(char) cp);
				h += 1;
			}
			// Count length of input as code points, `input.length` counts bytes
			input_len += 1;
		}

		b = h;
		if (b == input_len)
			return input; // No need to puny encode

		// the delimiter is only written if there are basic code points
		if (b > 0)
			output.put('-');

		while (h < input_len)
		{
			// find the smallest code point that has not been handled yet
			m = dchar.max;
			foreach (dchar cp; input)
			{
				if (n <= cp && cp < m)
					m = cp;
			}

			assert(m != dchar.max, "Punyencoding failed, cannot find code point");

			delta = addu(delta, ((m - n) * (h + 1)), delta_overflow);
			assert(!delta_overflow, "Punyencoding failed, delta overflow");

			n = m;

			foreach (dchar cp; input)
			{
				if (cp < n)
					delta += 1;

				if (cp == n)
				{
					// write delta as a generalized variable-length integer
					uint q = delta;
					uint k = base;

					while (true)
					{
						uint t;
						if (k <= bias /* + tmin */)
							t = tmin;
						else if (k >=  bias + tmax)
							t = tmax;
						else
							t = k - bias;

						if (q < t) break;

						output.put(punyDigitToCP(t + ((q - t) % (base - t))));
						q = (q - t) / (base - t);
						k += base;
					}
					output.put(punyDigitToCP(q));
					bias = punyAdapt(delta, h + 1, h == b);
					delta = 0;
					h += 1;
				}
			}
			delta += 1;
			n += 1;
		}

		return output.data;
	}
}
932 
unittest { // IPv6
	string expected = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
	auto parsed = URL.parse(expected);

	// the brackets are stripped from the stored host ...
	assert(parsed.schema == "http", parsed.schema);
	assert(parsed.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", parsed.host);
	assert(parsed.port == 8091);
	assert(parsed.path == InetPath("/abc"), parsed.path.toString());
	// ... and added back when converting to a string
	assert(parsed.toString == expected);

	parsed.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
	expected = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
	assert(parsed.toString == expected);
}
946 
947 
unittest {
	// minimal URL, the port falls back to the default of the schema
	{
		immutable text = "https://www.example.net/index.html";
		auto simple = URL.parse(text);
		assert(simple.schema == "https", simple.schema);
		assert(simple.host == "www.example.net", simple.host);
		assert(simple.path == InetPath("/index.html"), simple.path.toString());
		assert(simple.port == 443);
		assert(simple.toString == text);
	}

	// URL using all components
	{
		immutable text = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
		auto full = URL.parse(text);
		assert(full.schema == "http", full.schema);
		assert(full.username == "jo.doe", full.username);
		assert(full.password == "password", full.password);
		assert(full.port == 4711, to!string(full.port));
		assert(full.host == "sub.www.example.net", full.host);
		assert(full.path.toString() == "/sub2/index.html", full.path.toString());
		assert(full.queryString == "query", full.queryString);
		assert(full.anchor == "anchor", full.anchor);
		assert(full.toString == text);
	}
}
969 
unittest { // issue #1044
	URL url = URL.parse("http://example.com/p?query#anchor");

	// Schema and host have to survive every assignment to `localURI`,
	// while query string, anchor and path follow the assigned value.
	void expect(string query, string anchor, string path)
	{
		assert(url.schema == "http");
		assert(url.host == "example.com");
		assert(url.queryString == query);
		assert(url.anchor == anchor);
		assert(url.pathString == path);
	}

	assert(url.port == 80);
	expect("query", "anchor", "/p");

	url.localURI = "/q";
	expect("", "", "/q");

	url.localURI = "/q?query";
	expect("query", "", "/q");

	url.localURI = "/q#anchor";
	expect("", "anchor", "/q");
}
997 
// WebSocket URL with numeric host and explicit port
unittest {
	auto ws_url = URL("ws://127.0.0.1:8080/echo");
	assert(ws_url.localURI == "/echo");
	assert(ws_url.port == 8080);
	assert(ws_url.host == "127.0.0.1");
}
1005 
// RTSP URL with numeric host and explicit port
unittest {
	auto rtsp_url = URL("rtsp://127.0.0.1:554/echo");
	assert(rtsp_url.localURI == "/echo");
	assert(rtsp_url.port == 554);
	assert(rtsp_url.host == "127.0.0.1");
}
1013 
unittest {
	auto posix = PosixPath("/foo bar/boo oom/");
	auto as_inet = cast(InetPath)posix;

	// constructor test
	URL url = URL("http", "example.com", 0, posix);
	assert(url.path == as_inet);

	// path assignment test
	url.path = posix;
	assert(url.path == as_inet);
	assert(url.pathString == "/foo%20bar/boo%20oom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");

	// an encoded path string is stored as given
	url.pathString = "/foo%20bar/boo%2foom/";
	assert(url.pathString == "/foo%20bar/boo%2foom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
}
1026 
unittest {
	// user name with and without password must round-trip unchanged
	foreach (text; ["http://user:password@example.com", "http://user@example.com"]) {
		auto url = URL(text);
		assert(url.toString() == text);
	}
}
1034 
unittest {
	// "%2b" is decoded to '+' when converting to a PosixPath
	auto url = URL("http://example.com/some%2bpath");
	auto posix = cast(PosixPath)url.path;
	assert(posix.toString() == "/some+path", url.path.toString());
}
1039 
unittest { // parsing of file:// URLs with empty, plain and "." host parts
	// empty host followed by an absolute path
	auto no_host = URL("file:///test");
	assert(no_host.pathString == "/test");
	assert(no_host.port == 0);
	assert(no_host.path.toString() == "/test");

	// host only, no path
	auto host_only = URL("file://test");
	assert(host_only.host == "test");
	assert(host_only.pathString() == "");

	// "." is taken as the host name, the remainder as the path
	auto dot_host = URL("file://./test");
	assert(dot_host.host == ".");
	assert(dot_host.pathString == "/test");
	assert(dot_host.path.toString() == "/test");
}
1050 
unittest { // issue #1318
	// An invalid percent escape ("%id") in the path must be rejected with an
	// Exception. assertThrown raises an AssertError with the given message
	// if the expression completes without throwing.
	assertThrown(URL("http://something/inval%id"), "Expected to throw an exception.");
}
1057 
unittest { // "http+unix"/"https+unix" schemas with a percent-encoded socket path as host
	auto sock_url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock");
	assert(sock_url.schema == "http+unix");
	assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix");
	assert(sock_url.host == "%2Fvar%2Frun%2Fdocker.sock");
	assert(sock_url.pathString == "");

	auto request_url = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
	assert(request_url.pathString == "/container/json");

	// the string form must parse back into an equal URL
	assert(URL(request_url.toString()) == request_url);
}
1067 
unittest { // compile-time check: URL must satisfy isStringSerializable
	import vibe.data.serialization;
	static assert(isStringSerializable!URL);
}
1072 
unittest { // issue #1732
	// changing the port after parsing must be reflected in the string form
	auto tcp_url = URL("tcp://0.0.0.0:1234");
	tcp_url.port = 4321;

	auto rendered = tcp_url.toString;
	assert(rendered == "tcp://0.0.0.0:4321", rendered);
}
1078 
unittest { // host name role in file:// URLs
	// three slashes: empty host, everything after it is the path
	auto local_url = URL.parse("file:///foo/bar");
	assert(local_url.host == "");
	assert(local_url.path == InetPath("/foo/bar"));
	assert(local_url.toString() == "file:///foo/bar");

	// two slashes: the first component is the host, the rest is the path
	auto remote_url = URL.parse("file://foo/bar/baz");
	assert(remote_url.host == "foo");
	assert(remote_url.path == InetPath("/bar/baz"));
	assert(remote_url.toString() == "file://foo/bar/baz");
}
1090 
unittest { // native path <-> URL conversion
	import std.exception : assertThrown;

	// an absolute native path becomes a "file" URL with an empty host
	auto native_path = NativePath("/foo/bar");
	auto file_url = URL(native_path);
	assert(file_url.schema == "file");
	assert(file_url.host == "");
	assert(file_url.path == InetPath("/foo/bar"));

	// ... and converts back to the same native path
	assert(file_url.toNativePath == native_path);

	// a non-file URL cannot be converted to a native path
	assertThrown(URL("http://example.org/").toNativePath);
	// a relative native path cannot be converted to a URL
	assertThrown(URL(NativePath("foo/bar")));
}
1103 
unittest { // URL Normalization
	// hex digits of percent escapes are upper-cased
	assert(URL.parse("http://example.com/foo%2a").normalized.toString()
		== "http://example.com/foo%2A");

	// schema and host are lower-cased, user name and path keep their case
	assert(URL.parse("HTTP://User@Example.COM/Foo").normalized.toString()
		== "http://User@example.com/Foo");

	// the escaped "%7E" is decoded to "~"
	assert(URL.parse("http://example.com/%7Efoo").normalized.toString()
		== "http://example.com/~foo");

	// "." and ".." path segments are resolved
	assert(URL.parse("http://example.com/foo/./bar/baz/../qux").normalized.toString()
		== "http://example.com/foo/bar/qux");

	// an empty path becomes "/"
	assert(URL.parse("http://example.com").normalized.toString()
		== "http://example.com/");

	// port 80 is dropped for "http"
	assert(URL.parse("http://example.com:80/").normalized.toString()
		== "http://example.com/");

	// port 443 is dropped for "https", combined with case normalization
	assert(URL.parse("hTTPs://examPLe.COM:443/my/path").normalized.toString()
		== "https://example.com/my/path");

	// in-place normalization with `true` passed appends a trailing slash
	auto slash_url = URL.parse("http://example.com/foo");
	slash_url.normalize(true);
	assert(slash_url.toString() == "http://example.com/foo/");
}
1130 
version (Windows) unittest { // Windows drive letter paths
	auto drive_path = WindowsPath(`C:\foo`);

	// the drive letter becomes the first path segment of a host-less file URL
	auto file_url = URL(drive_path);
	assert(file_url.schema == "file");
	assert(file_url.host == "");
	assert(file_url.path == InetPath("/C:/foo"));

	// converting back and normalizing yields the original path
	auto round_trip = file_url.toNativePath;
	round_trip.normalize();
	assert(round_trip == drive_path);
}
1140 
version (Windows) unittest { // UNC paths
	auto unc_path = WindowsPath(`\\server\share\path`);

	// the server name maps to the URL host, the remainder to the path
	auto file_url = URL(unc_path);
	assert(file_url.schema == "file");
	assert(file_url.host == "server");
	assert(file_url.path == InetPath("/share/path"));

	auto round_trip = file_url.toNativePath;
	round_trip.normalize(); // convert slash to backslash if necessary
	assert(round_trip == unc_path);
}
1151 
unittest { // appending paths and path segments with `~` and `~=`
	// binary `~` with a whole path and with a single segment
	auto base = URL.parse("http://example.com/foo");
	auto with_path = base ~ InetPath("bar");
	assert(with_path.toString() == "http://example.com/foo/bar");
	auto with_segment = base ~ InetPath.Segment("bar");
	assert(with_segment.toString() == "http://example.com/foo/bar");

	// in-place `~=` accumulates the appended parts
	auto accumulated = URL.parse("http://example.com/");
	accumulated ~= InetPath("foo");
	accumulated ~= InetPath.Segment("bar");
	assert(accumulated.toString() == "http://example.com/foo/bar");
}
1163 
unittest { // URLs that have a schema but no "//" authority part
	auto absolute = URL.parse("foo:/foo/bar");
	assert(absolute.toString() == "foo:/foo/bar");
	assert(absolute.path.toString() == "/foo/bar");

	auto relative = URL.parse("foo:foo/bar");
	assert(relative.toString() == "foo:foo/bar");
}