1 /**
2 	URL parsing routines.
3 
4 	Copyright: © 2012-2017 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.inet.url;
9 
10 public import vibe.core.path;
11 
12 import vibe.textfilter.urlencode;
13 import vibe.utils.string;
14 
15 import std.array;
16 import std.conv;
17 import std.exception;
18 import std.string;
19 import std.traits : isInstanceOf;
20 
21 
/**
	Represents a URL decomposed into its components.

	The path component is always kept in URL encoded form (see `pathString`);
	this is verified by the struct invariant.
*/
struct URL {
@safe:
	private {
		string m_schema;
		string m_pathString; // always URL encoded, see invariant
		string m_host;
		ushort m_port;       // 0 means "no explicit port"
		string m_username;
		string m_password;
		string m_queryString;
		string m_anchor;
	}

	/** Constructs a new URL object from its components.

		Params:
			schema = Schema/protocol part, e.g. "http"
			host = Host name or address (without IPv6 brackets)
			port = Port number, or 0 for none
			path = Path part of the URL
	*/
	this(string schema, string host, ushort port, InetPath path)
	{
		m_schema = schema;
		m_host = host;
		m_port = port;
		// with vibe-core, the string form of the path is stored as is;
		// otherwise it is passed through urlEncode first
		version (Have_vibe_core) m_pathString = path.toString();
		else m_pathString = urlEncode(path.toString(), "/");
	}
	/// ditto
	this(string schema, InetPath path)
	{
		this(schema, null, 0, path);
	}

	version (Have_vibe_core) {
		/// ditto
		this(string schema, string host, ushort port, PosixPath path)
		{
			this(schema, host, port, cast(InetPath)path);
		}
		/// ditto
		this(string schema, PosixPath path)
		{
			this(schema, null, 0, cast(InetPath)path);
		}
		/// ditto
		this(string schema, string host, ushort port, WindowsPath path)
		{
			this(schema, host, port, cast(InetPath)path);
		}
		/// ditto
		this(string schema, WindowsPath path)
		{
			this(schema, null, 0, cast(InetPath)path);
		}
	}

	/** Constructs a URL from its string representation.

		A string starting with '/' is treated as a local URI (path, query
		string and anchor only). Otherwise the string must start with a
		schema followed by ':'. For all schemas except "file", an optional
		"user[:password]@" prefix, the host and an optional ":port" suffix are
		extracted from the part in front of the first '/'.

		Throws:
			An exception if the string is empty, has no schema, is missing
			the "//" required by the schema, has an empty user name, port or
			required host name, or if the path is not properly URL encoded.

		TODO: additional validation required (e.g. valid host and user names and port)
	*/
	this(string url_string)
	{
		auto str = url_string;
		enforce(str.length > 0, "Empty URL.");
		if( str[0] != '/' ){
			auto idx = str.indexOfCT(':');
			enforce(idx > 0, "No schema in URL:"~str);
			m_schema = str[0 .. idx];
			str = str[idx+1 .. $];
			bool requires_host = false;

			if (isDoubleSlashSchema(m_schema)) {
				// proto://server/path style
				enforce(str.startsWith("//"), "URL must start with proto://...");
				requires_host = true;
				str = str[2 .. $];
			}

			// for "file" URLs everything after the "//" is treated as the path
			if (schema != "file") {
				// the authority part ends at the first slash (or at the end of the string)
				auto si = str.indexOfCT('/');
				if( si < 0 ) si = str.length;
				auto ai = str[0 .. si].indexOfCT('@');
				sizediff_t hs = 0;
				if( ai >= 0 ){
					hs = ai+1;
					auto ci = str[0 .. ai].indexOfCT(':');
					if( ci >= 0 ){
						m_username = str[0 .. ci];
						m_password = str[ci+1 .. ai];
					} else m_username = str[0 .. ai];
					enforce(m_username.length > 0, "Empty user name in URL.");
				}

				m_host = str[hs .. si];

				// Parses a ":port" suffix within src into m_port and returns
				// the index of the colon (<= 0 if no port was found).
				auto findPort ( string src )
				{
					auto pi = src.indexOfCT(':');
					if(pi > 0) {
						enforce(pi < src.length-1, "Empty port in URL.");
						m_port = to!ushort(src[pi+1..$]);
					}
					return pi;
				}


				auto ip6 = m_host.indexOfCT('[');
				if (ip6 == 0) { // [ must be first char
					auto pe = m_host.indexOfCT(']');
					if (pe > 0) {
						// only look for the port after the closing bracket,
						// as the address itself contains colons
						findPort(m_host[pe..$]);
						m_host = m_host[1 .. pe];
					}
				}
				else {
					auto pi = findPort(m_host);
					if(pi > 0) {
						m_host = m_host[0 .. pi];
					}
				}

				enforce(!requires_host || m_host.length > 0,
						"Empty server name in URL.");
				str = str[si .. $];
			}
		}

		this.localURI = str;
	}
	/// ditto
	static URL parse(string url_string)
	{
		return URL(url_string);
	}
	/// ditto
	static URL fromString(string url_string)
	{
		return URL(url_string);
	}

	invariant()
	{
		assert(isURLEncoded(m_pathString), "Wrong URL encoding of '"~m_pathString~"'");
	}

	/// The schema/protocol part of the URL
	@property string schema() const { return m_schema; }
	/// ditto
	@property void schema(string v) { m_schema = v; }

	/// The url encoded path part of the URL
	@property string pathString() const { return m_pathString; }

	/** Set the path part of the URL. It should be properly encoded.

		Throws: An exception if `s` is not properly URL encoded.
	*/
	@property void pathString(string s)
	{
		enforce(isURLEncoded(s), "Wrong URL encoding of the path string '"~s~"'");
		m_pathString = s;
	}

	/// The path part of the URL
	@property InetPath path() const {
		version (Have_vibe_core)
			return InetPath(m_pathString);
		else
			return Path(urlDecode(m_pathString));
	}
	version (Have_vibe_core) {
		/// ditto
		@property void path(Path)(Path p)
			if (isInstanceOf!(GenericPath, Path))
		{
			m_pathString = (cast(InetPath)p).toString();
		}
	} else {
		/// ditto
		@property void path(Path p)
		{
			m_pathString = p.toString().urlEncode("/");
		}
	}

	/// The host part of the URL (depends on the schema)
	@property string host() const { return m_host; }
	/// ditto
	@property void host(string v) { m_host = v; }

	/// The port part of the URL (optional)
	@property ushort port() const { return m_port; }
	/// ditto
	@property void port(ushort v) { m_port = v; }

	/// Get the default port for the given schema or 0
	static ushort defaultPort(string schema) {
		switch(schema){
			default:
			case "file": return 0;
			case "http": return 80;
			case "https": return 443;
			case "ftp": return 21;
			case "spdy": return 443;
			case "sftp": return 22;
		}
	}
	/// ditto
	ushort defaultPort() const {
		return defaultPort(m_schema);
	}

	/// The user name part of the URL (optional)
	@property string username() const { return m_username; }
	/// ditto
	@property void username(string v) { m_username = v; }

	/// The password part of the URL (optional)
	@property string password() const { return m_password; }
	/// ditto
	@property void password(string v) { m_password = v; }

	/// The query string part of the URL (optional)
	@property string queryString() const { return m_queryString; }
	/// ditto
	@property void queryString(string v) { m_queryString = v; }

	/// The anchor part of the URL (optional)
	@property string anchor() const { return m_anchor; }

	/** The path part plus query string and anchor

		The setter splits the string at the first '#' (anchor) and the first
		'?' in front of it (query string). A missing part resets the
		corresponding field. If the remaining path is not properly URL
		encoded, an exception is thrown and the URL is left unmodified.
	*/
	@property string localURI()
	const {
		auto str = appender!string();
		// m_pathString is already encoded
		str.put(m_pathString);
		if( queryString.length ) {
			str.put("?");
			str.put(queryString);
		}
		if( anchor.length ) {
			str.put("#");
			str.put(anchor);
		}
		return str.data;
	}
	/// ditto
	@property void localURI(string str)
	{
		string new_anchor = null;
		string new_query = null;

		auto ai = str.indexOfCT('#');
		if( ai >= 0 ){
			new_anchor = str[ai+1 .. $];
			str = str[0 .. ai];
		}

		auto qi = str.indexOfCT('?');
		if( qi >= 0 ){
			new_query = str[qi+1 .. $];
			str = str[0 .. qi];
		}

		// Validate and store the path first: if it is malformed, the setter
		// throws before query string and anchor have been touched.
		this.pathString = str;
		m_queryString = new_query;
		m_anchor = new_anchor;
	}

	/// The URL to the parent path with query string and anchor stripped.
	@property URL parentURL() const {
		URL ret;
		ret.schema = schema;
		ret.host = host;
		ret.port = port;
		ret.username = username;
		ret.password = password;
		ret.path = path.parentPath;
		return ret;
	}

	/** Converts this URL object to its string representation.

		Hosts containing a colon are written in brackets (IPv6 notation), a
		port of 0 is omitted and the ':' separator in front of the password
		is only written if a password is set.
	*/
	string toString()
	const {
		import std.format;
		auto dst = appender!string();
		dst.put(schema);
		dst.put(":");
		if (isDoubleSlashSchema(schema))
			dst.put("//");
		if (m_username.length || m_password.length) {
			dst.put(username);
			// "user@host" must not be turned into "user:@host"
			if (m_password.length) {
				dst.put(':');
				dst.put(password);
			}
			dst.put('@');
		}

		import std.algorithm : canFind;
		auto ipv6 = host.canFind(":");

		if ( ipv6 ) dst.put('[');
		dst.put(host);
		if ( ipv6 ) dst.put(']');

		if( m_port > 0 ) formattedWrite(dst, ":%d", m_port);
		dst.put(localURI);
		return dst.data;
	}

	/** Tests if this URL has the same schema and host as `rhs` and a path
		that starts with the path of `rhs`.
	*/
	bool startsWith(const URL rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		// FIXME: also consider user, port, querystring, anchor etc
		version (Have_vibe_core)
			return this.path.bySegment.startsWith(rhs.path.bySegment);
		else return this.path.startsWith(rhs.path);
	}

	/** Appends a path or path segment to the path of this URL.

		The binary form returns a new URL built from schema, host, port and
		the concatenated path only; user name, password, query string and
		anchor are not carried over.
	*/
	URL opBinary(string OP, Path)(Path rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// ditto
	URL opBinary(string OP, Path)(Path.Segment rhs) const if (OP == "~" && isAnyPath!Path) { return URL(m_schema, m_host, m_port, this.path ~ rhs); }
	/// ditto
	void opOpAssign(string OP, Path)(Path rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }
	/// ditto
	void opOpAssign(string OP, Path)(Path.Segment rhs) if (OP == "~" && isAnyPath!Path) { this.path = this.path ~ rhs; }

	/** Tests two URLs for equality using '=='.

		Only schema, host and path string are compared.
	*/
	bool opEquals(ref const URL rhs) const {
		if( m_schema != rhs.m_schema ) return false;
		if( m_host != rhs.m_host ) return false;
		if( m_pathString != rhs.m_pathString ) return false;
		return true;
	}
	/// ditto
	// `other` is an lvalue here, so the call resolves to the `ref` overload above
	bool opEquals(const URL other) const { return opEquals(other); }

	/** Orders URLs by schema, then host, then path string.

		Returns: A negative value, zero or a positive value, consistent with
		`opEquals` (zero exactly when the two URLs compare equal).
	*/
	int opCmp(ref const URL rhs) const {
		if( m_schema != rhs.m_schema ) return m_schema.cmp(rhs.m_schema);
		if( m_host != rhs.m_host ) return m_host.cmp(rhs.m_host);
		if( m_pathString != rhs.m_pathString ) return cmp(m_pathString, rhs.m_pathString);
		return 0;
	}
}
352 
/// Evaluates to `true` if `P` is one of `InetPath`, `PosixPath` or `WindowsPath`.
private template isAnyPath(P)
{
	enum isAnyPath = is(P == InetPath) || is(P == PosixPath) || is(P == WindowsPath);
}
354 
/// Tells whether URLs of the given schema use the "schema://..." notation.
private bool isDoubleSlashSchema(string schema)
@safe nothrow @nogc {
	// schemas that are written with a leading "//" after the colon
	static immutable string[] slashSchemas = [
		"ftp", "http", "https", "http+unix", "https+unix",
		"spdy", "sftp", "ws", "wss", "file", "redis", "tcp"
	];

	foreach (candidate; slashSchemas)
		if (candidate == schema)
			return true;
	return false;
}
365 
unittest { // IPv6
	// the brackets are stripped from the host on parsing and re-added by toString
	enum original = "http://[2003:46:1a7b:6c01:64b:80ff:fe80:8003]:8091/abc";
	auto parsed = URL.parse(original);
	assert(parsed.schema == "http", parsed.schema);
	assert(parsed.host == "2003:46:1a7b:6c01:64b:80ff:fe80:8003", parsed.host);
	assert(parsed.port == 8091);
	assert(parsed.path == InetPath("/abc"), parsed.path.toString());
	assert(parsed.toString == original);

	// a host assigned later on must be bracketed in the output as well
	enum replaced = "http://[abcd:46:1a7b:6c01:64b:80ff:fe80:8abc]:8091/abc";
	parsed.host = "abcd:46:1a7b:6c01:64b:80ff:fe80:8abc";
	assert(parsed.toString == replaced);
}
379 
380 
unittest {
	// minimal URL: schema, host and path only
	{
		enum text = "https://www.example.net/index.html";
		auto u = URL.parse(text);
		assert(u.schema == "https", u.schema);
		assert(u.host == "www.example.net", u.host);
		assert(u.path == InetPath("/index.html"), u.path.toString());
		assert(u.toString == text);
	}

	// URL using every supported component
	{
		enum text = "http://jo.doe:password@sub.www.example.net:4711/sub2/index.html?query#anchor";
		auto u = URL.parse(text);
		assert(u.schema == "http", u.schema);
		assert(u.username == "jo.doe", u.username);
		assert(u.password == "password", u.password);
		assert(u.port == 4711, to!string(u.port));
		assert(u.host == "sub.www.example.net", u.host);
		assert(u.path.toString() == "/sub2/index.html", u.path.toString());
		assert(u.queryString == "query", u.queryString);
		assert(u.anchor == "anchor", u.anchor);
		assert(u.toString == text);
	}
}
401 
unittest { // issue #1044
	URL url = URL.parse("http://example.com/p?query#anchor");

	// schema and host must survive every localURI assignment, while path,
	// query string and anchor must reflect the most recent one
	void verify(string expected_path, string expected_query, string expected_anchor)
	{
		assert(url.schema == "http");
		assert(url.host == "example.com");
		assert(url.queryString == expected_query);
		assert(url.anchor == expected_anchor);
		assert(url.pathString == expected_path);
	}

	verify("/p", "query", "anchor");

	url.localURI = "/q";
	verify("/q", "", "");

	url.localURI = "/q?query";
	verify("/q", "query", "");

	url.localURI = "/q#anchor";
	verify("/q", "", "anchor");
}
428 
// websocket schema: host, port and local URI are extracted as usual
unittest {
	auto ws_url = URL("ws://127.0.0.1:8080/echo");
	assert(ws_url.host == "127.0.0.1");
	assert(ws_url.port == 8080);
	assert(ws_url.localURI == "/echo");
}
436 
unittest {
	auto native = PosixPath("/foo bar/boo oom/");
	auto expected = cast(InetPath)native;

	// construction from components
	URL url = URL("http", "example.com", 0, native);
	assert(url.path == expected);

	// assignment through the path property
	url.path = native;
	assert(url.path == expected);
	assert(url.pathString == "/foo%20bar/boo%20oom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%20oom/");

	// an already encoded path string is stored verbatim
	url.pathString = "/foo%20bar/boo%2foom/";
	assert(url.pathString == "/foo%20bar/boo%2foom/");
	assert(url.toString() == "http://example.com/foo%20bar/boo%2foom/");
}
449 
unittest {
	// "%2b" in the URL must come out as a literal '+' in the native path
	auto parsed = URL("http://example.com/some%2bpath");
	auto native = cast(PosixPath)parsed.path;
	assert(native.toString() == "/some+path", parsed.path.toString());
}
454 
unittest {
	// for "file" URLs everything following "file://" ends up in the path
	foreach (sample; [
		["file:///test", "/test"],
		["file://test", "test"],
		["file://./test", "./test"]
	]) {
		assert(URL(sample[0]).pathString == sample[1]);
		assert(URL(sample[0]).path.toString() == sample[1]);
	}
}
463 
unittest { // issue #1318
	// a malformed percent escape in the path must be rejected with an exception
	bool rejected = false;
	try {
		URL("http://something/inval%id");
	} catch (Exception e) {
		rejected = true;
	}
	assert(rejected, "Expected to throw an exception.");
}
470 
unittest {
	// URLs addressing a UNIX domain socket carry the encoded socket path as host
	auto plain = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock");
	assert(plain.schema == "http+unix");
	assert(URL("https+unix://%2Fvar%2Frun%2Fdocker.sock").schema == "https+unix");
	assert(plain.host == "%2Fvar%2Frun%2Fdocker.sock");
	assert(plain.pathString == "");

	auto with_path = URL("http+unix://%2Fvar%2Frun%2Fdocker.sock/container/json");
	assert(with_path.pathString == "/container/json");

	// round trip through the string representation
	assert(URL(with_path.toString()) == with_path);
}
480 
unittest {
	// URL must qualify as a string serializable type
	import vibe.data.serialization : isStringSerializable;
	static assert(isStringSerializable!URL);
}
485 
unittest { // issue #1732
	// changing the port of a URL with an empty path must be reflected by toString
	auto endpoint = URL("tcp://0.0.0.0:1234");
	endpoint.port = 4321;
	const rendered = endpoint.toString;
	assert(rendered == "tcp://0.0.0.0:4321", rendered);
}