1 /**
2 	Internet message handling according to RFC822/RFC5322
3 
4 	Copyright: © 2012-2014 Sönke Ludwig
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.inet.message;
9 
10 import vibe.core.log;
11 import vibe.core.stream;
12 import vibe.stream.operations;
13 import vibe.utils.array;
14 import vibe.internal.allocator;
15 import vibe.utils.string;
16 import vibe.utils.dictionarylist;
17 
18 import std.conv;
19 import std.datetime;
20 import std.exception;
21 import std.range;
22 import std.string;
23 
24 
/**
	Reads an RFC5322 style header block (with RFC822 compatibility) from a stream.

	Lines are consumed until the first empty line is encountered. A line that
	starts with a space or tab character is treated as a continuation of the
	preceding field and is appended to its value, separated by a single space.

	Params:
		input = Input stream from which the header is parsed
		dst = Destination map that receives the parsed fields
		max_line_length = The maximum allowed length of a single line
		alloc = Custom allocator to use for allocating strings
		rfc822_compatible = If set, repeated fields are merged into a single
			comma separated value instead of being added as separate entries

	Throws:
		An exception is thrown if a field line contains no colon or if it
		starts with the colon (empty field name).
*/
void parseRFC5322Header(InputStream)(InputStream input, ref InetHeaderMap dst, size_t max_line_length = 1000, IAllocator alloc = vibeThreadAllocator(), bool rfc822_compatible = true)
	if (isInputStream!InputStream)
{
	string field_name, field_value;

	// stores the field that is currently being assembled, if there is one
	void flushField()
	{
		if (field_name.length == 0) return;

		if (!rfc822_compatible) {
			dst.addField(field_name, field_value);
			return;
		}

		auto existing = field_name in dst;
		if (existing) *existing ~= "," ~ field_value; // RFC822 legacy support
		else dst[field_name] = field_value;
	}

	// reads a single line, accepting both "\r\n" and "\n" as line terminators
	string nextLine()
	@safe {
		auto raw = input.readLine(max_line_length, "\n", alloc);
		if (raw.length > 0 && raw[$-1] == '\r') raw = raw[0 .. $-1];
		return () @trusted { return cast(string)raw; } ();
	}

	for (auto line = nextLine(); line.length > 0; line = nextLine()) {
		immutable is_continuation = line[0] == ' ' || line[0] == '\t';
		if (is_continuation) {
			field_value ~= " " ~ line.stripA();
			continue;
		}

		// a new field starts, so the previous one is complete
		flushField();

		auto sep = line.indexOf(':');
		enforce(sep >= 0, "Header is missing ':'.");
		enforce(sep > 0, "Header name is empty.");
		field_name = line[0 .. sep].stripA();
		field_value = line[sep+1 .. $].stripA();
	}
	flushField();
}
73 
unittest { // regular, empty and folded (multi-line) header fields
	import vibe.stream.memory;

	InetHeaderMap fields;
	auto raw = cast(ubyte[])"A: a \r\nB: \r\nC:\r\n\tc\r\n\r\n".dup;
	parseRFC5322Header(createMemoryStream(raw), fields);

	assert(fields.length == 3);
	assert(fields["A"] == "a");
	assert(fields["B"] == "");
	// the folded line is appended to the empty value with a separating space
	assert(fields["C"] == " c");
}
84 
unittest { // an empty header name must be rejected
	import std.exception;
	import vibe.stream.memory;

	InetHeaderMap fields;
	auto raw = cast(ubyte[])": test\r\n\r\n".dup;
	assertThrown(parseRFC5322Header(createMemoryStream(raw), fields));
}
92 
unittest { // tolerant line separator header parser - see: https://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.3
	import std.exception;
	import vibe.stream.memory;

	InetHeaderMap fields;
	// mixes "\r\n" and bare "\n" terminators, followed by body data
	auto raw = cast(ubyte[])"a: test\r\nb: foo\nc: bar\n\nbody".dup;
	parseRFC5322Header(createMemoryStream(raw), fields);

	assert(fields.length == 3);
	assert(fields["a"] == "test");
	assert(fields["b"] == "foo");
	assert(fields["c"] == "bar");
}
104 
// Abbreviated English month names, indexed by month number minus one ("Jan" is at index 0).
private immutable monthStrings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
106 
/**
	Writes the date part of a point in time as an RFC-822/5322 date string
	to the given output range.

	The SysTime is converted to a plain Date and then formatted by the
	Date overload.
*/
void writeRFC822DateString(R)(ref R dst, SysTime time)
{
	auto date = cast(Date)time;
	writeRFC822DateString(dst, date);
}
/**
	Writes an RFC-822/5322 date string to the given output range.

	The output consists of the abbreviated day of the week, the two digit day
	of the month, the abbreviated month name and the year, for example
	"Wed, 02 Oct 2002".
*/
void writeRFC822DateString(R)(ref R dst, Date date)
{
	static immutable weekday_names = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

	string weekday = weekday_names[date.dayOfWeek];
	string month = monthStrings[date.month - 1];

	dst.put(weekday);
	dst.put(", ");
	writeDecimal2(dst, date.day);
	dst.put(' ');
	dst.put(month);
	dst.put(' ');
	writeDecimal(dst, date.year);
}
126 
/**
	Writes the time of day of a point in time as an RFC-822 time string to the
	given output range.

	The time zone offset is taken from the SysTime using
	getRFC822TimeZoneOffset.
*/
void writeRFC822TimeString(R)(ref R dst, SysTime time)
{
	int tz_offset = getRFC822TimeZoneOffset(time);
	auto time_of_day = cast(TimeOfDay)time;
	writeRFC822TimeString(dst, time_of_day, tz_offset);
}
/**
	Writes an RFC-822 time string to the given output range.

	The time is written as "hh:mm:ss", followed by " GMT" for a zero offset or
	by a signed "+hhmm"/"-hhmm" offset otherwise.

	Params:
		dst = Output range that receives the string
		time = The time of day to write
		tz_offset = Offset from UTC in minutes
*/
void writeRFC822TimeString(R)(ref R dst, TimeOfDay time, int tz_offset)
{
	writeDecimal2(dst, time.hour);
	dst.put(':');
	writeDecimal2(dst, time.minute);
	dst.put(':');
	writeDecimal2(dst, time.second);

	if (tz_offset == 0) {
		dst.put(" GMT");
		return;
	}

	char sign = tz_offset < 0 ? '-' : '+';
	int magnitude = tz_offset < 0 ? -tz_offset : tz_offset;

	dst.put(' ');
	dst.put(sign);
	writeDecimal2(dst, magnitude / 60); // hours
	writeDecimal2(dst, magnitude % 60); // minutes
}
151 
/**
	Writes a point in time as an RFC-822 date+time string to the given output
	range.

	The time zone offset is taken from the SysTime using
	getRFC822TimeZoneOffset.
*/
void writeRFC822DateTimeString(R)(ref R dst, SysTime time)
{
	int tz_offset = getRFC822TimeZoneOffset(time);
	auto date_time = cast(DateTime)time;
	writeRFC822DateTimeString(dst, date_time, tz_offset);
}
/**
	Writes an RFC-822 date+time string to the given output range.

	The date string and the time string are written one after the other,
	separated by a single space.

	Params:
		dst = Output range that receives the string
		time = Date and time of day to write
		tz_offset = Offset from UTC in minutes
*/
void writeRFC822DateTimeString(R)(ref R dst, DateTime time, int tz_offset)
{
	auto date_part = time.date;
	auto time_part = time.timeOfDay;

	writeRFC822DateString(dst, date_part);
	dst.put(' ');
	writeRFC822TimeString(dst, time_part, tz_offset);
}
166 
/**
	Formats the time of day of the given time as an RFC-822 time string.

	Returns: The string written by writeRFC822TimeString.
*/
string toRFC822TimeString(SysTime time)
@trusted {
	// 14 characters hold the longest form, "hh:mm:ss +hhmm"
	auto buffer = new FixedAppender!(string, 14);
	writeRFC822TimeString(buffer, time);
	return buffer.data;
}
176 
/**
	Formats the date of the given time as an RFC-822/5322 date string.

	Returns: The string written by writeRFC822DateString.
*/
string toRFC822DateString(SysTime time)
@trusted {
	// 16 characters fit a date with a four digit year, e.g. "Wed, 02 Oct 2002"
	auto buffer = new FixedAppender!(string, 16);
	writeRFC822DateString(buffer, time);
	return buffer.data;
}
186 
/**
	Formats the given time as an RFC-822 date+time string.

	Returns: The string written by writeRFC822DateTimeString.
*/
string toRFC822DateTimeString(SysTime time)
@trusted {
	// 16 characters for the date, one separating space and 14 for the time
	auto buffer = new FixedAppender!(string, 31);
	writeRFC822DateTimeString(buffer, time);
	return buffer.data;
}
196 
/**
	Determines the UTC offset of the given time.

	Returns: The offset from UTC in whole minutes.
*/
int getRFC822TimeZoneOffset(SysTime time)
@safe {
	immutable offset_minutes = time.utcOffset.total!"minutes";
	return cast(int)offset_minutes;
}
204 
/// Parses a date+time string according to RFC-822/5322 (alias of parseRFC822DateTime).
alias parseRFC822DateTimeString = parseRFC822DateTime;
207 
unittest { // parsing and formatting of date+time strings must round-trip
	import std.typecons;

	auto base = DateTime(2002, 10, 2, 8, 0, 0);
	auto samples = [
		tuple("Wed, 02 Oct 2002 08:00:00 GMT", SysTime(base, UTC())),
		tuple("Wed, 02 Oct 2002 08:00:00 +0200", SysTime(base, new immutable SimpleTimeZone(120.minutes))),
		tuple("Wed, 02 Oct 2002 08:00:00 -0130", SysTime(base, new immutable SimpleTimeZone(-90.minutes)))
	];

	foreach (sample; samples) {
		auto text = sample[0];
		auto expected = sample[1];

		auto parsed = parseRFC822DateTimeString(text);
		auto formatted = toRFC822DateTimeString(expected);
		assert(parsed == expected, "Parse error: "~text);
		assert(parseRFC822DateTimeString(formatted) == expected, "Stringify error: "~formatted);
	}
}
223 
224 
/**
	Decodes a string in encoded-word form.

	The decoded text is collected in a string appender and returned as a
	new string.

	See_Also: $(LINK http://tools.ietf.org/html/rfc2047#section-2)
*/
string decodeEncodedWords()(string encoded)
{
	import std.array;

	auto sink = () @trusted {
		auto app = appender!string();
		decodeEncodedWords(app, encoded);
		return app;
	} ();
	return sink.data;
}
/**
	Decodes a string in encoded-word form and writes the result to an output range.

	Text outside of encoded words is copied verbatim. Each encoded word of the
	form "=?charset?encoding?encoded-text?=" is first decoded according to its
	encoding ('B' for Base64, 'Q' for quoted-printable, anything else is taken
	as raw text) and then converted according to its charset: "UTF-8" is written
	as is, "ISO-8859-1" and "ISO-8859-15" are transcoded as Latin-1 and any other
	charset is passed through sanitizeUTF8. Encoding and charset are matched
	case-insensitively, as required by RFC 2047.

	Params:
		dst = Output range that receives the decoded text
		encoded = The string to decode

	Throws:
		An exception is thrown if an encoded word is malformed (missing
		separator or terminator) or if its payload cannot be decoded.

	See_Also: $(LINK http://tools.ietf.org/html/rfc2047#section-2)
*/
void decodeEncodedWords(R)(ref R dst, string encoded)
{
	import std.ascii : toUpper;
	import std.base64;
	import std.encoding;

	// ASCII case-insensitive comparison against an upper case charset name.
	// Works on code units, so arbitrary input bytes cannot trigger a decoding error.
	static bool isCharset(string value, string upper_name)
	{
		if (value.length != upper_name.length) return false;
		foreach (i, ch; upper_name)
			if (toUpper(value[i]) != ch) return false;
		return true;
	}

	while (!encoded.empty) {
		auto start = encoded.indexOf("=?");
		if (start < 0) {
			// no more encoded words, the remainder is plain text
			dst.put(encoded);
			break;
		}

		auto word = encoded[start+2 .. $];

		auto cs_end = word.indexOf('?');
		enforce(cs_end >= 0, "Encoded word is missing the charset separator.");
		auto cs = word[0 .. cs_end];
		word = word[cs_end+1 .. $];

		enforce(word.length >= 2 && word[1] == '?', "Encoded word has an invalid encoding specifier.");
		auto enc = word[0];
		word = word[2 .. $];

		// The terminator has to be searched for behind the second separator. A
		// quoted-printable payload that starts with an escape ("=?UTF-8?Q?=C3=A4?=")
		// would otherwise yield a false match on the "?=" formed by the separator
		// and the first escape character.
		auto data_end = word.indexOf("?=");
		enforce(data_end >= 0, "Encoded word is missing the '?=' terminator.");
		auto data = word[0 .. data_end];

		dst.put(encoded[0 .. start]);
		encoded = word[data_end+2 .. $];

		ubyte[] textenc;
		switch (enc) {
			default: textenc = cast(ubyte[])data; break;
			case 'B', 'b': textenc = Base64.decode(data); break;
			case 'Q', 'q': textenc = QuotedPrintable.decode(data, true); break;
		}

		if (isCharset(cs, "UTF-8")) {
			dst.put(cast(string)textenc);
		} else if (isCharset(cs, "ISO-8859-15") /* hack... */ || isCharset(cs, "ISO-8859-1")) {
			string tmp;
			transcode(cast(Latin1String)textenc, tmp);
			dst.put(tmp);
		} else {
			dst.put(sanitizeUTF8(textenc));
		}
	}
}
282 
283 
/**
	Decodes a From/To header value as it appears in emails.

	Encoded words in the value are decoded first. If the value ends with an
	address in angle brackets ("Name <user@example.org>"), the text in front of
	the brackets is returned as the name, with an enclosing pair of double
	quotes removed. Otherwise the whole value is returned as both name and
	address. An empty value yields an empty name and address.

	Params:
		header = The raw header value
		name = Receives the display name
		address = Receives the e-mail address

	Throws:
		An exception is thrown if the value ends with '>' without containing
		a '<', if an encoded word cannot be decoded or if the name is not
		valid UTF-8.
*/
void decodeEmailAddressHeader(string header, out string name, out string address)
@safe {
	import std.utf;

	scope(failure) logDebug("emailbase %s", header);
	header = decodeEncodedWords(header);
	scope(failure) logDebug("emaildec %s", header);

	// the length check keeps an empty header from causing a range violation
	if (header.length > 0 && header[$-1] == '>') {
		auto sidx = header.lastIndexOf('<');
		enforce(sidx >= 0);
		address = header[sidx+1 .. $-1];
		header = header[0 .. sidx].strip();

		// Only a complete pair of quotes is removed. The length check also
		// covers a value without display name ("<user@example.org>"), for
		// which the remaining text is empty.
		if (header.length >= 2 && header[0] == '"' && header[$-1] == '"') {
			name = header[1 .. $-1];
		} else {
			name = header;
		}
	} else {
		name = header;
		address = header;
	}
	validate(name);
}
312 
313 
/**
	Decodes a message body according to the specified content transfer
	encoding ("Content-Transfer-Encoding" header).

	"quoted-printable" and "base64" are decoded, any other encoding leaves the
	body untouched. The encoding name is matched case-insensitively, as RFC 2045
	defines the values to be not case sensitive. If Base64 decoding of the whole
	body fails, decoding is retried with all CR and LF characters removed; if
	that fails as well, the data decoded so far is returned, followed by a
	"DECODING ERROR" note that contains the exception text.

	Params:
		message_body = The raw body data
		content_transfer_encoding = Value of the "Content-Transfer-Encoding" header

	Returns:
		The result is returned as a UTF-8 string (passed through sanitizeUTF8).
*/
string decodeMessage(in ubyte[] message_body, string content_transfer_encoding)
@safe {
	import std.algorithm;
	import std.ascii : toLower;
	import std.base64;

	// ASCII case-insensitive comparison against a lower case encoding name.
	// Works on code units, so arbitrary header bytes cannot trigger a decoding error.
	static bool isEncoding(string value, string lower_name)
	@safe {
		if (value.length != lower_name.length) return false;
		foreach (i, ch; lower_name)
			if (toLower(value[i]) != ch) return false;
		return true;
	}

	const(ubyte)[] msg = message_body;
	if (isEncoding(content_transfer_encoding, "quoted-printable")) {
		msg = QuotedPrintable.decode(cast(const(char)[])msg);
	} else if (isEncoding(content_transfer_encoding, "base64")) {
		try msg = Base64.decode(msg);
		catch (Exception) {
			// retry with the line breaks stripped from the input
			auto dst = appender!(ubyte[])();
			try {
				auto dec = Base64.decoder(filter!(ch => ch != '\r' && ch != '\n')(msg));
				while (!dec.empty) {
					dst.put(dec.front);
					dec.popFront();
				}
			} catch (Exception e) {
				// keep what could be decoded and append the error description
				dst.put(cast(const(ubyte)[])"\r\n-------\r\nDECODING ERROR: ");
				dst.put(cast(const(ubyte)[])() @trusted { return e.toString(); } ());
			}
			msg = dst.data();
		}
	}
	// TODO: do character encoding etc.
	return sanitizeUTF8(msg);
}
350 
351 
/**
	Behaves similar to string[string] but case does not matter for the key, the insertion order is not
	changed and multiple values per key are supported.

	This kind of map is used for MIME headers (e.g. for HTTP), where the case of the key strings
	does not matter. Note that the map can contain fields with the same key multiple times if
	addField is used for insertion. Insertion order is preserved.

	Note that despite case not being relevant for matching keys, iterating over the map will yield
	the original case of the key that was put in.
*/
alias InetHeaderMap = DictionaryList!(string, false, 12);
364 
365 
366 
/**
	Performs quoted-printable decoding.
*/
struct QuotedPrintable {
	/**
		Decodes quoted-printable encoded text.

		Each "=XX" escape is replaced by the byte with the hexadecimal value XX
		and the soft line break "=\r\n" is removed. All other characters are
		copied unchanged, except that '_' is decoded as a space in header mode.

		Params:
			input = The encoded text
			in_header = Enables the header variant of the encoding, in which
				an underscore represents a space

		Returns:
			The decoded bytes.

		Throws:
			UTFException if less than two characters follow a '=' and
			ConvException if those two characters are not both hexadecimal
			digits.
	*/
	static ubyte[] decode(in char[] input, bool in_header = false)
	@safe {
		import std.utf : UTFException;

		auto ret = appender!(ubyte[])();
		for (size_t i = 0; i < input.length; i++) {
			if (input[i] == '=') {
				if (input.length - i <= 2)
					throw new UTFException("Truncated quoted-printable escape sequence.");
				auto code = input[i+1 .. i+3];
				i += 2;
				// "to" requires both characters to be hex digits. "parse" would
				// silently accept a single valid digit ("=3g" yielding 0x03).
				if (code != "\r\n")
					ret.put(code.to!ubyte(16));
			} else if (in_header && input[i] == '_') ret.put(' ');
			else ret.put(input[i]);
		}
		return ret.data();
	}
}
388 
unittest { // plain text, escape sequences and truncated escape sequences
	import std.exception;
	import std.utf : UTFException;

	// valid input
	assert(QuotedPrintable.decode("abc") == "abc");
	assert(QuotedPrintable.decode("a=3Cc") == "a<c");

	// an escape sequence needs two characters after the '='
	assertThrown!UTFException(QuotedPrintable.decode("ab=c"));
	assertThrown!UTFException(QuotedPrintable.decode("abc="));
}
399 
400 
// Writes the two least significant decimal digits of n, padded with a leading zero.
private void writeDecimal2(R)(ref R dst, uint n)
{
	immutable uint ones = n % 10;
	immutable uint tens = n / 10 % 10;
	dst.put(cast(char)('0' + tens));
	dst.put(cast(char)('0' + ones));
}
408 
// Writes n as a decimal number without padding, one character at a time.
private void writeDecimal(R)(ref R dst, uint n)
{
	// uint.max has ten decimal digits
	char[10] buf;
	size_t pos = buf.length;

	// fill the buffer from the back, least significant digit first
	do {
		buf[--pos] = cast(char)('0' + n % 10);
		n /= 10;
	} while (n > 0);

	foreach (ch; buf[pos .. $]) dst.put(ch);
}