1 /**
2 	Internet message handling according to RFC822/RFC5322
3 
4 	Copyright: © 2012-2014 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module vibe.inet.message;
9 
10 import vibe.core.log;
11 import vibe.core.stream;
12 import vibe.stream.operations;
13 import vibe.utils.array;
14 import vibe.internal.allocator;
15 import vibe.utils.string;
16 import vibe.utils.dictionarylist;
17 
18 import std.conv;
19 import std.datetime;
20 import std.exception;
21 import std.range;
22 import std.string;
23 
24 
/**
	Reads header fields from a stream, following RFC5322 (optionally with
	RFC822 style merging of repeated fields).

	Lines are read until an empty line is encountered. A line that starts with
	a space or a tab is treated as a continuation of the previous field and is
	appended to its value, separated by a single space.

	Params:
		input = Input stream from which the header is parsed
		dst = Destination map to write into
		max_line_length = The maximum allowed length of a single line
		alloc = Custom allocator to use for allocating strings
		rfc822_compatible = Flag indicating that duplicate fields should be merged using a comma

	Throws:
		An exception is thrown if a field line contains no ':' or if the
		field name in front of the ':' is empty.
*/
void parseRFC5322Header(InputStream)(InputStream input, ref InetHeaderMap dst, size_t max_line_length = 1000, IAllocator alloc = vibeThreadAllocator(), bool rfc822_compatible = true)
	if (isInputStream!InputStream)
{
	string name, value;

	// Stores the field that has been accumulated so far, if there is one.
	void flushField() {
		if (name.length == 0) return;

		if (!rfc822_compatible) {
			dst.addField(name, value);
			return;
		}

		auto existing = name in dst;
		if (existing) *existing ~= "," ~ value; // RFC822 legacy support
		else dst[name] = value;
	}

	// Reads a single CRLF terminated line and exposes it as a string.
	string nextLine() @safe {
		auto raw = input.readLine(max_line_length, "\r\n", alloc);
		return () @trusted { return cast(string)raw; } ();
	}

	for (string line = nextLine(); line.length > 0; line = nextLine()) {
		immutable is_continuation = line[0] == ' ' || line[0] == '\t';
		if (is_continuation) {
			value ~= " " ~ line.stripA();
			continue;
		}

		// a new field starts here, so the previous one is complete
		flushField();

		auto sep = line.indexOf(':');
		enforce(sep >= 0, "Header is missing ':'.");
		enforce(sep > 0, "Header name is empty.");
		name = line[0 .. sep].stripA();
		value = line[sep+1 .. $].stripA();
	}

	// the terminating empty line does not flush the last field by itself
	flushField();
}
72 
unittest { // regular, empty and folded (multi-line) header fields
	import vibe.stream.memory;

	InetHeaderMap fields;
	auto raw = cast(ubyte[])"A: a \r\nB: \r\nC:\r\n\tc\r\n\r\n".dup;
	parseRFC5322Header(createMemoryStream(raw), fields);

	assert(fields.length == 3);
	assert(fields["A"] == "a");
	assert(fields["B"] == "");
	// the continuation line is joined using a single space
	assert(fields["C"] == " c");
}
83 
unittest { // a field line without a name must be rejected
	import std.exception;
	import vibe.stream.memory;

	InetHeaderMap fields;
	auto raw = cast(ubyte[])": test\r\n\r\n".dup;
	assertThrown(parseRFC5322Header(createMemoryStream(raw), fields));
}
91 
92 
// Abbreviated English month names, January first. writeRFC822DateString
// indexes this table with (month - 1).
private immutable monthStrings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
94 
/**
	Writes the date in RFC-822/5322 notation to an output range.

	The output has the form "Wed, 02 Oct 2002": abbreviated week day, two
	digit day of month, abbreviated month name and the year.

	Params:
		dst = Output range that receives the characters
		time = Point in time whose date part is written
		date = Date to write
*/
void writeRFC822DateString(R)(ref R dst, SysTime time)
{
	auto date = cast(Date)time;
	writeRFC822DateString(dst, date);
}
/// ditto
void writeRFC822DateString(R)(ref R dst, Date date)
{
	static immutable weekdayNames = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

	immutable weekday = weekdayNames[date.dayOfWeek];
	immutable month = monthStrings[date.month - 1]; // months are 1-based

	dst.put(weekday);
	dst.put(", ");
	writeDecimal2(dst, date.day);
	dst.put(' ');
	dst.put(month);
	dst.put(' ');
	writeDecimal(dst, date.year);
}
114 
/**
	Writes the time of day in RFC-822 notation to an output range.

	The output has the form "HH:MM:SS" followed by the time zone, which is
	written as " GMT" for a zero offset and as " +HHMM" / " -HHMM" otherwise.

	Params:
		dst = Output range that receives the characters
		time = Time to write
		tz_offset = Offset from UTC in minutes
*/
void writeRFC822TimeString(R)(ref R dst, SysTime time)
{
	auto time_of_day = cast(TimeOfDay)time;
	writeRFC822TimeString(dst, time_of_day, getRFC822TimeZoneOffset(time));
}
/// ditto
void writeRFC822TimeString(R)(ref R dst, TimeOfDay time, int tz_offset)
{
	writeDecimal2(dst, time.hour);
	dst.put(':');
	writeDecimal2(dst, time.minute);
	dst.put(':');
	writeDecimal2(dst, time.second);

	if (tz_offset == 0) {
		dst.put(" GMT");
		return;
	}

	dst.put(' ');
	if (tz_offset < 0) {
		dst.put('-');
		// only the magnitude is needed for the digits
		tz_offset = -tz_offset;
	} else {
		dst.put('+');
	}

	// the offset is given in minutes, split it into hours and minutes
	writeDecimal2(dst, tz_offset / 60);
	writeDecimal2(dst, tz_offset % 60);
}
139 
/**
	Writes date and time in RFC-822 notation to an output range.

	The date (see writeRFC822DateString) and the time (see
	writeRFC822TimeString) are written separated by a single space.

	Params:
		dst = Output range that receives the characters
		time = Point in time to write
		tz_offset = Offset from UTC in minutes
*/
void writeRFC822DateTimeString(R)(ref R dst, SysTime time)
{
	auto date_time = cast(DateTime)time;
	writeRFC822DateTimeString(dst, date_time, getRFC822TimeZoneOffset(time));
}
/// ditto
void writeRFC822DateTimeString(R)(ref R dst, DateTime time, int tz_offset)
{
	auto date_part = time.date;
	auto time_part = time.timeOfDay;

	writeRFC822DateString(dst, date_part);
	dst.put(' ');
	writeRFC822TimeString(dst, time_part, tz_offset);
}
154 
/**
	Formats the time of day of the given time as an RFC-822 time string.

	Returns: The string produced by writeRFC822TimeString.
*/
string toRFC822TimeString(SysTime time)
@trusted {
	// "HH:MM:SS +HHMM" is the longest output: 14 characters.
	// NOTE(review): the appender is heap allocated, presumably so that the
	// returned data stays valid after returning - confirm against FixedAppender.
	alias Buffer = FixedAppender!(string, 14);
	auto buf = new Buffer;
	writeRFC822TimeString(buf, time);
	return buf.data;
}
164 
/**
	Formats the date of the given time as an RFC-822/5322 date string.

	Returns: The string produced by writeRFC822DateString.
*/
string toRFC822DateString(SysTime time)
@trusted {
	// "Wed, 02 Oct 2002" takes 16 characters for a four digit year.
	// NOTE(review): the appender is heap allocated, presumably so that the
	// returned data stays valid after returning - confirm against FixedAppender.
	alias Buffer = FixedAppender!(string, 16);
	auto buf = new Buffer;
	writeRFC822DateString(buf, time);
	return buf.data;
}
174 
/**
	Formats the given time as an RFC-822 date+time string.

	Returns: The string produced by writeRFC822DateTimeString.
*/
string toRFC822DateTimeString(SysTime time)
@trusted {
	// 16 (date) + 1 (space) + 14 (time including zone) = 31 characters.
	// NOTE(review): the appender is heap allocated, presumably so that the
	// returned data stays valid after returning - confirm against FixedAppender.
	alias Buffer = FixedAppender!(string, 31);
	auto buf = new Buffer;
	writeRFC822DateTimeString(buf, time);
	return buf.data;
}
184 
/**
	Determines how far the given time is offset from UTC.

	Returns: The UTC offset of `time` in whole minutes.
*/
int getRFC822TimeZoneOffset(SysTime time)
@safe {
	immutable offset = time.utcOffset;
	immutable minutes = offset.total!"minutes";
	return cast(int)minutes;
}
192 
/// Parses a date+time string according to RFC-822/5322.
///
/// This is an alias of `parseRFC822DateTime`, which is not defined in this
/// file (presumably the function of that name from std.datetime - confirm).
alias parseRFC822DateTimeString = parseRFC822DateTime;
195 
unittest { // parsing and formatting of date+time strings must round-trip
	import std.typecons;

	// all cases share the same wall clock time, only the time zone differs
	auto wallclock = DateTime(2002, 10, 2, 8, 0, 0);
	auto cases = [
		tuple("Wed, 02 Oct 2002 08:00:00 GMT", SysTime(wallclock, UTC())),
		tuple("Wed, 02 Oct 2002 08:00:00 +0200", SysTime(wallclock, new immutable SimpleTimeZone(120.minutes))),
		tuple("Wed, 02 Oct 2002 08:00:00 -0130", SysTime(wallclock, new immutable SimpleTimeZone(-90.minutes)))
	];

	foreach (entry; cases) {
		auto text = entry[0];
		auto expected = entry[1];

		auto parsed = parseRFC822DateTimeString(text);
		auto formatted = toRFC822DateTimeString(expected);
		assert(parsed == expected, "Parse error: "~text);
		assert(parseRFC822DateTimeString(formatted) == expected, "Stringify error: "~formatted);
	}
}
211 
212 
/**
	Decodes a string in encoded-word form.

	Every occurrence of `=?charset?encoding?data?=` is replaced by its decoded
	text, all other text is copied verbatim. The encoding letter ('B' for
	Base64, 'Q' for quoted-printable) and the charset name are matched case
	insensitively. "UTF-8" data is passed through, "ISO-8859-1" and
	"ISO-8859-15" are transcoded from Latin-1 and data in any other charset is
	run through `sanitizeUTF8`. Data with an unknown encoding letter is used
	undecoded.

	Params:
		dst = Output range that receives the decoded text
		encoded = The text that may contain encoded words

	Throws:
		An exception is thrown if a "=?" is not followed by a complete
		encoded word, or if the encoded data itself is invalid.

	See_Also: $(LINK http://tools.ietf.org/html/rfc2047#section-2)
*/
string decodeEncodedWords()(string encoded)
{
	import std.array;
	Appender!string dst;
	() @trusted {
		dst = appender!string();
		decodeEncodedWords(dst, encoded);
	} ();
	return dst.data;
}
/// ditto
void decodeEncodedWords(R)(ref R dst, string encoded)
{
	import std.ascii : asciiToUpper = toUpper;
	import std.base64;
	import std.encoding;
	import std.uni : uniToUpper = toUpper;

	while (!encoded.empty) {
		auto start = encoded.indexOf("=?");
		if (start < 0) {
			// no further encoded words, the rest is plain text
			dst.put(encoded);
			break;
		}

		// Split "charset?encoding?data?=" field by field. Searching for the
		// terminating "?=" right away would be wrong, because Q encoded data
		// frequently starts with '=' (e.g. "?Q?=C3=A4"), which makes the '?'
		// delimiter in front of the data look like a terminator.
		auto word = encoded[start+2 .. $];
		auto cs_end = word.indexOf('?');
		enforce(cs_end >= 0, "Encoded word is missing the charset delimiter.");
		auto cs = word[0 .. cs_end];

		auto tail = word[cs_end+1 .. $]; // "<encoding>?<data>?=..."
		enforce(tail.length >= 2 && tail[1] == '?', "Encoded word is missing the encoding delimiter.");
		auto enc = tail[0];

		auto payload = tail[2 .. $];
		auto end = payload.indexOf("?=");
		enforce(end >= 0, "Encoded word is missing the terminating \"?=\".");
		auto data = payload[0 .. end];

		// the word is well-formed, so emit the plain text in front of it
		dst.put(encoded[0 .. start]);
		encoded = payload[end+2 .. $];

		ubyte[] textenc;
		switch (asciiToUpper(enc)) { // the encoding letter is case insensitive
			default: textenc = cast(ubyte[])data; break;
			case 'B': textenc = Base64.decode(data); break;
			case 'Q': textenc = QuotedPrintable.decode(data, true); break;
		}

		switch (uniToUpper(cs)) { // charset names are case insensitive
			default: dst.put(sanitizeUTF8(textenc)); break;
			case "UTF-8": dst.put(cast(string)textenc); break;
			case "ISO-8859-15": // hack...
			case "ISO-8859-1":
				string tmp;
				transcode(cast(Latin1String)textenc, tmp);
				dst.put(tmp);
				break;
		}
	}
}
270 
271 
/**
	Decodes a From/To header value as it appears in emails.

	Encoded words in the header are decoded first. A value of the form
	`Name <address>` or `"Name" <address>` is then split into its display name
	(without the surrounding quotes) and the address between the angle
	brackets. Any other value, including an empty one, is returned unchanged
	as both name and address.

	Params:
		header = The raw header value
		name = Receives the display name
		address = Receives the address

	Throws:
		An exception is thrown if the value ends with '>' but contains no
		'<', if an encoded word cannot be decoded, or if the resulting name
		is not valid UTF-8.
*/
void decodeEmailAddressHeader(string header, out string name, out string address)
@safe {
	import std.utf;

	scope(failure) logDebug("emailbase %s", header);
	header = decodeEncodedWords(header);
	scope(failure) logDebug("emaildec %s", header);

	// the length check keeps an empty header from indexing out of bounds
	if (header.length > 0 && header[$-1] == '>') {
		auto sidx = header.lastIndexOf('<');
		enforce(sidx >= 0, "Email address header is missing '<'.");
		address = header[sidx+1 .. $-1];
		header = header[0 .. sidx].strip();

		// Only remove the quotes if both are present. The display name may
		// also be empty (address-only form "<user@example.com>").
		if (header.length >= 2 && header[0] == '"' && header[$-1] == '"') {
			name = header[1 .. $-1];
		} else {
			name = header;
		}
	} else {
		name = header;
		address = header;
	}
	validate(name);
}
300 
301 
/**
	Decodes a message body according to the specified content transfer
	encoding ("Content-Transfer-Encoding" header).

	"quoted-printable" and "base64" are decoded, with the encoding name being
	matched case insensitively. Any other encoding leaves the body untouched.
	If a Base64 body cannot be decoded directly, decoding is retried with all
	CR and LF characters removed. If that fails as well, the part that was
	decoded so far is returned, followed by a "DECODING ERROR" note.

	The result is returned as a UTF-8 string.

	Params:
		message_body = The raw bytes of the message body
		content_transfer_encoding = Value of the "Content-Transfer-Encoding" header
*/
string decodeMessage(in ubyte[] message_body, string content_transfer_encoding)
@safe {
	import std.algorithm;
	import std.base64;
	import std.uni : toLower;

	const(ubyte)[] msg = message_body;
	// the encoding name is case insensitive ("Base64", "QUOTED-PRINTABLE", ...)
	switch (toLower(content_transfer_encoding)) {
		default: break;
		case "quoted-printable": msg = QuotedPrintable.decode(cast(const(char)[])msg); break;
		case "base64":
			try msg = Base64.decode(msg);
			catch(Exception e){
				// bodies are usually wrapped into lines, so retry without line breaks
				auto dst = appender!(ubyte[])();
				try {
					auto dec = Base64.decoder(filter!(ch => ch != '\r' && ch != '\n')(msg));
					while( !dec.empty ){
						dst.put(dec.front);
						dec.popFront();
					}
				} catch(Exception retry_error){
					// keep what has been decoded and append the error description
					dst.put(cast(const(ubyte)[])"\r\n-------\r\nDECODING ERROR: ");
					dst.put(cast(const(ubyte)[])() @trusted { return retry_error.toString(); } ());
				}
				msg = dst.data();
			}
			break;
	}
	// TODO: do character encoding etc.
	return sanitizeUTF8(msg);
}
338 
339 
/**
	Behaves similar to string[string] but case does not matter for the key, the insertion order is not
	changed and multiple values per key are supported.

	This kind of map is used for MIME headers (e.g. for HTTP), where the case of the key strings
	does not matter. Note that the map can contain fields with the same key multiple times if
	addField is used for insertion. Insertion order is preserved.

	Note that despite case not being relevant for matching keys, iterating over the map will yield
	the original case of the key that was put in.
*/
alias InetHeaderMap = DictionaryList!(string, false, 12);
352 
353 
354 
/**
	Performs quoted-printable decoding.
*/
struct QuotedPrintable {
	/**
		Decodes quoted-printable text to raw bytes.

		A '=' followed by two hexadecimal digits yields the corresponding
		byte, and a '=' followed by CRLF (soft line break) yields nothing.
		All other characters are copied unchanged, except that '_' is
		decoded to a space if `in_header` is set.

		Params:
			input = The quoted-printable encoded text
			in_header = Enables the '_' to space substitution used for
				encoded words in headers

		Throws:
			An exception is thrown if a '=' is followed by less than two
			characters or by something that does not start with a
			hexadecimal digit.
	*/
	static ubyte[] decode(in char[] input, bool in_header = false)
	@safe {
		auto ret = appender!(ubyte[])();
		for( size_t i = 0; i < input.length; i++ ){
			if( input[i] == '=' ){
				// A truncated escape sequence at the end of the input must be
				// reported as a decoding error instead of slicing out of bounds.
				enforce(input.length - i >= 3, "Truncated escape sequence in quoted-printable data.");
				auto code = input[i+1 .. i+3];
				i += 2;
				// "=\r\n" is a soft line break and produces no output
				if( code != "\r\n" )
					ret.put(code.parse!ubyte(16));
			} else if( in_header && input[i] == '_') ret.put(' ');
			else ret.put(input[i]);
		}
		return ret.data();
	}
}
374 
375 
376 
// Writes the two least significant decimal digits of n, padded with a
// leading zero where necessary (7 -> "07", 123 -> "23").
private void writeDecimal2(R)(ref R dst, uint n)
{
	immutable tens = (n / 10) % 10;
	immutable ones = n % 10;
	dst.put(cast(char)('0' + tens));
	dst.put(cast(char)('0' + ones));
}
384 
// Writes n in decimal notation without any padding, one character at a time.
private void writeDecimal(R)(ref R dst, uint n)
{
	// a 32-bit unsigned value has at most 10 decimal digits
	char[10] buf;
	size_t pos = buf.length;

	// fill the buffer from the back, least significant digit first
	do {
		buf[--pos] = cast(char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	foreach (ch; buf[pos .. $]) dst.put(ch);
}