1 /** Provides a framework for filtering unwanted messages.
2 
3 	See `filterMessage` for an example that shows the basic steps to use this
4 	library.
5 
6 	Copyright: © 2013-2017 RejectedSoftware e.K.
7 	License: Subject to the terms of the General Public License version 3, as written in the included LICENSE.txt file.
8 	Authors: Sönke Ludwig
9 */
10 module antispam.antispam;
11 
12 public import antispam.filter;
13 
14 import vibe.data.json : Json;
15 import vibe.core.core : Task, runTask;
16 
17 
/** Encapsulates a multi-stage filter state.

	This class is the typical entry point for users of the library. It holds
	an ordered chain of `SpamFilter` instances and combines their individual
	verdicts into a single `SpamAction`.
*/
final class AntispamState {
	/// Signature of a function that creates a new filter instance.
	alias FilterFactory = SpamFilter function() @safe;

	private {
		// The filter chain, in the order in which the filters are queried.
		SpamFilter[] m_filters;
		// Registered filter factories, keyed by filter ID.
		static FilterFactory[string] m_filterFactories;
	}

	@safe:

	/** Registers a new filter type.

		Note that the filter types included with this library are automatically
		registered at startup.

		Registering an ID that is already known replaces the previous factory.

		Params:
			filter = ID under which the filter type can be referenced in
				`addFilter` and in configuration objects
			factory = Function that creates a new instance of the filter
	*/
	static void registerFilter(string filter, FilterFactory factory)
	{
		m_filterFactories[filter] = factory;
	}

	/** Returns a JSON configuration object encapsulating the full filter chain.

		The filter list itself, as well as all filter settings are contained in
		this object.

		Returns:
			A JSON array with one object per filter, in chain order. Each
			object has a `"filter"` field holding the filter ID and a
			`"settings"` field holding the value of the filter's
			`getSettings()`.
	*/
	Json getConfig()
	const @trusted { // Json not safe for vibe.d < 0.8.0
		Json[] ret;
		foreach (f; m_filters) {
			auto e = Json.emptyObject;
			e["filter"] = f.id;
			e["settings"] = f.getSettings();
			ret ~= e;
		}
		return Json(ret);
	}

	/** Loads a JSON configuration object as output by `getConfig`.

		The current filter chain is replaced by the one described in `config`.
		A `null` or `undefined` JSON value results in an empty chain. Apart
		from the array format produced by `getConfig`, a legacy object format
		that maps filter IDs to their settings is accepted.

		If loading fails, the filter chain that was active before the call is
		kept, so that the state is never left with a partially loaded chain.

		Params:
			config = The configuration to load

		Throws:
			An `Exception` is thrown if `config` is of an unsupported JSON
			type. Any exception thrown by `addFilter` or by the JSON accessors
			is passed on to the caller.
	*/
	void loadConfig(Json config)
	@trusted { // Json not safe for vibe.d < 0.8.0
		// Build the new chain from scratch, but remember the old one so that
		// it can be restored if the configuration turns out to be invalid.
		auto previous = m_filters;
		m_filters = null;
		scope (failure) m_filters = previous;

		switch (config.type) {
			default:
				throw new Exception("Invalid Antispam configuration format. Expected JSON array or object of filter/settings pairs.");
			case Json.Type.null_: return;
			case Json.Type.undefined: return;
			case Json.Type.array:
				foreach (e; config)
					addFilter(e["filter"].get!string, e["settings"]);
				break;
			case Json.Type.object: // legacy format (doesn't guarantee order)
				foreach (string f, settings; config)
					addFilter(f, settings);
				break;
		}
	}

	/** Appends a filter to the filter chain.

		Params:
			filter = ID of a filter type previously registered using
				`registerFilter`
			settings = Settings that get passed to the `applySettings` method
				of the newly created filter

		Throws:
			An `Exception` is thrown if `filter` is not a registered filter ID.
	*/
	void addFilter(string filter, Json settings)
	{
		import std.exception : enforce;

		auto pff = filter in m_filterFactories;
		enforce(pff !is null, "Unknown filter ID: "~filter);
		auto f = (*pff)();
		assert(f.id == filter, "Filter ID passed to registerFilter doesn't match ID of created filter.");
		// Only append after the settings were applied successfully, so that a
		// throwing applySettings doesn't leave an unconfigured filter behind.
		f.applySettings(settings);
		m_filters ~= f;
	}

	/** Determines the immediate spam status of a message.

		The immediate status consists of the combined answer of all
		chained filters based on knowledge that is immediately
		available, which means without blocking I/O operations or
		lengthy CPU calculations.

		The filters are queried in chain order. The first filter that answers
		with `SpamAction.amnesty` or `SpamAction.block` decides the result and
		no further filters are queried. Otherwise the result is
		`SpamAction.revoke` if at least one filter answered with `revoke` and
		`SpamAction.pass` if not.

		Params:
			message = The message to check

		Returns:
			The combined immediate status of all filters in the chain.
	*/
	SpamAction determineImmediateStatus(AntispamMessage message)
	{
		bool revoke = false;

		foreach (flt; m_filters) {
			final switch (flt.determineImmediateSpamStatus(message)) {
				case SpamAction.amnesty: return SpamAction.amnesty;
				case SpamAction.pass: break;
				case SpamAction.revoke: revoke = true; break;
				case SpamAction.block: return SpamAction.block;
			}
		}

		return revoke ? SpamAction.revoke : SpamAction.pass;
	}

	/** Determines the asynchronous spam status of a message.

		The asynchronous status is based on status information that
		cannot be determined immediately. Examples are querying
		an external server to check an IP address or to confirm
		a CAPTCHA solution.

		An immediate status of `SpamAction.amnesty` or `SpamAction.block` is
		final and gets returned without querying any filter. Otherwise the
		filters are queried in chain order, starting out with
		`immediate_status` as the result. The first `amnesty` or `block`
		answer decides the result, while a `revoke` answer changes the result
		to `SpamAction.revoke`.

		Params:
			message = The message to check
			immediate_status = The result of calling
				`determineImmediateStatus` for the same message

		Returns:
			The combined status of the immediate and the asynchronous checks.
	*/
	SpamAction determineAsyncStatus(AntispamMessage message, SpamAction immediate_status)
	{
		import std.algorithm.comparison : among;

		if (immediate_status.among(SpamAction.amnesty, SpamAction.block))
			return immediate_status;

		auto status = immediate_status;
		foreach (flt; m_filters) {
			final switch (flt.determineAsyncSpamStatus(message)) {
				case SpamAction.amnesty: return SpamAction.amnesty;
				case SpamAction.pass: break;
				case SpamAction.revoke: status = SpamAction.revoke; break;
				case SpamAction.block: return SpamAction.block;
			}
		}
		return status;
	}

	/** Feeds a message to all self-learning filters to adjust the
		classification criteria.

		Params:
			message = The message to classify as spam or ham
			is_spam = Determines if the message is to be considered
				spam (`true`) or ham (`false`)
	*/
	void classify(in ref AntispamMessage message, bool is_spam)
	{
		foreach (flt; m_filters)
			flt.classify(message, is_spam);
	}

	/** Removes the effects of a previously classified message from
		all self-learning filters.

		Note that not all self-learning filters necessarily support
		de-classification of messages. For a correct result, it may
		be necessary to reset the classification and to re-classify
		all messages again.

		Params:
			message = A message that was passed to `classify`
				previously
			is_spam = The spam status that was passed to the
				previous call to `classify`
	*/
	void declassify(in ref AntispamMessage message, bool is_spam)
	{
		// NOTE(review): the third argument is presumably the "unclassify"
		// flag of SpamFilter.classify - confirm against antispam.filter.
		foreach (flt; m_filters)
			flt.classify(message, is_spam, true);
	}

	/** Resets the learned classification criteria for all self-learning filters
		in the chain.
	*/
	void resetClassification()
	{
		foreach (f; m_filters)
			f.resetClassification();
	}
}
188 
/** Default implementation of full message filtering.

	Runs the message through the filter chain of `state` and reports the
	combined verdict in the form of `SpamAction` values.

	The immediate spam status is determined first and is handed synchronously
	to `on_immediate_status`. A background task is then started to determine
	the asynchronous status. `on_async_status` only gets invoked if that
	status differs from the immediate one.

	As its final step, the background task calls `AntispamState.classify` to
	update any self-learning filters in the chain. The message is classified
	as spam if the asynchronous status is `SpamAction.revoke` or
	`SpamAction.block`, and as ham otherwise.

	Params:
		on_immediate_status = Callback that is invoked synchronously with the
			immediate spam status as its argument
		on_async_status = Callback that is invoked from within the background
			task with the asynchronous spam status as its argument, but only
			if that status is different from the immediate status
		state = The state object that holds the filter list to apply
		message = The message to filter

	Returns:
		A handle to the background task is returned. This can be used
		to determine if the asynchronous part has finished.
*/
Task filterMessage(alias on_immediate_status, alias on_async_status)(AntispamState state, AntispamMessage message)
{
	// Stage one: synchronous verdict, reported before the task gets started.
	auto immediate = state.determineImmediateStatus(message);
	on_immediate_status(immediate);

	// Stage two: the potentially blocking checks run in their own task.
	return runTask({
		auto final_status = state.determineAsyncStatus(message, immediate);

		// Only report back if the verdict has actually changed.
		if (final_status != immediate)
			on_async_status(final_status);

		// Let self-learning filters learn from the final verdict.
		bool is_spam = final_status == SpamAction.revoke || final_status == SpamAction.block;
		state.classify(message, is_spam);
	});
}
229 
///
unittest {
	import vibe.data.json : parseJsonString;
	import std.algorithm.comparison : among;

	// Note that this function only needs to compile - it never gets called
	// by the unit test itself.
	void main()
	{
		// Set up a filter chain consisting of a Bayes filter with default
		// settings, followed by an IP based blacklist.
		auto chain_config = parseJsonString(
			`[
				{"filter": "bayes"},
				{"filter": "blacklist",
					"settings": {
						"ips": ["124.51.45.1", "41.23.11.5"]
					}
				}
			]`);

		auto state = new AntispamState;
		state.loadConfig(chain_config);

		// Describe the message that is to be checked.
		AntispamMessage message;
		message.headers["Subject"] = "8uy CH34P V14GR4!!11";
		message.message = cast(const(ubyte)[])"Just look here: http://bestdrugdealz.c0m";
		message.peerAddress = ["123.52.433.1", "vps12315.some.provider.n3t"];

		state.filterMessage!(
			// Called synchronously with the immediate status.
			(immediate_status) {
				const rejected = immediate_status.among(SpamAction.revoke, SpamAction.block) != 0;
				if (rejected)
					throw new Exception("Your message has been rejected!");
				// otherwise store message...
			},
			// Called later, if the asynchronous checks changed the status.
			(async_status) {
				if (async_status.among(SpamAction.revoke, SpamAction.block)) {
					// Flag or delete the stored message.
				}

				// In theory, async_status could also be amnesty or pass at this
				// point, meaning that a message that was already rejected in the
				// first phase would get accepted in retrospect. You'll have to
				// decide on a per-application basis whether it makes sense to
				// support this case, or whether immediate rejections always take
				// precedence.
			}
		)(message);
	}
}
275 
276 
// Module constructor: registers the filter types that are included with this
// library, so that they can be referenced by ID in AntispamState.addFilter
// and AntispamState.loadConfig.
static this()
{
	import antispam.filters.bayes;
	import antispam.filters.blacklist;

	// Factory functions matching AntispamState.FilterFactory.
	static SpamFilter createBayesFilter() @safe { return new BayesSpamFilter; }
	static SpamFilter createBlackListFilter() @safe { return new BlackListSpamFilter; }

	AntispamState.registerFilter("bayes", &createBayesFilter);
	AntispamState.registerFilter("blacklist", &createBlackListFilter);
}