/** Provides a framework for filtering unwanted messages.

	See `filterMessage` for an example that shows the basic steps to use this
	library.

	Copyright: © 2013-2017 RejectedSoftware e.K.
	License: Subject to the terms of the General Public License version 3, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module antispam.antispam;

public import antispam.filter;

import vibe.data.json : Json;
import vibe.core.core : Task, runTask;


/** Encapsulates a multi-stage filter state.

	This class is the typical entry point for users of the library. It holds
	an ordered chain of `SpamFilter` instances and combines their individual
	verdicts into a single `SpamAction`.
*/
final class AntispamState {
	/// Factory function that creates a fresh instance of a filter type.
	alias FilterFactory = SpamFilter function() @safe;

	private {
		// Filter chain in evaluation order.
		SpamFilter[] m_filters;
		// Maps filter IDs to the factories passed to `registerFilter`.
		static FilterFactory[string] m_filterFactories;
	}

	@safe:

	/** Registers a new filter type.

		Note that the filter types included with this library are automatically
		registered at startup.

		Params:
			filter = ID under which the filter type can later be requested
				via `addFilter` or `loadConfig`
			factory = Function that creates a new instance of the filter; the
				`id` of the created filter is expected to equal `filter`
	*/
	static void registerFilter(string filter, FilterFactory factory)
	{
		m_filterFactories[filter] = factory;
	}

	/** Returns a JSON configuration object encapsulating the full filter chain.

		The filter list itself, as well as all filter settings are contained in
		this object.

		Returns:
			A JSON array with one `{"filter": <id>, "settings": <settings>}`
			object per filter, in chain order.
	*/
	Json getConfig()
	const @trusted { // Json not safe for vibe.d < 0.8.0
		Json[] ret;
		foreach (f; m_filters) {
			auto e = Json.emptyObject;
			e["filter"] = f.id;
			e["settings"] = f.getSettings();
			ret ~= e;
		}
		return Json(ret);
	}

	/** Loads a JSON configuration object as output by `getConfig`.

		The existing filter chain is replaced by the one described in
		`config`. A `null` or `undefined` value results in an empty chain.
		Apart from the array format produced by `getConfig`, a legacy object
		format mapping filter IDs to settings is accepted.

		If loading fails, the filter chain that was active before the call is
		left in place, so that an invalid configuration can never silently
		leave the state with an empty or partially built chain.

		Params:
			config = The configuration to load

		Throws:
			An `Exception` if `config` is neither `null`, `undefined`, an
			array nor an object, or if one of the entries cannot be added
			(see `addFilter`).
	*/
	void loadConfig(Json config)
	@trusted { // Json not safe for vibe.d < 0.8.0
		// Remember the current chain and restore it if anything below throws.
		// Appending to the reset (null) slice allocates a new array, so the
		// saved chain is never modified while the new one is being built.
		auto previous = m_filters;
		scope (failure) m_filters = previous;

		m_filters = null;

		switch (config.type) {
			default:
				throw new Exception("Invalid Antispam configuration format. Expected JSON array or object of filter/settings pairs.");
			case Json.Type.null_: return;
			case Json.Type.undefined: return;
			case Json.Type.array:
				foreach (e; config)
					addFilter(e["filter"].get!string, e["settings"]);
				break;
			case Json.Type.object: // legacy format (doesn't guarantee order)
				foreach (string f, settings; config)
					addFilter(f, settings);
				break;
		}
	}

	/** Appends a filter to the filter chain.

		Params:
			filter = ID of a filter type previously passed to `registerFilter`
			settings = Settings that get applied to the newly created filter

		Throws:
			An `Exception` if no filter type with the given ID is registered.
	*/
	void addFilter(string filter, Json settings)
	{
		import std.exception : enforce;

		auto pff = filter in m_filterFactories;
		enforce(pff !is null, "Unknown filter ID: "~filter);
		auto f = (*pff)();
		assert(f.id == filter, "Filter ID passed to registerFilter doesn't match ID of created filter.");
		f.applySettings(settings);
		m_filters ~= f;
	}

	/** Determines the immediate spam status of a message.

		The immediate status consists of the combined answer of all
		chained filters based on knowledge that is immediately
		available, which means without blocking I/O operations or
		lengthy CPU calculations.

		The first filter answering `amnesty` or `block` decides the result.
		Otherwise the result is `revoke` if at least one filter answered
		`revoke`, and `pass` if not.

		Params:
			message = The message to check

		Returns:
			The combined immediate status of all filters in the chain.
	*/
	SpamAction determineImmediateStatus(AntispamMessage message)
	{
		bool revoke = false;

		foreach (flt; m_filters) {
			final switch (flt.determineImmediateSpamStatus(message)) {
				case SpamAction.amnesty: return SpamAction.amnesty;
				case SpamAction.pass: break;
				case SpamAction.revoke: revoke = true; break;
				case SpamAction.block: return SpamAction.block;
			}
		}

		return revoke ? SpamAction.revoke : SpamAction.pass;
	}

	/** Determines the asynchronous spam status of a message.

		The asynchronous status is based on status information that
		cannot be determined immediately. Examples are querying
		an external server to check an IP address or to confirm
		a CAPTCHA solution.

		An immediate status of `amnesty` or `block` is final and is returned
		without consulting any filter. Otherwise the first filter answering
		`amnesty` or `block` decides the result, and a `revoke` answer
		overrides an immediate status of `pass`.

		Params:
			message = The message to check
			immediate_status = The result of a previous call to
				`determineImmediateStatus` for the same message

		Returns:
			The combined status after the asynchronous checks.
	*/
	SpamAction determineAsyncStatus(AntispamMessage message, SpamAction immediate_status)
	{
		import std.algorithm.comparison : among;

		if (immediate_status.among(SpamAction.amnesty, SpamAction.block))
			return immediate_status;

		auto status = immediate_status;
		foreach (flt; m_filters) {
			final switch (flt.determineAsyncSpamStatus(message)) {
				case SpamAction.amnesty: return SpamAction.amnesty;
				case SpamAction.pass: break;
				case SpamAction.revoke: status = SpamAction.revoke; break;
				case SpamAction.block: return SpamAction.block;
			}
		}
		return status;
	}

	/** Feeds a message to all self-learning filters to adjust the
		classification criteria.

		Params:
			message = The message to classify as spam or ham
			is_spam = Determines if the message is to be considered
				spam (`true`) or ham (`false`)
	*/
	void classify(in ref AntispamMessage message, bool is_spam)
	{
		foreach (flt; m_filters)
			flt.classify(message, is_spam);
	}

	/** Removes the effects of a previously classified message from
		all self-learning filters.

		Note that not all self-learning filters necessarily support
		de-classification of messages. For a correct result, it may
		be necessary to reset the classification and to re-classify
		all messages again.

		Params:
			message = A message that was passed to `classify`
				previously
			is_spam = The spam status that was passed to the
				previous call to `classify`
	*/
	void declassify(in ref AntispamMessage message, bool is_spam)
	{
		// Presumably the third argument asks the filter to undo a previous
		// classification - see `SpamFilter.classify` to confirm.
		foreach (flt; m_filters)
			flt.classify(message, is_spam, true);
	}

	/** Resets the learned classification criteria for all self-learning filters
		in the chain.
	*/
	void resetClassification()
	{
		foreach (f; m_filters)
			f.resetClassification();
	}
}

/** Default implementation of full message filtering.

	The function invokes each filter in the filter chain and combines the
	results to get a final answer in the form of a `SpamAction` status.

	It first determines the immediate spam status, calling the
	`on_immediate_status` callback with the result. Then it starts a
	background task to determine the asynchronous state and, if different
	to the immediate status, passes the result to the `on_async_status`
	callback.

	Afterwards it will call `AntispamState.classify` to update any self-learning
	filters in the chain. The message is classified as spam if the
	asynchronous status is `revoke` or `block`, and as ham otherwise.

	Params:
		on_immediate_status = Callback that is invoked synchronously with the
			immediate spam status as its argument
		on_async_status = Callback that gets called from the background task
			if the asynchronous spam status differs from the immediate spam
			status
		state = The state object that holds the filter list to apply
		message = The message to filter

	Returns:
		A handle to the background task is returned. This can be used
		to determine if the asynchronous part has finished.
*/
Task filterMessage(alias on_immediate_status, alias on_async_status)(AntispamState state, AntispamMessage message)
{
	import std.algorithm.comparison : among;

	auto ss = state.determineImmediateStatus(message);
	on_immediate_status(ss);
	return runTask({
		auto as = state.determineAsyncStatus(message, ss);
		if (ss != as)
			on_async_status(as);
		state.classify(message, as.among(SpamAction.revoke, SpamAction.block) != 0);
	});
}

///
unittest {
	import vibe.data.json : parseJsonString;
	import std.algorithm.comparison : among;

	void main()
	{
		auto config = parseJsonString(
			`[
				{"filter": "bayes"},
				{"filter": "blacklist",
					"settings": {
						"ips": ["124.51.45.1", "41.23.11.5"]
					}
				}
			]`);

		auto antispam = new AntispamState;
		antispam.loadConfig(config);

		AntispamMessage msg;
		msg.headers["Subject"] = "8uy CH34P V14GR4!!11";
		msg.message = cast(const(ubyte)[])"Just look here: http://bestdrugdealz.c0m";
		msg.peerAddress = ["123.52.433.1", "vps12315.some.provider.n3t"];

		antispam.filterMessage!(
			(status) {
				if (status.among(SpamAction.revoke, SpamAction.block))
					throw new Exception("Your message has been rejected!");
				// otherwise store message...
			},
			(async_status) {
				if (async_status.among!(SpamAction.revoke, SpamAction.block)) {
					// Flag or delete the stored message.
				}

				// It could also theoretically happen here that async_status is amnesty
				// or pass, so that a message that was already rejected in the first
				// phase would be accepted in retrospect. You'll have to decide on
				// a per-application basis if it makes sense to support this case, or
				// if immediate rejections always have precedence.
			}
		)(msg);
	}
}


// Registers the filter types that ship with this library under their IDs.
static this()
{
	import antispam.filters.bayes;
	import antispam.filters.blacklist;
	AntispamState.registerFilter("bayes", () => cast(SpamFilter)new BayesSpamFilter);
	AntispamState.registerFilter("blacklist", () => cast(SpamFilter)new BlackListSpamFilter);
}