Package lamson :: Module bounce
[hide private]
[frames] | no frames]

Source Code for Module lamson.bounce

  1  """ 
  2  Bounce analysis module for Lamson.  It uses an algorithm that tries 
  3  to simply collect the headers that are most likely found in a bounce 
  4  message, and then determine a probability based on what it finds. 
  5  """ 
  6   
  7  import re 
  8  from functools import wraps 
  9   
 10   
 11  BOUNCE_MATCHERS = { 
 12      'Action': re.compile(r'(failed|delayed|delivered|relayed|expanded)', re.IGNORECASE | re.DOTALL), 
 13      'Content-Description': re.compile(r'(Notification|Undelivered Message|Delivery Report)', re.IGNORECASE | re.DOTALL), 
 14      'Diagnostic-Code': re.compile(r'(.+);\s*([0-9\-\.]+)?\s*(.*)', re.IGNORECASE | re.DOTALL), 
 15      'Final-Recipient': re.compile(r'(.+);\s*(.*)', re.IGNORECASE | re.DOTALL), 
 16      'Received': re.compile(r'(.+)', re.IGNORECASE | re.DOTALL), 
 17      'Remote-Mta': re.compile(r'(.+);\s*(.*)', re.IGNORECASE | re.DOTALL), 
 18      'Reporting-Mta': re.compile(r'(.+);\s*(.*)', re.IGNORECASE | re.DOTALL), 
 19      'Status': re.compile(r'([0-9]+)\.([0-9]+)\.([0-9]+)', re.IGNORECASE | re.DOTALL) 
 20  } 
 21   
 22  BOUNCE_MAX = len(BOUNCE_MATCHERS) * 2.0 
 23   
 24  PRIMARY_STATUS_CODES = { 
 25      u'1': u'Unknown Status Code 1', 
 26      u'2': u'Success', 
 27      u'3': u'Temporary Failure', 
 28      u'4': u'Persistent Transient Failure', 
 29      u'5': u'Permanent Failure' 
 30  } 
 31   
 32  SECONDARY_STATUS_CODES = { 
 33      u'0':   u'Other or Undefined Status', 
 34      u'1':   u'Addressing Status', 
 35      u'2':   u'Mailbox Status', 
 36      u'3':   u'Mail System Status', 
 37      u'4':   u'Network and Routing Status', 
 38      u'5':   u'Mail Delivery Protocol Status', 
 39      u'6':   u'Message Content or Media Status', 
 40      u'7':   u'Security or Policy Status', 
 41  } 
 42   
 43  COMBINED_STATUS_CODES = { 
 44      u'00': u'Not Applicable', 
 45      u'10': u'Other address status', 
 46      u'11': u'Bad destination mailbox address', 
 47      u'12': u'Bad destination system address', 
 48      u'13': u'Bad destination mailbox address syntax', 
 49      u'14': u'Destination mailbox address ambiguous', 
 50      u'15': u'Destination mailbox address valid', 
 51      u'16': u'Mailbox has moved', 
 52      u'17': u'Bad sender\'s mailbox address syntax', 
 53      u'18': u'Bad sender\'s system address', 
 54   
 55      u'20': u'Other or undefined mailbox status', 
 56      u'21': u'Mailbox disabled, not accepting messages', 
 57      u'22': u'Mailbox full', 
 58      u'23': u'Message length exceeds administrative limit.', 
 59      u'24': u'Mailing list expansion problem', 
 60   
 61      u'30': u'Other or undefined mail system status', 
 62      u'31': u'Mail system full', 
 63      u'32': u'System not accepting network messages', 
 64      u'33': u'System not capable of selected features', 
 65      u'34': u'Message too big for system', 
 66   
 67      u'40': u'Other or undefined network or routing status', 
 68      u'41': u'No answer from host', 
 69      u'42': u'Bad connection', 
 70      u'43': u'Routing server failure', 
 71      u'44': u'Unable to route', 
 72      u'45': u'Network congestion', 
 73      u'46': u'Routing loop detected', 
 74      u'47': u'Delivery time expired', 
 75   
 76      u'50': u'Other or undefined protocol status', 
 77      u'51': u'Invalid command', 
 78      u'52': u'Syntax error', 
 79      u'53': u'Too many recipients', 
 80      u'54': u'Invalid command arguments', 
 81      u'55': u'Wrong protocol version', 
 82   
 83      u'60': u'Other or undefined media error', 
 84      u'61': u'Media not supported', 
 85      u'62': u'Conversion required and prohibited', 
 86      u'63': u'Conversion required but not supported', 
 87      u'64': u'Conversion with loss performed', 
 88      u'65': u'Conversion failed', 
 89   
 90      u'70': u'Other or undefined security status', 
 91      u'71': u'Delivery not authorized, message refused', 
 92      u'72': u'Mailing list expansion prohibited', 
 93      u'73': u'Security conversion required but not possible', 
 94      u'74': u'Security features not supported', 
 95      u'75': u'Cryptographic failure', 
 96      u'76': u'Cryptographic algorithm not supported', 
 97      u'77': u'Message integrity failure', 
 98  } 
def match_bounce_headers(msg):
    """
    Walks every part yielded by msg.base.walk() and gathers the values of
    any header named in BOUNCE_MATCHERS.

    Returns a dict mapping each header name that was seen to the set of
    values found for it.  The extra 'Content-Description-Parts' entry maps
    each lowercased Content-Description value to the part that carried it.
    """
    found = {'Content-Description-Parts': {}}
    described_parts = found['Content-Description-Parts']

    for part in msg.base.walk():
        headers = part.headers

        for name in BOUNCE_MATCHERS:
            if name not in headers:
                continue

            value = headers[name]

            # remember which part carried each description so callers can
            # get back to the part itself later
            if name == 'Content-Description':
                described_parts[value.lower()] = part

            found.setdefault(name, set()).add(value)

    return found
119
def detect(msg):
    """
    Scores a message on how many of the usual bounce headers it carries and
    returns a BounceAnalyzer built from the parsed headers and that score.

    Every header from BOUNCE_MATCHERS that is present adds one point.  The
    header's values are then run through its regex, and when at least one
    value parses, the matched share (matched values divided by all values
    seen for that header) is added on top.  The total is divided by
    BOUNCE_MAX to give the final score.

    A header whose values never parse still counts toward the score, but it
    is left out of the headers handed to BounceAnalyzer, so it will not feed
    attributes such as remote_mta.

    Since the approach is deliberately simple, you can add your own entries
    to BOUNCE_MATCHERS in your boot files to detect extra headers.
    """
    found = match_bounce_headers(msg)

    # the part lookup is passed through untouched and must not be scored
    results = {
        'Content-Description-Parts': found.pop('Content-Description-Parts'),
    }
    score = 0

    for header, values in found.items():
        # presence alone is worth a point, even if no value parses
        score += 1
        matcher = BOUNCE_MATCHERS[header]

        parsed = []
        for value in values:
            hit = matcher.match(value)
            if hit:
                parsed.append(hit.groups())

        # only headers with at least one parseable value are reported
        if len(parsed) > 0:
            score += len(parsed) / len(values)
            results[header] = parsed

    return BounceAnalyzer(results, score / BOUNCE_MAX)
159
class BounceAnalyzer(object):
    """
    BounceAnalyzer collects up the score and the headers and gives more
    meaningful interaction with them.  You can keep it simple and just use
    is_hard, is_soft, and probable methods to see if there was a bounce.
    If you need more information then attributes are set for each of the
    following:

    * primary_status -- The main status number that determines hard vs soft.
    * secondary_status -- Advice status.
    * combined_status -- the 2nd and 3rd number combined gives more detail.
    * remote_mta -- The MTA that you sent mail to and aborted.
    * reporting_mta -- The MTA that was sending the mail and has to report
      to you.
    * final_recipient -- Second field of the Final-Recipient header.
    * diagnostic_codes -- Human readable codes usually with info from the
      provider.
    * action -- Usually 'failed', and turns out to be not too useful.
    * content_parts -- All the attachments found as a hash keyed by the
      type.
    * original -- The original message, if it's found.
    * report -- All report elements, as lamson.encoding.MailBase raw
      messages.
    * notification -- Usually the detailed reason you bounced.

    Each *_status attribute is a (number, description) tuple, or
    (None, None) when no Status header was parsed.
    """

    def __init__(self, headers, score):
        """
        Initializes all the various attributes you can use to analyze the
        bounce results.

        headers maps a header name to a list of regex group tuples and must
        also contain the 'Content-Description-Parts' dict; score is the
        bounce probability that probable() compares against its threshold.
        """
        self.headers = headers
        self.score = score

        if 'Status' in self.headers:
            status = self.headers['Status'][0]
            primary, secondary = status[0], status[1]
            combined = "".join(status[1:])

            # Status codes come from remote mail servers, so a code that is
            # missing from the lookup tables must not blow up with KeyError.
            self.primary_status = (
                int(primary),
                self._describe(PRIMARY_STATUS_CODES, primary))
            self.secondary_status = (
                int(secondary),
                self._describe(SECONDARY_STATUS_CODES, secondary))
            self.combined_status = (
                int(combined),
                self._describe(COMBINED_STATUS_CODES, combined))
        else:
            self.primary_status = (None, None)
            self.secondary_status = (None, None)
            self.combined_status = (None, None)

        self.remote_mta = self._second_field('Remote-Mta')
        self.reporting_mta = self._second_field('Reporting-Mta')
        self.final_recipient = self._second_field('Final-Recipient')

        if 'Diagnostic-Code' in self.headers:
            self.diagnostic_codes = self.headers['Diagnostic-Code'][0][1:]
        else:
            self.diagnostic_codes = [None, None]

        if 'Action' in self.headers:
            self.action = self.headers['Action'][0][0]
        else:
            self.action = None

        # these are forced lowercase because they're so random
        self.content_parts = self.headers['Content-Description-Parts']

        # and of course, this isn't the original original, it's the wrapper
        self.original = self.content_parts.get('undelivered message', None)
        if self.original and self.original.parts:
            self.original = self.original.parts[0]

        self.report = self.content_parts.get('delivery report', None)
        if self.report and self.report.parts:
            self.report = self.report.parts

        self.notification = self.content_parts.get('notification', None)

    @staticmethod
    def _describe(table, code):
        """Returns the description for code, or a generic one if unlisted."""
        return table.get(code, u'Unknown Status Code %s' % code)

    def _second_field(self, name):
        """Returns the second group of the first match for name, or None."""
        if name in self.headers:
            return self.headers[name][0][1]
        return None

    def is_hard(self):
        """
        Tells you if this was a hard bounce, which is determined by the
        message being a probable bounce with a primary_status greater
        than 4.  A bounce with no status code is never hard.
        """
        code = self.primary_status[0]
        return self.probable() and code is not None and code > 4

    def is_soft(self):
        """
        Basically the inverse of is_hard(): a probable bounce whose
        primary_status is 4 or lower.  A probable bounce with no status
        code at all counts as soft, which is what the None <= 4 comparison
        gave on Python 2; the explicit check avoids TypeError on Python 3.
        """
        code = self.primary_status[0]
        return self.probable() and (code is None or code <= 4)

    def probable(self, threshold=0.3):
        """
        Determines if this is probably a bounce based on the score
        probability.  Default threshold is 0.3 which is conservative.
        """
        return self.score > threshold

    def error_for_humans(self):
        """
        Constructs an error from the status codes that you can print to
        a user.
        """
        # compare against None so a numeric status of 0 is still reported
        if self.primary_status[0] is not None:
            return "%s, %s, %s" % (self.primary_status[1],
                                   self.secondary_status[1],
                                   self.combined_status[1])
        else:
            return "No status codes found in bounce message."
268
class bounce_to(object):
    """
    Used to route bounce messages to a handler for either soft or hard
    bounces.  Set the soft/hard parameters to the function that represents
    the handler.  The function should take one argument of the message that
    it needs to handle and should have a route that handles everything.

    Both handlers are required: every bounce is sent to exactly one of
    them, so leaving either out would fail when that kind of bounce arrives.

    WARNING: You should only place this on the START of modules that will
    receive bounces, and every bounce handler should return START.  The
    reason is that the bounce emails come from *mail daemons* not the actual
    person who bounced.  You can find out who that person is using
    message.bounce.final_recipient.  But the bounce handler is *actually*
    interacting with a message from something like
    MAILER-DAEMON@somehost.com.  If you don't go back to start immediately
    then you will mess with the state for this address, which can be bad.
    """

    def __init__(self, soft=None, hard=None):
        """
        Stores the soft and hard bounce handlers.

        Raises AssertionError when either handler is missing.
        """
        self.soft = soft
        self.hard = hard

        # Raised explicitly rather than with an assert statement so the
        # check still runs under "python -O"; the exception type is kept so
        # existing callers see the same error.
        if not (self.soft and self.hard):
            raise AssertionError(
                "You must give both a soft and a hard bounce handler")

    def __call__(self, func):
        """
        Wraps func so that bounce messages go to the soft or hard handler
        and every other message is passed to func unchanged.
        """
        @wraps(func)
        def bounce_wrapper(message, *args, **kw):
            if not message.is_bounce():
                return func(message, *args, **kw)

            # anything that is not a soft bounce is treated as hard
            if message.bounce.is_soft():
                return self.soft(message)
            return self.hard(message)

        return bounce_wrapper
304