""" ############################################################################### retrieve, delete, match mail from a POP server (see __init__ for docs, test) ############################################################################### """ import poplib, mailconfig, sys # client's mailconfig on sys.path print('user:', mailconfig.popusername) # script dir, pythonpath, changes from .mailParser import MailParser # for headers matching (4E: .) from .mailTool import MailTool, SilentMailTool # trace control supers (4E: .) # Dec-2014 evil "monkeypatch": else bombs on quit() when 2048 if hasattr(poplib, '_MAXLINE'): # Dec2015: not in older libs! poplib._MAXLINE *= 1000 # index/server msgnum out of synch tests class DeleteSynchError(Exception): pass # msg out of synch in del class TopNotSupported(Exception): pass # can't run synch test class MessageSynchError(Exception): pass # index list out of sync class MailFetcher(MailTool): """ fetch mail: connect, fetch headers+mails, delete mails works on any machine with Python+Inet; subclass me to cache implemented with the POP protocol; IMAP requires new class; 4E: handles decoding of full mail text on fetch for parser; """ def __init__(self, popserver=None, popuser=None, poppswd=None, hastop=True): self.popServer = popserver or mailconfig.popservername self.popUser = popuser or mailconfig.popusername self.srvrHasTop = hastop self.popPassword = poppswd # ask later if None self.saveserver = None # 1.5 def connect(self): """ establish POP server connection for mailbox operations password fetched from file or code provided by subclasses if run in a GUI's thread, caller should avoid GUI popup """ # Jan2014, 1.5, experimental: servers that limit logins (outlook.com) # *NOT USED*: this didn't work in testing, but retained for future use if hasattr(mailconfig, 'popusesOneLogin') and self.saveserver: return self.saveserver self.trace('Connecting...') self.getPassword() # via file, GUI, or console # Dec2015 and Jan2014: support SSL/TLS (post book) # TLS is an extension to basic SSL: use one or the other # all use an optional ':port' at end of server name, else default for type # unlike smtplib, must strip port manually if present if ':' not in self.popServer: srvr, port = self.popServer, None else: srvr, port = self.popServer.split(':') # May2016: timeouts increased, configurable timeout = mailconfig.popTimeout if getattr(mailconfig, 'popusesSSL', False): # set and True? # start ssl pop session, encrypted # default port=995 print('Using SSL POP on', srvr, port) server = poplib.POP3_SSL(srvr, port or 995, timeout=timeout) elif getattr(mailconfig, 'popusesTLS', False): # set and True? # start tls pop session, encrypted # default port=110 print('Using TLS/SSL POP on', srvr, port) # uses SSL auto server = poplib.POP3(srvr, port or 110, timeout=timeout) server.stls() server.ehlo() else: # original/book: plain pop # default port=110 print('Using basic POP on', srvr, port) server = poplib.POP3(srvr, port or 110, timeout=timeout) # back to original/book code server.user(self.popUser) # connect,login POP server server.pass_(self.popPassword) # pass is a reserved word self.trace(server.getwelcome()) # print returned greeting # Jan2014, 1.5: support servers that limit logins (outlook.com) if hasattr(mailconfig, 'popusesOneLogin'): self.saveserver = server return server def disconnect(self, server): """ quit the server connection, unlock mailbox """ # Jan2014, 1.5: support servers that limit logins (outlook.com); # *NOT USED* - TBD: what does a persistent login do to mail box locks? if not hasattr(mailconfig, 'popusesOneLogin'): server.quit() # unlocks mailbox # use setting in client's mailconfig on import search path; # to tailor, this can be changed in class or per instance; fetchEncoding = mailconfig.fetchEncoding def decodeFullText(self, messageBytes): """ 4E, Py3.1: decode full fetched mail text bytes to str Unicode string; done at fetch, for later display or parsing (full mail text is always Unicode thereafter); decode with per-class or per-instance setting, or common types; could also try headers inspection, or intelligent guess from structure; in Python 3.2/3.3, this step may not be required: if so, change to return message line list intact; for more details see Chapter 13; an 8-bit encoding such as latin-1 will likely suffice for most emails, as ASCII is the original standard; this method applies to entire/full message text, which is really just one part of the email encoding story: Message payloads and Message headers may also be encoded per email, MIME, and Unicode standards; see Chapter 13 and mailParser and mailSender for more; """ text = None kinds = [self.fetchEncoding] # try user setting first kinds += ['ascii', 'latin1', 'utf8'] # then try common types kinds += [sys.getdefaultencoding()] # and platform dflt (may differ) for kind in kinds: # may cause mail saves to fail try: text = [line.decode(kind) for line in messageBytes] break except (UnicodeError, LookupError): # LookupError: bad name pass if text == None: # try returning headers + error msg, else except may kill client; # still try to decode headers per ascii, other, platform default; blankline = messageBytes.index(b'') hdrsonly = messageBytes[:blankline] commons = ['ascii', 'latin1', 'utf8'] for common in commons: try: text = [line.decode(common) for line in hdrsonly] break except UnicodeError: pass else: # none worked try: text = [line.decode() for line in hdrsonly] # platform dflt? except UnicodeError: text = ['From: (sender of unknown Unicode format headers)'] text += ['', '--Sorry: mailtools cannot decode this mail content!--'] return text def downloadMessage(self, msgnum): """ load full raw text of one mail msg, given its POP relative msgnum; caller must parse content """ self.trace('load ' + str(msgnum)) server = self.connect() try: resp, msglines, respsz = server.retr(msgnum) finally: self.disconnect(server) # 1.5 msglines = self.decodeFullText(msglines) # raw bytes to Unicode str return '\n'.join(msglines) # concat lines for parsing def downloadAllHeaders(self, progress=None, loadfrom=1): """ get sizes, raw header text only, for all or new msgs begins loading headers from message number loadfrom use loadfrom to load newly arrived mails only use downloadMessage to get a full msg text later progress is a function called with (count, total); returns: [headers text], [mail sizes], loadedfull? 4E: add mailconfig.fetchlimit to support large email inboxes: if not None, only fetches that many headers, and returns others as dummy/empty mail; else inboxes like one of mine (4K emails) are not practical to use; 4E: pass loadfrom along to downloadAllMsgs (a buglet); """ if not self.srvrHasTop: # not all servers support TOP # naively load full msg text return self.downloadAllMsgs(progress, loadfrom) else: self.trace('loading headers') fetchlimit = mailconfig.fetchlimit server = self.connect() # mbox now locked until quit try: resp, msginfos, respsz = server.list() # 'num size' lines list msgCount = len(msginfos) # alt to srvr.stat[0] msginfos = msginfos[loadfrom-1:] # drop already loadeds allsizes = [int(x.split()[1]) for x in msginfos] allhdrs = [] for msgnum in range(loadfrom, msgCount+1): # poss empty if progress: progress(msgnum, msgCount) # run callback if fetchlimit and (msgnum <= msgCount - fetchlimit): # skip, add dummy hdrs hdrtext = 'Subject: --mail skipped--\n\n' allhdrs.append(hdrtext) else: # fetch, retr hdrs only resp, hdrlines, respsz = server.top(msgnum, 0) hdrlines = self.decodeFullText(hdrlines) allhdrs.append('\n'.join(hdrlines)) finally: self.disconnect(server) # 1.5 # make sure unlock mbox assert len(allhdrs) == len(allsizes) self.trace('load headers exit') return allhdrs, allsizes, False def downloadAllMessages(self, progress=None, loadfrom=1): """ load full message text for all msgs from loadfrom..N, despite any caching that may be being done in the caller; much slower than downloadAllHeaders, if just need hdrs; 4E: support mailconfig.fetchlimit: see downloadAllHeaders; could use server.list() to get sizes of skipped emails here too, but clients probably don't care about these anyhow; """ self.trace('loading full messages') fetchlimit = mailconfig.fetchlimit server = self.connect() try: (msgCount, msgBytes) = server.stat() # inbox on server allmsgs = [] allsizes = [] for i in range(loadfrom, msgCount+1): # empty if low >= high if progress: progress(i, msgCount) if fetchlimit and (i <= msgCount - fetchlimit): # skip, add dummy mail mailtext = 'Subject: --mail skipped--\n\nMail skipped.\n' allmsgs.append(mailtext) allsizes.append(len(mailtext)) else: # fetch, retr full mail (resp, message, respsz) = server.retr(i) # save text on list message = self.decodeFullText(message) allmsgs.append('\n'.join(message)) # leave mail on server allsizes.append(respsz) # diff from len(msg) finally: self.disconnect(server) # 1.5 # unlock the mail box assert len(allmsgs) == (msgCount - loadfrom) + 1 # msg nums start at 1 #assert sum(allsizes) == msgBytes # not if loadfrom > 1 return allmsgs, allsizes, True # not if fetchlimit def deleteMessages(self, msgnums, progress=None): """ delete multiple msgs off server; assumes email inbox unchanged since msgnums were last determined/loaded; use if msg headers not available as state information; fast, but poss dangerous: see deleteMessagesSafely """ self.trace('deleting mails') server = self.connect() try: for (ix, msgnum) in enumerate(msgnums): # don't reconnect for each if progress: progress(ix+1, len(msgnums)) server.dele(msgnum) finally: # changes msgnums: reload self.disconnect(server) # 1.5 def deleteMessagesSafely(self, msgnums, synchHeaders, progress=None): """ delete multiple msgs off server, but use TOP fetches to check for a match on each msg's header part before deleting; assumes the email server supports the TOP interface of POP, else raises TopNotSupported - client may call deleteMessages; use if the mail server might change the inbox since the email index was last fetched, thereby changing POP relative message numbers; this can happen if email is deleted in a different client; some ISPs may also move a mail from inbox to the undeliverable box in response to a failed download; synchHeaders must be a list of already loaded mail hdrs text, corresponding to selected msgnums (requires state); raises exception if any out of synch with the email server; inbox is locked until quit, so it should not change between TOP check and actual delete: synch check must occur here, not in caller; may be enough to call checkSynchError+deleteMessages, but check each msg here in case deletes and inserts in middle of inbox; """ if not self.srvrHasTop: raise TopNotSupported('Safe delete cancelled') self.trace('deleting mails safely') errmsg = 'Message %s out of synch with server.\n' errmsg += 'Delete terminated at this message.\n' errmsg += 'Mail client may require restart or reload.' server = self.connect() # locks inbox till quit try: # don't reconnect for each (msgCount, msgBytes) = server.stat() # inbox size on server for (ix, msgnum) in enumerate(msgnums): if progress: progress(ix+1, len(msgnums)) if msgnum > msgCount: # msgs deleted raise DeleteSynchError(errmsg % msgnum) resp, hdrlines, respsz = server.top(msgnum, 0) # hdrs only hdrlines = self.decodeFullText(hdrlines) msghdrs = '\n'.join(hdrlines) if not self.headersMatch(msghdrs, synchHeaders[msgnum-1]): raise DeleteSynchError(errmsg % msgnum) else: server.dele(msgnum) # safe to delete this msg finally: # changes msgnums: reload self.disconnect(server) # 1.5 # unlock inbox on way out def checkSynchError(self, synchHeaders): """ check to see if already loaded hdrs text in synchHeaders list matches what is on the server, using the TOP command in POP to fetch headers text; use if inbox can change due to deletes in other client, or automatic action by email server; raises except if out of synch, or error while talking to server; for speed, only checks last in last: this catches inbox deletes, but assumes server won't insert before last (true for incoming mails); check inbox size first: smaller if just deletes; else top will differ if deletes and newly arrived messages added at end; result valid only when run: inbox may change after return; """ self.trace('synch check') errormsg = 'Message index out of synch with mail server.\n' errormsg += 'Mail client may require restart or reload.' server = self.connect() try: lastmsgnum = len(synchHeaders) # 1..N (msgCount, msgBytes) = server.stat() # inbox size if lastmsgnum > msgCount: # fewer now? raise MessageSynchError(errormsg) # none to cmp if self.srvrHasTop: resp, hdrlines, respsz = server.top(lastmsgnum, 0) # hdrs only hdrlines = self.decodeFullText(hdrlines) lastmsghdrs = '\n'.join(hdrlines) if not self.headersMatch(lastmsghdrs, synchHeaders[-1]): raise MessageSynchError(errormsg) finally: self.disconnect(server) # 1.5 def headersMatch(self, hdrtext1, hdrtext2): """" may not be as simple as a string compare: some servers add a "Status:" header that changes over time; on one ISP, it begins as "Status: U" (unread), and changes to "Status: RO" (read, old) after fetched once - throws off synch tests if new when index fetched, but have been fetched once before delete or last-message check; "Message-id:" line is unique per message in theory, but optional, and can be anything if forged; match more common: try first; parsing costly: try last """ # try match by simple string compare if hdrtext1 == hdrtext2: self.trace('Same headers text') return True # try match without status lines split1 = hdrtext1.splitlines() # s.split('\n'), but no final '' split2 = hdrtext2.splitlines() strip1 = [line for line in split1 if not line.startswith('Status:')] strip2 = [line for line in split2 if not line.startswith('Status:')] if strip1 == strip2: self.trace('Same without Status') return True # try mismatch by message-id headers if either has one msgid1 = [line for line in split1 if line[:11].lower() == 'message-id:'] msgid2 = [line for line in split2 if line[:11].lower() == 'message-id:'] if (msgid1 or msgid2) and (msgid1 != msgid2): self.trace('Different Message-Id') return False # try full hdr parse and common headers if msgid missing or trash tryheaders = ('From', 'To', 'Subject', 'Date') tryheaders += ('Cc', 'Return-Path', 'Received') msg1 = MailParser().parseHeaders(hdrtext1) msg2 = MailParser().parseHeaders(hdrtext2) for hdr in tryheaders: # poss multiple Received if msg1.get_all(hdr) != msg2.get_all(hdr): # case insens, dflt None self.trace('Diff common headers') return False # all common hdrs match and don't have a diff message-id self.trace('Same common headers') return True def getPassword(self): """ get POP password if not yet known not required until go to server from client-side file or subclass method Dec2015: user rstrip() instead of [:-1] in case no \n """ if not self.popPassword: try: self.getPasswordFromFile() except: self.popPassword = self.askPopPassword() # on instance def getPasswordFromFile(self): """ Dec2015: factor this code out so callable from PyMailGUI main thread; that program can't allow a mail thread to popup a password dialog; any exceptions (e.g., non-existent file/path) handled by the caller; """ localfile = open(mailconfig.poppasswdfile) self.popPassword = localfile.readline().rstrip() # on inst #self.trace('local file password' + repr(self.popPassword)) # nope def askPopPassword(self): """ if run in a GUI's spawned thread, caller should avoid GUI popup """ assert False, 'Subclass must define method' # GUIs beware! ################################################################################ # specialized subclasses ################################################################################ class MailFetcherConsole(MailFetcher): def askPopPassword(self): import getpass prompt = 'Password for %s on %s?' % (self.popUser, self.popServer) return getpass.getpass(prompt) class SilentMailFetcher(SilentMailTool, MailFetcher): pass # replaces trace