File: pyedit-products/unzipped/PP4E/Internet/Email/mailtools/mailFetcher.py
"""
###############################################################################
retrieve, delete, match mail from a POP server (see __init__ for docs, test)
###############################################################################
"""
import poplib, mailconfig, sys # client's mailconfig on sys.path
print('user:', mailconfig.popusername) # script dir, pythonpath, changes
from .mailParser import MailParser # for headers matching (4E: .)
from .mailTool import MailTool, SilentMailTool # trace control supers (4E: .)
# Dec-2014 evil "monkeypatch": else bombs on quit() when 2048
if hasattr(poplib, '_MAXLINE'): # Dec2015: not in older libs!
poplib._MAXLINE *= 1000
# index/server msgnum out of synch tests
class DeleteSynchError(Exception): pass # msg out of synch in del
class TopNotSupported(Exception): pass # can't run synch test
class MessageSynchError(Exception): pass # index list out of sync
class MailFetcher(MailTool):
"""
fetch mail: connect, fetch headers+mails, delete mails
works on any machine with Python+Inet; subclass me to cache
implemented with the POP protocol; IMAP requires new class;
4E: handles decoding of full mail text on fetch for parser;
"""
def __init__(self, popserver=None, popuser=None, poppswd=None, hastop=True):
self.popServer = popserver or mailconfig.popservername
self.popUser = popuser or mailconfig.popusername
self.srvrHasTop = hastop
self.popPassword = poppswd # ask later if None
self.saveserver = None # 1.5
def connect(self):
"""
establish POP server connection for mailbox operations
password fetched from file or code provided by subclasses
if run in a GUI's thread, caller should avoid GUI popup
"""
# Jan2014, 1.5, experimental: servers that limit logins (outlook.com)
# *NOT USED*: this didn't work in testing, but retained for future use
if hasattr(mailconfig, 'popusesOneLogin') and self.saveserver:
return self.saveserver
self.trace('Connecting...')
self.getPassword() # via file, GUI, or console
# Dec2015 and Jan2014: support SSL/TLS (post book)
# TLS is an extension to basic SSL: use one or the other
# all use an optional ':port' at end of server name, else default for type
# unlike smtplib, must strip port manually if present
if ':' not in self.popServer:
srvr, port = self.popServer, None
else:
srvr, port = self.popServer.split(':')
# May2016: timeouts increased, configurable
timeout = mailconfig.popTimeout
if getattr(mailconfig, 'popusesSSL', False): # set and True?
# start ssl pop session, encrypted # default port=995
print('Using SSL POP on', srvr, port)
server = poplib.POP3_SSL(srvr, port or 995, timeout=timeout)
elif getattr(mailconfig, 'popusesTLS', False): # set and True?
# start tls pop session, encrypted # default port=110
print('Using TLS/SSL POP on', srvr, port) # uses SSL auto
server = poplib.POP3(srvr, port or 110, timeout=timeout)
server.stls()
server.ehlo()
else:
# original/book: plain pop # default port=110
print('Using basic POP on', srvr, port)
server = poplib.POP3(srvr, port or 110, timeout=timeout)
# back to original/book code
server.user(self.popUser) # connect,login POP server
server.pass_(self.popPassword) # pass is a reserved word
self.trace(server.getwelcome()) # print returned greeting
# Jan2014, 1.5: support servers that limit logins (outlook.com)
if hasattr(mailconfig, 'popusesOneLogin'):
self.saveserver = server
return server
def disconnect(self, server):
"""
quit the server connection, unlock mailbox
"""
# Jan2014, 1.5: support servers that limit logins (outlook.com);
# *NOT USED* - TBD: what does a persistent login do to mail box locks?
if not hasattr(mailconfig, 'popusesOneLogin'):
server.quit() # unlocks mailbox
# use setting in client's mailconfig on import search path;
# to tailor, this can be changed in class or per instance;
fetchEncoding = mailconfig.fetchEncoding
def decodeFullText(self, messageBytes):
"""
4E, Py3.1: decode full fetched mail text bytes to str Unicode string;
done at fetch, for later display or parsing (full mail text is always
Unicode thereafter); decode with per-class or per-instance setting, or
common types; could also try headers inspection, or intelligent guess
from structure; in Python 3.2/3.3, this step may not be required: if so,
change to return message line list intact; for more details see Chapter 13;
an 8-bit encoding such as latin-1 will likely suffice for most emails, as
ASCII is the original standard; this method applies to entire/full message
text, which is really just one part of the email encoding story: Message
payloads and Message headers may also be encoded per email, MIME, and
Unicode standards; see Chapter 13 and mailParser and mailSender for more;
"""
text = None
kinds = [self.fetchEncoding] # try user setting first
kinds += ['ascii', 'latin1', 'utf8'] # then try common types
kinds += [sys.getdefaultencoding()] # and platform dflt (may differ)
for kind in kinds: # may cause mail saves to fail
try:
text = [line.decode(kind) for line in messageBytes]
break
except (UnicodeError, LookupError): # LookupError: bad name
pass
if text == None:
# try returning headers + error msg, else except may kill client;
# still try to decode headers per ascii, other, platform default;
blankline = messageBytes.index(b'')
hdrsonly = messageBytes[:blankline]
commons = ['ascii', 'latin1', 'utf8']
for common in commons:
try:
text = [line.decode(common) for line in hdrsonly]
break
except UnicodeError:
pass
else: # none worked
try:
text = [line.decode() for line in hdrsonly] # platform dflt?
except UnicodeError:
text = ['From: (sender of unknown Unicode format headers)']
text += ['', '--Sorry: mailtools cannot decode this mail content!--']
return text
def downloadMessage(self, msgnum):
"""
load full raw text of one mail msg, given its
POP relative msgnum; caller must parse content
"""
self.trace('load ' + str(msgnum))
server = self.connect()
try:
resp, msglines, respsz = server.retr(msgnum)
finally:
self.disconnect(server) # 1.5
msglines = self.decodeFullText(msglines) # raw bytes to Unicode str
return '\n'.join(msglines) # concat lines for parsing
def downloadAllHeaders(self, progress=None, loadfrom=1):
"""
get sizes, raw header text only, for all or new msgs
begins loading headers from message number loadfrom
use loadfrom to load newly arrived mails only
use downloadMessage to get a full msg text later
progress is a function called with (count, total);
returns: [headers text], [mail sizes], loadedfull?
4E: add mailconfig.fetchlimit to support large email
inboxes: if not None, only fetches that many headers,
and returns others as dummy/empty mail; else inboxes
like one of mine (4K emails) are not practical to use;
4E: pass loadfrom along to downloadAllMsgs (a buglet);
"""
if not self.srvrHasTop: # not all servers support TOP
# naively load full msg text
return self.downloadAllMsgs(progress, loadfrom)
else:
self.trace('loading headers')
fetchlimit = mailconfig.fetchlimit
server = self.connect() # mbox now locked until quit
try:
resp, msginfos, respsz = server.list() # 'num size' lines list
msgCount = len(msginfos) # alt to srvr.stat[0]
msginfos = msginfos[loadfrom-1:] # drop already loadeds
allsizes = [int(x.split()[1]) for x in msginfos]
allhdrs = []
for msgnum in range(loadfrom, msgCount+1): # poss empty
if progress: progress(msgnum, msgCount) # run callback
if fetchlimit and (msgnum <= msgCount - fetchlimit):
# skip, add dummy hdrs
hdrtext = 'Subject: --mail skipped--\n\n'
allhdrs.append(hdrtext)
else:
# fetch, retr hdrs only
resp, hdrlines, respsz = server.top(msgnum, 0)
hdrlines = self.decodeFullText(hdrlines)
allhdrs.append('\n'.join(hdrlines))
finally:
self.disconnect(server) # 1.5 # make sure unlock mbox
assert len(allhdrs) == len(allsizes)
self.trace('load headers exit')
return allhdrs, allsizes, False
def downloadAllMessages(self, progress=None, loadfrom=1):
"""
load full message text for all msgs from loadfrom..N,
despite any caching that may be being done in the caller;
much slower than downloadAllHeaders, if just need hdrs;
4E: support mailconfig.fetchlimit: see downloadAllHeaders;
could use server.list() to get sizes of skipped emails here
too, but clients probably don't care about these anyhow;
"""
self.trace('loading full messages')
fetchlimit = mailconfig.fetchlimit
server = self.connect()
try:
(msgCount, msgBytes) = server.stat() # inbox on server
allmsgs = []
allsizes = []
for i in range(loadfrom, msgCount+1): # empty if low >= high
if progress: progress(i, msgCount)
if fetchlimit and (i <= msgCount - fetchlimit):
# skip, add dummy mail
mailtext = 'Subject: --mail skipped--\n\nMail skipped.\n'
allmsgs.append(mailtext)
allsizes.append(len(mailtext))
else:
# fetch, retr full mail
(resp, message, respsz) = server.retr(i) # save text on list
message = self.decodeFullText(message)
allmsgs.append('\n'.join(message)) # leave mail on server
allsizes.append(respsz) # diff from len(msg)
finally:
self.disconnect(server) # 1.5 # unlock the mail box
assert len(allmsgs) == (msgCount - loadfrom) + 1 # msg nums start at 1
#assert sum(allsizes) == msgBytes # not if loadfrom > 1
return allmsgs, allsizes, True # not if fetchlimit
def deleteMessages(self, msgnums, progress=None):
"""
delete multiple msgs off server; assumes email inbox
unchanged since msgnums were last determined/loaded;
use if msg headers not available as state information;
fast, but poss dangerous: see deleteMessagesSafely
"""
self.trace('deleting mails')
server = self.connect()
try:
for (ix, msgnum) in enumerate(msgnums): # don't reconnect for each
if progress: progress(ix+1, len(msgnums))
server.dele(msgnum)
finally: # changes msgnums: reload
self.disconnect(server) # 1.5
def deleteMessagesSafely(self, msgnums, synchHeaders, progress=None):
"""
delete multiple msgs off server, but use TOP fetches to
check for a match on each msg's header part before deleting;
assumes the email server supports the TOP interface of POP,
else raises TopNotSupported - client may call deleteMessages;
use if the mail server might change the inbox since the email
index was last fetched, thereby changing POP relative message
numbers; this can happen if email is deleted in a different
client; some ISPs may also move a mail from inbox to the
undeliverable box in response to a failed download;
synchHeaders must be a list of already loaded mail hdrs text,
corresponding to selected msgnums (requires state); raises
exception if any out of synch with the email server; inbox is
locked until quit, so it should not change between TOP check
and actual delete: synch check must occur here, not in caller;
may be enough to call checkSynchError+deleteMessages, but check
each msg here in case deletes and inserts in middle of inbox;
"""
if not self.srvrHasTop:
raise TopNotSupported('Safe delete cancelled')
self.trace('deleting mails safely')
errmsg = 'Message %s out of synch with server.\n'
errmsg += 'Delete terminated at this message.\n'
errmsg += 'Mail client may require restart or reload.'
server = self.connect() # locks inbox till quit
try: # don't reconnect for each
(msgCount, msgBytes) = server.stat() # inbox size on server
for (ix, msgnum) in enumerate(msgnums):
if progress: progress(ix+1, len(msgnums))
if msgnum > msgCount: # msgs deleted
raise DeleteSynchError(errmsg % msgnum)
resp, hdrlines, respsz = server.top(msgnum, 0) # hdrs only
hdrlines = self.decodeFullText(hdrlines)
msghdrs = '\n'.join(hdrlines)
if not self.headersMatch(msghdrs, synchHeaders[msgnum-1]):
raise DeleteSynchError(errmsg % msgnum)
else:
server.dele(msgnum) # safe to delete this msg
finally: # changes msgnums: reload
self.disconnect(server) # 1.5 # unlock inbox on way out
def checkSynchError(self, synchHeaders):
"""
check to see if already loaded hdrs text in synchHeaders
list matches what is on the server, using the TOP command in
POP to fetch headers text; use if inbox can change due to
deletes in other client, or automatic action by email server;
raises except if out of synch, or error while talking to server;
for speed, only checks last in last: this catches inbox deletes,
but assumes server won't insert before last (true for incoming
mails); check inbox size first: smaller if just deletes; else
top will differ if deletes and newly arrived messages added at
end; result valid only when run: inbox may change after return;
"""
self.trace('synch check')
errormsg = 'Message index out of synch with mail server.\n'
errormsg += 'Mail client may require restart or reload.'
server = self.connect()
try:
lastmsgnum = len(synchHeaders) # 1..N
(msgCount, msgBytes) = server.stat() # inbox size
if lastmsgnum > msgCount: # fewer now?
raise MessageSynchError(errormsg) # none to cmp
if self.srvrHasTop:
resp, hdrlines, respsz = server.top(lastmsgnum, 0) # hdrs only
hdrlines = self.decodeFullText(hdrlines)
lastmsghdrs = '\n'.join(hdrlines)
if not self.headersMatch(lastmsghdrs, synchHeaders[-1]):
raise MessageSynchError(errormsg)
finally:
self.disconnect(server) # 1.5
def headersMatch(self, hdrtext1, hdrtext2):
""""
may not be as simple as a string compare: some servers add
a "Status:" header that changes over time; on one ISP, it
begins as "Status: U" (unread), and changes to "Status: RO"
(read, old) after fetched once - throws off synch tests if
new when index fetched, but have been fetched once before
delete or last-message check; "Message-id:" line is unique
per message in theory, but optional, and can be anything if
forged; match more common: try first; parsing costly: try last
"""
# try match by simple string compare
if hdrtext1 == hdrtext2:
self.trace('Same headers text')
return True
# try match without status lines
split1 = hdrtext1.splitlines() # s.split('\n'), but no final ''
split2 = hdrtext2.splitlines()
strip1 = [line for line in split1 if not line.startswith('Status:')]
strip2 = [line for line in split2 if not line.startswith('Status:')]
if strip1 == strip2:
self.trace('Same without Status')
return True
# try mismatch by message-id headers if either has one
msgid1 = [line for line in split1 if line[:11].lower() == 'message-id:']
msgid2 = [line for line in split2 if line[:11].lower() == 'message-id:']
if (msgid1 or msgid2) and (msgid1 != msgid2):
self.trace('Different Message-Id')
return False
# try full hdr parse and common headers if msgid missing or trash
tryheaders = ('From', 'To', 'Subject', 'Date')
tryheaders += ('Cc', 'Return-Path', 'Received')
msg1 = MailParser().parseHeaders(hdrtext1)
msg2 = MailParser().parseHeaders(hdrtext2)
for hdr in tryheaders: # poss multiple Received
if msg1.get_all(hdr) != msg2.get_all(hdr): # case insens, dflt None
self.trace('Diff common headers')
return False
# all common hdrs match and don't have a diff message-id
self.trace('Same common headers')
return True
def getPassword(self):
"""
get POP password if not yet known
not required until go to server
from client-side file or subclass method
Dec2015: user rstrip() instead of [:-1] in case no \n
"""
if not self.popPassword:
try:
self.getPasswordFromFile()
except:
self.popPassword = self.askPopPassword() # on instance
def getPasswordFromFile(self):
"""
Dec2015: factor this code out so callable from PyMailGUI main thread;
that program can't allow a mail thread to popup a password dialog;
any exceptions (e.g., non-existent file/path) handled by the caller;
"""
localfile = open(mailconfig.poppasswdfile)
self.popPassword = localfile.readline().rstrip() # on inst
#self.trace('local file password' + repr(self.popPassword)) # nope
def askPopPassword(self):
"""
if run in a GUI's spawned thread, caller should avoid GUI popup
"""
assert False, 'Subclass must define method' # GUIs beware!
################################################################################
# specialized subclasses
################################################################################
class MailFetcherConsole(MailFetcher):
def askPopPassword(self):
import getpass
prompt = 'Password for %s on %s?' % (self.popUser, self.popServer)
return getpass.getpass(prompt)
class SilentMailFetcher(SilentMailTool, MailFetcher):
pass # replaces trace