#!/usr/bin/python3
r"""
==============================================================================
imapfetch.py - fetch and save emails from server folders via IMAP.

Author:   copyright M. Lutz, Dec-4-2015.
License:  provided freely, but with no warranties of any kind.
Usage:    change configurations below, run with no command-line arguments;
          view saved mailbox files with PyMailGUI's "Open" (or other scheme);
          requires Python 3.X, tested and used only on 3.3 and 3.5 so far;
Examples: see HOW.txt for script usage, \runlogs for script outputs, and
          \SavedMailboxes-yourid-yourisp.net for save-mail files and GUI;

This script was originally used to download ~20k saved mails spanning more
than a decade, before closing a former ISP email account (a one-time download
that took over 5 hours for a slow ISP).  Configure to use your own account's
parameters using the upper-case settings at the start of the code below.

Fetches all messages from all (or selected) mail folders at an email account
host, using the IMAP interface.  POP provides access to the inbox only; IMAP
is more complex and not universally supported, but also gives access to all
other saved-mail folders at the host (e.g., sent mails, drafts, etc.).

Creates one file per host folder, each of which contains the full-text of all
the folder's messages separated by marker lines.  These files are designed to
be viewed via the "Open" button of the PyMailGUI client, coded in the book
"Programming Python, 4th Edition", and available standalone at website
"http://learning-python.com/pymailgui".  For more on using PyMailGUI, see
SavedMailboxes-yourid-yourisp.net\README.txt.  To use this script for other
clients and use cases, change its code or separator line as desired.

Uses bytes for both file names and contents: encodings unknown and variable.
The imaplib API returns structured data; reply formats are documented in
"e.g.," comments, but see imaplib and IMAP docs for the 'magic' indexes here.

(The docstring is a raw string so that the backslashes in the Windows-style
paths above are kept literally instead of being read as escape sequences.)
==============================================================================
"""

import imaplib
import getpass
import sys
import os

#
# configuration settings: change for your accounts (see HOW.txt for examples)
#

# NOTE: set Ask = True to be prompted instead of storing credentials in this
# file; port 143 is the plain (non-SSL) IMAP port used by imaplib.IMAP4.
Ask  = False
PORT = 143
HOST = input('Host name? ') if Ask else 'imap.yourisp.net'
USER = input('User name? ' ) if Ask else 'yourid@yourisp.net'
PSWD = getpass.getpass('Password? ' ) if Ask else 'yourpassword'

# save mailbox files here: in '.', or use absolute path
SAVEDIR = b'SavedMailboxes-yourid-yourisp.net'

# if not [], skip just these mailboxes, by save-file name
MBOXSKIP = [b'Virus-Blocker', b'Sent-Spam', b'Known-spam',
            b'Suspect-Email', b'Trash']

# if not [], save just these mailboxes, by save-file name (higher precedence)
MBOXSAVE = []   # [b'pp3e-errata', b'lp3e-errata', b'Old-Sent']

#
# open save-mail files with PyMailGUI's "Open" (see top docstring)
#

try:
    from PP4E.Internet.Email.PyMailGui.sharednames import saveMailSeparator
except Exception:
    # PP4E is optional: fall back on a default marker if it cannot be
    # imported for any reason, but let ctrl-c and sys.exit pass through
    saveMailSeparator = 'PyMailGUI' + ('-'*60) + 'PyMailGUI\r\n'

# files are written in binary mode, so the separator must be bytes too
saveMailSeparator = saveMailSeparator.encode()


def getMailboxNames(server):
    """
    --------------------------------------------------------------
    extract mailbox (a.k.a. folder) names from server;
    save xor skip folders per configuration list settings, if any;
    fix double-quoted names having embedded spaces: retain quotes
    for server.select, drop quotes and blanks for save-file name;

    server:  a logged-in imaplib connection object (uses .list());
    returns: a list of (mboxname, savename) bytes 2-tuples, where
             mboxname is passed to server.select and savename is
             the base name of the folder's save file;
    --------------------------------------------------------------
    """
    # e.g., ('OK', [b'...', b'...', ...])
    resp, mboxes = server.list()
    mboxnames = []
    for mboxinfo in mboxes:
        if not isinstance(mboxinfo, bytes):
            # imaplib gives [None] when there are no folders, and non-bytes
            # items for replies this simple parser does not understand
            if mboxinfo is not None:
                print('Skipping unparsable folder reply:', mboxinfo)
            continue

        # e.g., b'(..) "." mboxname' => (b'mboxname', b'mboxname')
        mboxname = mboxinfo.split()[-1]
        savename = mboxname
        if b'"' in mboxname:
            # e.g., b'(..) "." "mbox name"' => (b'"mbox name"', b'mbox-name')
            mboxname = b'"' + mboxinfo.split(b'"')[-2] + b'"'
            savename = mboxname[1:-1].replace(b' ', b'-')

        if MBOXSAVE and savename not in MBOXSAVE:       # save these only
            print('Skipping mailbox:', mboxname)
        elif MBOXSKIP and savename in MBOXSKIP:         # or skip these only
            print('Skipping mailbox:', mboxname)
        else:                                           # or use all folders
            mboxnames.append((mboxname, savename))
    return mboxnames


def fetchAndSaveMessages(server, mboxnames):
    """
    ------------------------------------------------------------------
    fetch and save mailboxes of messages, one flat file per mailbox;
    use bytes name+file for outputs: no encoding for name or content;

    server:    a logged-in imaplib connection object;
    mboxnames: list of (mboxname, savename) pairs, as returned by
               getMailboxNames;
    returns:   None; a failure in one folder is reported and skipped,
               and the remaining folders are still processed;
    ------------------------------------------------------------------
    """
    for (mboxname, savename) in mboxnames:
        print('\nDownloading:', mboxname, 'to', savename, '...', flush=True)
        try:
            # use bytes for name and content
            savepath = os.path.join(SAVEDIR, savename) + b'.eml.txt'

            # 'with' closes the file even if a select or fetch fails midway
            with open(savepath, 'wb') as savefile:
                # e.g., ('OK', [b'62'])
                # NOTE(review): select opens the folder read-write, so the
                # server may flag fetched mails as seen; readonly=True on
                # select would presumably avoid that -- confirm before use
                resp, nmsg = server.select(mailbox=mboxname)
                if resp != 'OK':
                    raise imaplib.IMAP4.error(
                        'select failed: %s %r' % (resp, nmsg))
                nummessages = int(nmsg[0])

                # fetch/save mailbox's messages as bytes: encoding unknown
                for mnum in range(1, nummessages + 1):
                    if mnum % 12 == 0:
                        print(flush=True)
                    print(mnum, end=' ')

                    # e.g., ('OK', [(b'1 (RFC822 {2020}', b'Return-Path:...'), b')'])
                    message = server.fetch('%d:%d' % (mnum, mnum), 'RFC822')
                    if message[0] != 'OK' or not isinstance(message[1][0], tuple):
                        raise imaplib.IMAP4.error(
                            'fetch failed for message %d: %r' % (mnum, message))
                    savefile.write(saveMailSeparator)
                    savefile.write(message[1][0][1] + b'\n')
            print()

        except Exception as excobj:
            # many things can fail, but let ctrl-c pass
            print('\nError while downloading', mboxname)
            print(type(excobj), excobj)      # same as sys.exc_info()[0]/[1]
            print('Continuing\n')


if __name__ == '__main__':
    # make savedir if needed (no error if it already exists)
    os.makedirs(SAVEDIR, exist_ok=True)

    # connect to server
    server = imaplib.IMAP4(host=HOST, port=PORT)
    try:
        server.login(user=USER, password=PSWD)

        # fetch and save emails
        mboxnames = getMailboxNames(server)
        fetchAndSaveMessages(server, mboxnames)
    finally:
        # always end the session, even if login or the download failed
        try:
            server.logout()
        except Exception as excobj:
            print('Logout failed:', type(excobj), excobj)

    # wrap-up
    print('\nFinished: see mail files in', SAVEDIR)    # don't decode here either
    if sys.platform.startswith('win') and sys.stdout.isatty():
        # for Windows icon clickers, unless output piped to file
        input('Press Enter to close')