File: tagpix/tagpix.py

#!/usr/bin/python
"""
==========================================================================
tagpix - combine your photos for easy viewing and archiving.

Website: https://learning-python.com/tagpix.html
Author:  © M. Lutz (http://learning-python.com) 2013-2020.
License: provided freely, but with no warranties of any kind.

Summary:
  This script, run with console inputs and no command-line arguments, 
  organizes the content of multiple camera-card or other photo-collection
  folders for fast and convenient access.  It:

  - Transfers all the photos in an entire folder tree to a single folder
  - Renames photos with date-taken prefixes for uniqueness and sorts
  - Discards duplicate content and makes duplicate filenames unique
  - Transfers any movies and other files in the tree to their own folders
  - Optionally groups merged items of all types into by-year subfolders
  - Transfers files by either moves or copies (with optional deletes)

  The result is a single folder that combines all your photos in one 
  location.  tagpix runs with either Python 3.X or 2.X, and on all 
  major platforms - including Windows, Mac OS, Linux, and Android.  

  To download this program, visit its website (above).  For configuration
  settings, see file user_configs.py.  For the complete story on this 
  program's roles and usage, open this package's UserGuide.html.

Versions:
  2.3, Sep 2020 - patched to silence spurious Pillow DOS warning
  2.2, Jun 2020 - repackaged with documentation changes only
  2.2, Dec 2018 - use+drop redundant dates in Android photo filenames
  2.1, Oct 2018 - copy modes, more dups, folder skips, verify deletes
  2.0, Oct 2017 - year groups, list-only, dup skips, mime, console
  See release notes in UserGuide.html for complete change logs.

*CAUTION*: By design, this script's default operation moves and renames 
all photos and other files in an entire source folder tree.  No automated
method for undoing the changes it makes is provided, and no warranty is 
included with this program.  Although tagpix has been tested and used 
successfully on large photo collections, please read all usage details 
in UserGuide.html carefully before running it on yours.  It is strongly
recommended to preview changes with list-only mode before applying them;
and either run tagpix on a temporary copy of your source folder tree, 
or enable its copy-only transfer mode in file user_configs.py to avoid
source-tree changes.

[All other usage and version documentation now resides in UserGuide.html]

==========================================================================
"""

from __future__ import print_function   # py 2.X
import os, sys, pprint, datetime, time, mimetypes, shutil, re


# Exif tag extraction.
# Uses Pillow/PIL; EXIF alternative failed for more files in testing.
#
from PIL import Image
from PIL.ExifTags import TAGS      # tag #id => name
#import EXIF


# [2.3] Sep-2020: silence a harmless but excessive Pillow-library warning
# now issued stupidly for all large images.  This includes perfectly valid
# 108MP images shot on a Note20 Ultra smartphone, among other >89M image
# devices.  This also impacted thumbspage, shrinkpix, and PyPhoto, requiring
# program rereleases - a typical open-source-agenda result, and an example 
# of the pitfalls of "batteries included" development.  Fix, please.
# More complete coverage (and diatribe): UserGuide.html#pillowdoswarning.
# Update: Pillow makes this an error _exception_ at limit*2: disable too.
#

# Remove Pillow's pixel-count limit altogether, so that large but valid
# photos are neither warned about nor rejected.
Image.MAX_IMAGE_PIXELS = None    # stop both warning, and error at limit*2

# in case the preceding fails: also filter out the warning class itself,
# but only where this Pillow version defines it
if hasattr(Image, 'DecompressionBombWarning'):    # not until 2014+ Pillows
    import warnings
    warnings.simplefilter('ignore', Image.DecompressionBombWarning)


# Py 2.X compatibility.
# This and the __future__ import above are all that differ for 2.X.
# 3.X's re.ASCII is defined in 2.X so it can be named in flags (0=none).
# Update: the first is now moot, given the input() stderr redef below.
#
if sys.version_info[0] == 2:
    # 2.X has re.UNICODE but no re.ASCII: define it as 0 (no flags), so
    # patterns can name it under either Python line.  Rebinding input to
    # raw_input is no longer needed here, because this file defines its
    # own input() that reads stdin directly.
    re.ASCII = 0


# A few globals in this file.
#
sepln = '-' * 80     # report-section separator: a line of 80 dashes
tracemore = False    # show extra program-trace output? (main dumps the classified lists)
workdir = '.'        # location of default source and destination folders (current dir)


# [2.1] Get user configurations: more easily changed than this script's code.
# See user_configs.py for more on these options and settings.
#
from user_configs import IgnoreFoldersPattern    # folder-skip names regex

from user_configs import CopyInsteadOfMove       # copy-only or copy-and-delete modes
from user_configs import DeleteAfterCopy         # copy-and-delete mode (both True)

# [2.2] additions
from user_configs import UseAndroidFilenameDates            # use when no Exif date tag? 
from user_configs import DropAndroidFilenameDates           # drop redundant dates?
from user_configs import KeepDifferingAndroidFilenameDates  # drop iff tagpix==Android?


# prior-run-date pattern, compile just once (e.g, '2017-10-13__2017-10-13__xxx')
# raw string: '\d' in a normal literal is an invalid escape sequence, which
# newer Pythons warn about (the compiled pattern itself is unchanged)
dupprefixpattern = re.compile(r'(?:\d{4}-\d{2}-\d{2}__){2}', re.ASCII)

# [2.1] folder-skips pattern (in user_configs.py), precompile string for speed;
# classify() applies it with match() to each subfolder's name (not its path)
ignorefolderspattern = re.compile(IgnoreFoldersPattern)

# [2.2] redundant dates pattern (e.g., '2018-02-05__20180205_154910.jpg')
# group 1 = tagpix 'yyyy-mm-dd' prefix, group 2 = Android 'yyyymmdd' date;
# raw strings: '\d' and '\.' in normal literals are invalid escape sequences
redundantdatepattern = re.compile(r'(\d{4}-\d{2}-\d{2})__(\d{8})_\d{6}\..*')

# [2.2] android dates pattern, pre-tagpix (e.g., '20180205_154910.jpg')
# group 1 = the 'yyyymmdd' date at the front of the filename
filenamedatepattern = re.compile(r'(\d{8})_\d{6}\..*')


# Newer camera video types.
# Not hardcoded in py module, but may come from local files on some platforms
# even if not set here.  E.g., on Mac OS 10.11, the module auto-loads types from
# /etc/apache2/mime.types; on Windows, it tries the registry's MIME database.
# Some cameras save AVCHD videos as '.mts', which may map to MIME model/vnd.mts.
#
# (mimetype, extension) pairs registered as videos; the video/ main type
# is what isMovieFileName() tests for, and some may be auto-loaded anyhow
for _videotype, _extension in (('video/mp2t', '.mts'),
                               ('video/mp2t', '.m2ts'),
                               ('video/3gpp', '.3gp')):
    mimetypes.add_type(_videotype, _extension)
del _videotype, _extension    # leave no loop names behind at module level


# Route input() prompts to stderr.
# This allows normal stdout prints to be redirected to a file or pipe.
# Also make sure to flush stdout so Unix can watch with a 'tail -f'.
# [2.3] User-friendly exit on ctrl-c at prompt, not exception trace.
#
def input(prompt):
    "show prompt on stderr and return one stdin line, leaving stdout for the report"
    # a console gets the bare prompt; anything else (e.g., PyEdit) gets an eoln too
    ending = '' if sys.stderr.isatty() else '\n'
    sys.stderr.write(prompt + ending)
    sys.stderr.flush()

    try:
        reply = sys.stdin.readline()
    except KeyboardInterrupt:
        # [2.3] ctrl-c at a prompt: exit politely instead of a traceback
        print('\nScript not run: no changes made.')
        sys.exit(0)
    return reply.rstrip('\n')

builtin_print = print
def print(*args, **kwargs):
    "print via the built-in, then flush stdout so output can be watched live"
    result = builtin_print(*args, **kwargs)
    sys.stdout.flush()   # flush=True only in some Py 3.Xs
    return result


#=========================================================================
# Get run parameters from console
#=========================================================================


def yes(prompt):
    "ask a yes/no question; True only if the reply starts with 'y' or 'Y'"
    answer = input(prompt + ' ').lower()
    return answer.startswith('y')     # Enter (empty reply) = no

# [2.1] say copy if copy-only, but moves unchanged
# xfermode is a word stem for prompts: +'s' => moves/copies, +'d' => moved/copied;
# xferverb is the plain verb: 'move' or 'copy' ('copie' with 'ie' => 'y')
copyonly = CopyInsteadOfMove and not DeleteAfterCopy
xfermode = 'move' if not copyonly else 'copie'
xferverb = xfermode.replace('ie', 'y')

# don't run accidentally (e.g., clicks): any reply not starting with y exits
if not yes('tagpix renames and %ss photos to a merged folder; proceed?' % xfermode):
    print('Script not run: no changes made.')
    sys.exit(0)

# from dir: an empty reply selects the SOURCE folder in workdir
SourceDir = input('Source - pathname of folder with photos to be %sd? ' % xfermode)
if not SourceDir:
    SourceDir = os.path.join(workdir, 'SOURCE')   # prior/default: copy here

# [2.3] now done asap: verify from-dir, before the remaining prompts
if not os.path.isdir(SourceDir):
    print('Script not run: source folder does not exist, no changes made.')
    sys.exit(0)

# to dir: an empty reply selects workdir itself
destdir = input('Destination - pathname of folder to %s items to? ' % xferverb)
if not destdir:
    destdir = workdir

# target dirs (unknowns folder dropped): all live under destdir/MERGED
FlatPhotoDir = os.path.join(destdir, 'MERGED', 'PHOTOS')
FlatMovieDir = os.path.join(destdir, 'MERGED', 'MOVIES')
FlatOtherDir = os.path.join(destdir, 'MERGED', 'OTHERS')

# group into by-year subdirs?
YearFolders = yes('Group items into by-year subfolders?')

# show target names but don't rename/move
ListOnly = yes('List only: show target names, but do not rename or %s?' % xferverb)


#=========================================================================
# Initial setup
#=========================================================================


def configdirs():
    """
    ----------------------------------------------------------------------
    Create or (optionally) clean the three output folders: FlatPhotoDir,
    FlatMovieDir, and FlatOtherDir.  Does nothing at all in list-only 
    mode.  The source folder is not checked here: it is verified when 
    its name is input [2.3].

    A missing output folder is made along with any missing parents; if 
    that fails, a message is printed and the script exits.  An existing,
    non-empty output folder is emptied only if the user confirms twice;
    the folder itself is kept, and its content is otherwise left as is.
    ----------------------------------------------------------------------
    """

    # make no changes in list-only mode
    if ListOnly:
        return

    # make or empty to-dirs
    for subdir in (FlatPhotoDir, FlatMovieDir, FlatOtherDir):
        if not os.path.exists(subdir):
            try:
                os.makedirs(subdir)   # all path items, as needed
            except Exception:
                # not a bare except: ctrl-c and sys.exit() must pass through
                print('Script not run: cannot make an output folder, no images changed.')
                sys.exit()
            continue

        if (os.listdir(subdir)        # nonempty, even if just a .DS_Store
            and
            yes('Delete all prior-run outputs in "%s"?' % subdir)
            and
            yes('....About to delete: ARE YOU SURE?')):   # [2.1] verify!

            for tempname in os.listdir(subdir):
                temppath = os.path.join(subdir, tempname)
                if os.path.isdir(temppath) and not os.path.islink(temppath):
                    shutil.rmtree(temppath)    # a real folder: year subfolder
                else:
                    # simple photo or other file; also symlinks (even to 
                    # folders or broken), which rmtree() refuses to remove
                    os.remove(temppath)


#=========================================================================
# Analysis phase
#=========================================================================


def isMovieFileName(filename):
    """
    ----------------------------------------------------------------------
    True if the filename's extension maps to a MIME type whose main part
    is 'video' (e.g., 'video/mpeg'); False for all other or unknown types.
    Relies on the mimetypes module's tables, not a hardcoded extension set.
    ----------------------------------------------------------------------
    """
    guessed, _encoding = mimetypes.guess_type(filename)    # (type?, encoding?)
    if guessed is None:
        return False                                       # unknown extension
    return guessed.split('/')[0] == 'video'


def isExifImageFileName(filename):
    """
    ----------------------------------------------------------------------
    True if the filename's extension maps to MIME type image/jpeg or 
    image/tiff, whatever the extension's spelling.  These are the image 
    types treated as Exif-bearing 'photos' by tagpix; any other image 
    type, and any unknown extension, yields False (and goes to OTHERS).
    ----------------------------------------------------------------------
    """
    guessed = mimetypes.guess_type(filename)[0]     # (type?, encoding?)
    if guessed is None:
        return False                                # unknown extension
    parts = guessed.split('/')                      # e.g., ['image', 'jpeg']
    return parts[0] == 'image' and parts[1] in ['jpeg', 'tiff']


def getExifTags(filepath):
    """
    ----------------------------------------------------------------------
    Collect image-file metadata in a new dict, if any (PIL code + try+if).
    Returns {name: value} holding all Exif tags in the image, using the 
    TAGS table in PIL (Pillow) to map numeric tag ids to mnemonic names;
    ids absent from the table are used as keys themselves.  

    Any failure (unopenable file, no tags, unusable tags) is reported on
    stdout and yields whatever was collected so far: normally an empty 
    dict, and never None.  The image is closed explicitly when done, so 
    its file handle is not left open until garbage collection.
    ----------------------------------------------------------------------
    """
    nametoval = {}
    image = None
    try:
        image = Image.open(filepath)
        info = image._getexif()                    # not all have Exif tags
        if info is None:
            raise LookupError('No tags found')     # else items() bombs
        for tag, value in info.items():            # for all tags in photo file
            decoded = TAGS.get(tag, tag)           # map tag's numeric id to name
            nametoval[decoded] = value             # or use id if not in table
    except Exception as E:
        print('***Unusable Exif tags skipped: "%s" for' % E, filepath)
    finally:
        # close() is absent in very old PILs; a failure to close must not
        # turn a successfully-read photo into an error
        if image is not None and hasattr(image, 'close'):
            try:
                image.close()
            except Exception:
                pass
    return nametoval


def looksLikeDate(datestr):
    """
    ----------------------------------------------------------------------
    Return true if datestr seems to be a valid date.  datestr is a
    string of form "YYYYMMDD" (8 digits: asserted).  If it is a real 
    calendar date in years 1900..2100, returns a tuple of 3 ints 
    (YYYY, MM, DD), which is true; else returns False.

    This is used on filename dates after pattern matching, to discount 
    unrelated strings that have a date-like structure coincidentally.
    Month lengths and leap years are checked too, so impossible dates
    such as Feb 31 or Feb 29 in a non-leap year are rejected.
    It is assumed that tagpix probably won't be widely used after 2100...
    ----------------------------------------------------------------------
    """
    assert len(datestr) == 8 and datestr.isdigit()
    year, month, day = [int(x) for x in (datestr[0:4], datestr[4:6], datestr[6:8])]
    if not (1900 <= year <= 2100):
        return False
    try:
        datetime.date(year, month, day)    # raises ValueError if not a real date
    except ValueError:
        return False
    return (year, month, day)


def getFileNameDate(filename):
    """
    ----------------------------------------------------------------------
    Return an Android-style filename's date as 'yyyy-mm-dd', or None.
    None results if this feature is disabled in the user configs, if the
    name does not start with "yyyymmdd_hhmmss.", or if its date part 
    fails the looksLikeDate() test (a heuristic for false positives).

    Used for images with no Exif tags, or Exifs but no date-taken tag.
    The former can happen for Android photos edited in tools that drop
    all tags; the latter can happen in Samsung front (selfie) cameras
    that record no date-taken tag (probably a temp bug, but widespread). 
    In general, tagpix tries tags, then filenames, then file moddate.
    ----------------------------------------------------------------------
    """
    if not UseAndroidFilenameDates:                    # enabled in user configs?
        return None

    matched = filenamedatepattern.match(filename)      # "yyyymmdd_hhmmss.*"?
    if not matched:
        return None

    yearmonthday = looksLikeDate(matched.group(1))     # (y, m, d) or False
    if not yearmonthday:
        return None
    return '%4d-%02d-%02d' % yearmonthday


def getFileModDate(filepath):
    """
    ----------------------------------------------------------------------
    Get any file's modification-date string, or a default if unavailable.
    Returns 'yyyy-mm-dd' in local time, or 'unknown' if the file's 
    modtime cannot be fetched or converted (such items sort together).

    This is used as last resort tagpix date if there is no Exif or Android
    filename date, and reflects either file creation if the file was not
    edited, or else the most-recent edit.  Note that getctime() creation
    date is not used, because it is dependent on both operating system and 
    filesystem, is generally unavailable on Unix, and may be irrelevant. 
    Today's date was considered as the default too, but is not used.
    ----------------------------------------------------------------------
    """
    try:
        filemodtime = os.path.getmtime(filepath)
        filemoddate = str(datetime.date.fromtimestamp(filemodtime))    # 'yyyy-mm-dd'
    except Exception:
        # not a bare except: ctrl-c and sys.exit() must not become 'unknown'
        filemoddate = 'unknown'                                        # sort together
    return filemoddate


def _fallbackdate(filename, filepath):
    """
    ----------------------------------------------------------------------
    Date string for an item with no usable Exif date-taken tag: its
    Android-style filename date if any, else its file moddate string.
    The file's moddate is fetched only if the filename has no date.
    ----------------------------------------------------------------------
    """
    return getFileNameDate(filename) or getFileModDate(filepath)


def classify(sourcedir):
    """
    ----------------------------------------------------------------------
    For each file item in the sourcedir tree, create a (date, name, path)
    tuple, and add it to photo, movie, or other lists according to its type.
    Returns the three lists as a (photos, movies, others) tuple; their
    dates are 'yyyy-mm-dd' strings to be added as name prefixes by moves.

    Dates come from Exif tags for photos (DateTimeOriginal first, then
    DateTimeDigitized, skipping absent or blank values), else from 
    Android-style filenames, else from file moddates.  Nonphotos have
    no tags, and so use just the last two.

    Folders whose names match the user_configs.py skip pattern are 
    pruned from the walk, and files whose names start with '.' are 
    skipped; both are reported and remain in the source tree.
    subshere.remove() can't mod loop's list (and py 2.X has no list.copy()).
    TBD: the .* filename skips could be generalized for Windows cruft too;
    foldername skips are now in user_configs.py, but filenames are not.
    ----------------------------------------------------------------------
    """
    print(sepln)
    print('Analyzing source tree')
    photos, movies, others = [], [], []
    for (dirpath, subshere, fileshere) in os.walk(sourcedir):

        # prune skipped subfolders in-place, so os.walk() won't visit them
        for subname in subshere[:]:                   # copy: can't mod in-place [2.1]
            if ignorefolderspattern.match(subname) is not None:
                subpath = os.path.join(dirpath, subname)
                print('Skipping folder:', subpath)    # old PyPhoto, new thumbspage, etc
                subshere.remove(subname)              # don't scan, leave in source tree

        for filename in fileshere:
            filepath = os.path.join(dirpath, filename)

            # skip Mac .DS_Store, and other Unix hidden files
            if filename.startswith('.'):
                print('Skipping file:', filepath)     # and will remain in source tree
                continue

            #
            # nonphoto: try filename date, then file moddate
            #
            if not isExifImageFileName(filename):
                datefile = _fallbackdate(filename, filepath)        # tagdate='yyyy-mm-dd'
                if isMovieFileName(filename):
                    movies.append((datefile, filename, filepath))   # all video types
                else:
                    others.append((datefile, filename, filepath))   # pngs, gifs, text, etc.
                continue

            #
            # photo: try Exif tags first, then filename, then file moddate
            #
            pictags = getExifTags(filepath)                  # a dict: empty if no tags
            datetaken = ''
            for trythis in ('DateTimeOriginal', 'DateTimeDigitized'):
                fulltaken  = pictags.get(trythis, '')        # tag may be absent
                splittaken = fulltaken.split()               # fmt='date time'
                if splittaken:                               # bursts: 1st='  '
                    datetaken = splittaken[0]                # [0]='yyyy:mm:dd'
                    datetaken = datetaken.replace(':', '-')  # use 'yyyy-mm-dd'
                    break                                    # stop at 1st nonblank

            if not datetaken:
                datetaken = _fallbackdate(filename, filepath)    # sans exif tags/date
            photos.append((datetaken, filename, filepath))

    return (photos, movies, others)   # lists of (date, name, path)


#=========================================================================
# File-moves phase
#=========================================================================


def stripPriorRunDate(filename):
    """
    ----------------------------------------------------------------------
    Return filename without its leading "yyyy-mm-dd__" if that prefix is
    immediately repeated (as when a prior run's result is a source item
    in a rerun); else return filename unchanged.  Both the two-dates 
    pattern and equality of the two dates are required: if the dates 
    differ, the second is not taken to be a tagpix prefix.  A message is
    printed for each name stripped.

    There's no need to use the looksLikeDate() test here, because the 
    filename has already been prepended with a true date.  This does not
    remove __N suffixes added to duplicate names of differing content,
    but the suffix is still useful in reruns, and moveone() will ensure 
    that the new name is unique in any event.
    ----------------------------------------------------------------------
    """
    newdate, olddate = filename[:12], filename[12:24]     # each 'yyyy-mm-dd__'
    twodates = dupprefixpattern.match(filename) is not None
    if not (twodates and newdate == olddate):
        return filename                                   # not a tagpix prefix dup

    if ListOnly:
        tense = 'will be'
    else:
        tense = 'was'
    print('***A prior run\'s date prefix %s stripped:' % tense, filename)   # [2.2]

    stripped = filename[12:]
    assert newdate == stripped[:12], 'Prior and new dates differ'
    return stripped


def stripAndroidDate(filename):
    """
    ----------------------------------------------------------------------
    [2.2] Return filename without its redundant Android date, if any.
    This must be run _after_ stripPriorRunDate(), due to the pattern.

    Android (and perhaps other) cameras put a date in image filenames
    which duplicates the prefix added by tagpix in moveall().  When the
    name has form 'yyyy-mm-dd__yyyymmdd_hhmmss.ext', the second date and
    its '_' are dropped and the tagpix date is kept 
    (e.g., '2018-02-05__20180205_154910.jpg' => '2018-02-05__154910.jpg').

    The name is returned unchanged if this step is disabled in 
    user_configs.py, if the name lacks that form, if the second date 
    fails looksLikeDate() (a heuristic), or if the two dates differ and 
    user_configs.py asks to keep differing dates (the dates may differ 
    if an image is edited in tools that discard Exif creation-date tags).
    See also the on-demand _drop-redundant-dates.py utility script. 
    ----------------------------------------------------------------------
    """
    if not DropAndroidFilenameDates:                    # enabled in user_configs.py?
        return filename

    matched = redundantdatepattern.match(filename)      # redundant date present?
    if matched is None:
        return filename

    tagpixdate, sourcedate = matched.groups()           # 'yyyy-mm-dd', 'yyyymmdd'
    if not looksLikeDate(sourcedate):                   # bail if not a valid date
        return filename

    differs = tagpixdate.replace('-', '') != sourcedate
    if differs and KeepDifferingAndroidFilenameDates:
        return filename

    return filename[:12] + filename[21:]                # no message here: common


def samecontent(filepath1, filepath2, chunksize=1*(1024*1024)):
    """
    ----------------------------------------------------------------------
    Return True if two files' content is byte-for-byte identical, else
    False.  Reads up to chunksize bytes from each file per loop, till 
    bytes differ or eof is encountered on either/both (which returns an 
    empty chunk), so huge files are never loaded in full.
    This tests POSIX file content (the 'data' fork in Mac OS lingo). 

    Both files are closed on every exit path, including when the second
    open or a read raises; such exceptions propagate to the caller.
    ----------------------------------------------------------------------
    """
    with open(filepath1, 'rb') as file1:          # nested: also fine in older 2.X
        with open(filepath2, 'rb') as file2:
            while True:
                chunk1 = file1.read(chunksize)    # at most this many more bytes
                chunk2 = file2.read(chunksize)
                if chunk1 != chunk2:
                    return False                  # eof on one or bytes differ
                if not chunk1:
                    return True                   # eof on both: entirely same


def moveone(filename, filepath, flatdir, moved):
    """
    ----------------------------------------------------------------------
    Transfer one already-renamed file to its destination folder in the 
    merged result, or skip it if it has the same name and content as a
    file already there.  filename already has a tagpix date prefix, 
    filepath=original name: FROM=filepath, TO=flatdir(/year)?/filename.

    'moved' is a dict of destination paths chosen so far in this run.
    It detects duplicate names in ListOnly mode only, where nothing is
    on disk; in real runs only os.path.exists() is trusted, because a 
    path is recorded in 'moved' even if its transfer then fails, and 
    comparing content against a file that was never created would abort
    the whole run.

    This adds the year folder level to the path; skips true content 
    duplicates; and creates unique names for same-name/diff-content.
    The while loop here ensures that the unique-name suffix is unique,
    and tests for same content among all the filename's variants [2.1].
    Now does copy-and-delete and copy-only modes, not just moves [2.1].
    Transfer errors are reported and the run continues; returns None.
    ----------------------------------------------------------------------
    """

    # 
    # group by years, if selected
    #
    if YearFolders:
        # year = date prefix up to its first '-'; the prefix is split off
        # first so that a dateless 'unknown__name' yields 'unknown', not 
        # the entire filename (or a part of it ending at a later '-')
        year = filename.split('__', 1)[0].split('-', 1)[0]
        yearsub = os.path.join(flatdir, year)           # add year subfolder to dest path
        if not os.path.exists(yearsub) and not ListOnly:
            os.mkdir(yearsub)
        flatpath = os.path.join(yearsub, filename)      # year-subfolder/prefixed-name
    else:
        flatpath = os.path.join(flatdir, filename)      # flat-dest-folder/prefixed-name

    # 
    # skip or rename duplicates (report in ListOnly mode)
    #
    if ListOnly:
        if os.path.exists(flatpath) or flatpath in moved:    # dup from this run or other?
            # note dup but don't resolve now
            print('***Duplicate name will be resolved:', flatpath)

    elif os.path.exists(flatpath):
        # skip if same full content, else rename
        front, ext = os.path.splitext(flatpath)         # ext = last '.' to end, or ''
        suffixid = 1                                    # per-file numeric id [2.1]
        while True:                                     # till skipped or unique
            if samecontent(filepath, flatpath):
                # same name and byte-for-byte content: don't move
                print('***Duplicate content was skipped:', filepath, '==', flatpath)
                return

            # same date-prefixed name, diff content: add id to name and recheck
            print('***Duplicate filename made unique:', flatpath)
            flatpath = '%s__%s%s' % (front, suffixid, ext)    # add id suffix before ext
            if not os.path.exists(flatpath):            # id used by prior run? [2.1]
                break                                   # no: use this unique name
            suffixid += 1                               # else try again with next id

    # 
    # transfer unique file with date prefix from source to dest
    #
    print(filepath, '=>', flatpath)
    moved[flatpath] = True
    if not ListOnly:
        try:
            if not CopyInsteadOfMove:
                # move to merged result: original, default, recommended, faster
                os.rename(filepath, flatpath)

            else:
                # copy to result, leave in source? (e.g., across drives) [2.1]
                shutil.copyfile(filepath, flatpath)
                shutil.copystat(filepath, flatpath)    # same as copy2() but EIBTI
                if DeleteAfterCopy:
                    os.remove(filepath)                # else files may accumulate

        except Exception as why:
            # e.g., permissions, path length, lock, diff dev/filesystem
            message = ('***Error moving: %s\n'
                       'It was not renamed or moved, but the run continued'
                       ' and all non-error items were transferred.\n'
                       'Resolve the issue and rerun tagpix on your source folder'
                       ' to transfer this item too.\n'
                       'The Python error message follows:\n'
                       '%s => %s')
            print(message % (filepath, why.__class__.__name__, why))


def moveall(photos, movies, others): 
    """
    ----------------------------------------------------------------------
    Transfer every classified item, category by category: photos to 
    FlatPhotoDir, movies to FlatMovieDir, and others to FlatOtherDir.
    Each argument is a list of (date, name, path) tuples.  Every name 
    gets its date as a "date__" prefix, then loses any prior-run prefix
    and any redundant Android date, before moveone() handles duplicates
    and the transfer itself.  A header line with the item count is 
    printed per category.  [2.1] One loop serves all three categories, 
    as all handle duplicates and prior-run dates the same way.
    ----------------------------------------------------------------------
    """
    destpaths = {}   # passed to moveone(): for duplicates in ListOnly mode
    copyonly = CopyInsteadOfMove and not DeleteAfterCopy
    action = 'Copying' if copyonly else 'Moving'

    for (catname, catitems, catdest) in (('PHOTOS', photos, FlatPhotoDir),
                                         ('MOVIES', movies, FlatMovieDir),
                                         ('OTHERS', others, FlatOtherDir)):
        print(sepln)
        print('%s %s:' % (action, catname), len(catitems))

        for (datetag, filename, filepath) in catitems:
            prefixed = '%s__%s' % (datetag, filename)              # date-taken-or-mod prefix
            newname = stripAndroidDate(stripPriorRunDate(prefixed))
            moveone(newname, filepath, catdest, destpaths)         # handle dups, move or copy

    print(sepln)
        

def unmoved(sourcedir):
    """
    ----------------------------------------------------------------------
    Report all files still present in the sourcedir tree, post-moves.
    This includes duplicates, errors, hiddens, and skipped-folder items. 
    In copy-only mode the source tree is unchanged, so a one-line note
    is printed instead of a listing.  Folders themselves are not listed.
    ----------------------------------------------------------------------
    """
    if CopyInsteadOfMove and not DeleteAfterCopy:
        # nothing was moved or deleted: source content is moot [2.1]
        print('Nothing was removed from the source tree')
        return

    # original: show all files left behind by skips and errors
    missed = [os.path.join(dirpath, filename)
                  for (dirpath, subshere, fileshere) in os.walk(sourcedir)
                      for filename in fileshere]
    print('Missed:', len(missed))
    pprint.pprint(missed, width=200)
    print(sepln)


#=========================================================================
# Main logic
#=========================================================================


if __name__ == '__main__':
    #
    # Setup, classify, rename/move, and verify.
    #
    configdirs()                                          # make/clean outputs
    photos, movies, others = classify(SourceDir)          # plan moves

    if tracemore:
        # dump each classified list, followed by a separator line
        for category in (photos, movies, others):
            pprint.pprint(category, width=200)
            print(sepln)

    moveall(photos, movies, others)                       # execute moves
    if not ListOnly:
        unmoved(SourceDir)                                # report skips
    print('Bye.')



[Home page] Books Code Blog Python Author Train Find ©M.Lutz