#!/usr/bin/python
"""
=================================================================================
Usage: [py[thon]] dirdiff.py dir1-path dir2-path

Compare two directories to find files that exist in one but not the other.
This version uses the os.listdir function and list difference.  Note that
this script checks only filenames, not file contents--see diffall.py for an
extension that does the latter by comparing .read() results.

New Sep-2016: changed difference labels slightly, so users can search the
report for uppercase '*UNIQUE' and '*DIFFERS' to inspect differences quickly.

New Mar-2017: use FWP() to fix long path names on Windows, but don't change
user message in the process (else could minimize number of calls).

New Dec-2021, [3.3]: normalize Unicode in filenames for script-mode use.
When used as a module, callers are expected to normalize names instead.

New Dec-2021, [3.3]: moved intersect() to this file from diffall.py, both
for cohesion, and to break a mergeall<==>diffall cyclic import (but the
latter was made moot when the importee moved to fixunicodedups.py).  Also
spruced up docs.
=================================================================================
"""
from __future__ import print_function         # ADDED: 2.X compatibility

import os
import sys

# [3.0] fix too-long paths on Windows
from fixlongpaths import FWP

# [3.3] normalize Unicode for comparisons
from fixunicodedups import normalizeUnicode


def reportdiffs(unique1, unique2, dir1, dir2):
    """
    ---------------------------------------------------------------------------
    Generate diffs report for one dir: part of comparedirs output.

    unique1 and unique2 are the names found only in dir1 and only in dir2,
    respectively; dir1 and dir2 are used solely to label the printed report.
    Prints to stdout and returns None.
    ---------------------------------------------------------------------------
    """
    if not (unique1 or unique2):
        print('Directory lists are identical')
        return

    # Report each side only if it has unique names, dir1's side first.
    for (uniques, dirname) in ((unique1, dir1), (unique2, dir2)):
        if uniques:
            print('*UNIQUE items in %s:' % dirname)
            for name in uniques:
                print('...', name)


def _membership(seq):
    """
    ---------------------------------------------------------------------------
    Return an object supporting fast "in" tests for the items of seq.

    A set is used when every item is hashable (always true for filenames);
    otherwise seq itself is returned, so that "in" falls back on a scan.
    ---------------------------------------------------------------------------
    """
    try:
        return set(seq)
    except TypeError:
        return seq


def intersect(seq1, seq2):
    """
    ---------------------------------------------------------------------------
    Return all items in both seq1 and seq2, as a list in seq1's order.

    A set(seq1) & set(seq2) would work too, but sets are randomly ordered, so
    any platform-dependent directory order would be lost.  A set is used here
    only for the membership tests against seq2, so seq1's order is retained.

    [3.3] Assumes seq1/seq2 reflect Unicode normalization if needed.
    [3.3] Moved here from diffall.py for cohesion; not used if script.
    ---------------------------------------------------------------------------
    """
    others = _membership(seq2)         # build once: avoid a scan per item
    return [item for item in seq1 if item in others]


def difference(seq1, seq2):
    """
    ---------------------------------------------------------------------------
    Return all items in seq1 only, as a list in seq1's order.

    A set(seq1) - set(seq2) would work too, but sets are randomly ordered, so
    any platform-dependent directory order would be lost.  A set is used here
    only for the membership tests against seq2, so seq1's order is retained.

    [3.3] Assumes seq1/seq2 reflect Unicode normalization if needed.
    ---------------------------------------------------------------------------
    """
    others = _membership(seq2)         # build once: avoid a scan per item
    return [item for item in seq1 if item not in others]


def comparedirs(dir1, dir2, files1=None, files2=None):
    """
    ---------------------------------------------------------------------------
    Compare directory contents, but not actual files.
    May need bytes listdir arg for undecodable filenames on some platforms.

    [3.3] Normalize Unicode if file lists are None, else caller must do so.

    dir1 and dir2 are the directory paths; files1 and files2 are optional
    name lists that, when passed, are used as is in place of a fresh listing.
    Prints the report, and returns True if neither side has unique names.
    ---------------------------------------------------------------------------
    """
    print('Comparing', dir1, 'to', dir2)

    # FWP() is applied to the listdir argument only, so that the messages
    # printed for the user show the paths exactly as they were passed in.
    if files1 is None:
        files1 = [normalizeUnicode(name) for name in os.listdir(FWP(dir1))]
    if files2 is None:
        files2 = [normalizeUnicode(name) for name in os.listdir(FWP(dir2))]

    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiffs(unique1, unique2, dir1, dir2)
    return not (unique1 or unique2)               # true if no diffs


def getargs():
    """
    ---------------------------------------------------------------------------
    Args for command-line mode.

    Returns a (dir1, dir2) tuple taken from the command line.  Prints a usage
    message and exits with status 1 unless exactly two arguments were given.
    ---------------------------------------------------------------------------
    """
    try:
        dir1, dir2 = sys.argv[1:]                 # 2 command-line args
    except ValueError:                            # too few or too many args
        print('Usage: [py[thon]] dirdiff.py dir1 dir2')
        sys.exit(1)
    else:
        return (dir1, dir2)


if __name__ == '__main__':
    # -------------------------------------------------------------------------
    # Main script-execution logic (not when imported).
    # -------------------------------------------------------------------------
    dir1, dir2 = getargs()
    comparedirs(dir1, dir2)