#!/usr/bin/env python3
"""
=================================================================================
Simple tactical script to compare thumbspage results, sans date-generated lines.
Filters out two galleries' date-generated comments, and compares their folders.
Pass the pathnames of the two folders to compare as command-line arguments.

To compare folders, this uses the diffall program, if present; install it from
learning-python.com/mergeall.html, and set its install path in diffhome below.
This walks folder trees, because galleries can have nested gallery subfolders.
Spurious diffs in the host site include readme copies and ip-anon.py inserts.
An awk or sed script could probably do this too, but do we really want it to?
=================================================================================
"""

import os
import shutil
import subprocess
import sys

trace = print

diffall = True      # run diffall to compare results?
cleanup = True      # remove sans-dates temp folders?

# if diffall: number end lines to show, install path
diffshow = 50
diffhome = '/Users/me/MY-STUFF/Code/mergeall'    # edit me

# scratch copies of the two galleries, created in '.'
temp1, temp2 = 'cmp-thumbspage-results-temp1', 'cmp-thumbspage-results-temp2'

# NOTE(review): the original literals here were lost (the HTML-comment text
# appears to have been stripped from this file, leaving dateline empty and
# dropline undefined).  The values below are a reconstruction -- confirm the
# prefix against the date comment that thumbspage really writes, and edit.
dateline = '<!-- Generated'                        # prefix of a date line
dropline = '<!-- Generated (date removed) -->\n'   # its date-free stand-in


def changeone(path):
    """
    Strip date line in one file.

    Every line of the UTF-8 text file at path that starts with dateline is
    replaced by dropline, and the file is rewritten in place.  Files with
    no such line are left untouched.

    Returns 1 if the file was rewritten, else 0.
    """
    if not dateline:
        # an empty prefix would match every line and blank out the file
        return 0

    with open(path, 'r', encoding='utf8') as infile:
        lines = infile.readlines()

    if not any(line.startswith(dateline) for line in lines):
        return 0

    lines = [dropline if line.startswith(dateline) else line for line in lines]
    with open(path, 'w', encoding='utf8') as outfile:
        outfile.writelines(lines)

    # show the path without its first component (the temp folder's name)
    trace('...', os.sep.join(path.split(os.sep)[1:]))
    return 1


def changeall(target):
    """
    Strip date lines in entire folder tree.

    Walks target and runs changeone() on each file whose name ends in
    '.html', in any letter case.  Returns the number of files rewritten.
    """
    changed = 0
    for (adir, subs, files) in os.walk(target):
        for file in files:
            path = os.path.join(adir, file)
            if path.lower().endswith('.html'):
                changed += changeone(path)
    return changed


def _rundiffall(dir1, dir2):
    """
    Compare two folder trees with diffall, showing the report's last lines.

    Runs diffhome/diffall.py under the current Python, and prints only the
    final diffshow lines of its standard output.  Prints a notice instead
    if diffhome does not exist.
    """
    trace('--Comparing with diffall--\n')
    python = sys.executable or 'python3'
    if not os.path.exists(diffhome):
        print('diffall home does not exist: please correct or install')
        return

    # argument list, not a shell string: paths with spaces stay intact,
    # and no external tail command is required
    command = [python, os.path.join(diffhome, 'diffall.py'),
               dir1, dir2, '-skipcruft']
    result = subprocess.run(
        command, stdout=subprocess.PIPE, text=True, errors='replace')

    report = result.stdout.splitlines(keepends=True)
    ending = report[-diffshow:] if diffshow > 0 else []   # [-0:] is all lines
    sys.stdout.writelines(ending)


def main(argv=None):
    """
    Copy both galleries, strip date lines from the copies, and compare them.

    argv defaults to sys.argv; argv[1] and argv[2] are the two gallery
    folders.  The originals are never modified: all changes are made to
    the temp1 and temp2 copies in the current directory.

    Returns the process exit status: 1 for a usage or path error, else 0.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) != 3:
        print('Usage: cmp-thumbspage-results.py gallerypath1 gallerypath2')
        return 1

    for i in (1, 2):
        if not os.path.isdir(argv[i]):
            print('Gallery path %d is invalid:' % i, argv[i])
            return 1

    # Replace date lines in HTML files

    # clean temp dirs in '.'
    for temp in (temp1, temp2):
        if os.path.exists(temp):
            shutil.rmtree(temp)

    try:
        # copy thumbspage result folders
        shutil.copytree(argv[1], temp1)
        shutil.copytree(argv[2], temp2)

        print('--Removing date lines--\n')
        nchanged = changeall(temp1)
        print('Files changed in path1 copy:', nchanged, end='\n\n')
        nchanged = changeall(temp2)
        print('Files changed in path2 copy:', nchanged, end='\n\n')

        # Then diffall to compare?
        if diffall:
            _rundiffall(temp1, temp2)
    finally:
        # remove the copies even if a step above failed
        if cleanup:
            for temp in (temp1, temp2):
                if os.path.isdir(temp):
                    shutil.rmtree(temp)

    return 0


if __name__ == '__main__':
    sys.exit(main())


"""
================================================================================
OUTPUT:

--Removing date lines--

... index.html
... _thumbspage/2011-hartsfield-2.JPG.html
... _thumbspage/2010-losalamos-1.JPG.html
... _thumbspage/2004-colorado-2.JPG.html
....etc....
... _thumbspage/2003-sonyClie.jpg.html
... _thumbspage/2012-marysville.jpg.html
... _thumbspage/2017-pygadgets.png.html
... _thumbspage/2019-pyandroid-3.png.html
Files changed in path1 copy: 113

... index.html
... _thumbspage/2011-hartsfield-2.JPG.html
... _thumbspage/2010-losalamos-1.JPG.html
... _thumbspage/2004-colorado-2.JPG.html
....etc....
... _thumbspage/2003-sonyClie.jpg.html
... _thumbspage/2012-marysville.jpg.html
... _thumbspage/2017-pygadgets.png.html
... _thumbspage/2019-pyandroid-3.png.html
Files changed in path2 copy: 113

--Comparing with diffall--

2011-losalamos-1.JPG matches
2011-monterey.JPG matches
2006-chicago-2.JPG matches
2014-Frankenthon!.png matches
....etc....
2010-losalamos-2.JPG matches
2019-pyandroid-2.jpg matches
2018-mobile-site-2.png matches
2000-newmarket-3.jpg matches
================================================================================
Runtime hrs:mins:secs = 0:0:0.23
Dirs checked 8, Files checked: 467, Files skipped: 0
System metadata (cruft) files were skipped
Diffs found: 3
- files DIFFER at [cmp-thumbspage-results-temp1/xold-index.html] - [cmp-thumbspage-results-temp2/xold-index.html]
- files DIFFER at [cmp-thumbspage-results-temp1/_cut/Mark Lutz's Training Photos.htm] - [cmp-thumbspage-results-temp2/_cut/Mark Lutz's Training Photos.htm]
- items UNIQUE at [cmp-thumbspage-results-temp1/_cut/_RESTORED-mar1120-bad-shrink] - [cmp-thumbspage-results-temp2/_cut/_RESTORED-mar1120-bad-shrink]
End of report.
"""