File: genhtml/__docs__/
#!/usr/bin/env python3
"""
================================================================================
Part of genhtml (with same copyright, author, and license).

An example script that runs genhtml on every part folder in a website, and
is used to create and publish the subject site described below.  Also uses
ziptools.  Your site will vary -- this is just one example use case for
genhtml.

OVERVIEW: this script assumes the site's content is split into multiple
local subfolders under a common root, with each part's subfolder configured
to use genhtml inserts.  It generates, combines, zips, and uploads by FTP
all parts in a single run.  After a manual unzip in its root, the site on
the server winds up being the union of all the local subfolder parts.
Common files like CSS and .htaccess must be in just one part, except for
nested content folders.

USAGE: Set global switches below for zip, upload, and folder-save modes,
and see all "# edit me" for items that will vary per site.  Run this script
in the local site root folder to make and upload all site content, then SSH
to unzip in your site's root.  As an example, the subject site is published
with these steps:

  1) run this script in the local site root to make, zip, and upload
  2) ssh user@domain to server, and unzip in the site's root folder with:
       cd html root; mv zip ..; rm -rf *; mv ../zip .; unzip -d . zip; rm zip
  3) cgi folder is nested in html, and part of zip; chmod +x scripts iff
     needed: ziptools does permissions on both create and extract, but
     others may not

THE SUBJECT SITE's prior structure was ad hoc and legacy, but its URLs in
books, hrefs, and search engines kept working.  The new union structure has
a few subdirs for well-defined, non-page roles, but is otherwise flat.
This avoids edit nightmares when subfolders move or change (the site's top
level alone is 400+ files), and is aided by URL redirects (e.g.,
'/book'->'..').  The new single-zip packaging allows all changes/edits to
be done on the local copy, not on the site via ssh (e.g. nested part
unzips, analytics inserts).

SEE ALSO: a variant of this script zips and uploads the site in two parts,
to placate UNIX "unzip" command-line programs that do not handle files >
ZIP64's 2G cutoff.  Use iff your site is that large, and a recent Python
2.X or 3.X to run ziptools isn't available.

UPDATE, Feb-2018: add post-generate step to run a fix-readmes script, to
copy any local README*.txt to _README*.txt for broken Apache autoindex
pages, so the local UNION copy matches the remote site (see that script
for more details).

UPDATE, Jun-2019: add post-generate step to insert JS code to anonymize IP
addresses in Google analytics calls; differs for HTML and .htaccess inserts.

UPDATE, Mar-2020: this site moved to an AWS Lightsail VPS for speed; some
pipeline tools now differ, but this script and genhtml are still used as
before.

NOTE: on any failure this script exits with status 1, so calling shells
and scripts can detect that the publish did not complete.
================================================================================
"""

import os
import shutil
import sys

join = os.path.join

# switches
KEEPDIR = True    # retain zipped union folder for testing?
ZIPDIR = 0        # or True: zip union upload dir into a single file?
UPLOAD = 0        # or True: upload zipped file by ftp automatically?

thedir = 'UNION'             # where final joined content appears
thezip = thedir + '.zip'     # where the zipped content dir appears
verbose = False              # trace file copies? (else just folders)

homedir = os.getcwd()        # the local site root: this script is run there
python = sys.executable      # run child scripts with the same interpreter


def say(msg):
    """Print msg set off by blank lines, flushed so that it stays ordered
    with the output of commands spawned by os.system()."""
    print('\n\n' + msg + '\n', flush=True)


def check(stat, msg):
    """End the run if stat, an os.system() result, is nonzero.

    Reports 'Error: ' + msg first.  Exits with status 1 rather than the
    default 0, so that a failed step is not reported to the caller as a
    successful run.
    """
    if stat != 0:
        say('Error: ' + msg)
        sys.exit(1)


say('Generating sites---------------------------------------------------------')

# edit me
PARTS = ['Books', 'Programs', 'Posts', 'Author', 'Training', 'OldMain', 'Class']
GENER = [part for part in PARTS if part not in ['Class']]    # or set() - set()

for gendir in GENER:
    say('Generating ' + gendir)
    os.chdir(join(gendir, 'Current'))
    # NOTE(review): the command's path ends at the genhtml folder in this
    # copy of the file -- confirm the script name before running
    stat = os.system('%s /Users/me/MY-STUFF/Code/genhtml/' % python)    # works on '.'
    check(stat, 'genhtml failed')
    os.chdir(homedir)


say('Collecting sites---------------------------------------------------------')

# generated parts are collected from their output folder; others are used as is
FROMS = [(join(part, 'Current', 'Complete') if part in GENER else part)
         for part in PARTS]

# favicon.ico, .htaccess => in Books only
DUPOK = ('PythonPowered.gif', '.DS_Store', '_main.css')    # zip drops cruft later!

# always rebuild the union folder from scratch
if os.path.exists(thedir):
    shutil.rmtree(thedir)
os.mkdir(thedir)


def copy(item, dest):
    """Copy the file or folder tree at item to dest.

    Retains original files' modtimes, as does the zip: this is a bit gray,
    but the history can be useful, and this avoids full copies on
    incremental backups.

    NOTE: this follows any symlinks, and copies what they reference (not
    the links); fix if it matters.
    """
    if os.path.isdir(item):
        shutil.copytree(item, dest)    # does copy2() = content + stat
    else:
        shutil.copyfile(item, dest)    # content only
        shutil.copystat(item, dest)    # retain modtime and mode (permissions)


# merge all into site's root folder; the union folder starts out empty, so
# tracking the names copied so far is the same as relisting the folder for
# every item, without the repeated directory scans
merged = set()
for root in FROMS:
    print('\nCopying part', root)
    for item in os.listdir(root):
        if item in merged and item not in DUPOK:
            print('\tDuplicate item: %s in %s' % (item, root))    # and fix
            sys.exit(1)    # nonzero status: the union is incomplete
        if verbose:
            print('\tCopying %s from %s' % (item, root))
        copy(join(root, item), join(thedir, item))
        merged.add(item)

#
# Feb-2018: fix READMEs for Apache autoindex pages (all "Add": UNION rebuilt)
#
print('\nFixing READMEs in', thedir)
# NOTE(review): no script name appears between 'python' and the folder in
# this copy of the file -- confirm the intended command
stat = os.system('python ' + thedir)
check(stat, 'fix-readmes failed')    # not critical, but should fix

#
# Jun-2019: add GA IP anonymization code to HTML and .htaccess files made
# elsewhere; this should go away after all external files are converted and
# propagated here;
# Caveat: this changes munged-files' modtimes (currently), creating mergeall
# diffs;
# Apr-2020: ip-anon now propagates modtimes;
#
print('\nAdding GA IP anonymization code in', thedir)
# NOTE(review): no script name appears after 'python3' in this copy of the
# file -- confirm the intended command
stat = os.system('python3 0 0')
check(stat, 'ip-anon failed')    # not critical, but should fix


say('Zipping sites to zipfile-------------------------------------------------')

if not ZIPDIR:
    print('--Skipping union dir zip--')
else:
    # run zip in union dir with source="*" so can unzip in site root directly,
    # else must unzip to temp folder and move all items (or dir) on server;
    # a "zipfile folder" style command instead records items as nested in a
    # folder: that requires a post-zip move, but may be arguably safer;
    # nit: the os.system assumes names are quoted or have no spaces in them;
    os.chdir(thedir)
    extras = '.htaccess'    # not in shell * expansion!
    ziphome = '/Users/me/MY-STUFF/Code/mergeall/test/ziptools'    # edit me
    zipcmd = '%s %s/ %s/%s %s %s -skipcruft' % \
             (python, ziphome, homedir, thezip, '*', extras)
    stat = os.system(zipcmd)
    check(stat, 'zip-create failed')
    os.chdir(homedir)

# NOTE(review): this step's nesting was reconstructed from a copy of the file
# that had lost its indentation; it is placed at top level to match the
# KEEPDIR message printed at the end of the run -- confirm
if KEEPDIR:
    print('--Retaining union folder--')
else:
    shutil.rmtree(thedir)    # or keep around for testing


say('Uploading sites zipfile--------------------------------------------------')

#
# [Mar20] Caveat: this worked well for the site's former Godaddy host,
# but is no longer used at the site's latest host, an AWS Lightsail VPS.
# Automation would require changing this to use SFTP, and the FileZilla
# alternative is both easy and reliable.
#
if not UPLOAD:
    print('--Skipping upload step--')
else:
    import ftplib
    from getpass import getpass

    remotesite = ''               # edit me
    remotedir = 'public_html'     # was '.' (aws='htdocs')
    remoteuser = input('User name? ')    # or hardcode for easier use
    remotepass = getpass('Password for %s on %s: ' % (remoteuser, remotesite))

    # upload call is atomic
    zipsize = os.path.getsize(thezip)
    print('Uploading site zipfile, %d bytes...' % zipsize)

    # the with blocks close the connection and the local file even if the
    # login, cwd, or transfer raises an exception
    with ftplib.FTP(remotesite) as connection:      # connect to FTP site
        connection.login(remoteuser, remotepass)    # log in as user/password
        connection.cwd(remotedir)                   # cd to directory to xfer
        with open(thezip, 'rb') as localfile:
            connection.storbinary('STOR ' + thezip, localfile)    # binary mode


say(' finished.')
if KEEPDIR:
    print('See the combination site in local folder %s.' % thedir)
if ZIPDIR:
    print('See the zipfile %s in the local root folder.' % thezip)
if UPLOAD:
    print('Ssh to user@domain and move+unzip %s in the site HTML root folder.' % thezip)