#!/usr/bin/python """ ======================================================================================== headcode.py - July 2016, Python 3.X + 2.X Synopsis: a simple HTML file code insertion/deletion script Author: M. Lutz, learning-python.com License: provided freely but with no warranties of any kind UPDATE: for a more-recent tool which achieves some similar goals, see also https://learning-python.com/thumbspage/build/insert-analytics.py Adds (if no args) or removes (if any arg) analytics or other -section code block *IN-PLACE* in all HTML files in a directory tree. Change your tree root path (HTMLDIR) and insertion code (INSERT) below as desired, and run with 0 args to add or 1 to remove. In the target use case, each run of this script saves editing 70 files manually - a prime example of the sort of tactical tasks Python excels at. For an example of this script in action, see: http://learning-python.com/books/headcode-LOG.txt Assumes your HTML files have either a "" or "" to use as a replacement target. Also assumes your bytes INSERT is encoded per a Unicode scheme that is compatible with the content of all your HTML files -- it's ASCII by default if you use a bytes literal and the os.linesep encoding here, and ASCII is a subset of most encodings (e.g., latin1, utf8); but manually encode a str otherwise if needed. The INSERT encoding constraint derives from this script's use of binary files and bytes to sidestep Unicode encodings - it processes still-encoded text to avoid having to know each file's (and hence the INSERT's) encoding. Alternatives: 1) Guess file encodings by trying to open and decode with each of a set of options 2) Look for an encoding type in a tag (if present) by parsing HTML 3) Attempt to guess each file's encoding by inspecting its contents' bytes Each of these seems overkill for this simple utility (but see genhtml for #1). ======================================================================================== """ from __future__ import print_function # 2.X compatibility import sys, os, glob HTMLDIR = r'C:\MY-STUFF\Training\cd\training-cd-2.7-3.5--oct015' # live HTMLDIR = r'C:\Users\me\Desktop\training-cd-2.7-3.5--oct015' # test mode = 'insert' if len(sys.argv) == 1 else 'restore' print('MODE:', mode) # assume ASCII compatible with content of files, else encode a str manually INSERT = b""" """ INSERT = INSERT.replace(b'\n', os.linesep.encode('ascii')) # for binary-mode file cnvcnt = 0 for (dirpath, dirshere, fileshere) in os.walk(HTMLDIR): for filehere in fileshere: if filehere.endswith(('.htm', '.html')): htmlfile = os.path.join(dirpath, filehere) print(htmlfile) oldfile = open(htmlfile, 'rb') # ignore Unicode encoding of file content, use bytes oldtext = oldfile.read() # but assume compatible with INSERT encoding (ASCII?) oldfile.close() newfile = open(htmlfile, 'wb') if mode == 'insert': newtext = oldtext.replace(b'', (INSERT + b'')) newtext = newtext.replace(b'', (INSERT + b'')) elif mode == 'restore': newtext = oldtext.replace(INSERT, b'') newfile.write(newtext) newfile.close() cnvcnt += 1 print('DONE: %d files converted.' % cnvcnt)