File: .htaccess

#----------------------------------------------------------------------------------------
# Main (site root) Apache config file.  This file overrides host defaults, and some 
# subfolders have their own .htaccess files that may in turn customize settings here.
#  
# In general, Apache settings are hierarchical.  At the top of the tree, the main 
# Apache config file (inaccessible on this site's current host) defines defaults and 
# allows/disallows per-folder .htaccess overrides.  Within a folder, settings are 
# inherited from all higher folders in the path, through the main Apache config file. 
# Hence, settings in a folder's .htaccess apply to that folder and all subfolders below 
# that level in the filesystem.  Because this file is highest, its settings are 
# site-global.  For more: https://httpd.apache.org/docs/current/howto/htaccess.html.
#
# Coding note: comments in .htaccess files must generally be full-line only; see ahead.
#----------------------------------------------------------------------------------------


#----------------------------------------------------------------------------------------
# Mar-2020: this site was moved from its former GoDaddy shared/business hosting to a
# VPS on Amazon's AWS Lightsail.  The new VPS hosting offers complete access to the
# Apache server and config files (httpd.conf), so many or all of the settings in this
# site's .htaccess files (along with their frustrations) are now moot.  For convenience,
# the .htaccess files will still be used until they can be absorbed into the root's configs.
#
# The only new items here are a rewrite from www.* => * below for SEO (both have DNS 
# A records that point to same static IP, and "www" is a ServerAlias for non-"www" in 
# a VirtualHost); along with the README.html and __pycache__ autoindex ignores ahead.
# NOTE: the default Apache install in a bitnami LAMP stack enables mod_pagespeed - which
# both rudely munged educational code, and caused segfaults in Apache; manually disabled!
#----------------------------------------------------------------------------------------

# Turn on mod_rewrite processing for this folder and everything below it.
RewriteEngine on
# Fire only when the Host header begins with "www." (any letter case, per NC);
# %1 is the rest of the host name captured by the parenthesized group.
RewriteCond %{HTTP_HOST} ^www\.(.*)$ [NC]
# Send a permanent (301) redirect to the same path ($1) on the bare host over https,
# and stop processing further rules on this pass (L).
RewriteRule ^(.*)$ https://%1/$1 [R=301,L]


#----------------------------------------------------------------------------------------
# Oct-2022 android-deltas-scripts => android-deltas-sync: rewrite any prior-name URLs 
# lingering on this site, referenced elsewhere, or indexed by search engines.  Used for
# the .html file as well as all references to the folder with or without trailing "/". 
# Per PyEdit Greps: there are just 5 harmless refs to old URL in thumbspage docs today.
#----------------------------------------------------------------------------------------

# Permanently redirect any URL beginning with the old "android-deltas-scripts" name
# to the same URL under "android-deltas-sync"; $1 carries over whatever followed the
# old name (".html", "/", "/file", or nothing).  The host test is a single anchored,
# case-insensitive pattern with its dots escaped, so it matches only this site's bare
# or "www." host name (an unescaped "." would match any character).
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^android-deltas-scripts(.*)$ https://learning-python.com/android-deltas-sync$1 [R=301,L]


#----------------------------------------------------------------------------------------
# Jun-2017 redesign for programs, initial redirects 
# (Aug18: now to https, if "L" = last, precluding https rewrite ahead)
#
# CAUTION: Apache mod_rewrite URL rewriting is __iterative__:
# - it reruns all these in sequence again until no more changes, or default=10 times;
# - this means that a rewrite may be overwritten on the next iteration;
# - [L] can preclude reaching rules further below (see 'break' in C/Python loops);
# - [L] just means end *this* ruleset context, and [END] may not be supported;
# But this is _stunningly_ complex and brittle; try a web search, or intros here:
# https://httpd.apache.org/docs/2.4/rewrite/tech.html
# https://httpd.apache.org/docs/2.4/rewrite/flags.html#flag_l
#----------------------------------------------------------------------------------------

# rewriteengine on (now above)

# aug-2017: reroute _all_ /downloads*, including /downloads/, not just /downloads.
# Every match goes to the fixed programs.html page; the captured suffix is not used.
# The "#..." tag that used to trail the rule now sits on its own line, because Apache
# documents comments as full-line only: 594463424e1e3
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^downloads(.*)$ https://learning-python.com/programs.html [R=301,L]

# Permanently redirect anything starting with "recenthighlights" to posts.html.
# The host test is one escaped, case-insensitive pattern for the bare or "www." name.
# The "#..." tag that used to trail the rule now sits on its own line, because Apache
# documents comments as full-line only: 59446306007e6
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^recenthighlights(.*)$ https://learning-python.com/posts.html [R=301,L]

# Permanently redirect the former /books folder to the site root: /books => (root),
# /books/ => /, /books/x => /x.  The captured suffix ($1) is pasted directly after the
# host name (this avoids a double "//"), so it MUST be empty or begin with "/".
# Otherwise a request such as /books.evil.example/ or /books@evil.example/ would be
# redirected to a different host (an open redirect).  Names that merely start with
# "books" (e.g., books.html) are no longer matched and fall through to later rules.
# The "#..." tag that used to trail the rule now sits on its own line, because Apache
# documents comments as full-line only: 59445ffbc6fdc
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^books(/.*)?$ https://learning-python.com$1 [R=301,L]


#----------------------------------------------------------------------------------------
# Jul-12-17: temp workaround for pyedit userguide broken product links (till next build): 
# present in app, exes, src.  This was also fixed in live unzipped copy, /Code, and 
# /Websites/{Programs, UNION}, but not in zipped products (will roll out in next build).
#----------------------------------------------------------------------------------------

# Permanently redirect broken userguide links of the form pyedit.html/PyEdit<x>.zip
# to pyedit-products/PyEdit<x>.zip, where $1 is the <x> part of the file name.
# The "." before "zip" is escaped so that only a literal ".zip" suffix matches.
# The "#..." tag that used to trail the rule now sits on its own line, because Apache
# documents comments as full-line only: 59664d12d58c0
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^pyedit\.html/PyEdit(.*)\.zip$ https://learning-python.com/pyedit-products/PyEdit$1.zip [R=301,L]


#----------------------------------------------------------------------------------------
# Jul-12-17: temp workaround for mergeall userguide broken "Moredocs" bad-case links 
# (till next build) [FOR LINUX ZIP ONLY].  This was also fixed in live unzipped copy,
# /Code, and /Websites/{Programs, UNION}, but not in zipped products.
#----------------------------------------------------------------------------------------

# Permanently redirect the bad-case folder name "Moredocs" to the real "MoreDocs",
# keeping the rest of the path ($1).  The rule pattern must stay case-sensitive (no NC
# on the RewriteRule): a case-insensitive match would also hit the corrected
# "MoreDocs" URL and redirect forever.  Only the host test is case-insensitive.
# The "#..." tag that used to trail the rule now sits on its own line, because Apache
# documents comments as full-line only: 59664d12d58c0
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^mergeall-products/unzipped/docetc/Moredocs(.*)$ https://learning-python.com/mergeall-products/unzipped/docetc/MoreDocs$1 [R=301,L]


#----------------------------------------------------------------------------------------
# Feb-2018: map all *.py not in cgi/ (at URL front) to the new showcode.py CGI script.
# Added for mobile redesign; an alternative to "add handler" or *many* manual link edits.
# This works so well that it was adopted for *.txt files too (per the generalized rule),
# such that even files in the unzipped product packages use the site-wide display style.
# This must avoid robots.txt files, and can impact autoindex README.txt files - see ahead.
#
# Jan2019: add .sh files too for mergeall-android (else downloaded instead of displayed),
# as well as .c and .h for class workbook code (else they are badly broken on mobiles).
# Also, the literal "." before the file extension needed to be escaped, but was harmless:
# just one obscure file out of 8K matched, in one of the old class workbook folders:
#
# /MY-STUFF/Websites/UNION$ find . -name '*[^.]txt' -print
# /MY-STUFF/Websites/UNION$ find . -name '*[^.]pyw' -print
# /MY-STUFF/Websites/UNION$ find . -name '*[^.]py' -print
# ./class/Extras/Other/extensions/Handhelds/Pippy
# ./class/Workbook/Examples/Lecture2/runpy
# /MY-STUFF/Websites/UNION$ find . -type f -print | wc -l
#    7967
#
# Apr2020: make the match case insensitive with [NC] => .txt and .TXT now both match and 
# are routed to the script.  This cropped up once after 2 years of heavy usage, for an 
# old MS-DOS era .TXT file newly copied to the site; user-typed URLs might trigger too. 
#----------------------------------------------------------------------------------------

# Apply only to requests whose Host header matches the www or bare site name
# (the two conditions are alternatives, joined by OR).
rewritecond %{HTTP_HOST} ^www.learning-python.com$ [OR]
rewritecond %{HTTP_HOST} ^learning-python.com$
# Route source/text files to the showcode.py CGI script:
# - the leading negative lookahead skips paths that start with "cgi/" and paths that
#   contain "robots" + any one character + "txt" (the "." there is unescaped);
# - $1 is the path up to the final ".", and $2 is the extension (one of py, txt, pyw,
#   sh, c, h; matched case-insensitively per NC); both are passed as "name=$1.$2";
# - L ends rule processing for this pass; there is no R flag on this rule.
# NOTE(review): the target carries its own query string and has no QSA flag, so any
# query string on the original request is presumably dropped - confirm this is intended.
# NOTE(review): $1 is inserted into the query string without the B flag, so characters
# such as "&" in a file name would not be escaped - confirm no such names are served.
rewriterule ^(?!(?:cgi\/|.*robots.txt))(.*)\.(py|txt|pyw|sh|c|h)$ "https\:\/\/learning-python\.com\/cgi\/showcode\.py\?name\=$1.$2" [L,NC]


# before apr-23-18 excluding robots.txt (likely harmless)
# rewriterule ^(?!cgi\/)(.*).(py|txt|pyw)$ "http\:\/\/learning-python\.com\/cgi\/showcode\.py\?name\=$1.$2"

# this fails (loops) if listed first: why?
# rewriterule ^(.*)robots.txt$ "http\:\/\/learning-python\.com\/$1robots.txt" [L]


#----------------------------------------------------------------------------------------
# Aug-2018: for HTTPS, rewrite and redirect all http://... to https://...
#
# This requires a server SSL/TLS certificate (a much-too-easy push button thing on
# GoDaddy at first, but extra admin and $ later).  It appeases Google Chrome's rude 
# "Not Secure" label now issued for every page on "http://" sites, but is massive 
# overkill here - this site has just 1 input field, on its Search page (yes, irony).  
#
# This rewrite can be applied to results of prior rewrites, but flag "L" = last rules
# above use https in their expansions to force.  An "L" may have been required in the
# showcode.py rule to avoid bogus rewrites.  The R=301 here sends a permanent-redirect 
# reply (important for search engines).  An (untested) equivalent directive: 
# "Redirect permanent / https://learning-python.com/".  
#
# NOTE: older IEs (e.g., 9 on Windows 7) do not support TLS by default, and require an
# Advanced tab settings change in IE to access the site.  Other older browsers may not
# support the TLS version 1.2 that GoDaddy sells today at all.  This seems a massive 
# downside for HTTPS use; some recent TLS browser support is just 5 years old:
# https://help.salesforce.com/articleView?id=000220586&type=1
# https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers
#
# IMPORTANT: browsers don't allow (and ignore) absolute http:// links to page resources
# like CSS and JS file links in pages loaded from an https:// server - even if a rewrite 
# rule here would redirect it.  This broke page icons, coded with absolute http:// paths
# due to the former dual book/training site structure, but was a fairly easy fix: because
# icons are a genhtml insert, just change the insert file, regenerate pages with a normal
# genhtml run, and upload all .htmls.  Also broken were program auto-index README.html
# pages in subfolders that used http:// paths for CSS and icons in zany .htaccess files 
# of their own; these required many manual fixes.  Relative paths are best, but not 
# always possible (see subfolders that might be moved over time). 
#----------------------------------------------------------------------------------------

# Any request that did not arrive over TLS is answered with a permanent (301)
# redirect to the same path on https://; requests already on https skip this rule.
RewriteCond %{HTTPS} =off
RewriteRule (.*) https://%{SERVER_NAME}/$1 [L,R=301]


#----------------------------------------------------------------------------------------
# This never worked: its result was rewritten by the prior rule on the next iteration 
# (see above).  Instead, generate a plain-text reply in the cgi script itself, for the
# rawmode=view query param automatically sent by the formatted-display template's link.
#
# Feb-2018: but DO NOT route to script if it's a raw-text link in a reply page...
# rewritecond %{HTTP_HOST} ^www.learning-python.com$ [OR]
# rewritecond %{HTTP_HOST} ^learning-python.com$
# rewriterule ^cgi\/rawtext\/(.*)$ "http\:\/\/learning-python\.com\/$1"  [L] 
# [L] = skip run next rules if this fired, but on this iteration only...
# 
# RewriteCond %{ENV:REDIRECT_STATUS} !=200   # per a rumor, also failed
# RewriteRule .* -  [L]
#----------------------------------------------------------------------------------------



#----------------------------------------------------------------------------------------
# (Older terse comments) godaddy url redirects simply insert the above rewrite code
# these are just pcre (perl) REs [^=start after domain/, $=eostring, $1=(.*) group]
#
# Last line was this, which caused double // after domain (clear firefox network cache on updates):
# rewriterule ^books(.*)$ "http\:\/\/learning-python\.com\/$1" [R=301,L] #59445ffbc6fdc
#
# Tweaked 2nd rule, to be more inclusive; was:
# rewriterule ^recenthighlights\.html$ "http\:\/\/learning-python\.com\/posts\.html" [R=301,L] #59446306007e6
#
# This might work too (not verified)...
# RewriteCond %{THE_REQUEST} //
# RewriteRule .* $0 [R]
#----------------------------------------------------------------------------------------


#----------------------------------------------------------------------------------------
# Original .htaccess code (and less-polished docs)...
#
# Major changes:
#   Oct15: book site earthlink->godaddy relocation
#   Jun17: redesign for programs, union-folder model
#   Oct17: .htaccess fix for covert godaddy host move
#   Feb18: mobile-friendly redesign, showcode.py
#   Apr18: relocation to better server/host at godaddy
#   Jul18: autoindex READMEs broke again: revive _READMEs
#   Mar20: more autoindex thrashing at new AWS VPS host
#
# This is an Apache server config file: at this root level, 
# it provides defaults for the entire site - it's used for this 
# folder and all its subfolders, unless a subfolder has its own 
# .htaccess overrides.  The main Apache config file is maintained 
# by the hosting provider, and may vary per host (yes, argh).
#
# Here: generate file indexes for dirs, if no index.html; fix 
# autoindex name width to that of longest file name; view .py 
# (etc.) instead of running (except in cgi/ => adds handler).
#
# NOTE: at site, the top-level cgi/ script folder is actually 
# just a link to html/cgi, but it's special-cased so that 
# html/.htaccess (this file) does not apply to its content;
# [Apr-2018: any folder can be used for cgi now.]
#
# Oct17 .htaccess fix:
# RemoveHandler was broken by godaddy relocating my site...
# Works only in root's .htaccess; had to comment out in all
# subdirs, but leave in root for .pys at the root level...
# permissions and content had no effect...
# all subfolder changed lines are "###RemoveHandler..."
# also requires "AddHandler cgi-script .py" in cgi/.htaccess...
#
# Feb18: added rewrite rule above for showcode.py file viewer;
# this broke README.txt files in auto-index pages (rewritten?);
# addressed by renaming to .html, <PRE> around text, and using
# "ReadmeName README.html" in subdirs .htaccess; this impacted
# 5 auto-index pages in /Code, plus 5 complete-app unzipped;
# later made auto-index .htaccess + README.html more complete;
# later copied README.txt to _README.txt so appear in listings;
# [Apr-18: even later... rules at new hosts differ; see ahead]
#
# TBD: should there be a "+" before NameWith? - as is, this 
# may be cancelling out some other IndexOption defaults (but 
# is unrelated to README.txt/.html autoindex: IndexIgnore);
# Apr-2018: adding a "+" seems to have no effect whatsoever...
#----------------------------------------------------------------------------------------

# Generate a file listing for folders; the "+" adds this to the Options already in effect.
Options +Indexes
# Size the listing's name column to fit the longest file name.
IndexOptions +NameWidth=*
# Drop any handler mapping for these extensions so the files are viewed, not run.
# RemoveHandler accepts only extensions, not a handler name: the former leading
# "cgi-script" argument was itself parsed as just another extension, so it is omitted.
RemoveHandler .py .pyw .cgi


#----------------------------------------------------------------------------------------
# Apr-2018: on new cPanel web host at godaddy, the readme issue
# is different - autoindexes list both README.txt and _README.txt;
# display text of neither inline; and also list a README.html
# even if it's named as the ReadmeName file.  On the former host,
# README.txt was dropped altogether (not in list, and not inline)
# due to showcode rewrite rule above; hence the _README.txt copies.  
# The new host at least lists README.txt, but doesn't show it inline 
# despite the ReadmeName below.  Eventually drop _README*.txt copies; 
# exclude them here for now.  Perpetually thrashing software is fun...
#
# COMMENTS in .htaccess files are full-line only - not to right of a
# directive (but some modules (e.g., IndexIgnore) allow this anyhow);
# for details see https://httpd.apache.org/docs/2.4/configuring.html 
#
# Notes about the following lines:
# 1) this is a no-op; why? (and a "#" on the left here fails)
# 2) prior host copies; kill these soon
# 3) assume it's the dir's ReadmeName
# 4) plus 9 more _README-*.txt files
#
# UPDATE, Jul-2018 => reinstate the _READMEs!
# README.txt files have once again vanished from autoindex pages on 
# this site, due to an unknown godaddy apache-server change that was 
# both unannounced and beyond the hosting account's control (yes, blah); 
# neither activating the first line below nor eliminating README.txt 
# files in the showcode.py rewrite rule above had any effect; punt,
# but make+show the _README copy files again for at least some context;
#
# UPDATE Mar-2020 => keep _READMEs, ignore READMEs (no, really)
# after moving this site to an AWS Lightsail VPS, _both_ README.txt and
# the auto-generated _README.txt appear in autoindexes redundantly; don't 
# show README.txt (again); and consider cleaning up this mess (again...).
# Used ./check-readmes.py on site to verify that no folder has a README
# but not a _README, and that no folder has a README that's != _README,
# (though this can break if old packages are unzipped with just README;
# should probably move to _README xor README sitewide).  Also ignore any 
# __pycache__ in autoindexes; they're not cruft in ziptools (or Mergeall).
#----------------------------------------------------------------------------------------

#ReadmeName README.txt

###IndexIgnore _README.txt
###IndexIgnore _README?*.txt

# Hide these names in autoindex listings: README.html (assumed to be the folder's
# ReadmeName file), plus the Mar-2020 additions README.txt and __pycache__.
IndexIgnore README.html README.txt __pycache__


#----------------------------------------------------------------------------------------
# Apr-2018: on new cPanel web host at godaddy, an access to "name"
# does not automatically return "name.html" unless the following is
# set (here, or in the main config file); examples: /training, /pyedit;
# avoid N rewrite rules; this also differs on this host/server only;
# See "apache content negotiation" for all the gory details.
#----------------------------------------------------------------------------------------

# Enable content negotiation so that a request for "name" can return "name.html";
# the "+" adds MultiViews to the Options already in effect instead of replacing them.
Options +MultiViews



[Home page] Books Code Blog Python Author Train Find ©M.Lutz