# File: .htaccess
#----------------------------------------------------------------------------------------
# Main (site root) Apache config file. This file overrides host defaults, and some
# subfolders have their own .htaccess files that may in turn customize settings here.
#
# In general, Apache settings are hierarchical. At the top of the tree, the main
# Apache config file (inaccessible on this site's current host) defines defaults and
# allows/disallows per-folder .htaccess overrides. Within a folder, settings are
# inherited from all higher folders in the path, through the main Apache config file.
# Hence, settings in a folder's .htaccess apply to that folder and all subfolders below
# that level in the filesystem. Because this file is highest, its settings are
# site-global. For more: https://httpd.apache.org/docs/current/howto/htaccess.html.
#
# Coding note: comments in .htaccess files must generally be full-line only; see ahead.
#----------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------
# Mar-2020: this site was moved from its former GoDaddy shared/business hosting to a
# VPS on Amazon's AWS Lightsail. The new VPS hosting offers complete access to the
# Apache server and config files (httpd.conf), so many or all of the settings in this
# site's .htaccess files (along with their frustrations) are now moot. For convenience,
# the .htaccess files will still be used until they can be absorbed into the root's configs.
#
# The only new items here are a rewrite from www.* => * below for SEO (both have DNS
# A records that point to the same static IP, and "www" is a ServerAlias for non-"www" in
# a VirtualHost); along with the README.html and __pycache__ autoindex ignores ahead.
# NOTE: the default Apache install in a bitnami LAMP stack enables mod_pagespeed - which
# both rudely munged educational code, and caused segfaults in Apache; manually disabled!
#----------------------------------------------------------------------------------------
# Enable mod_rewrite processing for the rules in this file.
RewriteEngine on
# Fire only when the Host header begins with "www." (NC = ignore case); the rest of
# the host name is captured as %1 for use in the rule below.
RewriteCond %{HTTP_HOST} ^www\.(.*)$ [NC]
# Send a permanent (301) redirect to https:// on the captured bare host, keeping the
# requested path ($1). L ends rule processing for this pass once the rule fires.
RewriteRule ^(.*)$ https://%1/$1 [R=301,L]
#----------------------------------------------------------------------------------------
# Oct-2022 android-deltas-scripts => android-deltas-sync: rewrite any prior-name URLs
# lingering on this site, referenced elsewhere, or indexed by search engines. Used for
# the .html file as well as all references to the folder with or without trailing "/".
# Per PyEdit Greps: there are just 5 harmless refs to old URL in thumbspage docs today.
#----------------------------------------------------------------------------------------
# Apply only to this site's own host names, with or without a "www." prefix. The dots
# are escaped so that they match a literal "." (an unescaped "." matches any character),
# and NC makes the host comparison case-insensitive.
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
# Permanently redirect (301) any path that starts with the old name to the new name,
# carrying along whatever followed it as $1 (nothing, ".html", "/", "/sub/path", ...).
RewriteRule ^android-deltas-scripts(.*)$ "https\:\/\/learning-python\.com\/android-deltas-sync$1" [R=301,L]
#----------------------------------------------------------------------------------------
# Jun-2017 redesign for programs, initial redirects
# (Aug18: now to https, as "L" = last, which precludes the https rewrite ahead)
#
# CAUTION: Apache mod_rewrite URL rewriting is __iterative__:
# - it reruns all these in sequence again until no more changes, or default=10 times;
# - this means that a rewrite may be overwritten on the next iteration;
# - [L] can preclude reaching rules further below (see 'break' in C/Python loops);
# - [L] just means end *this* ruleset context, and [END] may not be supported;
# But this is _stunningly_ complex and brittle; try a web search, or intros here:
# https://httpd.apache.org/docs/2.4/rewrite/tech.html
# https://httpd.apache.org/docs/2.4/rewrite/flags.html#flag_l
#----------------------------------------------------------------------------------------
# rewriteengine on (now above)
# aug-2017: reroute _all_ /downloads*, including /downloads/, not just /downloads
# Each rule below applies only to this site's own host names, with or without "www.".
# Host-name dots are escaped to match a literal "." and NC ignores case. The hex tags
# that used to trail each rule are kept, but on comment lines of their own, because
# comments in .htaccess files are reliably supported only as full lines.
#
# Any path starting with "downloads" => the programs page.
# tag: 594463424e1e3
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^downloads(.*)$ "https\:\/\/learning-python\.com\/programs\.html" [R=301,L]
# Any path starting with "recenthighlights" => the posts page.
# tag: 59446306007e6
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^recenthighlights(.*)$ "https\:\/\/learning-python\.com\/posts\.html" [R=301,L]
# "books" or "books/<path>" => site root plus <path>. The pattern requires a "/" (or
# nothing at all) after "books", and the target always puts a "/" after the host name.
# This keeps the remainder of the path from ever being glued onto the host name: with
# an open-ended "books(.*)" and a bare "$1", a request such as "books.example.org/x"
# would redirect to host "learning-python.com.example.org". It yields neither a
# missing nor a doubled "/" after the host.
# tag: 59445ffbc6fdc
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
RewriteRule ^books(?:\/(.*))?$ "https\:\/\/learning-python\.com\/$1" [R=301,L]
#----------------------------------------------------------------------------------------
# Jul-12-17: temp workaround for pyedit userguide broken product links (till next build):
# present in app, exes, src. This was also fixed in live unzipped copy, /Code, and
# /Websites/{Programs, UNION}, but not in zipped products (will roll out in next build).
#----------------------------------------------------------------------------------------
# Apply only to this site's own host names, with or without "www."; dots are escaped
# to match a literal "." and NC ignores case.
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
# Redirect (301) the broken "pyedit.html/PyEdit<suffix>.zip" links to the same zip name
# under pyedit-products/. The "." before "zip" is escaped so that only a real ".zip"
# extension matches. The rule's former trailing tag is kept on its own comment line.
# tag: 59664d12d58c0
RewriteRule ^pyedit\.html\/PyEdit(.*)\.zip$ "https\:\/\/learning-python\.com\/pyedit-products\/PyEdit$1.zip" [R=301,L]
#----------------------------------------------------------------------------------------
# Jul-12-17: temp workaround for mergeall userguide broken "Moredocs" bad-case links
# (till next build) [FOR LINUX ZIP ONLY]. This was also fixed in live unzipped copy,
# /Code, and /Websites/{Programs, UNION}, but not in zipped products.
#----------------------------------------------------------------------------------------
# Apply only to this site's own host names, with or without "www."; dots are escaped
# to match a literal "." and NC ignores case.
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
# Redirect (301) the wrongly-cased "Moredocs" folder links to "MoreDocs", keeping the
# rest of the path ($1). The rule's former trailing tag is kept on its own comment line.
# tag: 59664d12d58c0
RewriteRule ^mergeall-products\/unzipped\/docetc\/Moredocs(.*)$ "https\:\/\/learning-python\.com\/mergeall-products\/unzipped\/docetc\/MoreDocs$1" [R=301,L]
#----------------------------------------------------------------------------------------
# Feb-2018: map all *.py not in cgi/ (at URL front) to the new showcode.py CGI script.
# Added for mobile redesign; an alternative to "add handler" or *many* manual link edits.
# This works so well that it was adopted for *.txt files too (per the generalized rule),
# such that even files in the unzipped product packages use the site-wide display style.
# This must avoid robots.txt files, and can impact autoindex README.txt files - see ahead.
#
# Jan2019: add .sh files too for mergeall-android (else downloaded instead of displayed),
# as well as .c and .h for class workbook code (else they are badly broken on mobiles).
# Also, the literal "." before the file extension needed to be escaped, but was harmless:
# just one obscure file out of 8K matched, in one of the old class workbook folders:
#
# /MY-STUFF/Websites/UNION$ find . -name '*[^.]txt' -print
# /MY-STUFF/Websites/UNION$ find . -name '*[^.]pyw' -print
# /MY-STUFF/Websites/UNION$ find . -name '*[^.]py' -print
# ./class/Extras/Other/extensions/Handhelds/Pippy
# ./class/Workbook/Examples/Lecture2/runpy
# /MY-STUFF/Websites/UNION$ find . -type f -print | wc -l
# 7967
#
# Apr2020: make the match case insensitive with [NC] => .txt and .TXT now both match and
# are routed to the script. This cropped up once after 2 years of heavy usage, for an
# old MS-DOS era .TXT file newly copied to the site; user-typed URLs might trigger too.
#
# Jun25: add .note for thumbspage image-note files in online demos.
#----------------------------------------------------------------------------------------
# Apply only to this site's own host names, with or without "www."; dots are escaped
# to match a literal "." and NC ignores case.
RewriteCond %{HTTP_HOST} ^(?:www\.)?learning-python\.com$ [NC]
# Route "<path>.<ext>" for the listed extensions (any case, per NC) to the showcode.py
# script, passing "<path>.<ext>" as its "name" query parameter. The leading negative
# lookahead skips paths that start with "cgi/" and paths containing "robots.txt"; the
# "." in "robots.txt" is escaped so that only the literal file name is excluded.
# There is no R flag here, and L ends rule processing for this pass.
# NOTE(review): presumably this is served as an internal rewrite rather than a client
# redirect because the target host is this site itself - confirm on the live server.
RewriteRule ^(?!(?:cgi\/|.*robots\.txt))(.*)\.(py|txt|pyw|sh|c|h|note)$ "https\:\/\/learning-python\.com\/cgi\/showcode\.py\?name\=$1.$2" [L,NC]
# before apr-23-18 excluding robots.txt (likely harmless)
# rewriterule ^(?!cgi\/)(.*).(py|txt|pyw)$ "http\:\/\/learning-python\.com\/cgi\/showcode\.py\?name\=$1.$2"
# this fails (loops) if listed first: why?
# rewriterule ^(.*)robots.txt$ "http\:\/\/learning-python\.com\/$1robots.txt" [L]
#----------------------------------------------------------------------------------------
# Aug-2018: for HTTPS, rewrite and redirect all http://... to https://...
#
# This requires a server SSL/TLS certificate (a much-too-easy push button thing on
# GoDaddy at first, but extra admin and $ later). It appeases Google Chrome's rude
# "Not Secure" label now issued for every page on "http://" sites, but is massive
# overkill here - this site has just 1 input field, on its Search page (yes, irony).
#
# This rewrite can be applied to results of prior rewrites, but the "L" (= last) rules
# above put https in their own expansions to force it. An "L" may have been required in the
# showcode.py rule to avoid bogus rewrites. The R=301 here sends a permanent-redirect
# reply (important for search engines). An (untested) equivalent directive:
# "Redirect permanent / https://learning-python.com/".
#
# NOTE: older IEs (e.g., 9 on Windows 7) do not support TLS by default, and require an
# Advanced tab settings change in IE to access the site. Other older browsers may not
# support the TLS version 1.2 that GoDaddy sells today at all. This seems a massive
# downside for HTTPS use; some recent TLS browser support is just 5 years old:
# https://help.salesforce.com/articleView?id=000220586&type=1
# https://en.wikipedia.org/wiki/Transport_Layer_Security#Web_browsers
#
# IMPORTANT: browsers don't allow (and ignore) absolute http:// links to page resources
# like CSS and JS file links in pages loaded from an https:// server - even if a rewrite
# rule here would redirect it. This broke page icons, coded with absolute http:// paths
# due to the former dual book/training site structure, but was a fairly easy fix: because
# icons are a genhtml insert, just change the insert file, regenerate pages with a normal
# genhtml run, and upload all .htmls. Also broken were program auto-index README.html
# pages in subfolders that used http:// paths for CSS and icons in zany .htaccess files
# of their own; these required many manual fixes. Relative paths are best, but not
# always possible (see subfolders that might be moved over time).
#----------------------------------------------------------------------------------------
# Iff not already https://... Then reroute http://... to https://...
# Fire only when the HTTPS variable is not exactly "on" ("!=" is a string comparison).
RewriteCond %{HTTPS} !=on
# Permanently redirect (301) the same path ($1) to https:// on the server's own name
# (SERVER_NAME, not the client-supplied Host header). L ends this pass once it fires.
RewriteRule (.*) https://%{SERVER_NAME}/$1 [R=301,L]
#----------------------------------------------------------------------------------------
# This never worked: its result was rewritten by the prior rule on the next iteration
# (see above). Instead, generate a plain-text reply in the cgi script itself, for the
# rawmode=view query param automatically sent by the formatted-display template's link.
#
# Feb-2018: but DO NOT route to script if it's a raw-text link in a reply page...
# rewritecond %{HTTP_HOST} ^www.learning-python.com$ [OR]
# rewritecond %{HTTP_HOST} ^learning-python.com$
# rewriterule ^cgi\/rawtext\/(.*)$ "http\:\/\/learning-python\.com\/$1" [L]
# [L] = skip run next rules if this fired, but on this iteration only...
#
# RewriteCond %{ENV:REDIRECT_STATUS} !=200 # per a rumor, also failed
# RewriteRule .* - [L]
#----------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------
# (Older terse comments) godaddy url redirects simply insert the above rewrite code
# these are just pcre (perl) REs [^=start after domain/, $=eostring, $1=(.*) group]
#
# Last line was this, which caused double // after domain (clear firefox network cache on updates):
# rewriterule ^books(.*)$ "http\:\/\/learning-python\.com\/$1" [R=301,L] #59445ffbc6fdc
#
# Tweaked 2nd rule, to be more inclusive; was:
# rewriterule ^recenthighlights\.html$ "http\:\/\/learning-python\.com\/posts\.html" [R=301,L] #59446306007e6
#
# This might work too (not verified)...
# RewriteCond %{THE_REQUEST} //
# RewriteRule .* $0 [R]
#----------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------
# Original .htaccess code (and less-polished docs)...
#
# Major changes:
# Oct15: book site earthlink->godaddy relocation
# Jun17: redesign for programs, union-folder model
# Oct17: .htaccess fix for covert godaddy host move
# Feb18: mobile-friendly redesign, showcode.py
# Apr18: relocation to better server/host at godaddy
# Jul18: autoindex READMEs broke again: revive _READMEs
# Mar20: more autoindex thrashing at new AWS VPS host
#
# This is an Apache server config file: at this root level,
# it provides defaults for the entire site - it's used for this
# folder and all its subfolders, unless a subfolder has its own
# .htaccess overrides. The main Apache config file is maintained
# by the hosting provider, and may vary per host (yes, argh).
#
# Here: generate file indexes for dirs, if no index.html; fix
# autoindex name width to that of longest file name; view .py
# (etc.) instead of running (except in cgi/ => adds handler).
#
# NOTE: at site, the top-level cgi/ script folder is actually
# just a link to html/cgi, but it's special-cased so that
# html/.htaccess (this file) does not apply to its content;
# [Apr-2018: any folder can be used for cgi now.]
#
# Oct17 .htaccess fix:
# RemoveHandler was broken by godaddy relocating my site...
# Works only in root's .htaccess; had to comment out in all
# subdirs, but leave in root for .pys at the root level...
# permissions and content had no effect...
# all subfolder changed lines are "###RemoveHandler..."
# also requires "AddHandler cgi-script .py" in cgi/.htaccess...
#
# Feb18: added rewrite rule above for showcode.py file viewer;
# this broke README.txt files in auto-index pages (rewritten?);
# addressed by renaming to .html, <PRE> around text, and using
# "ReadmeName README.html" in subdirs .htaccess; this impacted
# 5 auto-index pages in /Code, plus 5 complete-app unzipped;
# later made auto-index .htaccess + README.html more complete;
# later copied README.txt to _README.txt so appear in listings;
# [Apr-18: even later... rules at new hosts differ; see ahead]
#
# TBD: should there be a "+" before NameWidth? - as is, this
# may be cancelling out some other IndexOption defaults (but
# is unrelated to README.txt/.html autoindex: IndexIgnore);
# Apr-2018: adding a "+" seems to have no effect whatsoever...
#----------------------------------------------------------------------------------------
# Generate a directory listing for folders that have no index file.
Options +Indexes
# Size the listing's name column to fit the longest file name ("*"), adding this to
# any inherited IndexOptions ("+") rather than replacing them.
IndexOptions +NameWidth=*
# Drop any handler association for these extensions, so that such files are displayed
# instead of run. RemoveHandler takes a list of extensions only, not a handler name:
# the former leading "cgi-script" argument was being read as one more (bogus) extension,
# so it is omitted here.
RemoveHandler .py .pyw .cgi
#----------------------------------------------------------------------------------------
# Apr-2018: on new cPanel web host at godaddy, the readme issue
# is different - autoindexes list both README.txt and _README.txt;
# display text of neither inline; and also list a README.html
# even if it's named as the ReadmeName file. On the former host,
# README.txt was dropped altogether (not in list, and not inline)
# due to showcode rewrite rule above; hence the _README.txt copies.
# The new host at least lists README.txt, but doesn't show it inline
# despite the ReadmeName below. Eventually drop _README*.txt copies;
# exclude them here for now. Perpetually thrashing software is fun...
#
# COMMENTS in .htaccess files are full-line only - not to right of a
# directive (but some modules (e.g., IndexIgnore) allow this anyhow);
# for details see https://httpd.apache.org/docs/2.4/configuring.html
#
# Notes about the following lines:
# 1) this is a no-op; why? (and a "#" on the left here fails)
# 2) prior host copies; kill these soon
# 3) assume it's the dir's ReadmeName
# 4) plus 9 more _README-*.txt files
#
# UPDATE, Jul-2018 => reinstate the _READMEs!
# README.txt files have once again vanished from autoindex pages on
# this site, due to an unknown godaddy apache-server change that was
# both unannounced and beyond the hosting account's control (yes, blah);
# neither activating the first line below nor eliminating README.txt
# files in the showcode.py rewrite rule above had any effect; punt,
# but make+show the _README copy files again for at least some context;
#
# UPDATE Mar-2020 => keep _READMEs, ignore READMEs (no, really)
# after moving this site to an AWS Lightsail VPS, _both_ README.txt and
# the auto-generated _README.txt appear in autoindexes redundantly; don't
# show the former (again); and consider cleaning up this mess (again...).
# Used ./check-readmes.py on site to verify that no folder has a README
# but not a _README, and that no folder has a README that's != _README,
# (though this can break if old packages are unzipped with just README;
# should probably move to _README xor README sitewide). Also ignore any
# __pycache__ in autoindexes; they're not cruft in ziptools (or Mergeall).
#----------------------------------------------------------------------------------------
#ReadmeName README.txt
###IndexIgnore _README.txt
###IndexIgnore _README?*.txt
# Names left out of generated directory listings.
IndexIgnore README.html
# Mar-2020 additions, combined on one line: IndexIgnore accepts several names per
# directive, and each directive adds to the list of ignored names.
IndexIgnore README.txt __pycache__
#----------------------------------------------------------------------------------------
# Apr-2018: on new cPanel web host at godaddy, an access to "name"
# does not automatically return "name.html" unless the following is
# set (here, or in the main config file); examples: /training, /pyedit;
# avoid N rewrite rules; this also differs on this host/server only;
# See "apache content negotiation" for all the gory details.
#----------------------------------------------------------------------------------------
# Add MultiViews to the inherited options ("+"), so that a request for "name" can be
# answered with "name.html" through content negotiation.
Options +MultiViews