File: pystockmood/pystockmood.py

#!python3
"""
-------------------------------------------------------------------------------
Try to guess the mood of the stock market by screen scraping
a financial news site for positive and negative terms.  This
reflects shifting human mood, not actual stock market values.

*This is a work in progress*: expand the search term patterns here.
Uses simplistic eliza-like heuristic, not a full html parse (this
is mostly just re pattern matching + urllib web page fetches).
Assumes .\audiofiles (mp3s which you must provide and list here)
and .\testfiles (tests) subdirs in this script's directory.

Drag this file out to a desktop shortcut to launch by icon clicks
on Windows (and similar elsewhere); it prints substantial trace
information to the console, including patterns and their matches.
-------------------------------------------------------------------------------
"""

import webbrowser, os, sys, re, pprint
from urllib.request import urlopen
trace = print

# future possibility?: full html parse
# from html.parser import HTMLParser


# Audio files------------------------------------------------------------------
# configure me: names of the mp3 files to play, assumed to be in .\audiofiles
goodtune, badtune, mehtune = (
    "Everything's Coming Up Roses.mp3",          # played when the net score is positive
    'Always Look On the Bright Side of.mp3',     # played when the net score is negative
    "Lithium.mp3",                               # played when the net score is zero
)


# Patterns---------------------------------------------------------------------
# grow me: accepts full re patterns or literal substrings; case is ignored.
# List variations individually or via a '|' pattern: 'wall (?:street|st) rises'.
# Terms are now computed automatically: each noun is combined with every verb
# in one column (good or bad) of the verbs table.


# Subjects of a headline phrase; each entry is a regex fragment (case is
# ignored when matching).  A search term is built as: noun + ' ' + verb.
nouns = [
    'wall street',
    'wall st',                  # or combine both as 'wall (?:street|st)'
    'stocks',
    'markets',
    'market',

    # The page is searched as raw HTML, where '&' is normally written as the
    # entity '&amp;' -- accept both spellings so "S&amp;P" headlines count too.
    's&(?:amp;)?p',
    's&(?:amp;)?p 500',

    'dow',
]


# Each entry is a (good, bad) pair of regex fragments; a search term is
# built by appending one member of a pair to a noun, separated by a space.
verbs = [
    ('rises',      'falls'),
    ('rose',       'fell'),
    ('rise',       'fall'),

    # '...s?' = optional trailing 's' [dow gains, stocks gain]
    # '?' applies to the preceding pattern item: one char only

    ('gains?',     'loses?'),
    ('expands?',   'contracts?'),
    ('grows?',     'shrinks?'),
    ('inflates?',  'declines?'),
    ('advances?',  'retreats?'),

    # Earlier, narrower direction patterns -- disabled, kept for reference:
    # 1st matches 'noun (optional-named-verb )up' [dow up, dow edges down]
    # 2nd matches an optional ('end '|'ends ') in middle [markets end higher]
    # 3rd matches a required  ('close '|'closes ') in middle [s&p closes lower]

    #    ('(?:(?:edges|closes) )?up',  '(?:(?:edges|closes) )?down'),
    #    ('(?:end(?:s)? )?higher',     '(?:end(?:s)? )?lower'),
    #    ('(?:close|closes) higher',   '(?:close|closes) lower'),

    # catchall, supersedes prior 3 patterns (don't count twice!):
    # matches optional 'any-alpha-string ' in middle [dow drifts higher, etc.]
    # The trailing \b requires the direction word to end there, so phrases
    # such as 'market update' or 'stocks downgraded' are not scored as
    # 'up'/'down'.  Raw strings keep '\b' a regex word boundary rather than
    # a backspace character.

    (r'(?:[a-zA-Z]+ )?(?:up|higher)\b', r'(?:[a-zA-Z]+ )?(?:down|lower)\b'),
]


# Cross every noun with every verb pair (nouns vary slowest): the first
# member of a pair builds a good term, the second builds a bad term.
goodterms = [' '.join((noun, pair[0])) for noun in nouns for pair in verbs]
badterms  = [' '.join((noun, pair[1])) for noun in nouns for pair in verbs]


# Fixup------------------------------------------------------------------------
# 'x rise' is a prefix of 'x rises', so text matching the longer term also
# matches the shorter one => keep only the shorter term to count it once.
# Good and bad lists are filtered separately, because a prefix relation in
# one column of the verbs table need not hold in the other column.
# This is fine for literals, but may not be appropriate for some patterns?
trace('good=>\n', pprint.pformat(goodterms), end='\n\n')    # before filtering

goodterms = [candidate for candidate in goodterms
             if not any(candidate.startswith(shorter) and shorter != candidate
                        for shorter in goodterms)]

badterms = [candidate for candidate in badterms
            if not any(candidate.startswith(shorter) and shorter != candidate
                       for shorter in badterms)]

# the term lists actually used for scoring
trace('good=>\n', pprint.pformat(goodterms), end='\n\n')
trace('bad =>\n', pprint.pformat(badterms), end='\n\n')


# Main logic-------------------------------------------------------------------

# canned test files via cmd line arg, in .\testfiles
testpages  = ['test-good.htm', 'test-bad.htm', 'test-meh.htm']    # good=Fidelity.com
testpages += ['test-bad-pattern.htm', 'test-bad2.htm']            # re patterns, etc
testpages  = [os.path.join('testfiles', test) for test in testpages]

# configure me: live news page to scrape
newssite   = 'http://news.fidelity.com/news/topnews.jhtml'

# mode via command line arg (or not): either way, page is the raw bytes of
# the HTML, and the file or network handle is closed as soon as it is read
if len(sys.argv) > 1:
    test = testpages[int(sys.argv[1])]           # pystockmood.py (0..4)?
    with open(test, 'rb') as testfile:
        page = testfile.read()
    trace('opened:', test)
else:
    with urlopen(newssite) as response:          # else fetch live from web
        page = response.read()
    trace('fetched:', newssite)                  # this is also clicked case

# score the page: every match of a good term adds 1 to the net score and
# every match of a bad term subtracts 1; then pick a tune from the sign
net = 0
for label, weight, patterns in (('good', +1, goodterms), ('bad', -1, badterms)):
    for pattern in patterns:
        # page is bytes, so the pattern must be bytes too
        regex = re.compile(pattern.encode('utf8'), re.IGNORECASE)
        hits = regex.findall(page)
        if hits:
            trace('...%d: %r -> %s' % (len(hits), pattern, pprint.pformat(hits)))
        net += weight * len(hits)
    trace(label, '~', '%+d' % net)               # running total so far

if net > 0:
    tune = goodtune
elif net < 0:
    tune = badtune
else:
    tune = mehtune
trace(net, '=>', tune)

# play the tune file with whatever the platform associates with it
if sys.platform.startswith('win'):
    os.startfile('audiofiles\\' + tune)  # same as webbrowser on windows
    input('[Press Enter to close]')      # pause before the script exits
else:
    # Tune names contain spaces and apostrophes, which must be
    # percent-encoded in a file: URL; as_uri() does this for us.
    from pathlib import Path             # local import: used on this branch only
    webbrowser.open_new(Path(os.getcwd(), 'audiofiles', tune).as_uri())