|
""" |
|
Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html |
|
""" |
|
|
|
import sys |
|
import numpy as np |
|
import pandas as pd |
|
|
|
# Ticker symbol -> company display name for every stock used in the demo.
# The keys are substituted into the quote-history CSV URL and the values are
# the names listed per cluster in the UI, so both are kept verbatim.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}
|
|
|
|
|
# Sort the (ticker, company name) pairs by ticker, then split them into two
# parallel string arrays so position i in `symbols` matches position i in
# `names`.
sorted_pairs = np.array(sorted(symbol_dict.items()))
symbols, names = sorted_pairs[:, 0], sorted_pairs[:, 1]

# URL template for the per-ticker quote CSVs; "{}" is replaced by the symbol.
url = (
    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
    "master/financial-data/{}.csv"
)

# Download one DataFrame per ticker, in the same order as `symbols`.
quotes = []
for symbol in symbols:
    # Progress goes to stderr so it does not mix with regular output.
    print(f"Fetching quote history for {symbol!r}", file=sys.stderr)
    quotes.append(pd.read_csv(url.format(symbol)))

# Stack the "close" and "open" columns of every frame: one row per stock.
close_prices = np.vstack([frame["close"] for frame in quotes])
open_prices = np.vstack([frame["open"] for frame in quotes])

# Price change per quote row: close minus open.
variation = close_prices - open_prices
|
|
|
|
|
from sklearn import covariance

# Grid of 10 candidate alpha values, log-spaced from 10**-1.5 to 10**1,
# handed to GraphicalLassoCV.
alphas = np.logspace(-1.5, 1, num=10)
edge_model = covariance.GraphicalLassoCV(alphas=alphas)

# Work on a transposed copy so `variation` itself is left untouched; after
# the transpose each column of X holds the variations of one stock.
X = variation.copy().T

# Scale every column by its own standard deviation, in place.
per_stock_std = X.std(axis=0)
X /= per_stock_std

edge_model.fit(X)
|
|
|
|
|
|
|
from sklearn import cluster

# Cluster the stocks with affinity propagation on the fitted covariance
# matrix. The call returns a pair; only the second element (one integer
# label per stock) is used. random_state=0 is passed to make the run
# repeatable.
_center_indices, labels = cluster.affinity_propagation(
    edge_model.covariance_,
    random_state=0,
)

# NOTE: despite its name this is the largest label value, not a count;
# the cluster listing later iterates over range(n_labels + 1).
n_labels = labels.max()
|
|
|
|
|
|
|
import gradio as gr

# Page title, reused for the browser tab and the top-level heading.
# NOTE(review): the "π" characters look like mis-encoded emoji; the literal
# is kept verbatim here. Confirm against the original file.
title = " π Visualizing the stock market structure π"

# Build a static page: a heading, a short description, and one line per
# cluster listing the company names whose label equals that cluster index.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    # The stock count is derived from the data instead of being hard-coded,
    # so the text stays correct if the symbol table changes.
    gr.Markdown(
        f" Data is of {len(names)} stocks between the period of 2003 - 2008 <br>"
    )
    gr.Markdown(" Stocks that move together are grouped in the same cluster <br>")

    gr.Markdown(
        " **[Demo is based on sklearn docs]"
        "(https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)**"
    )

    # Labels run from 0 to n_labels inclusive; clusters are shown 1-based.
    for cluster_index in range(n_labels + 1):
        members = ", ".join(names[labels == cluster_index])
        gr.Markdown(f"Cluster {cluster_index + 1}: {members}")

    gr.Markdown("## In progress")

# Start the Gradio server once the layout has been fully defined.
demo.launch()