conda install pandas_datareader
conda install datetime
conda install -c conda-forge python-graphviz
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import pandas as pd
import arviz as az
import pandas_datareader.data as pdr
import datetime
dataA = pdr.DataReader('UNH','yahoo')
dataA = pd.DataFrame(dataA.reset_index())
dataA.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1258 entries, 0 to 1257 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 1258 non-null datetime64[ns] 1 High 1258 non-null float64 2 Low 1258 non-null float64 3 Open 1258 non-null float64 4 Close 1258 non-null float64 5 Volume 1258 non-null float64 6 Adj Close 1258 non-null float64 dtypes: datetime64[ns](1), float64(6) memory usage: 68.9 KB
dataA.tail()
Date | High | Low | Open | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|---|
1253 | 2021-04-15 | 392.359985 | 380.000000 | 380.000000 | 390.010010 | 4488300.0 | 390.010010 |
1254 | 2021-04-16 | 393.920013 | 385.459991 | 393.920013 | 391.010010 | 4532400.0 | 391.010010 |
1255 | 2021-04-19 | 393.390015 | 388.470001 | 390.000000 | 389.839996 | 2965400.0 | 389.839996 |
1256 | 2021-04-20 | 397.880005 | 389.420013 | 389.859985 | 396.529999 | 3348100.0 | 396.529999 |
1257 | 2021-04-21 | 401.476990 | 395.440002 | 398.859985 | 396.260010 | 662781.0 | 396.260010 |
plt.figure(figsize=(13,6))
plt.plot(dataA.Date,dataA.Close)
plt.grid()
plt.show()
dataA["dayofWeek"] = dataA.Date.dt.dayofweek
dataA.head(7)
Date | High | Low | Open | Close | Volume | Adj Close | dayofWeek | |
---|---|---|---|---|---|---|---|---|
0 | 2016-04-22 | 134.330002 | 133.050003 | 133.720001 | 134.130005 | 2782800.0 | 124.045631 | 4 |
1 | 2016-04-25 | 134.070007 | 132.589996 | 133.710007 | 133.779999 | 2702100.0 | 123.721939 | 0 |
2 | 2016-04-26 | 134.610001 | 133.000000 | 133.970001 | 134.240005 | 2638000.0 | 124.147400 | 1 |
3 | 2016-04-27 | 134.580002 | 131.899994 | 134.339996 | 132.800003 | 3358800.0 | 122.815636 | 2 |
4 | 2016-04-28 | 134.389999 | 131.690002 | 132.789993 | 132.070007 | 3332800.0 | 122.140526 | 3 |
5 | 2016-04-29 | 131.880005 | 128.929993 | 131.059998 | 131.679993 | 4161100.0 | 121.779816 | 4 |
6 | 2016-05-02 | 132.929993 | 130.919998 | 132.610001 | 132.100006 | 2887700.0 | 122.168297 | 0 |
dataA['change']=(dataA.Close-dataA.Open)/dataA.Open
observed_prior = np.array(dataA.change[0:-50])
plt.figure(figsize=(13,6))
plt.plot(dataA.Date,dataA.change)
plt.grid()
plt.show()
plt.figure(figsize=(13,6))
plt.plot(observed_prior)
plt.grid()
plt.show()
def price_change_data_model(data):
with pm.Model() as model:
location = pm.Uniform('location', lower=-1, upper = 1)
scale = pm.Uniform('scale', lower=0, upper = 0.5)
tail = pm.Uniform("tail", lower=1, upper=20)
obs = pm.StudentT('daily Open',
mu = location,
sd = scale,
nu = tail,
observed = data
)
return model
my_stock_model = price_change_data_model(observed_prior)
pm.model_to_graphviz(my_stock_model)
with my_stock_model:
trace = pm.sample(2000, tune=1000)
print(az.summary(trace))
az.plot_trace(trace)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [tail, scale, location] Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:09<00:00, 1265.06draws/s]
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_mean ess_sd \ location 0.000 0.000 -0.001 0.001 0.000 0.000 7158.0 3777.0 scale 0.009 0.000 0.008 0.009 0.000 0.000 4306.0 4306.0 tail 2.973 0.283 2.471 3.525 0.004 0.003 4589.0 4589.0 ess_bulk ess_tail r_hat location 7159.0 5688.0 1.0 scale 4307.0 5297.0 1.0 tail 4575.0 5545.0 1.0
with my_stock_model:
predictive = pm.sample_posterior_predictive(trace)
100%|██████████| 8000/8000 [00:08<00:00, 915.24it/s]
plt.figure(figsize=(13,6))
y_vals = predictive['daily Open'][::10]
plt.plot(y_vals[0:20], c='green', lw=0.4, alpha=0.2)
plt.plot(range(0,20),dataA.change[-50:-30],c='black')
plt.ylim(-0.15,0.15)
plt.show()
start = len(dataA)-50
end=len(dataA)
xx = range(start,end)
plt.figure(figsize=(13,6))
for s in range(0,600):
plotSample = []
price= dataA.Close[start]
for i in range(start,end):
randx = np.random.randint(1000,len(trace))
movement = np.random.normal(trace.get_values('location')[randx],trace.get_values('scale')[randx])
price+=movement*price
plotSample.append(price)
plt.plot(dataA.Date[start:end],plotSample, c='blue',alpha=0.05)
plt.plot(dataA.Date[start:end],dataA.Close[start:end], c='r')
[<matplotlib.lines.Line2D at 0x7fa068592588>]
plt.figure(figsize=(13,6))
plt.scatter(dataA.Volume,dataA.change,alpha=0.3)
plt.grid()
plt.show()
dataA['Panic']= (dataA.Volume-dataA.Volume.rolling(10).mean())/dataA.Volume
plt.figure(figsize=(13,6))
plt.scatter(dataA.Panic,dataA.change,alpha=0.3)
plt.hlines(0,-4,1,color='r')
plt.vlines(0,-0.5,0.5,color='r')
plt.xlim(-1,1)
plt.ylim(-0.2,0.2)
plt.grid()
plt.show()
def price_change_data_model(data):
with pm.Model() as model:
bump = pm.Uniform('bump',lower=-5, upper=5)
location = pm.Uniform('location', lower=-10, upper = 10)
scale = pm.Uniform('scale', lower=0, upper = 5)
tail = pm.Uniform("tail", lower=1, upper=20)
μ = pm.Deterministic("μ", location+bump*data.Panic)
obs = pm.StudentT('daily Open',
mu = μ,
sd = scale,
nu = tail,
observed = data.change
)
return model
dataA.head()
Date | High | Low | Open | Close | Volume | Adj Close | dayofWeek | change | Panic | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-04-22 | 134.330002 | 133.050003 | 133.720001 | 134.130005 | 2782800.0 | 124.045631 | 4 | 0.003066 | NaN |
1 | 2016-04-25 | 134.070007 | 132.589996 | 133.710007 | 133.779999 | 2702100.0 | 123.721939 | 0 | 0.000523 | NaN |
2 | 2016-04-26 | 134.610001 | 133.000000 | 133.970001 | 134.240005 | 2638000.0 | 124.147400 | 1 | 0.002015 | NaN |
3 | 2016-04-27 | 134.580002 | 131.899994 | 134.339996 | 132.800003 | 3358800.0 | 122.815636 | 2 | -0.011463 | NaN |
4 | 2016-04-28 | 134.389999 | 131.690002 | 132.789993 | 132.070007 | 3332800.0 | 122.140526 | 3 | -0.005422 | NaN |
my_new_model= price_change_data_model(dataA[10:-50])
pm.model_to_graphviz(my_new_model)
with my_new_model:
trace = pm.sample(2000, tune=1000)
print(az.summary(trace, var_names=['location','scale','tail','bump']))
az.plot_trace(trace, var_names=['location','scale','tail','bump'])
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [tail, scale, location, bump] Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:16<00:00, 722.90draws/s]
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_mean ess_sd \ location -0.000 0.000 -0.001 0.000 0.000 0.000 5998.0 5137.0 scale 0.009 0.000 0.008 0.010 0.000 0.000 4901.0 4901.0 tail 2.978 0.282 2.474 3.510 0.004 0.003 4906.0 4906.0 bump -0.001 0.001 -0.003 0.001 0.000 0.000 5865.0 5387.0 ess_bulk ess_tail r_hat location 5979.0 5773.0 1.0 scale 4863.0 5531.0 1.0 tail 4858.0 5935.0 1.0 bump 5873.0 6170.0 1.0
start = len(dataA)-50
end=len(dataA)
xx = range(start,end)
plt.figure(figsize=(13,6))
for s in range(0,600):
plotSample = []
price= dataA.Close[start]
for i in range(start,end):
randx = np.random.randint(1000,len(trace))
movement = np.random.normal(trace.get_values('location')[randx]+trace.get_values('bump')[randx]*dataA.Panic[i-1],trace.get_values('scale')[randx])
price+=movement*price
plotSample.append(price)
plt.plot(dataA.Date[start:end],plotSample, c='blue',alpha=0.05)
plt.plot(dataA.Date[start:end],dataA.Close[start:end], c='r')
[<matplotlib.lines.Line2D at 0x7fa0384f3be0>]
Based upon Paper Hoffman/Gelman 2011:The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo
def get_stock(X):
dataA=pdr.DataReader(X,"yahoo")
dataA = pd.DataFrame(dataA.reset_index())
dataA['day_of_week'] = dataA.Date.dt.dayofweek
dataA['change']=(dataA['Close']-dataA['Open'])/dataA.Open
dataA['Panic'] =(dataA.Volume- dataA.Volume.rolling(5).mean())/dataA.Volume
return dataA
dataA = get_stock('MELI')
plt.plot(dataA.Open)
[<matplotlib.lines.Line2D at 0x7fa08a8f1240>]
dataA['log_change'] = np.log(dataA.Open).diff()
plt.figure(figsize=(13,6))
plt.scatter(dataA.change,dataA.log_change,alpha=0.3)
<matplotlib.collections.PathCollection at 0x7fa08a7c1e10>
plt.figure(figsize=(13,6))
plt.plot(dataA.Date,dataA.Close)
plt.grid()
plt.show()
plt.figure(figsize=(13,6))
plt.plot(dataA.Date,dataA.log_change)
plt.grid()
plt.show()
def make_stocastic_model(data):
with pm.Model() as model:
step_size = pm.Exponential('step_size',10 )
volatility = pm.GaussianRandomWalk('volatility', sigma=step_size, shape=len(data))
nu= pm.Exponential('nu',0.1)
returns = pm.StudentT('returns', nu=nu,lam=np.exp(-2*volatility), observed=data['log_change'])
return model
stocastic_model = make_stocastic_model(dataA[10:])
pm.model_to_graphviz(stocastic_model)
with stocastic_model:
prior = pm.sample_prior_predictive(500)
fig, ax = plt.subplots(figsize=(13,6))
ax.plot(dataA.log_change, lw=1, color='black')
ax.plot(prior['returns'][4:6].T, 'g',lw=1,zorder=-10)
[<matplotlib.lines.Line2D at 0x7fa058956518>, <matplotlib.lines.Line2D at 0x7fa0508a11d0>]
with stocastic_model:
trace = pm.sample(2000,tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [nu, volatility, step_size] Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [11:08<00:00, 17.95draws/s] The acceptance probability does not match the target. It is 0.9102784532346088, but should be close to 0.8. Try to increase the number of tuning steps. The acceptance probability does not match the target. It is 0.8857786679531626, but should be close to 0.8. Try to increase the number of tuning steps. The acceptance probability does not match the target. It is 0.6222340859826226, but should be close to 0.8. Try to increase the number of tuning steps. The rhat statistic is larger than 1.05 for some parameters. This indicates slight problems during sampling. The estimated number of effective samples is smaller than 200 for some parameters.
with stocastic_model:
az.plot_trace(trace, var_names=['step_size','nu'])
plt.figure(figsize=(13,6))
plt.plot(np.exp(trace['volatility'])[::10].T, 'green',alpha=0.01)
plt.show()
with stocastic_model:
posterior_stochastic = pm.sample_posterior_predictive(trace)
100%|██████████| 8000/8000 [00:15<00:00, 521.59it/s]
fig, axes= plt.subplots(nrows=2, figsize=(13,12), sharex=True)
dataA['log_change'].plot(ax=axes[0], color='black')
axes[0].plot(posterior_stochastic['returns'][::100].T, 'g', alpha=0.3,zorder=-10)
axes[1].plot(np.exp(trace['volatility'])[::100].T, 'r', alpha =0.1)
plt.show()