import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import pandas as pd
import arviz as az
import pandas_datareader.data as pdr
import datetime


dataA = pdr.DataReader('UNH','yahoo')
dataA = pd.DataFrame(dataA.reset_index())
dataA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       1258 non-null   datetime64[ns]
 1   High       1258 non-null   float64       
 2   Low        1258 non-null   float64       
 3   Open       1258 non-null   float64       
 4   Close      1258 non-null   float64       
 5   Volume     1258 non-null   float64       
 6   Adj Close  1258 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 68.9 KB


dataA.tail()


plt.figure(figsize=(13,6))
plt.plot(dataA.Date,dataA.Close)
plt.grid()
plt.show()


dataA["dayofWeek"] = dataA.Date.dt.dayofweek
dataA.head(7)


dataA['change']=(dataA.Close-dataA.Open)/dataA.Open

observed_prior = np.array(dataA.change[0:-50])


plt.figure(figsize=(13,6))
plt.plot(dataA.Date,dataA.change)
plt.grid()
plt.show()

plt.figure(figsize=(13,6))
plt.plot(observed_prior)
plt.grid()
plt.show()


def price_change_data_model(data):
    with pm.Model() as model:
        location = pm.Uniform('location', lower=-1, upper = 1)
        scale = pm.Uniform('scale', lower=0, upper = 0.5)
        tail = pm.Uniform("tail", lower=1, upper=20)
        
        obs = pm.StudentT('daily Open',
                          mu = location,
                          sd = scale,
                          nu = tail,
                          observed = data
                         )
        
        return model


my_stock_model = price_change_data_model(observed_prior)


pm.model_to_graphviz(my_stock_model)


with my_stock_model:
    trace = pm.sample(2000, tune=1000)
    
    print(az.summary(trace))
    
    az.plot_trace(trace)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tail, scale, location]
Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:09<00:00, 1265.06draws/s]

           mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  \
location  0.000  0.000  -0.001    0.001      0.000    0.000    7158.0  3777.0   
scale     0.009  0.000   0.008    0.009      0.000    0.000    4306.0  4306.0   
tail      2.973  0.283   2.471    3.525      0.004    0.003    4589.0  4589.0   

          ess_bulk  ess_tail  r_hat  
location    7159.0    5688.0    1.0  
scale       4307.0    5297.0    1.0  
tail        4575.0    5545.0    1.0


with my_stock_model:
    predictive = pm.sample_posterior_predictive(trace)

100%|██████████| 8000/8000 [00:08<00:00, 915.24it/s]


plt.figure(figsize=(13,6))


y_vals = predictive['daily Open'][::10]


plt.plot(y_vals[0:20], c='green', lw=0.4, alpha=0.2)
plt.plot(range(0,20),dataA.change[-50:-30],c='black')

plt.ylim(-0.15,0.15)

plt.show()


start = len(dataA)-50
end=len(dataA)

xx = range(start,end)
plt.figure(figsize=(13,6))

for s in range(0,600):
    plotSample = []
    price= dataA.Close[start]
    for i in range(start,end):
        randx = np.random.randint(1000,len(trace))
        movement = np.random.normal(trace.get_values('location')[randx],trace.get_values('scale')[randx])
        price+=movement*price
        plotSample.append(price)
    plt.plot(dataA.Date[start:end],plotSample, c='blue',alpha=0.05)
plt.plot(dataA.Date[start:end],dataA.Close[start:end], c='r')

[<matplotlib.lines.Line2D at 0x7fa068592588>]


plt.figure(figsize=(13,6))
plt.scatter(dataA.Volume,dataA.change,alpha=0.3)
plt.grid()
plt.show()


dataA['Panic']= (dataA.Volume-dataA.Volume.rolling(10).mean())/dataA.Volume


plt.figure(figsize=(13,6))
plt.scatter(dataA.Panic,dataA.change,alpha=0.3)
plt.hlines(0,-4,1,color='r')
plt.vlines(0,-0.5,0.5,color='r')
plt.xlim(-1,1)
plt.ylim(-0.2,0.2)
plt.grid()
plt.show()


def price_change_data_model(data):
    with pm.Model() as model:
        
        bump = pm.Uniform('bump',lower=-5, upper=5)
        location = pm.Uniform('location', lower=-10, upper = 10)
        scale = pm.Uniform('scale', lower=0, upper = 5)
        tail = pm.Uniform("tail", lower=1, upper=20)
        
        μ = pm.Deterministic("μ", location+bump*data.Panic)
        
        
        obs = pm.StudentT('daily Open',
                          mu = μ,
                          sd = scale,
                          nu = tail,
                          observed = data.change
                         )
        
        return model


dataA.head()


my_new_model= price_change_data_model(dataA[10:-50])
pm.model_to_graphviz(my_new_model)


with my_new_model:
    trace = pm.sample(2000, tune=1000)
    
    print(az.summary(trace, var_names=['location','scale','tail','bump']))
    
    az.plot_trace(trace, var_names=['location','scale','tail','bump'])

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tail, scale, location, bump]
Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:16<00:00, 722.90draws/s]

           mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_mean  ess_sd  \
location -0.000  0.000  -0.001    0.000      0.000    0.000    5998.0  5137.0   
scale     0.009  0.000   0.008    0.010      0.000    0.000    4901.0  4901.0   
tail      2.978  0.282   2.474    3.510      0.004    0.003    4906.0  4906.0   
bump     -0.001  0.001  -0.003    0.001      0.000    0.000    5865.0  5387.0   

          ess_bulk  ess_tail  r_hat  
location    5979.0    5773.0    1.0  
scale       4863.0    5531.0    1.0  
tail        4858.0    5935.0    1.0  
bump        5873.0    6170.0    1.0


start = len(dataA)-50
end=len(dataA)

xx = range(start,end)
plt.figure(figsize=(13,6))

for s in range(0,600):
    plotSample = []
    price= dataA.Close[start]
    for i in range(start,end):
        randx = np.random.randint(1000,len(trace))
        
        movement = np.random.normal(trace.get_values('location')[randx]+trace.get_values('bump')[randx]*dataA.Panic[i-1],trace.get_values('scale')[randx])
        price+=movement*price
        plotSample.append(price)
    plt.plot(dataA.Date[start:end],plotSample, c='blue',alpha=0.05)
plt.plot(dataA.Date[start:end],dataA.Close[start:end], c='r')

[<matplotlib.lines.Line2D at 0x7fa0384f3be0>]


def get_stock(X):
    dataA=pdr.DataReader(X,"yahoo")
    dataA = pd.DataFrame(dataA.reset_index())
    dataA['day_of_week'] = dataA.Date.dt.dayofweek
    dataA['change']=(dataA['Close']-dataA['Open'])/dataA.Open
    dataA['Panic'] =(dataA.Volume- dataA.Volume.rolling(5).mean())/dataA.Volume
    
    return dataA


dataA = get_stock('MELI')


plt.plot(dataA.Open)

[<matplotlib.lines.Line2D at 0x7fa08a8f1240>]


dataA['log_change'] = np.log(dataA.Open).diff()


plt.figure(figsize=(13,6))
plt.scatter(dataA.change,dataA.log_change,alpha=0.3)

<matplotlib.collections.PathCollection at 0x7fa08a7c1e10>


plt.figure(figsize=(13,6))

plt.plot(dataA.Date,dataA.Close)
plt.grid()
plt.show()
plt.figure(figsize=(13,6))

plt.plot(dataA.Date,dataA.log_change)

plt.grid()
plt.show()


def make_stocastic_model(data):
    with pm.Model() as model:
        step_size = pm.Exponential('step_size',10 )
        volatility = pm.GaussianRandomWalk('volatility', sigma=step_size, shape=len(data))
        nu=  pm.Exponential('nu',0.1)
        
        returns = pm.StudentT('returns', nu=nu,lam=np.exp(-2*volatility), observed=data['log_change'])
        
        return model


stocastic_model = make_stocastic_model(dataA[10:])


pm.model_to_graphviz(stocastic_model)


with stocastic_model:
    prior = pm.sample_prior_predictive(500)


fig, ax = plt.subplots(figsize=(13,6))

ax.plot(dataA.log_change, lw=1, color='black')
ax.plot(prior['returns'][4:6].T, 'g',lw=1,zorder=-10)

[<matplotlib.lines.Line2D at 0x7fa058956518>,
 <matplotlib.lines.Line2D at 0x7fa0508a11d0>]


with stocastic_model:
    trace = pm.sample(2000,tune=1000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [nu, volatility, step_size]
Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [11:08<00:00, 17.95draws/s]
The acceptance probability does not match the target. It is 0.9102784532346088, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8857786679531626, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.6222340859826226, but should be close to 0.8. Try to increase the number of tuning steps.
The rhat statistic is larger than 1.05 for some parameters. This indicates slight problems during sampling.
The estimated number of effective samples is smaller than 200 for some parameters.


with stocastic_model:
    az.plot_trace(trace, var_names=['step_size','nu'])


plt.figure(figsize=(13,6))
plt.plot(np.exp(trace['volatility'])[::10].T, 'green',alpha=0.01)
plt.show()


with stocastic_model:
    posterior_stochastic = pm.sample_posterior_predictive(trace)

100%|██████████| 8000/8000 [00:15<00:00, 521.59it/s]


fig, axes= plt.subplots(nrows=2, figsize=(13,12), sharex=True)

dataA['log_change'].plot(ax=axes[0], color='black')

axes[0].plot(posterior_stochastic['returns'][::100].T, 'g', alpha=0.3,zorder=-10)

axes[1].plot(np.exp(trace['volatility'])[::100].T, 'r', alpha =0.1)

plt.show()

	Date	High	Low	Open	Close	Volume	Adj Close
1253	2021-04-15	392.359985	380.000000	380.000000	390.010010	4488300.0	390.010010
1254	2021-04-16	393.920013	385.459991	393.920013	391.010010	4532400.0	391.010010
1255	2021-04-19	393.390015	388.470001	390.000000	389.839996	2965400.0	389.839996
1256	2021-04-20	397.880005	389.420013	389.859985	396.529999	3348100.0	396.529999
1257	2021-04-21	401.476990	395.440002	398.859985	396.260010	662781.0	396.260010

	Date	High	Low	Open	Close	Volume	Adj Close	dayofWeek
0	2016-04-22	134.330002	133.050003	133.720001	134.130005	2782800.0	124.045631	4
1	2016-04-25	134.070007	132.589996	133.710007	133.779999	2702100.0	123.721939	0
2	2016-04-26	134.610001	133.000000	133.970001	134.240005	2638000.0	124.147400	1
3	2016-04-27	134.580002	131.899994	134.339996	132.800003	3358800.0	122.815636	2
4	2016-04-28	134.389999	131.690002	132.789993	132.070007	3332800.0	122.140526	3
5	2016-04-29	131.880005	128.929993	131.059998	131.679993	4161100.0	121.779816	4
6	2016-05-02	132.929993	130.919998	132.610001	132.100006	2887700.0	122.168297	0

	Date	High	Low	Open	Close	Volume	Adj Close	dayofWeek	change	Panic
0	2016-04-22	134.330002	133.050003	133.720001	134.130005	2782800.0	124.045631	4	0.003066	NaN
1	2016-04-25	134.070007	132.589996	133.710007	133.779999	2702100.0	123.721939	0	0.000523	NaN
2	2016-04-26	134.610001	133.000000	133.970001	134.240005	2638000.0	124.147400	1	0.002015	NaN
3	2016-04-27	134.580002	131.899994	134.339996	132.800003	3358800.0	122.815636	2	-0.011463	NaN
4	2016-04-28	134.389999	131.690002	132.789993	132.070007	3332800.0	122.140526	3	-0.005422	NaN

MAT411: Bayesian Data Analysis¶

Week 13¶

Stock predictions and develop models¶

Lets use some real math¶

Stocastic Volatility¶