import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import pandas as pd
import arviz as az
pm.__version__
'3.8'
dataX = pd.read_csv('data/KyotoFullFlower7.csv')
dataX.tail(300)
AD | Full-flowering date (DOY) | Full-flowering date | Source code | Data type code | Reference Name | Unnamed: 6 | |
---|---|---|---|---|---|---|---|
921 | 1722 | 100.0 | 410.0 | 4.0 | 1.0 | MOTOHIRO-KOKI | NaN |
922 | 1723 | 104.0 | 414.0 | 4.0 | 1.0 | KIMIAKI-KYOKI | NaN |
923 | 1724 | 99.0 | 408.0 | 3.0 | 2.0 | MYOHOIN-HINAMIKI | NaN |
924 | 1725 | 106.0 | 416.0 | 4.0 | 2.0 | MOTONAGA-KYOKI | NaN |
925 | 1726 | 116.0 | 426.0 | 3.0 | 2.0 | KAIKI | NaN |
... | ... | ... | ... | ... | ... | ... | ... |
1216 | 2017 | 100.0 | 409.0 | 8.0 | 0.0 | NEWS-PAPER(ARASHIYAMA) | NaN |
1217 | 2018 | 89.0 | 330.0 | 8.0 | 0.0 | NEWS-PAPER(ARASHIYAMA) | NaN |
1218 | 2019 | 95.0 | 405.0 | 8.0 | 0.0 | NEWS-PAPER(ARASHIYAMA) | NaN |
1219 | 2020 | 92.0 | 401.0 | 8.0 | 0.0 | NEWS-PAPER(ARASHIYAMA) | NaN |
1220 | 2021 | 85.0 | 326.0 | 8.0 | 0.0 | NEWS-PAPER(ARASHIYAMA) | NaN |
300 rows × 7 columns
dataX.head(20)
AD | Full-flowering date (DOY) | Full-flowering date | Source code | Data type code | Reference Name | Unnamed: 6 | |
---|---|---|---|---|---|---|---|
0 | 801 | NaN | NaN | NaN | NaN | - | NaN |
1 | 802 | NaN | NaN | NaN | NaN | - | NaN |
2 | 803 | NaN | NaN | NaN | NaN | - | NaN |
3 | 804 | NaN | NaN | NaN | NaN | - | NaN |
4 | 805 | NaN | NaN | NaN | NaN | - | NaN |
5 | 806 | NaN | NaN | NaN | NaN | - | NaN |
6 | 807 | NaN | NaN | NaN | NaN | - | NaN |
7 | 808 | NaN | NaN | NaN | NaN | - | NaN |
8 | 809 | NaN | NaN | NaN | NaN | - | NaN |
9 | 810 | NaN | NaN | NaN | NaN | - | NaN |
10 | 811 | NaN | NaN | NaN | NaN | - | NaN |
11 | 812 | 92.0 | 401.0 | 1.0 | 2.0 | NIHON-KOKI | NaN |
12 | 813 | NaN | NaN | NaN | NaN | - | NaN |
13 | 814 | NaN | NaN | NaN | NaN | - | NaN |
14 | 815 | 105.0 | 415.0 | 1.0 | 2.0 | NIHON-KOKI | NaN |
15 | 816 | NaN | NaN | NaN | NaN | - | NaN |
16 | 817 | NaN | NaN | NaN | NaN | - | NaN |
17 | 818 | NaN | NaN | NaN | NaN | - | NaN |
18 | 819 | NaN | NaN | NaN | NaN | - | NaN |
19 | 820 | NaN | NaN | NaN | NaN | - | NaN |
dataX.columns
dataX.columns=['year', 'day', 'Full-flowering date', 'Source code',
'Data type code', 'Reference Name', 'Unnamed: 6']
data=pd.DataFrame(dataX[['year','day']])
data=data.dropna()
plt.figure(figsize=(13,6))
plt.plot(data.year,data.day,'.')
[<matplotlib.lines.Line2D at 0x7fa3311a7b00>]
with pm.Model() as Blossoms:
x0 = pm.Normal('x0',mu=105, sd=10)
x1 = pm.Normal('x1', mu=0, sd=0.1)
day_est = x0 +x1*data.year
day_obs = pm.Normal('day', mu=day_est, sd=data.day.std(), observed = data.day)
trace = pm.sample(1000, tune=1000)
print(az.summary(trace))
az.plot_trace(trace)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [x1, x0] Sampling 4 chains, 0 divergences: 100%|██████████| 8000/8000 [00:07<00:00, 1079.26draws/s] The number of effective samples is smaller than 25% for some parameters.
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_mean ess_sd \ x0 106.748 1.173 104.497 108.823 0.044 0.031 707.0 707.0 x1 -0.001 0.001 -0.003 -0.000 0.000 0.000 723.0 723.0 ess_bulk ess_tail r_hat x0 717.0 565.0 1.00 x1 737.0 604.0 1.01
plt.figure(figsize=(13,6))
plt.plot(data.year,data.day,'.')
x0_m = trace['x0'].mean()
x1_m = trace['x1'].mean()
samples = range(0, len(trace), 20)
plt.plot(data.year, trace['x0'][samples] + trace['x1'][samples]*data.year[:, np.newaxis], c='grey', alpha=0.2)
plt.plot(data.year, x0_m + x1_m*data.year, c='r')
plt.show()
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/ipykernel_launcher.py:11: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. # This is added back by InteractiveShellApp.init_path()
ppc = pm.sample_posterior_predictive(trace,samples=200, model = Blossoms)
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/pymc3/sampling.py:1247: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample "samples parameter is smaller than nchains times ndraws, some draws " 100%|██████████| 200/200 [00:00<00:00, 1122.53it/s]
ppc
{'day': array([[113.27154232, 107.51471207, 107.15494642, ..., 103.25846907, 106.12749697, 105.06847638], [103.0599633 , 119.45620826, 103.86368706, ..., 99.10597738, 98.75799293, 96.61272484], [113.67478694, 115.32044901, 118.58034597, ..., 103.55215589, 104.28767806, 93.571078 ], ..., [107.83698212, 115.2622345 , 101.76246342, ..., 94.85417852, 108.09081089, 104.34680443], [ 97.58207008, 118.137261 , 113.17412267, ..., 109.34793861, 100.13346935, 87.95535835], [ 95.87702004, 107.50601767, 103.30857983, ..., 94.23318496, 110.99771909, 98.85158147]])}
graph = az.plot_hpd(data.year, ppc['day'], hdi_prob=0.9, color='green', figsize=(13,6))
plt.plot(data.year,data.day,'.')
x0_m = trace['x0'].mean()
x1_m = trace['x1'].mean()
plt.plot(data.year, x0_m + x1_m*data.year, c='r', label=f'y = {x0_m:.2f} + {x1_m:.5f} *x')
plt.legend()
plt.xlabel('year')
plt.ylabel('day')
plt.show()
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/arviz/stats/stats.py:487: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions FutureWarning,
with pm.Model() as Blossoms:
x0 = pm.Normal('x0',mu=105, sd=10)
x1 = pm.Normal('x1', mu=0, sd=0.1)
day_est = x0 +x1*data.year
day_obs = pm.Normal('day', mu=day_est, sd=data.day.std(), observed = data.day)
trace = pm.sample(1000, tune=1000)
print(az.summary(trace))
az.plot_trace(trace)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [x1, x0] Sampling 4 chains, 0 divergences: 100%|██████████| 8000/8000 [00:13<00:00, 600.05draws/s] The acceptance probability does not match the target. It is 0.7199633461801087, but should be close to 0.8. Try to increase the number of tuning steps. The number of effective samples is smaller than 25% for some parameters.
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_mean ess_sd \ x0 106.815 1.176 104.701 109.142 0.042 0.03 791.0 790.0 x1 -0.002 0.001 -0.003 -0.000 0.000 0.00 779.0 689.0 ess_bulk ess_tail r_hat x0 779.0 604.0 1.01 x1 769.0 650.0 1.01
with pm.Model() as Blossoms:
α = pm.Normal('α', mu=100, sd=10)
β = pm.Normal('β', mu=0 ,sd = 0.1)
ϵ = pm.HalfCauchy('ϵ', 10)
μ = pm.Deterministic('μ', α + β*data.year)
years_predict = pm.Normal('y_pred' , mu=μ, sd = ϵ, observed = data.day)
trace = pm.sample(2000, tune=1000)
az.plot_trace(trace, var_names= ['α','β','ϵ'])
plt.show()
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [ϵ, β, α] Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:19<00:00, 611.96draws/s]
plt.figure(figsize=(13,6))
plt.plot(data.year,data.day,'.')
alpha_m = trace['α'].mean()
beta_m = trace['β'].mean()
samples = range(0, len(trace), 20)
plt.plot(data.year, trace['α'][samples] + trace['β'][samples]*data.year[:, np.newaxis], c='grey', alpha=0.2)
plt.plot(data.year, alpha_m + beta_m*data.year, c='r')
plt.show()
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/ipykernel_launcher.py:9: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. if __name__ == '__main__':
graph = az.plot_hpd(data.year, trace['μ'], credible_interval = 0.95, color='k',figsize=(13,6))
plt.plot(data.year,data.day,'.')
plt.plot(data.year, alpha_m + beta_m*data.year, c='r')
plt.show()
az.plot_hpd(data.year, trace['μ'], credible_interval = 0.95, color='k',figsize=(13,6))
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/arviz/utils.py:661: UserWarning: Keyword argument credible_interval has been deprecated Please replace with hdi_prob ("Keyword argument credible_interval has been deprecated " "Please replace with hdi_prob"), /opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/arviz/stats/stats.py:487: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions FutureWarning,
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/arviz/utils.py:661: UserWarning: Keyword argument credible_interval has been deprecated Please replace with hdi_prob ("Keyword argument credible_interval has been deprecated " "Please replace with hdi_prob"), /opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/arviz/stats/stats.py:487: FutureWarning: hdi currently interprets 2d data as (draw, shape) but this will change in a future release to (chain, draw) for coherence with other functions FutureWarning,
<AxesSubplot:>
Event=list(data.year.values).index(1600)
with pm.Model() as BlossomsS:
α = pm.Normal('α', mu=100, sd=10)
β = pm.Normal('β', mu=0 ,sd = 0.1)
ϵ = pm.HalfCauchy('ϵ', 10)
μ = pm.Deterministic('μ', α + β*data.year[:Event])
years_predict = pm.Normal('y_pred' , mu=μ, sd = ϵ, observed = data.day[:Event])
traceS = pm.sample(1000, tune=1000)
az.plot_trace(traceS, var_names= ['α','β','ϵ'])
plt.show()
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [ϵ, β, α] Sampling 4 chains, 0 divergences: 100%|██████████| 8000/8000 [00:13<00:00, 597.54draws/s]
Event=list(data.year.values).index(1600)
with pm.Model() as BlossomsE:
α = pm.Normal('α', mu=100, sd=10)
β = pm.Normal('β', mu=0 ,sd = 0.1)
ϵ = pm.HalfCauchy('ϵ', 10)
μ = pm.Deterministic('μ', α + β*data.year[Event:])
years_predict = pm.Normal('y_pred' , mu=μ, sd = ϵ, observed = data.day[Event:])
traceE = pm.sample(1000, tune=1000)
az.plot_trace(traceE, var_names= ['α','β','ϵ'])
plt.show()
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [ϵ, β, α] Sampling 4 chains, 0 divergences: 100%|██████████| 8000/8000 [00:26<00:00, 304.97draws/s]
plt.figure(figsize=(13,6))
plt.plot(data.year,data.day,'.')
alphaS_m = traceS['α'].mean()
betaS_m = traceS['β'].mean()
samples = range(0, len(traceS), 20)
plt.plot(data.year, traceS['α'][samples] + traceS['β'][samples]*data.year[:, np.newaxis], c='green', alpha=0.2)
plt.plot(data.year, alphaS_m + betaS_m*data.year, c='black')
alphaE_m = traceE['α'].mean()
betaE_m = traceE['β'].mean()
samples = range(0, len(traceE), 20)
plt.plot(data.year, traceE['α'][samples] + traceE['β'][samples]*data.year[:, np.newaxis], c='red', alpha=0.2)
plt.plot(data.year, alphaE_m + betaE_m*data.year, c='black')
plt.show()
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/ipykernel_launcher.py:9: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. if __name__ == '__main__': /opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/ipykernel_launcher.py:16: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. app.launch_new_instance()
switch_point = np.random.randint(300,700)
m1,m2 = 9,10
sd1,sd2 = 1.5,1.5
data = list(stats.norm.rvs(m1,sd1,switch_point))+list(stats.norm.rvs(m2,sd2,1000-switch_point))
print(switch_point)
plt.figure(figsize=(13,6))
plt.scatter(range(len(data)), data,alpha=0.4)
plt.show()
546
μ1 = np.mean(data[len(data)//3:])
μ2 = np.mean(data[:-len(data)//3])
ϵ1 = np.std(data[len(data)//3:])
ϵ2 = np.std(data[:-len(data)//3])
print(switch_point)
with pm.Model() as switcheroo:
sw = pm.DiscreteUniform('switch', lower=len(data)//4 , upper = len(data)-len(data)//4)
first_data = pm.Normal('d1', mu=μ1 , sd= ϵ1)
second_data = pm.Normal('d2', mu=μ2, sd = ϵ2)
idx = np.arange(1000)
vals = pm.math.switch(sw >= idx, first_data, second_data)
obs_vals = pm.Poisson("my_data", vals, observed=data)
tracesw = pm.sample(1000)
az.plot_trace(tracesw)
print(az.summary(tracesw))
546
Multiprocess sampling (4 chains in 4 jobs) CompoundStep >Metropolis: [switch] >NUTS: [d2, d1] Sampling 4 chains, 0 divergences: 100%|██████████| 6000/6000 [00:04<00:00, 1334.02draws/s] The number of effective samples is smaller than 10% for some parameters.
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_mean \ switch 554.224 38.473 497.000 637.000 2.093 1.482 338.0 d1 8.625 0.130 8.383 8.871 0.003 0.002 2440.0 d2 9.535 0.154 9.223 9.805 0.004 0.003 1604.0 ess_sd ess_bulk ess_tail r_hat switch 338.0 381.0 295.0 1.01 d1 2440.0 2439.0 2422.0 1.00 d2 1604.0 1599.0 2267.0 1.01
dataX.columns
dataX.columns=['year', 'day', 'Full-flowering date', 'Source code',
'Data type code', 'Reference Name', 'Unnamed: 6']
data=pd.DataFrame(dataX[['year','day']])
data=data.dropna()
Event=list(data.year.values).index(1600)
with pm.Model() as BlossomsS:
α = pm.Normal('α', mu=100, sd=10)
β = pm.Normal('β', mu=0 ,sd = 0.1)
ϵ = pm.HalfCauchy('ϵ', 10)
μ = pm.Deterministic('μ', α + β*data.year[:Event])
years_predict = pm.Normal('y_pred' , mu=μ, sd = ϵ, observed = data.day[:Event])
traceS = pm.sample(1000, tune=1000)
az.plot_trace(traceS, var_names= ['α','β','ϵ'])
plt.show()
with pm.Model() as BlossomsE:
α = pm.Normal('α', mu=100, sd=10)
β = pm.Normal('β', mu=0 ,sd = 0.1)
ϵ = pm.HalfCauchy('ϵ', 10)
μ = pm.Deterministic('μ', α + β*data.year[Event:])
years_predict = pm.Normal('y_pred' , mu=μ, sd = ϵ, observed = data.day[Event:])
traceE = pm.sample(1000, tune=1000)
az.plot_trace(traceE, var_names= ['α','β','ϵ'])
plt.show()
plt.figure(figsize=(13,6))
plt.plot(data.year,data.day,'.')
alphaS_m = traceS['α'].mean()
betaS_m = traceS['β'].mean()
samples = range(0, len(traceS), 20)
plt.plot(data.year, traceS['α'][samples] + traceS['β'][samples]*data.year[:, np.newaxis], c='green', alpha=0.2)
plt.plot(data.year, alphaS_m + betaS_m*data.year, c='black')
alphaE_m = traceE['α'].mean()
betaE_m = traceE['β'].mean()
samples = range(0, len(traceE), 20)
plt.plot(data.year, traceE['α'][samples] + traceE['β'][samples]*data.year[:, np.newaxis], c='red', alpha=0.2)
plt.plot(data.year, alphaE_m + betaE_m*data.year, c='black')
plt.show()
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [ϵ, β, α] Sampling 4 chains, 0 divergences: 100%|██████████| 8000/8000 [00:12<00:00, 636.47draws/s]
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [ϵ, β, α] Sampling 4 chains, 0 divergences: 100%|██████████| 8000/8000 [00:32<00:00, 243.02draws/s]
/opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/ipykernel_launcher.py:49: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead. /opt/anaconda3/envs/pymc3_3_6/lib/python3.6/site-packages/ipykernel_launcher.py:56: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead.
with pm.Model() as Blossom_switch:
switchpoint = pm.DiscreteUniform('switchpoint', lower=1600, upper=2020)
α1 = pm.Normal('α1', mu=alphaS_m, sd=1)
β1 = pm.Normal('β1', mu=betaS_m ,sd = 0.051)
α2 = pm.Normal('α2', mu=alphaE_m, sd=1)
β2 = pm.Normal('β2', mu=betaE_m ,sd = 0.051)
alpha = pm.math.switch(switchpoint>=data.year, α1,α2)
beta = pm.math.switch(switchpoint >=data.year, β1, β2)
likelihood = pm.Normal('days', mu= alpha + beta*data.year, observed = data.year)
start = pm.find_MAP()
trace= pm.sample(3000, start = start, tune = 2000, chains = 4)
az.plot_trace(trace)
print(az.summary(trace))
logp = -62,895, ||grad|| = 7.7364e+06: 100%|██████████| 31/31 [00:00<00:00, 1301.76it/s] Multiprocess sampling (4 chains in 4 jobs) CompoundStep >Metropolis: [switchpoint] >NUTS: [β2, α2, β1, α1] Sampling 4 chains, 0 divergences: 100%|██████████| 20000/20000 [00:32<00:00, 616.11draws/s] The number of effective samples is smaller than 10% for some parameters.
mean sd hdi_3% hdi_97% mcse_mean mcse_sd \ switchpoint 1997.422 6.712 1986.000 2010.000 0.213 0.150 α1 3.316 0.193 2.960 3.681 0.003 0.002 β1 0.998 0.000 0.998 0.998 0.000 0.000 α2 132.648 1.035 130.777 134.615 0.015 0.010 β2 0.934 0.001 0.933 0.935 0.000 0.000 ess_mean ess_sd ess_bulk ess_tail r_hat switchpoint 997.0 997.0 1000.0 1755.0 1.0 α1 3802.0 3802.0 3800.0 4496.0 1.0 β1 3530.0 3530.0 3529.0 4635.0 1.0 α2 4883.0 4881.0 4885.0 4322.0 1.0 β2 4338.0 4338.0 4337.0 4235.0 1.0