MAT411 Bayesian Analysis:

Week 8

In [13]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import Pythonic_files as pf


%matplotlib inline
%config InlineBackend.figure_format='retina'

Let's look at the unfair coin "one more time".

Again, let's generate an experiment.

In [12]:
flips = 20
Heads_prob = np.random.random()
num_heads = stats.binom(flips,Heads_prob).rvs()
num_tails = flips-num_heads
print(Heads_prob,num_heads,num_tails)
0.9182723445124208 18 2
In [16]:
pf.nCk(3,1)
Out[16]:
3.0
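
pf.nCk here presumably computes the binomial coefficient "n choose k" from the course's Pythonic_files helpers (the name and the value 3.0 both point that way). Assuming so, scipy.special.comb is an equivalent sanity check:

from scipy.special import comb

comb(3, 1)    # 3.0, should match pf.nCk(3, 1)
comb(20, 18)  # ways to arrange 18 heads among 20 flips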

Metropolis Algorithm

We want to set up a random walk through the probability space [0, 1].

Steps

  • Start somewhere
  • Generate a random move within the allowable space
  • Find the odds of moving = prob(new)/prob(old)
  • Generate a random number and check to see if the move happens
  • Rinse and repeat (a minimal sketch of the acceptance test follows this list)
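
As a minimal sketch of that acceptance test (the helper name accept_move is just for illustration; the cells below do the same thing inline):

def accept_move(prob_new, prob_old):
    # Metropolis rule: always take uphill moves, take downhill moves
    # with probability prob_new/prob_old
    odds = prob_new/prob_old
    return min(1, odds) >= np.random.random()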
In [17]:
p_1 = 0.3          # starting point for the walk

step_size = 0.1    # scale of each proposed move

p_walk = np.random.normal(0, step_size, 1)[0]

In [18]:
p_walk

Out[18]:
-0.027979088082813304
In [20]:
MCMCp1 = [p_1]

while len(MCMCp1) < 100:
    # propose a move
    p_walk = np.random.normal(0, step_size, 1)[0]

    # only jump if the proposal stays inside (0, 1)
    if 0 < p_walk + p_1 < 1:
        p1_jump = p_1 + p_walk
    else:
        p1_jump = p_1

    # binomial likelihood of the observed flips at the old and new p
    prob_old = pf.nCk(flips, num_heads)*(p_1)**num_heads*(1-p_1)**num_tails
    prob_new = pf.nCk(flips, num_heads)*(p1_jump)**num_heads*(1-p1_jump)**num_tails

    prob_move = prob_new/prob_old

    # Metropolis acceptance test
    if min(1, prob_move) >= np.random.random():
        p_1 = p1_jump

    MCMCp1.append(p_1)
    
In [24]:
plt.figure(figsize=(13,6))
plt.plot(MCMCp1, '.-')
plt.title('MCMC walk with a probability of '+str(Heads_prob))
plt.show()
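
Worth noting: the pf.nCk factor appears in both prob_old and prob_new, so it cancels out of the ratio. For larger experiments it is also safer to work with log probabilities, e.g. with stats.binom.logpmf; a sketch of the same acceptance ratio in log space:

# the binomial coefficient cancels either way
log_old = stats.binom(flips, p_1).logpmf(num_heads)
log_new = stats.binom(flips, p1_jump).logpmf(num_heads)
prob_move = np.exp(log_new - log_old)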
    

Let's put it all together.

In [54]:
# fresh experiment
flips = 20
Heads_prob = np.random.random()
num_heads = stats.binom(flips, Heads_prob).rvs()
num_tails = flips - num_heads

p_1 = 0.5          # start the walk in the middle of (0, 1)

step_size = 0.3

MCMCp1 = [p_1]

while len(MCMCp1) < 10000:
    p_walk = np.random.normal(0, step_size, 1)[0]

    if 0 < p_walk + p_1 < 1:
        p1_jump = p_1 + p_walk
    else:
        p1_jump = p_1

    prob_old = pf.nCk(flips, num_heads)*(p_1)**num_heads*(1-p_1)**num_tails
    prob_new = pf.nCk(flips, num_heads)*(p1_jump)**num_heads*(1-p1_jump)**num_tails

    prob_move = prob_new/prob_old

    if min(1, prob_move) >= np.random.random():
        p_1 = p1_jump

    MCMCp1.append(p_1)


plt.figure(figsize=(13,6))
plt.plot(MCMCp1, ',-', alpha=0.2)
plt.hlines(Heads_prob, 0, len(MCMCp1), color='r')            # true heads probability
plt.hlines(num_heads/flips, 0, len(MCMCp1), color='green')   # observed frequency
plt.title('MCMC walk with a probability of '+str(Heads_prob))
plt.ylim(0,1)
print('Heads/flips = ', num_heads/flips)
print("heads, tails = ", num_heads, num_tails)


plt.show()

Heads/flips =  0.25
heads, tails =  5 15
    
In [56]:
sns.displot(MCMCp1)

Out[56]:
<seaborn.axisgrid.FacetGrid at 0x7f95b1aecbb0>
In [57]:
np.mean(MCMCp1)

Out[57]:
0.2726767512686508
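
The posterior mean is one summary; a credible interval can be read straight off the chain. A sketch, with the 10% burn-in and the 2.5/97.5 cutoffs chosen here as assumptions rather than taken from the notebook:

burn = int(len(MCMCp1)*0.1)
posterior = np.array(MCMCp1[burn:])
np.percentile(posterior, [2.5, 97.5])   # central 95% credible interval for the heads probability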
In [59]:
# now draw the head counts from 20 repeated experiments with the same coin
flips = 20
Heads_prob = np.random.random()
num_heads = stats.binom(flips, Heads_prob).rvs(20)

num_heads

Out[59]:
array([2, 6, 4, 7, 3, 7, 3, 5, 7, 8, 2, 6, 9, 4, 3, 5, 5, 4, 4, 6])
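
That last cell simulates the head counts from 20 repeated experiments. If we wanted the random walk to use all of them at once, the natural change (a sketch, not something run above) is to multiply the individual binomial likelihoods, i.e. sum their logs:

def log_likelihood(p, heads_counts, flips):
    # joint log likelihood of several independent binomial experiments
    return np.sum(stats.binom(flips, p).logpmf(heads_counts))

# the acceptance ratio for a proposed p1_jump against the current p_1 would then be
# np.exp(log_likelihood(p1_jump, num_heads, flips) - log_likelihood(p_1, num_heads, flips))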

Let's look at this MCMC method for linear regression.

In [70]:
# simulate noisy data from a known straight line
intercept = 5
slope = 2
num_points = 200
error = 2

x = np.random.random(num_points)

y = intercept + slope*x + np.random.normal(0, error, num_points)
plt.figure(figsize=(13,6))
plt.scatter(x, y, alpha=0.5)
data = np.array([x, y])
    
In [71]:
np.polyfit(x, y, 1)

Out[71]:
array([2.55569334, 4.86789977])
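
np.polyfit gives the least-squares answer, which the MCMC posterior means below should land close to. For reference, the same fit can be computed by hand as slope = cov(x, y)/var(x):

# least-squares slope and intercept by hand, as a cross-check on np.polyfit
slope_ls = np.cov(data[0], data[1])[0, 1] / np.var(data[0], ddof=1)
intercept_ls = data[1].mean() - slope_ls*data[0].mean()
print(slope_ls, intercept_ls)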
In [78]:
# rough prior scales taken from the data itself:
# the overall rise-over-run is the steepest plausible slope; use a third of it as the prior width
slope_range = (data[1].max()-data[1].min())/(data[0].max()-data[0].min())
slope_range = slope_range/3

y_bar = data[1].mean()
x_bar = data[0].mean()

# centre the intercept prior where a line with the full rise-over-run slope would cross x = 0
intercept_guess = y_bar - x_bar*slope_range*3
intercept_range = slope_range
    
In [81]:
def prior_prob(position):
    # log prior for position = [intercept, slope], using independent normal priors
    I = position[0]
    S = position[1]

    prior_I = stats.norm(intercept_guess, intercept_range).pdf(I)
    prior_S = stats.norm(0, slope_range).pdf(S)

    return np.log(prior_I) + np.log(prior_S)
    
In [87]:
def likelihood_prob(position):
    # log likelihood of the data given a line, assuming Gaussian noise with sigma = 2
    I = position[0]
    S = position[1]

    y_like = I + S*data[0]

    prob_like_space = stats.norm(y_like, 2).pdf(data[1])

    # independent points: the joint log likelihood is the sum of the individual logs
    return np.sum(np.log(prob_like_space))
    
In [93]:
def posterior_prob(position):
    # log posterior = log prior + log likelihood, up to a normalizing constant
    # that cancels when we take the Metropolis ratio below
    return prior_prob(position) + likelihood_prob(position)
    
In [94]:
def jump_position(position, step_size):
    # symmetric Gaussian proposal around the current position,
    # so no extra correction is needed in the acceptance ratio
    I = position[0]
    S = position[1]

    jump_I = stats.norm(I, step_size).rvs()
    jump_S = stats.norm(S, step_size).rvs()

    return [jump_I, jump_S]
    
In [103]:
initial_position = [4, 7]      # deliberately poor starting guess for [intercept, slope]
results = [initial_position]
step_size = 0.3

while len(results) < 5000:
    old_position = results[-1]

    test_position = jump_position(old_position, step_size)

    # ratio of posteriors, computed from the difference of log posteriors
    jump_prob = np.exp(posterior_prob(test_position) - posterior_prob(old_position))

    if min(1, jump_prob) >= np.random.random():
        results.append(test_position)
    else:
        results.append(old_position)
    
In [104]:
results = np.array(results)
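
A quick diagnostic that is not in the original notebook but is cheap to add: the fraction of proposals that were accepted. Because rejected steps repeat the previous row exactly, consecutive duplicates mark rejections; a common rule of thumb is to tune step_size so this rate sits very roughly between 20% and 50%.

# rows where the chain actually moved
moved = np.any(np.diff(results, axis=0) != 0, axis=1)
print('acceptance rate ~', moved.mean())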
    
In [111]:
# trace of the first 300 steps: the walk drifting in from the poor starting point
plt.figure(figsize=(13,6))
plt.plot(results[:300])
plt.show()
    
In [112]:
# discard the first 10% of the chain as burn-in
burn_in = int(len(results)*0.1)
    
In [113]:
plt.figure(figsize=(13,6))
plt.plot(results[burn_in:])
plt.show()
    
In [118]:
plt.figure(figsize=(13,6))
sns.histplot(results[burn_in:,0], color='red', label='intercept')
sns.histplot(results[burn_in:,1], label='slope')
plt.legend()

Out[118]:
<matplotlib.legend.Legend at 0x7f9591ecaf10>
In [124]:
num_samples = 50

plt.figure(figsize=(13,6))

# draw paired (intercept, slope) samples so each grey line is one posterior draw
sample_idx = np.random.choice(len(results[burn_in:]), num_samples, replace=False)
samples_I = results[burn_in:, 0][sample_idx]
samples_S = results[burn_in:, 1][sample_idx]

for l in range(len(samples_I)):
    y_line = samples_I[l] + samples_S[l]*data[0]

    plt.plot(data[0], y_line, '-', c='grey', alpha=0.3)


# line built from the posterior mean intercept and slope
y_projected = np.mean(results[burn_in:,0]) + np.mean(results[burn_in:,1])*data[0]

plt.plot(data[0], y_projected, '-', c='r')
plt.plot(data[0], data[1], '.', alpha=0.2, c='b')

plt.show()
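
Instead of 50 individual grey lines, the spread of plausible fits can also be summarized with percentile bands over all the retained samples (a sketch; the 2.5/97.5 cutoffs are an assumption):

xs = np.linspace(data[0].min(), data[0].max(), 100)
# one fitted line per retained sample, evaluated on a common grid
lines = results[burn_in:, 0][:, None] + results[burn_in:, 1][:, None]*xs[None, :]
low, high = np.percentile(lines, [2.5, 97.5], axis=0)

plt.figure(figsize=(13,6))
plt.fill_between(xs, low, high, color='grey', alpha=0.3)
plt.plot(xs, lines.mean(axis=0), 'r-')
plt.plot(data[0], data[1], 'b.', alpha=0.2)
plt.show()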
    
In [127]:
# posterior mean intercept and slope
print(np.mean(results[burn_in:,0]))
print(np.mean(results[burn_in:,1]))

4.806689741558933
2.647171780154239
    
In [131]:
# joint scatter of the intercept and slope samples
plt.plot(results[burn_in:,0], results[burn_in:,1], '.-', alpha=0.2)

Out[131]:
[<matplotlib.lines.Line2D at 0x7f955b1de4f0>]
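
The joint scatter typically shows the intercept and slope samples are negatively correlated (a higher intercept can be traded for a shallower slope when x is positive); that can be quantified directly:

np.corrcoef(results[burn_in:, 0], results[burn_in:, 1])[0, 1]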
In [ ]: