MAT411 Bayesian Analysis:

Week 8

In [13]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import Pythonic_files as pf


%matplotlib inline
%config InlineBackend.figure_format='retina'

Let's look at the unfair coin "one more time".

Again, let's generate an experiment.

In [12]:
flips = 20
Heads_prob = np.random.random()
num_heads = stats.binom(flips,Heads_prob).rvs()
num_tails = flips-num_heads
print(Heads_prob,num_heads,num_tails)
0.9182723445124208 18 2
In [16]:
pf.nCk(3,1)
Out[16]:
3.0
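
pf.nCk here presumably computes the binomial coefficient "n choose k" from the course's Pythonic_files helpers (the name and the value 3.0 both point that way). Assuming so, scipy.special.comb is an equivalent sanity check:

from scipy.special import comb

comb(3, 1)    # 3.0, should match pf.nCk(3, 1)
comb(20, 18)  # ways to arrange 18 heads among 20 flips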

Metropolis Algorithm

We want to set up a random walk through the probability space [0, 1].

Steps

  • Start somewhere
  • Generate a random move within the allowable space
  • Find the odds of moving = prob(new)/prob(old)
  • Generate a random number and check to see if the move happens
  • Rinse and repeat (a minimal sketch of the acceptance test follows this list)
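
As a minimal sketch of that acceptance test (the helper name accept_move is just for illustration; the cells below do the same thing inline):

def accept_move(prob_new, prob_old):
    # Metropolis rule: always take uphill moves, take downhill moves
    # with probability prob_new/prob_old
    odds = prob_new/prob_old
    return min(1, odds) >= np.random.random()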
In [17]:
p_1 = 0.3          # starting point for the walk

step_size = 0.1    # scale of each proposed move

p_walk = np.random.normal(0, step_size, 1)[0]

In [18]:
p_walk

Out[18]:
-0.027979088082813304
In [20]:
MCMCp1 = [p_1]

while len(MCMCp1) < 100:
    # propose a move
    p_walk = np.random.normal(0, step_size, 1)[0]

    # only jump if the proposal stays inside (0, 1)
    if 0 < p_walk + p_1 < 1:
        p1_jump = p_1 + p_walk
    else:
        p1_jump = p_1

    # binomial likelihood of the observed flips at the old and new p
    prob_old = pf.nCk(flips, num_heads)*(p_1)**num_heads*(1-p_1)**num_tails
    prob_new = pf.nCk(flips, num_heads)*(p1_jump)**num_heads*(1-p1_jump)**num_tails

    prob_move = prob_new/prob_old

    # Metropolis acceptance test
    if min(1, prob_move) >= np.random.random():
        p_1 = p1_jump

    MCMCp1.append(p_1)
    
In [24]:
plt.figure(figsize=(13,6))
plt.plot(MCMCp1, '.-')
plt.title('MCMC walk with a probability of '+str(Heads_prob))
plt.show()
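
Worth noting: the pf.nCk factor appears in both prob_old and prob_new, so it cancels out of the ratio. For larger experiments it is also safer to work with log probabilities, e.g. with stats.binom.logpmf; a sketch of the same acceptance ratio in log space:

# the binomial coefficient cancels either way
log_old = stats.binom(flips, p_1).logpmf(num_heads)
log_new = stats.binom(flips, p1_jump).logpmf(num_heads)
prob_move = np.exp(log_new - log_old)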
    

Let's put it all together.

In [54]:
# fresh experiment
flips = 20
Heads_prob = np.random.random()
num_heads = stats.binom(flips, Heads_prob).rvs()
num_tails = flips - num_heads

p_1 = 0.5          # start the walk in the middle of (0, 1)

step_size = 0.3

MCMCp1 = [p_1]

while len(MCMCp1) < 10000:
    p_walk = np.random.normal(0, step_size, 1)[0]

    if 0 < p_walk + p_1 < 1:
        p1_jump = p_1 + p_walk
    else:
        p1_jump = p_1

    prob_old = pf.nCk(flips, num_heads)*(p_1)**num_heads*(1-p_1)**num_tails
    prob_new = pf.nCk(flips, num_heads)*(p1_jump)**num_heads*(1-p1_jump)**num_tails

    prob_move = prob_new/prob_old

    if min(1, prob_move) >= np.random.random():
        p_1 = p1_jump

    MCMCp1.append(p_1)


plt.figure(figsize=(13,6))
plt.plot(MCMCp1, ',-', alpha=0.2)
plt.hlines(Heads_prob, 0, len(MCMCp1), color='r')            # true heads probability
plt.hlines(num_heads/flips, 0, len(MCMCp1), color='green')   # observed frequency
plt.title('MCMC walk with a probability of '+str(Heads_prob))
plt.ylim(0,1)
print('Heads/flips = ', num_heads/flips)
print("heads, tails = ", num_heads, num_tails)


plt.show()

Heads/flips =  0.25
heads, tails =  5 15
    
In [56]:
sns.displot(MCMCp1)

Out[56]:
<seaborn.axisgrid.FacetGrid at 0x7f95b1aecbb0>
In [57]:
np.mean(MCMCp1)

Out[57]:
0.2726767512686508
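
The posterior mean is one summary; a credible interval can be read straight off the chain. A sketch, with the 10% burn-in and the 2.5/97.5 cutoffs chosen here as assumptions rather than taken from the notebook:

burn = int(len(MCMCp1)*0.1)
posterior = np.array(MCMCp1[burn:])
np.percentile(posterior, [2.5, 97.5])   # central 95% credible interval for the heads probability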
In [59]:
# now draw the head counts from 20 repeated experiments with the same coin
flips = 20
Heads_prob = np.random.random()
num_heads = stats.binom(flips, Heads_prob).rvs(20)

num_heads

Out[59]:
array([2, 6, 4, 7, 3, 7, 3, 5, 7, 8, 2, 6, 9, 4, 3, 5, 5, 4, 4, 6])
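
That last cell simulates the head counts from 20 repeated experiments. If we wanted the random walk to use all of them at once, the natural change (a sketch, not something run above) is to multiply the individual binomial likelihoods, i.e. sum their logs:

def log_likelihood(p, heads_counts, flips):
    # joint log likelihood of several independent binomial experiments
    return np.sum(stats.binom(flips, p).logpmf(heads_counts))

# the acceptance ratio for a proposed p1_jump against the current p_1 would then be
# np.exp(log_likelihood(p1_jump, num_heads, flips) - log_likelihood(p_1, num_heads, flips))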

Let's look at this MCMC method for linear regression.

In [70]:
# simulate noisy data from a known straight line
intercept = 5
slope = 2
num_points = 200
error = 2

x = np.random.random(num_points)

y = intercept + slope*x + np.random.normal(0, error, num_points)
plt.figure(figsize=(13,6))
plt.scatter(x, y, alpha=0.5)
data = np.array([x, y])
    
In [71]:
np.polyfit(x, y, 1)

Out[71]:
array([2.55569334, 4.86789977])
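
np.polyfit gives the least-squares answer, which the MCMC posterior means below should land close to. For reference, the same fit can be computed by hand as slope = cov(x, y)/var(x):

# least-squares slope and intercept by hand, as a cross-check on np.polyfit
slope_ls = np.cov(data[0], data[1])[0, 1] / np.var(data[0], ddof=1)
intercept_ls = data[1].mean() - slope_ls*data[0].mean()
print(slope_ls, intercept_ls)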
In [78]:
# rough prior scales taken from the data itself:
# the overall rise-over-run is the steepest plausible slope; use a third of it as the prior width
slope_range = (data[1].max()-data[1].min())/(data[0].max()-data[0].min())
slope_range = slope_range/3

y_bar = data[1].mean()
x_bar = data[0].mean()

# centre the intercept prior where a line with the full rise-over-run slope would cross x = 0
intercept_guess = y_bar - x_bar*slope_range*3
intercept_range = slope_range
    
In [81]:
def prior_prob(position):
    # log prior for position = [intercept, slope], using independent normal priors
    I = position[0]
    S = position[1]

    prior_I = stats.norm(intercept_guess, intercept_range).pdf(I)
    prior_S = stats.norm(0, slope_range).pdf(S)

    return np.log(prior_I) + np.log(prior_S)
    
In [87]:
def likelihood_prob(position):
    # log likelihood of the data given a line, assuming Gaussian noise with sigma = 2
    I = position[0]
    S = position[1]

    y_like = I + S*data[0]

    prob_like_space = stats.norm(y_like, 2).pdf(data[1])

    # independent points: the joint log likelihood is the sum of the individual logs
    return np.sum(np.log(prob_like_space))
    
In [93]:
def posterior_prob(position):
    # log posterior = log prior + log likelihood, up to a normalizing constant
    # that cancels when we take the Metropolis ratio below
    return prior_prob(position) + likelihood_prob(position)
    
In [94]:
def jump_position(position, step_size):
    # symmetric Gaussian proposal around the current position,
    # so no extra correction is needed in the acceptance ratio
    I = position[0]
    S = position[1]

    jump_I = stats.norm(I, step_size).rvs()
    jump_S = stats.norm(S, step_size).rvs()

    return [jump_I, jump_S]
    
In [103]:
initial_position = [4, 7]      # deliberately poor starting guess for [intercept, slope]
results = [initial_position]
step_size = 0.3

while len(results) < 5000:
    old_position = results[-1]

    test_position = jump_position(old_position, step_size)

    # ratio of posteriors, computed from the difference of log posteriors
    jump_prob = np.exp(posterior_prob(test_position) - posterior_prob(old_position))

    if min(1, jump_prob) >= np.random.random():
        results.append(test_position)
    else:
        results.append(old_position)
    
In [104]:
results = np.array(results)
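
A quick diagnostic that is not in the original notebook but is cheap to add: the fraction of proposals that were accepted. Because rejected steps repeat the previous row exactly, consecutive duplicates mark rejections; a common rule of thumb is to tune step_size so this rate sits very roughly between 20% and 50%.

# rows where the chain actually moved
moved = np.any(np.diff(results, axis=0) != 0, axis=1)
print('acceptance rate ~', moved.mean())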
    
In [111]:
# trace of the first 300 steps: the walk drifting in from the poor starting point
plt.figure(figsize=(13,6))
plt.plot(results[:300])
plt.show()
    
In [112]:
# discard the first 10% of the chain as burn-in
burn_in = int(len(results)*0.1)
    
In [113]:
plt.figure(figsize=(13,6))
plt.plot(results[burn_in:])
plt.show()
    
In [118]:
plt.figure(figsize=(13,6))
sns.histplot(results[burn_in:,0], color='red', label='intercept')
sns.histplot(results[burn_in:,1], label='slope')
plt.legend()

Out[118]:
<matplotlib.legend.Legend at 0x7f9591ecaf10>
In [124]:
num_samples = 50

plt.figure(figsize=(13,6))

# draw paired (intercept, slope) samples so each grey line is one posterior draw
sample_idx = np.random.choice(len(results[burn_in:]), num_samples, replace=False)
samples_I = results[burn_in:, 0][sample_idx]
samples_S = results[burn_in:, 1][sample_idx]

for l in range(len(samples_I)):
    y_line = samples_I[l] + samples_S[l]*data[0]

    plt.plot(data[0], y_line, '-', c='grey', alpha=0.3)


# line built from the posterior mean intercept and slope
y_projected = np.mean(results[burn_in:,0]) + np.mean(results[burn_in:,1])*data[0]

plt.plot(data[0], y_projected, '-', c='r')
plt.plot(data[0], data[1], '.', alpha=0.2, c='b')

plt.show()
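
Instead of 50 individual grey lines, the spread of plausible fits can also be summarized with percentile bands over all the retained samples (a sketch; the 2.5/97.5 cutoffs are an assumption):

xs = np.linspace(data[0].min(), data[0].max(), 100)
# one fitted line per retained sample, evaluated on a common grid
lines = results[burn_in:, 0][:, None] + results[burn_in:, 1][:, None]*xs[None, :]
low, high = np.percentile(lines, [2.5, 97.5], axis=0)

plt.figure(figsize=(13,6))
plt.fill_between(xs, low, high, color='grey', alpha=0.3)
plt.plot(xs, lines.mean(axis=0), 'r-')
plt.plot(data[0], data[1], 'b.', alpha=0.2)
plt.show()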
    
In [127]:
# posterior mean intercept and slope
print(np.mean(results[burn_in:,0]))
print(np.mean(results[burn_in:,1]))

4.806689741558933
2.647171780154239
    
In [131]:
# joint scatter of the intercept and slope samples
plt.plot(results[burn_in:,0], results[burn_in:,1], '.-', alpha=0.2)

Out[131]:
[<matplotlib.lines.Line2D at 0x7f955b1de4f0>]
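
The joint scatter typically shows the intercept and slope samples are negatively correlated (a higher intercept can be traded for a shallower slope when x is positive); that can be quantified directly:

np.corrcoef(results[burn_in:, 0], results[burn_in:, 1])[0, 1]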
In [ ]: