MAT411 Bayesian Analsysis:

Week 7 part 2

In [6]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import Pythonic_files as pf
import pandas as pd


%matplotlib inline
%config InlineBackend.figure_format='retina'

Lets look at distributions with multiple variable

In [5]:
x,y =pf.elipse_noise(10000,17,(20,10),(100,200))
plt.figure(figsize=(13,6))

plt.scatter(x,y, alpha=0.051)
Out[5]:
<matplotlib.collections.PathCollection at 0x7fc1f8605250>
In [16]:
dataX = pd.DataFrame([x,y], index=None)
dataX=dataX.T
dataX.columns=['x','y']
dataX.head()
Out[16]:
x y
0 100.157411 199.696415
1 100.457483 200.908437
2 101.736017 201.165543
3 100.851441 201.212750
4 99.640435 200.554714
In [22]:
plt.figure(figsize=(13,6))
sns.scatterplot(x='x',y='y',data=dataX, alpha = 0.52)
Out[22]:
<AxesSubplot:xlabel='x', ylabel='y'>
In [23]:
dataX.describe()
Out[23]:
x y
count 10000.000000 10000.000000
mean 99.988729 199.990519
std 1.008234 1.008250
min 95.990989 196.133406
25% 99.320238 199.321559
50% 100.003574 199.999182
75% 100.663912 200.670484
max 104.559338 204.281967
In [27]:
plt.figure(figsize=(13,6))
sns.kdeplot(x='x',y='y',data=dataX, n_levels=50, shade=True, cmap='RdBu_r',cbar=True,thresh = 0)
plt.show()
In [29]:
plt.figure(figsize=(13,6))
sns.jointplot(x='x',y='y',data=dataX, kind='hex')
plt.show()
<Figure size 936x432 with 0 Axes>
In [30]:
sns.pairplot(data=dataX)
plt.show()

Lets look how we would look at the distributions of the graphs

In [32]:
x,y =pf.elipse_noise(10000,17,(20,10),(100,200))
plt.figure(figsize=(13,6))

plt.scatter(x,y, alpha=0.051)
Out[32]:
<matplotlib.collections.PathCollection at 0x7fc2018358b0>
In [36]:
sns.displot(dataX.y, kind='kde')
Out[36]:
<seaborn.axisgrid.FacetGrid at 0x7fc200a8ba30>
In [53]:
sample_size = 500
#np.random.choice(x,sample_size,replace=False)
samples = np.random.randint(0,len(x),sample_size)

plt.figure(figsize=(13,6))

plt.scatter(x,y, alpha=0.051)
plt.plot(x[samples],y[samples], 'x', c='orange')


plt.show()
In [54]:
sns.distplot(x)
sns.distplot(x[samples])
/opt/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/opt/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
Out[54]:
<AxesSubplot:ylabel='Density'>
In [58]:
plt.figure(figsize=(26,12))
plt.subplot(2,2,1)
plt.plot(x,y,'.',alpha=0.15)
plt.xlim(95,105)
plt.ylim(195,205)
plt.subplot(2,2,2)
plt.plot(x[samples],y[samples],'x',alpha=0.55, c='orange')
plt.xlim(95,105)
plt.ylim(195,205)
plt.subplot(2,2,3)
plt.hist(x,bins=20)
plt.xlim(95,105)

plt.subplot(2,2,4)
plt.hist(x[samples],bins=20)
plt.xlim(95,105)

plt.show()
In [60]:
print(np.mean(x),np.mean(x[samples]))
99.99425202735536 99.99555185927241
In [ ]: