import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format='retina' #Sharp graphs, higher resolution
# Uniform samples on [0, 1): a flat vector of 100 values and a 300 x 500 matrix.
x = np.random.random(size=100)
y = np.random.random(size=(300, 500))
x
array([0.72849772, 0.28634765, 0.51250514, 0.12223006, 0.20499995,
0.97943066, 0.89593034, 0.93264747, 0.6990873 , 0.4476208 ,
0.17922023, 0.88086146, 0.90034927, 0.85839652, 0.36774522,
0.92775528, 0.1990322 , 0.70765016, 0.98386024, 0.6303022 ,
0.33444369, 0.46200633, 0.67329615, 0.04208176, 0.01933778,
0.64850699, 0.65444804, 0.50877939, 0.35702145, 0.74000317,
0.26780376, 0.18268975, 0.64709331, 0.45530489, 0.21019303,
0.07332238, 0.69304089, 0.0820179 , 0.2799592 , 0.33082313,
0.58428732, 0.09025344, 0.7034371 , 0.73103803, 0.50399405,
0.55354515, 0.62670808, 0.69993811, 0.03142535, 0.04919653,
0.59894273, 0.29196858, 0.20118783, 0.41229857, 0.53640477,
0.57474109, 0.17812323, 0.33802916, 0.38649237, 0.48716238,
0.19490078, 0.94208024, 0.14982987, 0.45643282, 0.12844383,
0.58477759, 0.64205975, 0.92245012, 0.65224673, 0.04545392,
0.92779547, 0.88744108, 0.21585594, 0.36714833, 0.44339485,
0.17460866, 0.41274068, 0.65362162, 0.43761918, 0.26878401,
0.29845683, 0.51557875, 0.52447941, 0.8529765 , 0.61862564,
0.76241719, 0.50328707, 0.28306359, 0.44934877, 0.0953664 ,
0.39526117, 0.2019546 , 0.97922837, 0.66848224, 0.88907361,
0.65501013, 0.26024173, 0.5920493 , 0.16029675, 0.51547966])
# Histogram of x with matplotlib's default binning.
plt.hist(x)
plt.show()
# Keep plt.hist's return value this time: its first element holds the
# per-bin counts, so bob[0][2] is the number of samples in the third bin.
bob=plt.hist(x)
bob[0][2]
9.0
# Redraw x as one million uniform samples, then histogram it twice:
# with 1000 explicit matplotlib bins and with seaborn's own bin selection.
x = np.random.random(size=1_000_000)
g = plt.hist(x, bins=1000)
sns.histplot(x)
<AxesSubplot:ylabel='Count'>
# Redraw x as 1000 uniform samples and show a kernel density estimate of them.
x=np.random.random(1000)
sns.displot(x,kind="kde")
<seaborn.axisgrid.FacetGrid at 0x7f995b329b20>
# Cumulative histogram: bar heights accumulate the counts as the bins increase.
sns.histplot(x,cumulative=True)
<AxesSubplot:ylabel='Count'>
# Replace x with 1000 normal draws (mean 3, standard deviation 1)
# and plot them as dots in draw order (sample index on the horizontal axis).
x = np.random.normal(loc=3, scale=1, size=1000)
plt.plot(x, '.')
[<matplotlib.lines.Line2D at 0x7f9959e12b50>]
# Cumulative count histogram of the current x.
sns.histplot(x,cumulative=True)
<AxesSubplot:ylabel='Count'>
# Horizontal box plot of x.
sns.boxplot(data=x, orient='h')
<AxesSubplot:>
# Arithmetic mean of x.
np.mean(x)
2.9873071782799387
# Standard deviation of x (NumPy's default ddof=0, i.e. it divides by N).
np.std(x)
0.9970066328090376
# Median of x.
np.median(x)
3.011721881611428
# Variance of x; equals the square of the np.std result.
np.var(x)
0.994022225865215
# Peak-to-peak range: max(x) - min(x).
np.ptp(x)
6.208458175832762
# Smallest value in x; np.amin and np.min return the same result.
np.amin(x)
-0.3795143648460684
np.min(x)
-0.3795143648460684
# 27th percentile of x.
np.percentile(x,27)
2.365205037359119
# Synthetic data set: z is roughly proportional to x, with noise that grows with x.
x = np.random.randint(low=10, high=100, size=2000)   # integers in [10, 100)
r = np.random.randint(low=1, high=10)                # one random integer slope in [1, 10)
w = np.random.random(size=2000)                      # per-point noise weight in [0, 1)
n = np.random.normal(loc=0, scale=1, size=2000)      # standard normal noise
# Linear trend r*x plus a noise term w*n*x that scales with x.
z = r * x + w * n * x
z
array([117.73868384, 287.99460851, 319.70747003, ..., 310.588065 ,
39.20358754, 300.62657006])
# Plot z against x as dots; the very low alpha keeps single points faint so
# that overlapping points show up as darker regions.
plt.plot(x,z,'.',alpha=0.052)
[<matplotlib.lines.Line2D at 0x7f9969948f70>]
# Build a one-column frame whose values are x and whose row index is z
# (DataFrame's first two positional parameters are `data` and `index`).
dataX = pd.DataFrame(data=x, index=z)
dataX.head()
| | 0 |
|---|---|
| 117.738684 | 27 |
| 287.994609 | 77 |
| 319.707470 | 84 |
| 182.845268 | 52 |
| 175.606768 | 44 |
# Move the z-valued index into an ordinary column (named "index") and
# replace it with a default 0..n-1 row index.
dataX=dataX.reset_index()
dataX.head()
| | index | 0 |
|---|---|---|
| 0 | 117.738684 | 27 |
| 1 | 287.994609 | 77 |
| 2 | 319.707470 | 84 |
| 3 | 182.845268 | 52 |
| 4 | 175.606768 | 44 |
# Name the two columns: the former index column holds z, the data column holds x.
dataX.columns =['z','x']
dataX.head()
| | z | x |
|---|---|---|
| 0 | 117.738684 | 27 |
| 1 | 287.994609 | 77 |
| 2 | 319.707470 | 84 |
| 3 | 182.845268 | 52 |
| 4 | 175.606768 | 44 |
# Show the last 20 rows.
dataX.tail(20)
| | z | x |
|---|---|---|
| 1980 | 279.340770 | 70 |
| 1981 | 197.818556 | 49 |
| 1982 | 82.450744 | 21 |
| 1983 | 80.472200 | 31 |
| 1984 | 376.640677 | 88 |
| 1985 | 290.713836 | 58 |
| 1986 | 396.367726 | 88 |
| 1987 | 36.453017 | 12 |
| 1988 | 50.136872 | 12 |
| 1989 | 207.226602 | 53 |
| 1990 | 210.339484 | 53 |
| 1991 | 78.749481 | 20 |
| 1992 | 338.592799 | 90 |
| 1993 | 77.569325 | 19 |
| 1994 | 237.376062 | 70 |
| 1995 | 346.193869 | 83 |
| 1996 | 229.938570 | 64 |
| 1997 | 310.588065 | 76 |
| 1998 | 39.203588 | 10 |
| 1999 | 300.626570 | 64 |
# Per-column summary statistics: count, mean, std, min, quartiles and max.
dataX.describe()
| | z | x |
|---|---|---|
| count | 2000.000000 | 2000.000000 |
| mean | 213.523321 | 53.601500 |
| std | 107.747149 | 25.817959 |
| min | 26.955958 | 10.000000 |
| 25% | 118.727254 | 30.000000 |
| 50% | 210.185950 | 53.000000 |
| 75% | 296.533153 | 75.000000 |
| max | 543.385580 | 99.000000 |
# Box plots of the dataX columns (z and x) drawn on one shared axis.
sns.boxplot(data=dataX)
<AxesSubplot:>
# Scatter plot of z against x, with the columns looked up by name in dataX.
sns.scatterplot(x='x',y='z',data=dataX)
<AxesSubplot:xlabel='x', ylabel='z'>
# The same scatter drawn directly with matplotlib, using half-transparent markers.
plt.scatter(dataX.x,dataX.z,alpha=0.5)
<matplotlib.collections.PathCollection at 0x7f996999a3a0>
# Joint plot: the x-z relationship together with each variable's marginal distribution.
sns.jointplot(x='x',y='z',data=dataX)
<seaborn.axisgrid.JointGrid at 0x7f993c772c40>
# Grid of pairwise plots across the columns of dataX.
sns.pairplot(dataX)
<seaborn.axisgrid.PairGrid at 0x7f993ce69bb0>
# Scatter plus a fitted regression line, drawn on its own FacetGrid figure.
sns.lmplot(x='x',y='z',data=dataX)
<seaborn.axisgrid.FacetGrid at 0x7f99697dcaf0>
# Regression plot of z against x drawn on a single Axes.
sns.regplot(x='x',y='z',data=dataX)
<AxesSubplot:xlabel='x', ylabel='z'>
# Bivariate kernel density estimate of z against x, drawn as filled red contours.
# `fill=True` replaces `shade=True`: seaborn deprecated `shade` in 0.11 (the
# release that introduced histplot/displot) and warns that it will become an
# error in a later release; `fill` produces the same plot.
sns.kdeplot(x='x', y='z', data=dataX, cmap='Reds', fill=True)
<AxesSubplot:xlabel='x', ylabel='z'>
# Rebind dataX to the contents of data/110Statdata.csv (path relative to the
# working directory); this replaces the synthetic x/z frame.
# NOTE(review): in the displayed rows, F.2, F.3 and F.4 mix values such as 67
# and 7.5 without a uniform decimal format, which suggests those columns were
# read as strings rather than numbers — confirm dtypes before doing arithmetic.
dataX=pd.read_csv('data/110Statdata.csv')
dataX.head(20)
| | Q | M | M .1 | M .2 | M .3 | M .4 | F | F.1 | F.2 | F.3 | F.4 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 71.0 | 67.0 | 75 | 72.0 | 72.0 | 69.0 | 70.0 | 67 | 61 | 67 |
| 1 | 2 | 162.0 | 140.0 | 177 | 210.0 | 180.0 | 136.0 | 162.0 | 152 | 135 | 150 |
| 2 | 3 | 10.0 | 11.0 | 12 | 11.0 | 11.0 | 9.0 | 10.0 | 8 | 8 | 7 |
| 3 | 4 | 35.0 | 7.0 | 3 | 11.0 | 20.0 | 63.0 | 1.0 | 5 | 9 | 8 |
| 4 | 5 | 7.0 | 28.0 | 3 | 14.0 | 15.0 | 21.0 | 0.0 | 3 | 7 | 3 |
| 5 | 1 | 70.0 | 70.0 | 72 | 71.0 | 71.0 | 68.0 | 65.0 | 62 | 64 | 64 |
| 6 | 2 | 140.0 | 175.0 | 190 | 240.0 | 280.0 | 124.0 | 140.0 | 110 | 147 | 125 |
| 7 | 3 | 10.0 | 8.0 | 11 | 11.0 | 11.0 | 7.0 | 9.0 | 7.5 | 8 | 6.5 |
| 8 | 4 | 14.0 | 2.0 | 2 | 6.0 | 10.0 | 4.0 | 5.0 | 1 | 10 | 20 |
| 9 | 5 | 5.0 | 0.0 | 20 | 12.0 | 10.0 | 10.0 | 2.0 | 1 | 28 | 3 |
| 10 | 1 | 69.0 | 70.0 | 76 | 69.0 | 68.0 | 69.0 | 63.0 | 64 | 60 | 60 |
| 11 | 2 | 190.0 | 175.0 | 335 | 225.0 | 136.0 | 140.0 | 130.0 | 135 | 115 | 135 |
| 12 | 3 | 11.0 | 12.0 | 13 | 11.0 | 10.0 | 11.0 | 7.0 | 9 | 7 | 7 |
| 13 | 4 | 6.0 | 20.0 | 7 | 6.0 | 7.0 | 6.0 | 5.0 | 1 | 42 | 10 |
| 14 | 5 | 0.0 | 48.0 | 20 | 10.0 | 0.0 | 0.0 | 0.0 | 2 | 0 | 2 |
| 15 | 1 | 72.0 | 71.0 | 73 | 75.0 | 71.0 | 65.0 | 68.0 | 65 | 66 | 66 |
| 16 | 2 | 310.0 | 140.0 | 180 | 200.0 | 160.0 | 140.0 | 130.0 | 145 | 140 | 150 |
| 17 | 3 | 13.0 | 11.0 | 12 | 13.0 | 11.0 | 8.0 | 7.5 | 8 | 7.5 | 7.5 |
| 18 | 4 | 26.0 | 2.0 | 5 | 10.0 | 4.0 | 5.0 | 10.0 | 7 | 12 | 20 |
| 19 | 5 | 20.0 | 8.0 | 5 | 15.0 | 3.0 | 12.0 | 8.0 | 3 | 3 | 15 |
# Show the last five rows.
dataX.tail()
| | Q | M | M .1 | M .2 | M .3 | M .4 | F | F.1 | F.2 | F.3 | F.4 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 820 | 1 | 71.0 | 70.0 | 71 | 72.0 | 68.0 | 71.0 | 65.0 | 64 | 70 | 73 |
| 821 | 2 | 149.0 | 164.0 | 127 | 213.0 | 180.0 | 148.0 | 110.0 | 116 | 134 | 185 |
| 822 | 3 | 12.0 | 11.5 | 10 | 12.0 | 11.0 | 6.0 | 4.0 | 5 | 8 | 7 |
| 823 | 4 | 15.0 | 30.0 | 25 | 0.0 | 30.0 | 15.0 | 5.0 | 5 | 15 | 30 |
| 824 | 5 | 5.0 | 11.0 | 0 | 0.0 | 20.0 | 1.0 | 0.0 | 0 | 0 | 5 |