import pandas as pd  # Manipulating dataframes, boolean logic
import numpy as np  # numerical play stuff
import matplotlib.pyplot as plt  # plotting functions
import seaborn as sns  # prettier plotting

#code to make stuff appear
%matplotlib inline 
%config InlineBackend.figure_format='retina' #Sharp graphs, higher resolution


dataX = pd.read_csv('data/MAT110Survey/110Statdata.csv')


dataX.head(10)


dataX.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 825 entries, 0 to 824
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Q       825 non-null    int64  
 1   M       825 non-null    float64
 2   M .1    825 non-null    float64
 3   M .2    825 non-null    object 
 4   M .3    825 non-null    float64
 5   M .4    825 non-null    float64
 6   F       825 non-null    float64
 7   F.1     825 non-null    float64
 8   F.2     825 non-null    object 
 9   F.3     825 non-null    object 
 10  F.4     824 non-null    object 
dtypes: float64(6), int64(1), object(4)
memory usage: 71.0+ KB


len(dataX)

825


dataX.transpose()


dataX[0:5].transpose().reset_index()


dataX[5:10].transpose().reset_index()


np.concatenate((dataX[0:5].transpose().reset_index()[1:11].values,dataX[5:10].transpose().reset_index()[1:11].values))

array([['M ', 71.0, 162.0, 10.0, 35.0, 7.0],
       ['M .1', 67.0, 140.0, 11.0, 7.0, 28.0],
       ['M .2', '75', '177', '12', '3', '3'],
       ['M .3', 72.0, 210.0, 11.0, 11.0, 14.0],
       ['M .4', 72.0, 180.0, 11.0, 20.0, 15.0],
       ['F', 69.0, 136.0, 9.0, 63.0, 21.0],
       ['F.1', 70.0, 162.0, 10.0, 1.0, 0.0],
       ['F.2', '67', '152', '8', '5', '3'],
       ['F.3', '61', '135', '8', '9', '7'],
       ['F.4', '67', '150', '7', '8', '3'],
       ['M ', 70.0, 140.0, 10.0, 14.0, 5.0],
       ['M .1', 70.0, 175.0, 8.0, 2.0, 0.0],
       ['M .2', '72', '190', '11', '2', '20'],
       ['M .3', 71.0, 240.0, 11.0, 6.0, 12.0],
       ['M .4', 71.0, 280.0, 11.0, 10.0, 10.0],
       ['F', 68.0, 124.0, 7.0, 4.0, 10.0],
       ['F.1', 65.0, 140.0, 9.0, 5.0, 2.0],
       ['F.2', '62', '110', '7.5', '1', '1'],
       ['F.3', '64', '147', '8', '10', '28'],
       ['F.4', '64', '125', '6.5', '20', '3']], dtype=object)


len(dataX)//5

165


# Reshape the wide survey table into one row per respondent.
# Each run of 5 consecutive rows in dataX is one group; transposing a group
# turns every M/F column into a row, and [1:11] drops the leading 'Q' row so
# that only the ten respondent rows remain.
# The pieces are collected in a list and joined once, rather than re-copying
# the growing array on every pass. Rows left over when len(dataX) is not a
# multiple of 5 are ignored, as before.
header = [['Sex','Height','Weight','Shoe','tv','Soda']]
groups = [
    dataX[5*c:5*c + 5].transpose().reset_index()[1:11].values
    for c in range(len(dataX)//5)
]
New_set = np.concatenate([header] + groups)


dataY=pd.DataFrame(New_set)


dataY.columns=dataY.iloc[0]


dataY.head()


dataY.iloc[3]["tv"]

'3'


dataY.tv[11]

14.0


dataY=dataY[1:]


dataY.Sex=dataY.Sex.str[0]


dataY.tail()


dataY=dataY.reset_index(drop=True)


dataY.head()


dataY.tail()


dataY.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1650 entries, 0 to 1649
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Sex     1650 non-null   object
 1   Height  1650 non-null   object
 2   Weight  1650 non-null   object
 3   Shoe    1650 non-null   object
 4   tv      1649 non-null   object
 5   Soda    1650 non-null   object
dtypes: object(6)
memory usage: 77.5+ KB


dataY.Height.str.isnumeric()

0        NaN
1        NaN
2       True
3        NaN
4        NaN
        ... 
1645     NaN
1646     NaN
1647    True
1648    True
1649    True
Name: Height, Length: 1650, dtype: object


dataY.Height = dataY.Height.astype(float)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-44-1fe343f08823> in <module>
----> 1 dataY.Height = dataY.Height.astype(float)

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors)
   5544         else:
   5545             # else, only a single dtype is given
-> 5546             new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
   5547             return self._constructor(new_data).__finalize__(self, method="astype")
   5548 

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors)
    593         self, dtype, copy: bool = False, errors: str = "raise"
    594     ) -> "BlockManager":
--> 595         return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
    596 
    597     def convert(

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, **kwargs)
    404                 applied = b.apply(f, **kwargs)
    405             else:
--> 406                 applied = getattr(b, f)(**kwargs)
    407             result_blocks = _extend_blocks(applied, result_blocks)
    408 

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors)
    593             vals1d = values.ravel()
    594             try:
--> 595                 values = astype_nansafe(vals1d, dtype, copy=True)
    596             except (ValueError, TypeError):
    597                 # e.g. astype_nansafe can fail on object-dtype of strings

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
    993     if copy or is_object_dtype(arr) or is_object_dtype(dtype):
    994         # Explicit copy, or required since NumPy can't view from / to object.
--> 995         return arr.astype(dtype, copy=True)
    996 
    997     return arr.view(dtype)

ValueError: could not convert string to float: '70 3/4'


dataY[dataY.Height=="70 3/4"]


dataY[1427:1447]


# Replace the mixed-fraction strings seen in rows 1427-1446 with decimals so
# the columns can be cast to float.
# .loc writes straight into dataY; the chained form dataY.Height[i] = value
# can end up writing to a temporary copy and raises SettingWithCopyWarning.
dataY.loc[1437, 'Height'] = 70.75  # was '70 3/4'
dataY.loc[1432, 'Shoe'] = 8.5      # was '8 1/2'
dataY.loc[1437, 'Shoe'] = 10.5     # was '10 1/2'
dataY.loc[1438, 'Shoe'] = 5.5      # was '5 1/2'


# Cast every measurement column from object to float.
# NOTE: astype(float) raises ValueError on the first entry that is not a plain
# number; in the run recorded below this stopped at the Soda column.
dataY.Height = dataY.Height.astype(float)
dataY.Weight = dataY.Weight.astype(float)
dataY.Shoe = dataY.Shoe.astype(float)
dataY.tv = dataY.tv.astype(float)
dataY.Soda = dataY.Soda.astype(float)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-51-9da1037f56ae> in <module>
      3 dataY.Shoe = dataY.Shoe.astype(float)
      4 dataY.tv = dataY.tv.astype(float)
----> 5 dataY.Soda = dataY.Soda.astype(float)

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in astype(self, dtype, copy, errors)
   5544         else:
   5545             # else, only a single dtype is given
-> 5546             new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
   5547             return self._constructor(new_data).__finalize__(self, method="astype")
   5548 

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in astype(self, dtype, copy, errors)
    593         self, dtype, copy: bool = False, errors: str = "raise"
    594     ) -> "BlockManager":
--> 595         return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
    596 
    597     def convert(

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, **kwargs)
    404                 applied = b.apply(f, **kwargs)
    405             else:
--> 406                 applied = getattr(b, f)(**kwargs)
    407             result_blocks = _extend_blocks(applied, result_blocks)
    408 

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/internals/blocks.py in astype(self, dtype, copy, errors)
    593             vals1d = values.ravel()
    594             try:
--> 595                 values = astype_nansafe(vals1d, dtype, copy=True)
    596             except (ValueError, TypeError):
    597                 # e.g. astype_nansafe can fail on object-dtype of strings

/opt/anaconda3/lib/python3.8/site-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy, skipna)
    993     if copy or is_object_dtype(arr) or is_object_dtype(dtype):
    994         # Explicit copy, or required since NumPy can't view from / to object.
--> 995         return arr.astype(dtype, copy=True)
    996 
    997     return arr.view(dtype)

ValueError: could not convert string to float: ''


dataY[dataY.Soda==" "]


# Overwrite the blank Soda answer in row 1279 so the column can be cast to
# float. .loc assigns on dataY itself and avoids the SettingWithCopyWarning
# that the chained dataY.Soda[1279] = 0 form produces.
# NOTE(review): 0 is an imputed value for a missing answer — confirm that is
# preferable to leaving it as NaN.
dataY.loc[1279, 'Soda'] = 0

<ipython-input-53-406c15af3e6b>:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataY.Soda[1279]=0


dataY =pd.DataFrame(dataY)


# Convert each measurement column from object to float, one column at a time
# and in the same order as before.
for _col in ('Height', 'Weight', 'Shoe', 'tv', 'Soda'):
    dataY[_col] = dataY[_col].astype(float)


dataY.tail()


len(dataY)*5

8250


plt.hist(dataY.Soda)

(array([927., 386., 167.,  92.,  40.,  21.,   6.,   1.,   7.,   3.]),
 array([ 0.,  6., 12., 18., 24., 30., 36., 42., 48., 54., 60.]),
 <BarContainer object of 10 artists>)


# Body-mass index: Weight divided by Height squared, scaled by 703.
# presumably Weight is in pounds and Height in inches (703 is the usual
# factor for those units) — TODO confirm against the survey questions.
dataY['BMI'] = dataY.Weight/(dataY.Height)**2*703


plt.hist(dataY.BMI)

(array([124., 966., 431., 103.,  21.,   4.,   0.,   0.,   0.,   1.]),
 array([11.9005848 , 19.03005294, 26.15952109, 33.28898924, 40.41845739,
        47.54792553, 54.67739368, 61.80686183, 68.93632998, 76.06579812,
        83.19526627]),
 <BarContainer object of 10 artists>)


dataY.to_csv('data/MAT110Survey/110Survey_fixed.csv')


dataY.to_json('data/MAT110Survey/110Survey_fixed.json')


import pandas as pd  # Manipulating dataframes, boolean logic
import numpy as np  # numerical play stuff
import matplotlib.pyplot as plt  # plotting functions
import seaborn as sns  # prettier plotting
import statsmodels.api as sm


#code to make stuff appear
%matplotlib inline 
%config InlineBackend.figure_format='retina' #Sharp graphs, higher resolution


dataX = pd.read_csv('data/MAT110Survey/110Survey_fixed.csv', index_col=0)


dataX.head()


dataX.columns = ['Sex','Height','Weight','Shoe','TV','Soda', 'BMI']


dataX.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1650 entries, 0 to 1649
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Sex     1650 non-null   object 
 1   Height  1650 non-null   float64
 2   Weight  1650 non-null   float64
 3   Shoe    1650 non-null   float64
 4   TV      1649 non-null   float64
 5   Soda    1650 non-null   float64
 6   BMI     1650 non-null   float64
dtypes: float64(6), object(1)
memory usage: 103.1+ KB


dataX.describe()


bob = plt.hist(dataX.Height,bins=10)


# Box plot of Height for the full data set.
sns.boxplot(data=dataX, x='Height')
plt.show()
# Same plot restricted to rows with BMI of at least 25 whose Sex is not "F".
sns.boxplot(data=dataX[(dataX.Sex != "F") & (dataX.BMI >= 25)], x='Height')

<AxesSubplot:xlabel='Height'>


fred = sns.histplot(dataX.Height)


dataX[(dataX.BMI>=25)&~(dataX.Sex=="F")].head(20)


sns.displot(data=dataX.Height, kind='kde')

<seaborn.axisgrid.FacetGrid at 0x7ff258ad5df0>


sns.displot(data=dataX[dataX.Sex=='M'].Height, kind='kde')
plt.show()
sns.displot(data=dataX[dataX.Sex=='F'].Height, kind='kde')

<seaborn.axisgrid.FacetGrid at 0x7ff271b9c2e0>


# Overlaid Height distributions for each Sex.
# sns.distplot is deprecated; histplot with kde=True and stat="density" is
# its replacement. hue="Sex" draws both groups in one call and adds a legend,
# and common_norm=False normalises each group on its own, as two separate
# distplot calls did.
plt.figure(figsize=(13,6))
sns.histplot(data=dataX, x="Height", hue="Sex", stat="density",
             common_norm=False, kde=True, kde_kws={"cut": 3})
plt.show()


import warnings
warnings.filterwarnings('ignore')


plt.figure(figsize=(13,6))

plt.scatter(dataX.Height,dataX.Weight)
plt.xlabel('Height')
plt.ylabel('Weight')

plt.show()


sns.displot(dataX.BMI)

<seaborn.axisgrid.FacetGrid at 0x7ff278d33670>


# Overlaid BMI distributions for each Sex.
# sns.distplot is deprecated; histplot with kde=True and stat="density" is
# its replacement. hue="Sex" labels the two groups in a legend and
# common_norm=False keeps each group's density normalised separately.
sns.histplot(data=dataX, x="BMI", hue="Sex", stat="density",
             common_norm=False, kde=True, kde_kws={"cut": 3})

<AxesSubplot:xlabel='BMI', ylabel='Density'>


plt.figure(figsize=(13,6))
sns.scatterplot(data=dataX, x='Height', y = 'Weight', hue='Sex', alpha =1, x_jitter=5)

<AxesSubplot:xlabel='Height', ylabel='Weight'>


# jointplot is figure-level: it builds its own (square) figure, so a preceding
# plt.figure(...) only leaves behind an empty figure. Size it with height=.
sns.jointplot(data=dataX, x='Height', y='Weight', hue='Sex', height=10)

<seaborn.axisgrid.JointGrid at 0x7ff2798ecc10>

<Figure size 936x720 with 0 Axes>


sns.pairplot(dataX[dataX.Sex=="M"][['Height','Weight','Shoe','BMI']])
sns.pairplot(dataX[dataX.Sex=="F"][['Height','Weight','Shoe','BMI']])

<seaborn.axisgrid.PairGrid at 0x7ff2696e3cd0>


# Height vs Weight for rows with Sex == "M", each point coloured by its BMI
# on the 'plasma' colormap, with a colorbar alongside.
plt.figure(figsize=(13, 8))
plt.scatter(
    x=dataX.loc[dataX.Sex == "M", "Height"],
    y=dataX.loc[dataX.Sex == "M", "Weight"],
    c=dataX.loc[dataX.Sex == "M", "BMI"],
    cmap='plasma',
    alpha=0.5,
)
cbar = plt.colorbar()


# Edges and labels for the four BMI categories.
BMI_bins = [0,18.5,25,30,np.inf]

BMI_names = ['Underweight',"Normal",'Overweight','Obese']

# Bin the BMI column of dataX itself (this notebook only loads dataX; dataY is
# never defined here). right=False makes every bin [low, high), so a BMI of
# exactly 25 counts as 'Overweight', matching the BMI>=25 filters used above.
dataX['BMI_levels']= pd.cut(dataX.BMI,BMI_bins,labels=BMI_names,right=False)


dataX.head(20)


dataX.BMI.max()

83.19526627218934


plt.figure(figsize=(13,6))
sns.scatterplot(x='Height', y='Weight', data=dataX, hue = "BMI_levels")

<AxesSubplot:xlabel='Height', ylabel='Weight'>


levels = BMI_names


import matplotlib.colors as mcolors


import matplotlib.colors as mcolors


def plot_colortable(colors, title, sort_colors=True, emptycols=0):
    """Draw a grid of labelled colour swatches and return the figure.

    Parameters
    ----------
    colors : mapping
        Maps each colour name to a value accepted by ``mcolors.to_rgb``.
    title : str
        Text placed at the top-left of the table.
    sort_colors : bool
        When exactly ``True``, names are ordered by (hue, saturation, value,
        name); otherwise the mapping's own order is used.
    emptycols : int
        Number of the four layout columns to leave unused.

    Returns
    -------
    The matplotlib figure holding the table.
    """
    # Layout constants, in pixels at the dpi used below.
    cell_width, cell_height = 212, 22
    swatch_width = 48
    margin, topmargin = 12, 40
    dpi = 72

    if sort_colors is True:
        # Order by HSV first, falling back to the name to break ties.
        names = sorted(
            colors,
            key=lambda label: (
                tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(colors[label]))),
                label,
            ),
        )
    else:
        names = list(colors)

    # Rows needed to fit every name into the usable columns (rounded up).
    ncols = 4 - emptycols
    full_rows, leftover = divmod(len(names), ncols)
    nrows = full_rows + (1 if leftover > 0 else 0)

    # The canvas is always four cells wide, even when some columns stay empty.
    width = cell_width * 4 + 2 * margin
    height = cell_height * nrows + margin + topmargin

    fig, ax = plt.subplots(figsize=(width / dpi, height / dpi), dpi=dpi)
    fig.subplots_adjust(
        left=margin / width,
        bottom=margin / height,
        right=(width - margin) / width,
        top=(height - topmargin) / height,
    )
    ax.set_xlim(0, cell_width * 4)
    # Inverted y-limits so that row 0 is drawn at the top.
    ax.set_ylim(cell_height * (nrows - 0.5), -cell_height / 2.)
    ax.yaxis.set_visible(False)
    ax.xaxis.set_visible(False)
    ax.set_axis_off()
    ax.set_title(title, fontsize=24, loc="left", pad=10)

    # Fill the table column by column.
    for position, label in enumerate(names):
        col, row = divmod(position, nrows)
        y = row * cell_height
        left_edge = cell_width * col
        right_edge = left_edge + swatch_width

        ax.text(right_edge + 7, y, label, fontsize=14,
                ha='left', va='center')
        ax.hlines(y, left_edge, right_edge,
                  color=colors[label], linewidth=18)

    return fig

# Draw one table per built-in matplotlib colour dictionary. The two small
# palettes keep their own order and leave some of the four columns unused.
plot_colortable(mcolors.BASE_COLORS, "Base Colors",
                sort_colors=False, emptycols=1)
plot_colortable(mcolors.TABLEAU_COLORS, "Tableau Palette",
                sort_colors=False, emptycols=2)

#sphinx_gallery_thumbnail_number = 3
# CSS colours use the defaults: sorted by hue/saturation/value, four columns.
plot_colortable(mcolors.CSS4_COLORS, "CSS Colors")

# Optionally plot the XKCD colors (Caution: will produce large figure)
#xkcd_fig = plot_colortable(mcolors.XKCD_COLORS, "XKCD Colors")
#xkcd_fig.savefig("XKCD_Colors.png")

# Render all figures created above.
plt.show()


# CSS colour names ordered by (hue, saturation, value), with the name itself
# as the final tie-breaker.
by_hsv = sorted(
    (tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(rgb))), label)
    for label, rgb in mcolors.CSS4_COLORS.items()
)
names = [label for _, label in by_hsv]


names

['black',
 'dimgray',
 'dimgrey',
 'gray',
 'grey',
 'darkgray',
 'darkgrey',
 'silver',
 'lightgray',
 'lightgrey',
 'gainsboro',
 'whitesmoke',
 'white',
 'snow',
 'rosybrown',
 'lightcoral',
 'indianred',
 'brown',
 'firebrick',
 'maroon',
 'darkred',
 'red',
 'mistyrose',
 'salmon',
 'tomato',
 'darksalmon',
 'coral',
 'orangered',
 'lightsalmon',
 'sienna',
 'seashell',
 'chocolate',
 'saddlebrown',
 'sandybrown',
 'peachpuff',
 'peru',
 'linen',
 'bisque',
 'darkorange',
 'burlywood',
 'antiquewhite',
 'tan',
 'navajowhite',
 'blanchedalmond',
 'papayawhip',
 'moccasin',
 'orange',
 'wheat',
 'oldlace',
 'floralwhite',
 'darkgoldenrod',
 'goldenrod',
 'cornsilk',
 'gold',
 'lemonchiffon',
 'khaki',
 'palegoldenrod',
 'darkkhaki',
 'ivory',
 'beige',
 'lightyellow',
 'lightgoldenrodyellow',
 'olive',
 'yellow',
 'olivedrab',
 'yellowgreen',
 'darkolivegreen',
 'greenyellow',
 'chartreuse',
 'lawngreen',
 'honeydew',
 'darkseagreen',
 'palegreen',
 'lightgreen',
 'forestgreen',
 'limegreen',
 'darkgreen',
 'green',
 'lime',
 'seagreen',
 'mediumseagreen',
 'springgreen',
 'mintcream',
 'mediumspringgreen',
 'mediumaquamarine',
 'aquamarine',
 'turquoise',
 'lightseagreen',
 'mediumturquoise',
 'azure',
 'lightcyan',
 'paleturquoise',
 'darkslategray',
 'darkslategrey',
 'teal',
 'darkcyan',
 'aqua',
 'cyan',
 'darkturquoise',
 'cadetblue',
 'powderblue',
 'lightblue',
 'deepskyblue',
 'skyblue',
 'lightskyblue',
 'steelblue',
 'aliceblue',
 'dodgerblue',
 'lightslategray',
 'lightslategrey',
 'slategray',
 'slategrey',
 'lightsteelblue',
 'cornflowerblue',
 'royalblue',
 'ghostwhite',
 'lavender',
 'midnightblue',
 'navy',
 'darkblue',
 'mediumblue',
 'blue',
 'slateblue',
 'darkslateblue',
 'mediumslateblue',
 'mediumpurple',
 'rebeccapurple',
 'blueviolet',
 'indigo',
 'darkorchid',
 'darkviolet',
 'mediumorchid',
 'thistle',
 'plum',
 'violet',
 'purple',
 'darkmagenta',
 'fuchsia',
 'magenta',
 'orchid',
 'mediumvioletred',
 'deeppink',
 'hotpink',
 'lavenderblush',
 'palevioletred',
 'crimson',
 'pink',
 'lightpink']


names[::6]

['black',
 'darkgrey',
 'white',
 'firebrick',
 'tomato',
 'seashell',
 'linen',
 'navajowhite',
 'oldlace',
 'lemonchiffon',
 'lightyellow',
 'darkolivegreen',
 'palegreen',
 'lime',
 'mediumaquamarine',
 'lightcyan',
 'aqua',
 'deepskyblue',
 'lightslategray',
 'royalblue',
 'mediumblue',
 'rebeccapurple',
 'thistle',
 'magenta',
 'palevioletred']


Color_map={}


# Take every 8th colour name starting at position 10 and assign them to the
# BMI levels by position. zip pairs the two sequences directly, avoiding a
# levels.index(x) scan per item (which would also pick the wrong colour if a
# label ever appeared twice).
new_names = names[10::8]
for level, colour in zip(levels, new_names):
    print(level, colour)
    Color_map[level] = colour

Underweight gainsboro
Normal firebrick
Overweight coral
Obese peachpuff


Color_map["Underweight"]

'gainsboro'


# Height vs Weight for Sex == "M", each point coloured through Color_map
# according to its BMI level.
plt.figure(figsize=(13, 5))
plt.scatter(
    x=dataX.loc[dataX.Sex == "M", "Height"],
    y=dataX.loc[dataX.Sex == "M", "Weight"],
    c=dataX.loc[dataX.Sex == 'M', "BMI_levels"].apply(lambda level: Color_map[level]),
    alpha=0.5,
)
plt.show()

# The same plot for Sex == "F".
plt.figure(figsize=(13, 5))
plt.scatter(
    x=dataX.loc[dataX.Sex == "F", "Height"],
    y=dataX.loc[dataX.Sex == "F", "Weight"],
    c=dataX.loc[dataX.Sex == 'F', "BMI_levels"].apply(lambda level: Color_map[level]),
    alpha=0.5,
)
plt.show()


plt.figure(figsize=(13,5))
sns.scatterplot(y='Shoe',x='BMI',data=dataX[dataX.BMI<=50], hue = 'Sex')

<AxesSubplot:xlabel='BMI', ylabel='Shoe'>


# Regress BMI on Shoe. sm.OLS does not add an intercept by itself, so without
# add_constant the line is forced through the origin and the summary reports
# an uncentered R-squared. add_constant prepends a 'const' column.
model = sm.OLS(dataX.BMI, sm.add_constant(dataX.Shoe)).fit()


model.summary()


import pandas as pd  # Manipulating dataframes, boolean logic
import numpy as np  # numerical play stuff
import matplotlib.pyplot as plt  # plotting functions
import seaborn as sns  # prettier plotting
import statsmodels.api as sm


#code to make stuff appear
%matplotlib inline 
%config InlineBackend.figure_format='retina' #Sharp graphs, higher resolution


dataX = pd.read_csv('data/MAT110Survey/110Survey_fixed.csv', index_col=0)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-24efddeb8487> in <module>
----> 1 sns.heatmap(data=dataX[dataX.Sex=="M"])

/opt/anaconda3/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48 

/opt/anaconda3/lib/python3.8/site-packages/seaborn/matrix.py in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, xticklabels, yticklabels, mask, ax, **kwargs)
    543     """
    544     # Initialize the plotter object
--> 545     plotter = _HeatMapper(data, vmin, vmax, cmap, center, robust, annot, fmt,
    546                           annot_kws, cbar, cbar_kws, xticklabels,
    547                           yticklabels, mask)

/opt/anaconda3/lib/python3.8/site-packages/seaborn/matrix.py in __init__(self, data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, cbar, cbar_kws, xticklabels, yticklabels, mask)
    163 
    164         # Determine good default values for the colormapping
--> 165         self._determine_cmap_params(plot_data, vmin, vmax,
    166                                     cmap, center, robust)
    167 

/opt/anaconda3/lib/python3.8/site-packages/seaborn/matrix.py in _determine_cmap_params(self, plot_data, vmin, vmax, cmap, center, robust)
    197 
    198         # plot_data is a np.ma.array instance
--> 199         calc_data = plot_data.astype(float).filled(np.nan)
    200         if vmin is None:
    201             if robust:

ValueError: could not convert string to float: 'M'


np.random.random()

0.47147260589615314


np.random.random(3000000)

array([0.63108745, 0.03054646, 0.94150723, ..., 0.42674499, 0.74178852,
       0.61746057])

	Height	Weight	Shoe	TV	Soda	BMI
count	1650.000000	1650.000000	1650.000000	1649.000000	1650.000000	1650.000000
mean	67.603891	164.700606	9.466303	11.660400	7.055455	25.144393
std	5.059790	44.749529	2.432196	9.693915	8.041798	5.480159
min	40.000000	41.000000	3.000000	0.000000	0.000000	11.900585
25%	64.000000	130.000000	8.000000	5.000000	1.000000	21.410418
50%	68.000000	156.000000	9.500000	10.000000	5.000000	24.126627
75%	71.000000	189.750000	11.000000	15.000000	10.000000	27.703134
max	96.000000	500.000000	17.000000	96.000000	60.000000	83.195266

Dep. Variable:	BMI	R-squared (uncentered):	0.929
Model:	OLS	Adj. R-squared (uncentered):	0.929
Method:	Least Squares	F-statistic:	2.158e+04
Date:	Wed, 27 Jan 2021	Prob (F-statistic):	0.00
Time:	13:47:46	Log-Likelihood:	-5517.9
No. Observations:	1650	AIC:	1.104e+04
Df Residuals:	1649	BIC:	1.104e+04
Df Model:	1
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Shoe	2.5379	0.017	146.893	0.000	2.504	2.572

Omnibus:	347.780	Durbin-Watson:	1.399
Prob(Omnibus):	0.000	Jarque-Bera (JB):	2262.980
Skew:	0.820	Prob(JB):	0.00
Kurtosis:	8.498	Cond. No.	1.00

MAT411 Week 2

Let's look at real data

MAT411 Week 2b

Problem 1

	Q	M	M .1	M .2	M .3	M .4	F	F.1	F.2	F.3	F.4
0	1	71.0	67.0	75	72.0	72.0	69.0	70.0	67	61	67
1	2	162.0	140.0	177	210.0	180.0	136.0	162.0	152	135	150
2	3	10.0	11.0	12	11.0	11.0	9.0	10.0	8	8	7
3	4	35.0	7.0	3	11.0	20.0	63.0	1.0	5	9	8
4	5	7.0	28.0	3	14.0	15.0	21.0	0.0	3	7	3
5	1	70.0	70.0	72	71.0	71.0	68.0	65.0	62	64	64
6	2	140.0	175.0	190	240.0	280.0	124.0	140.0	110	147	125
7	3	10.0	8.0	11	11.0	11.0	7.0	9.0	7.5	8	6.5
8	4	14.0	2.0	2	6.0	10.0	4.0	5.0	1	10	20
9	5	5.0	0.0	20	12.0	10.0	10.0	2.0	1	28	3

	0	1	2	3	4	5	6	7	8	9	...	815	816	817	818	819	820	821	822	823	824
Q	1	2	3	4	5	1	2	3	4	5	...	1	2	3	4	5	1	2	3	4	5
M	71	162	10	35	7	70	140	10	14	5	...	72	172	12	5	0	71	149	12	15	5
M .1	67	140	11	7	28	70	175	8	2	0	...	69	180	12.5	3	3	70	164	11.5	30	11
M .2	75	177	12	3	3	72	190	11	2	20	...	74	176	10	7	5	71	127	10	25	0
M .3	72	210	11	11	14	71	240	11	6	12	...	67	192	11	8	4	72	213	12	0	0
M .4	72	180	11	20	15	71	280	11	10	10	...	68	201	13	6	6	68	180	11	30	20
F	69	136	9	63	21	68	124	7	4	10	...	62	154	8.5	6	0	71	148	6	15	1
F.1	70	162	10	1	0	65	140	9	5	2	...	64	132	9	8	1	65	110	4	5	0
F.2	67	152	8	5	3	62	110	7.5	1	1	...	70	128	9	3	2	64	116	5	5	0
F.3	61	135	8	9	7	64	147	8	10	28	...	63	167	7.5	2	5	70	134	8	15	0
F.4	67	150	7	8	3	64	125	6.5	20	3	...	62	133	7	5	0	73	185	7	30	5

	Sex	Height	Weight	Shoe	tv	Soda
1646	F	71	148	6	15	1
1647	F	65	110	4	5	0
1648	F	64	116	5	5	0
1649	F	70	134	8	15	0
1650	F	73	185	7	30	5

	Sex	Height	Weight	Shoe	tv	Soda
0	M	71	162	10	35	7
1	M	67	140	11	7	28
2	M	75	177	12	3	3
3	M	72	210	11	11	14
4	M	72	180	11	20	15

	Sex	Height	Weight	Shoe	tv	Soda
1645	F	71	148	6	15	1
1646	F	65	110	4	5	0
1647	F	64	116	5	5	0
1648	F	70	134	8	15	0
1649	F	73	185	7	30	5

	Sex	Height	Weight	Shoe	tv	Soda
1427	F	74	180	11	6	2
1428	F	68	140	9	7	1
1429	F	65	203	7.5	14	4
1430	M	68	139	11	0	0
1431	M	72	155	12	1	1
1432	M	70	130	8 1/2	10	5
1433	M	72	220	15	96	0
1434	M	73	220	13	4	10
1435	F	61	125	8	10	0
1436	F	60	120	6	3	3
1437	F	70 3/4	170	10 1/2	8	5
1438	F	62	120	5 1/2	5	0
1439	F	63	132	7	5	6
1440	M	72	250	11	35	2
1441	M	76	300	12	50	14
1442	M	73	200	11	0	0
1443	M	78	196	10	5	1
1444	M	72	208	13	20	6
1445	F	65	200	9	35	25
1446	F	62	165	10	2	4

	Sex	Height	Weight	Shoe	tv	Soda
1645	F	71.0	148.0	6.0	15.0	1.0
1646	F	65.0	110.0	4.0	5.0	0.0
1647	F	64.0	116.0	5.0	5.0	0.0
1648	F	70.0	134.0	8.0	15.0	0.0
1649	F	73.0	185.0	7.0	30.0	5.0

	Sex	Height	Weight	Shoe	tv	Soda
0	M	71	162	10	35	7
1	M	67	140	11	7	28
2	M	75	177	12	3	3
3	M	72	210	11	11	14
4	M	72	180	11	20	15

	Sex	Height	Weight	Shoe	tv	Soda
0	M	71	162	10	35	7
1	M	67	140	11	7	28
2	M	75	177	12	3	3
3	M	72	210	11	11	14
4	M	72	180	11	20	15