Group work for a Monash Research Methods course

Add python checks to the report

+264
wk9/pearson.png

This is a binary file and will not be displayed.

wk9/spearman.png

This is a binary file and will not be displayed.

+12
wk9/week9.tex
··· 12 12 \usepackage[utf8]{inputenc} %support umlauts in the input 13 13 % Easier compilation 14 14 \usepackage{bookmark} 15 + \usepackage{graphicx} 15 16 16 17 \begin{document} 17 18 \title{Week 9 - Correlation and Regression} ··· 34 35 population of cleaned data (removed duplicates etc), alongside several random 35 36 samples and samples of ranking ranges within the top 200. To this end, we made 36 37 use of Microsoft Excel tools and functions of the Python library SciPy. 38 + 39 + TODO: Describe Python method as a sanity check 37 40 38 41 \section{Results} \label{sec:results} 39 42 We performed separate statistical analyses on 10 different samples of the ··· 74 77 weight using different test sets. All data is rounded to 5 decimal 75 78 places} 76 79 \end{table} 80 + 81 + \begin{figure}[ht] 82 + \centering 83 + \label{fig:scipy} 84 + \includegraphics[width=0.6\textwidth]{pearson.png} 85 + \includegraphics[width=0.6\textwidth]{spearman.png} 86 + \caption{The Pearson (top) and Spearman (bottom) correlation coefficients 87 + of the data set as computed by the Pandas Python library} 88 + \end{figure} 77 89 78 90 \section{Discussion} \label{sec:discussion} 79 91 The results generally indicate that there is a fairly strong positive
+252
wk9/wk9.ipynb
··· 1 + { 2 + "cells": [ 3 + { 4 + "cell_type": "code", 5 + "execution_count": 1, 6 + "metadata": {}, 7 + "outputs": [ 8 + { 9 + "name": "stdout", 10 + "output_type": "stream", 11 + "text": [ 12 + "Using matplotlib backend: MacOSX\n", 13 + "Populating the interactive namespace from numpy and matplotlib\n" 14 + ] 15 + } 16 + ], 17 + "source": [ 18 + "%pylab\n", 19 + "%matplotlib inline\n", 20 + "import pandas as pd\n", 21 + "import numpy as np\n", 22 + "import matplotlib.pyplot as plt\n", 23 + "from scipy import stats\n", 24 + "from matplotlib import colors\n", 25 + "\n", 26 + "data = pd.read_csv(\"Tennis players 2017-09.csv\")" 27 + ] 28 + }, 29 + { 30 + "cell_type": "code", 31 + "execution_count": 2, 32 + "metadata": {}, 33 + "outputs": [ 34 + { 35 + "data": { 36 + "text/html": [ 37 + "<style type=\"text/css\" >\n", 38 + " #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0 {\n", 39 + " background-color: #fc7f00;\n", 40 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1 {\n", 41 + " background-color: #ffd20c;\n", 42 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2 {\n", 43 + " background-color: #ffe619;\n", 44 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3 {\n", 45 + " background-color: #f1f44d;\n", 46 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0 {\n", 47 + " background-color: #ffd20c;\n", 48 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1 {\n", 49 + " background-color: #fc7f00;\n", 50 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2 {\n", 51 + " background-color: #e4ff7a;\n", 52 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3 {\n", 53 + " background-color: #e8fc6c;\n", 54 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0 {\n", 55 + " background-color: #ffe619;\n", 56 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1 {\n", 57 + " background-color: #e4ff7a;\n", 58 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2 {\n", 59 + " background-color: #fc7f00;\n", 60 + " } 
#T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3 {\n", 61 + " background-color: #fe9800;\n", 62 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0 {\n", 63 + " background-color: #f1f44d;\n", 64 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1 {\n", 65 + " background-color: #e8fc6c;\n", 66 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2 {\n", 67 + " background-color: #fe9800;\n", 68 + " } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3 {\n", 69 + " background-color: #fc7f00;\n", 70 + " }</style> \n", 71 + "<table id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2\" > \n", 72 + "<thead> <tr> \n", 73 + " <th class=\"blank level0\" ></th> \n", 74 + " <th class=\"col_heading level0 col0\" >DOB</th> \n", 75 + " <th class=\"col_heading level0 col1\" >RANK</th> \n", 76 + " <th class=\"col_heading level0 col2\" >HEIGHT</th> \n", 77 + " <th class=\"col_heading level0 col3\" >Weight</th> \n", 78 + " </tr></thead> \n", 79 + "<tbody> <tr> \n", 80 + " <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n", 81 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n", 82 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.277766</td> \n", 83 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.139684</td> \n", 84 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >-0.030479</td> \n", 85 + " </tr> <tr> \n", 86 + " <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n", 87 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.277766</td> \n", 88 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n", 89 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.16755</td> \n", 90 + " <td 
id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.121946</td> \n", 91 + " </tr> <tr> \n", 92 + " <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n", 93 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.139684</td> \n", 94 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.16755</td> \n", 95 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n", 96 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.779526</td> \n", 97 + " </tr> <tr> \n", 98 + " <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n", 99 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >-0.030479</td> \n", 100 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.121946</td> \n", 101 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.779526</td> \n", 102 + " <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n", 103 + " </tr></tbody> \n", 104 + "</table> " 105 + ], 106 + "text/plain": [ 107 + "<pandas.io.formats.style.Styler at 0x1a197d7b38>" 108 + ] 109 + }, 110 + "execution_count": 2, 111 + "metadata": {}, 112 + "output_type": "execute_result" 113 + } 114 + ], 115 + "source": [ 116 + "def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n", 117 + " rng = M - m\n", 118 + " norm = colors.Normalize(m - (rng * low),\n", 119 + " M + (rng * high))\n", 120 + " normed = norm(s.values)\n", 121 + " c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n", 122 + " return ['background-color: %s' % color for color in c]\n", 123 + "\n", 124 + "data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", 
\"Weight\"]]\n", 125 + "data.drop_duplicates\n", 126 + "\n", 127 + "pearson = data.corr()\n", 128 + "pearson.style.apply(background_gradient,\n", 129 + " cmap='Wistia',\n", 130 + " m=pearson.min().min(),\n", 131 + " M=pearson.max().max()\n", 132 + ")" 133 + ] 134 + }, 135 + { 136 + "cell_type": "code", 137 + "execution_count": 3, 138 + "metadata": {}, 139 + "outputs": [ 140 + { 141 + "data": { 142 + "text/html": [ 143 + "<style type=\"text/css\" >\n", 144 + " #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0 {\n", 145 + " background-color: #fc7f00;\n", 146 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1 {\n", 147 + " background-color: #ffd20c;\n", 148 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2 {\n", 149 + " background-color: #fee91d;\n", 150 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3 {\n", 151 + " background-color: #f4f242;\n", 152 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0 {\n", 153 + " background-color: #ffd20c;\n", 154 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1 {\n", 155 + " background-color: #fc7f00;\n", 156 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2 {\n", 157 + " background-color: #e4ff7a;\n", 158 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3 {\n", 159 + " background-color: #eafa63;\n", 160 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0 {\n", 161 + " background-color: #fee91d;\n", 162 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1 {\n", 163 + " background-color: #e4ff7a;\n", 164 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2 {\n", 165 + " background-color: #fc7f00;\n", 166 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3 {\n", 167 + " background-color: #ff9d00;\n", 168 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0 {\n", 169 + " background-color: #f4f242;\n", 170 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1 {\n", 171 + " background-color: #eafa63;\n", 172 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2 {\n", 173 + " 
background-color: #ff9d00;\n", 174 + " } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3 {\n", 175 + " background-color: #fc7f00;\n", 176 + " }</style> \n", 177 + "<table id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2\" > \n", 178 + "<thead> <tr> \n", 179 + " <th class=\"blank level0\" ></th> \n", 180 + " <th class=\"col_heading level0 col0\" >DOB</th> \n", 181 + " <th class=\"col_heading level0 col1\" >RANK</th> \n", 182 + " <th class=\"col_heading level0 col2\" >HEIGHT</th> \n", 183 + " <th class=\"col_heading level0 col3\" >Weight</th> \n", 184 + " </tr></thead> \n", 185 + "<tbody> <tr> \n", 186 + " <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n", 187 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n", 188 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.280386</td> \n", 189 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.122412</td> \n", 190 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >0.00769861</td> \n", 191 + " </tr> <tr> \n", 192 + " <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n", 193 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.280386</td> \n", 194 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n", 195 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.160006</td> \n", 196 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.0908714</td> \n", 197 + " </tr> <tr> \n", 198 + " <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n", 199 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.122412</td> 
\n", 200 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.160006</td> \n", 201 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n", 202 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.739246</td> \n", 203 + " </tr> <tr> \n", 204 + " <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n", 205 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >0.00769861</td> \n", 206 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.0908714</td> \n", 207 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.739246</td> \n", 208 + " <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n", 209 + " </tr></tbody> \n", 210 + "</table> " 211 + ], 212 + "text/plain": [ 213 + "<pandas.io.formats.style.Styler at 0x111a3b198>" 214 + ] 215 + }, 216 + "execution_count": 3, 217 + "metadata": {}, 218 + "output_type": "execute_result" 219 + } 220 + ], 221 + "source": [ 222 + "spearman = data.corr(method=\"spearman\")\n", 223 + "spearman.style.apply(background_gradient,\n", 224 + " cmap='Wistia',\n", 225 + " m=spearman.min().min(),\n", 226 + " M=spearman.max().max()\n", 227 + ")" 228 + ] 229 + } 230 + ], 231 + "metadata": { 232 + "kernelspec": { 233 + "display_name": "Python 3", 234 + "language": "python", 235 + "name": "python3" 236 + }, 237 + "language_info": { 238 + "codemirror_mode": { 239 + "name": "ipython", 240 + "version": 3 241 + }, 242 + "file_extension": ".py", 243 + "mimetype": "text/x-python", 244 + "name": "python", 245 + "nbconvert_exporter": "python", 246 + "pygments_lexer": "ipython3", 247 + "version": "3.6.4" 248 + } 249 + }, 250 + "nbformat": 4, 251 + "nbformat_minor": 2 252 + }