Group work for a Monash Research Methods course

Merge branch 'master' of github.com:Dekker1/ResearchMethods

+70 -2
+16
mini_proj/report/references.bib
··· 33 33 pages={18--22}, 34 34 year={2002} 35 35 } 36 + @article{Kotsiantis2007, 37 + abstract = {Supervised machine learning is the search for algorithms that reason from externally supplied instances to produce general hypotheses, which then make predictions about future instances. In other words, the goal of supervised learning is to build a concise model of the distribution of class labels in terms of predictor features. The resulting classifier is then used to assign class labels to the testing instances where the values of the predictor features are known, but the value of the class label is unknown. This paper describes various supervised machine learning classification techniques. Of course, a single article cannot be a complete review of all supervised machine learning classification algorithms (also known induction classification algorithms), yet we hope that the references cited will cover the major theoretical issues, guiding the researcher in interesting research directions and suggesting possible bias combinations that have yet to be explored.}, 38 + author = {Kotsiantis, Sotiris B.}, 39 + doi = {10.1115/1.1559160}, 40 + file = {:home/kelvin/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kotsiantis - 2007 - Supervised machine learning A review of classification techniques.pdf:pdf}, 41 + isbn = {1586037803}, 42 + issn = {09226389}, 43 + journal = {Informatica}, 44 + keywords = {algorithms analysis classifiers computational conn,classifiers,data mining techniques,intelligent data analysis,learning algorithms}, 45 + mendeley-groups = {CS Proj/ML,CS Proj,Thesis,Thesis/ML}, 46 + pages = {249--268}, 47 + title = {{Supervised machine learning: A review of classification techniques}}, 48 + url = 
{http://books.google.com/books?hl=en{\&}lr={\&}id=vLiTXDHr{\_}sYC{\&}oi=fnd{\&}pg=PA3{\&}dq=survey+machine+learning{\&}ots=CVsyuwYHjo{\&}sig=A6wYWvywU8XTc7Dzp8ZdKJaW7rc{\%}5Cnpapers://5e3e5e59-48a2-47c1-b6b1-a778137d3ec1/Paper/p800{\%}5Cnhttp://www.informatica.si/PDF/31-3/11{\_}Kotsiantis - S}, 49 + volume = {31}, 50 + year = {2007} 51 + }
+54 -2
mini_proj/report/waldo.tex
··· 115 115 \paragraph{Fully Convolutional Neural Networks} 116 116 117 117 118 - \section{Methods} \label{sec:methods} 118 + \todo{This paper is mad \cite{Kotsiantis2007}.} 119 + 120 + \section{Methods} 119 121 120 - \section{Results and Discussion} \label{sec:results} 122 + % Kelvin Start 123 + \subsection{Benchmarking}\label{benchmarking} 124 + 125 + In order to benchmark the Neural Networks, the performance of these 126 + algorithms is evaluated against other Machine Learning algorithms. We 127 + use Support Vector Machines, K-Nearest Neighbours (\(K=5\)), Gaussian 128 + Naive Bayes and Random Forest classifiers, as provided in Scikit-Learn. 129 + 130 + \subsection{Performance Metrics}\label{performance-metrics} 131 + 132 + To evaluate the performance of the models, we record the time taken by 133 + each model to train, based on the training data and statistics about the 134 + predictions the models make on the test data. These prediction 135 + statistics include: 136 + 137 + \begin{itemize} 138 + \tightlist 139 + \item 140 + \textbf{Accuracy:} 141 + \[a = \dfrac{|correct\ predictions|}{|predictions|} = \dfrac{tp + tn}{tp + tn + fp + fn}\] 142 + \item 143 + \textbf{Precision:} 144 + \[p = \dfrac{|Waldo\ predicted\ as\ Waldo|}{|predicted\ as\ Waldo|} = \dfrac{tp}{tp + fp}\] 145 + \item 146 + \textbf{Recall:} 147 + \[r = \dfrac{|Waldo\ predicted\ as\ Waldo|}{|actually\ Waldo|} = \dfrac{tp}{tp + fn}\] 148 + \item 149 + \textbf{F1 Measure:} \[f1 = \dfrac{2pr}{p + r}\] where \(tp\) is the 150 + number of true positives, \(tn\) is the number of true negatives, 151 + \(fp\) is the number of false positives, and \(fn\) is the number of 152 + false negatives. 153 + \end{itemize} 154 + 155 + Accuracy is a common performance metric used in Machine Learning, 156 + however in classification problems where the training data is heavily 157 + biased toward one category, sometimes a model will learn to optimize its 158 + accuracy by classifying all instances as one category. 
I.e. the 159 + classifier will classify all images that do not contain Waldo as not 160 + containing Waldo, but will also classify all images containing Waldo as 161 + not containing Waldo. Thus we use other metrics to measure performance 162 + as well. 163 + 164 + \emph{Precision} returns the percentage of classifications of Waldo that 165 + are actually Waldo. \emph{Recall} returns the percentage of Waldos that 166 + were actually predicted as Waldo. In the case of a classifier that 167 + classifies all things as not Waldo, the recall would be 0. \emph{F1-Measure} 168 + returns a combination of precision and recall that heavily penalises 169 + classifiers that perform poorly in either precision or recall. 170 + % Kelvin End 171 + 172 + \section{Results} \label{sec:results} 121 173 122 174 \section{Conclusion} \label{sec:conclusion} 123 175