\usepackage[justification=centering]{caption} % Used for captions
\captionsetup[figure]{font=small} % Makes captions small
\newcommand\tab[1][0.5cm]{\hspace*{#1}} % Defines a new command to use 'tab' in text
\usepackage[comma, numbers]{natbib} % Used for the bibliography
\usepackage{amsmath} % Math package
% Link the parameters of \cref{}, \ref{}, \cite{}, ... so that a reader can click on the number and jump to the target in the document
\usepackage{hyperref}
% Enable \cref{...} and \Cref{...} instead of \ref: type of reference included in the link
architectures, as this method is currently the most used for image
classification.
\textbf{
\\A couple of papers that may be useful (if needed):
- LeNet: \url{http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf}
- AlexNet: \url{http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks}
- General comparison of GoogLeNet and AlexNet:
``On the Performance of GoogLeNet and AlexNet Applied to Sketches'', Pedro Ballester and Ricardo Matsumura Araujo
- Deep NN architecture:
\url{https://www-sciencedirect-com.ezproxy.lib.monash.edu.au/science/article/pii/S0925231216315533}
}
\subsection{Classical Machine Learning Methods}

The following paragraphs will give only brief descriptions of the different
\subsection{Neural Network Architectures}
\todo{Did we only do the three in the end? (Alexnet?)}
Yeah, we implemented the LeNet architecture, then improved on it into a fairly standard convolutional neural network (CNN) that was deeper, extracted more features, and condensed the image information further. Then we implemented a more fully convolutional network (FCN), which contained only one dense layer for the final binary classification step. The FCN added an extra convolutional layer, meaning that before classifying each image, the network abstracted the data more than the other two.
\begin{itemize}
\item LeNet
\item CNN
\item FCN
\end{itemize}
\paragraph{Convolutional Neural Networks}

\section{Method} \label{sec:method}
\tab
In order to effectively utilise the aforementioned modelling and classification techniques, a key consideration is the data they are acting on.
A dataset containing Waldo and non-Waldo images was obtained from an Open Database\footnote{``The Open Database License (ODbL) is a license agreement intended to allow users to freely share, modify, and use [a] Database while maintaining [the] same freedom for others''~\cite{openData}} hosted on the predictive modelling and analytics competition framework, Kaggle.
The distinction between images containing Waldo and those that do not was provided by the separation of the images into different sub-directories.
It was therefore necessary to preprocess these images before they could be utilised by the proposed machine learning algorithms.
\subsection{Image Processing}
\tab
The Waldo image database consists of images of size 64$\times$64, 128$\times$128, and 256$\times$256 pixels, obtained by dividing complete Where's Waldo? puzzles.
Within each set of images, those containing Waldo are located in a folder called `waldo', and those not containing Waldo in a folder called `not\_waldo'.
Since Where's Waldo? puzzles are usually densely populated and contain fine details, the 64$\times$64 pixel set of images was selected to train and evaluate the machine learning models.
These images provide the added benefit of being the most numerous of the three size groups.
\par
Each of the 64$\times$64 pixel images was inserted into a Numpy\footnote{Numpy is a popular Python programming library for scientific computing.} array of images, and a binary value was inserted into a separate list at the same index.
These binary values form the labels for each image (Waldo or not Waldo).
Colour normalisation was performed on each image so that artefacts in an image's colour profile correspond to meaningful features of the image (rather than to the photographic method).
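The array construction and normalisation step can be sketched as follows. The project's exact colour normalisation scheme is not specified, so the per-channel zero-mean, unit-variance normalisation below is an assumption, and the random array merely stands in for a real 64$\times$64 tile:

```python
import numpy as np

def normalise_colour(img):
    # Per-channel zero-mean, unit-variance normalisation (one common scheme;
    # the project's exact colour normalisation method is an assumption here).
    # Images are channels-first: shape (3, 64, 64).
    img = img.astype(np.float32)
    mean = img.mean(axis=(1, 2), keepdims=True)
    std = img.std(axis=(1, 2), keepdims=True) + 1e-8  # avoid division by zero
    return (img - mean) / std

# Build the image array and the parallel label list
rng = np.random.default_rng(0)
tile = rng.integers(0, 256, size=(3, 64, 64))  # stand-in for a real tile
images = [normalise_colour(tile)]
labels = [1]  # 1 = Waldo, 0 = not Waldo
images = np.stack(images)
```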
\par
Each original puzzle is broken down into many images but contains only one Waldo; although Waldo might span multiple 64$\times$64 pixel squares, this means that the non-Waldo data far outnumbers the Waldo data.
To combat the bias introduced by the skewed data, all Waldo images were artificially augmented by performing random rotations and reflections and by introducing random noise, producing new images.
In this way, each original Waldo image was used to produce an additional 10 variations, which were inserted into the image array.
This provided more variation in the true positives of the data set and assisted in the development of more robust methods by exposing each technique to variations of the image during the training phase.
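The augmentation step above can be sketched as below. The variant count of 10 comes from the text, but the rotation angles, noise level, and flip probability are illustrative assumptions:

```python
import numpy as np

def augment(img, rng, n_variants=10):
    # Produce n_variants of one (3, 64, 64) Waldo tile using random rotation
    # (restricted to multiples of 90 degrees here for simplicity), random
    # horizontal reflection, and additive Gaussian noise. Parameter values
    # are illustrative, not the project's exact settings.
    variants = []
    for _ in range(n_variants):
        v = np.rot90(img, k=int(rng.integers(0, 4)), axes=(1, 2))  # random rotation
        if rng.random() < 0.5:
            v = v[:, :, ::-1]  # random horizontal reflection
        v = v + rng.normal(0.0, 5.0, size=v.shape)  # random noise
        variants.append(v)
    return variants

rng = np.random.default_rng(0)
waldo_tile = np.zeros((3, 64, 64))
extra_images = augment(waldo_tile, rng)  # 10 new variations of the tile
```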
\par
Despite the additional data, there were still over ten times as many non-Waldo images as Waldo images.
Therefore, it was necessary to cull the non-Waldo data so that there was an even split of Waldo and non-Waldo images, improving the representation of true positives in the image data set.
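The culling step amounts to down-sampling the non-Waldo class to the Waldo count; a minimal sketch (function and variable names are illustrative, not taken from the project code):

```python
import numpy as np

def balance_classes(images, labels, rng):
    # Keep every Waldo image and a random, equally sized subset of the
    # non-Waldo images, yielding an even class split.
    labels = np.asarray(labels)
    waldo_idx = np.flatnonzero(labels == 1)
    non_waldo_idx = rng.permutation(np.flatnonzero(labels == 0))[:len(waldo_idx)]
    keep = rng.permutation(np.concatenate([waldo_idx, non_waldo_idx]))
    return images[keep], labels[keep]

rng = np.random.default_rng(0)
images = np.zeros((100, 3, 64, 64))
labels = np.array([1] * 20 + [0] * 80)  # skewed: 20 Waldo, 80 non-Waldo
images, labels = balance_classes(images, labels, rng)  # now 20 of each
```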
% Kelvin Start
\subsection{Benchmarking}\label{benchmarking}

In order to benchmark the Neural Networks, the performance of these
algorithms is evaluated against other Machine Learning algorithms. We
use Support Vector Machine, K-Nearest Neighbours (\(K=5\)), Gaussian
Naive Bayes, and Random Forest classifiers, as provided in Scikit-Learn.
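These benchmark classifiers can be instantiated from Scikit-Learn as below. Hyperparameters other than \(K=5\) are library defaults, and the dummy feature matrix merely stands in for flattened image data:

```python
import numpy as np
from sklearn import svm, naive_bayes, ensemble
from sklearn.neighbors import KNeighborsClassifier

benchmarks = {
    'SVM': svm.SVC(),
    'KNN (K=5)': KNeighborsClassifier(n_neighbors=5),
    'Gaussian NB': naive_bayes.GaussianNB(),
    'Random Forest': ensemble.RandomForestClassifier(),
}

# Dummy stand-in for the real input, which would be the image array
# flattened to shape (n_images, 3 * 64 * 64).
rng = np.random.default_rng(0)
X = rng.random((40, 100))
y = rng.integers(0, 2, size=40)

predictions = {}
for name, clf in benchmarks.items():
    clf.fit(X, y)
    predictions[name] = clf.predict(X)
```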
\subsection{Performance Metrics}\label{performance-metrics}

To evaluate the performance of the models, we record the time taken by
each model to train on the training data, along with statistics about the
predictions the models make on the test data. These prediction
statistics include:
\begin{itemize}
\item
  \textbf{Accuracy:}
  \[a = \dfrac{|\text{correct predictions}|}{|\text{predictions}|} = \dfrac{tp + tn}{tp + tn + fp + fn}\]
\item
  \textbf{Precision:}
  \[p = \dfrac{|\text{Waldo predicted as Waldo}|}{|\text{predicted as Waldo}|} = \dfrac{tp}{tp + fp}\]
\item
  \textbf{Recall:}
  \[r = \dfrac{|\text{Waldo predicted as Waldo}|}{|\text{actually Waldo}|} = \dfrac{tp}{tp + fn}\]
\item
  \textbf{F1 Measure:} \[f1 = \dfrac{2pr}{p + r}\]
\end{itemize}
where \(tp\) is the number of true positives, \(tn\) the number of true negatives, \(fp\) the number of false positives, and \(fn\) the number of false negatives.
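These definitions translate directly into code. The confusion-matrix counts below are made-up numbers for illustration, not project results:

```python
def evaluate(tp, tn, fp, fn):
    # Accuracy, precision, recall and F1 from confusion-matrix counts,
    # exactly as defined in the text.
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    return accuracy, precision, recall, f1

# Hypothetical counts: 8 Waldos found, 10 missed, 2 false alarms, 80 true rejections
a, p, r, f = evaluate(tp=8, tn=80, fp=2, fn=10)
# Accuracy looks respectable (0.88) even though recall shows most Waldos were missed
```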
Accuracy is a common performance metric used in Machine Learning;
however, in classification problems where the training data is heavily
biased toward one category, a model will sometimes learn to optimise its
accuracy by classifying all instances as one category. That is, the
classifier will classify all images that do not contain Waldo as not
containing Waldo, but will also classify all images containing Waldo as
not containing Waldo. Thus, we use other metrics to measure performance
as well.
\emph{Precision} returns the percentage of images classified as Waldo that
are actually Waldo. \emph{Recall} returns the percentage of actual Waldos that
were predicted as Waldo. For a classifier that
classifies nothing as Waldo, the recall would be 0. The \emph{F1-Measure}
combines precision and recall in a way that heavily penalises
classifiers that perform poorly in either precision or recall.
% Kelvin End
\section{Results} \label{sec:results}

\section{Conclusion} \label{sec:conclusion}
\clearpage % Ensures that the references are on a separate page
% References
\section{References}
\renewcommand{\refname}{}
\bibliographystyle{alpha}
\bibliography{references}

\end{document}
mini_proj/waldo_model.py
'''
Model definition for a standard convolutional neural network (CNN) structure
'''
def CNN():
    ## List of model layers
    inputs = Input((3, 64, 64))
    # ... (first convolution layer elided in the source)
    m_pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(32, (3, 3), activation='relu', padding='same')(m_pool1)
    m_pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(32, (3, 3), activation='relu', padding='same')(m_pool2)
    # ... (dropout, pooling, flatten and dense layers elided in the source)
    drop3 = Dropout(0.2)(dense)
    classif = Dense(2, activation='sigmoid')(drop3) # Final layer to classify
    ## Define the model start and end
    model = Model(inputs=inputs, outputs=classif)
    # Optimizer: Adam; Adadelta with its recommended values (lr=0.01) is an alternative
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy', f1])

    return model

'''
Model definition for a nearly fully convolutional network (FCN) structure,
with only one dense layer for the final classification
'''
def FCN():
    ## List of model layers
    inputs = Input((3, 64, 64))  # input shape is fixed here, so per-layer input_shape args are unnecessary

    conv1 = Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
    m_pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(32, (3, 3), activation='relu', padding='same')(m_pool1)
    m_pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(32, (3, 3), activation='relu', padding='same')(m_pool2)
    drop2 = Dropout(0.2)(conv3) # Drop some portion of features to prevent overfitting
    m_pool3 = MaxPooling2D(pool_size=(2, 2))(drop2) # renamed from m_pool2 to avoid shadowing

    conv4 = Conv2D(64, (2, 2), activation='relu', padding='same')(m_pool3)

    flat = Flatten()(conv4) # Makes data 1D
    drop3 = Dropout(0.2)(flat)
    classif = Dense(2, activation='sigmoid')(drop3) # Final layer to classify

    ## Define the model start and end
    model = Model(inputs=inputs, outputs=classif)
    # Optimizer: Adam; Adadelta with its recommended values (lr=0.01) is an alternative
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy', f1])

    return model

'''
Model definition for the network structure of LeNet
Note: LeNet was designed to classify into 10 classes, but we are only performing binary classification
'''
def LeNet():
    ## List of model layers
    inputs = Input((3, 64, 64))

    conv1 = Conv2D(6, (5, 5), activation='relu', padding='valid')(inputs)
    m_pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(16, (5, 5), activation='relu', padding='valid')(m_pool1)
    m_pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    flat = Flatten()(m_pool2) # Makes data 1D

    dense1 = Dense(120, activation='relu')(flat) # Fully connected layer
    dense2 = Dense(84, activation='relu')(dense1) # Fully connected layer
    drop3 = Dropout(0.2)(dense2)
    classif = Dense(2, activation='sigmoid')(drop3) # Final layer to classify

    ## Define the model start and end
    model = Model(inputs=inputs, outputs=classif)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy', f1])

    return model

'''
AlexNet architecture (not yet implemented)
'''
def AlexNet():
    inputs = Input(shape=(3, 64, 64))
    # TODO: build the AlexNet layers; the original stub returned an undefined
    # variable, so raise until the architecture is implemented
    raise NotImplementedError('AlexNet architecture not yet implemented')


def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.
lbl_test = to_categorical(lbl_test)

## Define model
#model = CNN()
model = FCN()
#model = LeNet()
# svm_iclf = ImageClassifier(svm.SVC)
# tree_iclf = ImageClassifier(tree.DecisionTreeClassifier)
# naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)
# ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)

## Define training parameters
epochs = 25 # an epoch is one forward pass and back-propagation of all training data
batch_size = 150 # number of training examples used in one forward/backward pass
# (higher batch size uses more memory, smaller batch size takes more time)
#lrate = 0.01 # Learning rate of the model - controls magnitude of weight changes in training the NN
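As a concrete illustration of how these two parameters interact, the number of weight updates per epoch is the training-set size divided by the batch size; the set size below is a made-up figure, not the project's actual image count:

```python
import math

n_train = 6000      # hypothetical number of training images
batch_size = 150    # as set above
epochs = 25         # as set above

updates_per_epoch = math.ceil(n_train / batch_size)
total_updates = updates_per_epoch * epochs
```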