# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals, print_function)

__all__ = ['MultiLayerPerceptronBackend']

import os
import sys
import math
import time
import logging
import itertools

log = logging.getLogger('sknn')


import numpy
import theano
import sklearn.base
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.cross_validation

import theano.tensor as T
import lasagne.layers
import lasagne.nonlinearities as nl

from ..base import BaseBackend
from ...nn import Layer, Convolution, ansi


class MultiLayerPerceptronBackend(BaseBackend):
    """
    Backend wrapping the multi-layer perceptron functionality
    from Lasagne.
    """

    def __init__(self, spec):
        super(MultiLayerPerceptronBackend, self).__init__(spec)
        self.mlp = None
        self.f = None
        self.trainer = None
        self.cost = None

    def _create_mlp_trainer(self, params):
        # Aggregate all regularization parameters into common dictionaries.
        layer_decay = {}
        if self.regularize in ('L1', 'L2') or any(l.weight_decay for l in self.layers):
            wd = self.weight_decay or 0.0001
            for l in self.layers:
                layer_decay[l.name] = l.weight_decay or wd
        assert len(layer_decay) == 0 or self.regularize in ('L1', 'L2', None)

        if len(layer_decay) > 0:
            if self.regularize is None:
                self.regularize = 'L2'
            penalty = getattr(lasagne.regularization, self.regularize.lower())
            regularize = lasagne.regularization.apply_penalty
            self.cost = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty)
                            for s, l in zip(self.layers, self.mlp))

        cost_functions = {'mse': 'squared_error', 'mcc': 'categorical_crossentropy'}
        loss_type = self.loss_type or ('mcc' if self.is_classifier else 'mse')
        assert loss_type in cost_functions,\
            "Loss type `%s` not supported by Lasagne backend." % loss_type
        cost_fn = getattr(lasagne.objectives, cost_functions[loss_type])
        cost_eval = cost_fn(self.symbol_output, self.tensor_output).mean()
        if self.cost is not None:
            # Weight decay is an additive penalty on top of the data loss.
            cost_eval = cost_eval + self.cost
        return self._create_trainer(params, cost_eval)

    def _create_trainer(self, params, cost):
        if self.learning_rule in ('sgd', 'adagrad', 'adadelta', 'rmsprop', 'adam'):
            lr = getattr(lasagne.updates, self.learning_rule)
            self._learning_rule = lr(cost, params, learning_rate=self.learning_rate)
        elif self.learning_rule in ('momentum', 'nesterov'):
            # Lasagne names Nesterov's variant `nesterov_momentum`; map the name here
            # instead of monkey-patching the lasagne.updates module.
            lr = lasagne.updates.nesterov_momentum if self.learning_rule == 'nesterov'\
                else lasagne.updates.momentum
            self._learning_rule = lr(cost, params, learning_rate=self.learning_rate, momentum=self.learning_momentum)
        else:
            raise NotImplementedError(
                "Learning rule type `%s` is not supported." % self.learning_rule)

        return theano.function([self.tensor_input, self.tensor_output], cost,
                               updates=self._learning_rule,
                               allow_input_downcast=True)

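    # A minimal sketch of how the compiled trainer is driven (this is what
    # `_train_impl` below does); the variable names here are illustrative only:
    #
    #     trainer = self._create_mlp_trainer(params)
    #     batch_cost = trainer(X_batch, y_batch)   # one update step, returns the scalar cost
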
    def _get_activation(self, l):
        nonlinearities = {'Rectifier': nl.rectify,
                          'Sigmoid': nl.sigmoid,
                          'Tanh': nl.tanh,
                          'Softmax': nl.softmax,
                          'Linear': nl.linear}

        assert l.type in nonlinearities,\
            "Layer type `%s` is not supported for `%s`." % (l.type, l.name)
        return nonlinearities[l.type]

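    # Illustration of the mapping above: a hidden layer declared in the network
    # specification as e.g. `Layer("Rectifier", units=128)` (assuming the usual
    # sknn.nn.Layer constructor) resolves to `lasagne.nonlinearities.rectify`.
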
    def _create_convolution_layer(self, name, layer, network):
        self._check_layer(layer,
                          required=['channels', 'kernel_shape'],
                          optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type'])

        network = lasagne.layers.Conv2DLayer(
            network,
            num_filters=layer.channels,
            filter_size=layer.kernel_shape,
            stride=layer.kernel_stride,
            pad=layer.border_mode,
            nonlinearity=self._get_activation(layer))

        if layer.pool_shape != (1, 1):
            network = lasagne.layers.Pool2DLayer(
                network,
                pool_size=layer.pool_shape,
                stride=layer.pool_shape)

        return network

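    # Worked example of the shape flow above (assuming a kernel stride of 1): a 32x32
    # input with kernel_shape=(3, 3) and border_mode='valid' gives a 30x30 feature map
    # from Conv2DLayer, and pool_shape=(2, 2) then halves it to 15x15 via non-overlapping
    # pooling (Pool2DLayer defaults to 'max' mode).
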
    def _create_layer(self, name, layer, network):
        dropout = layer.dropout or self.dropout_rate
        if dropout is not None:
            network = lasagne.layers.dropout(network, dropout)

        if isinstance(layer, Convolution):
            return self._create_convolution_layer(name, layer, network)

        self._check_layer(layer, required=['units'])
        return lasagne.layers.DenseLayer(network,
                                         num_units=layer.units,
                                         nonlinearity=self._get_activation(layer))

    def _create_mlp(self, X):
        self.tensor_input = T.tensor4('X') if self.is_convolution else T.matrix('X')
        self.tensor_output = T.matrix('y')

        lasagne.random.get_rng().seed(self.random_state)

        shape = list(X.shape)
        network = lasagne.layers.InputLayer([None] + shape[1:], self.tensor_input)

        # Create the layers one by one, connecting to previous.
        self.mlp = []
        for i, layer in enumerate(self.layers):
            network = self._create_layer(layer.name, layer, network)
            self.mlp.append(network)

        log.info(
            "Initializing neural network with %i layers, %i inputs and %i outputs.",
            len(self.layers), self.unit_counts[0], self.layers[-1].units)

        for l, p, count in zip(self.layers, self.mlp, self.unit_counts[1:]):
            space = p.output_shape
            if isinstance(l, Convolution):
                log.debug(" - Convl: {}{: <10}{} Output: {}{: <10}{} Channels: {}{}{}".format(
                    ansi.BOLD, l.type, ansi.ENDC,
                    ansi.BOLD, repr(space[2:]), ansi.ENDC,
                    ansi.BOLD, space[1], ansi.ENDC))

                # NOTE: Numbers don't match up exactly for pooling; one off. The logic is convoluted!
                # assert count == numpy.product(space.shape) * space.num_channels,\
                #    "Mismatch in the calculated number of convolution layer outputs."
            else:
                log.debug(" - Dense: {}{: <10}{} Units: {}{: <4}{}".format(
                    ansi.BOLD, l.type, ansi.ENDC, ansi.BOLD, l.units, ansi.ENDC))
                assert count == space[1],\
                    "Mismatch in the calculated number of dense layer outputs."

        if self.weights is not None:
            l = min(len(self.weights), len(self.mlp))
            log.info("Reloading parameters for %i layer weights and biases." % (l,))
            self._array_to_mlp(self.weights, self.mlp)
            self.weights = None

        log.debug("")

        # Deterministic output: dropout layers act as pass-through at prediction time.
        self.symbol_output = lasagne.layers.get_output(network, deterministic=True)
        self.f = theano.function([self.tensor_input], self.symbol_output, allow_input_downcast=True)

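    # Sketch of how the compiled forward pass is used by `_predict_impl` and
    # `_valid_impl` below (illustrative variable names):
    #
    #     y_pred = self.f(X_batch)   # batch of predictions as a numpy array
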
    def _initialize_impl(self, X, y=None):
        if self.is_convolution:
            X = numpy.transpose(X, (0, 3, 1, 2))

        if self.mlp is None:
            self._create_mlp(X)

        # Can do partial initialization when predicting, no trainer needed.
        if y is None:
            return

        if self.valid_size > 0.0:
            assert self.valid_set is None, "Can't specify valid_size and valid_set together."
            X, X_v, y, y_v = sklearn.cross_validation.train_test_split(
                X, y,
                test_size=self.valid_size,
                random_state=self.random_state)
            self.valid_set = X_v, y_v

        params = []
        for spec, mlp_layer in zip(self.layers, self.mlp):
            if spec.frozen:
                continue
            params.extend(mlp_layer.get_params())

        self.trainer = self._create_mlp_trainer(params)
        return X, y

    def _predict_impl(self, X):
        if not self.is_initialized:
            self._initialize_impl(X)

        if self.is_convolution:
            X = numpy.transpose(X, (0, 3, 1, 2))
        return self.f(X)

    def _iterate_data(self, X, y, batch_size, shuffle=False):
        def cast(array):
            # Sparse matrices are densified before being handed to Theano.
            if not isinstance(array, numpy.ndarray):
                array = array.todense()
            return array.astype(theano.config.floatX)

        total_size = X.shape[0]
        indices = numpy.arange(total_size)
        if shuffle:
            numpy.random.shuffle(indices)

        # Iterate over full batches only; any trailing partial batch is dropped.
        for start_idx in range(0, total_size - batch_size + 1, batch_size):
            excerpt = indices[start_idx:start_idx + batch_size]
            Xb, yb = cast(X[excerpt]), cast(y[excerpt])
            if self.mutator is not None:
                # The mutator callback modifies each sample array in place.
                for x, _ in zip(Xb, yb):
                    self.mutator(x)
            yield Xb, yb

    def _train_impl(self, X, y):
        loss, batches = 0.0, 0
        for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle=True):
            loss += self.trainer(Xb, yb)
            batches += 1
        return loss / batches

    def _valid_impl(self, X, y):
        # Validation error is a plain mean squared error over the deterministic
        # forward pass, regardless of the training loss type.
        loss, batches = 0.0, 0
        for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle=True):
            ys = self.f(Xb)
            loss += ((ys - yb) ** 2.0).mean()
            batches += 1
        return loss / batches

    @property
    def is_initialized(self):
        """Check if the neural network was set up already.
        """
        return self.f is not None

    def _mlp_get_params(self, layer):
        # Walk back past parameter-less layers (dropout, pooling) to the one holding W and b.
        while not hasattr(layer, 'W') and not hasattr(layer, 'b'):
            layer = layer.input_layer
        return (layer.W.get_value(), layer.b.get_value())

    def _mlp_to_array(self):
        return [self._mlp_get_params(l) for l in self.mlp]

    def _array_to_mlp(self, array, nn):
        for layer, (weights, biases) in zip(nn, array):
            while not hasattr(layer, 'W') and not hasattr(layer, 'b'):
                layer = layer.input_layer

            ws = tuple(layer.W.shape.eval())
            assert ws == weights.shape, "Layer weights shape mismatch: %r != %r" %\
                (ws, weights.shape)
            layer.W.set_value(weights)

            bs = tuple(layer.b.shape.eval())
            assert bs == biases.shape, "Layer biases shape mismatch: %r != %r" %\
                (bs, biases.shape)
            layer.b.set_value(biases)
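

# A minimal usage sketch. This backend is normally constructed indirectly through the
# public scikit-neuralnetwork estimators rather than instantiated by hand; the exact
# front-end API below (sknn.mlp.Regressor and its keyword arguments) is an assumption
# based on the library's documented interface, not something defined in this module:
#
#     from sknn.mlp import Regressor, Layer
#
#     nn = Regressor(
#         layers=[Layer("Rectifier", units=100), Layer("Linear")],
#         learning_rate=0.02,
#         n_iter=10)
#     nn.fit(X_train, y_train)        # drives _initialize_impl / _train_impl above
#     y_pred = nn.predict(X_test)     # drives _predict_impl via self.f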