This repository was archived by the owner on Jul 10, 2021. It is now read-only.

Commit b0aef47

Merge pull request #62 from aigamedev/digits

Converting input array shapes internally when they don't match

2 parents 8341b72 + b414d3c

6 files changed: +115 -24 lines

examples/plot_digits.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+from __future__ import (absolute_import, unicode_literals, print_function)
+
+from sklearn import datasets, cross_validation
+from sknn.mlp import Classifier, Layer, Convolution
+
+
+# Load the data and split it into subsets for training and testing.
+digits = datasets.load_digits()
+X = digits.images
+y = digits.target
+
+X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)
+
+
+# Create a neural network that uses convolution to scan the input images.
+nn = Classifier(
+    layers=[
+        Convolution('Rectifier', channels=12, kernel_shape=(3, 3), border_mode='full'),
+        Convolution('Rectifier', channels=8, kernel_shape=(3, 3), border_mode='valid'),
+        Layer('Rectifier', units=64),
+        Layer('Softmax')],
+    learning_rate=0.002,
+    valid_size=0.2,
+    n_stable=10,
+    verbose=True)
+
+nn.fit(X_train, y_train)
+
+
+# Determine how well it does on training data and unseen test data.
+print('\nTRAIN SCORE', nn.score(X_train, y_train))
+print('TEST SCORE', nn.score(X_test, y_test))
+
+y_pred = nn.predict(X_test)
+
+
+# Show some training images and some test images too.
+import pylab
+
+for index, (image, label) in enumerate(zip(digits.images, digits.target)[:6]):
+    pylab.subplot(2, 6, index + 1)
+    pylab.axis('off')
+    pylab.imshow(image, cmap=pylab.cm.gray_r, interpolation='nearest')
+    pylab.title('Training: %i' % label)
+
+for index, (image, prediction) in enumerate(zip(X_test, y_pred)[:6]):
+    pylab.subplot(2, 6, index + 7)
+    pylab.axis('off')
+    pylab.imshow(image.reshape((8,8)), cmap=pylab.cm.gray_r, interpolation='nearest')
+    pylab.title('Predicts: %i' % prediction)
+
+pylab.show()
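
The example leans on the shape conversion introduced by this commit: digits.images is a 3D array of shape (n, 8, 8) and digits.target a flat label vector, yet both are passed to fit() unchanged. A minimal numpy-only sketch of the equivalent manual conversion the library now performs internally (array sizes are illustrative; this is not library code):

import numpy

X = numpy.zeros((1797, 8, 8))   # like digits.images: no explicit channel axis
y = numpy.zeros((1797,))        # like digits.target: a flat label vector

# Convolutional layers expect (samples, rows, columns, channels),
# so a single trailing channel axis is appended to 3D input.
X_conv = X.reshape((X.shape[0], X.shape[1], X.shape[2], 1))   # -> (1797, 8, 8, 1)

# Targets are handled as a 2D column, so a 1D vector gains a second axis.
y_2d = y.reshape((y.shape[0], 1))                             # -> (1797, 1)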

examples/plot_mlp.py

Lines changed: 3 additions & 0 deletions
@@ -1,3 +1,6 @@
+# -*- coding: utf-8 -*-
+from __future__ import (absolute_import, unicode_literals, print_function)
+
 """\
 Visualizing Parameters in a Modern Neural Network
 =================================================

sknn/mlp.py

Lines changed: 20 additions & 11 deletions
@@ -330,6 +330,15 @@ def _array_to_mlp(self, array, nn):
             assert layer.get_biases().shape == biases.shape
             layer.set_biases(biases)
 
+    def _reshape(self, X, y=None):
+        if y is not None and y.ndim == 1:
+            y = y.reshape((y.shape[0], 1))
+        if self.is_convolution and X.ndim == 3:
+            X = X.reshape((X.shape[0], X.shape[1], X.shape[2], 1))
+        if not self.is_convolution and X.ndim > 2:
+            X = X.reshape((X.shape[0], numpy.product(X.shape[1:])))
+        return X, y
+
     def _fit(self, *data, **extra):
         try:
             return self._train(*data, **extra)
@@ -346,22 +355,19 @@ def _fit(self, *data, **extra):
     def _train(self, X, y, test=None):
         assert X.shape[0] == y.shape[0],\
             "Expecting same number of input and output samples."
-        num_samples, data_size = X.shape[0], X.size+y.size
-
-        if y.ndim == 1:
-            y = y.reshape((y.shape[0], 1))
+        data_shape, data_size = X.shape, X.size+y.size
+        X, y = self._reshape(X, y)
 
         if not self.is_initialized:
            self._initialize(X, y)
            X, y = self.train_set
        else:
            self.train_set = X, y
+            assert self.ds is not None, "Training after serialization is not (yet) supported."
 
-        if self.is_convolution:
-            X = self.ds.view_converter.topo_view_to_design_mat(X)
-        self.ds.X, self.ds.y = X, y
-
-        log.info("Training on dataset of {:,} samples with {:,} total size.".format(num_samples, data_size))
+        log.info("Training on dataset of {:,} samples with {:,} total size.".format(data_shape[0], data_size))
+        if data_shape[1:] != X.shape[1:]:
+            log.warning(" - Reshaping input array from {} to {}.".format(data_shape, X.shape))
         if self.valid_set:
             X_v, _ = self.valid_set
             log.debug(" - Train: {: <9,} Valid: {: <4,}".format(X.shape[0], X_v.shape[0]))
@@ -370,6 +376,10 @@ def _train(self, X, y, test=None):
         if self.n_stable:
             log.debug(" - Early termination after {} stable iterations.".format(self.n_stable))
 
+        if self.is_convolution:
+            X = self.ds.view_converter.topo_view_to_design_mat(X)
+        self.ds.X, self.ds.y = X, y
+
         if self.verbose:
             log.debug("\nEpoch    Validation Error    Time"
                       "\n---------------------------------")
@@ -382,15 +392,14 @@ def _predict(self, X):
             assert self.layers[-1].units is not None,\
                 "You must specify the number of units to predict without fitting."
             log.warning("Computing estimates with an untrained network.")
-
             self._create_specs(X)
             self._create_mlp()
 
+        X, _ = self._reshape(X)
         if X.dtype != numpy.float32:
             X = X.astype(numpy.float32)
         if not isinstance(X, numpy.ndarray):
             X = X.toarray()
-
         return self.f(X)
 
     def get_params(self, deep=True):
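
For non-convolutional networks the same _reshape helper flattens any trailing dimensions into a single feature axis, which is what the new "Reshaping input array" warning reports. A small sketch of that path in plain numpy, mirroring the code above rather than calling it:

import numpy

X = numpy.zeros((8, 4, 4, 1))   # image-shaped input handed to a dense network
data_shape = X.shape            # remembered before conversion, as in _train()

# Everything after the sample axis collapses into one feature dimension.
X = X.reshape((X.shape[0], numpy.product(X.shape[1:])))   # -> (8, 16)

if data_shape[1:] != X.shape[1:]:
    print("Reshaping input array from {} to {}.".format(data_shape, X.shape))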

sknn/nn.py

Lines changed: 3 additions & 1 deletion
@@ -28,6 +28,7 @@
 class ansi:
     BOLD = '\033[1;97m'
     WHITE = '\033[0;97m'
+    YELLOW = '\033[0;33m'
     RED = '\033[0;31m'
     GREEN = '\033[0;32m'
     BLUE = '\033[0;94m'
@@ -457,6 +458,7 @@ def _create_logger(self):
         hnd.setFormatter(fmt)
         hnd.setLevel(lvl)
         log.addHandler(hnd)
+        log.setLevel(lvl)
 
     def _create_matrix_input(self, X, y=None):
         if self.is_convolution:
@@ -479,7 +481,7 @@ def _create_trainer(self, dataset, cost):
         if dataset is not None:
             termination_criterion = tc.MonitorBased(
                 channel_name='objective',
-                N=self.n_stable,
+                N=self.n_stable-1,
                 prop_decrease=self.f_stable)
         else:
             termination_criterion = None
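
From the caller's side, n_stable is the early-stopping patience that the new digits example sets to 10; the N=self.n_stable-1 change only adjusts how that count is forwarded to pylearn2's MonitorBased criterion. A hedged sketch of the intended user-facing behaviour (layer sizes and parameter values are illustrative):

from sknn.mlp import Regressor, Layer

# Training halts once the monitored validation objective has failed to
# improve for n_stable consecutive iterations, or after n_iter at most.
nn = Regressor(
    layers=[Layer('Rectifier', units=16), Layer('Linear')],
    valid_size=0.2,   # hold out 20% of the training data for the monitor
    n_stable=10,      # the patience this commit recalibrates by one
    n_iter=100)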

sknn/tests/test_conv.py

Lines changed: 21 additions & 8 deletions
@@ -9,12 +9,22 @@
 
 class TestConvolution(unittest.TestCase):
 
-    def _run(self, nn):
-        a_in, a_out = numpy.zeros((8,32,16,1)), numpy.zeros((8,4))
+    def _run(self, nn, a_in=None):
+        if a_in is None:
+            a_in = numpy.zeros((8,32,16,1))
+        a_out = numpy.zeros((8,4))
         nn.fit(a_in, a_out)
         a_test = nn.predict(a_in)
         assert_equal(type(a_out), type(a_in))
 
+    def test_MissingLastDim(self):
+        self._run(MLPR(
+            layers=[
+                C("Tanh", channels=4, kernel_shape=(3,3)),
+                L("Linear")],
+            n_iter=1),
+            a_in=numpy.zeros((8,32,16)))
+
     def test_SquareKernel(self):
         self._run(MLPR(
             layers=[
@@ -39,7 +49,7 @@ def test_VerticalKernel(self):
     def test_VerticalVerbose(self):
         self._run(MLPR(
             layers=[
-                C("Rectifier", channels=4, kernel_shape=(16,1)),
+                C("Sigmoid", channels=4, kernel_shape=(16,1)),
                 L("Linear")],
             n_iter=1, verbose=1, valid_size=0.1))
 
@@ -53,7 +63,7 @@ def test_HorizontalKernel(self):
     def test_ValidationSet(self):
         self._run(MLPR(
             layers=[
-                C("Rectifier", channels=4, kernel_shape=(3,3)),
+                C("Tanh", channels=4, kernel_shape=(3,3)),
                 L("Linear")],
             n_iter=1,
             valid_size=0.5))
@@ -62,8 +72,8 @@ def test_MultipleLayers(self):
         self._run(MLPR(
             layers=[
                 C("Rectifier", channels=6, kernel_shape=(3,3)),
-                C("Rectifier", channels=4, kernel_shape=(5,5)),
-                C("Rectifier", channels=8, kernel_shape=(3,3)),
+                C("Sigmoid", channels=4, kernel_shape=(5,5)),
+                C("Tanh", channels=8, kernel_shape=(3,3)),
                 L("Linear")],
             n_iter=1))
 
@@ -187,8 +197,11 @@ def test_UnknownConv(self):
 
 class TestConvolutionRGB(TestConvolution):
 
-    def _run(self, nn):
-        a_in, a_out = numpy.zeros((8,32,16,3)), numpy.zeros((8,4))
+    def _run(self, nn, a_in=None):
+        if a_in is None:
+            a_in = numpy.zeros((8,32,16,1))
+        a_out = numpy.zeros((8,4))
+
         nn.fit(a_in, a_out)
         a_test = nn.predict(a_in)
         assert_equal(type(a_out), type(a_in))
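
Outside the test harness, the scenario exercised by test_MissingLastDim looks roughly like the following: a convolutional network fitted on a 3D batch with no channel axis, which the library now reshapes internally. A short sketch using the same shapes as the test (not part of the commit):

import numpy
from sknn.mlp import Regressor, Layer, Convolution

a_in = numpy.zeros((8, 32, 16))    # no trailing channel dimension
a_out = numpy.zeros((8, 4))

nn = Regressor(
    layers=[
        Convolution('Tanh', channels=4, kernel_shape=(3, 3)),
        Layer('Linear')],
    n_iter=1)

nn.fit(a_in, a_out)            # the missing channel axis is added internally
print(nn.predict(a_in).shape)  # expected to match a_out's shape, (8, 4)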

sknn/tests/test_linear.py

Lines changed: 15 additions & 4 deletions
@@ -32,6 +32,16 @@ def test_FitAutoInitialize(self):
         self.nn.fit(a_in, a_out)
         assert_true(self.nn.is_initialized)
 
+    def test_ResizeInputFrom4D(self):
+        a_in, a_out = numpy.zeros((8,4,4,1)), numpy.zeros((8,4))
+        self.nn.fit(a_in, a_out)
+        assert_true(self.nn.is_initialized)
+
+    def test_ResizeInputFrom3D(self):
+        a_in, a_out = numpy.zeros((8,4,4)), numpy.zeros((8,4))
+        self.nn.fit(a_in, a_out)
+        assert_true(self.nn.is_initialized)
+
     def test_FitWrongSize(self):
         a_in, a_out = numpy.zeros((7,16)), numpy.zeros((9,4))
         assert_raises(AssertionError, self.nn.fit, a_in, a_out)
@@ -87,10 +97,11 @@ def test_TypeOfWeightsArray(self):
         assert_equal(type(w), numpy.ndarray)
         assert_equal(type(b), numpy.ndarray)
 
-    def test_FitAutoInitialize(self):
-        # Override base class test, you currently can't re-train a network that
-        # was serialized and deserialized.
-        pass
+    # Override base class test, you currently can't re-train a network that
+    # was serialized and deserialized.
+    def test_FitAutoInitialize(self): pass
+    def test_ResizeInputFrom4D(self): pass
+    def test_ResizeInputFrom3D(self): pass
 
     def test_PredictNoOutputUnitsAssertion(self):
         # Override base class test, this is not initialized but it