diff --git a/docs/source/api_reference/regression.rst b/docs/source/api_reference/regression.rst
index 5abb22cd955..bfca5f9538a 100644
--- a/docs/source/api_reference/regression.rst
+++ b/docs/source/api_reference/regression.rst
@@ -30,8 +30,11 @@ Deep learning
     :template: class.rst
 
     CNNRegressor
+    CNTCRegressor
     FCNRegressor
+    InceptionTimeRegressor
     LSTMRegressor
+    MACNNRegressor
     MCDCNNRegressor
     MLPRegressor
     SimpleRNNRegressor
diff --git a/sktime/regression/deep_learning/__init__.py b/sktime/regression/deep_learning/__init__.py
index 73acd00e813..a2af16729e1 100644
--- a/sktime/regression/deep_learning/__init__.py
+++ b/sktime/regression/deep_learning/__init__.py
@@ -1,8 +1,11 @@
 """Deep learning based regressors."""
 __all__ = [
     "CNNRegressor",
+    "CNTCRegressor",
     "FCNRegressor",
+    "InceptionTimeRegressor",
     "LSTMFCNRegressor",
+    "MACNNRegressor",
     "MCDCNNRegressor",
     "MLPRegressor",
     "ResNetRegressor",
@@ -11,8 +14,11 @@
 ]
 
 from sktime.regression.deep_learning.cnn import CNNRegressor
+from sktime.regression.deep_learning.cntc import CNTCRegressor
 from sktime.regression.deep_learning.fcn import FCNRegressor
+from sktime.regression.deep_learning.inceptiontime import InceptionTimeRegressor
 from sktime.regression.deep_learning.lstmfcn import LSTMFCNRegressor
+from sktime.regression.deep_learning.macnn import MACNNRegressor
 from sktime.regression.deep_learning.mcdcnn import MCDCNNRegressor
 from sktime.regression.deep_learning.mlp import MLPRegressor
 from sktime.regression.deep_learning.resnet import ResNetRegressor
diff --git a/sktime/regression/deep_learning/cntc.py b/sktime/regression/deep_learning/cntc.py
new file mode 100644
index 00000000000..44ca435dfa8
--- /dev/null
+++ b/sktime/regression/deep_learning/cntc.py
@@ -0,0 +1,252 @@
+"""Contextual Time-series Neural Regressor for time series regression."""
+
+__author__ = ["James-Large", "TonyBagnall", "AurumnPegasus"]
+__all__ = ["CNTCRegressor"]
+
+from sklearn.utils import check_random_state
+
+from sktime.networks.cntc import CNTCNetwork
+from sktime.regression.deep_learning.base import BaseDeepRegressor
+from sktime.utils.validation._dependencies import _check_dl_dependencies
+
+
+class CNTCRegressor(BaseDeepRegressor):
+    """Contextual Time-series Neural Regressor (CNTC), as described in [1]_.
+
+    Parameters
+    ----------
+    n_epochs : int, default = 2000
+        the number of epochs to train the model
+    batch_size : int, default = 16
+        the number of samples per gradient update
+    filter_sizes : tuple of shape (2), default = (16, 8)
+        filter sizes for CNNs in CCNN arm
+    kernel_sizes : two-tuple, default = (1, 1)
+        the length of the 1D convolution window for
+        CNNs in CCNN arm
+    rnn_size : int, default = 64
+        number of rnn units in the CCNN arm
+    lstm_size : int, default = 8
+        number of lstm units in the CLSTM arm
+    dense_size : int, default = 64
+        dimension of dense layer in CNTC
+    callbacks : None or list of keras.callbacks.Callback, default = None
+        the callback(s) to use during training
+    verbose : boolean, default = False
+        whether to output extra information
+    loss : string, default = "mean_squared_error"
+        fit parameter for the keras model
+    metrics : list of strings, default = ["accuracy"]
+        the metrics passed to ``model.compile()``
+    random_state : int, default = 0
+        seed for random number generation
+
+    Notes
+    -----
+    Adapted from the implementation by Fullah et al.
+    https://github.com/AmaduFullah/CNTC_MODEL/blob/master/cntc.ipynb
+
+    References
+    ----------
+    .. [1] Network originally defined in:
+        @article{FULLAHKAMARA202057,
+        title = {Combining contextual neural networks for time series classification},
+        journal = {Neurocomputing},
+        volume = {384},
+        pages = {57-66},
+        year = {2020},
+        issn = {0925-2312},
+        doi = {https://doi.org/10.1016/j.neucom.2019.10.113},
+        url = {https://www.sciencedirect.com/science/article/pii/S0925231219316364},
+        author = {Amadu {Fullah Kamara} and Enhong Chen and Qi Liu and Zhen Pan},
+        keywords = {Time series classification, Contextual convolutional neural
+        networks, Contextual long short-term memory, Attention, Multilayer
+        perceptron},
+        }
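+
+    Examples
+    --------
+    An illustrative usage sketch, following the pattern of the other sktime
+    deep learning regressors; the tiny ``n_epochs``/``batch_size`` values
+    are chosen for speed, not accuracy.
+
+    >>> from sktime.regression.deep_learning.cntc import CNTCRegressor
+    >>> from sktime.datasets import load_unit_test
+    >>> X_train, y_train = load_unit_test(return_X_y=True, split="train")
+    >>> regressor = CNTCRegressor(n_epochs=1, batch_size=4)  # doctest: +SKIP
+    >>> regressor.fit(X_train, y_train)  # doctest: +SKIP
+    CNTCRegressor(...)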
+    """
+
+    _tags = {
+        "authors": ["James-Large", "Withington", "TonyBagnall", "AurumnPegasus"],
+        "maintainers": ["James-Large", "Withington", "AurumnPegasus", "nilesh05apr"],
+        "python_dependencies": ["tensorflow", "keras-self-attention"],
+    }
+
+    def __init__(
+        self,
+        n_epochs=2000,
+        batch_size=16,
+        filter_sizes=(16, 8),
+        kernel_sizes=(1, 1),
+        rnn_size=64,
+        lstm_size=8,
+        dense_size=64,
+        callbacks=None,
+        verbose=False,
+        loss="mean_squared_error",
+        metrics=None,
+        random_state=0,
+    ):
+        _check_dl_dependencies(severity="error")
+
+        self.kernel_sizes = kernel_sizes  # used plural
+        self.filter_sizes = filter_sizes  # used plural
+        self.rnn_size = rnn_size
+        self.lstm_size = lstm_size
+        self.dense_size = dense_size
+        self.callbacks = callbacks
+        self.n_epochs = n_epochs
+        self.batch_size = batch_size
+        self.verbose = verbose
+        self.loss = loss
+        self.metrics = metrics
+        self.random_state = random_state
+        self._network = CNTCNetwork()
+
+        super().__init__(batch_size=batch_size, random_state=random_state)
+
+    def build_model(self, input_shape, **kwargs):
+        """Construct a compiled, un-trained, keras model that is ready for training.
+
+        In sktime, time series are stored in numpy arrays of shape (d,m), where d
+        is the number of dimensions, m is the series length. Keras/tensorflow assume
+        data is in shape (m,d). This method also assumes (m,d). Transpose should
+        happen in fit.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            The shape of the data fed into the input layer, should be (m,d)
+
+        Returns
+        -------
+        output : a compiled Keras Model
+        """
+        from tensorflow import keras
+
+        metrics = ["accuracy"] if self.metrics is None else self.metrics
+        input_layer, output_layer = self._network.build_network(input_shape, **kwargs)
+
+        output_layer = keras.layers.Dense(units=1)(output_layer)
+
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+        model.compile(
+            loss=self.loss,
+            optimizer=keras.optimizers.Adam(),
+            metrics=metrics,
+        )
+        return model
+
+    def prepare_input(self, X):
+        """Prepare input for the CLSTM arm of the model.
+
+        According to the paper:
+        "
+        Time series data is fed into a CLSTM and CCNN networks simultaneously
+        and is perceived differently. In the CLSTM block, the input data is
+        viewed as a multivariate time series with a single time stamp. In
+        contrast, the CCNN block receives univariate data with numerous time
+        stamps
+        "
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_instances (n), series_length (m), n_dimensions (d))
+            The input data, already transposed to Keras (m, d) layout.
+
+        Returns
+        -------
+        trainX : np.ndarray
+            The input to be fed to the two arms of CNTC.
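+
+        Examples
+        --------
+        A minimal sketch of the context step only, with hypothetical toy
+        shapes: the rolling mean leaves NaN in the first ``window - 1``
+        slots, which are zero-filled and concatenated to the raw series.
+
+        >>> import numpy as np  # doctest: +SKIP
+        >>> import pandas as pd  # doctest: +SKIP
+        >>> trainX1 = np.arange(8.0).reshape(2, 4)  # doctest: +SKIP
+        >>> window = pd.DataFrame(trainX1).rolling(3).mean().fillna(0)  # doctest: +SKIP
+        >>> np.concatenate((trainX1, window), axis=1).shape  # doctest: +SKIP
+        (2, 8)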
+ """ + import numpy as np + import pandas as pd + from tensorflow import keras + + if X.shape[2] == 1: + # Converting data to pandas + trainX1 = X.reshape([X.shape[0], X.shape[1]]) + pd_trainX = pd.DataFrame(trainX1) + + # Taking rolling window + window = pd_trainX.rolling(window=3).mean() + window = window.fillna(0) + + trainX2 = np.concatenate((trainX1, window), axis=1) + trainX2 = keras.backend.variable(trainX2) + trainX2 = keras.layers.Dense( + trainX1.shape[1], input_shape=(trainX2.shape[1:]) + )(trainX2) + trainX2 = keras.backend.eval(trainX2) + trainX = trainX2.reshape((trainX2.shape[0], trainX2.shape[1], 1)) + else: + trainXs = [] + for i in range(X.shape[2]): + trainX1 = X[:, :, i] + pd_trainX = pd.DataFrame(trainX1) + + window = pd_trainX.rolling(window=3).mean() + window = window.fillna(0) + + trainX2 = np.concatenate((trainX1, window), axis=1) + trainX2 = keras.backend.variable(trainX2) + trainX2 = keras.layers.Dense( + trainX1.shape[1], input_shape=(trainX2.shape[1:]) + )(trainX2) + trainX2 = keras.backend.eval(trainX2) + + trainX = trainX2.reshape((trainX2.shape[0], trainX2.shape[1], 1)) + trainXs.append(trainX) + + trainX = np.concatenate(trainXs, axis=2) + return trainX + + def _fit(self, X, y): + """Fit the regressor on the training set (X, y). + + Parameters + ---------- + X : np.ndarray of shape = (n_instances (n), n_dimensions (d), series_length (m)) + The training input samples. + y : np.ndarray of shape n + The training data class labels. + + Returns + ------- + self : object + """ + if self.callbacks is None: + self._callbacks = [] + # Transpose to conform to Keras input style. + X = X.transpose(0, 2, 1) + + check_random_state(self.random_state) + self.input_shape = X.shape[1:] + self.model_ = self.build_model(self.input_shape) + X2 = self.prepare_input(X) + if self.verbose: + self.model_.summary() + self.history = self.model_.fit( + [X2, X, X], + y, + batch_size=self.batch_size, + epochs=self.n_epochs, + verbose=self.verbose, + callbacks=self._callbacks, + ) + return self + + def _predict(self, X, **kwargs): + """Find regression estimate for all cases in X. + + Parameters + ---------- + X : an np.ndarray of shape = (n_instances, n_dimensions, series_length) + The training input samples. + + Returns + ------- + output : array of shape = [n_instances, n_classes] of probabilities + """ + # Transpose to work correctly with keras + X = X.transpose((0, 2, 1)) + X2 = self.prepare_input(X) + preds = self.model_.predict([X2, X, X], self.batch_size, **kwargs) + return preds diff --git a/sktime/regression/deep_learning/inceptiontime.py b/sktime/regression/deep_learning/inceptiontime.py new file mode 100644 index 00000000000..9e5e7d0559a --- /dev/null +++ b/sktime/regression/deep_learning/inceptiontime.py @@ -0,0 +1,235 @@ +"""InceptionTime for Regression.""" +__author__ = "james-large" +__all__ = ["InceptionTimeRegressor"] + +from copy import deepcopy + +from sklearn.utils import check_random_state + +from sktime.networks.inceptiontime import InceptionTimeNetwork +from sktime.regression.deep_learning.base import BaseDeepRegressor +from sktime.utils.validation._dependencies import _check_dl_dependencies + + +class InceptionTimeRegressor(BaseDeepRegressor): + """InceptionTime Deep Learning Regressor. 
+    """
+
+    _tags = {
+        # packaging info
+        # --------------
+        "authors": ["james-large"],
+        "maintainers": ["james-large", "nilesh05apr"],
+        "python_dependencies": "tensorflow",
+        # estimator type handled by parent class
+    }
+
+    def __init__(
+        self,
+        n_epochs=1500,
+        batch_size=64,
+        kernel_size=40,
+        n_filters=32,
+        use_residual=True,
+        use_bottleneck=True,
+        bottleneck_size=32,
+        depth=6,
+        callbacks=None,
+        random_state=None,
+        verbose=False,
+        loss="mean_squared_error",
+        metrics=None,
+    ):
+        _check_dl_dependencies(severity="error")
+        super().__init__()
+
+        # predefined
+        self.batch_size = batch_size
+        self.bottleneck_size = bottleneck_size
+        self.callbacks = callbacks
+        self.depth = depth
+        self.kernel_size = kernel_size
+        self.loss = loss
+        self.metrics = metrics
+        self.n_epochs = n_epochs
+        self.n_filters = n_filters
+        self.random_state = random_state
+        self.use_bottleneck = use_bottleneck
+        self.use_residual = use_residual
+        self.verbose = verbose
+        self._is_fitted = False
+
+        network_params = {
+            "n_filters": n_filters,
+            "use_residual": use_residual,
+            "use_bottleneck": use_bottleneck,
+            "bottleneck_size": bottleneck_size,
+            "depth": depth,
+            "kernel_size": kernel_size,
+            "random_state": random_state,
+        }
+
+        self._network = InceptionTimeNetwork(**network_params)
+
+    def build_model(self, input_shape, **kwargs):
+        """Construct a compiled, un-trained, keras model that is ready for training.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            The shape of the data fed into the input layer
+
+        Returns
+        -------
+        output : a compiled Keras Model
+        """
+        from tensorflow import keras
+
+        input_layer, output_layer = self._network.build_network(input_shape, **kwargs)
+
+        output_layer = keras.layers.Dense(1)(output_layer)
+
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+
+        # if user hasn't provided own metrics, use accuracy
+        if self.metrics is None:
+            metrics = ["accuracy"]
+        else:
+            metrics = self.metrics
+
+        model.compile(
+            loss=self.loss,
+            optimizer=keras.optimizers.Adam(),
+            metrics=metrics,
+        )
+
+        return model
+
+    def _fit(self, X, y):
+        """Fit the regressor on the training set (X, y).
+
+        Parameters
+        ----------
+        X : np.ndarray of shape = (n_instances (n), n_dimensions (d), series_length (m))
+            The training input samples.
+        y : np.ndarray of shape n
+            The training data target values.
+
+        Returns
+        -------
+        self : object
+        """
+        # Transpose to conform to Keras input style.
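+        # A shape sketch with hypothetical numbers: sktime passes X as
+        # (n_instances, n_dimensions, series_length), e.g. (10, 3, 50);
+        # Keras expects (n_instances, series_length, n_dimensions), so the
+        # transpose below yields (10, 50, 3).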
+        X = X.transpose(0, 2, 1)
+
+        check_random_state(self.random_state)
+        self.input_shape = X.shape[1:]
+        self.model_ = self.build_model(self.input_shape)
+        if self.verbose:
+            self.model_.summary()
+
+        callbacks = self._check_callbacks(self.callbacks)
+
+        self.history = self.model_.fit(
+            X,
+            y,
+            batch_size=self.batch_size,
+            epochs=self.n_epochs,
+            verbose=self.verbose,
+            callbacks=deepcopy(callbacks) if callbacks else [],
+        )
+        return self
+
+    def _check_callbacks(self, callbacks):
+        from tensorflow import keras
+
+        # if the user has not provided a custom ReduceLROnPlateau via init,
+        # add the default from the literature
+        if callbacks is None:
+            callbacks = []
+
+        if not any(
+            isinstance(callback, keras.callbacks.ReduceLROnPlateau)
+            for callback in callbacks
+        ):
+            reduce_lr = keras.callbacks.ReduceLROnPlateau(
+                monitor="loss", factor=0.5, patience=50, min_lr=0.0001
+            )
+            callbacks = callbacks + [reduce_lr]
+        return callbacks
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+            For regressors, a "default" set of parameters should be provided for
+            general testing, and a "results_comparison" set for comparing against
+            previously recorded results if the general set does not produce suitable
+            predictions to compare against.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+            `create_test_instance` uses the first (or only) dictionary in `params`.
+        """
+        from sktime.utils.validation._dependencies import _check_soft_dependencies
+
+        param1 = {
+            "n_epochs": 10,
+            "batch_size": 4,
+        }
+
+        param2 = {
+            "n_epochs": 12,
+            "batch_size": 6,
+        }
+        test_params = [param1, param2]
+
+        if _check_soft_dependencies("keras", severity="none"):
+            from keras.callbacks import LambdaCallback
+
+            test_params.append(
+                {
+                    "n_epochs": 2,
+                    "callbacks": [LambdaCallback()],
+                }
+            )
+
+        return test_params
diff --git a/sktime/regression/deep_learning/macnn.py b/sktime/regression/deep_learning/macnn.py
new file mode 100644
index 00000000000..09fb22887de
--- /dev/null
+++ b/sktime/regression/deep_learning/macnn.py
@@ -0,0 +1,243 @@
+"""Multi-scale Attention Convolutional Neural Regressor."""
+
+__author__ = ["jnrusson1"]
+__all__ = ["MACNNRegressor"]
+
+from copy import deepcopy
+
+from sklearn.utils import check_random_state
+
+from sktime.networks.macnn import MACNNNetwork
+from sktime.regression.deep_learning.base import BaseDeepRegressor
+from sktime.utils.validation._dependencies import _check_dl_dependencies
+
+
+class MACNNRegressor(BaseDeepRegressor):
+    """Multi-Scale Attention Convolutional Neural Regressor, as described in [1]_.
+
+    Parameters
+    ----------
+    n_epochs : int, optional (default=1500)
+        The number of epochs to train the model.
+    batch_size : int, optional (default=4)
+        The number of samples per gradient update.
+    padding : str, optional (default="same")
+        The type of padding to be provided in MACNN Blocks. Accepts
+        all the string values that keras.layers supports.
+    pool_size : int, optional (default=3)
+        A single value representing pooling windows which are applied
+        between two MACNN Blocks.
+    strides : int, optional (default=2)
+        A single value representing strides to be taken during the
+        pooling operation.
+    repeats : int, optional (default=2)
+        The number of MACNN Blocks to be stacked.
+    filter_sizes : tuple, optional (default=(64, 128, 256))
+        The number of filters in the Conv1D layers within each MACNN Block.
+    kernel_size : tuple, optional (default=(3, 6, 12))
+        The kernel sizes of the Conv1D layers within each MACNN Block.
+    reduction : int, optional (default=16)
+        The factor by which the units of the first dense layer of a
+        MACNN Block are reduced.
+    loss : str, optional (default="mean_squared_error")
+        The name of the loss function to be used during training,
+        should be supported by keras.
+    activation : str, optional (default="sigmoid")
+        Keras activation function name; stored as a parameter, while the
+        output layer itself remains linear.
+    use_bias : bool, optional (default=True)
+        Whether bias should be included in the output layer.
+    metrics : None or list of strings, optional (default=None)
+        The metrics used during model compilation. If left as None,
+        then ["accuracy"] is passed to `model.compile()`.
+    optimizer : None or keras.optimizers.Optimizer instance, optional (default=None)
+        The optimizer that is used for model compilation. If left as None,
+        then `keras.optimizers.Adam(learning_rate=0.0001)` is used.
+    callbacks : None or list of keras.callbacks.Callback, optional (default=None)
+        The callback(s) to use during training.
+    random_state : int, optional (default=0)
+        The seed to any random action.
+    verbose : bool, optional (default=False)
+        Verbosity during model training, making it `True` will
+        print model summary, training information etc.
+
+    References
+    ----------
+    .. [1] Wei Chen et al., Multi-scale Attention Convolutional
+        Neural Network for time series classification,
+        Neural Networks, Volume 136, 2021, Pages 126-140, ISSN 0893-6080,
+        https://doi.org/10.1016/j.neunet.2021.01.001.
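+
+    Examples
+    --------
+    An illustrative usage sketch; the single epoch and single MACNN Block
+    keep the network small for demonstration purposes only.
+
+    >>> from sktime.regression.deep_learning.macnn import MACNNRegressor
+    >>> from sktime.datasets import load_unit_test
+    >>> X_train, y_train = load_unit_test(return_X_y=True, split="train")
+    >>> regressor = MACNNRegressor(n_epochs=1, repeats=1)  # doctest: +SKIP
+    >>> regressor.fit(X_train, y_train)  # doctest: +SKIP
+    MACNNRegressor(...)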
+    """
+
+    _tags = {
+        # packaging info
+        # --------------
+        "authors": ["jnrusson1"],
+        "maintainers": ["jnrusson1", "nilesh05apr"],
+        "python_dependencies": "tensorflow",
+        # estimator type handled by parent class
+    }
+
+    def __init__(
+        self,
+        n_epochs=1500,
+        batch_size=4,
+        padding="same",
+        pool_size=3,
+        strides=2,
+        repeats=2,
+        filter_sizes=(64, 128, 256),
+        kernel_size=(3, 6, 12),
+        reduction=16,
+        loss="mean_squared_error",
+        activation="sigmoid",
+        use_bias=True,
+        metrics=None,
+        optimizer=None,
+        callbacks=None,
+        random_state=0,
+        verbose=False,
+    ):
+        _check_dl_dependencies(severity="error")
+        super().__init__()
+
+        self.n_epochs = n_epochs
+        self.batch_size = batch_size
+        self.padding = padding
+        self.pool_size = pool_size
+        self.strides = strides
+        self.repeats = repeats
+        self.filter_sizes = filter_sizes
+        self.kernel_size = kernel_size
+        self.reduction = reduction
+        self.loss = loss
+        self.activation = activation
+        self.use_bias = use_bias
+        self.metrics = metrics
+        self.optimizer = optimizer
+        self.callbacks = callbacks
+        self.random_state = random_state
+        self.verbose = verbose
+        self.history = None
+        self._network = MACNNNetwork(
+            padding=self.padding,
+            pool_size=self.pool_size,
+            strides=self.strides,
+            repeats=self.repeats,
+            filter_sizes=self.filter_sizes,
+            kernel_size=self.kernel_size,
+            reduction=self.reduction,
+            random_state=self.random_state,
+        )
+
+    def build_model(self, input_shape, **kwargs):
+        """Construct a compiled, un-trained, keras model that is ready for training.
+
+        In sktime, time series are stored in numpy arrays of shape (d,m), where d
+        is the number of dimensions, m is the series length. Keras/tensorflow assume
+        data is in shape (m,d). This method also assumes (m,d). Transpose should
+        happen in fit.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            The shape of the data fed into the input layer, should be (m,d)
+
+        Returns
+        -------
+        output : a compiled Keras Model
+        """
+        import tensorflow as tf
+        from tensorflow import keras
+
+        tf.random.set_seed(self.random_state)
+
+        metrics = ["accuracy"] if self.metrics is None else self.metrics
+
+        input_layer, output_layer = self._network.build_network(input_shape, **kwargs)
+
+        output_layer = keras.layers.Dense(units=1, use_bias=self.use_bias)(
+            output_layer
+        )
+
+        self.optimizer_ = (
+            keras.optimizers.Adam(learning_rate=0.0001)
+            if self.optimizer is None
+            else self.optimizer
+        )
+
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+        model.compile(
+            loss=self.loss,
+            optimizer=self.optimizer_,
+            metrics=metrics,
+        )
+
+        return model
+
+    def _fit(self, X, y):
+        """Fit the regressor on the training set (X, y).
+
+        Parameters
+        ----------
+        X : np.ndarray of shape = (n_instances (n), n_dimensions (d), series_length (m))
+            The training input samples.
+        y : np.ndarray of shape n
+            The training data target values.
+
+        Returns
+        -------
+        self : object
+        """
+        # Transpose to conform to Keras input style.
+        X = X.transpose(0, 2, 1)
+
+        check_random_state(self.random_state)
+        self.input_shape = X.shape[1:]
+        self.model_ = self.build_model(self.input_shape)
+        self.callbacks_ = deepcopy(self.callbacks)
+
+        if self.verbose:
+            self.model_.summary()
+
+        self.history = self.model_.fit(
+            X,
+            y,
+            batch_size=self.batch_size,
+            epochs=self.n_epochs,
+            verbose=self.verbose,
+            callbacks=self.callbacks_,
+        )
+
+        return self
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, optional (default="default")
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+            For regressors, a "default" set of parameters should be provided for
+            general testing, and a "results_comparison" set for comparing against
+            previously recorded results if the general set does not produce suitable
+            predictions to compare against.
+
+        Returns
+        -------
+        params : dict or list of dict
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+            `create_test_instance` uses the first (or only) dictionary in `params`.
+ """ + params1 = { + "n_epochs": 5, + "batch_size": 3, + "filter_sizes": (2, 4, 8), + "repeats": 1, + } + + params2 = { + "n_epochs": 1, + "filter_sizes": (1, 2, 4), + "reduction": 8, + "repeats": 1, + "random_state": 1, + } + + return [params1, params2] diff --git a/sktime/tests/_config.py b/sktime/tests/_config.py index d8c7c7ff915..144c8adcd72 100644 --- a/sktime/tests/_config.py +++ b/sktime/tests/_config.py @@ -47,6 +47,10 @@ "ResNetRegressor", "FCNRegressor", "LSTMFCNRegressor", + "MACNNRegressor", + "InceptionTimeRegressor", + "CNTCClassifier", + "CNTCRegressor", ] @@ -156,6 +160,15 @@ "FCNRegressor": [ "test_fit_idempotent", ], + "MACNNRegressor": [ + "test_fit_idempotent", + ], + "InceptionTimeRegressor": [ + "test_fit_idempotent", + ], + "CNTCRegressor": [ + "test_fit_idempotent", + ], # sth is not quite right with the RowTransformer-s changing state, # but these are anyway on their path to deprecation, see #2370 "SeriesToPrimitivesRowTransformer": ["test_methods_do_not_change_state"],