Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search near init params and some mini fixes #985

Merged
merged 6 commits into from
Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions examples/simple/pipeline_tune.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

import numpy as np
from sklearn.metrics import roc_auc_score as roc_auc

Expand Down Expand Up @@ -34,15 +36,15 @@ def pipeline_tuning(pipeline: Pipeline, train_data: InputData,
:return several_iter_scores_test: list with metrics
"""
several_iter_scores_test = []
tuner = TunerBuilder(train_data.task) \
.with_tuner(PipelineTuner) \
.with_metric(ClassificationMetricsEnum.ROCAUC) \
.with_iterations(tuner_iter_num) \
.build(train_data)
for iteration in range(local_iter):
print(f'current local iteration {iteration}')

# Pipeline tuning
tuner = TunerBuilder(train_data.task)\
.with_tuner(PipelineTuner)\
.with_metric(ClassificationMetricsEnum.ROCAUC)\
.with_iterations(tuner_iter_num) \
.build(train_data)
tuned_pipeline = tuner.tune(pipeline)

# After tuning prediction
Expand All @@ -54,8 +56,8 @@ def pipeline_tuning(pipeline: Pipeline, train_data: InputData,
y_score=after_tuning_predicted.predict)
several_iter_scores_test.append(aft_tun_roc_auc)

mean_metric = float(np.mean(several_iter_scores_test))
return mean_metric, several_iter_scores_test
max_metric = float(np.max(several_iter_scores_test))
return max_metric, several_iter_scores_test


if __name__ == '__main__':
Expand All @@ -78,6 +80,6 @@ def pipeline_tuning(pipeline: Pipeline, train_data: InputData,
local_iter=local_iter)

print(f'Several test scores {several_iter_scores_test}')
print(f'Mean test score over {local_iter} iterations: {after_tune_roc_auc}')
print(round(bfr_tun_roc_auc, 3))
print(round(after_tune_roc_auc, 3))
print(f'Maximal test score over {local_iter} iterations: {after_tune_roc_auc}')
print(f'ROC-AUC before tuning {round(bfr_tun_roc_auc, 3)}')
print(f'ROC-AUC after tuning {round(after_tune_roc_auc, 3)}')
2 changes: 1 addition & 1 deletion fedot/api/api_utils/api_composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def tune_final_pipeline(self, task: Task,
self.log.message('Hyperparameters tuning finished')
else:
self.log.message(f'Time for pipeline composing was {str(self.timer.composing_spend_time)}.\n'
f'The remaining {max(0, timeout_for_tuning)} seconds are not enough '
f'The remaining {max(0, round(timeout_for_tuning, 1))} seconds are not enough '
f'to tune the hyperparameters.')
self.log.message('Composed pipeline returned without tuning.')
tuned_pipeline = pipeline_gp_composed
Expand Down
2 changes: 1 addition & 1 deletion fedot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def fit(self,
self.current_pipeline.preprocessor = merge_preprocessors(self.data_processor.preprocessor,
self.current_pipeline.preprocessor)

self.params.api_params['logger'].message(f'Final pipeline: {str(self.current_pipeline)}')
self.params.api_params['logger'].message(f'Final pipeline: {self.current_pipeline.structure}')

return self.current_pipeline

Expand Down
1 change: 1 addition & 0 deletions fedot/core/optimisers/objective/data_objective_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def evaluate(self, graph: Pipeline) -> Fitness:
self._log.debug(f'Pipeline {graph_id} with evaluated metrics: {folds_metrics}')
else:
folds_metrics = None

return to_fitness(folds_metrics, self._objective.is_multi_objective)

def prepare_graph(self, graph: Pipeline, train_data: InputData,
Expand Down
5 changes: 3 additions & 2 deletions fedot/core/optimisers/populational_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ def _update_population(self, next_population: PopulationT, label: Optional[str]

self.log.info(f'Generation num: {self.current_generation_num}')
self.log.info(f'Best individuals: {str(self.generations)}')
self.log.info(f'no improvements for {self.generations.stagnation_iter_count} iterations')
self.log.info(f'spent time: {round(self.timer.minutes_from_start, 1)} min')
if self.generations.stagnation_iter_count > 0:
self.log.info(f'no improvements for {self.generations.stagnation_iter_count} iterations')
self.log.info(f'spent time: {round(self.timer.minutes_from_start, 1)} min')

def _log_to_history(self, population: PopulationT, label: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None):
Expand Down
3 changes: 1 addition & 2 deletions fedot/core/pipelines/node.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple, Union, Iterable
from typing import Any, List, Optional, Tuple, Union

import numpy as np

from fedot.core.dag.graph_node import GraphNode
from fedot.core.dag.linked_graph_node import LinkedGraphNode
from fedot.core.data.data import InputData, OutputData
from fedot.core.data.merge.data_merger import DataMerger
Expand Down
14 changes: 11 additions & 3 deletions fedot/core/pipelines/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,14 +337,22 @@ def _assign_data_to_nodes(self, input_data: Union[InputData, MultiModalData]) ->
return None
return input_data

@property
def structure(self) -> str:
    """Human-readable description of the pipeline.

    Returns:
        str: the pipeline representation (``str(self)``) followed by one
        line per node in the form ``<operation_type> - <parameters>``.
    """
    node_descriptions = (
        f'{node.operation.operation_type} - {node.parameters}'
        for node in self.nodes
    )
    return '\n'.join([str(self), *node_descriptions])

def print_structure(self):
""" Prints structural information about the pipeline
""" Prints structure of the pipeline
"""

print(
'Pipeline structure:',
self,
*(f'{node.operation.operation_type} - {node.parameters}' for node in self.nodes),
self.structure,
sep='\n'
)

Expand Down
28 changes: 16 additions & 12 deletions fedot/core/pipelines/tuning/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ def get_parameters_dict(self):
'n_clusters': (hp.uniformint, [2, 7])
},
'adareg': {
'n_estimators': (hp.choice, [[100]]),

'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'loss': (hp.choice, [["linear", "square", "exponential"]])
},
'gbr': {
'n_estimators': (hp.choice, [[100]]),

'loss': (hp.choice, [["ls", "lad", "huber", "quantile"]]),
'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'max_depth': (hp.uniformint, [1, 11]),
Expand All @@ -43,7 +43,6 @@ def get_parameters_dict(self):
'C': (hp.uniform, [1e-2, 10.0])
},
'rf': {
'n_estimators': (hp.choice, [[100]]),
'criterion': (hp.choice, [["gini", "entropy"]]),
'max_features': (hp.uniform, [0.05, 1.0]),
'min_samples_split': (hp.uniformint, [2, 10]),
Expand All @@ -57,22 +56,22 @@ def get_parameters_dict(self):
'alpha': (hp.uniform, [0.01, 10.0])
},
'rfr': {
'n_estimators': (hp.choice, [[100]]),

'max_features': (hp.uniform, [0.05, 1.0]),
'min_samples_split': (hp.uniformint, [2, 21]),
'min_samples_leaf': (hp.uniformint, [1, 21]),
'bootstrap': (hp.choice, [[True, False]])
},
'xgbreg': {
'n_estimators': (hp.choice, [[100]]),

'max_depth': (hp.uniformint, [1, 11]),
'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'subsample': (hp.uniform, [0.05, 1.0]),
'min_child_weight': (hp.uniformint, [1, 21]),
'objective': (hp.choice, [['reg:squarederror']])
},
'xgboost': {
'n_estimators': (hp.choice, [[100]]),

'max_depth': (hp.uniformint, [1, 7]),
'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'subsample': (hp.uniform, [0.05, 0.99]),
Expand All @@ -90,7 +89,7 @@ def get_parameters_dict(self):
'min_samples_leaf': (hp.uniformint, [1, 21])
},
'treg': {
'n_estimators': (hp.choice, [[100]]),

'max_features': (hp.uniform, [0.05, 1.0]),
'min_samples_split': (hp.uniformint, [2, 21]),
'min_samples_leaf': (hp.uniformint, [1, 21]),
Expand Down Expand Up @@ -346,11 +345,7 @@ def get_node_params(self, node_id, operation_name):
else:
params_dict = {}
for parameter_name in params_list:
# Name with operation and parameter
op_parameter_name = ''.join((operation_name, ' | ', parameter_name))

# Name with node id || operation | parameter
node_op_parameter_name = ''.join((str(node_id), ' || ', op_parameter_name))
node_op_parameter_name = get_node_operation_parameter_label(node_id, operation_name, parameter_name)

# For operation get range where search can be done
space = self.get_operation_parameter_range(operation_name=operation_name,
Expand All @@ -362,6 +357,15 @@ def get_node_params(self, node_id, operation_name):
return params_dict


def get_node_operation_parameter_label(node_id: int, operation_name: str, parameter_name: str) -> str:
    """Build the unique search-space label for a node's tunable parameter.

    The label has the form ``<node_id> || <operation_name> | <parameter_name>``,
    combining the operation/parameter pair with the node id so that identical
    operations in different nodes get distinct labels.

    Args:
        node_id: index of the node within the pipeline
        operation_name: name of the node's operation (e.g. ``'rf'``)
        parameter_name: name of the hyperparameter being tuned

    Returns:
        str: the composed label
    """
    # Operation-level part: "<operation> | <parameter>"
    operation_parameter_label = f'{operation_name} | {parameter_name}'

    # Node-level part prefixes the node id: "<id> || <operation> | <parameter>"
    return f'{node_id} || {operation_parameter_label}'


def convert_params(params):
"""
Function removes labels from dictionary with operations
Expand Down
12 changes: 9 additions & 3 deletions fedot/core/pipelines/tuning/tuner_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def get_metric_value(self, pipeline: Pipeline) -> float:
Returns:
value of loss function
"""
pipeline.unfit()
pipeline_fitness = self.objective_evaluate.evaluate(pipeline)
metric_value = pipeline_fitness.value
if not pipeline_fitness.valid:
Expand All @@ -87,6 +88,8 @@ def init_check(self, pipeline: Pipeline) -> None:
self.init_pipeline = deepcopy(pipeline)

self.init_metric = self.get_metric_value(pipeline=self.init_pipeline)
self.log.message(f'Initial pipeline: {self.init_pipeline.structure} \n'
f'Initial metric: {abs(self.init_metric):.3f}')

def final_check(self, tuned_pipeline: Pipeline):
"""
Expand All @@ -111,13 +114,16 @@ def final_check(self, tuned_pipeline: Pipeline):
init_metric = self.init_metric + deviation * np.sign(self.init_metric)
if self.obtained_metric is None:
self.log.info(f'{prefix_init_phrase} is None. Initial metric is {abs(init_metric):.3f}')
return self.init_pipeline
final_pipeline = self.init_pipeline

elif self.obtained_metric <= init_metric:
self.log.info(f'{prefix_tuned_phrase} {abs(self.obtained_metric):.3f} equal or '
f'better than initial (+ 5% deviation) {abs(init_metric):.3f}')
return tuned_pipeline
final_pipeline = tuned_pipeline
else:
self.log.info(f'{prefix_init_phrase} {abs(self.obtained_metric):.3f} '
f'worse than initial (+ 5% deviation) {abs(init_metric):.3f}')
return self.init_pipeline
final_pipeline = self.init_pipeline
self.log.message(f'Final pipeline: {final_pipeline.structure} \n'
f'Final metric: {abs(self.obtained_metric):.3f}')
return final_pipeline
75 changes: 65 additions & 10 deletions fedot/core/pipelines/tuning/unified.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from functools import partial
from typing import Tuple

from hyperopt import fmin, space_eval
from hyperopt import fmin, space_eval, hp, Trials

from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.search_space import convert_params
from fedot.core.pipelines.tuning.search_space import convert_params, get_node_operation_parameter_label
from fedot.core.pipelines.tuning.tuner_interface import HyperoptTuner


Expand All @@ -18,21 +19,37 @@ def tune(self, pipeline: Pipeline, show_progress: bool = True) -> Pipeline:
:param pipeline: Pipeline which hyperparameters will be tuned
:param show_progress: shows progress of tuning if true
"""
parameters_dict = self._get_parameters_for_tune(pipeline)

# Check source metrics for data
parameters_dict, init_parameters, is_init_params_full = self._get_parameters_for_tune(pipeline)
self.init_check(pipeline)

pipeline.replace_n_jobs_in_nodes(n_jobs=self.n_jobs)

trials = Trials()

# try searching using initial parameters (uses original search space with fixed initial parameters)
try_initial_parameters = init_parameters and self.iterations > 1

if try_initial_parameters:
trials, init_trials_num = self._search_near_initial_parameters(pipeline, init_parameters,
is_init_params_full, trials,
Comment on lines +31 to +35
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Тут нужен какой-то комментарий, поясняющий что происходит. А то потом сложно будет вспомнить, зачем это.

show_progress)

best = fmin(partial(self._objective, pipeline=pipeline),
parameters_dict,
trials=trials,
algo=self.algo,
max_evals=self.iterations,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=self.max_seconds)

# check if best point was obtained using search space with fixed initial parameters
if try_initial_parameters:
is_best_trial_with_init_params = trials.best_trial.get('tid') in range(init_trials_num)
# replace search space
parameters_dict = init_parameters if is_best_trial_with_init_params else parameters_dict

best = space_eval(space=parameters_dict, hp_assignment=best)

tuned_pipeline = self.set_arg_pipeline(pipeline=pipeline,
Expand All @@ -43,14 +60,34 @@ def tune(self, pipeline: Pipeline, show_progress: bool = True) -> Pipeline:

return final_pipeline

def _get_parameters_for_tune(self, pipeline: Pipeline) -> dict:
def _search_near_initial_parameters(self, pipeline: Pipeline, initial_parameters: dict,
is_init_parameters_full: bool, trials: Trials,
show_progress: bool = True):
if self.iterations >= 10 and not is_init_parameters_full:
init_trials_num = min(int(self.iterations * 0.1), 10)
else:
init_trials_num = 1

# fmin updates trials with evaluation points tried out during the call
fmin(partial(self._objective, pipeline=pipeline),
initial_parameters,
trials=trials,
algo=self.algo,
max_evals=init_trials_num,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=self.max_seconds)
Comment on lines +72 to +79
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А это так и задумано что fmin ничего не возвращает? Если да, то стоит пояснить.

return trials, init_trials_num

def _get_parameters_for_tune(self, pipeline: Pipeline) -> Tuple[dict, dict, bool]:
"""
Function for defining the search space

:return parameters_dict: dictionary with operation names and parameters
"""

parameters_dict = {}
initial_parameters = {}
for node_id, node in enumerate(pipeline.nodes):
operation_name = node.operation.operation_type

Expand All @@ -59,9 +96,28 @@ def _get_parameters_for_tune(self, pipeline: Pipeline) -> dict:
node_params = self.search_space.get_node_params(node_id=node_id,
operation_name=operation_name)

parameters_dict.update({node_id: node_params})

return parameters_dict
if node_params is not None:
parameters_dict.update(node_params)

tunable_node_params = self.search_space.get_operation_parameter_range(operation_name)
tunable_initial_params = {get_node_operation_parameter_label(node_id, operation_name, p):
node.parameters[p] for p in node.parameters if p in tunable_node_params}
if tunable_initial_params:
initial_parameters.update(tunable_initial_params)

# create search space with fixed initial parameters
init_params_space = {}
is_init_params_full = len(initial_parameters) == len(parameters_dict)
if initial_parameters:
for key in parameters_dict:
if key in initial_parameters:
value = initial_parameters[key]
# fix possible value for initial parameter (the value will be chosen with probability=1)
init_params_space[key] = hp.pchoice(key, [(1, value)])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Вот тут с hp.pchoice не очень понятно.

else:
init_params_space[key] = parameters_dict[key]

return parameters_dict, init_params_space, is_init_params_full

def _objective(self, parameters_dict: dict, pipeline: Pipeline) \
-> float:
Expand All @@ -76,7 +132,6 @@ def _objective(self, parameters_dict: dict, pipeline: Pipeline) \

# Set hyperparameters for every node
pipeline = self.set_arg_pipeline(pipeline=pipeline, parameters=parameters_dict)

metric_value = self.get_metric_value(pipeline=pipeline)
return metric_value

Expand All @@ -92,7 +147,7 @@ def set_arg_pipeline(pipeline: Pipeline, parameters: dict) -> Pipeline:

# Set hyperparameters for every node
for node_id, _ in enumerate(pipeline.nodes):
node_params = parameters.get(node_id)
node_params = {key: value for key, value in parameters.items() if key.startswith(str(node_id))}

if node_params is not None:
# Delete all prefix strings to get appropriate parameters names
Expand Down
Loading