Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search near init params and some mini fixes #985

Merged
merged 6 commits into from
Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions examples/simple/pipeline_tune.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

import numpy as np
from sklearn.metrics import roc_auc_score as roc_auc

Expand Down Expand Up @@ -34,15 +36,15 @@ def pipeline_tuning(pipeline: Pipeline, train_data: InputData,
:return several_iter_scores_test: list with metrics
"""
several_iter_scores_test = []
tuner = TunerBuilder(train_data.task) \
.with_tuner(PipelineTuner) \
.with_metric(ClassificationMetricsEnum.ROCAUC) \
.with_iterations(tuner_iter_num) \
.build(train_data)
for iteration in range(local_iter):
print(f'current local iteration {iteration}')

# Pipeline tuning
tuner = TunerBuilder(train_data.task)\
.with_tuner(PipelineTuner)\
.with_metric(ClassificationMetricsEnum.ROCAUC)\
.with_iterations(tuner_iter_num) \
.build(train_data)
tuned_pipeline = tuner.tune(pipeline)

# After tuning prediction
Expand All @@ -54,8 +56,8 @@ def pipeline_tuning(pipeline: Pipeline, train_data: InputData,
y_score=after_tuning_predicted.predict)
several_iter_scores_test.append(aft_tun_roc_auc)

mean_metric = float(np.mean(several_iter_scores_test))
return mean_metric, several_iter_scores_test
max_metric = float(np.max(several_iter_scores_test))
return max_metric, several_iter_scores_test


if __name__ == '__main__':
Expand All @@ -78,6 +80,6 @@ def pipeline_tuning(pipeline: Pipeline, train_data: InputData,
local_iter=local_iter)

print(f'Several test scores {several_iter_scores_test}')
print(f'Mean test score over {local_iter} iterations: {after_tune_roc_auc}')
print(round(bfr_tun_roc_auc, 3))
print(round(after_tune_roc_auc, 3))
print(f'Maximal test score over {local_iter} iterations: {after_tune_roc_auc}')
print(f'ROC-AUC before tuning {round(bfr_tun_roc_auc, 3)}')
print(f'ROC-AUC after tuning {round(after_tune_roc_auc, 3)}')
2 changes: 1 addition & 1 deletion fedot/api/api_utils/api_composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def tune_final_pipeline(self, task: Task,
self.log.message('Hyperparameters tuning finished')
else:
self.log.message(f'Time for pipeline composing was {str(self.timer.composing_spend_time)}.\n'
f'The remaining {max(0, timeout_for_tuning)} seconds are not enough '
f'The remaining {max(0, round(timeout_for_tuning, 1))} seconds are not enough '
f'to tune the hyperparameters.')
self.log.message('Composed pipeline returned without tuning.')
tuned_pipeline = pipeline_gp_composed
Expand Down
2 changes: 1 addition & 1 deletion fedot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def fit(self,
self.current_pipeline.preprocessor = merge_preprocessors(self.data_processor.preprocessor,
self.current_pipeline.preprocessor)

self.params.api_params['logger'].message(f'Final pipeline: {str(self.current_pipeline)}')
self.params.api_params['logger'].message(f'Final pipeline: {self.current_pipeline.structure}')

return self.current_pipeline

Expand Down
1 change: 1 addition & 0 deletions fedot/core/optimisers/objective/data_objective_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def evaluate(self, graph: Pipeline) -> Fitness:
self._log.debug(f'Pipeline {graph_id} with evaluated metrics: {folds_metrics}')
else:
folds_metrics = None

return to_fitness(folds_metrics, self._objective.is_multi_objective)

def prepare_graph(self, graph: Pipeline, train_data: InputData,
Expand Down
5 changes: 3 additions & 2 deletions fedot/core/optimisers/populational_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ def _update_population(self, next_population: PopulationT, label: Optional[str]

self.log.info(f'Generation num: {self.current_generation_num}')
self.log.info(f'Best individuals: {str(self.generations)}')
self.log.info(f'no improvements for {self.generations.stagnation_iter_count} iterations')
self.log.info(f'spent time: {round(self.timer.minutes_from_start, 1)} min')
if self.generations.stagnation_iter_count > 0:
self.log.info(f'no improvements for {self.generations.stagnation_iter_count} iterations')
self.log.info(f'spent time: {round(self.timer.minutes_from_start, 1)} min')

def _log_to_history(self, population: PopulationT, label: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None):
Expand Down
3 changes: 1 addition & 2 deletions fedot/core/pipelines/node.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple, Union, Iterable
from typing import Any, List, Optional, Tuple, Union

import numpy as np

from fedot.core.dag.graph_node import GraphNode
from fedot.core.dag.linked_graph_node import LinkedGraphNode
from fedot.core.data.data import InputData, OutputData
from fedot.core.data.merge.data_merger import DataMerger
Expand Down
14 changes: 11 additions & 3 deletions fedot/core/pipelines/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,14 +337,22 @@ def _assign_data_to_nodes(self, input_data: Union[InputData, MultiModalData]) ->
return None
return input_data

@property
def structure(self) -> str:
    """Human-readable description of the pipeline.

    Returns:
        str: the pipeline representation (``str(self)``) followed by one
        line per node in the form ``<operation_type> - <parameters>``.
    """
    node_descriptions = (
        f'{node.operation.operation_type} - {node.parameters}'
        for node in self.nodes
    )
    return '\n'.join([str(self), *node_descriptions])

def print_structure(self):
""" Prints structural information about the pipeline
""" Prints structure of the pipeline
"""

print(
'Pipeline structure:',
self,
*(f'{node.operation.operation_type} - {node.parameters}' for node in self.nodes),
self.structure,
sep='\n'
)

Expand Down
28 changes: 16 additions & 12 deletions fedot/core/pipelines/tuning/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ def get_parameters_dict(self):
'n_clusters': (hp.uniformint, [2, 7])
},
'adareg': {
'n_estimators': (hp.choice, [[100]]),

'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'loss': (hp.choice, [["linear", "square", "exponential"]])
},
'gbr': {
'n_estimators': (hp.choice, [[100]]),

'loss': (hp.choice, [["ls", "lad", "huber", "quantile"]]),
'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'max_depth': (hp.uniformint, [1, 11]),
Expand All @@ -43,7 +43,6 @@ def get_parameters_dict(self):
'C': (hp.uniform, [1e-2, 10.0])
},
'rf': {
'n_estimators': (hp.choice, [[100]]),
'criterion': (hp.choice, [["gini", "entropy"]]),
'max_features': (hp.uniform, [0.05, 1.0]),
'min_samples_split': (hp.uniformint, [2, 10]),
Expand All @@ -57,22 +56,22 @@ def get_parameters_dict(self):
'alpha': (hp.uniform, [0.01, 10.0])
},
'rfr': {
'n_estimators': (hp.choice, [[100]]),

'max_features': (hp.uniform, [0.05, 1.0]),
'min_samples_split': (hp.uniformint, [2, 21]),
'min_samples_leaf': (hp.uniformint, [1, 21]),
'bootstrap': (hp.choice, [[True, False]])
},
'xgbreg': {
'n_estimators': (hp.choice, [[100]]),

'max_depth': (hp.uniformint, [1, 11]),
'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'subsample': (hp.uniform, [0.05, 1.0]),
'min_child_weight': (hp.uniformint, [1, 21]),
'objective': (hp.choice, [['reg:squarederror']])
},
'xgboost': {
'n_estimators': (hp.choice, [[100]]),

'max_depth': (hp.uniformint, [1, 7]),
'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]),
'subsample': (hp.uniform, [0.05, 0.99]),
Expand All @@ -90,7 +89,7 @@ def get_parameters_dict(self):
'min_samples_leaf': (hp.uniformint, [1, 21])
},
'treg': {
'n_estimators': (hp.choice, [[100]]),

'max_features': (hp.uniform, [0.05, 1.0]),
'min_samples_split': (hp.uniformint, [2, 21]),
'min_samples_leaf': (hp.uniformint, [1, 21]),
Expand Down Expand Up @@ -346,11 +345,7 @@ def get_node_params(self, node_id, operation_name):
else:
params_dict = {}
for parameter_name in params_list:
# Name with operation and parameter
op_parameter_name = ''.join((operation_name, ' | ', parameter_name))

# Name with node id || operation | parameter
node_op_parameter_name = ''.join((str(node_id), ' || ', op_parameter_name))
node_op_parameter_name = get_node_operation_parameter_label(node_id, operation_name, parameter_name)

# For operation get range where search can be done
space = self.get_operation_parameter_range(operation_name=operation_name,
Expand All @@ -362,6 +357,15 @@ def get_node_params(self, node_id, operation_name):
return params_dict


def get_node_operation_parameter_label(node_id: int, operation_name: str, parameter_name: str) -> str:
    """Build the unique search-space label for a node's tunable parameter.

    The label has the form ``<node_id> || <operation_name> | <parameter_name>``,
    combining the operation/parameter pair with the node id so that identical
    operations in different nodes get distinct labels.

    Args:
        node_id: index of the node within the pipeline
        operation_name: name of the node's operation (e.g. ``'rf'``)
        parameter_name: name of the hyperparameter being tuned

    Returns:
        str: the composed label
    """
    # Operation-level part: "<operation> | <parameter>"
    operation_parameter_label = f'{operation_name} | {parameter_name}'

    # Node-level part prefixes the node id: "<id> || <operation> | <parameter>"
    return f'{node_id} || {operation_parameter_label}'


def convert_params(params):
"""
Function removes labels from dictionary with operations
Expand Down
12 changes: 9 additions & 3 deletions fedot/core/pipelines/tuning/tuner_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def get_metric_value(self, pipeline: Pipeline) -> float:
Returns:
value of loss function
"""
pipeline.unfit()
pipeline_fitness = self.objective_evaluate.evaluate(pipeline)
metric_value = pipeline_fitness.value
if not pipeline_fitness.valid:
Expand All @@ -87,6 +88,8 @@ def init_check(self, pipeline: Pipeline) -> None:
self.init_pipeline = deepcopy(pipeline)

self.init_metric = self.get_metric_value(pipeline=self.init_pipeline)
self.log.message(f'Initial pipeline: {self.init_pipeline.structure} \n'
f'Initial metric: {abs(self.init_metric):.3f}')

def final_check(self, tuned_pipeline: Pipeline):
"""
Expand All @@ -111,13 +114,16 @@ def final_check(self, tuned_pipeline: Pipeline):
init_metric = self.init_metric + deviation * np.sign(self.init_metric)
if self.obtained_metric is None:
self.log.info(f'{prefix_init_phrase} is None. Initial metric is {abs(init_metric):.3f}')
return self.init_pipeline
final_pipeline = self.init_pipeline

elif self.obtained_metric <= init_metric:
self.log.info(f'{prefix_tuned_phrase} {abs(self.obtained_metric):.3f} equal or '
f'better than initial (+ 5% deviation) {abs(init_metric):.3f}')
return tuned_pipeline
final_pipeline = tuned_pipeline
else:
self.log.info(f'{prefix_init_phrase} {abs(self.obtained_metric):.3f} '
f'worse than initial (+ 5% deviation) {abs(init_metric):.3f}')
return self.init_pipeline
final_pipeline = self.init_pipeline
self.log.message(f'Final pipeline: {final_pipeline.structure} \n'
f'Final metric: {abs(self.obtained_metric):.3f}')
return final_pipeline
75 changes: 65 additions & 10 deletions fedot/core/pipelines/tuning/unified.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from functools import partial
from typing import Tuple

from hyperopt import fmin, space_eval
from hyperopt import fmin, space_eval, hp, Trials

from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.search_space import convert_params
from fedot.core.pipelines.tuning.search_space import convert_params, get_node_operation_parameter_label
from fedot.core.pipelines.tuning.tuner_interface import HyperoptTuner


Expand All @@ -18,21 +19,37 @@ def tune(self, pipeline: Pipeline, show_progress: bool = True) -> Pipeline:
:param pipeline: Pipeline which hyperparameters will be tuned
:param show_progress: shows progress of tuning if true
"""
parameters_dict = self._get_parameters_for_tune(pipeline)

# Check source metrics for data
parameters_dict, init_parameters, is_init_params_full = self._get_parameters_for_tune(pipeline)
self.init_check(pipeline)

pipeline.replace_n_jobs_in_nodes(n_jobs=self.n_jobs)

trials = Trials()

# try searching using initial parameters (uses original search space with fixed initial parameters)
try_initial_parameters = init_parameters and self.iterations > 1

if try_initial_parameters:
trials, init_trials_num = self._search_near_initial_parameters(pipeline, init_parameters,
is_init_params_full, trials,
Comment on lines +31 to +35
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Тут нужен какой-то комментарий, поясняющий что происходит. А то потом сложно будет вспомнить, зачем это.

show_progress)

best = fmin(partial(self._objective, pipeline=pipeline),
parameters_dict,
trials=trials,
algo=self.algo,
max_evals=self.iterations,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=self.max_seconds)

# check if best point was obtained using search space with fixed initial parameters
if try_initial_parameters:
is_best_trial_with_init_params = trials.best_trial.get('tid') in range(init_trials_num)
# replace search space
parameters_dict = init_parameters if is_best_trial_with_init_params else parameters_dict

best = space_eval(space=parameters_dict, hp_assignment=best)

tuned_pipeline = self.set_arg_pipeline(pipeline=pipeline,
Expand All @@ -43,14 +60,34 @@ def tune(self, pipeline: Pipeline, show_progress: bool = True) -> Pipeline:

return final_pipeline

def _get_parameters_for_tune(self, pipeline: Pipeline) -> dict:
def _search_near_initial_parameters(self, pipeline: Pipeline, initial_parameters: dict,
is_init_parameters_full: bool, trials: Trials,
show_progress: bool = True):
if self.iterations >= 10 and not is_init_parameters_full:
init_trials_num = min(int(self.iterations * 0.1), 10)
else:
init_trials_num = 1

# fmin updates trials with evaluation points tried out during the call
fmin(partial(self._objective, pipeline=pipeline),
initial_parameters,
trials=trials,
algo=self.algo,
max_evals=init_trials_num,
show_progressbar=show_progress,
early_stop_fn=self.early_stop_fn,
timeout=self.max_seconds)
Comment on lines +72 to +79
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

А это так и задумано что fmin ничего не возвращает? Если да, то стоит пояснить.

return trials, init_trials_num

def _get_parameters_for_tune(self, pipeline: Pipeline) -> Tuple[dict, dict, bool]:
"""
Function for defining the search space

:return parameters_dict: dictionary with operation names and parameters
"""

parameters_dict = {}
initial_parameters = {}
for node_id, node in enumerate(pipeline.nodes):
operation_name = node.operation.operation_type

Expand All @@ -59,9 +96,28 @@ def _get_parameters_for_tune(self, pipeline: Pipeline) -> dict:
node_params = self.search_space.get_node_params(node_id=node_id,
operation_name=operation_name)

parameters_dict.update({node_id: node_params})

return parameters_dict
if node_params is not None:
parameters_dict.update(node_params)

tunable_node_params = self.search_space.get_operation_parameter_range(operation_name)
tunable_initial_params = {get_node_operation_parameter_label(node_id, operation_name, p):
node.parameters[p] for p in node.parameters if p in tunable_node_params}
if tunable_initial_params:
initial_parameters.update(tunable_initial_params)

# create search space with fixed initial parameters
init_params_space = {}
is_init_params_full = len(initial_parameters) == len(parameters_dict)
if initial_parameters:
for key in parameters_dict:
if key in initial_parameters:
value = initial_parameters[key]
# fix possible value for initial parameter (the value will be chosen with probability=1)
init_params_space[key] = hp.pchoice(key, [(1, value)])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Вот тут с hp.pchoice не очень понятно.

else:
init_params_space[key] = parameters_dict[key]

return parameters_dict, init_params_space, is_init_params_full

def _objective(self, parameters_dict: dict, pipeline: Pipeline) \
-> float:
Expand All @@ -76,7 +132,6 @@ def _objective(self, parameters_dict: dict, pipeline: Pipeline) \

# Set hyperparameters for every node
pipeline = self.set_arg_pipeline(pipeline=pipeline, parameters=parameters_dict)

metric_value = self.get_metric_value(pipeline=pipeline)
return metric_value

Expand All @@ -92,7 +147,7 @@ def set_arg_pipeline(pipeline: Pipeline, parameters: dict) -> Pipeline:

# Set hyperparameters for every node
for node_id, _ in enumerate(pipeline.nodes):
node_params = parameters.get(node_id)
node_params = {key: value for key, value in parameters.items() if key.startswith(str(node_id))}

if node_params is not None:
# Delete all prefix strings to get appropriate parameters names
Expand Down
Loading