Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Edit yaml task #1538

Merged
merged 2 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 49 additions & 4 deletions qlib/finco/task.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os

from pathlib import Path
from typing import Any, List
import io
from typing import Any, List, Union
from jinja2 import Template
import ruamel.yaml as yaml
import abc
import re
import logging
Expand Down Expand Up @@ -207,7 +209,11 @@ def execute(self):
new_task = []
# 1) create a workspace
# TODO: we have to make choice between `sl` and `sl-cfg`
new_task.append(CMDTask(cmd_intention=f"Copy folder from {get_tpl_path() / 'sl'} to {self._context_manager.get_context('workspace')}"))
new_task.append(
CMDTask(
cmd_intention=f"Copy folder from {get_tpl_path() / 'sl'} to {self._context_manager.get_context('workspace')}"
)
)

# 2) CURD on the workspace
for name, regex in regex_dict.items():
Expand Down Expand Up @@ -249,6 +255,7 @@ class CMDTask(ActionTask):
"""
This CMD task is responsible for ensuring compatibility across different operating systems.
"""

__DEFAULT_WORKFLOW_SYSTEM_PROMPT = """
You are an expert system administrator.
Your task is to convert the user's intention into a specific runnable command for a particular system.
Expand All @@ -271,8 +278,9 @@ def __init__(self, cmd_intention: str, cwd=None):
self._output = None

def execute(self):
prompt = Template(self.__DEFAULT_WORKFLOW_USER_PROMPT).render(cmd_intention=self.cmd_intention,
user_os=platform.system())
prompt = Template(self.__DEFAULT_WORKFLOW_USER_PROMPT).render(
cmd_intention=self.cmd_intention, user_os=platform.system()
)
response = APIBackend().build_messages_and_create_chat_completion(prompt, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
self._output = subprocess.check_output(response, shell=True, cwd=self.cwd)
return []
Expand Down Expand Up @@ -535,6 +543,43 @@ def execute(self):
return []


class YamlEditTask(ActionTask):
    """Replace one component of a YAML file in place.

    The file is parsed, the node addressed by a dot-separated ``module_path``
    is overwritten with the parsed ``updated_content``, and the whole document
    is written back to the same file.
    """

    def __init__(self, file: Union[str, Path], module_path: str, updated_content: str):
        """

        Parameters
        ----------
        file
            a target file that needs to be modified
        module_path
            the dot-separated path to the section that needs to be replaced
            with `updated_content` (e.g. ``"task.model"``)
        updated_content
            YAML text that replaces the original content in `module_path`
        """
        self.p = Path(file)
        self.module_path = module_path
        self.updated_content = updated_content

    def execute(self):
        """Apply the replacement and rewrite the file.

        Returns
        -------
        list
            Always an empty list, the same value the other ``execute``
            implementations visible in this file return.

        Raises
        ------
        KeyError
            Presumably raised when an intermediate key of `module_path` is
            missing (assumes the parsed config is made of plain mappings).
        """
        # 1) read original and new content
        with self.p.open("r") as f:
            config = yaml.safe_load(f)
        update_config = yaml.safe_load(io.StringIO(self.updated_content))

        # 2) locate the parent of the module: every key except the last one
        #    is walked, the last one is the slot that gets overwritten
        *parent_keys, leaf_key = self.module_path.split(".")
        focus = config
        for k in parent_keys:
            focus = focus[k]

        # 3) replace the module and save
        focus[leaf_key] = update_config
        # Serialize into memory first: opening the file with "w" truncates it
        # immediately, so a failure inside `yaml.dump` would otherwise leave
        # the target file empty or half-written.
        buffer = io.StringIO()
        yaml.dump(config, buffer)
        with self.p.open("w") as f:
            f.write(buffer.getvalue())
        return []


class SummarizeTask(Task):
__DEFAULT_WORKSPACE = "./"

Expand Down
1 change: 0 additions & 1 deletion qlib/finco/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def __init__(self, workspace=None) -> None:
self._context.set_context("workspace", self._workspace)
self.default_user_prompt = "Please help me build a low turnover strategy that focus more on longterm return in China a stock market. I want to construct a new dataset covers longer history"


def _confirm_and_rm(self):
# if workspace exists, please confirm and remove it. Otherwise exit.
if self._workspace.exists():
Expand Down
44 changes: 40 additions & 4 deletions tests/finco/test_cfg.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,71 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import unittest
import shutil
import difflib
from qlib.finco.tpl import get_tpl_path
import ruamel.yaml as yaml

from qlib.data.dataset.handler import DataHandlerLP
from qlib.utils import init_instance_by_config
from qlib.tests import TestAutoData

from pathlib import Path
from qlib.finco.tpl import get_tpl_path
from qlib.finco.task import YamlEditTask

DIRNAME = Path(__file__).absolute().resolve().parent

class FincoTpl(TestAutoData):

class FincoTpl(TestAutoData):
def test_tpl_consistence(self):
"""Motivation: make sure the configuable template is consistent with the default config"""
tpl_p = get_tpl_path()
with (tpl_p / "sl" / "workflow_config.yaml").open("rb") as fp:
config = yaml.safe_load(fp)
# init_data_handler
hd: DataHandlerLP = init_instance_by_config(config["task"]["dataset"]["kwargs"]["handler"])
# NOTE: The config in workflow_config_ds.yaml is generated by the following code:
# NOTE: The config in workflow_config.yaml is generated by the following code:
# dump in yaml format to file without auto linebreak
# print(yaml.dump(hd.data_loader.fields, width=10000, stream=open("_tmp", "w")))

with (tpl_p / "sl-cfg" / "workflow_config_ds.yaml").open("rb") as fp:
with (tpl_p / "sl-cfg" / "workflow_config.yaml").open("rb") as fp:
config = yaml.safe_load(fp)
hd_ds: DataHandlerLP = init_instance_by_config(config["task"]["dataset"]["kwargs"]["handler"])
self.assertEqual(hd_ds.data_loader.fields, hd.data_loader.fields)

check = hd_ds.fetch().fillna(0.) == hd.fetch().fillna(0.)
check = hd_ds.fetch().fillna(0.0) == hd.fetch().fillna(0.0)
self.assertTrue(check.all().all())

def test_update_yaml(self):
    """YamlEditTask must replace `task.model` and leave everything else untouched."""
    p = get_tpl_path() / "sl" / "workflow_config.yaml"
    p_new = DIRNAME / "_test_config.yaml"
    shutil.copy(p, p_new)
    # Remove the scratch copy even when an assertion below fails, so the
    # tests directory is not polluted between runs.
    self.addCleanup(p_new.unlink)

    updated_content = """
    class: LGBModelTest
    module_path: qlib.contrib.model.gbdt
    kwargs:
        loss: mse
        colsample_bytree: 1.8879
        learning_rate: 0.3
        subsample: 0.8790
        lambda_l1: 205.7000
        lambda_l2: 580.9769
        max_depth: 9
        num_leaves: 211
        num_threads: 21
    """
    t = YamlEditTask(p_new, "task.model", updated_content)
    t.execute()

    # NOTE: the format is changed by ruamel.yaml, so the files can't be
    # compared by text directly; compare the parsed structures instead.
    with p.open("rb") as fp:
        expected = yaml.safe_load(fp)
    expected["task"]["model"] = yaml.safe_load(updated_content)

    with p_new.open("rb") as fp:
        actual = yaml.safe_load(fp)

    self.assertEqual(actual, expected)


if __name__ == "__main__":
unittest.main()