Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Hyperparameter tuning Workflow Template #786

Merged
merged 24 commits into from
Dec 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9695550
display tfod dataset
rushtehrani Dec 11, 2020
e79404a
display maskrcnn dataset
rushtehrani Dec 11, 2020
1862567
add hyperparam tuning workflow template
rushtehrani Dec 11, 2020
a55ba7a
add comment to exported funcs
rushtehrani Dec 11, 2020
b6e08df
minor update to tf training template
rushtehrani Dec 11, 2020
eab1c11
update default search space config
rushtehrani Dec 11, 2020
64b591d
reduce hyperparam max trials for quicker example
rushtehrani Dec 11, 2020
ad74259
revert prev migration changes
rushtehrani Dec 22, 2020
5f46623
revert prev migration changes
rushtehrani Dec 22, 2020
5caf584
update hyperparam tuning template
rushtehrani Dec 22, 2020
efea105
add select.nodepool
rushtehrani Dec 23, 2020
9f202b2
Merge branch 'master' into feat/migrations
rushtehrani Dec 23, 2020
cc5e8a0
fix indentation
rushtehrani Dec 24, 2020
cda805b
add template source as comment
rushtehrani Dec 24, 2020
f5320e6
update migration date
rushtehrani Dec 25, 2020
086a375
update template path
rushtehrani Dec 25, 2020
656e6e6
update hyperparam tuning example paths
rushtehrani Dec 25, 2020
300842a
rename hyperparam tune migration
rushtehrani Dec 25, 2020
ea8bf0a
update nni template
rushtehrani Dec 27, 2020
968444a
use nni-web-ui image
rushtehrani Dec 27, 2020
029429d
Merge branch 'feat/sys.node.pool' into feat/migrations
rushtehrani Dec 28, 2020
f60824f
fix indentation
rushtehrani Dec 28, 2020
a79f57c
Merge branch 'master' into feat/migrations
rushtehrani Dec 29, 2020
39bb30d
make updates based on feedback
rushtehrani Dec 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package migration

import (
"database/sql"
"github.com/pressly/goose"
"path/filepath"
)

// hyperparameterTuningTemplateName is the name of the Workflow Template that this
// migration creates in Up20201225172926 and archives in Down20201225172926.
const hyperparameterTuningTemplateName = "Hyperparameter Tuning Example"

// initialize20201225172926 registers the Up/Down pair of this migration with goose.
// If the migration version is already present in initializedMigrations, it does nothing.
func initialize20201225172926() {
	const version = 20201225172926

	if _, registered := initializedMigrations[version]; registered {
		return
	}

	goose.AddMigration(Up20201225172926, Down20201225172926)
	initializedMigrations[version] = true
}

// Up20201225172926 adds the Hyperparameter Tuning Workflow Template.
//
// It is executed when the migration is applied. The template manifest is loaded
// through createWorkflowTemplate from workflows/hyperparameter-tuning/20201225172926.yaml
// and is labeled with the framework, tuner and creator of the example.
// The tx argument is not used by this migration.
func Up20201225172926(tx *sql.Tx) error {
	return createWorkflowTemplate(
		// The directory name must match where the manifest is checked in
		// (db/yaml/workflows/hyperparameter-tuning/); a mismatch makes the
		// migration fail because the manifest file cannot be read.
		filepath.Join("workflows", "hyperparameter-tuning", "20201225172926.yaml"),
		hyperparameterTuningTemplateName,
		map[string]string{
			// The bundled example is the TensorFlow 2.x MNIST experiment
			// (experimentName "MNIST TF v2.x"), so label it accordingly.
			"framework":  "tensorflow",
			"tuner":      "TPE",
			"created-by": "system",
		},
	)
}

// Down20201225172926 archives the Hyperparameter Tuning Workflow Template.
// It is executed when the migration is rolled back; tx is not used.
func Down20201225172926(tx *sql.Tx) error {
	err := archiveWorkflowTemplate(hyperparameterTuningTemplateName)
	return err
}
1 change: 1 addition & 0 deletions db/go/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ func Initialize() {
initialize20201221195937()
initialize20201223062947()
initialize20201223202929()
initialize20201225172926()
initialize20201229205644()

if err := client.DB.Close(); err != nil {
Expand Down
72 changes: 72 additions & 0 deletions db/go/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,51 @@ func updateWorkspaceTemplateManifest(filename, templateName string) error {
return nil
}

// createWorkflowTemplate creates the workflow template named {{templateName}}, using the
// contents of {{filename}} as its manifest and attaching the given labels.
// It does so for every namespace returned by ListOnepanelEnabledNamespaces, replacing the
// runtime variables in the manifest for each namespace before the template is created.
// The first error encountered is returned and the remaining namespaces are not processed.
func createWorkflowTemplate(filename, templateName string, labels map[string]string) error {
	c, err := getClient()
	if err != nil {
		return err
	}
	defer c.DB.Close()

	enabledNamespaces, err := c.ListOnepanelEnabledNamespaces()
	if err != nil {
		return err
	}

	rawManifest, err := readDataFile(filename)
	if err != nil {
		return err
	}

	// The UID is derived from the template name, so it is the same in every namespace.
	templateUID, err := uid2.GenerateUID(templateName, 30)
	if err != nil {
		return err
	}

	for _, ns := range enabledNamespaces {
		tmpl := &v1.WorkflowTemplate{
			UID:      templateUID,
			Name:     templateName,
			Manifest: rawManifest,
			Labels:   labels,
		}

		// Resolve the runtime variables against this namespace.
		resolved, replaceErr := ReplaceRuntimeVariablesInManifest(c, ns.Name, tmpl.Manifest)
		if replaceErr != nil {
			return replaceErr
		}
		tmpl.Manifest = resolved

		_, createErr := c.CreateWorkflowTemplate(ns.Name, tmpl)
		if createErr != nil {
			return createErr
		}
	}

	return nil
}

// updateWorkflowTemplateManifest will update the workflow template given by {{templateName}} with the contents
// given by {{filename}}
// It will do so for all namespaces.
Expand Down Expand Up @@ -93,3 +138,30 @@ func updateWorkflowTemplateManifest(filename, templateName string, labels map[st

return nil
}

// archiveWorkflowTemplate archives the Workflow Template identified by templateName.
// The template UID is generated from templateName, and the template is archived in every
// namespace returned by ListOnepanelEnabledNamespaces. The first error encountered is
// returned and the remaining namespaces are not processed.
func archiveWorkflowTemplate(templateName string) error {
	c, err := getClient()
	if err != nil {
		return err
	}
	defer c.DB.Close()

	enabledNamespaces, err := c.ListOnepanelEnabledNamespaces()
	if err != nil {
		return err
	}

	templateUID, err := uid2.GenerateUID(templateName, 30)
	if err != nil {
		return err
	}

	for _, ns := range enabledNamespaces {
		_, archiveErr := c.ArchiveWorkflowTemplate(ns.Name, templateUID)
		if archiveErr != nil {
			return archiveErr
		}
	}

	return nil
}
185 changes: 185 additions & 0 deletions db/yaml/workflows/hyperparameter-tuning/20201225172926.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# source: https://github.com/onepanelio/templates/blob/master/workflows/nni-hyperparameter-tuning/mnist/
entrypoint: main
arguments:
parameters:
- name: source
value: https://github.com/onepanelio/templates
- name: revision
value: master
- name: config
displayName: Configuration
required: true
hint: NNI configuration
type: textarea.textarea
value: |-
authorName: Onepanel, Inc.
experimentName: MNIST TF v2.x
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
trainingServicePlatform: local
searchSpacePath: search_space.json
useAnnotation: false
tuner:
# gpuIndices: '0' # uncomment and update to the GPU indices to assign this tuner
builtinTunerName: TPE # choices: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
classArgs:
optimize_mode: maximize # choices: maximize, minimize
trial:
command: python main.py --output /mnt/output
codeDir: .
# gpuNum: 1 # uncomment and update to number of GPUs
- name: search-space
displayName: Search space configuration
required: true
type: textarea.textarea
value: |-
{
"dropout_rate": { "_type": "uniform", "_value": [0.5, 0.9] },
"conv_size": { "_type": "choice", "_value": [2, 3, 5, 7] },
"hidden_size": { "_type": "choice", "_value": [124, 512, 1024] },
"batch_size": { "_type": "choice", "_value": [16, 32] },
"learning_rate": { "_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1] },
"epochs": { "_type": "choice", "_value": [10] }
}
- displayName: Node pool
hint: Name of node pool or group to run this workflow task
type: select.nodepool
name: sys-node-pool
value: {{.DefaultNodePoolOption}}
required: true

volumeClaimTemplates:
- metadata:
name: hyperparamtuning-data
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 20Gi
- metadata:
name: hyperparamtuning-output
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 20Gi

templates:
- name: main
dag:
tasks:
- name: hyperparameter-tuning
template: hyperparameter-tuning
- name: workflow-metrics-writer
template: workflow-metrics-writer
dependencies: [hyperparameter-tuning]
arguments:
# Use sys-metrics artifact output from hyperparameter-tuning Task
artifacts:
- name: best-metrics
from: "{{tasks.hyperparameter-tuning.outputs.artifacts.sys-metrics}}"
- name: hyperparameter-tuning
inputs:
artifacts:
- name: src
git:
repo: '{{workflow.parameters.source}}'
revision: '{{workflow.parameters.revision}}'
path: /mnt/data/src
- name: config
path: /mnt/data/src/workflows/hyperparameter-tuning/mnist/config.yaml
raw:
data: '{{workflow.parameters.config}}'
- name: search-space
path: /mnt/data/src/workflows/hyperparameter-tuning/mnist/search_space.json
raw:
data: '{{workflow.parameters.search-space}}'
outputs:
artifacts:
- name: output
path: /mnt/output
optional: true
archive:
none: {}
container:
image: onepanel/dl:0.17.0
args:
- --config
- /mnt/data/src/workflows/hyperparameter-tuning/mnist/config.yaml
workingDir: /mnt
volumeMounts:
- name: hyperparamtuning-data
mountPath: /mnt/data
- name: hyperparamtuning-output
mountPath: /mnt/output
nodeSelector:
beta.kubernetes.io/instance-type: '{{workflow.parameters.sys-node-pool}}'
sidecars:
- name: nni-web-ui
image: 'onepanel/nni-web-ui:0.17.0'
env:
- name: ONEPANEL_INTERACTIVE_SIDECAR
value: 'true'
ports:
- containerPort: 9000
name: nni
- name: tensorboard
image: 'tensorflow/tensorflow:2.3.0'
command:
- sh
- '-c'
env:
- name: ONEPANEL_INTERACTIVE_SIDECAR
value: 'true'
args:
# Read logs from /mnt/output/tensorboard - /mnt/output is auto-mounted from volumeMounts
- tensorboard --logdir /mnt/output/tensorboard
ports:
- containerPort: 6006
name: tensorboard
# Template that reads the best-metrics input artifact and saves its contents as metrics
# on this Workflow execution through the Onepanel API.
- name: workflow-metrics-writer
inputs:
artifacts:
- name: best-metrics
path: /tmp/sys-metrics.json
script:
image: onepanel/python-sdk:v0.16.0
command: [python, '-u']
source: |
import os
import json

import onepanel.core.api
from onepanel.core.api.models.metric import Metric
from onepanel.core.api.rest import ApiException
from onepanel.core.api.models import Parameter

# Load the metrics passed in as the best-metrics input artifact
with open('/tmp/sys-metrics.json') as f:
metrics = json.load(f)

# Read the token from the Kubernetes service account path; it is used as the API bearer token below
with open('/var/run/secrets/kubernetes.io/serviceaccount/token') as f:
token = f.read()

# Configure API authorization (the API host is taken from the ONEPANEL_API_URL environment variable)
configuration = onepanel.core.api.Configuration(
host = os.getenv('ONEPANEL_API_URL'),
api_key = {
'authorization': token
}
)
configuration.api_key_prefix['authorization'] = 'Bearer'

# Call SDK method to save metrics
with onepanel.core.api.ApiClient(configuration) as api_client:
api_instance = onepanel.core.api.WorkflowServiceApi(api_client)
# The namespace and name of the running Workflow identify the execution to attach metrics to
namespace = '{{workflow.namespace}}'
uid = '{{workflow.name}}'
body = onepanel.core.api.AddWorkflowExecutionsMetricsRequest()
body.metrics = metrics
try:
api_response = api_instance.add_workflow_execution_metrics(namespace, uid, body)
print('Metrics added.')
except ApiException as e:
# An ApiException is only printed here; it is not re-raised
print("Exception when calling WorkflowServiceApi->add_workflow_execution_metrics: %s\n" % e)