diff --git a/manifests/pytorch-job-crds/base/crd.yaml b/manifests/pytorch-job-crds/base/crd.yaml
new file mode 100644
index 000000000..4a8cf899d
--- /dev/null
+++ b/manifests/pytorch-job-crds/base/crd.yaml
@@ -0,0 +1,46 @@
+# CustomResourceDefinition for the namespaced PyTorchJob kind (kubeflow.org/v1).
+# NOTE(review): apiextensions.k8s.io/v1beta1 stops being served in Kubernetes
+# 1.22; moving to apiextensions.k8s.io/v1 needs a structural schema.
+apiVersion: apiextensions.k8s.io/v1beta1
+kind: CustomResourceDefinition
+metadata:
+  name: pytorchjobs.kubeflow.org
+spec:
+  additionalPrinterColumns:
+  - JSONPath: .status.conditions[-1:].type
+    name: State
+    type: string
+  - JSONPath: .metadata.creationTimestamp
+    name: Age
+    type: date
+  group: kubeflow.org
+  names:
+    kind: PyTorchJob
+    plural: pytorchjobs
+    singular: pytorchjob
+  scope: Namespaced
+  subresources:
+    status: {}
+  # Master.replicas must be exactly 1; Worker.replicas must be at least 1.
+  validation:
+    openAPIV3Schema:
+      properties:
+        spec:
+          properties:
+            pytorchReplicaSpecs:
+              properties:
+                Master:
+                  properties:
+                    replicas:
+                      maximum: 1
+                      minimum: 1
+                      type: integer
+                Worker:
+                  properties:
+                    replicas:
+                      minimum: 1
+                      type: integer
+  versions:
+  - name: v1
+    served: true
+    storage: true
diff --git a/manifests/pytorch-job-crds/base/kustomization.yaml b/manifests/pytorch-job-crds/base/kustomization.yaml
new file mode 100644
index 000000000..6e120e7b6
--- /dev/null
+++ b/manifests/pytorch-job-crds/base/kustomization.yaml
@@ -0,0 +1,5 @@
+# Base containing only the PyTorchJob CustomResourceDefinition.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- crd.yaml
diff --git a/manifests/pytorch-job-crds/overlays/application/application.yaml b/manifests/pytorch-job-crds/overlays/application/application.yaml
new file mode 100644
index 000000000..4946a1cf8
--- /dev/null
+++ b/manifests/pytorch-job-crds/overlays/application/application.yaml
@@ -0,0 +1,43 @@
+# Application resource describing the pytorch-job-crds component.
+apiVersion: app.k8s.io/v1beta1
+kind: Application
+metadata:
+  name: pytorch-job-crds
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: pytorch-job-crds
+      app.kubernetes.io/instance: pytorch-job-crds-v0.7.0
+      app.kubernetes.io/version: v0.7.0
+      app.kubernetes.io/component: pytorch
+      app.kubernetes.io/part-of: kubeflow
+      app.kubernetes.io/managed-by: kfctl
+  componentKinds:
+  - group: core
+    kind: Service
+  - group: apps
+    kind: Deployment
+  - group: core
+    kind: ServiceAccount
+  - group: kubeflow.org
+    kind: PyTorchJob
+  descriptor:
+    type: "pytorch-job-crds"
+    version: "v1"
+    description: "Pytorch-job-crds contains the \"PyTorchJob\" custom resource definition."
+    maintainers:
+    - name: Johnu George
+      email: johnugeo@cisco.com
+    owners:
+    - name: Johnu George
+      email: johnugeo@cisco.com
+    keywords:
+    - "pytorchjob"
+    - "pytorch-operator"
+    - "pytorch-training"
+    links:
+    - description: About
+      url: "https://github.com/kubeflow/pytorch-operator"
+    - description: Docs
+      url: "https://www.kubeflow.org/docs/reference/pytorchjob/v1/pytorch/"
+  addOwnerRef: true
diff --git a/manifests/pytorch-job-crds/overlays/application/kustomization.yaml b/manifests/pytorch-job-crds/overlays/application/kustomization.yaml
new file mode 100644
index 000000000..8647a23c2
--- /dev/null
+++ b/manifests/pytorch-job-crds/overlays/application/kustomization.yaml
@@ -0,0 +1,10 @@
+# Overlay: base CRD plus the Application resource and app.kubernetes.io labels.
+apiVersion: kustomize.config.k8s.io/v1beta1
+bases:
+- ../../base
+commonLabels:
+  app.kubernetes.io/component: pytorch
+  app.kubernetes.io/name: pytorch-job-crds
+kind: Kustomization
+resources:
+- application.yaml
diff --git a/manifests/pytorch-operator/base/cluster-role-binding.yaml b/manifests/pytorch-operator/base/cluster-role-binding.yaml
new file mode 100644
index 000000000..595f0fd26
--- /dev/null
+++ b/manifests/pytorch-operator/base/cluster-role-binding.yaml
@@ -0,0 +1,17 @@
+# Binds the pytorch-operator ClusterRole to the pytorch-operator ServiceAccount.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  labels:
+    app: pytorch-operator
+  name: pytorch-operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: pytorch-operator
+subjects:
+- kind: ServiceAccount
+  name: pytorch-operator
+  # ServiceAccount subjects must carry a namespace; set it explicitly (same
+  # value as kustomization.yaml) instead of relying on kustomize to inject it.
+  namespace: kubeflow
diff --git a/manifests/pytorch-operator/base/cluster-role.yaml b/manifests/pytorch-operator/base/cluster-role.yaml
new file mode 100644
index 000000000..d1a9f7f20
--- /dev/null
+++ b/manifests/pytorch-operator/base/cluster-role.yaml
@@ -0,0 +1,91 @@
+# ClusterRole bound to the pytorch-operator ServiceAccount, followed by the
+# kubeflow-pytorchjobs-admin, -edit and -view ClusterRoles.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  labels:
+    app: pytorch-operator
+  name: pytorch-operator
+rules:
+- apiGroups:
+  - kubeflow.org
+  resources:
+  - pytorchjobs
+  - pytorchjobs/status
+  - pytorchjobs/finalizers
+  verbs:
+  - '*'
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - '*'
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  - services
+  - endpoints
+  - events
+  verbs:
+  - '*'
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kubeflow-pytorchjobs-admin
+  labels:
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
+aggregationRule:
+  clusterRoleSelectors:
+  - matchLabels:
+      rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true"
+rules: []
+
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kubeflow-pytorchjobs-edit
+  labels:
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pytorchjobs-admin: "true"
+rules:
+- apiGroups:
+  - kubeflow.org
+  resources:
+  - pytorchjobs
+  - pytorchjobs/status
+  - pytorchjobs/finalizers
+  verbs:
+  - get
+  - list
+  - watch
+  - create
+  - delete
+  - deletecollection
+  - patch
+  - update
+
+---
+
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kubeflow-pytorchjobs-view
+  labels:
+    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
+rules:
+- apiGroups:
+  - kubeflow.org
+  resources:
+  - pytorchjobs
+  - pytorchjobs/status
+  - pytorchjobs/finalizers
+  verbs:
+  - get
+  - list
+  - watch
diff --git a/manifests/pytorch-operator/base/deployment.yaml b/manifests/pytorch-operator/base/deployment.yaml
new file mode 100644
index 000000000..4e1f6a8bd
--- /dev/null
+++ b/manifests/pytorch-operator/base/deployment.yaml
@@ -0,0 +1,36 @@
+# Single-replica Deployment that runs the pytorch-operator container.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: pytorch-operator
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      name: pytorch-operator
+  template:
+    metadata:
+      labels:
+        name: pytorch-operator
+      annotations:
+        sidecar.istio.io/inject: "false"
+    spec:
+      containers:
+      - command:
+        - /pytorch-operator.v1
+        - --alsologtostderr
+        - -v=1
+        - --monitoring-port=8443
+        env:
+        - name: MY_POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: MY_POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        # The tag is replaced by the images entry in kustomization.yaml.
+        image: gcr.io/kubeflow-images-public/pytorch-operator:v0.6.0-18-g5e36a57
+        name: pytorch-operator
+      serviceAccountName: pytorch-operator
diff --git a/manifests/pytorch-operator/base/kustomization.yaml b/manifests/pytorch-operator/base/kustomization.yaml
new file mode 100644
index 000000000..09728b045
--- /dev/null
+++ b/manifests/pytorch-operator/base/kustomization.yaml
@@ -0,0 +1,16 @@
+# Base for the pytorch-operator resources, placed in the kubeflow namespace.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: kubeflow
+resources:
+- cluster-role-binding.yaml
+- cluster-role.yaml
+- deployment.yaml
+- service-account.yaml
+- service.yaml
+commonLabels:
+  kustomize.component: pytorch-operator
+images:
+- name: gcr.io/kubeflow-images-public/pytorch-operator
+  newName: gcr.io/kubeflow-images-public/pytorch-operator
+  newTag: vmaster-g518f9c76
diff --git a/manifests/pytorch-operator/base/params.env b/manifests/pytorch-operator/base/params.env
new file mode 100644
index 000000000..47e9d44b5
--- /dev/null
+++ b/manifests/pytorch-operator/base/params.env
@@ -0,0 +1,3 @@
+pytorchDefaultImage=null
+deploymentScope=cluster
+deploymentNamespace=null
diff --git a/manifests/pytorch-operator/base/service-account.yaml b/manifests/pytorch-operator/base/service-account.yaml
new file mode 100644
index 000000000..3fe6033e1
--- /dev/null
+++ b/manifests/pytorch-operator/base/service-account.yaml
@@ -0,0 +1,7 @@
+# ServiceAccount referenced by the pytorch-operator Deployment.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    app: pytorch-operator
+  name: pytorch-operator
diff --git a/manifests/pytorch-operator/base/service.yaml b/manifests/pytorch-operator/base/service.yaml
new file mode 100644
index 000000000..c788ab2db
--- /dev/null
+++ b/manifests/pytorch-operator/base/service.yaml
@@ -0,0 +1,19 @@
+# ClusterIP Service for the operator's monitoring port (Prometheus-annotated).
+apiVersion: v1
+kind: Service
+metadata:
+  annotations:
+    prometheus.io/path: /metrics
+    prometheus.io/port: "8443"
+    prometheus.io/scrape: "true"
+  labels:
+    app: pytorch-operator
+  name: pytorch-operator
+spec:
+  ports:
+  - name: monitoring-port
+    port: 8443
+    targetPort: 8443
+  selector:
+    name: pytorch-operator
+  type: ClusterIP
diff --git a/manifests/pytorch-operator/overlays/application/application.yaml b/manifests/pytorch-operator/overlays/application/application.yaml
new file mode 100644
index 000000000..c2eb60291
--- /dev/null
+++ b/manifests/pytorch-operator/overlays/application/application.yaml
@@ -0,0 +1,45 @@
+# Application resource describing the pytorch-operator component.
+apiVersion: app.k8s.io/v1beta1
+kind: Application
+metadata:
+  name: pytorch-operator
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: pytorch-operator
+      app.kubernetes.io/instance: pytorch-operator-v0.7.0
+      app.kubernetes.io/version: v0.7.0
+      app.kubernetes.io/component: pytorch
+      app.kubernetes.io/part-of: kubeflow
+      app.kubernetes.io/managed-by: kfctl
+  componentKinds:
+  - group: core
+    kind: Service
+  - group: apps
+    kind: Deployment
+  - group: core
+    kind: ConfigMap
+  - group: core
+    kind: ServiceAccount
+  - group: kubeflow.org
+    kind: PyTorchJob
+  descriptor:
+    type: "pytorch-operator"
+    version: "v1"
+    description: "Pytorch-operator allows users to create and manage the \"PyTorchJob\" custom resource."
+    maintainers:
+    - name: Johnu George
+      email: johnugeo@cisco.com
+    owners:
+    - name: Johnu George
+      email: johnugeo@cisco.com
+    keywords:
+    - "pytorchjob"
+    - "pytorch-operator"
+    - "pytorch-training"
+    links:
+    - description: About
+      url: "https://github.com/kubeflow/pytorch-operator"
+    - description: Docs
+      url: "https://www.kubeflow.org/docs/reference/pytorchjob/v1/pytorch/"
+  addOwnerRef: true
diff --git a/manifests/pytorch-operator/overlays/application/kustomization.yaml b/manifests/pytorch-operator/overlays/application/kustomization.yaml
new file mode 100644
index 000000000..3cfee7722
--- /dev/null
+++ b/manifests/pytorch-operator/overlays/application/kustomization.yaml
@@ -0,0 +1,10 @@
+# Overlay: operator base plus the Application resource and app.kubernetes.io labels.
+apiVersion: kustomize.config.k8s.io/v1beta1
+bases:
+- ../../base
+commonLabels:
+  app.kubernetes.io/component: pytorch
+  app.kubernetes.io/name: pytorch-operator
+kind: Kustomization
+resources:
+- application.yaml