refactor: rewrite activestandby task to remove external dioscuri requirement #3592

Merged · 10 commits · Dec 4, 2023
2 changes: 1 addition & 1 deletion DEPRECATIONS.md
@@ -12,7 +12,7 @@ All deprecations are listed below, with the most recent announcements at the top

### Lagoon v2.17.0
release link: https://github.com/uselagoon/lagoon/releases/tag/v2.17.0
* (insert any planned deprecations here)
* This release introduces a new active/standby task image that no longer requires the [dioscuri controller](https://github.com/amazeeio/dioscuri). Dioscuri is deprecated and will eventually be removed from the `lagoon-remote` helm chart. If you use active/standby functionality in your clusters, upgrade to Lagoon v2.17.0 and update your remote clusters to the `lagoon-remote` helm chart version noted in the v2.17.0 release notes.

### Lagoon v2.16.0
release link: https://github.com/uselagoon/lagoon/releases/tag/v2.16.0
11 changes: 11 additions & 0 deletions Makefile
@@ -52,6 +52,11 @@ UPSTREAM_TAG ?= latest
# edge is the most current merged change
BUILD_DEPLOY_IMAGE_TAG ?= edge

# OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG and OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY
# Set these to a particular build image if required; they default to empty so that the image the chart provides is used.
OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG=
OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY=

# To build k3d with Calico instead of Flannel, set this to true. Note that the Calico install in lagoon-charts is always
# disabled for use with k3d, as the cluster needs it on creation.
USE_CALICO_CNI ?= false
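
These overrides are threaded through the k3d targets below; the `$$([ $(VAR) ] && echo 'VAR=$(VAR)')` idiom used there emits the assignment only when the variable is non-empty, so the chart's own image is used otherwise. A hypothetical invocation (registry and tag are placeholders):

    make k3d/test \
        OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY=registry.example.com/build-deploy-image \
        OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG=local-test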
@@ -499,6 +504,8 @@ k3d/test: k3d/cluster helm/repos $(addprefix local-dev/,$(K3D_TOOLS)) build
HELM=$$(realpath ../local-dev/helm) KUBECTL=$$(realpath ../local-dev/kubectl) \
JQ=$$(realpath ../local-dev/jq) \
OVERRIDE_BUILD_DEPLOY_DIND_IMAGE=uselagoon/build-deploy-image:${BUILD_DEPLOY_IMAGE_TAG} \
$$([ $(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG) ] && echo 'OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG=$(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG)') \
$$([ $(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY) ] && echo 'OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY=$(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY)') \
OVERRIDE_ACTIVE_STANDBY_TASK_IMAGE=$$IMAGE_REGISTRY/task-activestandby:$(SAFE_BRANCH_NAME) \
IMAGE_REGISTRY=$$IMAGE_REGISTRY \
SKIP_INSTALL_REGISTRY=true \
@@ -531,6 +538,8 @@ k3d/setup: k3d/cluster helm/repos $(addprefix local-dev/,$(K3D_TOOLS)) build
HELM=$$(realpath ../local-dev/helm) KUBECTL=$$(realpath ../local-dev/kubectl) \
JQ=$$(realpath ../local-dev/jq) \
OVERRIDE_BUILD_DEPLOY_DIND_IMAGE=uselagoon/build-deploy-image:${BUILD_DEPLOY_IMAGE_TAG} \
$$([ $(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG) ] && echo 'OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG=$(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG)') \
$$([ $(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY) ] && echo 'OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY=$(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY)') \
OVERRIDE_ACTIVE_STANDBY_TASK_IMAGE=$$IMAGE_REGISTRY/task-activestandby:$(SAFE_BRANCH_NAME) \
IMAGE_REGISTRY=$$IMAGE_REGISTRY

@@ -584,6 +593,8 @@ k3d/dev: build
HELM=$$(realpath ../local-dev/helm) KUBECTL=$$(realpath ../local-dev/kubectl) \
JQ=$$(realpath ../local-dev/jq) \
OVERRIDE_BUILD_DEPLOY_DIND_IMAGE=uselagoon/build-deploy-image:${BUILD_DEPLOY_IMAGE_TAG} \
$$([ $(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG) ] && echo 'OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG=$(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGETAG)') \
$$([ $(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY) ] && echo 'OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY=$(OVERRIDE_BUILD_DEPLOY_CONTROLLER_IMAGE_REPOSITORY)') \
OVERRIDE_ACTIVE_STANDBY_TASK_IMAGE=$$IMAGE_REGISTRY/task-activestandby:$(SAFE_BRANCH_NAME) \
IMAGE_REGISTRY=$$IMAGE_REGISTRY

31 changes: 3 additions & 28 deletions node-packages/commons/src/tasks.ts
@@ -1081,28 +1081,6 @@ const restoreConfig = (name, backupId, backupS3Config, restoreS3Config) => {
return config;
};

// creates the route/ingress migration config
const migrateHosts = (destinationNamespace, sourceNamespace) => {
const randId = Math.random().toString(36).substring(7);
const migrateName = `host-migration-${randId}`;
let config = {
apiVersion: 'dioscuri.amazee.io/v1',
kind: 'HostMigration',
metadata: {
name: migrateName,
annotations: {
'dioscuri.amazee.io/migrate':'true'
}
},
spec: {
destinationNamespace: destinationNamespace,
activeEnvironment: sourceNamespace,
},
};

return config;
};

export const getTaskProjectEnvironmentVariables = async (projectName: string, environmentId: number) => {
// inject variables into tasks the same way it is in builds
// this makes variables available to tasks the same way for consumption
@@ -1323,18 +1301,15 @@ export const createMiscTask = async function(taskData: any) {
const restoreBytes = new Buffer(JSON.stringify(restoreConf).replace(/\\n/g, "\n")).toString('base64')
miscTaskData.misc.miscResource = restoreBytes
break;
case 'deploytarget:route:migrate':
case 'deploytarget:task:activestandby':
// handle setting up the task configuration for running the active/standby switch
// this uses the `advanced task` system in the controllers
// first generate the migration CRD
const migrateConf = migrateHosts(
makeSafe(taskData.data.productionEnvironment.openshiftProjectName),
makeSafe(taskData.data.environment.openshiftProjectName))
// generate our custom json payload to send to the advanced task
var jsonPayload: any = {
productionEnvironment: taskData.data.productionEnvironment.name,
standbyEnvironment: taskData.data.environment.name,
crd: migrateConf
sourceNamespace: makeSafe(taskData.data.environment.openshiftProjectName),
destinationNamespace: makeSafe(taskData.data.productionEnvironment.openshiftProjectName)
}
// encode it
const jsonPayloadBytes = new Buffer(JSON.stringify(jsonPayload).replace(/\\n/g, "\n")).toString('base64')
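
For illustration, the Go task image presumably decodes this base64 JSON payload into something like the struct below (field names are taken from the payload above; the actual type lives in the task image):

    // hypothetical input payload for the activestandby task image; note that
    // the dioscuri HostMigration CRD is no longer part of it
    type ActiveStandbyInput struct {
        ProductionEnvironment string `json:"productionEnvironment"`
        StandbyEnvironment    string `json:"standbyEnvironment"`
        SourceNamespace       string `json:"sourceNamespace"`
        DestinationNamespace  string `json:"destinationNamespace"`
    }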
12 changes: 9 additions & 3 deletions services/actions-handler/handler/controller_tasks.go
@@ -34,7 +34,7 @@ func (m *Messenger) handleTask(ctx context.Context, messageQueue *mq.MessageQueu
l := lclient.New(m.LagoonAPI.Endpoint, "actions-handler", &token, false)

switch message.Meta.Key {
case "kubernetes:route:migrate", "deploytarget:route:migrate":
case "kubernetes:route:migrate", "deploytarget:route:migrate", "deploytarget:task:activestandby":
// if the result is from an active/standby task, handle updating the project here
switch message.Meta.JobStatus {
case "complete", "succeeded":
@@ -58,11 +58,17 @@ func (m *Messenger) handleTask(ctx context.Context, messageQueue *mq.MessageQueu
json.Unmarshal(decodeData, advTask)
// then prepare the patch operation
updateProject := schema.UpdateProjectPatchInput{
ProductionEnvironment: &advTask.StandbyProductionEnvironment, // these are inverted because of how the task works
StandbyProductionEnvironment: &advTask.ProductionEnvironment, // these are inverted because of how the task works
ProductionEnvironment: &advTask.ProductionEnvironment,
StandbyProductionEnvironment: &advTask.StandbyProductionEnvironment,
ProductionRoutes: &advTask.ProductionRoutes,
StandbyRoutes: &advTask.StandbyRoutes,
}
switch message.Meta.Key {
case "kubernetes:route:migrate", "deploytarget:route:migrate":
// the old task had these inverted, so this keeps that inversion in place for now
updateProject.ProductionEnvironment = &advTask.StandbyProductionEnvironment
updateProject.StandbyProductionEnvironment = &advTask.ProductionEnvironment
}
// update the project in the api
updatedProject, err := lagoon.UpdateProject(ctx, int(project.ID), updateProject, l)
if err != nil {
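For reference, a sketch of the struct that `json.Unmarshal` fills above; only the fields used by the patch are shown, and the names are inferred rather than copied from the handler:

    // hypothetical shape of the advanced-task result payload decoded above
    type ActiveStandbyResult struct {
        ProductionEnvironment        string `json:"productionEnvironment"`
        StandbyProductionEnvironment string `json:"standbyProductionEnvironment"`
        ProductionRoutes             string `json:"productionRoutes"`
        StandbyRoutes                string `json:"standbyRoutes"`
    }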
2 changes: 1 addition & 1 deletion services/api/src/resources/deployment/resolvers.ts
@@ -1247,7 +1247,7 @@ export const switchActiveStandby: ResolverFn = async (
data.task.id = sourceTaskData.addTask.id.toString();

// queue the task to trigger the migration
await createMiscTask({ key: 'route:migrate', data });
await createMiscTask({ key: 'task:activestandby', data });

// return the task id and remote id
var retData = {
1 change: 1 addition & 0 deletions taskimages/activestandby/Dockerfile
@@ -5,6 +5,7 @@ COPY . /go/src/github.com/uselagoon/lagoon/taskimages/activestandby/
WORKDIR /go/src/github.com/uselagoon/lagoon/taskimages/activestandby/

# Build
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH} go test -v ./...
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH} go build -a -o taskrunner .

# Use distroless as minimal base image to package the binary
10 changes: 9 additions & 1 deletion taskimages/activestandby/README.md
@@ -1,3 +1,11 @@
# tasks-activestandby

This image is used by the activestandby task when using the Lagoon Kubernetes Controllers
This image is used by the activestandby task. The remote-controller knows about this task and creates a role binding between the two namespaces so that, for the duration of the task, they can talk to each other and create, edit, and delete resources in one another.

The resulting payload contains information about the actions that were performed, which is sent back to Lagoon via the message queue and reflected in the API.

The basic idea: when activestandby is triggered, the task collects the ingress in both namespaces that match the labels `dioscuri.amazee.io/migrate=true` and `activestandby.lagoon.sh/migrate=true`, stores information about them, removes them from the source namespace, and recreates them in the destination namespace.

Part of this process also involves copying secrets and certificates if they are present, so that they are also available in the destination namespace.

While a migration is taking place, every ingress gains the label `activestandby.lagoon.sh/migrating=true`; at the end of the migration it is set to `false`. The label is only `true` while a migration is in progress, which allows external systems to be aware of the migration and take, or hold off on, any actions they need to.
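
A minimal sketch of the label selection step described above, assuming controller-runtime's client (the helper is illustrative, not the task's actual code):

    package dioscuri

    import (
        "context"

        networkv1 "k8s.io/api/networking/v1"
        client "sigs.k8s.io/controller-runtime/pkg/client"
    )

    // listMigratableIngress returns the ingress in a namespace that carry the
    // migrate label, i.e. those the task will move to the other namespace.
    func listMigratableIngress(ctx context.Context, c client.Client, namespace string) ([]networkv1.Ingress, error) {
        ingressList := &networkv1.IngressList{}
        err := c.List(ctx, ingressList,
            client.InNamespace(namespace),
            client.MatchingLabels{"activestandby.lagoon.sh/migrate": "true"})
        if err != nil {
            return nil, err
        }
        return ingressList.Items, nil
    }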
43 changes: 43 additions & 0 deletions taskimages/activestandby/dioscuri/certificates.go
@@ -0,0 +1,43 @@
package dioscuri

import (
"context"
"fmt"

certv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
networkv1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/types"
client "sigs.k8s.io/controller-runtime/pkg/client"
)

// copy any certificate into a slice of certificates
func copyCertificates(ctx context.Context, c client.Client, ingress *networkv1.Ingress) []*certv1.Certificate {
var certificates []*certv1.Certificate
for _, tls := range ingress.Spec.TLS {
certificate := &certv1.Certificate{}
err := c.Get(ctx, types.NamespacedName{Namespace: ingress.ObjectMeta.Namespace, Name: tls.SecretName}, certificate)
if err != nil {
break
}
certificates = append(certificates, certificate)
fmt.Printf(">> Copying certificate %s in namespace %s\n", certificate.ObjectMeta.Name, certificate.ObjectMeta.Namespace)
}
return certificates
}

// create any certificates in the destination namespace
func createCertificates(ctx context.Context, c client.Client, destinationNamespace string, certificates []*certv1.Certificate) error {
for _, certificate := range certificates {
certificate.ObjectMeta.Namespace = destinationNamespace
certificate.ResourceVersion = ""
certificate.SelfLink = ""
certificate.UID = ""
err := c.Create(ctx, certificate)
if err != nil {
// surface the failure instead of silently dropping it and returning nil
return err
}
fmt.Printf(">> Creating certificate %s in namespace %s\n", certificate.ObjectMeta.Name, certificate.ObjectMeta.Namespace)
}
return nil
}
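
A hypothetical wrapper showing how the two helpers above might be combined for a single ingress during a migration (the wrapper itself is illustrative, not part of the task):

    // migrateCertificates copies the cert-manager certificates referenced by an
    // ingress's TLS configuration and recreates them in the destination namespace
    func migrateCertificates(ctx context.Context, c client.Client, ingress *networkv1.Ingress, destinationNamespace string) error {
        certificates := copyCertificates(ctx, c, ingress)
        return createCertificates(ctx, c, destinationNamespace, certificates)
    }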