From 24ba27a23dd7552fe1df5293df1d90a2eddecf17 Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Tue, 15 Jul 2025 17:07:34 -0300 Subject: [PATCH 01/12] Add fluent-bit loki integration --- application.tf | 1 + monitoring.tf | 47 +++++++++++++++++++++++++++++++++++++++++++++++ variables.tf | 34 +++++++++++++++++++++++++++++----- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/application.tf b/application.tf index b94f3d25..7f833b95 100644 --- a/application.tf +++ b/application.tf @@ -745,6 +745,7 @@ keda: enabled: true kube-prometheus-stack: ${local.kube_prometheus_stack_values} +${local.fluent_bit_config} metrics-server: global: imageRegistry: ${var.image_registry}/docker.io diff --git a/monitoring.tf b/monitoring.tf index f82e3794..c4aaa738 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -20,6 +20,53 @@ locals { EOT ) + fluent_bit_config = var.enable_loki_logging == true ? (< Date: Tue, 15 Jul 2025 17:33:47 -0300 Subject: [PATCH 02/12] Fix indentation --- monitoring.tf | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/monitoring.tf b/monitoring.tf index c4aaa738..cb008426 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -30,37 +30,37 @@ fluent-bit: config: inputs: | [INPUT] - name tail - path /var/log/containers/*.log - parser docker - tag kube.* + name tail + path /var/log/containers/*.log + parser docker + tag kube.* filters: | [FILTER] - Name kubernetes - Match kube.* - Kube_Tag_Prefix kube.var.log.containers. - Merge_Log On - Keep_Log Off - K8S-Logging.Parser On - K8S-Logging.Exclude Off + Name kubernetes + Match kube.* + Kube_Tag_Prefix kube.var.log.containers. + Merge_Log On + Keep_Log Off + K8S-Logging.Parser On + K8S-Logging.Exclude Off [FILTER] - Name grep - Match kube.* - Regex kubernetes.namespace_name default + Name grep + Match kube.* + Regex kubernetes.namespace_name default outputs: | [OUTPUT] - name loki - match * - host ${var.loki_endpoint} - port 3100 - labels job=fluent-bit, cluster=${var.label} - line_format json - tenant_id devops - http_user ${var.loki_username} - http_passwd ${var.loki_password} + name loki + match * + host ${var.loki_endpoint} + port 3100 + labels job=fluent-bit, cluster=${var.label} + line_format json + tenant_id devops + http_user ${var.loki_username} + http_passwd ${var.loki_password} EOT ) : (< Date: Tue, 15 Jul 2025 19:07:56 -0300 Subject: [PATCH 03/12] Add loki datasource --- monitoring.tf | 70 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/monitoring.tf b/monitoring.tf index cb008426..a794785b 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -32,23 +32,24 @@ fluent-bit: [INPUT] name tail path /var/log/containers/*.log + exclude_path /var/log/containers/*_kube-system_*.log,/var/log/containers/*_indico_*.log,/var/log/containers/*_monitoring_*.log,/var/log/containers/*_amazon-guardduty_*.log parser docker tag kube.* + buffer_chunk_size 64KB + buffer_max_size 128KB filters: | [FILTER] - Name kubernetes - Match kube.* - Kube_Tag_Prefix kube.var.log.containers. - Merge_Log On - Keep_Log Off - K8S-Logging.Parser On - K8S-Logging.Exclude Off - - [FILTER] - Name grep - Match kube.* - Regex kubernetes.namespace_name default + name kubernetes + match kube.* + kube_tag_prefix kube.var.log.containers. + merge_log on + keep_log off + k8s-logging.parser on + k8s-logging.exclude off + k8s-logging.max_records 50000 + k8s-logging.cache_ttl 600 + buffer_size 256KB outputs: | [OUTPUT] @@ -56,9 +57,9 @@ fluent-bit: match * host ${var.loki_endpoint} port 3100 - labels job=fluent-bit, cluster=${var.label} + labels cluster=${var.label} line_format json - tenant_id devops + tenant_id ${var.label} http_user ${var.loki_username} http_passwd ${var.loki_password} EOT @@ -182,6 +183,24 @@ ${local.prometheus_tls} - grafana-${local.monitoring_domain_name} path: / ${local.grafana_tls} +${var.enable_loki_logging == true ? (< Date: Tue, 15 Jul 2025 19:10:57 -0300 Subject: [PATCH 04/12] Remove wrong values --- monitoring.tf | 2 -- 1 file changed, 2 deletions(-) diff --git a/monitoring.tf b/monitoring.tf index a794785b..68a367ff 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -47,8 +47,6 @@ fluent-bit: keep_log off k8s-logging.parser on k8s-logging.exclude off - k8s-logging.max_records 50000 - k8s-logging.cache_ttl 600 buffer_size 256KB outputs: | From a811d431fe90a1606bf067eadb795951697f6708 Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Tue, 15 Jul 2025 19:16:38 -0300 Subject: [PATCH 05/12] Change loki port --- monitoring.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitoring.tf b/monitoring.tf index 68a367ff..bc586e73 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -54,7 +54,7 @@ fluent-bit: name loki match * host ${var.loki_endpoint} - port 3100 + port 80 labels cluster=${var.label} line_format json tenant_id ${var.label} From 89614382b37663608f581927b4183600e2878d8d Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Tue, 15 Jul 2025 19:19:45 -0300 Subject: [PATCH 06/12] Add job label --- monitoring.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monitoring.tf b/monitoring.tf index bc586e73..dde3c207 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -55,7 +55,7 @@ fluent-bit: match * host ${var.loki_endpoint} port 80 - labels cluster=${var.label} + labels job=${var.label},cluster=${var.label} line_format json tenant_id ${var.label} http_user ${var.loki_username} From 97a1b379377b3d74e25b2f53c902de8e2b498e82 Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Wed, 16 Jul 2025 16:27:30 -0300 Subject: [PATCH 07/12] Add k8s input to fluent-bit --- monitoring.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/monitoring.tf b/monitoring.tf index dde3c207..123851cd 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -27,6 +27,10 @@ fluent-bit: repository: harbor.devops.indico.io/docker.io/fluent/fluent-bit imagePullSecrets: - name: harbor-pull-secret + rbac: + create: true + nodeAccess: true + eventsAccess: true config: inputs: | [INPUT] @@ -37,6 +41,10 @@ fluent-bit: tag kube.* buffer_chunk_size 64KB buffer_max_size 128KB + [INPUT] + name kubernetes_events + tag k8s_events + kube_url https://kubernetes.default.svc filters: | [FILTER] From 14810cfef2a9c9412ad63d6a7a522729743a393e Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Wed, 16 Jul 2025 18:34:51 -0300 Subject: [PATCH 08/12] Add insights logic to events input --- monitoring.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/monitoring.tf b/monitoring.tf index 123851cd..b4d0a982 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -45,6 +45,7 @@ fluent-bit: name kubernetes_events tag k8s_events kube_url https://kubernetes.default.svc + kube_namespace ${var.insights_enabled ? "insights" : "default"} filters: | [FILTER] From 2023e6a09e5872d99a1131ec47d459280b1d99c7 Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Thu, 17 Jul 2025 11:33:38 -0300 Subject: [PATCH 09/12] Add loki to monitoring stack --- application.tf | 2 +- environment.tf | 1 + main.tf | 6 ++-- monitoring.tf | 75 ++++++++++++++------------------------------------ variables.tf | 16 ++--------- 5 files changed, 28 insertions(+), 72 deletions(-) diff --git a/application.tf b/application.tf index 7f833b95..7bd829de 100644 --- a/application.tf +++ b/application.tf @@ -745,7 +745,7 @@ keda: enabled: true kube-prometheus-stack: ${local.kube_prometheus_stack_values} -${local.fluent_bit_config} +${local.loki_config} metrics-server: global: imageRegistry: ${var.image_registry}/docker.io diff --git a/environment.tf b/environment.tf index f7eb90dc..b64256ec 100644 --- a/environment.tf +++ b/environment.tf @@ -24,6 +24,7 @@ locals { environment_data_s3_bucket_name = var.load_environment == "" ? coalesce(module.s3-storage[0].data_s3_bucket_name, "null") : data.terraform_remote_state.environment[0].outputs.data_s3_bucket_name environment_pgbackup_s3_bucket_name = var.load_environment == "" ? coalesce(module.s3-storage[0].pgbackup_s3_bucket_name, "null") : data.terraform_remote_state.environment[0].outputs.pgbackup_s3_bucket_name environment_miniobkp_s3_bucket_name = var.load_environment == "" ? coalesce(module.s3-storage[0].miniobkp_s3_bucket_name, "null") : data.terraform_remote_state.environment[0].outputs.miniobkp_s3_bucket_name + environment_loki_s3_bucket_name = var.load_environment == "" ? coalesce(module.s3-storage[0].loki_s3_bucket_name, "null") : data.terraform_remote_state.environment[0].outputs.loki_s3_bucket_name environment_efs_filesystem_id = var.load_environment == "" ? var.include_efs == true ? module.efs-storage[0].efs_filesystem_id : "null" : data.terraform_remote_state.environment[0].outputs.efs_filesystem_id environment_fsx_rwx_id = var.load_environment == "" ? var.include_fsx == true ? module.fsx-storage[0].fsx_rwx_id : "null" : data.terraform_remote_state.environment[0].outputs.fsx_rwx_id environment_fsx_rwx_arn = var.load_environment == "" ? var.include_fsx == true ? module.fsx-storage[0].fsx_rwx_arn : "null" : data.terraform_remote_state.environment[0].outputs.fsx_rwx_arn diff --git a/main.tf b/main.tf index a6741dcf..c40ee26c 100644 --- a/main.tf +++ b/main.tf @@ -207,7 +207,7 @@ module "security-group" { module "s3-storage" { count = var.load_environment == "" ? 1 : 0 source = "app.terraform.io/indico/indico-aws-buckets/mod" - version = "4.4.0" + version = "4.5.0" force_destroy = true # allows terraform to destroy non-empty buckets. label = var.label kms_key_arn = local.environment_kms_key_arn @@ -224,6 +224,8 @@ module "s3-storage" { miniobkp_s3_bucket_name_override = var.miniobkp_s3_bucket_name_override include_miniobkp = var.include_miniobkp && var.insights_enabled ? true : false allowed_origins = ["https://${local.dns_name}"] + loki_s3_bucket_name_override = var.loki_s3_bucket_name_override + enable_loki_logging = var.enable_loki_logging } @@ -315,7 +317,7 @@ module "iam" { aws_primary_dns_role_arn = var.aws_primary_dns_role_arn efs_filesystem_id = [var.include_efs == true ? local.environment_efs_filesystem_id : ""] fsx_arns = [var.include_rox ? local.environment_fsx_rox_arn : "", var.include_fsx == true ? local.environment_fsx_rwx_arn : ""] - s3_buckets = compact([local.environment_data_s3_bucket_name, var.include_pgbackup ? local.environment_pgbackup_s3_bucket_name : "", var.include_rox ? local.environment_api_models_s3_bucket_name : "", lower("${var.aws_account}-aws-cod-snapshots"), var.performance_bucket ? "indico-locust-benchmark-test-results" : "", var.include_miniobkp && var.insights_enabled ? local.environment_miniobkp_s3_bucket_name : ""]) + s3_buckets = compact([local.environment_data_s3_bucket_name, var.include_pgbackup ? local.environment_pgbackup_s3_bucket_name : "", var.include_rox ? local.environment_api_models_s3_bucket_name : "", lower("${var.aws_account}-aws-cod-snapshots"), var.performance_bucket ? "indico-locust-benchmark-test-results" : "", var.include_miniobkp && var.insights_enabled ? local.environment_miniobkp_s3_bucket_name : "", var.enable_loki_logging ? local.environment_loki_s3_bucket_name : ""]) kms_key_arn = local.environment_kms_key_arn # EKS cluster role create_cluster_iam_role = var.create_eks_cluster_role diff --git a/monitoring.tf b/monitoring.tf index b4d0a982..106cad05 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -20,55 +20,24 @@ locals { EOT ) - fluent_bit_config = var.enable_loki_logging == true ? (< Date: Thu, 17 Jul 2025 11:40:06 -0300 Subject: [PATCH 10/12] Fix loki s3 output reference --- monitoring.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monitoring.tf b/monitoring.tf index 106cad05..110b51ba 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -29,12 +29,12 @@ loki: storage_config: aws: region: ${var.region} - bucketnames: ${module.s3-storage.loki_bucket_name} + bucketnames: ${module.s3-storage[0].loki_s3_bucket_name} s3forcepathstyle: false storage: type: s3 bucketNames: - chunks: ${module.s3-storage.loki_bucket_name} + chunks: ${module.s3-storage[0].loki_s3_bucket_name} s3: region: ${var.region} From 4468eb6f376f95cd8581516b7527e327b12f55a9 Mon Sep 17 00:00:00 2001 From: Lucas Telles Date: Thu, 17 Jul 2025 11:57:27 -0300 Subject: [PATCH 11/12] Fix loki gateway endpoint --- monitoring.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monitoring.tf b/monitoring.tf index 110b51ba..375c90fc 100644 --- a/monitoring.tf +++ b/monitoring.tf @@ -165,7 +165,7 @@ ${var.enable_loki_logging == true ? (< Date: Thu, 17 Jul 2025 12:00:22 -0300 Subject: [PATCH 12/12] Fix loki variable name --- variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variables.tf b/variables.tf index 7e0ee778..5b334ca2 100644 --- a/variables.tf +++ b/variables.tf @@ -1680,6 +1680,6 @@ variable "enable_loki_logging" { variable "loki_s3_bucket_name_override" { type = string - default = "" + default = null description = "The name of the existing S3 bucket to be loaded and used as the loki bucket" }