From 563e927f7a1729663dbd308ef31a78689276259f Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Thu, 11 Nov 2021 14:14:05 +0100 Subject: [PATCH] [APM] Reinstate ML multi-metric job (#117836) Closes #101734. This reverts commit 008421f. Additionally, incorporate suggested changes from #101734 (comment). --- .../anomaly_detection/apm_ml_anomaly_query.ts | 27 +++++ .../anomaly_detection/apm_ml_detectors.ts | 12 +++ .../index.ts} | 6 +- .../create_anomaly_detection_jobs.ts | 20 ++-- .../lib/service_map/get_service_anomalies.ts | 4 +- .../transactions/get_anomaly_data/fetcher.ts | 4 +- .../modules/apm_transaction/manifest.json | 18 ++-- .../apm_transaction/ml/apm_tx_metrics.json | 53 ++++++++++ .../ml/datafeed_apm_tx_metrics.json | 98 +++++++++++++++++++ ...tafeed_high_mean_transaction_duration.json | 17 ---- .../ml/high_mean_transaction_duration.json | 35 ------- .../apis/ml/modules/recognize_module.ts | 2 +- .../apis/ml/modules/setup_module.ts | 10 +- 13 files changed, 225 insertions(+), 81 deletions(-) create mode 100644 x-pack/plugins/apm/common/anomaly_detection/apm_ml_anomaly_query.ts create mode 100644 x-pack/plugins/apm/common/anomaly_detection/apm_ml_detectors.ts rename x-pack/plugins/apm/common/{anomaly_detection.ts => anomaly_detection/index.ts} (93%) create mode 100644 x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/apm_tx_metrics.json create mode 100644 x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_apm_tx_metrics.json delete mode 100644 x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_high_mean_transaction_duration.json delete mode 100644 x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/high_mean_transaction_duration.json diff --git a/x-pack/plugins/apm/common/anomaly_detection/apm_ml_anomaly_query.ts b/x-pack/plugins/apm/common/anomaly_detection/apm_ml_anomaly_query.ts new file mode 100644 index 000000000000000..00fb4b5ee4a54a0 --- /dev/null +++ b/x-pack/plugins/apm/common/anomaly_detection/apm_ml_anomaly_query.ts @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { ApmMlDetectorIndex } from './apm_ml_detectors'; + +export function apmMlAnomalyQuery(detectorIndex: ApmMlDetectorIndex) { + return [ + { + bool: { + filter: [ + { + terms: { + result_type: ['model_plot', 'record'], + }, + }, + { + term: { detector_index: detectorIndex }, + }, + ], + }, + }, + ]; +} diff --git a/x-pack/plugins/apm/common/anomaly_detection/apm_ml_detectors.ts b/x-pack/plugins/apm/common/anomaly_detection/apm_ml_detectors.ts new file mode 100644 index 000000000000000..7c6823212240806 --- /dev/null +++ b/x-pack/plugins/apm/common/anomaly_detection/apm_ml_detectors.ts @@ -0,0 +1,12 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export const enum ApmMlDetectorIndex { + txLatency = 0, + txThroughput = 1, + txFailureRate = 2, +} diff --git a/x-pack/plugins/apm/common/anomaly_detection.ts b/x-pack/plugins/apm/common/anomaly_detection/index.ts similarity index 93% rename from x-pack/plugins/apm/common/anomaly_detection.ts rename to x-pack/plugins/apm/common/anomaly_detection/index.ts index 43a779407d2a495..cdb1c62b9eed5f0 100644 --- a/x-pack/plugins/apm/common/anomaly_detection.ts +++ b/x-pack/plugins/apm/common/anomaly_detection/index.ts @@ -6,12 +6,12 @@ */ import { i18n } from '@kbn/i18n'; -import { ANOMALY_SEVERITY } from './ml_constants'; +import { ANOMALY_SEVERITY } from '../ml_constants'; import { getSeverityType, getSeverityColor as mlGetSeverityColor, -} from '../../ml/common'; -import { ServiceHealthStatus } from './service_health_status'; +} from '../../../ml/common'; +import { ServiceHealthStatus } from '../service_health_status'; export interface ServiceAnomalyStats { transactionType?: string; diff --git a/x-pack/plugins/apm/server/lib/anomaly_detection/create_anomaly_detection_jobs.ts b/x-pack/plugins/apm/server/lib/anomaly_detection/create_anomaly_detection_jobs.ts index 9d6abad0ff6a66c..7277a12c2bf1427 100644 --- a/x-pack/plugins/apm/server/lib/anomaly_detection/create_anomaly_detection_jobs.ts +++ b/x-pack/plugins/apm/server/lib/anomaly_detection/create_anomaly_detection_jobs.ts @@ -5,21 +5,21 @@ * 2.0. */ +import Boom from '@hapi/boom'; import { Logger } from 'kibana/server'; -import uuid from 'uuid/v4'; import { snakeCase } from 'lodash'; -import Boom from '@hapi/boom'; import moment from 'moment'; +import uuid from 'uuid/v4'; import { ML_ERRORS } from '../../../common/anomaly_detection'; -import { ProcessorEvent } from '../../../common/processor_event'; -import { environmentQuery } from '../../../common/utils/environment_query'; -import { Setup } from '../helpers/setup_request'; import { - TRANSACTION_DURATION, + METRICSET_NAME, PROCESSOR_EVENT, } from '../../../common/elasticsearch_fieldnames'; -import { APM_ML_JOB_GROUP, ML_MODULE_ID_APM_TRANSACTION } from './constants'; +import { ProcessorEvent } from '../../../common/processor_event'; +import { environmentQuery } from '../../../common/utils/environment_query'; import { withApmSpan } from '../../utils/with_apm_span'; +import { Setup } from '../helpers/setup_request'; +import { APM_ML_JOB_GROUP, ML_MODULE_ID_APM_TRANSACTION } from './constants'; import { getAnomalyDetectionJobs } from './get_anomaly_detection_jobs'; export async function createAnomalyDetectionJobs( @@ -92,8 +92,8 @@ async function createAnomalyDetectionJob({ query: { bool: { filter: [ - { term: { [PROCESSOR_EVENT]: ProcessorEvent.transaction } }, - { exists: { field: TRANSACTION_DURATION } }, + { term: { [PROCESSOR_EVENT]: ProcessorEvent.metric } }, + { term: { [METRICSET_NAME]: 'transaction' } }, ...environmentQuery(environment), ], }, @@ -105,7 +105,7 @@ async function createAnomalyDetectionJob({ job_tags: { environment, // identifies this as an APM ML job & facilitates future migrations - apm_ml_version: 2, + apm_ml_version: 3, }, }, }, diff --git a/x-pack/plugins/apm/server/lib/service_map/get_service_anomalies.ts b/x-pack/plugins/apm/server/lib/service_map/get_service_anomalies.ts index 2ed1966dcacbdd7..2aa2f5c6eead5b0 100644 --- a/x-pack/plugins/apm/server/lib/service_map/get_service_anomalies.ts +++ b/x-pack/plugins/apm/server/lib/service_map/get_service_anomalies.ts @@ -22,6 +22,8 @@ import { rangeQuery } from '../../../../observability/server'; import { withApmSpan } from '../../utils/with_apm_span'; import { getMlJobsWithAPMGroup } from '../anomaly_detection/get_ml_jobs_with_apm_group'; import { Setup } from '../helpers/setup_request'; +import { apmMlAnomalyQuery } from '../../../common/anomaly_detection/apm_ml_anomaly_query'; +import { ApmMlDetectorIndex } from '../../../common/anomaly_detection/apm_ml_detectors'; export const DEFAULT_ANOMALIES: ServiceAnomaliesResponse = { mlJobIds: [], @@ -56,7 +58,7 @@ export async function getServiceAnomalies({ query: { bool: { filter: [ - { terms: { result_type: ['model_plot', 'record'] } }, + ...apmMlAnomalyQuery(ApmMlDetectorIndex.txLatency), ...rangeQuery( Math.min(end - 30 * 60 * 1000, start), end, diff --git a/x-pack/plugins/apm/server/lib/transactions/get_anomaly_data/fetcher.ts b/x-pack/plugins/apm/server/lib/transactions/get_anomaly_data/fetcher.ts index 2fcbf5842d746a9..dd723f24abe1be2 100644 --- a/x-pack/plugins/apm/server/lib/transactions/get_anomaly_data/fetcher.ts +++ b/x-pack/plugins/apm/server/lib/transactions/get_anomaly_data/fetcher.ts @@ -12,6 +12,8 @@ import { rangeQuery } from '../../../../../observability/server'; import { asMutableArray } from '../../../../common/utils/as_mutable_array'; import { withApmSpan } from '../../../utils/with_apm_span'; import { Setup } from '../../helpers/setup_request'; +import { apmMlAnomalyQuery } from '../../../../common/anomaly_detection/apm_ml_anomaly_query'; +import { ApmMlDetectorIndex } from '../../../../common/anomaly_detection/apm_ml_detectors'; export type ESResponse = Exclude< PromiseReturnType, @@ -40,7 +42,7 @@ export function anomalySeriesFetcher({ query: { bool: { filter: [ - { terms: { result_type: ['model_plot', 'record'] } }, + ...apmMlAnomalyQuery(ApmMlDetectorIndex.txLatency), { term: { partition_field_value: serviceName } }, { term: { by_field_value: transactionType } }, ...rangeQuery(start, end, 'timestamp'), diff --git a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/manifest.json b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/manifest.json index f8feaef3be5f85c..3332fad66b3e258 100644 --- a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/manifest.json +++ b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/manifest.json @@ -1,29 +1,29 @@ { "id": "apm_transaction", "title": "APM", - "description": "Detect anomalies in transactions from your APM services.", + "description": "Detect anomalies in transaction latency, throughput and failure rate from your APM services for metric data.", "type": "Transaction data", "logoFile": "logo.json", - "defaultIndexPattern": "apm-*-transaction", + "defaultIndexPattern": "apm-*-metric,metrics-apm*", "query": { "bool": { "filter": [ - { "term": { "processor.event": "transaction" } }, - { "exists": { "field": "transaction.duration" } } + { "term": { "processor.event": "metric" } }, + { "term": { "metricset.name": "transaction" } } ] } }, "jobs": [ { - "id": "high_mean_transaction_duration", - "file": "high_mean_transaction_duration.json" + "id": "apm_tx_metrics", + "file": "apm_tx_metrics.json" } ], "datafeeds": [ { - "id": "datafeed-high_mean_transaction_duration", - "file": "datafeed_high_mean_transaction_duration.json", - "job_id": "high_mean_transaction_duration" + "id": "datafeed-apm_tx_metrics", + "file": "datafeed_apm_tx_metrics.json", + "job_id": "apm_tx_metrics" } ] } diff --git a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/apm_tx_metrics.json b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/apm_tx_metrics.json new file mode 100644 index 000000000000000..f93b4fb009a1431 --- /dev/null +++ b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/apm_tx_metrics.json @@ -0,0 +1,53 @@ +{ + "job_type": "anomaly_detector", + "groups": [ + "apm" + ], + "description": "Detects anomalies in transaction latency, throughput and error percentage for metric data.", + "analysis_config": { + "bucket_span": "15m", + "summary_count_field_name" : "doc_count", + "detectors" : [ + { + "detector_description" : "high latency by transaction type for an APM service", + "function" : "high_mean", + "field_name" : "transaction_latency", + "by_field_name" : "transaction.type", + "partition_field_name" : "service.name" + }, + { + "detector_description" : "transaction throughput for an APM service", + "function" : "mean", + "field_name" : "transaction_throughput", + "by_field_name" : "transaction.type", + "partition_field_name" : "service.name" + }, + { + "detector_description" : "failed transaction rate for an APM service", + "function" : "high_mean", + "field_name" : "failed_transaction_rate", + "by_field_name" : "transaction.type", + "partition_field_name" : "service.name" + } + ], + "influencers" : [ + "transaction.type", + "service.name" + ] + }, + "analysis_limits": { + "model_memory_limit": "32mb" + }, + "data_description": { + "time_field" : "@timestamp", + "time_format" : "epoch_ms" + }, + "model_plot_config": { + "enabled" : true, + "annotations_enabled" : true + }, + "results_index_name" : "custom-apm", + "custom_settings": { + "created_by": "ml-module-apm-transaction" + } +} diff --git a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_apm_tx_metrics.json b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_apm_tx_metrics.json new file mode 100644 index 000000000000000..4d19cdc9f533d46 --- /dev/null +++ b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_apm_tx_metrics.json @@ -0,0 +1,98 @@ +{ + "job_id": "JOB_ID", + "indices": [ + "INDEX_PATTERN_NAME" + ], + "chunking_config" : { + "mode" : "off" + }, + "query": { + "bool": { + "filter": [ + { "term": { "processor.event": "metric" } }, + { "term": { "metricset.name": "transaction" } } + ] + } + }, + "aggregations" : { + "buckets" : { + "composite" : { + "size" : 5000, + "sources" : [ + { + "date" : { + "date_histogram" : { + "field" : "@timestamp", + "fixed_interval" : "90s" + } + } + }, + { + "transaction.type" : { + "terms" : { + "field" : "transaction.type" + } + } + }, + { + "service.name" : { + "terms" : { + "field" : "service.name" + } + } + } + ] + }, + "aggs" : { + "@timestamp" : { + "max" : { + "field" : "@timestamp" + } + }, + "transaction_throughput" : { + "rate" : { + "unit" : "minute" + } + }, + "transaction_latency" : { + "avg" : { + "field" : "transaction.duration.histogram" + } + }, + "error_count" : { + "filter" : { + "term" : { + "event.outcome" : "failure" + } + }, + "aggs" : { + "actual_error_count" : { + "value_count" : { + "field" : "event.outcome" + } + } + } + }, + "success_count" : { + "filter" : { + "term" : { + "event.outcome" : "success" + } + } + }, + "failed_transaction_rate" : { + "bucket_script" : { + "buckets_path" : { + "failure_count" : "error_count>_count", + "success_count" : "success_count>_count" + }, + "script" : "if ((params.failure_count + params.success_count)==0){return 0;}else{return 100 * (params.failure_count/(params.failure_count + params.success_count));}" + } + } + } + } + }, + "indices_options": { + "ignore_unavailable": true + } +} diff --git a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_high_mean_transaction_duration.json b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_high_mean_transaction_duration.json deleted file mode 100644 index 882bd93fd937dfd..000000000000000 --- a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/datafeed_high_mean_transaction_duration.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "job_id": "JOB_ID", - "indices": [ - "INDEX_PATTERN_NAME" - ], - "query": { - "bool": { - "filter": [ - { "term": { "processor.event": "transaction" } }, - { "exists": { "field": "transaction.duration.us" } } - ] - } - }, - "indices_options": { - "ignore_unavailable": true - } -} diff --git a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/high_mean_transaction_duration.json b/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/high_mean_transaction_duration.json deleted file mode 100644 index 77284cb275cd8df..000000000000000 --- a/x-pack/plugins/ml/server/models/data_recognizer/modules/apm_transaction/ml/high_mean_transaction_duration.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "job_type": "anomaly_detector", - "groups": [ - "apm" - ], - "description": "Detect transaction duration anomalies across transaction types for your APM services.", - "analysis_config": { - "bucket_span": "15m", - "detectors": [ - { - "detector_description": "high duration by transaction type for an APM service", - "function": "high_mean", - "field_name": "transaction.duration.us", - "by_field_name": "transaction.type", - "partition_field_name": "service.name" - } - ], - "influencers": [ - "transaction.type", - "service.name" - ] - }, - "analysis_limits": { - "model_memory_limit": "32mb" - }, - "data_description": { - "time_field": "@timestamp" - }, - "model_plot_config": { - "enabled": true - }, - "custom_settings": { - "created_by": "ml-module-apm-transaction" - } -} diff --git a/x-pack/test/api_integration/apis/ml/modules/recognize_module.ts b/x-pack/test/api_integration/apis/ml/modules/recognize_module.ts index 2742fbff294c0d3..00b820a025c8b7e 100644 --- a/x-pack/test/api_integration/apis/ml/modules/recognize_module.ts +++ b/x-pack/test/api_integration/apis/ml/modules/recognize_module.ts @@ -44,7 +44,7 @@ export default ({ getService }: FtrProviderContext) => { user: USER.ML_POWERUSER, expected: { responseCode: 200, - moduleIds: ['apm_jsbase', 'apm_transaction', 'apm_nodejs'], + moduleIds: ['apm_jsbase', 'apm_nodejs'], }, }, { diff --git a/x-pack/test/api_integration/apis/ml/modules/setup_module.ts b/x-pack/test/api_integration/apis/ml/modules/setup_module.ts index c538fc0d9cb55d7..07a02911c677873 100644 --- a/x-pack/test/api_integration/apis/ml/modules/setup_module.ts +++ b/x-pack/test/api_integration/apis/ml/modules/setup_module.ts @@ -187,9 +187,11 @@ export default ({ getService }: FtrProviderContext) => { dashboards: [] as string[], }, }, + // Set startDatafeed and estimateModelMemory to false for the APM transaction test + // until there is a new data set available with metric data. { testTitleSuffix: - 'for apm_transaction with prefix, startDatafeed true and estimateModelMemory true', + 'for apm_transaction with prefix, startDatafeed false and estimateModelMemory false', sourceDataArchive: 'x-pack/test/functional/es_archives/ml/module_apm', indexPattern: { name: 'ft_module_apm', timeField: '@timestamp' }, module: 'apm_transaction', @@ -197,14 +199,14 @@ export default ({ getService }: FtrProviderContext) => { requestBody: { prefix: 'pf5_', indexPatternName: 'ft_module_apm', - startDatafeed: true, - end: Date.now(), + startDatafeed: false, + estimateModelMemory: false, }, expected: { responseCode: 200, jobs: [ { - jobId: 'pf5_high_mean_transaction_duration', + jobId: 'pf5_apm_tx_metrics', jobState: JOB_STATE.CLOSED, datafeedState: DATAFEED_STATE.STOPPED, },