open-telemetry · bogdandrutu · May 4, 2022 · Apr 21, 2022 · Apr 26, 2022 · Apr 26, 2022
@@ -51,6 +51,7 @@
 - `processor/transform`: Add transformation of logs (#9368)
 - `datadogexporter`: Add `metrics::summaries::mode` to specify export mode for summaries (#8846)
 - `prometheusreceiver`: Add resource attributes for kubernetes resource discovery labels (#9416)
+- `k8sattributesprocessor`: Support regex capture groups in tag_name (#9525)
 
 ### 🧰 Bug fixes 🧰
 

@@ -82,15 +82,32 @@ type ExtractConfig struct {
 
 // FieldExtractConfig allows specifying an extraction rule to extract a value from exactly one field.
 //
-// The field accepts a list FilterExtractConfig map. The map accepts three keys
-//     tag_name, key and regex
+// The field accepts a list of FieldExtractConfig maps. Each map accepts several keys:
+//     from, tag_name, key, key_regex and regex
 //
 // - tag_name represents the name of the tag that will be added to the span.
 //   When not specified a default tag name will be used of the format:
 //       k8s.pod.annotations.<annotation key>
 //       k8s.pod.labels.<label key>
 //   For example, if tag_name is not specified and the key is git_sha,
 //   then the attribute name will be `k8s.pod.annotations.git_sha`.
+//   When key_regex is present, tag_name supports back references to both named and positional capture groups (e.g. $1 or ${name}).
+//   For example, if your pod spec contains the following labels,
+//
+//		app.kubernetes.io/component: mysql
+//		app.kubernetes.io/version: 5.7.21
+//
+//   and you'd like to add tags for all labels with prefix app.kubernetes.io/ and also trim the prefix,
+//   then you can specify the following extraction rules:
+//
+//   processors:
+//     k8sattributes:
+//       extract:
+//         labels:
+//         - tag_name: $1
+//           key_regex: app.kubernetes.io/(.*)
+//
+//   This will add the `component` and `version` tags to the spans or metrics.
 //
 // - key represents the annotation name. This must exactly match an annotation name.
 //
@@ -103,9 +120,10 @@ type ExtractConfig struct {
 //   and you'd like to extract the GIT_SHA and the CI_BUILD values as tags, then you must
 //   specify the following two extraction rules:
 //
-//   procesors:
-//     k8s-tagger:
-//       annotations:
+//   processors:
+//     k8sattributes:
+//       extract:
+//         annotations:
 //         - name: git.sha
 //           key: kubernetes.io/change-cause
 //           regex: GIT_SHA=(?P<value>\w+)

@@ -311,35 +311,11 @@ func (c *WatchClient) extractPodAttributes(pod *api_v1.Pod) map[string]string {
 	}
 
 	for _, r := range c.Rules.Labels {
-		// By default if the From field is not set for labels and annotations we want to extract them from pod
-		if r.From == MetadataFromPod || r.From == "" {
-			if r.KeyRegex != nil {
-				for k, v := range pod.Labels {
-					if r.KeyRegex.MatchString(k) && v != "" {
-						name := fmt.Sprintf("k8s.pod.labels.%s", k)
-						tags[name] = v
-					}
-				}
-			} else if v, ok := pod.Labels[r.Key]; ok {
-				tags[r.Name] = c.extractField(v, r)
-			}
-		}
+		r.extractFromPodMetadata(pod.Labels, tags, "k8s.pod.labels.%s")
 	}
 
 	for _, r := range c.Rules.Annotations {
-		// By default if the From field is not set for labels and annotations we want to extract them from pod
-		if r.From == MetadataFromPod || r.From == "" {
-			if r.KeyRegex != nil {
-				for k, v := range pod.Annotations {
-					if r.KeyRegex.MatchString(k) && v != "" {
-						name := fmt.Sprintf("k8s.pod.annotations.%s", k)
-						tags[name] = v
-					}
-				}
-			} else if v, ok := pod.Annotations[r.Key]; ok {
-				tags[r.Name] = c.extractField(v, r)
-			}
-		}
+		r.extractFromPodMetadata(pod.Annotations, tags, "k8s.pod.annotations.%s")
 	}
 	return tags
 }
@@ -390,50 +366,18 @@ func (c *WatchClient) extractNamespaceAttributes(namespace *api_v1.Namespace) ma
 	tags := map[string]string{}
 
 	for _, r := range c.Rules.Labels {
-		if r.From == MetadataFromNamespace {
-			if r.KeyRegex != nil {
-				for k, v := range namespace.Labels {
-					if r.KeyRegex.MatchString(k) && v != "" {
-						name := fmt.Sprintf("k8s.namespace.labels.%s", k)
-						tags[name] = v
-					}
-				}
-			} else if v, ok := namespace.Labels[r.Key]; ok {
-				tags[r.Name] = c.extractField(v, r)
-			}
-		}
+		r.extractFromNamespaceMetadata(namespace.Labels, tags, "k8s.namespace.labels.%s")
 	}
 
 	for _, r := range c.Rules.Annotations {
-		if r.From == MetadataFromNamespace {
-			if r.KeyRegex != nil {
-				for k, v := range namespace.Annotations {
-					if r.KeyRegex.MatchString(k) && v != "" {
-						name := fmt.Sprintf("k8s.namespace.annotations.%s", k)
-						tags[name] = v
-					}
-				}
-			} else if v, ok := namespace.Annotations[r.Key]; ok {
-				tags[r.Name] = c.extractField(v, r)
-			}
-		}
+		r.extractFromNamespaceMetadata(namespace.Annotations, tags, "k8s.namespace.annotations.%s")
 	}
 
 	return tags
 }
 
 func (c *WatchClient) extractField(v string, r FieldExtractionRule) string {
-	// Check if a subset of the field should be extracted with a regular expression
-	// instead of the whole field.
-	if r.Regex == nil {
-		return v
-	}
-
-	matches := r.Regex.FindStringSubmatch(v)
-	if len(matches) == 2 {
-		return matches[1]
-	}
-	return ""
+	return r.extractField(v)
 }
 
 func (c *WatchClient) addOrUpdatePod(pod *api_v1.Pod) {

@@ -540,6 +540,36 @@ func TestExtractionRules(t *testing.T) {
 				"k8s.pod.annotations.annotation1": "av1",
 			},
 		},
+		{
+			name: "captured-groups",
+			rules: ExtractionRules{
+				Annotations: []FieldExtractionRule{{
+					Name:                 "$1",
+					KeyRegex:             regexp.MustCompile(`annotation(\d+)`),
+					HasKeyRegexReference: true,
+					From:                 MetadataFromPod,
+				},
+				},
+			},
+			attributes: map[string]string{
+				"1": "av1",
+			},
+		},
+		{
+			name: "captured-groups-$0",
+			rules: ExtractionRules{
+				Annotations: []FieldExtractionRule{{
+					Name:                 "$0",
+					KeyRegex:             regexp.MustCompile(`annotation(\d+)`),
+					HasKeyRegexReference: true,
+					From:                 MetadataFromPod,
+				},
+				},
+			},
+			attributes: map[string]string{
+				"annotation1": "av1",
+			},
+		},
 	}
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {

@@ -15,6 +15,7 @@
 package kube // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sattributesprocessor/internal/kube"
 
 import (
+	"fmt"
 	"regexp"
 	"time"
 
@@ -158,7 +159,8 @@ type FieldExtractionRule struct {
 	// Key is used to lookup k8s pod fields.
 	Key string
 	// KeyRegex is a regular expression used to extract a Key that matches the regex.
-	KeyRegex *regexp.Regexp
+	KeyRegex             *regexp.Regexp
+	HasKeyRegexReference bool
 	// Regex is a regular expression used to extract a sub-part of a field value.
 	// Full value is extracted when no regexp is provided.
 	Regex *regexp.Regexp
@@ -169,6 +171,52 @@ type FieldExtractionRule struct {
 	From string
 }
 
+func (r *FieldExtractionRule) extractFromPodMetadata(metadata map[string]string, tags map[string]string, formatter string) {
+	// By default if the From field is not set for labels and annotations we want to extract them from pod
+	if r.From == MetadataFromPod || r.From == "" {
+		r.extractFromMetadata(metadata, tags, formatter)
+	}
+}
+
+func (r *FieldExtractionRule) extractFromNamespaceMetadata(metadata map[string]string, tags map[string]string, formatter string) {
+	if r.From == MetadataFromNamespace {
+		r.extractFromMetadata(metadata, tags, formatter)
+	}
+}
+
+func (r *FieldExtractionRule) extractFromMetadata(metadata map[string]string, tags map[string]string, formatter string) {
+	if r.KeyRegex != nil {
+		for k, v := range metadata {
+			if r.KeyRegex.MatchString(k) && v != "" {
+				var name string
+				if r.HasKeyRegexReference {
+					result := []byte{}
+					name = string(r.KeyRegex.ExpandString(result, r.Name, k, r.KeyRegex.FindStringSubmatchIndex(k)))
+				} else {
+					name = fmt.Sprintf(formatter, k)
+				}
+				tags[name] = v
+			}
+		}
+	} else if v, ok := metadata[r.Key]; ok {
+		tags[r.Name] = r.extractField(v)
+	}
+}
+
+func (r *FieldExtractionRule) extractField(v string) string {
+	// Check if a subset of the field should be extracted with a regular expression
+	// instead of the whole field.
+	if r.Regex == nil {
+		return v
+	}
+
+	matches := r.Regex.FindStringSubmatch(v)
+	if len(matches) == 2 {
+		return matches[1]
+	}
+	return ""
+}
+
 // Associations represent a list of rules for Pod metadata associations with resources
 type Associations struct {
 	Associations []Association

@@ -178,16 +178,21 @@ func extractFieldRules(fieldType string, fields ...FieldExtractConfig) ([]kube.F
 		}
 
 		var keyRegex *regexp.Regexp
+		var hasKeyRegexReference bool
 		if a.KeyRegex != "" {
 			var err error
 			keyRegex, err = regexp.Compile(a.KeyRegex)
 			if err != nil {
 				return rules, err
 			}
+
+			if keyRegex.NumSubexp() > 0 {
+				hasKeyRegexReference = true
+			}
 		}
 
 		rules = append(rules, kube.FieldExtractionRule{
-			Name: name, Key: a.Key, KeyRegex: keyRegex, Regex: r, From: a.From,
+			Name: name, Key: a.Key, KeyRegex: keyRegex, HasKeyRegexReference: hasKeyRegexReference, Regex: r, From: a.From,
 		})
 	}
 	return rules, nil

@@ -673,19 +673,20 @@ func Test_extractFieldRules(t *testing.T) {
 			true,
 		},
 		{
-			"match-keyregex",
+			"keyregex-capture-group",
 			args{"labels", []FieldExtractConfig{
 				{
-					TagName:  "name",
-					KeyRegex: "key*",
+					TagName:  "$0-$1-$2",
+					KeyRegex: "(key)(.*)",
 					From:     kube.MetadataFromPod,
 				},
 			}},
 			[]kube.FieldExtractionRule{
 				{
-					Name:     "name",
-					KeyRegex: regexp.MustCompile("key*"),
-					From:     kube.MetadataFromPod,
+					Name:                 "$0-$1-$2",
+					KeyRegex:             regexp.MustCompile("(key)(.*)"),
+					HasKeyRegexReference: true,
+					From:                 kube.MetadataFromPod,
 				},
 			},
 			false,