Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[processor/k8sattributesprocessor] support regex capture groups in tag_name #9525

Merged
merged 17 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
- `processor/transform`: Add transformation of logs (#9368)
- `datadogexporter`: Add `metrics::summaries::mode` to specify export mode for summaries (#8846)
- `prometheusreceiver`: Add resource attributes for kubernetes resource discovery labels (#9416)
- `k8sattributesprocessor`: Support regex capture groups in tag_name (#9525)

### 🧰 Bug fixes 🧰

Expand Down
28 changes: 23 additions & 5 deletions processor/k8sattributesprocessor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,32 @@ type ExtractConfig struct {

// FieldExtractConfig allows specifying an extraction rule to extract a value from exactly one field.
//
// The field accepts a list FilterExtractConfig map. The map accepts three keys
// tag_name, key and regex
// The field accepts a list FilterExtractConfig map. The map accepts several keys
// from, tag_name, key, key_regex and regex
//
// - tag_name represents the name of the tag that will be added to the span.
// When not specified a default tag name will be used of the format:
// k8s.pod.annotations.<annotation key>
// k8s.pod.labels.<label key>
// For example, if tag_name is not specified and the key is git_sha,
// then the attribute name will be `k8s.pod.annotations.git_sha`.
// When key_regex is present, tag_name supports back references to both named and positional capture groups.
// For example, if your pod spec contains the following labels,
//
// app.kubernetes.io/component: mysql
// app.kubernetes.io/version: 5.7.21
//
// and you'd like to add tags for all labels with prefix app.kubernetes.io/ and also trim the prefix,
// then you can specify the following extraction rules:
//
// processors:
// k8sattributes:
// extract:
// labels:
// - tag_name: $1
// key_regex: kubernetes.io/(.*)
//
// this will add the `component` and `version` tags to the spans or metrics.
//
// - key represents the annotation name. This must exactly match an annotation name.
//
Expand All @@ -103,9 +120,10 @@ type ExtractConfig struct {
// and you'd like to extract the GIT_SHA and the CI_BUILD values as tags, then you must
// specify the following two extraction rules:
//
// procesors:
// k8s-tagger:
// annotations:
// processors:
// k8sattributes:
// extract:
// annotations:
// - tag_name: git.sha
// key: kubernetes.io/change-cause
// regex: GIT_SHA=(?P<value>\w+)
Expand Down
66 changes: 5 additions & 61 deletions processor/k8sattributesprocessor/internal/kube/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,35 +311,11 @@ func (c *WatchClient) extractPodAttributes(pod *api_v1.Pod) map[string]string {
}

for _, r := range c.Rules.Labels {
// By default if the From field is not set for labels and annotations we want to extract them from pod
if r.From == MetadataFromPod || r.From == "" {
if r.KeyRegex != nil {
for k, v := range pod.Labels {
if r.KeyRegex.MatchString(k) && v != "" {
name := fmt.Sprintf("k8s.pod.labels.%s", k)
tags[name] = v
}
}
} else if v, ok := pod.Labels[r.Key]; ok {
tags[r.Name] = c.extractField(v, r)
}
}
r.extractFromPodMetadata(pod.Labels, tags, "k8s.pod.labels.%s")
}

for _, r := range c.Rules.Annotations {
// By default if the From field is not set for labels and annotations we want to extract them from pod
if r.From == MetadataFromPod || r.From == "" {
if r.KeyRegex != nil {
for k, v := range pod.Annotations {
if r.KeyRegex.MatchString(k) && v != "" {
name := fmt.Sprintf("k8s.pod.annotations.%s", k)
tags[name] = v
}
}
} else if v, ok := pod.Annotations[r.Key]; ok {
tags[r.Name] = c.extractField(v, r)
}
}
r.extractFromPodMetadata(pod.Annotations, tags, "k8s.pod.annotations.%s")
}
return tags
}
Expand Down Expand Up @@ -390,50 +366,18 @@ func (c *WatchClient) extractNamespaceAttributes(namespace *api_v1.Namespace) ma
tags := map[string]string{}

for _, r := range c.Rules.Labels {
if r.From == MetadataFromNamespace {
if r.KeyRegex != nil {
for k, v := range namespace.Labels {
if r.KeyRegex.MatchString(k) && v != "" {
name := fmt.Sprintf("k8s.namespace.labels.%s", k)
tags[name] = v
}
}
} else if v, ok := namespace.Labels[r.Key]; ok {
tags[r.Name] = c.extractField(v, r)
}
}
r.extractFromNamespaceMetadata(namespace.Labels, tags, "k8s.namespace.labels.%s")
}

for _, r := range c.Rules.Annotations {
if r.From == MetadataFromNamespace {
if r.KeyRegex != nil {
for k, v := range namespace.Annotations {
if r.KeyRegex.MatchString(k) && v != "" {
name := fmt.Sprintf("k8s.namespace.annotations.%s", k)
tags[name] = v
}
}
} else if v, ok := namespace.Annotations[r.Key]; ok {
tags[r.Name] = c.extractField(v, r)
}
}
r.extractFromNamespaceMetadata(namespace.Annotations, tags, "k8s.namespace.annotations.%s")
}

return tags
}

func (c *WatchClient) extractField(v string, r FieldExtractionRule) string {
dmitryax marked this conversation as resolved.
Show resolved Hide resolved
// Check if a subset of the field should be extracted with a regular expression
// instead of the whole field.
if r.Regex == nil {
return v
}

matches := r.Regex.FindStringSubmatch(v)
if len(matches) == 2 {
return matches[1]
}
return ""
return r.extractField(v)
}

func (c *WatchClient) addOrUpdatePod(pod *api_v1.Pod) {
Expand Down
30 changes: 30 additions & 0 deletions processor/k8sattributesprocessor/internal/kube/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,36 @@ func TestExtractionRules(t *testing.T) {
"k8s.pod.annotations.annotation1": "av1",
},
},
{
name: "captured-groups",
rules: ExtractionRules{
Annotations: []FieldExtractionRule{{
Name: "$1",
KeyRegex: regexp.MustCompile(`annotation(\d+)`),
HasKeyRegexReference: true,
From: MetadataFromPod,
},
},
},
attributes: map[string]string{
"1": "av1",
},
},
{
name: "captured-groups-$0",
rules: ExtractionRules{
Annotations: []FieldExtractionRule{{
Name: "$0",
KeyRegex: regexp.MustCompile(`annotation(\d+)`),
HasKeyRegexReference: true,
From: MetadataFromPod,
},
},
},
attributes: map[string]string{
"annotation1": "av1",
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
Expand Down
50 changes: 49 additions & 1 deletion processor/k8sattributesprocessor/internal/kube/kube.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package kube // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sattributesprocessor/internal/kube"

import (
"fmt"
"regexp"
"time"

Expand Down Expand Up @@ -158,7 +159,8 @@ type FieldExtractionRule struct {
// Key is used to lookup k8s pod fields.
Key string
// KeyRegex is a regular expression used to extract a Key that matches the regex.
KeyRegex *regexp.Regexp
KeyRegex *regexp.Regexp
HasKeyRegexReference bool
// Regex is a regular expression used to extract a sub-part of a field value.
// Full value is extracted when no regexp is provided.
Regex *regexp.Regexp
Expand All @@ -169,6 +171,52 @@ type FieldExtractionRule struct {
From string
}

// extractFromPodMetadata applies the rule to the given metadata map only when
// the rule targets pods, writing any extracted values into tags.
//
// A rule targets pods when From equals MetadataFromPod or is left empty; for
// any other From value the call does nothing. formatter is forwarded
// unchanged to extractFromMetadata.
func (r *FieldExtractionRule) extractFromPodMetadata(metadata map[string]string, tags map[string]string, formatter string) {
	// An unset From defaults to the pod for labels and annotations.
	if r.From != MetadataFromPod && r.From != "" {
		return
	}
	r.extractFromMetadata(metadata, tags, formatter)
}

// extractFromNamespaceMetadata applies the rule to the given metadata map only
// when From equals MetadataFromNamespace, writing any extracted values into
// tags. Unlike the pod variant, an empty From does not select this path.
// formatter is forwarded unchanged to extractFromMetadata.
func (r *FieldExtractionRule) extractFromNamespaceMetadata(metadata map[string]string, tags map[string]string, formatter string) {
	if r.From != MetadataFromNamespace {
		return
	}
	r.extractFromMetadata(metadata, tags, formatter)
}

// extractFromMetadata evaluates the rule against metadata (a label or
// annotation map) and stores the resulting attributes in tags.
//
// Without a KeyRegex, the rule is an exact lookup: if metadata contains
// r.Key, its value is passed through extractField and stored under r.Name.
//
// With a KeyRegex, every entry whose key matches and whose value is non-empty
// is stored with its value unmodified. The tag name is chosen as follows:
//   - HasKeyRegexReference set: r.Name is used as an expansion template
//     against the key match, so references such as $1 or $0 are replaced by
//     the corresponding captured text.
//   - otherwise: formatter is passed to fmt.Sprintf with the matched key as
//     its only argument.
func (r *FieldExtractionRule) extractFromMetadata(metadata map[string]string, tags map[string]string, formatter string) {
	if r.KeyRegex == nil {
		if v, ok := metadata[r.Key]; ok {
			tags[r.Name] = r.extractField(v)
		}
		return
	}

	for k, v := range metadata {
		// Entries with empty values are never exported.
		if v == "" {
			continue
		}

		if !r.HasKeyRegexReference {
			if r.KeyRegex.MatchString(k) {
				tags[fmt.Sprintf(formatter, k)] = v
			}
			continue
		}

		// One submatch pass serves as both the match test and the input to
		// template expansion; a nil result means the key did not match.
		loc := r.KeyRegex.FindStringSubmatchIndex(k)
		if loc == nil {
			continue
		}
		name := string(r.KeyRegex.ExpandString(nil, r.Name, k, loc))
		tags[name] = v
	}
}

// extractField returns the portion of v selected by the rule.
//
// When the rule has no Regex, v is returned as is. Otherwise the regex is
// applied to v and the first capture group is returned, but only if the match
// yields exactly two elements (the full match plus one group); in every other
// case, including no match, the result is the empty string.
func (r *FieldExtractionRule) extractField(v string) string {
	if r.Regex != nil {
		if groups := r.Regex.FindStringSubmatch(v); len(groups) == 2 {
			return groups[1]
		}
		return ""
	}
	// No regex configured: the whole field value is used.
	return v
}

// Associations represent a list of rules for Pod metadata associations with resources
type Associations struct {
Associations []Association
Expand Down
7 changes: 6 additions & 1 deletion processor/k8sattributesprocessor/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,21 @@ func extractFieldRules(fieldType string, fields ...FieldExtractConfig) ([]kube.F
}

var keyRegex *regexp.Regexp
var hasKeyRegexReference bool
if a.KeyRegex != "" {
var err error
keyRegex, err = regexp.Compile(a.KeyRegex)
if err != nil {
return rules, err
}

if keyRegex.NumSubexp() > 0 {
hasKeyRegexReference = true
}
}

rules = append(rules, kube.FieldExtractionRule{
Name: name, Key: a.Key, KeyRegex: keyRegex, Regex: r, From: a.From,
Name: name, Key: a.Key, KeyRegex: keyRegex, HasKeyRegexReference: hasKeyRegexReference, Regex: r, From: a.From,
})
}
return rules, nil
Expand Down
13 changes: 7 additions & 6 deletions processor/k8sattributesprocessor/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -673,19 +673,20 @@ func Test_extractFieldRules(t *testing.T) {
true,
},
{
"match-keyregex",
"keyregex-capture-group",
args{"labels", []FieldExtractConfig{
{
TagName: "name",
KeyRegex: "key*",
TagName: "$0-$1-$2",
KeyRegex: "(key)(.*)",
From: kube.MetadataFromPod,
},
}},
[]kube.FieldExtractionRule{
{
Name: "name",
KeyRegex: regexp.MustCompile("key*"),
From: kube.MetadataFromPod,
Name: "$0-$1-$2",
KeyRegex: regexp.MustCompile("(key)(.*)"),
HasKeyRegexReference: true,
From: kube.MetadataFromPod,
},
},
false,
Expand Down