From 4215822c718f801191e2f3365e9ad82e49d81d5b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 21 Aug 2020 16:51:22 -0700 Subject: [PATCH] Host metrics instrumentation (#231) * Similar to runtime * Comments * Comments * Process and host CPU usage * Test process CPU time * Add tests for memory and network * Tests * Test the readAddr return value * Increase tolerance to 5% * Deflake this test * Fix doc.go * Comment * PR feedback * Remove Configure() option in runtime and host Co-authored-by: Tyler Yahn --- instrumentation/host/doc.go | 34 +++ instrumentation/host/example/main.go | 50 +++++ instrumentation/host/go.mod | 16 ++ instrumentation/host/go.sum | 111 ++++++++++ instrumentation/host/host.go | 281 ++++++++++++++++++++++++ instrumentation/host/host_test.go | 255 +++++++++++++++++++++ instrumentation/runtime/doc.go | 4 +- instrumentation/runtime/example/main.go | 4 +- instrumentation/runtime/runtime.go | 7 +- instrumentation/runtime/runtime_test.go | 14 +- 10 files changed, 759 insertions(+), 17 deletions(-) create mode 100644 instrumentation/host/doc.go create mode 100644 instrumentation/host/example/main.go create mode 100644 instrumentation/host/go.mod create mode 100644 instrumentation/host/go.sum create mode 100644 instrumentation/host/host.go create mode 100644 instrumentation/host/host_test.go diff --git a/instrumentation/host/doc.go b/instrumentation/host/doc.go new file mode 100644 index 00000000000..580de9baa84 --- /dev/null +++ b/instrumentation/host/doc.go @@ -0,0 +1,34 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package host provides the conventional host metric instruments +// specified by OpenTelemetry. Host metric events are sometimes +// collected through the OpenTelemetry Collector "hostmetrics" +// receiver running as an agent; this instrumentation is an +// alternative for processes that want to record the same information +// without an agent. +// +// The metric events produced are listed here with label dimensions. +// +// Name Label +// ---------------------------------------------------------------------- +// process.cpu.time state=user|system +// system.cpu.time state=user|system|other|idle +// system.memory.usage state=used|available +// system.memory.utilization state=used|available +// system.network.io direction=transmit|receive +// +// See https://github.com/open-telemetry/oteps/blob/master/text/0119-standard-system-metrics.md +// for the definition of these metric instruments. +package host // import "go.opentelemetry.io/contrib/instrumentation/host" diff --git a/instrumentation/host/example/main.go b/instrumentation/host/example/main.go new file mode 100644 index 00000000000..7ffbd6af500 --- /dev/null +++ b/instrumentation/host/example/main.go @@ -0,0 +1,50 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "log" + "os" + "os/signal" + "syscall" + + "go.opentelemetry.io/otel/exporters/stdout" + "go.opentelemetry.io/otel/sdk/metric/controller/push" + + "go.opentelemetry.io/contrib/instrumentation/host" +) + +func initMeter() *push.Controller { + pusher, err := stdout.InstallNewPipeline([]stdout.Option{ + stdout.WithQuantiles([]float64{0.5}), + stdout.WithPrettyPrint(), + }, nil) + if err != nil { + log.Panicf("failed to initialize metric stdout exporter %v", err) + } + return pusher +} + +func main() { + defer initMeter().Stop() + + if err := host.Start(); err != nil { + panic(err) + } + + stopChan := make(chan os.Signal, 1) + signal.Notify(stopChan, syscall.SIGTERM, syscall.SIGINT) + <-stopChan +} diff --git a/instrumentation/host/go.mod b/instrumentation/host/go.mod new file mode 100644 index 00000000000..1028daae111 --- /dev/null +++ b/instrumentation/host/go.mod @@ -0,0 +1,16 @@ +module go.opentelemetry.io/contrib/instrumentation/host + +go 1.14 + +replace go.opentelemetry.io/contrib => ../.. 
+ +require ( + github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect + github.com/go-ole/go-ole v1.2.4 // indirect + github.com/shirou/gopsutil v2.20.7+incompatible + github.com/stretchr/testify v1.6.1 + go.opentelemetry.io/contrib v0.10.1 + go.opentelemetry.io/otel v0.10.0 + go.opentelemetry.io/otel/exporters/stdout v0.10.0 + go.opentelemetry.io/otel/sdk v0.10.0 +) diff --git a/instrumentation/host/go.sum b/instrumentation/host/go.sum new file mode 100644 index 00000000000..306cb47f093 --- /dev/null +++ b/instrumentation/host/go.sum @@ -0,0 +1,111 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DataDog/sketches-go v0.0.1 h1:RtG+76WKgZuz6FIaGsjoPePmadDBkuD/KC6+ZWu78b8= +github.com/DataDog/sketches-go v0.0.1/go.mod h1:Q5DbzQ+3AkgGwymQO7aZFNP7ns2lZKGtvRBzRXfdi60= +github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= +github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= +github.com/benbjohnson/clock v1.0.3 h1:vkLuvpK4fmtSCuo60+yC63p7y0BmQ8gm5ZXGuBCJyXg= +github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod 
h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI= +github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1 
h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= +github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/shirou/gopsutil v2.20.7+incompatible h1:Ymv4OD12d6zm+2yONe39VSmp2XooJe8za7ngOLW/o/w= +github.com/shirou/gopsutil v2.20.7+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +go.opentelemetry.io/otel v0.10.0 h1:2y/HYj1dIfG1nPh0Z15X4se8WwYWuTyKHLSgRb/mbQ0= +go.opentelemetry.io/otel v0.10.0/go.mod h1:n3v1JGUBpn5DafiF1UeoDs5fr5XZMG+43kigDtFB8Vk= +go.opentelemetry.io/otel/exporters/stdout v0.10.0 h1:5dhUv/AMKF+9p2igV0pAmS7sWQvX0r+eimf7uiEDWd8= +go.opentelemetry.io/otel/exporters/stdout v0.10.0/go.mod 
h1:c7hVyiDzqbxgcerYbLreBNI0+MNE8x/hbekVx3lu+gM= +go.opentelemetry.io/otel/sdk v0.10.0 h1:iQWVDfmGB+5TjbrO9yFlezGCWBaJ73vxJTHB+ttdTQk= +go.opentelemetry.io/otel/sdk v0.10.0/go.mod h1:T5752PMr00aUHAVEbaDAYU5tzM2PWOmyy7Lc5OzSrs8= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys 
v0.0.0-20200615200032-f1bc736245b1 h1:ogLJMz+qpzav7lGMh10LMvAkM/fAoGlaiiHYiFYdm80= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20191009194640-548a555dbc03 h1:4HYDjxeNXAOTv3o1N2tjo8UUSlhQgAD52FVkwxnWgM8= +google.golang.org/genproto v0.0.0-20191009194640-548a555dbc03/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.0 
h1:T7P4R73V3SSDPhH7WW7ATbfViLtmamH0DKrP3f9AuDI= +google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/instrumentation/host/host.go b/instrumentation/host/host.go new file mode 100644 index 00000000000..44f4851f17b --- /dev/null +++ b/instrumentation/host/host.go @@ -0,0 +1,281 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host // import "go.opentelemetry.io/contrib/instrumentation/host" + +import ( + "context" + "fmt" + "os" + "sync" + + "github.com/shirou/gopsutil/cpu" + "github.com/shirou/gopsutil/mem" + "github.com/shirou/gopsutil/net" + "github.com/shirou/gopsutil/process" + + "go.opentelemetry.io/contrib" + "go.opentelemetry.io/otel/api/global" + "go.opentelemetry.io/otel/api/kv" + "go.opentelemetry.io/otel/api/metric" + "go.opentelemetry.io/otel/api/unit" +) + +// host reports the work-in-progress conventional host metrics specified by OpenTelemetry. +type host struct { + config Config + meter metric.Meter +} + +// Config contains optional settings for reporting host metrics. +type Config struct { + // MeterProvider sets the metric.Provider. If nil, the global + // Provider will be used. + MeterProvider metric.Provider +} + +// Option supports configuring optional settings for host metrics. +type Option interface { + // ApplyHost updates *Config. + ApplyHost(*Config) +} + +// WithMeterProvider sets the Metric implementation to use for +// reporting. If this option is not used, the global metric.Provider +// will be used. `provider` must be non-nil. +func WithMeterProvider(provider metric.Provider) Option { + return metricProviderOption{provider} +} + +type metricProviderOption struct{ metric.Provider } + +// ApplyHost implements Option. +func (o metricProviderOption) ApplyHost(c *Config) { + c.MeterProvider = o.Provider +} + +var ( + // Label sets for CPU time measurements.
+ + LabelCPUTimeUser = []kv.KeyValue{kv.String("state", "user")} + LabelCPUTimeSystem = []kv.KeyValue{kv.String("state", "system")} + LabelCPUTimeOther = []kv.KeyValue{kv.String("state", "other")} + LabelCPUTimeIdle = []kv.KeyValue{kv.String("state", "idle")} + + // Label sets used for Memory measurements. + + LabelMemoryAvailable = []kv.KeyValue{kv.String("state", "available")} + LabelMemoryUsed = []kv.KeyValue{kv.String("state", "used")} + + // Label sets used for Network measurements. + + LabelNetworkTransmit = []kv.KeyValue{kv.String("direction", "transmit")} + LabelNetworkReceive = []kv.KeyValue{kv.String("direction", "receive")} +) + +// configure computes a Config from a list of Options. +func configure(opts ...Option) Config { + c := Config{ + MeterProvider: global.MeterProvider(), + } + for _, opt := range opts { + opt.ApplyHost(&c) + } + return c +} + +// Start initializes reporting of host metrics using the supplied Options; a nil MeterProvider falls back to the global provider. +func Start(opts ...Option) error { + c := configure(opts...) + if c.MeterProvider == nil { + c.MeterProvider = global.MeterProvider() + } + h := &host{ + meter: c.MeterProvider.Meter( + "go.opentelemetry.io/contrib/instrumentation/host", + metric.WithInstrumentationVersion(contrib.SemVersion()), + ), + config: c, + } + return h.register() +} + +func (h *host) register() error { + var ( + err error + + processCPUTime metric.Float64SumObserver + hostCPUTime metric.Float64SumObserver + + hostMemoryUsage metric.Int64UpDownSumObserver + hostMemoryUtilization metric.Float64UpDownSumObserver + + networkIOUsage metric.Int64SumObserver + + // lock prevents a race between batch observer and instrument registration.
+ lock sync.Mutex + ) + + proc, err := process.NewProcess(int32(os.Getpid())) + if err != nil { + return fmt.Errorf("could not find this process: %w", err) + } + + lock.Lock() + defer lock.Unlock() + + batchObserver := h.meter.NewBatchObserver(func(ctx context.Context, result metric.BatchObserverResult) { + lock.Lock() + defer lock.Unlock() + + // This follows the OpenTelemetry Collector's "hostmetrics" receiver + // (receiver/hostmetricsreceiver/internal/scraper/processscraper), which + // measures User and System IOwait time. + // TODO: the Collector has per-OS compilation modules to support + // specific metrics that are not universal. + processTimes, err := proc.TimesWithContext(ctx) + if err != nil { + global.Handle(err) + return + } + + hostTimeSlice, err := cpu.TimesWithContext(ctx, false) + if err != nil { + global.Handle(err) + return + } + if len(hostTimeSlice) != 1 { + global.Handle(fmt.Errorf("host CPU usage: incorrect summary count")) + return + } + + vmStats, err := mem.VirtualMemoryWithContext(ctx) + if err != nil { + global.Handle(err) + return + } + + ioStats, err := net.IOCountersWithContext(ctx, false) + if err != nil { + global.Handle(err) + return + } + if len(ioStats) != 1 { + global.Handle(fmt.Errorf("host network usage: incorrect summary count")) + return + } + + // Process CPU time + result.Observe(LabelCPUTimeUser, processCPUTime.Observation(processTimes.User)) + result.Observe(LabelCPUTimeSystem, processCPUTime.Observation(processTimes.System)) + + // Host CPU time + hostTime := hostTimeSlice[0] + result.Observe(LabelCPUTimeUser, hostCPUTime.Observation(hostTime.User)) + result.Observe(LabelCPUTimeSystem, hostCPUTime.Observation(hostTime.System)) + + // TODO(#244): "other" is a placeholder for actually dealing + // with these states. Do users actually want this + // (unconditionally)? How should we handle "iowait" + // if not all systems expose it? Should we break + // these down by CPU? If so, are users going to want + // to aggregate in-process?
See: + // https://github.com/open-telemetry/opentelemetry-go-contrib/issues/244 + other := hostTime.Nice + + hostTime.Iowait + + hostTime.Irq + + hostTime.Softirq + + hostTime.Steal + + hostTime.Guest + + hostTime.GuestNice + + result.Observe(LabelCPUTimeOther, hostCPUTime.Observation(other)) + result.Observe(LabelCPUTimeIdle, hostCPUTime.Observation(hostTime.Idle)) + + // Host memory usage + result.Observe(LabelMemoryUsed, hostMemoryUsage.Observation(int64(vmStats.Used))) + result.Observe(LabelMemoryAvailable, hostMemoryUsage.Observation(int64(vmStats.Available))) + + // Host memory utilization + result.Observe(LabelMemoryUsed, + hostMemoryUtilization.Observation(float64(vmStats.Used)/float64(vmStats.Total)), + ) + result.Observe(LabelMemoryAvailable, + hostMemoryUtilization.Observation(float64(vmStats.Available)/float64(vmStats.Total)), + ) + + // Host network usage + // + // TODO: These can be broken down by network + // interface, with similar questions to those posed + // about per-CPU measurements above. + result.Observe(LabelNetworkTransmit, networkIOUsage.Observation(int64(ioStats[0].BytesSent))) + result.Observe(LabelNetworkReceive, networkIOUsage.Observation(int64(ioStats[0].BytesRecv))) + }) + + // TODO: .time units are in seconds, but "unit" package does + // not include this string.
+ // https://github.com/open-telemetry/opentelemetry-specification/issues/705 + if processCPUTime, err = batchObserver.NewFloat64SumObserver( + "process.cpu.time", + metric.WithUnit("s"), + metric.WithDescription( + "Accumulated CPU time spent by this process labeled by state (User, System, ...)", + ), + ); err != nil { + return err + } + + if hostCPUTime, err = batchObserver.NewFloat64SumObserver( + "system.cpu.time", + metric.WithUnit("s"), + metric.WithDescription( + "Accumulated CPU time spent by this host labeled by state (User, System, Other, Idle)", + ), + ); err != nil { + return err + } + + if hostMemoryUsage, err = batchObserver.NewInt64UpDownSumObserver( + "system.memory.usage", + metric.WithUnit(unit.Bytes), + metric.WithDescription( + "Memory usage of this host labeled by memory state (Used, Available)", + ), + ); err != nil { + return err + } + + if hostMemoryUtilization, err = batchObserver.NewFloat64UpDownSumObserver( + "system.memory.utilization", + metric.WithUnit(unit.Dimensionless), + metric.WithDescription( + "Memory utilization of this host labeled by memory state (Used, Available)", + ), + ); err != nil { + return err + } + + if networkIOUsage, err = batchObserver.NewInt64SumObserver( + "system.network.io", + metric.WithUnit(unit.Bytes), + metric.WithDescription( + "Bytes transferred labeled by direction (Transmit, Receive)", + ), + ); err != nil { + return err + } + + return nil +} diff --git a/instrumentation/host/host_test.go b/instrumentation/host/host_test.go new file mode 100644 index 00000000000..f9b6274c7a5 --- /dev/null +++ b/instrumentation/host/host_test.go @@ -0,0 +1,255 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package host_test + +import ( + "context" + "os" + "runtime" + "testing" + "time" + + gonet "net" + + "github.com/shirou/gopsutil/cpu" + "github.com/shirou/gopsutil/mem" + "github.com/shirou/gopsutil/net" + "github.com/shirou/gopsutil/process" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "go.opentelemetry.io/contrib/instrumentation/host" + "go.opentelemetry.io/contrib/internal/metric" + "go.opentelemetry.io/otel/api/kv" +) + +func getMetric(impl *metric.MeterImpl, name string, label kv.KeyValue) float64 { + for _, b := range impl.MeasurementBatches { + foundLabel := false + for _, haveLabel := range b.Labels { + if haveLabel != label { + continue + } + foundLabel = true + break + } + if !foundLabel { + continue + } + + for _, m := range b.Measurements { + if m.Instrument.Descriptor().Name() != name { + continue + } + + return m.Number.CoerceToFloat64(m.Instrument.Descriptor().NumberKind()) + } + } + panic("Could not locate a metric in test output") +} + +func TestHostCPU(t *testing.T) { + impl, provider := metric.NewProvider() + err := host.Start( + host.WithMeterProvider(provider), + ) + assert.NoError(t, err) + + proc, err := process.NewProcess(int32(os.Getpid())) + if err != nil { + t.Fatalf("could not find this process: %v", err) + } + + ctx := context.Background() + processBefore, err := proc.TimesWithContext(ctx) + require.NoError(t, err) + + hostBefore, err := cpu.TimesWithContext(ctx, false) + require.NoError(t, err) + + start := time.Now() + for time.Since(start) < time.Second { + // This has a mix
of user and system time, so serves + // the purpose of advancing both process and host, + // user and system CPU usage. + _, err = proc.TimesWithContext(ctx) + require.NoError(t, err) + } + + impl.RunAsyncInstruments() + + processUser := getMetric(impl, "process.cpu.time", host.LabelCPUTimeUser[0]) + processSystem := getMetric(impl, "process.cpu.time", host.LabelCPUTimeSystem[0]) + + hostUser := getMetric(impl, "system.cpu.time", host.LabelCPUTimeUser[0]) + hostSystem := getMetric(impl, "system.cpu.time", host.LabelCPUTimeSystem[0]) + + processAfter, err := proc.TimesWithContext(ctx) + require.NoError(t, err) + + hostAfter, err := cpu.TimesWithContext(ctx, false) + require.NoError(t, err) + + // Validate process times: + // User times are in range + require.LessOrEqual(t, processBefore.User, processUser) + require.GreaterOrEqual(t, processAfter.User, processUser) + // System times are in range + require.LessOrEqual(t, processBefore.System, processSystem) + require.GreaterOrEqual(t, processAfter.System, processSystem) + // Ranges are not empty + require.NotEqual(t, processAfter.System, processBefore.System) + require.NotEqual(t, processAfter.User, processBefore.User) + + // Validate host times: + // Correct assumptions: + require.Equal(t, 1, len(hostBefore)) + require.Equal(t, 1, len(hostAfter)) + // User times are in range + require.LessOrEqual(t, hostBefore[0].User, hostUser) + require.GreaterOrEqual(t, hostAfter[0].User, hostUser) + // System times are in range + require.LessOrEqual(t, hostBefore[0].System, hostSystem) + require.GreaterOrEqual(t, hostAfter[0].System, hostSystem) + // Ranges are not empty + require.NotEqual(t, hostAfter[0].System, hostBefore[0].System) + require.NotEqual(t, hostAfter[0].User, hostBefore[0].User) + + // TODO: We are not testing host "Other" nor "Idle" and + // generally the specification hasn't been finalized, so + // there's more to do.
Moreover, "Other" is not portable and + // "Idle" may not advance on a fully loaded machine => both + // are difficult to test. +} + +func TestHostMemory(t *testing.T) { + impl, provider := metric.NewProvider() + err := host.Start( + host.WithMeterProvider(provider), + ) + assert.NoError(t, err) + + ctx := context.Background() + hostBefore, err := mem.VirtualMemoryWithContext(ctx) + require.NoError(t, err) + + slice := make([]byte, 100*1024*1024) + defer runtime.KeepAlive(slice) + for i := range slice { + slice[i] = byte(i) + } + + // As we are going to read the /proc file system for this info, sleep a while: + time.Sleep(time.Second) + + impl.RunAsyncInstruments() + + hostAfter, err := mem.VirtualMemoryWithContext(ctx) + require.NoError(t, err) + + hostUsed := getMetric(impl, "system.memory.usage", host.LabelMemoryUsed[0]) + hostAvailable := getMetric(impl, "system.memory.usage", host.LabelMemoryAvailable[0]) + + hostUsedUtil := getMetric(impl, "system.memory.utilization", host.LabelMemoryUsed[0]) + hostAvailableUtil := getMetric(impl, "system.memory.utilization", host.LabelMemoryAvailable[0]) + + beforeTotal := hostBefore.Available + hostBefore.Used + afterTotal := hostAfter.Available + hostAfter.Used + measureTotal := hostUsed + hostAvailable + + // Tolerance is 5% + const tolerance = 0.05 + + // Check that the sum of used and available doesn't change: + require.InEpsilon(t, float64(beforeTotal), measureTotal, tolerance) + require.InEpsilon(t, float64(afterTotal), measureTotal, tolerance) + + // Check that the implied total is equal from both Used and Available metrics: + require.InEpsilon(t, hostUsed/hostUsedUtil, hostAvailable/hostAvailableUtil, tolerance) + + // Check that utilization sums to 1.0: + require.InEpsilon(t, 1.0, hostUsedUtil+hostAvailableUtil, tolerance) +} + +func sendBytes(t *testing.T, count int) error { + conn1, err := gonet.ListenPacket("udp", "127.0.0.1:0") + if err != nil { + return err + } + defer conn1.Close() + + conn2, err := 
gonet.ListenPacket("udp", "127.0.0.1:0") + if err != nil { + return err + } + defer conn2.Close() + + data1 := make([]byte, 1000) + data2 := make([]byte, 1000) + for i := range data1 { + data1[i] = byte(i) + } + + for ; count > 0; count -= len(data1) { + _, err = conn1.WriteTo(data1, conn2.LocalAddr()) + if err != nil { + return err + } + _, readAddr, err := conn2.ReadFrom(data2) + if err != nil { + return err + } + + require.Equal(t, "udp", readAddr.Network()) + require.Equal(t, conn1.LocalAddr().String(), readAddr.String()) + } + + return nil +} + +func TestHostNetwork(t *testing.T) { + impl, provider := metric.NewProvider() + err := host.Start( + host.WithMeterProvider(provider), + ) + assert.NoError(t, err) + + ctx := context.Background() + hostBefore, err := net.IOCountersWithContext(ctx, false) + require.NoError(t, err) + + const howMuch = 10000 + err = sendBytes(t, howMuch) + require.NoError(t, err) + + // As we are going to read the /proc file system for this info, sleep a while: + time.Sleep(time.Second) + + impl.RunAsyncInstruments() + + hostAfter, err := net.IOCountersWithContext(ctx, false) + require.NoError(t, err) + + hostTransmit := getMetric(impl, "system.network.io", host.LabelNetworkTransmit[0]) + hostReceive := getMetric(impl, "system.network.io", host.LabelNetworkReceive[0]) + + // Check that the network transmit/receive used is greater than before: + require.LessOrEqual(t, uint64(howMuch), hostAfter[0].BytesSent-hostBefore[0].BytesSent) + require.LessOrEqual(t, uint64(howMuch), hostAfter[0].BytesRecv-hostBefore[0].BytesRecv) + + // Check that the recorded measurements reflect the same change: + require.LessOrEqual(t, uint64(howMuch), uint64(hostTransmit)-hostBefore[0].BytesSent) + require.LessOrEqual(t, uint64(howMuch), uint64(hostReceive)-hostBefore[0].BytesRecv) +} diff --git a/instrumentation/runtime/doc.go b/instrumentation/runtime/doc.go index 7003866ed39..836d98cd0a5 100644 --- a/instrumentation/runtime/doc.go +++ 
b/instrumentation/runtime/doc.go @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -// package runtime implements the work-in-progress conventional runtime metrics specified by OpenTelemetry. +// Package runtime implements the conventional runtime metrics specified by OpenTelemetry. // -// The metrics produced are: +// The metric events produced are: // runtime.go.cgo.calls - Number of cgo calls made by the current process // runtime.go.gc.count - Number of completed garbage collection cycles // runtime.go.gc.pause_ns (ns) Amount of nanoseconds in GC stop-the-world pauses diff --git a/instrumentation/runtime/example/main.go b/instrumentation/runtime/example/main.go index 28ef0855c40..c29c6b98f7b 100644 --- a/instrumentation/runtime/example/main.go +++ b/instrumentation/runtime/example/main.go @@ -42,9 +42,7 @@ func main() { defer initMeter().Stop() if err := runtime.Start( - runtime.Configure( - runtime.WithMinimumReadMemStatsInterval(time.Second), - ), + runtime.WithMinimumReadMemStatsInterval(time.Second), ); err != nil { panic(err) } diff --git a/instrumentation/runtime/runtime.go b/instrumentation/runtime/runtime.go index b8d7acc762d..25e0bad9654 100644 --- a/instrumentation/runtime/runtime.go +++ b/instrumentation/runtime/runtime.go @@ -86,8 +86,8 @@ func (o metricProviderOption) ApplyRuntime(c *Config) { c.MeterProvider = o.Provider } -// Configure computes a Config from the supplied Options. -func Configure(opts ...Option) Config { +// configure computes a Config from the supplied Options. +func configure(opts ...Option) Config { c := Config{ MeterProvider: global.MeterProvider(), MinimumReadMemStatsInterval: DefaultMinimumReadMemStatsInterval, @@ -99,7 +99,8 @@ func Configure(opts ...Option) Config { } -// Start initializes reporting of runtime metrics using the supplied Config. -func Start(c Config) error { +// Start initializes reporting of runtime metrics using the supplied Options. +func Start(opts ...Option) error { + c := configure(opts...)
if c.MinimumReadMemStatsInterval < 0 { c.MinimumReadMemStatsInterval = DefaultMinimumReadMemStatsInterval } diff --git a/instrumentation/runtime/runtime_test.go b/instrumentation/runtime/runtime_test.go index 3bc299d0b3e..1657e6c148a 100644 --- a/instrumentation/runtime/runtime_test.go +++ b/instrumentation/runtime/runtime_test.go @@ -28,9 +28,7 @@ import ( func TestRuntime(t *testing.T) { err := runtime.Start( - runtime.Configure( - runtime.WithMinimumReadMemStatsInterval(time.Second), - ), + runtime.WithMinimumReadMemStatsInterval(time.Second), ) assert.NoError(t, err) time.Sleep(time.Second) @@ -57,12 +55,10 @@ func testMinimumInterval(t *testing.T, shouldHappen bool, opts ...runtime.Option impl, provider := metric.NewProvider() err := runtime.Start( - runtime.Configure( - append( - opts, - runtime.WithMeterProvider(provider), - )..., - ), + append( + opts, + runtime.WithMeterProvider(provider), + )..., ) assert.NoError(t, err)