From 760f560deb504d1a784a211a087b3fc299221c55 Mon Sep 17 00:00:00 2001 From: eundoo-song Date: Tue, 13 Feb 2018 19:19:42 +0900 Subject: [PATCH 1/6] Make tracer metrics consistent with Go client Signed-off-by: eundoo-song --- jaeger_client/reporter.py | 8 ++++---- jaeger_client/sampler.py | 2 +- jaeger_client/tracer.py | 12 ++++++------ tests/test_reporter.py | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/jaeger_client/reporter.py b/jaeger_client/reporter.py index 46241ae5..d7c29253 100644 --- a/jaeger_client/reporter.py +++ b/jaeger_client/reporter.py @@ -233,13 +233,13 @@ def _flush(self): class ReporterMetrics(object): def __init__(self, metrics_factory): self.reporter_success = \ - metrics_factory.create_counter(name='jaeger.spans', tags={'reported': 'true'}) + metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'ok'}) self.reporter_failure = \ - metrics_factory.create_counter(name='jaeger.spans', tags={'reported': 'false'}) + metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'err'}) self.reporter_dropped = \ - metrics_factory.create_counter(name='jaeger.spans', tags={'dropped': 'true'}) + metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'dropped'}) self.reporter_socket = \ - metrics_factory.create_counter(name='jaeger.spans', tags={'socket_error': 'true'}) + metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'socket_error'}) class CompositeReporter(NullReporter): diff --git a/jaeger_client/sampler.py b/jaeger_client/sampler.py index b08aad16..faaaf66b 100644 --- a/jaeger_client/sampler.py +++ b/jaeger_client/sampler.py @@ -344,7 +344,7 @@ def __init__(self, channel, service_name, **kwargs): self.metrics_factory = kwargs.get('metrics_factory', None) \ or LegacyMetricsFactory(kwargs.get('metrics', None) or Metrics()) self.sampler_errors = \ - self.metrics_factory.create_counter('jaeger.sampler', {'error': 'true'}) + 
self.metrics_factory.create_counter(name='jaeger.sampler', tags={'result': 'err'}) self.error_reporter = kwargs.get('error_reporter') or \ ErrorReporter(Metrics()) self.max_operations = kwargs.get('max_operations', DEFAULT_MAX_OPERATIONS) diff --git a/jaeger_client/tracer.py b/jaeger_client/tracer.py index 85da9f24..301fe1db 100644 --- a/jaeger_client/tracer.py +++ b/jaeger_client/tracer.py @@ -234,14 +234,14 @@ class TracerMetrics(object): def __init__(self, metrics_factory): self.traces_started_sampled = \ - metrics_factory.create_counter(name='jaeger.traces-started', tags={'sampled': 'true'}) + metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'started', 'sampled': 'y'}) self.traces_started_not_sampled = \ - metrics_factory.create_counter(name='jaeger.traces-started', tags={'sampled': 'false'}) + metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'started', 'sampled': 'n'}) self.traces_joined_sampled = \ - metrics_factory.create_counter(name='jaeger.traces-joined', tags={'sampled': 'true'}) + metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'joined', 'sampled': 'y'}) self.traces_joined_not_sampled = \ - metrics_factory.create_counter(name='jaeger.traces-joined', tags={'sampled': 'false'}) + metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'joined', 'sampled': 'n'}) self.spans_sampled = \ - metrics_factory.create_counter(name='jaeger.spans', tags={'sampled': 'true'}) + metrics_factory.create_counter(name='jaeger.started_spans', tags={'sampled': 'y'}) self.spans_not_sampled = \ - metrics_factory.create_counter(name='jaeger.spans', tags={'sampled': 'false'}) + metrics_factory.create_counter(name='jaeger.started_spans', tags={'sampled': 'n'}) diff --git a/tests/test_reporter.py b/tests/test_reporter.py index a492c79f..cf45b876 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -184,7 +184,7 @@ def test_submit_batch_size_1(self): assert 1 == len(sender.futures) # send after 
close - span_dropped_key = 'jaeger.spans.dropped_true' + span_dropped_key = 'jaeger.reporter_spans.result_dropped' assert span_dropped_key not in reporter.metrics_factory.counters reporter.report_span(self._new_span('1')) assert 1 == reporter.metrics_factory.counters[span_dropped_key] @@ -195,7 +195,7 @@ def test_submit_failure(self): reporter.error_reporter = ErrorReporter( metrics=Metrics(), logger=logging.getLogger()) - reporter_failure_key = 'jaeger.spans.reported_false' + reporter_failure_key = 'jaeger.reporter_spans.result_err' assert reporter_failure_key not in reporter.metrics_factory.counters # simulate exception in send @@ -218,7 +218,7 @@ def test_submit_queue_full_batch_size_1(self): assert 1 == len(sender.futures) # the consumer is blocked on a future, so won't drain the queue reporter.report_span(self._new_span('2')) - span_dropped_key = 'jaeger.spans.dropped_true' + span_dropped_key = 'jaeger.reporter_spans.result_dropped' assert span_dropped_key not in reporter.metrics_factory.counters reporter.report_span(self._new_span('3')) yield self._wait_for( From 11308c33b8e99b7aff21839fe2c72e9d7310f8aa Mon Sep 17 00:00:00 2001 From: Eundoo Song Date: Fri, 23 Feb 2018 22:00:19 +0900 Subject: [PATCH 2/6] Fix lint error, add SamplerMetrics for the consistency Signed-off-by: Eundoo Song --- jaeger_client/reporter.py | 9 +++++---- jaeger_client/sampler.py | 26 +++++++++++++++++++++----- jaeger_client/tracer.py | 27 +++++++++++++++++---------- tests/test_reporter.py | 6 +++--- 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/jaeger_client/reporter.py b/jaeger_client/reporter.py index d7c29253..8cd1c6e6 100644 --- a/jaeger_client/reporter.py +++ b/jaeger_client/reporter.py @@ -233,13 +233,14 @@ def _flush(self): class ReporterMetrics(object): def __init__(self, metrics_factory): self.reporter_success = \ - metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'ok'}) + metrics_factory.create_counter(name='jaeger:reporter_spans', 
tags={'result': 'ok'}) self.reporter_failure = \ - metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'err'}) + metrics_factory.create_counter(name='jaeger:reporter_spans', tags={'result': 'err'}) self.reporter_dropped = \ - metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'dropped'}) + metrics_factory.create_counter(name='jaeger:reporter_spans', tags={'result': 'dropped'}) self.reporter_socket = \ - metrics_factory.create_counter(name='jaeger.reporter_spans', tags={'result': 'socket_error'}) + metrics_factory.create_counter(name='jaeger:reporter_spans', + tags={'result': 'socket_error'}) class CompositeReporter(NullReporter): diff --git a/jaeger_client/sampler.py b/jaeger_client/sampler.py index faaaf66b..0adf8692 100644 --- a/jaeger_client/sampler.py +++ b/jaeger_client/sampler.py @@ -343,8 +343,7 @@ def __init__(self, channel, service_name, **kwargs): kwargs.get('sampling_refresh_interval', DEFAULT_SAMPLING_INTERVAL) self.metrics_factory = kwargs.get('metrics_factory', None) \ or LegacyMetricsFactory(kwargs.get('metrics', None) or Metrics()) - self.sampler_errors = \ - self.metrics_factory.create_counter(name='jaeger.sampler', tags={'result': 'err'}) + self.metrics = SamplerMetrics(self.metrics_factory) self.error_reporter = kwargs.get('error_reporter') or \ ErrorReporter(Metrics()) self.max_operations = kwargs.get('max_operations', DEFAULT_MAX_OPERATIONS) @@ -408,7 +407,7 @@ def _create_periodic_callback(self): def _sampling_request_callback(self, future): exception = future.exception() if exception: - self.sampler_errors(1) + self.metrics.sampler_query_failure(1) self.error_reporter.error( 'Fail to get sampling strategy from jaeger-agent: %s', exception) @@ -417,8 +416,9 @@ def _sampling_request_callback(self, future): response = future.result() try: sampling_strategies_response = json.loads(response.body) + self.metrics.sampler_retrieved(1) except Exception as e: - self.sampler_errors(1) + 
self.metrics.sampler_query_failure(1) self.error_reporter.error( 'Fail to parse sampling strategy ' 'from jaeger-agent: %s [%s]', e, response.body) @@ -435,7 +435,7 @@ def _update_sampler(self, response): else: self._update_rate_limiting_or_probabilistic_sampler(response) except Exception as e: - self.sampler_errors(1) + self.metrics.sampler_update_failure(1) self.error_reporter.error( 'Fail to update sampler' 'from jaeger-agent: %s [%s]', e, response) @@ -443,6 +443,7 @@ def _update_sampler(self, response): def _update_adaptive_sampler(self, per_operation_strategies): if isinstance(self.sampler, AdaptiveSampler): self.sampler.update(per_operation_strategies) + self.metrics.sampler_updated(1) else: self.sampler = AdaptiveSampler(per_operation_strategies, self.max_operations) @@ -463,6 +464,7 @@ def _update_rate_limiting_or_probabilistic_sampler(self, response): if self.sampler != new_sampler: self.sampler = new_sampler + self.metrics.sampler_updated(1) def _poll_sampling_manager(self): self.logger.debug('Requesting tracing sampler refresh') @@ -493,3 +495,17 @@ def get_rate_limit(strategy=None): if not rate_limit_strategy: return DEFAULT_LOWER_BOUND return rate_limit_strategy.get(MAX_TRACES_PER_SECOND_STR, DEFAULT_LOWER_BOUND) + + +class SamplerMetrics(object): + """Tracer specific metrics.""" + + def __init__(self, metrics_factory): + self.sampler_retrieved = \ + metrics_factory.create_counter(name='jaeger:sampler_queries', tags={'result': 'ok'}) + self.sampler_query_failure = \ + metrics_factory.create_counter(name='jaeger:sampler_queries', tags={'result': 'err'}) + self.sampler_updated = \ + metrics_factory.create_counter(name='jaeger:sampler_updates', tags={'result': 'ok'}) + self.sampler_update_failure = \ + metrics_factory.create_counter(name='jaeger:sampler_updates', tags={'result': 'err'}) diff --git a/jaeger_client/tracer.py b/jaeger_client/tracer.py index 301fe1db..e096150f 100644 --- a/jaeger_client/tracer.py +++ b/jaeger_client/tracer.py @@ -206,9 
+206,9 @@ def close(self): def _emit_span_metrics(self, span, join=False): if span.is_sampled(): - self.metrics.spans_sampled(1) + self.metrics.spans_started_sampled(1) else: - self.metrics.spans_not_sampled(1) + self.metrics.spans_started_not_sampled(1) if not span.context.parent_id: if span.is_sampled(): if join: @@ -224,6 +224,7 @@ def _emit_span_metrics(self, span, join=False): def report_span(self, span): self.reporter.report_span(span) + self.metrics.spans_finished(1) def random_id(self): return self.random.getrandbits(constants.MAX_ID_BITS) @@ -234,14 +235,20 @@ class TracerMetrics(object): def __init__(self, metrics_factory): self.traces_started_sampled = \ - metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'started', 'sampled': 'y'}) + metrics_factory.create_counter(name='jaeger:traces', + tags={'state': 'started', 'sampled': 'y'}) self.traces_started_not_sampled = \ - metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'started', 'sampled': 'n'}) + metrics_factory.create_counter(name='jaeger:traces', + tags={'state': 'started', 'sampled': 'n'}) self.traces_joined_sampled = \ - metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'joined', 'sampled': 'y'}) + metrics_factory.create_counter(name='jaeger:traces', + tags={'state': 'joined', 'sampled': 'y'}) self.traces_joined_not_sampled = \ - metrics_factory.create_counter(name='jaeger.traces', tags={'state': 'joined', 'sampled': 'n'}) - self.spans_sampled = \ - metrics_factory.create_counter(name='jaeger.started_spans', tags={'sampled': 'y'}) - self.spans_not_sampled = \ - metrics_factory.create_counter(name='jaeger.started_spans', tags={'sampled': 'n'}) + metrics_factory.create_counter(name='jaeger:traces', + tags={'state': 'joined', 'sampled': 'n'}) + self.spans_started_sampled = \ + metrics_factory.create_counter(name='jaeger:started_spans', tags={'sampled': 'y'}) + self.spans_started_not_sampled = \ + 
metrics_factory.create_counter(name='jaeger:started_spans', tags={'sampled': 'n'}) + self.spans_finished = \ + metrics_factory.create_counter(name='jaeger:finished_spans') diff --git a/tests/test_reporter.py b/tests/test_reporter.py index cf45b876..d51e347a 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -184,7 +184,7 @@ def test_submit_batch_size_1(self): assert 1 == len(sender.futures) # send after close - span_dropped_key = 'jaeger.reporter_spans.result_dropped' + span_dropped_key = 'jaeger:reporter_spans.result_dropped' assert span_dropped_key not in reporter.metrics_factory.counters reporter.report_span(self._new_span('1')) assert 1 == reporter.metrics_factory.counters[span_dropped_key] @@ -195,7 +195,7 @@ def test_submit_failure(self): reporter.error_reporter = ErrorReporter( metrics=Metrics(), logger=logging.getLogger()) - reporter_failure_key = 'jaeger.reporter_spans.result_err' + reporter_failure_key = 'jaeger:reporter_spans.result_err' assert reporter_failure_key not in reporter.metrics_factory.counters # simulate exception in send @@ -218,7 +218,7 @@ def test_submit_queue_full_batch_size_1(self): assert 1 == len(sender.futures) # the consumer is blocked on a future, so won't drain the queue reporter.report_span(self._new_span('2')) - span_dropped_key = 'jaeger.reporter_spans.result_dropped' + span_dropped_key = 'jaeger:reporter_spans.result_dropped' assert span_dropped_key not in reporter.metrics_factory.counters reporter.report_span(self._new_span('3')) yield self._wait_for( From 993134200dbf0040aee863ee72df4e0ea1e62dfb Mon Sep 17 00:00:00 2001 From: Eundoo Song Date: Sun, 25 Feb 2018 18:51:01 +0900 Subject: [PATCH 3/6] add comment in reporter, sampler metrics class Signed-off-by: Eundoo Song --- jaeger_client/reporter.py | 2 ++ jaeger_client/sampler.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/jaeger_client/reporter.py b/jaeger_client/reporter.py index 8cd1c6e6..220732be 100644 --- 
a/jaeger_client/reporter.py +++ b/jaeger_client/reporter.py @@ -231,6 +231,8 @@ def _flush(self): class ReporterMetrics(object): + """Reporter specific metrics.""" + def __init__(self, metrics_factory): self.reporter_success = \ metrics_factory.create_counter(name='jaeger:reporter_spans', tags={'result': 'ok'}) diff --git a/jaeger_client/sampler.py b/jaeger_client/sampler.py index 0adf8692..6c6282d9 100644 --- a/jaeger_client/sampler.py +++ b/jaeger_client/sampler.py @@ -498,7 +498,7 @@ def get_rate_limit(strategy=None): class SamplerMetrics(object): - """Tracer specific metrics.""" + """Sampler specific metrics.""" def __init__(self, metrics_factory): self.sampler_retrieved = \ From 4880edfc17865f3b6b2e6697663c9a2ec5f18cd1 Mon Sep 17 00:00:00 2001 From: Eundoo Song Date: Wed, 28 Feb 2018 00:14:38 +0900 Subject: [PATCH 4/6] add reporter_queue_length gauge Signed-off-by: Eundoo Song --- jaeger_client/reporter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jaeger_client/reporter.py b/jaeger_client/reporter.py index 220732be..64cba417 100644 --- a/jaeger_client/reporter.py +++ b/jaeger_client/reporter.py @@ -171,6 +171,7 @@ def _consume_queue(self): spans.append(span) if spans: yield self._submit(spans) + self.metrics.reporter_queue_length(self.queue.qsize()) for _ in spans: self.queue.task_done() spans = spans[:0] @@ -243,6 +244,8 @@ def __init__(self, metrics_factory): self.reporter_socket = \ metrics_factory.create_counter(name='jaeger:reporter_spans', tags={'result': 'socket_error'}) + self.reporter_queue_length = \ + metrics_factory.create_gauge(name="jaeger:reporter_queue_length") class CompositeReporter(NullReporter): From 3002240ba07bea6d2ad666dc0bc4fc78c757ef73 Mon Sep 17 00:00:00 2001 From: Eundoo Song Date: Thu, 1 Mar 2018 00:42:36 +0900 Subject: [PATCH 5/6] move reporter_queue_length gauge as the last statement of while loop. replace socket_error by reporter failure counter.
Signed-off-by: Eundoo Song --- jaeger_client/reporter.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/jaeger_client/reporter.py b/jaeger_client/reporter.py index 64cba417..c97527da 100644 --- a/jaeger_client/reporter.py +++ b/jaeger_client/reporter.py @@ -171,10 +171,10 @@ def _consume_queue(self): spans.append(span) if spans: yield self._submit(spans) - self.metrics.reporter_queue_length(self.queue.qsize()) for _ in spans: self.queue.task_done() spans = spans[:0] + self.metrics.reporter_queue_length(self.queue.qsize()) self.logger.info('Span publisher exists') # method for protocol factory @@ -199,7 +199,7 @@ def _submit(self, spans): yield self._send(batch) self.metrics.reporter_success(len(spans)) except socket.error as e: - self.metrics.reporter_socket(len(spans)) + self.metrics.reporter_failure(len(spans)) self.error_reporter.error( 'Failed to submit traces to jaeger-agent socket: %s', e) except Exception as e: @@ -241,11 +241,8 @@ def __init__(self, metrics_factory): metrics_factory.create_counter(name='jaeger:reporter_spans', tags={'result': 'err'}) self.reporter_dropped = \ metrics_factory.create_counter(name='jaeger:reporter_spans', tags={'result': 'dropped'}) - self.reporter_socket = \ - metrics_factory.create_counter(name='jaeger:reporter_spans', - tags={'result': 'socket_error'}) self.reporter_queue_length = \ - metrics_factory.create_gauge(name="jaeger:reporter_queue_length") + metrics_factory.create_gauge(name='jaeger:reporter_queue_length') class CompositeReporter(NullReporter): From 421795ef18ca97afe1b0e3c5d4e911a8ad863e4c Mon Sep 17 00:00:00 2001 From: Eundoo Song Date: Wed, 7 Mar 2018 02:54:46 +0900 Subject: [PATCH 6/6] in _update_adaptive_sampler, move sampler_updated metric out of the condition Signed-off-by: Eundoo Song --- jaeger_client/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jaeger_client/sampler.py b/jaeger_client/sampler.py index 2025a7f7..75dd7afa 100644 --- 
a/jaeger_client/sampler.py +++ b/jaeger_client/sampler.py @@ -443,9 +443,9 @@ def _update_sampler(self, response): def _update_adaptive_sampler(self, per_operation_strategies): if isinstance(self.sampler, AdaptiveSampler): self.sampler.update(per_operation_strategies) - self.metrics.sampler_updated(1) else: self.sampler = AdaptiveSampler(per_operation_strategies, self.max_operations) + self.metrics.sampler_updated(1) def _update_rate_limiting_or_probabilistic_sampler(self, response): s_type = response.get(STRATEGY_TYPE_STR)