Skip to content

Commit 5014202

Browse files
author
Colman Yau
committed
New implementation of ProfilerDisabler: iteration 1
Introduce should_stop_sampling and should_stop_profiling. should_stop_sampling is called every time we sample, while should_stop_profiling is called only after we refresh the configuration or submit a profile.
1 parent 11e899d commit 5014202

File tree

4 files changed

+58
-17
lines changed

4 files changed

+58
-17
lines changed

codeguru_profiler_agent/local_aggregator.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def __init__(self, reporter, environment=dict()):
4444
self.memory_limit_bytes = environment["memory_limit_bytes"]
4545
self.last_report_attempted = current_milli_time(clock=self.clock)
4646

47-
self._reset()
47+
self.reset()
4848

4949
def add(self, sample):
5050
"""
@@ -69,7 +69,7 @@ def _check_memory_limit(self):
6969
"Profiler memory usage limit has been reached")
7070
self.flush(force=True)
7171

72-
def _reset(self):
72+
def reset(self):
7373
self.profile = self.profile_factory(
7474
profiling_group_name=self.profiling_group_name,
7575
sampling_interval_seconds=AgentConfiguration.get().sampling_interval.total_seconds(),
@@ -80,7 +80,7 @@ def _reset(self):
8080
self.timer.reset()
8181

8282
@with_timer("flush")
83-
def flush(self, force=False):
83+
def flush(self, force=False, reset=True):
8484
now = current_milli_time(clock=self.clock)
8585
reported = False
8686
if not force and not self._is_over_reporting_interval(now):
@@ -92,8 +92,8 @@ def flush(self, force=False):
9292
self._report_profile(now)
9393
reported = True
9494

95-
if force or reported:
96-
self._reset()
95+
if force or (reset and reported):
96+
self.reset()
9797
return reported
9898

9999
def refresh_configuration(self):

codeguru_profiler_agent/profiler_disabler.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,16 @@ def __init__(self, environment, clock=time.time):
2020
self.killswitch = KillSwitch(environment['killswitch_filepath'], clock)
2121
self.memory_limit_bytes = environment['memory_limit_bytes']
2222

23-
def should_stop_profiling(self, profile=None):
23+
def should_stop_sampling(self, profile=None):
2424
return (self.killswitch.is_killswitch_on()
2525
or self.cpu_usage_check.is_cpu_usage_limit_reached(profile)
2626
or profile is not None and self._is_memory_limit_reached(profile))
2727

28+
def should_stop_profiling(self, profile=None):
29+
return self.killswitch.is_killswitch_on() or \
30+
(profile is not None and self.cpu_usage_check.is_overall_cpu_limit_reached(profile)
31+
and self._is_memory_limit_reached(profile))
32+
2833
def _is_memory_limit_reached(self, profile):
2934
return profile.get_memory_usage_bytes() > self.memory_limit_bytes
3035

@@ -38,9 +43,29 @@ class CpuUsageCheck:
3843
def __init__(self, timer):
3944
self.timer = timer
4045

46+
# This function carries out an overall cpu limit check that covers the cpu overhead caused for the full
47+
# sampling cycle: sample -> aggregate -> report -> refresh config. This has to be called with a profile
48+
# which captured the total cycle cpu time usage.
49+
def is_overall_cpu_limit_reached(self, profile):
50+
profiler_metric = self.timer.metrics.get("runProfiler")
51+
if not profile or not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
52+
return False
53+
54+
used_time_percentage = 100 * profiler_metric.total/profile.get_active_millis_since_start()
55+
56+
if used_time_percentage >= AgentConfiguration.get().cpu_limit_percentage:
57+
logger.debug(self.timer.metrics)
58+
logger.info(
59+
"Profiler cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler.".format(
60+
used_time_percentage, AgentConfiguration.get().cpu_limit_percentage))
61+
return True
62+
else:
63+
return False
64+
4165
def is_cpu_usage_limit_reached(self, profile=None):
4266
sample_and_aggregate_metric = self.timer.metrics.get("sampleAndAggregate")
43-
if not sample_and_aggregate_metric or sample_and_aggregate_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
67+
if not sample_and_aggregate_metric or \
68+
sample_and_aggregate_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
4469
return False
4570

4671
sampling_interval_seconds = self._get_average_sampling_interval_seconds(profile)

codeguru_profiler_agent/profiler_runner.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def start(self):
5151
5252
:return: True if the profiler was started successfully; False otherwise.
5353
"""
54-
if self.profiler_disabler.should_stop_profiling():
54+
if self.profiler_disabler.should_stop_sampling():
5555
logger.info("Profiler will not start.")
5656
return False
5757
self.scheduler.start()
@@ -71,26 +71,35 @@ def _profiling_command(self):
7171
if self._first_execution:
7272
self.collector.setup()
7373
self._first_execution = False
74-
return self._run_profiler()
74+
sample_result = self._run_profiler()
75+
if sample_result.success and sample_result.should_check_overall:
76+
if self.profiler_disabler.should_stop_profiling(profile=self.collector.profile):
77+
return False
78+
if sample_result.should_reset:
79+
self.collector.reset()
80+
return True
81+
return sample_result.success
7582
except:
7683
logger.info("An unexpected issue caused the profiling command to terminate.", exc_info=True)
7784
return False
7885

7986
@with_timer("runProfiler")
8087
def _run_profiler(self):
81-
if self.profiler_disabler.should_stop_profiling(self.collector.profile):
82-
return False
88+
if self.profiler_disabler.should_stop_sampling(self.collector.profile):
89+
return RunProfilerStatus(success=False, should_check_overall=False, should_reset=False)
8390

91+
refreshed_config = False
8492
if not self.is_profiling_in_progress:
8593
self._refresh_configuration()
94+
refreshed_config = True
8695

8796
# after the refresh we may be working on a profile
8897
if self.is_profiling_in_progress:
89-
if self.collector.flush():
98+
if self.collector.flush(reset=False):
9099
self.is_profiling_in_progress = False
91-
return True
100+
return RunProfilerStatus(success=True, should_check_overall=True, should_reset=True)
92101
self._sample_and_aggregate()
93-
return True
102+
return RunProfilerStatus(success=True, should_check_overall=refreshed_config, should_reset=False)
94103

95104
@with_timer("sampleAndAggregate")
96105
def _sample_and_aggregate(self):
@@ -129,3 +138,10 @@ def pause(self, block=False):
129138
"""
130139
self.scheduler.pause(block)
131140
self.collector.profile.pause()
141+
142+
143+
class RunProfilerStatus:
144+
def __init__(self, success, should_check_overall, should_reset):
145+
self.success = success
146+
self.should_check_overall = should_check_overall
147+
self.should_reset = should_reset

test/unit/test_profiler_disabler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,15 @@ def before(self):
7272

7373
def test_it_stops_profiling_if_killswitch_is_on(self):
7474
self.disabler.killswitch.is_killswitch_on = Mock(return_value=True)
75-
assert self.disabler.should_stop_profiling(self.profiler)
75+
assert self.disabler.should_stop_sampling(self.profiler)
7676

7777
def test_it_stops_profiling_if_memory_limit_is_reached(self):
7878
self.disabler._is_memory_limit_reached = Mock(return_value=True)
79-
assert self.disabler.should_stop_profiling(self.profiler)
79+
assert self.disabler.should_stop_sampling(self.profiler)
8080

8181
def test_it_stops_profiling_if_process_duration_is_reached(self):
8282
self.disabler.cpu_usage_check.is_cpu_usage_limit_reached = Mock(return_value=True)
83-
assert self.disabler.should_stop_profiling(self.profiler)
83+
assert self.disabler.should_stop_sampling(self.profiler)
8484

8585

8686
class TestKillSwitch:

0 commit comments

Comments
 (0)