|
1 | 1 | import time
|
| 2 | +import logging |
2 | 3 |
|
3 | 4 | from random import SystemRandom
|
4 | 5 | from codeguru_profiler_agent.model.call_graph_node import CallGraphNode
|
|
9 | 10 |
|
10 | 11 | ROOT_NODE_NAME = "ALL"
|
11 | 12 |
|
| 13 | +logger = logging.getLogger(__name__) |
12 | 14 |
|
13 | 15 | class Profile:
|
14 | 16 | def __init__(self, profiling_group_name, sampling_interval_seconds, host_weight, start, agent_debug_info, clock=time.time):
|
@@ -56,11 +58,40 @@ def get_active_millis_since_start(self):
|
56 | 58 | This returns the total "active" wall clock time since start. In AWS lambda the process can be frozen, the
|
57 | 59 | time while we are frozen should not be counted in here. In an EC2 or other type of host it is simply the wall
|
58 | 60 | clock time since start.
|
59 |
| - If the "end" time has been set, we give the active time between start and end otherwise between start and now |
| 61 | +
|
| 62 | + Previously, self.end was used to calculate active_millis_since_start, but self.end is updated when a sample is added, |
| 63 | + so in rare cases where the last sample is collected before the last pause, we might include pause time that occurred after that sample, |
| 64 | + which can lead to an incorrect calculation of active time. In the worst case, it can even lead to negative values, depending |
| 65 | + on the pause time. |
| 66 | +
|
| 67 | + Below is an example indicating the potential error of considering self.end in the calculation. |
| 68 | + ------------------------------------------------------------------ |
| 69 | + | | | | | | | | |||||| |
| 70 | + S P R P R SE P R REPORT |
| 71 | + s1_________|___p1___|____|____p2___|____e1____|___p3__|__________| |
| 72 | +
|
| 73 | + S - Start |
| 74 | + P - Pause |
| 75 | + R - Resume |
| 76 | + SE - self.end (last sample) |
| 77 | + REPORT - Calculating active time. |
| 78 | +
|
| 79 | + If we consider self.end, which is e1 in the above case, then the active time would be e1-s1-(p1+p2+p3). But pause p3 is after e1, |
| 80 | + so that leads to an incorrect calculation of active time. |
| 81 | +
|
| 82 | + Ideally, we would want to set profile_end to be the last sample time and subtract only the pause times before that, but it requires |
| 83 | + additional work in maintaining pause times, which isn't worth it as it makes the logic complex with very little gain. |
| 84 | +
|
| 85 | + So we are setting it to the current time, or in some corner cases to the last_pause time. |
60 | 86 | """
|
61 |
| - end = self.last_pause if self.last_pause is not None else \ |
62 |
| - self._end if self._end is not None else current_milli_time(clock=self._clock) |
63 |
| - return end - self.start - self._paused_ms |
| 87 | + end = self.last_pause if self.last_pause is not None else current_milli_time(clock=self._clock) |
| 88 | + active_time_millis_since_start = end - self.start - self._paused_ms |
| 89 | + logger.debug( |
| 90 | + "Active time since start is {activeTime} which is calculated using start: {start}, end: {end}, last_pause: {last_pause}, paused_ms: {paused_ms}, last_resume: {last_resume}" |
| 91 | + .format(activeTime = active_time_millis_since_start, start = self.start, end = self._end, last_pause = self.last_pause, paused_ms = self._paused_ms, last_resume = self.last_resume) |
| 92 | + ) |
| 93 | + |
| 94 | + return active_time_millis_since_start |
64 | 95 |
|
65 | 96 | def add(self, sample):
|
66 | 97 | """
|
@@ -113,8 +144,9 @@ def resume(self):
|
113 | 144 | # resume gets called when profile is running
|
114 | 145 | return
|
115 | 146 | self.last_resume = current_milli_time(clock=self._clock)
|
116 |
| - self._paused_ms += self.last_resume - self.last_pause |
| 147 | + prev_last_pause = self.last_pause |
117 | 148 | self.last_pause = None
|
| 149 | + self._paused_ms += self.last_resume - prev_last_pause |
118 | 150 |
|
119 | 151 | def is_empty(self):
|
120 | 152 | return self.total_seen_threads_count == 0.0
|
|
0 commit comments