1
- from codeguru_profiler_agent .utils .synchronization import synchronized
1
+ import logging
2
+ import os
3
+
4
+ logger = logging .getLogger (__name__ )
2
5
3
6
4
7
class ErrorsMetadata :
5
8
def __init__(self):
    """
    Create the error counters and start them all at zero by delegating to reset().
    """
    self.reset()
13
10
14
11
def reset (self ):
12
+ """
13
+ We want to differentiate API call errors more granularly. We want to gather ResourceNotFoundException errors
14
+ because we are going to get this exception with auto-create feature and want to monitor how many times
15
+ the agent is not able to create the profiling group (PG), resulting in subsequent ResourceNotFoundException errors.
16
+ """
15
17
self .errors_count = 0
16
18
self .sdk_client_errors = 0
17
19
self .configure_agent_errors = 0
@@ -20,12 +22,10 @@ def reset(self):
20
22
self .post_agent_profile_errors = 0
21
23
self .post_agent_profile_rnfe_auto_create_enabled_errors = 0
22
24
23
- """
24
- This needs to be compliant with errors count schema.
25
- https://code.amazon.com/packages/SkySailProfileIonSchema/blobs/811cc0e7e406e37a5b878acf31468be3dcd2963d/--/src/main/resources/schema/DebugInfo.isl#L21
26
- """
27
-
28
25
def serialize_to_json (self ):
26
+ """
27
+ This needs to be compliant with errors count schema.
28
+ """
29
29
return {
30
30
"errorsCount" : self .errors_count ,
31
31
"sdkClientErrors" : self .sdk_client_errors ,
@@ -36,35 +36,86 @@ def serialize_to_json(self):
36
36
"postAgentProfileRnfeAutoCreateEnabledErrors" : self .post_agent_profile_rnfe_auto_create_enabled_errors
37
37
}
38
38
39
- @synchronized
40
39
def increment_sdk_error(self, error_type):
    """
    Count one SDK client error and attribute it to the API call that produced it.

    ErrorsCount is the umbrella of all the kinds of error we want to capture. Currently we have only
    SdkClientErrors in it. SdkClientErrors is comprised of different API level errors like
    ConfigureAgentErrors, PostAgentProfileErrors, CreateProfilingGroupErrors.

    Every call increments errors_count and sdk_client_errors. An error_type that matches none of the
    names handled below increments only those two totals.

    :param error_type: The type of API level error that we want to capture.
    """
    self.errors_count += 1
    self.sdk_client_errors += 1

    # Special handling for ResourceNotFoundException errors: for example a
    # configureAgentRnfeAutoCreateEnabledErrors is also a configureAgentErrors,
    # so the parent API counter is incremented together with the specific one.
    if error_type == "configureAgentErrors":
        self.configure_agent_errors += 1
    elif error_type == "configureAgentRnfeAutoCreateEnabledErrors":
        self.configure_agent_errors += 1
        self.configure_agent_rnfe_auto_create_enabled_errors += 1
    elif error_type == "createProfilingGroupErrors":
        self.create_profiling_group_errors += 1
    elif error_type == "postAgentProfileErrors":
        self.post_agent_profile_errors += 1
    elif error_type == "postAgentProfileRnfeAutoCreateEnabledErrors":
        self.post_agent_profile_errors += 1
        self.post_agent_profile_rnfe_auto_create_enabled_errors += 1
54
65
55
66
def record_sdk_error(self, error_type):
    """
    Record one SDK error by forwarding it to increment_sdk_error.

    :param error_type: The type of API level error that we want to capture.
    """
    self.increment_sdk_error(error_type)
57
68
58
69
59
70
class AgentDebugInfo:
    """
    Collects debug information about the agent and serializes it to a dictionary.

    Each piece of information is optional: a value that is None (or, for the timer, an empty set of
    metrics) is simply left out of the serialized output.
    """

    def __init__(self, errors_metadata=None, agent_start_time=None, timer=None):
        """
        :param errors_metadata: object exposing serialize_to_json(); reported under "errorsCount".
        :param agent_start_time: start time of the agent; converted with int() when serialized.
        :param timer: object exposing a "metrics" mapping from metric name to a value that has a
            "max" attribute and an "average()" method.
        """
        self.process_id = get_process_id()
        self.errors_metadata = errors_metadata
        self.agent_start_time = agent_start_time
        self.timer = timer

    def serialize_to_json(self):
        """
        This needs to be compliant with agent debug info schema.

        :return: a dictionary containing only the entries whose source value is available.
        """
        debug_info = {}

        self.add_agent_start_time(debug_info)
        self.add_process_id(debug_info)
        self.add_errors_metadata(debug_info)
        self.add_generic_metrics(debug_info)

        return debug_info

    def add_agent_start_time(self, json):
        """
        Store the agent start time, truncated to an int, under "agentStartTime" when it is known.

        :param json: the dictionary to update in place.
        """
        if self.agent_start_time is not None:
            json["agentStartTime"] = int(self.agent_start_time)

    def add_errors_metadata(self, json):
        """
        Store the serialized errors metadata under "errorsCount" when it is available.

        :param json: the dictionary to update in place.
        """
        if self.errors_metadata is not None:
            json["errorsCount"] = self.errors_metadata.serialize_to_json()

    def add_process_id(self, json):
        """
        Store the process id under "processId" when it is known.

        :param json: the dictionary to update in place.
        """
        if self.process_id is not None:
            json["processId"] = self.process_id

    def add_generic_metrics(self, json):
        """
        Store the max and the average of every timer metric under "genericMetrics".

        Nothing is added when there is no timer or when the timer has no metrics.

        :param json: the dictionary to update in place.
        """
        if self.timer is None or not self.timer.metrics:
            return

        generic_metrics = {}
        for metric_name, metric_value in self.timer.metrics.items():
            generic_metrics[metric_name + "_max"] = metric_value.max
            generic_metrics[metric_name + "_average"] = metric_value.average()

        json["genericMetrics"] = generic_metrics
+
114
+
115
def get_process_id():
    """
    Return the id of the current process, or None when it cannot be obtained.

    A failure is logged at info level instead of being raised, so callers only need to handle None.
    """
    try:
        return os.getpid()
    except Exception as e:
        # Deliberately best effort: pass the exception as a logging argument so the message is only
        # formatted when the record is actually emitted.
        logger.info("Failed to get the process id, %r", e)
        return None
121
+
0 commit comments