Skip to content

Commit 5f1cc70

Browse files
committed
Add operation name frame in stacks with boto api calls
When using API calls from boto, we often do not see the operation names in the stack traces even though there is a dedicated function name. This is because of the generic way that API calls are implemented in boto. Unfortunately, this is not convenient for profiling, as we do not see in flamegraphs which calls are using resources. This change adds a specific check when we build the stacks to detect API calls inside boto and, if possible, add a new frame with the operation function name. For example, instead of ``` (...) botocore.client:CloudWatch:_make_api_call botocore.client:CloudWatch:_api_call user_module:send_metric ``` we should end up with ``` (...) botocore.client:CloudWatch:_make_api_call botocore.client:CloudWatch:_api_call botocore.client:CloudWatch:put_metric_data user_module:send_metric ``` This new frame should help when investigating flamegraphs and also with automatic analysis.
1 parent 723c992 commit 5f1cc70

File tree

3 files changed

+101
-3
lines changed

3 files changed

+101
-3
lines changed

codeguru_profiler_agent/sampling_utils.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
import linecache
55
import threading
66
import traceback
7-
7+
import re
88
from codeguru_profiler_agent.model.frame import Frame
99

10+
BOTO_CLIENT_PATH = re.compile("[/\\\\]botocore[/\\\\]client.py$")
1011
TRUNCATED_FRAME = Frame(name="<Truncated>")
1112

1213
TIME_SLEEP_FRAME = Frame(name="<Sleep>")
@@ -64,15 +65,35 @@ def _extract_stack(stack, max_depth):
6465
"""
6566
result = []
6667
for raw_frame, line_no in stack:
68+
_maybe_add_boto_operation_name(raw_frame, result)
6769
co = raw_frame.f_code
6870
result.append(
6971
Frame(name=co.co_name, class_name=_extract_class(raw_frame.f_locals), line_no=line_no,
7072
file_path=co.co_filename)
7173
)
72-
if len(stack) < max_depth:
74+
if len(result) < max_depth:
7375
last_frame, last_frame_line_no = stack[-1]
7476
_maybe_append_synthetic_frame(result, last_frame, last_frame_line_no)
75-
return result
77+
return result[:max_depth]
78+
79+
80+
def _maybe_add_boto_operation_name(raw_frame, result):
    """
    boto is dealing with API calls in a very generic way so by default the sampling
    would only show that we are making an api call without having the actual operation name.
    This function checks if this frame is botocore.client.py:_api_call and if it is, it appends
    to result a frame named after the actual operation (read from the frame's
    'py_operation_name' local variable). Nothing is appended when the frame is not
    an _api_call frame of botocore's client.py, or when the operation name is missing or empty.

    :param raw_frame: the raw frame; only f_code (co_name, co_filename) and f_locals are read
    :param result: the list of Frame being built for the current stack; modified in place
    """
    code = raw_frame.f_code
    if code.co_name != '_api_call':
        return
    if BOTO_CLIENT_PATH.search(code.co_filename) is None:
        return

    # Read f_locals only once: the sampled thread may still be running, so every
    # access can give a different view of its local variables. Using one snapshot
    # guarantees the value we test is the value we put in the new frame.
    local_variables = raw_frame.f_locals
    if not local_variables:
        return
    operation_name = local_variables.get('py_operation_name')
    if not operation_name:
        return

    # No line number is set: this frame does not correspond to an actual line of code.
    result.append(
        Frame(name=operation_name,
              class_name=_extract_class(local_variables),
              file_path=code.co_filename)
    )
7697

7798

7899
def _maybe_append_synthetic_frame(result, frame, line_no):

test/help_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import threading
22
import time
33
from queue import Queue
4+
import boto3
5+
from botocore.client import Config
46

57
INTEGRATION_TEST_ACCOUNT_ID = "519630429520"
68
MY_PROFILING_GROUP_NAME_FOR_INTEG_TESTS = "MyProfilingGroupForIntegrationTests"
@@ -48,6 +50,23 @@ def dummy_parent_method(self):
4850
"""
4951
self.dummy_method()
5052

53+
def make_boto_api_call(self, boto_client):
54+
try:
55+
boto_client.put_metric_data(Namespace="any_namespace", MetricData=[])
56+
except Exception as e:
57+
print("This should be a ConnectTimeoutError", e)
58+
59+
def new_thread_sending_boto_api_call(self, timeout_seconds=1, thread_name="test-boto-api-call"):
60+
no_retry_config = Config(connect_timeout=timeout_seconds, retries={'max_attempts': 0})
61+
# we do not want boto to look for real credentials so provide fake ones
62+
session = boto3.Session(region_name="us-east-1", aws_access_key_id="fake_id", aws_secret_access_key="fake_key")
63+
# we set a fake endpoint in the client because we do not want to make a real call
64+
# this is only so we can have a thread inside an api call trying to make a connection
65+
# long enough for us to take a sample
66+
no_target_client = session.client('cloudwatch', endpoint_url='https://notExisting.com/', config=no_retry_config)
67+
self.boto_thread = threading.Thread(
68+
name=thread_name, target=self.make_boto_api_call, daemon=True, kwargs={"boto_client": no_target_client})
69+
self.boto_thread.start()
5170

5271
def wait_for(condition, timeout_seconds=1.0, poll_interval_seconds=0.01):
5372
"""

test/unit/test_sampling_utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,20 @@
33
import sys
44

55
from test import help_utils
6+
from collections import namedtuple
67

78
from codeguru_profiler_agent.sampling_utils import get_stacks
89

910
DEFAULT_TRUNCATED_FRAME_NAME = "<Truncated>"
1011

12+
# Minimal stand-ins for the objects the sampling code reads from a real stack:
# a code object, a frame object and a (frame, line number) pair.
test_code = namedtuple('code', ['co_filename', 'co_name'])
test_frame = namedtuple('frame', ['f_code', 'f_locals'])
test_tb = namedtuple('tb', ['tb_frame', 'tb_lineno'])


def make_frame(path, method, line_nbr, f_locals=None):
    """
    Build a fake (frame, line number) pair that can be used as an element of a fake stack.

    :param path: value for the frame's f_code.co_filename
    :param method: value for the frame's f_code.co_name
    :param line_nbr: the line number paired with the frame
    :param f_locals: the dict to expose as the frame's f_locals; when omitted a new
        empty dict is created for each call so frames never share the same dict
    :return: a test_tb namedtuple, which unpacks as (frame, line_nbr)
    """
    if f_locals is None:
        f_locals = {}
    return test_tb(test_frame(test_code(path, method), f_locals), line_nbr)
19+
1120

1221
def is_frame_in_stacks(stacks, target_frame):
1322
for stack in stacks:
@@ -88,3 +97,52 @@ def test_it_does_not_include_zombie_threads(self):
8897

8998
assert not is_frame_in_stacks(
9099
stacks, "dummy_parent_method")
100+
101+
def test_it_adds_operation_name_frame_for_boto(self):
102+
raw_stack = [
103+
make_frame('path/to/foo.py', 'foo', 3),
104+
make_frame('site-packages/botocore/client.py', '_api_call', 3, {'py_operation_name': 'boto_api_call'}),
105+
make_frame('path/to/bar.py', 'bar', 3)
106+
]
107+
with mock.patch(
108+
"traceback.walk_stack",
109+
side_effect=
110+
lambda end_frame: raw_stack
111+
):
112+
stacks = get_stacks(
113+
threads_to_sample=sys._current_frames().items(),
114+
excluded_threads=set(),
115+
max_depth=100)
116+
assert len(stacks[0]) == 4
117+
assert is_frame_in_stacks(stacks, "boto_api_call")
118+
119+
def test_adding_boto_frame_does_not_exceed_maximum_depth(self):
120+
raw_stack = [
121+
make_frame('site-packages/botocore/client.py', '_api_call', 34, {'py_operation_name': 'boto_api_call'}),
122+
make_frame('path/to/foo.py', 'bar', 12),
123+
124+
]
125+
for i in range(100):
126+
raw_stack.insert(0, make_frame('path/to/foo.py', 'bar' + str(i), 1))
127+
with mock.patch(
128+
"traceback.walk_stack",
129+
side_effect=
130+
lambda end_frame: raw_stack
131+
):
132+
stacks = get_stacks(
133+
threads_to_sample=sys._current_frames().items(),
134+
excluded_threads=set(),
135+
max_depth=100)
136+
assert len(stacks[0]) == 100
137+
assert is_frame_in_stacks(stacks, "boto_api_call")
138+
139+
def test_it_adds_operation_name_frame_for_real_boto_call(self):
140+
# Run a thread that will try to do a boto3 api call for 1 second then fail with a log
141+
# the function will call put_metric_data on a cloudwatch client
142+
# so get_stack should capture it.
143+
self.helper.new_thread_sending_boto_api_call(timeout_seconds=1)
144+
stacks = get_stacks(
145+
threads_to_sample=sys._current_frames().items(),
146+
excluded_threads=set(),
147+
max_depth=100)
148+
assert is_frame_in_stacks(stacks, "put_metric_data")

0 commit comments

Comments
 (0)