Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Enable GPU Memory profiler tests
Browse files Browse the repository at this point in the history
Previously tests are not run as test_profiler.py was not taken into account on
GPU CI runs and some tests were marked for being skipped if run on a CPU-only
machine.
  • Loading branch information
leezu committed Jul 13, 2020
1 parent 9d62392 commit 53e977a
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 119 deletions.
144 changes: 144 additions & 0 deletions tests/python/gpu/test_profiler_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys

import mxnet as mx
mx.test_utils.set_default_context(mx.gpu(0))

curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
# We import all tests from ../unittest/test_deferred_compute.py
# They will be detected by test framework, as long as the current file has a different filename
from test_profiler import *


def test_gpu_memory_profiler_symbolic():
iter_num = 5

enable_profiler('test_profiler.json', False, False)
profiler.set_state('run')

with profiler.Scope("tensordot"):
A = mx.sym.Variable('A')
B = mx.sym.Variable('B')
C = mx.symbol.dot(A, B, name='dot')

executor = C._simple_bind(mx.gpu(), 'write', A=(4096, 4096), B=(4096, 4096))

a = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))
b = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))

a.copyto(executor.arg_dict['A'])
b.copyto(executor.arg_dict['B'])

for i in range(iter_num):
executor.forward()
c = executor.outputs[0]
mx.nd.waitall()
profiler.set_state('stop')
profiler.dump(True)

expected_alloc_entries = [
{'Attribute Name' : 'tensordot:in_arg:A',
'Requested Size' : str(4 * a.size)},
{'Attribute Name' : 'tensordot:in_arg:B',
'Requested Size' : str(4 * b.size)},
{'Attribute Name' : 'tensordot:dot',
'Requested Size' : str(4 * c.size)}]

# Sample gpu_memory_profile.csv:
# "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
# "<unk>:_zeros","67108864","0","67108864","0"
# "<unk>:_zeros","67108864","0","67108864","0"
# "tensordot:dot","67108864","0","67108864","1"
# "tensordot:dot","67108864","0","67108864","1"
# "tensordot:in_arg:A","67108864","0","67108864","0"
# "tensordot:in_arg:B","67108864","0","67108864","0"
# "nvml_amend","1074790400","0","1074790400","0"

with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
for expected_alloc_entry in expected_alloc_entries:
csv_file.seek(0)
entry_found = False
for row in csv_reader:
if row['Attribute Name'] == expected_alloc_entry['Attribute Name']:
assert row['Requested Size'] == expected_alloc_entry['Requested Size'], \
"requested size={} is not equal to the expected size={}" \
.format(row['Requested Size'],
expected_alloc_entry['Requested Size'])
entry_found = True
break
assert entry_found, \
"Entry for attr_name={} has not been found" \
.format(expected_alloc_entry['Attribute Name'])


@pytest.mark.skipif(is_cd_run(), reason="flaky test - open issue #18564")
def test_gpu_memory_profiler_gluon():
enable_profiler(profile_filename='test_profiler.json',
run=True, continuous_dump=True)
profiler.set_state('run')

model = nn.HybridSequential()
model.add(nn.Dense(128, activation='tanh'))
model.add(nn.Dropout(0.5))
model.add(nn.Dense(64, activation='tanh'),
nn.Dense(32, in_units=64))
model.add(nn.Activation('relu'))
model.initialize(ctx=mx.gpu())
model.hybridize()

inputs = mx.sym.var('data')

with mx.autograd.record():
out = model(mx.nd.zeros((16, 10), ctx=mx.gpu()))
out.backward()
mx.nd.waitall()
profiler.set_state('stop')
profiler.dump(True)

# We are only checking for weight parameters here, also making sure that
# there is no unknown entries in the memory profile.
with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
for row in csv_reader:
print(",".join(list(row.values())))
for scope in ['in_arg', 'arg_grad']:
for key, nd in model.collect_params().items():
expected_arg_name = "%s:%s:" % (model.name, scope) + nd.name
expected_arg_size = str(4 * np.prod(nd.shape))
csv_file.seek(0)
entry_found = False
for row in csv_reader:
if row['Attribute Name'] == expected_arg_name:
assert row['Requested Size'] == expected_arg_size, \
"requested size={} is not equal to the expected size={}" \
.format(row['Requested Size'], expected_arg_size)
entry_found = True
break
assert entry_found, \
"Entry for attr_name={} has not been found" \
.format(expected_arg_name)
# Make sure that there is no unknown allocation entry.
csv_file.seek(0)
for row in csv_reader:
if row['Attribute Name'] == "<unk>:unknown" or \
row['Attribute Name'] == "<unk>:":
assert False, "Unknown allocation entry has been encountered"
119 changes: 0 additions & 119 deletions tests/python/unittest/test_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,122 +462,3 @@ def test_custom_operator_profiling_naive_engine():
run_in_spawned_process(custom_operator_profiling_multiple_custom_ops, \
{'MXNET_ENGINE_TYPE' : "NaiveEngine"}, 'symbolic', \
'test_custom_operator_profiling_multiple_custom_ops_symbolic_naive.json')


@pytest.mark.skipif(mx.context.num_gpus() == 0, reason="GPU memory profiler records allocation on GPUs only")
def test_gpu_memory_profiler_symbolic():
iter_num = 5

enable_profiler('test_profiler.json', False, False)
profiler.set_state('run')

with profiler.Scope("tensordot"):
A = mx.sym.Variable('A')
B = mx.sym.Variable('B')
C = mx.symbol.dot(A, B, name='dot')

executor = C._simple_bind(mx.gpu(), 'write', A=(4096, 4096), B=(4096, 4096))

a = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))
b = mx.random.uniform(-1.0, 1.0, shape=(4096, 4096))

a.copyto(executor.arg_dict['A'])
b.copyto(executor.arg_dict['B'])

for i in range(iter_num):
executor.forward()
c = executor.outputs[0]
mx.nd.waitall()
profiler.set_state('stop')
profiler.dump(True)

expected_alloc_entries = [
{'Attribute Name' : 'tensordot:in_arg:A',
'Requested Size' : str(4 * a.size)},
{'Attribute Name' : 'tensordot:in_arg:B',
'Requested Size' : str(4 * b.size)},
{'Attribute Name' : 'tensordot:dot',
'Requested Size' : str(4 * c.size)}]

# Sample gpu_memory_profile.csv:
# "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
# "<unk>:_zeros","67108864","0","67108864","0"
# "<unk>:_zeros","67108864","0","67108864","0"
# "tensordot:dot","67108864","0","67108864","1"
# "tensordot:dot","67108864","0","67108864","1"
# "tensordot:in_arg:A","67108864","0","67108864","0"
# "tensordot:in_arg:B","67108864","0","67108864","0"
# "nvml_amend","1074790400","0","1074790400","0"

with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
for expected_alloc_entry in expected_alloc_entries:
csv_file.seek(0)
entry_found = False
for row in csv_reader:
if row['Attribute Name'] == expected_alloc_entry['Attribute Name']:
assert row['Requested Size'] == expected_alloc_entry['Requested Size'], \
"requested size={} is not equal to the expected size={}" \
.format(row['Requested Size'],
expected_alloc_entry['Requested Size'])
entry_found = True
break
assert entry_found, \
"Entry for attr_name={} has not been found" \
.format(expected_alloc_entry['Attribute Name'])


@pytest.mark.skipif(mx.context.num_gpus() == 0, reason="GPU memory profiler records allocation on GPUs only")
@pytest.mark.skipif(is_cd_run(), reason="flaky test - open issue #18564")
def test_gpu_memory_profiler_gluon():
enable_profiler(profile_filename='test_profiler.json',
run=True, continuous_dump=True)
profiler.set_state('run')

model = nn.HybridSequential()
model.add(nn.Dense(128, activation='tanh'))
model.add(nn.Dropout(0.5))
model.add(nn.Dense(64, activation='tanh'),
nn.Dense(32, in_units=64))
model.add(nn.Activation('relu'))
model.initialize(ctx=mx.gpu())
model.hybridize()

inputs = mx.sym.var('data')

with mx.autograd.record():
out = model(mx.nd.zeros((16, 10), ctx=mx.gpu()))
out.backward()
mx.nd.waitall()
profiler.set_state('stop')
profiler.dump(True)

# We are only checking for weight parameters here, also making sure that
# there is no unknown entries in the memory profile.
with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file:
csv_reader = csv.DictReader(csv_file)
for row in csv_reader:
print(",".join(list(row.values())))
for scope in ['in_arg', 'arg_grad']:
for key, nd in model.collect_params().items():
expected_arg_name = "%s:%s:" % (model.name, scope) + nd.name
expected_arg_size = str(4 * np.prod(nd.shape))
csv_file.seek(0)
entry_found = False
for row in csv_reader:
if row['Attribute Name'] == expected_arg_name:
assert row['Requested Size'] == expected_arg_size, \
"requested size={} is not equal to the expected size={}" \
.format(row['Requested Size'], expected_arg_size)
entry_found = True
break
assert entry_found, \
"Entry for attr_name={} has not been found" \
.format(expected_arg_name)
# Make sure that there is no unknown allocation entry.
csv_file.seek(0)
for row in csv_reader:
if row['Attribute Name'] == "<unk>:unknown" or \
row['Attribute Name'] == "<unk>:":
assert False, "Unknown allocation entry has been encountered"

0 comments on commit 53e977a

Please sign in to comment.