From 3178f99b66572e75addedaed719791effa8aeb36 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 5 Jul 2024 05:37:16 -0700 Subject: [PATCH 01/10] Add client checks --- .../library/tritonclient/grpc/_infer_input.py | 47 ++++++++++++- .../library/tritonclient/grpc/_utils.py | 1 + .../library/tritonclient/http/_infer_input.py | 46 +++++++++++++ .../library/tritonclient/http/_utils.py | 6 +- .../library/tritonclient/utils/__init__.py | 69 +++++++++++++++++++ 5 files changed, 167 insertions(+), 2 deletions(-) diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index aff73bc18..b070c2153 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -30,7 +30,7 @@ from tritonclient.grpc import service_pb2 from tritonclient.utils import * -from ._utils import raise_error +from ._utils import get_data_type_byte_size, num_elements, raise_error class InferInput: @@ -54,6 +54,7 @@ def __init__(self, name, shape, datatype): self._input.ClearField("shape") self._input.shape.extend(shape) self._input.datatype = datatype + self._data_shape = None self._raw_content = None def name(self): @@ -86,6 +87,48 @@ def shape(self): """ return self._input.shape + def is_ready(self): + """Get the status of input. 
+ + Returns + ------- + bool + The status of input + """ + # Input must set only one of the following fields: '_raw_content', 'shared_memory_region' in '_input.parameters' + cnt = 0 + cnt += self._raw_content != None + cnt += "shared_memory_region" in self._input.parameters + if cnt != 1: + return + + if "shared_memory_region" in self._input.parameters: + # Using shared memory + if self._input.datatype != "BYTES": + expected_byte_size = num_elements( + self._input.shape + ) * get_data_type_byte_size(self._input.datatype) + data_byte_size = self._input.parameters[ + "shared_memory_byte_size" + ].int64_param + if data_byte_size != expected_byte_size: + raise_error( + "'{}' got unexpected byte size {}, expected {}".format( + self._input.name, data_byte_size, expected_byte_size + ) + ) + else: + # Not using shared memory + expected_num_elements = num_elements(self._input.shape) + data_num_elements = num_elements(self._data_shape) + if expected_num_elements != data_num_elements: + raise_error( + "'{}' got unexpected elements count {}, expected {}".format( + self._input.name, data_num_elements, expected_num_elements + ) + ) + return + def set_shape(self, shape): """Set the shape of input. 
@@ -171,6 +214,7 @@ def set_data_from_numpy(self, input_tensor): self._raw_content = b"" else: self._raw_content = input_tensor.tobytes() + self._data_shape = input_tensor.shape return self def set_shared_memory(self, region_name, byte_size, offset=0): @@ -193,6 +237,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): """ self._input.ClearField("contents") self._raw_content = None + self._data_shape = None self._input.parameters["shared_memory_region"].string_param = region_name self._input.parameters["shared_memory_byte_size"].int64_param = byte_size diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py index dae6d71f8..d4d897300 100755 --- a/src/python/library/tritonclient/grpc/_utils.py +++ b/src/python/library/tritonclient/grpc/_utils.py @@ -96,6 +96,7 @@ def _get_inference_request( if request_id != "": request.id = request_id for infer_input in inputs: + infer_input.is_ready() request.inputs.extend([infer_input._get_tensor()]) if infer_input._get_content() is not None: request.raw_input_contents.extend([infer_input._get_content()]) diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index 85beabd2f..02bf011d6 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -27,7 +27,9 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np from tritonclient.utils import ( + get_data_type_byte_size, np_to_triton_dtype, + num_elements, raise_error, serialize_bf16_tensor, serialize_byte_tensor, @@ -55,6 +57,7 @@ def __init__(self, name, shape, datatype): self._datatype = datatype self._parameters = {} self._data = None + self._data_shape = None self._raw_data = None def name(self): @@ -87,6 +90,47 @@ def shape(self): """ return self._shape + def is_ready(self): + """Get the status of input. 
+ + Returns + ------- + bool + The status of input + """ + # Input must set only one of the following fields: 'data', 'binary_data_size' in 'parameters', 'shared_memory_region' in 'parameters' + cnt = 0 + cnt += self._data != None + cnt += "binary_data_size" in self._parameters + cnt += "shared_memory_region" in self._parameters + if cnt != 1: + return + + if "shared_memory_region" in self._parameters: + # Using shared memory + if self._datatype != "BYTES": + expected_byte_size = num_elements( + self._shape + ) * get_data_type_byte_size(self._datatype) + data_byte_size = self._parameters["shared_memory_byte_size"] + if data_byte_size != expected_byte_size: + raise_error( + "'{}' got unexpected byte size {}, expected {}".format( + self._name, data_byte_size, expected_byte_size + ) + ) + else: + # Not using shared memory + expected_num_elements = num_elements(self._shape) + data_num_elements = num_elements(self._data_shape) + if expected_num_elements != data_num_elements: + raise_error( + "'{}' got unexpected elements count {}, expected {}".format( + self._name, data_num_elements, expected_num_elements + ) + ) + return + def set_shape(self, shape): """Set the shape of input. 
@@ -211,6 +255,7 @@ def set_data_from_numpy(self, input_tensor, binary_data=True): else: self._raw_data = input_tensor.tobytes() self._parameters["binary_data_size"] = len(self._raw_data) + self._data_shape = input_tensor.shape return self def set_shared_memory(self, region_name, byte_size, offset=0): @@ -232,6 +277,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): The updated input """ self._data = None + self._data_shape = None self._raw_data = None self._parameters.pop("binary_data_size", None) diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py index 0f4456c9a..015cdf4ec 100755 --- a/src/python/library/tritonclient/http/_utils.py +++ b/src/python/library/tritonclient/http/_utils.py @@ -106,7 +106,11 @@ def _get_inference_request( if timeout is not None: parameters["timeout"] = timeout - infer_request["inputs"] = [this_input._get_tensor() for this_input in inputs] + infer_request["inputs"] = [] + for infer_input in inputs: + infer_input.is_ready() + infer_request["inputs"].append(infer_input._get_tensor()) + if outputs: infer_request["outputs"] = [ this_output._get_tensor() for this_output in outputs diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py index 7f3079c66..b46685ef5 100755 --- a/src/python/library/tritonclient/utils/__init__.py +++ b/src/python/library/tritonclient/utils/__init__.py @@ -40,6 +40,27 @@ def raise_error(msg): raise InferenceServerException(msg=msg) from None +def num_elements(shape): + """ + Calculate the number of elements in an array given its shape. + + Parameters + ---------- + shape : list or tuple + Shape of the array. + + Returns + ------- + int + Number of elements in the array. + """ + + num_elements = 1 + for dim in shape: + num_elements *= dim + return num_elements + + def serialized_byte_size(tensor_value): """ Get the underlying number of bytes for a numpy ndarray. 
@@ -190,6 +211,54 @@ def triton_to_np_dtype(dtype): return None +def get_data_type_byte_size(dtype): + """ + Get the size of a given datatype in bytes. + + Parameters + ---------- + dtype : str + The data-type + + Returns + ------- + int + The size in bytes of the datatype, or 0 if size cannot be determined + (for example, values of type BYTES have variable length and so size + cannot be determined just from the type) + """ + + if dtype == "BOOL": + return 1 + elif dtype == "INT8": + return 1 + elif dtype == "INT16": + return 2 + elif dtype == "INT32": + return 4 + elif dtype == "INT64": + return 8 + elif dtype == "UINT8": + return 1 + elif dtype == "UINT16": + return 2 + elif dtype == "UINT32": + return 4 + elif dtype == "UINT64": + return 8 + elif dtype == "FP16": + return 2 + elif dtype == "FP32": + return 4 + elif dtype == "FP64": + return 8 + elif dtype == "BYTES": + return 0 + elif dtype == "BF16": + return 2 + return 0 + + def serialize_byte_tensor(input_tensor): """ Serializes a bytes tensor into a flat numpy array of length prepended From 7210d0053f1848dafbd4cfb6d534ae5989c43987 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Mon, 8 Jul 2024 18:10:59 -0700 Subject: [PATCH 02/10] Add C++ client tests --- src/c++/library/CMakeLists.txt | 11 + src/c++/library/common.cc | 111 +++++++ src/c++/library/common.h | 9 + src/c++/library/grpc_client.cc | 6 + src/c++/library/http_client.cc | 5 + src/c++/tests/CMakeLists.txt | 31 ++ src/c++/tests/client_input_test.cc | 299 ++++++++++++++++++ .../library/tritonclient/grpc/_infer_input.py | 10 +- .../library/tritonclient/grpc/_utils.py | 2 +- .../library/tritonclient/http/_infer_input.py | 10 +- .../library/tritonclient/http/_utils.py | 2 +- 11 files changed, 484 insertions(+), 12 deletions(-) create mode 100644 src/c++/tests/client_input_test.cc diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt index cdee03e1a..62bd9bcff 100644 --- a/src/c++/library/CMakeLists.txt +++ 
b/src/c++/library/CMakeLists.txt @@ -122,6 +122,7 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) grpcclient_static PRIVATE gRPC::grpc++ PRIVATE gRPC::grpc + PRIVATE triton-common-model-config PUBLIC protobuf::libprotobuf PUBLIC Threads::Threads ) @@ -275,6 +276,10 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) http-client-library EXCLUDE_FROM_ALL OBJECT ${REQUEST_SRCS} ${REQUEST_HDRS} ) + add_dependencies( + http-client-library + proto-library + ) if (NOT WIN32) set_property( @@ -287,12 +292,14 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) http-client-library PUBLIC triton-common-json # from repo-common + triton-common-model-config ) # libhttpclient_static.a add_library( httpclient_static STATIC $ + $ ) add_library( TritonClient::httpclient_static ALIAS httpclient_static @@ -301,6 +308,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) target_link_libraries( httpclient_static PRIVATE triton-common-json + PRIVATE triton-common-model-config PUBLIC CURL::libcurl PUBLIC Threads::Threads ) @@ -316,6 +324,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) add_library( httpclient SHARED $ + $ ) add_library( TritonClient::httpclient ALIAS httpclient @@ -333,6 +342,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) target_link_libraries( httpclient PRIVATE triton-common-json + PRIVATE triton-common-model-config PUBLIC CURL::libcurl PUBLIC Threads::Threads ) @@ -358,6 +368,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) $ $ $ + $ PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc index 27da69888..8bf77c7d2 100644 --- a/src/c++/library/common.cc +++ b/src/c++/library/common.cc @@ -26,6 +26,10 @@ #include "common.h" +#include + +#include "triton/common/model_config.h" + namespace triton { namespace client { //============================================================================== @@ -232,6 +236,113 @@ 
InferInput::SetBinaryData(const bool binary_data) return Error::Success; } +Error +InferInput::GetStringCount(size_t* str_cnt) const +{ + int64_t str_checked = 0; + size_t remaining_str_size = 0; + + size_t next_buf_idx = 0; + const size_t buf_cnt = bufs_.size(); + + const uint8_t* buf = nullptr; + size_t remaining_buf_size = 0; + + // Validate elements until all buffers have been fully processed. + while (remaining_buf_size || next_buf_idx < buf_cnt) { + // Get the next buf if not currently processing one. + if (!remaining_buf_size) { + // Reset remaining buf size and pointers for next buf. + buf = bufs_[next_buf_idx]; + remaining_buf_size = buf_byte_sizes_[next_buf_idx]; + next_buf_idx++; + } + + constexpr size_t kStringSizeIndicator = sizeof(uint32_t); + // Get the next element if not currently processing one. + if (!remaining_str_size) { + // FIXME: Assume the string element's byte size indicator is not spread + // across buf boundaries for simplicity. Also needs better log msg. + if (remaining_buf_size < kStringSizeIndicator) { + return Error("element byte size indicator exceeds the end of the buf."); + } + + // Start the next element and reset the remaining element size. + remaining_str_size = *(reinterpret_cast(buf)); + str_checked++; + + // Advance pointer and remainder by the indicator size. + buf += kStringSizeIndicator; + remaining_buf_size -= kStringSizeIndicator; + } + + // If the remaining buf fits it: consume the rest of the element, proceed + // to the next element. + if (remaining_buf_size >= remaining_str_size) { + buf += remaining_str_size; + remaining_buf_size -= remaining_str_size; + remaining_str_size = 0; + } + // Otherwise the remaining element is larger: consume the rest of the + // buf, proceed to the next buf. + else { + remaining_str_size -= remaining_buf_size; + remaining_buf_size = 0; + } + } + + // FIXME: If more than expected, should stop earlier + // Validate the number of processed elements exactly match expectations. 
+ *str_cnt = str_checked; + return Error::Success; +} + +Error +InferInput::ValidateData() const +{ + inference::DataType datatype = + triton::common::ProtocolStringToDataType(datatype_); + if (io_type_ == SHARED_MEMORY) { + if (datatype == inference::DataType::TYPE_STRING) { + // TODO Didn't find any shm and BYTES inputs inference example + } else { + int64_t expected_byte_size = + triton::common::GetByteSize(datatype, shape_); + if ((int64_t)byte_size_ != expected_byte_size) { + return Error( + "'" + name_ + "' got unexpected byte size " + + std::to_string(byte_size_) + ", expected " + + std::to_string(expected_byte_size)); + } + } + } else { + if (datatype == inference::DataType::TYPE_STRING) { + int64_t expected_str_cnt = triton::common::GetElementCount(shape_); + size_t str_cnt; + Error err = GetStringCount(&str_cnt); + if (!err.IsOk()) { + return err; + } + if ((int64_t)str_cnt != expected_str_cnt) { + return Error( + "'" + name_ + "' got unexpected string count " + + std::to_string(str_cnt) + ", expected " + + std::to_string(expected_str_cnt)); + } + } else { + int64_t expected_byte_size = + triton::common::GetByteSize(datatype, shape_); + if ((int64_t)byte_size_ != expected_byte_size) { + return Error( + "'" + name_ + "' got unexpected byte size " + + std::to_string(byte_size_) + ", expected " + + std::to_string(expected_byte_size)); + } + } + } + return Error::Success; +} + Error InferInput::PrepareForRequest() { diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 8d05b966b..e132ee51a 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -354,6 +354,15 @@ class InferInput { /// \return Error object indicating success or failure. Error SetBinaryData(const bool binary_data); + /// Gets the total number of strings in this input data. + /// \param str_cnt The number of strings. + /// \return Error object indicating success or failure. 
+ Error GetStringCount(size_t* str_cnt) const; + + /// Validate input has data and input shape matches input data. + /// \return Error object indicating success or failure. + Error ValidateData() const; + private: #ifdef TRITON_INFERENCE_SERVER_CLIENT_CLASS friend class TRITON_INFERENCE_SERVER_CLIENT_CLASS; diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc index c9ee70125..d2971ead6 100644 --- a/src/c++/library/grpc_client.cc +++ b/src/c++/library/grpc_client.cc @@ -1470,7 +1470,13 @@ InferenceServerGrpcClient::PreRunProcessing( int index = 0; infer_request_.mutable_raw_input_contents()->Clear(); + Error err; for (const auto input : inputs) { + err = input->ValidateData(); + if (!err.IsOk()) { + return err; + } + // Add new InferInputTensor submessages only if required, otherwise // reuse the submessages already available. auto grpc_input = (infer_request_.inputs().size() <= index) diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index 9f2f5ab5e..55a846839 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -2117,6 +2117,11 @@ InferenceServerHttpClient::PreRunProcessing( // Add the buffers holding input tensor data bool all_inputs_are_json{true}; for (const auto this_input : inputs) { + err = this_input->ValidateData(); + if (!err.IsOk()) { + return err; + } + if (this_input->BinaryData()) { all_inputs_are_json = false; } diff --git a/src/c++/tests/CMakeLists.txt b/src/c++/tests/CMakeLists.txt index 81eb74271..efa4f45f3 100644 --- a/src/c++/tests/CMakeLists.txt +++ b/src/c++/tests/CMakeLists.txt @@ -70,6 +70,9 @@ install( RUNTIME DESTINATION bin ) +# +# cc_client_test +# add_executable( cc_client_test cc_client_test.cc @@ -89,6 +92,34 @@ install( RUNTIME DESTINATION bin ) +# +# client_input_test +# +add_executable( + client_input_test + client_input_test.cc + $ +) +target_include_directories( + client_input_test + PRIVATE + ${GTEST_INCLUDE_DIRS} +) +target_link_libraries( + 
client_input_test + PRIVATE + grpcclient_static + httpclient_static + gtest + ${GTEST_LIBRARY} + ${GTEST_MAIN_LIBRARY} + GTest::gmock +) +install( + TARGETS client_input_test + RUNTIME DESTINATION bin +) + endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC endif() diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc new file mode 100644 index 000000000..0ae346afb --- /dev/null +++ b/src/c++/tests/client_input_test.cc @@ -0,0 +1,299 @@ +// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "gmock/gmock.h" +#include "grpc_client.h" +#include "gtest/gtest.h" +#include "http_client.h" +#include "shm_utils.h" + +namespace tc = triton::client; + +#define FAIL_IF_ERR(X, MSG) \ + do { \ + tc::Error err = (X); \ + if (!err.IsOk()) { \ + std::cerr << "error: " << (MSG) << ": " << err << std::endl; \ + exit(1); \ + } \ + } while (false) + +#define FAIL_IF_SUCCESS(X, MSG, ERR_MSG) \ + do { \ + tc::Error err = (X); \ + ASSERT_FALSE(err.IsOk()) << "error: " << (MSG) << ": "; \ + ASSERT_THAT(err.Message(), ::testing::HasSubstr(ERR_MSG)); \ + } while (false) + +namespace { + +template +class ClientInputTest : public ::testing::Test { + public: + ClientInputTest() : shape_{1, 16} {} + + void SetUp() override + { + std::string url; + std::string client_type; + if (std::is_same::value) { + url = "localhost:8001"; + client_type = "GRPC"; + } else if (std::is_same::value) { + url = "localhost:8000"; + client_type = "HTTP"; + } else { + ASSERT_TRUE(false) << "Unrecognized client class type '" + << typeid(ClientType).name() << "'"; + } + auto err = ClientType::Create(&this->client_, url); + ASSERT_TRUE(err.IsOk()) + << "failed to create " << client_type << " client: " << err.Message(); + + // Initialize vector input_data_ + for (size_t i = 0; i < 16; ++i) { + this->input_data_.emplace_back(i); + } + } + + std::unique_ptr client_; + std::vector input_data_; + std::vector shape_; +}; + +TYPED_TEST_SUITE_P(ClientInputTest); + 
+TYPED_TEST_P(ClientInputTest, AppendRaw) +{ + // Initialize the inputs with the data. + tc::InferInput* input0; + tc::InferInput* input1; + + FAIL_IF_ERR( + tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"), + "unable to get INPUT0"); + std::shared_ptr input0_ptr; + input0_ptr.reset(input0); + FAIL_IF_ERR( + tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"), + "unable to get INPUT1"); + std::shared_ptr input1_ptr; + input1_ptr.reset(input1); + + FAIL_IF_ERR( + input0_ptr->AppendRaw( + reinterpret_cast(&(this->input_data_[0])), + this->input_data_.size() * sizeof(int32_t)), + "unable to set data for INPUT0"); + FAIL_IF_ERR( + input1_ptr->AppendRaw( + reinterpret_cast(&(this->input_data_[0])), + this->input_data_.size() * sizeof(int32_t)), + "unable to set data for INPUT1"); + + // The inference settings. Will be using default for now. + tc::InferOptions options("simple"); + options.model_version_ = ""; + + std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; + tc::InferResult* results; + inputs[1]->SetShape({1, 15}); + FAIL_IF_SUCCESS( + this->client_->Infer(&results, options, inputs), + "expect error with inference request", + "'INPUT1' got unexpected byte size 64, expected 60"); + + // Check error message and verify the request reaches the server + inputs[1]->SetShape({2, 8}); + FAIL_IF_SUCCESS( + this->client_->Infer(&results, options, inputs), + "expect error with inference request", + "input 'INPUT0' batch size does not match other inputs for 'simple'"); +} + +TYPED_TEST_P(ClientInputTest, SetSharedMemory) +{ + // Unregistering all shared memory regions for a clean + // start. + FAIL_IF_ERR( + this->client_->UnregisterSystemSharedMemory(), + "unable to unregister all system shared memory regions"); + FAIL_IF_ERR( + this->client_->UnregisterCudaSharedMemory(), + "unable to unregister all cuda shared memory regions"); + + // Initialize the inputs with the data. 
+ tc::InferInput* input0; + tc::InferInput* input1; + size_t input_byte_size = 64; + + FAIL_IF_ERR( + tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"), + "unable to get INPUT0"); + std::shared_ptr input0_ptr; + input0_ptr.reset(input0); + FAIL_IF_ERR( + tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"), + "unable to get INPUT1"); + std::shared_ptr input1_ptr; + input1_ptr.reset(input1); + + // Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique + // integers and Input1 to all ones. + std::string shm_key = "/input_simple"; + int shm_fd_ip, *input0_shm; + FAIL_IF_ERR( + tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip), + ""); + FAIL_IF_ERR( + tc::MapSharedMemory( + shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm), + ""); + FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), ""); + int* input1_shm = (int*)(input0_shm + 16); + for (size_t i = 0; i < 16; ++i) { + *(input0_shm + i) = i; + *(input1_shm + i) = 1; + } + + FAIL_IF_ERR( + this->client_->RegisterSystemSharedMemory( + "input_data", shm_key, input_byte_size * 2), + "failed to register input shared memory region"); + + FAIL_IF_ERR( + input0_ptr->SetSharedMemory( + "input_data", input_byte_size, 0 /* offset */), + "unable to set shared memory for INPUT0"); + FAIL_IF_ERR( + input1_ptr->SetSharedMemory( + "input_data", input_byte_size, input_byte_size /* offset */), + "unable to set shared memory for INPUT1"); + + // The inference settings. Will be using default for now. 
+ tc::InferOptions options("simple"); + options.model_version_ = ""; + + std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; + inputs[1]->SetShape({1, 15}); + + tc::InferResult* results; + FAIL_IF_SUCCESS( + this->client_->Infer(&results, options, inputs), + "expect error with inference request", + ("'INPUT1' got unexpected byte size " + std::to_string(input_byte_size) + + ", expected " + std::to_string(input_byte_size - sizeof(int)))); + + // Get shared memory regions active/registered within triton + // std::string shm_status; + // FAIL_IF_ERR( + // this->client_->SystemSharedMemoryStatus(&shm_status), + // "failed to get shared memory status"); + // std::cout << "Shared Memory Status:\n" << shm_status << "\n"; + + // Unregister shared memory + FAIL_IF_ERR( + this->client_->UnregisterSystemSharedMemory("input_data"), + "unable to unregister shared memory input region"); + + // Cleanup shared memory + FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), ""); + FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), ""); +} + +TYPED_TEST_P(ClientInputTest, AppendString) +{ + // Create the data for the two input tensors. Initialize the first + // to unique integers and the second to all ones. The input tensors + // are the string representation of these values. + std::vector input0_data(16); + std::vector input1_data(16); + for (size_t i = 0; i < 16; ++i) { + input0_data[i] = std::to_string(i); + input1_data[i] = std::to_string(1); + } + + std::vector shape{1, 16}; + + // Initialize the inputs with the data. 
+ tc::InferInput* input0; + tc::InferInput* input1; + + FAIL_IF_ERR( + tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"), + "unable to get INPUT0"); + std::shared_ptr input0_ptr; + input0_ptr.reset(input0); + FAIL_IF_ERR( + tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"), + "unable to get INPUT1"); + std::shared_ptr input1_ptr; + input1_ptr.reset(input1); + + FAIL_IF_ERR( + input0_ptr->AppendFromString(input0_data), + "unable to set data for INPUT0"); + FAIL_IF_ERR( + input1_ptr->AppendFromString(input1_data), + "unable to set data for INPUT1"); + + // The inference settings. Will be using default for now. + tc::InferOptions options("simple_string"); + options.model_version_ = ""; + + std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; + tc::InferResult* results; + input1_ptr->SetShape({1, 15}); + FAIL_IF_SUCCESS( + this->client_->Infer(&results, options, inputs), + "expect error with inference request", + "'INPUT1' got unexpected elements count 16, expected 15"); + + // Check error message and verify the request reaches the server + inputs[1]->SetShape({2, 8}); + FAIL_IF_SUCCESS( + this->client_->Infer(&results, options, inputs), + "expect error with inference request", + "input 'INPUT0' batch size does not match other inputs for " + "'simple_string'"); +} + +REGISTER_TYPED_TEST_SUITE_P( + ClientInputTest, AppendRaw, SetSharedMemory, AppendString); + +INSTANTIATE_TYPED_TEST_SUITE_P( + GRPC, ClientInputTest, tc::InferenceServerGrpcClient); +INSTANTIATE_TYPED_TEST_SUITE_P( + HTTP, ClientInputTest, tc::InferenceServerHttpClient); + +} // namespace + +int +main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index b070c2153..0711ef792 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -87,15 +87,15 @@ 
def shape(self): """ return self._input.shape - def is_ready(self): - """Get the status of input. + def validate_data(self): + """Validate input has data and input shape matches input data. Returns ------- - bool - The status of input + None """ - # Input must set only one of the following fields: '_raw_content', 'shared_memory_region' in '_input.parameters' + # Input must set only one of the following fields: '_raw_content', + # 'shared_memory_region' in '_input.parameters' cnt = 0 cnt += self._raw_content != None cnt += "shared_memory_region" in self._input.parameters diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py index d4d897300..8b40f5aa3 100755 --- a/src/python/library/tritonclient/grpc/_utils.py +++ b/src/python/library/tritonclient/grpc/_utils.py @@ -96,7 +96,7 @@ def _get_inference_request( if request_id != "": request.id = request_id for infer_input in inputs: - infer_input.is_ready() + infer_input.validate_data() request.inputs.extend([infer_input._get_tensor()]) if infer_input._get_content() is not None: request.raw_input_contents.extend([infer_input._get_content()]) diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index 02bf011d6..6c0aaade5 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -90,15 +90,15 @@ def shape(self): """ return self._shape - def is_ready(self): - """Get the status of input. + def validate_data(self): + """Validate input has data and input shape matches input data. 
Returns ------- - bool - The status of input + None """ - # Input must set only one of the following fields: 'data', 'binary_data_size' in 'parameters', 'shared_memory_region' in 'parameters' + # Input must set only one of the following fields: 'data', 'binary_data_size' + # in 'parameters', 'shared_memory_region' in 'parameters' cnt = 0 cnt += self._data != None cnt += "binary_data_size" in self._parameters diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py index 015cdf4ec..2b1d37d76 100755 --- a/src/python/library/tritonclient/http/_utils.py +++ b/src/python/library/tritonclient/http/_utils.py @@ -108,7 +108,7 @@ def _get_inference_request( infer_request["inputs"] = [] for infer_input in inputs: - infer_input.is_ready() + infer_input.validate_data() infer_request["inputs"].append(infer_input._get_tensor()) if outputs: From 9c2941bb51f455b10ac5398faad305ee899f77ce Mon Sep 17 00:00:00 2001 From: Yingge He Date: Mon, 8 Jul 2024 18:26:47 -0700 Subject: [PATCH 03/10] Update copyrights --- src/c++/library/CMakeLists.txt | 2 +- src/c++/library/common.cc | 2 +- src/c++/library/common.h | 2 +- src/c++/library/grpc_client.cc | 2 +- src/c++/library/http_client.cc | 2 +- src/c++/tests/CMakeLists.txt | 2 +- src/python/library/tritonclient/grpc/_infer_input.py | 2 +- src/python/library/tritonclient/grpc/_utils.py | 2 +- src/python/library/tritonclient/http/_infer_input.py | 2 +- src/python/library/tritonclient/http/_utils.py | 2 +- src/python/library/tritonclient/utils/__init__.py | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt index 62bd9bcff..7d8a7c29f 100644 --- a/src/c++/library/CMakeLists.txt +++ b/src/c++/library/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc index 8bf77c7d2..5e3e7c517 100644 --- a/src/c++/library/common.cc +++ b/src/c++/library/common.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/library/common.h b/src/c++/library/common.h index e132ee51a..8ed9f7ab3 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc index d2971ead6..02a1a888e 100644 --- a/src/c++/library/grpc_client.cc +++ b/src/c++/library/grpc_client.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index 55a846839..343263d6c 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/tests/CMakeLists.txt b/src/c++/tests/CMakeLists.txt index efa4f45f3..40a3f6d61 100644 --- a/src/c++/tests/CMakeLists.txt +++ b/src/c++/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index 0711ef792..a20bca7e8 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py index 8b40f5aa3..bbc13db4b 100755 --- a/src/python/library/tritonclient/grpc/_utils.py +++ b/src/python/library/tritonclient/grpc/_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index 6c0aaade5..5355ed7aa 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py index 2b1d37d76..d4ffc5b70 100755 --- a/src/python/library/tritonclient/http/_utils.py +++ b/src/python/library/tritonclient/http/_utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py index b46685ef5..40dffffcf 100755 --- a/src/python/library/tritonclient/utils/__init__.py +++ b/src/python/library/tritonclient/utils/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions From b4c6a17c06a1585611466d60021ddcaa8fb3f70c Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 9 Jul 2024 15:02:15 -0700 Subject: [PATCH 04/10] Update error msg and build deps --- src/c++/library/CMakeLists.txt | 1 + src/c++/library/common.cc | 6 +++--- src/python/library/tritonclient/grpc/_infer_input.py | 4 ++-- src/python/library/tritonclient/http/_infer_input.py | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt index 7d8a7c29f..c6b3e09e1 100644 --- a/src/c++/library/CMakeLists.txt +++ b/src/c++/library/CMakeLists.txt @@ -151,6 +151,7 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) grpcclient PRIVATE gRPC::grpc++ PRIVATE gRPC::grpc + PRIVATE triton-common-model-config PUBLIC protobuf::libprotobuf PUBLIC Threads::Threads ) diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc index 5e3e7c517..26903a03a 100644 --- a/src/c++/library/common.cc +++ b/src/c++/library/common.cc @@ -310,7 +310,7 @@ InferInput::ValidateData() const triton::common::GetByteSize(datatype, shape_); if ((int64_t)byte_size_ != expected_byte_size) { return Error( - "'" + name_ + "' got unexpected byte size " + + "input '" + name_ + "' got unexpected byte size " + std::to_string(byte_size_) + ", expected " + std::to_string(expected_byte_size)); } @@ -325,7 +325,7 @@ InferInput::ValidateData() const } if ((int64_t)str_cnt != expected_str_cnt) { return Error( - "'" + name_ + "' got unexpected string count " + + "input '" + name_ + "' got unexpected string count " + std::to_string(str_cnt) + ", expected " + std::to_string(expected_str_cnt)); } @@ -334,7 +334,7 @@ InferInput::ValidateData() const triton::common::GetByteSize(datatype, shape_); if ((int64_t)byte_size_ != expected_byte_size) { return Error( - "'" + name_ + "' got unexpected byte size " + 
+ "input '" + name_ + "' got unexpected byte size " + std::to_string(byte_size_) + ", expected " + std::to_string(expected_byte_size)); } diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index a20bca7e8..89d944dff 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -113,7 +113,7 @@ def validate_data(self): ].int64_param if data_byte_size != expected_byte_size: raise_error( - "'{}' got unexpected byte size {}, expected {}".format( + "input '{}' got unexpected byte size {}, expected {}".format( self._input.name, data_byte_size, expected_byte_size ) ) @@ -123,7 +123,7 @@ def validate_data(self): data_num_elements = num_elements(self._data_shape) if expected_num_elements != data_num_elements: raise_error( - "'{}' got unexpected elements count {}, expected {}".format( + "input '{}' got unexpected elements count {}, expected {}".format( self._input.name, data_num_elements, expected_num_elements ) ) diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index 5355ed7aa..cc9f259cf 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -115,7 +115,7 @@ def validate_data(self): data_byte_size = self._parameters["shared_memory_byte_size"] if data_byte_size != expected_byte_size: raise_error( - "'{}' got unexpected byte size {}, expected {}".format( + "input '{}' got unexpected byte size {}, expected {}".format( self._name, data_byte_size, expected_byte_size ) ) @@ -125,7 +125,7 @@ def validate_data(self): data_num_elements = num_elements(self._data_shape) if expected_num_elements != data_num_elements: raise_error( - "'{}' got unexpected elements count {}, expected {}".format( + "input '{}' got unexpected elements count {}, expected {}".format( self._name, data_num_elements, 
expected_num_elements ) ) From e5e6b7e0d39667f9bacb86fc389ba2beee86c93d Mon Sep 17 00:00:00 2001 From: Yingge He Date: Wed, 10 Jul 2024 00:58:04 -0700 Subject: [PATCH 05/10] Update error msg --- src/c++/tests/client_input_test.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc index 0ae346afb..b6d9d7d87 100644 --- a/src/c++/tests/client_input_test.cc +++ b/src/c++/tests/client_input_test.cc @@ -124,7 +124,7 @@ TYPED_TEST_P(ClientInputTest, AppendRaw) FAIL_IF_SUCCESS( this->client_->Infer(&results, options, inputs), "expect error with inference request", - "'INPUT1' got unexpected byte size 64, expected 60"); + "input 'INPUT1' got unexpected byte size 64, expected 60"); // Check error message and verify the request reaches the server inputs[1]->SetShape({2, 8}); @@ -204,8 +204,9 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory) FAIL_IF_SUCCESS( this->client_->Infer(&results, options, inputs), "expect error with inference request", - ("'INPUT1' got unexpected byte size " + std::to_string(input_byte_size) + - ", expected " + std::to_string(input_byte_size - sizeof(int)))); + ("input 'INPUT1' got unexpected byte size " + + std::to_string(input_byte_size) + ", expected " + + std::to_string(input_byte_size - sizeof(int)))); // Get shared memory regions active/registered within triton // std::string shm_status; @@ -270,7 +271,7 @@ TYPED_TEST_P(ClientInputTest, AppendString) FAIL_IF_SUCCESS( this->client_->Infer(&results, options, inputs), "expect error with inference request", - "'INPUT1' got unexpected elements count 16, expected 15"); + "input 'INPUT1' got unexpected elements count 16, expected 15"); // Check error message and verify the request reaches the server inputs[1]->SetShape({2, 8}); From 07059a64ef2e3f26bca4133aa6aa33fbd2c9afc1 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Mon, 22 Jul 2024 20:06:33 -0700 Subject: [PATCH 06/10] Remove client checks for string 
inputs --- src/c++/library/common.cc | 109 +++-------------------------- src/c++/library/common.h | 5 -- src/c++/tests/client_input_test.cc | 104 ++++++++++----------------- 3 files changed, 47 insertions(+), 171 deletions(-) diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc index 26903a03a..566e3d366 100644 --- a/src/c++/library/common.cc +++ b/src/c++/library/common.cc @@ -236,109 +236,22 @@ InferInput::SetBinaryData(const bool binary_data) return Error::Success; } -Error -InferInput::GetStringCount(size_t* str_cnt) const -{ - int64_t str_checked = 0; - size_t remaining_str_size = 0; - - size_t next_buf_idx = 0; - const size_t buf_cnt = bufs_.size(); - - const uint8_t* buf = nullptr; - size_t remaining_buf_size = 0; - - // Validate elements until all buffers have been fully processed. - while (remaining_buf_size || next_buf_idx < buf_cnt) { - // Get the next buf if not currently processing one. - if (!remaining_buf_size) { - // Reset remaining buf size and pointers for next buf. - buf = bufs_[next_buf_idx]; - remaining_buf_size = buf_byte_sizes_[next_buf_idx]; - next_buf_idx++; - } - - constexpr size_t kStringSizeIndicator = sizeof(uint32_t); - // Get the next element if not currently processing one. - if (!remaining_str_size) { - // FIXME: Assume the string element's byte size indicator is not spread - // across buf boundaries for simplicity. Also needs better log msg. - if (remaining_buf_size < kStringSizeIndicator) { - return Error("element byte size indicator exceeds the end of the buf."); - } - - // Start the next element and reset the remaining element size. - remaining_str_size = *(reinterpret_cast(buf)); - str_checked++; - - // Advance pointer and remainder by the indicator size. - buf += kStringSizeIndicator; - remaining_buf_size -= kStringSizeIndicator; - } - - // If the remaining buf fits it: consume the rest of the element, proceed - // to the next element. 
- if (remaining_buf_size >= remaining_str_size) { - buf += remaining_str_size; - remaining_buf_size -= remaining_str_size; - remaining_str_size = 0; - } - // Otherwise the remaining element is larger: consume the rest of the - // buf, proceed to the next buf. - else { - remaining_str_size -= remaining_buf_size; - remaining_buf_size = 0; - } - } - - // FIXME: If more than expected, should stop earlier - // Validate the number of processed elements exactly match expectations. - *str_cnt = str_checked; - return Error::Success; -} - Error InferInput::ValidateData() const { inference::DataType datatype = triton::common::ProtocolStringToDataType(datatype_); - if (io_type_ == SHARED_MEMORY) { - if (datatype == inference::DataType::TYPE_STRING) { - // TODO Didn't find any shm and BYTES inputs inference example - } else { - int64_t expected_byte_size = - triton::common::GetByteSize(datatype, shape_); - if ((int64_t)byte_size_ != expected_byte_size) { - return Error( - "input '" + name_ + "' got unexpected byte size " + - std::to_string(byte_size_) + ", expected " + - std::to_string(expected_byte_size)); - } - } - } else { - if (datatype == inference::DataType::TYPE_STRING) { - int64_t expected_str_cnt = triton::common::GetElementCount(shape_); - size_t str_cnt; - Error err = GetStringCount(&str_cnt); - if (!err.IsOk()) { - return err; - } - if ((int64_t)str_cnt != expected_str_cnt) { - return Error( - "input '" + name_ + "' got unexpected string count " + - std::to_string(str_cnt) + ", expected " + - std::to_string(expected_str_cnt)); - } - } else { - int64_t expected_byte_size = - triton::common::GetByteSize(datatype, shape_); - if ((int64_t)byte_size_ != expected_byte_size) { - return Error( - "input '" + name_ + "' got unexpected byte size " + - std::to_string(byte_size_) + ", expected " + - std::to_string(expected_byte_size)); - } - } + // String inputs will be checked at core and backend to reduce overhead. 
+ if (datatype == inference::DataType::TYPE_STRING) { + return Error::Success; + } + + int64_t expected_byte_size = triton::common::GetByteSize(datatype, shape_); + if ((int64_t)byte_size_ != expected_byte_size) { + return Error( + "input '" + name_ + "' got unexpected byte size " + + std::to_string(byte_size_) + ", expected " + + std::to_string(expected_byte_size)); } return Error::Success; } diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 8ed9f7ab3..64717e628 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -354,11 +354,6 @@ class InferInput { /// \return Error object indicating success or failure. Error SetBinaryData(const bool binary_data); - /// Gets the total number of strings in this input data. - /// \param byte_size The number of strings. - /// \return Error object indicating success or failure. - Error GetStringCount(size_t* str_cnt) const; - /// Validate input has data and input shape matches input data. /// \return Error object indicating success of failure. 
Error ValidateData() const; diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc index b6d9d7d87..5b973f6d5 100644 --- a/src/c++/tests/client_input_test.cc +++ b/src/c++/tests/client_input_test.cc @@ -120,18 +120,23 @@ TYPED_TEST_P(ClientInputTest, AppendRaw) std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; tc::InferResult* results; + + // Test 1 inputs[1]->SetShape({1, 15}); FAIL_IF_SUCCESS( this->client_->Infer(&results, options, inputs), "expect error with inference request", "input 'INPUT1' got unexpected byte size 64, expected 60"); - // Check error message and verify the request reaches the server + // Test 2 + inputs[0]->SetShape({2, 8}); inputs[1]->SetShape({2, 8}); + // Assert the request reaches the server FAIL_IF_SUCCESS( this->client_->Infer(&results, options, inputs), "expect error with inference request", - "input 'INPUT0' batch size does not match other inputs for 'simple'"); + "unexpected shape for input 'INPUT1' for model 'simple'. Expected " + "[-1,16], got [2,8]"); } TYPED_TEST_P(ClientInputTest, SetSharedMemory) @@ -198,9 +203,10 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory) options.model_version_ = ""; std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; - inputs[1]->SetShape({1, 15}); - tc::InferResult* results; + + // Test 1 + inputs[1]->SetShape({1, 15}); FAIL_IF_SUCCESS( this->client_->Infer(&results, options, inputs), "expect error with inference request", @@ -208,12 +214,32 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory) std::to_string(input_byte_size) + ", expected " + std::to_string(input_byte_size - sizeof(int)))); + // Test 2 + inputs[0]->SetShape({2, 8}); + inputs[1]->SetShape({2, 8}); + // Assert the request reaches the server + FAIL_IF_SUCCESS( + this->client_->Infer(&results, options, inputs), + "expect error with inference request", + "unexpected shape for input 'INPUT1' for model 'simple'. 
Expected " + "[-1,16], got [2,8]"); + // Get shared memory regions active/registered within triton - // std::string shm_status; - // FAIL_IF_ERR( - // this->client_->SystemSharedMemoryStatus(&shm_status), - // "failed to get shared memory status"); - // std::cout << "Shared Memory Status:\n" << shm_status << "\n"; + using ClientType = TypeParam; + if constexpr (std::is_same< + ClientType, tc::InferenceServerGrpcClient>::value) { + inference::SystemSharedMemoryStatusResponse shm_status; + FAIL_IF_ERR( + this->client_->SystemSharedMemoryStatus(&shm_status), + "failed to get shared memory status"); + std::cout << "Shared Memory Status:\n" << shm_status.DebugString() << "\n"; + } else { + std::string shm_status; + FAIL_IF_ERR( + this->client_->SystemSharedMemoryStatus(&shm_status), + "failed to get shared memory status"); + std::cout << "Shared Memory Status:\n" << shm_status << "\n"; + } // Unregister shared memory FAIL_IF_ERR( @@ -225,65 +251,7 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory) FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), ""); } -TYPED_TEST_P(ClientInputTest, AppendString) -{ - // Create the data for the two input tensors. Initialize the first - // to unique integers and the second to all ones. The input tensors - // are the string representation of these values. - std::vector input0_data(16); - std::vector input1_data(16); - for (size_t i = 0; i < 16; ++i) { - input0_data[i] = std::to_string(i); - input1_data[i] = std::to_string(1); - } - - std::vector shape{1, 16}; - - // Initialize the inputs with the data. 
- tc::InferInput* input0; - tc::InferInput* input1; - - FAIL_IF_ERR( - tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"), - "unable to get INPUT0"); - std::shared_ptr input0_ptr; - input0_ptr.reset(input0); - FAIL_IF_ERR( - tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"), - "unable to get INPUT1"); - std::shared_ptr input1_ptr; - input1_ptr.reset(input1); - - FAIL_IF_ERR( - input0_ptr->AppendFromString(input0_data), - "unable to set data for INPUT0"); - FAIL_IF_ERR( - input1_ptr->AppendFromString(input1_data), - "unable to set data for INPUT1"); - - // The inference settings. Will be using default for now. - tc::InferOptions options("simple_string"); - options.model_version_ = ""; - - std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; - tc::InferResult* results; - input1_ptr->SetShape({1, 15}); - FAIL_IF_SUCCESS( - this->client_->Infer(&results, options, inputs), - "expect error with inference request", - "input 'INPUT1' got unexpected elements count 16, expected 15"); - - // Check error message and verify the request reaches the server - inputs[1]->SetShape({2, 8}); - FAIL_IF_SUCCESS( - this->client_->Infer(&results, options, inputs), - "expect error with inference request", - "input 'INPUT0' batch size does not match other inputs for " - "'simple_string'"); -} - -REGISTER_TYPED_TEST_SUITE_P( - ClientInputTest, AppendRaw, SetSharedMemory, AppendString); +REGISTER_TYPED_TEST_SUITE_P(ClientInputTest, AppendRaw, SetSharedMemory); INSTANTIATE_TYPED_TEST_SUITE_P( GRPC, ClientInputTest, tc::InferenceServerGrpcClient); From 2a5c507c33f3fc3ba44434e21fdfc2c1018019c8 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Tue, 30 Jul 2024 19:22:55 -0700 Subject: [PATCH 07/10] Undo C++ client checks and tests --- src/c++/library/CMakeLists.txt | 12 -- src/c++/library/common.cc | 26 +-- src/c++/library/common.h | 6 +- src/c++/library/grpc_client.cc | 8 +- src/c++/library/http_client.cc | 7 +- src/c++/tests/CMakeLists.txt | 33 +--- 
src/c++/tests/client_input_test.cc | 268 ----------------------------- 7 files changed, 5 insertions(+), 355 deletions(-) delete mode 100644 src/c++/tests/client_input_test.cc diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt index 697930f4e..7a62971e5 100644 --- a/src/c++/library/CMakeLists.txt +++ b/src/c++/library/CMakeLists.txt @@ -223,7 +223,6 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) grpcclient_static PRIVATE gRPC::grpc++ PRIVATE gRPC::grpc - PRIVATE triton-common-model-config PUBLIC protobuf::libprotobuf PUBLIC Threads::Threads ) @@ -252,7 +251,6 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER) grpcclient PRIVATE gRPC::grpc++ PRIVATE gRPC::grpc - PRIVATE triton-common-model-config PUBLIC protobuf::libprotobuf PUBLIC Threads::Threads ) @@ -378,10 +376,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) http-client-library EXCLUDE_FROM_ALL OBJECT ${REQUEST_SRCS} ${REQUEST_HDRS} ) - add_dependencies( - http-client-library - proto-library - ) if (NOT WIN32) set_property( @@ -394,14 +388,12 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) http-client-library PUBLIC triton-common-json # from repo-common - triton-common-model-config ) # libhttpclient_static.a add_library( httpclient_static STATIC $ - $ ) add_library( TritonClient::httpclient_static ALIAS httpclient_static @@ -410,7 +402,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) target_link_libraries( httpclient_static PRIVATE triton-common-json - PRIVATE triton-common-model-config PUBLIC CURL::libcurl PUBLIC Threads::Threads ) @@ -426,7 +417,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) add_library( httpclient SHARED $ - $ ) add_library( TritonClient::httpclient ALIAS httpclient @@ -444,7 +434,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) target_link_libraries( httpclient PRIVATE triton-common-json - PRIVATE triton-common-model-config PUBLIC CURL::libcurl PUBLIC Threads::Threads ) @@ 
-470,7 +459,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER) $ $ $ - $ PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc index 566e3d366..27da69888 100644 --- a/src/c++/library/common.cc +++ b/src/c++/library/common.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -26,10 +26,6 @@ #include "common.h" -#include - -#include "triton/common/model_config.h" - namespace triton { namespace client { //============================================================================== @@ -236,26 +232,6 @@ InferInput::SetBinaryData(const bool binary_data) return Error::Success; } -Error -InferInput::ValidateData() const -{ - inference::DataType datatype = - triton::common::ProtocolStringToDataType(datatype_); - // String inputs will be checked at core and backend to reduce overhead. - if (datatype == inference::DataType::TYPE_STRING) { - return Error::Success; - } - - int64_t expected_byte_size = triton::common::GetByteSize(datatype, shape_); - if ((int64_t)byte_size_ != expected_byte_size) { - return Error( - "input '" + name_ + "' got unexpected byte size " + - std::to_string(byte_size_) + ", expected " + - std::to_string(expected_byte_size)); - } - return Error::Success; -} - Error InferInput::PrepareForRequest() { diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 64717e628..8d05b966b 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -1,4 +1,4 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -354,10 +354,6 @@ class InferInput { /// \return Error object indicating success or failure. Error SetBinaryData(const bool binary_data); - /// Validate input has data and input shape matches input data. - /// \return Error object indicating success of failure. - Error ValidateData() const; - private: #ifdef TRITON_INFERENCE_SERVER_CLIENT_CLASS friend class TRITON_INFERENCE_SERVER_CLIENT_CLASS; diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc index 02a1a888e..c9ee70125 100644 --- a/src/c++/library/grpc_client.cc +++ b/src/c++/library/grpc_client.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -1470,13 +1470,7 @@ InferenceServerGrpcClient::PreRunProcessing( int index = 0; infer_request_.mutable_raw_input_contents()->Clear(); - Error err; for (const auto input : inputs) { - err = input->ValidateData(); - if (!err.IsOk()) { - return err; - } - // Add new InferInputTensor submessages only if required, otherwise // reuse the submessages already available. auto grpc_input = (infer_request_.inputs().size() <= index) diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc index 728e49ab3..a2651f2eb 100644 --- a/src/c++/library/http_client.cc +++ b/src/c++/library/http_client.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -2116,11 +2116,6 @@ InferenceServerHttpClient::PreRunProcessing( // Add the buffers holding input tensor data bool all_inputs_are_json{true}; for (const auto this_input : inputs) { - err = this_input->ValidateData(); - if (!err.IsOk()) { - return err; - } - if (this_input->BinaryData()) { all_inputs_are_json = false; } diff --git a/src/c++/tests/CMakeLists.txt b/src/c++/tests/CMakeLists.txt index 40a3f6d61..81eb74271 100644 --- a/src/c++/tests/CMakeLists.txt +++ b/src/c++/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -70,9 +70,6 @@ install( RUNTIME DESTINATION bin ) -# -# cc_client_test -# add_executable( cc_client_test cc_client_test.cc @@ -92,34 +89,6 @@ install( RUNTIME DESTINATION bin ) -# -# client_input_test -# -add_executable( - client_input_test - client_input_test.cc - $ -) -target_include_directories( - client_input_test - PRIVATE - ${GTEST_INCLUDE_DIRS} -) -target_link_libraries( - client_input_test - PRIVATE - grpcclient_static - httpclient_static - gtest - ${GTEST_LIBRARY} - ${GTEST_MAIN_LIBRARY} - GTest::gmock -) -install( - TARGETS client_input_test - RUNTIME DESTINATION bin -) - endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC endif() diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc deleted file mode 100644 index 5b973f6d5..000000000 --- a/src/c++/tests/client_input_test.cc +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "gmock/gmock.h" -#include "grpc_client.h" -#include "gtest/gtest.h" -#include "http_client.h" -#include "shm_utils.h" - -namespace tc = triton::client; - -#define FAIL_IF_ERR(X, MSG) \ - do { \ - tc::Error err = (X); \ - if (!err.IsOk()) { \ - std::cerr << "error: " << (MSG) << ": " << err << std::endl; \ - exit(1); \ - } \ - } while (false) - -#define FAIL_IF_SUCCESS(X, MSG, ERR_MSG) \ - do { \ - tc::Error err = (X); \ - ASSERT_FALSE(err.IsOk()) << "error: " << (MSG) << ": "; \ - ASSERT_THAT(err.Message(), ::testing::HasSubstr(ERR_MSG)); \ - } while (false) - -namespace { - -template -class ClientInputTest : public ::testing::Test { - public: - ClientInputTest() : shape_{1, 16} {} - - void SetUp() override - { - std::string url; - std::string client_type; - if (std::is_same::value) { - url = "localhost:8001"; - client_type = "GRPC"; - } else if (std::is_same::value) { - url = "localhost:8000"; - client_type = "HTTP"; - } else { - ASSERT_TRUE(false) << "Unrecognized client class type '" - << typeid(ClientType).name() << "'"; - } - auto err = ClientType::Create(&this->client_, url); - ASSERT_TRUE(err.IsOk()) - << "failed to create " << client_type << " client: " << err.Message(); - - // Initialize vector input_data_ - for (size_t i = 0; i < 16; ++i) { - this->input_data_.emplace_back(i); - } - } - - std::unique_ptr client_; - std::vector input_data_; - std::vector shape_; -}; - -TYPED_TEST_SUITE_P(ClientInputTest); - -TYPED_TEST_P(ClientInputTest, AppendRaw) -{ - // Initialize the inputs with the data. 
- tc::InferInput* input0; - tc::InferInput* input1; - - FAIL_IF_ERR( - tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"), - "unable to get INPUT0"); - std::shared_ptr input0_ptr; - input0_ptr.reset(input0); - FAIL_IF_ERR( - tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"), - "unable to get INPUT1"); - std::shared_ptr input1_ptr; - input1_ptr.reset(input1); - - FAIL_IF_ERR( - input0_ptr->AppendRaw( - reinterpret_cast(&(this->input_data_[0])), - this->input_data_.size() * sizeof(int32_t)), - "unable to set data for INPUT0"); - FAIL_IF_ERR( - input1_ptr->AppendRaw( - reinterpret_cast(&(this->input_data_[0])), - this->input_data_.size() * sizeof(int32_t)), - "unable to set data for INPUT1"); - - // The inference settings. Will be using default for now. - tc::InferOptions options("simple"); - options.model_version_ = ""; - - std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; - tc::InferResult* results; - - // Test 1 - inputs[1]->SetShape({1, 15}); - FAIL_IF_SUCCESS( - this->client_->Infer(&results, options, inputs), - "expect error with inference request", - "input 'INPUT1' got unexpected byte size 64, expected 60"); - - // Test 2 - inputs[0]->SetShape({2, 8}); - inputs[1]->SetShape({2, 8}); - // Assert the request reaches the server - FAIL_IF_SUCCESS( - this->client_->Infer(&results, options, inputs), - "expect error with inference request", - "unexpected shape for input 'INPUT1' for model 'simple'. Expected " - "[-1,16], got [2,8]"); -} - -TYPED_TEST_P(ClientInputTest, SetSharedMemory) -{ - // Unregistering all shared memory regions for a clean - // start. - FAIL_IF_ERR( - this->client_->UnregisterSystemSharedMemory(), - "unable to unregister all system shared memory regions"); - FAIL_IF_ERR( - this->client_->UnregisterCudaSharedMemory(), - "unable to unregister all cuda shared memory regions"); - - // Initialize the inputs with the data. 
- tc::InferInput* input0; - tc::InferInput* input1; - size_t input_byte_size = 64; - - FAIL_IF_ERR( - tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"), - "unable to get INPUT0"); - std::shared_ptr input0_ptr; - input0_ptr.reset(input0); - FAIL_IF_ERR( - tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"), - "unable to get INPUT1"); - std::shared_ptr input1_ptr; - input1_ptr.reset(input1); - - // Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique - // integers and Input1 to all ones. - std::string shm_key = "/input_simple"; - int shm_fd_ip, *input0_shm; - FAIL_IF_ERR( - tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip), - ""); - FAIL_IF_ERR( - tc::MapSharedMemory( - shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm), - ""); - FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), ""); - int* input1_shm = (int*)(input0_shm + 16); - for (size_t i = 0; i < 16; ++i) { - *(input0_shm + i) = i; - *(input1_shm + i) = 1; - } - - FAIL_IF_ERR( - this->client_->RegisterSystemSharedMemory( - "input_data", shm_key, input_byte_size * 2), - "failed to register input shared memory region"); - - FAIL_IF_ERR( - input0_ptr->SetSharedMemory( - "input_data", input_byte_size, 0 /* offset */), - "unable to set shared memory for INPUT0"); - FAIL_IF_ERR( - input1_ptr->SetSharedMemory( - "input_data", input_byte_size, input_byte_size /* offset */), - "unable to set shared memory for INPUT1"); - - // The inference settings. Will be using default for now. 
- tc::InferOptions options("simple"); - options.model_version_ = ""; - - std::vector inputs = {input0_ptr.get(), input1_ptr.get()}; - tc::InferResult* results; - - // Test 1 - inputs[1]->SetShape({1, 15}); - FAIL_IF_SUCCESS( - this->client_->Infer(&results, options, inputs), - "expect error with inference request", - ("input 'INPUT1' got unexpected byte size " + - std::to_string(input_byte_size) + ", expected " + - std::to_string(input_byte_size - sizeof(int)))); - - // Test 2 - inputs[0]->SetShape({2, 8}); - inputs[1]->SetShape({2, 8}); - // Assert the request reaches the server - FAIL_IF_SUCCESS( - this->client_->Infer(&results, options, inputs), - "expect error with inference request", - "unexpected shape for input 'INPUT1' for model 'simple'. Expected " - "[-1,16], got [2,8]"); - - // Get shared memory regions active/registered within triton - using ClientType = TypeParam; - if constexpr (std::is_same< - ClientType, tc::InferenceServerGrpcClient>::value) { - inference::SystemSharedMemoryStatusResponse shm_status; - FAIL_IF_ERR( - this->client_->SystemSharedMemoryStatus(&shm_status), - "failed to get shared memory status"); - std::cout << "Shared Memory Status:\n" << shm_status.DebugString() << "\n"; - } else { - std::string shm_status; - FAIL_IF_ERR( - this->client_->SystemSharedMemoryStatus(&shm_status), - "failed to get shared memory status"); - std::cout << "Shared Memory Status:\n" << shm_status << "\n"; - } - - // Unregister shared memory - FAIL_IF_ERR( - this->client_->UnregisterSystemSharedMemory("input_data"), - "unable to unregister shared memory input region"); - - // Cleanup shared memory - FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), ""); - FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), ""); -} - -REGISTER_TYPED_TEST_SUITE_P(ClientInputTest, AppendRaw, SetSharedMemory); - -INSTANTIATE_TYPED_TEST_SUITE_P( - GRPC, ClientInputTest, tc::InferenceServerGrpcClient); -INSTANTIATE_TYPED_TEST_SUITE_P( - HTTP, 
ClientInputTest, tc::InferenceServerHttpClient); - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} From 6b56c3b394fb57bf539cf68d859d8d422036a24f Mon Sep 17 00:00:00 2001 From: Yingge He <157551214+yinggeh@users.noreply.github.com> Date: Tue, 30 Jul 2024 19:24:42 -0700 Subject: [PATCH 08/10] Update src/python/library/tritonclient/http/_infer_input.py Co-authored-by: Ryan McCormick --- src/python/library/tritonclient/http/_infer_input.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index cc9f259cf..af650d3ed 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -97,8 +97,8 @@ def validate_data(self): ------- None """ - # Input must set only one of the following fields: 'data', 'binary_data_size' - # in 'parameters', 'shared_memory_region' in 'parameters' + # Input must set only one of the following fields: 'data', 'binary_data_size', + # 'shared_memory_region' in 'parameters' cnt = 0 cnt += self._data != None cnt += "binary_data_size" in self._parameters From a58474193129aca58df8b6123d45bb8f5021a44a Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 2 Aug 2024 14:46:35 -0700 Subject: [PATCH 09/10] Workaround with L0_trt_reformat_free by removing shm checks --- .../library/tritonclient/grpc/_infer_input.py | 34 ++++++------------- .../library/tritonclient/http/_infer_input.py | 32 ++++++----------- 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index 89d944dff..d0975b31f 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -102,31 +102,19 @@ def validate_data(self): if cnt != 1: return + # 
Skip due to trt reformat free tensor if "shared_memory_region" in self._input.parameters: - # Using shared memory - if self._input.datatype != "BYTES": - expected_byte_size = num_elements( - self._input.shape - ) * get_data_type_byte_size(self._input.datatype) - data_byte_size = self._input.parameters[ - "shared_memory_byte_size" - ].int64_param - if data_byte_size != expected_byte_size: - raise_error( - "input '{}' got unexpected byte size {}, expected {}".format( - self._input.name, data_byte_size, expected_byte_size - ) - ) - else: - # Not using shared memory - expected_num_elements = num_elements(self._input.shape) - data_num_elements = num_elements(self._data_shape) - if expected_num_elements != data_num_elements: - raise_error( - "input '{}' got unexpected elements count {}, expected {}".format( - self._input.name, data_num_elements, expected_num_elements - ) + return + + # Not using shared memory + expected_num_elements = num_elements(self._input.shape) + data_num_elements = num_elements(self._data_shape) + if expected_num_elements != data_num_elements: + raise_error( + "input '{}' got unexpected elements count {}, expected {}".format( + self._input.name, data_num_elements, expected_num_elements ) + ) return def set_shape(self, shape): diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index af650d3ed..e0d3f19fb 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -106,29 +106,19 @@ def validate_data(self): if cnt != 1: return + # Skip due to trt reformat free tensor if "shared_memory_region" in self._parameters: - # Using shared memory - if self._datatype != "BYTES": - expected_byte_size = num_elements( - self._shape - ) * get_data_type_byte_size(self._datatype) - data_byte_size = self._parameters["shared_memory_byte_size"] - if data_byte_size != expected_byte_size: - raise_error( - "input '{}' got unexpected byte 
size {}, expected {}".format( - self._name, data_byte_size, expected_byte_size - ) - ) - else: - # Not using shared memory - expected_num_elements = num_elements(self._shape) - data_num_elements = num_elements(self._data_shape) - if expected_num_elements != data_num_elements: - raise_error( - "input '{}' got unexpected elements count {}, expected {}".format( - self._name, data_num_elements, expected_num_elements - ) + return + + # Not using shared memory + expected_num_elements = num_elements(self._shape) + data_num_elements = num_elements(self._data_shape) + if expected_num_elements != data_num_elements: + raise_error( + "input '{}' got unexpected elements count {}, expected {}".format( + self._name, data_num_elements, expected_num_elements ) + ) return def set_shape(self, shape): From 5889b8ed7e85a860e6991fbf5e042d3dd8c10b08 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Mon, 5 Aug 2024 10:28:35 -0700 Subject: [PATCH 10/10] Remove unused function --- .../library/tritonclient/grpc/_infer_input.py | 2 +- .../library/tritonclient/http/_infer_input.py | 1 - .../library/tritonclient/utils/__init__.py | 48 ------------------- 3 files changed, 1 insertion(+), 50 deletions(-) diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py index d0975b31f..559f60889 100755 --- a/src/python/library/tritonclient/grpc/_infer_input.py +++ b/src/python/library/tritonclient/grpc/_infer_input.py @@ -30,7 +30,7 @@ from tritonclient.grpc import service_pb2 from tritonclient.utils import * -from ._utils import get_data_type_byte_size, num_elements, raise_error +from ._utils import num_elements, raise_error class InferInput: diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py index e0d3f19fb..f237edcfa 100755 --- a/src/python/library/tritonclient/http/_infer_input.py +++ b/src/python/library/tritonclient/http/_infer_input.py @@ -27,7 +27,6 @@ # OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np from tritonclient.utils import ( - get_data_type_byte_size, np_to_triton_dtype, num_elements, raise_error, diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py index 40dffffcf..304bbfec9 100755 --- a/src/python/library/tritonclient/utils/__init__.py +++ b/src/python/library/tritonclient/utils/__init__.py @@ -211,54 +211,6 @@ def triton_to_np_dtype(dtype): return None -def get_data_type_byte_size(dtype): - """ - Get the size of a given datatype in bytes. - - Parameters - ---------- - dtype : str - The data-type - - Returns - ------- - int - The size in bytes of the datatype, or 0 if size cannot be determined - (for example, values of type BYTES have variable length and so size - cannot be determine just from the type) - """ - - if dtype == "BOOL": - return 1 - elif dtype == "INT8": - return 1 - elif dtype == "INT16": - return 2 - elif dtype == "INT32": - return 4 - elif dtype == "INT64": - return 8 - elif dtype == "UINT8": - return 1 - elif dtype == "UINT16": - return 2 - elif dtype == "UINT32": - return 4 - elif dtype == "UINT64": - return 8 - elif dtype == "FP16": - return 2 - elif dtype == "FP32": - return 4 - elif dtype == "FP64": - return 8 - elif dtype == "BYTES": - return 0 - elif dtype == "BF16": - return 2 - return 0 - - def serialize_byte_tensor(input_tensor): """ Serializes a bytes tensor into a flat numpy array of length prepended