From 3178f99b66572e75addedaed719791effa8aeb36 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 5 Jul 2024 05:37:16 -0700
Subject: [PATCH 01/10] Add client checks

---
 .../library/tritonclient/grpc/_infer_input.py | 47 ++++++++++++-
 .../library/tritonclient/grpc/_utils.py       |  1 +
 .../library/tritonclient/http/_infer_input.py | 46 +++++++++++++
 .../library/tritonclient/http/_utils.py       |  6 +-
 .../library/tritonclient/utils/__init__.py    | 69 +++++++++++++++++++
 5 files changed, 167 insertions(+), 2 deletions(-)

diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py
index aff73bc18..b070c2153 100755
--- a/src/python/library/tritonclient/grpc/_infer_input.py
+++ b/src/python/library/tritonclient/grpc/_infer_input.py
@@ -30,7 +30,7 @@
 from tritonclient.grpc import service_pb2
 from tritonclient.utils import *
 
-from ._utils import raise_error
+from ._utils import get_data_type_byte_size, num_elements, raise_error
 
 
 class InferInput:
@@ -54,6 +54,7 @@ def __init__(self, name, shape, datatype):
         self._input.ClearField("shape")
         self._input.shape.extend(shape)
         self._input.datatype = datatype
+        self._data_shape = None
         self._raw_content = None
 
     def name(self):
@@ -86,6 +87,48 @@ def shape(self):
         """
         return self._input.shape
 
+    def is_ready(self):
+        """Get the status of input.
+
+        Returns
+        -------
+        bool
+            The status of input
+        """
+        # Input must set only one of the following fields: '_raw_content', 'shared_memory_region' in '_input.parameters'
+        cnt = 0
+        cnt += self._raw_content != None
+        cnt += "shared_memory_region" in self._input.parameters
+        if cnt != 1:
+            return
+
+        if "shared_memory_region" in self._input.parameters:
+            # Using shared memory
+            if self._input.datatype != "BYTES":
+                expected_byte_size = num_elements(
+                    self._input.shape
+                ) * get_data_type_byte_size(self._input.datatype)
+                data_byte_size = self._input.parameters[
+                    "shared_memory_byte_size"
+                ].int64_param
+                if data_byte_size != expected_byte_size:
+                    raise_error(
+                        "'{}' got unexpected byte size {}, expected {}".format(
+                            self._input.name, data_byte_size, expected_byte_size
+                        )
+                    )
+        else:
+            # Not using shared memory
+            expected_num_elements = num_elements(self._input.shape)
+            data_num_elements = num_elements(self._data_shape)
+            if expected_num_elements != data_num_elements:
+                raise_error(
+                    "'{}' got unexpected elements count {}, expected {}".format(
+                        self._input.name, data_num_elements, expected_num_elements
+                    )
+                )
+        return
+
     def set_shape(self, shape):
         """Set the shape of input.
 
@@ -171,6 +214,7 @@ def set_data_from_numpy(self, input_tensor):
                 self._raw_content = b""
         else:
             self._raw_content = input_tensor.tobytes()
+        self._data_shape = input_tensor.shape
         return self
 
     def set_shared_memory(self, region_name, byte_size, offset=0):
@@ -193,6 +237,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0):
         """
         self._input.ClearField("contents")
         self._raw_content = None
+        self._data_shape = None
 
         self._input.parameters["shared_memory_region"].string_param = region_name
         self._input.parameters["shared_memory_byte_size"].int64_param = byte_size
diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py
index dae6d71f8..d4d897300 100755
--- a/src/python/library/tritonclient/grpc/_utils.py
+++ b/src/python/library/tritonclient/grpc/_utils.py
@@ -96,6 +96,7 @@ def _get_inference_request(
     if request_id != "":
         request.id = request_id
     for infer_input in inputs:
+        infer_input.is_ready()
         request.inputs.extend([infer_input._get_tensor()])
         if infer_input._get_content() is not None:
             request.raw_input_contents.extend([infer_input._get_content()])
diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index 85beabd2f..02bf011d6 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -27,7 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import numpy as np
 from tritonclient.utils import (
+    get_data_type_byte_size,
     np_to_triton_dtype,
+    num_elements,
     raise_error,
     serialize_bf16_tensor,
     serialize_byte_tensor,
@@ -55,6 +57,7 @@ def __init__(self, name, shape, datatype):
         self._datatype = datatype
         self._parameters = {}
         self._data = None
+        self._data_shape = None
         self._raw_data = None
 
     def name(self):
@@ -87,6 +90,47 @@ def shape(self):
         """
         return self._shape
 
+    def is_ready(self):
+        """Get the status of input.
+
+        Returns
+        -------
+        bool
+            The status of input
+        """
+        # Input must set only one of the following fields: 'data', 'binary_data_size' in 'parameters', 'shared_memory_region' in 'parameters'
+        cnt = 0
+        cnt += self._data != None
+        cnt += "binary_data_size" in self._parameters
+        cnt += "shared_memory_region" in self._parameters
+        if cnt != 1:
+            return
+
+        if "shared_memory_region" in self._parameters:
+            # Using shared memory
+            if self._datatype != "BYTES":
+                expected_byte_size = num_elements(
+                    self._shape
+                ) * get_data_type_byte_size(self._datatype)
+                data_byte_size = self._parameters["shared_memory_byte_size"]
+                if data_byte_size != expected_byte_size:
+                    raise_error(
+                        "'{}' got unexpected byte size {}, expected {}".format(
+                            self._name, data_byte_size, expected_byte_size
+                        )
+                    )
+        else:
+            # Not using shared memory
+            expected_num_elements = num_elements(self._shape)
+            data_num_elements = num_elements(self._data_shape)
+            if expected_num_elements != data_num_elements:
+                raise_error(
+                    "'{}' got unexpected elements count {}, expected {}".format(
+                        self._name, data_num_elements, expected_num_elements
+                    )
+                )
+        return
+
     def set_shape(self, shape):
         """Set the shape of input.
 
@@ -211,6 +255,7 @@ def set_data_from_numpy(self, input_tensor, binary_data=True):
             else:
                 self._raw_data = input_tensor.tobytes()
             self._parameters["binary_data_size"] = len(self._raw_data)
+        self._data_shape = input_tensor.shape
         return self
 
     def set_shared_memory(self, region_name, byte_size, offset=0):
@@ -232,6 +277,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0):
             The updated input
         """
         self._data = None
+        self._data_shape = None
         self._raw_data = None
         self._parameters.pop("binary_data_size", None)
 
diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py
index 0f4456c9a..015cdf4ec 100755
--- a/src/python/library/tritonclient/http/_utils.py
+++ b/src/python/library/tritonclient/http/_utils.py
@@ -106,7 +106,11 @@ def _get_inference_request(
     if timeout is not None:
         parameters["timeout"] = timeout
 
-    infer_request["inputs"] = [this_input._get_tensor() for this_input in inputs]
+    infer_request["inputs"] = []
+    for infer_input in inputs:
+        infer_input.is_ready()
+        infer_request["inputs"].append(infer_input._get_tensor())
+
     if outputs:
         infer_request["outputs"] = [
             this_output._get_tensor() for this_output in outputs
diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py
index 7f3079c66..b46685ef5 100755
--- a/src/python/library/tritonclient/utils/__init__.py
+++ b/src/python/library/tritonclient/utils/__init__.py
@@ -40,6 +40,27 @@ def raise_error(msg):
     raise InferenceServerException(msg=msg) from None
 
 
+def num_elements(shape):
+    """
+    Calculate the number of elements in an array given its shape.
+
+    Parameters
+    ----------
+    shape : list or tuple
+        Shape of the array.
+
+    Returns
+    -------
+    int
+        Number of elements in the array.
+    """
+
+    num_elements = 1
+    for dim in shape:
+        num_elements *= dim
+    return num_elements
+
+
 def serialized_byte_size(tensor_value):
     """
     Get the underlying number of bytes for a numpy ndarray.
@@ -190,6 +211,54 @@ def triton_to_np_dtype(dtype):
     return None
 
 
+def get_data_type_byte_size(dtype):
+    """
+    Get the size of a given datatype in bytes.
+
+    Parameters
+    ----------
+    dtype : str
+        The data-type
+
+    Returns
+    -------
+    int
+        The size in bytes of the datatype, or 0 if size cannot be determined
+        (for example, values of type BYTES have variable length and so size
+        cannot be determined just from the type)
+    """
+
+    if dtype == "BOOL":
+        return 1
+    elif dtype == "INT8":
+        return 1
+    elif dtype == "INT16":
+        return 2
+    elif dtype == "INT32":
+        return 4
+    elif dtype == "INT64":
+        return 8
+    elif dtype == "UINT8":
+        return 1
+    elif dtype == "UINT16":
+        return 2
+    elif dtype == "UINT32":
+        return 4
+    elif dtype == "UINT64":
+        return 8
+    elif dtype == "FP16":
+        return 2
+    elif dtype == "FP32":
+        return 4
+    elif dtype == "FP64":
+        return 8
+    elif dtype == "BYTES":
+        return 0
+    elif dtype == "BF16":
+        return 2
+    return 0
+
+
 def serialize_byte_tensor(input_tensor):
     """
     Serializes a bytes tensor into a flat numpy array of length prepended

From 7210d0053f1848dafbd4cfb6d534ae5989c43987 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Mon, 8 Jul 2024 18:10:59 -0700
Subject: [PATCH 02/10] Add C++ client tests

---
 src/c++/library/CMakeLists.txt                |  11 +
 src/c++/library/common.cc                     | 111 +++++++
 src/c++/library/common.h                      |   9 +
 src/c++/library/grpc_client.cc                |   6 +
 src/c++/library/http_client.cc                |   5 +
 src/c++/tests/CMakeLists.txt                  |  31 ++
 src/c++/tests/client_input_test.cc            | 299 ++++++++++++++++++
 .../library/tritonclient/grpc/_infer_input.py |  10 +-
 .../library/tritonclient/grpc/_utils.py       |   2 +-
 .../library/tritonclient/http/_infer_input.py |  10 +-
 .../library/tritonclient/http/_utils.py       |   2 +-
 11 files changed, 484 insertions(+), 12 deletions(-)
 create mode 100644 src/c++/tests/client_input_test.cc

diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt
index cdee03e1a..62bd9bcff 100644
--- a/src/c++/library/CMakeLists.txt
+++ b/src/c++/library/CMakeLists.txt
@@ -122,6 +122,7 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
       grpcclient_static
       PRIVATE gRPC::grpc++
       PRIVATE gRPC::grpc
+      PRIVATE triton-common-model-config
       PUBLIC protobuf::libprotobuf
       PUBLIC Threads::Threads
   )
@@ -275,6 +276,10 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
       http-client-library EXCLUDE_FROM_ALL OBJECT
       ${REQUEST_SRCS} ${REQUEST_HDRS}
   )
+  add_dependencies(
+    http-client-library
+    proto-library
+  )
 
   if (NOT WIN32)
     set_property(
@@ -287,12 +292,14 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
     http-client-library
     PUBLIC
       triton-common-json        # from repo-common
+      triton-common-model-config
   )
 
   # libhttpclient_static.a
   add_library(
       httpclient_static STATIC
       $<TARGET_OBJECTS:http-client-library>
+      $<TARGET_OBJECTS:proto-library>
   )
   add_library(
       TritonClient::httpclient_static ALIAS httpclient_static
@@ -301,6 +308,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
   target_link_libraries(
       httpclient_static
       PRIVATE triton-common-json
+      PRIVATE triton-common-model-config
       PUBLIC CURL::libcurl
       PUBLIC Threads::Threads
   )
@@ -316,6 +324,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
   add_library(
       httpclient SHARED
       $<TARGET_OBJECTS:http-client-library>
+      $<TARGET_OBJECTS:proto-library>
   )
   add_library(
       TritonClient::httpclient ALIAS httpclient
@@ -333,6 +342,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
   target_link_libraries(
       httpclient
       PRIVATE triton-common-json
+      PRIVATE triton-common-model-config
       PUBLIC CURL::libcurl
       PUBLIC Threads::Threads
   )
@@ -358,6 +368,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
         $<INSTALL_INTERFACE:include>
         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
         $<TARGET_PROPERTY:CURL::libcurl,INTERFACE_INCLUDE_DIRECTORIES>
+        $<TARGET_PROPERTY:proto-library,INCLUDE_DIRECTORIES>
       PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}
     )
diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc
index 27da69888..8bf77c7d2 100644
--- a/src/c++/library/common.cc
+++ b/src/c++/library/common.cc
@@ -26,6 +26,10 @@
 
 #include "common.h"
 
+#include <numeric>
+
+#include "triton/common/model_config.h"
+
 namespace triton { namespace client {
 
 //==============================================================================
@@ -232,6 +236,113 @@ InferInput::SetBinaryData(const bool binary_data)
   return Error::Success;
 }
 
+Error
+InferInput::GetStringCount(size_t* str_cnt) const
+{
+  int64_t str_checked = 0;
+  size_t remaining_str_size = 0;
+
+  size_t next_buf_idx = 0;
+  const size_t buf_cnt = bufs_.size();
+
+  const uint8_t* buf = nullptr;
+  size_t remaining_buf_size = 0;
+
+  // Validate elements until all buffers have been fully processed.
+  while (remaining_buf_size || next_buf_idx < buf_cnt) {
+    // Get the next buf if not currently processing one.
+    if (!remaining_buf_size) {
+      // Reset remaining buf size and pointers for next buf.
+      buf = bufs_[next_buf_idx];
+      remaining_buf_size = buf_byte_sizes_[next_buf_idx];
+      next_buf_idx++;
+    }
+
+    constexpr size_t kStringSizeIndicator = sizeof(uint32_t);
+    // Get the next element if not currently processing one.
+    if (!remaining_str_size) {
+      // FIXME: Assume the string element's byte size indicator is not spread
+      // across buf boundaries for simplicity. Also needs better log msg.
+      if (remaining_buf_size < kStringSizeIndicator) {
+        return Error("element byte size indicator exceeds the end of the buf.");
+      }
+
+      // Start the next element and reset the remaining element size.
+      remaining_str_size = *(reinterpret_cast<const uint32_t*>(buf));
+      str_checked++;
+
+      // Advance pointer and remainder by the indicator size.
+      buf += kStringSizeIndicator;
+      remaining_buf_size -= kStringSizeIndicator;
+    }
+
+    // If the remaining buf fits it: consume the rest of the element, proceed
+    // to the next element.
+    if (remaining_buf_size >= remaining_str_size) {
+      buf += remaining_str_size;
+      remaining_buf_size -= remaining_str_size;
+      remaining_str_size = 0;
+    }
+    // Otherwise the remaining element is larger: consume the rest of the
+    // buf, proceed to the next buf.
+    else {
+      remaining_str_size -= remaining_buf_size;
+      remaining_buf_size = 0;
+    }
+  }
+
+  // FIXME: If more than expected, should stop earlier
+  // Report the number of elements found; the caller validates the count.
+  *str_cnt = str_checked;
+  return Error::Success;
+}
+
+Error
+InferInput::ValidateData() const
+{
+  inference::DataType datatype =
+      triton::common::ProtocolStringToDataType(datatype_);
+  if (io_type_ == SHARED_MEMORY) {
+    if (datatype == inference::DataType::TYPE_STRING) {
+      // TODO Didn't find any shm and BYTES inputs inference example
+    } else {
+      int64_t expected_byte_size =
+          triton::common::GetByteSize(datatype, shape_);
+      if ((int64_t)byte_size_ != expected_byte_size) {
+        return Error(
+            "'" + name_ + "' got unexpected byte size " +
+            std::to_string(byte_size_) + ", expected " +
+            std::to_string(expected_byte_size));
+      }
+    }
+  } else {
+    if (datatype == inference::DataType::TYPE_STRING) {
+      int64_t expected_str_cnt = triton::common::GetElementCount(shape_);
+      size_t str_cnt;
+      Error err = GetStringCount(&str_cnt);
+      if (!err.IsOk()) {
+        return err;
+      }
+      if ((int64_t)str_cnt != expected_str_cnt) {
+        return Error(
+            "'" + name_ + "' got unexpected string count " +
+            std::to_string(str_cnt) + ", expected " +
+            std::to_string(expected_str_cnt));
+      }
+    } else {
+      int64_t expected_byte_size =
+          triton::common::GetByteSize(datatype, shape_);
+      if ((int64_t)byte_size_ != expected_byte_size) {
+        return Error(
+            "'" + name_ + "' got unexpected byte size " +
+            std::to_string(byte_size_) + ", expected " +
+            std::to_string(expected_byte_size));
+      }
+    }
+  }
+  return Error::Success;
+}
+
 Error
 InferInput::PrepareForRequest()
 {
diff --git a/src/c++/library/common.h b/src/c++/library/common.h
index 8d05b966b..e132ee51a 100644
--- a/src/c++/library/common.h
+++ b/src/c++/library/common.h
@@ -354,6 +354,15 @@ class InferInput {
   /// \return Error object indicating success or failure.
   Error SetBinaryData(const bool binary_data);
 
+  /// Gets the total number of strings in this input data.
+  /// \param str_cnt Returns the number of strings.
+  /// \return Error object indicating success or failure.
+  Error GetStringCount(size_t* str_cnt) const;
+
+  /// Validate input has data and input shape matches input data.
+  /// \return Error object indicating success or failure.
+  Error ValidateData() const;
+
  private:
 #ifdef TRITON_INFERENCE_SERVER_CLIENT_CLASS
   friend class TRITON_INFERENCE_SERVER_CLIENT_CLASS;
diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc
index c9ee70125..d2971ead6 100644
--- a/src/c++/library/grpc_client.cc
+++ b/src/c++/library/grpc_client.cc
@@ -1470,7 +1470,13 @@ InferenceServerGrpcClient::PreRunProcessing(
 
   int index = 0;
   infer_request_.mutable_raw_input_contents()->Clear();
+  Error err;
   for (const auto input : inputs) {
+    err = input->ValidateData();
+    if (!err.IsOk()) {
+      return err;
+    }
+
     // Add new InferInputTensor submessages only if required, otherwise
     // reuse the submessages already available.
     auto grpc_input = (infer_request_.inputs().size() <= index)
diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc
index 9f2f5ab5e..55a846839 100644
--- a/src/c++/library/http_client.cc
+++ b/src/c++/library/http_client.cc
@@ -2117,6 +2117,11 @@ InferenceServerHttpClient::PreRunProcessing(
   // Add the buffers holding input tensor data
   bool all_inputs_are_json{true};
   for (const auto this_input : inputs) {
+    err = this_input->ValidateData();
+    if (!err.IsOk()) {
+      return err;
+    }
+
     if (this_input->BinaryData()) {
       all_inputs_are_json = false;
     }
diff --git a/src/c++/tests/CMakeLists.txt b/src/c++/tests/CMakeLists.txt
index 81eb74271..efa4f45f3 100644
--- a/src/c++/tests/CMakeLists.txt
+++ b/src/c++/tests/CMakeLists.txt
@@ -70,6 +70,9 @@ install(
   RUNTIME DESTINATION bin
 )
 
+#
+# cc_client_test
+#
 add_executable(
   cc_client_test
   cc_client_test.cc
@@ -89,6 +92,34 @@ install(
   RUNTIME DESTINATION bin
 )
 
+#
+# client_input_test
+#
+add_executable(
+  client_input_test
+  client_input_test.cc
+  $<TARGET_OBJECTS:shm-utils-library>
+)
+target_include_directories(
+  client_input_test
+  PRIVATE
+    ${GTEST_INCLUDE_DIRS}
+)
+target_link_libraries(
+  client_input_test
+  PRIVATE
+    grpcclient_static
+    httpclient_static
+    gtest
+    ${GTEST_LIBRARY}
+    ${GTEST_MAIN_LIBRARY}
+    GTest::gmock
+)
+install(
+  TARGETS client_input_test
+  RUNTIME DESTINATION bin
+)
+
 endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC
 
 endif()
diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc
new file mode 100644
index 000000000..0ae346afb
--- /dev/null
+++ b/src/c++/tests/client_input_test.cc
@@ -0,0 +1,299 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gmock/gmock.h"
+#include "grpc_client.h"
+#include "gtest/gtest.h"
+#include "http_client.h"
+#include "shm_utils.h"
+
+namespace tc = triton::client;
+
+#define FAIL_IF_ERR(X, MSG)                                        \
+  do {                                                             \
+    tc::Error err = (X);                                           \
+    if (!err.IsOk()) {                                             \
+      std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
+      exit(1);                                                     \
+    }                                                              \
+  } while (false)
+
+#define FAIL_IF_SUCCESS(X, MSG, ERR_MSG)                       \
+  do {                                                         \
+    tc::Error err = (X);                                       \
+    ASSERT_FALSE(err.IsOk()) << "error: " << (MSG) << ": ";    \
+    ASSERT_THAT(err.Message(), ::testing::HasSubstr(ERR_MSG)); \
+  } while (false)
+
+namespace {
+
+template <typename ClientType>
+class ClientInputTest : public ::testing::Test {
+ public:
+  ClientInputTest() : shape_{1, 16} {}
+
+  void SetUp() override
+  {
+    std::string url;
+    std::string client_type;
+    if (std::is_same<ClientType, tc::InferenceServerGrpcClient>::value) {
+      url = "localhost:8001";
+      client_type = "GRPC";
+    } else if (std::is_same<ClientType, tc::InferenceServerHttpClient>::value) {
+      url = "localhost:8000";
+      client_type = "HTTP";
+    } else {
+      ASSERT_TRUE(false) << "Unrecognized client class type '"
+                         << typeid(ClientType).name() << "'";
+    }
+    auto err = ClientType::Create(&this->client_, url);
+    ASSERT_TRUE(err.IsOk())
+        << "failed to create " << client_type << " client: " << err.Message();
+
+    // Initialize vector input_data_
+    for (size_t i = 0; i < 16; ++i) {
+      this->input_data_.emplace_back(i);
+    }
+  }
+
+  std::unique_ptr<ClientType> client_;
+  std::vector<int32_t> input_data_;
+  std::vector<int64_t> shape_;
+};
+
+TYPED_TEST_SUITE_P(ClientInputTest);
+
+TYPED_TEST_P(ClientInputTest, AppendRaw)
+{
+  // Initialize the inputs with the data.
+  tc::InferInput* input0;
+  tc::InferInput* input1;
+
+  FAIL_IF_ERR(
+      tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"),
+      "unable to get INPUT0");
+  std::shared_ptr<tc::InferInput> input0_ptr;
+  input0_ptr.reset(input0);
+  FAIL_IF_ERR(
+      tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"),
+      "unable to get INPUT1");
+  std::shared_ptr<tc::InferInput> input1_ptr;
+  input1_ptr.reset(input1);
+
+  FAIL_IF_ERR(
+      input0_ptr->AppendRaw(
+          reinterpret_cast<uint8_t*>(&(this->input_data_[0])),
+          this->input_data_.size() * sizeof(int32_t)),
+      "unable to set data for INPUT0");
+  FAIL_IF_ERR(
+      input1_ptr->AppendRaw(
+          reinterpret_cast<uint8_t*>(&(this->input_data_[0])),
+          this->input_data_.size() * sizeof(int32_t)),
+      "unable to set data for INPUT1");
+
+  // The inference settings. Will be using default for now.
+  tc::InferOptions options("simple");
+  options.model_version_ = "";
+
+  std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
+  tc::InferResult* results;
+  inputs[1]->SetShape({1, 15});
+  FAIL_IF_SUCCESS(
+      this->client_->Infer(&results, options, inputs),
+      "expect error with inference request",
+      "'INPUT1' got unexpected byte size 64, expected 60");
+
+  // Check error message and verify the request reaches the server
+  inputs[1]->SetShape({2, 8});
+  FAIL_IF_SUCCESS(
+      this->client_->Infer(&results, options, inputs),
+      "expect error with inference request",
+      "input 'INPUT0' batch size does not match other inputs for 'simple'");
+}
+
+TYPED_TEST_P(ClientInputTest, SetSharedMemory)
+{
+  // Unregistering all shared memory regions for a clean
+  // start.
+  FAIL_IF_ERR(
+      this->client_->UnregisterSystemSharedMemory(),
+      "unable to unregister all system shared memory regions");
+  FAIL_IF_ERR(
+      this->client_->UnregisterCudaSharedMemory(),
+      "unable to unregister all cuda shared memory regions");
+
+  // Initialize the inputs with the data.
+  tc::InferInput* input0;
+  tc::InferInput* input1;
+  size_t input_byte_size = 64;
+
+  FAIL_IF_ERR(
+      tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"),
+      "unable to get INPUT0");
+  std::shared_ptr<tc::InferInput> input0_ptr;
+  input0_ptr.reset(input0);
+  FAIL_IF_ERR(
+      tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"),
+      "unable to get INPUT1");
+  std::shared_ptr<tc::InferInput> input1_ptr;
+  input1_ptr.reset(input1);
+
+  // Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
+  // integers and Input1 to all ones.
+  std::string shm_key = "/input_simple";
+  int shm_fd_ip, *input0_shm;
+  FAIL_IF_ERR(
+      tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
+      "");
+  FAIL_IF_ERR(
+      tc::MapSharedMemory(
+          shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
+      "");
+  FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), "");
+  int* input1_shm = (int*)(input0_shm + 16);
+  for (size_t i = 0; i < 16; ++i) {
+    *(input0_shm + i) = i;
+    *(input1_shm + i) = 1;
+  }
+
+  FAIL_IF_ERR(
+      this->client_->RegisterSystemSharedMemory(
+          "input_data", shm_key, input_byte_size * 2),
+      "failed to register input shared memory region");
+
+  FAIL_IF_ERR(
+      input0_ptr->SetSharedMemory(
+          "input_data", input_byte_size, 0 /* offset */),
+      "unable to set shared memory for INPUT0");
+  FAIL_IF_ERR(
+      input1_ptr->SetSharedMemory(
+          "input_data", input_byte_size, input_byte_size /* offset */),
+      "unable to set shared memory for INPUT1");
+
+  // The inference settings. Will be using default for now.
+  tc::InferOptions options("simple");
+  options.model_version_ = "";
+
+  std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
+  inputs[1]->SetShape({1, 15});
+
+  tc::InferResult* results;
+  FAIL_IF_SUCCESS(
+      this->client_->Infer(&results, options, inputs),
+      "expect error with inference request",
+      ("'INPUT1' got unexpected byte size " + std::to_string(input_byte_size) +
+       ", expected " + std::to_string(input_byte_size - sizeof(int))));
+
+  // Get shared memory regions active/registered within triton
+  // std::string shm_status;
+  // FAIL_IF_ERR(
+  //     this->client_->SystemSharedMemoryStatus(&shm_status),
+  //     "failed to get shared memory status");
+  // std::cout << "Shared Memory Status:\n" << shm_status << "\n";
+
+  // Unregister shared memory
+  FAIL_IF_ERR(
+      this->client_->UnregisterSystemSharedMemory("input_data"),
+      "unable to unregister shared memory input region");
+
+  // Cleanup shared memory
+  FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
+  FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
+}
+
+TYPED_TEST_P(ClientInputTest, AppendString)
+{
+  // Create the data for the two input tensors. Initialize the first
+  // to unique integers and the second to all ones. The input tensors
+  // are the string representation of these values.
+  std::vector<std::string> input0_data(16);
+  std::vector<std::string> input1_data(16);
+  for (size_t i = 0; i < 16; ++i) {
+    input0_data[i] = std::to_string(i);
+    input1_data[i] = std::to_string(1);
+  }
+
+  std::vector<int64_t> shape{1, 16};
+
+  // Initialize the inputs with the data.
+  tc::InferInput* input0;
+  tc::InferInput* input1;
+
+  FAIL_IF_ERR(
+      tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"),
+      "unable to get INPUT0");
+  std::shared_ptr<tc::InferInput> input0_ptr;
+  input0_ptr.reset(input0);
+  FAIL_IF_ERR(
+      tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"),
+      "unable to get INPUT1");
+  std::shared_ptr<tc::InferInput> input1_ptr;
+  input1_ptr.reset(input1);
+
+  FAIL_IF_ERR(
+      input0_ptr->AppendFromString(input0_data),
+      "unable to set data for INPUT0");
+  FAIL_IF_ERR(
+      input1_ptr->AppendFromString(input1_data),
+      "unable to set data for INPUT1");
+
+  // The inference settings. Will be using default for now.
+  tc::InferOptions options("simple_string");
+  options.model_version_ = "";
+
+  std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
+  tc::InferResult* results;
+  input1_ptr->SetShape({1, 15});
+  FAIL_IF_SUCCESS(
+      this->client_->Infer(&results, options, inputs),
+      "expect error with inference request",
+      "'INPUT1' got unexpected string count 16, expected 15");
+
+  // Check error message and verify the request reaches the server
+  inputs[1]->SetShape({2, 8});
+  FAIL_IF_SUCCESS(
+      this->client_->Infer(&results, options, inputs),
+      "expect error with inference request",
+      "input 'INPUT0' batch size does not match other inputs for "
+      "'simple_string'");
+}
+
+REGISTER_TYPED_TEST_SUITE_P(
+    ClientInputTest, AppendRaw, SetSharedMemory, AppendString);
+
+INSTANTIATE_TYPED_TEST_SUITE_P(
+    GRPC, ClientInputTest, tc::InferenceServerGrpcClient);
+INSTANTIATE_TYPED_TEST_SUITE_P(
+    HTTP, ClientInputTest, tc::InferenceServerHttpClient);
+
+}  // namespace
+
+int
+main(int argc, char** argv)
+{
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py
index b070c2153..0711ef792 100755
--- a/src/python/library/tritonclient/grpc/_infer_input.py
+++ b/src/python/library/tritonclient/grpc/_infer_input.py
@@ -87,15 +87,15 @@ def shape(self):
         """
         return self._input.shape
 
-    def is_ready(self):
-        """Get the status of input.
+    def validate_data(self):
+        """Validate input has data and input shape matches input data.
 
         Returns
         -------
-        bool
-            The status of input
+        None
         """
-        # Input must set only one of the following fields: '_raw_content', 'shared_memory_region' in '_input.parameters'
+        # Input must set only one of the following fields: '_raw_content',
+        # 'shared_memory_region' in '_input.parameters'
         cnt = 0
         cnt += self._raw_content != None
         cnt += "shared_memory_region" in self._input.parameters
diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py
index d4d897300..8b40f5aa3 100755
--- a/src/python/library/tritonclient/grpc/_utils.py
+++ b/src/python/library/tritonclient/grpc/_utils.py
@@ -96,7 +96,7 @@ def _get_inference_request(
     if request_id != "":
         request.id = request_id
     for infer_input in inputs:
-        infer_input.is_ready()
+        infer_input.validate_data()
         request.inputs.extend([infer_input._get_tensor()])
         if infer_input._get_content() is not None:
             request.raw_input_contents.extend([infer_input._get_content()])
diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index 02bf011d6..6c0aaade5 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -90,15 +90,15 @@ def shape(self):
         """
         return self._shape
 
-    def is_ready(self):
-        """Get the status of input.
+    def validate_data(self):
+        """Validate input has data and input shape matches input data.
 
         Returns
         -------
-        bool
-            The status of input
+        None
         """
-        # Input must set only one of the following fields: 'data', 'binary_data_size' in 'parameters', 'shared_memory_region' in 'parameters'
+        # Input must set only one of the following fields: 'data', 'binary_data_size'
+        # in 'parameters', 'shared_memory_region' in 'parameters'
         cnt = 0
         cnt += self._data != None
         cnt += "binary_data_size" in self._parameters
diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py
index 015cdf4ec..2b1d37d76 100755
--- a/src/python/library/tritonclient/http/_utils.py
+++ b/src/python/library/tritonclient/http/_utils.py
@@ -108,7 +108,7 @@ def _get_inference_request(
 
     infer_request["inputs"] = []
     for infer_input in inputs:
-        infer_input.is_ready()
+        infer_input.validate_data()
         infer_request["inputs"].append(infer_input._get_tensor())
 
     if outputs:

From 9c2941bb51f455b10ac5398faad305ee899f77ce Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Mon, 8 Jul 2024 18:26:47 -0700
Subject: [PATCH 03/10] Update copyrights

---
 src/c++/library/CMakeLists.txt                       | 2 +-
 src/c++/library/common.cc                            | 2 +-
 src/c++/library/common.h                             | 2 +-
 src/c++/library/grpc_client.cc                       | 2 +-
 src/c++/library/http_client.cc                       | 2 +-
 src/c++/tests/CMakeLists.txt                         | 2 +-
 src/python/library/tritonclient/grpc/_infer_input.py | 2 +-
 src/python/library/tritonclient/grpc/_utils.py       | 2 +-
 src/python/library/tritonclient/http/_infer_input.py | 2 +-
 src/python/library/tritonclient/http/_utils.py       | 2 +-
 src/python/library/tritonclient/utils/__init__.py    | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt
index 62bd9bcff..7d8a7c29f 100644
--- a/src/c++/library/CMakeLists.txt
+++ b/src/c++/library/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc
index 8bf77c7d2..5e3e7c517 100644
--- a/src/c++/library/common.cc
+++ b/src/c++/library/common.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
diff --git a/src/c++/library/common.h b/src/c++/library/common.h
index e132ee51a..8ed9f7ab3 100644
--- a/src/c++/library/common.h
+++ b/src/c++/library/common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc
index d2971ead6..02a1a888e 100644
--- a/src/c++/library/grpc_client.cc
+++ b/src/c++/library/grpc_client.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc
index 55a846839..343263d6c 100644
--- a/src/c++/library/http_client.cc
+++ b/src/c++/library/http_client.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
diff --git a/src/c++/tests/CMakeLists.txt b/src/c++/tests/CMakeLists.txt
index efa4f45f3..40a3f6d61 100644
--- a/src/c++/tests/CMakeLists.txt
+++ b/src/c++/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py
index 0711ef792..a20bca7e8 100755
--- a/src/python/library/tritonclient/grpc/_infer_input.py
+++ b/src/python/library/tritonclient/grpc/_infer_input.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py
index 8b40f5aa3..bbc13db4b 100755
--- a/src/python/library/tritonclient/grpc/_utils.py
+++ b/src/python/library/tritonclient/grpc/_utils.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index 6c0aaade5..5355ed7aa 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py
index 2b1d37d76..d4ffc5b70 100755
--- a/src/python/library/tritonclient/http/_utils.py
+++ b/src/python/library/tritonclient/http/_utils.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py
index b46685ef5..40dffffcf 100755
--- a/src/python/library/tritonclient/utils/__init__.py
+++ b/src/python/library/tritonclient/utils/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions

From b4c6a17c06a1585611466d60021ddcaa8fb3f70c Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Tue, 9 Jul 2024 15:02:15 -0700
Subject: [PATCH 04/10] Update error msg and build deps

---
 src/c++/library/CMakeLists.txt                       | 1 +
 src/c++/library/common.cc                            | 6 +++---
 src/python/library/tritonclient/grpc/_infer_input.py | 4 ++--
 src/python/library/tritonclient/http/_infer_input.py | 4 ++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt
index 7d8a7c29f..c6b3e09e1 100644
--- a/src/c++/library/CMakeLists.txt
+++ b/src/c++/library/CMakeLists.txt
@@ -151,6 +151,7 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
       grpcclient
       PRIVATE gRPC::grpc++
       PRIVATE gRPC::grpc
+      PRIVATE triton-common-model-config
       PUBLIC protobuf::libprotobuf
       PUBLIC Threads::Threads
   )
diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc
index 5e3e7c517..26903a03a 100644
--- a/src/c++/library/common.cc
+++ b/src/c++/library/common.cc
@@ -310,7 +310,7 @@ InferInput::ValidateData() const
           triton::common::GetByteSize(datatype, shape_);
       if ((int64_t)byte_size_ != expected_byte_size) {
         return Error(
-            "'" + name_ + "' got unexpected byte size " +
+            "input '" + name_ + "' got unexpected byte size " +
             std::to_string(byte_size_) + ", expected " +
             std::to_string(expected_byte_size));
       }
@@ -325,7 +325,7 @@ InferInput::ValidateData() const
       }
       if ((int64_t)str_cnt != expected_str_cnt) {
         return Error(
-            "'" + name_ + "' got unexpected string count " +
+            "input '" + name_ + "' got unexpected string count " +
             std::to_string(str_cnt) + ", expected " +
             std::to_string(expected_str_cnt));
       }
@@ -334,7 +334,7 @@ InferInput::ValidateData() const
           triton::common::GetByteSize(datatype, shape_);
       if ((int64_t)byte_size_ != expected_byte_size) {
         return Error(
-            "'" + name_ + "' got unexpected byte size " +
+            "input '" + name_ + "' got unexpected byte size " +
             std::to_string(byte_size_) + ", expected " +
             std::to_string(expected_byte_size));
       }
diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py
index a20bca7e8..89d944dff 100755
--- a/src/python/library/tritonclient/grpc/_infer_input.py
+++ b/src/python/library/tritonclient/grpc/_infer_input.py
@@ -113,7 +113,7 @@ def validate_data(self):
                 ].int64_param
                 if data_byte_size != expected_byte_size:
                     raise_error(
-                        "'{}' got unexpected byte size {}, expected {}".format(
+                        "input '{}' got unexpected byte size {}, expected {}".format(
                             self._input.name, data_byte_size, expected_byte_size
                         )
                     )
@@ -123,7 +123,7 @@ def validate_data(self):
             data_num_elements = num_elements(self._data_shape)
             if expected_num_elements != data_num_elements:
                 raise_error(
-                    "'{}' got unexpected elements count {}, expected {}".format(
+                    "input '{}' got unexpected elements count {}, expected {}".format(
                         self._input.name, data_num_elements, expected_num_elements
                     )
                 )
diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index 5355ed7aa..cc9f259cf 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -115,7 +115,7 @@ def validate_data(self):
                 data_byte_size = self._parameters["shared_memory_byte_size"]
                 if data_byte_size != expected_byte_size:
                     raise_error(
-                        "'{}' got unexpected byte size {}, expected {}".format(
+                        "input '{}' got unexpected byte size {}, expected {}".format(
                             self._name, data_byte_size, expected_byte_size
                         )
                     )
@@ -125,7 +125,7 @@ def validate_data(self):
             data_num_elements = num_elements(self._data_shape)
             if expected_num_elements != data_num_elements:
                 raise_error(
-                    "'{}' got unexpected elements count {}, expected {}".format(
+                    "input '{}' got unexpected elements count {}, expected {}".format(
                         self._name, data_num_elements, expected_num_elements
                     )
                 )

From e5e6b7e0d39667f9bacb86fc389ba2beee86c93d Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Wed, 10 Jul 2024 00:58:04 -0700
Subject: [PATCH 05/10] Update error msg

---
 src/c++/tests/client_input_test.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc
index 0ae346afb..b6d9d7d87 100644
--- a/src/c++/tests/client_input_test.cc
+++ b/src/c++/tests/client_input_test.cc
@@ -124,7 +124,7 @@ TYPED_TEST_P(ClientInputTest, AppendRaw)
   FAIL_IF_SUCCESS(
       this->client_->Infer(&results, options, inputs),
       "expect error with inference request",
-      "'INPUT1' got unexpected byte size 64, expected 60");
+      "input 'INPUT1' got unexpected byte size 64, expected 60");
 
   // Check error message and verify the request reaches the server
   inputs[1]->SetShape({2, 8});
@@ -204,8 +204,9 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory)
   FAIL_IF_SUCCESS(
       this->client_->Infer(&results, options, inputs),
       "expect error with inference request",
-      ("'INPUT1' got unexpected byte size " + std::to_string(input_byte_size) +
-       ", expected " + std::to_string(input_byte_size - sizeof(int))));
+      ("input 'INPUT1' got unexpected byte size " +
+       std::to_string(input_byte_size) + ", expected " +
+       std::to_string(input_byte_size - sizeof(int))));
 
   // Get shared memory regions active/registered within triton
   // std::string shm_status;
@@ -270,7 +271,7 @@ TYPED_TEST_P(ClientInputTest, AppendString)
   FAIL_IF_SUCCESS(
       this->client_->Infer(&results, options, inputs),
       "expect error with inference request",
-      "'INPUT1' got unexpected elements count 16, expected 15");
+      "input 'INPUT1' got unexpected elements count 16, expected 15");
 
   // Check error message and verify the request reaches the server
   inputs[1]->SetShape({2, 8});

From 07059a64ef2e3f26bca4133aa6aa33fbd2c9afc1 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Mon, 22 Jul 2024 20:06:33 -0700
Subject: [PATCH 06/10] Remove client checks for string inputs

---
 src/c++/library/common.cc          | 109 +++--------------------------
 src/c++/library/common.h           |   5 --
 src/c++/tests/client_input_test.cc | 104 ++++++++++-----------------
 3 files changed, 47 insertions(+), 171 deletions(-)

diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc
index 26903a03a..566e3d366 100644
--- a/src/c++/library/common.cc
+++ b/src/c++/library/common.cc
@@ -236,109 +236,22 @@ InferInput::SetBinaryData(const bool binary_data)
   return Error::Success;
 }
 
-Error
-InferInput::GetStringCount(size_t* str_cnt) const
-{
-  int64_t str_checked = 0;
-  size_t remaining_str_size = 0;
-
-  size_t next_buf_idx = 0;
-  const size_t buf_cnt = bufs_.size();
-
-  const uint8_t* buf = nullptr;
-  size_t remaining_buf_size = 0;
-
-  // Validate elements until all buffers have been fully processed.
-  while (remaining_buf_size || next_buf_idx < buf_cnt) {
-    // Get the next buf if not currently processing one.
-    if (!remaining_buf_size) {
-      // Reset remaining buf size and pointers for next buf.
-      buf = bufs_[next_buf_idx];
-      remaining_buf_size = buf_byte_sizes_[next_buf_idx];
-      next_buf_idx++;
-    }
-
-    constexpr size_t kStringSizeIndicator = sizeof(uint32_t);
-    // Get the next element if not currently processing one.
-    if (!remaining_str_size) {
-      // FIXME: Assume the string element's byte size indicator is not spread
-      // across buf boundaries for simplicity. Also needs better log msg.
-      if (remaining_buf_size < kStringSizeIndicator) {
-        return Error("element byte size indicator exceeds the end of the buf.");
-      }
-
-      // Start the next element and reset the remaining element size.
-      remaining_str_size = *(reinterpret_cast<const uint32_t*>(buf));
-      str_checked++;
-
-      // Advance pointer and remainder by the indicator size.
-      buf += kStringSizeIndicator;
-      remaining_buf_size -= kStringSizeIndicator;
-    }
-
-    // If the remaining buf fits it: consume the rest of the element, proceed
-    // to the next element.
-    if (remaining_buf_size >= remaining_str_size) {
-      buf += remaining_str_size;
-      remaining_buf_size -= remaining_str_size;
-      remaining_str_size = 0;
-    }
-    // Otherwise the remaining element is larger: consume the rest of the
-    // buf, proceed to the next buf.
-    else {
-      remaining_str_size -= remaining_buf_size;
-      remaining_buf_size = 0;
-    }
-  }
-
-  // FIXME: If more than expected, should stop earlier
-  // Validate the number of processed elements exactly match expectations.
-  *str_cnt = str_checked;
-  return Error::Success;
-}
-
 Error
 InferInput::ValidateData() const
 {
   inference::DataType datatype =
       triton::common::ProtocolStringToDataType(datatype_);
-  if (io_type_ == SHARED_MEMORY) {
-    if (datatype == inference::DataType::TYPE_STRING) {
-      // TODO Didn't find any shm and BYTES inputs inference example
-    } else {
-      int64_t expected_byte_size =
-          triton::common::GetByteSize(datatype, shape_);
-      if ((int64_t)byte_size_ != expected_byte_size) {
-        return Error(
-            "input '" + name_ + "' got unexpected byte size " +
-            std::to_string(byte_size_) + ", expected " +
-            std::to_string(expected_byte_size));
-      }
-    }
-  } else {
-    if (datatype == inference::DataType::TYPE_STRING) {
-      int64_t expected_str_cnt = triton::common::GetElementCount(shape_);
-      size_t str_cnt;
-      Error err = GetStringCount(&str_cnt);
-      if (!err.IsOk()) {
-        return err;
-      }
-      if ((int64_t)str_cnt != expected_str_cnt) {
-        return Error(
-            "input '" + name_ + "' got unexpected string count " +
-            std::to_string(str_cnt) + ", expected " +
-            std::to_string(expected_str_cnt));
-      }
-    } else {
-      int64_t expected_byte_size =
-          triton::common::GetByteSize(datatype, shape_);
-      if ((int64_t)byte_size_ != expected_byte_size) {
-        return Error(
-            "input '" + name_ + "' got unexpected byte size " +
-            std::to_string(byte_size_) + ", expected " +
-            std::to_string(expected_byte_size));
-      }
-    }
+  // String inputs will be checked at core and backend to reduce overhead.
+  if (datatype == inference::DataType::TYPE_STRING) {
+    return Error::Success;
+  }
+
+  int64_t expected_byte_size = triton::common::GetByteSize(datatype, shape_);
+  if ((int64_t)byte_size_ != expected_byte_size) {
+    return Error(
+        "input '" + name_ + "' got unexpected byte size " +
+        std::to_string(byte_size_) + ", expected " +
+        std::to_string(expected_byte_size));
   }
   return Error::Success;
 }
diff --git a/src/c++/library/common.h b/src/c++/library/common.h
index 8ed9f7ab3..64717e628 100644
--- a/src/c++/library/common.h
+++ b/src/c++/library/common.h
@@ -354,11 +354,6 @@ class InferInput {
   /// \return Error object indicating success or failure.
   Error SetBinaryData(const bool binary_data);
 
-  /// Gets the total number of strings in this input data.
-  /// \param byte_size The number of strings.
-  /// \return Error object indicating success or failure.
-  Error GetStringCount(size_t* str_cnt) const;
-
   /// Validate input has data and input shape matches input data.
   /// \return Error object indicating success of failure.
   Error ValidateData() const;
diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc
index b6d9d7d87..5b973f6d5 100644
--- a/src/c++/tests/client_input_test.cc
+++ b/src/c++/tests/client_input_test.cc
@@ -120,18 +120,23 @@ TYPED_TEST_P(ClientInputTest, AppendRaw)
 
   std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
   tc::InferResult* results;
+
+  // Test 1
   inputs[1]->SetShape({1, 15});
   FAIL_IF_SUCCESS(
       this->client_->Infer(&results, options, inputs),
       "expect error with inference request",
       "input 'INPUT1' got unexpected byte size 64, expected 60");
 
-  // Check error message and verify the request reaches the server
+  // Test 2
+  inputs[0]->SetShape({2, 8});
   inputs[1]->SetShape({2, 8});
+  // Assert the request reaches the server
   FAIL_IF_SUCCESS(
       this->client_->Infer(&results, options, inputs),
       "expect error with inference request",
-      "input 'INPUT0' batch size does not match other inputs for 'simple'");
+      "unexpected shape for input 'INPUT1' for model 'simple'. Expected "
+      "[-1,16], got [2,8]");
 }
 
 TYPED_TEST_P(ClientInputTest, SetSharedMemory)
@@ -198,9 +203,10 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory)
   options.model_version_ = "";
 
   std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
-  inputs[1]->SetShape({1, 15});
-
   tc::InferResult* results;
+
+  // Test 1
+  inputs[1]->SetShape({1, 15});
   FAIL_IF_SUCCESS(
       this->client_->Infer(&results, options, inputs),
       "expect error with inference request",
@@ -208,12 +214,32 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory)
        std::to_string(input_byte_size) + ", expected " +
        std::to_string(input_byte_size - sizeof(int))));
 
+  // Test 2
+  inputs[0]->SetShape({2, 8});
+  inputs[1]->SetShape({2, 8});
+  // Assert the request reaches the server
+  FAIL_IF_SUCCESS(
+      this->client_->Infer(&results, options, inputs),
+      "expect error with inference request",
+      "unexpected shape for input 'INPUT1' for model 'simple'. Expected "
+      "[-1,16], got [2,8]");
+
   // Get shared memory regions active/registered within triton
-  // std::string shm_status;
-  // FAIL_IF_ERR(
-  //     this->client_->SystemSharedMemoryStatus(&shm_status),
-  //     "failed to get shared memory status");
-  // std::cout << "Shared Memory Status:\n" << shm_status << "\n";
+  using ClientType = TypeParam;
+  if constexpr (std::is_same<
+                    ClientType, tc::InferenceServerGrpcClient>::value) {
+    inference::SystemSharedMemoryStatusResponse shm_status;
+    FAIL_IF_ERR(
+        this->client_->SystemSharedMemoryStatus(&shm_status),
+        "failed to get shared memory status");
+    std::cout << "Shared Memory Status:\n" << shm_status.DebugString() << "\n";
+  } else {
+    std::string shm_status;
+    FAIL_IF_ERR(
+        this->client_->SystemSharedMemoryStatus(&shm_status),
+        "failed to get shared memory status");
+    std::cout << "Shared Memory Status:\n" << shm_status << "\n";
+  }
 
   // Unregister shared memory
   FAIL_IF_ERR(
@@ -225,65 +251,7 @@ TYPED_TEST_P(ClientInputTest, SetSharedMemory)
   FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
 }
 
-TYPED_TEST_P(ClientInputTest, AppendString)
-{
-  // Create the data for the two input tensors. Initialize the first
-  // to unique integers and the second to all ones. The input tensors
-  // are the string representation of these values.
-  std::vector<std::string> input0_data(16);
-  std::vector<std::string> input1_data(16);
-  for (size_t i = 0; i < 16; ++i) {
-    input0_data[i] = std::to_string(i);
-    input1_data[i] = std::to_string(1);
-  }
-
-  std::vector<int64_t> shape{1, 16};
-
-  // Initialize the inputs with the data.
-  tc::InferInput* input0;
-  tc::InferInput* input1;
-
-  FAIL_IF_ERR(
-      tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"),
-      "unable to get INPUT0");
-  std::shared_ptr<tc::InferInput> input0_ptr;
-  input0_ptr.reset(input0);
-  FAIL_IF_ERR(
-      tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"),
-      "unable to get INPUT1");
-  std::shared_ptr<tc::InferInput> input1_ptr;
-  input1_ptr.reset(input1);
-
-  FAIL_IF_ERR(
-      input0_ptr->AppendFromString(input0_data),
-      "unable to set data for INPUT0");
-  FAIL_IF_ERR(
-      input1_ptr->AppendFromString(input1_data),
-      "unable to set data for INPUT1");
-
-  // The inference settings. Will be using default for now.
-  tc::InferOptions options("simple_string");
-  options.model_version_ = "";
-
-  std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
-  tc::InferResult* results;
-  input1_ptr->SetShape({1, 15});
-  FAIL_IF_SUCCESS(
-      this->client_->Infer(&results, options, inputs),
-      "expect error with inference request",
-      "input 'INPUT1' got unexpected elements count 16, expected 15");
-
-  // Check error message and verify the request reaches the server
-  inputs[1]->SetShape({2, 8});
-  FAIL_IF_SUCCESS(
-      this->client_->Infer(&results, options, inputs),
-      "expect error with inference request",
-      "input 'INPUT0' batch size does not match other inputs for "
-      "'simple_string'");
-}
-
-REGISTER_TYPED_TEST_SUITE_P(
-    ClientInputTest, AppendRaw, SetSharedMemory, AppendString);
+REGISTER_TYPED_TEST_SUITE_P(ClientInputTest, AppendRaw, SetSharedMemory);
 
 INSTANTIATE_TYPED_TEST_SUITE_P(
     GRPC, ClientInputTest, tc::InferenceServerGrpcClient);

From 2a5c507c33f3fc3ba44434e21fdfc2c1018019c8 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Tue, 30 Jul 2024 19:22:55 -0700
Subject: [PATCH 07/10] Undo C++ client checks and tests

---
 src/c++/library/CMakeLists.txt     |  12 --
 src/c++/library/common.cc          |  26 +--
 src/c++/library/common.h           |   6 +-
 src/c++/library/grpc_client.cc     |   8 +-
 src/c++/library/http_client.cc     |   7 +-
 src/c++/tests/CMakeLists.txt       |  33 +---
 src/c++/tests/client_input_test.cc | 268 -----------------------------
 7 files changed, 5 insertions(+), 355 deletions(-)
 delete mode 100644 src/c++/tests/client_input_test.cc

diff --git a/src/c++/library/CMakeLists.txt b/src/c++/library/CMakeLists.txt
index 697930f4e..7a62971e5 100644
--- a/src/c++/library/CMakeLists.txt
+++ b/src/c++/library/CMakeLists.txt
@@ -223,7 +223,6 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
       grpcclient_static
       PRIVATE gRPC::grpc++
       PRIVATE gRPC::grpc
-      PRIVATE triton-common-model-config
       PUBLIC protobuf::libprotobuf
       PUBLIC Threads::Threads
   )
@@ -252,7 +251,6 @@ if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
       grpcclient
       PRIVATE gRPC::grpc++
       PRIVATE gRPC::grpc
-      PRIVATE triton-common-model-config
       PUBLIC protobuf::libprotobuf
       PUBLIC Threads::Threads
   )
@@ -378,10 +376,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
       http-client-library EXCLUDE_FROM_ALL OBJECT
       ${REQUEST_SRCS} ${REQUEST_HDRS}
   )
-  add_dependencies(
-    http-client-library
-    proto-library
-  )
 
   if (NOT WIN32)
     set_property(
@@ -394,14 +388,12 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
     http-client-library
     PUBLIC
       triton-common-json        # from repo-common
-      triton-common-model-config
   )
 
   # libhttpclient_static.a
   add_library(
       httpclient_static STATIC
       $<TARGET_OBJECTS:http-client-library>
-      $<TARGET_OBJECTS:proto-library>
   )
   add_library(
       TritonClient::httpclient_static ALIAS httpclient_static
@@ -410,7 +402,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
   target_link_libraries(
       httpclient_static
       PRIVATE triton-common-json
-      PRIVATE triton-common-model-config
       PUBLIC CURL::libcurl
       PUBLIC Threads::Threads
   )
@@ -426,7 +417,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
   add_library(
       httpclient SHARED
       $<TARGET_OBJECTS:http-client-library>
-      $<TARGET_OBJECTS:proto-library>
   )
   add_library(
       TritonClient::httpclient ALIAS httpclient
@@ -444,7 +434,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
   target_link_libraries(
       httpclient
       PRIVATE triton-common-json
-      PRIVATE triton-common-model-config
       PUBLIC CURL::libcurl
       PUBLIC Threads::Threads
   )
@@ -470,7 +459,6 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
         $<INSTALL_INTERFACE:include>
         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
         $<TARGET_PROPERTY:CURL::libcurl,INTERFACE_INCLUDE_DIRECTORIES>
-        $<TARGET_PROPERTY:proto-library,INCLUDE_DIRECTORIES>
       PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}
     )
diff --git a/src/c++/library/common.cc b/src/c++/library/common.cc
index 566e3d366..27da69888 100644
--- a/src/c++/library/common.cc
+++ b/src/c++/library/common.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -26,10 +26,6 @@
 
 #include "common.h"
 
-#include <numeric>
-
-#include "triton/common/model_config.h"
-
 namespace triton { namespace client {
 
 //==============================================================================
@@ -236,26 +232,6 @@ InferInput::SetBinaryData(const bool binary_data)
   return Error::Success;
 }
 
-Error
-InferInput::ValidateData() const
-{
-  inference::DataType datatype =
-      triton::common::ProtocolStringToDataType(datatype_);
-  // String inputs will be checked at core and backend to reduce overhead.
-  if (datatype == inference::DataType::TYPE_STRING) {
-    return Error::Success;
-  }
-
-  int64_t expected_byte_size = triton::common::GetByteSize(datatype, shape_);
-  if ((int64_t)byte_size_ != expected_byte_size) {
-    return Error(
-        "input '" + name_ + "' got unexpected byte size " +
-        std::to_string(byte_size_) + ", expected " +
-        std::to_string(expected_byte_size));
-  }
-  return Error::Success;
-}
-
 Error
 InferInput::PrepareForRequest()
 {
diff --git a/src/c++/library/common.h b/src/c++/library/common.h
index 64717e628..8d05b966b 100644
--- a/src/c++/library/common.h
+++ b/src/c++/library/common.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -354,10 +354,6 @@ class InferInput {
   /// \return Error object indicating success or failure.
   Error SetBinaryData(const bool binary_data);
 
-  /// Validate input has data and input shape matches input data.
-  /// \return Error object indicating success of failure.
-  Error ValidateData() const;
-
  private:
 #ifdef TRITON_INFERENCE_SERVER_CLIENT_CLASS
   friend class TRITON_INFERENCE_SERVER_CLIENT_CLASS;
diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc
index 02a1a888e..c9ee70125 100644
--- a/src/c++/library/grpc_client.cc
+++ b/src/c++/library/grpc_client.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -1470,13 +1470,7 @@ InferenceServerGrpcClient::PreRunProcessing(
 
   int index = 0;
   infer_request_.mutable_raw_input_contents()->Clear();
-  Error err;
   for (const auto input : inputs) {
-    err = input->ValidateData();
-    if (!err.IsOk()) {
-      return err;
-    }
-
     // Add new InferInputTensor submessages only if required, otherwise
     // reuse the submessages already available.
     auto grpc_input = (infer_request_.inputs().size() <= index)
diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc
index 728e49ab3..a2651f2eb 100644
--- a/src/c++/library/http_client.cc
+++ b/src/c++/library/http_client.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -2116,11 +2116,6 @@ InferenceServerHttpClient::PreRunProcessing(
   // Add the buffers holding input tensor data
   bool all_inputs_are_json{true};
   for (const auto this_input : inputs) {
-    err = this_input->ValidateData();
-    if (!err.IsOk()) {
-      return err;
-    }
-
     if (this_input->BinaryData()) {
       all_inputs_are_json = false;
     }
diff --git a/src/c++/tests/CMakeLists.txt b/src/c++/tests/CMakeLists.txt
index 40a3f6d61..81eb74271 100644
--- a/src/c++/tests/CMakeLists.txt
+++ b/src/c++/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -70,9 +70,6 @@ install(
   RUNTIME DESTINATION bin
 )
 
-#
-# cc_client_test
-#
 add_executable(
   cc_client_test
   cc_client_test.cc
@@ -92,34 +89,6 @@ install(
   RUNTIME DESTINATION bin
 )
 
-#
-# client_input_test
-#
-add_executable(
-  client_input_test
-  client_input_test.cc
-  $<TARGET_OBJECTS:shm-utils-library>
-)
-target_include_directories(
-  client_input_test
-  PRIVATE
-    ${GTEST_INCLUDE_DIRS}
-)
-target_link_libraries(
-  client_input_test
-  PRIVATE
-    grpcclient_static
-    httpclient_static
-    gtest
-    ${GTEST_LIBRARY}
-    ${GTEST_MAIN_LIBRARY}
-    GTest::gmock
-)
-install(
-  TARGETS client_input_test
-  RUNTIME DESTINATION bin
-)
-
 endif() # TRITON_ENABLE_CC_HTTP AND TRITON_ENABLE_CC_GRPC
 
 endif()
diff --git a/src/c++/tests/client_input_test.cc b/src/c++/tests/client_input_test.cc
deleted file mode 100644
index 5b973f6d5..000000000
--- a/src/c++/tests/client_input_test.cc
+++ /dev/null
@@ -1,268 +0,0 @@
-// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "gmock/gmock.h"
-#include "grpc_client.h"
-#include "gtest/gtest.h"
-#include "http_client.h"
-#include "shm_utils.h"
-
-namespace tc = triton::client;
-
-#define FAIL_IF_ERR(X, MSG)                                        \
-  do {                                                             \
-    tc::Error err = (X);                                           \
-    if (!err.IsOk()) {                                             \
-      std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
-      exit(1);                                                     \
-    }                                                              \
-  } while (false)
-
-#define FAIL_IF_SUCCESS(X, MSG, ERR_MSG)                       \
-  do {                                                         \
-    tc::Error err = (X);                                       \
-    ASSERT_FALSE(err.IsOk()) << "error: " << (MSG) << ": ";    \
-    ASSERT_THAT(err.Message(), ::testing::HasSubstr(ERR_MSG)); \
-  } while (false)
-
-namespace {
-
-template <typename ClientType>
-class ClientInputTest : public ::testing::Test {
- public:
-  ClientInputTest() : shape_{1, 16} {}
-
-  void SetUp() override
-  {
-    std::string url;
-    std::string client_type;
-    if (std::is_same<ClientType, tc::InferenceServerGrpcClient>::value) {
-      url = "localhost:8001";
-      client_type = "GRPC";
-    } else if (std::is_same<ClientType, tc::InferenceServerHttpClient>::value) {
-      url = "localhost:8000";
-      client_type = "HTTP";
-    } else {
-      ASSERT_TRUE(false) << "Unrecognized client class type '"
-                         << typeid(ClientType).name() << "'";
-    }
-    auto err = ClientType::Create(&this->client_, url);
-    ASSERT_TRUE(err.IsOk())
-        << "failed to create " << client_type << " client: " << err.Message();
-
-    // Initialize vector input_data_
-    for (size_t i = 0; i < 16; ++i) {
-      this->input_data_.emplace_back(i);
-    }
-  }
-
-  std::unique_ptr<ClientType> client_;
-  std::vector<int32_t> input_data_;
-  std::vector<int64_t> shape_;
-};
-
-TYPED_TEST_SUITE_P(ClientInputTest);
-
-TYPED_TEST_P(ClientInputTest, AppendRaw)
-{
-  // Initialize the inputs with the data.
-  tc::InferInput* input0;
-  tc::InferInput* input1;
-
-  FAIL_IF_ERR(
-      tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"),
-      "unable to get INPUT0");
-  std::shared_ptr<tc::InferInput> input0_ptr;
-  input0_ptr.reset(input0);
-  FAIL_IF_ERR(
-      tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"),
-      "unable to get INPUT1");
-  std::shared_ptr<tc::InferInput> input1_ptr;
-  input1_ptr.reset(input1);
-
-  FAIL_IF_ERR(
-      input0_ptr->AppendRaw(
-          reinterpret_cast<uint8_t*>(&(this->input_data_[0])),
-          this->input_data_.size() * sizeof(int32_t)),
-      "unable to set data for INPUT0");
-  FAIL_IF_ERR(
-      input1_ptr->AppendRaw(
-          reinterpret_cast<uint8_t*>(&(this->input_data_[0])),
-          this->input_data_.size() * sizeof(int32_t)),
-      "unable to set data for INPUT1");
-
-  // The inference settings. Will be using default for now.
-  tc::InferOptions options("simple");
-  options.model_version_ = "";
-
-  std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
-  tc::InferResult* results;
-
-  // Test 1
-  inputs[1]->SetShape({1, 15});
-  FAIL_IF_SUCCESS(
-      this->client_->Infer(&results, options, inputs),
-      "expect error with inference request",
-      "input 'INPUT1' got unexpected byte size 64, expected 60");
-
-  // Test 2
-  inputs[0]->SetShape({2, 8});
-  inputs[1]->SetShape({2, 8});
-  // Assert the request reaches the server
-  FAIL_IF_SUCCESS(
-      this->client_->Infer(&results, options, inputs),
-      "expect error with inference request",
-      "unexpected shape for input 'INPUT1' for model 'simple'. Expected "
-      "[-1,16], got [2,8]");
-}
-
-TYPED_TEST_P(ClientInputTest, SetSharedMemory)
-{
-  // Unregistering all shared memory regions for a clean
-  // start.
-  FAIL_IF_ERR(
-      this->client_->UnregisterSystemSharedMemory(),
-      "unable to unregister all system shared memory regions");
-  FAIL_IF_ERR(
-      this->client_->UnregisterCudaSharedMemory(),
-      "unable to unregister all cuda shared memory regions");
-
-  // Initialize the inputs with the data.
-  tc::InferInput* input0;
-  tc::InferInput* input1;
-  size_t input_byte_size = 64;
-
-  FAIL_IF_ERR(
-      tc::InferInput::Create(&input0, "INPUT0", this->shape_, "INT32"),
-      "unable to get INPUT0");
-  std::shared_ptr<tc::InferInput> input0_ptr;
-  input0_ptr.reset(input0);
-  FAIL_IF_ERR(
-      tc::InferInput::Create(&input1, "INPUT1", this->shape_, "INT32"),
-      "unable to get INPUT1");
-  std::shared_ptr<tc::InferInput> input1_ptr;
-  input1_ptr.reset(input1);
-
-  // Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
-  // integers and Input1 to all ones.
-  std::string shm_key = "/input_simple";
-  int shm_fd_ip, *input0_shm;
-  FAIL_IF_ERR(
-      tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
-      "");
-  FAIL_IF_ERR(
-      tc::MapSharedMemory(
-          shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
-      "");
-  FAIL_IF_ERR(tc::CloseSharedMemory(shm_fd_ip), "");
-  int* input1_shm = (int*)(input0_shm + 16);
-  for (size_t i = 0; i < 16; ++i) {
-    *(input0_shm + i) = i;
-    *(input1_shm + i) = 1;
-  }
-
-  FAIL_IF_ERR(
-      this->client_->RegisterSystemSharedMemory(
-          "input_data", shm_key, input_byte_size * 2),
-      "failed to register input shared memory region");
-
-  FAIL_IF_ERR(
-      input0_ptr->SetSharedMemory(
-          "input_data", input_byte_size, 0 /* offset */),
-      "unable to set shared memory for INPUT0");
-  FAIL_IF_ERR(
-      input1_ptr->SetSharedMemory(
-          "input_data", input_byte_size, input_byte_size /* offset */),
-      "unable to set shared memory for INPUT1");
-
-  // The inference settings. Will be using default for now.
-  tc::InferOptions options("simple");
-  options.model_version_ = "";
-
-  std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
-  tc::InferResult* results;
-
-  // Test 1
-  inputs[1]->SetShape({1, 15});
-  FAIL_IF_SUCCESS(
-      this->client_->Infer(&results, options, inputs),
-      "expect error with inference request",
-      ("input 'INPUT1' got unexpected byte size " +
-       std::to_string(input_byte_size) + ", expected " +
-       std::to_string(input_byte_size - sizeof(int))));
-
-  // Test 2
-  inputs[0]->SetShape({2, 8});
-  inputs[1]->SetShape({2, 8});
-  // Assert the request reaches the server
-  FAIL_IF_SUCCESS(
-      this->client_->Infer(&results, options, inputs),
-      "expect error with inference request",
-      "unexpected shape for input 'INPUT1' for model 'simple'. Expected "
-      "[-1,16], got [2,8]");
-
-  // Get shared memory regions active/registered within triton
-  using ClientType = TypeParam;
-  if constexpr (std::is_same<
-                    ClientType, tc::InferenceServerGrpcClient>::value) {
-    inference::SystemSharedMemoryStatusResponse shm_status;
-    FAIL_IF_ERR(
-        this->client_->SystemSharedMemoryStatus(&shm_status),
-        "failed to get shared memory status");
-    std::cout << "Shared Memory Status:\n" << shm_status.DebugString() << "\n";
-  } else {
-    std::string shm_status;
-    FAIL_IF_ERR(
-        this->client_->SystemSharedMemoryStatus(&shm_status),
-        "failed to get shared memory status");
-    std::cout << "Shared Memory Status:\n" << shm_status << "\n";
-  }
-
-  // Unregister shared memory
-  FAIL_IF_ERR(
-      this->client_->UnregisterSystemSharedMemory("input_data"),
-      "unable to unregister shared memory input region");
-
-  // Cleanup shared memory
-  FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
-  FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
-}
-
-REGISTER_TYPED_TEST_SUITE_P(ClientInputTest, AppendRaw, SetSharedMemory);
-
-INSTANTIATE_TYPED_TEST_SUITE_P(
-    GRPC, ClientInputTest, tc::InferenceServerGrpcClient);
-INSTANTIATE_TYPED_TEST_SUITE_P(
-    HTTP, ClientInputTest, tc::InferenceServerHttpClient);
-
-}  // namespace
-
-int
-main(int argc, char** argv)
-{
-  ::testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}

From 6b56c3b394fb57bf539cf68d859d8d422036a24f Mon Sep 17 00:00:00 2001
From: Yingge He <157551214+yinggeh@users.noreply.github.com>
Date: Tue, 30 Jul 2024 19:24:42 -0700
Subject: [PATCH 08/10] Update
 src/python/library/tritonclient/http/_infer_input.py

Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
---
 src/python/library/tritonclient/http/_infer_input.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index cc9f259cf..af650d3ed 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -97,8 +97,8 @@ def validate_data(self):
         -------
         None
         """
-        # Input must set only one of the following fields: 'data', 'binary_data_size'
-        # in 'parameters', 'shared_memory_region' in 'parameters'
+        # Exactly one of these must be set: 'data', or a 'parameters' entry
+        # named 'binary_data_size' or 'shared_memory_region'
         cnt = 0
         cnt += self._data != None
         cnt += "binary_data_size" in self._parameters

From a58474193129aca58df8b6123d45bb8f5021a44a Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Fri, 2 Aug 2024 14:46:35 -0700
Subject: [PATCH 09/10] Workaround with L0_trt_reformat_free by removing shm
 checks

---
 .../library/tritonclient/grpc/_infer_input.py | 34 ++++++-------------
 .../library/tritonclient/http/_infer_input.py | 32 ++++++-----------
 2 files changed, 22 insertions(+), 44 deletions(-)

diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py
index 89d944dff..d0975b31f 100755
--- a/src/python/library/tritonclient/grpc/_infer_input.py
+++ b/src/python/library/tritonclient/grpc/_infer_input.py
@@ -102,31 +102,19 @@ def validate_data(self):
         if cnt != 1:
             return
 
+        # Shared-memory inputs are not validated (TRT reformat-free tensor workaround)
         if "shared_memory_region" in self._input.parameters:
-            # Using shared memory
-            if self._input.datatype != "BYTES":
-                expected_byte_size = num_elements(
-                    self._input.shape
-                ) * get_data_type_byte_size(self._input.datatype)
-                data_byte_size = self._input.parameters[
-                    "shared_memory_byte_size"
-                ].int64_param
-                if data_byte_size != expected_byte_size:
-                    raise_error(
-                        "input '{}' got unexpected byte size {}, expected {}".format(
-                            self._input.name, data_byte_size, expected_byte_size
-                        )
-                    )
-        else:
-            # Not using shared memory
-            expected_num_elements = num_elements(self._input.shape)
-            data_num_elements = num_elements(self._data_shape)
-            if expected_num_elements != data_num_elements:
-                raise_error(
-                    "input '{}' got unexpected elements count {}, expected {}".format(
-                        self._input.name, data_num_elements, expected_num_elements
-                    )
+            return
+
+        # Not using shared memory
+        expected_num_elements = num_elements(self._input.shape)
+        data_num_elements = num_elements(self._data_shape)
+        if expected_num_elements != data_num_elements:
+            raise_error(
+                "input '{}' got unexpected elements count {}, expected {}".format(
+                    self._input.name, data_num_elements, expected_num_elements
                 )
+            )
         return
 
     def set_shape(self, shape):
diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index af650d3ed..e0d3f19fb 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -106,29 +106,19 @@ def validate_data(self):
         if cnt != 1:
             return
 
+        # Shared-memory inputs are not validated (TRT reformat-free tensor workaround)
         if "shared_memory_region" in self._parameters:
-            # Using shared memory
-            if self._datatype != "BYTES":
-                expected_byte_size = num_elements(
-                    self._shape
-                ) * get_data_type_byte_size(self._datatype)
-                data_byte_size = self._parameters["shared_memory_byte_size"]
-                if data_byte_size != expected_byte_size:
-                    raise_error(
-                        "input '{}' got unexpected byte size {}, expected {}".format(
-                            self._name, data_byte_size, expected_byte_size
-                        )
-                    )
-        else:
-            # Not using shared memory
-            expected_num_elements = num_elements(self._shape)
-            data_num_elements = num_elements(self._data_shape)
-            if expected_num_elements != data_num_elements:
-                raise_error(
-                    "input '{}' got unexpected elements count {}, expected {}".format(
-                        self._name, data_num_elements, expected_num_elements
-                    )
+            return
+
+        # Not using shared memory
+        expected_num_elements = num_elements(self._shape)
+        data_num_elements = num_elements(self._data_shape)
+        if expected_num_elements != data_num_elements:
+            raise_error(
+                "input '{}' got unexpected elements count {}, expected {}".format(
+                    self._name, data_num_elements, expected_num_elements
                 )
+            )
         return
 
     def set_shape(self, shape):

From 5889b8ed7e85a860e6991fbf5e042d3dd8c10b08 Mon Sep 17 00:00:00 2001
From: Yingge He <yinggeh@nvidia.com>
Date: Mon, 5 Aug 2024 10:28:35 -0700
Subject: [PATCH 10/10] Remove unused function

---
 .../library/tritonclient/grpc/_infer_input.py |  2 +-
 .../library/tritonclient/http/_infer_input.py |  1 -
 .../library/tritonclient/utils/__init__.py    | 48 -------------------
 3 files changed, 1 insertion(+), 50 deletions(-)

diff --git a/src/python/library/tritonclient/grpc/_infer_input.py b/src/python/library/tritonclient/grpc/_infer_input.py
index d0975b31f..559f60889 100755
--- a/src/python/library/tritonclient/grpc/_infer_input.py
+++ b/src/python/library/tritonclient/grpc/_infer_input.py
@@ -30,7 +30,7 @@
 from tritonclient.grpc import service_pb2
 from tritonclient.utils import *
 
-from ._utils import get_data_type_byte_size, num_elements, raise_error
+from ._utils import num_elements, raise_error
 
 
 class InferInput:
diff --git a/src/python/library/tritonclient/http/_infer_input.py b/src/python/library/tritonclient/http/_infer_input.py
index e0d3f19fb..f237edcfa 100755
--- a/src/python/library/tritonclient/http/_infer_input.py
+++ b/src/python/library/tritonclient/http/_infer_input.py
@@ -27,7 +27,6 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import numpy as np
 from tritonclient.utils import (
-    get_data_type_byte_size,
     np_to_triton_dtype,
     num_elements,
     raise_error,
diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py
index 40dffffcf..304bbfec9 100755
--- a/src/python/library/tritonclient/utils/__init__.py
+++ b/src/python/library/tritonclient/utils/__init__.py
@@ -211,54 +211,6 @@ def triton_to_np_dtype(dtype):
     return None
 
 
-def get_data_type_byte_size(dtype):
-    """
-    Get the size of a given datatype in bytes.
-
-    Parameters
-    ----------
-    dtype : str
-        The data-type
-
-    Returns
-    -------
-    int
-        The size in bytes of the datatype, or 0 if size cannot be determined
-        (for example, values of type BYTES have variable length and so size
-        cannot be determine just from the type)
-    """
-
-    if dtype == "BOOL":
-        return 1
-    elif dtype == "INT8":
-        return 1
-    elif dtype == "INT16":
-        return 2
-    elif dtype == "INT32":
-        return 4
-    elif dtype == "INT64":
-        return 8
-    elif dtype == "UINT8":
-        return 1
-    elif dtype == "UINT16":
-        return 2
-    elif dtype == "UINT32":
-        return 4
-    elif dtype == "UINT64":
-        return 8
-    elif dtype == "FP16":
-        return 2
-    elif dtype == "FP32":
-        return 4
-    elif dtype == "FP64":
-        return 8
-    elif dtype == "BYTES":
-        return 0
-    elif dtype == "BF16":
-        return 2
-    return 0
-
-
 def serialize_byte_tensor(input_tensor):
     """
     Serializes a bytes tensor into a flat numpy array of length prepended