From 80246216bb8a03213aca2d3a3f09db9279c6ea01 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Fri, 12 Jul 2024 15:55:26 +0530
Subject: [PATCH 1/7] Validate input byte size and reformat-free tensor config

---
 src/instance_state.cc | 70 +++++++++++++++++++++++++++++++++++++++----
 src/instance_state.h  |  4 ++-
 src/model_state.cc    | 46 +++++++++++++++++++++-------
 src/model_state.h     |  4 +--
 4 files changed, 105 insertions(+), 19 deletions(-)

diff --git a/src/instance_state.cc b/src/instance_state.cc
index 653bd4f..b26f3da 100644
--- a/src/instance_state.cc
+++ b/src/instance_state.cc
@@ -723,11 +723,12 @@ ModelInstanceState::Run(
       TRITONSERVER_DataType datatype;
       const int64_t* shape;
       uint32_t dims_count;
+      size_t req_data_byte_size;
       FAIL_ALL_AND_RETURN_IF_ERROR(
           payload_->requests_, payload_->request_count_, payload_->responses_,
           TRITONBACKEND_InputProperties(
-              repr_input, nullptr, &datatype, &shape, &dims_count, nullptr,
-              nullptr),
+              repr_input, nullptr, &datatype, &shape, &dims_count,
+              &req_data_byte_size, nullptr),
           (std::string("failed to obtain the representative input "
                        "properties for '") +
            name + "'")
@@ -766,6 +767,19 @@ ModelInstanceState::Run(
              (batchn_shape[io_binding_info.GetFormat().vectorized_dim_] %
               io_binding_info.GetFormat().components_per_element_));
         total_byte_size = GetByteSize(datatype, batchn_shape);
+        if (req_data_byte_size != total_byte_size) {
+          FAIL_ALL_AND_RETURN_IF_ERROR(
+              payload_->requests_, payload_->request_count_,
+              payload_->responses_,
+              TRITONSERVER_ErrorNew(
+                  TRITONSERVER_ERROR_INVALID_ARG,
+                  (std::string("tensor for input '") + name +
+                   "' expected byte size is " +
+                   std::to_string(total_byte_size) + ", got " +
+                   std::to_string(req_data_byte_size))
+                      .c_str()),
+              "failed to run TRT inference");
+        }
       }
 
       payload_->collector_->ProcessTensor(
@@ -1760,7 +1774,8 @@ ModelInstanceState::ValidateIO()
 {
   // Collect all the expected input and allowed output tensor names
   // and validate that the model configuration specifies only those.
-  std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors;
+  std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors,
+      allowed_reformat_free_tensors;
   for (int i = 0; i < total_io_tensors_; ++i) {
     const std::string& tensor_name = tensor_names_[i];
     if (IsInput(engine_.get(), tensor_name)) {
@@ -1775,6 +1790,14 @@ ModelInstanceState::ValidateIO()
                                      " as shape binding for " + Name())
                                         .c_str());
     }
+    if (engine_->getTensorFormat(tensor_name.c_str()) !=
+        nvinfer1::TensorFormat::kLINEAR) {
+      allowed_reformat_free_tensors.emplace(tensor_name);
+      LOG_MESSAGE(
+          TRITONSERVER_LOG_VERBOSE, (std::string("Detected ") + tensor_name +
+                                     " as a reformat free tensor for " + Name())
+                                        .c_str());
+    }
   }
 
   triton::common::TritonJson::Value config_inputs;
@@ -1808,9 +1831,11 @@ ModelInstanceState::ValidateIO()
   }
 
   RETURN_IF_ERROR(ValidateIOHelper(
-      config_inputs, allowed_shape_tensors, true /* is_input */));
+      config_inputs, allowed_shape_tensors, allowed_reformat_free_tensors,
+      true /* is_input */));
   RETURN_IF_ERROR(ValidateIOHelper(
-      config_outputs, allowed_shape_tensors, false /* is_input */));
+      config_outputs, allowed_shape_tensors, allowed_reformat_free_tensors,
+      false /* is_input */));
 
   return nullptr;
 }
@@ -1818,7 +1843,9 @@ ModelInstanceState::ValidateIO()
 TRITONSERVER_Error*
 ModelInstanceState::ValidateIOHelper(
     common::TritonJson::Value& ios,
-    const std::set<std::string>& allowed_shape_tensors, const bool is_input)
+    const std::set<std::string>& allowed_shape_tensors,
+    const std::set<std::string>& allowed_reformat_free_tensors,
+    const bool is_input)
 {
   std::string type = is_input ? "input" : "output";
   for (size_t i = 0; i < ios.ArraySize(); i++) {
@@ -1865,6 +1892,37 @@ ModelInstanceState::ValidateIOHelper(
                 .c_str());
       }
     }
+
+    // Check the reformat free tensor specification
+    if (allowed_reformat_free_tensors.find(io_name) !=
+        allowed_reformat_free_tensors.end()) {
+      bool is_reformat_free_tensor = false;
+      RETURN_IF_ERROR(
+          io.MemberAsBool("is_reformat_free_tensor", &is_reformat_free_tensor));
+      if (!is_reformat_free_tensor) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INTERNAL,
+            (type + " '" + io_name + "' for model '" + model_state_->Name() +
+             "' is a reformat free tensor but the model configuration "
+             "doesn't mark it as a reformat free tensor. Set "
+             "'is_reformat_free_tensor' to "
+             "true for " +
+             type + " '" + io_name + "'.")
+                .c_str());
+      }
+    } else {
+      bool is_reformat_free_tensor = false;
+      RETURN_IF_ERROR(
+          io.MemberAsBool("is_reformat_free_tensor", &is_reformat_free_tensor));
+      if (is_reformat_free_tensor) {
+        return TRITONSERVER_ErrorNew(
+            TRITONSERVER_ERROR_INTERNAL,
+            (type + " '" + io_name + "' for model '" + model_state_->Name() +
+             "' is incorrectly marked as a reformat free tensor in the model "
+             "configuration.")
+                .c_str());
+      }
+    }
   }
 
   return nullptr;
diff --git a/src/instance_state.h b/src/instance_state.h
index 1564242..a01a720 100644
--- a/src/instance_state.h
+++ b/src/instance_state.h
@@ -295,7 +295,9 @@ class ModelInstanceState : public TensorRTModelInstance {
   TRITONSERVER_Error* ValidateIO();
   TRITONSERVER_Error* ValidateIOHelper(
       common::TritonJson::Value& ios,
-      const std::set<std::string>& allowed_shape_tensors, const bool is_input);
+      const std::set<std::string>& allowed_shape_tensors,
+      const std::set<std::string>& allowed_reformat_free_tensors,
+      const bool is_input);
 
   TRITONSERVER_Error* InitIOBindingBuffers();
   TRITONSERVER_Error* InitializeConfigShapeInputBindings(
diff --git a/src/model_state.cc b/src/model_state.cc
index 5a5d0e7..ac1737d 100644
--- a/src/model_state.cc
+++ b/src/model_state.cc
@@ -754,7 +754,10 @@ ModelState::GetRefIO(
   for (int i = 0; i < num_io_tensors; ++i) {
     const std::string& tensor_name = engine->getIOTensorName(i);
     nvinfer1::Dims dims = engine->getTensorShape(tensor_name.c_str());
-    bool is_shape_binding = engine->isShapeInferenceIO(tensor_name.c_str());
+    bool is_shape_tensor = engine->isShapeInferenceIO(tensor_name.c_str());
+    bool is_reformat_free_tensor =
+        (engine->getTensorFormat(tensor_name.c_str()) !=
+         nvinfer1::TensorFormat::kLINEAR);
     if ((is_input && (!IsInput(engine, tensor_name))) ||
         ((!is_input) && (IsInput(engine, tensor_name)))) {
       continue;
@@ -766,8 +769,10 @@ ModelState::GetRefIO(
     RETURN_IF_ERROR(io.AddString(
         "data_type", ConvertTrtTypeToConfigDataType(
                          engine->getTensorDataType(tensor_name.c_str()))));
-    RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_binding, &io));
-    RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_binding));
+    RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_tensor, &io));
+    RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_tensor));
+    RETURN_IF_ERROR(
+        io.AddBool("is_reformat_free_tensor", is_reformat_free_tensor));
 
     RETURN_IF_ERROR(ref_io->Append(std::move(io)));
   }
@@ -777,13 +782,13 @@ ModelState::GetRefIO(
 
 TRITONSERVER_Error*
 ModelState::InitIODims(
-    nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_binding,
+    nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_tensor,
     triton::common::TritonJson::Value* io)
 {
   bool skip_first = (MaxBatchSize() != 0);
   triton::common::TritonJson::Value config_dims(
       ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
-  if (!is_shape_binding) {
+  if (!is_shape_tensor) {
     for (int didx = (skip_first ? 1 : 0); didx < dims.nbDims; ++didx) {
       RETURN_IF_ERROR(config_dims.AppendInt(dims.d[didx]));
     }
@@ -871,8 +876,7 @@ ModelState::FixIO(
           }
 
           // Check if the IO is a shape tensor.
-          bool is_shape_tensor = false;
-          is_shape_tensor = engine->isShapeInferenceIO(io_name.c_str());
+          bool is_shape_tensor = engine->isShapeInferenceIO(io_name.c_str());
 
           common::TritonJson::Value shape_tensor;
           if (mutable_io.Find("is_shape_tensor", &shape_tensor)) {
@@ -885,15 +889,37 @@ ModelState::FixIO(
                    "' is incorrectly specified as a shape tensor.")
                       .c_str());
             } else if (!shape_tensor_val && is_shape_tensor) {
+              RETURN_IF_ERROR(shape_tensor.SetBool(is_shape_tensor));
+            }
+          } else {
+            RETURN_IF_ERROR(
+                mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
+          }
+
+          // Check if the IO is a reformat free tensor.
+          bool is_reformat_free_tensor =
+              (engine->getTensorFormat(io_name.c_str()) !=
+               nvinfer1::TensorFormat::kLINEAR);
+
+          common::TritonJson::Value reformat_free_tensor;
+          if (mutable_io.Find(
+                  "is_reformat_free_tensor", &reformat_free_tensor)) {
+            bool reformat_free_tensor_val = false;
+            RETURN_IF_ERROR(
+                reformat_free_tensor.AsBool(&reformat_free_tensor_val));
+            if (reformat_free_tensor_val && (!is_reformat_free_tensor)) {
               return TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_INVALID_ARG,
                   (std::string("'") + io_name +
-                   "' is incorrectly specified as an execution tensor.")
+                   "' is incorrectly specified as a reformat free tensor.")
                       .c_str());
+            } else if (!reformat_free_tensor_val && is_reformat_free_tensor) {
+              RETURN_IF_ERROR(
+                  reformat_free_tensor.SetBool(is_reformat_free_tensor));
             }
           } else {
-            RETURN_IF_ERROR(
-                mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
+            RETURN_IF_ERROR(mutable_io.AddBool(
+                "is_reformat_free_tensor", is_reformat_free_tensor));
           }
           break;
         }
diff --git a/src/model_state.h b/src/model_state.h
index f3fa646..b132806 100644
--- a/src/model_state.h
+++ b/src/model_state.h
@@ -109,8 +109,8 @@ class ModelState : public TensorRTModel {
       const bool is_input, nvinfer1::ICudaEngine* engine,
       triton::common::TritonJson::Value* ref_io);
   TRITONSERVER_Error* InitIODims(
-      nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims,
-      bool is_shape_binding, triton::common::TritonJson::Value* io);
+      nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_tensor,
+      triton::common::TritonJson::Value* io);
   TRITONSERVER_Error* FixIO(
       nvinfer1::ICudaEngine* engine,
       triton::common::TritonJson::Value& reference_ios,

From a4bfb14f99ebd1daa2ef780716583dd35005152b Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Sat, 13 Jul 2024 20:11:26 +0530
Subject: [PATCH 2/7] Update flag name

---
 src/instance_state.cc | 57 ++++++++++++++++++++++++-------------------
 src/instance_state.h  |  2 +-
 src/model_state.cc    | 27 ++++++++++----------
 src/tensorrt_utils.cc | 33 +++++++++++++++++++++++++
 src/tensorrt_utils.h  |  2 ++
 5 files changed, 82 insertions(+), 39 deletions(-)

diff --git a/src/instance_state.cc b/src/instance_state.cc
index b26f3da..e6f0ff9 100644
--- a/src/instance_state.cc
+++ b/src/instance_state.cc
@@ -761,12 +761,17 @@ ModelInstanceState::Run(
       size_t total_byte_size = 0;
       if (io_binding_info.GetFormat().is_linear_format_) {
         total_byte_size = GetByteSize(datatype, batchn_shape);
+        // For input tensors with a linear IO format, the request has already
+        // verified the byte size, so no further validation is needed here.
       } else {
         batchn_shape[io_binding_info.GetFormat().vectorized_dim_] +=
             (io_binding_info.GetFormat().components_per_element_ -
              (batchn_shape[io_binding_info.GetFormat().vectorized_dim_] %
               io_binding_info.GetFormat().components_per_element_));
         total_byte_size = GetByteSize(datatype, batchn_shape);
+
+        // Ensure the request data byte size matches the expected byte size for
+        // non-linear IO format tensors
         if (req_data_byte_size != total_byte_size) {
           FAIL_ALL_AND_RETURN_IF_ERROR(
               payload_->requests_, payload_->request_count_,
@@ -1775,7 +1780,7 @@ ModelInstanceState::ValidateIO()
   // Collect all the expected input and allowed output tensor names
   // and validate that the model configuration specifies only those.
   std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors,
-      allowed_reformat_free_tensors;
+      allowed_non_linear_format_io;
   for (int i = 0; i < total_io_tensors_; ++i) {
     const std::string& tensor_name = tensor_names_[i];
     if (IsInput(engine_.get(), tensor_name)) {
@@ -1790,13 +1795,14 @@ ModelInstanceState::ValidateIO()
                                      " as shape binding for " + Name())
                                         .c_str());
     }
-    if (engine_->getTensorFormat(tensor_name.c_str()) !=
-        nvinfer1::TensorFormat::kLINEAR) {
-      allowed_reformat_free_tensors.emplace(tensor_name);
+    auto detected_io_format = engine_->getTensorFormat(tensor_name.c_str());
+    if (detected_io_format != nvinfer1::TensorFormat::kLINEAR) {
+      allowed_non_linear_format_io.emplace(tensor_name);
       LOG_MESSAGE(
-          TRITONSERVER_LOG_VERBOSE, (std::string("Detected ") + tensor_name +
-                                     " as a reformat free tensor for " + Name())
-                                        .c_str());
+          TRITONSERVER_LOG_VERBOSE,
+          (std::string("Detected ") + tensor_name + " using IO format " +
+           TensorFormatToString(detected_io_format) + " for " + Name())
+              .c_str());
     }
   }
 
@@ -1831,10 +1837,10 @@ ModelInstanceState::ValidateIO()
   }
 
   RETURN_IF_ERROR(ValidateIOHelper(
-      config_inputs, allowed_shape_tensors, allowed_reformat_free_tensors,
+      config_inputs, allowed_shape_tensors, allowed_non_linear_format_io,
       true /* is_input */));
   RETURN_IF_ERROR(ValidateIOHelper(
-      config_outputs, allowed_shape_tensors, allowed_reformat_free_tensors,
+      config_outputs, allowed_shape_tensors, allowed_non_linear_format_io,
       false /* is_input */));
 
   return nullptr;
@@ -1844,7 +1850,7 @@ TRITONSERVER_Error*
 ModelInstanceState::ValidateIOHelper(
     common::TritonJson::Value& ios,
     const std::set<std::string>& allowed_shape_tensors,
-    const std::set<std::string>& allowed_reformat_free_tensors,
+    const std::set<std::string>& allowed_non_linear_format_io,
     const bool is_input)
 {
   std::string type = is_input ? "input" : "output";
@@ -1893,33 +1899,34 @@ ModelInstanceState::ValidateIOHelper(
       }
     }
 
-    // Check the reformat free tensor specification
-    if (allowed_reformat_free_tensors.find(io_name) !=
-        allowed_reformat_free_tensors.end()) {
-      bool is_reformat_free_tensor = false;
+    // Check the tensor IO format specification
+    if (allowed_non_linear_format_io.find(io_name) !=
+        allowed_non_linear_format_io.end()) {
+      bool is_non_linear_format_io = false;
       RETURN_IF_ERROR(
-          io.MemberAsBool("is_reformat_free_tensor", &is_reformat_free_tensor));
-      if (!is_reformat_free_tensor) {
+          io.MemberAsBool("is_non_linear_format_io", &is_non_linear_format_io));
+      if (!is_non_linear_format_io) {
         return TRITONSERVER_ErrorNew(
             TRITONSERVER_ERROR_INTERNAL,
             (type + " '" + io_name + "' for model '" + model_state_->Name() +
-             "' is a reformat free tensor but the model configuration "
-             "doesn't mark it as a reformat free tensor. Set "
-             "'is_reformat_free_tensor' to "
-             "true for " +
+             "' uses a non-linear IO format, but the model configuration "
+             "does not specify it as such. Set "
+             "'is_non_linear_format_io' to true for " +
              type + " '" + io_name + "'.")
                 .c_str());
       }
     } else {
-      bool is_reformat_free_tensor = false;
+      bool is_non_linear_format_io = false;
       RETURN_IF_ERROR(
-          io.MemberAsBool("is_reformat_free_tensor", &is_reformat_free_tensor));
-      if (is_reformat_free_tensor) {
+          io.MemberAsBool("is_non_linear_format_io", &is_non_linear_format_io));
+      if (is_non_linear_format_io) {
         return TRITONSERVER_ErrorNew(
             TRITONSERVER_ERROR_INTERNAL,
             (type + " '" + io_name + "' for model '" + model_state_->Name() +
-             "' is incorrectly marked as a reformat free tensor in the model "
-             "configuration.")
+             "' uses a linear IO format, but 'is_non_linear_format_io' is "
+             "incorrectly set to true. Set "
+             "'is_non_linear_format_io' to false for " +
+             type + " '" + io_name + "'.")
                 .c_str());
       }
     }
diff --git a/src/instance_state.h b/src/instance_state.h
index a01a720..d3eb1ee 100644
--- a/src/instance_state.h
+++ b/src/instance_state.h
@@ -296,7 +296,7 @@ class ModelInstanceState : public TensorRTModelInstance {
   TRITONSERVER_Error* ValidateIOHelper(
       common::TritonJson::Value& ios,
       const std::set<std::string>& allowed_shape_tensors,
-      const std::set<std::string>& allowed_reformat_free_tensors,
+      const std::set<std::string>& allowed_non_linear_format_io,
       const bool is_input);
 
   TRITONSERVER_Error* InitIOBindingBuffers();
diff --git a/src/model_state.cc b/src/model_state.cc
index ac1737d..76b6fbf 100644
--- a/src/model_state.cc
+++ b/src/model_state.cc
@@ -755,7 +755,7 @@ ModelState::GetRefIO(
     const std::string& tensor_name = engine->getIOTensorName(i);
     nvinfer1::Dims dims = engine->getTensorShape(tensor_name.c_str());
     bool is_shape_tensor = engine->isShapeInferenceIO(tensor_name.c_str());
-    bool is_reformat_free_tensor =
+    bool is_non_linear_format_io =
         (engine->getTensorFormat(tensor_name.c_str()) !=
          nvinfer1::TensorFormat::kLINEAR);
     if ((is_input && (!IsInput(engine, tensor_name))) ||
@@ -772,7 +772,7 @@ ModelState::GetRefIO(
     RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_tensor, &io));
     RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_tensor));
     RETURN_IF_ERROR(
-        io.AddBool("is_reformat_free_tensor", is_reformat_free_tensor));
+        io.AddBool("is_non_linear_format_io", is_non_linear_format_io));
 
     RETURN_IF_ERROR(ref_io->Append(std::move(io)));
   }
@@ -896,30 +896,31 @@ ModelState::FixIO(
                 mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
           }
 
-          // Check if the IO is a reformat free tensor.
-          bool is_reformat_free_tensor =
+          // Verify if the IO format is non-linear.
+          bool is_non_linear_format_io =
               (engine->getTensorFormat(io_name.c_str()) !=
                nvinfer1::TensorFormat::kLINEAR);
 
-          common::TritonJson::Value reformat_free_tensor;
+          common::TritonJson::Value non_linear_format_io;
           if (mutable_io.Find(
-                  "is_reformat_free_tensor", &reformat_free_tensor)) {
-            bool reformat_free_tensor_val = false;
+                  "is_non_linear_format_io", &non_linear_format_io)) {
+            bool non_linear_format_io_val = false;
             RETURN_IF_ERROR(
-                reformat_free_tensor.AsBool(&reformat_free_tensor_val));
-            if (reformat_free_tensor_val && (!is_reformat_free_tensor)) {
+                non_linear_format_io.AsBool(&non_linear_format_io_val));
+            if (non_linear_format_io_val && (!is_non_linear_format_io)) {
               return TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_INVALID_ARG,
                   (std::string("'") + io_name +
-                   "' is incorrectly specified as a reformat free tensor.")
+                   "' uses a linear IO format, but 'is_non_linear_format_io' "
+                   "is incorrectly set to true.")
                       .c_str());
-            } else if (!reformat_free_tensor_val && is_reformat_free_tensor) {
+            } else if (!non_linear_format_io_val && is_non_linear_format_io) {
               RETURN_IF_ERROR(
-                  reformat_free_tensor.SetBool(is_reformat_free_tensor));
+                  non_linear_format_io.SetBool(is_non_linear_format_io));
             }
           } else {
             RETURN_IF_ERROR(mutable_io.AddBool(
-                "is_reformat_free_tensor", is_reformat_free_tensor));
+                "is_non_linear_format_io", is_non_linear_format_io));
           }
           break;
         }
diff --git a/src/tensorrt_utils.cc b/src/tensorrt_utils.cc
index 2a00a83..20f2288 100644
--- a/src/tensorrt_utils.cc
+++ b/src/tensorrt_utils.cc
@@ -491,6 +491,39 @@ DimsJsonToString(common::TritonJson::Value& dims)
   return ShapeToString(dims_vec);
 }
 
+const std::string
+TensorFormatToString(const nvinfer1::TensorFormat& io_format)
+{
+  switch (io_format) {
+    case nvinfer1::TensorFormat::kLINEAR:
+      return "LINEAR";
+    case nvinfer1::TensorFormat::kCHW2:
+      return "CHW2";
+    case nvinfer1::TensorFormat::kCHW4:
+      return "CHW4";
+    case nvinfer1::TensorFormat::kHWC8:
+      return "HWC8";
+    case nvinfer1::TensorFormat::kCHW16:
+      return "CHW16";
+    case nvinfer1::TensorFormat::kDHWC8:
+      return "DHWC8";
+    case nvinfer1::TensorFormat::kCDHW32:
+      return "CDHW32";
+    case nvinfer1::TensorFormat::kHWC:
+      return "HWC";
+    case nvinfer1::TensorFormat::kDLA_LINEAR:
+      return "DLA_LINEAR";
+    case nvinfer1::TensorFormat::kDLA_HWC4:
+      return "DLA_HWC4";
+    case nvinfer1::TensorFormat::kHWC16:
+      return "HWC16";
+    case nvinfer1::TensorFormat::kDHWC:
+      return "DHWC";
+    default:
+      return "INVALID";
+  }
+}
+
 TRITONSERVER_Error*
 SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support)
 {
diff --git a/src/tensorrt_utils.h b/src/tensorrt_utils.h
index 9944f3b..7bef8e6 100644
--- a/src/tensorrt_utils.h
+++ b/src/tensorrt_utils.h
@@ -108,6 +108,8 @@ const std::string DimsDebugString(const nvinfer1::Dims& dims);
 
 const std::string DimsJsonToString(common::TritonJson::Value& dims);
 
+const std::string TensorFormatToString(const nvinfer1::TensorFormat& io_format);
+
 TRITONSERVER_Error* SupportsIntegratedZeroCopy(
     const int gpu_id, bool* zero_copy_support);
 

From 42fafa96cff6410276dac49f0d3dee2bdb79f876 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Sat, 13 Jul 2024 20:33:03 +0530
Subject: [PATCH 3/7] Update error message

---
 src/instance_state.cc | 10 +++-------
 src/model_state.cc    |  2 +-
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/instance_state.cc b/src/instance_state.cc
index e6f0ff9..4ea1c1c 100644
--- a/src/instance_state.cc
+++ b/src/instance_state.cc
@@ -1909,10 +1909,8 @@ ModelInstanceState::ValidateIOHelper(
         return TRITONSERVER_ErrorNew(
             TRITONSERVER_ERROR_INTERNAL,
             (type + " '" + io_name + "' for model '" + model_state_->Name() +
-             "' uses a non-linear IO format, but the model configuration "
-             "does not specify it as such. Set "
-             "'is_non_linear_format_io' to true for " +
-             type + " '" + io_name + "'.")
+             "' uses a non-linear IO format, but 'is_non_linear_format_io' is "
+             "incorrectly set to false in the model configuration.")
                 .c_str());
       }
     } else {
@@ -1924,9 +1922,7 @@ ModelInstanceState::ValidateIOHelper(
             TRITONSERVER_ERROR_INTERNAL,
             (type + " '" + io_name + "' for model '" + model_state_->Name() +
              "' uses a linear IO format, but 'is_non_linear_format_io' is "
-             "incorrectly set to true. Set "
-             "'is_non_linear_format_io' to false for " +
-             type + " '" + io_name + "'.")
+             "incorrectly set to true in the model configuration.")
                 .c_str());
       }
     }
diff --git a/src/model_state.cc b/src/model_state.cc
index 76b6fbf..6127989 100644
--- a/src/model_state.cc
+++ b/src/model_state.cc
@@ -912,7 +912,7 @@ ModelState::FixIO(
                   TRITONSERVER_ERROR_INVALID_ARG,
                   (std::string("'") + io_name +
                    "' uses a linear IO format, but 'is_non_linear_format_io' "
-                   "is incorrectly set to true.")
+                   "is incorrectly set to true in the model configuration.")
                       .c_str());
             } else if (!non_linear_format_io_val && is_non_linear_format_io) {
               RETURN_IF_ERROR(

From 9aa17b33f098c007b2f58375139fe5077b185694 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Sat, 13 Jul 2024 22:28:27 +0530
Subject: [PATCH 4/7] Add CHW32 case to TensorFormatToString

---
 src/tensorrt_utils.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/tensorrt_utils.cc b/src/tensorrt_utils.cc
index 20f2288..9d390ea 100644
--- a/src/tensorrt_utils.cc
+++ b/src/tensorrt_utils.cc
@@ -505,6 +505,8 @@ TensorFormatToString(const nvinfer1::TensorFormat& io_format)
       return "HWC8";
     case nvinfer1::TensorFormat::kCHW16:
       return "CHW16";
+    case nvinfer1::TensorFormat::kCHW32:
+      return "CHW32";
     case nvinfer1::TensorFormat::kDHWC8:
       return "DHWC8";
     case nvinfer1::TensorFormat::kCDHW32:

From 4787111aeab8f8d1f662a80b8edb6940b6c4c6e5 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Sat, 13 Jul 2024 23:04:58 +0530
Subject: [PATCH 5/7] Reorder TensorFormatToString cases

---
 src/tensorrt_utils.cc | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/tensorrt_utils.cc b/src/tensorrt_utils.cc
index 9d390ea..5a58b09 100644
--- a/src/tensorrt_utils.cc
+++ b/src/tensorrt_utils.cc
@@ -501,26 +501,26 @@ TensorFormatToString(const nvinfer1::TensorFormat& io_format)
       return "CHW2";
     case nvinfer1::TensorFormat::kCHW4:
       return "CHW4";
-    case nvinfer1::TensorFormat::kHWC8:
-      return "HWC8";
     case nvinfer1::TensorFormat::kCHW16:
       return "CHW16";
     case nvinfer1::TensorFormat::kCHW32:
       return "CHW32";
+    case nvinfer1::TensorFormat::kDHWC:
+      return "DHWC";
     case nvinfer1::TensorFormat::kDHWC8:
       return "DHWC8";
-    case nvinfer1::TensorFormat::kCDHW32:
-      return "CDHW32";
     case nvinfer1::TensorFormat::kHWC:
       return "HWC";
+    case nvinfer1::TensorFormat::kHWC8:
+      return "HWC8";
+    case nvinfer1::TensorFormat::kHWC16:
+      return "HWC16";
+    case nvinfer1::TensorFormat::kCDHW32:
+      return "CDHW32";
     case nvinfer1::TensorFormat::kDLA_LINEAR:
       return "DLA_LINEAR";
     case nvinfer1::TensorFormat::kDLA_HWC4:
       return "DLA_HWC4";
-    case nvinfer1::TensorFormat::kHWC16:
-      return "HWC16";
-    case nvinfer1::TensorFormat::kDHWC:
-      return "DHWC";
     default:
       return "INVALID";
   }

From 8b5e3bae287694f1a0f5c75746dbdb2cbf2bd46e Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Sat, 13 Jul 2024 23:05:54 +0530
Subject: [PATCH 6/7] Return "UNKNOWN" for unrecognized tensor formats

---
 src/tensorrt_utils.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tensorrt_utils.cc b/src/tensorrt_utils.cc
index 5a58b09..e959488 100644
--- a/src/tensorrt_utils.cc
+++ b/src/tensorrt_utils.cc
@@ -522,7 +522,7 @@ TensorFormatToString(const nvinfer1::TensorFormat& io_format)
     case nvinfer1::TensorFormat::kDLA_HWC4:
       return "DLA_HWC4";
     default:
-      return "INVALID";
+      return "UNKNOWN";
   }
 }
 

From 5b8c2639945daab1ef3beebc976fd817cf5d16c5 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Fri, 19 Jul 2024 14:29:18 +0530
Subject: [PATCH 7/7] Improve error message

---
 src/instance_state.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/instance_state.cc b/src/instance_state.cc
index 4ea1c1c..56208a1 100644
--- a/src/instance_state.cc
+++ b/src/instance_state.cc
@@ -778,9 +778,9 @@ ModelInstanceState::Run(
               payload_->responses_,
               TRITONSERVER_ErrorNew(
                   TRITONSERVER_ERROR_INVALID_ARG,
-                  (std::string("tensor for input '") + name +
-                   "' expected byte size is " +
-                   std::to_string(total_byte_size) + ", got " +
+                  (std::string("input byte size mismatch for input '") + name +
+                   "'" + " for model '" + model_state_->Name() +
+                   "'. Expected " + std::to_string(total_byte_size) + ", got " +
                    std::to_string(req_data_byte_size))
                       .c_str()),
               "failed to run TRT inference");