Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix shape and reformat free tensor handling in the input byte size check #97

Merged
merged 7 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 67 additions & 6 deletions src/instance_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -723,11 +723,12 @@ ModelInstanceState::Run(
TRITONSERVER_DataType datatype;
const int64_t* shape;
uint32_t dims_count;
size_t req_data_byte_size;
FAIL_ALL_AND_RETURN_IF_ERROR(
payload_->requests_, payload_->request_count_, payload_->responses_,
TRITONBACKEND_InputProperties(
repr_input, nullptr, &datatype, &shape, &dims_count, nullptr,
nullptr),
repr_input, nullptr, &datatype, &shape, &dims_count,
&req_data_byte_size, nullptr),
(std::string("failed to obtain the representative input "
"properties for '") +
name + "'")
Expand Down Expand Up @@ -760,12 +761,30 @@ ModelInstanceState::Run(
size_t total_byte_size = 0;
if (io_binding_info.GetFormat().is_linear_format_) {
total_byte_size = GetByteSize(datatype, batchn_shape);
// For input tensors with a linear IO format, the request has already
// verified the byte size, so no further validation is needed here.
} else {
batchn_shape[io_binding_info.GetFormat().vectorized_dim_] +=
(io_binding_info.GetFormat().components_per_element_ -
(batchn_shape[io_binding_info.GetFormat().vectorized_dim_] %
io_binding_info.GetFormat().components_per_element_));
total_byte_size = GetByteSize(datatype, batchn_shape);

// Ensure the request data byte size matches the expected byte size for
// non-linear IO format tensors
if (req_data_byte_size != total_byte_size) {
FAIL_ALL_AND_RETURN_IF_ERROR(
payload_->requests_, payload_->request_count_,
payload_->responses_,
TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("input byte size mismatch for input '") + name +
"'" + " for model '" + model_state_->Name() +
"'. Expected " + std::to_string(total_byte_size) + ", got " +
std::to_string(req_data_byte_size))
.c_str()),
"failed to run TRT inference");
}
}

payload_->collector_->ProcessTensor(
Expand Down Expand Up @@ -1760,7 +1779,8 @@ ModelInstanceState::ValidateIO()
{
// Collect all the expected input and allowed output tensor names
// and validate that the model configuration specifies only those.
std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors;
std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors,
allowed_non_linear_format_io;
for (int i = 0; i < total_io_tensors_; ++i) {
const std::string& tensor_name = tensor_names_[i];
if (IsInput(engine_.get(), tensor_name)) {
Expand All @@ -1775,6 +1795,15 @@ ModelInstanceState::ValidateIO()
" as shape binding for " + Name())
.c_str());
}
auto detected_io_format = engine_->getTensorFormat(tensor_name.c_str());
if (detected_io_format != nvinfer1::TensorFormat::kLINEAR) {
allowed_non_linear_format_io.emplace(tensor_name);
LOG_MESSAGE(
TRITONSERVER_LOG_VERBOSE,
(std::string("Detected ") + tensor_name + " using IO format " +
TensorFormatToString(detected_io_format) + " for " + Name())
.c_str());
}
}

triton::common::TritonJson::Value config_inputs;
Expand Down Expand Up @@ -1808,17 +1837,21 @@ ModelInstanceState::ValidateIO()
}

RETURN_IF_ERROR(ValidateIOHelper(
config_inputs, allowed_shape_tensors, true /* is_input */));
config_inputs, allowed_shape_tensors, allowed_non_linear_format_io,
true /* is_input */));
RETURN_IF_ERROR(ValidateIOHelper(
config_outputs, allowed_shape_tensors, false /* is_input */));
config_outputs, allowed_shape_tensors, allowed_non_linear_format_io,
false /* is_input */));

return nullptr;
}

TRITONSERVER_Error*
ModelInstanceState::ValidateIOHelper(
common::TritonJson::Value& ios,
const std::set<std::string>& allowed_shape_tensors, const bool is_input)
const std::set<std::string>& allowed_shape_tensors,
const std::set<std::string>& allowed_non_linear_format_io,
const bool is_input)
{
std::string type = is_input ? "input" : "output";
for (size_t i = 0; i < ios.ArraySize(); i++) {
Expand Down Expand Up @@ -1865,6 +1898,34 @@ ModelInstanceState::ValidateIOHelper(
.c_str());
}
}

// Check the tensor IO format specification
if (allowed_non_linear_format_io.find(io_name) !=
allowed_non_linear_format_io.end()) {
bool is_non_linear_format_io = false;
RETURN_IF_ERROR(
io.MemberAsBool("is_non_linear_format_io", &is_non_linear_format_io));
if (!is_non_linear_format_io) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
(type + " '" + io_name + "' for model '" + model_state_->Name() +
"' uses a non-linear IO format, but 'is_non_linear_format_io' is "
"incorrectly set to false in the model configuration.")
.c_str());
}
} else {
bool is_non_linear_format_io = false;
RETURN_IF_ERROR(
io.MemberAsBool("is_non_linear_format_io", &is_non_linear_format_io));
if (is_non_linear_format_io) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
(type + " '" + io_name + "' for model '" + model_state_->Name() +
"' uses a linear IO format, but 'is_non_linear_format_io' is "
"incorrectly set to true in the model configuration.")
.c_str());
}
}
}

return nullptr;
Expand Down
4 changes: 3 additions & 1 deletion src/instance_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,9 @@ class ModelInstanceState : public TensorRTModelInstance {
TRITONSERVER_Error* ValidateIO();
TRITONSERVER_Error* ValidateIOHelper(
common::TritonJson::Value& ios,
const std::set<std::string>& allowed_shape_tensors, const bool is_input);
const std::set<std::string>& allowed_shape_tensors,
const std::set<std::string>& allowed_non_linear_format_io,
const bool is_input);

TRITONSERVER_Error* InitIOBindingBuffers();
TRITONSERVER_Error* InitializeConfigShapeInputBindings(
Expand Down
47 changes: 37 additions & 10 deletions src/model_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,10 @@ ModelState::GetRefIO(
for (int i = 0; i < num_io_tensors; ++i) {
const std::string& tensor_name = engine->getIOTensorName(i);
nvinfer1::Dims dims = engine->getTensorShape(tensor_name.c_str());
bool is_shape_binding = engine->isShapeInferenceIO(tensor_name.c_str());
bool is_shape_tensor = engine->isShapeInferenceIO(tensor_name.c_str());
bool is_non_linear_format_io =
(engine->getTensorFormat(tensor_name.c_str()) !=
nvinfer1::TensorFormat::kLINEAR);
if ((is_input && (!IsInput(engine, tensor_name))) ||
((!is_input) && (IsInput(engine, tensor_name)))) {
continue;
Expand All @@ -766,8 +769,10 @@ ModelState::GetRefIO(
RETURN_IF_ERROR(io.AddString(
"data_type", ConvertTrtTypeToConfigDataType(
engine->getTensorDataType(tensor_name.c_str()))));
RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_binding, &io));
RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_binding));
RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_tensor, &io));
RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_tensor));
RETURN_IF_ERROR(
io.AddBool("is_non_linear_format_io", is_non_linear_format_io));

RETURN_IF_ERROR(ref_io->Append(std::move(io)));
}
Expand All @@ -777,13 +782,13 @@ ModelState::GetRefIO(

TRITONSERVER_Error*
ModelState::InitIODims(
nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_binding,
nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_tensor,
triton::common::TritonJson::Value* io)
{
bool skip_first = (MaxBatchSize() != 0);
triton::common::TritonJson::Value config_dims(
ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
if (!is_shape_binding) {
if (!is_shape_tensor) {
for (int didx = (skip_first ? 1 : 0); didx < dims.nbDims; ++didx) {
RETURN_IF_ERROR(config_dims.AppendInt(dims.d[didx]));
}
Expand Down Expand Up @@ -871,8 +876,7 @@ ModelState::FixIO(
}

// Check if the IO is a shape tensor.
bool is_shape_tensor = false;
is_shape_tensor = engine->isShapeInferenceIO(io_name.c_str());
bool is_shape_tensor = engine->isShapeInferenceIO(io_name.c_str());

common::TritonJson::Value shape_tensor;
if (mutable_io.Find("is_shape_tensor", &shape_tensor)) {
Expand All @@ -885,15 +889,38 @@ ModelState::FixIO(
"' is incorrectly specified as a shape tensor.")
.c_str());
} else if (!shape_tensor_val && is_shape_tensor) {
RETURN_IF_ERROR(shape_tensor.SetBool(is_shape_tensor));
}
} else {
RETURN_IF_ERROR(
mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
}

// Verify if the IO format is non-linear.
bool is_non_linear_format_io =
(engine->getTensorFormat(io_name.c_str()) !=
nvinfer1::TensorFormat::kLINEAR);

common::TritonJson::Value non_linear_format_io;
if (mutable_io.Find(
"is_non_linear_format_io", &non_linear_format_io)) {
bool non_linear_format_io_val = false;
RETURN_IF_ERROR(
non_linear_format_io.AsBool(&non_linear_format_io_val));
if (non_linear_format_io_val && (!is_non_linear_format_io)) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("'") + io_name +
"' is incorrectly specified as an execution tensor.")
"' uses a linear IO format, but 'is_non_linear_format_io' "
"is incorrectly set to true in the model configuration.")
.c_str());
} else if (!non_linear_format_io_val && is_non_linear_format_io) {
RETURN_IF_ERROR(
non_linear_format_io.SetBool(is_non_linear_format_io));
}
} else {
RETURN_IF_ERROR(
mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
RETURN_IF_ERROR(mutable_io.AddBool(
"is_non_linear_format_io", is_non_linear_format_io));
}
break;
}
Expand Down
4 changes: 2 additions & 2 deletions src/model_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ class ModelState : public TensorRTModel {
const bool is_input, nvinfer1::ICudaEngine* engine,
triton::common::TritonJson::Value* ref_io);
TRITONSERVER_Error* InitIODims(
nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims,
bool is_shape_binding, triton::common::TritonJson::Value* io);
nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_tensor,
triton::common::TritonJson::Value* io);
TRITONSERVER_Error* FixIO(
nvinfer1::ICudaEngine* engine,
triton::common::TritonJson::Value& reference_ios,
Expand Down
35 changes: 35 additions & 0 deletions src/tensorrt_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,41 @@ DimsJsonToString(common::TritonJson::Value& dims)
return ShapeToString(dims_vec);
}

// Returns the printable name of a TensorRT tensor IO format. The name is the
// enumerator spelled without its leading 'k' (for example kCHW32 -> "CHW32").
// Any value that is not explicitly listed below is reported as "UNKNOWN".
const std::string
TensorFormatToString(const nvinfer1::TensorFormat& io_format)
{
  // Start from the fallback label and overwrite it when the format is
  // recognized, so that there is a single exit point.
  const char* label = "UNKNOWN";

  switch (io_format) {
    case nvinfer1::TensorFormat::kLINEAR:
      label = "LINEAR";
      break;
    case nvinfer1::TensorFormat::kCHW2:
      label = "CHW2";
      break;
    case nvinfer1::TensorFormat::kCHW4:
      label = "CHW4";
      break;
    case nvinfer1::TensorFormat::kCHW16:
      label = "CHW16";
      break;
    case nvinfer1::TensorFormat::kCHW32:
      label = "CHW32";
      break;
    case nvinfer1::TensorFormat::kDHWC:
      label = "DHWC";
      break;
    case nvinfer1::TensorFormat::kDHWC8:
      label = "DHWC8";
      break;
    case nvinfer1::TensorFormat::kHWC:
      label = "HWC";
      break;
    case nvinfer1::TensorFormat::kHWC8:
      label = "HWC8";
      break;
    case nvinfer1::TensorFormat::kHWC16:
      label = "HWC16";
      break;
    case nvinfer1::TensorFormat::kCDHW32:
      label = "CDHW32";
      break;
    case nvinfer1::TensorFormat::kDLA_LINEAR:
      label = "DLA_LINEAR";
      break;
    case nvinfer1::TensorFormat::kDLA_HWC4:
      label = "DLA_HWC4";
      break;
    default:
      // Keep the "UNKNOWN" fallback for formats not handled above.
      break;
  }

  return std::string(label);
}

TRITONSERVER_Error*
SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support)
{
Expand Down
2 changes: 2 additions & 0 deletions src/tensorrt_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ const std::string DimsDebugString(const nvinfer1::Dims& dims);

const std::string DimsJsonToString(common::TritonJson::Value& dims);

const std::string TensorFormatToString(const nvinfer1::TensorFormat& io_format);

TRITONSERVER_Error* SupportsIntegratedZeroCopy(
const int gpu_id, bool* zero_copy_support);

Expand Down
Loading