Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add yolo v8 #71

Merged
merged 28 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test_accuracy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
run: |
source venv/bin/activate
pytest --data=./data tests/python/accuracy/test_accuracy.py
DATA=data pytest --data=./data tests/python/accuracy/test_YOLOv8.py
- name: Install CPP dependencies
run: |
sudo bash model_api/cpp/install_dependencies.sh
Expand All @@ -40,3 +41,4 @@ jobs:
- name: Run CPP Test
run: |
build/test_accuracy -d data -p tests/python/accuracy/public_scope.json
DATA=data build/test_YOLOv8
3 changes: 3 additions & 0 deletions docs/model-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ The list features only model wrappers which intoduce new configuration values in
###### `YoloV4`
1. `anchors`: List - list of custom anchor values
1. `masks`: List - list of mask, applied to anchors for each output layer
###### `YOLOv5`, `YOLOv8`
1. `agnostic_nms`: bool - if True, non-maximum suppression (NMS) is class-agnostic: boxes of all classes are suppressed together, as if they belonged to a single class; otherwise NMS is applied per class
1. `iou_threshold`: float - threshold for non-maximum suppression (NMS) intersection over union (IOU) filtering
###### `YOLOX`
1. `iou_threshold`: float - threshold for non-maximum suppression (NMS) intersection over union (IOU) filtering
#### `HpeAssociativeEmbedding`
Expand Down
21 changes: 21 additions & 0 deletions model_api/cpp/models/include/models/detection_model_yolo.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,24 @@ class ModelYolo : public DetectionModelExt {
std::vector<int64_t> presetMasks;
ov::Layout yoloRegionLayout = "NCHW";
};

/// Detection model wrapper for YOLOv5-style networks.
/// Reimplementation of ultralytics.YOLO.
///
/// The members declared before `public:` are private (class default access).
class YOLOv5 : public DetectionModelExt {
    /// Validates the model input/output and embeds preprocessing into the model.
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    /// Stores the wrapper-specific settings in the model's "model_info" rt_info section.
    void updateModelInfo() override;
    /// Reads the wrapper settings, looking each key up in both maps via get_from_any_maps().
    void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority);
    /// When true, NMS ignores class labels; when false, multiclass NMS is used.
    bool agnostic_nms = false;

public:
    YOLOv5(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration);
    YOLOv5(std::shared_ptr<InferenceAdapter>& adapter);
    /// Converts the raw inference output into a DetectionResult.
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
    /// Value of "model_type" that selects this wrapper.
    static std::string ModelType;
};

/// YOLOv8 wrapper. It only forwards construction to YOLOv5 and carries its own
/// ModelType string: YOLOv5 and YOLOv8 are identical in terms of inference.
class YOLOv8 : public YOLOv5 {
public:
    YOLOv8(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
        : YOLOv5(model, configuration) {}

    YOLOv8(std::shared_ptr<InferenceAdapter>& adapter)
        : YOLOv5(adapter) {}

    /// Value of "model_type" that selects this wrapper.
    static std::string ModelType;
};
4 changes: 4 additions & 0 deletions model_api/cpp/models/src/detection_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ std::unique_ptr<DetectionModel> DetectionModel::create_model(const std::string&
detectionModel = std::unique_ptr<DetectionModel>(new ModelYoloX(model, configuration));
} else if (model_type == ModelCenterNet::ModelType) {
detectionModel = std::unique_ptr<DetectionModel>(new ModelCenterNet(model, configuration));
} else if (model_type == YOLOv5::ModelType) {
detectionModel = std::unique_ptr<DetectionModel>(new YOLOv5(model, configuration));
} else if (model_type == YOLOv8::ModelType) {
detectionModel = std::unique_ptr<DetectionModel>(new YOLOv8(model, configuration));
} else {
throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + model_type);
}
Expand Down
2 changes: 1 addition & 1 deletion model_api/cpp/models/src/detection_model_faceboxes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResu
std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);

// Apply Non-maximum Suppression
const std::vector<int> keep = nms(boxes, scores.second, iou_threshold);
const std::vector<size_t>& keep = nms(boxes, scores.second, iou_threshold);

// Create detection result objects
DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
Expand Down
18 changes: 10 additions & 8 deletions model_api/cpp/models/src/detection_model_ssd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,13 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& i
0.f,
floatInputImgHeight);
desc.width = clamp(
round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX - desc.x),
round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX),
0.f,
floatInputImgWidth);
floatInputImgWidth) - desc.x;
desc.height = clamp(
round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY - desc.y),
0.f, floatInputImgHeight);
round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY),
0.f,
floatInputImgHeight) - desc.y;
sovrasov marked this conversation as resolved.
Show resolved Hide resolved
result->objects.push_back(desc);
}
}
Expand Down Expand Up @@ -223,12 +224,13 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult
0.f,
floatInputImgHeight);
desc.width = clamp(
round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX - desc.x),
round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX),
0.f,
floatInputImgWidth);
floatInputImgWidth) - desc.x;
desc.height = clamp(
round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY - desc.y),
0.f, floatInputImgHeight);
round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY),
0.f,
floatInputImgHeight) - desc.y;
result->objects.push_back(desc);
}
}
Expand Down
169 changes: 169 additions & 0 deletions model_api/cpp/models/src/detection_model_yolo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <openvino/openvino.hpp>

#include <utils/common.hpp>
#include <utils/nms.hpp>
#include <utils/slog.hpp>

#include "models/internal_model_data.h"
Expand Down Expand Up @@ -504,3 +505,171 @@ ModelYolo::Region::Region(size_t classes,
num = anchors.size() / 2;
}
}

// "model_type" value that DetectionModel::create_model() matches to build a YOLOv5.
std::string YOLOv5::ModelType{"YOLOv5"};

// Checks that the model has a rank-4 input and a rank-3 f32 output, registers the
// input name and, unless preprocessing is already embedded, embeds it into the model.
// Throws std::runtime_error when the input/output does not have the expected form.
void YOLOv5::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // ---- Input ----
    const ov::Output<ov::Node>& image_input = model->input();
    const ov::Shape& input_dims = image_input.get_partial_shape().get_max_shape();
    if (4 != input_dims.size()) {
        throw std::runtime_error("YOLO: the rank of the input must be 4");
    }
    inputNames.push_back(image_input.get_any_name());
    const ov::Layout& layout = getInputLayout(image_input);

    if (!embedded_processing) {
        // Spatial size of the network input, taken from the layout's W and H positions.
        const auto net_width = input_dims[ov::layout::width_idx(layout)];
        const auto net_height = input_dims[ov::layout::height_idx(layout)];

        model = ImageModel::embedProcessing(model,
                                            inputNames[0],
                                            layout,
                                            resizeMode,
                                            interpolationMode,
                                            ov::Shape{net_width, net_height},
                                            pad_value,
                                            reverse_input_channels,
                                            {},
                                            scale_values);

        netInputWidth = net_width;
        netInputHeight = net_height;

        embedded_processing = true;
    }

    // ---- Output ----
    const ov::Output<const ov::Node>& detections_output = model->output();
    if (ov::element::Type_t::f32 != detections_output.get_element_type()) {
        throw std::runtime_error("YOLO: the output must be of precision f32");
    }
    const ov::Shape& output_dims = detections_output.get_partial_shape().get_max_shape();
    if (3 != output_dims.size()) {
        throw std::runtime_error("YOLO: the output must be of rank 3");
    }
    // Dimension 1 holds 4 box values followed by one score per label.
    const bool labels_given = !labels.empty();
    if (labels_given && labels.size() + 4 != output_dims[1]) {
        throw std::runtime_error("YOLO: number of labels must be smaller than out_shape[1] by 4");
    }
}

// Extends the base-class model info with the settings specific to this wrapper.
// NOTE(review): the model type written here is always YOLOv5::ModelType, also for a
// YOLOv8 instance — confirm this is intended.
void YOLOv5::updateModelInfo() {
    DetectionModelExt::updateModelInfo();

    // All entries are written under the same rt_info section.
    const char* const section = "model_info";
    model->set_rt_info(YOLOv5::ModelType, section, "model_type");
    model->set_rt_info(agnostic_nms, section, "agnostic_nms");
    model->set_rt_info(iou_threshold, section, "iou_threshold");
}

// Fills the wrapper settings from two configuration maps, falling back to the
// defaults listed here when a key is present in neither of them.
void YOLOv5::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
    pad_value = get_from_any_maps("pad_value", top_priority, mid_priority, 114);

    // Letterbox resizing with linear interpolation is applied only when no
    // "resize_type" was supplied in either map.
    const bool resize_type_configured =
        top_priority.count("resize_type") != 0 || mid_priority.count("resize_type") != 0;
    if (!resize_type_configured) {
        interpolationMode = cv::INTER_LINEAR;
        resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
    }

    reverse_input_channels = get_from_any_maps("reverse_input_channels", top_priority, mid_priority, true);
    scale_values = get_from_any_maps("scale_values", top_priority, mid_priority, std::vector<float>{255.0f});
    confidence_threshold = get_from_any_maps("confidence_threshold", top_priority, mid_priority, 0.25f);
    // The current member value (false unless changed) serves as the default.
    agnostic_nms = get_from_any_maps("agnostic_nms", top_priority, mid_priority, agnostic_nms);
    iou_threshold = get_from_any_maps("iou_threshold", top_priority, mid_priority, 0.7f);
}

// Builds the wrapper from an ov::Model. Settings are read from the user configuration
// and from the model's "model_info" rt_info section.
YOLOv5::YOLOv5(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
    : DetectionModelExt(model, configuration) {
    const auto& model_info = model->get_rt_info<ov::AnyMap>("model_info");
    init_from_config(configuration, model_info);
}

// Builds the wrapper on top of an inference adapter. The adapter's model config is
// the only source of settings, so the second map is left empty.
YOLOv5::YOLOv5(std::shared_ptr<InferenceAdapter>& adapter)
    : DetectionModelExt(adapter) {
    const ov::AnyMap no_secondary_config{};
    init_from_config(adapter->getModelConfig(), no_secondary_config);
}

// Converts the raw network output into a DetectionResult.
//
// The single output tensor must have shape [1, C, N]. It is indexed as
// detections[channel * N + proposal]: channels 0..3 are read as box centre x,
// centre y, width and height, and every channel from 4 on is a per-class score.
//
// Steps:
//   1. for every proposal take the best class score and keep the proposal if the
//      score exceeds confidence_threshold;
//   2. run class-agnostic or multiclass NMS, depending on agnostic_nms;
//   3. map the surviving boxes from network-input coordinates back to the
//      original image, undoing the resize/letterbox padding, and clamp them
//      to the image.
//
// Throws std::runtime_error when the number of outputs or the output shape is
// not the expected one.
std::unique_ptr<ResultBase> YOLOv5::postprocess(InferenceResult& infResult) {
    if (1 != infResult.outputsData.size()) {
        throw std::runtime_error("YOLO: expect 1 output");
    }
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    const ov::Shape& out_shape = detectionsTensor.get_shape();
    if (3 != out_shape.size()) {
        throw std::runtime_error("YOLO: the output must be of rank 3");
    }
    if (1 != out_shape[0]) {
        throw std::runtime_error("YOLO: the first dim of the output must be 1");
    }
    // Index of the first class-score channel; channels before it describe the box.
    constexpr size_t LABELS_START = 4;
    const size_t num_channels = out_shape[1];
    const size_t num_proposals = out_shape[2];
    std::vector<Anchor> boxes;
    std::vector<float> confidences;
    std::vector<size_t> labelIDs;
    const float* const detections = detectionsTensor.data<float>();
    // Without class channels there is nothing to detect. Skipping the loop also
    // keeps the box reads below inside the tensor when a negative
    // confidence_threshold is configured.
    if (num_channels > LABELS_START) {
        for (size_t i = 0; i < num_proposals; ++i) {
            float confidence = 0.0f;
            // Start the argmax at the first class channel so that the label id
            // computed below can never wrap around, even when no score is positive.
            size_t max_id = LABELS_START;
            for (size_t j = LABELS_START; j < num_channels; ++j) {
                if (detections[j * num_proposals + i] > confidence) {
                    confidence = detections[j * num_proposals + i];
                    max_id = j;
                }
            }
            if (confidence > confidence_threshold) {
                // Convert (centre x, centre y, width, height) to (left, top, right, bottom).
                boxes.push_back(Anchor{
                    detections[0 * num_proposals + i] - detections[2 * num_proposals + i] / 2.0f,
                    detections[1 * num_proposals + i] - detections[3 * num_proposals + i] / 2.0f,
                    detections[0 * num_proposals + i] + detections[2 * num_proposals + i] / 2.0f,
                    detections[1 * num_proposals + i] + detections[3 * num_proposals + i] / 2.0f,
                });
                confidences.push_back(confidence);
                labelIDs.push_back(max_id - LABELS_START);
            }
        }
    }
    constexpr bool includeBoundaries = false;
    constexpr size_t keep_top_k = 30000;
    std::vector<size_t> keep;
    if (agnostic_nms) {
        keep = nms(boxes, confidences, iou_threshold, includeBoundaries, keep_top_k);
    } else {
        std::vector<AnchorLabeled> boxes_with_class;
        boxes_with_class.reserve(boxes.size());
        for (size_t i = 0; i < boxes.size(); ++i) {
            boxes_with_class.emplace_back(boxes[i], int(labelIDs[i]));
        }
        keep = multiclass_nms(boxes_with_class, confidences, iou_threshold, includeBoundaries, keep_top_k);
    }
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    // Take ownership immediately so the result is released if anything below throws.
    auto base = std::unique_ptr<ResultBase>(result);
    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    float floatInputImgWidth = float(internalData.inputImgWidth),
          floatInputImgHeight = float(internalData.inputImgHeight);
    float invertedScaleX = floatInputImgWidth / netInputWidth,
          invertedScaleY = floatInputImgHeight / netInputHeight;
    int padLeft = 0, padTop = 0;
    if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
        // Aspect-preserving resize uses one scale for both axes.
        invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY);
        if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
            // Letterbox centres the resized image, so half of the free space is padding.
            padLeft = (netInputWidth - int(std::round(floatInputImgWidth / invertedScaleX))) / 2;
            padTop = (netInputHeight - int(std::round(floatInputImgHeight / invertedScaleY))) / 2;
        }
    }
    for (size_t idx : keep) {
        DetectedObject desc;
        desc.x = clamp(
            round((boxes[idx].left - padLeft) * invertedScaleX),
            0.f,
            floatInputImgWidth);
        desc.y = clamp(
            round((boxes[idx].top - padTop) * invertedScaleY),
            0.f,
            floatInputImgHeight);
        // Clamp the far corner first and only then subtract the origin, so the
        // resulting box stays inside the image.
        desc.width = clamp(
            round((boxes[idx].right - padLeft) * invertedScaleX),
            0.f,
            floatInputImgWidth) - desc.x;
        desc.height = clamp(
            round((boxes[idx].bottom - padTop) * invertedScaleY),
            0.f,
            floatInputImgHeight) - desc.y;
        desc.confidence = confidences[idx];
        desc.labelID = static_cast<size_t>(labelIDs[idx]);
        desc.label = getLabelName(desc.labelID);
        result->objects.push_back(desc);
    }
    return base;
}

// "model_type" value that DetectionModel::create_model() matches to build a YOLOv8.
std::string YOLOv8::ModelType{"YOLOv8"};
4 changes: 2 additions & 2 deletions model_api/cpp/models/src/detection_model_yolox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult)
}

// NMS for valid boxes
std::vector<int> keep = nms(validBoxes, scores, iou_threshold, true);
for (auto& index: keep) {
const std::vector<size_t>& keep = nms(validBoxes, scores, iou_threshold, true);
for (size_t index: keep) {
// Create new detected box
DetectedObject obj;
obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
Expand Down
29 changes: 14 additions & 15 deletions model_api/cpp/utils/include/utils/nms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ struct AnchorLabeled : public Anchor {
AnchorLabeled() = default;
AnchorLabeled(float _left, float _top, float _right, float _bottom, int _labelID) :
Anchor(_left, _top, _right, _bottom), labelID(_labelID) {}
AnchorLabeled(const Anchor& coords, int labelID) : Anchor{coords}, labelID{labelID} {}
};

template <typename Anchor>
std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores,
const float thresh, bool includeBoundaries=false, size_t maxNum=0) {
if (maxNum == 0) {
maxNum = boxes.size();
std::vector<size_t> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores, const float thresh, bool includeBoundaries=false, size_t keep_top_k=0) {
if (keep_top_k == 0) {
keep_top_k = boxes.size();
}
std::vector<float> areas(boxes.size());
for (size_t i = 0; i < boxes.size(); ++i) {
Expand All @@ -67,25 +67,24 @@ std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>&
std::sort(order.begin(), order.end(), [&scores](int o1, int o2) { return scores[o1] > scores[o2]; });

size_t ordersNum = 0;
for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < maxNum; ordersNum++);
for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < keep_top_k; ordersNum++);

std::vector<int> keep;
std::vector<size_t> keep;
bool shouldContinue = true;
for (size_t i = 0; shouldContinue && i < ordersNum; ++i) {
auto idx1 = order[i];
int idx1 = order[i];
if (idx1 >= 0) {
keep.push_back(idx1);
shouldContinue = false;
for (size_t j = i + 1; j < ordersNum; ++j) {
auto idx2 = order[j];
int idx2 = order[j];
if (idx2 >= 0) {
shouldContinue = true;
auto overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
auto overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
auto intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
auto overlap = intersection / (areas[idx1] + areas[idx2] - intersection);

if (overlap >= thresh) {
float overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
float overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
float intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
float union_area = areas[idx1] + areas[idx2] - intersection;
if (0.0f == union_area || intersection / union_area > thresh) {
order[j] = -1;
}
}
Expand All @@ -95,5 +94,5 @@ std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>&
return keep;
}

std::vector<int> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
std::vector<size_t> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
const float iou_threshold=0.45f, bool includeBoundaries=false, size_t maxNum=200);
2 changes: 1 addition & 1 deletion model_api/cpp/utils/src/nms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "utils/nms.hpp"


std::vector<int> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
std::vector<size_t> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
const float iou_threshold, bool includeBoundaries, size_t maxNum) {
std::vector<Anchor> boxes_copy;
boxes_copy.reserve(boxes.size());
Expand Down
4 changes: 3 additions & 1 deletion model_api/python/openvino/model_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
add_rotated_rects,
get_contours,
)
from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4
from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4, YOLOv5, YOLOv8

classification_models = [
"resnet-18-pytorch",
Expand Down Expand Up @@ -118,6 +118,8 @@
"YOLO",
"YoloV3ONNX",
"YoloV4",
"YOLOv5",
"YOLOv8",
"YOLOF",
"YOLOX",
"ClassificationResult",
Expand Down
Loading