forked from cding-nv/deepstream-openpose
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresize_merge_cpu.cpp
executable file
·123 lines (108 loc) · 5.63 KB
/
resize_merge_cpu.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#include "resize_merge_cpu.h"
#include <memory>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/imgproc.hpp>
// Evaluates its argument in a discarded (void) expression so that a parameter or
// variable that is deliberately not used still counts as referenced
// (presumably to keep unused-variable/parameter compiler warnings quiet).
#define UNUSED(unusedVariable) (void)(unusedVariable)
template <typename T>
void resizeAndMergeCpu(T* targetPtr, const std::vector<const T*>& sourcePtrs,
const std::array<int, 4>& targetSize,
const std::vector<std::array<int, 4>>& sourceSizes,
const std::vector<T>& scaleInputToNetInputs)
{
try
{
// Scale used in CUDA/CL to know scale ratio between input and output
// CPU directly uses sourceWidth/Height and targetWidth/Height
UNUSED(scaleInputToNetInputs);
// Sanity check
if (sourceSizes.empty())
printf("sourceSizes cannot be empty. %d, %s, %s\n", __LINE__, __FUNCTION__, __FILE__);
// Params
const auto nums = (signed)sourceSizes.size();
const auto channels = targetSize[1]; // 57
const auto targetHeight = targetSize[2]; // 368
const auto targetWidth = targetSize[3]; // 496
const auto targetChannelOffset = targetWidth * targetHeight;
// No multi-scale merging or no merging required
if (sourceSizes.size() == 1)
{
// Params
const auto& sourceSize = sourceSizes[0];
const auto sourceHeight = sourceSize[2]; // 368/8 ..
const auto sourceWidth = sourceSize[3]; // 496/8 ..
const auto sourceChannelOffset = sourceHeight * sourceWidth;
if (sourceSize[0] != 1)
printf("It should never reache this point. Notify us otherwise. %d, %s, %s\n",
__LINE__, __FUNCTION__, __FILE__);
// Per channel resize
const T* sourcePtr = sourcePtrs[0];
for (auto c = 0 ; c < channels ; c++)
{
cv::Mat source(cv::Size(sourceWidth, sourceHeight), CV_32FC1,
const_cast<T*>(&sourcePtr[c*sourceChannelOffset]));
cv::Mat target(cv::Size(targetWidth, targetHeight), CV_32FC1,
(&targetPtr[c*targetChannelOffset]));
cv::resize(source, target, {targetWidth, targetHeight}, 0, 0, cv::INTER_CUBIC);
}
}
// Multi-scale merging
else
{
// Construct temp targets. We resuse targetPtr to store first scale
std::vector<std::unique_ptr<T>> tempTargetPtrs;
for (auto n = 1; n < nums; n++){
tempTargetPtrs.emplace_back(std::unique_ptr<T>(new T[targetChannelOffset * channels]()));
}
// Resize and sum
for (auto n = 0; n < nums; n++){
// Params
const auto& sourceSize = sourceSizes[n];
const auto sourceHeight = sourceSize[2]; // 368/6 ..
const auto sourceWidth = sourceSize[3]; // 496/8 ..
const auto sourceChannelOffset = sourceHeight * sourceWidth;
// Access pointers
const T* sourcePtr = sourcePtrs[n];
T* tempTargetPtr;
if (n != 0)
tempTargetPtr = tempTargetPtrs[n-1].get();
else
tempTargetPtr = targetPtr;
T* firstTempTargetPtr = targetPtr;
for (auto c = 0 ; c < channels ; c++)
{
// Resize
cv::Mat source(cv::Size(sourceWidth, sourceHeight), CV_32FC1,
const_cast<T*>(&sourcePtr[c*sourceChannelOffset]));
cv::Mat target(cv::Size(targetWidth, targetHeight), CV_32FC1,
(&tempTargetPtr[c*targetChannelOffset]));
cv::resize(source, target, {targetWidth, targetHeight}, 0, 0, cv::INTER_CUBIC);
// Add
if (n != 0)
{
cv::Mat addTarget(cv::Size(targetWidth, targetHeight), CV_32FC1,
(&firstTempTargetPtr[c*targetChannelOffset]));
cv::add(target, addTarget, addTarget);
}
}
}
// Average
for (auto c = 0 ; c < channels ; c++)
{
cv::Mat target(cv::Size(targetWidth, targetHeight), CV_32FC1, (&targetPtr[c*targetChannelOffset]));
target /= (float)nums;
}
}
}
catch (const std::exception& e)
{
printf("exception: %s, %d, %s, %s\n", e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
template void resizeAndMergeCpu(
float* targetPtr, const std::vector<const float*>& sourcePtrs, const std::array<int, 4>& targetSize,
const std::vector<std::array<int, 4>>& sourceSizes, const std::vector<float>& scaleInputToNetInputs);
template void resizeAndMergeCpu(
double* targetPtr, const std::vector<const double*>& sourcePtrs, const std::array<int, 4>& targetSize,
const std::vector<std::array<int, 4>>& sourceSizes, const std::vector<double>& scaleInputToNetInputs);