From 783e6373670be009046d10b9cd4288e07f873d56 Mon Sep 17 00:00:00 2001 From: Monotosh Das Date: Fri, 17 Sep 2021 14:56:54 +0530 Subject: [PATCH 1/2] RGB32F, RGB32F_PLANAR, and RGB32F_PLANAR contiguous added (#241) * avoiding CUDA_ERROR_DEINITIALIZED in ~CudaResMgr * Cmake improvement * Cmake made even with master * cmake status message omitted * before debug * intermidiate * cmake build in linux fixed * RGB32F export and download working * linux modification * deleted gitignore * Pixel_Format name modified * removed typos * CMake typos changed * typos * upload resize functionalities added fro RGB32F * RGB_32F_PLANAR_CONTIGUOUS omitted --- PyNvCodec/TC/inc/MemoryInterfaces.hpp | 74 +++++++++++- PyNvCodec/TC/src/MemoryInterfaces.cpp | 166 ++++++++++++++++++++++++++ PyNvCodec/TC/src/Tasks.cpp | 126 ++++++++++++++++++- PyNvCodec/TC/src/TasksColorCvt.cpp | 102 +++++++++++++++- PyNvCodec/inc/PyNvCodec.hpp | 6 + PyNvCodec/src/PyNvCodec.cpp | 69 ++++++++++- 6 files changed, 534 insertions(+), 9 deletions(-) diff --git a/PyNvCodec/TC/inc/MemoryInterfaces.hpp b/PyNvCodec/TC/inc/MemoryInterfaces.hpp index cd602b19..1e338ebf 100644 --- a/PyNvCodec/TC/inc/MemoryInterfaces.hpp +++ b/PyNvCodec/TC/inc/MemoryInterfaces.hpp @@ -1,5 +1,7 @@ /* * Copyright 2019 NVIDIA Corporation + * Copyright 2021 Videonetics Technology Private Limited + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -31,6 +33,8 @@ enum Pixel_Format { BGR = 6, YCBCR = 7, YUV444 = 8, + RGB_32F = 9, + RGB_32F_PLANAR = 10, }; enum ColorSpace { @@ -535,4 +539,72 @@ class DllExport SurfaceYUV444 : public SurfaceRGBPlanar { bool DllExport CheckAllocationCounters(); #endif -} // namespace VPF \ No newline at end of file +/* 32-bit float RGB image; + */ +class DllExport SurfaceRGB32F : public Surface { +public: + ~SurfaceRGB32F(); + + SurfaceRGB32F(); + SurfaceRGB32F(const SurfaceRGB32F &other); + SurfaceRGB32F(uint32_t width, uint32_t height, CUcontext context); + SurfaceRGB32F &operator=(const SurfaceRGB32F &other); + + Surface *Clone() override; + Surface *Create() override; + + uint32_t Width(uint32_t planeNumber = 0U) const override; + uint32_t WidthInBytes(uint32_t planeNumber = 0U) const override; + uint32_t Height(uint32_t planeNumber = 0U) const override; + uint32_t Pitch(uint32_t planeNumber = 0U) const override; + uint32_t HostMemSize() const override; + + CUdeviceptr PlanePtr(uint32_t planeNumber = 0U) override; + Pixel_Format PixelFormat() const override { return RGB_32F; } + uint32_t NumPlanes() const override { return 1; } + virtual uint32_t ElemSize() const override { return sizeof(float); } + bool Empty() const override { return 0UL == plane.GpuMem(); } + + void Update(const SurfacePlane &newPlane); + bool Update(SurfacePlane *pPlanes, size_t planesNum) override; + SurfacePlane *GetSurfacePlane(uint32_t planeNumber = 0U) override; + +protected: + SurfacePlane plane; +}; + +/* 32-bit float planar RGB image; + */ +class DllExport SurfaceRGB32FPlanar : public Surface { +public: + ~SurfaceRGB32FPlanar(); + + SurfaceRGB32FPlanar(); + SurfaceRGB32FPlanar(const SurfaceRGB32FPlanar &other); + SurfaceRGB32FPlanar(uint32_t width, uint32_t height, CUcontext context); + SurfaceRGB32FPlanar &operator=(const SurfaceRGB32FPlanar &other); + + virtual Surface *Clone() override; + virtual Surface *Create() override; + + uint32_t 
Width(uint32_t planeNumber = 0U) const override; + uint32_t WidthInBytes(uint32_t planeNumber = 0U) const override; + uint32_t Height(uint32_t planeNumber = 0U) const override; + uint32_t Pitch(uint32_t planeNumber = 0U) const override; + uint32_t HostMemSize() const override; + + CUdeviceptr PlanePtr(uint32_t planeNumber = 0U) override; + Pixel_Format PixelFormat() const override { return RGB_32F_PLANAR; } + uint32_t NumPlanes() const override { return 3; } + virtual uint32_t ElemSize() const override { return sizeof(float); } + bool Empty() const override { return 0UL == plane.GpuMem(); } + + void Update(const SurfacePlane &newPlane); + bool Update(SurfacePlane *pPlanes, size_t planesNum) override; + SurfacePlane *GetSurfacePlane(uint32_t planeNumber = 0U) override; + +protected: + SurfacePlane plane; +}; + +} // namespace VPF diff --git a/PyNvCodec/TC/src/MemoryInterfaces.cpp b/PyNvCodec/TC/src/MemoryInterfaces.cpp index 063f847b..e0ac7d6a 100644 --- a/PyNvCodec/TC/src/MemoryInterfaces.cpp +++ b/PyNvCodec/TC/src/MemoryInterfaces.cpp @@ -1,5 +1,7 @@ /* * Copyright 2019 NVIDIA Corporation + * Copyright 2021 Videonetics Technology Private Limited + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -434,6 +436,10 @@ Surface *Surface::Make(Pixel_Format format) { return new SurfaceYCbCr; case YUV444: return new SurfaceYUV444; + case RGB_32F: + return new SurfaceRGB32F; + case RGB_32F_PLANAR: + return new SurfaceRGB32FPlanar; default: return nullptr; } @@ -458,6 +464,10 @@ Surface *Surface::Make(Pixel_Format format, uint32_t newWidth, return new SurfaceYCbCr(newWidth, newHeight, context); case YUV444: return new SurfaceYUV444(newWidth, newHeight, context); + case RGB_32F: + return new SurfaceRGB32F(newWidth, newHeight, context); + case RGB_32F_PLANAR: + return new SurfaceRGB32FPlanar(newWidth, newHeight, context); default: return nullptr; } @@ -1006,3 +1016,159 @@ SurfaceYUV444::SurfaceYUV444(uint32_t width, uint32_t height, CUcontext context) Surface *VPF::SurfaceYUV444::Clone() { return new SurfaceYUV444(*this); } Surface *VPF::SurfaceYUV444::Create() { return new SurfaceYUV444; } + +SurfaceRGB32F::~SurfaceRGB32F() = default; + +SurfaceRGB32F::SurfaceRGB32F() = default; + +SurfaceRGB32F::SurfaceRGB32F(const SurfaceRGB32F &other) : plane(other.plane) {} + +SurfaceRGB32F::SurfaceRGB32F(uint32_t width, uint32_t height, CUcontext context) + : plane(width * 3, height, ElemSize(), context) {} + +SurfaceRGB32F &SurfaceRGB32F::operator=(const SurfaceRGB32F &other) { + plane = other.plane; + return *this; +} + +Surface *SurfaceRGB32F::Clone() { return new SurfaceRGB32F(*this); } + +Surface *SurfaceRGB32F::Create() { return new SurfaceRGB32F; } + +uint32_t SurfaceRGB32F::Width(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Width() / 3; + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32F::WidthInBytes(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Width() * plane.ElemSize(); + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32F::Height(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { 
+ return plane.Height(); + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32F::Pitch(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Pitch(); + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32F::HostMemSize() const { return plane.GetHostMemSize(); } + +CUdeviceptr SurfaceRGB32F::PlanePtr(uint32_t planeNumber) { + if (planeNumber < NumPlanes()) { + return plane.GpuMem(); + } + + throw invalid_argument("Invalid plane number"); +} + +void SurfaceRGB32F::Update(const SurfacePlane &newPlane) { plane = newPlane; } + +bool SurfaceRGB32F::Update(SurfacePlane *pPlanes, size_t planesNum) { + if (pPlanes && 1 == planesNum && !plane.OwnMemory()) { + plane = *pPlanes; + return true; + } + + return false; +} + +SurfacePlane *SurfaceRGB32F::GetSurfacePlane(uint32_t planeNumber) { + return planeNumber ? nullptr : &plane; +} + +SurfaceRGB32FPlanar::~SurfaceRGB32FPlanar() = default; + +SurfaceRGB32FPlanar::SurfaceRGB32FPlanar() = default; + +SurfaceRGB32FPlanar::SurfaceRGB32FPlanar(const SurfaceRGB32FPlanar &other) + : plane(other.plane) {} + +SurfaceRGB32FPlanar::SurfaceRGB32FPlanar(uint32_t width, uint32_t height, + CUcontext context) + : plane(width, height * 3, ElemSize(), context) {} + +SurfaceRGB32FPlanar &SurfaceRGB32FPlanar::operator=(const SurfaceRGB32FPlanar &other) { + plane = other.plane; + return *this; +} + +Surface *SurfaceRGB32FPlanar::Clone() { return new SurfaceRGB32FPlanar(*this); } + +Surface *SurfaceRGB32FPlanar::Create() { return new SurfaceRGB32FPlanar; } + +uint32_t SurfaceRGB32FPlanar::Width(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Width(); + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32FPlanar::WidthInBytes(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Width() * plane.ElemSize(); + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t 
SurfaceRGB32FPlanar::Height(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Height() / 3; + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32FPlanar::Pitch(uint32_t planeNumber) const { + if (planeNumber < NumPlanes()) { + return plane.Pitch(); + } + + throw invalid_argument("Invalid plane number"); +} + +uint32_t SurfaceRGB32FPlanar::HostMemSize() const { + return plane.GetHostMemSize(); +} + +CUdeviceptr SurfaceRGB32FPlanar::PlanePtr(uint32_t planeNumber) { + if (planeNumber < NumPlanes()) { + return plane.GpuMem() + planeNumber * Height() * plane.Pitch(); + } + + throw invalid_argument("Invalid plane number"); +} + +void SurfaceRGB32FPlanar::Update(const SurfacePlane &newPlane) { + plane = newPlane; +} + +bool SurfaceRGB32FPlanar::Update(SurfacePlane *pPlanes, size_t planesNum) { + if (pPlanes && 1 == planesNum && !plane.OwnMemory()) { + plane = *pPlanes; + return true; + } + + return false; +} + +SurfacePlane *SurfaceRGB32FPlanar::GetSurfacePlane(uint32_t planeNumber) { + return planeNumber ? nullptr : &plane; +} diff --git a/PyNvCodec/TC/src/Tasks.cpp b/PyNvCodec/TC/src/Tasks.cpp index ac3d3c00..afdf40ed 100644 --- a/PyNvCodec/TC/src/Tasks.cpp +++ b/PyNvCodec/TC/src/Tasks.cpp @@ -1,6 +1,7 @@ /* * Copyright 2019 NVIDIA Corporation * Copyright 2021 Videonetics Technology Private Limited + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -245,7 +246,7 @@ struct NvdecDecodeFrame_Impl { : stream(cuStream), context(cuContext), nvDecoder(cuStream, cuContext, videoCodec) { pLastSurface = Surface::Make(format); - pPacketData = Buffer::MakeOwnMem(sizeof(PacketData));; + pPacketData = Buffer::MakeOwnMem(sizeof(PacketData)); } ~NvdecDecodeFrame_Impl() { @@ -390,6 +391,10 @@ auto const format_name = [](Pixel_Format format) { return "YCBCR"; case YUV444: return "YUV444"; + case RGB_32F: + return "RGB_32F"; + case RGB_32F_PLANAR: + return "RGB_32F_PLANAR"; default: ss << format; return ss.str().c_str(); @@ -409,6 +414,9 @@ static size_t GetElemSize(Pixel_Format format) { case BGR: case Y: return sizeof(uint8_t); + case RGB_32F: + case RGB_32F_PLANAR: + return sizeof(float); default: ss << __FUNCTION__; ss << ": unsupported pixel format: " << format_name(format); @@ -514,7 +522,9 @@ struct CudaDownloadSurface_Impl { bufferSize = bufferSize * 3U / 2U; } else if (RGB == _pix_fmt || RGB_PLANAR == _pix_fmt || BGR == _pix_fmt || - YUV444 == _pix_fmt) { + YUV444 == _pix_fmt || + RGB_32F == _pix_fmt || + RGB_32F_PLANAR == _pix_fmt) { bufferSize = bufferSize * 3U; } else if (Y == _pix_fmt) { } else { @@ -878,6 +888,114 @@ struct NppResizeSurfacePlanar_Impl final : ResizeSurface_Impl { } }; +struct NppResizeSurfacePacked32F3C_Impl final : ResizeSurface_Impl { + NppResizeSurfacePacked32F3C_Impl(uint32_t width, uint32_t height, CUcontext ctx, + CUstream str, Pixel_Format format) + : ResizeSurface_Impl(width, height, format, ctx, str) { + pSurface = Surface::Make(format, width, height, ctx); + } + + ~NppResizeSurfacePacked32F3C_Impl() { delete pSurface; } + + TaskExecStatus Run(Surface &source) { + NvtxMark tick(__FUNCTION__); + + if (pSurface->PixelFormat() != source.PixelFormat()) { + return TaskExecStatus::TASK_EXEC_FAIL; + } + + auto srcPlane = source.GetSurfacePlane(); + auto dstPlane = pSurface->GetSurfacePlane(); + + const Npp32f *pSrc = (const Npp32f 
*)srcPlane->GpuMem(); + int nSrcStep = (int)source.Pitch(); + NppiSize oSrcSize = {0}; + oSrcSize.width = source.Width(); + oSrcSize.height = source.Height(); + NppiRect oSrcRectROI = {0}; + oSrcRectROI.width = oSrcSize.width; + oSrcRectROI.height = oSrcSize.height; + + Npp32f *pDst = (Npp32f *)dstPlane->GpuMem(); + int nDstStep = (int)pSurface->Pitch(); + NppiSize oDstSize = {0}; + oDstSize.width = pSurface->Width(); + oDstSize.height = pSurface->Height(); + NppiRect oDstRectROI = {0}; + oDstRectROI.width = oDstSize.width; + oDstRectROI.height = oDstSize.height; + int eInterpolation = NPPI_INTER_LANCZOS; + + CudaCtxPush ctxPush(cu_ctx); + auto ret = nppiResize_32f_C3R_Ctx(pSrc, nSrcStep, oSrcSize, oSrcRectROI, + pDst, nDstStep, oDstSize, oDstRectROI, + eInterpolation, nppCtx); + if (NPP_NO_ERROR != ret) { + cerr << "Can't resize 3-channel packed image. Error code: " << ret + << endl; + return TASK_EXEC_FAIL; + } + + return TASK_EXEC_SUCCESS; + } +}; + +// Resize planar 32-bit float surface (RGB_32F_PLANAR); +struct NppResizeSurface32FPlanar_Impl final : ResizeSurface_Impl { + NppResizeSurface32FPlanar_Impl(uint32_t width, uint32_t height, CUcontext ctx, + CUstream str, Pixel_Format format) + : ResizeSurface_Impl(width, height, format, ctx, str) { + pSurface = Surface::Make(format, width, height, ctx); + } + + ~NppResizeSurface32FPlanar_Impl() { delete pSurface; } + + TaskExecStatus Run(Surface &source) { + NvtxMark tick(__FUNCTION__); + + if (pSurface->PixelFormat() != source.PixelFormat()) { + cerr << "Actual pixel format is " << source.PixelFormat() << endl; + cerr << "Expected input format is " << pSurface->PixelFormat() << endl; + return TaskExecStatus::TASK_EXEC_FAIL; + } + + for (auto plane = 0; plane < pSurface->NumPlanes(); plane++) { + auto srcPlane = source.GetSurfacePlane(plane); + auto dstPlane = pSurface->GetSurfacePlane(plane); + + const Npp32f *pSrc = (const Npp32f *)srcPlane->GpuMem(); + int nSrcStep = (int)srcPlane->Pitch(); + NppiSize oSrcSize = {0}; 
+ oSrcSize.width = srcPlane->Width(); + oSrcSize.height = srcPlane->Height(); + NppiRect oSrcRectROI = {0}; + oSrcRectROI.width = oSrcSize.width; + oSrcRectROI.height = oSrcSize.height; + + Npp32f *pDst = (Npp32f *)dstPlane->GpuMem(); + int nDstStep = (int)dstPlane->Pitch(); + NppiSize oDstSize = {0}; + oDstSize.width = dstPlane->Width(); + oDstSize.height = dstPlane->Height(); + NppiRect oDstRectROI = {0}; + oDstRectROI.width = oDstSize.width; + oDstRectROI.height = oDstSize.height; + int eInterpolation = NPPI_INTER_LANCZOS; + + CudaCtxPush ctxPush(cu_ctx); + auto ret = nppiResize_32f_C1R_Ctx(pSrc, nSrcStep, oSrcSize, oSrcRectROI, + pDst, nDstStep, oDstSize, oDstRectROI, + eInterpolation, nppCtx); + if (NPP_NO_ERROR != ret) { + cerr << "NPP error with code " << ret << endl; + return TASK_EXEC_FAIL; + } + } + + return TASK_EXEC_SUCCESS; + } +}; + }; // namespace VPF auto const cuda_stream_sync = [](void *stream) { @@ -892,6 +1010,10 @@ ResizeSurface::ResizeSurface(uint32_t width, uint32_t height, pImpl = new NppResizeSurfacePacked3C_Impl(width, height, ctx, str, format); } else if (YUV420 == format || YCBCR == format || YUV444 == format || RGB_PLANAR == format) { pImpl = new NppResizeSurfacePlanar_Impl(width, height, ctx, str, format); + } else if (RGB_32F == format) { + pImpl = new NppResizeSurfacePacked32F3C_Impl(width, height, ctx, str, format); + } else if (RGB_32F_PLANAR == format) { + pImpl = new NppResizeSurface32FPlanar_Impl(width, height, ctx, str, format); } else { stringstream ss; ss << __FUNCTION__; diff --git a/PyNvCodec/TC/src/TasksColorCvt.cpp b/PyNvCodec/TC/src/TasksColorCvt.cpp index 76efae82..d1dc5d49 100644 --- a/PyNvCodec/TC/src/TasksColorCvt.cpp +++ b/PyNvCodec/TC/src/TasksColorCvt.cpp @@ -852,6 +852,98 @@ struct bgr_rgb final : public NppConvertSurface_Impl { } Surface *pSurface = nullptr; }; +struct rbg8_rgb32f final : public NppConvertSurface_Impl { + rbg8_rgb32f(uint32_t width, uint32_t height, CUcontext context, + CUstream stream) + : 
NppConvertSurface_Impl(context, stream) { + pSurface = Surface::Make(RGB_32F, width, height, context); + } + + ~rbg8_rgb32f() { delete pSurface; } + + Token *Execute(Token *pInput, ColorspaceConversionContext *pCtx) override { + if (!pInput) { + return nullptr; + } + + auto pInputRGB8 = (SurfaceRGB *)pInput; + if (RGB != pInputRGB8->PixelFormat()) { + return nullptr; + } + + const Npp8u *pSrc = (const Npp8u *)pInputRGB8->PlanePtr(); + + int nSrcStep = pInputRGB8->Pitch(); + Npp32f *pDst = (Npp32f *)pSurface->PlanePtr(); + int nDstStep = pSurface->Pitch(); + NppiSize oSizeRoi = {0}; + oSizeRoi.height = pSurface->Height(); + oSizeRoi.width = pSurface->Width(); + Npp32f nMin = 0.0; + Npp32f nMax = 1.0; + // rgb8 to rgb32f + const int aDstOrder[3] = {2, 1, 0}; + + CudaCtxPush ctxPush(cu_ctx); + + // auto err = nppiConvert_8u32f_C3R_Ctx(pSrc, nSrcStep, pDst, nDstStep, + // oSizeRoi, nppCtx); + auto err = nppiScale_8u32f_C3R_Ctx(pSrc, nSrcStep, pDst, nDstStep, + oSizeRoi, nMin, nMax, nppCtx); + if (NPP_NO_ERROR != err) { + cerr << "Failed to convert surface. 
Error code: " << err << endl; + std::cout << "in height " << pInputRGB8->Height() << " out height " << pSurface->Height() << " in width_in_bytes: " << pInputRGB8->WidthInBytes() << " nSrcStep: " << nSrcStep << " nDstStep: " << nDstStep << std::endl; + std::cout << "in width " << pInputRGB8->Width() << " out width " << pSurface->Width() << " out width_in_bytes: " << pSurface->WidthInBytes() << " element size: " << pSurface->ElemSize() << std::endl; + return nullptr; + } + + return pSurface; + } + Surface *pSurface = nullptr; +}; + +struct rgb32f_deinterleave final : public NppConvertSurface_Impl { + rgb32f_deinterleave(uint32_t width, uint32_t height, CUcontext context, + CUstream stream) + : NppConvertSurface_Impl(context, stream) { + pSurface = Surface::Make(RGB_32F_PLANAR, width, height, context); + } + + ~rgb32f_deinterleave() { delete pSurface; } + + Token *Execute(Token *pInput, ColorspaceConversionContext *pCtx) override { + auto pInputRGB_32F = (SurfaceRGB *)pInput; + + if (RGB_32F != pInputRGB_32F->PixelFormat()) { + return nullptr; + } + + const Npp32f *pSrc = (const Npp32f *)pInputRGB_32F->PlanePtr(); + int nSrcStep = pInputRGB_32F->Pitch(); + Npp32f *aDst[] = {(Npp32f *)((uint8_t *)pSurface->PlanePtr()), + (Npp32f *)((uint8_t *)pSurface->PlanePtr() + + pSurface->Height() * pSurface->Pitch()), + (Npp32f *)((uint8_t *)pSurface->PlanePtr() + + pSurface->Height() * pSurface->Pitch() * 2)}; + int nDstStep = pSurface->Pitch(); + NppiSize oSizeRoi = {0}; + oSizeRoi.height = pSurface->Height(); + oSizeRoi.width = pSurface->Width(); + + CudaCtxPush ctxPush(cu_ctx); + auto err = + nppiCopy_32f_C3P3R_Ctx(pSrc, nSrcStep, aDst, nDstStep, oSizeRoi, nppCtx); + if (NPP_NO_ERROR != err) { + cerr << "Failed to convert surface. 
Error code: " << err << endl; + return nullptr; + } + + return pSurface; + } + + Surface *pSurface = nullptr; +}; + } // namespace VPF auto const cuda_stream_sync = [](void *stream) { @@ -877,24 +969,28 @@ ConvertSurface::ConvertSurface(uint32_t width, uint32_t height, pImpl = new rgb8_interleave(width, height, ctx, str); } else if (RGB_PLANAR == inFormat && YUV444 == outFormat) { pImpl = new rgb_planar_yuv444(width, height, ctx, str); - }else if (YUV420 == inFormat && RGB == outFormat) { + } else if (YUV420 == inFormat && RGB == outFormat) { pImpl = new yuv420_rgb(width, height, ctx, str); } else if (RGB == inFormat && YUV420 == outFormat) { pImpl = new rgb_yuv420(width, height, ctx, str); } else if (RGB == inFormat && YUV444 == outFormat) { pImpl = new rgb_yuv444(width, height, ctx, str); - }else if (BGR == inFormat && YCBCR == outFormat) { + } else if (BGR == inFormat && YCBCR == outFormat) { pImpl = new bgr_ycbcr(width, height, ctx, str); } else if (RGB == inFormat && BGR == outFormat) { pImpl = new rgb_bgr(width, height, ctx, str); } else if (BGR == inFormat && RGB == outFormat) { pImpl = new bgr_rgb(width, height, ctx, str); - }else if (YUV444 == inFormat && BGR == outFormat) { + } else if (YUV444 == inFormat && BGR == outFormat) { pImpl = new yuv444_bgr(width, height, ctx, str); } else if (BGR == inFormat && YUV444 == outFormat) { pImpl = new bgr_yuv444(width, height, ctx, str); } else if (NV12 == inFormat && Y == outFormat) { pImpl = new nv12_y(width, height, ctx, str); + } else if (RGB == inFormat && RGB_32F == outFormat) { + pImpl = new rbg8_rgb32f(width, height, ctx, str); + } else if (RGB_32F == inFormat && RGB_32F_PLANAR == outFormat) { + pImpl = new rgb32f_deinterleave(width, height, ctx, str); } else { stringstream ss; ss << "Unsupported pixel format conversion: " << inFormat << " to " diff --git a/PyNvCodec/inc/PyNvCodec.hpp b/PyNvCodec/inc/PyNvCodec.hpp index a7994b19..6a85ce99 100644 --- a/PyNvCodec/inc/PyNvCodec.hpp +++ 
b/PyNvCodec/inc/PyNvCodec.hpp @@ -1,6 +1,8 @@ /* * Copyright 2020 NVIDIA Corporation * Copyright 2021 Kognia Sports Intelligence + * Copyright 2021 Videonetics Technology Private Limited + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -78,6 +80,8 @@ class PyFrameUploader { Pixel_Format GetFormat(); std::shared_ptr UploadSingleFrame(py::array_t &frame); + + std::shared_ptr UploadSingleFrame(py::array_t &frame); }; class PySurfaceDownloader { @@ -100,6 +104,8 @@ class PySurfaceDownloader { bool DownloadSingleSurface(std::shared_ptr surface, py::array_t &frame); + bool DownloadSingleSurface(std::shared_ptr surface, + py::array_t &frame); }; class PySurfaceConverter { diff --git a/PyNvCodec/src/PyNvCodec.cpp b/PyNvCodec/src/PyNvCodec.cpp index 5e17a871..a541b16a 100644 --- a/PyNvCodec/src/PyNvCodec.cpp +++ b/PyNvCodec/src/PyNvCodec.cpp @@ -2,6 +2,7 @@ * Copyright 2019 NVIDIA Corporation * Copyright 2021 Kognia Sports Intelligence * Copyright 2021 Videonetics Technology Private Limited + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -216,6 +217,33 @@ PyFrameUploader::UploadSingleFrame(py::array_t &frame) { return shared_ptr(pSurface->Clone()); } +/* Will upload numpy array to GPU; + * Surface returned is valid until next call; + */ +shared_ptr +PyFrameUploader::UploadSingleFrame(py::array_t &frame) { + /* Upload to GPU; + */ + auto pRawFrame = Buffer::Make(frame.size() * sizeof(float), frame.mutable_data()); + uploader->SetInput(pRawFrame, 0U); + auto res = uploader->Execute(); + delete pRawFrame; + + if (TASK_EXEC_FAIL == res) { + throw runtime_error("Error uploading frame to GPU"); + } + + /* Get surface; + */ + auto pSurface = (Surface *)uploader->GetOutput(0U); + if (!pSurface) { + throw runtime_error("Error uploading frame to GPU"); + } + + return shared_ptr(pSurface->Clone()); +} + + PySurfaceDownloader::PySurfaceDownloader(uint32_t width, uint32_t height, Pixel_Format format, uint32_t gpu_ID) { surfaceWidth = width; @@ -262,6 +290,26 @@ bool PySurfaceDownloader::DownloadSingleSurface(shared_ptr surface, return false; } +bool PySurfaceDownloader::DownloadSingleSurface(shared_ptr surface, + py::array_t &frame) { + upDownloader->SetInput(surface.get(), 0U); + if (TASK_EXEC_FAIL == upDownloader->Execute()) { + return false; + } + + auto *pRawFrame = (Buffer *)upDownloader->GetOutput(0U); + if (pRawFrame) { + auto const downloadSize = pRawFrame->GetRawMemSize(); + if (downloadSize != frame.size() * sizeof(float)) { + frame.resize({downloadSize}, false); + } + memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), downloadSize); + return true; + } + + return false; +} + PySurfaceConverter::PySurfaceConverter(uint32_t width, uint32_t height, Pixel_Format inFormat, Pixel_Format outFormat, uint32_t gpuID) @@ -1637,6 +1685,8 @@ PYBIND11_MODULE(PyNvCodec, m) .value("YCBCR", Pixel_Format::YCBCR) .value("YUV444", Pixel_Format::YUV444) .value("UNDEFINED", Pixel_Format::UNDEFINED) + .value("RGB_32F", Pixel_Format::RGB_32F) + .value("RGB_32F_PLANAR", 
Pixel_Format::RGB_32F_PLANAR) .export_values(); py::enum_(m, "ColorSpace") @@ -2075,7 +2125,14 @@ PYBIND11_MODULE(PyNvCodec, m) .def(py::init()) .def(py::init()) .def("Format", &PyFrameUploader::GetFormat) - .def("UploadSingleFrame", &PyFrameUploader::UploadSingleFrame, + .def("UploadSingleFrame", + py::overload_cast&>(&PyFrameUploader::UploadSingleFrame), + py::arg("frame").noconvert(true), + py::return_value_policy::take_ownership, + py::call_guard()) + .def("UploadSingleFrame", + py::overload_cast&>(&PyFrameUploader::UploadSingleFrame), + py::arg("frame").noconvert(true), py::return_value_policy::take_ownership, py::call_guard()); @@ -2084,8 +2141,14 @@ PYBIND11_MODULE(PyNvCodec, m) .def(py::init()) .def("Format", &PySurfaceDownloader::GetFormat) .def("DownloadSingleSurface", - &PySurfaceDownloader::DownloadSingleSurface, - py::call_guard()); + py::overload_cast, py::array_t &>( + &PySurfaceDownloader::DownloadSingleSurface), + py::arg("surface"), py::arg("frame").noconvert(true)) + .def("DownloadSingleSurface", + py::overload_cast, py::array_t &>( + &PySurfaceDownloader::DownloadSingleSurface), + py::arg("surface"), py::arg("frame").noconvert(true), + py::call_guard()); py::class_(m, "PySurfaceConverter") .def(py::init()) From b896bef16a58e1183bcaa4406bd6b5024e890e50 Mon Sep 17 00:00:00 2001 From: Roman Arzumanyan Date: Fri, 17 Sep 2021 12:32:24 +0300 Subject: [PATCH 2/2] Removing excessive debug output --- PyNvCodec/TC/src/TasksColorCvt.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/PyNvCodec/TC/src/TasksColorCvt.cpp b/PyNvCodec/TC/src/TasksColorCvt.cpp index d1dc5d49..fed05773 100644 --- a/PyNvCodec/TC/src/TasksColorCvt.cpp +++ b/PyNvCodec/TC/src/TasksColorCvt.cpp @@ -881,19 +881,14 @@ struct rbg8_rgb32f final : public NppConvertSurface_Impl { oSizeRoi.width = pSurface->Width(); Npp32f nMin = 0.0; Npp32f nMax = 1.0; - // rgb8 to rgb32f const int aDstOrder[3] = {2, 1, 0}; CudaCtxPush ctxPush(cu_ctx); - // auto err = nppiConvert_8u32f_C3R_Ctx(pSrc, 
nSrcStep, pDst, nDstStep, - // oSizeRoi, nppCtx); auto err = nppiScale_8u32f_C3R_Ctx(pSrc, nSrcStep, pDst, nDstStep, oSizeRoi, nMin, nMax, nppCtx); if (NPP_NO_ERROR != err) { cerr << "Failed to convert surface. Error code: " << err << endl; - std::cout << "in height " << pInputRGB8->Height() << " out height " << pSurface->Height() << " in width_in_bytes: " << pInputRGB8->WidthInBytes() << " nSrcStep: " << nSrcStep << " nDstStep: " << nDstStep << std::endl; - std::cout << "in width " << pInputRGB8->Width() << " out width " << pSurface->Width() << " out width_in_bytes: " << pSurface->WidthInBytes() << " element size: " << pSurface->ElemSize() << std::endl; return nullptr; }