diff --git a/setup.py b/setup.py index b27f89d4..55b8fa99 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # , "PyOpenGL-accelerate" # does not compile on 3.10 "dev": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", f"PytorchNvCodec @ file://{os.getcwd()}/src/PytorchNvCodec/"], "samples": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", "tqdm", PytorchNvCodec], - "tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", PytorchNvCodec], + "tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "pydantic", PytorchNvCodec], "torch": ["torch", "torchvision", PytorchNvCodec], "tensorrt": ["torch", "torchvision", PytorchNvCodec], }, diff --git a/src/PyNvCodec/__init__.pyi b/src/PyNvCodec/__init__.pyi index 13bac71b..6cbc6ff3 100644 --- a/src/PyNvCodec/__init__.pyi +++ b/src/PyNvCodec/__init__.pyi @@ -1,15 +1,18 @@ -from typing import Any, ClassVar, Dict +from typing import Any, ClassVar, Dict, Tuple from typing import overload import numpy ASYNC_ENCODE_SUPPORT: NV_ENC_CAPS BGR: PixelFormat BIT_DEPTH_MINUS_8: NV_DEC_CAPS +BIT_DEPTH_NOT_SUPPORTED: TaskExecInfo BT_601: ColorSpace BT_709: ColorSpace DYNAMIC_QUERY_ENCODER_CAPACITY: NV_ENC_CAPS +END_OF_STREAM: TaskExecInfo EXACT_FRAME: SeekMode EXPOSED_COUNT: NV_ENC_CAPS +FAIL: TaskExecInfo H264: CudaVideoCodec HEIGHT_MAX: NV_ENC_CAPS HEIGHT_MIN: NV_ENC_CAPS @@ -28,6 +31,7 @@ MB_NUM_MAX: NV_ENC_CAPS MB_PER_SEC_MAX: NV_ENC_CAPS MIN_HEIGHT: NV_DEC_CAPS MIN_WIDTH: NV_DEC_CAPS +MORE_DATA_NEEDED: TaskExecInfo MPEG: ColorRange NO_PTS: int NUM_MAX_BFRAMES: NV_ENC_CAPS @@ -44,6 +48,7 @@ RGB_32F: PixelFormat RGB_32F_PLANAR: PixelFormat RGB_PLANAR: PixelFormat SEPARATE_COLOUR_PLANE: NV_ENC_CAPS +SUCCESS: TaskExecInfo SUPPORTED_RATECONTROL_MODES: NV_ENC_CAPS SUPPORT_10BIT_ENCODE: NV_ENC_CAPS SUPPORT_ADAPTIVE_TRANSFORM: NV_ENC_CAPS @@ -351,15 +356,18 @@ class PyFFmpegDemuxer: class PyFfmpegDecoder: def __init__(self, 
input: str, opts: Dict[str,str], gpu_id: int = ...) -> None: ... + def AvgFramerate(self) -> float: ... def Codec(self) -> CudaVideoCodec: ... def ColorRange(self) -> ColorRange: ... def ColorSpace(self) -> ColorSpace: ... - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ... def DecodeSingleSurface(self, *args, **kwargs) -> Any: ... def Format(self) -> PixelFormat: ... def Framerate(self) -> float: ... def GetMotionVectors(self) -> numpy.ndarray[MotionVector]: ... def Height(self) -> int: ... + def Numframes(self) -> int: ... + def Timebase(self) -> float: ... def Width(self) -> int: ... class PyFrameUploader: @@ -388,33 +396,33 @@ class PyNvDecoder: def ColorRange(self) -> ColorRange: ... def ColorSpace(self) -> ColorSpace: ... @overload - def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8]) -> bool: ... + def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8]) -> bool: ... + def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ... + def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ... 
+ def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> bool: ... + def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> Tuple[bool,TaskExecInfo]: ... @overload - def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> bool: ... 
+ def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... def DecodeSingleSurface(self, *args, **kwargs) -> Any: ... def DecodeSurfaceFromPacket(self, enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData, bool_nvcv_check: bool) -> object: ... @overload - def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ... + def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ... @overload - def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ... + def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ... def FlushSingleSurface(self, *args, **kwargs) -> Any: ... def Format(self) -> PixelFormat: ... def Framerate(self) -> float: ... @@ -582,5 +590,30 @@ class SurfacePlane: def Pitch(self) -> int: ... def Width(self) -> int: ... +class TaskExecDetails: + info: TaskExecInfo + def __init__(self) -> None: ... + +class TaskExecInfo: + __members__: ClassVar[dict] = ... # read-only + BIT_DEPTH_NOT_SUPPORTED: ClassVar[TaskExecInfo] = ... + END_OF_STREAM: ClassVar[TaskExecInfo] = ... + FAIL: ClassVar[TaskExecInfo] = ... + MORE_DATA_NEEDED: ClassVar[TaskExecInfo] = ... + SUCCESS: ClassVar[TaskExecInfo] = ... + __entries: ClassVar[dict] = ... + def __init__(self, value: int) -> None: ... + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... + def __setstate__(self, state: int) -> None: ... + @property + def name(self) -> str: ... + @property + def value(self) -> int: ... + def GetNumGpus() -> int: ... def GetNvencParams() -> Dict[str,str]: ... 
diff --git a/src/PyNvCodec/inc/PyNvCodec.hpp b/src/PyNvCodec/inc/PyNvCodec.hpp index 1fbaabd4..b1ee6e04 100644 --- a/src/PyNvCodec/inc/PyNvCodec.hpp +++ b/src/PyNvCodec/inc/PyNvCodec.hpp @@ -287,14 +287,17 @@ class PyFfmpegDecoder { const std::map &ffmpeg_options, uint32_t gpuID); - bool DecodeSingleFrame(py::array_t &frame); - std::shared_ptr DecodeSingleSurface(); + bool DecodeSingleFrame(py::array_t &frame, TaskExecDetails& details); + std::shared_ptr DecodeSingleSurface(TaskExecDetails& details); py::array_t GetMotionVectors(); uint32_t Width() const; uint32_t Height() const; double Framerate() const; + double AvgFramerate() const; + double Timebase() const; + uint32_t Numframes() const; ColorSpace Color_Space() const; ColorRange Color_Range() const; cudaVideoCodec Codec() const; @@ -343,12 +346,11 @@ class PyNvDecoder { const std::map &ffmpeg_options): PyNvDecoder(pathToFile, (CUcontext)ctx, (CUstream)str, ffmpeg_options){} - static Buffer *getElementaryVideo(DemuxFrame *demuxer, - SeekContext *seek_ctx, bool needSEI); + Buffer* getElementaryVideo(SeekContext* seek_ctx, TaskExecDetails& details, + bool needSEI); - static Surface *getDecodedSurface(NvdecDecodeFrame *decoder, - DemuxFrame *demuxer, - SeekContext *seek_ctx, bool needSEI); + Surface* getDecodedSurface(SeekContext* seek_ctx, TaskExecDetails& details, + bool needSEI); uint32_t Width() const; @@ -374,13 +376,14 @@ class PyNvDecoder { Pixel_Format GetPixelFormat() const; - bool DecodeSurface(class DecodeContext &ctx); + bool DecodeSurface(class DecodeContext& ctx, TaskExecDetails& details); - bool DecodeFrame(class DecodeContext &ctx, py::array_t& frame); + bool DecodeFrame(class DecodeContext& ctx, TaskExecDetails& details, + py::array_t& frame); - Surface *getDecodedSurfaceFromPacket(const py::array_t *pPacket, - const PacketData *p_packet_data = nullptr, - bool no_eos = false); + Surface* getDecodedSurfaceFromPacket( + const py::array_t* pPacket, TaskExecDetails& details, + const PacketData* 
p_packet_data = nullptr, bool no_eos = false); void DownloaderLazyInit(); diff --git a/src/PyNvCodec/src/PyFFMpegDecoder.cpp b/src/PyNvCodec/src/PyFFMpegDecoder.cpp index 08043a2a..741cbff8 100644 --- a/src/PyNvCodec/src/PyFFMpegDecoder.cpp +++ b/src/PyNvCodec/src/PyFFMpegDecoder.cpp @@ -34,11 +34,15 @@ PyFfmpegDecoder::PyFfmpegDecoder(const string& pathToFile, upDecoder.reset(FfmpegDecodeFrame::Make(pathToFile.c_str(), cli_iface)); } -bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t& frame) +bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t& frame, + TaskExecDetails& details) { UpdateState(); - if (TASK_EXEC_SUCCESS == upDecoder->Execute()) { + auto ret = upDecoder->Execute(); + details = upDecoder->GetLastExecDetails(); + + if (TASK_EXEC_SUCCESS == ret) { auto pRawFrame = (Buffer*)upDecoder->GetOutput(0U); if (pRawFrame) { auto const frame_size = pRawFrame->GetRawMemSize(); @@ -46,7 +50,7 @@ bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t& frame) frame.resize({frame_size}, false); } - memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), frame_size); + memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), frame_size); return true; } } @@ -54,13 +58,14 @@ bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t& frame) return false; } -std::shared_ptr PyFfmpegDecoder::DecodeSingleSurface() +std::shared_ptr +PyFfmpegDecoder::DecodeSingleSurface(TaskExecDetails& details) { py::array_t frame; std::shared_ptr p_surf = nullptr; UploaderLazyInit(); - if (DecodeSingleFrame(frame)) { + if (DecodeSingleFrame(frame, details)) { p_surf = upUploader->UploadSingleFrame(frame); } @@ -188,6 +193,27 @@ cudaVideoCodec PyFfmpegDecoder::Codec() const return params.videoContext.codec; }; +double PyFfmpegDecoder::AvgFramerate() const +{ + MuxingParams params; + upDecoder->GetParams(params); + return params.videoContext.avgFrameRate; +}; + +double PyFfmpegDecoder::Timebase() const +{ + MuxingParams params; + upDecoder->GetParams(params); + return params.videoContext.timeBase; 
+}; + +uint32_t PyFfmpegDecoder::Numframes() const +{ + MuxingParams params; + upDecoder->GetParams(params); + return params.videoContext.num_frames; +}; + Pixel_Format PyFfmpegDecoder::PixelFormat() const { MuxingParams params; @@ -197,7 +223,7 @@ Pixel_Format PyFfmpegDecoder::PixelFormat() const void Init_PyFFMpegDecoder(py::module& m) { - py::class_(m, "PyFfmpegDecoder") + py::class_>(m, "PyFfmpegDecoder") .def(py::init&, uint32_t>(), py::arg("input"), py::arg("opts"), py::arg("gpu_id") = 0, R"pbdoc( @@ -206,22 +232,33 @@ void Init_PyFFMpegDecoder(py::module& m) :param input: path to input file :param opts: AVDictionary options that will be passed to AVFormat context. )pbdoc") - .def("DecodeSingleFrame", &PyFfmpegDecoder::DecodeSingleFrame, - py::arg("frame"), py::call_guard(), - R"pbdoc( + .def( + "DecodeSingleFrame", + [](shared_ptr self, py::array_t& frame) { + TaskExecDetails details; + auto res = self->DecodeSingleFrame(frame, details); + return std::make_tuple(res, details.info); + }, + py::arg("frame"), py::call_guard(), + R"pbdoc( Decode single video frame from input file. :param frame: decoded video frame - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second element is TaskExecInfo. )pbdoc") - .def("DecodeSingleSurface", &PyFfmpegDecoder::DecodeSingleSurface, - py::return_value_policy::take_ownership, - py::call_guard(), - R"pbdoc( + .def( + "DecodeSingleSurface", + [](shared_ptr self) { + TaskExecDetails details; + auto res = self->DecodeSingleSurface(details); + return std::make_tuple(res, details.info); + }, + py::return_value_policy::take_ownership, + py::call_guard(), + R"pbdoc( Decode single video frame from input file and upload to GPU memory. - :return: Surface allocated in GPU memory. It's Empty() in case of failure, - non-empty otherwise. + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") .def("GetMotionVectors", &PyFfmpegDecoder::GetMotionVectors, py::return_value_policy::move, @@ -246,6 +283,19 @@ void Init_PyFFMpegDecoder(py::module& m) .def("Framerate", &PyFfmpegDecoder::Framerate, R"pbdoc( Return encoded video file framerate. + )pbdoc") + .def("AvgFramerate", &PyFfmpegDecoder::AvgFramerate, + R"pbdoc( + Return encoded video file average framerate. + )pbdoc") + .def("Timebase", &PyFfmpegDecoder::Timebase, + R"pbdoc( + Return encoded video file time base. + )pbdoc") + .def("Numframes", &PyFfmpegDecoder::Numframes, + R"pbdoc( + Return number of video frames in encoded video file. + Please note that some video containers don't store this information. )pbdoc") .def("ColorSpace", &PyFfmpegDecoder::Color_Space, R"pbdoc( diff --git a/src/PyNvCodec/src/PyNvCodec.cpp b/src/PyNvCodec/src/PyNvCodec.cpp index 241a7f2b..c6ee88b0 100644 --- a/src/PyNvCodec/src/PyNvCodec.cpp +++ b/src/PyNvCodec/src/PyNvCodec.cpp @@ -237,6 +237,14 @@ PYBIND11_MODULE(_PyNvCodec, m) .value("P12", Pixel_Format::P12) .export_values(); + py::enum_(m, "TaskExecInfo") + .value("FAIL", TaskExecInfo::FAIL) + .value("SUCCESS", TaskExecInfo::SUCCESS) + .value("END_OF_STREAM", TaskExecInfo::END_OF_STREAM) + .value("MORE_DATA_NEEDED", TaskExecInfo::MORE_DATA_NEEDED) + .value("BIT_DEPTH_NOT_SUPPORTED", TaskExecInfo::BIT_DEPTH_NOT_SUPPORTED) + .export_values(); + py::enum_(m, "ColorSpace") .value("BT_601", ColorSpace::BT_601) .value("BT_709", ColorSpace::BT_709) @@ -328,6 +336,10 @@ PYBIND11_MODULE(_PyNvCodec, m) return ss.str(); }); + py::class_>(m, "TaskExecDetails") + .def(py::init<>()) + .def_readwrite("info", &TaskExecDetails::info); + py::class_>( m, "ColorspaceConversionContext") diff --git a/src/PyNvCodec/src/PyNvDecoder.cpp b/src/PyNvCodec/src/PyNvDecoder.cpp index cc3f4895..771ff4ae 100644 --- a/src/PyNvCodec/src/PyNvDecoder.cpp +++ b/src/PyNvCodec/src/PyNvDecoder.cpp @@ -109,39 +109,36 @@ PyNvDecoder::PyNvDecoder(uint32_t width, uint32_t height, height, format)); 
-Buffer* PyNvDecoder::getElementaryVideo(DemuxFrame* demuxer, - SeekContext* seek_ctx, bool needSEI) +Buffer* PyNvDecoder::getElementaryVideo(SeekContext* seek_ctx, + TaskExecDetails& details, bool needSEI) { - Buffer* elementaryVideo = nullptr; - Buffer* pktData = nullptr; shared_ptr pSeekCtxBuf = nullptr; - do { // Set 1st demuxer input to any non-zero value if we need SEI; if (needSEI) { - demuxer->SetInput((Token*)0xdeadbeefull, 0U); + upDemuxer->SetInput((Token*)0xdeadbeefull, 0U); } // Set 2nd demuxer input to seek context if we need to seek; if (seek_ctx && seek_ctx->use_seek) { - pSeekCtxBuf = - shared_ptr(Buffer::MakeOwnMem(sizeof(SeekContext), seek_ctx)); - demuxer->SetInput((Token*)pSeekCtxBuf.get(), 1U); + pSeekCtxBuf.reset(Buffer::MakeOwnMem(sizeof(SeekContext), seek_ctx)); + upDemuxer->SetInput((Token*)pSeekCtxBuf.get(), 1U); } - if (TASK_EXEC_FAIL == demuxer->Execute()) { + + if (TASK_EXEC_FAIL == upDemuxer->Execute()) { + details = upDemuxer->GetLastExecDetails(); return nullptr; } - elementaryVideo = (Buffer*)demuxer->GetOutput(0U); /* Clear inputs and set down seek flag or we will seek * for one and the same frame multiple times. 
*/ if (seek_ctx) { seek_ctx->use_seek = false; } - demuxer->ClearInputs(); - } while (!elementaryVideo); + upDemuxer->ClearInputs(); + } while (!upDemuxer->GetOutput(0U)); - auto pktDataBuf = (Buffer*)demuxer->GetOutput(3U); + auto pktDataBuf = (Buffer*)upDemuxer->GetOutput(3U); if (pktDataBuf) { auto pPktData = pktDataBuf->GetDataAs(); if (seek_ctx) { @@ -150,37 +147,39 @@ Buffer* PyNvDecoder::getElementaryVideo(DemuxFrame* demuxer, } } - return elementaryVideo; + return (Buffer*)upDemuxer->GetOutput(0U); }; -Surface* PyNvDecoder::getDecodedSurface(NvdecDecodeFrame* decoder, - DemuxFrame* demuxer, - SeekContext* seek_ctx, bool needSEI) +Surface* PyNvDecoder::getDecodedSurface(SeekContext* seek_ctx, + TaskExecDetails& details, bool needSEI) { - decoder->ClearInputs(); - decoder->ClearOutputs(); + upDecoder->ClearInputs(); + upDecoder->ClearOutputs(); Surface* surface = nullptr; do { - auto elementaryVideo = getElementaryVideo(demuxer, seek_ctx, needSEI); - auto pktData = (Buffer*)demuxer->GetOutput(3U); + auto elementaryVideo = getElementaryVideo(seek_ctx, details, needSEI); + auto pktData = (Buffer*)upDemuxer->GetOutput(3U); - decoder->SetInput(elementaryVideo, 0U); - decoder->SetInput(pktData, 1U); - if (TASK_EXEC_FAIL == decoder->Execute()) { + upDecoder->SetInput(elementaryVideo, 0U); + upDecoder->SetInput(pktData, 1U); + + auto ret = upDecoder->Execute(); + details = upDecoder->GetLastExecDetails(); + + if (TASK_EXEC_FAIL == ret) { break; } - surface = (Surface*)decoder->GetOutput(0U); + surface = (Surface*)upDecoder->GetOutput(0U); } while (!surface); return surface; }; -Surface* -PyNvDecoder::getDecodedSurfaceFromPacket(const py::array_t* pPacket, - const PacketData* p_packet_data, - bool no_eos) +Surface* PyNvDecoder::getDecodedSurfaceFromPacket( + const py::array_t* pPacket, TaskExecDetails& details, + const PacketData* p_packet_data, bool no_eos) { upDecoder->ClearInputs(); upDecoder->ClearOutputs(); @@ -206,6 +205,7 @@ 
PyNvDecoder::getDecodedSurfaceFromPacket(const py::array_t* pPacket, upDecoder->SetInput(elementaryVideo ? elementaryVideo.get() : nullptr, 0U); if (TASK_EXEC_FAIL == upDecoder->Execute()) { + details = upDecoder->GetLastExecDetails(); return nullptr; } @@ -471,7 +471,7 @@ bool PyNvDecoder::IsResolutionChanged() return false; } -bool PyNvDecoder::DecodeSurface(DecodeContext& ctx) +bool PyNvDecoder::DecodeSurface(DecodeContext& ctx, TaskExecDetails &details) { if (!upDemuxer && !ctx.IsStandalone() && !ctx.IsFlush()) { @@ -507,7 +507,7 @@ bool PyNvDecoder::DecodeSurface(DecodeContext& ctx) Surface* p_surf = nullptr; do { try { - p_surf = getDecodedSurfaceFromPacket(nullptr, nullptr); + p_surf = getDecodedSurfaceFromPacket(nullptr, details, nullptr); } catch (decoder_error& dec_exc) { dec_error = true; cerr << dec_exc.what() << endl; @@ -527,13 +527,12 @@ bool PyNvDecoder::DecodeSurface(DecodeContext& ctx) do { try { if (ctx.IsFlush()) { - pRawSurf = getDecodedSurfaceFromPacket(nullptr, nullptr); + pRawSurf = getDecodedSurfaceFromPacket(nullptr, details, nullptr); } else if (ctx.IsStandalone()) { pRawSurf = - getDecodedSurfaceFromPacket(ctx.GetPacket(), ctx.GetInPacketData()); + getDecodedSurfaceFromPacket(ctx.GetPacket(), details, ctx.GetInPacketData()); } else { - pRawSurf = getDecodedSurface(upDecoder.get(), upDemuxer.get(), - ctx.GetSeekContextMutable(), ctx.HasSEI()); + pRawSurf = getDecodedSurface(ctx.GetSeekContextMutable(), details, ctx.HasSEI()); } if (!pRawSurf) { @@ -649,9 +648,10 @@ void PyNvDecoder::DownloaderLazyInit() } bool PyNvDecoder::DecodeFrame(class DecodeContext& ctx, + TaskExecDetails& details, py::array_t& frame) { - if (!DecodeSurface(ctx)) + if (!DecodeSurface(ctx, details)) return false; DownloaderLazyInit(); @@ -819,51 +819,49 @@ void Init_PyNvDecoder(py::module& m) R"pbdoc( Return dictionary with Nvdec capabilities. 
)pbdoc") - .def( - "DecodeSurfaceFromPacket", - [](shared_ptr self, PacketData& in_pkt_data, + .def( + "DecodeSurfaceFromPacket", + [](shared_ptr self, PacketData& in_pkt_data, py::array_t& packet, PacketData& out_pkt_data, bool bOutputNVCVImage) -> py::object { - - if (!bOutputNVCVImage) { + TaskExecDetails details; + if (!bOutputNVCVImage) { std::cout << "Please set value of bOutputNVCVImage to true" << std::endl; return py::cast(Py_None); - } - shared_ptr outputSurface; - DecodeContext ctx(nullptr, &packet, &in_pkt_data, &out_pkt_data, + } + shared_ptr outputSurface; + DecodeContext ctx(nullptr, &packet, &in_pkt_data, &out_pkt_data, nullptr, false); - if (self->DecodeSurface(ctx)) { - outputSurface = ctx.GetSurfaceMutable(); - } else { - outputSurface = make_empty_surface(self->GetPixelFormat()); - } - py::object scope = py::module_::import("__main__").attr("__dict__"); - py::dict globals = py::globals(); - auto locals = py::dict(); - - locals["getNumPlanes"] = - py::cpp_function( - [&]() -> int - { return outputSurface->NumPlanes(); }); - locals["getWidthByPlaneIdx"] = - py::cpp_function( - [&](int PlaneIdx) -> int - { return outputSurface->Width(PlaneIdx); }); - locals["getHeightByPlaneIdx"] = - py::cpp_function([&](int PlaneIdx) -> int { - return outputSurface->Height(PlaneIdx); - }); - locals["getDataPtrByPlaneIdx"] = - py::cpp_function([&](int PlaneIdx) -> uint64_t { - return outputSurface->PlanePtr(PlaneIdx); - }); - locals["getPitchByPlaneIdx"] = - py::cpp_function([&](int PlaneIdx) -> int { - return outputSurface->Pitch(PlaneIdx); - }); - nvcvImagePitch = outputSurface->Pitch(0); - py::exec(R"( + if (self->DecodeSurface(ctx, details)) { + outputSurface = ctx.GetSurfaceMutable(); + } else { + outputSurface = make_empty_surface(self->GetPixelFormat()); + } + py::object scope = py::module_::import("__main__").attr("__dict__"); + py::dict globals = py::globals(); + auto locals = py::dict(); + + locals["getNumPlanes"] = py::cpp_function( + [&]() -> int { 
return outputSurface->NumPlanes(); }); + locals["getWidthByPlaneIdx"] = + py::cpp_function([&](int PlaneIdx) -> int { + return outputSurface->Width(PlaneIdx); + }); + locals["getHeightByPlaneIdx"] = + py::cpp_function([&](int PlaneIdx) -> int { + return outputSurface->Height(PlaneIdx); + }); + locals["getDataPtrByPlaneIdx"] = + py::cpp_function([&](int PlaneIdx) -> uint64_t { + return outputSurface->PlanePtr(PlaneIdx); + }); + locals["getPitchByPlaneIdx"] = + py::cpp_function([&](int PlaneIdx) -> int { + return outputSurface->Pitch(PlaneIdx); + }); + nvcvImagePitch = outputSurface->Pitch(0); + py::exec(R"( class CAIMemory: def __init__(self, shape, data): @@ -909,44 +907,52 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( else: output = None - )", globals, locals); - return globals["output"]; - - }, + )", + globals, locals); + return globals["output"]; + }, py::arg("enc_packet_data"), py::arg("packet"), py::arg("pkt_data"), py::arg("bool_nvcv_check"), - R"pbdoc( + R"pbdoc( Decode single video frame from input stream. Video frame is returned as NVCVImage. :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the image, second is TaskExecInfo. 
)pbdoc") - .def( - "DecodeSingleSurface", - [](shared_ptr self, PacketData& out_pkt_data) { - DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, nullptr, - false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); - }, - py::arg("pkt_data"), py::return_value_policy::take_ownership, - py::call_guard(), - R"pbdoc( + .def( + "DecodeSingleSurface", + [](shared_ptr self, PacketData& out_pkt_data) { + DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, nullptr, + false); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } + }, + py::arg("pkt_data"), py::return_value_policy::take_ownership, + py::call_guard(), + R"pbdoc( Decode single video frame from input stream. Video frame is returned as Surface stored in vRAM. :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "DecodeSingleSurface", [](shared_ptr self, py::array_t& sei) { DecodeContext ctx(&sei, nullptr, nullptr, nullptr, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("sei"), py::return_value_policy::take_ownership, py::call_guard(), @@ -955,6 +961,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( Video frame is returned as Surface stored in vRAM. :param sei: decoded frame SEI data + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") .def( "DecodeSingleSurface", @@ -962,10 +969,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(&sei, nullptr, nullptr, &out_pkt_data, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("sei"), py::arg("pkt_data"), py::return_value_policy::take_ownership, @@ -976,6 +986,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param sei: decoded frame SEI data :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "DecodeSingleSurface", @@ -983,10 +994,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( SeekContext& seek_ctx) { DecodeContext ctx(&sei, nullptr, nullptr, nullptr, &seek_ctx, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("sei"), py::arg("seek_context"), py::return_value_policy::take_ownership, @@ -998,6 +1012,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param sei: decoded frame SEI data :param seek_context: SeekContext structure with information about seek procedure + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") .def( "DecodeSingleSurface", @@ -1005,10 +1020,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( SeekContext& seek_ctx, PacketData& out_pkt_data) { DecodeContext ctx(&sei, nullptr, nullptr, &out_pkt_data, &seek_ctx, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("sei"), py::arg("seek_context"), py::arg("pkt_data"), py::return_value_policy::take_ownership, @@ -1021,32 +1039,41 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param sei: decoded frame SEI data :param seek_context: SeekContext structure with information about seek procedure :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "DecodeSingleSurface", [](shared_ptr self) { DecodeContext ctx(nullptr, nullptr, nullptr, nullptr, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::return_value_policy::take_ownership, py::call_guard(), R"pbdoc( Decode single video frame from input stream. Video frame is returned as Surface stored in vRAM. + + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") .def( "DecodeSingleSurface", [](shared_ptr self, SeekContext& seek_ctx) { DecodeContext ctx(nullptr, nullptr, nullptr, nullptr, &seek_ctx, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("seek_context"), py::return_value_policy::take_ownership, py::call_guard(), @@ -1056,6 +1083,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( Use this function for seek + decode. :param seek_context: SeekContext structure with information about seek procedure + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "DecodeSingleSurface", @@ -1063,10 +1091,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, &seek_ctx, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("seek_context"), py::arg("pkt_data"), py::return_value_policy::take_ownership, @@ -1078,16 +1109,20 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param seek_context: SeekContext structure with information about seek procedure :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") .def( "DecodeSurfaceFromPacket", [](shared_ptr self, py::array_t& packet) { DecodeContext ctx(nullptr, &packet, nullptr, nullptr, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("packet"), py::return_value_policy::take_ownership, py::call_guard(), @@ -1099,6 +1134,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( Video frame is returned as Surface stored in vRAM. :param packet: encoded video packet + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "DecodeSurfaceFromPacket", @@ -1106,10 +1142,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( py::array_t& packet) { DecodeContext ctx(nullptr, &packet, &in_packet_data, nullptr, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("enc_packet_data"), py::arg("packet"), py::return_value_policy::take_ownership, @@ -1123,6 +1162,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param enc_packet_data: PacketData structure of encoded video packet :param packet: encoded video packet + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") .def( "DecodeSurfaceFromPacket", @@ -1130,10 +1170,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(nullptr, &packet, nullptr, &out_pkt_data, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("packet"), py::arg("pkt_data"), py::return_value_policy::take_ownership, @@ -1147,6 +1190,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param packet: encoded video packet :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "DecodeSurfaceFromPacket", @@ -1154,10 +1198,13 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( py::array_t& packet, PacketData& out_pkt_data) { DecodeContext ctx(nullptr, &packet, &in_pkt_data, &out_pkt_data, nullptr, false); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("enc_packet_data"), py::arg("packet"), py::arg("pkt_data"), py::return_value_policy::take_ownership, @@ -1172,16 +1219,20 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param enc_packet_data: PacketData structure of encoded video packet :param packet: encoded video packet :param pkt_data: PacketData structure of decoded frame 
with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "FlushSingleSurface", [](shared_ptr self) { DecodeContext ctx(nullptr, nullptr, nullptr, nullptr, nullptr, true); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::return_value_policy::take_ownership, py::call_guard(), @@ -1193,16 +1244,20 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( If this method returns empty Surface it means there are no decoded frames left. Video frame is returned as Surface stored in vRAM. + :return: tuple, first element is the surface, second is TaskExecInfo. )pbdoc") .def( "FlushSingleSurface", [](shared_ptr self, PacketData& out_pkt_data) { DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, nullptr, true); - if (self->DecodeSurface(ctx)) - return ctx.GetSurfaceMutable(); - else - return make_empty_surface(self->GetPixelFormat()); + TaskExecDetails details; + if (self->DecodeSurface(ctx, details)) { + return std::make_tuple(ctx.GetSurfaceMutable(), details.info); + } else { + return std::make_tuple(make_empty_surface(self->GetPixelFormat()), + details.info); + } }, py::arg("pkt_data"), py::return_value_policy::take_ownership, py::call_guard(), @@ -1216,14 +1271,17 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( Video frame is returned as Surface stored in vRAM. :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is the surface, second is TaskExecInfo. 
)pbdoc") - .def( + .def( "DecodeSingleFrame", [](shared_ptr self, py::array_t& frame, py::array_t& sei, PacketData& out_pkt_data) { DecodeContext ctx(&sei, nullptr, nullptr, &out_pkt_data, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("sei"), py::arg("pkt_data"), py::call_guard(), @@ -1233,7 +1291,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param sei: decoded frame SEI data :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc") .def( "DecodeSingleFrame", @@ -1241,7 +1299,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( py::array_t& sei, SeekContext& seek_ctx) { DecodeContext ctx(&sei, nullptr, nullptr, nullptr, &seek_ctx, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("sei"), py::arg("seek_context"), py::call_guard(), @@ -1251,7 +1311,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param sei: decoded frame SEI data :param seek_context: SeekContext structure with information about seek procedure - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. 
)pbdoc") .def( "DecodeSingleFrame", @@ -1260,7 +1320,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(&sei, nullptr, nullptr, &out_pkt_data, &seek_ctx, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("sei"), py::arg("seek_context"), py::arg("pkt_data"), py::call_guard(), @@ -1271,21 +1333,23 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param sei: decoded frame SEI data :param seek_context: SeekContext structure with information about seek procedure :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc") .def( "DecodeSingleFrame", [](shared_ptr self, py::array_t& frame) { DecodeContext ctx(nullptr, nullptr, nullptr, nullptr, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::call_guard(), R"pbdoc( Combination of DecodeSingleSurface + DownloadSingleSurface :param frame: decoded video frame - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. 
)pbdoc") .def( "DecodeSingleFrame", @@ -1293,7 +1357,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("pkt_data"), py::call_guard(), @@ -1302,7 +1368,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc") .def( "DecodeSingleFrame", @@ -1310,7 +1376,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( SeekContext& seek_ctx) { DecodeContext ctx(nullptr, nullptr, nullptr, nullptr, &seek_ctx, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("seek_context"), py::call_guard(), @@ -1319,7 +1387,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param seek_context: SeekContext structure with information about seek procedure - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. 
)pbdoc") .def( "DecodeSingleFrame", @@ -1327,7 +1395,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( SeekContext& seek_ctx, PacketData& out_pkt_data) { DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, &seek_ctx, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("seek_context"), py::arg("pkt_data"), py::call_guard(), @@ -1337,7 +1407,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param seek_context: SeekContext structure with information about seek procedure :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc") .def( "DecodeFrameFromPacket", @@ -1345,7 +1415,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( py::array_t& packet) { DecodeContext ctx(nullptr, &packet, nullptr, nullptr, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("packet"), py::call_guard(), @@ -1354,7 +1426,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param packet: encoded video packet - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. 
)pbdoc") .def( "DecodeFrameFromPacket", @@ -1362,7 +1434,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& in_pkt_data, py::array_t& packet) { DecodeContext ctx(nullptr, &packet, &in_pkt_data, nullptr, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("enc_packet_data"), py::arg("packet"), py::call_guard(), @@ -1372,7 +1446,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param enc_packet_data: PacketData structure of encoded video packet :param packet: encoded video packet - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc") .def( "DecodeFrameFromPacket", @@ -1381,7 +1455,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(nullptr, &packet, &in_pkt_data, &out_pkt_data, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("enc_packet_data"), py::arg("packet"), py::arg("pkt_data"), py::call_guard(), @@ -1392,7 +1468,7 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param enc_packet_data: PacketData structure of encoded video packet :param packet: encoded video packet :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. 
)pbdoc") .def( "DecodeFrameFromPacket", @@ -1400,7 +1476,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( py::array_t& packet, PacketData& out_pkt_data) { DecodeContext ctx(nullptr, &packet, nullptr, &out_pkt_data, nullptr, false); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("packet"), py::arg("pkt_data"), py::call_guard(), @@ -1410,20 +1488,23 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param packet: encoded video packet :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. - :return: True in case of success, False otherwise + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc") .def( "FlushSingleFrame", [](shared_ptr self, py::array_t& frame) { DecodeContext ctx(nullptr, nullptr, nullptr, nullptr, nullptr, true); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::call_guard(), R"pbdoc( Combination of FlushSingleSurface + DownloadSingleSurface :param frame: decoded video frame + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. 
)pbdoc") .def( "FlushSingleFrame", @@ -1431,7 +1512,9 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( PacketData& out_pkt_data) { DecodeContext ctx(nullptr, nullptr, nullptr, &out_pkt_data, nullptr, true); - return self->DecodeFrame(ctx, frame); + TaskExecDetails details; + return std::make_tuple(self->DecodeFrame(ctx, details, frame), + details.info); }, py::arg("frame"), py::arg("pkt_data"), py::call_guard(), @@ -1440,5 +1523,6 @@ elif getNumPlanes() == 3 and getWidthByPlaneIdx(0) > 32 and getHeightByPlaneIdx( :param frame: decoded video frame :param pkt_data: PacketData structure of decoded frame with PTS, DTS etc. + :return: tuple, first element is True in case of success, False otherwise. Second elements is TaskExecInfo. )pbdoc"); } diff --git a/src/TC/TC_CORE/inc/TC_CORE.hpp b/src/TC/TC_CORE/inc/TC_CORE.hpp index 6b3da570..8bffb79d 100644 --- a/src/TC/TC_CORE/inc/TC_CORE.hpp +++ b/src/TC/TC_CORE/inc/TC_CORE.hpp @@ -36,6 +36,22 @@ class TC_CORE_EXPORT Token { enum class TaskExecStatus { TASK_EXEC_SUCCESS, TASK_EXEC_FAIL }; +enum class TaskExecInfo { + SUCCESS, + FAIL, + END_OF_STREAM, + MORE_DATA_NEEDED, + BIT_DEPTH_NOT_SUPPORTED +}; + +struct TaskExecDetails { + TaskExecInfo info = TaskExecInfo::SUCCESS; + + TaskExecDetails(TaskExecInfo const& new_info) : info(new_info) {} + + TaskExecDetails() : info(TaskExecInfo::SUCCESS) {} +}; + /* Synchronization call which will be done after a blocking task; */ typedef void (*p_sync_call)(void *p_args); diff --git a/src/TC/inc/FFmpegDemuxer.h b/src/TC/inc/FFmpegDemuxer.h index 0f01fb67..edba4357 100644 --- a/src/TC/inc/FFmpegDemuxer.h +++ b/src/TC/inc/FFmpegDemuxer.h @@ -227,11 +227,12 @@ class TC_EXPORT FFmpegDemuxer AVColorRange GetColorRange() const; bool Demux(uint8_t*& pVideo, size_t& rVideoBytes, PacketData& pktData, - uint8_t** ppSEI = nullptr, size_t* pSEIBytes = nullptr); + TaskExecDetails& details, uint8_t** ppSEI = nullptr, + size_t* pSEIBytes = nullptr); bool 
Seek(VPF::SeekContext& seek_ctx, uint8_t*& pVideo, size_t& rVideoBytes, - PacketData& pktData, uint8_t** ppSEI = nullptr, - size_t* pSEIBytes = nullptr); + PacketData& pktData, TaskExecDetails& details, + uint8_t** ppSEI = nullptr, size_t* pSEIBytes = nullptr); void Flush(); diff --git a/src/TC/inc/Tasks.hpp b/src/TC/inc/Tasks.hpp index 5ba5d861..0c7fc14a 100644 --- a/src/TC/inc/Tasks.hpp +++ b/src/TC/inc/Tasks.hpp @@ -117,16 +117,19 @@ class TC_CORE_EXPORT NvdecDecodeFrame final : public Task uint32_t decodedFramesPoolSize, uint32_t coded_width, uint32_t coded_height, Pixel_Format format); + TaskExecDetails& GetLastExecDetails(); private: static const uint32_t numInputs = 3U; - static const uint32_t numOutputs = 2U; + static const uint32_t numOutputs = 3U; struct NvdecDecodeFrame_Impl* pImpl = nullptr; NvdecDecodeFrame(CUstream cuStream, CUcontext cuContext, cudaVideoCodec videoCodec, uint32_t decodedFramesPoolSize, uint32_t coded_width, uint32_t coded_height, Pixel_Format format); + + void UpdateExecInfo(TaskExecInfo info); }; class TC_CORE_EXPORT FfmpegDecodeFrame final : public Task @@ -143,6 +146,7 @@ class TC_CORE_EXPORT FfmpegDecodeFrame final : public Task ~FfmpegDecodeFrame() final; static FfmpegDecodeFrame* Make(const char* URL, NvDecoderClInterface& cli_iface); + TaskExecDetails& GetLastExecDetails(); private: static const uint32_t num_inputs = 0U; @@ -254,14 +258,16 @@ class TC_CORE_EXPORT DemuxFrame final : public Task uint32_t opts_size); static DemuxFrame* Make(const char* url, const char** ffmpeg_options, uint32_t opts_size); + TaskExecDetails& GetLastExecDetails(); private: DemuxFrame(std::istream& i_str, const char** ffmpeg_options, uint32_t opts_size); DemuxFrame(const char* url, const char** ffmpeg_options, uint32_t opts_size); static const uint32_t numInputs = 2U; - static const uint32_t numOutputs = 4U; + static const uint32_t numOutputs = 5U; struct DemuxFrame_Impl* pImpl = nullptr; + void UpdateExecInfo(TaskExecDetails &details); }; class 
TC_CORE_EXPORT ConvertSurface final : public Task diff --git a/src/TC/src/FFmpegDemuxer.cpp b/src/TC/src/FFmpegDemuxer.cpp index 878b5e2b..54820b5c 100644 --- a/src/TC/src/FFmpegDemuxer.cpp +++ b/src/TC/src/FFmpegDemuxer.cpp @@ -98,9 +98,12 @@ AVColorRange FFmpegDemuxer::GetColorRange() const { return color_range; } extern unsigned long GetNumDecodeSurfaces(cudaVideoCodec eCodec, unsigned int nWidth, unsigned int nHeight); -bool FFmpegDemuxer::Demux(uint8_t *&pVideo, size_t &rVideoBytes, - PacketData &pktData, uint8_t **ppSEI, - size_t *pSEIBytes) { +bool FFmpegDemuxer::Demux(uint8_t*& pVideo, size_t& rVideoBytes, + PacketData& pktData, TaskExecDetails& details, + uint8_t** ppSEI, size_t* pSEIBytes) +{ + rVideoBytes = 0U; + if (!fmtc) { return false; } @@ -162,6 +165,7 @@ bool FFmpegDemuxer::Demux(uint8_t *&pVideo, size_t &rVideoBytes, : is_mp4HEVC ? "filter_units=pass_types=39-40" : "unknown"; ret = av_bsf_list_parse_str(sei_filter.c_str(), &bsfc_sei); if (0 > ret) { + details.info = TaskExecInfo::FAIL; throw runtime_error("Error initializing " + sei_filter + " bitstream filter: " + AvErrorToString(ret)); } @@ -169,12 +173,14 @@ bool FFmpegDemuxer::Demux(uint8_t *&pVideo, size_t &rVideoBytes, ret = avcodec_parameters_copy(bsfc_sei->par_in, fmtc->streams[videoStream]->codecpar); if (0 != ret) { + details.info = TaskExecInfo::FAIL; throw runtime_error("Error copying codec parameters: " + AvErrorToString(ret)); } ret = av_bsf_init(bsfc_sei); if (0 != ret) { + details.info = TaskExecInfo::FAIL; throw runtime_error("Error initializing " + sei_filter + " bitstream filter: " + AvErrorToString(ret)); } @@ -198,6 +204,8 @@ bool FFmpegDemuxer::Demux(uint8_t *&pVideo, size_t &rVideoBytes, if (AVERROR_EOF != ret) { // No need to report EOF; cerr << "Failed to read frame: " << AvErrorToString(ret) << endl; + } else { + details.info = TaskExecInfo::END_OF_STREAM; } return false; } @@ -258,7 +266,8 @@ int64_t FFmpegDemuxer::TsFromFrameNumber(int64_t frame_num) bool 
FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, size_t& rVideoBytes, PacketData& pktData, - uint8_t** ppSEI, size_t* pSEIBytes) + TaskExecDetails& details, uint8_t** ppSEI, + size_t* pSEIBytes) { /* !!! IMPORTANT !!! * Across this function packet decode timestamp (DTS) values are used to @@ -273,6 +282,7 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, } if (IsVFR() && seekCtx.IsByNumber()) { + details.info = TaskExecInfo::FAIL; cerr << "Can't seek by frame number in VFR sequences. Seek by timestamp " "instead." << endl; @@ -296,10 +306,12 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, ret = av_seek_frame(fmtc, GetVideoStreamIndex(), timestamp, seek_backward ? AVSEEK_FLAG_BACKWARD | flags : flags); } else { + details.info = TaskExecInfo::FAIL; throw runtime_error("Invalid seek mode"); } if (ret < 0) { + details.info = TaskExecInfo::FAIL; throw runtime_error("Error seeking for frame: " + AvErrorToString(ret)); } }; @@ -314,6 +326,7 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, // Rely solely on FFMpeg API for seek by timestamp; return 1; } else { + details.info = TaskExecInfo::FAIL; throw runtime_error("Invalid seek mode."); } @@ -335,7 +348,7 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, int condition = 0; do { - if (!Demux(pVideo, rVideoBytes, pkt_data, ppSEI, pSEIBytes)) { + if (!Demux(pVideo, rVideoBytes, pkt_data, details, ppSEI, pSEIBytes)) { break; } condition = is_seek_done(pkt_data, seek_ctx); @@ -348,6 +361,7 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, tmp_ctx.seek_tssec -= this->GetTimebase(); tmp_ctx.seek_tssec = max(0.0, tmp_ctx.seek_tssec); } else { + details.info = TaskExecInfo::FAIL; throw runtime_error("Invalid seek mode."); } seek_frame(tmp_ctx, AVSEEK_FLAG_ANY); @@ -367,7 +381,7 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, SeekContext& seek_ctx) { seek_frame(seek_ctx, AVSEEK_FLAG_BACKWARD); - Demux(pVideo, 
rVideoBytes, pkt_data, ppSEI, pSEIBytes); + Demux(pVideo, rVideoBytes, pkt_data, details, ppSEI, pSEIBytes); seek_ctx.out_frame_pts = pkt_data.pts; seek_ctx.out_frame_duration = pkt_data.duration; }; @@ -380,6 +394,7 @@ bool FFmpegDemuxer::Seek(SeekContext& seekCtx, uint8_t*& pVideo, seek_for_prev_key_frame(pktData, seekCtx); break; default: + details.info = TaskExecInfo::FAIL; throw runtime_error("Unsupported seek mode"); break; } @@ -521,7 +536,6 @@ FFmpegDemuxer::FFmpegDemuxer(AVFormatContext *fmtcx) : fmtc(fmtcx) { throw runtime_error(ss.str()); } - //gop_size = fmtc->streams[videoStream]->codec->gop_size; eVideoCodec = fmtc->streams[videoStream]->codecpar->codec_id; width = fmtc->streams[videoStream]->codecpar->width; height = fmtc->streams[videoStream]->codecpar->height; diff --git a/src/TC/src/FfmpegSwDecoder.cpp b/src/TC/src/FfmpegSwDecoder.cpp index 8b6a3041..ca4bd836 100644 --- a/src/TC/src/FfmpegSwDecoder.cpp +++ b/src/TC/src/FfmpegSwDecoder.cpp @@ -62,10 +62,12 @@ struct FfmpegDecodeFrame_Impl { AVFrame* frame = nullptr; AVPacket pktSrc = {0}; Buffer* dec_frame = nullptr; + Buffer* pDetails = nullptr; map side_data; int video_stream_idx = -1; - bool end_encode = false; + bool end_decode = false; + bool eof = false; FfmpegDecodeFrame_Impl(const char* URL, AVDictionary* pOptions) { @@ -136,6 +138,8 @@ struct FfmpegDecodeFrame_Impl { if (!frame) { cerr << "Could not allocate frame" << endl; } + + pDetails = Buffer::MakeOwnMem(sizeof(TaskExecDetails)); } bool SaveYUV420(AVFrame* pframe) @@ -251,9 +255,9 @@ struct FfmpegDecodeFrame_Impl { return true; } - bool DecodeSingleFrame() +bool DecodeSingleFrame() { - if (end_encode) { + if (end_decode) { return false; } @@ -261,22 +265,34 @@ struct FfmpegDecodeFrame_Impl { do { // Read packets from stream until we find a video packet; do { + if (eof) { + break; + } + auto ret = av_read_frame(fmt_ctx, &pktSrc); - if (ret < 0) { - // Flush decoder; - end_encode = true; - return DecodeSinglePacket(nullptr); + + if 
(AVERROR_EOF == ret) { + eof = true; + break; + } else if (ret < 0) { + end_decode = true; + UpdateExecInfo(TaskExecInfo::FAIL); + return false; } } while (pktSrc.stream_index != video_stream_idx); - auto status = DecodeSinglePacket(&pktSrc); + auto status = DecodeSinglePacket(eof ? nullptr : &pktSrc); switch (status) { case DEC_SUCCESS: return true; case DEC_ERROR: + UpdateExecInfo(TaskExecInfo::FAIL); + end_decode = true; return false; case DEC_EOS: + UpdateExecInfo(TaskExecInfo::END_OF_STREAM); + end_decode = true; return false; case DEC_MORE: continue; @@ -332,7 +348,10 @@ struct FfmpegDecodeFrame_Impl { DECODE_STATUS DecodeSinglePacket(const AVPacket* pktSrc) { auto res = avcodec_send_packet(avctx, pktSrc); - if (res < 0) { + if (AVERROR_EOF == res) { + // Flush decoder; + res = 0; + } else if (res < 0) { cerr << "Error while sending a packet to the decoder" << endl; cerr << "Error description: " << AvErrorToString(res) << endl; return DEC_ERROR; @@ -341,7 +360,6 @@ struct FfmpegDecodeFrame_Impl { while (res >= 0) { res = avcodec_receive_frame(avctx, frame); if (res == AVERROR_EOF) { - cerr << "Input file is over" << endl; return DEC_EOS; } else if (res == AVERROR(EAGAIN)) { return DEC_MORE; @@ -375,10 +393,25 @@ struct FfmpegDecodeFrame_Impl { if (dec_frame) { delete dec_frame; } + + if (pDetails) { + delete pDetails; + } } + + void UpdateExecInfo(TaskExecInfo info) + { + TaskExecDetails details(info); + pDetails->Update(sizeof(TaskExecDetails), &details); + } }; } // namespace VPF +TaskExecDetails& FfmpegDecodeFrame::GetLastExecDetails() +{ + return *(pImpl->pDetails->GetDataAs()); +} + TaskExecStatus FfmpegDecodeFrame::Run() { ClearOutputs(); @@ -394,13 +427,28 @@ TaskExecStatus FfmpegDecodeFrame::Run() void FfmpegDecodeFrame::GetParams(MuxingParams& params) { memset((void*)¶ms, 0, sizeof(params)); + auto fmtc = pImpl->fmt_ctx; + auto videoStream = + av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); + if (videoStream < 0) { + stringstream 
ss; + ss << __FUNCTION__ << ": can't find video stream in input file." << endl; + throw runtime_error(ss.str()); + } - params.videoContext.width = pImpl->avctx->width; - params.videoContext.height = pImpl->avctx->height; - params.videoContext.gop_size = pImpl->avctx->gop_size; + params.videoContext.width = fmtc->streams[videoStream]->codecpar->width; + params.videoContext.height = fmtc->streams[videoStream]->codecpar->height; params.videoContext.frameRate = - (1.0 * pImpl->avctx->framerate.num) / (1.0 * pImpl->avctx->framerate.den); + (double)fmtc->streams[videoStream]->r_frame_rate.num / + (double)fmtc->streams[videoStream]->r_frame_rate.den; + params.videoContext.avgFrameRate = + (double)fmtc->streams[videoStream]->avg_frame_rate.num / + (double)fmtc->streams[videoStream]->avg_frame_rate.den; + params.videoContext.timeBase = + (double)fmtc->streams[videoStream]->time_base.num / + (double)fmtc->streams[videoStream]->time_base.den; params.videoContext.codec = FFmpeg2NvCodecId(pImpl->avctx->codec_id); + params.videoContext.num_frames = fmtc->streams[videoStream]->nb_frames; switch (pImpl->avctx->pix_fmt) { case AV_PIX_FMT_YUVJ420P: @@ -432,7 +480,7 @@ void FfmpegDecodeFrame::GetParams(MuxingParams& params) break; } - switch (pImpl->avctx->colorspace) { + switch (fmtc->streams[videoStream]->codecpar->color_space) { case AVCOL_SPC_BT709: params.videoContext.color_space = BT_709; break; @@ -445,7 +493,7 @@ void FfmpegDecodeFrame::GetParams(MuxingParams& params) break; } - switch (pImpl->avctx->color_range) { + switch (fmtc->streams[videoStream]->codecpar->color_range) { case AVCOL_RANGE_MPEG: params.videoContext.color_range = MPEG; break; diff --git a/src/TC/src/Tasks.cpp b/src/TC/src/Tasks.cpp index 5b951278..50c7a3f9 100644 --- a/src/TC/src/Tasks.cpp +++ b/src/TC/src/Tasks.cpp @@ -300,6 +300,7 @@ struct NvdecDecodeFrame_Impl { NvDecoder nvDecoder; Surface* pLastSurface = nullptr; Buffer* pPacketData = nullptr; + Buffer* PDetails = nullptr; CUstream stream = 0; 
CUcontext context = nullptr; bool didDecode = false; @@ -315,12 +316,14 @@ struct NvdecDecodeFrame_Impl { { pLastSurface = Surface::Make(format); pPacketData = Buffer::MakeOwnMem(sizeof(PacketData)); + PDetails = Buffer::MakeOwnMem(sizeof(TaskExecDetails)); } ~NvdecDecodeFrame_Impl() { delete pLastSurface; delete pPacketData; + delete PDetails; } }; } // namespace VPF @@ -357,6 +360,18 @@ NvdecDecodeFrame::~NvdecDecodeFrame() delete pImpl; } +void NvdecDecodeFrame::UpdateExecInfo(TaskExecInfo info) +{ + TaskExecDetails details(info); + pImpl->PDetails->Update(sizeof(details), &details); + SetOutput(pImpl->PDetails, 2U); +} + +TaskExecDetails& NvdecDecodeFrame::GetLastExecDetails() +{ + return *pImpl->PDetails->GetDataAs(); +} + TaskExecStatus NvdecDecodeFrame::Run() { NvtxMark tick(GetName()); @@ -369,6 +384,7 @@ TaskExecStatus NvdecDecodeFrame::Run() /* Empty input given + we've never did decoding means something went * wrong; Otherwise (no input + we did decode) means we're flushing; */ + UpdateExecInfo(TaskExecInfo::FAIL); return TASK_EXEC_FAIL; } @@ -433,6 +449,7 @@ TaskExecStatus NvdecDecodeFrame::Run() elem_size = sizeof(uint16_t); break; default: + UpdateExecInfo(TaskExecInfo::BIT_DEPTH_NOT_SUPPORTED); return TASK_EXEC_FAIL; } @@ -453,14 +470,19 @@ TaskExecStatus NvdecDecodeFrame::Run() } return TASK_EXEC_SUCCESS; + } else if (pEncFrame) { + /* We have input but no output. + * That happens in the begining of decode loop. Not an error. */ + return TASK_EXEC_SUCCESS; + } else { + /* No input, no output. That means decoder is flushed and we have reached + * end of stream. */ + UpdateExecInfo(TaskExecInfo::END_OF_STREAM); + return TASK_EXEC_FAIL; } - - /* If we have input and don't get output so far that's fine. - * Otherwise input is NULL and we're flusing so we shall get frame. - */ - return pEncFrame ? 
TASK_EXEC_SUCCESS : TASK_EXEC_FAIL; } catch (exception& e) { cerr << e.what() << endl; + UpdateExecInfo(TaskExecInfo::FAIL); return TASK_EXEC_FAIL; } @@ -906,6 +928,7 @@ struct DemuxFrame_Impl { Buffer* pMuxingParams; Buffer* pSei; Buffer* pPktData; + Buffer* pDetails; unique_ptr demuxer; unique_ptr d_prov; @@ -921,6 +944,7 @@ struct DemuxFrame_Impl { pMuxingParams = Buffer::MakeOwnMem(sizeof(MuxingParams)); pSei = Buffer::MakeOwnMem(0U); pPktData = Buffer::MakeOwnMem(0U); + pDetails = Buffer::MakeOwnMem(sizeof(TaskExecDetails)); } explicit DemuxFrame_Impl(istream& istr, @@ -941,6 +965,7 @@ struct DemuxFrame_Impl { delete pMuxingParams; delete pSei; delete pPktData; + delete pDetails; } }; } // namespace VPF @@ -957,6 +982,11 @@ DemuxFrame* DemuxFrame::Make(const char* url, const char** ffmpeg_options, return new DemuxFrame(url, ffmpeg_options, opts_size); } +TaskExecDetails& DemuxFrame::GetLastExecDetails() +{ + return *pImpl->pDetails->GetDataAs(); +} + DemuxFrame::DemuxFrame(istream& i_str, const char** ffmpeg_options, uint32_t opts_size) : Task("DemuxFrame", DemuxFrame::numInputs, DemuxFrame::numOutputs) @@ -1007,6 +1037,12 @@ int64_t DemuxFrame::TsFromFrameNumber(int64_t frame_num) return pImpl->demuxer->TsFromFrameNumber(frame_num); } +void DemuxFrame::UpdateExecInfo(TaskExecDetails& details) +{ + pImpl->pDetails->Update(sizeof(details), &details); + SetOutput(pImpl->pDetails, 4U); +} + TaskExecStatus DemuxFrame::Run() { NvtxMark tick(GetName()); @@ -1024,16 +1060,26 @@ TaskExecStatus DemuxFrame::Run() bool needSEI = (nullptr != GetInput(0U)); auto pSeekCtxBuf = (Buffer*)GetInput(1U); + TaskExecDetails details; if (pSeekCtxBuf) { SeekContext seek_ctx = *pSeekCtxBuf->GetDataAs(); - auto ret = demuxer->Seek(seek_ctx, pVideo, videoBytes, pkt_data, + auto ret = demuxer->Seek(seek_ctx, pVideo, videoBytes, pkt_data, details, needSEI ? 
&pSEI : nullptr, &seiBytes); + + UpdateExecInfo(details); + + if (!ret) { + return TASK_EXEC_FAIL; + } + } else { + auto ret = demuxer->Demux(pVideo, videoBytes, pkt_data, details, + needSEI ? &pSEI : nullptr, &seiBytes); + + UpdateExecInfo(details); + if (!ret) { return TASK_EXEC_FAIL; } - } else if (!demuxer->Demux(pVideo, videoBytes, pkt_data, - needSEI ? &pSEI : nullptr, &seiBytes)) { - return TASK_EXEC_FAIL; } if (videoBytes) { diff --git a/tests/gt_files.json b/tests/gt_files.json new file mode 100644 index 00000000..7ddd29aa --- /dev/null +++ b/tests/gt_files.json @@ -0,0 +1,29 @@ +{ + "basic": { + "uri": "test.mp4", + "width": 848, + "height": 464, + "res_change_factor": 0.5, + "is_vfr": false, + "pix_fmt": "PixelFormat.NV12", + "framerate": 30, + "num_frames": 96, + "timebase": 8.1380e-5, + "color_space": "ColorSpace.BT_709", + "color_range": "ColorRange.MPEG" + }, + "res_change": { + "uri": "test_res_change.h264", + "width": 848, + "height": 464, + "res_change_factor": 0.5, + "is_vfr": false, + "pix_fmt": "PixelFormat.NV12", + "framerate": 30, + "num_frames": 96, + "res_change_frame": 47, + "timebase": 8.1380e-5, + "color_space": "ColorSpace.BT_709", + "color_range": "ColorRange.MPEG" + } +} \ No newline at end of file diff --git a/tests/test_PyFFmpegDecoder.py b/tests/test_PyFFmpegDecoder.py new file mode 100644 index 00000000..be1f5f82 --- /dev/null +++ b/tests/test_PyFFmpegDecoder.py @@ -0,0 +1,118 @@ +# +# Copyright 2023 Vision Labs LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Starting from Python 3.8 DLL search policy has changed. +# We need to add path to CUDA DLLs explicitly. +import sys +import os +from os.path import join, dirname + + +if os.name == "nt": + # Add CUDA_PATH env variable + cuda_path = os.environ["CUDA_PATH"] + if cuda_path: + os.add_dll_directory(cuda_path) + else: + print("CUDA_PATH environment variable is not set.", file=sys.stderr) + print("Can't set CUDA DLLs search path.", file=sys.stderr) + exit(1) + + # Add PATH as well for minor CUDA releases + sys_path = os.environ["PATH"] + if sys_path: + paths = sys_path.split(";") + for path in paths: + if os.path.isdir(path): + os.add_dll_directory(path) + else: + print("PATH environment variable is not set.", file=sys.stderr) + exit(1) + +import PyNvCodec as nvc +import numpy as np +import unittest +import json +from test_common import GroundTruth + + +class TestDecoderBasic(unittest.TestCase): + def __init__(self, methodName): + super().__init__(methodName=methodName) + + with open("gt_files.json") as f: + data = json.load(f)["basic"] + self.gtInfo = GroundTruth(**data) + + def test_width(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + self.assertEqual(self.gtInfo.width, ffDec.Width()) + + def test_height(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + self.assertEqual(self.gtInfo.height, ffDec.Height()) + + def test_color_space(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + self.assertEqual(self.gtInfo.color_space, str(ffDec.ColorSpace())) + + def test_color_range(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + self.assertEqual(self.gtInfo.color_range, str(ffDec.ColorRange())) + + def test_format(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + self.assertEqual(self.gtInfo.pix_fmt, str(ffDec.Format())) + + def test_framerate(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + 
self.assertEqual(self.gtInfo.framerate, ffDec.Framerate()) + + def test_avgframerate(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + self.assertEqual(self.gtInfo.framerate, ffDec.AvgFramerate()) + + def test_timebase(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + epsilon = 1e-4 + self.assertLessEqual( + np.abs(self.gtInfo.timebase - ffDec.Timebase()), epsilon) + + def test_decode_all_frames(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + dec_frames = 0 + frame = np.ndarray(dtype=np.uint8, shape=()) + while True: + success, details = ffDec.DecodeSingleFrame(frame) + if not success: + break + dec_frames += 1 + self.assertEqual(self.gtInfo.num_frames, dec_frames) + self.assertEqual(details, nvc.TaskExecInfo.END_OF_STREAM) + + def test_check_decode_status(self): + ffDec = nvc.PyFfmpegDecoder(self.gtInfo.uri, {}) + frame = np.ndarray(dtype=np.uint8, shape=()) + while True: + success, details = ffDec.DecodeSingleFrame(frame) + if not success: + self.assertEqual(details, nvc.TaskExecInfo.END_OF_STREAM) + break + self.assertEqual(details, nvc.TaskExecInfo.SUCCESS) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_PyNvDecoder.py b/tests/test_PyNvDecoder.py index b42c903b..fc63c52a 100644 --- a/tests/test_PyNvDecoder.py +++ b/tests/test_PyNvDecoder.py @@ -46,53 +46,40 @@ import numpy as np import unittest import random - -# Ground truth information about input video -gt_file = join(dirname(__file__), "test.mp4") -gt_file_res_change = join(dirname(__file__), "test_res_change.h264") -gt_width = 848 -gt_height = 464 -gt_res_change = 47 -gt_res_change_factor = 0.5 -gt_is_vfr = False -gt_pix_fmt = nvc.PixelFormat.NV12 -gt_framerate = 30 -gt_num_frames = 96 -gt_timebase = 8.1380e-5 -gt_color_space = nvc.ColorSpace.BT_709 -gt_color_range = nvc.ColorRange.MPEG +import json +from test_common import GroundTruth class TestDecoderBasic(unittest.TestCase): def __init__(self, methodName): super().__init__(methodName=methodName) 
- gpu_id = 0 - enc_file = gt_file - self.nvDec = nvc.PyNvDecoder(enc_file, gpu_id) + with open("gt_files.json") as f: + self.gtInfo = GroundTruth(**json.load(f)["basic"]) + self.nvDec = nvc.PyNvDecoder(self.gtInfo.uri, 0) def test_width(self): - self.assertEqual(gt_width, self.nvDec.Width()) + self.assertEqual(self.gtInfo.width, self.nvDec.Width()) def test_height(self): - self.assertEqual(gt_height, self.nvDec.Height()) + self.assertEqual(self.gtInfo.height, self.nvDec.Height()) def test_color_space(self): - self.assertEqual(gt_color_space, self.nvDec.ColorSpace()) + self.assertEqual(self.gtInfo.color_space, str(self.nvDec.ColorSpace())) def test_color_range(self): - self.assertEqual(gt_color_range, self.nvDec.ColorRange()) + self.assertEqual(self.gtInfo.color_range, str(self.nvDec.ColorRange())) def test_format(self): - self.assertEqual(gt_pix_fmt, self.nvDec.Format()) + self.assertEqual(self.gtInfo.pix_fmt, str(self.nvDec.Format())) def test_framerate(self): - self.assertEqual(gt_framerate, self.nvDec.Framerate()) + self.assertEqual(self.gtInfo.framerate, self.nvDec.Framerate()) def test_avgframerate(self): - self.assertEqual(gt_framerate, self.nvDec.AvgFramerate()) + self.assertEqual(self.gtInfo.framerate, self.nvDec.AvgFramerate()) def test_isvfr(self): - self.assertEqual(gt_is_vfr, self.nvDec.IsVFR()) + self.assertEqual(self.gtInfo.is_vfr, self.nvDec.IsVFR()) def test_framesize(self): frame_size = int(self.nvDec.Width() * self.nvDec.Height() * 3 / 2) @@ -100,7 +87,8 @@ def test_framesize(self): def test_timebase(self): epsilon = 1e-4 - self.assertLessEqual(np.abs(gt_timebase - self.nvDec.Timebase()), epsilon) + self.assertLessEqual( + np.abs(self.gtInfo.timebase - self.nvDec.Timebase()), epsilon) def test_lastpacketdata(self): try: @@ -113,16 +101,18 @@ def test_lastpacketdata(self): class TestDecoderStandalone(unittest.TestCase): def __init__(self, methodName): super().__init__(methodName=methodName) + with open("gt_files.json") as f: + self.gtInfo = 
GroundTruth(**json.load(f)["basic"]) def test_decodesurfacefrompacket(self): - nvDmx = nvc.PyFFmpegDemuxer(gt_file, {}) + nvDmx = nvc.PyFFmpegDemuxer(self.gtInfo.uri, {}) nvDec = nvc.PyNvDecoder( nvDmx.Width(), nvDmx.Height(), nvDmx.Format(), nvDmx.Codec(), 0 ) packet = np.ndarray(shape=(0), dtype=np.uint8) while nvDmx.DemuxSinglePacket(packet): - surf = nvDec.DecodeSurfaceFromPacket(packet) + surf, _ = nvDec.DecodeSurfaceFromPacket(packet) self.assertIsNotNone(surf) if not surf.Empty(): self.assertNotEqual(0, surf.PlanePtr().GpuMem()) @@ -132,7 +122,7 @@ def test_decodesurfacefrompacket(self): return def test_decodesurfacefrompacket_outpktdata(self): - nvDmx = nvc.PyFFmpegDemuxer(gt_file, {}) + nvDmx = nvc.PyFFmpegDemuxer(self.gtInfo.uri, {}) nvDec = nvc.PyNvDecoder( nvDmx.Width(), nvDmx.Height(), nvDmx.Format(), nvDmx.Codec(), 0 ) @@ -144,7 +134,8 @@ def test_decodesurfacefrompacket_outpktdata(self): in_pdata = nvc.PacketData() nvDmx.LastPacketData(in_pdata) out_pdata = nvc.PacketData() - surf = nvDec.DecodeSurfaceFromPacket(in_pdata, packet, out_pdata) + surf, _ = nvDec.DecodeSurfaceFromPacket( + in_pdata, packet, out_pdata) self.assertIsNotNone(surf) if not surf.Empty(): dec_frames += 1 @@ -152,7 +143,7 @@ def test_decodesurfacefrompacket_outpktdata(self): while True: out_pdata = nvc.PacketData() - surf = nvDec.FlushSingleSurface(out_pdata) + surf, _ = nvDec.FlushSingleSurface(out_pdata) if not surf.Empty(): out_bst_size += out_pdata.bsl else: @@ -161,7 +152,7 @@ def test_decodesurfacefrompacket_outpktdata(self): self.assertNotEqual(0, out_bst_size) def test_decode_all_surfaces(self): - nvDmx = nvc.PyFFmpegDemuxer(gt_file, {}) + nvDmx = nvc.PyFFmpegDemuxer(self.gtInfo.uri, {}) nvDec = nvc.PyNvDecoder( nvDmx.Width(), nvDmx.Height(), nvDmx.Format(), nvDmx.Codec(), 0 ) @@ -169,30 +160,50 @@ def test_decode_all_surfaces(self): dec_frames = 0 packet = np.ndarray(shape=(0), dtype=np.uint8) while nvDmx.DemuxSinglePacket(packet): - surf = 
nvDec.DecodeSurfaceFromPacket(packet) + surf, _ = nvDec.DecodeSurfaceFromPacket(packet) self.assertIsNotNone(surf) if not surf.Empty(): dec_frames += 1 while True: - surf = nvDec.FlushSingleSurface() + surf, _ = nvDec.FlushSingleSurface() self.assertIsNotNone(surf) if not surf.Empty(): dec_frames += 1 else: break - self.assertEqual(gt_num_frames, dec_frames) + self.assertEqual(self.gtInfo.num_frames, dec_frames) + + def test_check_decode_status(self): + nvDmx = nvc.PyFFmpegDemuxer(self.gtInfo.uri, {}) + nvDec = nvc.PyNvDecoder( + nvDmx.Width(), nvDmx.Height(), nvDmx.Format(), nvDmx.Codec(), 0 + ) + + packet = np.ndarray(shape=(0), dtype=np.uint8) + while nvDmx.DemuxSinglePacket(packet): + surf, _ = nvDec.DecodeSurfaceFromPacket(packet) + self.assertIsNotNone(surf) + while True: + surf, details = nvDec.FlushSingleSurface() + self.assertIsNotNone(surf) + if surf.Empty(): + self.assertEqual(details, nvc.TaskExecInfo.END_OF_STREAM) + break + else: + self.assertEqual(details, nvc.TaskExecInfo.SUCCESS) class TestDecoderBuiltin(unittest.TestCase): def __init__(self, methodName): super().__init__(methodName=methodName) + with open("gt_files.json") as f: + self.gtInfo = GroundTruth(**json.load(f)["basic"]) def test_decodesinglesurface(self): gpu_id = 0 - enc_file = gt_file - nvDec = nvc.PyNvDecoder(enc_file, gpu_id) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, gpu_id) try: - surf = nvDec.DecodeSingleSurface() + surf, _ = nvDec.DecodeSingleSurface() self.assertIsNotNone(surf) self.assertFalse(surf.Empty()) except: @@ -200,14 +211,13 @@ def test_decodesinglesurface(self): def test_decodesinglesurface_outpktdata(self): gpu_id = 0 - enc_file = gt_file - nvDec = nvc.PyNvDecoder(enc_file, gpu_id) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, gpu_id) dec_frame = 0 last_pts = nvc.NO_PTS while True: pdata = nvc.PacketData() - surf = nvDec.DecodeSingleSurface(pdata) + surf, _ = nvDec.DecodeSingleSurface(pdata) if surf.Empty(): break self.assertNotEqual(pdata.pts, nvc.NO_PTS) @@ -218,13 
+228,12 @@ def test_decodesinglesurface_outpktdata(self): def test_decodesinglesurface_sei(self): gpu_id = 0 - enc_file = gt_file - nvDec = nvc.PyNvDecoder(enc_file, gpu_id) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, gpu_id) total_sei_size = 0 while True: sei = np.ndarray(shape=(0), dtype=np.uint8) - surf = nvDec.DecodeSingleSurface(sei) + surf, _ = nvDec.DecodeSingleSurface(sei) if surf.Empty(): break total_sei_size += sei.size @@ -232,39 +241,39 @@ def test_decodesinglesurface_sei(self): def test_decodesinglesurface_seek(self): gpu_id = 0 - enc_file = gt_file - nvDec = nvc.PyNvDecoder(enc_file, gpu_id) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, gpu_id) - start_frame = random.randint(0, gt_num_frames - 1) + start_frame = random.randint(0, self.gtInfo.num_frames - 1) dec_frames = 1 seek_ctx = nvc.SeekContext(seek_frame=start_frame) - surf = nvDec.DecodeSingleSurface(seek_ctx) + surf, _ = nvDec.DecodeSingleSurface(seek_ctx) self.assertNotEqual(True, surf.Empty()) while True: - surf = nvDec.DecodeSingleSurface() + surf, _ = nvDec.DecodeSingleSurface() if surf.Empty(): break dec_frames += 1 - self.assertEqual(gt_num_frames - start_frame, dec_frames) + self.assertEqual(self.gtInfo.num_frames - start_frame, dec_frames) def test_decodesinglesurface_cmp_vs_continuous(self): gpu_id = 0 - enc_file = gt_file - nvDec = nvc.PyNvDecoder(enc_file, gpu_id) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, gpu_id) # First get reconstructed frame with seek - for idx in range(0, gt_num_frames): + for idx in range(0, self.gtInfo.num_frames): seek_ctx = nvc.SeekContext(seek_frame=idx) frame_seek = np.ndarray(shape=(0), dtype=np.uint8) pdata_seek = nvc.PacketData() - self.assertTrue(nvDec.DecodeSingleFrame(frame_seek, seek_ctx, pdata_seek)) + self.assertTrue(nvDec.DecodeSingleFrame( + frame_seek, seek_ctx, pdata_seek)[0]) # Then get it with continuous decoding - nvDec = nvc.PyNvDecoder(gt_file, 0) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, 0) frame_cont = np.ndarray(shape=(0), dtype=np.uint8) 
pdata_cont = nvc.PacketData() for i in range(0, idx + 1): - self.assertTrue(nvDec.DecodeSingleFrame(frame_cont, pdata_cont)) + self.assertTrue(nvDec.DecodeSingleFrame( + frame_cont, pdata_cont)[0]) # Compare frames if not np.array_equal(frame_seek, frame_cont): @@ -275,33 +284,49 @@ def test_decodesinglesurface_cmp_vs_continuous(self): fail_msg += "Video frames are not same\n" self.fail(fail_msg) + def test_check_decode_status(self): + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, 0) + + while True: + surf, details = nvDec.DecodeSingleSurface() + self.assertIsNotNone(surf) + if surf.Empty(): + self.assertEqual(details, nvc.TaskExecInfo.END_OF_STREAM) + break + else: + self.assertEqual(details, nvc.TaskExecInfo.SUCCESS) + def test_decode_all_surfaces(self): - nvDec = nvc.PyNvDecoder(gt_file, 0) + nvDec = nvc.PyNvDecoder(self.gtInfo.uri, 0) dec_frames = 0 while True: - surf = nvDec.DecodeSingleSurface() - if not surf or surf.Empty(): + surf, _ = nvDec.DecodeSingleSurface() + self.assertIsNotNone(surf) + if surf.Empty(): break dec_frames += 1 - self.assertEqual(gt_num_frames, dec_frames) + self.assertEqual(self.gtInfo.num_frames, dec_frames) def test_decode_resolution_change(self): - nvDec = nvc.PyNvDecoder(gt_file_res_change, 0) - rw = int(gt_width * gt_res_change_factor) - rh = int(gt_height * gt_res_change_factor) + with open("gt_files.json") as f: + resChangeInfo = GroundTruth(**json.load(f)["res_change"]) + nvDec = nvc.PyNvDecoder(resChangeInfo.uri, 0) + rw = int(resChangeInfo.width * resChangeInfo.res_change_factor) + rh = int(resChangeInfo.height * resChangeInfo.res_change_factor) dec_frames = 0 while True: - surf = nvDec.DecodeSingleSurface() - if not surf or surf.Empty(): + surf, _ = nvDec.DecodeSingleSurface() + self.assertIsNotNone(surf) + if surf.Empty(): break else: dec_frames += 1 - if dec_frames < gt_res_change: - self.assertEqual(surf.Width(), gt_width) - self.assertEqual(surf.Height(), gt_height) + if dec_frames < resChangeInfo.res_change_frame: + 
self.assertEqual(surf.Width(), resChangeInfo.width) + self.assertEqual(surf.Height(), resChangeInfo.height) else: self.assertEqual(surf.Width(), rw) self.assertEqual(surf.Height(), rh) diff --git a/tests/test_common.py b/tests/test_common.py new file mode 100644 index 00000000..792a5d2a --- /dev/null +++ b/tests/test_common.py @@ -0,0 +1,18 @@ +from pydantic import BaseModel +from typing import Optional + + +class GroundTruth(BaseModel): + uri: str + width: int + height: int + res_change_factor: float + is_vfr: bool + pix_fmt: str + framerate: float + num_frames: int + res_change_frame: Optional[int] = None + broken_frame: Optional[int] = None + timebase: float + color_space: str + color_range: str