3#include <score/gfx/Vulkan.hpp>
6#if __has_include(<libavutil/hwcontext_cuda.h>)
10typedef struct CUctx_st* CUcontext;
11typedef struct CUextMemory_st* CUexternalMemory;
12typedef struct CUmipmappedArray_st* CUmipmappedArray;
13typedef struct CUarray_st* CUarray;
14typedef struct CUstream_st* CUstream;
17#if defined(_WIN64) || defined(__LP64__)
18typedef unsigned long long CUdeviceptr;
20typedef unsigned int CUdeviceptr;
25{ } CUresult_placeholder;
27typedef enum cudaError_enum
/**
 * Handle kinds accepted in CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type.
 *
 * NOTE(review): this looks like a local re-declaration of the CUDA driver API
 * enum of the same name (the driver entry points in this file are resolved at
 * run time from the CUDA driver library) -- confirm, and keep the numeric
 * values in sync with the driver's definition.
 *
 * Within this file only OPAQUE_FD and OPAQUE_WIN32 are used, to import memory
 * exported from Vulkan.
 */
typedef enum CUexternalMemoryHandleType_enum
{
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,    // used with the fd from vkGetMemoryFdKHR
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2, // used with the HANDLE from vkGetMemoryWin32HandleKHR
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8,
} CUexternalMemoryHandleType;
45typedef enum CUarray_format_enum
47 CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
48 CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
49 CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
50 CU_AD_FORMAT_SIGNED_INT8 = 0x08,
51 CU_AD_FORMAT_SIGNED_INT16 = 0x09,
52 CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
53 CU_AD_FORMAT_HALF = 0x10,
54 CU_AD_FORMAT_FLOAT = 0x20,
57typedef enum CUmemorytype_enum
59 CU_MEMORYTYPE_HOST = 0x01,
60 CU_MEMORYTYPE_DEVICE = 0x02,
61 CU_MEMORYTYPE_ARRAY = 0x03,
62 CU_MEMORYTYPE_UNIFIED = 0x04,
66typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st
68 CUexternalMemoryHandleType type;
77 const void* nvSciBufObject;
79 unsigned long long size;
81 unsigned int reserved[16];
82} CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
84typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
89 CUarray_format Format;
90 unsigned int NumChannels;
92} CUDA_ARRAY3D_DESCRIPTOR;
94typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st
96 unsigned long long offset;
97 CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
98 unsigned int numLevels;
99 unsigned int reserved[16];
100} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
102typedef struct CUDA_MEMCPY2D_st
106 CUmemorytype srcMemoryType;
108 CUdeviceptr srcDevice;
113 CUmemorytype dstMemoryType;
115 CUdeviceptr dstDevice;
122#include <libavutil/hwcontext_cuda.h>
123#define SCORE_HAS_CUDA_HWCONTEXT 1
127#if defined(SCORE_HAS_CUDA_HWCONTEXT) && QT_HAS_VULKAN && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
129#include <Gfx/Graph/decoders/ColorSpace.hpp>
130#include <Gfx/Graph/decoders/GPUVideoDecoder.hpp>
131#include <Gfx/Graph/decoders/NV12.hpp>
132#include <Gfx/Graph/decoders/P010.hpp>
133#include <Video/GpuFormats.hpp>
135#include <QtGui/private/qrhivulkan_p.h>
136#include <qvulkanfunctions.h>
137#include <vulkan/vulkan.h>
141#ifndef VK_USE_PLATFORM_WIN32_KHR
142#define VK_USE_PLATFORM_WIN32_KHR
144#include <vulkan/vulkan_win32.h>
151#include <libavformat/avformat.h>
152#include <libavutil/hwcontext.h>
167 using FN_cuCtxPushCurrent = CUresult (*)(CUcontext);
168 using FN_cuCtxPopCurrent = CUresult (*)(CUcontext*);
169 using FN_cuImportExternalMemory
170 = CUresult (*)(CUexternalMemory*,
const CUDA_EXTERNAL_MEMORY_HANDLE_DESC*);
171 using FN_cuExternalMemoryGetMappedMipmappedArray = CUresult (*)(
172 CUmipmappedArray*, CUexternalMemory,
173 const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC*);
174 using FN_cuMipmappedArrayGetLevel = CUresult (*)(CUarray*, CUmipmappedArray,
unsigned int);
175 using FN_cuMemcpy2DAsync = CUresult (*)(
const CUDA_MEMCPY2D*, CUstream);
176 using FN_cuStreamSynchronize = CUresult (*)(CUstream);
177 using FN_cuDestroyExternalMemory = CUresult (*)(CUexternalMemory);
178 using FN_cuMipmappedArrayDestroy = CUresult (*)(CUmipmappedArray);
180 FN_cuCtxPushCurrent ctxPush{};
181 FN_cuCtxPopCurrent ctxPop{};
182 FN_cuImportExternalMemory importExtMem{};
183 FN_cuExternalMemoryGetMappedMipmappedArray getMapArray{};
184 FN_cuMipmappedArrayGetLevel getLevel{};
185 FN_cuMemcpy2DAsync memcpy2DAsync{};
186 FN_cuStreamSynchronize streamSync{};
187 FN_cuDestroyExternalMemory destroyExtMem{};
188 FN_cuMipmappedArrayDestroy destroyMipArray{};
193 lib = (
void*)LoadLibraryA(
"nvcuda.dll");
196 auto sym = [
this](
const char* n) {
return (
void*)GetProcAddress((HMODULE)lib, n); };
198 lib = dlopen(
"libcuda.so.1", RTLD_NOW);
201 auto sym = [
this](
const char* n) {
return dlsym(lib, n); };
204 ctxPush = (FN_cuCtxPushCurrent)sym(
"cuCtxPushCurrent_v2");
205 ctxPop = (FN_cuCtxPopCurrent)sym(
"cuCtxPopCurrent_v2");
206 importExtMem = (FN_cuImportExternalMemory)sym(
"cuImportExternalMemory");
207 getMapArray = (FN_cuExternalMemoryGetMappedMipmappedArray)sym(
208 "cuExternalMemoryGetMappedMipmappedArray");
209 getLevel = (FN_cuMipmappedArrayGetLevel)sym(
"cuMipmappedArrayGetLevel");
210 memcpy2DAsync = (FN_cuMemcpy2DAsync)sym(
"cuMemcpy2DAsync_v2");
211 streamSync = (FN_cuStreamSynchronize)sym(
"cuStreamSynchronize");
212 destroyExtMem = (FN_cuDestroyExternalMemory)sym(
"cuDestroyExternalMemory");
213 destroyMipArray = (FN_cuMipmappedArrayDestroy)sym(
"cuMipmappedArrayDestroy");
215 return ctxPush && ctxPop && importExtMem && getMapArray && getLevel
216 && memcpy2DAsync && streamSync && destroyExtMem && destroyMipArray;
224 FreeLibrary((HMODULE)lib);
252struct HWCudaVulkanDecoder : GPUVideoDecoder
255 PixelFormatInfo m_fmt;
258 VkDevice m_dev{VK_NULL_HANDLE};
259 VkPhysicalDevice m_physDev{VK_NULL_HANDLE};
260 QVulkanFunctions* m_funcs{};
261 QVulkanDeviceFunctions* m_dfuncs{};
263 PFN_vkGetMemoryWin32HandleKHR m_vkGetMemoryWin32HandleKHR{};
265 PFN_vkGetMemoryFdKHR m_vkGetMemoryFdKHR{};
270 CUstream m_cuStream{};
276 struct PlaneResources
278 VkImage image{VK_NULL_HANDLE};
279 VkDeviceMemory memory{VK_NULL_HANDLE};
280 CUexternalMemory cuExtMem{};
281 CUmipmappedArray cuMipArray{};
283 VkDeviceSize memSize{};
285 PlaneResources m_planes[2]{};
287 bool m_interopReady{
false};
291 static bool isAvailable(QRhi& rhi, AVBufferRef* hwDeviceCtx)
293 if(rhi.backend() != QRhi::Vulkan)
295 auto* nh =
static_cast<const QRhiVulkanNativeHandles*
>(rhi.nativeHandles());
296 if(!nh || !nh->dev || !nh->physDev || !nh->inst)
299 if(!nh->inst->getInstanceProcAddr(
"vkGetMemoryWin32HandleKHR"))
302 if(!nh->inst->getInstanceProcAddr(
"vkGetMemoryFdKHR"))
309 auto* devCtx =
reinterpret_cast<AVHWDeviceContext*
>(hwDeviceCtx->data);
310 if(devCtx->type != AV_HWDEVICE_TYPE_CUDA)
318 explicit HWCudaVulkanDecoder(
324 auto* nh =
static_cast<const QRhiVulkanNativeHandles*
>(rhi.nativeHandles());
326 m_physDev = nh->physDev;
327 m_funcs = nh->inst->functions();
328 m_dfuncs = nh->inst->deviceFunctions(m_dev);
330 m_vkGetMemoryWin32HandleKHR =
reinterpret_cast<PFN_vkGetMemoryWin32HandleKHR
>(
331 nh->inst->getInstanceProcAddr(
"vkGetMemoryWin32HandleKHR"));
333 m_vkGetMemoryFdKHR =
reinterpret_cast<PFN_vkGetMemoryFdKHR
>(
334 nh->inst->getInstanceProcAddr(
"vkGetMemoryFdKHR"));
338 auto* devCtx =
reinterpret_cast<AVHWDeviceContext*
>(hwDeviceCtx->data);
339 auto* cudaDevCtx =
static_cast<AVCUDADeviceContext*
>(devCtx->hwctx);
340 m_cuCtx = cudaDevCtx->cuda_ctx;
341 m_cuStream = cudaDevCtx->stream;
346 ~HWCudaVulkanDecoder()
override { cleanup(); }
350 if(m_cuCtx && m_cu.ctxPush)
352 m_cu.ctxPush(m_cuCtx);
353 for(
auto& p : m_planes)
356 m_cu.destroyMipArray(p.cuMipArray);
358 m_cu.destroyExtMem(p.cuExtMem);
367 for(
auto& p : m_planes)
369 if(p.image != VK_NULL_HANDLE)
370 m_dfuncs->vkDestroyImage(m_dev, p.image,
nullptr);
371 if(p.memory != VK_NULL_HANDLE)
372 m_dfuncs->vkFreeMemory(m_dev, p.memory,
nullptr);
373 p.image = VK_NULL_HANDLE;
374 p.memory = VK_NULL_HANDLE;
377 m_interopReady =
false;
384 std::pair<QShader, QShader> init(RenderList& r)
override
386 auto& rhi = *r.state.rhi;
387 const auto w = decoder.width, h = decoder.height;
393 auto tex = rhi.newTexture(QRhiTexture::R16, {w, h}, 1, QRhiTexture::Flag{});
395 auto sampler = rhi.newSampler(
396 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
397 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
399 samplers.push_back({sampler, tex});
403 = rhi.newTexture(QRhiTexture::RG16, {w / 2, h / 2}, 1, QRhiTexture::Flag{});
405 auto sampler = rhi.newSampler(
406 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
407 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
409 samplers.push_back({sampler, tex});
413 if(!setupPlane(0, VK_FORMAT_R16_UNORM, w, h, 1, 2)
414 || !setupPlane(1, VK_FORMAT_R16G16_UNORM, w / 2, h / 2, 2, 2))
416 qDebug() <<
"HWCudaVulkanDecoder: interop setup failed";
421 r.state, vertexShader(),
422 QString(P010Decoder::frag).arg(
"").arg(colorMatrix(decoder)));
428 auto tex = rhi.newTexture(QRhiTexture::R8, {w, h}, 1, QRhiTexture::Flag{});
430 auto sampler = rhi.newSampler(
431 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
432 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
434 samplers.push_back({sampler, tex});
438 = rhi.newTexture(QRhiTexture::RG8, {w / 2, h / 2}, 1, QRhiTexture::Flag{});
440 auto sampler = rhi.newSampler(
441 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
442 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
444 samplers.push_back({sampler, tex});
448 if(!setupPlane(0, VK_FORMAT_R8_UNORM, w, h, 1, 1)
449 || !setupPlane(1, VK_FORMAT_R8G8_UNORM, w / 2, h / 2, 2, 1))
451 qDebug() <<
"HWCudaVulkanDecoder: interop setup failed";
455 QString frag = NV12Decoder::nv12_filter_prologue;
456 frag +=
" vec3 yuv = vec3(y, u, v);\n";
457 frag += NV12Decoder::nv12_filter_epilogue;
459 r.state, vertexShader(), frag.arg(
"").arg(colorMatrix(decoder)));
467 void exec(RenderList& r, QRhiResourceUpdateBatch& res, AVFrame& frame)
override
469#if LIBAVUTIL_VERSION_MAJOR >= 57
473 if(!Video::formatIsHardwareDecoded(
static_cast<AVPixelFormat
>(frame.format)))
476 const int w = decoder.width;
477 const int h = decoder.height;
478 const int bpc = m_fmt.is10bit() ? 2 : 1;
480 m_cu.ctxPush(m_cuCtx);
485 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
486 cpy.srcDevice =
reinterpret_cast<CUdeviceptr
>(frame.data[0]);
487 cpy.srcPitch =
static_cast<size_t>(frame.linesize[0]);
488 cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
489 cpy.dstArray = m_planes[0].cuArray;
490 cpy.WidthInBytes =
static_cast<size_t>(w * 1 * bpc);
491 cpy.Height =
static_cast<size_t>(h);
492 m_cu.memcpy2DAsync(&cpy, m_cuStream);
498 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
499 cpy.srcDevice =
reinterpret_cast<CUdeviceptr
>(frame.data[1]);
500 cpy.srcPitch =
static_cast<size_t>(frame.linesize[1]);
501 cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
502 cpy.dstArray = m_planes[1].cuArray;
503 cpy.WidthInBytes =
static_cast<size_t>((w / 2) * 2 * bpc);
504 cpy.Height =
static_cast<size_t>(h / 2);
505 m_cu.memcpy2DAsync(&cpy, m_cuStream);
509 m_cu.streamSync(m_cuStream);
517 samplers[0].texture->setNativeLayout(VK_IMAGE_LAYOUT_GENERAL);
518 samplers[1].texture->setNativeLayout(VK_IMAGE_LAYOUT_GENERAL);
527 int idx, VkFormat vkFmt,
int w,
int h,
528 int numChannels,
int bytesPerChannel)
530 auto& plane = m_planes[idx];
534 VkExternalMemoryImageCreateInfo extInfo{};
535 extInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
537 extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
539 extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
542 VkImageCreateInfo imgInfo{};
543 imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
544 imgInfo.pNext = &extInfo;
545 imgInfo.imageType = VK_IMAGE_TYPE_2D;
546 imgInfo.format = vkFmt;
547 imgInfo.extent = {
static_cast<uint32_t
>(w),
static_cast<uint32_t
>(h), 1};
548 imgInfo.mipLevels = 1;
549 imgInfo.arrayLayers = 1;
550 imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
551 imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
552 imgInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
553 imgInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
554 imgInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
556 if(m_dfuncs->vkCreateImage(m_dev, &imgInfo,
nullptr, &plane.image) != VK_SUCCESS)
561 VkMemoryRequirements memReqs{};
562 m_dfuncs->vkGetImageMemoryRequirements(m_dev, plane.image, &memReqs);
564 VkPhysicalDeviceMemoryProperties memProps{};
565 m_funcs->vkGetPhysicalDeviceMemoryProperties(m_physDev, &memProps);
567 uint32_t memTypeIdx = UINT32_MAX;
568 for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
570 if((memReqs.memoryTypeBits & (1u << i))
571 && (memProps.memoryTypes[i].propertyFlags
572 & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
578 if(memTypeIdx == UINT32_MAX)
580 m_dfuncs->vkDestroyImage(m_dev, plane.image,
nullptr);
581 plane.image = VK_NULL_HANDLE;
585 VkExportMemoryAllocateInfo exportInfo{};
586 exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
588 exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
590 exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
593 VkMemoryDedicatedAllocateInfo dedicatedInfo{};
594 dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
595 dedicatedInfo.pNext = &exportInfo;
596 dedicatedInfo.image = plane.image;
598 VkMemoryAllocateInfo allocInfo{};
599 allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
600 allocInfo.pNext = &dedicatedInfo;
601 allocInfo.allocationSize = memReqs.size;
602 allocInfo.memoryTypeIndex = memTypeIdx;
604 if(m_dfuncs->vkAllocateMemory(m_dev, &allocInfo,
nullptr, &plane.memory) != VK_SUCCESS)
606 m_dfuncs->vkDestroyImage(m_dev, plane.image,
nullptr);
607 plane.image = VK_NULL_HANDLE;
611 plane.memSize = memReqs.size;
613 if(m_dfuncs->vkBindImageMemory(m_dev, plane.image, plane.memory, 0) != VK_SUCCESS)
615 m_dfuncs->vkFreeMemory(m_dev, plane.memory,
nullptr);
616 m_dfuncs->vkDestroyImage(m_dev, plane.image,
nullptr);
617 plane.image = VK_NULL_HANDLE;
618 plane.memory = VK_NULL_HANDLE;
624 m_cu.ctxPush(m_cuCtx);
629 VkMemoryGetWin32HandleInfoKHR getHandleInfo{};
630 getHandleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
631 getHandleInfo.memory = plane.memory;
632 getHandleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
634 HANDLE handle =
nullptr;
635 if(m_vkGetMemoryWin32HandleKHR(m_dev, &getHandleInfo, &handle) != VK_SUCCESS
645 CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc{};
646 memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32;
647 memDesc.handle.win32.handle = handle;
648 memDesc.size = plane.memSize;
650 if(m_cu.importExtMem(&plane.cuExtMem, &memDesc) != CUDA_SUCCESS)
662 VkMemoryGetFdInfoKHR getFdInfo{};
663 getFdInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
664 getFdInfo.memory = plane.memory;
665 getFdInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
668 if(m_vkGetMemoryFdKHR(m_dev, &getFdInfo, &fd) != VK_SUCCESS || fd < 0)
677 CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc{};
678 memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD;
679 memDesc.handle.fd = fd;
680 memDesc.size = plane.memSize;
682 if(m_cu.importExtMem(&plane.cuExtMem, &memDesc) != CUDA_SUCCESS)
694 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipDesc{};
696 mipDesc.arrayDesc.Width =
static_cast<size_t>(w);
697 mipDesc.arrayDesc.Height =
static_cast<size_t>(h);
698 mipDesc.arrayDesc.Depth = 0;
699 mipDesc.arrayDesc.Format = (bytesPerChannel == 2) ? CU_AD_FORMAT_UNSIGNED_INT16
700 : CU_AD_FORMAT_UNSIGNED_INT8;
701 mipDesc.arrayDesc.NumChannels =
static_cast<unsigned int>(numChannels);
702 mipDesc.arrayDesc.Flags = 0;
703 mipDesc.numLevels = 1;
705 if(m_cu.getMapArray(&plane.cuMipArray, plane.cuExtMem, &mipDesc) != CUDA_SUCCESS)
712 if(m_cu.getLevel(&plane.cuArray, plane.cuMipArray, 0) != CUDA_SUCCESS)
725 samplers[idx].texture->createFrom(
726 QRhiTexture::NativeTexture{quint64(plane.image), VK_IMAGE_LAYOUT_GENERAL});
728 m_interopReady = (idx == 1);
Graphics rendering pipeline for ossia score.
Definition Filter/PreviewWidget.hpp:12
std::pair< QShader, QShader > makeShaders(const RenderState &v, QString vert, QString frag)
Get a pair of compiled vertex / fragment shaders from GLSL 4.5 sources.
Definition score-plugin-gfx/Gfx/Graph/Utils.cpp:395