Loading...
Searching...
No Matches
HWCUDA.hpp
#pragma once

#include <score/gfx/Vulkan.hpp>

extern "C" {
#if __has_include(<libavutil/hwcontext_cuda.h>)
// Pre-define CUDA_VERSION so that FFmpeg's hwcontext_cuda.h can be included
// without a CUDA SDK on the build machine; the driver API is dlopen'd at
// runtime instead (see CudaFunctions below).
// NOTE(review): assumes the ffnvcodec dynlink headers key off CUDA_VERSION —
// confirm against the headers shipped with the FFmpeg build in use.
#define CUDA_VERSION 0

// Opaque handles.
// ABI-compatible stand-ins for the CUDA driver API types from <cuda.h>.
// Names and layout must match the driver exactly — do not modify.
typedef struct CUctx_st* CUcontext;
typedef struct CUextMemory_st* CUexternalMemory;
typedef struct CUmipmappedArray_st* CUmipmappedArray;
typedef struct CUarray_st* CUarray;
typedef struct CUstream_st* CUstream;

// Device pointer.
// Matches the driver's definition: 64-bit integer on 64-bit platforms.
#if defined(_WIN64) || defined(__LP64__)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif

// Minimal enums
// NOTE(review): this placeholder enum appears unused at the visible call
// sites — presumably kept for parity with an earlier revision; verify before
// removing.
typedef enum
{ /* only need the typedef, values unused at call sites */ } CUresult_placeholder;

// Error codes: only CUDA_SUCCESS is compared against in this file.
typedef enum cudaError_enum
{
  CUDA_SUCCESS = 0,
  // Add others as needed
} CUresult;

// External-memory handle kinds accepted by cuImportExternalMemory.
// Values must match the driver's CUexternalMemoryHandleType_enum.
typedef enum CUexternalMemoryHandleType_enum
{
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8,
} CUexternalMemoryHandleType;

// CUDA array texel formats (subset); values match the driver's
// CUarray_format_enum.
typedef enum CUarray_format_enum
{
  CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
  CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
  CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
  CU_AD_FORMAT_SIGNED_INT8 = 0x08,
  CU_AD_FORMAT_SIGNED_INT16 = 0x09,
  CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
  CU_AD_FORMAT_HALF = 0x10,
  CU_AD_FORMAT_FLOAT = 0x20,
} CUarray_format;

// Memory kinds used by CUDA_MEMCPY2D src/dst descriptors.
typedef enum CUmemorytype_enum
{
  CU_MEMORYTYPE_HOST = 0x01,
  CU_MEMORYTYPE_DEVICE = 0x02,
  CU_MEMORYTYPE_ARRAY = 0x03,
  CU_MEMORYTYPE_UNIFIED = 0x04,
} CUmemorytype;

// Structs — field order and sizes mirror the driver ABI; do not reorder.

// Descriptor passed to cuImportExternalMemory (fd on POSIX, HANDLE on Win32).
typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st
{
  CUexternalMemoryHandleType type;
  union
  {
    int fd;
    struct
    {
      void* handle;
      const void* name;
    } win32;
    const void* nvSciBufObject;
  } handle;
  unsigned long long size;
  unsigned int flags;
  unsigned int reserved[16];
} CUDA_EXTERNAL_MEMORY_HANDLE_DESC;

// 3D array shape/format descriptor (Depth = 0 means a 2D array).
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
  size_t Width;
  size_t Height;
  size_t Depth;
  CUarray_format Format;
  unsigned int NumChannels;
  unsigned int Flags;
} CUDA_ARRAY3D_DESCRIPTOR;

// Descriptor passed to cuExternalMemoryGetMappedMipmappedArray.
typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st
{
  unsigned long long offset;
  CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
  unsigned int numLevels;
  unsigned int reserved[16];
} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;

// 2D copy descriptor for cuMemcpy2DAsync; which src/dst fields are read
// depends on srcMemoryType / dstMemoryType.
typedef struct CUDA_MEMCPY2D_st
{
  size_t srcXInBytes;
  size_t srcY;
  CUmemorytype srcMemoryType;
  const void* srcHost;
  CUdeviceptr srcDevice;
  CUarray srcArray;
  size_t srcPitch;
  size_t dstXInBytes;
  size_t dstY;
  CUmemorytype dstMemoryType;
  void* dstHost;
  CUdeviceptr dstDevice;
  CUarray dstArray;
  size_t dstPitch;
  size_t WidthInBytes;
  size_t Height;
} CUDA_MEMCPY2D;

#include <libavutil/hwcontext_cuda.h>
#define SCORE_HAS_CUDA_HWCONTEXT 1
#endif
}
126
// The interop path needs FFmpeg's CUDA hwcontext, a Vulkan-enabled Qt, and
// Qt >= 6.6 (for the QRhi native-texture APIs used below).
#if defined(SCORE_HAS_CUDA_HWCONTEXT) && QT_HAS_VULKAN && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)

#include <Gfx/Graph/decoders/ColorSpace.hpp>
#include <Gfx/Graph/decoders/GPUVideoDecoder.hpp>
#include <Gfx/Graph/decoders/NV12.hpp>
#include <Gfx/Graph/decoders/P010.hpp>
#include <Video/GpuFormats.hpp>

#include <QtGui/private/qrhivulkan_p.h>
#include <qvulkanfunctions.h>
#include <vulkan/vulkan.h>

#if defined(_WIN32)
#include <windows.h>
// windows.h must come first; vulkan_win32.h requires the platform define.
#ifndef VK_USE_PLATFORM_WIN32_KHR
#define VK_USE_PLATFORM_WIN32_KHR
#endif
#include <vulkan/vulkan_win32.h>
#else
#include <dlfcn.h>
#include <unistd.h>
#endif

extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/hwcontext.h>
}

namespace score::gfx
{
163struct CudaFunctions
164{
165 void* lib{};
166
167 using FN_cuCtxPushCurrent = CUresult (*)(CUcontext);
168 using FN_cuCtxPopCurrent = CUresult (*)(CUcontext*);
169 using FN_cuImportExternalMemory
170 = CUresult (*)(CUexternalMemory*, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC*);
171 using FN_cuExternalMemoryGetMappedMipmappedArray = CUresult (*)(
172 CUmipmappedArray*, CUexternalMemory,
173 const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC*);
174 using FN_cuMipmappedArrayGetLevel = CUresult (*)(CUarray*, CUmipmappedArray, unsigned int);
175 using FN_cuMemcpy2DAsync = CUresult (*)(const CUDA_MEMCPY2D*, CUstream);
176 using FN_cuStreamSynchronize = CUresult (*)(CUstream);
177 using FN_cuDestroyExternalMemory = CUresult (*)(CUexternalMemory);
178 using FN_cuMipmappedArrayDestroy = CUresult (*)(CUmipmappedArray);
179
180 FN_cuCtxPushCurrent ctxPush{};
181 FN_cuCtxPopCurrent ctxPop{};
182 FN_cuImportExternalMemory importExtMem{};
183 FN_cuExternalMemoryGetMappedMipmappedArray getMapArray{};
184 FN_cuMipmappedArrayGetLevel getLevel{};
185 FN_cuMemcpy2DAsync memcpy2DAsync{};
186 FN_cuStreamSynchronize streamSync{};
187 FN_cuDestroyExternalMemory destroyExtMem{};
188 FN_cuMipmappedArrayDestroy destroyMipArray{};
189
190 bool load()
191 {
192#if defined(_WIN32)
193 lib = (void*)LoadLibraryA("nvcuda.dll");
194 if(!lib)
195 return false;
196 auto sym = [this](const char* n) { return (void*)GetProcAddress((HMODULE)lib, n); };
197#else
198 lib = dlopen("libcuda.so.1", RTLD_NOW);
199 if(!lib)
200 return false;
201 auto sym = [this](const char* n) { return dlsym(lib, n); };
202#endif
203
204 ctxPush = (FN_cuCtxPushCurrent)sym("cuCtxPushCurrent_v2");
205 ctxPop = (FN_cuCtxPopCurrent)sym("cuCtxPopCurrent_v2");
206 importExtMem = (FN_cuImportExternalMemory)sym("cuImportExternalMemory");
207 getMapArray = (FN_cuExternalMemoryGetMappedMipmappedArray)sym(
208 "cuExternalMemoryGetMappedMipmappedArray");
209 getLevel = (FN_cuMipmappedArrayGetLevel)sym("cuMipmappedArrayGetLevel");
210 memcpy2DAsync = (FN_cuMemcpy2DAsync)sym("cuMemcpy2DAsync_v2");
211 streamSync = (FN_cuStreamSynchronize)sym("cuStreamSynchronize");
212 destroyExtMem = (FN_cuDestroyExternalMemory)sym("cuDestroyExternalMemory");
213 destroyMipArray = (FN_cuMipmappedArrayDestroy)sym("cuMipmappedArrayDestroy");
214
215 return ctxPush && ctxPop && importExtMem && getMapArray && getLevel
216 && memcpy2DAsync && streamSync && destroyExtMem && destroyMipArray;
217 }
218
219 ~CudaFunctions()
220 {
221 if(lib)
222 {
223#if defined(_WIN32)
224 FreeLibrary((HMODULE)lib);
225#else
226 dlclose(lib);
227#endif
228 }
229 }
230};
231
252struct HWCudaVulkanDecoder : GPUVideoDecoder
253{
254 Video::ImageFormat& decoder;
255 PixelFormatInfo m_fmt;
256
257 // Vulkan handles (borrowed from QRhi)
258 VkDevice m_dev{VK_NULL_HANDLE};
259 VkPhysicalDevice m_physDev{VK_NULL_HANDLE};
260 QVulkanFunctions* m_funcs{};
261 QVulkanDeviceFunctions* m_dfuncs{};
262#if defined(_WIN32)
263 PFN_vkGetMemoryWin32HandleKHR m_vkGetMemoryWin32HandleKHR{};
264#else
265 PFN_vkGetMemoryFdKHR m_vkGetMemoryFdKHR{};
266#endif
267
268 // CUDA context and stream (from FFmpeg's AVCUDADeviceContext)
269 CUcontext m_cuCtx{};
270 CUstream m_cuStream{};
271
272 // Dynamically loaded CUDA functions
273 CudaFunctions m_cu;
274
275 // Per-plane resources (created once, reused every frame)
276 struct PlaneResources
277 {
278 VkImage image{VK_NULL_HANDLE};
279 VkDeviceMemory memory{VK_NULL_HANDLE};
280 CUexternalMemory cuExtMem{};
281 CUmipmappedArray cuMipArray{};
282 CUarray cuArray{};
283 VkDeviceSize memSize{};
284 };
285 PlaneResources m_planes[2]{}; // 0=Y, 1=UV
286
287 bool m_interopReady{false};
288
289 // ------------------------------------------------------------------
290
291 static bool isAvailable(QRhi& rhi, AVBufferRef* hwDeviceCtx)
292 {
293 if(rhi.backend() != QRhi::Vulkan)
294 return false;
295 auto* nh = static_cast<const QRhiVulkanNativeHandles*>(rhi.nativeHandles());
296 if(!nh || !nh->dev || !nh->physDev || !nh->inst)
297 return false;
298#if defined(_WIN32)
299 if(!nh->inst->getInstanceProcAddr("vkGetMemoryWin32HandleKHR"))
300 return false;
301#else
302 if(!nh->inst->getInstanceProcAddr("vkGetMemoryFdKHR"))
303 return false;
304#endif
305 if(!hwDeviceCtx)
306 return false;
307
308 // Verify it's actually a CUDA device context
309 auto* devCtx = reinterpret_cast<AVHWDeviceContext*>(hwDeviceCtx->data);
310 if(devCtx->type != AV_HWDEVICE_TYPE_CUDA)
311 return false;
312
313 // Check that CUDA driver supports external memory
314 CudaFunctions probe;
315 return probe.load();
316 }
317
318 explicit HWCudaVulkanDecoder(
319 Video::ImageFormat& d, QRhi& rhi, AVBufferRef* hwDeviceCtx,
320 PixelFormatInfo fmt)
321 : decoder{d}
322 , m_fmt{fmt}
323 {
324 auto* nh = static_cast<const QRhiVulkanNativeHandles*>(rhi.nativeHandles());
325 m_dev = nh->dev;
326 m_physDev = nh->physDev;
327 m_funcs = nh->inst->functions();
328 m_dfuncs = nh->inst->deviceFunctions(m_dev);
329#if defined(_WIN32)
330 m_vkGetMemoryWin32HandleKHR = reinterpret_cast<PFN_vkGetMemoryWin32HandleKHR>(
331 nh->inst->getInstanceProcAddr("vkGetMemoryWin32HandleKHR"));
332#else
333 m_vkGetMemoryFdKHR = reinterpret_cast<PFN_vkGetMemoryFdKHR>(
334 nh->inst->getInstanceProcAddr("vkGetMemoryFdKHR"));
335#endif
336
337 // Extract CUDA context and stream from FFmpeg's device context
338 auto* devCtx = reinterpret_cast<AVHWDeviceContext*>(hwDeviceCtx->data);
339 auto* cudaDevCtx = static_cast<AVCUDADeviceContext*>(devCtx->hwctx);
340 m_cuCtx = cudaDevCtx->cuda_ctx;
341 m_cuStream = cudaDevCtx->stream;
342
343 m_cu.load();
344 }
345
346 ~HWCudaVulkanDecoder() override { cleanup(); }
347
348 void cleanup()
349 {
350 if(m_cuCtx && m_cu.ctxPush)
351 {
352 m_cu.ctxPush(m_cuCtx);
353 for(auto& p : m_planes)
354 {
355 if(p.cuMipArray)
356 m_cu.destroyMipArray(p.cuMipArray);
357 if(p.cuExtMem)
358 m_cu.destroyExtMem(p.cuExtMem);
359 p.cuArray = {};
360 p.cuMipArray = {};
361 p.cuExtMem = {};
362 }
363 CUcontext dummy{};
364 m_cu.ctxPop(&dummy);
365 }
366
367 for(auto& p : m_planes)
368 {
369 if(p.image != VK_NULL_HANDLE)
370 m_dfuncs->vkDestroyImage(m_dev, p.image, nullptr);
371 if(p.memory != VK_NULL_HANDLE)
372 m_dfuncs->vkFreeMemory(m_dev, p.memory, nullptr);
373 p.image = VK_NULL_HANDLE;
374 p.memory = VK_NULL_HANDLE;
375 }
376
377 m_interopReady = false;
378 }
379
380 // ------------------------------------------------------------------
381 // init — create exportable Vulkan textures, import into CUDA
382 // ------------------------------------------------------------------
383
384 std::pair<QShader, QShader> init(RenderList& r) override
385 {
386 auto& rhi = *r.state.rhi;
387 const auto w = decoder.width, h = decoder.height;
388
389 if(m_fmt.is10bit())
390 {
391 // P010: R16 (Y) + RG16 (UV)
392 {
393 auto tex = rhi.newTexture(QRhiTexture::R16, {w, h}, 1, QRhiTexture::Flag{});
394 tex->create();
395 auto sampler = rhi.newSampler(
396 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
397 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
398 sampler->create();
399 samplers.push_back({sampler, tex});
400 }
401 {
402 auto tex
403 = rhi.newTexture(QRhiTexture::RG16, {w / 2, h / 2}, 1, QRhiTexture::Flag{});
404 tex->create();
405 auto sampler = rhi.newSampler(
406 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
407 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
408 sampler->create();
409 samplers.push_back({sampler, tex});
410 }
411
412 // Setup Vulkan→CUDA interop for both planes
413 if(!setupPlane(0, VK_FORMAT_R16_UNORM, w, h, 1, 2)
414 || !setupPlane(1, VK_FORMAT_R16G16_UNORM, w / 2, h / 2, 2, 2))
415 {
416 qDebug() << "HWCudaVulkanDecoder: interop setup failed";
417 cleanup();
418 }
419
421 r.state, vertexShader(),
422 QString(P010Decoder::frag).arg("").arg(colorMatrix(decoder)));
423 }
424 else
425 {
426 // NV12: R8 (Y) + RG8 (UV)
427 {
428 auto tex = rhi.newTexture(QRhiTexture::R8, {w, h}, 1, QRhiTexture::Flag{});
429 tex->create();
430 auto sampler = rhi.newSampler(
431 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
432 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
433 sampler->create();
434 samplers.push_back({sampler, tex});
435 }
436 {
437 auto tex
438 = rhi.newTexture(QRhiTexture::RG8, {w / 2, h / 2}, 1, QRhiTexture::Flag{});
439 tex->create();
440 auto sampler = rhi.newSampler(
441 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
442 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
443 sampler->create();
444 samplers.push_back({sampler, tex});
445 }
446
447 // Setup Vulkan→CUDA interop for both planes
448 if(!setupPlane(0, VK_FORMAT_R8_UNORM, w, h, 1, 1)
449 || !setupPlane(1, VK_FORMAT_R8G8_UNORM, w / 2, h / 2, 2, 1))
450 {
451 qDebug() << "HWCudaVulkanDecoder: interop setup failed";
452 cleanup();
453 }
454
455 QString frag = NV12Decoder::nv12_filter_prologue;
456 frag += " vec3 yuv = vec3(y, u, v);\n";
457 frag += NV12Decoder::nv12_filter_epilogue;
459 r.state, vertexShader(), frag.arg("").arg(colorMatrix(decoder)));
460 }
461 }
462
463 // ------------------------------------------------------------------
464 // exec — GPU-to-GPU copy from NVDEC output to Vulkan texture
465 // ------------------------------------------------------------------
466
467 void exec(RenderList& r, QRhiResourceUpdateBatch& res, AVFrame& frame) override
468 {
469#if LIBAVUTIL_VERSION_MAJOR >= 57
470 if(!m_interopReady)
471 return;
472
473 if(!Video::formatIsHardwareDecoded(static_cast<AVPixelFormat>(frame.format)))
474 return;
475
476 const int w = decoder.width;
477 const int h = decoder.height;
478 const int bpc = m_fmt.is10bit() ? 2 : 1; // bytes per component
479
480 m_cu.ctxPush(m_cuCtx);
481
482 // Y plane: frame->data[0] is CUdeviceptr
483 {
484 CUDA_MEMCPY2D cpy{};
485 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
486 cpy.srcDevice = reinterpret_cast<CUdeviceptr>(frame.data[0]);
487 cpy.srcPitch = static_cast<size_t>(frame.linesize[0]);
488 cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
489 cpy.dstArray = m_planes[0].cuArray;
490 cpy.WidthInBytes = static_cast<size_t>(w * 1 * bpc); // 1 channel
491 cpy.Height = static_cast<size_t>(h);
492 m_cu.memcpy2DAsync(&cpy, m_cuStream);
493 }
494
495 // UV plane: frame->data[1] is CUdeviceptr
496 {
497 CUDA_MEMCPY2D cpy{};
498 cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
499 cpy.srcDevice = reinterpret_cast<CUdeviceptr>(frame.data[1]);
500 cpy.srcPitch = static_cast<size_t>(frame.linesize[1]);
501 cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
502 cpy.dstArray = m_planes[1].cuArray;
503 cpy.WidthInBytes = static_cast<size_t>((w / 2) * 2 * bpc); // 2 channels
504 cpy.Height = static_cast<size_t>(h / 2);
505 m_cu.memcpy2DAsync(&cpy, m_cuStream);
506 }
507
508 // Wait for copies to complete before Vulkan reads the textures
509 m_cu.streamSync(m_cuStream);
510
511 CUcontext dummy{};
512 m_cu.ctxPop(&dummy);
513
514 // Tell Qt RHI the images were written externally so it inserts a barrier.
515 // VK_IMAGE_LAYOUT_GENERAL (1) → SHADER_READ_ONLY_OPTIMAL transition
516 // preserves content and acts as a memory barrier.
517 samplers[0].texture->setNativeLayout(VK_IMAGE_LAYOUT_GENERAL);
518 samplers[1].texture->setNativeLayout(VK_IMAGE_LAYOUT_GENERAL);
519#endif
520 }
521
522private:
523 // ------------------------------------------------------------------
524 // Setup one plane: exportable VkImage → fd → CUDA external memory
525 // ------------------------------------------------------------------
526 bool setupPlane(
527 int idx, VkFormat vkFmt, int w, int h,
528 int numChannels, int bytesPerChannel)
529 {
530 auto& plane = m_planes[idx];
531
532 // --- Create VkImage with external memory export ---
533
534 VkExternalMemoryImageCreateInfo extInfo{};
535 extInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
536#if defined(_WIN32)
537 extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
538#else
539 extInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
540#endif
541
542 VkImageCreateInfo imgInfo{};
543 imgInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
544 imgInfo.pNext = &extInfo;
545 imgInfo.imageType = VK_IMAGE_TYPE_2D;
546 imgInfo.format = vkFmt;
547 imgInfo.extent = {static_cast<uint32_t>(w), static_cast<uint32_t>(h), 1};
548 imgInfo.mipLevels = 1;
549 imgInfo.arrayLayers = 1;
550 imgInfo.samples = VK_SAMPLE_COUNT_1_BIT;
551 imgInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
552 imgInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
553 imgInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
554 imgInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
555
556 if(m_dfuncs->vkCreateImage(m_dev, &imgInfo, nullptr, &plane.image) != VK_SUCCESS)
557 return false;
558
559 // --- Allocate exportable device-local memory ---
560
561 VkMemoryRequirements memReqs{};
562 m_dfuncs->vkGetImageMemoryRequirements(m_dev, plane.image, &memReqs);
563
564 VkPhysicalDeviceMemoryProperties memProps{};
565 m_funcs->vkGetPhysicalDeviceMemoryProperties(m_physDev, &memProps);
566
567 uint32_t memTypeIdx = UINT32_MAX;
568 for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
569 {
570 if((memReqs.memoryTypeBits & (1u << i))
571 && (memProps.memoryTypes[i].propertyFlags
572 & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
573 {
574 memTypeIdx = i;
575 break;
576 }
577 }
578 if(memTypeIdx == UINT32_MAX)
579 {
580 m_dfuncs->vkDestroyImage(m_dev, plane.image, nullptr);
581 plane.image = VK_NULL_HANDLE;
582 return false;
583 }
584
585 VkExportMemoryAllocateInfo exportInfo{};
586 exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
587#if defined(_WIN32)
588 exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
589#else
590 exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
591#endif
592
593 VkMemoryDedicatedAllocateInfo dedicatedInfo{};
594 dedicatedInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
595 dedicatedInfo.pNext = &exportInfo;
596 dedicatedInfo.image = plane.image;
597
598 VkMemoryAllocateInfo allocInfo{};
599 allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
600 allocInfo.pNext = &dedicatedInfo;
601 allocInfo.allocationSize = memReqs.size;
602 allocInfo.memoryTypeIndex = memTypeIdx;
603
604 if(m_dfuncs->vkAllocateMemory(m_dev, &allocInfo, nullptr, &plane.memory) != VK_SUCCESS)
605 {
606 m_dfuncs->vkDestroyImage(m_dev, plane.image, nullptr);
607 plane.image = VK_NULL_HANDLE;
608 return false;
609 }
610
611 plane.memSize = memReqs.size;
612
613 if(m_dfuncs->vkBindImageMemory(m_dev, plane.image, plane.memory, 0) != VK_SUCCESS)
614 {
615 m_dfuncs->vkFreeMemory(m_dev, plane.memory, nullptr);
616 m_dfuncs->vkDestroyImage(m_dev, plane.image, nullptr);
617 plane.image = VK_NULL_HANDLE;
618 plane.memory = VK_NULL_HANDLE;
619 return false;
620 }
621
622 // --- Export Vulkan memory and import into CUDA ---
623
624 m_cu.ctxPush(m_cuCtx);
625
626#if defined(_WIN32)
627 // --- Export Vulkan memory as Win32 HANDLE ---
628
629 VkMemoryGetWin32HandleInfoKHR getHandleInfo{};
630 getHandleInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR;
631 getHandleInfo.memory = plane.memory;
632 getHandleInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
633
634 HANDLE handle = nullptr;
635 if(m_vkGetMemoryWin32HandleKHR(m_dev, &getHandleInfo, &handle) != VK_SUCCESS
636 || !handle)
637 {
638 CUcontext dummy{};
639 m_cu.ctxPop(&dummy);
640 return false;
641 }
642
643 // --- Import Win32 handle into CUDA as external memory ---
644
645 CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc{};
646 memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32;
647 memDesc.handle.win32.handle = handle;
648 memDesc.size = plane.memSize;
649
650 if(m_cu.importExtMem(&plane.cuExtMem, &memDesc) != CUDA_SUCCESS)
651 {
652 CloseHandle(handle);
653 CUcontext dummy{};
654 m_cu.ctxPop(&dummy);
655 return false;
656 }
657 // CUDA does not take ownership of Win32 handles — close after import
658 CloseHandle(handle);
659#else
660 // --- Export Vulkan memory as fd ---
661
662 VkMemoryGetFdInfoKHR getFdInfo{};
663 getFdInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
664 getFdInfo.memory = plane.memory;
665 getFdInfo.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
666
667 int fd = -1;
668 if(m_vkGetMemoryFdKHR(m_dev, &getFdInfo, &fd) != VK_SUCCESS || fd < 0)
669 {
670 CUcontext dummy{};
671 m_cu.ctxPop(&dummy);
672 return false;
673 }
674
675 // --- Import fd into CUDA as external memory ---
676
677 CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc{};
678 memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD;
679 memDesc.handle.fd = fd; // CUDA takes ownership of fd
680 memDesc.size = plane.memSize;
681
682 if(m_cu.importExtMem(&plane.cuExtMem, &memDesc) != CUDA_SUCCESS)
683 {
684 close(fd); // Only close if import failed (ownership not transferred)
685 CUcontext dummy{};
686 m_cu.ctxPop(&dummy);
687 return false;
688 }
689 // fd is now owned by CUDA — do not close
690#endif
691
692 // --- Map to CUDA mipmapped array ---
693
694 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipDesc{};
695 mipDesc.offset = 0;
696 mipDesc.arrayDesc.Width = static_cast<size_t>(w);
697 mipDesc.arrayDesc.Height = static_cast<size_t>(h);
698 mipDesc.arrayDesc.Depth = 0; // 2D
699 mipDesc.arrayDesc.Format = (bytesPerChannel == 2) ? CU_AD_FORMAT_UNSIGNED_INT16
700 : CU_AD_FORMAT_UNSIGNED_INT8;
701 mipDesc.arrayDesc.NumChannels = static_cast<unsigned int>(numChannels);
702 mipDesc.arrayDesc.Flags = 0;
703 mipDesc.numLevels = 1;
704
705 if(m_cu.getMapArray(&plane.cuMipArray, plane.cuExtMem, &mipDesc) != CUDA_SUCCESS)
706 {
707 CUcontext dummy{};
708 m_cu.ctxPop(&dummy);
709 return false;
710 }
711
712 if(m_cu.getLevel(&plane.cuArray, plane.cuMipArray, 0) != CUDA_SUCCESS)
713 {
714 CUcontext dummy{};
715 m_cu.ctxPop(&dummy);
716 return false;
717 }
718
719 CUcontext dummy{};
720 m_cu.ctxPop(&dummy);
721
722 // --- Wrap VkImage in QRhiTexture ---
723 // createFrom() replaces the QRhi-owned VkImage with ours (non-owning).
724 // We set layout to GENERAL since CUDA will write externally.
725 samplers[idx].texture->createFrom(
726 QRhiTexture::NativeTexture{quint64(plane.image), VK_IMAGE_LAYOUT_GENERAL});
727
728 m_interopReady = (idx == 1); // Ready after both planes are set up
729 return true;
730 }
731};
732
733} // namespace score::gfx
734
735#endif // SCORE_HAS_CUDA_HWCONTEXT && QT_HAS_VULKAN && QT_VERSION >= 6.6
Graphics rendering pipeline for ossia score.
Definition Filter/PreviewWidget.hpp:12
std::pair< QShader, QShader > makeShaders(const RenderState &v, QString vert, QString frag)
Get a pair of compiled vertex / fragment shaders from GLSL 4.5 sources.
Definition score-plugin-gfx/Gfx/Graph/Utils.cpp:395
Definition VideoInterface.hpp:26