Loading...
Searching...
No Matches
HWVulkanShared.hpp
1#pragma once
2
3#include <score/gfx/Vulkan.hpp>
4
5#if QT_HAS_VULKAN
6
7extern "C" {
8#if __has_include(<libavutil/hwcontext_vulkan.h>)
9#include <libavutil/hwcontext_vulkan.h>
10#define SCORE_HAS_VULKAN_HWCONTEXT_SHARED 1
11#endif
12}
13
14#if defined(SCORE_HAS_VULKAN_HWCONTEXT_SHARED) && QT_VERSION >= QT_VERSION_CHECK(6, 6, 0)
15
16#include <Gfx/Graph/decoders/ColorSpace.hpp>
17#include <Gfx/Graph/decoders/GPUVideoDecoder.hpp>
18#include <Gfx/Graph/decoders/NV12.hpp>
19#include <Gfx/Graph/decoders/P010.hpp>
20#include <Gfx/Graph/decoders/YUV420.hpp>
21#include <Gfx/Graph/decoders/YUV420P10.hpp>
22#include <Gfx/Graph/decoders/YUV422.hpp>
23#include <Gfx/Graph/decoders/YUV422P10.hpp>
24#include <Gfx/Graph/decoders/YUV444.hpp>
25#include <Gfx/Graph/decoders/YUV444P10.hpp>
26#include <Gfx/Graph/decoders/YUVA444.hpp>
27#include <Video/GpuFormats.hpp>
28
29// Qt private header for QVkTexture internals
30#include <QtGui/private/qrhivulkan_p.h>
31#include <qvulkanfunctions.h>
32#include <vulkan/vulkan.h>
33
34extern "C" {
35#include <libavformat/avformat.h>
36#include <libavutil/hwcontext.h>
37#include <libavutil/pixdesc.h>
38}
39
40namespace score::gfx
41{
42
53struct HWVulkanSharedDecoder : GPUVideoDecoder
54{
55 Video::ImageFormat& decoder;
56 PixelFormatInfo m_fmt;
57 int m_numPlanes{0};
58
59 // Vulkan handles
60 VkDevice m_dev{VK_NULL_HANDLE};
61 VkPhysicalDevice m_physDev{VK_NULL_HANDLE};
62 QVulkanFunctions* m_funcs{};
63 QVulkanDeviceFunctions* m_dfuncs{};
64 PFN_vkWaitSemaphores m_vkWaitSemaphores{};
65 uint32_t m_gfxQueueFamilyIdx{0};
66 VkQueue m_gfxQueue{VK_NULL_HANDLE};
67
68 // Command infrastructure for custom multiplane barriers
69 VkCommandPool m_cmdPool{VK_NULL_HANDLE};
70 VkCommandBuffer m_cmdBuf{VK_NULL_HANDLE};
71 VkFence m_fence{VK_NULL_HANDLE};
72 bool m_cmdReady{false};
73
74 // Ring buffer for frame lifetime + deferred view destruction
75 static constexpr int NumSlots = 3;
76 struct FrameSlot
77 {
78 AVFrame* frameRef{};
79 VkImageView planeViews[4]{};
80 int numViews{0};
81 };
82 FrameSlot m_slots[NumSlots]{};
83 int m_slotIdx{0};
84
85 // ------------------------------------------------------------------
86
87 static bool isAvailable(QRhi& rhi)
88 {
89 if(rhi.backend() != QRhi::Vulkan)
90 return false;
91 auto* nh
92 = static_cast<const QRhiVulkanNativeHandles*>(rhi.nativeHandles());
93 if(!nh || !nh->dev || !nh->physDev || !nh->inst)
94 return false;
95 return nh->inst->getInstanceProcAddr("vkWaitSemaphores") != nullptr;
96 }
97
98 explicit HWVulkanSharedDecoder(
99 Video::ImageFormat& d, QRhi& rhi, PixelFormatInfo fmt)
100 : decoder{d}
101 , m_fmt{fmt}
102 {
103 auto* nh
104 = static_cast<const QRhiVulkanNativeHandles*>(rhi.nativeHandles());
105 m_dev = nh->dev;
106 m_physDev = nh->physDev;
107 m_funcs = nh->inst->functions();
108 m_dfuncs = nh->inst->deviceFunctions(m_dev);
109 m_gfxQueueFamilyIdx = nh->gfxQueueFamilyIdx;
110 m_vkWaitSemaphores = reinterpret_cast<PFN_vkWaitSemaphores>(
111 nh->inst->getInstanceProcAddr("vkWaitSemaphores"));
112 m_dfuncs->vkGetDeviceQueue(
113 m_dev, m_gfxQueueFamilyIdx, 0, &m_gfxQueue);
114 }
115
116 ~HWVulkanSharedDecoder() override
117 {
118 // Wait for in-flight rendering to complete before destroying
119 // VkImageViews and freeing AVFrames (which release the VkImages).
120 // Without this, the last render pass command buffer may still
121 // reference these resources.
122 // Only wait on the graphics queue — lighter than vkDeviceWaitIdle.
123 if(m_gfxQueue != VK_NULL_HANDLE)
124 m_dfuncs->vkQueueWaitIdle(m_gfxQueue);
125
126 for(auto& slot : m_slots)
127 cleanupSlot(slot);
128 if(m_fence != VK_NULL_HANDLE)
129 m_dfuncs->vkDestroyFence(m_dev, m_fence, nullptr);
130 if(m_cmdPool != VK_NULL_HANDLE)
131 m_dfuncs->vkDestroyCommandPool(m_dev, m_cmdPool, nullptr);
132 }
133
134 void cleanupSlot(FrameSlot& slot)
135 {
136 for(int i = 0; i < slot.numViews; i++)
137 {
138 if(slot.planeViews[i] != VK_NULL_HANDLE)
139 {
140 m_dfuncs->vkDestroyImageView(m_dev, slot.planeViews[i], nullptr);
141 slot.planeViews[i] = VK_NULL_HANDLE;
142 }
143 }
144 slot.numViews = 0;
145 if(slot.frameRef)
146 {
147 av_frame_free(&slot.frameRef);
148 slot.frameRef = nullptr;
149 }
150 }
151
152 bool setupCommandInfra()
153 {
154 VkCommandPoolCreateInfo poolInfo{};
155 poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
156 poolInfo.queueFamilyIndex = m_gfxQueueFamilyIdx;
157 poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
158 if(m_dfuncs->vkCreateCommandPool(m_dev, &poolInfo, nullptr, &m_cmdPool)
159 != VK_SUCCESS)
160 return false;
161
162 VkCommandBufferAllocateInfo allocInfo{};
163 allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
164 allocInfo.commandPool = m_cmdPool;
165 allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
166 allocInfo.commandBufferCount = 1;
167 if(m_dfuncs->vkAllocateCommandBuffers(m_dev, &allocInfo, &m_cmdBuf)
168 != VK_SUCCESS)
169 return false;
170
171 VkFenceCreateInfo fenceInfo{};
172 fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
173 if(m_dfuncs->vkCreateFence(m_dev, &fenceInfo, nullptr, &m_fence)
174 != VK_SUCCESS)
175 return false;
176
177 m_cmdReady = true;
178 return true;
179 }
180
181 // ------------------------------------------------------------------
182 // init -- create placeholder textures and shaders
183 // ------------------------------------------------------------------
184
185 std::pair<QShader, QShader> init(RenderList& r) override
186 {
187 auto& rhi = *r.state.rhi;
188 const auto w = decoder.width, h = decoder.height;
189 const bool is10 = m_fmt.is10bit();
190 auto texFmt = is10 ? QRhiTexture::R16 : QRhiTexture::R8;
191 int chromaW = AV_CEIL_RSHIFT(w, m_fmt.log2ChromaW);
192 int chromaH = AV_CEIL_RSHIFT(h, m_fmt.log2ChromaH);
193
194 // Semi-planar (2 planes: Y + UV) vs planar (3 planes: Y + U + V)
195 // Vulkan Video always outputs multiplane images; the plane count
196 // depends on the sw_format negotiated with FFmpeg.
197 // Semi-planar: NV12, P010, P210, P410, etc.
198 // Planar: YUV420P, YUV422P, YUV444P, etc.
199 m_numPlanes = m_fmt.numPlanes;
200
201 if(m_numPlanes == 2)
202 {
203 auto uvFmt = is10 ? QRhiTexture::RG16 : QRhiTexture::RG8;
204 createTex(rhi, texFmt, w, h);
205 createTex(rhi, uvFmt, chromaW, chromaH);
206
207 if(is10)
209 r.state, vertexShader(),
210 QString(P010Decoder::frag).arg("").arg(colorMatrix(decoder)));
211 else
212 {
213 QString frag = NV12Decoder::nv12_filter_prologue;
214 frag += " vec3 yuv = vec3(y, u, v);\n";
215 frag += NV12Decoder::nv12_filter_epilogue;
217 r.state, vertexShader(),
218 frag.arg("").arg(colorMatrix(decoder)));
219 }
220 }
221 else if(m_fmt.hasAlpha)
222 {
223 // 4 planes: Y + U + V + A (YUVA444, YUVA444P10, YUVA444P12, etc.)
224 createTex(rhi, texFmt, w, h);
225 createTex(rhi, texFmt, chromaW, chromaH);
226 createTex(rhi, texFmt, chromaW, chromaH);
227 createTex(rhi, texFmt, w, h); // Alpha at full resolution
228
229 if(!is10)
230 {
232 r.state, vertexShader(),
233 QString(YUVA444Decoder::frag).arg("").arg(colorMatrix(decoder)));
234 }
235 else
236 {
237 // R16_UNORM samples as raw_value/65535. Scale to actual bit range.
238 // 10-bit: 65535/1023 ≈ 64. 12-bit: 65535/4095 = 16. 16-bit: 1.
239 double scale = 65535.0 / ((1 << m_fmt.bitDepth) - 1);
240 QString frag = QString(R"_(#version 450
241
242)_" SCORE_GFX_VIDEO_UNIFORMS R"_(
243
244layout(binding=3) uniform sampler2D y_tex;
245layout(binding=4) uniform sampler2D u_tex;
246layout(binding=5) uniform sampler2D v_tex;
247layout(binding=6) uniform sampler2D a_tex;
248
249layout(location = 0) in vec2 v_texcoord;
250layout(location = 0) out vec4 fragColor;
251
252%2
253
254vec4 processTexture(vec4 tex) {
255 vec4 processed = convert_to_rgb(tex);
256 { %1 }
257 return processed;
258}
259
260void main()
261{
262 float sc = float(%3);
263 float y = sc * texture(y_tex, v_texcoord).r;
264 float u = sc * texture(u_tex, v_texcoord).r;
265 float v = sc * texture(v_tex, v_texcoord).r;
266 float a = sc * texture(a_tex, v_texcoord).r;
267
268 vec4 rgb = processTexture(vec4(y,u,v, 1.));
269 fragColor = vec4(rgb.rgb, a);
270}
271)_").arg("").arg(colorMatrix(decoder)).arg(scale, 0, 'f', 6);
272
273 return score::gfx::makeShaders(r.state, vertexShader(), frag);
274 }
275 }
276 else
277 {
278 // 3 planes: Y + U + V (YUV420P, YUV422P, YUV444P, etc.)
279 createTex(rhi, texFmt, w, h);
280 createTex(rhi, texFmt, chromaW, chromaH);
281 createTex(rhi, texFmt, chromaW, chromaH);
282
283 if(!is10)
284 {
285 const char* fragSrc = YUV420Decoder::frag;
286 if(m_fmt.log2ChromaW == 1 && m_fmt.log2ChromaH == 0)
287 fragSrc = YUV422Decoder::frag;
288 else if(m_fmt.log2ChromaW == 0 && m_fmt.log2ChromaH == 0)
289 fragSrc = YUV444Decoder::frag;
291 r.state, vertexShader(),
292 QString(fragSrc).arg("").arg(colorMatrix(decoder)));
293 }
294 else
295 {
296 // R16_UNORM: scale from actual bit depth to normalized range
297 double scale = 65535.0 / ((1 << m_fmt.bitDepth) - 1);
298 QString frag = QString(R"_(#version 450
299
300)_" SCORE_GFX_VIDEO_UNIFORMS R"_(
301
302layout(binding=3) uniform sampler2D y_tex;
303layout(binding=4) uniform sampler2D u_tex;
304layout(binding=5) uniform sampler2D v_tex;
305
306layout(location = 0) in vec2 v_texcoord;
307layout(location = 0) out vec4 fragColor;
308
309%2
310
311vec4 processTexture(vec4 tex) {
312 vec4 processed = convert_to_rgb(tex);
313 { %1 }
314 return processed;
315}
316
317void main()
318{
319 float sc = float(%3);
320 float y = sc * texture(y_tex, v_texcoord).r;
321 float u = sc * texture(u_tex, v_texcoord).r;
322 float v = sc * texture(v_tex, v_texcoord).r;
323
324 fragColor = processTexture(vec4(y,u,v, 1.));
325}
326)_").arg("").arg(colorMatrix(decoder)).arg(scale, 0, 'f', 6);
327
328 return score::gfx::makeShaders(r.state, vertexShader(), frag);
329 }
330 }
331 }
332
333 // ------------------------------------------------------------------
334 // exec -- per-plane VkImageViews on multiplane image (true zero-copy)
335 // ------------------------------------------------------------------
336
337 void exec(RenderList& r, QRhiResourceUpdateBatch& res, AVFrame& frame) override
338 {
339#if LIBAVUTIL_VERSION_MAJOR >= 57
340 if(!Video::formatIsHardwareDecoded(
341 static_cast<AVPixelFormat>(frame.format)))
342 return;
343
344 auto* vkf = reinterpret_cast<AVVkFrame*>(frame.data[0]);
345 if(!vkf || vkf->img[0] == VK_NULL_HANDLE)
346 return;
347
348 if(!m_cmdReady && !setupCommandInfra())
349 return;
350
351 // Wait on FFmpeg's timeline semaphores (host-side)
352 if(m_vkWaitSemaphores && vkf->sem[0] != VK_NULL_HANDLE)
353 {
354 int numSems = 0;
355 for(int i = 0; i < 4; i++)
356 if(vkf->sem[i] != VK_NULL_HANDLE)
357 numSems = i + 1;
358 if(numSems > 0)
359 {
360 VkSemaphoreWaitInfo waitInfo{};
361 waitInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
362 waitInfo.semaphoreCount = static_cast<uint32_t>(numSems);
363 waitInfo.pSemaphores = vkf->sem;
364 waitInfo.pValues = vkf->sem_value;
365 m_vkWaitSemaphores(m_dev, &waitInfo, UINT64_MAX);
366 }
367 }
368
369 // Ring buffer management
370 auto& slot = m_slots[m_slotIdx];
371 cleanupSlot(slot);
372 m_slotIdx = (m_slotIdx + 1) % NumSlots;
373
374 slot.frameRef = av_frame_alloc();
375 if(av_frame_ref(slot.frameRef, &frame) < 0)
376 {
377 av_frame_free(&slot.frameRef);
378 slot.frameRef = nullptr;
379 return;
380 }
381
382 // Count separate VkImages
383 int numSrcImages = 0;
384 for(int i = 0; i < m_numPlanes; i++)
385 if(vkf->img[i] != VK_NULL_HANDLE)
386 numSrcImages++;
387
388 const bool isMultiplane = (numSrcImages == 1 && m_numPlanes > 1);
389
390 if(isMultiplane)
391 {
392 // --- Submit a custom barrier for the multiplane image ---
393 // QRhi would use VK_IMAGE_ASPECT_COLOR_BIT which is invalid for
394 // multiplane. We submit our own barrier with the correct plane aspects,
395 // transitioning to SHADER_READ_ONLY_OPTIMAL. Then we tell QRhi the
396 // image is already in that layout so it doesn't insert its own barrier.
397
398 m_dfuncs->vkResetCommandBuffer(m_cmdBuf, 0);
399
400 VkCommandBufferBeginInfo beginInfo{};
401 beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
402 beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
403 m_dfuncs->vkBeginCommandBuffer(m_cmdBuf, &beginInfo);
404
405 // One barrier per plane with the correct aspect mask
406 VkImageMemoryBarrier barriers[4]{};
407 static const VkImageAspectFlagBits planeAspects[] = {
408 VK_IMAGE_ASPECT_PLANE_0_BIT,
409 VK_IMAGE_ASPECT_PLANE_1_BIT,
410 VK_IMAGE_ASPECT_PLANE_2_BIT,
411 };
412
413 for(int i = 0; i < m_numPlanes && i < 3; i++)
414 {
415 barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
416 barriers[i].srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
417 barriers[i].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
418 barriers[i].oldLayout = vkf->layout[0];
419 barriers[i].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
420 barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
421 barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
422 barriers[i].image = vkf->img[0];
423 barriers[i].subresourceRange.aspectMask = planeAspects[i];
424 barriers[i].subresourceRange.baseMipLevel = 0;
425 barriers[i].subresourceRange.levelCount = 1;
426 barriers[i].subresourceRange.baseArrayLayer = 0;
427 barriers[i].subresourceRange.layerCount = 1;
428 }
429
430 m_dfuncs->vkCmdPipelineBarrier(
431 m_cmdBuf,
432 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
433 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
434 0, 0, nullptr, 0, nullptr,
435 static_cast<uint32_t>(m_numPlanes), barriers);
436
437 m_dfuncs->vkEndCommandBuffer(m_cmdBuf);
438
439 m_dfuncs->vkResetFences(m_dev, 1, &m_fence);
440 VkSubmitInfo submitInfo{};
441 submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
442 submitInfo.commandBufferCount = 1;
443 submitInfo.pCommandBuffers = &m_cmdBuf;
444 m_dfuncs->vkQueueSubmit(m_gfxQueue, 1, &submitInfo, m_fence);
445 m_dfuncs->vkWaitForFences(m_dev, 1, &m_fence, VK_TRUE, UINT64_MAX);
446
447 // --- Create per-plane VkImageViews and patch QVkTexture ---
448 for(int i = 0; i < m_numPlanes && i < (int)samplers.size(); i++)
449 {
450 auto* vkTex = static_cast<QVkTexture*>(samplers[i].texture);
451
452 VkImageViewCreateInfo viewInfo{};
453 viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
454 viewInfo.image = vkf->img[0];
455 viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
456 viewInfo.format = vkTex->vkformat;
457 viewInfo.components = {
458 VK_COMPONENT_SWIZZLE_IDENTITY,
459 VK_COMPONENT_SWIZZLE_IDENTITY,
460 VK_COMPONENT_SWIZZLE_IDENTITY,
461 VK_COMPONENT_SWIZZLE_IDENTITY};
462 viewInfo.subresourceRange.aspectMask = planeAspects[i];
463 viewInfo.subresourceRange.baseMipLevel = 0;
464 viewInfo.subresourceRange.levelCount = 1;
465 viewInfo.subresourceRange.baseArrayLayer = 0;
466 viewInfo.subresourceRange.layerCount = 1;
467
468 VkImageView planeView = VK_NULL_HANDLE;
469 if(m_dfuncs->vkCreateImageView(
470 m_dev, &viewInfo, nullptr, &planeView)
471 != VK_SUCCESS)
472 return;
473
474 slot.planeViews[i] = planeView;
475 slot.numViews = i + 1;
476
477 // Destroy QRhi's current view
478 if(vkTex->imageView != VK_NULL_HANDLE)
479 m_dfuncs->vkDestroyImageView(m_dev, vkTex->imageView, nullptr);
480
481 vkTex->image = vkf->img[0];
482 vkTex->imageView = planeView;
483 vkTex->owns = false;
484 // Already transitioned to SHADER_READ_ONLY by our barrier
485 vkTex->usageState.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
486 vkTex->usageState.access = VK_ACCESS_SHADER_READ_BIT;
487 vkTex->usageState.stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
488 vkTex->generation++;
489 }
490 }
491 else
492 {
493 // Separate per-plane VkImages: direct createFrom() wrapping
494 for(int i = 0; i < m_numPlanes && i < (int)samplers.size(); i++)
495 {
496 if(vkf->img[i] != VK_NULL_HANDLE)
497 {
498 samplers[i].texture->createFrom(QRhiTexture::NativeTexture{
499 quint64(vkf->img[i]), int(vkf->layout[i])});
500 }
501 }
502 }
503#endif
504 }
505
506private:
507 void createTex(QRhi& rhi, QRhiTexture::Format fmt, int w, int h)
508 {
509 auto tex = rhi.newTexture(fmt, {w, h}, 1, QRhiTexture::Flag{});
510 tex->create();
511 auto sampler = rhi.newSampler(
512 QRhiSampler::Linear, QRhiSampler::Linear, QRhiSampler::None,
513 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
514 sampler->create();
515 samplers.push_back({sampler, tex});
516 }
517};
518
519} // namespace score::gfx
520
521#endif // SCORE_HAS_VULKAN_HWCONTEXT_SHARED && QT_VERSION >= 6.6
522#endif // QT_HAS_VULKAN
Graphics rendering pipeline for ossia score.
Definition Filter/PreviewWidget.hpp:12
std::pair< QShader, QShader > makeShaders(const RenderState &v, QString vert, QString frag)
Get a pair of compiled vertex / fragment shaders from GLSL 4.5 sources.
Definition score-plugin-gfx/Gfx/Graph/Utils.cpp:395
Definition VideoInterface.hpp:26