Loading...
Searching...
No Matches
LibavOutputStream.hpp
1#pragma once
2
3extern "C" {
4
5#include <libavcodec/avcodec.h>
6#include <libavdevice/avdevice.h>
7#include <libavformat/avformat.h>
8#include <libavutil/pixdesc.h>
9#include <libswresample/swresample.h>
10#include <libswscale/swscale.h>
11}
12
13#include <Audio/Settings/Model.hpp>
14#include <Gfx/Libav/AudioFrameEncoder.hpp>
15#include <Gfx/Libav/LibavOutputSettings.hpp>
16
17#include <score/application/ApplicationContext.hpp>
18#include <score/tools/Debug.hpp>
19
20#include <ossia/detail/flat_map.hpp>
21
22#include <QApplication>
23
24#include <CDSPResampler.h>
25
26#include <string>
27
28namespace Gfx
29{
30
{
  // Stream identifier; not referenced within this header.
  std::string name;
  // Encoder name, resolved via avcodec_find_encoder_by_name() in OutputStream.
  std::string codec;
  // Extra per-stream options (key -> value), e.g. codec-private settings.
  ossia::flat_map<std::string, std::string> options;
};
37
{
  const AVCodec* codec{};  // Selected encoder; non-owning (libav static data).
  AVStream* st{};          // Muxer-side stream; owned by the AVFormatContext.
  AVCodecContext* enc{};   // Encoder context; owned here, freed in close().

  /* pts of the next frame that will be generated */
  int64_t next_pts{};
  // NOTE(review): not referenced within this header — possibly used by callers.
  int samples_count{};

  AVFrame* cache_input_frame{};  // Reusable input frame handed out by get_*_frame().
  AVFrame* tmp_frame{};          // Scratch frame holding the converted pixel data.

  AVPacket* tmp_pkt{};  // Reusable packet for draining the encoder.

  struct SwsContext* sws_ctx{};  // Pixel-format converter (video path only).
  // One resampler per channel; populated only when engine and encoder rates differ.
  std::vector<std::unique_ptr<r8b::CDSPResampler>> resamplers;

  // Sample-format-specific packer selected in open_audio().
  std::unique_ptr<AudioFrameEncoder> encoder;

  // Pre-allocated buffers for audio resampling (avoid per-frame heap allocs)
  std::vector<std::vector<double>> resample_in_buf;
  std::vector<ossia::float_vector> resample_out_buf;

  // True only once open_audio()/open_video() completed successfully.
  bool m_valid{};

      const LibavOutputSettings& set, AVFormatContext* oc, const StreamOptions& opts)
  {
    // Resolve the requested encoder; every early return below leaves the
    // stream with m_valid == false, so it is skipped at write time.
    codec = avcodec_find_encoder_by_name(opts.codec.c_str());
    if(!codec)
    {
      qDebug() << "Could not find encoder for " << opts.codec.c_str();
      return;
    }

    // Reusable packet for draining the encoder in the write_* methods.
    this->tmp_pkt = av_packet_alloc();
    if(!this->tmp_pkt)
    {
      qDebug() << "Could not allocate AVPacket";
      return;
    }

    // Muxer-side stream; it is owned by (and freed with) the AVFormatContext.
    this->st = avformat_new_stream(oc, nullptr);
    if(!this->st)
    {
      qDebug() << "Could not allocate stream";
      return;
    }
    this->st->id = oc->nb_streams - 1;

    this->enc = avcodec_alloc_context3(codec);
    if(!this->enc)
    {
      qDebug() << "Could not alloc an encoding context";
      return;
    }

    // Media-type-specific configuration of the context; the codec itself is
    // opened later, in open_audio() / open_video().
    switch(codec->type)
    {
      case AVMEDIA_TYPE_AUDIO:
        init_audio(set, this->enc);
        break;
      case AVMEDIA_TYPE_VIDEO:
        init_video(set, this->enc);
        break;

      default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if(oc->oformat->flags & AVFMT_GLOBALHEADER)
    {
      this->enc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
  }
115
  // Configure the audio encoder context: sample format, sample rate (clamped
  // to the codec's advertised list when there is one), channel count,
  // threading and timebase. Requires the AVChannelLayout API
  // (libavutil >= 57.24.100); otherwise this is a no-op.
  void init_audio(const LibavOutputSettings& set, AVCodecContext* c)
  {
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
    c->sample_fmt = av_get_sample_fmt(set.audio_converted_smpfmt.toStdString().c_str());

    {
      const int* supported_samplerates{};
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 19, 100)
      // Newer FFmpeg deprecates codec->supported_samplerates in favour of
      // this query API.
      avcodec_get_supported_config(
          c, codec, AV_CODEC_CONFIG_SAMPLE_RATE, 0, (const void**)&supported_samplerates,
          nullptr);
#else
      supported_samplerates = codec->supported_samplerates;
#endif
      if(supported_samplerates)
      {
        // Default to the codec's first supported rate, but prefer the
        // requested rate if it appears in the 0-terminated list.
        c->sample_rate = supported_samplerates[0];
        for(int i = 0; supported_samplerates[i]; i++)
        {
          if(supported_samplerates[i] == set.audio_sample_rate)
          {
            c->sample_rate = set.audio_sample_rate;
            break;
          }
        }
      }
      else
      {
        // No restriction advertised: use the requested rate as-is.
        c->sample_rate = set.audio_sample_rate;
      }
    }

    c->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
    c->ch_layout.nb_channels = set.audio_channels;
    c->thread_count = set.threads > 0 ? set.threads : 0; // 0 = auto-detect
    // 24-bit PCM is carried in 32-bit samples; record the true bit depth so
    // open_audio() can pick the 24-bit packer.
    if(set.audio_encoder_short == "pcm_s24le" || set.audio_encoder_short == "pcm_s24be")
      c->bits_per_raw_sample = 24;

    this->st->time_base = AVRational{1, c->sample_rate};
    c->time_base = AVRational{1, c->sample_rate};
    c->framerate = AVRational{c->sample_rate, 1};
    qDebug() << "Opening audio encoder with: rate: " << c->sample_rate;
#endif
  }
160
  // Configure the video encoder context: size, threading, timebase/framerate
  // and pixel format (falling back to the codec's first supported format,
  // then YUV420P, when the configured format is invalid).
  void init_video(const LibavOutputSettings& set, AVCodecContext* c)
  {
    c->codec_id = codec->id;
    c->width = set.width;
    c->height = set.height;

    // Enable multi-threaded encoding (0 = auto-detect CPU count)
    c->thread_count = set.threads > 0 ? set.threads : 0;
    c->thread_type = FF_THREAD_FRAME | FF_THREAD_SLICE;
    /* timebase: This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identical to 1. */
    // The 100000 scaling keeps fractional rates (e.g. 29.97) reasonably exact.
    this->st->time_base = AVRational{100000, int(100000 * set.rate)};
    c->time_base = this->st->time_base;
    c->framerate = AVRational{this->st->time_base.den, this->st->time_base.num};

    // gop_size and max_b_frames: use FFmpeg/codec defaults.
    // Users can override via the options dict (g=<N>, bf=<N>).
    // FFmpeg default: g=12, bf=0. Presets set explicit values where needed.

    c->pix_fmt = av_get_pix_fmt(set.video_converted_pixfmt.toStdString().c_str());
    if(c->pix_fmt == AV_PIX_FMT_NONE)
    {
      // Default to first supported format of this codec
      const AVPixelFormat* fmts = nullptr;
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(61, 19, 100)
      // Newer FFmpeg deprecates codec->pix_fmts in favour of this query API.
      avcodec_get_supported_config(
          c, codec, AV_CODEC_CONFIG_PIX_FORMAT, 0,
          (const void**)&fmts, nullptr);
#else
      fmts = codec->pix_fmts;
#endif
      if(fmts && fmts[0] != AV_PIX_FMT_NONE)
        c->pix_fmt = fmts[0];
      else
        c->pix_fmt = AV_PIX_FMT_YUV420P;
    }
    c->strict_std_compliance = FF_COMPLIANCE_NORMAL;
  }
201
202 void open_audio(
203 const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
204 AVDictionary* opt_arg)
205 {
206#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
207 AVDictionary* opt = nullptr;
208
209 av_dict_copy(&opt, opt_arg, 0);
210 int ret = avcodec_open2(enc, codec, &opt);
211 av_dict_free(&opt);
212 if(ret < 0)
213 {
214 qDebug() << "Could not open audio codec: " << av_to_string(ret);
215 return;
216 }
217
218 int nb_samples = 0;
219 if(enc->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
220 {
221 auto& audio_stgs = score::AppContext().settings<Audio::Settings::Model>();
222 nb_samples = audio_stgs.getBufferSize();
223 enc->frame_size = nb_samples;
224 qDebug() << "Setting frame_size: " << nb_samples;
225 }
226 else
227 {
228 nb_samples = enc->frame_size;
229 qDebug() << "Forcing frame_size: " << nb_samples;
230 }
231
232 cache_input_frame = alloc_audio_frame(
233 enc->sample_fmt, &enc->ch_layout, enc->sample_rate, nb_samples);
234
235 /* copy the stream parameters to the muxer */
236 ret = avcodec_parameters_from_context(this->st->codecpar, enc);
237 if(ret < 0)
238 {
239 qDebug() << "Could not copy the stream parameters";
240 return;
241 }
242
243 {
244 auto conv_fmt
245 = av_get_sample_fmt(set.audio_converted_smpfmt.toStdString().c_str());
246 if(conv_fmt == AV_SAMPLE_FMT_NONE)
247 {
248 qDebug() << "Invalid audio sample format:" << set.audio_converted_smpfmt;
249 return;
250 }
251
252 auto& ctx = score::AppContext().settings<Audio::Settings::Model>();
253
254 const int input_sample_rate = ctx.getRate();
255 if(enc->sample_rate != input_sample_rate)
256 {
257 for(int i = 0; i < set.audio_channels; i++)
258 this->resamplers.push_back(std::make_unique<r8b::CDSPResampler>(
259 input_sample_rate, enc->sample_rate, nb_samples * 2, 3.0, 206.91,
260 r8b::fprMinPhase));
261 }
262
263 switch(conv_fmt)
264 {
265 case AV_SAMPLE_FMT_NONE:
266 case AV_SAMPLE_FMT_U8:
267 case AV_SAMPLE_FMT_S16:
268 encoder = std::make_unique<S16IAudioFrameEncoder>(nb_samples);
269 break;
270 case AV_SAMPLE_FMT_S32:
271 if(enc->bits_per_raw_sample == 24)
272 encoder = std::make_unique<S24IAudioFrameEncoder>(nb_samples);
273 else
274 encoder = std::make_unique<S32IAudioFrameEncoder>(nb_samples);
275 break;
276 case AV_SAMPLE_FMT_FLT:
277 encoder = std::make_unique<FltIAudioFrameEncoder>(nb_samples);
278 break;
279 case AV_SAMPLE_FMT_DBL:
280 encoder = std::make_unique<DblIAudioFrameEncoder>(nb_samples);
281 break;
282
283 case AV_SAMPLE_FMT_U8P:
284 case AV_SAMPLE_FMT_S16P:
285 encoder = std::make_unique<S16PAudioFrameEncoder>(nb_samples);
286 break;
287 case AV_SAMPLE_FMT_S32P:
288 encoder = std::make_unique<S32PAudioFrameEncoder>(nb_samples);
289 break;
290 case AV_SAMPLE_FMT_FLTP:
291 encoder = std::make_unique<FltPAudioFrameEncoder>(nb_samples);
292 break;
293 case AV_SAMPLE_FMT_DBLP:
294 encoder = std::make_unique<DblPAudioFrameEncoder>(nb_samples);
295 break;
296 case AV_SAMPLE_FMT_S64:
297 case AV_SAMPLE_FMT_S64P:
298 qDebug() << "64-bit integer audio sample format not supported for encoding";
299 break;
300 default:
301 break;
302 }
303 }
304
305 m_valid = true;
306#endif
307 }
308
309#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
310 static AVFrame* alloc_audio_frame(
311 enum AVSampleFormat sample_fmt, const AVChannelLayout* channel_layout,
312 int sample_rate, int nb_samples)
313 {
314 AVFrame* frame = av_frame_alloc();
315 if(!frame)
316 {
317 qDebug() << "Error allocating an audio frame";
318 return nullptr;
319 }
320
321 frame->format = sample_fmt;
322 av_channel_layout_copy(&frame->ch_layout, channel_layout);
323 frame->sample_rate = sample_rate;
324 frame->nb_samples = nb_samples;
325
326 if(nb_samples)
327 {
328 if(av_frame_get_buffer(frame, 0) < 0)
329 {
330 qDebug() << "Error allocating an audio buffer";
331 av_frame_free(&frame);
332 return nullptr;
333 }
334 }
335
336 return frame;
337 }
338#endif
339
340 static AVFrame* alloc_video_frame(enum AVPixelFormat pix_fmt, int width, int height)
341 {
342 auto frame = av_frame_alloc();
343 if(!frame)
344 return NULL;
345
346 frame->format = pix_fmt;
347 frame->width = width;
348 frame->height = height;
349
350 /* allocate the buffers for the frame data */
351 const int ret = av_frame_get_buffer(frame, 0);
352 if(ret < 0)
353 {
354 qDebug() << "Could not allocate frame data.";
355 av_frame_free(&frame);
356 return nullptr;
357 }
358
359 return frame;
360 }
361
  // Open the video codec, allocate the reusable RGBA input frame, and build
  // the swscale conversion into the encoder's pixel format. Any early return
  // leaves m_valid == false so the stream is skipped at write time.
  void open_video(
      const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
      AVDictionary* opt_arg)
  {
    AVCodecContext* c = this->enc;
    AVDictionary* opt = nullptr;

    // avcodec_open2 consumes the entries it recognizes; pass it a copy.
    av_dict_copy(&opt, opt_arg, 0);

    /* open the codec */
    int ret = avcodec_open2(this->enc, codec, &opt);
    av_dict_free(&opt);
    if(ret < 0)
    {
      qDebug() << "Could not open video codec: " << av_to_string(ret);
      return;
    }

    /* allocate and init a re-usable frame */
    // NOTE(review): the cache frame is always RGBA while sws_ctx below takes
    // set.video_render_pixfmt as its input format — confirm these always
    // agree for the frames actually fed to write_video_frame().
    this->cache_input_frame = alloc_video_frame(AV_PIX_FMT_RGBA, c->width, c->height);
    if(!this->cache_input_frame)
    {
      qDebug() << "Could not allocate video frame";
      return;
    }

    this->tmp_frame = nullptr;
    {
      auto input_fmt = av_get_pix_fmt(set.video_render_pixfmt.toStdString().c_str());
      auto conv_fmt = av_get_pix_fmt(set.video_converted_pixfmt.toStdString().c_str());
      if(input_fmt == AV_PIX_FMT_NONE || conv_fmt == AV_PIX_FMT_NONE)
      {
        qDebug() << "Invalid pixel format:" << set.video_render_pixfmt
                 << "->" << set.video_converted_pixfmt;
        return;
      }
      // Converter from the renderer's output format to the encoder's format.
      sws_ctx = sws_getContext(
          set.width, set.height, input_fmt, set.width, set.height, conv_fmt,
          SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
      if(!sws_ctx)
      {
        qDebug() << "Could not create swscale context";
        return;
      }
      // Destination frame for the converted pixels.
      this->tmp_frame = alloc_video_frame(conv_fmt, c->width, c->height);
      if(!this->tmp_frame)
      {
        qDebug() << "Could not allocate temporary video frame";
        return;
      }
    }

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(this->st->codecpar, c);
    if(ret < 0)
    {
      qDebug() << "Could not copy the stream parameters";
      return;
    }

    m_valid = true;
  }
424
425 void open(
426 const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
427 AVDictionary* opt_arg)
428 {
429 SCORE_ASSERT(oc);
430 SCORE_ASSERT(codec);
431 if(codec->type == AVMEDIA_TYPE_AUDIO)
432 {
433 open_audio(set, oc, codec, opt_arg);
434 }
435 else if(codec->type == AVMEDIA_TYPE_VIDEO)
436 {
437 open_video(set, oc, codec, opt_arg);
438 }
439 }
440
441 void close(AVFormatContext* oc)
442 {
443 avcodec_free_context(&enc);
444 av_frame_free(&cache_input_frame);
445 av_frame_free(&tmp_frame);
446 av_packet_free(&tmp_pkt);
447 sws_freeContext(sws_ctx);
448 sws_ctx = nullptr;
449 }
450
451 AVFrame* get_video_frame()
452 {
453 if(!m_valid || !this->cache_input_frame)
454 return nullptr;
455 if(av_frame_make_writable(this->cache_input_frame) < 0)
456 return nullptr;
457
458 this->cache_input_frame->pts = this->next_pts++;
459 return this->cache_input_frame;
460 }
461
462 AVFrame* get_audio_frame()
463 {
464 if(!m_valid || !this->cache_input_frame)
465 return nullptr;
466 if(av_frame_make_writable(this->cache_input_frame) < 0)
467 return nullptr;
468
469 this->cache_input_frame->pts = this->next_pts;
470 this->next_pts += this->enc->frame_size;
471 return this->cache_input_frame;
472 }
473
  // Convert input_frame into the encoder's pixel format via swscale, encode
  // it, and mux every packet the encoder produces.
  // Returns 0 on success, 1 on encoder EOF (or when libswscale predates
  // sws_scale_frame), and a negative libav error code otherwise.
  int write_video_frame(AVFormatContext* fmt_ctx, AVFrame* input_frame)
  {
    if(!m_valid)
      return -1;
#if LIBSWSCALE_VERSION_INT >= AV_VERSION_INT(7, 5, 100)
    // Must unref before reuse — sws_scale_frame allocates internal buffers
    av_frame_unref(tmp_frame);
    tmp_frame->format = enc->pix_fmt;
    tmp_frame->width = enc->width;
    tmp_frame->height = enc->height;

    // scale the frame
    int ret = sws_scale_frame(sws_ctx, tmp_frame, input_frame);
    if(ret < 0)
    {
      qDebug() << "Error during sws_scale_frame: " << av_to_string(ret);
      return ret;
    }

    // Carry the caller's timestamp over to the converted frame.
    tmp_frame->pts = input_frame->pts;

    // send the frame to the encoder
    ret = avcodec_send_frame(enc, tmp_frame);
    if(ret < 0)
    {
      qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
      return ret;
    }

    // Drain all packets the encoder has ready; it may buffer several frames
    // before emitting anything (EAGAIN), which is not an error.
    while(ret >= 0)
    {
      ret = avcodec_receive_packet(enc, tmp_pkt);
      if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        break;
      else if(ret < 0)
      {
        qDebug() << "Error encoding a frame: " << av_to_string(ret);
        return ret;
      }

      /* rescale output packet timestamp values from codec to stream timebase */
      av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
      tmp_pkt->stream_index = st->index;

      ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
      if(ret < 0)
      {
        qDebug() << "Error while writing output packet: " << av_to_string(ret);
        return ret;
      }
    }

    return ret == AVERROR_EOF ? 1 : 0;
#endif
    // Unreachable when the #if branch above is compiled in.
    return 1;
  }
530
531 // Write a pre-converted frame directly to the encoder — no sws_scale.
532 // The frame must already be in enc->pix_fmt with correct dimensions.
533 int write_video_frame_direct(AVFormatContext* fmt_ctx, AVFrame* frame)
534 {
535 if(!m_valid)
536 return -1;
537
538 int ret = avcodec_send_frame(enc, frame);
539 if(ret < 0)
540 {
541 qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
542 return ret;
543 }
544
545 while(ret >= 0)
546 {
547 ret = avcodec_receive_packet(enc, tmp_pkt);
548 if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
549 break;
550 else if(ret < 0)
551 {
552 qDebug() << "Error encoding a frame: " << av_to_string(ret);
553 return ret;
554 }
555
556 av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
557 tmp_pkt->stream_index = st->index;
558
559 ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
560 if(ret < 0)
561 {
562 qDebug() << "Error while writing output packet: " << av_to_string(ret);
563 return ret;
564 }
565 }
566
567 return ret == AVERROR_EOF ? 1 : 0;
568 }
569
  // Encode one audio frame (already packed by `encoder`) and mux every packet
  // produced. Per the libav encode API, a null input_frame flushes the
  // encoder. Returns 0 on success, 1 on encoder EOF, a negative libav error
  // code otherwise.
  int write_audio_frame(AVFormatContext* fmt_ctx, AVFrame* input_frame)
  {
    if(!m_valid)
      return -1;
    // send the frame to the encoder
    int ret = avcodec_send_frame(enc, input_frame);
    if(ret < 0)
    {
      qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
      return ret;
    }

    // Drain all packets the encoder has ready; EAGAIN simply means it wants
    // more input before producing output.
    while(ret >= 0)
    {
      ret = avcodec_receive_packet(enc, tmp_pkt);
      if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        break;
      else if(ret < 0)
      {
        qDebug() << "Error encoding a frame: " << av_to_string(ret);
        return ret;
      }

      /* rescale output packet timestamp values from codec to stream timebase */
      av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
      tmp_pkt->stream_index = st->index;

      ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
      if(ret < 0)
      {
        qDebug() << "Error while writing output packet: " << av_to_string(ret);
        return ret;
      }
    }

    return ret == AVERROR_EOF ? 1 : 0;
  }
607};
608}
Definition score-plugin-audio/Audio/Settings/Model.hpp:22
Binds the rendering pipeline to ossia processes.
Definition CameraDevice.cpp:30
Definition LibavOutputSettings.hpp:16
Definition LibavOutputStream.hpp:39
Definition LibavOutputStream.hpp:32
T & settings() const
Access a specific Settings model instance.
Definition ApplicationContext.hpp:41