LibavOutputStream.hpp
#pragma once

extern "C" {

#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavformat/avformat.h>
#include <libavutil/pixdesc.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
}

#include <Audio/Settings/Model.hpp>
#include <Gfx/Libav/AudioFrameEncoder.hpp>
#include <Gfx/Libav/LibavOutputSettings.hpp>

#include <score/application/ApplicationContext.hpp>
#include <score/tools/Debug.hpp>

#include <ossia/detail/flat_map.hpp>

#include <QApplication>

#include <CDSPResampler.h>

#include <memory>
#include <string>
#include <vector>

namespace Gfx
{
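
// Describes one stream to add to the output: a stream name, the name of the
// libav encoder to use for it, and extra options as key/value pairs.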
struct StreamOptions
{
  std::string name;
  std::string codec;
  ossia::flat_map<std::string, std::string> options;
};
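
// One output stream and its encoding state: the chosen encoder, the AVStream and
// AVCodecContext, the frames and packet reused on every tick, and the conversion
// helpers (swscale context, r8brain resamplers, AudioFrameEncoder) used to adapt
// the rendered data to what the encoder expects.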
struct OutputStream
{
  const AVCodec* codec{};
  AVStream* st{};
  AVCodecContext* enc{};

  /* pts of the next frame that will be generated */
  int64_t next_pts{};
  int samples_count{};

  AVFrame* cache_input_frame{};
  AVFrame* tmp_frame{};

  AVPacket* tmp_pkt{};

  struct SwsContext* sws_ctx{};
  std::vector<std::unique_ptr<r8b::CDSPResampler>> resamplers;

  std::unique_ptr<AudioFrameEncoder> encoder;

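  // Looks up the requested encoder, allocates the reusable packet and the AVStream,
  // then fills in audio or video codec parameters depending on the encoder type.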
  OutputStream(
      const LibavOutputSettings& set, AVFormatContext* oc, const StreamOptions& opts)
  {
    codec = avcodec_find_encoder_by_name(opts.codec.c_str());
    if(!codec)
    {
      qDebug() << "Could not find encoder for " << opts.codec.c_str();
      exit(1);
    }

    this->tmp_pkt = av_packet_alloc();
    if(!this->tmp_pkt)
    {
      qDebug() << "Could not allocate AVPacket";
      exit(1);
    }

    this->st = avformat_new_stream(oc, nullptr);
    if(!this->st)
    {
      qDebug() << "Could not allocate stream";
      exit(1);
    }
    this->st->id = oc->nb_streams - 1;

    // Init hw accel
    AVBufferRef* hw_ctx{};
#if 0
    {
      // HW Accel
      AVHWDeviceType device = AV_HWDEVICE_TYPE_QSV;
      int ret = av_hwdevice_ctx_create(&hw_ctx, device, "auto", nullptr, 0);
      if(ret != 0)
      {
        qDebug() << "Error while opening hardware encoder: " << av_to_string(ret);
        exit(1);
      }
    }
#endif
    this->enc = avcodec_alloc_context3(codec);
    if(!this->enc)
    {
      qDebug() << "Could not alloc an encoding context";
      exit(1);
    }

    switch(codec->type)
    {
      case AVMEDIA_TYPE_AUDIO:
        init_audio(set, this->enc);
        break;
      case AVMEDIA_TYPE_VIDEO:
        init_video(set, this->enc);
        break;

      default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if(oc->oformat->flags & AVFMT_GLOBALHEADER)
    {
      this->enc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
  }

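  // Audio codec context setup: picks a sample rate supported by the encoder
  // (preferring the one from the settings), sets the channel layout, the raw bit
  // depth for 24-bit PCM encoders, and a 1/sample_rate time base.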
  void init_audio(const LibavOutputSettings& set, AVCodecContext* c)
  {
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
    c->sample_fmt = av_get_sample_fmt(set.audio_converted_smpfmt.toStdString().c_str());

    if(codec->supported_samplerates)
    {
      c->sample_rate = codec->supported_samplerates[0];
      for(int i = 0; codec->supported_samplerates[i]; i++)
      {
        if(codec->supported_samplerates[i] == set.audio_sample_rate)
          c->sample_rate = set.audio_sample_rate;
      }
    }
    else
    {
      c->sample_rate = set.audio_sample_rate;
    }

    c->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
    c->ch_layout.nb_channels = set.audio_channels;
    if(set.audio_encoder_short == "pcm_s24le" || set.audio_encoder_short == "pcm_s24be")
      c->bits_per_raw_sample = 24;

    this->st->time_base = AVRational{1, c->sample_rate};
    c->time_base = AVRational{1, c->sample_rate};
    c->framerate = AVRational{c->sample_rate, 1};
    qDebug() << "Opening audio encoder with: rate: " << c->sample_rate;
#endif
  }

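  // Video codec context setup: resolution, a time base derived from the
  // configured frame rate, intra-only GOP settings and the target pixel format.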
  void init_video(const LibavOutputSettings& set, AVCodecContext* c)
  {
    c->codec_id = codec->id;
    // c->bit_rate = 400000;
    // c->bit_rate_tolerance = 10000;
    // c->global_quality = 1;
    // c->compression_level = 1;
    // c->hw_device_ctx = hw_ctx;

    // c->flags |= AV_CODEC_FLAG_QSCALE;
    // c->global_quality = FF_QP2LAMBDA * 3.0;
    /* Resolution must be a multiple of two. */
    c->width = set.width;
    c->height = set.height;
    /* timebase: This is the fundamental unit of time (in seconds) in terms
     * of which frame timestamps are represented. For fixed-fps content,
     * timebase should be 1/framerate and timestamp increments should be
     * identical to 1. */
    this->st->time_base = AVRational{100000, int(100000 * set.rate)};
    c->time_base = this->st->time_base;
    c->framerate = AVRational{this->st->time_base.den, this->st->time_base.num};

    //c->gop_size = 12; /* emit one intra frame every twelve frames at most */

    // ignored if frame->pict_type is AV_PICTURE_TYPE_I
    c->gop_size = 0;
    c->max_b_frames = 0;

    // c->pix_fmt = AV_PIX_FMT_RGB24;
    c->pix_fmt = av_get_pix_fmt(set.video_converted_pixfmt.toStdString().c_str());
    c->strict_std_compliance = FF_COMPLIANCE_NORMAL;
    if(c->codec_id == AV_CODEC_ID_MPEG2VIDEO)
    {
      /* just for testing, we also add B-frames */
      c->max_b_frames = 2;
    }
    if(c->codec_id == AV_CODEC_ID_MPEG1VIDEO)
    {
      /* Needed to avoid using macroblocks in which some coeffs overflow.
       * This does not happen with normal video, it just happens here as
       * the motion of the chroma plane does not match the luma plane. */
      c->mb_decision = 2;
    }
  }

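  // Opens the audio encoder, chooses the frame size (the audio engine's buffer
  // size when the codec allows variable frame sizes), allocates the cached input
  // frame, creates one r8brain resampler per channel when the device rate differs
  // from the encoder rate, and instantiates the AudioFrameEncoder matching the
  // target sample format.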
  void open_audio(
      const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
      AVDictionary* opt_arg)
  {
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
    AVDictionary* opt = nullptr;

    av_dict_copy(&opt, opt_arg, 0);
    int ret = avcodec_open2(enc, codec, &opt);
    av_dict_free(&opt);
    if(ret < 0)
    {
      qDebug() << "Could not open audio codec: " << av_to_string(ret);
      exit(1);
    }

    int nb_samples = 0;
    if(enc->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
    {
      auto& audio_stgs = score::AppContext().settings<Audio::Settings::Model>();
      nb_samples = audio_stgs.getBufferSize();
      enc->frame_size = nb_samples;
      qDebug() << "Setting frame_size: " << nb_samples;
    }
    else
    {
      nb_samples = enc->frame_size;
      qDebug() << "Forcing frame_size: " << nb_samples;
    }

    cache_input_frame = alloc_audio_frame(
        enc->sample_fmt, &enc->ch_layout, enc->sample_rate, nb_samples);

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(this->st->codecpar, enc);
    if(ret < 0)
    {
      qDebug() << "Could not copy the stream parameters";
      exit(1);
    }

    {
      auto input_fmt = AV_SAMPLE_FMT_FLTP;
      auto conv_fmt
          = av_get_sample_fmt(set.audio_converted_smpfmt.toStdString().c_str());
      SCORE_ASSERT(input_fmt != -1);
      SCORE_ASSERT(conv_fmt != -1);

      auto& ctx = score::AppContext().settings<Audio::Settings::Model>();

      const int input_sample_rate = ctx.getRate();
      if(enc->sample_rate != input_sample_rate)
      {
        for(int i = 0; i < set.audio_channels; i++)
          this->resamplers.push_back(std::make_unique<r8b::CDSPResampler>(
              input_sample_rate, enc->sample_rate, nb_samples * 2, 3.0, 206.91,
              r8b::fprMinPhase));
      }

      switch(conv_fmt)
      {
        case AV_SAMPLE_FMT_NONE:
        case AV_SAMPLE_FMT_U8:
        case AV_SAMPLE_FMT_S16:
          encoder = std::make_unique<S16IAudioFrameEncoder>(nb_samples);
          break;
        case AV_SAMPLE_FMT_S32:
          if(enc->bits_per_raw_sample == 24)
            encoder = std::make_unique<S24IAudioFrameEncoder>(nb_samples);
          else
            encoder = std::make_unique<S32IAudioFrameEncoder>(nb_samples);
          break;
        case AV_SAMPLE_FMT_FLT:
          encoder = std::make_unique<FltIAudioFrameEncoder>(nb_samples);
          break;
        case AV_SAMPLE_FMT_DBL:
          encoder = std::make_unique<DblIAudioFrameEncoder>(nb_samples);
          break;

        case AV_SAMPLE_FMT_U8P:
        case AV_SAMPLE_FMT_S16P:
        case AV_SAMPLE_FMT_S32P:
        case AV_SAMPLE_FMT_FLTP:
          encoder = std::make_unique<FltPAudioFrameEncoder>(nb_samples);
          break;
        case AV_SAMPLE_FMT_DBLP:
        case AV_SAMPLE_FMT_S64:
        case AV_SAMPLE_FMT_S64P:
          break;
        default:
          break;
      }
    }
#endif
  }

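  // Allocates an AVFrame with the given sample format, channel layout, rate and
  // frame size, including its sample buffers.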
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
  static AVFrame* alloc_audio_frame(
      enum AVSampleFormat sample_fmt, const AVChannelLayout* channel_layout,
      int sample_rate, int nb_samples)
  {
    AVFrame* frame = av_frame_alloc();
    if(!frame)
    {
      qDebug() << "Error allocating an audio frame";
      exit(1);
    }

    frame->format = sample_fmt;
    frame->ch_layout.order = channel_layout->order;
    frame->ch_layout.nb_channels = channel_layout->nb_channels;
    frame->sample_rate = sample_rate;
    frame->nb_samples = nb_samples;

    if(nb_samples)
    {
      if(av_frame_get_buffer(frame, 0) < 0)
      {
        qDebug() << "Error allocating an audio buffer";
        exit(1);
      }
    }

    return frame;
  }
#endif

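  // Allocates an AVFrame with the given pixel format and size, including its
  // data buffers.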
  static AVFrame* alloc_video_frame(enum AVPixelFormat pix_fmt, int width, int height)
  {
    auto frame = av_frame_alloc();
    if(!frame)
      return NULL;

    frame->format = pix_fmt;
    frame->width = width;
    frame->height = height;

    /* allocate the buffers for the frame data */
    const int ret = av_frame_get_buffer(frame, 0);
    if(ret < 0)
    {
      qDebug() << "Could not allocate frame data.";
      exit(1);
    }

    return frame;
  }

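  // Opens the video encoder, allocates the RGBA frame that receives the rendered
  // output and the temporary frame in the converted pixel format, and sets up the
  // swscale context used to convert between the two.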
  void open_video(
      const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
      AVDictionary* opt_arg)
  {
    AVCodecContext* c = this->enc;
    AVDictionary* opt = nullptr;

    av_dict_copy(&opt, opt_arg, 0);

    /* set some options */
    int err = av_opt_set_double(this->enc->priv_data, "crf", 0.0, 0);
    if(err < 0)
    {
      qDebug() << "failed to initialize encoder: " << av_to_string(err);
    }

    /* open the codec */
    SCORE_ASSERT(this->enc->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
    int ret = avcodec_open2(this->enc, codec, &opt);
    av_dict_free(&opt);
    if(ret < 0)
    {
      qDebug() << "Could not open video codec: " << av_to_string(ret);
      exit(1);
    }

    /* allocate and init a re-usable frame */
    this->cache_input_frame = alloc_video_frame(AV_PIX_FMT_RGBA, c->width, c->height);
    if(!this->cache_input_frame)
    {
      qDebug() << "Could not allocate video frame";
      exit(1);
    }

    this->tmp_frame = nullptr;
    // If conversion is needed :
    // if(c->pix_fmt != AV_PIX_FMT_YUVJ420P)
    {
      auto input_fmt = av_get_pix_fmt(set.video_render_pixfmt.toStdString().c_str());
      auto conv_fmt = av_get_pix_fmt(set.video_converted_pixfmt.toStdString().c_str());
      SCORE_ASSERT(input_fmt != -1);
      SCORE_ASSERT(conv_fmt != -1);
      sws_ctx = sws_getContext(
          set.width, set.height, input_fmt, set.width, set.height, conv_fmt, 1, nullptr,
          nullptr, nullptr);
      SCORE_ASSERT(sws_ctx);
      this->tmp_frame = alloc_video_frame(conv_fmt, c->width, c->height);
      if(!this->tmp_frame)
      {
        qDebug() << "Could not allocate temporary video frame";
        exit(1);
      }
    }

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(this->st->codecpar, c);
    if(ret < 0)
    {
      qDebug() << "Could not copy the stream parameters";
      exit(1);
    }
  }

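  // Dispatches to open_audio or open_video depending on the encoder type.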
  void open(
      const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
      AVDictionary* opt_arg)
  {
    SCORE_ASSERT(oc);
    SCORE_ASSERT(codec);
    SCORE_ASSERT(opt_arg);
    if(codec->type == AVMEDIA_TYPE_AUDIO)
    {
      open_audio(set, oc, codec, opt_arg);
    }
    else if(codec->type == AVMEDIA_TYPE_VIDEO)
    {
      open_video(set, oc, codec, opt_arg);
    }
  }

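  // Releases the codec context, frames, packet and swscale context.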
  void close(AVFormatContext* oc)
  {
    avcodec_free_context(&enc);
    av_frame_free(&cache_input_frame);
    av_frame_free(&tmp_frame);
    av_packet_free(&tmp_pkt);
    sws_freeContext(sws_ctx);
    sws_ctx = nullptr;
  }

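  // The two getters below hand out the cached input frame, made writable again
  // and stamped with the next pts: +1 per video frame, +frame_size per audio frame.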
  AVFrame* get_video_frame()
  {
    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally; make sure we do not overwrite it here */
    if(av_frame_make_writable(this->cache_input_frame) < 0)
      exit(1);

    this->cache_input_frame->pts = this->next_pts++;

    return this->cache_input_frame;
  }

  AVFrame* get_audio_frame()
  {
    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally; make sure we do not overwrite it here */
    if(av_frame_make_writable(this->cache_input_frame) < 0)
      exit(1);

    this->cache_input_frame->pts = this->next_pts;
    this->next_pts += this->enc->frame_size;

    return this->cache_input_frame;
  }

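  // Converts the input frame to the encoder's pixel format, forces an intra frame,
  // then runs the usual avcodec_send_frame / avcodec_receive_packet loop, rescaling
  // packet timestamps from the codec time base to the stream time base before
  // muxing them with av_interleaved_write_frame.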
  int write_video_frame(AVFormatContext* fmt_ctx, AVFrame* input_frame)
  {
#if LIBSWSCALE_VERSION_INT >= AV_VERSION_INT(7, 5, 100)
    // scale the frame
    int ret = sws_scale_frame(sws_ctx, tmp_frame, input_frame);
    if(ret < 0)
    {
      qDebug() << "Error during sws_scale_frame: " << av_to_string(ret);
      exit(1);
    }

    tmp_frame->quality = FF_LAMBDA_MAX; //c->global_quality;
    tmp_frame->pict_type = AV_PICTURE_TYPE_I;
    tmp_frame->pts++;

    // send the frame to the encoder
    ret = avcodec_send_frame(enc, tmp_frame);
    if(ret < 0)
    {
      qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
      exit(1);
    }

    while(ret >= 0)
    {
      ret = avcodec_receive_packet(enc, tmp_pkt);
      if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        break;
      else if(ret < 0)
      {
        qDebug() << "Error encoding a frame: " << av_to_string(ret);
        exit(1);
      }

      /* rescale output packet timestamp values from codec to stream timebase */
      av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
      tmp_pkt->stream_index = st->index;
      tmp_pkt->flags |= AV_PKT_FLAG_KEY;

      ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
      if(ret < 0)
      {
        qDebug() << "Error while writing output packet: " << av_to_string(ret);
        exit(1);
      }
    }

    return ret == AVERROR_EOF ? 1 : 0;
#endif
    return 1;
  }

  // #define SRC_RATE SAMPLE_RATE_TEST
  // #define DST_RATE SAMPLE_RATE_TEST
  // static int64_t conv_audio_pts(SwrContext* ctx, int64_t in, int sample_rate)
  // {
  //   //int64_t d = (int64_t) AUDIO_RATE * AUDIO_RATE;
  //   int64_t d = (int64_t)sample_rate * sample_rate;
  //
  //   /* Convert from audio_src_tb to 1/(src_samplerate * dst_samplerate) */
  //   in = av_rescale_rnd(in, d, SRC_RATE, AV_ROUND_NEAR_INF);
  //
  //   /* In units of 1/(src_samplerate * dst_samplerate) */
  //   in = swr_next_pts(ctx, in);
  //
  //   /* Convert from 1/(src_samplerate * dst_samplerate) to audio_dst_tb */
  //   return av_rescale_rnd(in, DST_RATE, d, AV_ROUND_NEAR_INF);
  // }

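  // Same send/receive/mux loop as write_video_frame, for an already-prepared
  // audio frame; returns 1 once the encoder signals EOF.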
  int write_audio_frame(AVFormatContext* fmt_ctx, AVFrame* input_frame)
  {
    // send the frame to the encoder
    int ret = avcodec_send_frame(enc, input_frame);
    if(ret < 0)
    {
      qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
      exit(1);
    }

    while(ret >= 0)
    {
      ret = avcodec_receive_packet(enc, tmp_pkt);
      if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        break;
      else if(ret < 0)
      {
        qDebug() << "Error encoding a frame: " << av_to_string(ret);
        exit(1);
      }

      /* rescale output packet timestamp values from codec to stream timebase */
      av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
      tmp_pkt->stream_index = st->index;

      ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
      if(ret < 0)
      {
        qDebug() << "Error while writing output packet: " << av_to_string(ret);
        exit(1);
      }
    }

    return ret == AVERROR_EOF ? 1 : 0;
  }
};
}
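
// A rough usage sketch, for orientation only: the real call sites live in the
// Libav output node implementation, and the settings, format context and option
// dictionary names below are hypothetical placeholders.
//
//   AVFormatContext* oc = ...;            // e.g. from avformat_alloc_output_context2
//   LibavOutputSettings settings = ...;   // filled from the device settings
//
//   Gfx::StreamOptions opts;
//   opts.codec = "mjpeg";                 // any encoder name known to libavcodec
//
//   Gfx::OutputStream stream{settings, oc, opts};
//
//   AVDictionary* codec_opts = nullptr;
//   av_dict_set(&codec_opts, "threads", "auto", 0); // open() asserts a non-null dict
//   stream.open(settings, oc, stream.codec, codec_opts);
//
//   // per rendered frame:
//   AVFrame* f = stream.get_video_frame();
//   /* ... fill f->data[0] with RGBA pixels ... */
//   stream.write_video_frame(oc, f);
//
//   // at the end:
//   stream.close(oc);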