LibavOutputStream.hpp
1 #pragma once
2 
3 extern "C" {
4 
5 #include <libavcodec/avcodec.h>
6 #include <libavdevice/avdevice.h>
7 #include <libavformat/avformat.h>
8 #include <libavutil/pixdesc.h>
9 #include <libswresample/swresample.h>
10 #include <libswscale/swscale.h>
11 }
12 
13 #include <Audio/Settings/Model.hpp>
14 #include <Gfx/Libav/AudioFrameEncoder.hpp>
15 #include <Gfx/Libav/LibavOutputSettings.hpp>
16 
17 #include <score/application/ApplicationContext.hpp>
18 #include <score/tools/Debug.hpp>
19 
20 #include <ossia/detail/flat_map.hpp>
21 
22 #include <QApplication>
23 
24 #include <CDSPResampler.h>
25 
26 #include <string>
27 
28 namespace Gfx
29 {
30 
32 {
33  std::string name;
34  std::string codec;
35  ossia::flat_map<std::string, std::string> options;
36 };
37 
39 {
40  const AVCodec* codec{};
41  AVStream* st{};
42  AVCodecContext* enc{};
43 
44  /* pts of the next frame that will be generated */
45  int64_t next_pts{};
46  int samples_count{};
47 
48  AVFrame* cache_input_frame{};
49  AVFrame* tmp_frame{};
50 
51  AVPacket* tmp_pkt{};
52 
53  struct SwsContext* sws_ctx{};
54  std::vector<std::unique_ptr<r8b::CDSPResampler>> resamplers;
55 
56  std::unique_ptr<AudioFrameEncoder> encoder;
57 
59  const LibavOutputSettings& set, AVFormatContext* oc, const StreamOptions& opts)
60  {
61  codec = avcodec_find_encoder_by_name(opts.codec.c_str());
62  if(!codec)
63  {
64  qDebug() << "Could not find encoder for " << opts.codec.c_str();
65  exit(1);
66  }
67 
68  this->tmp_pkt = av_packet_alloc();
69  if(!this->tmp_pkt)
70  {
71  qDebug() << "Could not allocate AVPacket";
72  exit(1);
73  }
74 
75  this->st = avformat_new_stream(oc, nullptr);
76  if(!this->st)
77  {
78  qDebug() << "Could not allocate stream";
79  exit(1);
80  }
81  this->st->id = oc->nb_streams - 1;
82 
83  // Init hw accel
84  // FIXME IMPLEMENT
85 #if 0
86  AVBufferRef* hw_ctx{};
87  {
88  // HW Accel
89  AVHWDeviceType device = AV_HWDEVICE_TYPE_QSV;
90  int ret = av_hwdevice_ctx_create(&hw_ctx, device, "auto", nullptr, 0);
91  if(ret != 0)
92  {
93  qDebug() << "Error while opening hardware encoder: " << av_to_string(ret);
94  exit(1);
95  }
96  }
97 #endif
98  this->enc = avcodec_alloc_context3(codec);
99  if(!this->enc)
100  {
101  qDebug() << "Could not alloc an encoding context";
102  exit(1);
103  }
104 
105  switch(codec->type)
106  {
107  case AVMEDIA_TYPE_AUDIO:
108  init_audio(set, this->enc);
109  break;
110  case AVMEDIA_TYPE_VIDEO:
111  init_video(set, this->enc);
112  break;
113 
114  default:
115  break;
116  }
117 
118  /* Some formats want stream headers to be separate. */
119  if(oc->oformat->flags & AVFMT_GLOBALHEADER)
120  {
121  this->enc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
122  }
123  }
124 
125  void init_audio(const LibavOutputSettings& set, AVCodecContext* c)
126  {
127 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
128  c->sample_fmt = av_get_sample_fmt(set.audio_converted_smpfmt.toStdString().c_str());
129 
130  if(codec->supported_samplerates)
131  {
132  c->sample_rate = codec->supported_samplerates[0];
133  for(int i = 0; codec->supported_samplerates[i]; i++)
134  {
135  if(codec->supported_samplerates[i] == set.audio_sample_rate)
136  c->sample_rate = set.audio_sample_rate;
137  }
138  }
139  else
140  {
141  c->sample_rate = set.audio_sample_rate;
142  }
143 
144  c->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
145  c->ch_layout.nb_channels = set.audio_channels;
146  if(set.audio_encoder_short == "pcm_s24le" || set.audio_encoder_short == "pcm_s24be")
147  c->bits_per_raw_sample = 24;
148 
149  this->st->time_base = AVRational{1, c->sample_rate};
150  c->time_base = AVRational{1, c->sample_rate};
151  c->framerate = AVRational{c->sample_rate, 1};
152  qDebug() << "Opening audio encoder with: rate: " << c->sample_rate;
153 #endif
154  }
155 
156  void init_video(const LibavOutputSettings& set, AVCodecContext* c)
157  {
158  c->codec_id = codec->id;
159  // c->bit_rate = 400000;
160  // c->bit_rate_tolerance = 10000;
161  // c->global_quality = 1;
162  // c->compression_level = 1;
163  // c->hw_device_ctx = hw_ctx;
164 
165  // c->flags |= AV_CODEC_FLAG_QSCALE;
166  // c->global_quality = FF_QP2LAMBDA * 3.0;
167  /* Resolution must be a multiple of two. */
168  c->width = set.width;
169  c->height = set.height;
170  /* timebase: This is the fundamental unit of time (in seconds) in terms
171  * of which frame timestamps are represented. For fixed-fps content,
172  * timebase should be 1/framerate and timestamp increments should be
173  * identical to 1. */
174  this->st->time_base = AVRational{100000, int(100000 * set.rate)};
175  c->time_base = this->st->time_base;
176  c->framerate = AVRational{this->st->time_base.den, this->st->time_base.num};
177 
178  //c->gop_size = 12; /* emit one intra frame every twelve frames at most */
179 
180  // ignored if frame->pict_type is AV_PICTURE_TYPE_I
181  c->gop_size = 0;
182  c->max_b_frames = 0;
183 
184  // c->pix_fmt = AV_PIX_FMT_RGB24;
185  c->pix_fmt = av_get_pix_fmt(set.video_converted_pixfmt.toStdString().c_str());
186  c->strict_std_compliance = FF_COMPLIANCE_NORMAL;
187  if(c->codec_id == AV_CODEC_ID_MPEG2VIDEO)
188  {
189  /* just for testing, we also add B-frames */
190  c->max_b_frames = 2;
191  }
192  if(c->codec_id == AV_CODEC_ID_MPEG1VIDEO)
193  {
194  /* Needed to avoid using macroblocks in which some coeffs overflow.
195  * This does not happen with normal video, it just happens here as
196  * the motion of the chroma plane does not match the luma plane. */
197  c->mb_decision = 2;
198  }
199  }
200 
201  void open_audio(
202  const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
203  AVDictionary* opt_arg)
204  {
205 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
206  AVDictionary* opt = nullptr;
207 
208  av_dict_copy(&opt, opt_arg, 0);
209  int ret = avcodec_open2(enc, codec, &opt);
210  av_dict_free(&opt);
211  if(ret < 0)
212  {
213  qDebug() << "Could not open audio codec: " << av_to_string(ret);
214  exit(1);
215  }
216 
217  int nb_samples = 0;
218  if(enc->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
219  {
220  auto& audio_stgs = score::AppContext().settings<Audio::Settings::Model>();
221  nb_samples = audio_stgs.getBufferSize();
222  enc->frame_size = nb_samples;
223  qDebug() << "Setting frame_size: " << nb_samples;
224  }
225  else
226  {
227  nb_samples = enc->frame_size;
228  qDebug() << "Forcing frame_size: " << nb_samples;
229  }
230 
231  cache_input_frame = alloc_audio_frame(
232  enc->sample_fmt, &enc->ch_layout, enc->sample_rate, nb_samples);
233 
234  /* copy the stream parameters to the muxer */
235  ret = avcodec_parameters_from_context(this->st->codecpar, enc);
236  if(ret < 0)
237  {
238  qDebug() << "Could not copy the stream parameters";
239  exit(1);
240  }
241 
242  {
243  auto input_fmt = AV_SAMPLE_FMT_FLTP;
244  auto conv_fmt
245  = av_get_sample_fmt(set.audio_converted_smpfmt.toStdString().c_str());
246  SCORE_ASSERT(input_fmt != -1);
247  SCORE_ASSERT(conv_fmt != -1);
248 
249  auto& ctx = score::AppContext().settings<Audio::Settings::Model>();
250 
251  const int input_sample_rate = ctx.getRate();
252  if(enc->sample_rate != input_sample_rate)
253  {
254  for(int i = 0; i < set.audio_channels; i++)
255  this->resamplers.push_back(std::make_unique<r8b::CDSPResampler>(
256  input_sample_rate, enc->sample_rate, nb_samples * 2, 3.0, 206.91,
257  r8b::fprMinPhase));
258  }
259 
260  switch(conv_fmt)
261  {
262  case AV_SAMPLE_FMT_NONE:
263  case AV_SAMPLE_FMT_U8:
264  case AV_SAMPLE_FMT_S16:
265  encoder = std::make_unique<S16IAudioFrameEncoder>(nb_samples);
266  break;
267  case AV_SAMPLE_FMT_S32:
268  if(enc->bits_per_raw_sample == 24)
269  encoder = std::make_unique<S24IAudioFrameEncoder>(nb_samples);
270  else
271  encoder = std::make_unique<S32IAudioFrameEncoder>(nb_samples);
272  break;
273  case AV_SAMPLE_FMT_FLT:
274  encoder = std::make_unique<FltIAudioFrameEncoder>(nb_samples);
275  break;
276  case AV_SAMPLE_FMT_DBL:
277  encoder = std::make_unique<DblIAudioFrameEncoder>(nb_samples);
278  break;
279 
280  case AV_SAMPLE_FMT_U8P:
281  case AV_SAMPLE_FMT_S16P:
282  case AV_SAMPLE_FMT_S32P:
283  case AV_SAMPLE_FMT_FLTP:
284  encoder = std::make_unique<FltPAudioFrameEncoder>(nb_samples);
285  break;
286  case AV_SAMPLE_FMT_DBLP:
287  case AV_SAMPLE_FMT_S64:
288  case AV_SAMPLE_FMT_S64P:
289  break;
290  default:
291  break;
292  }
293  }
294 #endif
295  }
296 
297 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
298  static AVFrame* alloc_audio_frame(
299  enum AVSampleFormat sample_fmt, const AVChannelLayout* channel_layout,
300  int sample_rate, int nb_samples)
301  {
302  AVFrame* frame = av_frame_alloc();
303  if(!frame)
304  {
305  qDebug() << "Error allocating an audio frame";
306  exit(1);
307  }
308 
309  frame->format = sample_fmt;
310  frame->ch_layout.order = channel_layout->order;
311  frame->ch_layout.nb_channels = channel_layout->nb_channels;
312  frame->sample_rate = sample_rate;
313  frame->nb_samples = nb_samples;
314 
315  if(nb_samples)
316  {
317  if(av_frame_get_buffer(frame, 0) < 0)
318  {
319  qDebug() << "Error allocating an audio buffer";
320  exit(1);
321  }
322  }
323 
324  return frame;
325  }
326 #endif
327 
328  static AVFrame* alloc_video_frame(enum AVPixelFormat pix_fmt, int width, int height)
329  {
330  auto frame = av_frame_alloc();
331  if(!frame)
332  return NULL;
333 
334  frame->format = pix_fmt;
335  frame->width = width;
336  frame->height = height;
337 
338  /* allocate the buffers for the frame data */
339  const int ret = av_frame_get_buffer(frame, 0);
340  if(ret < 0)
341  {
342  qDebug() << "Could not allocate frame data.";
343  exit(1);
344  }
345 
346  return frame;
347  }
348 
349  void open_video(
350  const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
351  AVDictionary* opt_arg)
352  {
353  AVCodecContext* c = this->enc;
354  AVDictionary* opt = nullptr;
355 
356  av_dict_copy(&opt, opt_arg, 0);
357 
358  /* set some options */
359  int err = av_opt_set_double(this->enc->priv_data, "crf", 0.0, 0);
360  if(err < 0)
361  {
362  qDebug() << "failed to initialize encoder: " << av_to_string(err);
363  }
364 
365  /* open the codec */
366  SCORE_ASSERT(this->enc->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
367  int ret = avcodec_open2(this->enc, codec, &opt);
368  av_dict_free(&opt);
369  if(ret < 0)
370  {
371  qDebug() << "Could not open video codec: " << av_to_string(ret);
372  exit(1);
373  }
374 
375  /* allocate and init a re-usable frame */
376  this->cache_input_frame = alloc_video_frame(AV_PIX_FMT_RGBA, c->width, c->height);
377  if(!this->cache_input_frame)
378  {
379  qDebug() << "Could not allocate video frame";
380  exit(1);
381  }
382 
383  this->tmp_frame = nullptr;
384  // If conversion is needed :
385  // if(c->pix_fmt != AV_PIX_FMT_YUVJ420P)
386  {
387  auto input_fmt = av_get_pix_fmt(set.video_render_pixfmt.toStdString().c_str());
388  auto conv_fmt = av_get_pix_fmt(set.video_converted_pixfmt.toStdString().c_str());
389  SCORE_ASSERT(input_fmt != -1);
390  SCORE_ASSERT(conv_fmt != -1);
391  sws_ctx = sws_getContext(
392  set.width, set.height, input_fmt, set.width, set.height, conv_fmt, 1, nullptr,
393  nullptr, nullptr);
394  SCORE_ASSERT(sws_ctx);
395  this->tmp_frame = alloc_video_frame(conv_fmt, c->width, c->height);
396  if(!this->tmp_frame)
397  {
398  qDebug() << "Could not allocate temporary video frame";
399  exit(1);
400  }
401  }
402 
403  /* copy the stream parameters to the muxer */
404  ret = avcodec_parameters_from_context(this->st->codecpar, c);
405  if(ret < 0)
406  {
407  qDebug() << "Could not copy the stream parameters";
408  exit(1);
409  }
410  }
411 
412  void open(
413  const LibavOutputSettings& set, AVFormatContext* oc, const AVCodec* codec,
414  AVDictionary* opt_arg)
415  {
416  SCORE_ASSERT(oc);
417  SCORE_ASSERT(codec);
418  SCORE_ASSERT(opt_arg);
419  if(codec->type == AVMEDIA_TYPE_AUDIO)
420  {
421  open_audio(set, oc, codec, opt_arg);
422  }
423  else if(codec->type == AVMEDIA_TYPE_VIDEO)
424  {
425  open_video(set, oc, codec, opt_arg);
426  }
427  }
428 
429  void close(AVFormatContext* oc)
430  {
431  avcodec_free_context(&enc);
432  av_frame_free(&cache_input_frame);
433  av_frame_free(&tmp_frame);
434  av_packet_free(&tmp_pkt);
435  sws_freeContext(sws_ctx);
436  sws_ctx = nullptr;
437  }
438 
439  AVFrame* get_video_frame()
440  {
441  /* when we pass a frame to the encoder, it may keep a reference to it
442  * internally; make sure we do not overwrite it here */
443  if(av_frame_make_writable(this->cache_input_frame) < 0)
444  exit(1);
445 
446  this->cache_input_frame->pts = this->next_pts++;
447 
448  return this->cache_input_frame;
449  }
450 
451  AVFrame* get_audio_frame()
452  {
453  /* when we pass a frame to the encoder, it may keep a reference to it
454  * internally; make sure we do not overwrite it here */
455  if(av_frame_make_writable(this->cache_input_frame) < 0)
456  exit(1);
457 
458  this->cache_input_frame->pts = this->next_pts;
459  this->next_pts += this->enc->frame_size;
460 
461  return this->cache_input_frame;
462  }
463 
464  int write_video_frame(AVFormatContext* fmt_ctx, AVFrame* input_frame)
465  {
466 #if LIBSWSCALE_VERSION_INT >= AV_VERSION_INT(7, 5, 100)
467  // scale the frame
468  int ret = sws_scale_frame(sws_ctx, tmp_frame, input_frame);
469  if(ret < 0)
470  {
471  qDebug() << "Error during sws_scale_frame: " << av_to_string(ret);
472  exit(1);
473  }
474 
475  tmp_frame->quality = FF_LAMBDA_MAX; //c->global_quality;
476  tmp_frame->pict_type = AV_PICTURE_TYPE_I;
477  tmp_frame->pts++;
478 
479  // send the frame to the encoder
480  ret = avcodec_send_frame(enc, tmp_frame);
481  if(ret < 0)
482  {
483  qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
484  exit(1);
485  }
486 
487  while(ret >= 0)
488  {
489  ret = avcodec_receive_packet(enc, tmp_pkt);
490  if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
491  break;
492  else if(ret < 0)
493  {
494  qDebug() << "Error encoding a frame: " << av_to_string(ret);
495  exit(1);
496  }
497 
498  /* rescale output packet timestamp values from codec to stream timebase */
499  av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
500  tmp_pkt->stream_index = st->index;
501  tmp_pkt->flags |= AV_PKT_FLAG_KEY;
502 
503  ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
504  if(ret < 0)
505  {
506  qDebug() << "Error while writing output packet: " << av_to_string(ret);
507  exit(1);
508  }
509  }
510 
511  return ret == AVERROR_EOF ? 1 : 0;
512 #endif
513  return 1;
514  }
515 
516  // #define SRC_RATE SAMPLE_RATE_TEST
517  // #define DST_RATE SAMPLE_RATE_TEST
518  // static int64_t conv_audio_pts(SwrContext* ctx, int64_t in, int sample_rate)
519  // {
520  // //int64_t d = (int64_t) AUDIO_RATE * AUDIO_RATE;
521  // int64_t d = (int64_t)sample_rate * sample_rate;
522  //
523  // /* Convert from audio_src_tb to 1/(src_samplerate * dst_samplerate) */
524  // in = av_rescale_rnd(in, d, SRC_RATE, AV_ROUND_NEAR_INF);
525  //
526  // /* In units of 1/(src_samplerate * dst_samplerate) */
527  // in = swr_next_pts(ctx, in);
528  //
529  // /* Convert from 1/(src_samplerate * dst_samplerate) to audio_dst_tb */
530  // return av_rescale_rnd(in, DST_RATE, d, AV_ROUND_NEAR_INF);
531  // }
532 
533  int write_audio_frame(AVFormatContext* fmt_ctx, AVFrame* input_frame)
534  {
535  // send the frame to the encoder
536  int ret = avcodec_send_frame(enc, input_frame);
537  if(ret < 0)
538  {
539  qDebug() << "Error sending a frame to the encoder: " << av_to_string(ret);
540  exit(1);
541  }
542 
543  while(ret >= 0)
544  {
545  ret = avcodec_receive_packet(enc, tmp_pkt);
546  if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
547  break;
548  else if(ret < 0)
549  {
550  qDebug() << "Error encoding a frame: " << av_to_string(ret);
551  exit(1);
552  }
553 
554  /* rescale output packet timestamp values from codec to stream timebase */
555  av_packet_rescale_ts(tmp_pkt, enc->time_base, st->time_base);
556  tmp_pkt->stream_index = st->index;
557 
558  ret = av_interleaved_write_frame(fmt_ctx, tmp_pkt);
559  if(ret < 0)
560  {
561  qDebug() << "Error while writing output packet: " << av_to_string(ret);
562  exit(1);
563  }
564  }
565 
566  return ret == AVERROR_EOF ? 1 : 0;
567  }
568 };
569 }
Definition: score-plugin-audio/Audio/Settings/Model.hpp:22
Binds the rendering pipeline to ossia processes.
Definition: CameraDevice.cpp:28
Definition: LibavOutputSettings.hpp:16
Definition: LibavOutputStream.hpp:39
Definition: LibavOutputStream.hpp:32
T & settings() const
Access a specific Settings model instance.
Definition: ApplicationContext.hpp:40