FFmpeg  1.2.12
proresenc_kostya.c
Go to the documentation of this file.
1 /*
2  * Apple ProRes encoder
3  *
4  * Copyright (c) 2012 Konstantin Shishkov
5  *
6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
7  * considering the similarities in the bugs.
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/opt.h"
27 #include "avcodec.h"
28 #include "dsputil.h"
29 #include "put_bits.h"
30 #include "bytestream.h"
31 #include "internal.h"
32 #include "proresdsp.h"
33 #include "proresdata.h"
34 
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
37 
38 #define MAX_MBS_PER_SLICE 8
39 
40 #define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10
41 
/* Encoder profiles, listed in the same order as the entries of
 * prores_profile_info[] (proxy, LT, standard, high quality). */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};
48 
/* Quantisation matrix selectors, listed in the same order as the rows of
 * prores_quant_matrices[] (proxy, LT, standard, high quality, codec default). */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_DEFAULT,
};
56 
/* Base quantisation matrices, one 8x8 matrix (64 entries) per row.
 * The encoder multiplies a matrix by the slice quantiser to obtain the
 * actual per-coefficient divisors. */
static const uint8_t prores_quant_matrices[][64] = {
    /* proxy */
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    /* LT */
    {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    /* standard */
    {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    /* high quality */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    /* codec default */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
109 
/* Number of frame-size classes distinguished when picking a bit budget. */
#define NUM_MB_LIMITS 4

/* Upper macroblock count of each frame-size class, in ascending order. */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, /* up to 720x576   */
    2700, /* up to 960x720   */
    6075, /* up to 1440x1080 */
    9216, /* up to 2048x1152 */
};
117 
118 static const struct prores_profile {
119  const char *full_name;
120  uint32_t tag;
124  int quant;
125 } prores_profile_info[4] = {
126  {
127  .full_name = "proxy",
128  .tag = MKTAG('a', 'p', 'c', 'o'),
129  .min_quant = 4,
130  .max_quant = 8,
131  .br_tab = { 300, 242, 220, 194 },
132  .quant = QUANT_MAT_PROXY,
133  },
134  {
135  .full_name = "LT",
136  .tag = MKTAG('a', 'p', 'c', 's'),
137  .min_quant = 1,
138  .max_quant = 9,
139  .br_tab = { 720, 560, 490, 440 },
140  .quant = QUANT_MAT_LT,
141  },
142  {
143  .full_name = "standard",
144  .tag = MKTAG('a', 'p', 'c', 'n'),
145  .min_quant = 1,
146  .max_quant = 6,
147  .br_tab = { 1050, 808, 710, 632 },
148  .quant = QUANT_MAT_STANDARD,
149  },
150  {
151  .full_name = "high quality",
152  .tag = MKTAG('a', 'p', 'c', 'h'),
153  .min_quant = 1,
154  .max_quant = 6,
155  .br_tab = { 1566, 1216, 1070, 950 },
156  .quant = QUANT_MAT_HQ,
157  }
158 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
159 };
160 
/* Number of trellis nodes reserved per slice column. */
#define TRELLIS_WIDTH 16
/* Score meaning "not usable"; parenthesised so the macro is safe inside
 * larger expressions. */
#define SCORE_LIMIT   (INT_MAX / 2)
163 
/* One node of the per-row quantiser trellis. */
struct TrellisNode {
    int prev_node; /* index of the predecessor node, -1 when unset */
    int quant;     /* quantiser represented by this node */
    int bits;      /* accumulated bit count up to and including this slice */
    int score;     /* accumulated error score up to and including this slice */
};
170 
171 #define MAX_STORED_Q 16
172 
173 typedef struct ProresThreadData {
174  DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
175  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
176  int16_t custom_q[64];
179 
180 typedef struct ProresContext {
181  AVClass *class;
183  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
184  int16_t quants[MAX_STORED_Q][64];
185  int16_t custom_q[64];
187 
190 
196  int pictures_per_frame; // 1 for progressive, 2 for interlaced
201 
202  char *vendor;
204 
206 
207  int profile;
209 
210  int *slice_q;
211 
213 } ProresContext;
214 
215 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
216  int linesize, int x, int y, int w, int h,
217  int16_t *blocks, uint16_t *emu_buf,
218  int mbs_per_slice, int blocks_per_mb, int is_chroma)
219 {
220  const uint16_t *esrc;
221  const int mb_width = 4 * blocks_per_mb;
222  int elinesize;
223  int i, j, k;
224 
225  for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
226  if (x >= w) {
227  memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
228  * sizeof(*blocks));
229  return;
230  }
231  if (x + mb_width <= w && y + 16 <= h) {
232  esrc = src;
233  elinesize = linesize;
234  } else {
235  int bw, bh, pix;
236 
237  esrc = emu_buf;
238  elinesize = 16 * sizeof(*emu_buf);
239 
240  bw = FFMIN(w - x, mb_width);
241  bh = FFMIN(h - y, 16);
242 
243  for (j = 0; j < bh; j++) {
244  memcpy(emu_buf + j * 16,
245  (const uint8_t*)src + j * linesize,
246  bw * sizeof(*src));
247  pix = emu_buf[j * 16 + bw - 1];
248  for (k = bw; k < mb_width; k++)
249  emu_buf[j * 16 + k] = pix;
250  }
251  for (; j < 16; j++)
252  memcpy(emu_buf + j * 16,
253  emu_buf + (bh - 1) * 16,
254  mb_width * sizeof(*emu_buf));
255  }
256  if (!is_chroma) {
257  ctx->dsp.fdct(esrc, elinesize, blocks);
258  blocks += 64;
259  if (blocks_per_mb > 2) {
260  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
261  blocks += 64;
262  }
263  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
264  blocks += 64;
265  if (blocks_per_mb > 2) {
266  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
267  blocks += 64;
268  }
269  } else {
270  ctx->dsp.fdct(esrc, elinesize, blocks);
271  blocks += 64;
272  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
273  blocks += 64;
274  if (blocks_per_mb > 2) {
275  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
276  blocks += 64;
277  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
278  blocks += 64;
279  }
280  }
281 
282  x += mb_width;
283  }
284 }
285 
289 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
290 {
291  unsigned int rice_order, exp_order, switch_bits, switch_val;
292  int exponent;
293 
294  /* number of prefix bits to switch between Rice and expGolomb */
295  switch_bits = (codebook & 3) + 1;
296  rice_order = codebook >> 5; /* rice code order */
297  exp_order = (codebook >> 2) & 7; /* exp golomb code order */
298 
299  switch_val = switch_bits << rice_order;
300 
301  if (val >= switch_val) {
302  val -= switch_val - (1 << exp_order);
303  exponent = av_log2(val);
304 
305  put_bits(pb, exponent - exp_order + switch_bits, 0);
306  put_bits(pb, exponent + 1, val);
307  } else {
308  exponent = val >> rice_order;
309 
310  if (exponent)
311  put_bits(pb, exponent, 0);
312  put_bits(pb, 1, 1);
313  if (rice_order)
314  put_sbits(pb, rice_order, val);
315  }
316 }
317 
/* -1 for negative x, 0 otherwise (x is expected to be a 32-bit int). */
#define GET_SIGN(x)  ((x) >> 31)
/* Map a signed value to an unsigned code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ...
 * Uses a multiplication because left-shifting a negative value is undefined. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
320 
321 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
322  int blocks_per_slice, int scale)
323 {
324  int i;
325  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
326 
327  prev_dc = (blocks[0] - 0x4000) / scale;
329  sign = 0;
330  codebook = 3;
331  blocks += 64;
332 
333  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
334  dc = (blocks[0] - 0x4000) / scale;
335  delta = dc - prev_dc;
336  new_sign = GET_SIGN(delta);
337  delta = (delta ^ sign) - sign;
338  code = MAKE_CODE(delta);
339  encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
340  codebook = (code + (code & 1)) >> 1;
341  codebook = FFMIN(codebook, 3);
342  sign = new_sign;
343  prev_dc = dc;
344  }
345 }
346 
347 static void encode_acs(PutBitContext *pb, int16_t *blocks,
348  int blocks_per_slice,
349  int plane_size_factor,
350  const uint8_t *scan, const int16_t *qmat)
351 {
352  int idx, i;
353  int run, level, run_cb, lev_cb;
354  int max_coeffs, abs_level;
355 
356  max_coeffs = blocks_per_slice << 6;
357  run_cb = ff_prores_run_to_cb_index[4];
358  lev_cb = ff_prores_lev_to_cb_index[2];
359  run = 0;
360 
361  for (i = 1; i < 64; i++) {
362  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
363  level = blocks[idx] / qmat[scan[i]];
364  if (level) {
365  abs_level = FFABS(level);
366  encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
368  abs_level - 1);
369  put_sbits(pb, 1, GET_SIGN(level));
370 
371  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
372  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
373  run = 0;
374  } else {
375  run++;
376  }
377  }
378  }
379 }
380 
382  const uint16_t *src, int linesize,
383  int mbs_per_slice, int16_t *blocks,
384  int blocks_per_mb, int plane_size_factor,
385  const int16_t *qmat)
386 {
387  int blocks_per_slice, saved_pos;
388 
389  saved_pos = put_bits_count(pb);
390  blocks_per_slice = mbs_per_slice * blocks_per_mb;
391 
392  encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
393  encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
394  ctx->scantable.permutated, qmat);
395  flush_put_bits(pb);
396 
397  return (put_bits_count(pb) - saved_pos) >> 3;
398 }
399 
400 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
401  PutBitContext *pb,
402  int sizes[4], int x, int y, int quant,
403  int mbs_per_slice)
404 {
405  ProresContext *ctx = avctx->priv_data;
406  int i, xp, yp;
407  int total_size = 0;
408  const uint16_t *src;
409  int slice_width_factor = av_log2(mbs_per_slice);
410  int num_cblocks, pwidth, linesize, line_add;
411  int plane_factor, is_chroma;
412  uint16_t *qmat;
413 
414  if (ctx->pictures_per_frame == 1)
415  line_add = 0;
416  else
417  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
418 
419  if (ctx->force_quant) {
420  qmat = ctx->quants[0];
421  } else if (quant < MAX_STORED_Q) {
422  qmat = ctx->quants[quant];
423  } else {
424  qmat = ctx->custom_q;
425  for (i = 0; i < 64; i++)
426  qmat[i] = ctx->quant_mat[i] * quant;
427  }
428 
429  for (i = 0; i < ctx->num_planes; i++) {
430  is_chroma = (i == 1 || i == 2);
431  plane_factor = slice_width_factor + 2;
432  if (is_chroma)
433  plane_factor += ctx->chroma_factor - 3;
434  if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
435  xp = x << 4;
436  yp = y << 4;
437  num_cblocks = 4;
438  pwidth = avctx->width;
439  } else {
440  xp = x << 3;
441  yp = y << 4;
442  num_cblocks = 2;
443  pwidth = avctx->width >> 1;
444  }
445 
446  linesize = pic->linesize[i] * ctx->pictures_per_frame;
447  src = (const uint16_t*)(pic->data[i] + yp * linesize +
448  line_add * pic->linesize[i]) + xp;
449 
450  get_slice_data(ctx, src, linesize, xp, yp,
451  pwidth, avctx->height / ctx->pictures_per_frame,
452  ctx->blocks[0], ctx->emu_buf,
453  mbs_per_slice, num_cblocks, is_chroma);
454  sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
455  mbs_per_slice, ctx->blocks[0],
456  num_cblocks, plane_factor,
457  qmat);
458  total_size += sizes[i];
459  if (put_bits_left(pb) < 0) {
460  av_log(avctx, AV_LOG_ERROR, "Serious underevaluation of"
461  "required buffer size");
463  }
464  }
465  return total_size;
466 }
467 
/**
 * Return the number of bits encode_vlc_codeword() would emit for val with
 * the given codebook descriptor, without writing anything.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;       /* rice code order */
    const unsigned int exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
    const unsigned int switch_val  = switch_bits << rice_order;

    /* Rice branch: unary quotient + stop bit + remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb branch */
    val -= switch_val - (1 << exp_order);

    return av_log2(val) * 2 - exp_order + switch_bits + 1;
}
489 
490 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
491  int scale)
492 {
493  int i;
494  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
495  int bits;
496 
497  prev_dc = (blocks[0] - 0x4000) / scale;
498  bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
499  sign = 0;
500  codebook = 3;
501  blocks += 64;
502  *error += FFABS(blocks[0] - 0x4000) % scale;
503 
504  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
505  dc = (blocks[0] - 0x4000) / scale;
506  *error += FFABS(blocks[0] - 0x4000) % scale;
507  delta = dc - prev_dc;
508  new_sign = GET_SIGN(delta);
509  delta = (delta ^ sign) - sign;
510  code = MAKE_CODE(delta);
511  bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
512  codebook = (code + (code & 1)) >> 1;
513  codebook = FFMIN(codebook, 3);
514  sign = new_sign;
515  prev_dc = dc;
516  }
517 
518  return bits;
519 }
520 
521 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
522  int plane_size_factor,
523  const uint8_t *scan, const int16_t *qmat)
524 {
525  int idx, i;
526  int run, level, run_cb, lev_cb;
527  int max_coeffs, abs_level;
528  int bits = 0;
529 
530  max_coeffs = blocks_per_slice << 6;
531  run_cb = ff_prores_run_to_cb_index[4];
532  lev_cb = ff_prores_lev_to_cb_index[2];
533  run = 0;
534 
535  for (i = 1; i < 64; i++) {
536  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
537  level = blocks[idx] / qmat[scan[i]];
538  *error += FFABS(blocks[idx]) % qmat[scan[i]];
539  if (level) {
540  abs_level = FFABS(level);
541  bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
542  bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
543  abs_level - 1) + 1;
544 
545  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
546  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
547  run = 0;
548  } else {
549  run++;
550  }
551  }
552  }
553 
554  return bits;
555 }
556 
557 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
558  const uint16_t *src, int linesize,
559  int mbs_per_slice,
560  int blocks_per_mb, int plane_size_factor,
561  const int16_t *qmat, ProresThreadData *td)
562 {
563  int blocks_per_slice;
564  int bits;
565 
566  blocks_per_slice = mbs_per_slice * blocks_per_mb;
567 
568  bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
569  bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
570  plane_size_factor, ctx->scantable.permutated, qmat);
571 
572  return FFALIGN(bits, 8);
573 }
574 
575 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
576  int trellis_node, int x, int y, int mbs_per_slice,
577  ProresThreadData *td)
578 {
579  ProresContext *ctx = avctx->priv_data;
580  int i, q, pq, xp, yp;
581  const uint16_t *src;
582  int slice_width_factor = av_log2(mbs_per_slice);
583  int num_cblocks[MAX_PLANES], pwidth;
584  int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
585  const int min_quant = ctx->profile_info->min_quant;
586  const int max_quant = ctx->profile_info->max_quant;
587  int error, bits, bits_limit;
588  int mbs, prev, cur, new_score;
589  int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
590  int overquant;
591  uint16_t *qmat;
592  int linesize[4], line_add;
593 
594  if (ctx->pictures_per_frame == 1)
595  line_add = 0;
596  else
597  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
598  mbs = x + mbs_per_slice;
599 
600  for (i = 0; i < ctx->num_planes; i++) {
601  is_chroma[i] = (i == 1 || i == 2);
602  plane_factor[i] = slice_width_factor + 2;
603  if (is_chroma[i])
604  plane_factor[i] += ctx->chroma_factor - 3;
605  if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
606  xp = x << 4;
607  yp = y << 4;
608  num_cblocks[i] = 4;
609  pwidth = avctx->width;
610  } else {
611  xp = x << 3;
612  yp = y << 4;
613  num_cblocks[i] = 2;
614  pwidth = avctx->width >> 1;
615  }
616 
617  linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
618  src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
619  line_add * pic->linesize[i]) + xp;
620 
621  get_slice_data(ctx, src, linesize[i], xp, yp,
622  pwidth, avctx->height / ctx->pictures_per_frame,
623  td->blocks[i], td->emu_buf,
624  mbs_per_slice, num_cblocks[i], is_chroma[i]);
625  }
626 
627  for (q = min_quant; q < max_quant + 2; q++) {
628  td->nodes[trellis_node + q].prev_node = -1;
629  td->nodes[trellis_node + q].quant = q;
630  }
631 
632  // todo: maybe perform coarser quantising to fit into frame size when needed
633  for (q = min_quant; q <= max_quant; q++) {
634  bits = 0;
635  error = 0;
636  for (i = 0; i < ctx->num_planes; i++) {
637  bits += estimate_slice_plane(ctx, &error, i,
638  src, linesize[i],
639  mbs_per_slice,
640  num_cblocks[i], plane_factor[i],
641  ctx->quants[q], td);
642  }
643  if (bits > 65000 * 8) {
644  error = SCORE_LIMIT;
645  break;
646  }
647  slice_bits[q] = bits;
648  slice_score[q] = error;
649  }
650  if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
651  slice_bits[max_quant + 1] = slice_bits[max_quant];
652  slice_score[max_quant + 1] = slice_score[max_quant] + 1;
653  overquant = max_quant;
654  } else {
655  for (q = max_quant + 1; q < 128; q++) {
656  bits = 0;
657  error = 0;
658  if (q < MAX_STORED_Q) {
659  qmat = ctx->quants[q];
660  } else {
661  qmat = td->custom_q;
662  for (i = 0; i < 64; i++)
663  qmat[i] = ctx->quant_mat[i] * q;
664  }
665  for (i = 0; i < ctx->num_planes; i++) {
666  bits += estimate_slice_plane(ctx, &error, i,
667  src, linesize[i],
668  mbs_per_slice,
669  num_cblocks[i], plane_factor[i],
670  qmat, td);
671  }
672  if (bits <= ctx->bits_per_mb * mbs_per_slice)
673  break;
674  }
675 
676  slice_bits[max_quant + 1] = bits;
677  slice_score[max_quant + 1] = error;
678  overquant = q;
679  }
680  td->nodes[trellis_node + max_quant + 1].quant = overquant;
681 
682  bits_limit = mbs * ctx->bits_per_mb;
683  for (pq = min_quant; pq < max_quant + 2; pq++) {
684  prev = trellis_node - TRELLIS_WIDTH + pq;
685 
686  for (q = min_quant; q < max_quant + 2; q++) {
687  cur = trellis_node + q;
688 
689  bits = td->nodes[prev].bits + slice_bits[q];
690  error = slice_score[q];
691  if (bits > bits_limit)
692  error = SCORE_LIMIT;
693 
694  if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
695  new_score = td->nodes[prev].score + error;
696  else
697  new_score = SCORE_LIMIT;
698  if (td->nodes[cur].prev_node == -1 ||
699  td->nodes[cur].score >= new_score) {
700 
701  td->nodes[cur].bits = bits;
702  td->nodes[cur].score = new_score;
703  td->nodes[cur].prev_node = prev;
704  }
705  }
706  }
707 
708  error = td->nodes[trellis_node + min_quant].score;
709  pq = trellis_node + min_quant;
710  for (q = min_quant + 1; q < max_quant + 2; q++) {
711  if (td->nodes[trellis_node + q].score <= error) {
712  error = td->nodes[trellis_node + q].score;
713  pq = trellis_node + q;
714  }
715  }
716 
717  return pq;
718 }
719 
720 static int find_quant_thread(AVCodecContext *avctx, void *arg,
721  int jobnr, int threadnr)
722 {
723  ProresContext *ctx = avctx->priv_data;
724  ProresThreadData *td = ctx->tdata + threadnr;
725  int mbs_per_slice = ctx->mbs_per_slice;
726  int x, y = jobnr, mb, q = 0;
727 
728  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
729  while (ctx->mb_width - x < mbs_per_slice)
730  mbs_per_slice >>= 1;
731  q = find_slice_quant(avctx, avctx->coded_frame,
732  (mb + 1) * TRELLIS_WIDTH, x, y,
733  mbs_per_slice, td);
734  }
735 
736  for (x = ctx->slices_width - 1; x >= 0; x--) {
737  ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
738  q = td->nodes[q].prev_node;
739  }
740 
741  return 0;
742 }
743 
745  const AVFrame *pic, int *got_packet)
746 {
747  ProresContext *ctx = avctx->priv_data;
748  uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
749  uint8_t *picture_size_pos;
750  PutBitContext pb;
751  int x, y, i, mb, q = 0;
752  int sizes[4] = { 0 };
753  int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
754  int frame_size, picture_size, slice_size;
755  int pkt_size, ret;
756  uint8_t frame_flags;
757 
758  *avctx->coded_frame = *pic;
760  avctx->coded_frame->key_frame = 1;
761 
762  pkt_size = ctx->frame_size_upper_bound;
763 
764  if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
765  return ret;
766 
767  orig_buf = pkt->data;
768 
769  // frame atom
770  orig_buf += 4; // frame size
771  bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
772  buf = orig_buf;
773 
774  // frame header
775  tmp = buf;
776  buf += 2; // frame header size will be stored here
777  bytestream_put_be16 (&buf, 0); // version 1
778  bytestream_put_buffer(&buf, ctx->vendor, 4);
779  bytestream_put_be16 (&buf, avctx->width);
780  bytestream_put_be16 (&buf, avctx->height);
781 
782  frame_flags = ctx->chroma_factor << 6;
783  if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
784  frame_flags |= pic->top_field_first ? 0x04 : 0x08;
785  bytestream_put_byte (&buf, frame_flags);
786 
787  bytestream_put_byte (&buf, 0); // reserved
788  bytestream_put_byte (&buf, avctx->color_primaries);
789  bytestream_put_byte (&buf, avctx->color_trc);
790  bytestream_put_byte (&buf, avctx->colorspace);
791  bytestream_put_byte (&buf, 0x40); // source format and alpha information
792  bytestream_put_byte (&buf, 0); // reserved
793  if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
794  bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
795  // luma quantisation matrix
796  for (i = 0; i < 64; i++)
797  bytestream_put_byte(&buf, ctx->quant_mat[i]);
798  // chroma quantisation matrix
799  for (i = 0; i < 64; i++)
800  bytestream_put_byte(&buf, ctx->quant_mat[i]);
801  } else {
802  bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
803  }
804  bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
805 
806  for (ctx->cur_picture_idx = 0;
808  ctx->cur_picture_idx++) {
809  // picture header
810  picture_size_pos = buf + 1;
811  bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
812  buf += 4; // picture data size will be stored here
813  bytestream_put_be16 (&buf, ctx->slices_per_picture);
814  bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
815 
816  // seek table - will be filled during slice encoding
817  slice_sizes = buf;
818  buf += ctx->slices_per_picture * 2;
819 
820  // slices
821  if (!ctx->force_quant) {
822  ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
823  ctx->mb_height);
824  if (ret)
825  return ret;
826  }
827 
828  for (y = 0; y < ctx->mb_height; y++) {
829  int mbs_per_slice = ctx->mbs_per_slice;
830  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
831  q = ctx->force_quant ? ctx->force_quant
832  : ctx->slice_q[mb + y * ctx->slices_width];
833 
834  while (ctx->mb_width - x < mbs_per_slice)
835  mbs_per_slice >>= 1;
836 
837  bytestream_put_byte(&buf, slice_hdr_size << 3);
838  slice_hdr = buf;
839  buf += slice_hdr_size - 1;
840  init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
841  ret = encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
842  if (ret < 0)
843  return ret;
844 
845  bytestream_put_byte(&slice_hdr, q);
846  slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
847  for (i = 0; i < ctx->num_planes - 1; i++) {
848  bytestream_put_be16(&slice_hdr, sizes[i]);
849  slice_size += sizes[i];
850  }
851  bytestream_put_be16(&slice_sizes, slice_size);
852  buf += slice_size - slice_hdr_size;
853  }
854  }
855 
856  picture_size = buf - (picture_size_pos - 1);
857  bytestream_put_be32(&picture_size_pos, picture_size);
858  }
859 
860  orig_buf -= 8;
861  frame_size = buf - orig_buf;
862  bytestream_put_be32(&orig_buf, frame_size);
863 
864  pkt->size = frame_size;
865  pkt->flags |= AV_PKT_FLAG_KEY;
866  *got_packet = 1;
867 
868  return 0;
869 }
870 
872 {
873  ProresContext *ctx = avctx->priv_data;
874  int i;
875 
876  av_freep(&avctx->coded_frame);
877 
878  if (ctx->tdata) {
879  for (i = 0; i < avctx->thread_count; i++)
880  av_free(ctx->tdata[i].nodes);
881  }
882  av_freep(&ctx->tdata);
883  av_freep(&ctx->slice_q);
884 
885  return 0;
886 }
887 
889 {
890  ProresContext *ctx = avctx->priv_data;
891  int mps;
892  int i, j;
893  int min_quant, max_quant;
894  int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
895 
896  avctx->bits_per_raw_sample = 10;
897  avctx->coded_frame = avcodec_alloc_frame();
898  if (!avctx->coded_frame)
899  return AVERROR(ENOMEM);
900 
901  ff_proresdsp_init(&ctx->dsp, avctx);
902  ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
903  interlaced ? ff_prores_interlaced_scan
905 
906  mps = ctx->mbs_per_slice;
907  if (mps & (mps - 1)) {
908  av_log(avctx, AV_LOG_ERROR,
909  "there should be an integer power of two MBs per slice\n");
910  return AVERROR(EINVAL);
911  }
912 
914  ? CFACTOR_Y422
915  : CFACTOR_Y444;
917  ctx->num_planes = 3;
918 
919  ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
920 
921  if (interlaced)
922  ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
923  else
924  ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
925 
926  ctx->slices_width = ctx->mb_width / mps;
927  ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
928  ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
929  ctx->pictures_per_frame = 1 + interlaced;
930 
931  if (ctx->quant_sel == -1)
933  else
935 
936  if (strlen(ctx->vendor) != 4) {
937  av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
938  return AVERROR_INVALIDDATA;
939  }
940 
941  ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
942  if (!ctx->force_quant) {
943  if (!ctx->bits_per_mb) {
944  for (i = 0; i < NUM_MB_LIMITS - 1; i++)
945  if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
946  ctx->pictures_per_frame)
947  break;
948  ctx->bits_per_mb = ctx->profile_info->br_tab[i];
949  } else if (ctx->bits_per_mb < 128) {
950  av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
951  return AVERROR_INVALIDDATA;
952  }
953 
954  min_quant = ctx->profile_info->min_quant;
955  max_quant = ctx->profile_info->max_quant;
956  for (i = min_quant; i < MAX_STORED_Q; i++) {
957  for (j = 0; j < 64; j++)
958  ctx->quants[i][j] = ctx->quant_mat[j] * i;
959  }
960 
961  ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
962  if (!ctx->slice_q) {
963  encode_close(avctx);
964  return AVERROR(ENOMEM);
965  }
966 
967  ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
968  if (!ctx->tdata) {
969  encode_close(avctx);
970  return AVERROR(ENOMEM);
971  }
972 
973  for (j = 0; j < avctx->thread_count; j++) {
974  ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
975  * TRELLIS_WIDTH
976  * sizeof(*ctx->tdata->nodes));
977  if (!ctx->tdata[j].nodes) {
978  encode_close(avctx);
979  return AVERROR(ENOMEM);
980  }
981  for (i = min_quant; i < max_quant + 2; i++) {
982  ctx->tdata[j].nodes[i].prev_node = -1;
983  ctx->tdata[j].nodes[i].bits = 0;
984  ctx->tdata[j].nodes[i].score = 0;
985  }
986  }
987  } else {
988  int ls = 0;
989 
990  if (ctx->force_quant > 64) {
991  av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
992  return AVERROR_INVALIDDATA;
993  }
994 
995  for (j = 0; j < 64; j++) {
996  ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
997  ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
998  }
999 
1000  ctx->bits_per_mb = ls * 8;
1001  if (ctx->chroma_factor == CFACTOR_Y444)
1002  ctx->bits_per_mb += ls * 4;
1003  if (ctx->num_planes == 4)
1004  ctx->bits_per_mb += ls * 4;
1005  }
1006 
1008  ctx->slices_per_picture *
1009  (2 + 2 * ctx->num_planes +
1010  (mps * ctx->bits_per_mb) / 8)
1011  + 200;
1012 
1013  avctx->codec_tag = ctx->profile_info->tag;
1014 
1015  av_log(avctx, AV_LOG_DEBUG,
1016  "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1017  ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1018  interlaced ? "yes" : "no", ctx->bits_per_mb);
1019  av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1020  ctx->frame_size_upper_bound);
1021 
1022  return 0;
1023 }
1024 
1025 #define OFFSET(x) offsetof(ProresContext, x)
1026 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1027 
1028 static const AVOption options[] = {
1029  { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1030  AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1031  { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1032  { .i64 = PRORES_PROFILE_STANDARD },
1033  PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1034  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1035  0, 0, VE, "profile" },
1036  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1037  0, 0, VE, "profile" },
1038  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1039  0, 0, VE, "profile" },
1040  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1041  0, 0, VE, "profile" },
1042  { "vendor", "vendor ID", OFFSET(vendor),
1043  AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1044  { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1045  AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1046  { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1047  { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1048  { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1049  0, 0, VE, "quant_mat" },
1050  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1051  0, 0, VE, "quant_mat" },
1052  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1053  0, 0, VE, "quant_mat" },
1054  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1055  0, 0, VE, "quant_mat" },
1056  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1057  0, 0, VE, "quant_mat" },
1058  { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1059  0, 0, VE, "quant_mat" },
1060  { NULL }
1061 };
1062 
1063 static const AVClass proresenc_class = {
1064  .class_name = "ProRes encoder",
1065  .item_name = av_default_item_name,
1066  .option = options,
1067  .version = LIBAVUTIL_VERSION_INT,
1068 };
1069 
1071  .name = "prores_kostya",
1072  .type = AVMEDIA_TYPE_VIDEO,
1073  .id = AV_CODEC_ID_PRORES,
1074  .priv_data_size = sizeof(ProresContext),
1075  .init = encode_init,
1076  .close = encode_close,
1077  .encode2 = encode_frame,
1078  .capabilities = CODEC_CAP_SLICE_THREADS,
1079  .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1080  .pix_fmts = (const enum AVPixelFormat[]) {
1082  },
1083  .priv_class = &proresenc_class,
1084 };