FFmpeg  2.8.15
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
vp9.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "avcodec.h"
25 #include "get_bits.h"
26 #include "internal.h"
27 #include "thread.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dsp.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
35 
36 #define VP9_SYNCCODE 0x498342
37 
42 };
43 
44 enum BlockLevel {
49 };
50 
51 enum BlockSize {
66 };
67 
// Per-8x8-block motion info kept alongside each decoded frame; the decoder
// stores one mv/ref pair per reference slot (2 slots, matching compound
// prediction's two references).
68 struct VP9mvrefPair {
69  VP56mv mv[2]; // motion vectors, one per reference slot
70  int8_t ref[2]; // reference indices for each slot; NOTE(review): negative values presumably mean intra/unused — confirm against callers
71 };
72 
73 typedef struct VP9Frame {
77  struct VP9mvrefPair *mv;
79 } VP9Frame;
80 
// Loopfilter state for one 64x64 superblock: a per-8x8-unit filter level
// plus edge masks consumed when the in-loop deblocking filter is applied.
81 struct VP9Filter {
82  uint8_t level[8 * 8]; // filter strength for each 8x8 unit of the superblock
83  uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84  [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
85 };
86 
87 typedef struct VP9Block {
90  VP56mv mv[4 /* b_idx */][2 /* ref */];
91  enum BlockSize bs;
92  enum TxfmMode tx, uvtx;
93  enum BlockLevel bl;
95 } VP9Block;
96 
97 typedef struct VP9Context {
103  unsigned c_b_size;
105  int pass;
106  int row, row7, col, col7;
108  ptrdiff_t y_stride, uv_stride;
109 
110  // bitstream header
131 #define CUR_FRAME 0
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
135 
136  struct {
138  int8_t sharpness;
141  } filter;
142  struct {
144  int8_t mode[2];
145  int8_t ref[4];
146  } lf_delta;
150 #define MAX_SEGMENT 8
151  struct {
157  struct {
163  int16_t q_val;
164  int8_t lf_val;
165  int16_t qmul[2][2];
166  uint8_t lflvl[4][2];
167  } feat[MAX_SEGMENT];
168  } segmentation;
169  struct {
171  unsigned tile_cols, tile_rows;
173  } tiling;
174  unsigned sb_cols, sb_rows, rows, cols;
175  struct {
177  uint8_t coef[4][2][2][6][6][3];
178  } prob_ctx[4];
179  struct {
180  prob_context p;
181  uint8_t coef[4][2][2][6][6][11];
184  } prob;
185  struct {
186  unsigned y_mode[4][10];
187  unsigned uv_mode[10][10];
188  unsigned filter[4][3];
189  unsigned mv_mode[7][4];
190  unsigned intra[4][2];
191  unsigned comp[5][2];
192  unsigned single_ref[5][2][2];
193  unsigned comp_ref[5][2];
194  unsigned tx32p[2][4];
195  unsigned tx16p[2][3];
196  unsigned tx8p[2][2];
197  unsigned skip[3][2];
198  unsigned mv_joint[4];
199  struct {
200  unsigned sign[2];
201  unsigned classes[11];
202  unsigned class0[2];
203  unsigned bits[10][2];
204  unsigned class0_fp[2][4];
205  unsigned fp[4];
206  unsigned class0_hp[2];
207  unsigned hp[2];
208  } mv_comp[2];
209  unsigned partition[4][4][4];
210  unsigned coef[4][2][2][6][6][3];
211  unsigned eob[4][2][2][6][6][2];
212  } counts;
215 
216  // contextual (left/above) cache
231  // FIXME maybe merge some of the below in a flags field?
242 
243  // whole-frame cache
245  struct VP9Filter *lflvl;
247 
248  // block reconstruction intermediates
250  int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
252  struct { int x, y; } min_mv, max_mv;
253  DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254  DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
255  uint16_t mvscale[3][2];
257 } VP9Context;
258 
// Block width/height lookup, indexed as bwh_tab[unit][block_size][0=w, 1=h].
// NOTE(review): the first index appears to select the measurement unit —
// row 0 matches the block dimensions in 4x4 units (e.g. 64x64 -> {16,16}),
// row 1 the same dimensions in 8x8 units clamped to a minimum of 1; confirm
// against the N_BS_SIZES enum and callers (enum body is elided in this view).
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
260  {
261  { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262  { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
263  }, {
264  { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265  { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
266  }
267 };
268 
270 {
271  VP9Context *s = ctx->priv_data;
272  int ret, sz;
273 
274  if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
275  return ret;
276  sz = 64 * s->sb_cols * s->sb_rows;
277  if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
278  ff_thread_release_buffer(ctx, &f->tf);
279  return AVERROR(ENOMEM);
280  }
281 
283  f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
284 
285  return 0;
286 }
287 
289 {
290  ff_thread_release_buffer(ctx, &f->tf);
292  f->segmentation_map = NULL;
293 }
294 
296 {
297  int res;
298 
299  if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300  return res;
301  } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
302  vp9_unref_frame(ctx, dst);
303  return AVERROR(ENOMEM);
304  }
305 
307  dst->mv = src->mv;
308  dst->uses_2pass = src->uses_2pass;
309 
310  return 0;
311 }
312 
313 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 {
315  VP9Context *s = ctx->priv_data;
316  uint8_t *p;
317  int bytesperpixel = s->bytesperpixel;
318 
319  av_assert0(w > 0 && h > 0);
320 
321  if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
322  return 0;
323 
324  ctx->width = w;
325  ctx->height = h;
326  ctx->pix_fmt = fmt;
327  s->sb_cols = (w + 63) >> 6;
328  s->sb_rows = (h + 63) >> 6;
329  s->cols = (w + 7) >> 3;
330  s->rows = (h + 7) >> 3;
331 
332 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
333  av_freep(&s->intra_pred_data[0]);
334  // FIXME we slightly over-allocate here for subsampled chroma, but a little
335  // bit of padding shouldn't affect performance...
336  p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
337  sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338  if (!p)
339  return AVERROR(ENOMEM);
340  assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
341  assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
342  assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
343  assign(s->above_y_nnz_ctx, uint8_t *, 16);
344  assign(s->above_mode_ctx, uint8_t *, 16);
345  assign(s->above_mv_ctx, VP56mv(*)[2], 16);
346  assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
347  assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
349  assign(s->above_skip_ctx, uint8_t *, 8);
350  assign(s->above_txfm_ctx, uint8_t *, 8);
351  assign(s->above_segpred_ctx, uint8_t *, 8);
352  assign(s->above_intra_ctx, uint8_t *, 8);
353  assign(s->above_comp_ctx, uint8_t *, 8);
354  assign(s->above_ref_ctx, uint8_t *, 8);
355  assign(s->above_filter_ctx, uint8_t *, 8);
356  assign(s->lflvl, struct VP9Filter *, 1);
357 #undef assign
358 
359  // these will be re-allocated a little later
360  av_freep(&s->b_base);
361  av_freep(&s->block_base);
362 
363  if (s->bpp != s->last_bpp) {
364  ff_vp9dsp_init(&s->dsp, s->bpp);
365  ff_videodsp_init(&s->vdsp, s->bpp);
366  s->last_bpp = s->bpp;
367  }
368 
369  return 0;
370 }
371 
373 {
374  VP9Context *s = ctx->priv_data;
375  int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
376 
378  return 0;
379 
380  av_free(s->b_base);
381  av_free(s->block_base);
382  chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
383  chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
384  if (s->frames[CUR_FRAME].uses_2pass) {
385  int sbs = s->sb_cols * s->sb_rows;
386 
387  s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
388  s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
389  16 * 16 + 2 * chroma_eobs) * sbs);
390  if (!s->b_base || !s->block_base)
391  return AVERROR(ENOMEM);
392  s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
393  s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
394  s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
395  s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
396  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
397  } else {
398  s->b_base = av_malloc(sizeof(VP9Block));
399  s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
400  16 * 16 + 2 * chroma_eobs);
401  if (!s->b_base || !s->block_base)
402  return AVERROR(ENOMEM);
403  s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
404  s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
405  s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
406  s->uveob_base[0] = s->eob_base + 16 * 16;
407  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
408  }
410 
411  return 0;
412 }
413 
414 // for some reason the sign bit is at the end, not the start, of a bit sequence
416 {
417  int v = get_bits(gb, n);
418  return get_bits1(gb) ? -v : v;
419 }
420 
422 {
423  return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
424 }
425 
426 // differential forward probability updates
// Read a differentially-coded probability update from the range coder and
// apply it to the current probability p (p in [1, 255]).
// c: arithmetic range coder to read the coded delta from.
// p: current probability value.
// Returns the new probability, still in [1, 255].
// The sequence and widths of the vp8_rac_get*() reads below are dictated by
// the bitstream; do not reorder them.
427 static int update_prob(VP56RangeCoder *c, int p)
428 {
429  static const int inv_map_table[255] = {
430  7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
431  189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
432  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
433  25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
434  40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
435  55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
436  70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
437  86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
438  101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
439  116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
440  131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
441  146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
442  161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
443  177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
444  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
445  207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
446  222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
447  237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
448  252, 253, 253,
449  };
450  int d;
451 
452  /* This code is trying to do a differential probability update. For a
453  * current probability A in the range [1, 255], the difference to a new
454  * probability of any value can be expressed differentially as 1-A,255-A
455  * where some part of this (absolute range) exists both in positive as
456  * well as the negative part, whereas another part only exists in one
457  * half. We're trying to code this shared part differentially, i.e.
458  * times two where the value of the lowest bit specifies the sign, and
459  * the single part is then coded on top of this. This absolute difference
460  * then again has a value of [0,254], but a bigger value in this range
461  * indicates that we're further away from the original value A, so we
462  * can code this as a VLC code, since higher values are increasingly
463  * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
464  * updates vs. the 'fine, exact' updates further down the range, which
465  * adds one extra dimension to this differential update model. */
466 
467  if (!vp8_rac_get(c)) {
468  d = vp8_rac_get_uint(c, 4) + 0; // short VLC prefix: d in [0, 15]
469  } else if (!vp8_rac_get(c)) {
470  d = vp8_rac_get_uint(c, 4) + 16; // d in [16, 31]
471  } else if (!vp8_rac_get(c)) {
472  d = vp8_rac_get_uint(c, 5) + 32; // d in [32, 63]
473  } else {
474  d = vp8_rac_get_uint(c, 7); // longest codes: d ends up in [64, 254]
475  if (d >= 65)
476  d = (d << 1) - 65 + vp8_rac_get(c); // extra bit doubles the range for d >= 65
477  d += 64;
478  av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
479  }
480 
481  // Map the VLC index back to an absolute delta and re-center it around p,
482  // mirroring around 128 so the result stays inside [1, 255].
481  return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
482  255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
483 }
484 
486 {
487  static const enum AVColorSpace colorspaces[8] = {
490  };
491  VP9Context *s = ctx->priv_data;
492  enum AVPixelFormat res;
493  int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
494 
495  s->bpp_index = bits;
496  s->bpp = 8 + bits * 2;
497  s->bytesperpixel = (7 + s->bpp) >> 3;
498  ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
499  if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
500  static const enum AVPixelFormat pix_fmt_rgb[3] = {
502  };
503  if (ctx->profile & 1) {
504  s->ss_h = s->ss_v = 0;
505  res = pix_fmt_rgb[bits];
507  if (get_bits1(&s->gb)) {
508  av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
509  return AVERROR_INVALIDDATA;
510  }
511  } else {
512  av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
513  ctx->profile);
514  return AVERROR_INVALIDDATA;
515  }
516  } else {
517  static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
524  };
526  if (ctx->profile & 1) {
527  s->ss_h = get_bits1(&s->gb);
528  s->ss_v = get_bits1(&s->gb);
529  if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
530  av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
531  ctx->profile);
532  return AVERROR_INVALIDDATA;
533  } else if (get_bits1(&s->gb)) {
534  av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
535  ctx->profile);
536  return AVERROR_INVALIDDATA;
537  }
538  } else {
539  s->ss_h = s->ss_v = 1;
540  res = pix_fmt_for_ss[bits][1][1];
541  }
542  }
543 
544  return res;
545 }
546 
548  const uint8_t *data, int size, int *ref)
549 {
550  VP9Context *s = ctx->priv_data;
551  int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
552  enum AVPixelFormat fmt = ctx->pix_fmt;
553  int last_invisible;
554  const uint8_t *data2;
555 
556  /* general header */
557  if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
558  av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
559  return res;
560  }
561  if (get_bits(&s->gb, 2) != 0x2) { // frame marker
562  av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
563  return AVERROR_INVALIDDATA;
564  }
565  ctx->profile = get_bits1(&s->gb);
566  ctx->profile |= get_bits1(&s->gb) << 1;
567  if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
568  if (ctx->profile > 3) {
569  av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
570  return AVERROR_INVALIDDATA;
571  }
572  if (get_bits1(&s->gb)) {
573  *ref = get_bits(&s->gb, 3);
574  return 0;
575  }
576  s->last_keyframe = s->keyframe;
577  s->keyframe = !get_bits1(&s->gb);
578  last_invisible = s->invisible;
579  s->invisible = !get_bits1(&s->gb);
580  s->errorres = get_bits1(&s->gb);
581  s->use_last_frame_mvs = !s->errorres && !last_invisible;
582  if (s->keyframe) {
583  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
584  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
585  return AVERROR_INVALIDDATA;
586  }
587  if ((fmt = read_colorspace_details(ctx)) < 0)
588  return fmt;
589  // for profile 1, here follows the subsampling bits
590  s->refreshrefmask = 0xff;
591  w = get_bits(&s->gb, 16) + 1;
592  h = get_bits(&s->gb, 16) + 1;
593  if (get_bits1(&s->gb)) // display size
594  skip_bits(&s->gb, 32);
595  } else {
596  s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
597  s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
598  if (s->intraonly) {
599  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
600  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
601  return AVERROR_INVALIDDATA;
602  }
603  if (ctx->profile >= 1) {
604  if ((fmt = read_colorspace_details(ctx)) < 0)
605  return fmt;
606  } else {
607  s->ss_h = s->ss_v = 1;
608  s->bpp = 8;
609  s->bpp_index = 0;
610  s->bytesperpixel = 1;
611  fmt = AV_PIX_FMT_YUV420P;
614  }
615  s->refreshrefmask = get_bits(&s->gb, 8);
616  w = get_bits(&s->gb, 16) + 1;
617  h = get_bits(&s->gb, 16) + 1;
618  if (get_bits1(&s->gb)) // display size
619  skip_bits(&s->gb, 32);
620  } else {
621  s->refreshrefmask = get_bits(&s->gb, 8);
622  s->refidx[0] = get_bits(&s->gb, 3);
623  s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
624  s->refidx[1] = get_bits(&s->gb, 3);
625  s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
626  s->refidx[2] = get_bits(&s->gb, 3);
627  s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
628  if (!s->refs[s->refidx[0]].f->data[0] ||
629  !s->refs[s->refidx[1]].f->data[0] ||
630  !s->refs[s->refidx[2]].f->data[0]) {
631  av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
632  return AVERROR_INVALIDDATA;
633  }
634  if (get_bits1(&s->gb)) {
635  w = s->refs[s->refidx[0]].f->width;
636  h = s->refs[s->refidx[0]].f->height;
637  } else if (get_bits1(&s->gb)) {
638  w = s->refs[s->refidx[1]].f->width;
639  h = s->refs[s->refidx[1]].f->height;
640  } else if (get_bits1(&s->gb)) {
641  w = s->refs[s->refidx[2]].f->width;
642  h = s->refs[s->refidx[2]].f->height;
643  } else {
644  w = get_bits(&s->gb, 16) + 1;
645  h = get_bits(&s->gb, 16) + 1;
646  }
647  // Note that in this code, "CUR_FRAME" is actually before we
648  // have formally allocated a frame, and thus actually represents
649  // the _last_ frame
650  s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
651  s->frames[CUR_FRAME].tf.f->height == h;
652  if (get_bits1(&s->gb)) // display size
653  skip_bits(&s->gb, 32);
654  s->highprecisionmvs = get_bits1(&s->gb);
656  get_bits(&s->gb, 2);
657  s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
658  s->signbias[0] != s->signbias[2]);
659  if (s->allowcompinter) {
660  if (s->signbias[0] == s->signbias[1]) {
661  s->fixcompref = 2;
662  s->varcompref[0] = 0;
663  s->varcompref[1] = 1;
664  } else if (s->signbias[0] == s->signbias[2]) {
665  s->fixcompref = 1;
666  s->varcompref[0] = 0;
667  s->varcompref[1] = 2;
668  } else {
669  s->fixcompref = 0;
670  s->varcompref[0] = 1;
671  s->varcompref[1] = 2;
672  }
673  }
674 
675  for (i = 0; i < 3; i++) {
676  AVFrame *ref = s->refs[s->refidx[i]].f;
677  int refw = ref->width, refh = ref->height;
678 
679  if (ref->format != fmt) {
680  av_log(ctx, AV_LOG_ERROR,
681  "Ref pixfmt (%s) did not match current frame (%s)",
683  av_get_pix_fmt_name(fmt));
684  return AVERROR_INVALIDDATA;
685  } else if (refw == w && refh == h) {
686  s->mvscale[i][0] = s->mvscale[i][1] = 0;
687  } else {
688  if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
689  av_log(ctx, AV_LOG_ERROR,
690  "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
691  refw, refh, w, h);
692  return AVERROR_INVALIDDATA;
693  }
694  s->mvscale[i][0] = (refw << 14) / w;
695  s->mvscale[i][1] = (refh << 14) / h;
696  s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
697  s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
698  }
699  }
700  }
701  }
702  s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
703  s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
704  s->framectxid = c = get_bits(&s->gb, 2);
705 
706  /* loopfilter header data */
707  if (s->keyframe || s->errorres || s->intraonly) {
708  // reset loopfilter defaults
709  s->lf_delta.ref[0] = 1;
710  s->lf_delta.ref[1] = 0;
711  s->lf_delta.ref[2] = -1;
712  s->lf_delta.ref[3] = -1;
713  s->lf_delta.mode[0] = 0;
714  s->lf_delta.mode[1] = 0;
715  memset(s->segmentation.feat, 0, sizeof(s->segmentation.feat));
716  }
717  s->filter.level = get_bits(&s->gb, 6);
718  sharp = get_bits(&s->gb, 3);
719  // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
720  // the old cache values since they are still valid
721  if (s->filter.sharpness != sharp)
722  memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
723  s->filter.sharpness = sharp;
724  if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
725  if (get_bits1(&s->gb)) {
726  for (i = 0; i < 4; i++)
727  if (get_bits1(&s->gb))
728  s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
729  for (i = 0; i < 2; i++)
730  if (get_bits1(&s->gb))
731  s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
732  }
733  }
734 
735  /* quantization header data */
736  s->yac_qi = get_bits(&s->gb, 8);
737  s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
738  s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
739  s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
740  s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
741  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
742  if (s->lossless)
744 
745  /* segmentation header info */
747  if ((s->segmentation.enabled = get_bits1(&s->gb))) {
748  if ((s->segmentation.update_map = get_bits1(&s->gb))) {
749  for (i = 0; i < 7; i++)
750  s->prob.seg[i] = get_bits1(&s->gb) ?
751  get_bits(&s->gb, 8) : 255;
752  if ((s->segmentation.temporal = get_bits1(&s->gb))) {
753  for (i = 0; i < 3; i++)
754  s->prob.segpred[i] = get_bits1(&s->gb) ?
755  get_bits(&s->gb, 8) : 255;
756  }
757  }
758  if ((!s->segmentation.update_map || s->segmentation.temporal) &&
759  (w != s->frames[CUR_FRAME].tf.f->width ||
760  h != s->frames[CUR_FRAME].tf.f->height)) {
761  av_log(ctx, AV_LOG_WARNING,
762  "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
765  //return AVERROR_INVALIDDATA;
766  }
767 
768  if (get_bits1(&s->gb)) {
770  for (i = 0; i < 8; i++) {
771  if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
772  s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
773  if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
774  s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
775  if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
776  s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
777  s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
778  }
779  }
780  }
781 
782  // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
783  for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
784  int qyac, qydc, quvac, quvdc, lflvl, sh;
785 
786  if (s->segmentation.enabled && s->segmentation.feat[i].q_enabled) {
788  qyac = av_clip_uintp2(s->segmentation.feat[i].q_val, 8);
789  else
790  qyac = av_clip_uintp2(s->yac_qi + s->segmentation.feat[i].q_val, 8);
791  } else {
792  qyac = s->yac_qi;
793  }
794  qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
795  quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
796  quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
797  qyac = av_clip_uintp2(qyac, 8);
798 
799  s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
800  s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
801  s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
802  s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
803 
804  sh = s->filter.level >= 32;
805  if (s->segmentation.enabled && s->segmentation.feat[i].lf_enabled) {
807  lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
808  else
809  lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
810  } else {
811  lflvl = s->filter.level;
812  }
813  if (s->lf_delta.enabled) {
814  s->segmentation.feat[i].lflvl[0][0] =
815  s->segmentation.feat[i].lflvl[0][1] =
816  av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
817  for (j = 1; j < 4; j++) {
818  s->segmentation.feat[i].lflvl[j][0] =
819  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
820  s->lf_delta.mode[0]) * (1 << sh)), 6);
821  s->segmentation.feat[i].lflvl[j][1] =
822  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
823  s->lf_delta.mode[1]) * (1 << sh)), 6);
824  }
825  } else {
826  memset(s->segmentation.feat[i].lflvl, lflvl,
827  sizeof(s->segmentation.feat[i].lflvl));
828  }
829  }
830 
831  /* tiling info */
832  if ((res = update_size(ctx, w, h, fmt)) < 0) {
833  av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
834  return res;
835  }
836  for (s->tiling.log2_tile_cols = 0;
837  (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
838  s->tiling.log2_tile_cols++) ;
839  for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
840  max = FFMAX(0, max - 1);
841  while (max > s->tiling.log2_tile_cols) {
842  if (get_bits1(&s->gb))
843  s->tiling.log2_tile_cols++;
844  else
845  break;
846  }
847  s->tiling.log2_tile_rows = decode012(&s->gb);
848  s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
849  if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
850  s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
851  s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
852  sizeof(VP56RangeCoder) * s->tiling.tile_cols);
853  if (!s->c_b) {
854  av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
855  return AVERROR(ENOMEM);
856  }
857  }
858 
859  if (s->keyframe || s->errorres || (s->intraonly && s->resetctx == 3)) {
860  s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
861  s->prob_ctx[3].p = vp9_default_probs;
862  memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
863  sizeof(vp9_default_coef_probs));
864  memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
865  sizeof(vp9_default_coef_probs));
866  memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
867  sizeof(vp9_default_coef_probs));
868  memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
869  sizeof(vp9_default_coef_probs));
870  } else if (s->intraonly && s->resetctx == 2) {
872  memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
873  sizeof(vp9_default_coef_probs));
874  }
875 
876  // next 16 bits is size of the rest of the header (arith-coded)
877  size2 = get_bits(&s->gb, 16);
878  data2 = align_get_bits(&s->gb);
879  if (size2 > size - (data2 - data)) {
880  av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
881  return AVERROR_INVALIDDATA;
882  }
883  res = ff_vp56_init_range_decoder(&s->c, data2, size2);
884  if (res < 0)
885  return res;
886 
887  if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
888  av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
889  return AVERROR_INVALIDDATA;
890  }
891 
892  if (s->keyframe || s->intraonly) {
893  memset(s->counts.coef, 0, sizeof(s->counts.coef));
894  memset(s->counts.eob, 0, sizeof(s->counts.eob));
895  } else {
896  memset(&s->counts, 0, sizeof(s->counts));
897  }
898  // FIXME is it faster to not copy here, but do it down in the fw updates
899  // as explicit copies if the fw update is missing (and skip the copy upon
900  // fw update)?
901  s->prob.p = s->prob_ctx[c].p;
902 
903  // txfm updates
904  if (s->lossless) {
905  s->txfmmode = TX_4X4;
906  } else {
907  s->txfmmode = vp8_rac_get_uint(&s->c, 2);
908  if (s->txfmmode == 3)
909  s->txfmmode += vp8_rac_get(&s->c);
910 
911  if (s->txfmmode == TX_SWITCHABLE) {
912  for (i = 0; i < 2; i++)
913  if (vp56_rac_get_prob_branchy(&s->c, 252))
914  s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
915  for (i = 0; i < 2; i++)
916  for (j = 0; j < 2; j++)
917  if (vp56_rac_get_prob_branchy(&s->c, 252))
918  s->prob.p.tx16p[i][j] =
919  update_prob(&s->c, s->prob.p.tx16p[i][j]);
920  for (i = 0; i < 2; i++)
921  for (j = 0; j < 3; j++)
922  if (vp56_rac_get_prob_branchy(&s->c, 252))
923  s->prob.p.tx32p[i][j] =
924  update_prob(&s->c, s->prob.p.tx32p[i][j]);
925  }
926  }
927 
928  // coef updates
929  for (i = 0; i < 4; i++) {
930  uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
931  if (vp8_rac_get(&s->c)) {
932  for (j = 0; j < 2; j++)
933  for (k = 0; k < 2; k++)
934  for (l = 0; l < 6; l++)
935  for (m = 0; m < 6; m++) {
936  uint8_t *p = s->prob.coef[i][j][k][l][m];
937  uint8_t *r = ref[j][k][l][m];
938  if (m >= 3 && l == 0) // dc only has 3 pt
939  break;
940  for (n = 0; n < 3; n++) {
941  if (vp56_rac_get_prob_branchy(&s->c, 252)) {
942  p[n] = update_prob(&s->c, r[n]);
943  } else {
944  p[n] = r[n];
945  }
946  }
947  p[3] = 0;
948  }
949  } else {
950  for (j = 0; j < 2; j++)
951  for (k = 0; k < 2; k++)
952  for (l = 0; l < 6; l++)
953  for (m = 0; m < 6; m++) {
954  uint8_t *p = s->prob.coef[i][j][k][l][m];
955  uint8_t *r = ref[j][k][l][m];
956  if (m > 3 && l == 0) // dc only has 3 pt
957  break;
958  memcpy(p, r, 3);
959  p[3] = 0;
960  }
961  }
962  if (s->txfmmode == i)
963  break;
964  }
965 
966  // mode updates
967  for (i = 0; i < 3; i++)
968  if (vp56_rac_get_prob_branchy(&s->c, 252))
969  s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
970  if (!s->keyframe && !s->intraonly) {
971  for (i = 0; i < 7; i++)
972  for (j = 0; j < 3; j++)
973  if (vp56_rac_get_prob_branchy(&s->c, 252))
974  s->prob.p.mv_mode[i][j] =
975  update_prob(&s->c, s->prob.p.mv_mode[i][j]);
976 
977  if (s->filtermode == FILTER_SWITCHABLE)
978  for (i = 0; i < 4; i++)
979  for (j = 0; j < 2; j++)
980  if (vp56_rac_get_prob_branchy(&s->c, 252))
981  s->prob.p.filter[i][j] =
982  update_prob(&s->c, s->prob.p.filter[i][j]);
983 
984  for (i = 0; i < 4; i++)
985  if (vp56_rac_get_prob_branchy(&s->c, 252))
986  s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
987 
988  if (s->allowcompinter) {
989  s->comppredmode = vp8_rac_get(&s->c);
990  if (s->comppredmode)
991  s->comppredmode += vp8_rac_get(&s->c);
992  if (s->comppredmode == PRED_SWITCHABLE)
993  for (i = 0; i < 5; i++)
994  if (vp56_rac_get_prob_branchy(&s->c, 252))
995  s->prob.p.comp[i] =
996  update_prob(&s->c, s->prob.p.comp[i]);
997  } else {
999  }
1000 
1001  if (s->comppredmode != PRED_COMPREF) {
1002  for (i = 0; i < 5; i++) {
1003  if (vp56_rac_get_prob_branchy(&s->c, 252))
1004  s->prob.p.single_ref[i][0] =
1005  update_prob(&s->c, s->prob.p.single_ref[i][0]);
1006  if (vp56_rac_get_prob_branchy(&s->c, 252))
1007  s->prob.p.single_ref[i][1] =
1008  update_prob(&s->c, s->prob.p.single_ref[i][1]);
1009  }
1010  }
1011 
1012  if (s->comppredmode != PRED_SINGLEREF) {
1013  for (i = 0; i < 5; i++)
1014  if (vp56_rac_get_prob_branchy(&s->c, 252))
1015  s->prob.p.comp_ref[i] =
1016  update_prob(&s->c, s->prob.p.comp_ref[i]);
1017  }
1018 
1019  for (i = 0; i < 4; i++)
1020  for (j = 0; j < 9; j++)
1021  if (vp56_rac_get_prob_branchy(&s->c, 252))
1022  s->prob.p.y_mode[i][j] =
1023  update_prob(&s->c, s->prob.p.y_mode[i][j]);
1024 
1025  for (i = 0; i < 4; i++)
1026  for (j = 0; j < 4; j++)
1027  for (k = 0; k < 3; k++)
1028  if (vp56_rac_get_prob_branchy(&s->c, 252))
1029  s->prob.p.partition[3 - i][j][k] =
1030  update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1031 
1032  // mv fields don't use the update_prob subexp model for some reason
1033  for (i = 0; i < 3; i++)
1034  if (vp56_rac_get_prob_branchy(&s->c, 252))
1035  s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1036 
1037  for (i = 0; i < 2; i++) {
1038  if (vp56_rac_get_prob_branchy(&s->c, 252))
1039  s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1040 
1041  for (j = 0; j < 10; j++)
1042  if (vp56_rac_get_prob_branchy(&s->c, 252))
1043  s->prob.p.mv_comp[i].classes[j] =
1044  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045 
1046  if (vp56_rac_get_prob_branchy(&s->c, 252))
1047  s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1048 
1049  for (j = 0; j < 10; j++)
1050  if (vp56_rac_get_prob_branchy(&s->c, 252))
1051  s->prob.p.mv_comp[i].bits[j] =
1052  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1053  }
1054 
1055  for (i = 0; i < 2; i++) {
1056  for (j = 0; j < 2; j++)
1057  for (k = 0; k < 3; k++)
1058  if (vp56_rac_get_prob_branchy(&s->c, 252))
1059  s->prob.p.mv_comp[i].class0_fp[j][k] =
1060  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1061 
1062  for (j = 0; j < 3; j++)
1063  if (vp56_rac_get_prob_branchy(&s->c, 252))
1064  s->prob.p.mv_comp[i].fp[j] =
1065  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1066  }
1067 
1068  if (s->highprecisionmvs) {
1069  for (i = 0; i < 2; i++) {
1070  if (vp56_rac_get_prob_branchy(&s->c, 252))
1071  s->prob.p.mv_comp[i].class0_hp =
1072  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1073 
1074  if (vp56_rac_get_prob_branchy(&s->c, 252))
1075  s->prob.p.mv_comp[i].hp =
1076  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1077  }
1078  }
1079  }
1080 
1081  return (data2 - data) + size2;
1082 }
1083 
1084 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1085  VP9Context *s)
1086 {
1087  dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1088  dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1089 }
1090 
/* Select a predicted ("reference") motion vector for the current block.
 *
 * NOTE(review): the function's signature line (original line 1091,
 * `static void find_ref_mvs(VP9Context *s,`) was lost in the HTML
 * extraction of this file — restore it from upstream FFmpeg before use.
 *
 * Scans, in priority order: sub-8x8 sibling MVs of the current block,
 * spatial neighbours using the same reference frame, the co-located MV in
 * the previous frame, then neighbours/co-located MVs using a *different*
 * reference frame (sign-flipped when the reference sign bias differs).
 * `idx` selects the first (0) or second (1) distinct candidate; `sb` is the
 * sub-block index, or < 0 when not coding sub-8x8 partitions.
 * Falls back to the zero vector if no candidate is found.
 */
1092  VP56mv *pmv, int ref, int z, int idx, int sb)
1093 {
 /* per-block-size neighbour offsets {col,row} to probe, per VP9 spec */
1094  static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1095  [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1096  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1097  [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1098  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1099  [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1100  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1101  [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1102  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1103  [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1104  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1105  [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1106  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1107  [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1108  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1109  [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1110  { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1111  [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1112  { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1113  [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1114  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1115  [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1116  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1117  [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1118  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1119  [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1120  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1121  };
1122  VP9Block *b = s->b;
1123  int row = s->row, col = s->col, row7 = s->row7;
1124  const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
 /* sentinel: no real MV can equal this packed 32-bit value */
1125 #define INVALID_MV 0x80008000U
 /* mem = first distinct candidate (packed); mem_sub8x8 = unclamped copy for sub-8x8 handling */
1126  uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1127  int i;
1128 
 /* Accept an MV candidate WITHOUT clamping: for idx==0 take it immediately,
  * for idx==1 remember the first one and return the first that differs. */
1129 #define RETURN_DIRECT_MV(mv) \
1130  do { \
1131  uint32_t m = AV_RN32A(&mv); \
1132  if (!idx) { \
1133  AV_WN32A(pmv, m); \
1134  return; \
1135  } else if (mem == INVALID_MV) { \
1136  mem = m; \
1137  } else if (m != mem) { \
1138  AV_WN32A(pmv, m); \
1139  return; \
1140  } \
1141  } while (0)
1142 
 /* sub-8x8 partitions first try MVs of already-decoded sibling sub-blocks */
1143  if (sb >= 0) {
1144  if (sb == 2 || sb == 1) {
1145  RETURN_DIRECT_MV(b->mv[0][z]);
1146  } else if (sb == 3) {
1147  RETURN_DIRECT_MV(b->mv[2][z]);
1148  RETURN_DIRECT_MV(b->mv[1][z]);
1149  RETURN_DIRECT_MV(b->mv[0][z]);
1150  }
1151 
 /* Accept an MV candidate WITH clamping. The sb>0 branch compares the
  * clamped candidate against `mem` but tracks the raw value in
  * `mem_sub8x8` — replicating libvpx's (buggy) sub-8x8 behaviour. */
1152 #define RETURN_MV(mv) \
1153  do { \
1154  if (sb > 0) { \
1155  VP56mv tmp; \
1156  uint32_t m; \
1157  av_assert2(idx == 1); \
1158  av_assert2(mem != INVALID_MV); \
1159  if (mem_sub8x8 == INVALID_MV) { \
1160  clamp_mv(&tmp, &mv, s); \
1161  m = AV_RN32A(&tmp); \
1162  if (m != mem) { \
1163  AV_WN32A(pmv, m); \
1164  return; \
1165  } \
1166  mem_sub8x8 = AV_RN32A(&mv); \
1167  } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1168  clamp_mv(&tmp, &mv, s); \
1169  m = AV_RN32A(&tmp); \
1170  if (m != mem) { \
1171  AV_WN32A(pmv, m); \
1172  } else { \
1173  /* BUG I'm pretty sure this isn't the intention */ \
1174  AV_WN32A(pmv, 0); \
1175  } \
1176  return; \
1177  } \
1178  } else { \
1179  uint32_t m = AV_RN32A(&mv); \
1180  if (!idx) { \
1181  clamp_mv(pmv, &mv, s); \
1182  return; \
1183  } else if (mem == INVALID_MV) { \
1184  mem = m; \
1185  } else if (m != mem) { \
1186  clamp_mv(pmv, &mv, s); \
1187  return; \
1188  } \
1189  } \
1190  } while (0)
1191 
 /* immediate above/left neighbours, via the cached edge-MV contexts */
1192  if (row > 0) {
1193  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1194  if (mv->ref[0] == ref) {
1195  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1196  } else if (mv->ref[1] == ref) {
1197  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1198  }
1199  }
1200  if (col > s->tiling.tile_col_start) {
1201  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1202  if (mv->ref[0] == ref) {
1203  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1204  } else if (mv->ref[1] == ref) {
1205  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1206  }
1207  }
 /* first two table entries (above/left) already handled above */
1208  i = 2;
1209  } else {
1210  i = 0;
1211  }
1212 
1213  // previously coded MVs in this neighbourhood, using same reference frame
1214  for (; i < 8; i++) {
1215  int c = p[i][0] + col, r = p[i][1] + row;
1216 
 /* neighbour must lie inside the tile horizontally and the frame vertically */
1217  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1218  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1219 
1220  if (mv->ref[0] == ref) {
1221  RETURN_MV(mv->mv[0]);
1222  } else if (mv->ref[1] == ref) {
1223  RETURN_MV(mv->mv[1]);
1224  }
1225  }
1226  }
1227 
1228  // MV at this position in previous frame, using same reference frame
1229  if (s->use_last_frame_mvs) {
1230  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1231 
 /* NOTE(review): original lines 1232-1233 (a frame-threading
  * ff_thread_await_progress() call on the reference frame) were lost in
  * the extraction of this file — restore from upstream before use. */
1234  if (mv->ref[0] == ref) {
1235  RETURN_MV(mv->mv[0]);
1236  } else if (mv->ref[1] == ref) {
1237  RETURN_MV(mv->mv[1]);
1238  }
1239  }
1240 
 /* As RETURN_MV, but negate the MV first when the two reference frames
  * have opposite sign bias (point in opposite temporal directions). */
1241 #define RETURN_SCALE_MV(mv, scale) \
1242  do { \
1243  if (scale) { \
1244  VP56mv mv_temp = { -mv.x, -mv.y }; \
1245  RETURN_MV(mv_temp); \
1246  } else { \
1247  RETURN_MV(mv); \
1248  } \
1249  } while (0)
1250 
1251  // previously coded MVs in this neighbourhood, using different reference frame
1252  for (i = 0; i < 8; i++) {
1253  int c = p[i][0] + col, r = p[i][1] + row;
1254 
1255  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1256  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1257 
1258  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1259  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1260  }
1261  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1262  // BUG - libvpx has this condition regardless of whether
1263  // we used the first ref MV and pre-scaling
1264  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1265  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1266  }
1267  }
1268  }
1269 
1270  // MV at this position in previous frame, using different reference frame
1271  if (s->use_last_frame_mvs) {
1272  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1273 
1274  // no need to await_progress, because we already did that above
1275  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1276  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1277  }
1278  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1279  // BUG - libvpx has this condition regardless of whether
1280  // we used the first ref MV and pre-scaling
1281  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1282  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1283  }
1284  }
1285 
 /* no candidate found anywhere: fall back to the (clamped) zero MV */
1286  AV_ZERO32(pmv);
1287  clamp_mv(pmv, pmv, s);
1288 #undef INVALID_MV
1289 #undef RETURN_MV
1290 #undef RETURN_SCALE_MV
1291 }
1292 
/* Decode one motion-vector component (row or column) difference from the
 * arithmetic coder.
 *
 * `idx` selects the component probability set (0 = vertical, 1 = horizontal),
 * `hp` enables the high-precision (eighth-pel) bit. Returns the signed
 * magnitude, always non-zero: sign ? -(n + 1) : (n + 1).
 *
 * NOTE: the vp56_rac_*/vp8_rac_* calls consume bits from the bitstream in a
 * fixed order mandated by the VP9 spec — do not reorder any of them.
 * Every decoded symbol is also tallied in s->counts for backward adaptation.
 */
1293 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1294 {
1295  int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1296  int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1297  s->prob.p.mv_comp[idx].classes);
1298 
1299  s->counts.mv_comp[idx].sign[sign]++;
1300  s->counts.mv_comp[idx].classes[c]++;
 /* class c > 0: magnitude in [8<<c .. (16<<c)-1], built from c integer
  * bits, a 2-bit fractional part, and an optional high-precision bit */
1301  if (c) {
1302  int m;
1303 
1304  for (n = 0, m = 0; m < c; m++) {
1305  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1306  n |= bit << m;
1307  s->counts.mv_comp[idx].bits[m][bit]++;
1308  }
1309  n <<= 3;
1310  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1311  n |= bit << 1;
1312  s->counts.mv_comp[idx].fp[bit]++;
1313  if (hp) {
1314  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1315  s->counts.mv_comp[idx].hp[bit]++;
1316  n |= bit;
1317  } else {
 /* low-precision mode: hp bit is implicitly 1 */
1318  n |= 1;
1319  // bug in libvpx - we count for bw entropy purposes even if the
1320  // bit wasn't coded
1321  s->counts.mv_comp[idx].hp[1]++;
1322  }
 /* add the class base offset */
1323  n += 8 << c;
1324  } else {
 /* class 0: magnitude in [0..7], coded with the class0 probability set */
1325  n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1326  s->counts.mv_comp[idx].class0[n]++;
1327  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1328  s->prob.p.mv_comp[idx].class0_fp[n]);
1329  s->counts.mv_comp[idx].class0_fp[n][bit]++;
1330  n = (n << 3) | (bit << 1);
1331  if (hp) {
1332  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1333  s->counts.mv_comp[idx].class0_hp[bit]++;
1334  n |= bit;
1335  } else {
1336  n |= 1;
1337  // bug in libvpx - we count for bw entropy purposes even if the
1338  // bit wasn't coded
1339  s->counts.mv_comp[idx].class0_hp[1]++;
1340  }
1341  }
1342 
 /* magnitude is offset by 1 so a zero difference is never produced here */
1343  return sign ? -(n + 1) : (n + 1);
1344 }
1345 
/* Fill in the (up to two) motion vectors for the current block/sub-block.
 *
 * `mv` points at a pair of VP56mv (one per reference); `mode` is one of
 * ZEROMV/NEARESTMV/NEARMV/NEWMV; `sb` is the sub-block index or -1.
 * For ZEROMV both MVs are cleared; otherwise the prediction comes from
 * find_ref_mvs(), with NEWMV adding a coded MV difference on top.
 */
1346 static void fill_mv(VP9Context *s,
1347  VP56mv *mv, int mode, int sb)
1348 {
1349  VP9Block *b = s->b;
1350 
1351  if (mode == ZEROMV) {
 /* clears both VP56mv entries (2 x 2 int16 components) at once */
1352  AV_ZERO64(mv);
1353  } else {
1354  int hp;
1355 
1356  // FIXME cache this value and reuse for other subblocks
1357  find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1358  mode == NEWMV ? -1 : sb);
1359  // FIXME maybe move this code into find_ref_mvs()
 /* when high precision is off (or the MV is large), round odd
  * components towards zero to the nearest even value */
1360  if ((mode == NEWMV || sb == -1) &&
1361  !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1362  if (mv[0].y & 1) {
1363  if (mv[0].y < 0)
1364  mv[0].y++;
1365  else
1366  mv[0].y--;
1367  }
1368  if (mv[0].x & 1) {
1369  if (mv[0].x < 0)
1370  mv[0].x++;
1371  else
1372  mv[0].x--;
1373  }
1374  }
1375  if (mode == NEWMV) {
 /* NOTE(review): original line 1376 — the decl reading the joint-MV
  * symbol, `enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,`
  * — was lost in the extraction; restore from upstream before use. */
1377  s->prob.p.mv_joint);
1378 
1379  s->counts.mv_joint[j]++;
 /* joint symbol says which components carry a coded difference */
1380  if (j >= MV_JOINT_V)
1381  mv[0].y += read_mv_component(s, 0, hp);
1382  if (j & 1)
1383  mv[0].x += read_mv_component(s, 1, hp);
1384  }
1385 
 /* compound prediction: repeat the whole procedure for the second ref */
1386  if (b->comp) {
1387  // FIXME cache this value and reuse for other subblocks
1388  find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1389  mode == NEWMV ? -1 : sb);
1390  if ((mode == NEWMV || sb == -1) &&
1391  !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1392  if (mv[1].y & 1) {
1393  if (mv[1].y < 0)
1394  mv[1].y++;
1395  else
1396  mv[1].y--;
1397  }
1398  if (mv[1].x & 1) {
1399  if (mv[1].x < 0)
1400  mv[1].x++;
1401  else
1402  mv[1].x--;
1403  }
1404  }
1405  if (mode == NEWMV) {
 /* NOTE(review): original line 1406 (same `enum MVJoint j = ...`
  * read as above) was lost in the extraction — restore it. */
1407  s->prob.p.mv_joint);
1408 
1409  s->counts.mv_joint[j]++;
1410  if (j >= MV_JOINT_V)
1411  mv[1].y += read_mv_component(s, 0, hp);
1412  if (j & 1)
1413  mv[1].x += read_mv_component(s, 1, hp);
1414  }
1415  }
1416  }
1417 }
1418 
1419 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1420  ptrdiff_t stride, int v)
1421 {
1422  switch (w) {
1423  case 1:
1424  do {
1425  *ptr = v;
1426  ptr += stride;
1427  } while (--h);
1428  break;
1429  case 2: {
1430  int v16 = v * 0x0101;
1431  do {
1432  AV_WN16A(ptr, v16);
1433  ptr += stride;
1434  } while (--h);
1435  break;
1436  }
1437  case 4: {
1438  uint32_t v32 = v * 0x01010101;
1439  do {
1440  AV_WN32A(ptr, v32);
1441  ptr += stride;
1442  } while (--h);
1443  break;
1444  }
1445  case 8: {
1446 #if HAVE_FAST_64BIT
1447  uint64_t v64 = v * 0x0101010101010101ULL;
1448  do {
1449  AV_WN64A(ptr, v64);
1450  ptr += stride;
1451  } while (--h);
1452 #else
1453  uint32_t v32 = v * 0x01010101;
1454  do {
1455  AV_WN32A(ptr, v32);
1456  AV_WN32A(ptr + 4, v32);
1457  ptr += stride;
1458  } while (--h);
1459 #endif
1460  break;
1461  }
1462  }
1463 }
1464 
1465 static void decode_mode(AVCodecContext *ctx)
1466 {
1467  static const uint8_t left_ctx[N_BS_SIZES] = {
1468  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1469  };
1470  static const uint8_t above_ctx[N_BS_SIZES] = {
1471  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1472  };
1473  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1475  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1476  };
1477  VP9Context *s = ctx->priv_data;
1478  VP9Block *b = s->b;
1479  int row = s->row, col = s->col, row7 = s->row7;
1480  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1481  int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1482  int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1483  int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1484  int vref, filter_id;
1485 
1486  if (!s->segmentation.enabled) {
1487  b->seg_id = 0;
1488  } else if (s->keyframe || s->intraonly) {
1489  b->seg_id = !s->segmentation.update_map ? 0 :
1491  } else if (!s->segmentation.update_map ||
1492  (s->segmentation.temporal &&
1494  s->prob.segpred[s->above_segpred_ctx[col] +
1495  s->left_segpred_ctx[row7]]))) {
1496  if (!s->errorres && !s->segmentation.ignore_refmap) {
1497  int pred = 8, x;
1499 
1502  for (y = 0; y < h4; y++) {
1503  int idx_base = (y + row) * 8 * s->sb_cols + col;
1504  for (x = 0; x < w4; x++)
1505  pred = FFMIN(pred, refsegmap[idx_base + x]);
1506  }
1507  av_assert1(pred < 8);
1508  b->seg_id = pred;
1509  } else {
1510  b->seg_id = 0;
1511  }
1512 
1513  memset(&s->above_segpred_ctx[col], 1, w4);
1514  memset(&s->left_segpred_ctx[row7], 1, h4);
1515  } else {
1517  s->prob.seg);
1518 
1519  memset(&s->above_segpred_ctx[col], 0, w4);
1520  memset(&s->left_segpred_ctx[row7], 0, h4);
1521  }
1522  if (s->segmentation.enabled &&
1523  (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1524  setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1525  bw4, bh4, 8 * s->sb_cols, b->seg_id);
1526  }
1527 
1528  b->skip = s->segmentation.enabled &&
1529  s->segmentation.feat[b->seg_id].skip_enabled;
1530  if (!b->skip) {
1531  int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1532  b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1533  s->counts.skip[c][b->skip]++;
1534  }
1535 
1536  if (s->keyframe || s->intraonly) {
1537  b->intra = 1;
1538  } else if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1539  b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1540  } else {
1541  int c, bit;
1542 
1543  if (have_a && have_l) {
1544  c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1545  c += (c == 2);
1546  } else {
1547  c = have_a ? 2 * s->above_intra_ctx[col] :
1548  have_l ? 2 * s->left_intra_ctx[row7] : 0;
1549  }
1550  bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1551  s->counts.intra[c][bit]++;
1552  b->intra = !bit;
1553  }
1554 
1555  if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1556  int c;
1557  if (have_a) {
1558  if (have_l) {
1559  c = (s->above_skip_ctx[col] ? max_tx :
1560  s->above_txfm_ctx[col]) +
1561  (s->left_skip_ctx[row7] ? max_tx :
1562  s->left_txfm_ctx[row7]) > max_tx;
1563  } else {
1564  c = s->above_skip_ctx[col] ? 1 :
1565  (s->above_txfm_ctx[col] * 2 > max_tx);
1566  }
1567  } else if (have_l) {
1568  c = s->left_skip_ctx[row7] ? 1 :
1569  (s->left_txfm_ctx[row7] * 2 > max_tx);
1570  } else {
1571  c = 1;
1572  }
1573  switch (max_tx) {
1574  case TX_32X32:
1575  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1576  if (b->tx) {
1577  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1578  if (b->tx == 2)
1579  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1580  }
1581  s->counts.tx32p[c][b->tx]++;
1582  break;
1583  case TX_16X16:
1584  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1585  if (b->tx)
1586  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1587  s->counts.tx16p[c][b->tx]++;
1588  break;
1589  case TX_8X8:
1590  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1591  s->counts.tx8p[c][b->tx]++;
1592  break;
1593  case TX_4X4:
1594  b->tx = TX_4X4;
1595  break;
1596  }
1597  } else {
1598  b->tx = FFMIN(max_tx, s->txfmmode);
1599  }
1600 
1601  if (s->keyframe || s->intraonly) {
1602  uint8_t *a = &s->above_mode_ctx[col * 2];
1603  uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1604 
1605  b->comp = 0;
1606  if (b->bs > BS_8x8) {
1607  // FIXME the memory storage intermediates here aren't really
1608  // necessary, they're just there to make the code slightly
1609  // simpler for now
1610  b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1611  vp9_default_kf_ymode_probs[a[0]][l[0]]);
1612  if (b->bs != BS_8x4) {
1614  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1615  l[0] = a[1] = b->mode[1];
1616  } else {
1617  l[0] = a[1] = b->mode[1] = b->mode[0];
1618  }
1619  if (b->bs != BS_4x8) {
1620  b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1621  vp9_default_kf_ymode_probs[a[0]][l[1]]);
1622  if (b->bs != BS_8x4) {
1624  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1625  l[1] = a[1] = b->mode[3];
1626  } else {
1627  l[1] = a[1] = b->mode[3] = b->mode[2];
1628  }
1629  } else {
1630  b->mode[2] = b->mode[0];
1631  l[1] = a[1] = b->mode[3] = b->mode[1];
1632  }
1633  } else {
1635  vp9_default_kf_ymode_probs[*a][*l]);
1636  b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1637  // FIXME this can probably be optimized
1638  memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1639  memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1640  }
1643  } else if (b->intra) {
1644  b->comp = 0;
1645  if (b->bs > BS_8x8) {
1647  s->prob.p.y_mode[0]);
1648  s->counts.y_mode[0][b->mode[0]]++;
1649  if (b->bs != BS_8x4) {
1651  s->prob.p.y_mode[0]);
1652  s->counts.y_mode[0][b->mode[1]]++;
1653  } else {
1654  b->mode[1] = b->mode[0];
1655  }
1656  if (b->bs != BS_4x8) {
1658  s->prob.p.y_mode[0]);
1659  s->counts.y_mode[0][b->mode[2]]++;
1660  if (b->bs != BS_8x4) {
1662  s->prob.p.y_mode[0]);
1663  s->counts.y_mode[0][b->mode[3]]++;
1664  } else {
1665  b->mode[3] = b->mode[2];
1666  }
1667  } else {
1668  b->mode[2] = b->mode[0];
1669  b->mode[3] = b->mode[1];
1670  }
1671  } else {
1672  static const uint8_t size_group[10] = {
1673  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1674  };
1675  int sz = size_group[b->bs];
1676 
1678  s->prob.p.y_mode[sz]);
1679  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1680  s->counts.y_mode[sz][b->mode[3]]++;
1681  }
1683  s->prob.p.uv_mode[b->mode[3]]);
1684  s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1685  } else {
1686  static const uint8_t inter_mode_ctx_lut[14][14] = {
1687  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1688  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1689  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1690  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1691  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1692  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1693  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1694  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1695  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1696  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1697  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1698  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1699  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1700  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1701  };
1702 
1703  if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1704  av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1705  b->comp = 0;
1706  b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1707  } else {
1708  // read comp_pred flag
1709  if (s->comppredmode != PRED_SWITCHABLE) {
1710  b->comp = s->comppredmode == PRED_COMPREF;
1711  } else {
1712  int c;
1713 
1714  // FIXME add intra as ref=0xff (or -1) to make these easier?
1715  if (have_a) {
1716  if (have_l) {
1717  if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1718  c = 4;
1719  } else if (s->above_comp_ctx[col]) {
1720  c = 2 + (s->left_intra_ctx[row7] ||
1721  s->left_ref_ctx[row7] == s->fixcompref);
1722  } else if (s->left_comp_ctx[row7]) {
1723  c = 2 + (s->above_intra_ctx[col] ||
1724  s->above_ref_ctx[col] == s->fixcompref);
1725  } else {
1726  c = (!s->above_intra_ctx[col] &&
1727  s->above_ref_ctx[col] == s->fixcompref) ^
1728  (!s->left_intra_ctx[row7] &&
1729  s->left_ref_ctx[row & 7] == s->fixcompref);
1730  }
1731  } else {
1732  c = s->above_comp_ctx[col] ? 3 :
1733  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1734  }
1735  } else if (have_l) {
1736  c = s->left_comp_ctx[row7] ? 3 :
1737  (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1738  } else {
1739  c = 1;
1740  }
1741  b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1742  s->counts.comp[c][b->comp]++;
1743  }
1744 
1745  // read actual references
1746  // FIXME probably cache a few variables here to prevent repetitive
1747  // memory accesses below
1748  if (b->comp) /* two references */ {
1749  int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1750 
1751  b->ref[fix_idx] = s->fixcompref;
1752  // FIXME can this codeblob be replaced by some sort of LUT?
1753  if (have_a) {
1754  if (have_l) {
1755  if (s->above_intra_ctx[col]) {
1756  if (s->left_intra_ctx[row7]) {
1757  c = 2;
1758  } else {
1759  c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1760  }
1761  } else if (s->left_intra_ctx[row7]) {
1762  c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1763  } else {
1764  int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1765 
1766  if (refl == refa && refa == s->varcompref[1]) {
1767  c = 0;
1768  } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1769  if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1770  (refl == s->fixcompref && refa == s->varcompref[0])) {
1771  c = 4;
1772  } else {
1773  c = (refa == refl) ? 3 : 1;
1774  }
1775  } else if (!s->left_comp_ctx[row7]) {
1776  if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1777  c = 1;
1778  } else {
1779  c = (refl == s->varcompref[1] &&
1780  refa != s->varcompref[1]) ? 2 : 4;
1781  }
1782  } else if (!s->above_comp_ctx[col]) {
1783  if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1784  c = 1;
1785  } else {
1786  c = (refa == s->varcompref[1] &&
1787  refl != s->varcompref[1]) ? 2 : 4;
1788  }
1789  } else {
1790  c = (refl == refa) ? 4 : 2;
1791  }
1792  }
1793  } else {
1794  if (s->above_intra_ctx[col]) {
1795  c = 2;
1796  } else if (s->above_comp_ctx[col]) {
1797  c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1798  } else {
1799  c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1800  }
1801  }
1802  } else if (have_l) {
1803  if (s->left_intra_ctx[row7]) {
1804  c = 2;
1805  } else if (s->left_comp_ctx[row7]) {
1806  c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1807  } else {
1808  c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1809  }
1810  } else {
1811  c = 2;
1812  }
1813  bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1814  b->ref[var_idx] = s->varcompref[bit];
1815  s->counts.comp_ref[c][bit]++;
1816  } else /* single reference */ {
1817  int bit, c;
1818 
1819  if (have_a && !s->above_intra_ctx[col]) {
1820  if (have_l && !s->left_intra_ctx[row7]) {
1821  if (s->left_comp_ctx[row7]) {
1822  if (s->above_comp_ctx[col]) {
1823  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1824  !s->above_ref_ctx[col]);
1825  } else {
1826  c = (3 * !s->above_ref_ctx[col]) +
1827  (!s->fixcompref || !s->left_ref_ctx[row7]);
1828  }
1829  } else if (s->above_comp_ctx[col]) {
1830  c = (3 * !s->left_ref_ctx[row7]) +
1831  (!s->fixcompref || !s->above_ref_ctx[col]);
1832  } else {
1833  c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1834  }
1835  } else if (s->above_intra_ctx[col]) {
1836  c = 2;
1837  } else if (s->above_comp_ctx[col]) {
1838  c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1839  } else {
1840  c = 4 * (!s->above_ref_ctx[col]);
1841  }
1842  } else if (have_l && !s->left_intra_ctx[row7]) {
1843  if (s->left_intra_ctx[row7]) {
1844  c = 2;
1845  } else if (s->left_comp_ctx[row7]) {
1846  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1847  } else {
1848  c = 4 * (!s->left_ref_ctx[row7]);
1849  }
1850  } else {
1851  c = 2;
1852  }
1853  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1854  s->counts.single_ref[c][0][bit]++;
1855  if (!bit) {
1856  b->ref[0] = 0;
1857  } else {
1858  // FIXME can this codeblob be replaced by some sort of LUT?
1859  if (have_a) {
1860  if (have_l) {
1861  if (s->left_intra_ctx[row7]) {
1862  if (s->above_intra_ctx[col]) {
1863  c = 2;
1864  } else if (s->above_comp_ctx[col]) {
1865  c = 1 + 2 * (s->fixcompref == 1 ||
1866  s->above_ref_ctx[col] == 1);
1867  } else if (!s->above_ref_ctx[col]) {
1868  c = 3;
1869  } else {
1870  c = 4 * (s->above_ref_ctx[col] == 1);
1871  }
1872  } else if (s->above_intra_ctx[col]) {
1873  if (s->left_intra_ctx[row7]) {
1874  c = 2;
1875  } else if (s->left_comp_ctx[row7]) {
1876  c = 1 + 2 * (s->fixcompref == 1 ||
1877  s->left_ref_ctx[row7] == 1);
1878  } else if (!s->left_ref_ctx[row7]) {
1879  c = 3;
1880  } else {
1881  c = 4 * (s->left_ref_ctx[row7] == 1);
1882  }
1883  } else if (s->above_comp_ctx[col]) {
1884  if (s->left_comp_ctx[row7]) {
1885  if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1886  c = 3 * (s->fixcompref == 1 ||
1887  s->left_ref_ctx[row7] == 1);
1888  } else {
1889  c = 2;
1890  }
1891  } else if (!s->left_ref_ctx[row7]) {
1892  c = 1 + 2 * (s->fixcompref == 1 ||
1893  s->above_ref_ctx[col] == 1);
1894  } else {
1895  c = 3 * (s->left_ref_ctx[row7] == 1) +
1896  (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1897  }
1898  } else if (s->left_comp_ctx[row7]) {
1899  if (!s->above_ref_ctx[col]) {
1900  c = 1 + 2 * (s->fixcompref == 1 ||
1901  s->left_ref_ctx[row7] == 1);
1902  } else {
1903  c = 3 * (s->above_ref_ctx[col] == 1) +
1904  (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1905  }
1906  } else if (!s->above_ref_ctx[col]) {
1907  if (!s->left_ref_ctx[row7]) {
1908  c = 3;
1909  } else {
1910  c = 4 * (s->left_ref_ctx[row7] == 1);
1911  }
1912  } else if (!s->left_ref_ctx[row7]) {
1913  c = 4 * (s->above_ref_ctx[col] == 1);
1914  } else {
1915  c = 2 * (s->left_ref_ctx[row7] == 1) +
1916  2 * (s->above_ref_ctx[col] == 1);
1917  }
1918  } else {
1919  if (s->above_intra_ctx[col] ||
1920  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1921  c = 2;
1922  } else if (s->above_comp_ctx[col]) {
1923  c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1924  } else {
1925  c = 4 * (s->above_ref_ctx[col] == 1);
1926  }
1927  }
1928  } else if (have_l) {
1929  if (s->left_intra_ctx[row7] ||
1930  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1931  c = 2;
1932  } else if (s->left_comp_ctx[row7]) {
1933  c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1934  } else {
1935  c = 4 * (s->left_ref_ctx[row7] == 1);
1936  }
1937  } else {
1938  c = 2;
1939  }
1940  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1941  s->counts.single_ref[c][1][bit]++;
1942  b->ref[0] = 1 + bit;
1943  }
1944  }
1945  }
1946 
1947  if (b->bs <= BS_8x8) {
1948  if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].skip_enabled) {
1949  b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1950  } else {
1951  static const uint8_t off[10] = {
1952  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1953  };
1954 
1955  // FIXME this needs to use the LUT tables from find_ref_mvs
1956  // because not all are -1,0/0,-1
1957  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1958  [s->left_mode_ctx[row7 + off[b->bs]]];
1959 
1961  s->prob.p.mv_mode[c]);
1962  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1963  s->counts.mv_mode[c][b->mode[0] - 10]++;
1964  }
1965  }
1966 
1967  if (s->filtermode == FILTER_SWITCHABLE) {
1968  int c;
1969 
1970  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1971  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1972  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1973  s->left_filter_ctx[row7] : 3;
1974  } else {
1975  c = s->above_filter_ctx[col];
1976  }
1977  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1978  c = s->left_filter_ctx[row7];
1979  } else {
1980  c = 3;
1981  }
1982 
1983  filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1984  s->prob.p.filter[c]);
1985  s->counts.filter[c][filter_id]++;
1986  b->filter = vp9_filter_lut[filter_id];
1987  } else {
1988  b->filter = s->filtermode;
1989  }
1990 
1991  if (b->bs > BS_8x8) {
1992  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1993 
1995  s->prob.p.mv_mode[c]);
1996  s->counts.mv_mode[c][b->mode[0] - 10]++;
1997  fill_mv(s, b->mv[0], b->mode[0], 0);
1998 
1999  if (b->bs != BS_8x4) {
2001  s->prob.p.mv_mode[c]);
2002  s->counts.mv_mode[c][b->mode[1] - 10]++;
2003  fill_mv(s, b->mv[1], b->mode[1], 1);
2004  } else {
2005  b->mode[1] = b->mode[0];
2006  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2007  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2008  }
2009 
2010  if (b->bs != BS_4x8) {
2012  s->prob.p.mv_mode[c]);
2013  s->counts.mv_mode[c][b->mode[2] - 10]++;
2014  fill_mv(s, b->mv[2], b->mode[2], 2);
2015 
2016  if (b->bs != BS_8x4) {
2018  s->prob.p.mv_mode[c]);
2019  s->counts.mv_mode[c][b->mode[3] - 10]++;
2020  fill_mv(s, b->mv[3], b->mode[3], 3);
2021  } else {
2022  b->mode[3] = b->mode[2];
2023  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2024  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2025  }
2026  } else {
2027  b->mode[2] = b->mode[0];
2028  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2029  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2030  b->mode[3] = b->mode[1];
2031  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2032  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2033  }
2034  } else {
2035  fill_mv(s, b->mv[0], b->mode[0], -1);
2036  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2037  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2038  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2039  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2040  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2041  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2042  }
2043 
2044  vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
2045  }
2046 
2047 #if HAVE_FAST_64BIT
2048 #define SPLAT_CTX(var, val, n) \
2049  switch (n) { \
2050  case 1: var = val; break; \
2051  case 2: AV_WN16A(&var, val * 0x0101); break; \
2052  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2053  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2054  case 16: { \
2055  uint64_t v64 = val * 0x0101010101010101ULL; \
2056  AV_WN64A( &var, v64); \
2057  AV_WN64A(&((uint8_t *) &var)[8], v64); \
2058  break; \
2059  } \
2060  }
2061 #else
2062 #define SPLAT_CTX(var, val, n) \
2063  switch (n) { \
2064  case 1: var = val; break; \
2065  case 2: AV_WN16A(&var, val * 0x0101); break; \
2066  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2067  case 8: { \
2068  uint32_t v32 = val * 0x01010101; \
2069  AV_WN32A( &var, v32); \
2070  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2071  break; \
2072  } \
2073  case 16: { \
2074  uint32_t v32 = val * 0x01010101; \
2075  AV_WN32A( &var, v32); \
2076  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2077  AV_WN32A(&((uint8_t *) &var)[8], v32); \
2078  AV_WN32A(&((uint8_t *) &var)[12], v32); \
2079  break; \
2080  } \
2081  }
2082 #endif
2083 
2084  switch (bwh_tab[1][b->bs][0]) {
2085 #define SET_CTXS(dir, off, n) \
2086  do { \
2087  SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2088  SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2089  SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2090  if (!s->keyframe && !s->intraonly) { \
2091  SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2092  SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2093  SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2094  if (!b->intra) { \
2095  SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2096  if (s->filtermode == FILTER_SWITCHABLE) { \
2097  SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2098  } \
2099  } \
2100  } \
2101  } while (0)
2102  case 1: SET_CTXS(above, col, 1); break;
2103  case 2: SET_CTXS(above, col, 2); break;
2104  case 4: SET_CTXS(above, col, 4); break;
2105  case 8: SET_CTXS(above, col, 8); break;
2106  }
2107  switch (bwh_tab[1][b->bs][1]) {
2108  case 1: SET_CTXS(left, row7, 1); break;
2109  case 2: SET_CTXS(left, row7, 2); break;
2110  case 4: SET_CTXS(left, row7, 4); break;
2111  case 8: SET_CTXS(left, row7, 8); break;
2112  }
2113 #undef SPLAT_CTX
2114 #undef SET_CTXS
2115 
2116  if (!s->keyframe && !s->intraonly) {
2117  if (b->bs > BS_8x8) {
2118  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2119 
2120  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2121  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2122  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2123  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2124  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2125  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2126  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2127  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2128  } else {
2129  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2130 
2131  for (n = 0; n < w4 * 2; n++) {
2132  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2133  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2134  }
2135  for (n = 0; n < h4 * 2; n++) {
2136  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2137  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2138  }
2139  }
2140  }
2141 
2142  // FIXME kinda ugly
2143  for (y = 0; y < h4; y++) {
2144  int x, o = (row + y) * s->sb_cols * 8 + col;
2145  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2146 
2147  if (b->intra) {
2148  for (x = 0; x < w4; x++) {
2149  mv[x].ref[0] =
2150  mv[x].ref[1] = -1;
2151  }
2152  } else if (b->comp) {
2153  for (x = 0; x < w4; x++) {
2154  mv[x].ref[0] = b->ref[0];
2155  mv[x].ref[1] = b->ref[1];
2156  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2157  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2158  }
2159  } else {
2160  for (x = 0; x < w4; x++) {
2161  mv[x].ref[0] = b->ref[0];
2162  mv[x].ref[1] = -1;
2163  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2164  }
2165  }
2166  }
2167 }
2168 
2169 // FIXME merge cnt/eob arguments?
/* Decode the coefficient tokens of one transform block from the VP56
 * range coder, following the VP9 coefficient token tree.
 *
 * c            range coder to read from
 * coef         output coefficient buffer (plain int16_t for 8bpp; for
 *              high bitdepth each value is written as a 32-bit word,
 *              see STORE_COEF below)
 * n_coeffs     number of coefficients in the block
 * is_tx32x32   nonzero for 32x32 transforms (dequantized values are
 *              halved, see the qmul application at the bottom)
 * is8bitsperpixel, bpp  pixel format properties
 * cnt, eob     per-band adaptation counters, updated while decoding
 * p            per-band token probabilities; entries [3..10] are lazily
 *              filled from vp9_model_pareto8 (once per position/frame)
 * nnz          initial nonzero context (sum of neighbour contexts)
 * scan, nb     coefficient scan order and per-position neighbour table
 * band_counts  number of coefficients per probability band
 * qmul         dequant factors ([0] applies to the DC coeff, [1] to AC)
 *
 * Returns the end-of-block position, i.e. the number of scan positions
 * consumed (0 if the block is empty).
 */
2170 static av_always_inline int
2171 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2172  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2173  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2174  int nnz, const int16_t *scan, const int16_t (*nb)[2],
2175  const int16_t *band_counts, const int16_t *qmul)
2176 {
2177  int i = 0, band = 0, band_left = band_counts[band];
2178  uint8_t *tp = p[0][nnz];
 /* cache[] records a small magnitude class per decoded scan position;
  * it is read back through nb[] to derive the nonzero context of later
  * positions. It is not zeroed here — presumably nb[] only references
  * positions already written by this scan (TODO confirm against the
  * vp9data neighbour tables). */
2179  uint8_t cache[1024];
2180 
2181  do {
2182  int val, rc;
2183 
2184  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2185  eob[band][nnz][val]++;
2186  if (!val)
2187  break;
2188 
2189  skip_eob:
2190  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2191  cnt[band][nnz][0]++;
2192  if (!--band_left)
2193  band_left = band_counts[++band];
2194  cache[scan[i]] = 0;
 /* derive the next position's nonzero context from its two
  * neighbours' cached magnitude classes */
2195  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2196  tp = p[band][nnz];
2197  if (++i == n_coeffs)
2198  break; //invalid input; blocks should end with EOB
 /* after a zero run element, the eob branch is not re-coded */
2199  goto skip_eob;
2200  }
2201 
2202  rc = scan[i];
2203  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2204  cnt[band][nnz][1]++;
2205  val = 1;
2206  cache[rc] = 1;
2207  } else {
2208  // fill in p[3-10] (model fill) - only once per frame for each pos
2209  if (!tp[3])
2210  memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2211 
2212  cnt[band][nnz][2]++;
2213  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2214  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2215  cache[rc] = val = 2;
2216  } else {
2217  val = 3 + vp56_rac_get_prob(c, tp[5]);
2218  cache[rc] = 3;
2219  }
2220  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2221  cache[rc] = 4;
2222  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2223  val = 5 + vp56_rac_get_prob(c, 159);
2224  } else {
2225  val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2226  val += vp56_rac_get_prob(c, 145);
2227  }
2228  } else { // cat 3-6
2229  cache[rc] = 5;
2230  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2231  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2232  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2233  val += (vp56_rac_get_prob(c, 148) << 1);
2234  val += vp56_rac_get_prob(c, 140);
2235  } else {
2236  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2237  val += (vp56_rac_get_prob(c, 155) << 2);
2238  val += (vp56_rac_get_prob(c, 140) << 1);
2239  val += vp56_rac_get_prob(c, 135);
2240  }
2241  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2242  val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2243  val += (vp56_rac_get_prob(c, 157) << 3);
2244  val += (vp56_rac_get_prob(c, 141) << 2);
2245  val += (vp56_rac_get_prob(c, 134) << 1);
2246  val += vp56_rac_get_prob(c, 130);
2247  } else {
2248  val = 67;
 /* cat6 gets extra high bits for >8bpp content (bits
  * 14-15 for 10bpp, bits 14-17 for 12bpp) */
2249  if (!is8bitsperpixel) {
2250  if (bpp == 12) {
2251  val += vp56_rac_get_prob(c, 255) << 17;
2252  val += vp56_rac_get_prob(c, 255) << 16;
2253  }
2254  val += (vp56_rac_get_prob(c, 255) << 15);
2255  val += (vp56_rac_get_prob(c, 255) << 14);
2256  }
2257  val += (vp56_rac_get_prob(c, 254) << 13);
2258  val += (vp56_rac_get_prob(c, 254) << 12);
2259  val += (vp56_rac_get_prob(c, 254) << 11);
2260  val += (vp56_rac_get_prob(c, 252) << 10);
2261  val += (vp56_rac_get_prob(c, 249) << 9);
2262  val += (vp56_rac_get_prob(c, 243) << 8);
2263  val += (vp56_rac_get_prob(c, 230) << 7);
2264  val += (vp56_rac_get_prob(c, 196) << 6);
2265  val += (vp56_rac_get_prob(c, 177) << 5);
2266  val += (vp56_rac_get_prob(c, 153) << 4);
2267  val += (vp56_rac_get_prob(c, 140) << 3);
2268  val += (vp56_rac_get_prob(c, 133) << 2);
2269  val += (vp56_rac_get_prob(c, 130) << 1);
2270  val += vp56_rac_get_prob(c, 129);
2271  }
2272  }
2273  }
/* High-bitdepth coefficients can exceed the int16_t range (see the
 * << 16/17 additions above), so they are stored as 32-bit words. */
2274 #define STORE_COEF(c, i, v) do { \
2275  if (is8bitsperpixel) { \
2276  c[i] = v; \
2277  } else { \
2278  AV_WN32A(&c[i * 2], v); \
2279  } \
2280 } while (0)
2281  if (!--band_left)
2282  band_left = band_counts[++band];
 /* sign bit, dequant (qmul[0] for DC at i==0, qmul[1] otherwise);
  * 32x32 transforms use half the nominal dequant scale */
2283  if (is_tx32x32)
2284  STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2285  else
2286  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2287  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2288  tp = p[band][nnz];
2289  } while (++i < n_coeffs);
2290 
2291  return i;
2292 }
2293 
2294 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2295  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2296  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2297  const int16_t (*nb)[2], const int16_t *band_counts,
2298  const int16_t *qmul)
2299 {
2300  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2301  nnz, scan, nb, band_counts, qmul);
2302 }
2303 
2304 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2305  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2306  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2307  const int16_t (*nb)[2], const int16_t *band_counts,
2308  const int16_t *qmul)
2309 {
2310  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2311  nnz, scan, nb, band_counts, qmul);
2312 }
2313 
2314 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2315  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2316  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2317  const int16_t (*nb)[2], const int16_t *band_counts,
2318  const int16_t *qmul)
2319 {
2320  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2321  nnz, scan, nb, band_counts, qmul);
2322 }
2323 
2324 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2325  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2326  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2327  const int16_t (*nb)[2], const int16_t *band_counts,
2328  const int16_t *qmul)
2329 {
2330  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2331  nnz, scan, nb, band_counts, qmul);
2332 }
2333 
/* Decode all luma and chroma coefficients of the current block (at
 * s->row/s->col), writing per-subblock end-of-block counts into s->eob
 * and s->uveob[] and updating the above/left nonzero-context arrays.
 * Returns nonzero if any coefficient in the block was coded. */
2334 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2335 {
2336  VP9Context *s = ctx->priv_data;
2337  VP9Block *b = s->b;
2338  int row = s->row, col = s->col;
2339  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2340  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2341  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2342  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
 /* clip the decode area to the visible frame (blocks may overhang the
  * right/bottom edge) */
2343  int end_x = FFMIN(2 * (s->cols - col), w4);
2344  int end_y = FFMIN(2 * (s->rows - row), h4);
2345  int n, pl, x, y, res;
2346  int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2347  int tx = 4 * s->lossless + b->tx;
2348  const int16_t * const *yscans = vp9_scans[tx];
2349  const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2350  const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2351  const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2352  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2353  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
 /* coefficients per probability band, indexed by transform size */
2354  static const int16_t band_counts[4][8] = {
2355  { 1, 2, 3, 4, 3, 16 - 13 },
2356  { 1, 2, 3, 4, 11, 64 - 21 },
2357  { 1, 2, 3, 4, 11, 256 - 21 },
2358  { 1, 2, 3, 4, 11, 1024 - 21 },
2359  };
2360  const int16_t *y_band_counts = band_counts[b->tx];
2361  const int16_t *uv_band_counts = band_counts[b->uvtx];
2362  int bytesperpixel = is8bitsperpixel ? 1 : 2;
2363  int total_coeff = 0;
2364 
/* MERGE/MERGE_CTX collapse the per-4x4 nonzero contexts into one flag
 * per transform unit before decoding with a larger transform size. */
2365 #define MERGE(la, end, step, rd) \
2366  for (n = 0; n < end; n += step) \
2367  la[n] = !!rd(&la[n])
2368 #define MERGE_CTX(step, rd) \
2369  do { \
2370  MERGE(l, end_y, step, rd); \
2371  MERGE(a, end_x, step, rd); \
2372  } while (0)
2373 
/* Decode one luma transform unit per iteration; the eob result is also
 * stored 16-bit-wide for 16x16/32x32 so it can exceed 255. */
2374 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2375  for (n = 0, y = 0; y < end_y; y += step) { \
2376  for (x = 0; x < end_x; x += step, n += step * step) { \
2377  enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2378  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2379  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2380  c, e, p, a[x] + l[y], yscans[txtp], \
2381  ynbs[txtp], y_band_counts, qmul[0]); \
2382  a[x] = l[y] = !!res; \
2383  total_coeff |= !!res; \
2384  if (step >= 4) { \
2385  AV_WN16A(&s->eob[n], res); \
2386  } else { \
2387  s->eob[n] = res; \
2388  } \
2389  } \
2390  }
2391 
/* SPLAT/SPLAT_CTX propagate the single decoded nnz flag back across
 * all 4x4 units covered by the transform; the "cond" path uses aligned
 * multi-byte stores when the unit is fully inside the frame. */
2392 #define SPLAT(la, end, step, cond) \
2393  if (step == 2) { \
2394  for (n = 1; n < end; n += step) \
2395  la[n] = la[n - 1]; \
2396  } else if (step == 4) { \
2397  if (cond) { \
2398  for (n = 0; n < end; n += step) \
2399  AV_WN32A(&la[n], la[n] * 0x01010101); \
2400  } else { \
2401  for (n = 0; n < end; n += step) \
2402  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2403  } \
2404  } else /* step == 8 */ { \
2405  if (cond) { \
2406  if (HAVE_FAST_64BIT) { \
2407  for (n = 0; n < end; n += step) \
2408  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2409  } else { \
2410  for (n = 0; n < end; n += step) { \
2411  uint32_t v32 = la[n] * 0x01010101; \
2412  AV_WN32A(&la[n], v32); \
2413  AV_WN32A(&la[n + 4], v32); \
2414  } \
2415  } \
2416  } else { \
2417  for (n = 0; n < end; n += step) \
2418  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2419  } \
2420  }
2421 #define SPLAT_CTX(step) \
2422  do { \
2423  SPLAT(a, end_x, step, end_x == w4); \
2424  SPLAT(l, end_y, step, end_y == h4); \
2425  } while (0)
2426 
2427  /* y tokens */
2428  switch (b->tx) {
2429  case TX_4X4:
2430  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2431  break;
2432  case TX_8X8:
2433  MERGE_CTX(2, AV_RN16A);
2434  DECODE_Y_COEF_LOOP(2, 0,);
2435  SPLAT_CTX(2);
2436  break;
2437  case TX_16X16:
2438  MERGE_CTX(4, AV_RN32A);
2439  DECODE_Y_COEF_LOOP(4, 0,);
2440  SPLAT_CTX(4);
2441  break;
2442  case TX_32X32:
2443  MERGE_CTX(8, AV_RN64A);
2444  DECODE_Y_COEF_LOOP(8, 0, 32);
2445  SPLAT_CTX(8);
2446  break;
2447  }
2448 
/* Same as DECODE_Y_COEF_LOOP but for a chroma plane: fixed DCT_DCT
 * scan and the AC (qmul[1]) dequant table. */
2449 #define DECODE_UV_COEF_LOOP(step, v) \
2450  for (n = 0, y = 0; y < end_y; y += step) { \
2451  for (x = 0; x < end_x; x += step, n += step * step) { \
2452  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2453  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2454  16 * step * step, c, e, p, a[x] + l[y], \
2455  uvscan, uvnb, uv_band_counts, qmul[1]); \
2456  a[x] = l[y] = !!res; \
2457  total_coeff |= !!res; \
2458  if (step >= 4) { \
2459  AV_WN16A(&s->uveob[pl][n], res); \
2460  } else { \
2461  s->uveob[pl][n] = res; \
2462  } \
2463  } \
2464  }
2465 
 /* switch to the chroma probability/counter tables and scale the
  * decode area by the chroma subsampling factors */
2466  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2467  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2468  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2469  w4 >>= s->ss_h;
2470  end_x >>= s->ss_h;
2471  h4 >>= s->ss_v;
2472  end_y >>= s->ss_v;
2473  for (pl = 0; pl < 2; pl++) {
2474  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2475  l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2476  switch (b->uvtx) {
2477  case TX_4X4:
2478  DECODE_UV_COEF_LOOP(1,);
2479  break;
2480  case TX_8X8:
2481  MERGE_CTX(2, AV_RN16A);
2482  DECODE_UV_COEF_LOOP(2,);
2483  SPLAT_CTX(2);
2484  break;
2485  case TX_16X16:
2486  MERGE_CTX(4, AV_RN32A);
2487  DECODE_UV_COEF_LOOP(4,);
2488  SPLAT_CTX(4);
2489  break;
2490  case TX_32X32:
2491  MERGE_CTX(8, AV_RN64A);
2492  DECODE_UV_COEF_LOOP(8, 32);
2493  SPLAT_CTX(8);
2494  break;
2495  }
2496  }
2497 
2498  return total_coeff;
2499 }
2500 
2502 {
2503  return decode_coeffs(ctx, 1);
2504 }
2505 
2507 {
2508  return decode_coeffs(ctx, 0);
2509 }
2510 
2512  uint8_t *dst_edge, ptrdiff_t stride_edge,
2513  uint8_t *dst_inner, ptrdiff_t stride_inner,
2514  uint8_t *l, int col, int x, int w,
2515  int row, int y, enum TxfmMode tx,
2516  int p, int ss_h, int ss_v, int bytesperpixel)
2517 {
2518  int have_top = row > 0 || y > 0;
2519  int have_left = col > s->tiling.tile_col_start || x > 0;
2520  int have_right = x < w - 1;
2521  int bpp = s->bpp;
2522  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2523  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2524  { DC_127_PRED, VERT_PRED } },
2525  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2526  { HOR_PRED, HOR_PRED } },
2527  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2528  { LEFT_DC_PRED, DC_PRED } },
2538  { DC_127_PRED, VERT_LEFT_PRED } },
2539  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2540  { HOR_UP_PRED, HOR_UP_PRED } },
2541  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2542  { HOR_PRED, TM_VP8_PRED } },
2543  };
2544  static const struct {
2545  uint8_t needs_left:1;
2546  uint8_t needs_top:1;
2547  uint8_t needs_topleft:1;
2548  uint8_t needs_topright:1;
2549  uint8_t invert_left:1;
2550  } edges[N_INTRA_PRED_MODES] = {
2551  [VERT_PRED] = { .needs_top = 1 },
2552  [HOR_PRED] = { .needs_left = 1 },
2553  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2554  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2555  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2556  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2557  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2558  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2559  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2560  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2561  [LEFT_DC_PRED] = { .needs_left = 1 },
2562  [TOP_DC_PRED] = { .needs_top = 1 },
2563  [DC_128_PRED] = { 0 },
2564  [DC_127_PRED] = { 0 },
2565  [DC_129_PRED] = { 0 }
2566  };
2567 
2568  av_assert2(mode >= 0 && mode < 10);
2569  mode = mode_conv[mode][have_left][have_top];
2570  if (edges[mode].needs_top) {
2571  uint8_t *top, *topleft;
2572  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2573  int n_px_need_tr = 0;
2574 
2575  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2576  n_px_need_tr = 4;
2577 
2578  // if top of sb64-row, use s->intra_pred_data[] instead of
2579  // dst[-stride] for intra prediction (it contains pre- instead of
2580  // post-loopfilter data)
2581  if (have_top) {
2582  top = !(row & 7) && !y ?
2583  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2584  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2585  if (have_left)
2586  topleft = !(row & 7) && !y ?
2587  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2588  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2589  &dst_inner[-stride_inner];
2590  }
2591 
2592  if (have_top &&
2593  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2594  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2595  n_px_need + n_px_need_tr <= n_px_have) {
2596  *a = top;
2597  } else {
2598  if (have_top) {
2599  if (n_px_need <= n_px_have) {
2600  memcpy(*a, top, n_px_need * bytesperpixel);
2601  } else {
2602 #define memset_bpp(c, i1, v, i2, num) do { \
2603  if (bytesperpixel == 1) { \
2604  memset(&(c)[(i1)], (v)[(i2)], (num)); \
2605  } else { \
2606  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2607  for (n = 0; n < (num); n++) { \
2608  AV_WN16A(&(c)[((i1) + n) * 2], val); \
2609  } \
2610  } \
2611 } while (0)
2612  memcpy(*a, top, n_px_have * bytesperpixel);
2613  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2614  }
2615  } else {
2616 #define memset_val(c, val, num) do { \
2617  if (bytesperpixel == 1) { \
2618  memset((c), (val), (num)); \
2619  } else { \
2620  int n; \
2621  for (n = 0; n < (num); n++) { \
2622  AV_WN16A(&(c)[n * 2], (val)); \
2623  } \
2624  } \
2625 } while (0)
2626  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2627  }
2628  if (edges[mode].needs_topleft) {
2629  if (have_left && have_top) {
2630 #define assign_bpp(c, i1, v, i2) do { \
2631  if (bytesperpixel == 1) { \
2632  (c)[(i1)] = (v)[(i2)]; \
2633  } else { \
2634  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2635  } \
2636 } while (0)
2637  assign_bpp(*a, -1, topleft, -1);
2638  } else {
2639 #define assign_val(c, i, v) do { \
2640  if (bytesperpixel == 1) { \
2641  (c)[(i)] = (v); \
2642  } else { \
2643  AV_WN16A(&(c)[(i) * 2], (v)); \
2644  } \
2645 } while (0)
2646  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2647  }
2648  }
2649  if (tx == TX_4X4 && edges[mode].needs_topright) {
2650  if (have_top && have_right &&
2651  n_px_need + n_px_need_tr <= n_px_have) {
2652  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2653  } else {
2654  memset_bpp(*a, 4, *a, 3, 4);
2655  }
2656  }
2657  }
2658  }
2659  if (edges[mode].needs_left) {
2660  if (have_left) {
2661  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2662  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2663  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2664 
2665  if (edges[mode].invert_left) {
2666  if (n_px_need <= n_px_have) {
2667  for (i = 0; i < n_px_need; i++)
2668  assign_bpp(l, i, &dst[i * stride], -1);
2669  } else {
2670  for (i = 0; i < n_px_have; i++)
2671  assign_bpp(l, i, &dst[i * stride], -1);
2672  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2673  }
2674  } else {
2675  if (n_px_need <= n_px_have) {
2676  for (i = 0; i < n_px_need; i++)
2677  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2678  } else {
2679  for (i = 0; i < n_px_have; i++)
2680  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2681  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2682  }
2683  }
2684  } else {
2685  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2686  }
2687  }
2688 
2689  return mode;
2690 }
2691 
/* Reconstruct an intra-coded block: for each transform sub-block,
 * build the prediction edge arrays (check_intra_mode), run the intra
 * predictor, then add the inverse transform of the decoded
 * coefficients. Luma is handled first, then both chroma planes.
 * y_off/uv_off are byte offsets of the block into the frame planes. */
2692 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2693  ptrdiff_t uv_off, int bytesperpixel)
2694 {
2695  VP9Context *s = ctx->priv_data;
2696  VP9Block *b = s->b;
2697  int row = s->row, col = s->col;
2698  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2699  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2700  int end_x = FFMIN(2 * (s->cols - col), w4);
2701  int end_y = FFMIN(2 * (s->rows - row), h4);
2702  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2703  int uvstep1d = 1 << b->uvtx, p;
 /* dst/ptr write into s->dst[] (s->y_stride); dst_r/ptr_r address the
  * current frame plane and are handed to check_intra_mode for reading
  * edge pixels */
2704  uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2705  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2706  LOCAL_ALIGNED_32(uint8_t, l, [64]);
2707 
2708  for (n = 0, y = 0; y < end_y; y += step1d) {
2709  uint8_t *ptr = dst, *ptr_r = dst_r;
2710  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2711  ptr_r += 4 * step1d * bytesperpixel, n += step) {
 /* sub-8x8 blocks with 4x4 transforms carry one mode per 4x4 */
2712  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2713  y * 2 + x : 0];
 /* a_buf[32] leaves headroom below the pointer for the top-left
  * edge sample (written at a[-1]) */
2714  uint8_t *a = &a_buf[32];
2715  enum TxfmType txtp = vp9_intra_txfm_type[mode];
 /* eob was stored 16-bit-wide for 16x16/32x32 in decode_coeffs() */
2716  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2717 
2718  mode = check_intra_mode(s, mode, &a, ptr_r,
2719  s->frames[CUR_FRAME].tf.f->linesize[0],
2720  ptr, s->y_stride, l,
2721  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2722  s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2723  if (eob)
2724  s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2725  s->block + 16 * n * bytesperpixel, eob);
2726  }
2727  dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2728  dst += 4 * step1d * s->y_stride;
2729  }
2730 
2731  // U/V
2732  w4 >>= s->ss_h;
2733  end_x >>= s->ss_h;
2734  end_y >>= s->ss_v;
2735  step = 1 << (b->uvtx * 2);
2736  for (p = 0; p < 2; p++) {
2737  dst = s->dst[1 + p];
2738  dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2739  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2740  uint8_t *ptr = dst, *ptr_r = dst_r;
2741  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2742  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
 /* chroma always uses a single mode and a DCT_DCT transform */
2743  int mode = b->uvmode;
2744  uint8_t *a = &a_buf[32];
2745  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2746 
2747  mode = check_intra_mode(s, mode, &a, ptr_r,
2748  s->frames[CUR_FRAME].tf.f->linesize[1],
2749  ptr, s->uv_stride, l, col, x, w4, row, y,
2750  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2751  s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2752  if (eob)
2753  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2754  s->uvblock[p] + 16 * n * bytesperpixel, eob);
2755  }
2756  dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2757  dst += 4 * uvstep1d * s->uv_stride;
2758  }
2759  }
2760 }
2761 
2762 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2763 {
2764  intra_recon(ctx, y_off, uv_off, 1);
2765 }
2766 
2767 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2768 {
2769  intra_recon(ctx, y_off, uv_off, 2);
2770 }
2771 
2773  uint8_t *dst, ptrdiff_t dst_stride,
2774  const uint8_t *ref, ptrdiff_t ref_stride,
2776  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2777  int px, int py, int pw, int ph,
2778  int bw, int bh, int w, int h, int bytesperpixel,
2779  const uint16_t *scale, const uint8_t *step)
2780 {
2781 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2782  int mx, my;
2783  int refbw_m1, refbh_m1;
2784  int th;
2785  VP56mv mv;
2786 
2787  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2788  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2789  // BUG libvpx seems to scale the two components separately. This introduces
2790  // rounding errors but we have to reproduce them to be exactly compatible
2791  // with the output from libvpx...
2792  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2793  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2794 
2795  y = my >> 4;
2796  x = mx >> 4;
2797  ref += y * ref_stride + x * bytesperpixel;
2798  mx &= 15;
2799  my &= 15;
2800  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2801  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2802  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2803  // we use +7 because the last 7 pixels of each sbrow can be changed in
2804  // the longest loopfilter of the next sbrow
2805  th = (y + refbh_m1 + 4 + 7) >> 6;
2806  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2807  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2809  ref - 3 * ref_stride - 3 * bytesperpixel,
2810  288, ref_stride,
2811  refbw_m1 + 8, refbh_m1 + 8,
2812  x - 3, y - 3, w, h);
2813  ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2814  ref_stride = 288;
2815  }
2816  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2817 }
2818 
2820  uint8_t *dst_u, uint8_t *dst_v,
2821  ptrdiff_t dst_stride,
2822  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2823  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2825  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2826  int px, int py, int pw, int ph,
2827  int bw, int bh, int w, int h, int bytesperpixel,
2828  const uint16_t *scale, const uint8_t *step)
2829 {
2830  int mx, my;
2831  int refbw_m1, refbh_m1;
2832  int th;
2833  VP56mv mv;
2834 
2835  if (s->ss_h) {
2836  // BUG https://code.google.com/p/webm/issues/detail?id=820
2837  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2838  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2839  } else {
2840  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2841  mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2842  }
2843  if (s->ss_v) {
2844  // BUG https://code.google.com/p/webm/issues/detail?id=820
2845  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2846  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2847  } else {
2848  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2849  my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2850  }
2851 #undef scale_mv
2852  y = my >> 4;
2853  x = mx >> 4;
2854  ref_u += y * src_stride_u + x * bytesperpixel;
2855  ref_v += y * src_stride_v + x * bytesperpixel;
2856  mx &= 15;
2857  my &= 15;
2858  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2859  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2860  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2861  // we use +7 because the last 7 pixels of each sbrow can be changed in
2862  // the longest loopfilter of the next sbrow
2863  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2864  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2865  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2867  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2868  288, src_stride_u,
2869  refbw_m1 + 8, refbh_m1 + 8,
2870  x - 3, y - 3, w, h);
2871  ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2872  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2873 
2875  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2876  288, src_stride_v,
2877  refbw_m1 + 8, refbh_m1 + 8,
2878  x - 3, y - 3, w, h);
2879  ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2880  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2881  } else {
2882  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2883  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2884  }
2885 }
2886 
/* Instantiate the scaled motion-compensation block reconstruction
 * (vp9_mc_template.c) twice — once for 8bpp, once for high bitdepth —
 * by routing the template's mc_luma_dir/mc_chroma_dir hooks to the
 * *_scaled helpers above. */
2887 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2888  px, py, pw, ph, bw, bh, w, h, i) \
2889  mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2890  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2891  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2892 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2893  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2894  mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2895  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2896  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2897 #define SCALED 1
2898 #define FN(x) x##_scaled_8bpp
2899 #define BYTES_PER_PIXEL 1
2900 #include "vp9_mc_template.c"
2901 #undef FN
2902 #undef BYTES_PER_PIXEL
2903 #define FN(x) x##_scaled_16bpp
2904 #define BYTES_PER_PIXEL 2
2905 #include "vp9_mc_template.c"
2906 #undef mc_luma_dir
2907 #undef mc_chroma_dir
2908 #undef FN
2909 #undef BYTES_PER_PIXEL
2910 #undef SCALED
2911 
2913  uint8_t *dst, ptrdiff_t dst_stride,
2914  const uint8_t *ref, ptrdiff_t ref_stride,
2916  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2917  int bw, int bh, int w, int h, int bytesperpixel)
2918 {
2919  int mx = mv->x, my = mv->y, th;
2920 
2921  y += my >> 3;
2922  x += mx >> 3;
2923  ref += y * ref_stride + x * bytesperpixel;
2924  mx &= 7;
2925  my &= 7;
2926  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2927  // we use +7 because the last 7 pixels of each sbrow can be changed in
2928  // the longest loopfilter of the next sbrow
2929  th = (y + bh + 4 * !!my + 7) >> 6;
2930  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2931  if (x < !!mx * 3 || y < !!my * 3 ||
2932  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2934  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2935  160, ref_stride,
2936  bw + !!mx * 7, bh + !!my * 7,
2937  x - !!mx * 3, y - !!my * 3, w, h);
2938  ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2939  ref_stride = 160;
2940  }
2941  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2942 }
2943 
2945  uint8_t *dst_u, uint8_t *dst_v,
2946  ptrdiff_t dst_stride,
2947  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2948  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2950  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2951  int bw, int bh, int w, int h, int bytesperpixel)
2952 {
2953  int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2954 
2955  y += my >> 4;
2956  x += mx >> 4;
2957  ref_u += y * src_stride_u + x * bytesperpixel;
2958  ref_v += y * src_stride_v + x * bytesperpixel;
2959  mx &= 15;
2960  my &= 15;
2961  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2962  // we use +7 because the last 7 pixels of each sbrow can be changed in
2963  // the longest loopfilter of the next sbrow
2964  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2965  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2966  if (x < !!mx * 3 || y < !!my * 3 ||
2967  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2969  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2970  160, src_stride_u,
2971  bw + !!mx * 7, bh + !!my * 7,
2972  x - !!mx * 3, y - !!my * 3, w, h);
2973  ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2974  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2975 
2977  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2978  160, src_stride_v,
2979  bw + !!mx * 7, bh + !!my * 7,
2980  x - !!mx * 3, y - !!my * 3, w, h);
2981  ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2982  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2983  } else {
2984  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2985  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2986  }
2987 }
2988 
2989 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2990  px, py, pw, ph, bw, bh, w, h, i) \
2991  mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2992  mv, bw, bh, w, h, bytesperpixel)
2993 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2994  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2995  mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2996  row, col, mv, bw, bh, w, h, bytesperpixel)
2997 #define SCALED 0
2998 #define FN(x) x##_8bpp
2999 #define BYTES_PER_PIXEL 1
3000 #include "vp9_mc_template.c"
3001 #undef FN
3002 #undef BYTES_PER_PIXEL
3003 #define FN(x) x##_16bpp
3004 #define BYTES_PER_PIXEL 2
3005 #include "vp9_mc_template.c"
3006 #undef mc_luma_dir_dir
3007 #undef mc_chroma_dir_dir
3008 #undef FN
3009 #undef BYTES_PER_PIXEL
3010 #undef SCALED
3011 
3012 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3013 {
3014  VP9Context *s = ctx->priv_data;
3015  VP9Block *b = s->b;
3016  int row = s->row, col = s->col;
3017 
3018  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3019  if (bytesperpixel == 1) {
3020  inter_pred_scaled_8bpp(ctx);
3021  } else {
3022  inter_pred_scaled_16bpp(ctx);
3023  }
3024  } else {
3025  if (bytesperpixel == 1) {
3026  inter_pred_8bpp(ctx);
3027  } else {
3028  inter_pred_16bpp(ctx);
3029  }
3030  }
3031  if (!b->skip) {
3032  /* mostly copied intra_recon() */
3033 
3034  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3035  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3036  int end_x = FFMIN(2 * (s->cols - col), w4);
3037  int end_y = FFMIN(2 * (s->rows - row), h4);
3038  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3039  int uvstep1d = 1 << b->uvtx, p;
3040  uint8_t *dst = s->dst[0];
3041 
3042  // y itxfm add
3043  for (n = 0, y = 0; y < end_y; y += step1d) {
3044  uint8_t *ptr = dst;
3045  for (x = 0; x < end_x; x += step1d,
3046  ptr += 4 * step1d * bytesperpixel, n += step) {
3047  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3048 
3049  if (eob)
3050  s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3051  s->block + 16 * n * bytesperpixel, eob);
3052  }
3053  dst += 4 * s->y_stride * step1d;
3054  }
3055 
3056  // uv itxfm add
3057  end_x >>= s->ss_h;
3058  end_y >>= s->ss_v;
3059  step = 1 << (b->uvtx * 2);
3060  for (p = 0; p < 2; p++) {
3061  dst = s->dst[p + 1];
3062  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3063  uint8_t *ptr = dst;
3064  for (x = 0; x < end_x; x += uvstep1d,
3065  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3066  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3067 
3068  if (eob)
3069  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3070  s->uvblock[p] + 16 * n * bytesperpixel, eob);
3071  }
3072  dst += 4 * uvstep1d * s->uv_stride;
3073  }
3074  }
3075  }
3076 }
3077 
3079 {
3080  inter_recon(ctx, 1);
3081 }
3082 
3084 {
3085  inter_recon(ctx, 2);
3086 }
3087 
/**
 * Record in the per-superblock loopfilter masks which edges of the current
 * block need filtering, and with which filter width.
 *
 * @param mask       output: [0]=column (vertical) edges, [1]=row (horizontal)
 *                   edges; each is [8 rows][4] bitmasks indexed by filter size
 *                   (0=16px, 1=8px, 2=4px, 3=inner 4px) — see struct VP9Filter
 * @param ss_h,ss_v  chroma subsampling flags (both 0 for the luma call)
 * @param row_and_7  block row within the 64x64 superblock, in 8px units
 * @param col_and_7  block col within the 64x64 superblock, in 8px units
 * @param w,h        block size in 8px units, already clipped to the visible area
 * @param col_end,row_end  nonzero (cols&7 / rows&7) only when the block crosses
 *                   an odd right/bottom image edge — TODO confirm against callers
 * @param tx         transform size used by the block
 * @param skip_inter nonzero for a skipped inter block (no residual, so only
 *                   block-boundary edges are filtered, no inner tx edges)
 */
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    // bit patterns selecting the columns/rows that lie on 32px boundaries
    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && (ss_v | ss_h)) {
        if (h == ss_v) {
            if (row_and_7 & 1)
                return;
            if (!row_end)
                h += 1;
        }
        if (w == ss_h) {
            if (col_and_7 & 1)
                return;
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        // every 4px edge inside the block gets filtered
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;
            // for odd lines, if the odd col is not being filtered,
            // skip odd row also:
            // .---. <-- a
            // |   |
            // |___| <-- b
            // ^   ^
            // c   d
            //
            // if a/c are even row/col and b/d are odd, and d is skipped,
            // e.g. right edge of size-66x66.webm, then skip b also (bug)
            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;
            }
            if (!ss_h)
                mask[0][y][3] |= m_col;
            if (!ss_v) {
                if (ss_h && (col_end & 1))
                    mask[1][y][3] |= (t << (w - 1)) - t;
                else
                    mask[1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            // spacing between tx-block edges for each tx size
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int l2 = tx + ss_h - 1, step1d;
            int m_row = m_col & masks[l2];

            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
            // 8wd loopfilter to prevent going off the visible edge.
            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    mask[0][y][0] |= m_row_16;
                    mask[0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    mask[0][y][mask_id] |= m_row;
            }

            // same, for horizontal (row) edges
            l2 = tx + ss_v - 1;
            step1d = 1 << l2;
            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    mask[1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    mask[1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    mask[1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            // skipped inter block: only the block's own top/left boundary
            int mask_id;

            mask_id = (tx == TX_8X8) || (h == ss_v);
            mask[1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (w == ss_h);
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= t;
        } else {
            // skipped inter block with tx4x4: boundary only, 8px wide on
            // 32px-aligned edges, 4px wide otherwise
            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][2] |= t4;
                mask[0][y][1] |= t8;
            }
            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
        }
    }
}
3209 
/**
 * Decode one leaf block of the partition tree at (row, col), both in 8px
 * units.
 *
 * In pass < 2 this parses mode and coefficient data from the bitstream
 * (pass 1 returns right after parsing, storing state for the second pass);
 * in pass 2 it only reconstructs from the stored state. yoff/uvoff are byte
 * offsets of the block into the luma/chroma planes.
 */
static void decode_b(AVCodecContext *ctx, int row, int col,
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                     enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    enum BlockSize bs = bl * 3 + bp;  // (level, partition) -> BlockSize
    int bytesperpixel = s->bytesperpixel;
    int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
    int emu[2];
    AVFrame *f = s->frames[CUR_FRAME].tf.f;

    s->row = row;
    s->row7 = row & 7;
    s->col = col;
    s->col7 = col & 7;
    // MV clamping range for this block, in 1/8-pel units (64 == 8 pixels)
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);
    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
    if (s->pass < 2) {
        b->bs = bs;
        b->bl = bl;
        b->bp = bp;
        decode_mode(ctx);
        // chroma tx is one size smaller when the block is only one
        // (subsampled) transform unit wide/high
        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                           (s->ss_v && h4 * 2 == (1 << b->tx)));

        if (!b->skip) {
            int has_coeffs;

            if (bytesperpixel == 1) {
                has_coeffs = decode_coeffs_8bpp(ctx);
            } else {
                has_coeffs = decode_coeffs_16bpp(ctx);
            }
            // small inter blocks with no coded coefficients count as
            // skipped for the skip context of later blocks
            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
                b->skip = 1;
                memset(&s->above_skip_ctx[col], 1, w4);
                memset(&s->left_skip_ctx[s->row7], 1, h4);
            }
        } else {
            int row7 = s->row7;

// zero an n-byte run of nonzero-coefficient context
#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
// zero the luma and (possibly subsampled) chroma nnz contexts of a
// skipped block along one direction (above/left)
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
        } else { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
        } \
    } while (0)

            switch (w4) {
            case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
            case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
            case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
            case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
            }
            switch (h4) {
            case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
            case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
            case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
            case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
            }
        }
        if (s->pass == 1) {
            // first pass of two: parsing only — advance the stored-block
            // pointers and leave reconstruction to pass 2
            s->b++;
            s->block += w4 * h4 * 64 * bytesperpixel;
            s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->eob += 4 * w4 * h4;
            s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
            s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);

            return;
        }
    }

    // emulated overhangs if the stride of the target buffer can't hold. This
    // makes it possible to support emu-edge and so on even if we have large block
    // overhangs
    emu[0] = (col + w4) * 8 > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = (col + w4) * 4 > f->linesize[1] ||
             (row + h4) > s->rows;
    if (emu[0]) {
        s->dst[0] = s->tmp_y;
        s->y_stride = 128;
    } else {
        s->dst[0] = f->data[0] + yoff;
        s->y_stride = f->linesize[0];
    }
    if (emu[1]) {
        s->dst[1] = s->tmp_uv[0];
        s->dst[2] = s->tmp_uv[1];
        s->uv_stride = 128;
    } else {
        s->dst[1] = f->data[1] + uvoff;
        s->dst[2] = f->data[2] + uvoff;
        s->uv_stride = f->linesize[1];
    }
    if (b->intra) {
        if (s->bpp > 8) {
            intra_recon_16bpp(ctx, yoff, uvoff);
        } else {
            intra_recon_8bpp(ctx, yoff, uvoff);
        }
    } else {
        if (s->bpp > 8) {
            inter_recon_16bpp(ctx);
        } else {
            inter_recon_8bpp(ctx);
        }
    }
    if (emu[0]) {
        // copy the temporary luma buffer back to the frame, in
        // power-of-two-wide chunks matching the available copy kernels
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
                                         s->tmp_y + o, 128, h, 0, 0);
                o += bw * bytesperpixel;
            }
        }
    }
    if (emu[1]) {
        // same writeback for both chroma planes
        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;

        for (n = s->ss_h; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
                                         s->tmp_uv[0] + o, 128, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
                                         s->tmp_uv[1] + o, 128, h, 0, 0);
                o += bw * bytesperpixel;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->filter.level &&
        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                    [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;

        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
        if (s->ss_h || s->ss_v)
            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                       b->uvtx, skip_inter);

        // lazily fill the sharpness-dependent filter limit LUTs for this level
        if (!s->filter.lim_lut[lvl]) {
            int sharp = s->filter.sharpness;
            int limit = lvl;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter.lim_lut[lvl] = limit;
            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
        }
    }

    if (s->pass == 2) {
        // consume the stored-block state written during pass 1
        s->b++;
        s->block += w4 * h4 * 64 * bytesperpixel;
        s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->eob += 4 * w4 * h4;
        s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
        s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
    }
}
3408 
/**
 * Recursively parse the partition tree and decode all blocks in one
 * superblock subtree. row/col are in 8px units; yoff/uvoff are byte
 * offsets into the planes.
 *
 * Near the right/bottom image edge some partition choices are implied by
 * the bitstream syntax, which is why only part of the partition tree is
 * read there.
 */
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    // partition probability context from the above/left neighbors
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
                                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;  // half block size at this level, in 8px units
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        // leaf level: no further split possible
        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            // fully inside the image: all four partition types possible
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom half is outside the image: only SPLIT or H possible
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        // right half is outside the image: only SPLIT or V possible
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        // both halves outside: split is implied, nothing to read
        bp = PARTITION_SPLIT;
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;
}
3487 
3488 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3489  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3490 {
3491  VP9Context *s = ctx->priv_data;
3492  VP9Block *b = s->b;
3493  ptrdiff_t hbs = 4 >> bl;
3494  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3495  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3496  int bytesperpixel = s->bytesperpixel;
3497 
3498  if (bl == BL_8X8) {
3499  av_assert2(b->bl == BL_8X8);
3500  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3501  } else if (s->b->bl == bl) {
3502  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3503  if (b->bp == PARTITION_H && row + hbs < s->rows) {
3504  yoff += hbs * 8 * y_stride;
3505  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3506  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3507  } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3508  yoff += hbs * 8 * bytesperpixel;
3509  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3510  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3511  }
3512  } else {
3513  decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3514  if (col + hbs < s->cols) { // FIXME why not <=?
3515  if (row + hbs < s->rows) {
3516  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3517  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3518  yoff += hbs * 8 * y_stride;
3519  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3520  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3521  decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3522  yoff + 8 * hbs * bytesperpixel,
3523  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3524  } else {
3525  yoff += hbs * 8 * bytesperpixel;
3526  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3527  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3528  }
3529  } else if (row + hbs < s->rows) {
3530  yoff += hbs * 8 * y_stride;
3531  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3532  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3533  }
3534  }
3535 }
3536 
/**
 * Loopfilter the vertical edges (edges between columns) of one superblock
 * for a single plane.
 *
 * @param col   superblock column (only used to skip the leftmost image edge)
 * @param lvl   8x8 grid of per-block filter levels for this superblock
 * @param mask  per-row bitmasks of column edges to filter, indexed by filter
 *              size (0=16px, 1=8px, 2=4px, 3=inner 4px) — see struct VP9Filter
 * @param dst   top-left of this plane's portion of the superblock
 * @param ls    line stride of dst
 */
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
{
    int y, x, bytesperpixel = s->bytesperpixel;

    // filter edges between columns (e.g. block1 | block2)
    // two 8px mask rows are handled per iteration so that vertically paired
    // edges can share a single (double-width) filter call
    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
            if (col || x > 1) {  // never filter the left image border
                if (hm1 & x) {
                    // E/I/H: mblim, lim and (level >> 4) thresholds for
                    // this edge's filter level
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8 << ss_v] == L);
                            s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // both 8px halves filtered: pack the second level's
                        // thresholds into the upper byte for the mix2 kernel
                        L = l[8 << ss_v];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls, E, I, H);
                    }
                } else if (hm2 & x) {
                    // only the lower 8px half of this edge pair is filtered
                    int L = l[8 << ss_v], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls, ls, E, I, H);
                }
            }
            if (ss_h) {
                if (x & 0xAA)
                    l += 2;
            } else {
                // inner 4px edges only exist without horizontal subsampling
                if (hm13 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (hm23 & x) {
                        L = l[8 << ss_v];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    }
                } else if (hm23 & x) {
                    int L = l[8 << ss_v], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
                }
                l++;
            }
        }
    }
}
3611 
/**
 * Loopfilter the horizontal edges (edges between rows) of one superblock
 * for a single plane.
 *
 * @param row   superblock row (only used to skip the topmost image edge)
 * @param lvl   8x8 grid of per-block filter levels for this superblock
 * @param mask  per-row bitmasks of row edges to filter, indexed by filter
 *              size (0=16px, 1=8px, 2=4px, 3=inner 4px) — see struct VP9Filter
 * @param dst   top-left of this plane's portion of the superblock
 * @param ls    line stride of dst
 */
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
{
    int y, x, bytesperpixel = s->bytesperpixel;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
        uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        // two horizontally adjacent edges are handled per iteration so they
        // can share a single (double-width) filter call
        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
            if (row || y) {  // never filter the top image border
                if (vm & x) {
                    // E/I/H: mblim, lim and (level >> 4) thresholds for
                    // this edge's filter level
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << (1 + ss_h))) {
                            av_assert2(l[1 + ss_h] == L);
                            s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
                        }
                    } else if (vm & (x << (1 + ss_h))) {
                        // both 8px halves filtered: pack the second level's
                        // thresholds into the upper byte for the mix2 kernel
                        L = l[1 + ss_h];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
                                               [!!(vmask[1] & (x << (1 + ss_h)))]
                                               [1](ptr, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls, E, I, H);
                    }
                } else if (vm & (x << (1 + ss_h))) {
                    // only the right 8px half of this edge pair is filtered
                    int L = l[1 + ss_h], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
                                        [1](ptr + 8 * bytesperpixel, ls, E, I, H);
                }
            }
            if (!ss_v) {
                // inner 4px edges only exist without vertical subsampling
                if (vm3 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vm3 & (x << (1 + ss_h))) {
                        L = l[1 + ss_h];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
                    }
                } else if (vm3 & (x << (1 + ss_h))) {
                    int L = l[1 + ss_h], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
                }
            }
        }
        if (ss_v) {
            if (y & 1)
                lvl += 16;
        } else {
            lvl += 8;
        }
    }
}
3688 
3689 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3690  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3691 {
3692  VP9Context *s = ctx->priv_data;
3693  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3694  uint8_t *dst = f->data[0] + yoff;
3695  ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3696  uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3697  int p;
3698 
3699  // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3700  // if you think of them as acting on a 8x8 block max, we can interleave
3701  // each v/h within the single x loop, but that only works if we work on
3702  // 8 pixel blocks, and we won't always do that (we want at least 16px
3703  // to use SSE2 optimizations, perhaps 32 for AVX2)
3704 
3705  filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3706  filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
3707 
3708  for (p = 0; p < 2; p++) {
3709  dst = f->data[1 + p] + uvoff;
3710  filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3711  filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
3712  }
3713 }
3714 
/**
 * Compute the [start, end) range of tile `idx` out of 2^log2_n tiles
 * covering n superblocks, returned in 8px (row/col) units.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    // clamp to the superblock count, then convert superblocks -> 8px units
    // (equivalent to FFMIN(x, n) << 3)
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;
    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
3722 
3723 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3724  int max_count, int update_factor)
3725 {
3726  unsigned ct = ct0 + ct1, p2, p1;
3727 
3728  if (!ct)
3729  return;
3730 
3731  update_factor = FASTDIV(update_factor * FFMIN(ct, max_count), max_count);
3732  p1 = *p;
3733  p2 = ((((int64_t) ct0) << 8) + (ct >> 1)) / ct;
3734  p2 = av_clip(p2, 1, 255);
3735 
3736  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3737  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3738 }
3739 
/**
 * Backward probability adaptation, run once per frame after decoding:
 * every probability in the active frame context is blended towards the
 * symbol counts gathered in s->counts while decoding this frame.
 */
static void adapt_probs(VP9Context *s)
{
    int i, j, k, l, m;
    prob_context *p = &s->prob_ctx[s->framectxid].p;
    // coefficient-prob update factor: 112, except 128 for an inter frame
    // whose predecessor was a keyframe
    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;

    // coefficients
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
                        unsigned *e = s->counts.eob[i][j][k][l][m];
                        unsigned *c = s->counts.coef[i][j][k][l][m];

                        if (l == 0 && m >= 3) // dc only has 3 pt
                            break;

                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
                    }

    if (s->keyframe || s->intraonly) {
        // keyframes/intra-only frames: only the skip/tx probs are carried
        // over from the current frame's probs; everything below is skipped
        memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
        memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
        return;
    }

    // skip flag
    for (i = 0; i < 3; i++)
        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);

    // intra/inter flag
    for (i = 0; i < 4; i++)
        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);

    // comppred flag
    if (s->comppredmode == PRED_SWITCHABLE) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    }

    // reference frames
    if (s->comppredmode != PRED_SINGLEREF) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                       s->counts.comp_ref[i][1], 20, 128);
    }

    if (s->comppredmode != PRED_COMPREF) {
        for (i = 0; i < 5; i++) {
            uint8_t *pp = p->single_ref[i];
            unsigned (*c)[2] = s->counts.single_ref[i];

            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
        }
    }

    // block partitioning
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            uint8_t *pp = p->partition[i][j];
            unsigned *c = s->counts.partition[i][j];

            // tree-ordered: none vs. rest, h vs. {v,split}, v vs. split
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }

    // tx size
    if (s->txfmmode == TX_SWITCHABLE) {
        for (i = 0; i < 2; i++) {
            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];

            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
        }
    }

    // interpolation filter
    if (s->filtermode == FILTER_SWITCHABLE) {
        for (i = 0; i < 4; i++) {
            uint8_t *pp = p->filter[i];
            unsigned *c = s->counts.filter[i];

            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
            adapt_prob(&pp[1], c[1], c[2], 20, 128);
        }
    }

    // inter modes
    for (i = 0; i < 7; i++) {
        uint8_t *pp = p->mv_mode[i];
        unsigned *c = s->counts.mv_mode[i];

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
        adapt_prob(&pp[2], c[1], c[3], 20, 128);
    }

    // mv joints
    {
        uint8_t *pp = p->mv_joint;
        unsigned *c = s->counts.mv_joint;

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);
    }

    // mv components
    for (i = 0; i < 2; i++) {
        uint8_t *pp;
        unsigned *c, (*c2)[2], sum;

        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
                   s->counts.mv_comp[i].sign[1], 20, 128);

        // class tree: each step splits off one class (or pair of classes)
        // from the running sum of the remaining ones
        pp = p->mv_comp[i].classes;
        c = s->counts.mv_comp[i].classes;
        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
        adapt_prob(&pp[0], c[0], sum, 20, 128);
        sum -= c[1];
        adapt_prob(&pp[1], c[1], sum, 20, 128);
        sum -= c[2] + c[3];
        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[3], c[2], c[3], 20, 128);
        sum -= c[4] + c[5];
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[5], c[4], c[5], 20, 128);
        sum -= c[6];
        adapt_prob(&pp[6], c[6], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
        adapt_prob(&pp[8], c[7], c[8], 20, 128);
        adapt_prob(&pp[9], c[9], c[10], 20, 128);

        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
                   s->counts.mv_comp[i].class0[1], 20, 128);
        pp = p->mv_comp[i].bits;
        c2 = s->counts.mv_comp[i].bits;
        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            pp = p->mv_comp[i].class0_fp[j];
            c = s->counts.mv_comp[i].class0_fp[j];
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }
        pp = p->mv_comp[i].fp;
        c = s->counts.mv_comp[i].fp;
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

        // hp bits are only coded (and thus only adapted) with
        // high-precision MVs enabled
        if (s->highprecisionmvs) {
            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
                       s->counts.mv_comp[i].hp[1], 20, 128);
        }
    }

    // y intra modes
    for (i = 0; i < 4; i++) {
        uint8_t *pp = p->y_mode[i];
        unsigned *c = s->counts.y_mode[i], sum, s2;

        // sum of all mode counts except c[2] (the mode adapted first below,
        // DC_PRED judging by the first adapt_prob call); the tree is then
        // walked with a running remainder
        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }

    // uv intra modes (same tree as the y modes above)
    for (i = 0; i < 10; i++) {
        uint8_t *pp = p->uv_mode[i];
        unsigned *c = s->counts.uv_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }
}
3961 
3962 static void free_buffers(VP9Context *s)
3963 {
 // Release the decoder's dynamically sized scratch buffers: the
 // pre-loopfilter row cache used for intra prediction of the next sb64
 // row, the block-struct array and the coefficient/eob storage.
 // av_freep() also NULLs each pointer, so a later re-allocation is safe.
3964  av_freep(&s->intra_pred_data[0]);
3965  av_freep(&s->b_base);
3966  av_freep(&s->block_base);
3967 }
3968 
 // Decoder close callback: tear down all frame/reference storage.
 // NOTE(review): the signature line (3969) is missing from this
 // extraction; per the cross-reference index it is
 // "static av_cold int vp9_decode_free(AVCodecContext *ctx)".
3970 {
3971  VP9Context *s = ctx->priv_data;
3972  int i;
3973 
 // The three internal frame slots: unref pixel data if present, then
 // free the AVFrame containers themselves.
3974  for (i = 0; i < 3; i++) {
3975  if (s->frames[i].tf.f->data[0])
3976  vp9_unref_frame(ctx, &s->frames[i]);
3977  av_frame_free(&s->frames[i].tf.f);
3978  }
 // Both generations of the eight VP9 reference slots.
3979  for (i = 0; i < 8; i++) {
3980  if (s->refs[i].f->data[0])
3981  ff_thread_release_buffer(ctx, &s->refs[i]);
3982  av_frame_free(&s->refs[i].f);
3983  if (s->next_refs[i].f->data[0])
3984  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3985  av_frame_free(&s->next_refs[i].f);
3986  }
3987  free_buffers(s);
 // c_b is the array of per-tile-column range coders; its allocated size
 // is tracked separately in c_b_size, so reset both together.
3988  av_freep(&s->c_b);
3989  s->c_b_size = 0;
3990 
3991  return 0;
3992 }
3993 
3994 
3995 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3996  int *got_frame, AVPacket *pkt)
3997 {
 /* Top-level per-packet decode entry point.  Parses the frame header,
  * then either re-presents an already-decoded reference frame
  * ("show existing frame": header-only packet) or runs the tile decode
  * loop, loop filter and probability adaptation, and finally rotates
  * the reference-frame slots.  Returns the number of bytes consumed
  * (pkt->size) on success, a negative AVERROR code on failure. */
3998  const uint8_t *data = pkt->data;
3999  int size = pkt->size;
4000  VP9Context *s = ctx->priv_data;
4001  int res, tile_row, tile_col, i, ref, row, col;
4002  int retain_segmap_ref = s->frames[REF_FRAME_SEGMAP].segmentation_map &&
 // NOTE(review): line 4003 (the remainder of this && condition) is
 // missing from this extraction.
4004  ptrdiff_t yoff, uvoff, ls_y, ls_uv;
4005  AVFrame *f;
4006  int bytesperpixel;
4007 
4008  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
4009  return res;
4010  } else if (res == 0) {
 // res == 0: header-only "show existing frame" packet — output a copy
 // of the requested reference instead of decoding anything.
4011  if (!s->refs[ref].f->data[0]) {
4012  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4013  return AVERROR_INVALIDDATA;
4014  }
4015  if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4016  return res;
4017  ((AVFrame *)frame)->pkt_pts = pkt->pts;
4018  ((AVFrame *)frame)->pkt_dts = pkt->dts;
 // Carry the current reference set forward unchanged into next_refs.
4019  for (i = 0; i < 8; i++) {
4020  if (s->next_refs[i].f->data[0])
4021  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4022  if (s->refs[i].f->data[0] &&
4023  (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
4024  return res;
4025  }
4026  *got_frame = 1;
4027  return pkt->size;
4028  }
 // Skip the already-parsed (uncompressed) header bytes.
4029  data += res;
4030  size -= res;
4031 
 // Rotate the previous CUR_FRAME into the segmentation-map and mv-pair
 // reference slots used for temporal prediction of the new frame.
4032  if (!retain_segmap_ref || s->keyframe || s->intraonly) {
4033  if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
 // NOTE(review): line 4034 is missing from this extraction — presumably
 // the vp9_unref_frame() call for REF_FRAME_SEGMAP.
4035  if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4036  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4037  return res;
4038  }
4039  if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
 // NOTE(review): line 4040 is missing from this extraction — presumably
 // the vp9_unref_frame() call for REF_FRAME_MVPAIR.
4041  if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4042  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4043  return res;
4044  if (s->frames[CUR_FRAME].tf.f->data[0])
4045  vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4046  if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4047  return res;
4048  f = s->frames[CUR_FRAME].tf.f;
4049  f->key_frame = s->keyframe;
 // NOTE(review): line 4050 is missing from this extraction (between the
 // key_frame assignment and the linesize reads).
4051  ls_y = f->linesize[0];
4052  ls_uv =f->linesize[1];
4053 
4054  // ref frame setup
4055  for (i = 0; i < 8; i++) {
4056  if (s->next_refs[i].f->data[0])
4057  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4058  if (s->refreshrefmask & (1 << i)) {
4059  res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4060  } else if (s->refs[i].f->data[0]) {
4061  res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4062  }
 // NOTE(review): if neither branch above runs, res keeps the value from
 // an earlier statement (non-negative at this point) — looks intentional
 // but worth confirming.
4063  if (res < 0)
4064  return res;
4065  }
4066 
4067  // main tile decode loop
 // Reset the above-row contexts before decoding the first sb row.
4068  bytesperpixel = s->bytesperpixel;
4069  memset(s->above_partition_ctx, 0, s->cols);
4070  memset(s->above_skip_ctx, 0, s->cols);
4071  if (s->keyframe || s->intraonly) {
4072  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4073  } else {
4074  memset(s->above_mode_ctx, NEARESTMV, s->cols);
4075  }
4076  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4077  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4078  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4079  memset(s->above_segpred_ctx, 0, s->cols);
4080  s->pass = s->frames[CUR_FRAME].uses_2pass =
 // NOTE(review): line 4081 (the right-hand side of this assignment) is
 // missing from this extraction.
4082  if ((res = update_block_buffers(ctx)) < 0) {
4083  av_log(ctx, AV_LOG_ERROR,
4084  "Failed to allocate block buffers\n");
4085  return res;
4086  }
 // In frame-parallel mode the adapted probabilities are not fed back,
 // so the frame context is saved up front.
4087  if (s->refreshctx && s->parallelmode) {
4088  int j, k, l, m;
4089 
4090  for (i = 0; i < 4; i++) {
4091  for (j = 0; j < 2; j++)
4092  for (k = 0; k < 2; k++)
4093  for (l = 0; l < 6; l++)
4094  for (m = 0; m < 6; m++)
4095  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4096  s->prob.coef[i][j][k][l][m], 3);
4097  if (s->txfmmode == i)
4098  break;
4099  }
4100  s->prob_ctx[s->framectxid].p = s->prob.p;
 // NOTE(review): line 4101 is missing from this extraction.
4102  } else if (!s->refreshctx) {
 // NOTE(review): line 4103 (the body of this else-if) is missing from
 // this extraction.
4104  }
4105 
 // Pass 1 parses, pass 2 reconstructs (2-pass mode); single-pass runs
 // the loop body once with s->pass == 0.
4106  do {
4107  yoff = uvoff = 0;
4108  s->b = s->b_base;
4109  s->block = s->block_base;
4110  s->uvblock[0] = s->uvblock_base[0];
4111  s->uvblock[1] = s->uvblock_base[1];
4112  s->eob = s->eob_base;
4113  s->uveob[0] = s->uveob_base[0];
4114  s->uveob[1] = s->uveob_base[1];
4115 
4116  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
 // NOTE(review): line 4117 is missing from this extraction — the start
 // of a set_tile_offset() call whose continuation appears below.
4118  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4119  if (s->pass != 2) {
 // Initialize one range decoder per tile column.  Every tile except the
 // last carries an explicit 32-bit big-endian size prefix.
4120  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4121  int64_t tile_size;
4122 
4123  if (tile_col == s->tiling.tile_cols - 1 &&
4124  tile_row == s->tiling.tile_rows - 1) {
4125  tile_size = size;
4126  } else {
4127  tile_size = AV_RB32(data);
4128  data += 4;
4129  size -= 4;
4130  }
4131  if (tile_size > size) {
 // Report completion before bailing so waiting frame threads don't hang.
4132  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4133  return AVERROR_INVALIDDATA;
4134  }
4135  res = ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4136  if (res < 0)
4137  return res;
4138  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4139  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4140  return AVERROR_INVALIDDATA;
4141  }
4142  data += tile_size;
4143  size -= tile_size;
4144  }
4145  }
4146 
 // Decode one sb64 row at a time across all tile columns of this row.
4147  for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4148  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4149  struct VP9Filter *lflvl_ptr = s->lflvl;
4150  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4151 
4152  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
 // NOTE(review): line 4153 is missing from this extraction — the start
 // of a set_tile_offset() call whose continuation appears below.
4154  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
4155 
4156  if (s->pass != 2) {
 // Reset the left-edge contexts at each tile-column boundary, and
 // restore this tile's range-coder state.
4157  memset(s->left_partition_ctx, 0, 8);
4158  memset(s->left_skip_ctx, 0, 8);
4159  if (s->keyframe || s->intraonly) {
4160  memset(s->left_mode_ctx, DC_PRED, 16);
4161  } else {
4162  memset(s->left_mode_ctx, NEARESTMV, 8);
4163  }
4164  memset(s->left_y_nnz_ctx, 0, 16);
4165  memset(s->left_uv_nnz_ctx, 0, 32);
4166  memset(s->left_segpred_ctx, 0, 8);
4167 
4168  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4169  }
4170 
4171  for (col = s->tiling.tile_col_start;
4172  col < s->tiling.tile_col_end;
4173  col += 8, yoff2 += 64 * bytesperpixel,
4174  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4175  // FIXME integrate with lf code (i.e. zero after each
4176  // use, similar to invtxfm coefficients, or similar)
4177  if (s->pass != 1) {
4178  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4179  }
4180 
4181  if (s->pass == 2) {
4182  decode_sb_mem(ctx, row, col, lflvl_ptr,
4183  yoff2, uvoff2, BL_64X64);
4184  } else {
4185  decode_sb(ctx, row, col, lflvl_ptr,
4186  yoff2, uvoff2, BL_64X64);
4187  }
4188  }
4189  if (s->pass != 2) {
 // Save the range-coder state back so the next sb row of this tile
 // column resumes where we left off.
4190  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4191  }
4192  }
4193 
4194  if (s->pass == 1) {
4195  continue;
4196  }
4197 
4198  // backup pre-loopfilter reconstruction data for intra
4199  // prediction of next row of sb64s
4200  if (row + 8 < s->rows) {
4201  memcpy(s->intra_pred_data[0],
4202  f->data[0] + yoff + 63 * ls_y,
4203  8 * s->cols * bytesperpixel);
4204  memcpy(s->intra_pred_data[1],
4205  f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4206  8 * s->cols * bytesperpixel >> s->ss_h);
4207  memcpy(s->intra_pred_data[2],
4208  f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4209  8 * s->cols * bytesperpixel >> s->ss_h);
4210  }
4211 
4212  // loopfilter one row
4213  if (s->filter.level) {
4214  yoff2 = yoff;
4215  uvoff2 = uvoff;
4216  lflvl_ptr = s->lflvl;
4217  for (col = 0; col < s->cols;
4218  col += 8, yoff2 += 64 * bytesperpixel,
4219  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4220  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4221  }
4222  }
4223 
4224  // FIXME maybe we can make this more finegrained by running the
4225  // loopfilter per-block instead of after each sbrow
4226  // In fact that would also make intra pred left preparation easier?
4227  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
4228  }
4229  }
4230 
 // Backward adaptation of the entropy probabilities (disabled in
 // frame-parallel mode, where the context was saved before decoding).
4231  if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4232  adapt_probs(s);
 // NOTE(review): line 4233 is missing from this extraction.
4234  }
4235  } while (s->pass++ == 1);
4236  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4237 
4238  // ref frame setup
 // Promote next_refs (built above) to the active reference set.
4239  for (i = 0; i < 8; i++) {
4240  if (s->refs[i].f->data[0])
4241  ff_thread_release_buffer(ctx, &s->refs[i]);
4242  ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4243  }
4244 
 // Invisible frames are decoded but not output.
4245  if (!s->invisible) {
4246  if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4247  return res;
4248  *got_frame = 1;
4249  }
4250 
4251  return pkt->size;
4252 }
4253 
 // Flush callback: drop every internal frame and reference slot so a
 // seek does not predict from stale data.
 // NOTE(review): the signature line (4254) is missing from this
 // extraction; per the cross-reference index it is
 // "static void vp9_decode_flush(AVCodecContext *ctx)".
4255 {
4256  VP9Context *s = ctx->priv_data;
4257  int i;
4258 
4259  for (i = 0; i < 3; i++)
4260  vp9_unref_frame(ctx, &s->frames[i]);
4261  for (i = 0; i < 8; i++)
4262  ff_thread_release_buffer(ctx, &s->refs[i]);
4263 }
4264 
4265 static int init_frames(AVCodecContext *ctx)
4266 {
4267  VP9Context *s = ctx->priv_data;
4268  int i;
4269 
4270  for (i = 0; i < 3; i++) {
4271  s->frames[i].tf.f = av_frame_alloc();
4272  if (!s->frames[i].tf.f) {
4273  vp9_decode_free(ctx);
4274  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4275  return AVERROR(ENOMEM);
4276  }
4277  }
4278  for (i = 0; i < 8; i++) {
4279  s->refs[i].f = av_frame_alloc();
4280  s->next_refs[i].f = av_frame_alloc();
4281  if (!s->refs[i].f || !s->next_refs[i].f) {
4282  vp9_decode_free(ctx);
4283  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4284  return AVERROR(ENOMEM);
4285  }
4286  }
4287 
4288  return 0;
4289 }
4290 
 // Decoder init: set threading/progress options and sentinel state,
 // then allocate the frame containers.
 // NOTE(review): the signature line (4291) is missing from this
 // extraction; presumably
 // "static av_cold int vp9_decode_init(AVCodecContext *ctx)".
4292 {
4293  VP9Context *s = ctx->priv_data;
4294 
 // Frame threading: this decoder reports per-row progress itself.
4295  ctx->internal->allocate_progress = 1;
 // 0 = no bit depth seen yet; forces per-frame format setup on the
 // first decoded frame.
4296  s->last_bpp = 0;
 // -1 is a sentinel meaning "loop-filter sharpness not configured yet".
4297  s->filter.sharpness = -1;
4298 
4299  return init_frames(ctx);
4300 }
4301 
 // Frame-threading per-worker init: each thread only needs its own set
 // of AVFrame containers; all other state is copied later via
 // update_thread_context.
 // NOTE(review): the signature line (4302) is missing from this
 // extraction; per the index it is
 // "static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)".
4303 {
4304  return init_frames(avctx);
4305 }
4306 
 // Frame-threading state transfer: copy decoder state from the source
 // (just-finished) thread context into this destination context.
 // NOTE(review): the signature line (4307) is missing from this
 // extraction; per the index it is
 // "static int vp9_decode_update_thread_context(AVCodecContext *dst,
 //  const AVCodecContext *src)".
4308 {
4309  int i, res;
4310  VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4311 
4312  // detect size changes in other threads
4313  if (s->intra_pred_data[0] &&
4314  (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4315  free_buffers(s);
4316  }
4317 
 // Re-reference the three internal frame slots from the source context.
4318  for (i = 0; i < 3; i++) {
4319  if (s->frames[i].tf.f->data[0])
4320  vp9_unref_frame(dst, &s->frames[i]);
4321  if (ssrc->frames[i].tf.f->data[0]) {
4322  if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4323  return res;
4324  }
4325  }
 // Note: dst->refs is populated from ssrc->next_refs — the destination
 // starts from the post-update reference set of the source frame.
4326  for (i = 0; i < 8; i++) {
4327  if (s->refs[i].f->data[0])
4328  ff_thread_release_buffer(dst, &s->refs[i]);
4329  if (ssrc->next_refs[i].f->data[0]) {
4330  if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4331  return res;
4332  }
4333  }
4334 
 // Scalar header state needed to decode the next frame.
4335  s->invisible = ssrc->invisible;
4336  s->keyframe = ssrc->keyframe;
4337  s->intraonly = ssrc->intraonly;
4338  s->ss_v = ssrc->ss_v;
4339  s->ss_h = ssrc->ss_h;
4340  s->segmentation.enabled = ssrc->segmentation.enabled;
4341  s->segmentation.update_map = ssrc->segmentation.update_map;
4342  s->bytesperpixel = ssrc->bytesperpixel;
4343  s->bpp = ssrc->bpp;
4344  s->bpp_index = ssrc->bpp_index;
4345  memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4346  memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
 // Per-segment features are only meaningful while segmentation is on.
4347  if (ssrc->segmentation.enabled) {
4348  memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4349  sizeof(s->segmentation.feat));
4350  }
4351 
4352  return 0;
4353 }
4354 
 // Table of the four VP9 bitstream profiles exposed via
 // AVCodec.profiles; terminated by the FF_PROFILE_UNKNOWN sentinel as
 // the AVProfile API requires.
4355 static const AVProfile profiles[] = {
4356  { FF_PROFILE_VP9_0, "Profile 0" },
4357  { FF_PROFILE_VP9_1, "Profile 1" },
4358  { FF_PROFILE_VP9_2, "Profile 2" },
4359  { FF_PROFILE_VP9_3, "Profile 3" },
4360  { FF_PROFILE_UNKNOWN },
4361 };
4362 
 // Public codec registration entry.
 // NOTE(review): the declaration line (4363, presumably
 // "AVCodec ff_vp9_decoder = {") and several designated initializers
 // (lines 4371, 4373-4375 — per the cross-reference index these bind
 // vp9_decode_frame, vp9_decode_flush, vp9_decode_init_thread_copy and
 // vp9_decode_update_thread_context) are missing from this extraction.
4364  .name = "vp9",
4365  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4366  .type = AVMEDIA_TYPE_VIDEO,
4367  .id = AV_CODEC_ID_VP9,
4368  .priv_data_size = sizeof(VP9Context),
4369  .init = vp9_decode_init,
4370  .close = vp9_decode_free,
4372  .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4376  .profiles = NULL_IF_CONFIG_SMALL(profiles),
4377 };
also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B
Definition: pixfmt.h:519
ThreadFrame tf
Definition: vp9.c:74
BlockPartition
Definition: vp9data.h:29
CompPredMode
Definition: vp9.c:38
#define NULL
Definition: coverity.c:32
uint8_t skip[3]
Definition: vp9data.h:1455
uint8_t resetctx
Definition: vp9.c:118
VP9Frame frames[3]
Definition: vp9.c:134
const char const char void * val
Definition: avisynth_c.h:634
Definition: vp9.c:54
unsigned hp[2]
Definition: vp9.c:207
Definition: vp9.h:47
float v
const char * s
Definition: avisynth_c.h:631
uint8_t lossless
Definition: vp9.c:149
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
uint8_t * segmentation_map
Definition: vp9.c:76
#define AV_PIX_FMT_YUV440P10
Definition: pixfmt.h:382
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it...
Definition: buffer.c:124
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v, uint8_t *lvl, uint8_t(*mask)[4], uint8_t *dst, ptrdiff_t ls)
Definition: vp9.c:3537
This structure describes decoded (raw) audio or video data.
Definition: frame.h:171
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
Definition: vp9.c:3715
unsigned comp_ref[5][2]
Definition: vp9.c:193
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:101
static void flush(AVCodecContext *avctx)
Definition: vp9.c:53
uint8_t mblim_lut[64]
Definition: vp9.c:140
uint8_t left_segpred_ctx[8]
Definition: vp9.c:224
VP5 and VP6 compatible video decoder (common features)
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
Definition: vp9.c:415
static const int8_t vp9_segmentation_tree[7][2]
Definition: vp9data.h:66
struct VP9Context::@107::@113 feat[MAX_SEGMENT]
static const uint8_t vp9_model_pareto8[256][8]
Definition: vp9data.h:1184
uint8_t * above_skip_ctx
Definition: vp9.c:234
uint8_t * eob_base
Definition: vp9.c:251
uint8_t comp[5]
Definition: vp9data.h:1449
const char * fmt
Definition: avisynth_c.h:632
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v, uint8_t *lvl, uint8_t(*mask)[4], uint8_t *dst, ptrdiff_t ls)
Definition: vp9.c:3612
uint8_t mvstep[3][2]
Definition: vp9.c:256
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:68
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:261
uint8_t fp[3]
Definition: vp9data.h:1463
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:182
AVFrame * f
Definition: thread.h:36
static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t(*tree)[2], const uint8_t *probs)
Definition: vp56.h:379
Definition: vp9.c:56
int row
Definition: vp9.c:106
#define INVALID_MV
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
uint8_t tx32p[2][3]
Definition: vp9data.h:1452
#define SPLAT_CTX(var, val, n)
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
Definition: vp9.c:269
static const uint8_t vp9_default_kf_ymode_probs[10][10][9]
Definition: vp9data.h:88
static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9.c:2294
also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM / IEC 61966-2-4 xvYCC601 ...
Definition: pixfmt.h:523
VideoDSPContext vdsp
Definition: vp9.c:99
int8_t uvdc_qdelta
Definition: vp9.c:148
static const int8_t vp9_mv_fp_tree[3][2]
Definition: vp9data.h:2285
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a, uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *l, int col, int x, int w, int row, int y, enum TxfmMode tx, int p, int ss_h, int ss_v, int bytesperpixel)
Definition: vp9.c:2511
uint8_t tx16p[2][2]
Definition: vp9data.h:1453
uint8_t last_keyframe
Definition: vp9.c:111
enum AVColorRange color_range
MPEG vs JPEG YUV range.
Definition: avcodec.h:2247
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:188
uint8_t ss_v
Definition: vp9.c:116
#define SET_CTXS(dir, off, n)
int size
Definition: avcodec.h:1434
const char * b
Definition: vf_curves.c:109
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:396
static const int8_t vp9_intramode_tree[9][2]
Definition: vp9data.h:76
#define FF_CODEC_PROPERTY_LOSSLESS
Definition: avcodec.h:3446
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:53
Definition: vp9.c:52
also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC / functionally identical to above ...
Definition: pixfmt.h:524
static const uint8_t vp9_default_kf_uvmode_probs[10][9]
Definition: vp9data.h:202
struct VP9mvrefPair * mv
Definition: vp9.c:77
enum AVPixelFormat pix_fmt
Pixel format, see AV_PIX_FMT_xxx.
Definition: avcodec.h:1732
unsigned skip[3][2]
Definition: vp9.c:197
#define REF_FRAME_SEGMAP
Definition: vp9.c:133
uint8_t left_uv_nnz_ctx[2][16]
Definition: vp9.c:220
#define AV_PIX_FMT_YUV420P12
Definition: pixfmt.h:384
unsigned tile_cols
Definition: vp9.c:171
uint8_t lf_enabled
Definition: vp9.c:159
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
Definition: vp9.c:313
#define t8
Definition: regdef.h:53
static void adapt_probs(VP9Context *s)
Definition: vp9.c:3740
void(* intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Definition: vp9dsp.h:51
Definition: vp9.c:87
static AVPacket pkt
static int decode_frame_header(AVCodecContext *ctx, const uint8_t *data, int size, int *ref)
Definition: vp9.c:547
uint8_t parallelmode
Definition: vp9.c:125
void ff_thread_await_progress(ThreadFrame *f, int n, int field)
Wait for earlier decoding threads to finish reference pictures.
static av_always_inline void mask_edges(uint8_t(*mask)[8][4], int ss_h, int ss_v, int row_and_7, int col_and_7, int w, int h, int col_end, int row_end, enum TxfmMode tx, int skip_inter)
Definition: vp9.c:3088
unsigned cols
Definition: vp9.c:174
unsigned tile_col_end
Definition: vp9.c:172
int profile
profile
Definition: avcodec.h:3125
uint8_t class0_hp
Definition: vp9data.h:1464
uint8_t ref[2]
Definition: vp9.c:88
AVCodec.
Definition: avcodec.h:3482
uint8_t intra[4]
Definition: vp9data.h:1448
static int vp9_decode_frame(AVCodecContext *ctx, void *frame, int *got_frame, AVPacket *pkt)
Definition: vp9.c:3995
order of coefficients is actually GBR, also IEC 61966-2-1 (sRGB)
Definition: pixfmt.h:518
#define AV_WN32A(p, v)
Definition: intreadwrite.h:538
#define AV_COPY32(d, s)
Definition: intreadwrite.h:586
unsigned fp[4]
Definition: vp9.c:205
uint8_t update_map
Definition: vp9.c:155
Definition: vp9.h:29
struct VP9Context::@112 max_mv
uint8_t * intra_pred_data[3]
Definition: vp9.c:244
uint8_t errorres
Definition: vp9.c:115
int y
Definition: vp9.c:252
uint8_t varcompref[2]
Definition: vp9.c:129
#define AV_RN32A(p)
Definition: intreadwrite.h:526
Definition: vp9.c:63
unsigned uv_mode[10][10]
Definition: vp9.c:187
vp9_mc_func mc[5][4][2][2][2]
Definition: vp9dsp.h:114
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9.c:2767
int16_t y
Definition: vp56.h:67
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
Definition: vp9.c:288
void(* emulated_edge_mc)(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_linesize, ptrdiff_t src_linesize, int block_w, int block_h, int src_x, int src_y, int w, int h)
Copy a rectangular area of samples to a temporary buffer and replicate the border samples...
Definition: videodsp.h:63
uint8_t coef[4][2][2][6][6][3]
Definition: vp9.c:177
#define VP9_SYNCCODE
Definition: vp9.c:36
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
Definition: vp9.c:3012
uint8_t bits
Definition: crc.c:295
int mem
Definition: avisynth_c.h:684
uint8_t
#define av_cold
Definition: attributes.h:74
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
Definition: vp9.c:4307
#define av_malloc(s)
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:135
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:63
Definition: vp9.c:58
int16_t qmul[2][2]
Definition: vp9.c:165
unsigned y_mode[4][10]
Definition: vp9.c:186
mode
Definition: f_perms.c:27
#define H
Definition: swscale-test.c:344
TxfmType
Definition: vp9.h:37
AVColorSpace
YUV colorspace type.
Definition: pixfmt.h:517
uint8_t classes[10]
Definition: vp9data.h:1459
static void free_buffers(VP9Context *s)
Definition: vp9.c:3962
#define AV_RB32
Definition: intreadwrite.h:130
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
Multithreading support functions.
Definition: vp9.h:46
uint8_t bpp
Definition: vp9.c:112
static const int16_t vp9_ac_qlookup[3][256]
Definition: vp9data.h:342
int av_frame_ref(AVFrame *dst, const AVFrame *src)
Set up a new reference to the data described by the source frame.
Definition: frame.c:366
#define FF_PROFILE_UNKNOWN
Definition: avcodec.h:3126
uint8_t * uveob_base[2]
Definition: vp9.c:251
static const uint8_t vp9_default_coef_probs[4][2][2][6][6][3]
Definition: vp9data.h:1575
int col
Definition: vp9.c:106
struct VP9Context::@109 prob_ctx[4]
unsigned log2_tile_rows
Definition: vp9.c:170
static AVFrame * frame
BlockLevel
Definition: vp9.c:44
static av_cold int vp9_decode_free(AVCodecContext *ctx)
Definition: vp9.c:3969
uint8_t * data
Definition: avcodec.h:1433
AVBufferRef * extradata
Definition: vp9.c:75
int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src)
Definition: utils.c:3774
bitstream reader API header.
uint8_t * above_uv_nnz_ctx[2]
Definition: vp9.c:233
VP9DSPContext dsp
Definition: vp9.c:98
uint8_t lim_lut[64]
Definition: vp9.c:139
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src, VP9Context *s)
Definition: vp9.c:1084
ptrdiff_t size
Definition: opengl_enc.c:101
unsigned sign[2]
Definition: vp9.c:200
#define AV_PIX_FMT_YUV422P12
Definition: pixfmt.h:385
Definition: vp9.h:38
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
Definition: vp9.c:3409
void ff_thread_finish_setup(AVCodecContext *avctx)
If the codec defines update_thread_context(), call this when they are ready for the next thread to st...
uint16_t mvscale[3][2]
Definition: vp9.c:255
uint8_t mode[4]
Definition: vp9.c:88
Definition: vp9.c:81
uint8_t left_ref_ctx[8]
Definition: vp9.c:227
struct VP9Context::@111::@114 mv_comp[2]
int x
Definition: vp9.c:252
uint8_t * above_txfm_ctx
Definition: vp9.c:235
#define assign_val(c, i, v)
#define av_log(a,...)
int8_t ref[4]
Definition: vp9.c:145
static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9.c:2762
unsigned m
Definition: audioconvert.c:187
Definition: vp9.h:30
int16_t * block
Definition: vp9.c:250
uint8_t bytesperpixel
Definition: vp9.c:112
uint8_t fixcompref
Definition: vp9.c:123
uint8_t mask[2][2][8][4]
Definition: vp9.c:84
int16_t * uvblock[2]
Definition: vp9.c:250
Definition: vp9.c:46
Definition: vp9.h:28
uint8_t keyframe
Definition: vp9.c:111
int width
width and height of the video frame
Definition: frame.h:220
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
uint8_t allowcompinter
Definition: vp9.c:122
int8_t sharpness
Definition: vp9.c:138
void * av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
Reallocate the given block if it is not large enough, otherwise do nothing.
Definition: mem.c:480
#define s2
Definition: regdef.h:39
void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f)
Wrapper around release_buffer() frame-for multithreaded codecs.
static int update_block_buffers(AVCodecContext *ctx)
Definition: vp9.c:372
unsigned mv_mode[7][4]
Definition: vp9.c:189
static const uint16_t mask[17]
Definition: lzw.c:38
enum CompPredMode comppredmode
Definition: vp9.c:214
static const int8_t vp9_mv_class_tree[10][2]
Definition: vp9data.h:2272
uint8_t left_partition_ctx[8]
Definition: vp9.c:221
Definition: vp9.c:62
#define AVERROR(e)
Definition: error.h:43
uint8_t comp_ref[5]
Definition: vp9data.h:1451
GetBitContext gb
Definition: vp9.c:100
ptrdiff_t uv_stride
Definition: vp9.c:108
uint8_t single_ref[5][2]
Definition: vp9data.h:1450
uint8_t mv_mode[7][3]
Definition: vp9data.h:1447
uint8_t filter[4][2]
Definition: vp9data.h:1446
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:148
unsigned mv_joint[4]
Definition: vp9.c:198
static enum FilterMode vp9_filter_lut[3]
Definition: vp9data.h:233
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:178
int active_thread_type
Which multithreading methods are in use by the codec.
Definition: avcodec.h:3062
struct VP9Context::@106 lf_delta
const char * r
Definition: vf_curves.c:107
struct VP9Context::@108 tiling
unsigned tile_row_start
Definition: vp9.c:172
static int decode_coeffs_8bpp(AVCodecContext *ctx)
Definition: vp9.c:2501
struct prob_context::@115 mv_comp[2]
static void inter_recon_16bpp(AVCodecContext *ctx)
Definition: vp9.c:3083
uint8_t bpp_index
Definition: vp9.c:112
uint8_t intra
Definition: vp9.c:88
#define MERGE_CTX(step, rd)
static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9.c:2304
TxfmMode
Definition: vp9.h:27
static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9.c:2944
simple assert() macros that are a bit more flexible than ISO C assert().
static void find_ref_mvs(VP9Context *s, VP56mv *pmv, int ref, int z, int idx, int sb)
Definition: vp9.c:1091
static enum TxfmType vp9_intra_txfm_type[14]
Definition: vp9data.h:445
uint8_t refidx[3]
Definition: vp9.c:127
const char * name
Name of the codec implementation.
Definition: avcodec.h:3489
#define AV_PIX_FMT_YUV444P10
Definition: pixfmt.h:383
unsigned comp[5][2]
Definition: vp9.c:191
unsigned tx8p[2][2]
Definition: vp9.c:196
int16_t * uvblock_base[2]
Definition: vp9.c:250
uint8_t use_last_frame_mvs
Definition: vp9.c:114
#define FFMAX(a, b)
Definition: common.h:90
Libavcodec external API header.
uint8_t class0_fp[2][3]
Definition: vp9data.h:1462
#define AV_CODEC_CAP_FRAME_THREADS
Codec supports frame-level multithreading.
Definition: avcodec.h:920
Definition: vp9.c:97
uint8_t * above_filter_ctx
Definition: vp9.c:240
#define RETURN_DIRECT_MV(mv)
uint8_t hp
Definition: vp9data.h:1465
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:67
#define ONLY_IF_THREADS_ENABLED(x)
Define a function with only the non-default version specified.
Definition: internal.h:217
unsigned tile_rows
Definition: vp9.c:171
Definition: vp9.c:45
struct VP9Filter * lflvl
Definition: vp9.c:245
static void vp9_decode_flush(AVCodecContext *ctx)
Definition: vp9.c:4254
#define CUR_FRAME
Definition: vp9.c:131
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl, int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
Definition: vp9.c:3689
unsigned c_b_size
Definition: vp9.c:103
int ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size)
Definition: vp56rac.c:40
uint8_t yac_qi
Definition: vp9.c:147
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc, uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9.c:2819
#define th
Definition: regdef.h:75
av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
Definition: videodsp.c:38
static void decode_mode(AVCodecContext *ctx)
Definition: vp9.c:1465
enum AVPictureType pict_type
Picture type of the frame.
Definition: frame.h:242
int uses_2pass
Definition: vp9.c:78
#define E
Definition: avdct.c:32
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:53
uint8_t framectxid
Definition: vp9.c:126
unsigned class0_fp[2][4]
Definition: vp9.c:204
#define scale_mv(n, dim)
#define FFMIN(a, b)
Definition: common.h:92
VP56mv left_mv_ctx[16][2]
Definition: vp9.c:219
uint8_t left_y_nnz_ctx[16]
Definition: vp9.c:217
float y
int8_t ref[2]
Definition: vp9.c:70
uint8_t level[8 *8]
Definition: vp9.c:82
int width
picture width / height.
Definition: avcodec.h:1691
int col7
Definition: vp9.c:106
static int decode_coeffs_16bpp(AVCodecContext *ctx)
Definition: vp9.c:2506
uint8_t left_mode_ctx[16]
Definition: vp9.c:218
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
Definition: vp9.c:3488
unsigned eob[4][2][2][6][6][2]
Definition: vp9.c:211
ITU-R BT2020 non-constant luminance system.
Definition: pixfmt.h:527
void(* loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:88
void ff_thread_report_progress(ThreadFrame *f, int n, int field)
Notify later decoding threads when part of their reference picture is ready.
uint8_t partition[4][4][3]
Definition: vp9data.h:1467
static const AVProfile profiles[]
Definition: vp9.c:4355
unsigned tx32p[2][4]
Definition: vp9.c:194
unsigned tx16p[2][3]
Definition: vp9.c:195
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9.c:2912
av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp.c:28
enum FilterMode filtermode
Definition: vp9.c:121
static void inter_recon_8bpp(AVCodecContext *ctx)
Definition: vp9.c:3078
static const uint8_t bwh_tab[2][N_BS_SIZES][2]
Definition: vp9.c:259
uint8_t ref_val
Definition: vp9.c:162
uint8_t uvmode
Definition: vp9.c:88
uint8_t * above_partition_ctx
Definition: vp9.c:229
int n
Definition: avisynth_c.h:547
uint8_t tmp_y[64 *64 *2]
Definition: vp9.c:253
uint8_t left_comp_ctx[8]
Definition: vp9.c:226
#define AV_WN64A(p, v)
Definition: intreadwrite.h:542
#define AV_WN16A(p, v)
Definition: intreadwrite.h:534
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
Definition: vp9.c:4302
#define L(x)
Definition: vp56_arith.h:36
static const int8_t vp9_inter_mode_tree[3][2]
Definition: vp9data.h:222
#define vp56_rac_get_prob
Definition: vp56.h:253
static int init_frames(AVCodecContext *ctx)
Definition: vp9.c:4265
uint8_t * above_segpred_ctx
Definition: vp9.c:236
#define FF_ARRAY_ELEMS(a)
unsigned tile_col_start
Definition: vp9.c:172
the normal 2^n-1 "JPEG" YUV ranges
Definition: pixfmt.h:540
unsigned intra[4][2]
Definition: vp9.c:190
#define mc
static const float pred[4]
Definition: siprdata.h:259
unsigned rows
Definition: vp9.c:174
unsigned sb_cols
Definition: vp9.c:174
static const int8_t mv[256][2]
Definition: 4xm.c:77
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:232
uint8_t sign
Definition: vp9data.h:1458
uint8_t enabled
Definition: vp9.c:143
void(* loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:102
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
Definition: vp56.h:270
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:35
int row7
Definition: vp9.c:106
int8_t uvac_qdelta
Definition: vp9.c:148
FilterMode
Definition: vp9.h:64
void(* loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:80
#define STORE_COEF(c, i, v)
unsigned class0[2]
Definition: vp9.c:202
VP56mv(* above_mv_ctx)[2]
Definition: vp9.c:241
AVS_Value src
Definition: avisynth_c.h:482
uint8_t ref_enabled
Definition: vp9.c:160
struct VP9Context::@110 prob
int16_t * block_base
Definition: vp9.c:250
static const prob_context vp9_default_probs
Definition: vp9data.h:1470
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:32
static void fill_mv(VP9Context *s, VP56mv *mv, int mode, int sb)
Definition: vp9.c:1346
uint8_t level
Definition: vp9.c:137
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:199
#define memset_bpp(c, i1, v, i2, num)
Definition: vp9.c:57
int pass
Definition: vp9.c:105
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:446
int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
Wrapper around get_buffer() for frame-multithreaded codecs.
uint8_t left_skip_ctx[8]
Definition: vp9.c:222
main external API structure.
Definition: avcodec.h:1512
#define FASTDIV(a, b)
Definition: mathops.h:210
Definition: vp9data.h:219
int16_t q_val
Definition: vp9.c:163
uint8_t * data
The data buffer.
Definition: buffer.h:89
uint8_t left_txfm_ctx[8]
Definition: vp9.c:223
Definition: vp9.c:48
static av_cold int vp9_decode_init(AVCodecContext *ctx)
Definition: vp9.c:4291
VP56RangeCoder * c_b
Definition: vp9.c:102
uint8_t invisible
Definition: vp9.c:113
enum TxfmMode tx uvtx
Definition: vp9.c:92
unsigned single_ref[5][2][2]
Definition: vp9.c:192
uint8_t y_mode[4][9]
Definition: vp9data.h:1444
AVBufferRef * av_buffer_allocz(int size)
Same as av_buffer_alloc(), except the returned buffer will be initialized to zero.
Definition: buffer.c:82
ThreadFrame refs[8]
Definition: vp9.c:130
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:305
uint8_t tx8p[2]
Definition: vp9data.h:1454
Definition: vp9.c:55
#define AV_PIX_FMT_YUV420P10
Definition: pixfmt.h:380
#define FF_THREAD_FRAME
Decode more than one frame at once.
Definition: avcodec.h:3054
unsigned partition[4][4][4]
Definition: vp9.c:209
uint8_t * above_y_nnz_ctx
Definition: vp9.c:232
uint8_t temporal
Definition: vp9.c:153
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:298
static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
Definition: vp9.c:2334
enum AVColorSpace colorspace
YUV colorspace type.
Definition: avcodec.h:2240
static const int16_t *const vp9_scans[5][4]
Definition: vp9data.h:608
uint8_t seg_id
Definition: vp9.c:88
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2)
#define DECODE_Y_COEF_LOOP(step, mode_index, v)
Definition: vp9.c:65
uint8_t ss_h
Definition: vp9.c:116
static const int8_t vp9_filter_tree[2][2]
Definition: vp9data.h:228
#define REF_FRAME_MVPAIR
Definition: vp9.c:132
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc, uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9.c:2772
#define AV_PIX_FMT_YUV440P12
Definition: pixfmt.h:386
uint8_t left_filter_ctx[8]
Definition: vp9.c:228
uint8_t last_bpp
Definition: vp9.c:112
uint8_t intraonly
Definition: vp9.c:117
uint8_t signbias[3]
Definition: vp9.c:128
uint8_t * above_intra_ctx
Definition: vp9.c:237
enum BlockSize bs
Definition: vp9.c:91
#define MAX_SEGMENT
Definition: vp9.c:150
#define assign_bpp(c, i1, v, i2)
int allocate_progress
Whether to allocate progress for frame threading.
Definition: internal.h:115
static unsigned int get_bits_long(GetBitContext *s, int n)
Read 0-32 bits.
Definition: get_bits.h:338
uint8_t * dst[3]
Definition: vp9.c:107
struct VP9Context::@107 segmentation
VP56mv mv[4][2]
Definition: vp9.c:90
enum BlockPartition bp
Definition: vp9.c:94
static int vp8_rac_get_uint(VP56RangeCoder *c, int bits)
Definition: vp56.h:323
VP9Block * b
Definition: vp9.c:104
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:397
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
Definition: vp9.c:295
enum TxfmMode txfmmode
Definition: vp9.c:213
uint8_t * uveob[2]
Definition: vp9.c:251
uint8_t * above_mode_ctx
Definition: vp9.c:230
Definition: vp56.h:65
#define AV_PIX_FMT_YUV422P10
Definition: pixfmt.h:381
uint8_t comp
Definition: vp9.c:88
struct VP9Context::@105 filter
uint8_t refreshctx
Definition: vp9.c:124
ThreadFrame next_refs[8]
Definition: vp9.c:130
#define AV_PIX_FMT_YUV444P12
Definition: pixfmt.h:387
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:182
unsigned class0_hp[2]
Definition: vp9.c:206
Definition: vp9.h:48
the normal 219*2^(n-8) "MPEG" YUV ranges
Definition: pixfmt.h:539
struct VP9Context::@111 counts
#define LOCAL_ALIGNED_32(t, v,...)
Definition: internal.h:129
int8_t ydc_qdelta
Definition: vp9.c:148
static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avpkt)
Definition: ccaption_dec.c:521
MVJoint
Definition: vp9data.h:2259
uint8_t highprecisionmvs
Definition: vp9.c:120
#define FF_PROFILE_VP9_1
Definition: avcodec.h:3200
A reference to a data buffer.
Definition: buffer.h:81
static av_always_inline int inv_recenter_nonneg(int v, int m)
Definition: vp9.c:421
#define RETURN_MV(mv)
BlockSize
Definition: vp9.c:51
#define FF_PROFILE_VP9_3
Definition: avcodec.h:3202
static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9.c:2324
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
static av_always_inline int decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, int is_tx32x32, int is8bitsperpixel, int bpp, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9.c:2171
void(* itxfm_add[N_TXFM_SIZES+1][N_TXFM_TYPES])(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob)
Definition: vp9dsp.h:70
uint8_t seg[7]
Definition: vp9.c:182
#define AV_ZERO64(d)
Definition: intreadwrite.h:618
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:63
int16_t x
Definition: vp56.h:66
uint8_t class0
Definition: vp9data.h:1460
common internal api header.
if(ret< 0)
Definition: vf_mcdeint.c:280
static const uint8_t vp9_default_kf_partition_probs[4][4][3]
Definition: vp9data.h:42
uint8_t uv_mode[10][9]
Definition: vp9data.h:1445
static int ref_frame(Vp3DecodeContext *s, ThreadFrame *dst, ThreadFrame *src)
Definition: vp3.c:1927
Definition: vp9.c:59
uint8_t segpred[3]
Definition: vp9.c:183
#define assign(var, type, n)
prob_context p
Definition: vp9.c:176
static double c[64]
uint8_t tmp_uv[2][64 *64 *2]
Definition: vp9.c:254
uint8_t ignore_refmap
Definition: vp9.c:156
#define FF_PROFILE_VP9_0
Definition: avcodec.h:3199
Definition: vp9.c:60
AVCodec ff_vp9_decoder
Definition: vp9.c:4363
unsigned sb_rows
Definition: vp9.c:174
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:92
AVProfile.
Definition: avcodec.h:3470
uint8_t lflvl[4][2]
Definition: vp9.c:166
unsigned properties
Definition: avcodec.h:3445
static av_always_inline int vp8_rac_get(VP56RangeCoder *c)
Definition: vp56.h:307
static const int16_t vp9_dc_qlookup[3][256]
Definition: vp9data.h:239
Core video DSP helper functions.
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, ptrdiff_t stride, int v)
Definition: vp9.c:1419
Definition: vp9.c:47
static const int8_t vp9_mv_joint_tree[3][2]
Definition: vp9data.h:2266
enum BlockLevel bl
Definition: vp9.c:93
void * priv_data
Definition: avcodec.h:1554
unsigned bits[10][2]
Definition: vp9.c:203
#define t4
Definition: regdef.h:32
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
Definition: vp9.c:1293
struct VP9Context::@112 min_mv
#define av_free(p)
#define memset_val(c, val, num)
#define FF_PROFILE_VP9_2
Definition: avcodec.h:3201
unsigned tile_row_end
Definition: vp9.c:172
Definition: vp9.c:61
struct AVCodecInternal * internal
Private context used for internal data.
Definition: avcodec.h:1562
enum FilterMode filter
Definition: vp9.c:89
static int decode012(GetBitContext *gb)
Definition: get_bits.h:576
int key_frame
1 -> keyframe, 0-> not
Definition: frame.h:237
#define AV_ZERO32(d)
Definition: intreadwrite.h:614
static void decode_b(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl, enum BlockPartition bp)
Definition: vp9.c:3210
uint8_t edge_emu_buffer[135 *144 *2]
Definition: vp9.c:246
Definition: vp9.h:31
uint8_t bits[10]
Definition: vp9data.h:1461
static const uint8_t * align_get_bits(GetBitContext *s)
Definition: get_bits.h:454
uint8_t absolute_vals
Definition: vp9.c:154
#define AV_RN16A(p)
Definition: intreadwrite.h:522
#define DECODE_UV_COEF_LOOP(step, v)
Definition: vp9.c:64
int64_t dts
Decompression timestamp in AVStream->time_base units; the time at which the packet is decompressed...
Definition: avcodec.h:1432
int8_t lf_val
Definition: vp9.c:164
static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1, int max_count, int update_factor)
Definition: vp9.c:3723
static const int16_t(*const [5][4] vp9_scans_nb)[2]
Definition: vp9data.h:1165
int height
Definition: frame.h:220
#define av_freep(p)
planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
Definition: pixfmt.h:101
VP9Block * b_base
Definition: vp9.c:104
void INT64 start
Definition: avisynth_c.h:553
Definition: vp9.c:73
static int init_thread_copy(AVCodecContext *avctx)
Definition: alac.c:646
#define av_always_inline
Definition: attributes.h:37
static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
Definition: vp9.c:485
VP56mv mv[2]
Definition: vp9.c:69
static int update_prob(VP56RangeCoder *c, int p)
Definition: vp9.c:427
#define av_malloc_array(a, b)
ptrdiff_t y_stride
Definition: vp9.c:108
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2050
#define stride
uint8_t skip
Definition: vp9.c:88
int8_t mode[2]
Definition: vp9.c:144
uint8_t * above_ref_ctx
Definition: vp9.c:239
uint8_t mv_joint[3]
Definition: vp9data.h:1456
uint8_t q_enabled
Definition: vp9.c:158
uint8_t left_intra_ctx[8]
Definition: vp9.c:225
#define AV_RN64A(p)
Definition: intreadwrite.h:530
unsigned classes[11]
Definition: vp9.c:201
uint8_t refreshrefmask
Definition: vp9.c:119
AVPixelFormat
Pixel format.
Definition: pixfmt.h:61
This structure stores compressed data.
Definition: avcodec.h:1410
#define AV_GET_BUFFER_FLAG_REF
The decoder will keep a reference to the frame and may reuse it later.
Definition: avcodec.h:1216
uint8_t * above_comp_ctx
Definition: vp9.c:238
void * av_mallocz(size_t size)
Allocate a block of size bytes with alignment suitable for all memory accesses (including vectors if ...
Definition: mem.c:252
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
Definition: avcodec.h:857
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1426
#define RETURN_SCALE_MV(mv, scale)
for(j=16;j >0;--j)
int block_alloc_using_2pass
Definition: vp9.c:249
Predicted.
Definition: avutil.h:267
uint8_t skip_enabled
Definition: vp9.c:161
static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9.c:2314
unsigned log2_tile_cols
Definition: vp9.c:170
static const int8_t vp9_partition_tree[3][2]
Definition: vp9data.h:36
static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off, int bytesperpixel)
Definition: vp9.c:2692
VP56RangeCoder c
Definition: vp9.c:101