FFmpeg  1.2.12
h264.h
Go to the documentation of this file.
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #ifndef AVCODEC_H264_H
29 #define AVCODEC_H264_H
30 
31 #include "libavutil/intreadwrite.h"
32 #include "cabac.h"
33 #include "get_bits.h"
34 #include "mpegvideo.h"
35 #include "h264chroma.h"
36 #include "h264dsp.h"
37 #include "h264pred.h"
38 #include "h264qpel.h"
39 #include "rectangle.h"
40 
41 #define MAX_SPS_COUNT 32
42 #define MAX_PPS_COUNT 256
43 
44 #define MAX_MMCO_COUNT 66
45 
46 #define MAX_DELAYED_PIC_COUNT 16
47 
48 #define MAX_MBPAIR_SIZE (256*1024) // a tighter bound could be calculated if someone cares about a few bytes
49 
50 /* Compiling in interlaced support reduces the speed
51  * of progressive decoding by about 2%. */
52 #define ALLOW_INTERLACE
53 
54 #define FMO 0
55 
60 #define MAX_SLICES 16
61 
62 #ifdef ALLOW_INTERLACE
63 #define MB_MBAFF h->mb_mbaff
64 #define MB_FIELD h->mb_field_decoding_flag
65 #define FRAME_MBAFF h->mb_aff_frame
66 #define FIELD_PICTURE (h->picture_structure != PICT_FRAME)
67 #define LEFT_MBS 2
68 #define LTOP 0
69 #define LBOT 1
70 #define LEFT(i) (i)
71 #else
72 #define MB_MBAFF 0
73 #define MB_FIELD 0
74 #define FRAME_MBAFF 0
75 #define FIELD_PICTURE 0
76 #undef IS_INTERLACED
77 #define IS_INTERLACED(mb_type) 0
78 #define LEFT_MBS 1
79 #define LTOP 0
80 #define LBOT 0
81 #define LEFT(i) 0
82 #endif
83 #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
84 
85 #ifndef CABAC
86 #define CABAC h->pps.cabac
87 #endif
88 
89 #define CHROMA (h->sps.chroma_format_idc)
90 #define CHROMA422 (h->sps.chroma_format_idc == 2)
91 #define CHROMA444 (h->sps.chroma_format_idc == 3)
92 
93 #define EXTENDED_SAR 255
94 
95 #define MB_TYPE_REF0 MB_TYPE_ACPRED // dirty but it fits in 16 bit
96 #define MB_TYPE_8x8DCT 0x01000000
97 #define IS_REF0(a) ((a) & MB_TYPE_REF0)
98 #define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT)
99 
100 #define QP_MAX_NUM (51 + 6*6) // The maximum supported qp
101 
102 /* NAL unit types */
103 enum {
118  NAL_FF_IGNORE = 0xff0f001,
119 };
120 
124 typedef enum {
130 } SEI_Type;
131 
135 typedef enum {
146 
150 typedef struct SPS {
156  int poc_type;
164  int mb_width;
165  int mb_height;
167  int mb_aff;
169  int crop;
170  unsigned int crop_left;
171  unsigned int crop_right;
172  unsigned int crop_top;
173  unsigned int crop_bottom;
184  uint32_t time_scale;
186  short offset_for_ref_frame[256]; // FIXME dyn aloc?
196  int cpb_cnt;
204  int new;
205 } SPS;
206 
210 typedef struct PPS {
211  unsigned int sps_id;
212  int cabac;
216  unsigned int ref_count[2];
219  int init_qp;
220  int init_qs;
230 } PPS;
231 
235 typedef enum MMCOOpcode {
236  MMCO_END = 0,
243 } MMCOOpcode;
244 
248 typedef struct MMCO {
251  int long_arg;
252 } MMCO;
253 
257 typedef struct H264Context {
269 
275 
277  int chroma_qp[2]; // QPc
278 
279  int qp_thresh;
280 
281  int width, height;
284 
285  int qscale;
290 
292  int flags;
294 
297 
298  // prediction stuff
301 
306 
308  int top_type;
311 
314 
319  unsigned int top_samples_available;
322  uint8_t (*top_borders[2])[(16 * 3) * 2];
323 
329 
331 
335  DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
336  DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
337 #define LIST_NOT_USED -1 // FIXME rename?
338 #define PART_NOT_AVAILABLE -2
339 
344 
349  int block_offset[2 * (16 * 3)];
350 
351  uint32_t *mb2b_xy; // FIXME are these 4 a good idea?
352  uint32_t *mb2br_xy;
353  int b_stride; // FIXME use s->b4_stride
354 
357 
358  unsigned current_sps_id;
360 
364  PPS pps; // FIXME move to Picture perhaps? (->no) do we need that?
365 
366  uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down?
367  uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64];
368  uint32_t(*dequant4_coeff[6])[16];
369  uint32_t(*dequant8_coeff[6])[64];
370 
372  uint16_t *slice_table;
376 
377  // interlacing specific flags
380  int mb_mbaff;
383 
384  DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
385 
386  // Weighted pred stuff
391  // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
392  int luma_weight[48][2][2];
393  int chroma_weight[48][2][2][2];
394  int implicit_weight[48][48][2];
395 
401  int map_col_to_list0[2][16 + 32];
402  int map_col_to_list0_field[2][2][16 + 32];
403 
407  unsigned int ref_count[2];
408  unsigned int list_count;
410  Picture ref_list[2][48];
413  int ref2frm[MAX_SLICES][2][64];
414 
415  // data partitioning
420 
422  DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
423  DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
424  int16_t mb_padding[256 * 2];
425 
431 
432  /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */
433  uint16_t *cbp_table;
434  int cbp;
435  int top_cbp;
436  int left_cbp;
437  /* chroma_pred_mode for i4x4 or i16x16, else 0 */
440  uint8_t (*mvd_table[2])[2];
441  DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2];
444 
457 
459 
460  int mb_x, mb_y;
466  int mb_num;
467  int mb_xy;
468 
470 
471  // deblock
475 
476  // =============================================================
477  // Things below are not used in the MB or more inner code
478 
482  unsigned int rbsp_buffer_size[2];
483 
487  int is_avc;
489  int got_first;
490 
493 
496 
498 
499  uint16_t *slice_table_base;
500 
501  // POC stuff
502  int poc_lsb;
503  int poc_msb;
505  int delta_poc[2];
512 
517 
522 
524 
533 
540 
543 
545 
551 
556 
564 
566 
572 
574 
576  unsigned int last_ref_count[2];
583 
591 
598 
603 
608 
624 
629 
632 
633  // Timestamp stuff
636 
639 
641 
642  int sync;
643 
648  int16_t *dc_val_base;
649 
651 } H264Context;
652 
653 extern const uint8_t ff_h264_chroma_qp[7][QP_MAX_NUM + 1];
654 extern const uint16_t ff_h264_mb_sizes[4];
655 
660 
665 
670 
674 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length);
675 
684 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
685  int *dst_length, int *consumed, int length);
686 
692 
696 int ff_h264_get_slice_type(const H264Context *h);
697 
703 
708 
712 
716 int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count);
717 
719  int first_slice);
720 
721 int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice);
722 
728 
733 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma);
734 
737 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size);
739 void ff_h264_decode_init_vlc(void);
740 
746 
752 
754 
757 void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type);
758 
759 void ff_h264_filter_mb_fast(H264Context *h, int mb_x, int mb_y,
760  uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr,
761  unsigned int linesize, unsigned int uvlinesize);
762 void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y,
763  uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr,
764  unsigned int linesize, unsigned int uvlinesize);
765 
772 
773 /*
774  * o-o o-o
775  * / / /
776  * o-o o-o
777  * ,---'
778  * o-o o-o
779  * / / /
780  * o-o o-o
781  */
782 
/* Scan8 organization (cache index = column + 8 * row):
 *    0 1 2 3 4 5 6 7
 * 0  DY    y y y y y
 * 1        y Y Y Y Y
 * 2        y Y Y Y Y
 * 3        y Y Y Y Y
 * 4        y Y Y Y Y
 * 5  DU    u u u u u
 * 6        u U U U U
 * 7        u U U U U
 * 8        u U U U U
 * 9        u U U U U
 * 10 DV    v v v v v
 * 11       v V V V V
 * 12       v V V V V
 * 13       v V V V V
 * 14       v V V V V
 * DY/DU/DV are for luma/chroma DC.
 */
802 
803 #define LUMA_DC_BLOCK_INDEX 48
804 #define CHROMA_DC_BLOCK_INDEX 49
805 
/*
 * Maps a block index to its position in the 8-entry-wide per-macroblock
 * caches; every entry is written as 8 * row + column of the Scan8 layout.
 * Entries 0..15 land in rows 1-4, 16..31 in rows 6-9 and 32..47 in
 * rows 11-14, always in columns 4-7.  The last three entries are the
 * DC slots in column 0 of rows 0, 5 and 10.
 *
 * This table must be here because scan8[constant] must be known at
 * compile time.
 */
static const uint8_t scan8[16 * 3 + 3] = {
    /* rows 1-4 */
    8 *  1 + 4, 8 *  1 + 5, 8 *  2 + 4, 8 *  2 + 5,
    8 *  1 + 6, 8 *  1 + 7, 8 *  2 + 6, 8 *  2 + 7,
    8 *  3 + 4, 8 *  3 + 5, 8 *  4 + 4, 8 *  4 + 5,
    8 *  3 + 6, 8 *  3 + 7, 8 *  4 + 6, 8 *  4 + 7,
    /* rows 6-9 */
    8 *  6 + 4, 8 *  6 + 5, 8 *  7 + 4, 8 *  7 + 5,
    8 *  6 + 6, 8 *  6 + 7, 8 *  7 + 6, 8 *  7 + 7,
    8 *  8 + 4, 8 *  8 + 5, 8 *  9 + 4, 8 *  9 + 5,
    8 *  8 + 6, 8 *  8 + 7, 8 *  9 + 6, 8 *  9 + 7,
    /* rows 11-14 */
    8 * 11 + 4, 8 * 11 + 5, 8 * 12 + 4, 8 * 12 + 5,
    8 * 11 + 6, 8 * 11 + 7, 8 * 12 + 6, 8 * 12 + 7,
    8 * 13 + 4, 8 * 13 + 5, 8 * 14 + 4, 8 * 14 + 5,
    8 * 13 + 6, 8 * 13 + 7, 8 * 14 + 6, 8 * 14 + 7,
    /* DC slots: column 0 of rows 0, 5 and 10 */
    8 *  0 + 0, 8 *  5 + 0, 8 * 10 + 0
};
822 
/**
 * Pack the low 16 bits of two integers into one 32-bit word.
 *
 * The halves are ordered by host endianness so that, when the result is
 * stored with a native 32-bit write, @p a occupies the lower-addressed
 * 16 bits and @p b the higher-addressed 16 bits.
 *
 * The arithmetic is carried out in uint32_t: left-shifting a negative
 * (or too large) signed int is undefined behaviour in C, and callers may
 * pass negative values.
 *
 * @param a value whose low 16 bits come first in memory
 * @param b value whose low 16 bits come second in memory
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b)
{
#if HAVE_BIGENDIAN
    return ((uint32_t)b & 0xFFFF) + ((uint32_t)a << 16);
#else
    return ((uint32_t)a & 0xFFFF) + ((uint32_t)b << 16);
#endif
}
831 
/**
 * Pack the low 8 bits of two integers into one 16-bit word.
 *
 * The bytes are ordered by host endianness so that, when the result is
 * stored with a native 16-bit write, @p a occupies the lower-addressed
 * byte and @p b the higher-addressed byte.
 *
 * The arithmetic is carried out in unsigned int: left-shifting a negative
 * signed int is undefined behaviour in C.  Bits above bit 15 are dropped
 * by the conversion to the return type, exactly as before.
 *
 * @param a value whose low 8 bits come first in memory
 * @param b value whose low 8 bits come second in memory
 * @return the packed 16-bit word
 */
static av_always_inline uint16_t pack8to16(int a, int b)
{
#if HAVE_BIGENDIAN
    return (uint16_t)(((unsigned)b & 0xFF) + ((unsigned)a << 8));
#else
    return (uint16_t)(((unsigned)a & 0xFF) + ((unsigned)b << 8));
#endif
}
840 
845 {
846  return h->pps.chroma_qp_table[t][qscale];
847 }
848 
853 {
854  const int index8 = scan8[n];
855  const int left = h->intra4x4_pred_mode_cache[index8 - 1];
856  const int top = h->intra4x4_pred_mode_cache[index8 - 8];
857  const int min = FFMIN(left, top);
858 
859  tprintf(h->avctx, "mode:%d %d min:%d\n", left, top, min);
860 
861  if (min < 0)
862  return DC_PRED;
863  else
864  return min;
865 }
866 
868 {
869  int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
870  int8_t *i4x4_cache = h->intra4x4_pred_mode_cache;
871 
872  AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
873  i4x4[4] = i4x4_cache[7 + 8 * 3];
874  i4x4[5] = i4x4_cache[7 + 8 * 2];
875  i4x4[6] = i4x4_cache[7 + 8 * 1];
876 }
877 
879 {
880  const int mb_xy = h->mb_xy;
881  uint8_t *nnz = h->non_zero_count[mb_xy];
882  uint8_t *nnz_cache = h->non_zero_count_cache;
883 
884  AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]);
885  AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]);
886  AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]);
887  AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]);
888  AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]);
889  AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]);
890  AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]);
891  AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]);
892 
893  if (!h->chroma_y_shift) {
894  AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]);
895  AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]);
896  AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]);
897  AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]);
898  }
899 }
900 
902  int b_stride,
903  int b_xy, int b8_xy,
904  int mb_type, int list)
905 {
906  int16_t(*mv_dst)[2] = &h->cur_pic.f.motion_val[list][b_xy];
907  int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]];
908  AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0);
909  AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1);
910  AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2);
911  AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3);
912  if (CABAC) {
913  uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy
914  : h->mb2br_xy[h->mb_xy]];
915  uint8_t(*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
916  if (IS_SKIP(mb_type)) {
917  AV_ZERO128(mvd_dst);
918  } else {
919  AV_COPY64(mvd_dst, mvd_src + 8 * 3);
920  AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0);
921  AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1);
922  AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2);
923  }
924  }
925 
926  {
927  int8_t *ref_index = &h->cur_pic.f.ref_index[list][b8_xy];
928  int8_t *ref_cache = h->ref_cache[list];
929  ref_index[0 + 0 * 2] = ref_cache[scan8[0]];
930  ref_index[1 + 0 * 2] = ref_cache[scan8[4]];
931  ref_index[0 + 1 * 2] = ref_cache[scan8[8]];
932  ref_index[1 + 1 * 2] = ref_cache[scan8[12]];
933  }
934 }
935 
936 static av_always_inline void write_back_motion(H264Context *h, int mb_type)
937 {
938  const int b_stride = h->b_stride;
939  const int b_xy = 4 * h->mb_x + 4 * h->mb_y * h->b_stride; // try mb2b(8)_xy
940  const int b8_xy = 4 * h->mb_xy;
941 
942  if (USES_LIST(mb_type, 0)) {
943  write_back_motion_list(h, b_stride, b_xy, b8_xy, mb_type, 0);
944  } else {
945  fill_rectangle(&h->cur_pic.f.ref_index[0][b8_xy],
946  2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
947  }
948  if (USES_LIST(mb_type, 1))
949  write_back_motion_list(h, b_stride, b_xy, b8_xy, mb_type, 1);
950 
951  if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) {
952  if (IS_8X8(mb_type)) {
953  uint8_t *direct_table = &h->direct_table[4 * h->mb_xy];
954  direct_table[1] = h->sub_mb_type[1] >> 1;
955  direct_table[2] = h->sub_mb_type[2] >> 1;
956  direct_table[3] = h->sub_mb_type[3] >> 1;
957  }
958  }
959 }
960 
962 {
964  return !(AV_RN64A(h->sub_mb_type) &
966  0x0001000100010001ULL));
967  else
968  return !(AV_RN64A(h->sub_mb_type) &
970  0x0001000100010001ULL));
971 }
972 
/**
 * Search a buffer for the next start code.
 *
 * The scan is delegated to avpriv_mpv_find_start_code(), run over the range
 * [buf + buf_index, buf + next_avc + 1) with a state word initialised to
 * all ones.
 *
 * @param buf       start of the input buffer
 * @param buf_size  size of the buffer, used as upper bound of the result
 * @param buf_index offset in @p buf at which the scan begins
 * @param next_avc  offset that bounds the scan (the end pointer passed to
 *                  the scanner is buf + next_avc + 1)
 * @return the offset of the pointer returned by the scanner relative to
 *         @p buf, minus one, limited to at most @p buf_size
 */
static inline int find_start_code(const uint8_t *buf, int buf_size,
                                  int buf_index, int next_avc)
{
    uint32_t state = -1;
    const uint8_t *scan_begin = buf + buf_index;
    const uint8_t *scan_end   = buf + next_avc + 1;
    const uint8_t *found      = avpriv_mpv_find_start_code(scan_begin, scan_end,
                                                           &state);

    buf_index = found - buf - 1;

    if (buf_index > buf_size)
        return buf_size;
    return buf_index;
}
982 
983 static inline int get_avc_nalsize(H264Context *h, const uint8_t *buf,
984  int buf_size, int *buf_index)
985 {
986  int i, nalsize = 0;
987 
988  if (*buf_index >= buf_size - h->nal_length_size)
989  return -1;
990 
991  for (i = 0; i < h->nal_length_size; i++)
992  nalsize = ((unsigned)nalsize << 8) | buf[(*buf_index)++];
993  if (nalsize <= 0 || nalsize > buf_size - *buf_index) {
995  "AVC: nal size %d\n", nalsize);
996  return -1;
997  }
998  return nalsize;
999 }
1000 void ff_h264_draw_horiz_band(H264Context *h, int y, int height);
1001 
1002 #endif /* AVCODEC_H264_H */