/*
 * hardthresh_mmx: requantize the 64 int16_t elements of src into dst using
 * MMX inline assembly.
 *
 * NOTE(review): this listing looks like an extracted source view. The
 * original line numbers are fused into the text and several lines are absent
 * (braces, the asm statement opener, the declaration of bias and the loads
 * that fill mm4, mm5 and mm6) - confirm every detail against the real file.
 *
 * What the visible lines show:
 *  - threshold1 is computed as qp * ((1<<4) - bias) - 1.
 *  - The asm input operands are, in order: src (%0), dst (%1),
 *    threshold1+1, threshold1+5 and threshold1-4.
 *  - REQUANT_CORE is expanded four times; together the expansions read byte
 *    offsets 0..127 of src and write byte offsets 0..127 of dst.
 *  - After the asm, dst[0] is overwritten with (src[0] + 4) >> 3.
 *  - permutation is not referenced in the visible lines.
 */
29 static void hardthresh_mmx(int16_t dst[64],
const int16_t
src[64],
30 int qp,
const uint8_t *permutation)
33 unsigned int threshold1;
35 threshold1 = qp * ((1<<4) - bias) - 1;
/*
 * REQUANT_CORE: load four quadwords (src0..src3) into mm0..mm3, then for each
 * register: subtract mm4 (psubw), add mm5 with unsigned saturation (paddusw),
 * add mm6 (paddw), subtract mm6 with unsigned saturation (psubusw) and
 * arithmetic-shift every 16-bit word right by 3 (psraw). The punpcklwd /
 * punpckhwd sequence then interleaves the 16-bit words of the four registers
 * and the results are stored to dst0..dst3.
 * The two rounds of packssdw that precede the first expansion presumably
 * replicate a value held in the low part of mm4/mm5/mm6 into all four words;
 * the loads themselves are not visible here - TODO confirm.
 */
37 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ 38 "movq " #src0 ", %%mm0 \n" \ 39 "movq " #src1 ", %%mm1 \n" \ 40 "movq " #src2 ", %%mm2 \n" \ 41 "movq " #src3 ", %%mm3 \n" \ 42 "psubw %%mm4, %%mm0 \n" \ 43 "psubw %%mm4, %%mm1 \n" \ 44 "psubw %%mm4, %%mm2 \n" \ 45 "psubw %%mm4, %%mm3 \n" \ 46 "paddusw %%mm5, %%mm0 \n" \ 47 "paddusw %%mm5, %%mm1 \n" \ 48 "paddusw %%mm5, %%mm2 \n" \ 49 "paddusw %%mm5, %%mm3 \n" \ 50 "paddw %%mm6, %%mm0 \n" \ 51 "paddw %%mm6, %%mm1 \n" \ 52 "paddw %%mm6, %%mm2 \n" \ 53 "paddw %%mm6, %%mm3 \n" \ 54 "psubusw %%mm6, %%mm0 \n" \ 55 "psubusw %%mm6, %%mm1 \n" \ 56 "psubusw %%mm6, %%mm2 \n" \ 57 "psubusw %%mm6, %%mm3 \n" \ 58 "psraw $3, %%mm0 \n" \ 59 "psraw $3, %%mm1 \n" \ 60 "psraw $3, %%mm2 \n" \ 61 "psraw $3, %%mm3 \n" \ 63 "movq %%mm0, %%mm7 \n" \ 64 "punpcklwd %%mm2, %%mm0 \n" \ 65 "punpckhwd %%mm2, %%mm7 \n" \ 66 "movq %%mm1, %%mm2 \n" \ 67 "punpcklwd %%mm3, %%mm1 \n" \ 68 "punpckhwd %%mm3, %%mm2 \n" \ 69 "movq %%mm0, %%mm3 \n" \ 70 "punpcklwd %%mm1, %%mm0 \n" \ 71 "punpckhwd %%mm7, %%mm3 \n" \ 72 "punpcklwd %%mm2, %%mm7 \n" \ 73 "punpckhwd %%mm2, %%mm1 \n" \ 75 "movq %%mm0, " #dst0 " \n" \ 76 "movq %%mm7, " #dst1 " \n" \ 77 "movq %%mm3, " #dst2 " \n" \ 78 "movq %%mm1, " #dst3 " \n" 84 "packssdw %%mm4, %%mm4 \n" 85 "packssdw %%mm5, %%mm5 \n" 86 "packssdw %%mm6, %%mm6 \n" 87 "packssdw %%mm4, %%mm4 \n" 88 "packssdw %%mm5, %%mm5 \n" 89 "packssdw %%mm6, %%mm6 \n" 90 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
91 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
92 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
93 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
94 : : "
r" (
src), "
r" (dst), "
g" (threshold1+1), "
g" (threshold1+5), "
g" (threshold1-4)
/* Element 0 is overwritten after the asm: add 4 for rounding, then shift right by 3. */
96 dst[0] = (src[0] + 4) >> 3;
/*
 * softthresh_mmx: requantize the 64 int16_t elements of src into dst using
 * MMX inline assembly; each word has its magnitude reduced by the value in
 * mm4 before rounding and shifting.
 *
 * NOTE(review): this listing looks like an extracted source view. The
 * original line numbers are fused into the text and several lines are absent
 * (braces, the asm statement opener, the declaration of bias and the loads
 * that fill mm4 and mm5) - confirm every detail against the real file.
 *
 * What the visible lines show:
 *  - threshold1 is computed as qp*((1<<4) - bias) - 1.
 *  - The asm input operands are, in order: src (%0), dst (%1), threshold1
 *    and the constant 4.
 *  - REQUANT_CORE is expanded four times; together the expansions read byte
 *    offsets 0..127 of src and write byte offsets 0..127 of dst.
 *  - After the asm, dst[0] is overwritten with (src[0] + 4) >> 3.
 *  - permutation is not referenced in the visible lines.
 */
99 static
void softthresh_mmx(int16_t dst[64], const int16_t src[64],
100 int qp, const
uint8_t *permutation)
103 unsigned int threshold1;
105 threshold1 = qp*((1<<4) - bias) - 1;
/*
 * REQUANT_CORE (soft variant): the four source quadwords are handled in two
 * pairs (mm0/mm1, then mm2/mm3). For each pair mm6/mm7 are zeroed and
 * pcmpgtw turns them into per-word masks that are all ones where the source
 * word is negative. The value is XORed with its mask, mm4 is subtracted with
 * unsigned saturation (psubusw), and the value is XORed with the mask again.
 * All four registers then get mm5 added with signed saturation (paddsw) and
 * are arithmetic-shifted right by 3. The punpcklwd / punpckhwd sequence
 * interleaves the 16-bit words of the four registers before they are stored
 * to dst0..dst3.
 * The two rounds of packssdw that precede the first expansion presumably
 * replicate a value held in the low part of mm4/mm5 into all four words; the
 * loads themselves are not visible here - TODO confirm.
 */
108 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ 109 "movq " #src0 ", %%mm0 \n" \ 110 "movq " #src1 ", %%mm1 \n" \ 111 "pxor %%mm6, %%mm6 \n" \ 112 "pxor %%mm7, %%mm7 \n" \ 113 "pcmpgtw %%mm0, %%mm6 \n" \ 114 "pcmpgtw %%mm1, %%mm7 \n" \ 115 "pxor %%mm6, %%mm0 \n" \ 116 "pxor %%mm7, %%mm1 \n" \ 117 "psubusw %%mm4, %%mm0 \n" \ 118 "psubusw %%mm4, %%mm1 \n" \ 119 "pxor %%mm6, %%mm0 \n" \ 120 "pxor %%mm7, %%mm1 \n" \ 121 "movq " #src2 ", %%mm2 \n" \ 122 "movq " #src3 ", %%mm3 \n" \ 123 "pxor %%mm6, %%mm6 \n" \ 124 "pxor %%mm7, %%mm7 \n" \ 125 "pcmpgtw %%mm2, %%mm6 \n" \ 126 "pcmpgtw %%mm3, %%mm7 \n" \ 127 "pxor %%mm6, %%mm2 \n" \ 128 "pxor %%mm7, %%mm3 \n" \ 129 "psubusw %%mm4, %%mm2 \n" \ 130 "psubusw %%mm4, %%mm3 \n" \ 131 "pxor %%mm6, %%mm2 \n" \ 132 "pxor %%mm7, %%mm3 \n" \ 134 "paddsw %%mm5, %%mm0 \n" \ 135 "paddsw %%mm5, %%mm1 \n" \ 136 "paddsw %%mm5, %%mm2 \n" \ 137 "paddsw %%mm5, %%mm3 \n" \ 138 "psraw $3, %%mm0 \n" \ 139 "psraw $3, %%mm1 \n" \ 140 "psraw $3, %%mm2 \n" \ 141 "psraw $3, %%mm3 \n" \ 143 "movq %%mm0, %%mm7 \n" \ 144 "punpcklwd %%mm2, %%mm0 \n" \ 145 "punpckhwd %%mm2, %%mm7 \n" \ 146 "movq %%mm1, %%mm2 \n" \ 147 "punpcklwd %%mm3, %%mm1 \n" \ 148 "punpckhwd %%mm3, %%mm2 \n" \ 149 "movq %%mm0, %%mm3 \n" \ 150 "punpcklwd %%mm1, %%mm0 \n" \ 151 "punpckhwd %%mm7, %%mm3 \n" \ 152 "punpcklwd %%mm2, %%mm7 \n" \ 153 "punpckhwd %%mm2, %%mm1 \n" \ 155 "movq %%mm0, " #dst0 " \n" \ 156 "movq %%mm7, " #dst1 " \n" \ 157 "movq %%mm3, " #dst2 " \n" \ 158 "movq %%mm1, " #dst3 " \n" 163 "packssdw %%mm4, %%mm4 \n" 164 "packssdw %%mm5, %%mm5 \n" 165 "packssdw %%mm4, %%mm4 \n" 166 "packssdw %%mm5, %%mm5 \n" 167 REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0))
168 REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
169 REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
170 REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
171 : : "
r" (
src), "
r" (dst), "
g" (threshold1), "rm" (4)
/* Element 0 is overwritten after the asm: add 4 for rounding, then shift right by 3. */
174 dst[0] = (src[0] + 4) >> 3;
/*
 * store_slice_mmx: for each of height rows, add dither values to int16_t
 * samples read through src1, shift them right and store them as bytes
 * through dst1, using MMX inline assembly.
 *
 * NOTE(review): this listing looks like an extracted source view. The
 * original line numbers are fused into the text and several lines are absent
 * (the rest of the parameter list, the declarations of y, src1 and dst1, the
 * loads that fill mm2, the inner loop label, pointer advance and branch, and
 * the end of the outer loop) - confirm every detail against the real file.
 *
 * What the visible asm shows:
 *  - Operands: src1 (%0) and dst1 (%1) are read-write; the inputs are
 *    dst + width (%2), dither[y] (%3), log2_scale (%4) and
 *    MAX_LEVEL - log2_scale (%5).
 *  - Eight bytes at (%3) are loaded into both mm3 and mm4, widened to 16-bit
 *    words against a zeroed mm0 (low half in mm3, high half in mm4) and
 *    arithmetic-shifted right by the count in mm2.
 *  - Sixteen bytes (eight words) are loaded from (%0) and 8(%0), the widened
 *    dither words are added, the sums are arithmetic-shifted right by the
 *    count in mm2, packed to eight bytes with unsigned saturation (packuswb)
 *    and stored at (%1).
 *  - Operand %2 (dst + width) is presumably the end pointer of the inner
 *    loop; the compare is not visible here - TODO confirm.
 */
177 static
void store_slice_mmx(
uint8_t *dst, const int16_t *src,
178 int dst_stride,
int src_stride,
184 for (y = 0; y <
height; y++) {
188 "movq (%3), %%mm3 \n" 189 "movq (%3), %%mm4 \n" 191 "pxor %%mm0, %%mm0 \n" 192 "punpcklbw %%mm0, %%mm3 \n" 193 "punpckhbw %%mm0, %%mm4 \n" 194 "psraw %%mm2, %%mm3 \n" 195 "psraw %%mm2, %%mm4 \n" 198 "movq (%0), %%mm0 \n" 199 "movq 8(%0), %%mm1 \n" 200 "paddw %%mm3, %%mm0 \n" 201 "paddw %%mm4, %%mm1 \n" 202 "psraw %%mm2, %%mm0 \n" 203 "psraw %%mm2, %%mm1 \n" 204 "packuswb %%mm1, %%mm0 \n" 205 "movq %%mm0, (%1) \n" 210 :
"+r" (
src1),
"+r"(dst1)
211 :
"r"(dst +
width),
"r"(dither[y]),
"g"(log2_scale),
"g"(
MAX_LEVEL - log2_scale)
/*
 * Install the MMX requantize routine on s: case 0 selects hardthresh_mmx and
 * case 1 selects softthresh_mmx.
 * NOTE(review): the switch expression and the enclosing function header are
 * not visible in this excerpt; presumably this sits inside ff_spp_init_x86 -
 * confirm against the real file.
 */
231 case 0: s->
requantize = hardthresh_mmx;
break;
232 case 1: s->
requantize = softthresh_mmx;
break;
void(* store_slice)(uint8_t *dst, const int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale, const uint8_t dither[8][8])
av_cold void ff_spp_init_x86(SPPContext *s)
Memory handling functions.
static atomic_int cpu_flags
Macro definitions for various function/variable attributes.
static const uint8_t dither[8][8]
int av_opt_get_int(void *obj, const char *name, int search_flags, int64_t *out_val)
#define AV_CPU_FLAG_MMX
standard MMX
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
void(* requantize)(int16_t dst[64], const int16_t src[64], int qp, const uint8_t *permutation)