/*
 * Scratch-operand helpers and two byte-wise asm building blocks.
 *
 * DECLARE_DOUBLE_1 / DECLARE_DOUBLE_2 / DECLARE_UINT32_T expand to local
 * declarations named db_1, db_2 and it_1. The matching RESTRICT_ASM_* macros
 * expand to early-clobber output operands ("=&f" for the doubles, "=&r" for
 * the uint32_t) that bind those locals to the asm names %[db_1], %[db_2] and
 * %[it_1].
 *
 * MMI_PCMPGTUB(dst, src1, src2) and MMI_BTOH(dst_l, dst_r, src) both write
 * %[db_1] and %[db_2], so an asm statement that expands either of them has to
 * provide operands with those names.
 *
 * MMI_PCMPGTUB: db_1 = pcmpeqb(src1, src2); db_2 = pmaxub(src1, src2) and is
 * then compared for equality with src1; dst = db_2 xor db_1. Going by the
 * mnemonics this yields a per-byte unsigned "src1 > src2" mask, built as
 * (src1 >= src2) and not (src1 == src2) -- NOTE(review): confirm against the
 * Loongson MMI instruction reference.
 *
 * MMI_BTOH: clears db_1, sets db_2 = pcmpgtb(db_1, src) (presumably a per-byte
 * sign mask of src), then interleaves src with db_2: the low half goes to
 * dst_r via punpcklbh, the high half to dst_l via punpckhbh. This looks like
 * sign extension of eight signed bytes to two vectors of four halfwords --
 * TODO confirm operand order of punpck*bh.
 *
 * The MMI_VP8_LOOP_FILTER definition that starts after these helpers is only
 * partially contained in this span; its text is left untouched.
 */
28 #define DECLARE_DOUBLE_1 double db_1 29 #define DECLARE_DOUBLE_2 double db_2 30 #define DECLARE_UINT32_T uint32_t it_1 31 #define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1) 32 #define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2) 33 #define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1) 35 #define MMI_PCMPGTUB(dst, src1, src2) \ 36 "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \ 37 "pmaxub %[db_2], "#src1", "#src2" \n\t" \ 38 "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \ 39 "xor "#dst", %[db_2], %[db_1] \n\t" 41 #define MMI_BTOH(dst_l, dst_r, src) \ 42 "xor %[db_1], %[db_1], %[db_1] \n\t" \ 43 "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \ 44 "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \ 45 "punpckhbh "#dst_l", "#src", %[db_2] \n\t" 47 #define MMI_VP8_LOOP_FILTER \ 49 "dmtc1 %[thresh], %[ftmp3] \n\t" \ 50 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 51 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 52 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 53 "pasubub %[ftmp0], %[p1], %[p0] \n\t" \ 54 "pasubub %[ftmp1], %[q1], %[q0] \n\t" \ 55 "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \ 56 MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \ 58 "pasubub %[ftmp1], %[p0], %[q0] \n\t" \ 59 "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \ 60 "pasubub %[ftmp2], %[p1], %[q1] \n\t" \ 61 "li %[tmp0], 0x09 \n\t" \ 62 "dmtc1 %[tmp0], %[ftmp3] \n\t" \ 63 PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \ 64 "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 65 "dmtc1 %[e], %[ftmp3] \n\t" \ 66 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 67 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 68 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 69 MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \ 70 "pmaxub %[mask], %[mask], %[ftmp0] \n\t" \ 71 "pasubub %[ftmp1], %[p3], %[p2] \n\t" \ 72 "pasubub %[ftmp2], %[p2], %[p1] \n\t" \ 73 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 74 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \ 75 "pasubub %[ftmp1], %[q3], %[q2] \n\t" \ 76 "pasubub %[ftmp2], %[q2], %[q1] \n\t" \ 77 "pmaxub %[ftmp1], 
%[ftmp1], %[ftmp2] \n\t" \ 78 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \ 79 "dmtc1 %[i], %[ftmp3] \n\t" \ 80 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 81 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 82 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 83 MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \ 84 "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \ 85 "xor %[mask], %[mask], %[ftmp3] \n\t" \ 87 "li %[tmp0], 0x80808080 \n\t" \ 88 "dmtc1 %[tmp0], %[ftmp7] \n\t" \ 89 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \ 90 "xor %[p2], %[p2], %[ftmp7] \n\t" \ 91 "xor %[p1], %[p1], %[ftmp7] \n\t" \ 92 "xor %[p0], %[p0], %[ftmp7] \n\t" \ 93 "xor %[q0], %[q0], %[ftmp7] \n\t" \ 94 "xor %[q1], %[q1], %[ftmp7] \n\t" \ 95 "xor %[q2], %[q2], %[ftmp7] \n\t" \ 96 "psubsb %[ftmp4], %[p1], %[q1] \n\t" \ 97 "psubb %[ftmp5], %[q0], %[p0] \n\t" \ 98 MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \ 99 MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \ 101 "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" \ 102 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \ 103 "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \ 105 "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" \ 106 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \ 107 "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \ 109 "packsshb %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \ 110 "and %[ftmp1], %[ftmp1], %[mask] \n\t" \ 111 "and %[ftmp2], %[ftmp1], %[hev] \n\t" \ 112 "li %[tmp0], 0x04040404 \n\t" \ 113 "dmtc1 %[tmp0], %[ftmp0] \n\t" \ 114 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 115 "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \ 116 "li %[tmp0], 0x0B \n\t" \ 117 "dmtc1 %[tmp0], %[ftmp4] \n\t" \ 118 PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \ 119 "li %[tmp0], 0x03030303 \n\t" \ 120 "dmtc1 %[tmp0], %[ftmp0] \n\t" \ 121 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 122 "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \ 123 "li %[tmp0], 0x0B \n\t" \ 124 "dmtc1 %[tmp0], %[ftmp2] \n\t" \ 125 PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \ 126 "psubsb %[q0], %[q0], 
%[ftmp3] \n\t" \ 127 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \ 129 "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 130 "xor %[hev], %[hev], %[ftmp0] \n\t" \ 131 "and %[ftmp1], %[ftmp1], %[hev] \n\t" \ 132 MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \ 133 "li %[tmp0], 0x07 \n\t" \ 134 "dmtc1 %[tmp0], %[ftmp2] \n\t" \ 135 "li %[tmp0], 0x001b001b \n\t" \ 136 "dmtc1 %[tmp0], %[ftmp1] \n\t" \ 137 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \ 138 "li %[tmp0], 0x003f003f \n\t" \ 139 "dmtc1 %[tmp0], %[ftmp0] \n\t" \ 140 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ 142 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \ 143 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 144 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 146 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \ 147 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 148 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \ 150 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \ 151 "psubsb %[q0], %[q0], %[ftmp4] \n\t" \ 152 "xor %[q0], %[q0], %[ftmp7] \n\t" \ 153 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \ 154 "xor %[p0], %[p0], %[ftmp7] \n\t" \ 155 "li %[tmp0], 0x00120012 \n\t" \ 156 "dmtc1 %[tmp0], %[ftmp1] \n\t" \ 157 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \ 159 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \ 160 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 161 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 163 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \ 164 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 165 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \ 167 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \ 168 "psubsb %[q1], %[q1], %[ftmp4] \n\t" \ 169 "xor %[q1], %[q1], %[ftmp7] \n\t" \ 170 "paddsb %[p1], %[p1], %[ftmp4] \n\t" \ 171 "xor %[p1], %[p1], %[ftmp7] \n\t" \ 172 "li %[tmp0], 0x03 \n\t" \ 173 "dmtc1 %[tmp0], %[ftmp1] \n\t" \ 175 "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \ 176 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \ 177 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 178 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 180 "psllh %[ftmp4], %[ftmp5], %[ftmp1] 
\n\t" \ 181 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ 182 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 183 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \ 185 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \ 186 "psubsb %[q2], %[q2], %[ftmp4] \n\t" \ 187 "xor %[q2], %[q2], %[ftmp7] \n\t" \ 188 "paddsb %[p2], %[p2], %[ftmp4] \n\t" \ 189 "xor %[p2], %[p2], %[ftmp7] \n\t" 191 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \ 192 MMI_ULWC1(%[ftmp1], src, 0x00) \ 193 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 194 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 196 MMI_ULWC1(%[ftmp1], src, -0x01) \ 197 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 198 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 199 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 201 MMI_ULWC1(%[ftmp1], src, -0x02) \ 202 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 203 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 204 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 206 MMI_ULWC1(%[ftmp1], src, 0x01) \ 207 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 208 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 210 MMI_ULWC1(%[ftmp1], src, 0x02) \ 211 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 212 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 213 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 215 MMI_ULWC1(%[ftmp1], src, 0x03) \ 216 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 217 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 218 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 220 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 221 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 222 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 223 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 225 MMI_SWC1(%[ftmp1], dst, 0x00) 228 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \ 229 MMI_ULWC1(%[ftmp1], src, 0x00) \ 230 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 231 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 233 MMI_ULWC1(%[ftmp1], src, -0x01) \ 234 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 235 "pmullh %[ftmp2], %[ftmp2], 
%[filter1] \n\t" \ 236 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 238 MMI_ULWC1(%[ftmp1], src, 0x01) \ 239 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 240 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 242 MMI_ULWC1(%[ftmp1], src, 0x02) \ 243 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 244 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 245 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 247 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 249 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 250 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 252 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 253 MMI_SWC1(%[ftmp1], dst, 0x00) 256 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \ 257 MMI_ULWC1(%[ftmp1], src, 0x00) \ 258 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 259 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 261 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 262 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 263 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 264 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 265 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 267 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \ 268 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 269 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 270 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 271 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 273 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 274 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 275 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 276 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 278 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 279 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 280 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 281 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 282 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 284 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 285 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 286 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 287 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 288 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 290 
"paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 292 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 293 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 294 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 296 MMI_SWC1(%[ftmp1], dst, 0x00) 299 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \ 300 MMI_ULWC1(%[ftmp1], src, 0x00) \ 301 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 302 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \ 304 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 305 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 306 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 307 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 308 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \ 310 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 311 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 312 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 313 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \ 315 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 316 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 317 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 318 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 319 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 321 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ 323 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \ 324 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 325 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 327 MMI_SWC1(%[ftmp1], dst, 0x00) 330 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \ 331 MMI_ULDC1(%[ftmp1], src, 0x00) \ 332 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 333 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 334 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 335 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 337 MMI_ULDC1(%[ftmp1], src, -0x01) \ 338 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 339 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 340 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 341 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 342 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 343 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 345 MMI_ULDC1(%[ftmp1], 
src, -0x02) \ 346 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 347 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 348 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 349 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \ 350 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 351 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 353 MMI_ULDC1(%[ftmp1], src, 0x01) \ 354 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 355 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 356 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \ 357 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 359 MMI_ULDC1(%[ftmp1], src, 0x02) \ 360 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 361 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 362 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 363 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 364 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 365 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 367 MMI_ULDC1(%[ftmp1], src, 0x03) \ 368 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 369 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 370 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 371 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \ 372 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 373 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 375 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 376 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 378 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 379 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 380 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 381 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 382 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 384 MMI_SDC1(%[ftmp1], dst, 0x00) 387 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \ 388 MMI_ULDC1(%[ftmp1], src, 0x00) \ 389 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 390 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 391 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 392 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 394 MMI_ULDC1(%[ftmp1], src, -0x01) \ 395 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] 
\n\t" \ 396 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 397 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 398 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 399 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 400 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 402 MMI_ULDC1(%[ftmp1], src, 0x01) \ 403 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 404 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 405 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \ 406 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 408 MMI_ULDC1(%[ftmp1], src, 0x02) \ 409 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 410 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 411 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 412 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 413 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 414 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 416 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 417 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 419 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 420 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 421 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 422 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 424 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 425 MMI_SDC1(%[ftmp1], dst, 0x00) 428 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \ 429 MMI_ULDC1(%[ftmp1], src, 0x00) \ 430 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 431 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 432 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 433 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 435 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 436 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 437 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 438 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 439 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 440 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 441 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 442 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 444 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \ 445 
MMI_ULDC1(%[ftmp1], src1, 0x00) \ 446 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 447 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 448 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \ 449 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \ 450 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 451 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 453 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 454 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 455 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 456 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 457 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \ 458 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 460 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 461 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 462 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 463 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 464 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 465 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 466 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 467 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 469 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 470 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 471 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 472 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 473 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \ 474 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \ 475 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 476 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 478 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 479 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 481 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 482 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 483 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 484 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 485 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 487 MMI_SDC1(%[ftmp1], dst, 0x00) 490 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \ 491 MMI_ULDC1(%[ftmp1], src, 0x00) \ 492 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 493 "punpckhbh %[ftmp3], %[ftmp1], 
%[ftmp0] \n\t" \ 494 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \ 495 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \ 497 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \ 498 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 499 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 500 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 501 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \ 502 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \ 503 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \ 504 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \ 506 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \ 507 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 508 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 509 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 510 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \ 511 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \ 513 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \ 514 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 515 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 516 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 517 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \ 518 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \ 519 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 520 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 522 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 523 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \ 525 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \ 526 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \ 527 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 528 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 529 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 531 MMI_SDC1(%[ftmp1], dst, 0x00) 534 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \ 535 MMI_ULDC1(%[ftmp1], src, 0x00) \ 536 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 537 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 538 "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \ 539 "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \ 541 MMI_ULDC1(%[ftmp1], src, 0x01) \ 542 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 543 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] 
\n\t" \ 544 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \ 545 "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \ 546 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 547 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 549 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \ 550 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \ 551 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 552 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 554 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 555 MMI_SDC1(%[ftmp1], dst, 0x00) 558 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \ 559 MMI_ULWC1(%[ftmp1], src, 0x00) \ 560 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 561 "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \ 563 MMI_ULWC1(%[ftmp1], src, 0x01) \ 564 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 565 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \ 566 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 568 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \ 569 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 571 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 572 MMI_SWC1(%[ftmp1], dst, 0x00) 575 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \ 576 MMI_ULDC1(%[ftmp1], src, 0x00) \ 577 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 578 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 579 "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \ 580 "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \ 582 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \ 583 MMI_ULDC1(%[ftmp1], src1, 0x00) \ 584 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 585 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ 586 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \ 587 "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \ 588 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ 589 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \ 591 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \ 592 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \ 593 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \ 594 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \ 596 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \ 597 MMI_SDC1(%[ftmp1], dst, 0x00) 600 #define 
PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \ 601 MMI_ULWC1(%[ftmp1], src, 0x00) \ 602 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 603 "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \ 605 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \ 606 MMI_ULWC1(%[ftmp1], src1, 0x00) \ 607 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ 608 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \ 609 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \ 611 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \ 612 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 614 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ 615 MMI_SWC1(%[ftmp1], dst, 0x00) 619 {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
620 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
622 {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
623 0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
625 {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
626 0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
628 {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
629 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
631 {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
632 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
634 {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
635 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
637 {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
638 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
/*
 * Scalar sub-pixel filter macros, followed by seven rows of 8-bit tap values.
 *
 * FILTER_6TAP(src, F, stride) forms
 *     F[2]*src[x] - F[1]*src[x - stride] + F[0]*src[x - 2*stride]
 *   + F[3]*src[x + stride] - F[4]*src[x + 2*stride] + F[5]*src[x + 3*stride]
 * adds 64, shifts right by 7 and uses the result as an index into cm.
 * FILTER_4TAP is the same expression without the outer taps F[0] and F[5].
 *
 * Neither `x` nor `cm` is a macro parameter; both must be in scope where the
 * macro is expanded. cm is presumably a clamping lookup table -- its
 * definition is not visible here, TODO confirm.
 *
 * Taps F[1] and F[4] are subtracted by the macros, so the rows below hold
 * their magnitudes. With those signs applied every row sums to 128, which
 * matches the "+ 64 ... >> 7" rounding and normalisation. Rows whose first and
 * last entries are 0 give the same result through either macro.
 *
 * NOTE(review): the declaration these rows belong to is not part of this
 * span; presumably they are the F arguments passed to the macros -- verify
 * against the callers.
 */
642 #define FILTER_6TAP(src, F, stride) \ 643 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ 644 F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ 645 F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] 647 #define FILTER_4TAP(src, F, stride) \ 648 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ 649 F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] 652 { 0, 6, 123, 12, 1, 0 },
653 { 2, 11, 108, 36, 8, 1 },
654 { 0, 9, 93, 50, 6, 0 },
655 { 3, 16, 77, 77, 16, 3 },
656 { 0, 6, 50, 93, 9, 0 },
657 { 1, 8, 36, 108, 11, 2 },
658 { 0, 1, 12, 123, 6, 0 },
/*
 * Fixed-point helpers.
 *
 * MUL_20091(a): ((a * 20091) >> 16) + a, i.e. a scaled by 1 + 20091/65536
 *               (about 1.30656). The argument is expanded twice, so it must
 *               not have side effects.
 * MUL_35468(a): (a * 35468) >> 16, i.e. a scaled by 35468/65536 (about
 *               0.54120).
 * Both shift the product right; for a negative product the result of >> is
 * implementation-defined in C. NOTE(review): the constants look like the VP8
 * inverse-transform multipliers -- confirm against the callers.
 *
 * clip_int8(n): looks up cm[n + 0x80] and subtracts 0x80 from the result.
 *               `cm` is not a parameter and must be in scope at the expansion
 *               site; presumably it is a clamp-to-byte table, which would make
 *               this a clamp to the signed 8-bit range -- TODO confirm.
 *
 * The trailing "f1 =" on the line below belongs to a function body that is
 * not fully contained in this span and is left untouched.
 */
661 #define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) 662 #define MUL_35468(a) (((a) * 35468) >> 16) 665 #define clip_int8(n) (cm[(n) + 0x80] - 0x80) 682 f1 =
FFMIN(a + 4, 127) >> 3;
683 f2 =
FFMIN(a + 3, 127) >> 3;
687 p[-1 *
stride] = cm[p0 + f2];
688 p[ 0 *
stride] = cm[q0 - f1];
706 f1 =
FFMIN(a + 4, 127) >> 3;
707 f2 =
FFMIN(a + 3, 127) >> 3;
711 p[-1 *
stride] = cm[p0 + f2];
712 p[ 0 *
stride] = cm[q0 - f1];
726 return 2 *
FFABS(p0 - q0) + (
FFABS(p1 - q1) >> 1) <= flim;
736 return FFABS(p1 - p0) > thresh ||
FFABS(q1 - q0) > thresh;
754 a0 = (27 * w + 63) >> 7;
755 a1 = (18 * w + 63) >> 7;
756 a2 = (9 * w + 63) >> 7;
785 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
794 "gsldlc1 %[q0], 0x07(%[dst]) \n\t" 795 "gsldrc1 %[q0], 0x00(%[dst]) \n\t" 796 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t" 797 "gsldlc1 %[p0], 0x07(%[tmp0]) \n\t" 798 "gsldrc1 %[p0], 0x00(%[tmp0]) \n\t" 799 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 800 "gsldlc1 %[p1], 0x07(%[tmp0]) \n\t" 801 "gsldrc1 %[p1], 0x00(%[tmp0]) \n\t" 802 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 803 "gsldlc1 %[p2], 0x07(%[tmp0]) \n\t" 804 "gsldrc1 %[p2], 0x00(%[tmp0]) \n\t" 805 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 806 "gsldlc1 %[p3], 0x07(%[tmp0]) \n\t" 807 "gsldrc1 %[p3], 0x00(%[tmp0]) \n\t" 808 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t" 809 "gsldlc1 %[q1], 0x07(%[tmp0]) \n\t" 810 "gsldrc1 %[q1], 0x00(%[tmp0]) \n\t" 811 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 812 "gsldlc1 %[q2], 0x07(%[tmp0]) \n\t" 813 "gsldrc1 %[q2], 0x00(%[tmp0]) \n\t" 814 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 815 "gsldlc1 %[q3], 0x07(%[tmp0]) \n\t" 816 "gsldrc1 %[q3], 0x00(%[tmp0]) \n\t" 819 "gssdlc1 %[q0], 0x07(%[dst]) \n\t" 820 "gssdrc1 %[q0], 0x00(%[dst]) \n\t" 821 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t" 822 "gssdlc1 %[p0], 0x07(%[tmp0]) \n\t" 823 "gssdrc1 %[p0], 0x00(%[tmp0]) \n\t" 824 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 825 "gssdlc1 %[p1], 0x07(%[tmp0]) \n\t" 826 "gssdrc1 %[p1], 0x00(%[tmp0]) \n\t" 827 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t" 828 "gssdlc1 %[p2], 0x07(%[tmp0]) \n\t" 829 "gssdrc1 %[p2], 0x00(%[tmp0]) \n\t" 830 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t" 831 "gssdlc1 %[q1], 0x07(%[tmp0]) \n\t" 832 "gssdrc1 %[q1], 0x00(%[tmp0]) \n\t" 833 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 834 "gssdlc1 %[q2], 0x07(%[tmp0]) \n\t" 835 "gssdrc1 %[q2], 0x00(%[tmp0]) \n\t" 836 : [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
837 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
838 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
839 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
840 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
841 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
842 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
843 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
844 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
845 [dst]
"+&r"(dst), [tmp0]
"=&r"(tmp[0]),
855 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
859 for (i = 0; i < 8; i++)
861 int hv =
hev(dst + i * 1, stride, hev_thresh);
870 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
879 "gsldlc1 %[p3], 0x03(%[dst]) \n\t" 880 "gsldrc1 %[p3], -0x04(%[dst]) \n\t" 881 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t" 882 "gsldlc1 %[p2], 0x03(%[tmp0]) \n\t" 883 "gsldrc1 %[p2], -0x04(%[tmp0]) \n\t" 884 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 885 "gsldlc1 %[p1], 0x03(%[tmp0]) \n\t" 886 "gsldrc1 %[p1], -0x04(%[tmp0]) \n\t" 887 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 888 "gsldlc1 %[p0], 0x03(%[tmp0]) \n\t" 889 "gsldrc1 %[p0], -0x04(%[tmp0]) \n\t" 890 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 891 "gsldlc1 %[q0], 0x03(%[tmp0]) \n\t" 892 "gsldrc1 %[q0], -0x04(%[tmp0]) \n\t" 893 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 894 "gsldlc1 %[q1], 0x03(%[tmp0]) \n\t" 895 "gsldrc1 %[q1], -0x04(%[tmp0]) \n\t" 896 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 897 "gsldlc1 %[q2], 0x03(%[tmp0]) \n\t" 898 "gsldrc1 %[q2], -0x04(%[tmp0]) \n\t" 899 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t" 900 "gsldlc1 %[q3], 0x03(%[tmp0]) \n\t" 901 "gsldrc1 %[q3], -0x04(%[tmp0]) \n\t" 904 %[
q0], %[
q1], %[q2], %[q3],
905 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
909 %[
q0], %[
q1], %[q2], %[q3],
910 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
912 "gssdlc1 %[p3], 0x03(%[dst]) \n\t" 913 "gssdrc1 %[p3], -0x04(%[dst]) \n\t" 914 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 915 "gssdlc1 %[p2], 0x03(%[dst]) \n\t" 916 "gssdrc1 %[p2], -0x04(%[dst]) \n\t" 917 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 918 "gssdlc1 %[p1], 0x03(%[dst]) \n\t" 919 "gssdrc1 %[p1], -0x04(%[dst]) \n\t" 920 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 921 "gssdlc1 %[p0], 0x03(%[dst]) \n\t" 922 "gssdrc1 %[p0], -0x04(%[dst]) \n\t" 923 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 924 "gssdlc1 %[q0], 0x03(%[dst]) \n\t" 925 "gssdrc1 %[q0], -0x04(%[dst]) \n\t" 926 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 927 "gssdlc1 %[q1], 0x03(%[dst]) \n\t" 928 "gssdrc1 %[q1], -0x04(%[dst]) \n\t" 929 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 930 "gssdlc1 %[q2], 0x03(%[dst]) \n\t" 931 "gssdrc1 %[q2], -0x04(%[dst]) \n\t" 932 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 933 "gssdlc1 %[q3], 0x03(%[dst]) \n\t" 934 "gssdrc1 %[q3], -0x04(%[dst]) \n\t" 935 : [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
936 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
937 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
938 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
939 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
940 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
941 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
942 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
943 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
944 [dst]
"+&r"(dst), [tmp0]
"=&r"(tmp[0]),
954 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
958 for (i = 0; i < 8; i++)
960 int hv =
hev(dst + i * stride, 1, hev_thresh);
975 MMI_LDC1(%[ftmp0], %[dc], 0x00)
976 MMI_LDC1(%[ftmp1], %[dc], 0x08)
977 MMI_LDC1(%[ftmp2], %[dc], 0x10)
978 MMI_LDC1(%[ftmp3], %[dc], 0x18)
979 "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t" 980 "psubsh %[ftmp5], %[ftmp0], %[ftmp3] \n\t" 981 "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 982 "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t" 983 "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t" 984 "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" 985 "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t" 986 "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t" 987 MMI_SDC1(%[ftmp0], %[dc], 0x00)
988 MMI_SDC1(%[ftmp1], %[dc], 0x08)
989 MMI_SDC1(%[ftmp2], %[dc], 0x10)
990 MMI_SDC1(%[ftmp3], %[dc], 0x18)
991 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
992 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
993 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
994 [ftmp6]
"=&f"(ftmp[6]),
996 [ftmp7]
"=&f"(ftmp[7])
1001 block[0][0][0] = (dc[0] + dc[3] + 3 + dc[1] + dc[2]) >> 3;
1002 block[0][1][0] = (dc[0] - dc[3] + 3 + dc[1] - dc[2]) >> 3;
1003 block[0][2][0] = (dc[0] + dc[3] + 3 - dc[1] - dc[2]) >> 3;
1004 block[0][3][0] = (dc[0] - dc[3] + 3 - dc[1] + dc[2]) >> 3;
1006 block[1][0][0] = (dc[4] + dc[7] + 3 + dc[5] + dc[6]) >> 3;
1007 block[1][1][0] = (dc[4] - dc[7] + 3 + dc[5] - dc[6]) >> 3;
1008 block[1][2][0] = (dc[4] + dc[7] + 3 - dc[5] - dc[6]) >> 3;
1009 block[1][3][0] = (dc[4] - dc[7] + 3 - dc[5] + dc[6]) >> 3;
1011 block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1012 block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1013 block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1014 block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1016 block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1017 block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1018 block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1019 block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1022 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1023 MMI_SDC1(%[ftmp0], %[dc], 0x00)
1024 MMI_SDC1(%[ftmp0], %[dc], 0x08)
1025 MMI_SDC1(%[ftmp0], %[dc], 0x10)
1026 MMI_SDC1(%[ftmp0], %[dc], 0x18)
1027 : RESTRICT_ASM_ALL64
1028 [ftmp0]
"=&f"(ftmp[0])
1033 int t00, t01, t02, t03,
t10,
t11,
t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1035 t00 = dc[0] + dc[12];
1036 t10 = dc[1] + dc[13];
1037 t20 = dc[2] + dc[14];
1038 t30 = dc[3] + dc[15];
1040 t03 = dc[0] - dc[12];
1041 t13 = dc[1] - dc[13];
1042 t23 = dc[2] - dc[14];
1043 t33 = dc[3] - dc[15];
1045 t01 = dc[4] + dc[ 8];
1046 t11 = dc[5] + dc[ 9];
1047 t21 = dc[6] + dc[10];
1048 t31 = dc[7] + dc[11];
1050 t02 = dc[4] - dc[ 8];
1051 t12 = dc[5] - dc[ 9];
1052 t22 = dc[6] - dc[10];
1053 t32 = dc[7] - dc[11];
1075 block[0][0][0] = (dc[0] + dc[3] + 3 + dc[1] + dc[2]) >> 3;
1076 block[0][1][0] = (dc[0] - dc[3] + 3 + dc[1] - dc[2]) >> 3;
1077 block[0][2][0] = (dc[0] + dc[3] + 3 - dc[1] - dc[2]) >> 3;
1078 block[0][3][0] = (dc[0] - dc[3] + 3 - dc[1] + dc[2]) >> 3;
1080 block[1][0][0] = (dc[4] + dc[7] + 3 + dc[5] + dc[6]) >> 3;
1081 block[1][1][0] = (dc[4] - dc[7] + 3 + dc[5] - dc[6]) >> 3;
1082 block[1][2][0] = (dc[4] + dc[7] + 3 - dc[5] - dc[6]) >> 3;
1083 block[1][3][0] = (dc[4] - dc[7] + 3 - dc[5] + dc[6]) >> 3;
1085 block[2][0][0] = (dc[8] + dc[11] + 3 + dc[9] + dc[10]) >> 3;
1086 block[2][1][0] = (dc[8] - dc[11] + 3 + dc[9] - dc[10]) >> 3;
1087 block[2][2][0] = (dc[8] + dc[11] + 3 - dc[9] - dc[10]) >> 3;
1088 block[2][3][0] = (dc[8] - dc[11] + 3 - dc[9] + dc[10]) >> 3;
1090 block[3][0][0] = (dc[12] + dc[15] + 3 + dc[13] + dc[14]) >> 3;
1091 block[3][1][0] = (dc[12] - dc[15] + 3 + dc[13] - dc[14]) >> 3;
1092 block[3][2][0] = (dc[12] + dc[15] + 3 - dc[13] - dc[14]) >> 3;
1093 block[3][3][0] = (dc[12] - dc[15] + 3 - dc[13] + dc[14]) >> 3;
1104 int val = (dc[0] + 3) >> 3;
1108 block[0][0][0] =
val;
1109 block[0][1][0] =
val;
1110 block[0][2][0] =
val;
1111 block[0][3][0] =
val;
1112 block[1][0][0] =
val;
1113 block[1][1][0] =
val;
1114 block[1][2][0] =
val;
1115 block[1][3][0] =
val;
1116 block[2][0][0] =
val;
1117 block[2][1][0] =
val;
1118 block[2][2][0] =
val;
1119 block[2][3][0] =
val;
1120 block[3][0][0] =
val;
1121 block[3][1][0] =
val;
1122 block[3][2][0] =
val;
1123 block[3][3][0] =
val;
1129 DECLARE_ALIGNED(8,
const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
1130 DECLARE_ALIGNED(8,
const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
1137 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1138 MMI_LDC1(%[ftmp1], %[block], 0x00)
1139 MMI_LDC1(%[ftmp2], %[block], 0x08)
1140 MMI_LDC1(%[ftmp3], %[block], 0x10)
1141 MMI_LDC1(%[ftmp4], %[block], 0x18)
1143 "li %[tmp0], 0x02 \n\t" 1144 "mtc1 %[tmp0], %[ftmp11] \n\t" 1147 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" 1149 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t" 1151 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t" 1152 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t" 1154 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t" 1155 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t" 1157 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t" 1158 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t" 1160 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t" 1161 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t" 1164 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" 1165 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 1167 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" 1168 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" 1170 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t" 1171 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t" 1173 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t" 1174 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 1176 MMI_SDC1(%[ftmp0], %[block], 0x00)
1177 MMI_SDC1(%[ftmp0], %[block], 0x08)
1178 MMI_SDC1(%[ftmp0], %[block], 0x10)
1179 MMI_SDC1(%[ftmp0], %[block], 0x18)
1182 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1185 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" 1187 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t" 1189 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t" 1190 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t" 1191 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t" 1192 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t" 1193 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" 1195 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t" 1196 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t" 1197 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t" 1198 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t" 1199 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t" 1201 "li %[tmp0], 0x03 \n\t" 1202 "mtc1 %[tmp0], %[ftmp11] \n\t" 1203 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t" 1204 "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t" 1205 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 1206 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t" 1207 "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t" 1208 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t" 1209 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t" 1210 "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" 1211 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t" 1212 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t" 1213 "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t" 1214 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t" 1217 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1219 MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1220 MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1221 MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1222 MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1224 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1225 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1226 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1227 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1229 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1230 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 1231 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 1232 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 1234 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1235 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1236 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1237 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1239 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1240 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1241 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1242 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1243 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1244 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1245 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1246 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1247 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1248 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1252 : [dst0]
"r"(dst), [dst1]
"r"(dst+
stride),
1253 [dst2]
"r"(dst+2*stride), [dst3]
"r"(dst+3*
stride),
1255 [ff_ph_4e7b]
"f"(ff_ph_4e7b), [ff_ph_22a3]
"f"(ff_ph_22a3)
1262 for (i = 0; i < 4; i++) {
1263 t0 = block[0 +
i] + block[8 +
i];
1264 t1 = block[0 +
i] - block[8 +
i];
1265 t2 = MUL_35468(block[4 + i]) - MUL_20091(block[12 + i]);
1266 t3 = MUL_20091(block[4 + i]) + MUL_35468(block[12 + i]);
1272 tmp[i * 4 + 0] = t0 +
t3;
1273 tmp[i * 4 + 1] = t1 +
t2;
1274 tmp[i * 4 + 2] = t1 -
t2;
1275 tmp[i * 4 + 3] = t0 -
t3;
1278 for (i = 0; i < 4; i++) {
1279 t0 = tmp[0 +
i] + tmp[8 +
i];
1280 t1 = tmp[0 +
i] - tmp[8 +
i];
1281 t2 = MUL_35468(tmp[4 + i]) - MUL_20091(tmp[12 + i]);
1282 t3 = MUL_20091(tmp[4 + i]) + MUL_35468(tmp[12 + i]);
1284 dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
1285 dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
1286 dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
1287 dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
1296 int dc = (block[0] + 4) >> 3;
1303 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1304 "mtc1 %[dc], %[ftmp5] \n\t" 1305 MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1306 MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1307 MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1308 MMI_LWC1(%[ftmp4], %[dst3], 0x00)
1309 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1310 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1311 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1312 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1313 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1314 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 1315 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t" 1316 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" 1317 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" 1318 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1319 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1320 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1321 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1322 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1323 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1324 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1325 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1326 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1327 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1328 [ftmp4]
"=&f"(ftmp[4]),
1330 [ftmp5]
"=&f"(ftmp[5])
1331 : [dst0]
"r"(dst), [dst1]
"r"(dst+stride),
1332 [dst2]
"r"(dst+2*
stride), [dst3]
"r"(dst+3*stride),
1337 int i, dc = (block[0] + 4) >> 3;
1341 for (i = 0; i < 4; i++) {
1342 dst[0] = av_clip_uint8(dst[0] + dc);
1343 dst[1] = av_clip_uint8(dst[1] + dc);
1344 dst[2] = av_clip_uint8(dst[2] + dc);
1345 dst[3] = av_clip_uint8(dst[3] + dc);
1371 int flim_I,
int hev_thresh)
1378 int flim_I,
int hev_thresh)
1386 int flim_E,
int flim_I,
int hev_thresh)
1393 int flim_E,
int flim_I,
int hev_thresh)
1401 int flim_E,
int flim_I,
int hev_thresh)
1405 for (i = 0; i < 16; i++)
1407 int hv =
hev(dst + i * 1, stride, hev_thresh);
1416 int flim_E,
int flim_I,
int hev_thresh)
1420 for (i = 0; i < 16; i++)
1422 int hv =
hev(dst + i * stride, 1, hev_thresh);
1431 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1438 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1448 for (i = 0; i < 16; i++)
1457 for (i = 0; i < 16; i++)
1463 ptrdiff_t srcstride,
int h,
int x,
int y)
1473 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t" 1474 MMI_ULDC1(%[ftmp0], %[src], 0x00)
1475 "ldl %[tmp0], 0x0f(%[src]) \n\t" 1476 "ldr %[tmp0], 0x08(%[src]) \n\t" 1477 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1478 "ldl %[tmp1], 0x0f(%[addr0]) \n\t" 1479 "ldr %[tmp1], 0x08(%[addr0]) \n\t" 1480 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t" 1481 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1482 "sdl %[tmp0], 0x0f(%[dst]) \n\t" 1483 "sdr %[tmp0], 0x08(%[dst]) \n\t" 1484 "addiu %[h], %[h], -0x02 \n\t" 1485 MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1486 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t" 1487 "sdl %[tmp1], 0x0f(%[addr1]) \n\t" 1488 "sdr %[tmp1], 0x08(%[addr1]) \n\t" 1489 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t" 1490 "bnez %[h], 1b \n\t" 1491 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1492 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
1494 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1495 [dst]
"+&r"(dst), [src]
"+&r"(src),
1497 : [dststride]
"r"((
mips_reg)dststride),
1498 [srcstride]
"r"((
mips_reg)srcstride)
1504 for (i = 0; i <
h; i++, dst += dststride, src += srcstride)
1505 memcpy(dst, src, 16);
1510 ptrdiff_t srcstride,
int h,
int x,
int y)
1520 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t" 1521 MMI_ULDC1(%[ftmp0], %[src], 0x00)
1522 "ldl %[tmp0], 0x07(%[addr0]) \n\t" 1523 "ldr %[tmp0], 0x00(%[addr0]) \n\t" 1524 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t" 1525 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1526 "addiu %[h], %[h], -0x02 \n\t" 1527 "sdl %[tmp0], 0x07(%[addr1]) \n\t" 1528 "sdr %[tmp0], 0x00(%[addr1]) \n\t" 1529 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t" 1530 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t" 1531 "bnez %[h], 1b \n\t" 1532 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(tmp[0]),
1534 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1535 [dst]
"+&r"(dst), [src]
"+&r"(src),
1537 : [dststride]
"r"((
mips_reg)dststride),
1538 [srcstride]
"r"((
mips_reg)srcstride)
1544 for (i = 0; i <
h; i++, dst += dststride, src += srcstride)
1545 memcpy(dst, src, 8);
1550 ptrdiff_t srcstride,
int h,
int x,
int y)
1560 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t" 1561 MMI_LWC1(%[ftmp0], %[src], 0x00)
1562 "lwl %[tmp0], 0x03(%[addr0]) \n\t" 1563 "lwr %[tmp0], 0x00(%[addr0]) \n\t" 1564 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t" 1565 MMI_SWC1(%[ftmp0], %[dst], 0x00)
1566 "addiu %[h], %[h], -0x02 \n\t" 1567 "swl %[tmp0], 0x03(%[addr1]) \n\t" 1568 "swr %[tmp0], 0x00(%[addr1]) \n\t" 1569 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t" 1570 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t" 1571 "bnez %[h], 1b \n\t" 1572 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(tmp[0]),
1574 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1575 [dst]
"+&r"(dst), [src]
"+&r"(src),
1577 : [dststride]
"r"((
mips_reg)dststride),
1578 [srcstride]
"r"((
mips_reg)srcstride)
1584 for (i = 0; i <
h; i++, dst += dststride, src += srcstride)
1585 memcpy(dst, src, 4);
1590 ptrdiff_t srcstride,
int h,
int mx,
int my)
1619 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1620 "li %[tmp0], 0x07 \n\t" 1621 "mtc1 %[tmp0], %[ftmp4] \n\t" 1631 "addiu %[h], %[h], -0x01 \n\t" 1632 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1633 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1634 "bnez %[h], 1b \n\t" 1635 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1636 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1637 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1638 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1639 [ftmp8]
"=&f"(ftmp[8]),
1640 [tmp0]
"=&r"(tmp[0]),
1642 [dst1]
"=&r"(dst1), [src1]
"=&r"(src1),
1644 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1646 [srcstride]
"r"((
mips_reg)srcstride),
1647 [dststride]
"r"((
mips_reg)dststride),
1648 [
filter1]
"f"(filter[1]), [filter2]
"f"(filter[2]),
1649 [filter3]
"f"(filter[3]), [filter4]
"f"(filter[4])
1657 for (y = 0; y <
h; y++) {
1658 for (x = 0; x < 16; x++)
1667 ptrdiff_t srcstride,
int h,
int mx,
int my)
1686 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1687 "li %[tmp0], 0x07 \n\t" 1688 "mtc1 %[tmp0], %[ftmp4] \n\t" 1693 "addiu %[h], %[h], -0x01 \n\t" 1694 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1695 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1696 "bnez %[h], 1b \n\t" 1697 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1698 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1699 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1700 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1701 [ftmp8]
"=&f"(ftmp[8]),
1702 [tmp0]
"=&r"(tmp[0]),
1705 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1707 [srcstride]
"r"((
mips_reg)srcstride),
1708 [dststride]
"r"((
mips_reg)dststride),
1709 [
filter1]
"f"(filter[1]), [filter2]
"f"(filter[2]),
1710 [filter3]
"f"(filter[3]), [filter4]
"f"(filter[4])
1718 for (y = 0; y <
h; y++) {
1719 for (x = 0; x < 8; x++)
1728 ptrdiff_t srcstride,
int h,
int mx,
int my)
1743 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1744 "li %[tmp0], 0x07 \n\t" 1745 "mtc1 %[tmp0], %[ftmp4] \n\t" 1750 "addiu %[h], %[h], -0x01 \n\t" 1751 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1752 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1753 "bnez %[h], 1b \n\t" 1754 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1755 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1756 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1757 [tmp0]
"=&r"(tmp[0]),
1760 [dst]
"+&r"(dst), [src]
"+&r"(src)
1762 [srcstride]
"r"((
mips_reg)srcstride),
1763 [dststride]
"r"((
mips_reg)dststride),
1764 [
filter1]
"f"(filter[1]), [filter2]
"f"(filter[2]),
1765 [filter3]
"f"(filter[3]), [filter4]
"f"(filter[4])
1773 for (y = 0; y <
h; y++) {
1774 for (x = 0; x < 4; x++)
1783 ptrdiff_t srcstride,
int h,
int mx,
int my)
1812 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1813 "li %[tmp0], 0x07 \n\t" 1814 "mtc1 %[tmp0], %[ftmp4] \n\t" 1824 "addiu %[h], %[h], -0x01 \n\t" 1825 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1826 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1827 "bnez %[h], 1b \n\t" 1828 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1829 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1830 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1831 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1832 [ftmp8]
"=&f"(ftmp[8]),
1833 [tmp0]
"=&r"(tmp[0]),
1835 [dst1]
"=&r"(dst1), [src1]
"=&r"(src1),
1837 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1839 [srcstride]
"r"((
mips_reg)srcstride),
1840 [dststride]
"r"((
mips_reg)dststride),
1842 [filter2]
"f"(filter[2]), [filter3]
"f"(filter[3]),
1843 [filter4]
"f"(filter[4]), [filter5]
"f"(filter[5])
1851 for (y = 0; y <
h; y++) {
1852 for (x = 0; x < 16; x++)
1861 ptrdiff_t srcstride,
int h,
int mx,
int my)
1880 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1881 "li %[tmp0], 0x07 \n\t" 1882 "mtc1 %[tmp0], %[ftmp4] \n\t" 1887 "addiu %[h], %[h], -0x01 \n\t" 1888 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1889 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1890 "bnez %[h], 1b \n\t" 1891 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1892 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1893 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1894 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1895 [ftmp8]
"=&f"(ftmp[8]),
1896 [tmp0]
"=&r"(tmp[0]),
1899 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1901 [srcstride]
"r"((
mips_reg)srcstride),
1902 [dststride]
"r"((
mips_reg)dststride),
1904 [filter2]
"f"(filter[2]), [filter3]
"f"(filter[3]),
1905 [filter4]
"f"(filter[4]), [filter5]
"f"(filter[5])
1913 for (y = 0; y <
h; y++) {
1914 for (x = 0; x < 8; x++)
1923 ptrdiff_t srcstride,
int h,
int mx,
int my)
1938 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1939 "li %[tmp0], 0x07 \n\t" 1940 "mtc1 %[tmp0], %[ftmp4] \n\t" 1945 "addiu %[h], %[h], -0x01 \n\t" 1946 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 1947 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 1948 "bnez %[h], 1b \n\t" 1949 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1950 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1951 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1952 [tmp0]
"=&r"(tmp[0]),
1955 [dst]
"+&r"(dst), [src]
"+&r"(src)
1957 [srcstride]
"r"((
mips_reg)srcstride),
1958 [dststride]
"r"((
mips_reg)dststride),
1960 [filter2]
"f"(filter[2]), [filter3]
"f"(filter[3]),
1961 [filter4]
"f"(filter[4]), [filter5]
"f"(filter[5])
1969 for (y = 0; y <
h; y++) {
1970 for (x = 0; x < 4; x++)
1979 ptrdiff_t srcstride,
int h,
int mx,
int my)
2008 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2009 "li %[tmp0], 0x07 \n\t" 2010 "mtc1 %[tmp0], %[ftmp4] \n\t" 2020 "addiu %[h], %[h], -0x01 \n\t" 2021 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2022 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2023 "bnez %[h], 1b \n\t" 2024 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2025 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2026 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2027 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2028 [ftmp8]
"=&f"(ftmp[8]),
2029 [tmp0]
"=&r"(tmp[0]),
2031 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2034 [dst]
"+&r"(dst), [src]
"+&r"(src)
2036 [srcstride]
"r"((
mips_reg)srcstride),
2037 [dststride]
"r"((
mips_reg)dststride),
2038 [
filter1]
"f"(filter[1]), [filter2]
"f"(filter[2]),
2039 [filter3]
"f"(filter[3]), [filter4]
"f"(filter[4])
2047 for (y = 0; y <
h; y++) {
2048 for (x = 0; x < 16; x++)
2057 ptrdiff_t srcstride,
int h,
int mx,
int my)
2077 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2078 "li %[tmp0], 0x07 \n\t" 2079 "mtc1 %[tmp0], %[ftmp4] \n\t" 2084 "addiu %[h], %[h], -0x01 \n\t" 2085 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2086 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2087 "bnez %[h], 1b \n\t" 2088 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2089 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2090 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2091 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2092 [ftmp8]
"=&f"(ftmp[8]),
2093 [tmp0]
"=&r"(tmp[0]),
2097 [dst]
"+&r"(dst), [src]
"+&r"(src)
2099 [srcstride]
"r"((
mips_reg)srcstride),
2100 [dststride]
"r"((
mips_reg)dststride),
2101 [
filter1]
"f"(filter[1]), [filter2]
"f"(filter[2]),
2102 [filter3]
"f"(filter[3]), [filter4]
"f"(filter[4])
2110 for (y = 0; y <
h; y++) {
2111 for (x = 0; x < 8; x++)
2120 ptrdiff_t srcstride,
int h,
int mx,
int my)
2136 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2137 "li %[tmp0], 0x07 \n\t" 2138 "mtc1 %[tmp0], %[ftmp4] \n\t" 2143 "addiu %[h], %[h], -0x01 \n\t" 2144 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2145 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2146 "bnez %[h], 1b \n\t" 2147 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2148 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2149 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2150 [tmp0]
"=&r"(tmp[0]),
2154 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2156 [srcstride]
"r"((
mips_reg)srcstride),
2157 [dststride]
"r"((
mips_reg)dststride),
2158 [
filter1]
"f"(filter[1]), [filter2]
"f"(filter[2]),
2159 [filter3]
"f"(filter[3]), [filter4]
"f"(filter[4])
2167 for (y = 0; y <
h; y++) {
2168 for (x = 0; x < 4; x++)
2177 ptrdiff_t srcstride,
int h,
int mx,
int my)
2206 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2207 "li %[tmp0], 0x07 \n\t" 2208 "mtc1 %[tmp0], %[ftmp4] \n\t" 2218 "addiu %[h], %[h], -0x01 \n\t" 2219 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2220 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2221 "bnez %[h], 1b \n\t" 2222 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2223 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2224 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2225 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2226 [ftmp8]
"=&f"(ftmp[8]),
2227 [tmp0]
"=&r"(tmp[0]),
2229 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2232 [dst]
"+&r"(dst), [src]
"+&r"(src)
2234 [srcstride]
"r"((
mips_reg)srcstride),
2235 [dststride]
"r"((
mips_reg)dststride),
2237 [filter2]
"f"(filter[2]), [filter3]
"f"(filter[3]),
2238 [filter4]
"f"(filter[4]), [filter5]
"f"(filter[5])
2246 for (y = 0; y <
h; y++) {
2247 for (x = 0; x < 16; x++)
2256 ptrdiff_t srcstride,
int h,
int mx,
int my)
2276 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2277 "li %[tmp0], 0x07 \n\t" 2278 "mtc1 %[tmp0], %[ftmp4] \n\t" 2283 "addiu %[h], %[h], -0x01 \n\t" 2284 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2285 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2286 "bnez %[h], 1b \n\t" 2287 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2288 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2289 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2290 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2291 [ftmp8]
"=&f"(ftmp[8]),
2292 [tmp0]
"=&r"(tmp[0]),
2296 [dst]
"+&r"(dst), [src]
"+&r"(src)
2298 [srcstride]
"r"((
mips_reg)srcstride),
2299 [dststride]
"r"((
mips_reg)dststride),
2301 [filter2]
"f"(filter[2]), [filter3]
"f"(filter[3]),
2302 [filter4]
"f"(filter[4]), [filter5]
"f"(filter[5])
2310 for (y = 0; y <
h; y++) {
2311 for (x = 0; x < 8; x++)
2320 ptrdiff_t srcstride,
int h,
int mx,
int my)
2336 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2337 "li %[tmp0], 0x07 \n\t" 2338 "mtc1 %[tmp0], %[ftmp4] \n\t" 2343 "addiu %[h], %[h], -0x01 \n\t" 2344 PTR_ADDU "%[src], %[src], %[srcstride] \n\t" 2345 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t" 2346 "bnez %[h], 1b \n\t" 2347 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2348 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2349 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2350 [tmp0]
"=&r"(tmp[0]),
2354 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2356 [srcstride]
"r"((
mips_reg)srcstride),
2357 [dststride]
"r"((
mips_reg)dststride),
2359 [filter2]
"f"(filter[2]), [filter3]
"f"(filter[3]),
2360 [filter4]
"f"(filter[4]), [filter5]
"f"(filter[5])
2368 for (y = 0; y <
h; y++) {
2369 for (x = 0; x < 4; x++)
2378 ptrdiff_t srcstride,
int h,
int mx,
int my)
2386 tmp = tmp_array + 16;
2397 for (y = 0; y < h + 3; y++) {
2398 for (x = 0; x < 16; x++)
2404 tmp = tmp_array + 16;
2407 for (y = 0; y <
h; y++) {
2408 for (x = 0; x < 16; x++)
2417 ptrdiff_t srcstride,
int h,
int mx,
int my)
2425 tmp = tmp_array + 8;
2436 for (y = 0; y < h + 3; y++) {
2437 for (x = 0; x < 8; x++)
2443 tmp = tmp_array + 8;
2446 for (y = 0; y <
h; y++) {
2447 for (x = 0; x < 8; x++)
2456 ptrdiff_t srcstride,
int h,
int mx,
int my)
2464 tmp = tmp_array + 4;
2475 for (y = 0; y < h + 3; y++) {
2476 for (x = 0; x < 4; x++)
2481 tmp = tmp_array + 4;
2484 for (y = 0; y <
h; y++) {
2485 for (x = 0; x < 4; x++)
2494 ptrdiff_t srcstride,
int h,
int mx,
int my)
2500 src -= 2 * srcstride;
2502 tmp = tmp_array + 32;
2511 src -= 2 * srcstride;
2513 for (y = 0; y < h + 5; y++) {
2514 for (x = 0; x < 16; x++)
2520 tmp = tmp_array + 32;
2523 for (y = 0; y <
h; y++) {
2524 for (x = 0; x < 16; x++)
2533 ptrdiff_t srcstride,
int h,
int mx,
int my)
2539 src -= 2 * srcstride;
2541 tmp = tmp_array + 16;
2550 src -= 2 * srcstride;
2552 for (y = 0; y < h + 5; y++) {
2553 for (x = 0; x < 8; x++)
2559 tmp = tmp_array + 16;
2562 for (y = 0; y <
h; y++) {
2563 for (x = 0; x < 8; x++)
2572 ptrdiff_t srcstride,
int h,
int mx,
int my)
2578 src -= 2 * srcstride;
2580 tmp = tmp_array + 8;
2589 src -= 2 * srcstride;
2591 for (y = 0; y < h + 5; y++) {
2592 for (x = 0; x < 4; x++)
2598 tmp = tmp_array + 8;
2601 for (y = 0; y <
h; y++) {
2602 for (x = 0; x < 4; x++)
2611 ptrdiff_t srcstride,
int h,
int mx,
int my)
2619 tmp = tmp_array + 16;
2630 for (y = 0; y < h + 3; y++) {
2631 for (x = 0; x < 16; x++)
2637 tmp = tmp_array + 16;
2640 for (y = 0; y <
h; y++) {
2641 for (x = 0; x < 16; x++)
2650 ptrdiff_t srcstride,
int h,
int mx,
int my)
2658 tmp = tmp_array + 8;
2669 for (y = 0; y < h + 3; y++) {
2670 for (x = 0; x < 8; x++)
2676 tmp = tmp_array + 8;
2679 for (y = 0; y <
h; y++) {
2680 for (x = 0; x < 8; x++)
2689 ptrdiff_t srcstride,
int h,
int mx,
int my)
2697 tmp = tmp_array + 4;
2708 for (y = 0; y < h + 3; y++) {
2709 for (x = 0; x < 4; x++)
2715 tmp = tmp_array + 4;
2718 for (y = 0; y <
h; y++) {
2719 for (x = 0; x < 4; x++)
2728 ptrdiff_t srcstride,
int h,
int mx,
int my)
2734 src -= 2 * srcstride;
2736 tmp = tmp_array + 32;
2745 src -= 2 * srcstride;
2747 for (y = 0; y < h + 5; y++) {
2748 for (x = 0; x < 16; x++)
2754 tmp = tmp_array + 32;
2757 for (y = 0; y <
h; y++) {
2758 for (x = 0; x < 16; x++)
2767 ptrdiff_t srcstride,
int h,
int mx,
int my)
2773 src -= 2 * srcstride;
2775 tmp = tmp_array + 16;
2784 src -= 2 * srcstride;
2786 for (y = 0; y < h + 5; y++) {
2787 for (x = 0; x < 8; x++)
2793 tmp = tmp_array + 16;
2796 for (y = 0; y <
h; y++) {
2797 for (x = 0; x < 8; x++)
2806 ptrdiff_t srcstride,
int h,
int mx,
int my)
2812 src -= 2 * srcstride;
2814 tmp = tmp_array + 8;
2823 src -= 2 * srcstride;
2825 for (y = 0; y < h + 5; y++) {
2826 for (x = 0; x < 4; x++)
2832 tmp = tmp_array + 8;
2835 for (y = 0; y <
h; y++) {
2836 for (x = 0; x < 4; x++)
2845 ptrdiff_t sstride,
int h,
int mx,
int my)
2848 int a = 8 - mx,
b = mx;
2874 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2875 "li %[tmp0], 0x03 \n\t" 2876 "mtc1 %[tmp0], %[ftmp4] \n\t" 2877 "pshufh %[a], %[a], %[ftmp0] \n\t" 2878 "pshufh %[b], %[b], %[ftmp0] \n\t" 2888 "addiu %[h], %[h], -0x01 \n\t" 2889 PTR_ADDU "%[src], %[src], %[sstride] \n\t" 2890 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 2891 "bnez %[h], 1b \n\t" 2892 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2893 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2894 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2895 [ftmp6]
"=&f"(ftmp[6]),
2896 [tmp0]
"=&r"(tmp[0]),
2898 [dst0]
"=&r"(dst0), [src0]
"=&r"(src0),
2900 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
2901 [a]
"+&f"(a), [
b]
"+&f"(
b)
2908 int a = 8 - mx, b = mx;
2911 for (y = 0; y <
h; y++) {
2912 for (x = 0; x < 16; x++)
2913 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
2921 ptrdiff_t sstride,
int h,
int mx,
int my)
2924 int c = 8 - my, d = my;
2941 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2942 "li %[tmp0], 0x03 \n\t" 2943 "mtc1 %[tmp0], %[ftmp4] \n\t" 2944 "pshufh %[c], %[c], %[ftmp0] \n\t" 2945 "pshufh %[d], %[d], %[ftmp0] \n\t" 2955 "addiu %[h], %[h], -0x01 \n\t" 2956 PTR_ADDU "%[src], %[src], %[sstride] \n\t" 2957 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 2958 "bnez %[h], 1b \n\t" 2959 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2960 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2961 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2962 [ftmp6]
"=&f"(ftmp[6]),
2963 [tmp0]
"=&r"(tmp[0]),
2965 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2968 [dst]
"+&r"(dst), [src]
"+&r"(src),
2969 [
c]
"+&f"(
c), [d]
"+&f"(d)
2976 int c = 8 - my, d = my;
2979 for (y = 0; y <
h; y++) {
2980 for (x = 0; x < 16; x++)
2981 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
2989 ptrdiff_t sstride,
int h,
int mx,
int my)
2998 int a = 8 - mx,
b = mx;
2999 int c = 8 - my, d = my;
3004 for (y = 0; y < h + 1; y++) {
3005 for (x = 0; x < 16; x++)
3006 tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3013 for (y = 0; y <
h; y++) {
3014 for (x = 0; x < 16; x++)
3015 dst[x] = (c * tmp[x] + d * tmp[x + 16] + 4) >> 3;
3023 ptrdiff_t sstride,
int h,
int mx,
int my)
3026 int a = 8 - mx,
b = mx;
3042 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3043 "li %[tmp0], 0x03 \n\t" 3044 "mtc1 %[tmp0], %[ftmp4] \n\t" 3045 "pshufh %[a], %[a], %[ftmp0] \n\t" 3046 "pshufh %[b], %[b], %[ftmp0] \n\t" 3051 "addiu %[h], %[h], -0x01 \n\t" 3052 PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3053 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3054 "bnez %[h], 1b \n\t" 3055 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3056 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3057 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3058 [ftmp6]
"=&f"(ftmp[6]),
3059 [tmp0]
"=&r"(tmp[0]),
3062 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3063 [a]
"+&f"(a), [
b]
"+&f"(
b)
3070 int a = 8 - mx, b = mx;
3073 for (y = 0; y <
h; y++) {
3074 for (x = 0; x < 8; x++)
3075 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3083 ptrdiff_t sstride,
int h,
int mx,
int my)
3086 int c = 8 - my, d = my;
3103 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3104 "li %[tmp0], 0x03 \n\t" 3105 "mtc1 %[tmp0], %[ftmp4] \n\t" 3106 "pshufh %[c], %[c], %[ftmp0] \n\t" 3107 "pshufh %[d], %[d], %[ftmp0] \n\t" 3112 "addiu %[h], %[h], -0x01 \n\t" 3113 PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3114 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3115 "bnez %[h], 1b \n\t" 3116 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3117 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3118 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3119 [ftmp6]
"=&f"(ftmp[6]),
3120 [tmp0]
"=&r"(tmp[0]),
3124 [dst]
"+&r"(dst), [src]
"+&r"(src),
3125 [
c]
"+&f"(
c), [d]
"+&f"(d)
3132 int c = 8 - my, d = my;
3135 for (y = 0; y <
h; y++) {
3136 for (x = 0; x < 8; x++)
3137 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3145 ptrdiff_t sstride,
int h,
int mx,
int my)
3154 int a = 8 - mx,
b = mx;
3155 int c = 8 - my, d = my;
3160 for (y = 0; y < h + 1; y++) {
3161 for (x = 0; x < 8; x++)
3162 tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3169 for (y = 0; y <
h; y++) {
3170 for (x = 0; x < 8; x++)
3171 dst[x] = (c * tmp[x] + d * tmp[x + 8] + 4) >> 3;
3179 ptrdiff_t sstride,
int h,
int mx,
int my)
3182 int a = 8 - mx,
b = mx;
3195 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3196 "li %[tmp0], 0x03 \n\t" 3197 "mtc1 %[tmp0], %[ftmp4] \n\t" 3198 "pshufh %[a], %[a], %[ftmp0] \n\t" 3199 "pshufh %[b], %[b], %[ftmp0] \n\t" 3204 "addiu %[h], %[h], -0x01 \n\t" 3205 PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3206 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3207 "bnez %[h], 1b \n\t" 3208 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3209 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3210 [ftmp4]
"=&f"(ftmp[4]),
3211 [tmp0]
"=&r"(tmp[0]),
3215 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3216 [a]
"+&f"(a), [
b]
"+&f"(
b)
3223 int a = 8 - mx, b = mx;
3226 for (y = 0; y <
h; y++) {
3227 for (x = 0; x < 4; x++)
3228 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3236 ptrdiff_t sstride,
int h,
int mx,
int my)
3239 int c = 8 - my, d = my;
3253 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 3254 "li %[tmp0], 0x03 \n\t" 3255 "mtc1 %[tmp0], %[ftmp4] \n\t" 3256 "pshufh %[c], %[c], %[ftmp0] \n\t" 3257 "pshufh %[d], %[d], %[ftmp0] \n\t" 3262 "addiu %[h], %[h], -0x01 \n\t" 3263 PTR_ADDU "%[src], %[src], %[sstride] \n\t" 3264 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t" 3265 "bnez %[h], 1b \n\t" 3266 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3267 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3268 [ftmp4]
"=&f"(ftmp[4]),
3269 [tmp0]
"=&r"(tmp[0]),
3274 [dst]
"+&r"(dst), [src]
"+&r"(src),
3275 [
c]
"+&f"(
c), [d]
"+&f"(d)
3282 int c = 8 - my, d = my;
3285 for (y = 0; y <
h; y++) {
3286 for (x = 0; x < 4; x++)
3287 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3295 ptrdiff_t sstride,
int h,
int mx,
int my)
3304 int a = 8 - mx,
b = mx;
3305 int c = 8 - my, d = my;
3310 for (y = 0; y < h + 1; y++) {
3311 for (x = 0; x < 4; x++)
3312 tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3319 for (y = 0; y <
h; y++) {
3320 for (x = 0; x < 4; x++)
3321 dst[x] = (c * tmp[x] + d * tmp[x + 4] + 4) >> 3;
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
const char const char void * val
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint8_t subpel_filters[7][6]
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define RESTRICT_ASM_UINT32_T
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
#define PUT_VP8_EPEL8_H6_MMI(src, dst)
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static const uint8_t q1[256]
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
#define FILTER_4TAP(src, F, stride)
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)
static void filter(int16_t *output, ptrdiff_t out_stride, int16_t *low, ptrdiff_t low_stride, int16_t *high, ptrdiff_t high_stride, int len, int clip)
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose an 8x8 block of packed byte data.
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define i(width, name, range_min, range_max)
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
static const uint16_t mask[17]
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint8_t q0[256]
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, ptrdiff_t stride)
static const uint64_t fourtap_subpel_filters[7][6]
#define FILTER_6TAP(src, F, stride)
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior, as they are not representable ...
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
#define PUT_VP8_EPEL4_H4_MMI(src, dst)
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define RESTRICT_ASM_DOUBLE_1
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_H6_MMI(src, dst)
static av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_filter_common_is4tap(uint8_t *p, ptrdiff_t stride)
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> dc
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define RESTRICT_ASM_DOUBLE_2
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose a 4x4 block of packed halfword data.
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
#define PUT_VP8_EPEL8_H4_MMI(src, dst)
static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define MMI_VP8_LOOP_FILTER
static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)