/*
 * VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)
 * VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)
 *
 * Inline-asm string fragments meant to be pasted inside an asm statement.
 * Every argument is stringified with '#', so callers pass operand text
 * (e.g. %[ftmp15]) for o1/o2/c0/c1 and immediate literals for r1..r4.
 *
 * Inputs expected in registers:
 *   ftmp5..ftmp8   - multiplied (pmaddhw) by the constants built from r1/r2
 *   ftmp9..ftmp12  - multiplied (pmaddhw) by the constants built from r3/r4
 *   ftmp0          - shift count used by the final psraw
 *
 * Steps, in the order the instructions appear:
 *   1. r1 and r2 are loaded through tmp0 into ftmp13/ftmp14 and duplicated
 *      with punpcklwd; ftmp1 = ftmp5*r1 + ftmp7*r2, ftmp2 = ftmp6*r1 + ftmp8*r2.
 *   2. r3 and r4 replace r1/r2 in ftmp13/ftmp14;
 *      ftmp3 = ftmp9*r3 + ftmp11*r4, ftmp4 = ftmp10*r3 + ftmp12*r4.
 *   3. Sum/difference: (ftmp1 + ftmp3, ftmp2 + ftmp4) and
 *      (ftmp1 - ftmp3, ftmp2 - ftmp4), with rounding constants added:
 *        TYPE1 adds c0 to ftmp1 and ftmp2 before the sum/difference;
 *        TYPE2 adds c1 to the two differences only, then c0 to all four
 *        results after the sum/difference.
 *   4. All four results are shifted right (psraw) by ftmp0.
 *   5. punpcklhw/punpckhhw/punpcklhw pack the two sums into o1 and the two
 *      differences into o2.
 *
 * Clobbers: tmp0, ftmp1..ftmp4, ftmp13, ftmp14 (plus o1 and o2 as outputs).
 * ftmp13/ftmp14 are reused as scratch in step 2 and step 3, so o1/o2/c0/c1
 * must not name any of the clobbered registers.
 */
30 #define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0) \ 31 "li %[tmp0], "#r1" \n\t" \ 32 "mtc1 %[tmp0], %[ftmp13] \n\t" \ 33 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 34 "li %[tmp0], "#r2" \n\t" \ 35 "mtc1 %[tmp0], %[ftmp14] \n\t" \ 36 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 37 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp13] \n\t" \ 38 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp14] \n\t" \ 39 "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 40 "pmaddhw %[ftmp2], %[ftmp6], %[ftmp13] \n\t" \ 41 "pmaddhw %[ftmp3], %[ftmp8], %[ftmp14] \n\t" \ 42 "paddw %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ 44 "li %[tmp0], "#r3" \n\t" \ 45 "mtc1 %[tmp0], %[ftmp13] \n\t" \ 46 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 47 "li %[tmp0], "#r4" \n\t" \ 48 "mtc1 %[tmp0], %[ftmp14] \n\t" \ 49 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 50 "pmaddhw %[ftmp3], %[ftmp9], %[ftmp13] \n\t" \ 51 "pmaddhw %[ftmp4], %[ftmp11], %[ftmp14] \n\t" \ 52 "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 53 "pmaddhw %[ftmp4], %[ftmp10], %[ftmp13] \n\t" \ 54 "pmaddhw %[ftmp13], %[ftmp12], %[ftmp14] \n\t" \ 55 "paddw %[ftmp4], %[ftmp4], %[ftmp13] \n\t" \ 57 "paddw %[ftmp1], %[ftmp1], "#c0" \n\t" \ 58 "paddw %[ftmp2], %[ftmp2], "#c0" \n\t" \ 59 "paddw %[ftmp13], %[ftmp1], %[ftmp3] \n\t" \ 60 "psubw %[ftmp14], %[ftmp1], %[ftmp3] \n\t" \ 61 "paddw %[ftmp1], %[ftmp2], %[ftmp4] \n\t" \ 62 "psubw %[ftmp3], %[ftmp2], %[ftmp4] \n\t" \ 63 "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" \ 64 "psraw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 65 "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" \ 66 "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 67 "punpcklhw %[ftmp2], %[ftmp13], %[ftmp1] \n\t" \ 68 "punpckhhw %[ftmp4], %[ftmp13], %[ftmp1] \n\t" \ 69 "punpcklhw "#o1", %[ftmp2], %[ftmp4] \n\t" \ 70 "punpcklhw %[ftmp2], %[ftmp14], %[ftmp3] \n\t" \ 71 "punpckhhw %[ftmp4], %[ftmp14], %[ftmp3] \n\t" \ 72 "punpcklhw "#o2", %[ftmp2], %[ftmp4] \n\t" 74 #define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1) \ 75 "li %[tmp0], "#r1" 
\n\t" \ 76 "mtc1 %[tmp0], %[ftmp13] \n\t" \ 77 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 78 "li %[tmp0], "#r2" \n\t" \ 79 "mtc1 %[tmp0], %[ftmp14] \n\t" \ 80 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 81 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp13] \n\t" \ 82 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp14] \n\t" \ 83 "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 84 "pmaddhw %[ftmp2], %[ftmp6], %[ftmp13] \n\t" \ 85 "pmaddhw %[ftmp3], %[ftmp8], %[ftmp14] \n\t" \ 86 "paddw %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ 88 "li %[tmp0], "#r3" \n\t" \ 89 "mtc1 %[tmp0], %[ftmp13] \n\t" \ 90 "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t" \ 91 "li %[tmp0], "#r4" \n\t" \ 92 "mtc1 %[tmp0], %[ftmp14] \n\t" \ 93 "punpcklwd %[ftmp14], %[ftmp14], %[ftmp14] \n\t" \ 94 "pmaddhw %[ftmp3], %[ftmp9], %[ftmp13] \n\t" \ 95 "pmaddhw %[ftmp4], %[ftmp11], %[ftmp14] \n\t" \ 96 "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 97 "pmaddhw %[ftmp4], %[ftmp10], %[ftmp13] \n\t" \ 98 "pmaddhw %[ftmp13], %[ftmp12], %[ftmp14] \n\t" \ 99 "paddw %[ftmp4], %[ftmp4], %[ftmp13] \n\t" \ 101 "paddw %[ftmp13], %[ftmp1], %[ftmp3] \n\t" \ 102 "psubw %[ftmp14], %[ftmp1], %[ftmp3] \n\t" \ 103 "paddw %[ftmp14], %[ftmp14], "#c1" \n\t" \ 104 "paddw %[ftmp1], %[ftmp2], %[ftmp4] \n\t" \ 105 "psubw %[ftmp3], %[ftmp2], %[ftmp4] \n\t" \ 106 "paddw %[ftmp3], %[ftmp3], "#c1" \n\t" \ 107 "paddw %[ftmp13], %[ftmp13], "#c0" \n\t" \ 108 "paddw %[ftmp14], %[ftmp14], "#c0" \n\t" \ 109 "paddw %[ftmp1], %[ftmp1], "#c0" \n\t" \ 110 "paddw %[ftmp3], %[ftmp3], "#c0" \n\t" \ 111 "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" \ 112 "psraw %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 113 "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" \ 114 "psraw %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 115 "punpcklhw %[ftmp2], %[ftmp13], %[ftmp1] \n\t" \ 116 "punpckhhw %[ftmp4], %[ftmp13], %[ftmp1] \n\t" \ 117 "punpcklhw "#o1", %[ftmp2], %[ftmp4] \n\t" \ 118 "punpcklhw %[ftmp2], %[ftmp14], %[ftmp3] \n\t" \ 119 "punpckhhw %[ftmp4], %[ftmp14], %[ftmp3] \n\t" \ 120 "punpcklhw "#o2", 
%[ftmp2], %[ftmp4] \n\t" 130 dc = (3 * dc + 1) >> 1;
131 dc = (3 * dc + 16) >> 5;
134 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 135 "pshufh %[dc], %[dc], %[ftmp0] \n\t" 136 "li %[count], 0x02 \n\t" 139 MMI_LDC1(%[ftmp1], %[dest], 0x00)
140 PTR_ADDU "%[addr0], %[dest], %[linesize] \n\t" 141 MMI_LDC1(%[ftmp2], %[addr0], 0x00)
142 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 143 MMI_LDC1(%[ftmp3], %[addr0], 0x00)
144 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 145 MMI_LDC1(%[ftmp4], %[addr0], 0x00)
147 "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 148 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 149 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 150 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 151 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 152 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 153 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 154 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 156 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 157 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 158 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 159 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 160 "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t" 161 "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t" 162 "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t" 163 "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t" 165 "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 166 "packushb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 167 "packushb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 168 "packushb %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 170 MMI_SDC1(%[ftmp1], %[dest], 0x00)
171 PTR_ADDU "%[addr0], %[dest], %[linesize] \n\t" 172 MMI_SDC1(%[ftmp2], %[addr0], 0x00)
173 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 174 MMI_SDC1(%[ftmp3], %[addr0], 0x00)
175 PTR_ADDU "%[addr0], %[addr0], %[linesize] \n\t" 176 MMI_SDC1(%[ftmp4], %[addr0], 0x00)
178 "addiu %[count], %[count], -0x01 \n\t" 179 PTR_ADDU "%[dest], %[addr0], %[linesize] \n\t" 180 "bnez %[count], 1b \n\t" 181 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
182 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
183 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
184 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
185 [ftmp8]
"=&f"(ftmp[8]),
186 [addr0]
"=&r"(addr[0]),
188 : [linesize]
"r"((
mips_reg)linesize),
194 #if _MIPS_SIM != _ABIO32 198 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
199 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
200 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
206 "li %[tmp0], 0x03 \n\t" 207 "mtc1 %[tmp0], %[ftmp0] \n\t" 210 MMI_LDC1(%[ftmp1], %[block], 0x00)
211 MMI_LDC1(%[ftmp11], %[block], 0x10)
212 MMI_LDC1(%[ftmp2], %[block], 0x20)
213 MMI_LDC1(%[ftmp12], %[block], 0x30)
214 MMI_LDC1(%[ftmp3], %[block], 0x40)
215 MMI_LDC1(%[ftmp13], %[block], 0x50)
216 MMI_LDC1(%[ftmp4], %[block], 0x60)
217 MMI_LDC1(%[ftmp14], %[block], 0x70)
218 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 219 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 220 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 221 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 223 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 224 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 225 "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t" 226 "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t" 230 0x000f0010, 0x00040009, %[
ff_pw_4])
234 0xfffc000f, 0xfff7fff0, %[
ff_pw_4])
238 0xfff00009, 0x000f0004, %[
ff_pw_4])
242 0xfff70004, 0xfff0000f, %[
ff_pw_4])
244 TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
245 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
247 TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
248 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
250 MMI_SDC1(%[ftmp15], %[
temp], 0x00)
251 MMI_SDC1(%[ftmp19], %[
temp], 0x08)
252 MMI_SDC1(%[ftmp16], %[
temp], 0x10)
253 MMI_SDC1(%[ftmp20], %[
temp], 0x18)
254 MMI_SDC1(%[ftmp17], %[
temp], 0x20)
255 MMI_SDC1(%[ftmp21], %[
temp], 0x28)
256 MMI_SDC1(%[ftmp18], %[
temp], 0x30)
257 MMI_SDC1(%[ftmp22], %[
temp], 0x38)
260 MMI_LDC1(%[ftmp1], %[block], 0x08)
261 MMI_LDC1(%[ftmp11], %[block], 0x18)
262 MMI_LDC1(%[ftmp2], %[block], 0x28)
263 MMI_LDC1(%[ftmp12], %[block], 0x38)
264 MMI_LDC1(%[ftmp3], %[block], 0x48)
265 MMI_LDC1(%[ftmp13], %[block], 0x58)
266 MMI_LDC1(%[ftmp4], %[block], 0x68)
267 MMI_LDC1(%[ftmp14], %[block], 0x78)
268 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 269 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 270 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 271 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 273 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 274 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 275 "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t" 276 "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t" 280 0x000f0010, 0x00040009, %[
ff_pw_4])
284 0xfffc000f, 0xfff7fff0, %[
ff_pw_4])
288 0xfff00009, 0x000f0004, %[
ff_pw_4])
292 0xfff70004, 0xfff0000f, %[
ff_pw_4])
294 TRANSPOSE_4H(%[ftmp15], %[ftmp16], %[ftmp17], %[ftmp18],
295 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
297 TRANSPOSE_4H(%[ftmp19], %[ftmp20], %[ftmp21], %[ftmp22],
298 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
300 MMI_SDC1(%[ftmp19], %[
temp], 0x48)
301 MMI_SDC1(%[ftmp20], %[
temp], 0x58)
302 MMI_SDC1(%[ftmp21], %[
temp], 0x68)
303 MMI_SDC1(%[ftmp22], %[
temp], 0x78)
307 "li %[tmp0], 0x07 \n\t" 308 "mtc1 %[tmp0], %[ftmp0] \n\t" 311 MMI_LDC1(%[ftmp1], %[
temp], 0x00)
312 MMI_LDC1(%[ftmp11], %[
temp], 0x10)
313 MMI_LDC1(%[ftmp2], %[
temp], 0x20)
314 MMI_LDC1(%[ftmp12], %[
temp], 0x30)
315 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 316 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 317 "punpcklhw %[ftmp7], %[ftmp15], %[ftmp17] \n\t" 318 "punpckhhw %[ftmp8], %[ftmp15], %[ftmp17] \n\t" 320 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 321 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 322 "punpcklhw %[ftmp11], %[ftmp16], %[ftmp18] \n\t" 323 "punpckhhw %[ftmp12], %[ftmp16], %[ftmp18] \n\t" 341 MMI_SDC1(%[ftmp15], %[block], 0x00)
342 MMI_SDC1(%[ftmp16], %[block], 0x10)
343 MMI_SDC1(%[ftmp17], %[block], 0x20)
344 MMI_SDC1(%[ftmp18], %[block], 0x30)
345 MMI_SDC1(%[ftmp19], %[block], 0x40)
346 MMI_SDC1(%[ftmp20], %[block], 0x50)
347 MMI_SDC1(%[ftmp21], %[block], 0x60)
348 MMI_SDC1(%[ftmp22], %[block], 0x70)
351 MMI_LDC1(%[ftmp1], %[
temp], 0x08)
352 MMI_LDC1(%[ftmp11], %[
temp], 0x18)
353 MMI_LDC1(%[ftmp2], %[
temp], 0x28)
354 MMI_LDC1(%[ftmp12], %[
temp], 0x38)
355 MMI_LDC1(%[ftmp3], %[
temp], 0x48)
356 MMI_LDC1(%[ftmp13], %[
temp], 0x58)
357 MMI_LDC1(%[ftmp4], %[
temp], 0x68)
358 MMI_LDC1(%[ftmp14], %[
temp], 0x78)
359 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 360 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 361 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 362 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 364 "punpcklhw %[ftmp9], %[ftmp11], %[ftmp12] \n\t" 365 "punpckhhw %[ftmp10], %[ftmp11], %[ftmp12] \n\t" 366 "punpcklhw %[ftmp11], %[ftmp13], %[ftmp14] \n\t" 367 "punpckhhw %[ftmp12], %[ftmp13], %[ftmp14] \n\t" 385 MMI_SDC1(%[ftmp15], %[block], 0x08)
386 MMI_SDC1(%[ftmp16], %[block], 0x18)
387 MMI_SDC1(%[ftmp17], %[block], 0x28)
388 MMI_SDC1(%[ftmp18], %[block], 0x38)
389 MMI_SDC1(%[ftmp19], %[block], 0x48)
390 MMI_SDC1(%[ftmp20], %[block], 0x58)
391 MMI_SDC1(%[ftmp21], %[block], 0x68)
392 MMI_SDC1(%[ftmp22], %[block], 0x78)
394 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
395 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
396 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
397 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
398 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
399 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
400 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
401 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
402 [ftmp16]
"=&f"(ftmp[16]), [ftmp17]
"=&f"(ftmp[17]),
403 [ftmp18]
"=&f"(ftmp[18]), [ftmp19]
"=&f"(ftmp[19]),
404 [ftmp20]
"=&f"(ftmp[20]), [ftmp21]
"=&f"(ftmp[21]),
405 [ftmp22]
"=&f"(ftmp[22]),
408 [
ff_pw_4]
"f"(ff_pw_4_local), [block]
"r"(block),
/*
 * Scale the DC term in two rounded fixed-point steps:
 *   dc = (3 * dc + 1) >> 1, then dc = (17 * dc + 64) >> 7.
 * The added constants (1 and 64) are half of each divisor (2 and 128), so
 * each step rounds instead of truncating.
 * The asm that follows applies pshufh to %[dc] and adds it with paddsh to
 * four 8-byte rows loaded from dest0..dest3 -- presumably %[dc] is bound to
 * this variable; the operand list is not visible here, confirm.
 * NOTE(review): dc may be negative; this relies on >> of a negative int
 * being an arithmetic shift (implementation-defined in C).
 */
421 dc = ( 3 * dc + 1) >> 1;
422 dc = (17 * dc + 64) >> 7;
425 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 426 "pshufh %[dc], %[dc], %[ftmp0] \n\t" 428 MMI_LDC1(%[ftmp1], %[dest0], 0x00)
429 MMI_LDC1(%[ftmp2], %[dest1], 0x00)
430 MMI_LDC1(%[ftmp3], %[dest2], 0x00)
431 MMI_LDC1(%[ftmp4], %[dest3], 0x00)
433 "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" 434 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 435 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" 436 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 437 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" 438 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 439 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" 440 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 442 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 443 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 444 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 445 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 446 "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t" 447 "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t" 448 "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t" 449 "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t" 451 "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 452 "packushb %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 453 "packushb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 454 "packushb %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 456 MMI_SDC1(%[ftmp1], %[dest0], 0x00)
457 MMI_SDC1(%[ftmp2], %[dest1], 0x00)
458 MMI_SDC1(%[ftmp3], %[dest2], 0x00)
459 MMI_SDC1(%[ftmp4], %[dest3], 0x00)
460 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
461 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
462 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
463 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
464 [ftmp8]
"=&f"(ftmp[8])
465 : [dest0]
"r"(dest+0*linesize), [dest1]
"r"(dest+1*linesize),
466 [dest2]
"r"(dest+2*linesize), [dest3]
"r"(dest+3*linesize),
472 #if _MIPS_SIM != _ABIO32 476 int16_t *dst =
block;
480 DECLARE_ALIGNED(16,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
481 DECLARE_ALIGNED(16,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
/*
 * 64 int16_t multipliers laid out as 8 rows of 8 values (16 bytes per row).
 * The asm below loads them in 8-byte halves from %[coeff] at offsets
 * 0x00..0x78 and feeds each pair of halves to pmaddhw against the two
 * 8-byte halves of a source row (ftmp1/ftmp2), so each table row supplies
 * the weights for one output value -- presumably %[coeff] is bound to this
 * array; the input-operand list is not visible here, confirm.
 * NOTE(review): unlike the neighbouring DECLARE_ALIGNED constants this
 * array carries no explicit alignment and is not const; confirm that
 * MMI_LDC1 tolerates the default int16_t alignment on all build targets.
 */
482 int16_t
coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4,
483 12, 15, 6, -4, -12, -16, -16, -9,
484 12, 9, -6, -16, -12, 4, 16, 15,
485 12, 4, -16, -9, 12, 15, -6, -16,
486 12, -4, -16, 9, 12, -15, -6, 16,
487 12, -9, -6, 16, -12, -4, 16, -15,
488 12, -15, 6, 4, -12, 16, -16, 9,
489 12, -16, 16, -15, 12, -9, 6, -4};
493 "li %[tmp0], 0x03 \n\t" 494 "mtc1 %[tmp0], %[ftmp0] \n\t" 497 MMI_LDC1(%[ftmp1], %[src], 0x00)
498 MMI_LDC1(%[ftmp2], %[src], 0x08)
501 MMI_LDC1(%[ftmp3], %[coeff], 0x00)
502 MMI_LDC1(%[ftmp4], %[coeff], 0x08)
503 MMI_LDC1(%[ftmp5], %[coeff], 0x10)
504 MMI_LDC1(%[ftmp6], %[coeff], 0x18)
505 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 506 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 507 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 508 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 509 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 510 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 511 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 512 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 513 "paddw %[ftmp11], %[ftmp7], %[ftmp8] \n\t" 514 "paddw %[ftmp11], %[ftmp11], %[ff_pw_4] \n\t" 517 MMI_LDC1(%[ftmp3], %[coeff], 0x20)
518 MMI_LDC1(%[ftmp4], %[coeff], 0x28)
519 MMI_LDC1(%[ftmp5], %[coeff], 0x30)
520 MMI_LDC1(%[ftmp6], %[coeff], 0x38)
521 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 522 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 523 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 524 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 525 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 526 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 527 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 528 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 529 "paddw %[ftmp12], %[ftmp7], %[ftmp8] \n\t" 530 "paddw %[ftmp12], %[ftmp12], %[ff_pw_4] \n\t" 533 MMI_LDC1(%[ftmp3], %[coeff], 0x40)
534 MMI_LDC1(%[ftmp4], %[coeff], 0x48)
535 MMI_LDC1(%[ftmp5], %[coeff], 0x50)
536 MMI_LDC1(%[ftmp6], %[coeff], 0x58)
537 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 538 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 539 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 540 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 541 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 542 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 543 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 544 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 545 "paddw %[ftmp13], %[ftmp7], %[ftmp8] \n\t" 546 "paddw %[ftmp13], %[ftmp13], %[ff_pw_4] \n\t" 549 MMI_LDC1(%[ftmp3], %[coeff], 0x60)
550 MMI_LDC1(%[ftmp4], %[coeff], 0x68)
551 MMI_LDC1(%[ftmp5], %[coeff], 0x70)
552 MMI_LDC1(%[ftmp6], %[coeff], 0x78)
553 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" 554 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp4] \n\t" 555 "paddw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 556 "pmaddhw %[ftmp7], %[ftmp1], %[ftmp5] \n\t" 557 "pmaddhw %[ftmp8], %[ftmp2], %[ftmp6] \n\t" 558 "paddw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 559 "punpcklwd %[ftmp7], %[ftmp9], %[ftmp10] \n\t" 560 "punpckhwd %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 561 "paddw %[ftmp14], %[ftmp7], %[ftmp8] \n\t" 562 "paddw %[ftmp14], %[ftmp14], %[ff_pw_4] \n\t" 565 "psraw %[ftmp11], %[ftmp11], %[ftmp0] \n\t" 566 "psraw %[ftmp12], %[ftmp12], %[ftmp0] \n\t" 567 "psraw %[ftmp13], %[ftmp13], %[ftmp0] \n\t" 568 "psraw %[ftmp14], %[ftmp14], %[ftmp0] \n\t" 569 "punpcklhw %[ftmp7], %[ftmp11], %[ftmp12] \n\t" 570 "punpckhhw %[ftmp8], %[ftmp11], %[ftmp12] \n\t" 571 "punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 572 "punpcklhw %[ftmp7], %[ftmp13], %[ftmp14] \n\t" 573 "punpckhhw %[ftmp8], %[ftmp13], %[ftmp14] \n\t" 574 "punpcklhw %[ftmp10], %[ftmp7], %[ftmp8] \n\t" 575 MMI_SDC1(%[ftmp9], %[dst], 0x00)
576 MMI_SDC1(%[ftmp10], %[dst], 0x08)
580 "addiu %[count], %[count], -0x01 \n\t" 581 "bnez %[count], 1b \n\t" 582 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
583 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
584 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
585 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
586 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
587 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
588 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
589 [ftmp14]
"=&f"(ftmp[14]), [tmp0]
"=&r"(tmp[0]),
599 "li %[tmp0], 0x44 \n\t" 600 "mtc1 %[tmp0], %[ftmp15] \n\t" 603 "li %[tmp0], 0x07 \n\t" 604 "mtc1 %[tmp0], %[ftmp0] \n\t" 605 MMI_LDC1(%[ftmp1], %[src], 0x00)
606 MMI_LDC1(%[ftmp2], %[src], 0x10)
607 MMI_LDC1(%[ftmp3], %[src], 0x20)
608 MMI_LDC1(%[ftmp4], %[src], 0x30)
609 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 610 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 611 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 612 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 615 "li %[tmp0], 0x00160011 \n\t" 616 "mtc1 %[tmp0], %[ftmp3] \n\t" 617 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 618 "li %[tmp0], 0x000a0011 \n\t" 619 "mtc1 %[tmp0], %[ftmp4] \n\t" 620 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 621 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 622 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 623 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 624 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 625 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 626 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 627 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 628 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 629 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 630 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 631 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 632 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 633 "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t" 636 "li %[tmp0], 0x000a0011 \n\t" 637 "mtc1 %[tmp0], %[ftmp3] \n\t" 638 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 639 "li %[tmp0], 0xffeaffef \n\t" 640 "mtc1 %[tmp0], %[ftmp4] \n\t" 641 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 642 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 643 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 644 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 645 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 646 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 647 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 648 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 649 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 650 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 651 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 652 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 653 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 654 "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t" 657 "li %[tmp0], 0xfff60011 \n\t" 658 "mtc1 %[tmp0], %[ftmp3] \n\t" 659 
"pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 660 "li %[tmp0], 0x0016ffef \n\t" 661 "mtc1 %[tmp0], %[ftmp4] \n\t" 662 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 663 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 664 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 665 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 666 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 667 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 668 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 669 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 670 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 671 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 672 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 673 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 674 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 675 "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t" 678 "li %[tmp0], 0xffea0011 \n\t" 679 "mtc1 %[tmp0], %[ftmp3] \n\t" 680 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 681 "li %[tmp0], 0xfff60011 \n\t" 682 "mtc1 %[tmp0], %[ftmp4] \n\t" 683 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 684 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 685 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 686 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 687 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 688 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 689 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 690 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 691 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 692 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 693 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 694 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 695 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 696 "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t" 698 MMI_LWC1(%[ftmp1], %[dest], 0x00)
699 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 700 MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
701 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 702 MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
703 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 704 MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
705 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 706 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 707 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 708 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 709 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 710 "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 711 "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t" 712 "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t" 713 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 714 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 715 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 716 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 717 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 718 MMI_SWC1(%[ftmp1], %[dest], 0x00)
719 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 720 MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
721 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 722 MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
723 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 724 MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
727 "li %[tmp0], 0x07 \n\t" 728 "mtc1 %[tmp0], %[ftmp0] \n\t" 729 MMI_LDC1(%[ftmp1], %[src], 0x08)
730 MMI_LDC1(%[ftmp2], %[src], 0x18)
731 MMI_LDC1(%[ftmp3], %[src], 0x28)
732 MMI_LDC1(%[ftmp4], %[src], 0x38)
733 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 734 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 735 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 736 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 739 "li %[tmp0], 0x00160011 \n\t" 740 "mtc1 %[tmp0], %[ftmp3] \n\t" 741 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 742 "li %[tmp0], 0x000a0011 \n\t" 743 "mtc1 %[tmp0], %[ftmp4] \n\t" 744 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 745 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 746 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 747 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 748 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 749 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 750 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 751 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 752 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 753 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 754 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 755 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 756 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 757 "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t" 760 "li %[tmp0], 0x000a0011 \n\t" 761 "mtc1 %[tmp0], %[ftmp3] \n\t" 762 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 763 "li %[tmp0], 0xffeaffef \n\t" 764 "mtc1 %[tmp0], %[ftmp4] \n\t" 765 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 766 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 767 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 768 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 769 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 770 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 771 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 772 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 773 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 774 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 775 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 776 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 777 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 778 "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t" 781 "li %[tmp0], 0xfff60011 \n\t" 782 "mtc1 %[tmp0], %[ftmp3] \n\t" 783 
"pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 784 "li %[tmp0], 0x0016ffef \n\t" 785 "mtc1 %[tmp0], %[ftmp4] \n\t" 786 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 787 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 788 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 789 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 790 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 791 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 792 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 793 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 794 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 795 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 796 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 797 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 798 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 799 "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t" 802 "li %[tmp0], 0xffea0011 \n\t" 803 "mtc1 %[tmp0], %[ftmp3] \n\t" 804 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 805 "li %[tmp0], 0xfff60011 \n\t" 806 "mtc1 %[tmp0], %[ftmp4] \n\t" 807 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 808 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 809 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 810 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 811 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 812 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 813 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 814 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 815 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 816 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 817 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 818 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 819 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 820 "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t" 822 MMI_LWC1(%[ftmp1], %[dest], 0x04)
823 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 824 MMI_LWC1(%[ftmp2], %[tmp0], 0x04)
825 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 826 MMI_LWC1(%[ftmp3], %[tmp0], 0x04)
827 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 828 MMI_LWC1(%[ftmp4], %[tmp0], 0x04)
829 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 830 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 831 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 832 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 833 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 834 "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 835 "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t" 836 "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t" 837 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 838 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 839 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 840 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 841 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 842 MMI_SWC1(%[ftmp1], %[dest], 0x04)
843 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 844 MMI_SWC1(%[ftmp2], %[tmp0], 0x04)
845 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 846 MMI_SWC1(%[ftmp3], %[tmp0], 0x04)
847 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 848 MMI_SWC1(%[ftmp4], %[tmp0], 0x04)
850 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
851 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
852 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
853 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
854 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
855 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
856 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
857 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
860 [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
/*
 * Scale the DC term in two rounded fixed-point steps:
 *   dc = (17 * dc + 4) >> 3, then dc = (12 * dc + 64) >> 7.
 * The added constants (4 and 64) are half of each divisor (8 and 128), so
 * each step rounds instead of truncating.
 * The asm that follows applies pshufh to %[dc] and adds it with paddsh to
 * eight 4-byte rows loaded (MMI_LWC1) from dest0..dest7 -- presumably %[dc]
 * is bound to this variable; the operand list is not visible here, confirm.
 * NOTE(review): dc may be negative; this relies on >> of a negative int
 * being an arithmetic shift (implementation-defined in C).
 */
873 dc = (17 * dc + 4) >> 3;
874 dc = (12 * dc + 64) >> 7;
877 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 878 "pshufh %[dc], %[dc], %[ftmp0] \n\t" 880 MMI_LWC1(%[ftmp1], %[dest0], 0x00)
881 MMI_LWC1(%[ftmp2], %[dest1], 0x00)
882 MMI_LWC1(%[ftmp3], %[dest2], 0x00)
883 MMI_LWC1(%[ftmp4], %[dest3], 0x00)
884 MMI_LWC1(%[ftmp5], %[dest4], 0x00)
885 MMI_LWC1(%[ftmp6], %[dest5], 0x00)
886 MMI_LWC1(%[ftmp7], %[dest6], 0x00)
887 MMI_LWC1(%[ftmp8], %[dest7], 0x00)
889 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 890 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 891 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 892 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 893 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 894 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 895 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 896 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 898 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 899 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 900 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 901 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 902 "paddsh %[ftmp5], %[ftmp5], %[dc] \n\t" 903 "paddsh %[ftmp6], %[ftmp6], %[dc] \n\t" 904 "paddsh %[ftmp7], %[ftmp7], %[dc] \n\t" 905 "paddsh %[ftmp8], %[ftmp8], %[dc] \n\t" 907 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 908 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 909 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 910 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 911 "packushb %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 912 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 913 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 914 "packushb %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 916 MMI_SWC1(%[ftmp1], %[dest0], 0x00)
917 MMI_SWC1(%[ftmp2], %[dest1], 0x00)
918 MMI_SWC1(%[ftmp3], %[dest2], 0x00)
919 MMI_SWC1(%[ftmp4], %[dest3], 0x00)
920 MMI_SWC1(%[ftmp5], %[dest4], 0x00)
921 MMI_SWC1(%[ftmp6], %[dest5], 0x00)
922 MMI_SWC1(%[ftmp7], %[dest6], 0x00)
923 MMI_SWC1(%[ftmp8], %[dest7], 0x00)
924 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
925 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
926 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
927 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
929 [ftmp8]
"=&f"(ftmp[8])
930 : [dest0]
"r"(dest+0*linesize), [dest1]
"r"(dest+1*linesize),
931 [dest2]
"r"(dest+2*linesize), [dest3]
"r"(dest+3*linesize),
932 [dest4]
"r"(dest+4*linesize), [dest5]
"r"(dest+5*linesize),
933 [dest6]
"r"(dest+6*linesize), [dest7]
"r"(dest+7*linesize),
939 #if _MIPS_SIM != _ABIO32 943 int16_t *dst =
block;
946 int16_t
coeff[16] = {17, 22, 17, 10,
950 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
951 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
952 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
957 "li %[tmp0], 0x03 \n\t" 958 "mtc1 %[tmp0], %[ftmp0] \n\t" 960 MMI_LDC1(%[ftmp2], %[coeff], 0x00)
961 MMI_LDC1(%[ftmp3], %[coeff], 0x08)
962 MMI_LDC1(%[ftmp4], %[coeff], 0x10)
963 MMI_LDC1(%[ftmp5], %[coeff], 0x18)
966 MMI_LDC1(%[ftmp1], %[src], 0x00)
967 "pmaddhw %[ftmp6], %[ftmp2], %[ftmp1] \n\t" 968 "pmaddhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t" 969 "pmaddhw %[ftmp8], %[ftmp4], %[ftmp1] \n\t" 970 "pmaddhw %[ftmp9], %[ftmp5], %[ftmp1] \n\t" 971 "punpcklwd %[ftmp10], %[ftmp6], %[ftmp7] \n\t" 972 "punpckhwd %[ftmp11], %[ftmp6], %[ftmp7] \n\t" 973 "punpcklwd %[ftmp6], %[ftmp8], %[ftmp9] \n\t" 974 "punpckhwd %[ftmp7], %[ftmp8], %[ftmp9] \n\t" 975 "paddw %[ftmp8], %[ftmp10], %[ftmp11] \n\t" 976 "paddw %[ftmp9], %[ftmp6], %[ftmp7] \n\t" 977 "paddw %[ftmp8], %[ftmp8], %[ff_pw_4] \n\t" 978 "paddw %[ftmp9], %[ftmp9], %[ff_pw_4] \n\t" 979 "psraw %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 980 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 981 "punpcklhw %[ftmp6], %[ftmp8], %[ftmp9] \n\t" 982 "punpckhhw %[ftmp7], %[ftmp8], %[ftmp9] \n\t" 983 "punpcklhw %[ftmp8], %[ftmp6], %[ftmp7] \n\t" 984 MMI_SDC1(%[ftmp8], %[dst], 0x00)
988 "addiu %[count], %[count], -0x01 \n\t" 989 "bnez %[count], 1b \n\t" 990 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
991 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
992 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
993 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
994 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
995 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
996 [tmp0]
"=&r"(
tmp[0]), [count]
"+&r"(count),
997 [
src]
"+&r"(
src), [dst]
"+&r"(dst)
998 : [
ff_pw_4]
"f"(ff_pw_4_local), [coeff]
"r"(coeff)
1006 "li %[tmp0], 0x07 \n\t" 1007 "mtc1 %[tmp0], %[ftmp0] \n\t" 1009 MMI_LDC1(%[ftmp1], %[src], 0x00)
1010 MMI_LDC1(%[ftmp2], %[src], 0x20)
1011 MMI_LDC1(%[ftmp3], %[src], 0x40)
1012 MMI_LDC1(%[ftmp4], %[src], 0x60)
1013 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1014 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1015 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1016 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 1018 MMI_LDC1(%[ftmp1], %[src], 0x10)
1019 MMI_LDC1(%[ftmp2], %[src], 0x30)
1020 MMI_LDC1(%[ftmp3], %[src], 0x50)
1021 MMI_LDC1(%[ftmp4], %[src], 0x70)
1022 "punpcklhw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1023 "punpckhhw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1024 "punpcklhw %[ftmp11], %[ftmp3], %[ftmp4] \n\t" 1025 "punpckhhw %[ftmp12], %[ftmp3], %[ftmp4] \n\t" 1043 MMI_LWC1(%[ftmp1], %[dest], 0x00)
1044 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 1045 MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
1046 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1047 MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
1048 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1049 MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
1050 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1051 MMI_LWC1(%[ftmp5], %[tmp0], 0x00)
1052 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1053 MMI_LWC1(%[ftmp6], %[tmp0], 0x00)
1054 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1055 MMI_LWC1(%[ftmp7], %[tmp0], 0x00)
1056 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1057 MMI_LWC1(%[ftmp8], %[tmp0], 0x00)
1058 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1059 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1060 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1061 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1062 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1063 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1064 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1065 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1066 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1068 "paddh %[ftmp1], %[ftmp1], %[ftmp15] \n\t" 1069 "paddh %[ftmp2], %[ftmp2], %[ftmp16] \n\t" 1070 "paddh %[ftmp3], %[ftmp3], %[ftmp17] \n\t" 1071 "paddh %[ftmp4], %[ftmp4], %[ftmp18] \n\t" 1072 "paddh %[ftmp5], %[ftmp5], %[ftmp19] \n\t" 1073 "paddh %[ftmp6], %[ftmp6], %[ftmp20] \n\t" 1074 "paddh %[ftmp7], %[ftmp7], %[ftmp21] \n\t" 1075 "paddh %[ftmp8], %[ftmp8], %[ftmp22] \n\t" 1077 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1078 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1079 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1080 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1081 "packushb %[ftmp5], %[ftmp5], %[ftmp0] \n\t" 1082 "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" 1083 "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t" 1084 "packushb %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1086 MMI_SWC1(%[ftmp1], %[dest], 0x00)
1087 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 1088 MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
1089 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1090 MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
1091 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1092 MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
1093 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1094 MMI_SWC1(%[ftmp5], %[tmp0], 0x00)
1095 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1096 MMI_SWC1(%[ftmp6], %[tmp0], 0x00)
1097 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1098 MMI_SWC1(%[ftmp7], %[tmp0], 0x00)
1099 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1100 MMI_SWC1(%[ftmp8], %[tmp0], 0x00)
1102 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1103 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1104 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1105 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1106 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1107 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1108 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
1109 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
1110 [ftmp16]
"=&f"(ftmp[16]), [ftmp17]
"=&f"(ftmp[17]),
1111 [ftmp18]
"=&f"(ftmp[18]), [ftmp19]
"=&f"(ftmp[19]),
1112 [ftmp20]
"=&f"(ftmp[20]), [ftmp21]
"=&f"(ftmp[21]),
1113 [ftmp22]
"=&f"(ftmp[22]),
1116 [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
1129 dc = (17 * dc + 4) >> 3;
1130 dc = (17 * dc + 64) >> 7;
1133 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1134 "pshufh %[dc], %[dc], %[ftmp0] \n\t" 1136 MMI_LWC1(%[ftmp1], %[dest0], 0x00)
1137 MMI_LWC1(%[ftmp2], %[dest1], 0x00)
1138 MMI_LWC1(%[ftmp3], %[dest2], 0x00)
1139 MMI_LWC1(%[ftmp4], %[dest3], 0x00)
1141 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1142 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1143 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1144 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1146 "paddsh %[ftmp1], %[ftmp1], %[dc] \n\t" 1147 "paddsh %[ftmp2], %[ftmp2], %[dc] \n\t" 1148 "paddsh %[ftmp3], %[ftmp3], %[dc] \n\t" 1149 "paddsh %[ftmp4], %[ftmp4], %[dc] \n\t" 1151 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1152 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1153 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1154 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1156 MMI_SWC1(%[ftmp1], %[dest0], 0x00)
1157 MMI_SWC1(%[ftmp2], %[dest1], 0x00)
1158 MMI_SWC1(%[ftmp3], %[dest2], 0x00)
1159 MMI_SWC1(%[ftmp4], %[dest3], 0x00)
1160 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1161 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1163 [ftmp4]
"=&f"(ftmp[4])
1164 : [dest0]
"r"(dest+0*linesize), [dest1]
"r"(dest+1*linesize),
1165 [dest2]
"r"(dest+2*linesize), [dest3]
"r"(dest+3*linesize),
1174 int16_t *dst =
block;
1177 int16_t
coeff[16] = {17, 22, 17, 10,
1181 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
1182 DECLARE_ALIGNED(8,
const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
1186 "li %[tmp0], 0x03 \n\t" 1187 "mtc1 %[tmp0], %[ftmp0] \n\t" 1188 MMI_LDC1(%[ftmp2], %[coeff], 0x00)
1189 MMI_LDC1(%[ftmp3], %[coeff], 0x08)
1190 MMI_LDC1(%[ftmp4], %[coeff], 0x10)
1191 MMI_LDC1(%[ftmp5], %[coeff], 0x18)
1194 MMI_LDC1(%[ftmp1], %[src], 0x00)
1195 "pmaddhw %[ftmp6], %[ftmp2], %[ftmp1] \n\t" 1196 "pmaddhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t" 1197 "pmaddhw %[ftmp8], %[ftmp4], %[ftmp1] \n\t" 1198 "pmaddhw %[ftmp9], %[ftmp5], %[ftmp1] \n\t" 1199 "punpcklwd %[ftmp10], %[ftmp6], %[ftmp7] \n\t" 1200 "punpckhwd %[ftmp11], %[ftmp6], %[ftmp7] \n\t" 1201 "punpcklwd %[ftmp6], %[ftmp8], %[ftmp9] \n\t" 1202 "punpckhwd %[ftmp7], %[ftmp8], %[ftmp9] \n\t" 1203 "paddw %[ftmp8], %[ftmp10], %[ftmp11] \n\t" 1204 "paddw %[ftmp9], %[ftmp6], %[ftmp7] \n\t" 1205 "paddw %[ftmp8], %[ftmp8], %[ff_pw_4] \n\t" 1206 "paddw %[ftmp9], %[ftmp9], %[ff_pw_4] \n\t" 1207 "psraw %[ftmp8], %[ftmp8], %[ftmp0] \n\t" 1208 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1209 "punpcklhw %[ftmp6], %[ftmp8], %[ftmp9] \n\t" 1210 "punpckhhw %[ftmp7], %[ftmp8], %[ftmp9] \n\t" 1211 "punpcklhw %[ftmp8], %[ftmp6], %[ftmp7] \n\t" 1212 MMI_SDC1(%[ftmp8], %[dst], 0x00)
1216 "addiu %[count], %[count], -0x01 \n\t" 1217 "bnez %[count], 1b \n\t" 1218 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1219 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1220 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1221 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1222 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1223 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1224 [tmp0]
"=&r"(
tmp[0]), [count]
"+&r"(count),
1225 [
src]
"+&r"(
src), [dst]
"+&r"(dst)
1226 : [
ff_pw_4]
"f"(ff_pw_4_local), [coeff]
"r"(coeff)
1234 "li %[tmp0], 0x07 \n\t" 1235 "mtc1 %[tmp0], %[ftmp0] \n\t" 1236 "li %[tmp0], 0x44 \n\t" 1237 "mtc1 %[tmp0], %[ftmp15] \n\t" 1239 MMI_LDC1(%[ftmp1], %[src], 0x00)
1240 MMI_LDC1(%[ftmp2], %[src], 0x10)
1241 MMI_LDC1(%[ftmp3], %[src], 0x20)
1242 MMI_LDC1(%[ftmp4], %[src], 0x30)
1243 "punpcklhw %[ftmp5], %[ftmp1], %[ftmp2] \n\t" 1244 "punpckhhw %[ftmp6], %[ftmp1], %[ftmp2] \n\t" 1245 "punpcklhw %[ftmp7], %[ftmp3], %[ftmp4] \n\t" 1246 "punpckhhw %[ftmp8], %[ftmp3], %[ftmp4] \n\t" 1249 "li %[tmp0], 0x00160011 \n\t" 1250 "mtc1 %[tmp0], %[ftmp3] \n\t" 1251 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1252 "li %[tmp0], 0x000a0011 \n\t" 1253 "mtc1 %[tmp0], %[ftmp4] \n\t" 1254 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1255 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1256 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1257 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1258 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1259 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1260 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1261 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1262 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1263 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1264 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1265 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1266 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1267 "punpcklhw %[ftmp11], %[ftmp1], %[ftmp2] \n\t" 1270 "li %[tmp0], 0x000a0011 \n\t" 1271 "mtc1 %[tmp0], %[ftmp3] \n\t" 1272 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1273 "li %[tmp0], 0xffeaffef \n\t" 1274 "mtc1 %[tmp0], %[ftmp4] \n\t" 1275 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1276 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1277 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1278 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1279 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1280 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1281 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1282 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1283 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1284 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1285 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1286 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1287 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1288 "punpcklhw %[ftmp12], %[ftmp1], %[ftmp2] \n\t" 1291 "li %[tmp0], 0xfff60011 
\n\t" 1292 "mtc1 %[tmp0], %[ftmp3] \n\t" 1293 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1294 "li %[tmp0], 0x0016ffef \n\t" 1295 "mtc1 %[tmp0], %[ftmp4] \n\t" 1296 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1297 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1298 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1299 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1300 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1301 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1302 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1303 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1304 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1305 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1306 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1307 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1308 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1309 "punpcklhw %[ftmp13], %[ftmp1], %[ftmp2] \n\t" 1312 "li %[tmp0], 0xffea0011 \n\t" 1313 "mtc1 %[tmp0], %[ftmp3] \n\t" 1314 "pshufh %[ftmp3], %[ftmp3], %[ftmp15] \n\t" 1315 "li %[tmp0], 0xfff60011 \n\t" 1316 "mtc1 %[tmp0], %[ftmp4] \n\t" 1317 "pshufh %[ftmp4], %[ftmp4], %[ftmp15] \n\t" 1318 "pmaddhw %[ftmp1], %[ftmp5], %[ftmp3] \n\t" 1319 "pmaddhw %[ftmp2], %[ftmp7], %[ftmp4] \n\t" 1320 "paddw %[ftmp9], %[ftmp1], %[ftmp2] \n\t" 1321 "pmaddhw %[ftmp1], %[ftmp6], %[ftmp3] \n\t" 1322 "pmaddhw %[ftmp2], %[ftmp8], %[ftmp4] \n\t" 1323 "paddw %[ftmp10], %[ftmp1], %[ftmp2] \n\t" 1324 "paddw %[ftmp9], %[ftmp9], %[ff_pw_64] \n\t" 1325 "paddw %[ftmp10], %[ftmp10], %[ff_pw_64] \n\t" 1326 "psraw %[ftmp9], %[ftmp9], %[ftmp0] \n\t" 1327 "psraw %[ftmp10], %[ftmp10], %[ftmp0] \n\t" 1328 "punpcklhw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 1329 "punpckhhw %[ftmp2], %[ftmp9], %[ftmp10] \n\t" 1330 "punpcklhw %[ftmp14], %[ftmp1], %[ftmp2] \n\t" 1332 MMI_LWC1(%[ftmp1], %[dest], 0x00)
1333 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 1334 MMI_LWC1(%[ftmp2], %[tmp0], 0x00)
1335 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1336 MMI_LWC1(%[ftmp3], %[tmp0], 0x00)
1337 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1338 MMI_LWC1(%[ftmp4], %[tmp0], 0x00)
1339 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 1340 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1341 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1342 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1343 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1344 "paddh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 1345 "paddh %[ftmp2], %[ftmp2], %[ftmp12] \n\t" 1346 "paddh %[ftmp3], %[ftmp3], %[ftmp13] \n\t" 1347 "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" 1348 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 1349 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" 1350 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 1351 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" 1353 MMI_SWC1(%[ftmp1], %[dest], 0x00)
1354 PTR_ADDU "%[tmp0], %[dest], %[linesize] \n\t" 1355 MMI_SWC1(%[ftmp2], %[tmp0], 0x00)
1356 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1357 MMI_SWC1(%[ftmp3], %[tmp0], 0x00)
1358 PTR_ADDU "%[tmp0], %[tmp0], %[linesize] \n\t" 1359 MMI_SWC1(%[ftmp4], %[tmp0], 0x00)
1361 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1362 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1363 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1364 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1365 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1366 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1367 [ftmp12]
"=&f"(ftmp[12]), [ftmp13]
"=&f"(ftmp[13]),
1368 [ftmp14]
"=&f"(ftmp[14]), [ftmp15]
"=&f"(ftmp[15]),
1371 [
src]
"r"(
src), [dest]
"r"(dest), [linesize]
"r"(linesize)
1383 for (i = 0; i < 8; i++) {
1388 d1 = (a - d + 3 +
rnd) >> 3;
1389 d2 = (a - d + b - c + 4 -
rnd) >> 3;
1392 src[-1] = av_clip_uint8(b - d2);
1393 src[0] = av_clip_uint8(c + d2);
1405 int rnd1 = flags & 2 ? 3 : 4;
1406 int rnd2 = 7 - rnd1;
1407 for (i = 0; i < 8; i++) {
1415 left[6] = ((a << 3) - d1 + rnd1) >> 3;
1416 left[7] = ((b << 3) - d2 + rnd2) >> 3;
1417 right[0] = ((c << 3) + d2 + rnd1) >> 3;
1418 right[1] = ((d << 3) + d1 + rnd2) >> 3;
1420 right += right_stride;
1421 left += left_stride;
1436 for (i = 0; i < 8; i++) {
1441 d1 = (a - d + 3 +
rnd) >> 3;
1442 d2 = (a - d + b - c + 4 -
rnd) >> 3;
1444 src[-2 *
stride] = a - d1;
1445 src[-
stride] = av_clip_uint8(b - d2);
1446 src[0] = av_clip_uint8(c + d2);
1458 int rnd1 = 4, rnd2 = 3;
1459 for (i = 0; i < 8; i++) {
1467 top[48] = ((a << 3) - d1 + rnd1) >> 3;
1468 top[56] = ((b << 3) - d2 + rnd2) >> 3;
1469 bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
1470 bottom[8] = ((d << 3) + d1 + rnd2) >> 3;
1490 5 * (src[-1 * stride] - src[0 * stride]) + 4) >> 3;
1491 int a0_sign = a0 >> 31;
1493 a0 = (a0 ^ a0_sign) - a0_sign;
1495 int a1 =
FFABS((2 * (src[-4 * stride] - src[-1 * stride]) -
1496 5 * (src[-3 * stride] - src[-2 * stride]) + 4) >> 3);
1497 int a2 =
FFABS((2 * (src[ 0 * stride] - src[ 3 * stride]) -
1498 5 * (src[ 1 * stride] - src[ 2 * stride]) + 4) >> 3);
1499 if (a1 < a0 || a2 < a0) {
1501 int clip_sign = clip >> 31;
1503 clip = ((clip ^ clip_sign) - clip_sign) >> 1;
1506 int d = 5 * (a3 -
a0);
1507 int d_sign = (d >> 31);
1509 d = ((d ^ d_sign) - d_sign) >> 3;
1512 if (d_sign ^ clip_sign)
1516 d = (d ^ d_sign) - d_sign;
1517 src[-1 *
stride] = av_clip_uint8(src[-1 * stride] - d);
1518 src[ 0 *
stride] = av_clip_uint8(src[ 0 * stride] + d);
1542 for (i = 0; i <
len; i += 4) {
1604 #define OP_PUT(S, D) 1605 #define OP_AVG(S, D) \ 1606 "ldc1 $f16, "#S" \n\t" \ 1607 "pavgb "#D", "#D", $f16 \n\t" 1610 #define NORMALIZE_MMI(SHIFT) \ 1611 "paddh $f6, $f6, $f14 \n\t" \ 1612 "paddh $f8, $f8, $f14 \n\t" \ 1613 "psrah $f6, $f6, "SHIFT" \n\t" \ 1614 "psrah $f8, $f8, "SHIFT" \n\t" 1616 #define TRANSFER_DO_PACK(OP) \ 1617 "packushb $f6, $f6, $f8 \n\t" \ 1619 "sdc1 $f6, 0x00(%[dst]) \n\t" 1621 #define TRANSFER_DONT_PACK(OP) \ 1622 OP(0(%[dst]), $f6) \ 1623 OP(8(%[dst]), $f8) \ 1624 "sdc1 $f6, 0x00(%[dst]) \n\t" \ 1625 "sdc1 $f8, 0x08(%[dst]) \n\t" 1628 #define DO_UNPACK(reg) \ 1629 "punpcklbh "reg", "reg", $f0 \n\t" 1630 #define DONT_UNPACK(reg) 1633 #define LOAD_ROUNDER_MMI(ROUND) \ 1634 "lwc1 $f14, "ROUND" \n\t" \ 1635 "punpcklhw $f14, $f14, $f14 \n\t" \ 1636 "punpcklwd $f14, $f14, $f14 \n\t" 1639 #define SHIFT2_LINE(OFF, R0, R1, R2, R3) \ 1640 "paddh "#R1", "#R1", "#R2" \n\t" \ 1641 PTR_ADDU "$9, %[src], %[stride1] \n\t" \ 1642 MMI_ULWC1(R0, $9, 0x00) \ 1643 "pmullh "#R1", "#R1", $f6 \n\t" \ 1644 "punpcklbh "#R0", "#R0", $f0 \n\t" \ 1645 PTR_ADDU "$9, %[src], %[stride] \n\t" \ 1646 MMI_ULWC1(R3, $9, 0x00) \ 1647 "psubh "#R1", "#R1", "#R0" \n\t" \ 1648 "punpcklbh "#R3", "#R3", $f0 \n\t" \ 1649 "paddh "#R1", "#R1", $f14 \n\t" \ 1650 "psubh "#R1", "#R1", "#R3" \n\t" \ 1651 "psrah "#R1", "#R1", %[shift] \n\t" \ 1652 MMI_SDC1(R1, %[dst], OFF) \ 1653 PTR_ADDU "%[src], %[src], %[stride] \n\t" 1664 "xor $f0, $f0, $f0 \n\t" 1667 "ldc1 $f12, %[ff_pw_9] \n\t" 1669 MMI_ULWC1($f4, %[src], 0x00)
1670 PTR_ADDU "%[src], %[src], %[stride] \n\t" 1671 MMI_ULWC1($f6, %[src], 0x00)
1672 "punpcklbh $f4, $f4, $f0 \n\t" 1673 "punpcklbh $f6, $f6, $f0 \n\t" 1682 PTR_SUBU "%[src], %[src], %[stride2] \n\t" 1684 "addiu $8, $8, -0x01 \n\t" 1686 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT
1687 [
src]
"+r"(
src), [dst]
"+r"(dst)
1691 :
"$8",
"$9",
"$f0",
"$f2",
"$f4",
"$f6",
"$f8",
"$f10",
"$f12",
1692 "$f14",
"$f16",
"memory" 1700 #define VC1_HOR_16B_SHIFT2(OP, OPNAME) \ 1701 static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \ 1702 const int16_t *src, int rnd) \ 1705 DECLARE_VAR_ALL64; \ 1706 DECLARE_VAR_ADDRT; \ 1709 rnd -= (-1+9+9-1)*1024; \ 1712 LOAD_ROUNDER_MMI("%[rnd]") \ 1713 "ldc1 $f12, %[ff_pw_128] \n\t" \ 1714 "ldc1 $f10, %[ff_pw_9] \n\t" \ 1716 MMI_ULDC1($f2, %[src], 0x00) \ 1717 MMI_ULDC1($f4, %[src], 0x08) \ 1718 MMI_ULDC1($f6, %[src], 0x02) \ 1719 MMI_ULDC1($f8, %[src], 0x0a) \ 1720 MMI_ULDC1($f0, %[src], 0x06) \ 1721 "paddh $f2, $f2, $f0 \n\t" \ 1722 MMI_ULDC1($f0, %[src], 0x0e) \ 1723 "paddh $f4, $f4, $f0 \n\t" \ 1724 MMI_ULDC1($f0, %[src], 0x04) \ 1725 "paddh $f6, $f6, $f0 \n\t" \ 1726 MMI_ULDC1($f0, %[src], 0x0b) \ 1727 "paddh $f8, $f8, $f0 \n\t" \ 1728 "pmullh $f6, $f6, $f10 \n\t" \ 1729 "pmullh $f8, $f8, $f10 \n\t" \ 1730 "psubh $f6, $f6, $f2 \n\t" \ 1731 "psubh $f8, $f8, $f4 \n\t" \ 1732 "li $8, 0x07 \n\t" \ 1733 "mtc1 $8, $f16 \n\t" \ 1734 NORMALIZE_MMI("$f16") \ 1736 "paddh $f6, $f6, $f12 \n\t" \ 1737 "paddh $f8, $f8, $f12 \n\t" \ 1738 TRANSFER_DO_PACK(OP) \ 1739 "addiu %[h], %[h], -0x01 \n\t" \ 1740 PTR_ADDIU "%[src], %[src], 0x18 \n\t" \ 1741 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1742 "bnez %[h], 1b \n\t" \ 1743 : RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT \ 1745 [src]"+r"(src), [dst]"+r"(dst) \ 1746 : [stride]"r"(stride), [rnd]"m"(rnd), \ 1747 [ff_pw_9]"m"(ff_pw_9), [ff_pw_128]"m"(ff_pw_128) \ 1748 : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", \ 1760 #define VC1_SHIFT2(OP, OPNAME)\ 1761 static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \ 1762 mips_reg stride, int rnd, \ 1765 DECLARE_VAR_LOW32; \ 1766 DECLARE_VAR_ADDRT; \ 1771 "xor $f0, $f0, $f0 \n\t" \ 1772 "li $10, 0x08 \n\t" \ 1773 LOAD_ROUNDER_MMI("%[rnd]") \ 1774 "ldc1 $f12, %[ff_pw_9] \n\t" \ 1776 MMI_ULWC1($f6, %[src], 0x00) \ 1777 MMI_ULWC1($f8, %[src], 0x04) \ 1778 PTR_ADDU "$9, %[src], %[offset] \n\t" \ 1779 MMI_ULWC1($f2, $9, 
0x00) \ 1780 MMI_ULWC1($f4, $9, 0x04) \ 1781 PTR_ADDU "%[src], %[src], %[offset] \n\t" \ 1782 "punpcklbh $f6, $f6, $f0 \n\t" \ 1783 "punpcklbh $f8, $f8, $f0 \n\t" \ 1784 "punpcklbh $f2, $f2, $f0 \n\t" \ 1785 "punpcklbh $f4, $f4, $f0 \n\t" \ 1786 "paddh $f6, $f6, $f2 \n\t" \ 1787 "paddh $f8, $f8, $f4 \n\t" \ 1788 PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \ 1789 MMI_ULWC1($f2, $9, 0x00) \ 1790 MMI_ULWC1($f4, $9, 0x04) \ 1791 "pmullh $f6, $f6, $f12 \n\t" \ 1792 "pmullh $f8, $f8, $f12 \n\t" \ 1793 "punpcklbh $f2, $f2, $f0 \n\t" \ 1794 "punpcklbh $f4, $f4, $f0 \n\t" \ 1795 "psubh $f6, $f6, $f2 \n\t" \ 1796 "psubh $f8, $f8, $f4 \n\t" \ 1797 PTR_ADDU "$9, %[src], %[offset] \n\t" \ 1798 MMI_ULWC1($f2, $9, 0x00) \ 1799 MMI_ULWC1($f4, $9, 0x04) \ 1800 "punpcklbh $f2, $f2, $f0 \n\t" \ 1801 "punpcklbh $f4, $f4, $f0 \n\t" \ 1802 "psubh $f6, $f6, $f2 \n\t" \ 1803 "psubh $f8, $f8, $f4 \n\t" \ 1804 "li $8, 0x04 \n\t" \ 1805 "mtc1 $8, $f16 \n\t" \ 1806 NORMALIZE_MMI("$f16") \ 1807 "packushb $f6, $f6, $f8 \n\t" \ 1809 "sdc1 $f6, 0x00(%[dst]) \n\t" \ 1810 "addiu $10, $10, -0x01 \n\t" \ 1811 PTR_ADDU "%[src], %[src], %[stride1] \n\t" \ 1812 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1813 "bnez $10, 1b \n\t" \ 1814 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \ 1815 [src]"+r"(src), [dst]"+r"(dst) \ 1816 : [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \ 1817 [stride]"r"(stride), [rnd]"m"(rnd), \ 1818 [stride1]"r"(stride-offset), \ 1819 [ff_pw_9]"m"(ff_pw_9) \ 1820 : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \ 1821 "$f12", "$f14", "$f16", "memory" \ 1839 #define MSPEL_FILTER13_CORE(UNPACK, LOAD, M, A1, A2, A3, A4) \ 1840 PTR_ADDU "$9, %[src], "#A1" \n\t" \ 1841 LOAD($f2, $9, M*0) \ 1842 LOAD($f4, $9, M*4) \ 1845 "pmullh $f2, $f2, %[ff_pw_3] \n\t" \ 1846 "pmullh $f4, $f4, %[ff_pw_3] \n\t" \ 1847 PTR_ADDU "$9, %[src], "#A2" \n\t" \ 1848 LOAD($f6, $9, M*0) \ 1849 LOAD($f8, $9, M*4) \ 1852 "pmullh $f6, $f6, $f12 \n\t" \ 1853 "pmullh $f8, $f8, $f12 \n\t" \ 1854 "psubh $f6, 
$f6, $f2 \n\t" \ 1855 "psubh $f8, $f8, $f4 \n\t" \ 1856 PTR_ADDU "$9, %[src], "#A4" \n\t" \ 1857 LOAD($f2, $9, M*0) \ 1858 LOAD($f4, $9, M*4) \ 1861 "li $8, 0x02 \n\t" \ 1862 "mtc1 $8, $f16 \n\t" \ 1863 "psllh $f2, $f2, $f16 \n\t" \ 1864 "psllh $f4, $f4, $f16 \n\t" \ 1865 "psubh $f6, $f6, $f2 \n\t" \ 1866 "psubh $f8, $f8, $f4 \n\t" \ 1867 PTR_ADDU "$9, %[src], "#A3" \n\t" \ 1868 LOAD($f2, $9, M*0) \ 1869 LOAD($f4, $9, M*4) \ 1872 "pmullh $f2, $f2, $f10 \n\t" \ 1873 "pmullh $f4, $f4, $f10 \n\t" \ 1874 "paddh $f6, $f6, $f2 \n\t" \ 1875 "paddh $f8, $f8, $f4 \n\t" 1885 #define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \ 1887 vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \ 1888 mips_reg src_stride, \ 1889 int rnd, int64_t shift) \ 1892 DECLARE_VAR_LOW32; \ 1893 DECLARE_VAR_ADDRT; \ 1895 src -= src_stride; \ 1898 "xor $f0, $f0, $f0 \n\t" \ 1899 LOAD_ROUNDER_MMI("%[rnd]") \ 1900 "ldc1 $f10, %[ff_pw_53] \n\t" \ 1901 "ldc1 $f12, %[ff_pw_18] \n\t" \ 1904 MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \ 1905 NORMALIZE_MMI("%[shift]") \ 1906 TRANSFER_DONT_PACK(OP_PUT) \ 1908 PTR_ADDU "$9, %[src], "#A1" \n\t" \ 1909 MMI_ULWC1($f2, $9, 0x08) \ 1911 "mov.d $f6, $f2 \n\t" \ 1912 "paddh $f2, $f2, $f2 \n\t" \ 1913 "paddh $f2, $f2, $f6 \n\t" \ 1914 PTR_ADDU "$9, %[src], "#A2" \n\t" \ 1915 MMI_ULWC1($f6, $9, 0x08) \ 1917 "pmullh $f6, $f6, $f12 \n\t" \ 1918 "psubh $f6, $f6, $f2 \n\t" \ 1919 PTR_ADDU "$9, %[src], "#A3" \n\t" \ 1920 MMI_ULWC1($f2, $9, 0x08) \ 1922 "pmullh $f2, $f2, $f10 \n\t" \ 1923 "paddh $f6, $f6, $f2 \n\t" \ 1924 PTR_ADDU "$9, %[src], "#A4" \n\t" \ 1925 MMI_ULWC1($f2, $9, 0x08) \ 1927 "li $8, 0x02 \n\t" \ 1928 "mtc1 $8, $f16 \n\t" \ 1929 "psllh $f2, $f2, $f16 \n\t" \ 1930 "psubh $f6, $f6, $f2 \n\t" \ 1931 "paddh $f6, $f6, $f14 \n\t" \ 1932 "li $8, 0x06 \n\t" \ 1933 "mtc1 $8, $f16 \n\t" \ 1934 "psrah $f6, $f6, $f16 \n\t" \ 1935 "sdc1 $f6, 0x10(%[dst]) \n\t" \ 1936 "addiu %[h], %[h], -0x01 \n\t" \ 1937 PTR_ADDU "%[src], %[src], 
%[stride_x1] \n\t" \ 1938 PTR_ADDIU "%[dst], %[dst], 0x18 \n\t" \ 1939 "bnez %[h], 1b \n\t" \ 1940 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \ 1942 [src]"+r"(src), [dst]"+r"(dst) \ 1943 : [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \ 1944 [stride_x3]"r"(3*src_stride), \ 1945 [rnd]"m"(rnd), [shift]"f"(shift), \ 1946 [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \ 1947 [ff_pw_3]"f"(ff_pw_3) \ 1948 : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \ 1949 "$f14", "$f16", "memory" \ 1960 #define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME) \ 1962 OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \ 1963 const int16_t *src, int rnd) \ 1966 DECLARE_VAR_ALL64; \ 1967 DECLARE_VAR_ADDRT; \ 1970 rnd -= (-4+58+13-3)*256; \ 1973 "xor $f0, $f0, $f0 \n\t" \ 1974 LOAD_ROUNDER_MMI("%[rnd]") \ 1975 "ldc1 $f10, %[ff_pw_53] \n\t" \ 1976 "ldc1 $f12, %[ff_pw_18] \n\t" \ 1979 MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \ 1980 "li $8, 0x07 \n\t" \ 1981 "mtc1 $8, $f16 \n\t" \ 1982 NORMALIZE_MMI("$f16") \ 1984 "paddh $f6, $f6, %[ff_pw_128] \n\t" \ 1985 "paddh $f8, $f8, %[ff_pw_128] \n\t" \ 1986 TRANSFER_DO_PACK(OP) \ 1987 "addiu %[h], %[h], -0x01 \n\t" \ 1988 PTR_ADDU "%[src], %[src], 0x18 \n\t" \ 1989 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 1990 "bnez %[h], 1b \n\t" \ 1991 : RESTRICT_ASM_ALL64 RESTRICT_ASM_ADDRT \ 1993 [src]"+r"(src), [dst]"+r"(dst) \ 1994 : [stride]"r"(stride), [rnd]"m"(rnd), \ 1995 [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \ 1996 [ff_pw_3]"f"(ff_pw_3), [ff_pw_128]"f"(ff_pw_128) \ 1997 : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \ 1998 "$f14", "$f16", "memory" \ 2010 #define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME) \ 2012 OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \ 2013 mips_reg stride, int rnd, mips_reg offset) \ 2016 DECLARE_VAR_LOW32; \ 2017 DECLARE_VAR_ADDRT; \ 2022 __asm__ volatile ( \ 2023 "xor $f0, $f0, $f0 \n\t" \ 2024 
LOAD_ROUNDER_MMI("%[rnd]") \ 2025 "ldc1 $f10, %[ff_pw_53] \n\t" \ 2026 "ldc1 $f12, %[ff_pw_18] \n\t" \ 2029 MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \ 2030 "li $8, 0x06 \n\t" \ 2031 "mtc1 $8, $f16 \n\t" \ 2032 NORMALIZE_MMI("$f16") \ 2033 TRANSFER_DO_PACK(OP) \ 2034 "addiu %[h], %[h], -0x01 \n\t" \ 2035 PTR_ADDU "%[src], %[src], %[stride] \n\t" \ 2036 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" \ 2037 "bnez %[h], 1b \n\t" \ 2038 : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT \ 2040 [src]"+r"(src), [dst]"+r"(dst) \ 2041 : [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \ 2042 [offset_x3]"r"(3*offset), [stride]"r"(stride), \ 2044 [ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \ 2045 [ff_pw_3]"f"(ff_pw_3) \ 2046 : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \ 2047 "$f14", "$f16", "memory" \ 2060 MSPEL_FILTER13_8B(shift3, $0, %[offset_x1], %[offset_x2], %[offset_x3], OP_PUT, put_)
2061 MSPEL_FILTER13_8B(shift3, $0, %[offset_x1], %[offset_x2], %[offset_x3], OP_AVG, avg_)
2086 #define VC1_MSPEL_MC(OP) \ 2087 static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\ 2088 int hmode, int vmode, int rnd) \ 2090 static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\ 2091 { NULL, vc1_put_ver_16b_shift1_mmi, \ 2092 vc1_put_ver_16b_shift2_mmi, \ 2093 vc1_put_ver_16b_shift3_mmi }; \ 2094 static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\ 2095 { NULL, OP ## vc1_hor_16b_shift1_mmi, \ 2096 OP ## vc1_hor_16b_shift2_mmi, \ 2097 OP ## vc1_hor_16b_shift3_mmi }; \ 2098 static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] = \ 2099 { NULL, OP ## vc1_shift1_mmi, \ 2100 OP ## vc1_shift2_mmi, \ 2101 OP ## vc1_shift3_mmi }; \ 2105 static const int shift_value[] = { 0, 5, 1, 5 }; \ 2106 int shift = (shift_value[hmode]+shift_value[vmode])>>1; \ 2108 LOCAL_ALIGNED(16, int16_t, tmp, [12*8]); \ 2110 r = (1<<(shift-1)) + rnd-1; \ 2111 vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift); \ 2113 vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd); \ 2117 vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride); \ 2123 vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1); \ 2125 static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \ 2126 int stride, int hmode, int vmode, int rnd)\ 2128 OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ 2129 OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ 2130 dst += 8*stride; src += 8*stride; \ 2131 OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \ 2132 OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \ 2139 #define DECLARE_FUNCTION(a, b) \ 2140 void ff_put_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst, \ 2141 const uint8_t *src, \ 2145 put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 2147 void ff_avg_vc1_mspel_mc ## a ## b ## _mmi(uint8_t *dst, \ 2148 const uint8_t *src, \ 2152 avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ 2154 void ff_put_vc1_mspel_mc ## a 
## b ## _16_mmi(uint8_t *dst, \ 2155 const uint8_t *src, \ 2159 put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ 2161 void ff_avg_vc1_mspel_mc ## a ## b ## _16_mmi(uint8_t *dst, \ 2162 const uint8_t *src, \ 2166 avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ 2188 #define CHROMA_MC_8_MMI \ 2189 "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ 2190 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 2191 "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" \ 2192 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 2193 "punpckhbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 2194 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 2195 "punpckhbh %[ftmp8], %[ftmp4], %[ftmp0] \n\t" \ 2196 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 2198 "pmullh %[ftmp1], %[ftmp1], %[A] \n\t" \ 2199 "pmullh %[ftmp5], %[ftmp5], %[A] \n\t" \ 2200 "pmullh %[ftmp2], %[ftmp2], %[B] \n\t" \ 2201 "pmullh %[ftmp6], %[ftmp6], %[B] \n\t" \ 2202 "pmullh %[ftmp3], %[ftmp3], %[C] \n\t" \ 2203 "pmullh %[ftmp7], %[ftmp7], %[C] \n\t" \ 2204 "pmullh %[ftmp4], %[ftmp4], %[D] \n\t" \ 2205 "pmullh %[ftmp8], %[ftmp8], %[D] \n\t" \ 2207 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 2208 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 2209 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ 2210 "paddh %[ftmp1], %[ftmp1], %[ff_pw_28] \n\t" \ 2212 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t" \ 2213 "paddh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" \ 2214 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ 2215 "paddh %[ftmp5], %[ftmp5], %[ff_pw_28] \n\t" \ 2217 "psrlh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \ 2218 "psrlh %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ 2219 "packushb %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 2222 #define CHROMA_MC_4_MMI \ 2223 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" \ 2224 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ 2225 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \ 2226 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ 2228 "pmullh %[ftmp1], %[ftmp1], %[A] \n\t" \ 2229 "pmullh %[ftmp2], %[ftmp2], %[B] \n\t" \ 2230 "pmullh %[ftmp3], 
%[ftmp3], %[C] \n\t" \ 2231 "pmullh %[ftmp4], %[ftmp4], %[D] \n\t" \ 2233 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ 2234 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \ 2235 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ 2236 "paddh %[ftmp1], %[ftmp1], %[ff_pw_28] \n\t" \ 2238 "psrlh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \ 2239 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" 2244 int stride,
int h,
int x,
int y)
2246 const int A = (8 - x) * (8 - y);
2247 const int B = (x) * (8 - y);
2248 const int C = (8 - x) * (y);
2249 const int D = (x) * (y);
2255 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2258 "li %[tmp0], 0x06 \n\t" 2259 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2260 "mtc1 %[tmp0], %[ftmp9] \n\t" 2261 "pshufh %[A], %[A], %[ftmp0] \n\t" 2262 "pshufh %[B], %[B], %[ftmp0] \n\t" 2263 "pshufh %[C], %[C], %[ftmp0] \n\t" 2264 "pshufh %[D], %[D], %[ftmp0] \n\t" 2267 MMI_ULDC1(%[ftmp1], %[src], 0x00)
2268 MMI_ULDC1(%[ftmp2], %[src], 0x01)
2269 PTR_ADDU "%[src], %[src], %[stride] \n\t" 2270 MMI_ULDC1(%[ftmp3], %[src], 0x00)
2271 MMI_ULDC1(%[ftmp4], %[src], 0x01)
2275 MMI_SDC1(%[ftmp1], %[dst], 0x00)
2276 "addiu %[h], %[h], -0x01 \n\t" 2277 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2278 "bnez %[h], 1b \n\t" 2279 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2280 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2281 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2282 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2283 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2286 [tmp0]
"=&r"(tmp[0]),
2287 [src]
"+&r"(src), [dst]
"+&r"(dst),
2290 [
A]
"f"(
A), [B]
"f"(B),
2291 [
C]
"f"(
C), [D]
"f"(D),
2299 int stride,
int h,
int x,
int y)
2301 const int A = (8 - x) * (8 - y);
2302 const int B = (x) * (8 - y);
2303 const int C = (8 - x) * (y);
2304 const int D = (x) * (y);
2310 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2313 "li %[tmp0], 0x06 \n\t" 2314 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2315 "mtc1 %[tmp0], %[ftmp5] \n\t" 2316 "pshufh %[A], %[A], %[ftmp0] \n\t" 2317 "pshufh %[B], %[B], %[ftmp0] \n\t" 2318 "pshufh %[C], %[C], %[ftmp0] \n\t" 2319 "pshufh %[D], %[D], %[ftmp0] \n\t" 2322 MMI_ULWC1(%[ftmp1], %[src], 0x00)
2323 MMI_ULWC1(%[ftmp2], %[src], 0x01)
2324 PTR_ADDU "%[src], %[src], %[stride] \n\t" 2325 MMI_ULWC1(%[ftmp3], %[src], 0x00)
2326 MMI_ULWC1(%[ftmp4], %[src], 0x01)
2330 MMI_SWC1(%[ftmp1], %[dst], 0x00)
2331 "addiu %[h], %[h], -0x01 \n\t" 2332 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2333 "bnez %[h], 1b \n\t" 2334 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2335 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2336 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2337 [tmp0]
"=&r"(tmp[0]),
2340 [src]
"+&r"(src), [dst]
"+&r"(dst),
2343 [
A]
"f"(
A), [B]
"f"(B),
2344 [
C]
"f"(
C), [D]
"f"(D),
2352 int stride,
int h,
int x,
int y)
2354 const int A = (8 - x) * (8 - y);
2355 const int B = (x) * (8 - y);
2356 const int C = (8 - x) * (y);
2357 const int D = (x) * (y);
2363 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2366 "li %[tmp0], 0x06 \n\t" 2367 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2368 "mtc1 %[tmp0], %[ftmp9] \n\t" 2369 "pshufh %[A], %[A], %[ftmp0] \n\t" 2370 "pshufh %[B], %[B], %[ftmp0] \n\t" 2371 "pshufh %[C], %[C], %[ftmp0] \n\t" 2372 "pshufh %[D], %[D], %[ftmp0] \n\t" 2375 MMI_ULDC1(%[ftmp1], %[src], 0x00)
2376 MMI_ULDC1(%[ftmp2], %[src], 0x01)
2377 PTR_ADDU "%[src], %[src], %[stride] \n\t" 2378 MMI_ULDC1(%[ftmp3], %[src], 0x00)
2379 MMI_ULDC1(%[ftmp4], %[src], 0x01)
2383 MMI_LDC1(%[ftmp2], %[dst], 0x00)
2384 "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2386 MMI_SDC1(%[ftmp1], %[dst], 0x00)
2387 "addiu %[h], %[h], -0x01 \n\t" 2388 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2389 "bnez %[h], 1b \n\t" 2390 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2391 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2392 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2393 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2394 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2395 [tmp0]
"=&r"(tmp[0]),
2398 [src]
"+&r"(src), [dst]
"+&r"(dst),
2401 [
A]
"f"(
A), [B]
"f"(B),
2402 [
C]
"f"(
C), [D]
"f"(D),
2410 int stride,
int h,
int x,
int y)
2412 const int A = (8 - x) * (8 - y);
2413 const int B = ( x) * (8 - y);
2414 const int C = (8 - x) * ( y);
2415 const int D = ( x) * ( y);
2421 av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
2424 "li %[tmp0], 0x06 \n\t" 2425 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 2426 "mtc1 %[tmp0], %[ftmp5] \n\t" 2427 "pshufh %[A], %[A], %[ftmp0] \n\t" 2428 "pshufh %[B], %[B], %[ftmp0] \n\t" 2429 "pshufh %[C], %[C], %[ftmp0] \n\t" 2430 "pshufh %[D], %[D], %[ftmp0] \n\t" 2433 MMI_ULWC1(%[ftmp1], %[src], 0x00)
2434 MMI_ULWC1(%[ftmp2], %[src], 0x01)
2435 PTR_ADDU "%[src], %[src], %[stride] \n\t" 2436 MMI_ULWC1(%[ftmp3], %[src], 0x00)
2437 MMI_ULWC1(%[ftmp4], %[src], 0x01)
2441 MMI_LWC1(%[ftmp2], %[dst], 0x00)
2442 "pavgb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" 2444 MMI_SWC1(%[ftmp1], %[dst], 0x00)
2445 "addiu %[h], %[h], -0x01 \n\t" 2446 PTR_ADDU "%[dst], %[dst], %[stride] \n\t" 2447 "bnez %[h], 1b \n\t" 2448 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2449 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2450 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2451 [tmp0]
"=&r"(tmp[0]),
2454 [src]
"+&r"(src), [dst]
"+&r"(dst),
2457 [
A]
"f"(
A), [B]
"f"(B),
2458 [
C]
"f"(
C), [D]
"f"(D),
void ff_vc1_v_loop_filter8_mmi(uint8_t *src, int stride, int pq)
void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
static int shift(int a, int b)
void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
#define VC1_INV_TRANCS_8_TYPE1(o1, o2, r1, r2, r3, r4, c0)
void(* vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, mips_reg dst_stride, const int16_t *src, int rnd)
void ff_vc1_h_loop_filter4_mmi(uint8_t *src, int stride, int pq)
void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
#define DECLARE_FUNCTION(a, b)
Macro to ease bicubic filter interpolation functions declarations.
static void vc1_loop_filter(uint8_t *src, int step, int stride, int len, int pq)
VC-1 in-loop deblocking filter.
void ff_vc1_v_s_overlap_mmi(int16_t *top, int16_t *bottom)
void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
void ff_vc1_v_overlap_mmi(uint8_t *src, int stride)
void(* vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, mips_reg src_stride, int rnd, int64_t shift)
1/4 shift bicubic interpolation
void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
#define LOAD_ROUNDER_MMI(ROUND)
Computes the rounder (32-r or 8-r) and unpacks it to $f14.
#define VC1_SHIFT2(OP, OPNAME)
Purely vertical or horizontal 1/2 shift interpolation.
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
void ff_vc1_h_overlap_mmi(uint8_t *src, int stride)
static void vc1_put_ver_16b_shift2_mmi(int16_t *dst, const uint8_t *src, mips_reg stride, int rnd, int64_t shift)
Sacrificing $f12 makes it possible to pipeline loads from src.
void ff_avg_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
#define i(width, name, range_min, range_max)
static const int shift1[6]
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
#define VC1_INV_TRANCS_8_TYPE2(o1, o2, r1, r2, r3, r4, c0, c1)
static av_always_inline int vc1_filter_line(uint8_t *src, int stride, int pq)
VC-1 in-loop deblocking filter for one line.
Simple assert() macros that are a bit more flexible than ISO C assert().
void ff_vc1_v_loop_filter4_mmi(uint8_t *src, int stride, int pq)
void ff_vc1_h_loop_filter16_mmi(uint8_t *src, int stride, int pq)
static const uint8_t offset[127][2]
void ff_put_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
#define VC1_MSPEL_MC(OP)
Interpolate fractional pel values by applying proper vertical then horizontal filter.
void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right, int left_stride, int right_stride, int flags)
void ff_avg_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)
Macro to build the 8bits, any direction, version of vc1_put_shift[13].
void ff_put_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int rnd)
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_vc1_h_loop_filter8_mmi(uint8_t *src, int stride, int pq)
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> dc
#define flags(name, subs,...)
void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
static double clip(void *opaque, double val)
Clip value val in the minval - maxval range.
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
Transpose 4x4 packed halfword data.
void ff_vc1_v_loop_filter16_mmi(uint8_t *src, int stride, int pq)
void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
static const double coeff[2][5]
#define VC1_HOR_16B_SHIFT2(OP, OPNAME)
Data is already unpacked, so some operations can be performed directly from memory.
void(* vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, mips_reg stride, int rnd, mips_reg offset)
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
Macro to build the vertical 16bits version of vc1_put_shift[13].
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)
Macro to build the horizontal 16bits version of vc1_put_shift[13].