43 #define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 44 #define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 45 #define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 46 #define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 47 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 48 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 49 #define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 50 #define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 53 #define COL_SHIFT 20 // 6 89 int16_t *
const temp= (int16_t*)align_tmp;
92 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 93 "movq " #src0 ", %%mm0 \n\t" \ 94 "movq " #src4 ", %%mm1 \n\t" \ 95 "movq " #src1 ", %%mm2 \n\t" \ 96 "movq " #src5 ", %%mm3 \n\t" \ 97 "movq "MANGLE(wm1010)", %%mm4 \n\t"\ 98 "pand %%mm0, %%mm4 \n\t"\ 99 "por %%mm1, %%mm4 \n\t"\ 100 "por %%mm2, %%mm4 \n\t"\ 101 "por %%mm3, %%mm4 \n\t"\ 102 "packssdw %%mm4,%%mm4 \n\t"\ 103 "movd %%mm4, %%eax \n\t"\ 104 "orl %%eax, %%eax \n\t"\ 106 "movq 16(%2), %%mm4 \n\t" \ 107 "pmaddwd %%mm0, %%mm4 \n\t" \ 108 "movq 24(%2), %%mm5 \n\t" \ 109 "pmaddwd %%mm5, %%mm0 \n\t" \ 110 "movq 32(%2), %%mm5 \n\t" \ 111 "pmaddwd %%mm1, %%mm5 \n\t" \ 112 "movq 40(%2), %%mm6 \n\t" \ 113 "pmaddwd %%mm6, %%mm1 \n\t" \ 114 "movq 48(%2), %%mm7 \n\t" \ 115 "pmaddwd %%mm2, %%mm7 \n\t" \ 116 #rounder ", %%mm4 \n\t"\ 117 "movq %%mm4, %%mm6 \n\t" \ 118 "paddd %%mm5, %%mm4 \n\t" \ 119 "psubd %%mm5, %%mm6 \n\t" \ 120 "movq 56(%2), %%mm5 \n\t" \ 121 "pmaddwd %%mm3, %%mm5 \n\t" \ 122 #rounder ", %%mm0 \n\t"\ 123 "paddd %%mm0, %%mm1 \n\t" \ 124 "paddd %%mm0, %%mm0 \n\t" \ 125 "psubd %%mm1, %%mm0 \n\t" \ 126 "pmaddwd 64(%2), %%mm2 \n\t" \ 127 "paddd %%mm5, %%mm7 \n\t" \ 128 "movq 72(%2), %%mm5 \n\t" \ 129 "pmaddwd %%mm3, %%mm5 \n\t" \ 130 "paddd %%mm4, %%mm7 \n\t" \ 131 "paddd %%mm4, %%mm4 \n\t" \ 132 "psubd %%mm7, %%mm4 \n\t" \ 133 "paddd %%mm2, %%mm5 \n\t" \ 134 "psrad $" #shift ", %%mm7 \n\t"\ 135 "psrad $" #shift ", %%mm4 \n\t"\ 136 "movq %%mm1, %%mm2 \n\t" \ 137 "paddd %%mm5, %%mm1 \n\t" \ 138 "psubd %%mm5, %%mm2 \n\t" \ 139 "psrad $" #shift ", %%mm1 \n\t"\ 140 "psrad $" #shift ", %%mm2 \n\t"\ 141 "packssdw %%mm1, %%mm7 \n\t" \ 142 "packssdw %%mm4, %%mm2 \n\t" \ 143 "movq %%mm7, " #dst " \n\t"\ 144 "movq " #src1 ", %%mm1 \n\t" \ 145 "movq 80(%2), %%mm4 \n\t" \ 146 "movq %%mm2, 24+" #dst " \n\t"\ 147 "pmaddwd %%mm1, %%mm4 \n\t" \ 148 "movq 88(%2), %%mm7 \n\t" \ 149 "pmaddwd 96(%2), %%mm1 \n\t" \ 150 "pmaddwd %%mm3, %%mm7 \n\t" \ 151 "movq %%mm0, %%mm2 \n\t" \ 152 "pmaddwd 104(%2), %%mm3 \n\t" \ 153 "paddd %%mm7, %%mm4 \n\t" \ 154 "paddd %%mm4, %%mm2 \n\t" \ 155 "psubd %%mm4, %%mm0 \n\t" \ 156 "psrad $" #shift ", %%mm2 \n\t"\ 157 "psrad $" #shift ", %%mm0 \n\t"\ 158 "movq %%mm6, %%mm4 \n\t" \ 159 "paddd %%mm1, %%mm3 \n\t" \ 160 "paddd %%mm3, %%mm6 \n\t" \ 161 "psubd %%mm3, %%mm4 \n\t" \ 162 "psrad $" #shift ", %%mm6 \n\t"\ 163 "packssdw %%mm6, %%mm2 \n\t" \ 164 "movq %%mm2, 8+" #dst " \n\t"\ 165 "psrad $" #shift ", %%mm4 \n\t"\ 166 "packssdw %%mm0, %%mm4 \n\t" \ 167 "movq %%mm4, 16+" #dst " \n\t"\ 170 "pslld $16, %%mm0 \n\t"\ 171 "paddd "MANGLE(d40000)", %%mm0 \n\t"\ 172 "psrad $13, %%mm0 \n\t"\ 173 "packssdw %%mm0, %%mm0 \n\t"\ 174 "movq %%mm0, " #dst " \n\t"\ 175 "movq %%mm0, 8+" #dst " \n\t"\ 176 "movq %%mm0, 16+" #dst " \n\t"\ 177 "movq %%mm0, 24+" #dst " \n\t"\ 180 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \ 181 "movq " #src0 ", %%mm0 \n\t" \ 182 "movq " #src4 ", %%mm1 \n\t" \ 183 "movq " #src1 ", %%mm2 \n\t" \ 184 "movq " #src5 ", %%mm3 \n\t" \ 185 "movq %%mm0, %%mm4 \n\t"\ 186 "por %%mm1, %%mm4 \n\t"\ 187 "por %%mm2, %%mm4 \n\t"\ 188 "por %%mm3, %%mm4 \n\t"\ 189 "packssdw %%mm4,%%mm4 \n\t"\ 190 "movd %%mm4, %%eax \n\t"\ 191 "orl %%eax, %%eax \n\t"\ 193 "movq 16(%2), %%mm4 \n\t" \ 194 "pmaddwd %%mm0, %%mm4 \n\t" \ 195 "movq 24(%2), %%mm5 \n\t" \ 196 "pmaddwd %%mm5, %%mm0 \n\t" \ 197 "movq 32(%2), %%mm5 \n\t" \ 198 "pmaddwd %%mm1, %%mm5 \n\t" \ 199 "movq 40(%2), %%mm6 \n\t" \ 200 "pmaddwd %%mm6, %%mm1 \n\t" \ 201 "movq 48(%2), %%mm7 \n\t" \ 202 "pmaddwd %%mm2, %%mm7 \n\t" \ 203 #rounder ", %%mm4 \n\t"\ 204 "movq %%mm4, %%mm6 \n\t" \ 205 "paddd %%mm5, %%mm4 \n\t" \ 206 "psubd %%mm5, %%mm6 \n\t" \ 207 "movq 56(%2), %%mm5 \n\t" \ 208 "pmaddwd %%mm3, %%mm5 \n\t" \ 209 #rounder ", %%mm0 \n\t"\ 210 "paddd %%mm0, %%mm1 \n\t" \ 211 "paddd %%mm0, %%mm0 \n\t" \ 212 "psubd %%mm1, %%mm0 \n\t" \ 213 "pmaddwd 64(%2), %%mm2 \n\t" \ 214 "paddd %%mm5, %%mm7 \n\t" \ 215 "movq 72(%2), %%mm5 \n\t" \ 216 "pmaddwd %%mm3, %%mm5 \n\t" \ 217 "paddd %%mm4, %%mm7 \n\t" \ 218 "paddd %%mm4, %%mm4 \n\t" \ 219 "psubd %%mm7, %%mm4 \n\t" \ 220 "paddd %%mm2, %%mm5 \n\t" \ 221 "psrad $" #shift ", %%mm7 \n\t"\ 222 "psrad $" #shift ", %%mm4 \n\t"\ 223 "movq %%mm1, %%mm2 \n\t" \ 224 "paddd %%mm5, %%mm1 \n\t" \ 225 "psubd %%mm5, %%mm2 \n\t" \ 226 "psrad $" #shift ", %%mm1 \n\t"\ 227 "psrad $" #shift ", %%mm2 \n\t"\ 228 "packssdw %%mm1, %%mm7 \n\t" \ 229 "packssdw %%mm4, %%mm2 \n\t" \ 230 "movq %%mm7, " #dst " \n\t"\ 231 "movq " #src1 ", %%mm1 \n\t" \ 232 "movq 80(%2), %%mm4 \n\t" \ 233 "movq %%mm2, 24+" #dst " \n\t"\ 234 "pmaddwd %%mm1, %%mm4 \n\t" \ 235 "movq 88(%2), %%mm7 \n\t" \ 236 "pmaddwd 96(%2), %%mm1 \n\t" \ 237 "pmaddwd %%mm3, %%mm7 \n\t" \ 238 "movq %%mm0, %%mm2 \n\t" \ 239 "pmaddwd 104(%2), %%mm3 \n\t" \ 240 "paddd %%mm7, %%mm4 \n\t" \ 241 "paddd %%mm4, %%mm2 \n\t" \ 242 "psubd %%mm4, %%mm0 \n\t" \ 243 "psrad $" #shift ", %%mm2 \n\t"\ 244 "psrad $" #shift ", %%mm0 \n\t"\ 245 "movq %%mm6, %%mm4 \n\t" \ 246 "paddd %%mm1, %%mm3 \n\t" \ 247 "paddd %%mm3, %%mm6 \n\t" \ 248 "psubd %%mm3, %%mm4 \n\t" \ 249 "psrad $" #shift ", %%mm6 \n\t"\ 250 "packssdw %%mm6, %%mm2 \n\t" \ 251 "movq %%mm2, 8+" #dst " \n\t"\ 252 "psrad $" #shift ", %%mm4 \n\t"\ 253 "packssdw %%mm0, %%mm4 \n\t" \ 254 "movq %%mm4, 16+" #dst " \n\t"\ 256 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ 257 "movq " #src0 ", %%mm0 \n\t" \ 258 "movq " #src4 ", %%mm1 \n\t" \ 259 "movq " #src1 ", %%mm2 \n\t" \ 260 "movq " #src5 ", %%mm3 \n\t" \ 261 "movq 16(%2), %%mm4 \n\t" \ 262 "pmaddwd %%mm0, %%mm4 \n\t" \ 263 "movq 24(%2), %%mm5 \n\t" \ 264 "pmaddwd %%mm5, %%mm0 \n\t" \ 265 "movq 32(%2), %%mm5 \n\t" \ 266 "pmaddwd %%mm1, %%mm5 \n\t" \ 267 "movq 40(%2), %%mm6 \n\t" \ 268 "pmaddwd %%mm6, %%mm1 \n\t" \ 269 "movq 48(%2), %%mm7 \n\t" \ 270 "pmaddwd %%mm2, %%mm7 \n\t" \ 271 #rounder ", %%mm4 \n\t"\ 272 "movq %%mm4, %%mm6 \n\t" \ 273 "paddd %%mm5, %%mm4 \n\t" \ 274 "psubd %%mm5, %%mm6 \n\t" \ 275 "movq 56(%2), %%mm5 \n\t" \ 276 "pmaddwd %%mm3, %%mm5 \n\t" \ 277 #rounder ", %%mm0 \n\t"\ 278 "paddd %%mm0, %%mm1 \n\t" \ 279 "paddd %%mm0, %%mm0 \n\t" \ 280 "psubd %%mm1, %%mm0 \n\t" \ 281 "pmaddwd 64(%2), %%mm2 \n\t" \ 282 "paddd %%mm5, %%mm7 \n\t" \ 283 "movq 72(%2), %%mm5 \n\t" \ 284 "pmaddwd %%mm3, %%mm5 \n\t" \ 285 "paddd %%mm4, %%mm7 \n\t" \ 286 "paddd %%mm4, %%mm4 \n\t" \ 287 "psubd %%mm7, %%mm4 \n\t" \ 288 "paddd %%mm2, %%mm5 \n\t" \ 289 "psrad $" #shift ", %%mm7 \n\t"\ 290 "psrad $" #shift ", %%mm4 \n\t"\ 291 "movq %%mm1, %%mm2 \n\t" \ 292 "paddd %%mm5, %%mm1 \n\t" \ 293 "psubd %%mm5, %%mm2 \n\t" \ 294 "psrad $" #shift ", %%mm1 \n\t"\ 295 "psrad $" #shift ", %%mm2 \n\t"\ 296 "packssdw %%mm1, %%mm7 \n\t" \ 297 "packssdw %%mm4, %%mm2 \n\t" \ 298 "movq %%mm7, " #dst " \n\t"\ 299 "movq " #src1 ", %%mm1 \n\t" \ 300 "movq 80(%2), %%mm4 \n\t" \ 301 "movq %%mm2, 24+" #dst " \n\t"\ 302 "pmaddwd %%mm1, %%mm4 \n\t" \ 303 "movq 88(%2), %%mm7 \n\t" \ 304 "pmaddwd 96(%2), %%mm1 \n\t" \ 305 "pmaddwd %%mm3, %%mm7 \n\t" \ 306 "movq %%mm0, %%mm2 \n\t" \ 307 "pmaddwd 104(%2), %%mm3 \n\t" \ 308 "paddd %%mm7, %%mm4 \n\t" \ 309 "paddd %%mm4, %%mm2 \n\t" \ 310 "psubd %%mm4, %%mm0 \n\t" \ 311 "psrad $" #shift ", %%mm2 \n\t"\ 312 "psrad $" #shift ", %%mm0 \n\t"\ 313 "movq %%mm6, %%mm4 \n\t" \ 314 "paddd %%mm1, %%mm3 \n\t" \ 315 "paddd %%mm3, %%mm6 \n\t" \ 316 "psubd %%mm3, %%mm4 \n\t" \ 317 "psrad $" #shift ", %%mm6 \n\t"\ 318 "packssdw %%mm6, %%mm2 \n\t" \ 319 "movq %%mm2, 8+" #dst " \n\t"\ 320 "psrad $" #shift ", %%mm4 \n\t"\ 321 "packssdw %%mm0, %%mm4 \n\t" \ 322 "movq %%mm4, 16+" #dst " \n\t"\ 325 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
326 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
327 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
328 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
331 #define IDCT(src0, src4, src1, src5, dst, shift) \ 332 "movq " #src0 ", %%mm0 \n\t" \ 333 "movq " #src4 ", %%mm1 \n\t" \ 334 "movq " #src1 ", %%mm2 \n\t" \ 335 "movq " #src5 ", %%mm3 \n\t" \ 336 "movq 16(%2), %%mm4 \n\t" \ 337 "pmaddwd %%mm0, %%mm4 \n\t" \ 338 "movq 24(%2), %%mm5 \n\t" \ 339 "pmaddwd %%mm5, %%mm0 \n\t" \ 340 "movq 32(%2), %%mm5 \n\t" \ 341 "pmaddwd %%mm1, %%mm5 \n\t" \ 342 "movq 40(%2), %%mm6 \n\t" \ 343 "pmaddwd %%mm6, %%mm1 \n\t" \ 344 "movq %%mm4, %%mm6 \n\t" \ 345 "movq 48(%2), %%mm7 \n\t" \ 346 "pmaddwd %%mm2, %%mm7 \n\t" \ 347 "paddd %%mm5, %%mm4 \n\t" \ 348 "psubd %%mm5, %%mm6 \n\t" \ 349 "movq %%mm0, %%mm5 \n\t" \ 350 "paddd %%mm1, %%mm0 \n\t" \ 351 "psubd %%mm1, %%mm5 \n\t" \ 352 "movq 56(%2), %%mm1 \n\t" \ 353 "pmaddwd %%mm3, %%mm1 \n\t" \ 354 "pmaddwd 64(%2), %%mm2 \n\t" \ 355 "paddd %%mm1, %%mm7 \n\t" \ 356 "movq 72(%2), %%mm1 \n\t" \ 357 "pmaddwd %%mm3, %%mm1 \n\t" \ 358 "paddd %%mm4, %%mm7 \n\t" \ 359 "paddd %%mm4, %%mm4 \n\t" \ 360 "psubd %%mm7, %%mm4 \n\t" \ 361 "paddd %%mm2, %%mm1 \n\t" \ 362 "psrad $" #shift ", %%mm7 \n\t"\ 363 "psrad $" #shift ", %%mm4 \n\t"\ 364 "movq %%mm0, %%mm2 \n\t" \ 365 "paddd %%mm1, %%mm0 \n\t" \ 366 "psubd %%mm1, %%mm2 \n\t" \ 367 "psrad $" #shift ", %%mm0 \n\t"\ 368 "psrad $" #shift ", %%mm2 \n\t"\ 369 "packssdw %%mm7, %%mm7 \n\t" \ 370 "movd %%mm7, " #dst " \n\t"\ 371 "packssdw %%mm0, %%mm0 \n\t" \ 372 "movd %%mm0, 16+" #dst " \n\t"\ 373 "packssdw %%mm2, %%mm2 \n\t" \ 374 "movd %%mm2, 96+" #dst " \n\t"\ 375 "packssdw %%mm4, %%mm4 \n\t" \ 376 "movd %%mm4, 112+" #dst " \n\t"\ 377 "movq " #src1 ", %%mm0 \n\t" \ 378 "movq 80(%2), %%mm4 \n\t" \ 379 "pmaddwd %%mm0, %%mm4 \n\t" \ 380 "movq 88(%2), %%mm7 \n\t" \ 381 "pmaddwd 96(%2), %%mm0 \n\t" \ 382 "pmaddwd %%mm3, %%mm7 \n\t" \ 383 "movq %%mm5, %%mm2 \n\t" \ 384 "pmaddwd 104(%2), %%mm3 \n\t" \ 385 "paddd %%mm7, %%mm4 \n\t" \ 386 "paddd %%mm4, %%mm2 \n\t" \ 387 "psubd %%mm4, %%mm5 \n\t" \ 388 "psrad $" #shift ", %%mm2 \n\t"\ 389 "psrad $" #shift ", %%mm5 \n\t"\ 390 "movq %%mm6, %%mm4 \n\t" \ 391 "paddd %%mm0, %%mm3 \n\t" \ 392 "paddd %%mm3, %%mm6 \n\t" \ 393 "psubd %%mm3, %%mm4 \n\t" \ 394 "psrad $" #shift ", %%mm6 \n\t"\ 395 "psrad $" #shift ", %%mm4 \n\t"\ 396 "packssdw %%mm2, %%mm2 \n\t" \ 397 "packssdw %%mm6, %%mm6 \n\t" \ 398 "movd %%mm2, 32+" #dst " \n\t"\ 399 "packssdw %%mm4, %%mm4 \n\t" \ 400 "packssdw %%mm5, %%mm5 \n\t" \ 401 "movd %%mm6, 48+" #dst " \n\t"\ 402 "movd %%mm4, 64+" #dst " \n\t"\ 403 "movd %%mm5, 80+" #dst " \n\t" 407 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
408 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
409 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
410 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
415 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
416 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
419 #define IDCT(src0, src4, src1, src5, dst, shift) \ 420 "movq " #src0 ", %%mm0 \n\t" \ 421 "movq " #src4 ", %%mm1 \n\t" \ 422 "movq " #src5 ", %%mm3 \n\t" \ 423 "movq 16(%2), %%mm4 \n\t" \ 424 "pmaddwd %%mm0, %%mm4 \n\t" \ 425 "movq 24(%2), %%mm5 \n\t" \ 426 "pmaddwd %%mm5, %%mm0 \n\t" \ 427 "movq 32(%2), %%mm5 \n\t" \ 428 "pmaddwd %%mm1, %%mm5 \n\t" \ 429 "movq 40(%2), %%mm6 \n\t" \ 430 "pmaddwd %%mm6, %%mm1 \n\t" \ 431 "movq %%mm4, %%mm6 \n\t" \ 432 "paddd %%mm5, %%mm4 \n\t" \ 433 "psubd %%mm5, %%mm6 \n\t" \ 434 "movq %%mm0, %%mm5 \n\t" \ 435 "paddd %%mm1, %%mm0 \n\t" \ 436 "psubd %%mm1, %%mm5 \n\t" \ 437 "movq 56(%2), %%mm1 \n\t" \ 438 "pmaddwd %%mm3, %%mm1 \n\t" \ 439 "movq 72(%2), %%mm7 \n\t" \ 440 "pmaddwd %%mm3, %%mm7 \n\t" \ 441 "paddd %%mm4, %%mm1 \n\t" \ 442 "paddd %%mm4, %%mm4 \n\t" \ 443 "psubd %%mm1, %%mm4 \n\t" \ 444 "psrad $" #shift ", %%mm1 \n\t"\ 445 "psrad $" #shift ", %%mm4 \n\t"\ 446 "movq %%mm0, %%mm2 \n\t" \ 447 "paddd %%mm7, %%mm0 \n\t" \ 448 "psubd %%mm7, %%mm2 \n\t" \ 449 "psrad $" #shift ", %%mm0 \n\t"\ 450 "psrad $" #shift ", %%mm2 \n\t"\ 451 "packssdw %%mm1, %%mm1 \n\t" \ 452 "movd %%mm1, " #dst " \n\t"\ 453 "packssdw %%mm0, %%mm0 \n\t" \ 454 "movd %%mm0, 16+" #dst " \n\t"\ 455 "packssdw %%mm2, %%mm2 \n\t" \ 456 "movd %%mm2, 96+" #dst " \n\t"\ 457 "packssdw %%mm4, %%mm4 \n\t" \ 458 "movd %%mm4, 112+" #dst " \n\t"\ 459 "movq 88(%2), %%mm1 \n\t" \ 460 "pmaddwd %%mm3, %%mm1 \n\t" \ 461 "movq %%mm5, %%mm2 \n\t" \ 462 "pmaddwd 104(%2), %%mm3 \n\t" \ 463 "paddd %%mm1, %%mm2 \n\t" \ 464 "psubd %%mm1, %%mm5 \n\t" \ 465 "psrad $" #shift ", %%mm2 \n\t"\ 466 "psrad $" #shift ", %%mm5 \n\t"\ 467 "movq %%mm6, %%mm1 \n\t" \ 468 "paddd %%mm3, %%mm6 \n\t" \ 469 "psubd %%mm3, %%mm1 \n\t" \ 470 "psrad $" #shift ", %%mm6 \n\t"\ 471 "psrad $" #shift ", %%mm1 \n\t"\ 472 "packssdw %%mm2, %%mm2 \n\t" \ 473 "packssdw %%mm6, %%mm6 \n\t" \ 474 "movd %%mm2, 32+" #dst " \n\t"\ 475 "packssdw %%mm1, %%mm1 \n\t" \ 476 "packssdw %%mm5, %%mm5 \n\t" \ 477 "movd %%mm6, 48+" #dst " \n\t"\ 478 "movd %%mm1, 64+" #dst " \n\t"\ 479 "movd %%mm5, 80+" #dst " \n\t" 482 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
483 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
484 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
485 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
490 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
493 #define IDCT(src0, src4, src1, src5, dst, shift) \ 494 "movq " #src0 ", %%mm0 \n\t" \ 495 "movq " #src5 ", %%mm3 \n\t" \ 496 "movq 16(%2), %%mm4 \n\t" \ 497 "pmaddwd %%mm0, %%mm4 \n\t" \ 498 "movq 24(%2), %%mm5 \n\t" \ 499 "pmaddwd %%mm5, %%mm0 \n\t" \ 500 "movq %%mm4, %%mm6 \n\t" \ 501 "movq %%mm0, %%mm5 \n\t" \ 502 "movq 56(%2), %%mm1 \n\t" \ 503 "pmaddwd %%mm3, %%mm1 \n\t" \ 504 "movq 72(%2), %%mm7 \n\t" \ 505 "pmaddwd %%mm3, %%mm7 \n\t" \ 506 "paddd %%mm4, %%mm1 \n\t" \ 507 "paddd %%mm4, %%mm4 \n\t" \ 508 "psubd %%mm1, %%mm4 \n\t" \ 509 "psrad $" #shift ", %%mm1 \n\t"\ 510 "psrad $" #shift ", %%mm4 \n\t"\ 511 "movq %%mm0, %%mm2 \n\t" \ 512 "paddd %%mm7, %%mm0 \n\t" \ 513 "psubd %%mm7, %%mm2 \n\t" \ 514 "psrad $" #shift ", %%mm0 \n\t"\ 515 "psrad $" #shift ", %%mm2 \n\t"\ 516 "packssdw %%mm1, %%mm1 \n\t" \ 517 "movd %%mm1, " #dst " \n\t"\ 518 "packssdw %%mm0, %%mm0 \n\t" \ 519 "movd %%mm0, 16+" #dst " \n\t"\ 520 "packssdw %%mm2, %%mm2 \n\t" \ 521 "movd %%mm2, 96+" #dst " \n\t"\ 522 "packssdw %%mm4, %%mm4 \n\t" \ 523 "movd %%mm4, 112+" #dst " \n\t"\ 524 "movq 88(%2), %%mm1 \n\t" \ 525 "pmaddwd %%mm3, %%mm1 \n\t" \ 526 "movq %%mm5, %%mm2 \n\t" \ 527 "pmaddwd 104(%2), %%mm3 \n\t" \ 528 "paddd %%mm1, %%mm2 \n\t" \ 529 "psubd %%mm1, %%mm5 \n\t" \ 530 "psrad $" #shift ", %%mm2 \n\t"\ 531 "psrad $" #shift ", %%mm5 \n\t"\ 532 "movq %%mm6, %%mm1 \n\t" \ 533 "paddd %%mm3, %%mm6 \n\t" \ 534 "psubd %%mm3, %%mm1 \n\t" \ 535 "psrad $" #shift ", %%mm6 \n\t"\ 536 "psrad $" #shift ", %%mm1 \n\t"\ 537 "packssdw %%mm2, %%mm2 \n\t" \ 538 "packssdw %%mm6, %%mm6 \n\t" \ 539 "movd %%mm2, 32+" #dst " \n\t"\ 540 "packssdw %%mm1, %%mm1 \n\t" \ 541 "packssdw %%mm5, %%mm5 \n\t" \ 542 "movd %%mm6, 48+" #dst " \n\t"\ 543 "movd %%mm1, 64+" #dst " \n\t"\ 544 "movd %%mm5, 80+" #dst " \n\t" 548 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
549 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
550 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
551 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
556 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
559 #define IDCT(src0, src4, src1, src5, dst, shift) \ 560 "movq " #src0 ", %%mm0 \n\t" \ 561 "movq " #src1 ", %%mm2 \n\t" \ 562 "movq " #src5 ", %%mm3 \n\t" \ 563 "movq 16(%2), %%mm4 \n\t" \ 564 "pmaddwd %%mm0, %%mm4 \n\t" \ 565 "movq 24(%2), %%mm5 \n\t" \ 566 "pmaddwd %%mm5, %%mm0 \n\t" \ 567 "movq %%mm4, %%mm6 \n\t" \ 568 "movq 48(%2), %%mm7 \n\t" \ 569 "pmaddwd %%mm2, %%mm7 \n\t" \ 570 "movq %%mm0, %%mm5 \n\t" \ 571 "movq 56(%2), %%mm1 \n\t" \ 572 "pmaddwd %%mm3, %%mm1 \n\t" \ 573 "pmaddwd 64(%2), %%mm2 \n\t" \ 574 "paddd %%mm1, %%mm7 \n\t" \ 575 "movq 72(%2), %%mm1 \n\t" \ 576 "pmaddwd %%mm3, %%mm1 \n\t" \ 577 "paddd %%mm4, %%mm7 \n\t" \ 578 "paddd %%mm4, %%mm4 \n\t" \ 579 "psubd %%mm7, %%mm4 \n\t" \ 580 "paddd %%mm2, %%mm1 \n\t" \ 581 "psrad $" #shift ", %%mm7 \n\t"\ 582 "psrad $" #shift ", %%mm4 \n\t"\ 583 "movq %%mm0, %%mm2 \n\t" \ 584 "paddd %%mm1, %%mm0 \n\t" \ 585 "psubd %%mm1, %%mm2 \n\t" \ 586 "psrad $" #shift ", %%mm0 \n\t"\ 587 "psrad $" #shift ", %%mm2 \n\t"\ 588 "packssdw %%mm7, %%mm7 \n\t" \ 589 "movd %%mm7, " #dst " \n\t"\ 590 "packssdw %%mm0, %%mm0 \n\t" \ 591 "movd %%mm0, 16+" #dst " \n\t"\ 592 "packssdw %%mm2, %%mm2 \n\t" \ 593 "movd %%mm2, 96+" #dst " \n\t"\ 594 "packssdw %%mm4, %%mm4 \n\t" \ 595 "movd %%mm4, 112+" #dst " \n\t"\ 596 "movq " #src1 ", %%mm0 \n\t" \ 597 "movq 80(%2), %%mm4 \n\t" \ 598 "pmaddwd %%mm0, %%mm4 \n\t" \ 599 "movq 88(%2), %%mm7 \n\t" \ 600 "pmaddwd 96(%2), %%mm0 \n\t" \ 601 "pmaddwd %%mm3, %%mm7 \n\t" \ 602 "movq %%mm5, %%mm2 \n\t" \ 603 "pmaddwd 104(%2), %%mm3 \n\t" \ 604 "paddd %%mm7, %%mm4 \n\t" \ 605 "paddd %%mm4, %%mm2 \n\t" \ 606 "psubd %%mm4, %%mm5 \n\t" \ 607 "psrad $" #shift ", %%mm2 \n\t"\ 608 "psrad $" #shift ", %%mm5 \n\t"\ 609 "movq %%mm6, %%mm4 \n\t" \ 610 "paddd %%mm0, %%mm3 \n\t" \ 611 "paddd %%mm3, %%mm6 \n\t" \ 612 "psubd %%mm3, %%mm4 \n\t" \ 613 "psrad $" #shift ", %%mm6 \n\t"\ 614 "psrad $" #shift ", %%mm4 \n\t"\ 615 "packssdw %%mm2, %%mm2 \n\t" \ 616 "packssdw %%mm6, %%mm6 \n\t" \ 617 "movd %%mm2, 32+" #dst " \n\t"\ 618 "packssdw %%mm4, %%mm4 \n\t" \ 619 "packssdw %%mm5, %%mm5 \n\t" \ 620 "movd %%mm6, 48+" #dst " \n\t"\ 621 "movd %%mm4, 64+" #dst " \n\t"\ 622 "movd %%mm5, 80+" #dst " \n\t" 625 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
626 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
627 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
628 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
634 #define IDCT(src0, src4, src1, src5, dst, shift) \ 635 "movq " #src0 ", %%mm0 \n\t" \ 636 "movq " #src1 ", %%mm2 \n\t" \ 637 "movq 16(%2), %%mm4 \n\t" \ 638 "pmaddwd %%mm0, %%mm4 \n\t" \ 639 "movq 24(%2), %%mm5 \n\t" \ 640 "pmaddwd %%mm5, %%mm0 \n\t" \ 641 "movq %%mm4, %%mm6 \n\t" \ 642 "movq 48(%2), %%mm7 \n\t" \ 643 "pmaddwd %%mm2, %%mm7 \n\t" \ 644 "movq %%mm0, %%mm5 \n\t" \ 645 "movq 64(%2), %%mm3 \n\t"\ 646 "pmaddwd %%mm2, %%mm3 \n\t" \ 647 "paddd %%mm4, %%mm7 \n\t" \ 648 "paddd %%mm4, %%mm4 \n\t" \ 649 "psubd %%mm7, %%mm4 \n\t" \ 650 "psrad $" #shift ", %%mm7 \n\t"\ 651 "psrad $" #shift ", %%mm4 \n\t"\ 652 "movq %%mm0, %%mm1 \n\t" \ 653 "paddd %%mm3, %%mm0 \n\t" \ 654 "psubd %%mm3, %%mm1 \n\t" \ 655 "psrad $" #shift ", %%mm0 \n\t"\ 656 "psrad $" #shift ", %%mm1 \n\t"\ 657 "packssdw %%mm7, %%mm7 \n\t" \ 658 "movd %%mm7, " #dst " \n\t"\ 659 "packssdw %%mm0, %%mm0 \n\t" \ 660 "movd %%mm0, 16+" #dst " \n\t"\ 661 "packssdw %%mm1, %%mm1 \n\t" \ 662 "movd %%mm1, 96+" #dst " \n\t"\ 663 "packssdw %%mm4, %%mm4 \n\t" \ 664 "movd %%mm4, 112+" #dst " \n\t"\ 665 "movq 80(%2), %%mm4 \n\t" \ 666 "pmaddwd %%mm2, %%mm4 \n\t" \ 667 "pmaddwd 96(%2), %%mm2 \n\t" \ 668 "movq %%mm5, %%mm1 \n\t" \ 669 "paddd %%mm4, %%mm1 \n\t" \ 670 "psubd %%mm4, %%mm5 \n\t" \ 671 "psrad $" #shift ", %%mm1 \n\t"\ 672 "psrad $" #shift ", %%mm5 \n\t"\ 673 "movq %%mm6, %%mm4 \n\t" \ 674 "paddd %%mm2, %%mm6 \n\t" \ 675 "psubd %%mm2, %%mm4 \n\t" \ 676 "psrad $" #shift ", %%mm6 \n\t"\ 677 "psrad $" #shift ", %%mm4 \n\t"\ 678 "packssdw %%mm1, %%mm1 \n\t" \ 679 "packssdw %%mm6, %%mm6 \n\t" \ 680 "movd %%mm1, 32+" #dst " \n\t"\ 681 "packssdw %%mm4, %%mm4 \n\t" \ 682 "packssdw %%mm5, %%mm5 \n\t" \ 683 "movd %%mm6, 48+" #dst " \n\t"\ 684 "movd %%mm4, 64+" #dst " \n\t"\ 685 "movd %%mm5, 80+" #dst " \n\t" 689 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
690 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
691 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
692 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
698 #define IDCT(src0, src4, src1, src5, dst, shift) \ 699 "movq " #src0 ", %%mm0 \n\t" \ 700 "movq " #src4 ", %%mm1 \n\t" \ 701 "movq 16(%2), %%mm4 \n\t" \ 702 "pmaddwd %%mm0, %%mm4 \n\t" \ 703 "movq 24(%2), %%mm5 \n\t" \ 704 "pmaddwd %%mm5, %%mm0 \n\t" \ 705 "movq 32(%2), %%mm5 \n\t" \ 706 "pmaddwd %%mm1, %%mm5 \n\t" \ 707 "movq 40(%2), %%mm6 \n\t" \ 708 "pmaddwd %%mm6, %%mm1 \n\t" \ 709 "movq %%mm4, %%mm6 \n\t" \ 710 "paddd %%mm5, %%mm4 \n\t" \ 711 "psubd %%mm5, %%mm6 \n\t" \ 712 "movq %%mm0, %%mm5 \n\t" \ 713 "paddd %%mm1, %%mm0 \n\t" \ 714 "psubd %%mm1, %%mm5 \n\t" \ 715 "movq 8+" #src0 ", %%mm2 \n\t" \ 716 "movq 8+" #src4 ", %%mm3 \n\t" \ 717 "movq 16(%2), %%mm1 \n\t" \ 718 "pmaddwd %%mm2, %%mm1 \n\t" \ 719 "movq 24(%2), %%mm7 \n\t" \ 720 "pmaddwd %%mm7, %%mm2 \n\t" \ 721 "movq 32(%2), %%mm7 \n\t" \ 722 "pmaddwd %%mm3, %%mm7 \n\t" \ 723 "pmaddwd 40(%2), %%mm3 \n\t" \ 724 "paddd %%mm1, %%mm7 \n\t" \ 725 "paddd %%mm1, %%mm1 \n\t" \ 726 "psubd %%mm7, %%mm1 \n\t" \ 727 "paddd %%mm2, %%mm3 \n\t" \ 728 "paddd %%mm2, %%mm2 \n\t" \ 729 "psubd %%mm3, %%mm2 \n\t" \ 730 "psrad $" #shift ", %%mm4 \n\t"\ 731 "psrad $" #shift ", %%mm7 \n\t"\ 732 "psrad $" #shift ", %%mm3 \n\t"\ 733 "packssdw %%mm7, %%mm4 \n\t" \ 734 "movq %%mm4, " #dst " \n\t"\ 735 "psrad $" #shift ", %%mm0 \n\t"\ 736 "packssdw %%mm3, %%mm0 \n\t" \ 737 "movq %%mm0, 16+" #dst " \n\t"\ 738 "movq %%mm0, 96+" #dst " \n\t"\ 739 "movq %%mm4, 112+" #dst " \n\t"\ 740 "psrad $" #shift ", %%mm5 \n\t"\ 741 "psrad $" #shift ", %%mm6 \n\t"\ 742 "psrad $" #shift ", %%mm2 \n\t"\ 743 "packssdw %%mm2, %%mm5 \n\t" \ 744 "movq %%mm5, 32+" #dst " \n\t"\ 745 "psrad $" #shift ", %%mm1 \n\t"\ 746 "packssdw %%mm1, %%mm6 \n\t" \ 747 "movq %%mm6, 48+" #dst " \n\t"\ 748 "movq %%mm6, 64+" #dst " \n\t"\ 749 "movq %%mm5, 80+" #dst " \n\t" 753 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
755 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
763 #define IDCT(src0, src4, src1, src5, dst, shift) \ 764 "movq " #src0 ", %%mm0 \n\t" \ 765 "movq " #src4 ", %%mm1 \n\t" \ 766 "movq " #src1 ", %%mm2 \n\t" \ 767 "movq 16(%2), %%mm4 \n\t" \ 768 "pmaddwd %%mm0, %%mm4 \n\t" \ 769 "movq 24(%2), %%mm5 \n\t" \ 770 "pmaddwd %%mm5, %%mm0 \n\t" \ 771 "movq 32(%2), %%mm5 \n\t" \ 772 "pmaddwd %%mm1, %%mm5 \n\t" \ 773 "movq 40(%2), %%mm6 \n\t" \ 774 "pmaddwd %%mm6, %%mm1 \n\t" \ 775 "movq %%mm4, %%mm6 \n\t" \ 776 "movq 48(%2), %%mm7 \n\t" \ 777 "pmaddwd %%mm2, %%mm7 \n\t" \ 778 "paddd %%mm5, %%mm4 \n\t" \ 779 "psubd %%mm5, %%mm6 \n\t" \ 780 "movq %%mm0, %%mm5 \n\t" \ 781 "paddd %%mm1, %%mm0 \n\t" \ 782 "psubd %%mm1, %%mm5 \n\t" \ 783 "movq 64(%2), %%mm1 \n\t"\ 784 "pmaddwd %%mm2, %%mm1 \n\t" \ 785 "paddd %%mm4, %%mm7 \n\t" \ 786 "paddd %%mm4, %%mm4 \n\t" \ 787 "psubd %%mm7, %%mm4 \n\t" \ 788 "psrad $" #shift ", %%mm7 \n\t"\ 789 "psrad $" #shift ", %%mm4 \n\t"\ 790 "movq %%mm0, %%mm3 \n\t" \ 791 "paddd %%mm1, %%mm0 \n\t" \ 792 "psubd %%mm1, %%mm3 \n\t" \ 793 "psrad $" #shift ", %%mm0 \n\t"\ 794 "psrad $" #shift ", %%mm3 \n\t"\ 795 "packssdw %%mm7, %%mm7 \n\t" \ 796 "movd %%mm7, " #dst " \n\t"\ 797 "packssdw %%mm0, %%mm0 \n\t" \ 798 "movd %%mm0, 16+" #dst " \n\t"\ 799 "packssdw %%mm3, %%mm3 \n\t" \ 800 "movd %%mm3, 96+" #dst " \n\t"\ 801 "packssdw %%mm4, %%mm4 \n\t" \ 802 "movd %%mm4, 112+" #dst " \n\t"\ 803 "movq 80(%2), %%mm4 \n\t" \ 804 "pmaddwd %%mm2, %%mm4 \n\t" \ 805 "pmaddwd 96(%2), %%mm2 \n\t" \ 806 "movq %%mm5, %%mm3 \n\t" \ 807 "paddd %%mm4, %%mm3 \n\t" \ 808 "psubd %%mm4, %%mm5 \n\t" \ 809 "psrad $" #shift ", %%mm3 \n\t"\ 810 "psrad $" #shift ", %%mm5 \n\t"\ 811 "movq %%mm6, %%mm4 \n\t" \ 812 "paddd %%mm2, %%mm6 \n\t" \ 813 "psubd %%mm2, %%mm4 \n\t" \ 814 "psrad $" #shift ", %%mm6 \n\t"\ 815 "packssdw %%mm3, %%mm3 \n\t" \ 816 "movd %%mm3, 32+" #dst " \n\t"\ 817 "psrad $" #shift ", %%mm4 \n\t"\ 818 "packssdw %%mm6, %%mm6 \n\t" \ 819 "movd %%mm6, 48+" #dst " \n\t"\ 820 "packssdw %%mm4, %%mm4 \n\t" \ 821 "packssdw %%mm5, %%mm5 \n\t" \ 822 "movd %%mm4, 64+" #dst " \n\t"\ 823 "movd %%mm5, 80+" #dst " \n\t" 827 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
828 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
829 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
830 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
837 #define IDCT(src0, src4, src1, src5, dst, shift) \ 838 "movq " #src0 ", %%mm0 \n\t" \ 839 "movq 16(%2), %%mm4 \n\t" \ 840 "pmaddwd %%mm0, %%mm4 \n\t" \ 841 "movq 24(%2), %%mm5 \n\t" \ 842 "pmaddwd %%mm5, %%mm0 \n\t" \ 843 "psrad $" #shift ", %%mm4 \n\t"\ 844 "psrad $" #shift ", %%mm0 \n\t"\ 845 "movq 8+" #src0 ", %%mm2 \n\t" \ 846 "movq 16(%2), %%mm1 \n\t" \ 847 "pmaddwd %%mm2, %%mm1 \n\t" \ 848 "movq 24(%2), %%mm7 \n\t" \ 849 "pmaddwd %%mm7, %%mm2 \n\t" \ 850 "movq 32(%2), %%mm7 \n\t" \ 851 "psrad $" #shift ", %%mm1 \n\t"\ 852 "packssdw %%mm1, %%mm4 \n\t" \ 853 "movq %%mm4, " #dst " \n\t"\ 854 "psrad $" #shift ", %%mm2 \n\t"\ 855 "packssdw %%mm2, %%mm0 \n\t" \ 856 "movq %%mm0, 16+" #dst " \n\t"\ 857 "movq %%mm0, 96+" #dst " \n\t"\ 858 "movq %%mm4, 112+" #dst " \n\t"\ 859 "movq %%mm0, 32+" #dst " \n\t"\ 860 "movq %%mm4, 48+" #dst " \n\t"\ 861 "movq %%mm4, 64+" #dst " \n\t"\ 862 "movq %%mm0, 80+" #dst " \n\t" 865 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
867 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)
static void idct(int16_t block[64])
Memory handling functions.
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define LOCAL_ALIGNED_8(t, v,...)
#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)
void(* ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
#define DECLARE_ASM_CONST(n, t, v)
Declare a static constant aligned variable appropriate for use in inline assembly code...
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_simple_idct_mmx(int16_t *block)
void(* ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
static const int16_t coeffs[]
#define NAMED_CONSTRAINTS_ADD(...)