FFmpeg  4.3
rgb2rgb_template.c
Go to the documentation of this file.
1 /*
2  * software RGB to RGB converter
 * pluralized by software PAL8 to RGB converter
4  * software YUV to YUV converter
5  * software YUV to RGB converter
6  * Written by Nick Kurshev.
7  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8  * lot of big-endian byte order fixes by Alex Beregszaszi
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 
30 #include "libavutil/attributes.h"
31 #include "libavutil/x86/asm.h"
32 
33 #undef PREFETCH
34 #undef MOVNTQ
35 #undef EMMS
36 #undef SFENCE
37 #undef PAVGB
38 
39 #if COMPILE_TEMPLATE_AMD3DNOW
40 #define PREFETCH "prefetch"
41 #define PAVGB "pavgusb"
42 #elif COMPILE_TEMPLATE_MMXEXT
43 #define PREFETCH "prefetchnta"
44 #define PAVGB "pavgb"
45 #else
46 #define PREFETCH " # nop"
47 #endif
48 
49 #if COMPILE_TEMPLATE_AMD3DNOW
50 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
51 #define EMMS "femms"
52 #else
53 #define EMMS "emms"
54 #endif
55 
56 #if COMPILE_TEMPLATE_MMXEXT
57 #define MOVNTQ "movntq"
58 #define SFENCE "sfence"
59 #else
60 #define MOVNTQ "movq"
61 #define SFENCE " # nop"
62 #endif
63 
64 #if !COMPILE_TEMPLATE_SSE2
65 
66 #if !COMPILE_TEMPLATE_AMD3DNOW
67 
/*
 * Convert packed 24-bit pixels to packed 32-bit pixels, filling the fourth
 * byte of each output pixel with opaque alpha (the scalar tail stores 255;
 * the MMX path ORs in mask32a, presumably the matching alpha-byte mask --
 * it is declared elsewhere in rgb2rgb).
 *
 * src_size is in bytes.  The MMX loop handles 8 pixels per iteration
 * (24 input bytes -> 32 output bytes); the remainder is done bytewise
 * after leaving MMX state.
 */
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* keep looping only while a full 24-byte group remains */
    mm_end = end - 23;
    __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            /* gather four 3-byte pixels into the two dwords of mm0/mm1,
             * four more into mm2/mm3 (movd reads 4 bytes, loads overlap) */
            "movd (%1), %%mm0 \n\t"
            "punpckldq 3(%1), %%mm0 \n\t"
            "movd 6(%1), %%mm1 \n\t"
            "punpckldq 9(%1), %%mm1 \n\t"
            "movd 12(%1), %%mm2 \n\t"
            "punpckldq 15(%1), %%mm2 \n\t"
            "movd 18(%1), %%mm3 \n\t"
            "punpckldq 21(%1), %%mm3 \n\t"
            /* force the fourth byte of every dword on */
            "por %%mm7, %%mm0 \n\t"
            "por %%mm7, %%mm1 \n\t"
            "por %%mm7, %%mm2 \n\t"
            "por %%mm7, %%mm3 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            MOVNTQ" %%mm1, 8(%0) \n\t"
            MOVNTQ" %%mm2, 16(%0) \n\t"
            MOVNTQ" %%mm3, 24(%0)"
            :: "r"(dest), "r"(s)
            :"memory");
        dest += 32;
        s += 24;
    }
    __asm__ volatile(SFENCE:::"memory"); /* order the non-temporal stores */
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail: copy 3 color bytes, then write opaque alpha */
    while (s < end) {
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 255;
    }
}
111 
/*
 * Store eight 32-bit pixels as eight packed 24-bit pixels (24 bytes) at (%0).
 *
 * On entry mm0/mm1/mm4/mm5 hold the eight source pixels and
 * mm2/mm3/mm6/mm7 hold copies of them.  mask24l/mask24h (declared
 * elsewhere) keep the low/high 3-byte halves of each qword while the
 * unused fourth byte of every pixel is discarded; the shifted halves are
 * then merged and the four sparse qwords are compacted into three dense
 * ones, written with three MOVNTQ stores.  Clobbers all MMX registers.
 */
#define STORE_BGR24_MMX \
    "psrlq $8, %%mm2 \n\t" \
    "psrlq $8, %%mm3 \n\t" \
    "psrlq $8, %%mm6 \n\t" \
    "psrlq $8, %%mm7 \n\t" \
    "pand "MANGLE(mask24l)", %%mm0\n\t" \
    "pand "MANGLE(mask24l)", %%mm1\n\t" \
    "pand "MANGLE(mask24l)", %%mm4\n\t" \
    "pand "MANGLE(mask24l)", %%mm5\n\t" \
    "pand "MANGLE(mask24h)", %%mm2\n\t" \
    "pand "MANGLE(mask24h)", %%mm3\n\t" \
    "pand "MANGLE(mask24h)", %%mm6\n\t" \
    "pand "MANGLE(mask24h)", %%mm7\n\t" \
    "por %%mm2, %%mm0 \n\t" \
    "por %%mm3, %%mm1 \n\t" \
    "por %%mm6, %%mm4 \n\t" \
    "por %%mm7, %%mm5 \n\t" \
    \
    "movq %%mm1, %%mm2 \n\t" \
    "movq %%mm4, %%mm3 \n\t" \
    "psllq $48, %%mm2 \n\t" \
    "psllq $32, %%mm3 \n\t" \
    "por %%mm2, %%mm0 \n\t" \
    "psrlq $16, %%mm1 \n\t" \
    "psrlq $32, %%mm4 \n\t" \
    "psllq $16, %%mm5 \n\t" \
    "por %%mm3, %%mm1 \n\t" \
    "por %%mm5, %%mm4 \n\t" \
    \
    MOVNTQ" %%mm0, (%0) \n\t" \
    MOVNTQ" %%mm1, 8(%0) \n\t" \
    MOVNTQ" %%mm4, 16(%0)"
144 
145 
146 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
147 {
148  uint8_t *dest = dst;
149  const uint8_t *s = src;
150  const uint8_t *end;
151  const uint8_t *mm_end;
152  end = s + src_size;
153  __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
154  mm_end = end - 31;
155  while (s < mm_end) {
156  __asm__ volatile(
157  PREFETCH" 32(%1) \n\t"
158  "movq (%1), %%mm0 \n\t"
159  "movq 8(%1), %%mm1 \n\t"
160  "movq 16(%1), %%mm4 \n\t"
161  "movq 24(%1), %%mm5 \n\t"
162  "movq %%mm0, %%mm2 \n\t"
163  "movq %%mm1, %%mm3 \n\t"
164  "movq %%mm4, %%mm6 \n\t"
165  "movq %%mm5, %%mm7 \n\t"
167  :: "r"(dest), "r"(s)
168  NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
169  :"memory");
170  dest += 24;
171  s += 32;
172  }
173  __asm__ volatile(SFENCE:::"memory");
174  __asm__ volatile(EMMS:::"memory");
175  while (s < end) {
176  *dest++ = *s++;
177  *dest++ = *s++;
178  *dest++ = *s++;
179  s++;
180  }
181 }
182 
183 /*
184  original by Strepto/Astral
185  ported to gcc & bugfixed: A'rpi
186  MMXEXT, 3DNOW optimization by Nick Kurshev
187  32-bit C version, and and&add trick by Michael Niedermayer
188 */
/*
 * Convert RGB555 to RGB565 (2 bytes/pixel both ways).
 *
 * Uses the and&add trick credited above: (x & 0x7FFF7FFF) + (x & 0x7FE07FE0)
 * adds the red/green field (bits 5..14) to itself, i.e. shifts it left by
 * one into the 565 positions while leaving blue (bits 0..4) untouched; the
 * new low bit of green is 0.  mask15s is presumably the 0x7FE07FE0 pattern
 * used by the MMX path for the same purpose -- declared elsewhere.
 */
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size)
{
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*s));
    __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
    mm_end = end - 15;
    /* MMX loop: 16 bytes (8 pixels) per iteration */
    while (s<mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movq (%1), %%mm0 \n\t"
            "movq 8(%1), %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "pand %%mm4, %%mm0 \n\t"
            "pand %%mm4, %%mm2 \n\t"
            "paddw %%mm1, %%mm0 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            MOVNTQ" %%mm2, 8(%0)"
            :: "r"(d), "r"(s)
        );
        d+=16;
        s+=16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* 32-bit C version for remaining pixel pairs */
    mm_end = end - 3;
    while (s < mm_end) {
        register unsigned x= *((const uint32_t *)s);
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d+=4;
        s+=4;
    }
    /* at most one pixel left */
    if (s < end) {
        register unsigned short x= *((const uint16_t *)s);
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}
231 
/*
 * Convert RGB565 to RGB555: red and green are shifted right by one bit
 * (dropping the low green bit) while blue is kept as-is, i.e.
 * out = ((x >> 1) & 0x7FE0) | (x & 0x001F) per 16-bit pixel.
 * mask15rg/mask15b are the corresponding qword masks, declared elsewhere.
 */
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size)
{
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*s));
    __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
    mm_end = end - 15;
    /* MMX loop: 16 bytes (8 pixels) per iteration */
    while (s<mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movq (%1), %%mm0 \n\t"
            "movq 8(%1), %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $1, %%mm0 \n\t"
            "psrlq $1, %%mm2 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm3 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            MOVNTQ" %%mm2, 8(%0)"
            :: "r"(d), "r"(s)
        );
        d+=16;
        s+=16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* 32-bit C version for remaining pixel pairs */
    mm_end = end - 3;
    while (s < mm_end) {
        register uint32_t x= *((const uint32_t*)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s+=4;
        d+=4;
    }
    /* at most one pixel left */
    if (s < end) {
        register uint16_t x= *((const uint16_t*)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
    }
}
279 
/*
 * Convert packed 32-bit pixels to 16-bit RGB565.  Per the scalar tail the
 * output layout is: bits 0..7 of the source >>3 into bits 0..4, the
 * 0xFC00 field >>5 into the green bits, and the 0xF80000 field >>8 into
 * the top 5 bits.  The MMX path reaches the same result with a
 * pmaddwd-based merge using the mask3216g/mask3216br/mul3216 constants
 * (declared elsewhere); the loop is written as a single asm block with
 * its own jump-into-loop entry ("jmp 2f") so the counter check runs first.
 * 4 pixels are converted per iteration.
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    mm_end = end - 15;
    __asm__ volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ".p2align 4 \n\t"
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $5, %%mm0 \n\t"
        "pslld $11, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
    );
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one pixel at a time */
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
}
329 
/*
 * Convert packed 32-bit pixels to 16-bit 565 with red and blue swapped
 * relative to rgb32to16: per the scalar tail, the 0xF8 field goes to the
 * TOP five bits (<<8) and the 0xF80000 field to the bottom five (>>19),
 * green stays in the middle.  The MMX loop builds the three fields via
 * shift+mask (red_16mask/green_16mask/blue_16mask, declared elsewhere)
 * and merges 4 pixels per iteration.
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movd (%1), %%mm0 \n\t"
            "movd 4(%1), %%mm3 \n\t"
            "punpckldq 8(%1), %%mm0 \n\t"
            "punpckldq 12(%1), %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            /* red field */
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            /* green field */
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            /* blue field */
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one pixel at a time */
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
    }
}
384 
/*
 * Convert packed 32-bit pixels to 16-bit RGB555 (1-5-5-5).  Same
 * structure as rgb32to16 -- single asm block with jump-into-loop entry,
 * pmaddwd merge via mask3215g/mask3216br/mul3215 (declared elsewhere),
 * 4 pixels per iteration -- but the final shifts are $6/$10 instead of
 * $5/$11 to land in the 555 bit positions, as the scalar tail shows:
 * B>>3, (G&0xF800)>>6, (R&0xF80000)>>9.
 */
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    mm_end = end - 15;
    __asm__ volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ".p2align 4 \n\t"
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $6, %%mm0 \n\t"
        "pslld $10, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
    );
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one pixel at a time */
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
    }
}
434 
/*
 * Convert packed 32-bit pixels to 16-bit 555 with red and blue swapped,
 * the 555 counterpart of rgb32tobgr16: per the scalar tail the 0xF8
 * field goes to bits 10..14 (<<7) and the 0xF80000 field to bits 0..4
 * (>>19).  Uses red_15mask/green_15mask/blue_15mask (declared elsewhere);
 * 4 pixels per MMX iteration.
 */
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movd (%1), %%mm0 \n\t"
            "movd 4(%1), %%mm3 \n\t"
            "punpckldq 8(%1), %%mm0 \n\t"
            "punpckldq 12(%1), %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            /* red field */
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            /* green field */
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            /* blue field */
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one pixel at a time */
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
    }
}
489 
/*
 * Convert packed 24-bit pixels to 16-bit 565, treating the first byte of
 * each triplet as the LOW 5 output bits (see the scalar tail: b>>3,
 * (g&0xFC)<<3, (r&0xF8)<<8).  The MMX loop handles 4 pixels (12 bytes)
 * per iteration, gathering triplets with overlapping movd/punpckldq
 * loads; red_16mask/green_16mask/blue_16mask are declared elsewhere.
 */
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* keep looping only while a full 12-byte group remains */
    mm_end = end - 11;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movd (%1), %%mm0 \n\t"
            "movd 3(%1), %%mm3 \n\t"
            "punpckldq 6(%1), %%mm0 \n\t"
            "punpckldq 9(%1), %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            /* low (blue) field */
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            /* green field */
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            /* high (red) field */
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one 3-byte pixel at a time */
    while (s < end) {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
546 
/*
 * Convert packed 24-bit pixels to 16-bit 565, treating the first byte of
 * each triplet as the HIGH 5 output bits (scalar tail reads r,g,b in
 * that order and packs (r&0xF8)<<8).  Mirrors rgb24tobgr16 but builds
 * the fields with opposite shifts ($8 left for the first byte, $19 right
 * for the third).  4 pixels (12 bytes) per MMX iteration;
 * red_16mask/green_16mask/blue_16mask declared elsewhere.
 */
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movd (%1), %%mm0 \n\t"
            "movd 3(%1), %%mm3 \n\t"
            "punpckldq 6(%1), %%mm0 \n\t"
            "punpckldq 9(%1), %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            /* high field from first byte */
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            /* green field */
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            /* low field from third byte */
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one 3-byte pixel at a time */
    while (s < end) {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
603 
/*
 * Convert packed 24-bit pixels to 16-bit 555; first byte of each triplet
 * lands in the low 5 output bits (scalar tail: b>>3, (g&0xF8)<<2,
 * (r&0xF8)<<7).  555 counterpart of rgb24tobgr16 -- green and high-field
 * shifts are $6/$9 instead of $5/$8.  4 pixels (12 bytes) per MMX
 * iteration; red_15mask/green_15mask/blue_15mask declared elsewhere.
 */
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    /* keep looping only while a full 12-byte group remains */
    mm_end = end - 11;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movd (%1), %%mm0 \n\t"
            "movd 3(%1), %%mm3 \n\t"
            "punpckldq 6(%1), %%mm0 \n\t"
            "punpckldq 9(%1), %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            /* low (blue) field */
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            /* green field */
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            /* high (red) field */
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one 3-byte pixel at a time */
    while (s < end) {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
660 
/*
 * Convert packed 24-bit pixels to 16-bit 555; first byte of each triplet
 * lands in the HIGH 5 output bits (scalar tail reads r,g,b in order and
 * packs (r&0xF8)<<7).  555 counterpart of rgb24to16 -- first-byte shift
 * is $7 left, green shift $6.  4 pixels (12 bytes) per MMX iteration;
 * red_15mask/green_15mask/blue_15mask declared elsewhere.
 */
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
    const uint8_t *mm_end;
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movd (%1), %%mm0 \n\t"
            "movd 3(%1), %%mm3 \n\t"
            "punpckldq 6(%1), %%mm0 \n\t"
            "punpckldq 9(%1), %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            /* high field from first byte */
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            /* green field */
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            /* low field from third byte */
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, (%0) \n\t"
            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail, one 3-byte pixel at a time */
    while (s < end) {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
717 
718 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
719 {
720  const uint16_t *end;
721  const uint16_t *mm_end;
722  uint8_t *d = dst;
723  const uint16_t *s = (const uint16_t*)src;
724  end = s + src_size/2;
725  __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
726  mm_end = end - 7;
727  while (s < mm_end) {
728  __asm__ volatile(
729  PREFETCH" 32(%1) \n\t"
730  "movq (%1), %%mm0 \n\t"
731  "movq (%1), %%mm1 \n\t"
732  "movq (%1), %%mm2 \n\t"
733  "pand %2, %%mm0 \n\t"
734  "pand %3, %%mm1 \n\t"
735  "pand %4, %%mm2 \n\t"
736  "psllq $5, %%mm0 \n\t"
737  "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
738  "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
739  "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
740  "movq %%mm0, %%mm3 \n\t"
741  "movq %%mm1, %%mm4 \n\t"
742  "movq %%mm2, %%mm5 \n\t"
743  "punpcklwd %5, %%mm0 \n\t"
744  "punpcklwd %5, %%mm1 \n\t"
745  "punpcklwd %5, %%mm2 \n\t"
746  "punpckhwd %5, %%mm3 \n\t"
747  "punpckhwd %5, %%mm4 \n\t"
748  "punpckhwd %5, %%mm5 \n\t"
749  "psllq $8, %%mm1 \n\t"
750  "psllq $16, %%mm2 \n\t"
751  "por %%mm1, %%mm0 \n\t"
752  "por %%mm2, %%mm0 \n\t"
753  "psllq $8, %%mm4 \n\t"
754  "psllq $16, %%mm5 \n\t"
755  "por %%mm4, %%mm3 \n\t"
756  "por %%mm5, %%mm3 \n\t"
757 
758  "movq %%mm0, %%mm6 \n\t"
759  "movq %%mm3, %%mm7 \n\t"
760 
761  "movq 8(%1), %%mm0 \n\t"
762  "movq 8(%1), %%mm1 \n\t"
763  "movq 8(%1), %%mm2 \n\t"
764  "pand %2, %%mm0 \n\t"
765  "pand %3, %%mm1 \n\t"
766  "pand %4, %%mm2 \n\t"
767  "psllq $5, %%mm0 \n\t"
768  "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
769  "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
770  "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
771  "movq %%mm0, %%mm3 \n\t"
772  "movq %%mm1, %%mm4 \n\t"
773  "movq %%mm2, %%mm5 \n\t"
774  "punpcklwd %5, %%mm0 \n\t"
775  "punpcklwd %5, %%mm1 \n\t"
776  "punpcklwd %5, %%mm2 \n\t"
777  "punpckhwd %5, %%mm3 \n\t"
778  "punpckhwd %5, %%mm4 \n\t"
779  "punpckhwd %5, %%mm5 \n\t"
780  "psllq $8, %%mm1 \n\t"
781  "psllq $16, %%mm2 \n\t"
782  "por %%mm1, %%mm0 \n\t"
783  "por %%mm2, %%mm0 \n\t"
784  "psllq $8, %%mm4 \n\t"
785  "psllq $16, %%mm5 \n\t"
786  "por %%mm4, %%mm3 \n\t"
787  "por %%mm5, %%mm3 \n\t"
788 
789  :"=m"(*d)
790  :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
791  NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi)
792  :"memory");
793  /* borrowed 32 to 24 */
794  __asm__ volatile(
795  "movq %%mm0, %%mm4 \n\t"
796  "movq %%mm3, %%mm5 \n\t"
797  "movq %%mm6, %%mm0 \n\t"
798  "movq %%mm7, %%mm1 \n\t"
799 
800  "movq %%mm4, %%mm6 \n\t"
801  "movq %%mm5, %%mm7 \n\t"
802  "movq %%mm0, %%mm2 \n\t"
803  "movq %%mm1, %%mm3 \n\t"
804 
806 
807  :: "r"(d), "m"(*s)
808  NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
809  :"memory");
810  d += 24;
811  s += 8;
812  }
813  __asm__ volatile(SFENCE:::"memory");
814  __asm__ volatile(EMMS:::"memory");
815  while (s < end) {
816  register uint16_t bgr;
817  bgr = *s++;
818  *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
819  *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
820  *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
821  }
822 }
823 
824 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
825 {
826  const uint16_t *end;
827  const uint16_t *mm_end;
828  uint8_t *d = (uint8_t *)dst;
829  const uint16_t *s = (const uint16_t *)src;
830  end = s + src_size/2;
831  __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
832  mm_end = end - 7;
833  while (s < mm_end) {
834  __asm__ volatile(
835  PREFETCH" 32(%1) \n\t"
836  "movq (%1), %%mm0 \n\t"
837  "movq (%1), %%mm1 \n\t"
838  "movq (%1), %%mm2 \n\t"
839  "pand %2, %%mm0 \n\t"
840  "pand %3, %%mm1 \n\t"
841  "pand %4, %%mm2 \n\t"
842  "psllq $5, %%mm0 \n\t"
843  "psrlq $1, %%mm2 \n\t"
844  "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
845  "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
846  "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
847  "movq %%mm0, %%mm3 \n\t"
848  "movq %%mm1, %%mm4 \n\t"
849  "movq %%mm2, %%mm5 \n\t"
850  "punpcklwd %5, %%mm0 \n\t"
851  "punpcklwd %5, %%mm1 \n\t"
852  "punpcklwd %5, %%mm2 \n\t"
853  "punpckhwd %5, %%mm3 \n\t"
854  "punpckhwd %5, %%mm4 \n\t"
855  "punpckhwd %5, %%mm5 \n\t"
856  "psllq $8, %%mm1 \n\t"
857  "psllq $16, %%mm2 \n\t"
858  "por %%mm1, %%mm0 \n\t"
859  "por %%mm2, %%mm0 \n\t"
860  "psllq $8, %%mm4 \n\t"
861  "psllq $16, %%mm5 \n\t"
862  "por %%mm4, %%mm3 \n\t"
863  "por %%mm5, %%mm3 \n\t"
864 
865  "movq %%mm0, %%mm6 \n\t"
866  "movq %%mm3, %%mm7 \n\t"
867 
868  "movq 8(%1), %%mm0 \n\t"
869  "movq 8(%1), %%mm1 \n\t"
870  "movq 8(%1), %%mm2 \n\t"
871  "pand %2, %%mm0 \n\t"
872  "pand %3, %%mm1 \n\t"
873  "pand %4, %%mm2 \n\t"
874  "psllq $5, %%mm0 \n\t"
875  "psrlq $1, %%mm2 \n\t"
876  "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
877  "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
878  "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
879  "movq %%mm0, %%mm3 \n\t"
880  "movq %%mm1, %%mm4 \n\t"
881  "movq %%mm2, %%mm5 \n\t"
882  "punpcklwd %5, %%mm0 \n\t"
883  "punpcklwd %5, %%mm1 \n\t"
884  "punpcklwd %5, %%mm2 \n\t"
885  "punpckhwd %5, %%mm3 \n\t"
886  "punpckhwd %5, %%mm4 \n\t"
887  "punpckhwd %5, %%mm5 \n\t"
888  "psllq $8, %%mm1 \n\t"
889  "psllq $16, %%mm2 \n\t"
890  "por %%mm1, %%mm0 \n\t"
891  "por %%mm2, %%mm0 \n\t"
892  "psllq $8, %%mm4 \n\t"
893  "psllq $16, %%mm5 \n\t"
894  "por %%mm4, %%mm3 \n\t"
895  "por %%mm5, %%mm3 \n\t"
896  :"=m"(*d)
897  :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
898  NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi)
899  :"memory");
900  /* borrowed 32 to 24 */
901  __asm__ volatile(
902  "movq %%mm0, %%mm4 \n\t"
903  "movq %%mm3, %%mm5 \n\t"
904  "movq %%mm6, %%mm0 \n\t"
905  "movq %%mm7, %%mm1 \n\t"
906 
907  "movq %%mm4, %%mm6 \n\t"
908  "movq %%mm5, %%mm7 \n\t"
909  "movq %%mm0, %%mm2 \n\t"
910  "movq %%mm1, %%mm3 \n\t"
911 
913 
914  :: "r"(d), "m"(*s)
915  NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
916  :"memory");
917  d += 24;
918  s += 8;
919  }
920  __asm__ volatile(SFENCE:::"memory");
921  __asm__ volatile(EMMS:::"memory");
922  while (s < end) {
923  register uint16_t bgr;
924  bgr = *s++;
925  *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
926  *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
927  *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
928  }
929 }
930 
931 /*
932  * mm0 = 00 B3 00 B2 00 B1 00 B0
933  * mm1 = 00 G3 00 G2 00 G1 00 G0
934  * mm2 = 00 R3 00 R2 00 R1 00 R0
935  * mm6 = FF FF FF FF FF FF FF FF
936  * mm7 = 00 00 00 00 00 00 00 00
937  */
/*
 * Interleave the four planar pixels described above into four 32-bit
 * pixels with an all-ones fourth byte, and store them with two MOVNTQ
 * stores to (%0).  Requires mm6 = all ones and mm7 = zero on entry;
 * clobbers mm0-mm3.
 */
#define PACK_RGB32 \
    "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
    "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
    "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
    "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
    "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
    "movq %%mm0, %%mm3 \n\t" \
    "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
    "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
    MOVNTQ" %%mm0, (%0) \n\t" \
    MOVNTQ" %%mm3, 8(%0) \n\t" \
949 
/*
 * Convert 16-bit 555 pixels to packed 32-bit pixels with opaque alpha.
 * The MMX loop (4 pixels/iteration) isolates the three 5-bit fields,
 * scales them to 8 bits with pmulhw against mul15_mid/mul15_hi (declared
 * elsewhere), then interleaves and stores via PACK_RGB32 (mm6 preset to
 * all ones for the alpha byte, mm7 to zero).  The scalar tail expands
 * each channel by bit replication and writes alpha = 255.
 */
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *end;
    const uint16_t *mm_end;
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");    /* zero, for PACK_RGB32 */
    __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); /* all ones = alpha */
    mm_end = end - 3;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movq (%1), %%mm0 \n\t"
            "movq (%1), %%mm1 \n\t"
            "movq (%1), %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $5, %%mm0 \n\t"
            "pmulhw %5, %%mm0 \n\t"
            "pmulhw %5, %%mm1 \n\t"
            "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
            PACK_RGB32
            ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
             NAMED_CONSTRAINTS_ADD(mul15_hi)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail: bit-replicate each 5-bit channel, opaque alpha */
    while (s < end) {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
        *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
        *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
        *d++ = 255;
    }
}
992 
/*
 * Convert 16-bit 565 pixels to packed 32-bit pixels with opaque alpha.
 * Like RENAME(rgb15to32) but with the 565 masks, mul16_mid for the
 * 6-bit green channel, and an extra "psrlq $1" to align the 5-bit top
 * field before the pmulhw scaling.  4 pixels per MMX iteration via
 * PACK_RGB32; the scalar tail bit-replicates and writes alpha = 255.
 */
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *end;
    const uint16_t *mm_end;
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t*)src;
    end = s + src_size/2;
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");    /* zero, for PACK_RGB32 */
    __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); /* all ones = alpha */
    mm_end = end - 3;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32(%1) \n\t"
            "movq (%1), %%mm0 \n\t"
            "movq (%1), %%mm1 \n\t"
            "movq (%1), %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $5, %%mm0 \n\t"
            "psrlq $1, %%mm2 \n\t"
            "pmulhw %5, %%mm0 \n\t"
            "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
            "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
            PACK_RGB32
            ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
             NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
    /* scalar tail: bit-replicate each channel, opaque alpha */
    while (s < end) {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
        *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
        *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
        *d++ = 255;
    }
}
1036 
/*
 * Swap the first and third byte of every 3-byte pixel (RGB24 <-> BGR24).
 *
 * The MMX loop uses a negative-counter idiom: mmx_size = 23 - src_size
 * is negative whenever more than 23 bytes remain, the src/dst operands
 * are biased by -mmx_size, and the counter climbs by 24 toward zero so a
 * single "js" closes the loop ("jns 2f" skips the loop entirely for
 * inputs of 23 bytes or fewer).  Each iteration rebuilds 24 bytes from
 * three overlapping 8-byte loads, selecting channels with
 * mask24r/mask24g/mask24b (declared elsewhere).  The bytewise tail
 * re-derives the number of unprocessed bytes from the final counter and
 * swaps byte 0 and byte 2 of each remaining pixel.
 */
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
    unsigned i;
    x86_reg mmx_size= 23 - src_size;
    __asm__ volatile (
        "test %%"FF_REG_a", %%"FF_REG_a" \n\t"
        "jns 2f \n\t"
        "movq "MANGLE(mask24r)", %%mm5 \n\t"
        "movq "MANGLE(mask24g)", %%mm6 \n\t"
        "movq "MANGLE(mask24b)", %%mm7 \n\t"
        ".p2align 4 \n\t"
        "1: \n\t"
        PREFETCH" 32(%1, %%"FF_REG_a") \n\t"
        "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
        "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG
        "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B
        "psllq $16, %%mm0 \n\t" // 00 BGR BGR
        "pand %%mm5, %%mm0 \n\t"
        "pand %%mm6, %%mm1 \n\t"
        "pand %%mm7, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        "movq 6(%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
        MOVNTQ" %%mm1,(%2, %%"FF_REG_a") \n\t" // RGB RGB RG
        "movq 8(%1, %%"FF_REG_a"), %%mm1 \n\t" // R BGR BGR B
        "movq 10(%1, %%"FF_REG_a"), %%mm2 \n\t" // GR BGR BGR
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm6, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        "movq 14(%1, %%"FF_REG_a"), %%mm0 \n\t" // R BGR BGR B
        MOVNTQ" %%mm1, 8(%2, %%"FF_REG_a")\n\t" // B RGB RGB R
        "movq 16(%1, %%"FF_REG_a"), %%mm1 \n\t" // GR BGR BGR
        "movq 18(%1, %%"FF_REG_a"), %%mm2 \n\t" // BGR BGR BG
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm5, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a") \n\t"
        "add $24, %%"FF_REG_a" \n\t"
        " js 1b \n\t"
        "2: \n\t"
        : "+a" (mmx_size)
        : "r" (src-mmx_size), "r"(dst-mmx_size)
          NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b)
    );

    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");

    if (mmx_size==23) return; //finished, was multiple of 8

    /* rewind to the first unprocessed pixel */
    src+= src_size;
    dst+= src_size;
    src_size= 23-mmx_size;
    src-= src_size;
    dst-= src_size;
    for (i=0; i<src_size; i+=3) {
        register uint8_t x;
        x = src[i + 2];
        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = x;
    }
}
1104 
/**
 * Interleave planar Y, U and V data into packed YUYV (YUY2).
 *
 * Each MMX iteration packs 16 luma samples with 8 U and 8 V samples
 * into 32 output bytes. The chroma pointers only advance once every
 * vertLumPerChroma luma lines (2 for 4:2:0 input, 1 for 4:2:2), so
 * vertLumPerChroma must be a power of two (it is used as a bitmask).
 *
 * NOTE(review): the asm loop processes width in chunks of 16 luma
 * pixels with no scalar tail; callers document width as a multiple
 * of 16 — confirm before relaxing that constraint.
 *
 * @param ysrc/usrc/vsrc  source planes
 * @param dst             packed YUYV output
 * @param lumStride/chromStride/dstStride  strides in bytes
 */
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                           int width, int height,
                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
{
    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y++) {
        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
        __asm__ volatile(
            "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
            ".p2align 4 \n\t"
            "1: \n\t"
            PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
            PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
            PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
            "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
            "movq %%mm0, %%mm2 \n\t" // U(0)
            "movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
            "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
            "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)

            "movq (%1, %%"FF_REG_a",2), %%mm3 \n\t" // Y(0)
            "movq 8(%1, %%"FF_REG_a",2), %%mm5 \n\t" // Y(8)
            "movq %%mm3, %%mm4 \n\t" // Y(0)
            "movq %%mm5, %%mm6 \n\t" // Y(8)
            "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
            "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
            "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
            "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)

            MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t"
            MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
            MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t"
            MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"

            "add $8, %%"FF_REG_a" \n\t"
            "cmp %4, %%"FF_REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
            : "%"FF_REG_a
        );
        /* Advance chroma only on the last luma line of each chroma group. */
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
    __asm__(EMMS" \n\t"
            SFENCE" \n\t"
            :::"memory");
}
1157 
1158 /**
1159  * Height should be a multiple of 2 and width should be a multiple of 16.
1160  * (If this is a problem for anyone then tell me, and I will fix it.)
1161  */
1162 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1163  int width, int height,
1164  int lumStride, int chromStride, int dstStride)
1165 {
1166  //FIXME interpolate chroma
1167  RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
1168 }
1169 
/**
 * Interleave planar Y, U and V data into packed UYVY.
 *
 * Mirror of yuvPlanartoyuy2 with the punpck operand order reversed so
 * the chroma byte lands first in each 16-bit pair (U Y V Y ordering).
 * vertLumPerChroma must be a power of two; chroma advances once per
 * vertLumPerChroma luma lines.
 *
 * NOTE(review): the asm loop has no scalar tail; callers document width
 * as a multiple of 16 — confirm before relaxing that constraint.
 */
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                           int width, int height,
                                           int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
{
    int y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y++) {
        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
        __asm__ volatile(
            "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
            ".p2align 4 \n\t"
            "1: \n\t"
            PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
            PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
            PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
            "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
            "movq %%mm0, %%mm2 \n\t" // U(0)
            "movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
            "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
            "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)

            "movq (%1, %%"FF_REG_a",2), %%mm3 \n\t" // Y(0)
            "movq 8(%1, %%"FF_REG_a",2), %%mm5 \n\t" // Y(8)
            "movq %%mm0, %%mm4 \n\t" // UVUV UVUV(0)
            "movq %%mm2, %%mm6 \n\t" // UVUV UVUV(8)
            "punpcklbw %%mm3, %%mm0 \n\t" // UYVY UYVY(0)
            "punpckhbw %%mm3, %%mm4 \n\t" // UYVY UYVY(4)
            "punpcklbw %%mm5, %%mm2 \n\t" // UYVY UYVY(8)
            "punpckhbw %%mm5, %%mm6 \n\t" // UYVY UYVY(12)

            MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t"
            MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
            MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t"
            MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"

            "add $8, %%"FF_REG_a" \n\t"
            "cmp %4, %%"FF_REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
            : "%"FF_REG_a
        );
        /* Advance chroma only on the last luma line of each chroma group. */
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
    __asm__(EMMS" \n\t"
            SFENCE" \n\t"
            :::"memory");
}
1222 
1223 /**
1224  * Height should be a multiple of 2 and width should be a multiple of 16
1225  * (If this is a problem for anyone then tell me, and I will fix it.)
1226  */
1227 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1228  int width, int height,
1229  int lumStride, int chromStride, int dstStride)
1230 {
1231  //FIXME interpolate chroma
1232  RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
1233 }
1234 
1235 /**
1236  * Width should be a multiple of 16.
1237  */
1238 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1239  int width, int height,
1240  int lumStride, int chromStride, int dstStride)
1241 {
1242  RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
1243 }
1244 
1245 /**
1246  * Width should be a multiple of 16.
1247  */
1248 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1249  int width, int height,
1250  int lumStride, int chromStride, int dstStride)
1251 {
1252  RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
1253 }
1254 
1255 /**
1256  * Height should be a multiple of 2 and width should be a multiple of 16.
1257  * (If this is a problem for anyone then tell me, and I will fix it.)
1258  */
1259 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1260  int width, int height,
1261  int lumStride, int chromStride, int srcStride)
1262 {
1263  int y;
1264  const x86_reg chromWidth= width>>1;
1265  for (y=0; y<height; y+=2) {
1266  __asm__ volatile(
1267  "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
1268  "pcmpeqw %%mm7, %%mm7 \n\t"
1269  "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1270  ".p2align 4 \n\t"
1271  "1: \n\t"
1272  PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
1273  "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
1274  "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
1275  "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
1276  "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
1277  "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
1278  "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
1279  "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1280  "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1281  "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1282  "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1283 
1284  MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
1285 
1286  "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
1287  "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
1288  "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
1289  "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
1290  "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
1291  "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
1292  "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1293  "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1294  "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1295  "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1296 
1297  MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
1298 
1299  "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1300  "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1301  "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1302  "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1303  "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1304  "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1305  "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1306  "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1307 
1308  MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
1309  MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
1310 
1311  "add $8, %%"FF_REG_a" \n\t"
1312  "cmp %4, %%"FF_REG_a" \n\t"
1313  " jb 1b \n\t"
1314  ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1315  : "memory", "%"FF_REG_a
1316  );
1317 
1318  ydst += lumStride;
1319  src += srcStride;
1320 
1321  __asm__ volatile(
1322  "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
1323  ".p2align 4 \n\t"
1324  "1: \n\t"
1325  PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
1326  "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
1327  "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
1328  "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
1329  "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
1330  "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
1331  "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
1332  "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
1333  "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
1334  "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
1335  "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
1336 
1337  MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
1338  MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
1339 
1340  "add $8, %%"FF_REG_a"\n\t"
1341  "cmp %4, %%"FF_REG_a"\n\t"
1342  " jb 1b \n\t"
1343 
1344  ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1345  : "memory", "%"FF_REG_a
1346  );
1347  udst += chromStride;
1348  vdst += chromStride;
1349  ydst += lumStride;
1350  src += srcStride;
1351  }
1352  __asm__ volatile(EMMS" \n\t"
1353  SFENCE" \n\t"
1354  :::"memory");
1355 }
1356 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
1357 
1358 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
/**
 * Upscale a single plane by a factor of 2 in both dimensions using
 * bilinear-style 3:1 weighted averaging (each output sample is
 * (3*near + far) >> 2, approximated in the MMX path by chained PAVGB).
 *
 * The first and last output rows are produced by horizontal-only
 * interpolation; interior row pairs are produced from two adjacent
 * source rows. The MMX path handles srcWidth rounded down to a
 * multiple of 16; a scalar loop finishes each row.
 *
 * Only compiled when PAVGB is available (MMXEXT or 3DNow).
 */
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
{
    int x,y;

    dst[0]= src[0];

    // first line: horizontal interpolation only
    for (x=0; x<srcWidth-1; x++) {
        dst[2*x+1]= (3*src[x] + src[x+1])>>2;
        dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
    }
    dst[2*srcWidth-1]= src[srcWidth-1];

    dst+= dstStride;

    for (y=1; y<srcHeight; y++) {
        x86_reg mmxSize= srcWidth&~15;

        if (mmxSize) {
            /* The prologue before label 1 duplicates the leftmost byte so
             * the "one pixel to the left" taps are valid at x == 0. The
             * loop then reads 8+1 source bytes per row pair and emits 16
             * interpolated bytes into each of the two destination rows. */
            __asm__ volatile(
                "mov %4, %%"FF_REG_a" \n\t"
                "movq "MANGLE(mmx_ff)", %%mm0 \n\t"
                "movq (%0, %%"FF_REG_a"), %%mm4 \n\t"
                "movq %%mm4, %%mm2 \n\t"
                "psllq $8, %%mm4 \n\t"
                "pand %%mm0, %%mm2 \n\t"
                "por %%mm2, %%mm4 \n\t"
                "movq (%1, %%"FF_REG_a"), %%mm5 \n\t"
                "movq %%mm5, %%mm3 \n\t"
                "psllq $8, %%mm5 \n\t"
                "pand %%mm0, %%mm3 \n\t"
                "por %%mm3, %%mm5 \n\t"
                "1: \n\t"
                "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
                "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
                "movq 1(%0, %%"FF_REG_a"), %%mm2 \n\t"
                "movq 1(%1, %%"FF_REG_a"), %%mm3 \n\t"
                PAVGB" %%mm0, %%mm5 \n\t"   /* two PAVGB chains approximate the 3:1 weighting */
                PAVGB" %%mm0, %%mm3 \n\t"
                PAVGB" %%mm0, %%mm5 \n\t"
                PAVGB" %%mm0, %%mm3 \n\t"
                PAVGB" %%mm1, %%mm4 \n\t"
                PAVGB" %%mm1, %%mm2 \n\t"
                PAVGB" %%mm1, %%mm4 \n\t"
                PAVGB" %%mm1, %%mm2 \n\t"
                "movq %%mm5, %%mm7 \n\t"
                "movq %%mm4, %%mm6 \n\t"
                "punpcklbw %%mm3, %%mm5 \n\t"
                "punpckhbw %%mm3, %%mm7 \n\t"
                "punpcklbw %%mm2, %%mm4 \n\t"
                "punpckhbw %%mm2, %%mm6 \n\t"
                MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t"
                MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t"
                MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t"
                MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t"
                "add $8, %%"FF_REG_a" \n\t"
                "movq -1(%0, %%"FF_REG_a"), %%mm4 \n\t"
                "movq -1(%1, %%"FF_REG_a"), %%mm5 \n\t"
                " js 1b \n\t"
                :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
                   "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
                   "g" (-mmxSize)
                   NAMED_CONSTRAINTS_ADD(mmx_ff)
                : "%"FF_REG_a
            );
        } else {
            /* srcWidth < 16: emit the leftmost column by hand and let the
             * scalar loop below (starting at x = 0) do the rest. */
            mmxSize = 1;
            dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
            dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
        }

        for (x=mmxSize-1; x<srcWidth-1; x++) {
            dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
            dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
            dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
            dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
        }
        dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
        dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;

        dst+=dstStride*2;
        src+=srcStride;
    }

    // last line: horizontal interpolation only
    dst[0]= src[0];

    for (x=0; x<srcWidth-1; x++) {
        dst[2*x+1]= (3*src[x] + src[x+1])>>2;
        dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
    }
    dst[2*srcWidth-1]= src[srcWidth-1];

    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");
}
1456 #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
1457 
1458 #if !COMPILE_TEMPLATE_AMD3DNOW
1459 /**
1460  * Height should be a multiple of 2 and width should be a multiple of 16.
1461  * (If this is a problem for anyone then tell me, and I will fix it.)
1462  * Chrominance data is only taken from every second line, others are ignored.
1463  * FIXME: Write HQ version.
1464  */
1465 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1466  int width, int height,
1467  int lumStride, int chromStride, int srcStride)
1468 {
1469  int y;
1470  const x86_reg chromWidth= width>>1;
1471  for (y=0; y<height; y+=2) {
1472  __asm__ volatile(
1473  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
1474  "pcmpeqw %%mm7, %%mm7 \n\t"
1475  "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1476  ".p2align 4 \n\t"
1477  "1: \n\t"
1478  PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
1479  "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
1480  "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
1481  "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
1482  "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
1483  "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
1484  "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
1485  "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1486  "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1487  "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1488  "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1489 
1490  MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
1491 
1492  "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8)
1493  "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12)
1494  "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
1495  "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
1496  "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
1497  "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
1498  "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1499  "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1500  "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1501  "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1502 
1503  MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
1504 
1505  "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1506  "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1507  "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1508  "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1509  "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1510  "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1511  "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1512  "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1513 
1514  MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
1515  MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
1516 
1517  "add $8, %%"FF_REG_a" \n\t"
1518  "cmp %4, %%"FF_REG_a" \n\t"
1519  " jb 1b \n\t"
1520  ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1521  : "memory", "%"FF_REG_a
1522  );
1523 
1524  ydst += lumStride;
1525  src += srcStride;
1526 
1527  __asm__ volatile(
1528  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
1529  ".p2align 4 \n\t"
1530  "1: \n\t"
1531  PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
1532  "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
1533  "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
1534  "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
1535  "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
1536  "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
1537  "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
1538  "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
1539  "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
1540  "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
1541  "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
1542 
1543  MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
1544  MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
1545 
1546  "add $8, %%"FF_REG_a" \n\t"
1547  "cmp %4, %%"FF_REG_a" \n\t"
1548  " jb 1b \n\t"
1549 
1550  ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1551  : "memory", "%"FF_REG_a
1552  );
1553  udst += chromStride;
1554  vdst += chromStride;
1555  ydst += lumStride;
1556  src += srcStride;
1557  }
1558  __asm__ volatile(EMMS" \n\t"
1559  SFENCE" \n\t"
1560  :::"memory");
1561 }
1562 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
1563 
1564 /**
1565  * Height should be a multiple of 2 and width should be a multiple of 2.
1566  * (If this is a problem for anyone then tell me, and I will fix it.)
1567  * Chrominance data is only taken from every second line,
1568  * others are ignored in the C version.
1569  * FIXME: Write HQ version.
1570  */
1571 #if HAVE_7REGS
/**
 * Convert packed 24-bit BGR to planar YV12 (4:2:0) using the
 * coefficient table in rgb2yuv.
 *
 * The first two rows (when height > 2) and any trailing rows are
 * delegated to the C reference ff_rgb24toyv12_c; the MMX path handles
 * the interior in row pairs: a per-row luma pass (pmaddwd against the
 * Y coefficients, offset by ff_bgr2YOffset) followed by a combined
 * chroma pass that averages 2x2 pixel blocks across the row pair and
 * produces U and V with the U/V coefficient vectors.
 *
 * Requires 7 GP registers (HAVE_7REGS); width is processed in chunks
 * of 8 luma / 4 chroma samples per iteration.
 */
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                       int width, int height,
                                       int lumStride, int chromStride, int srcStride,
                                       int32_t *rgb2yuv)
{
/* Byte offsets of the Y/U/V coefficient vectors inside the rgb2yuv table. */
#define BGR2Y_IDX "16*4+16*32"
#define BGR2U_IDX "16*4+16*33"
#define BGR2V_IDX "16*4+16*34"
    int y;
    const x86_reg chromWidth= width>>1;

    /* Handle the first two rows in C (keeps the asm free of top-edge
     * special cases). */
    if (height > 2) {
        ff_rgb24toyv12_c(src, ydst, udst, vdst, width, 2, lumStride, chromStride, srcStride, rgb2yuv);
        src  += 2*srcStride;
        ydst += 2*lumStride;
        udst += chromStride;
        vdst += chromStride;
        height -= 2;
    }

    for (y=0; y<height-2; y+=2) {
        int i;
        /* Luma pass, run once per row of the pair. */
        for (i=0; i<2; i++) {
            __asm__ volatile(
                "mov %2, %%"FF_REG_a"\n\t"
                "movq "BGR2Y_IDX"(%3), %%mm6 \n\t"
                "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
                "pxor %%mm7, %%mm7 \n\t"
                "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"  /* REG_d = 3 * pixel index (byte offset) */
                ".p2align 4 \n\t"
                "1: \n\t"
                PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
                "movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
                "movd 3(%0, %%"FF_REG_d"), %%mm1 \n\t"
                "punpcklbw %%mm7, %%mm0 \n\t"
                "punpcklbw %%mm7, %%mm1 \n\t"
                "movd 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
                "movd 9(%0, %%"FF_REG_d"), %%mm3 \n\t"
                "punpcklbw %%mm7, %%mm2 \n\t"
                "punpcklbw %%mm7, %%mm3 \n\t"
                "pmaddwd %%mm6, %%mm0 \n\t"
                "pmaddwd %%mm6, %%mm1 \n\t"
                "pmaddwd %%mm6, %%mm2 \n\t"
                "pmaddwd %%mm6, %%mm3 \n\t"
                "psrad $8, %%mm0 \n\t"
                "psrad $8, %%mm1 \n\t"
                "psrad $8, %%mm2 \n\t"
                "psrad $8, %%mm3 \n\t"
                "packssdw %%mm1, %%mm0 \n\t"
                "packssdw %%mm3, %%mm2 \n\t"
                "pmaddwd %%mm5, %%mm0 \n\t"  /* horizontal add of B/G + R partial sums */
                "pmaddwd %%mm5, %%mm2 \n\t"
                "packssdw %%mm2, %%mm0 \n\t"
                "psraw $7, %%mm0 \n\t"

                "movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
                "movd 15(%0, %%"FF_REG_d"), %%mm1 \n\t"
                "punpcklbw %%mm7, %%mm4 \n\t"
                "punpcklbw %%mm7, %%mm1 \n\t"
                "movd 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
                "movd 21(%0, %%"FF_REG_d"), %%mm3 \n\t"
                "punpcklbw %%mm7, %%mm2 \n\t"
                "punpcklbw %%mm7, %%mm3 \n\t"
                "pmaddwd %%mm6, %%mm4 \n\t"
                "pmaddwd %%mm6, %%mm1 \n\t"
                "pmaddwd %%mm6, %%mm2 \n\t"
                "pmaddwd %%mm6, %%mm3 \n\t"
                "psrad $8, %%mm4 \n\t"
                "psrad $8, %%mm1 \n\t"
                "psrad $8, %%mm2 \n\t"
                "psrad $8, %%mm3 \n\t"
                "packssdw %%mm1, %%mm4 \n\t"
                "packssdw %%mm3, %%mm2 \n\t"
                "pmaddwd %%mm5, %%mm4 \n\t"
                "pmaddwd %%mm5, %%mm2 \n\t"
                "add $24, %%"FF_REG_d"\n\t"
                "packssdw %%mm2, %%mm4 \n\t"
                "psraw $7, %%mm4 \n\t"

                "packuswb %%mm4, %%mm0 \n\t"
                "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"

                MOVNTQ" %%mm0, (%1, %%"FF_REG_a") \n\t"
                "add $8, %%"FF_REG_a" \n\t"
                " js 1b \n\t"
                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
                  NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
                : "%"FF_REG_a, "%"FF_REG_d
            );
            ydst += lumStride;
            src  += srcStride;
        }
        src -= srcStride*2;  /* rewind: chroma pass reads the same two rows */
        /* Chroma pass: average 2x2 blocks (PAVGB on MMXEXT/3DNow, exact
         * add+shift otherwise), then matrix-multiply into U and V. */
        __asm__ volatile(
            "mov %4, %%"FF_REG_a"\n\t"
            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
            "movq "BGR2U_IDX"(%5), %%mm6 \n\t"
            "pxor %%mm7, %%mm7 \n\t"
            "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
            "add %%"FF_REG_d", %%"FF_REG_d"\n\t"   /* REG_d = 6 * chroma index (byte offset) */
            ".p2align 4 \n\t"
            "1: \n\t"
            PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
            PREFETCH" 64(%1, %%"FF_REG_d") \n\t"
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
            "movq (%0, %%"FF_REG_d"), %%mm0 \n\t"
            "movq (%1, %%"FF_REG_d"), %%mm1 \n\t"
            "movq 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
            "movq 6(%1, %%"FF_REG_d"), %%mm3 \n\t"
            PAVGB" %%mm1, %%mm0 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $24, %%mm0 \n\t"
            "psrlq $24, %%mm2 \n\t"
            PAVGB" %%mm1, %%mm0 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
#else
            "movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
            "movd (%1, %%"FF_REG_d"), %%mm1 \n\t"
            "movd 3(%0, %%"FF_REG_d"), %%mm2 \n\t"
            "movd 3(%1, %%"FF_REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm0 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm2, %%mm0 \n\t"
            "movd 6(%0, %%"FF_REG_d"), %%mm4 \n\t"
            "movd 6(%1, %%"FF_REG_d"), %%mm1 \n\t"
            "movd 9(%0, %%"FF_REG_d"), %%mm2 \n\t"
            "movd 9(%1, %%"FF_REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm4 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm4, %%mm2 \n\t"
            "psrlw $2, %%mm0 \n\t"
            "psrlw $2, %%mm2 \n\t"
#endif
            "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
            "movq "BGR2V_IDX"(%5), %%mm3 \n\t"

            "pmaddwd %%mm0, %%mm1 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"
            "pmaddwd %%mm6, %%mm0 \n\t"
            "pmaddwd %%mm6, %%mm2 \n\t"
            "psrad $8, %%mm0 \n\t"
            "psrad $8, %%mm1 \n\t"
            "psrad $8, %%mm2 \n\t"
            "psrad $8, %%mm3 \n\t"
            "packssdw %%mm2, %%mm0 \n\t"
            "packssdw %%mm3, %%mm1 \n\t"
            "pmaddwd %%mm5, %%mm0 \n\t"
            "pmaddwd %%mm5, %%mm1 \n\t"
            "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
            "psraw $7, %%mm0 \n\t"

#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
            "movq 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
            "movq 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
            "movq 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
            "movq 18(%1, %%"FF_REG_d"), %%mm3 \n\t"
            PAVGB" %%mm1, %%mm4 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "movq %%mm4, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $24, %%mm4 \n\t"
            "psrlq $24, %%mm2 \n\t"
            PAVGB" %%mm1, %%mm4 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
#else
            "movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
            "movd 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
            "movd 15(%0, %%"FF_REG_d"), %%mm2 \n\t"
            "movd 15(%1, %%"FF_REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm4 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm2, %%mm4 \n\t"
            "movd 18(%0, %%"FF_REG_d"), %%mm5 \n\t"
            "movd 18(%1, %%"FF_REG_d"), %%mm1 \n\t"
            "movd 21(%0, %%"FF_REG_d"), %%mm2 \n\t"
            "movd 21(%1, %%"FF_REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm5 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm5 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm5, %%mm2 \n\t"
            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"  /* mm5 was clobbered above: reload the 1,1,1,1 vector */
            "psrlw $2, %%mm4 \n\t"
            "psrlw $2, %%mm2 \n\t"
#endif
            "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
            "movq "BGR2V_IDX"(%5), %%mm3 \n\t"

            "pmaddwd %%mm4, %%mm1 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"
            "pmaddwd %%mm6, %%mm4 \n\t"
            "pmaddwd %%mm6, %%mm2 \n\t"
            "psrad $8, %%mm4 \n\t"
            "psrad $8, %%mm1 \n\t"
            "psrad $8, %%mm2 \n\t"
            "psrad $8, %%mm3 \n\t"
            "packssdw %%mm2, %%mm4 \n\t"
            "packssdw %%mm3, %%mm1 \n\t"
            "pmaddwd %%mm5, %%mm4 \n\t"
            "pmaddwd %%mm5, %%mm1 \n\t"
            "add $24, %%"FF_REG_d"\n\t"
            "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
            "psraw $7, %%mm4 \n\t"

            "movq %%mm0, %%mm1 \n\t"
            "punpckldq %%mm4, %%mm0 \n\t"
            "punpckhdq %%mm4, %%mm1 \n\t"
            "packsswb %%mm1, %%mm0 \n\t"
            "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
            "movd %%mm0, (%2, %%"FF_REG_a") \n\t"
            "punpckhdq %%mm0, %%mm0 \n\t"
            "movd %%mm0, (%3, %%"FF_REG_a") \n\t"
            "add $4, %%"FF_REG_a" \n\t"
            " js 1b \n\t"
            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
              NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
            : "%"FF_REG_a, "%"FF_REG_d
        );

        udst += chromStride;
        vdst += chromStride;
        src  += srcStride*2;
    }

    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");

    /* Remaining rows (height - y) fall back to the C reference. */
    ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv);
}
1822 #endif /* HAVE_7REGS */
1823 #endif /* !COMPILE_TEMPLATE_SSE2 */
1824 
1825 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
/**
 * Interleave two byte planes into one: dest[2*i] = src1[i],
 * dest[2*i+1] = src2[i], line by line.
 *
 * For width >= 16 a SIMD loop handles width rounded down to a multiple
 * of 16: the SSE2 path is used when all three pointers are 16-byte
 * aligned, otherwise the MMX path. The scalar loop finishes the last
 * (width & 15) bytes of every line.
 */
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                    int width, int height, int src1Stride,
                                    int src2Stride, int dstStride)
{
    int h;

    for (h=0; h < height; h++) {
        int w;

        if (width >= 16) {
#if COMPILE_TEMPLATE_SSE2
        /* SSE2 path requires 16-byte alignment of all three buffers. */
        if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
            __asm__(
                "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
                "1: \n\t"
                PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
                PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
                "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t"
                "movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t"
                "movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t"
                "punpcklbw %%xmm2, %%xmm0 \n\t"
                "punpckhbw %%xmm2, %%xmm1 \n\t"
                "movntdq %%xmm0, (%0, %%"FF_REG_a", 2) \n\t"
                "movntdq %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t"
                "add $16, %%"FF_REG_a" \n\t"
                "cmp %3, %%"FF_REG_a" \n\t"
                " jb 1b \n\t"
                ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
                : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a
            );
        } else
#endif
            __asm__(
                "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
                "1: \n\t"
                PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
                PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
                "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
                "movq 8(%1, %%"FF_REG_a"), %%mm2 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
                "movq 8(%2, %%"FF_REG_a"), %%mm5 \n\t"
                "punpcklbw %%mm4, %%mm0 \n\t"
                "punpckhbw %%mm4, %%mm1 \n\t"
                "punpcklbw %%mm5, %%mm2 \n\t"
                "punpckhbw %%mm5, %%mm3 \n\t"
                MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 2) \n\t"
                MOVNTQ" %%mm1, 8(%0, %%"FF_REG_a", 2) \n\t"
                MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 2) \n\t"
                MOVNTQ" %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t"
                "add $16, %%"FF_REG_a" \n\t"
                "cmp %3, %%"FF_REG_a" \n\t"
                " jb 1b \n\t"
                ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
                : "memory", "%"FF_REG_a
            );

        }
        /* Scalar tail (also the whole line when width < 16). */
        for (w= (width&(~15)); w < width; w++) {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
}
1899 #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
1900 
1901 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
1902 #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
/* External x86 assembly kernel (see HAVE_X86ASM sources): splits one line
 * of interleaved UV bytes into separate U and V lines. The "unused"
 * parameters exist to match the generic swscale input-function signature. */
void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
                         const uint8_t *unused,
                         const uint8_t *src1,
                         const uint8_t *src2,
                         int w,
                         uint32_t *unused2);
/**
 * De-interleave a plane of alternating bytes (e.g. NV12's UV plane)
 * into two separate planes, one line at a time, by delegating each
 * line to the ff_nv12ToUV assembly kernel.
 */
static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
                                      int width, int height, int srcStride,
                                      int dst1Stride, int dst2Stride)
{
    int h;

    for (h = 0; h < height; h++) {
        RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL);
        src  += srcStride;
        dst1 += dst1Stride;
        dst2 += dst2Stride;
    }
    __asm__(
#if !COMPILE_TEMPLATE_SSE2
            EMMS"       \n\t"
#endif
            SFENCE"     \n\t"
            ::: "memory"
            );
}
1929 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
1930 #endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
1931 
1932 #if !COMPILE_TEMPLATE_SSE2
1933 #if !COMPILE_TEMPLATE_AMD3DNOW
/**
 * Upsample two chroma planes from 1/4 resolution (YVU9-style) to 1/2
 * resolution (YV12-style) by pixel replication: every source byte is
 * written twice horizontally, and every source line is used for two
 * output lines (y>>1 indexing).
 *
 * The MMX loop duplicates 32 source bytes into 64 output bytes per
 * iteration (punpcklbw/punpckhbw of a register with itself); a scalar
 * loop covers the remaining (w & 31) columns.
 */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
                                       int width, int height,
                                       int srcStride1, int srcStride2,
                                       int dstStride1, int dstStride2)
{
    x86_reg x, y;
    int w,h;
    w=width/2; h=height/2;
    /* Warm the cache for the first line of each plane. */
    __asm__ volatile(
        PREFETCH" %0 \n\t"
        PREFETCH" %1 \n\t"
        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
    for (y=0;y<h;y++) {
        const uint8_t* s1=src1+srcStride1*(y>>1);  /* each source line feeds two output lines */
        uint8_t* d=dst1+dstStride1*y;
        x=0;
        for (;x<w-31;x+=32) {
            __asm__ volatile(
                PREFETCH" 32(%1,%2) \n\t"
                "movq (%1,%2), %%mm0 \n\t"
                "movq 8(%1,%2), %%mm2 \n\t"
                "movq 16(%1,%2), %%mm4 \n\t"
                "movq 24(%1,%2), %%mm6 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq %%mm4, %%mm5 \n\t"
                "movq %%mm6, %%mm7 \n\t"
                "punpcklbw %%mm0, %%mm0 \n\t"  /* unpack with self == duplicate each byte */
                "punpckhbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm3, %%mm3 \n\t"
                "punpcklbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"
                "punpcklbw %%mm6, %%mm6 \n\t"
                "punpckhbw %%mm7, %%mm7 \n\t"
                MOVNTQ" %%mm0, (%0,%2,2) \n\t"
                MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
                MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
                MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
                MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
                MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
                MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
                MOVNTQ" %%mm7, 56(%0,%2,2)"
                :: "r"(d), "r"(s1), "r"(x)
                :"memory");
        }
        for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
    }
    /* Same procedure for the second plane. */
    for (y=0;y<h;y++) {
        const uint8_t* s2=src2+srcStride2*(y>>1);
        uint8_t* d=dst2+dstStride2*y;
        x=0;
        for (;x<w-31;x+=32) {
            __asm__ volatile(
                PREFETCH" 32(%1,%2) \n\t"
                "movq (%1,%2), %%mm0 \n\t"
                "movq 8(%1,%2), %%mm2 \n\t"
                "movq 16(%1,%2), %%mm4 \n\t"
                "movq 24(%1,%2), %%mm6 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq %%mm4, %%mm5 \n\t"
                "movq %%mm6, %%mm7 \n\t"
                "punpcklbw %%mm0, %%mm0 \n\t"
                "punpckhbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm3, %%mm3 \n\t"
                "punpcklbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"
                "punpcklbw %%mm6, %%mm6 \n\t"
                "punpckhbw %%mm7, %%mm7 \n\t"
                MOVNTQ" %%mm0, (%0,%2,2) \n\t"
                MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
                MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
                MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
                MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
                MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
                MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
                MOVNTQ" %%mm7, 56(%0,%2,2)"
                :: "r"(d), "r"(s2), "r"(x)
                :"memory");
        }
        for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
    }
    __asm__(
            EMMS"       \n\t"
            SFENCE"     \n\t"
            ::: "memory"
            );
}
2025 
/**
 * Interleave a full-resolution Y plane with heavily subsampled U and V
 * planes into packed YUYV (YUY2).
 *
 * Grounded in the code: chroma row index is y>>2 (vertical 4:1
 * subsampling) and each up[x]/vp[x] pair is reused across 4 luma samples
 * (horizontal 4:1), i.e. YVU9-style chroma.  One iteration of the SIMD
 * loop consumes 32 luma bytes + 8 U + 8 V and emits 64 packed bytes.
 *
 * Uses MOVNTQ stores; EMMS/SFENCE are issued at the end of the function.
 */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
                                        int width, int height,
                                        int srcStride1, int srcStride2,
                                        int srcStride3, int dstStride)
{
    x86_reg x;
    int y,w,h;
    w=width/2; h=height;
    for (y=0;y<h;y++) {
        const uint8_t* yp=src1+srcStride1*y;       /* luma row */
        const uint8_t* up=src2+srcStride2*(y>>2);  /* U row, 1/4 vertical */
        const uint8_t* vp=src3+srcStride3*(y>>2);  /* V row, 1/4 vertical */
        uint8_t* d=dst+dstStride*y;
        x=0;
        for (;x<w-7;x+=8) {
            __asm__ volatile(
                PREFETCH" 32(%1, %0) \n\t"
                PREFETCH" 32(%2, %0) \n\t"
                PREFETCH" 32(%3, %0) \n\t"
                "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
                "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
                "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
                "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
                "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */
                "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */
                "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */
                "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */
                "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */
                "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */

                "movq %%mm1, %%mm6 \n\t"
                "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/
                "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
                "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
                MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"

                "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/
                "movq 8(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/
                "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/
                MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"

                "movq %%mm4, %%mm6 \n\t"
                "movq 16(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm5, %%mm4 \n\t"
                "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/
                "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/
                MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"

                "punpckhbw %%mm5, %%mm6 \n\t"
                "movq 24(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/
                "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/
                MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"

                : "+r" (x)
                : "r"(yp), "r" (up), "r"(vp), "r"(d)
                :"memory");
        }
        /* Scalar tail: 4 luma + 1 U + 1 V -> 8 packed bytes per x. */
        for (; x<w; x++) {
            const int x2 = x<<2;
            d[8*x+0] = yp[x2];
            d[8*x+1] = up[x];
            d[8*x+2] = yp[x2+1];
            d[8*x+3] = vp[x];
            d[8*x+4] = yp[x2+2];
            d[8*x+5] = up[x];
            d[8*x+6] = yp[x2+3];
            d[8*x+7] = vp[x];
        }
    }
    /* Restore FPU state and fence the non-temporal stores. */
    __asm__(
            EMMS" \n\t"
            SFENCE" \n\t"
            ::: "memory"
        );
}
2111 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
2112 
/* Copy every even-indexed source byte (src[0], src[2], ...) into dst,
 * i.e. extract the luma bytes of a packed YUYV row.  Both pointers are
 * advanced past the end and count is negated so the asm loop can run a
 * single index register up towards zero ("js 1b" loops while negative).
 * Uses MOVNTQ stores; callers issue EMMS/SFENCE afterwards. */
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
    dst += count;
    src += 2*count;
    count= - count;

    if(count <= -16) {
        /* +15 bias leaves the last (possibly partial) group of up to 15
         * bytes for the scalar tail loop below. */
        count += 15;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"  /* mm7 = all-ones */
            "psrlw $8, %%mm7 \n\t"       /* mm7 = 0x00FF per word: even-byte mask */
            "1: \n\t"
            "movq -30(%1, %0, 2), %%mm0 \n\t"
            "movq -22(%1, %0, 2), %%mm1 \n\t"
            "movq -14(%1, %0, 2), %%mm2 \n\t"
            "movq -6(%1, %0, 2), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"     /* keep low byte of each word */
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t" /* words -> 8 contiguous bytes */
            "packuswb %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0,-15(%2, %0) \n\t"
            MOVNTQ" %%mm2,- 7(%2, %0) \n\t"
            "add $16, %0 \n\t"           /* 16 output bytes per iteration */
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst)
        );
        count -= 15;
    }
    /* Scalar tail for the remaining bytes. */
    while(count<0) {
        dst[count]= src[2*count];
        count++;
    }
}
2149 
/* Copy every odd-indexed source byte (src[1], src[3], ...) into dst,
 * i.e. extract the luma bytes of a packed UYVY row.  Implemented by
 * advancing src by one and then masking the low byte of each word, like
 * extract_even.  NOTE(review): the SIMD path uses a strict 'count < -16'
 * test and a 16 (not 15) bias, unlike extract_even -- presumably to keep
 * the 32-byte loads from the shifted src inside the buffer; confirm
 * before changing. */
static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
    src ++;
    dst += count;
    src += 2*count;
    count= - count;

    if(count < -16) {
        count += 16;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"  /* mm7 = all-ones */
            "psrlw $8, %%mm7 \n\t"       /* mm7 = 0x00FF per word */
            "1: \n\t"
            "movq -32(%1, %0, 2), %%mm0 \n\t"
            "movq -24(%1, %0, 2), %%mm1 \n\t"
            "movq -16(%1, %0, 2), %%mm2 \n\t"
            "movq -8(%1, %0, 2), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"     /* keep low byte of each word */
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t" /* words -> 8 contiguous bytes */
            "packuswb %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0,-16(%2, %0) \n\t"
            MOVNTQ" %%mm2,- 8(%2, %0) \n\t"
            "add $16, %0 \n\t"           /* 16 output bytes per iteration */
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst)
        );
        count -= 16;
    }
    /* Scalar tail (src already points at the odd bytes). */
    while(count<0) {
        dst[count]= src[2*count];
        count++;
    }
}
2187 
2188 #if !COMPILE_TEMPLATE_AMD3DNOW
/* De-interleave the two even bytes of every 4-byte group:
 * dst0[i] = src[4i+0], dst1[i] = src[4i+2] (e.g. U and V from a UYVY
 * row).  Pointers are advanced past the end and count negated so the asm
 * loop runs its index up towards zero.  Callers issue EMMS/SFENCE. */
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0+= count;
    dst1+= count;
    src += 4*count;
    count= - count;
    if(count <= -8) {
        /* +7 bias leaves up to 7 samples for the scalar tail. */
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"   /* mm7 = all-ones */
            "psrlw $8, %%mm7 \n\t"        /* mm7 = 0x00FF per word */
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"      /* keep even source bytes */
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"  /* mm0 = bytes 0,2,4,... */
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"        /* isolate 4k+2 bytes */
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"      /* isolate 4k+0 bytes */
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"  /* mm0 -> dst1 stream */
            "packuswb %%mm3, %%mm1 \n\t"  /* mm1 -> dst0 stream */
            MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
            "add $8, %0 \n\t"             /* 8 samples per plane per iteration */
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
    /* Scalar tail. */
    while(count<0) {
        dst0[count]= src[4*count+0];
        dst1[count]= src[4*count+2];
        count++;
    }
}
2234 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
2235 
/* Like extract_even2, but averages two source rows first:
 * dst0[i] = avg(src0[4i+0], src1[4i+0]), dst1[i] = avg(..., 4i+2) --
 * the vertical chroma averaging step of packed -> 4:2:0 conversion.
 * NOTE(review): the PAVGB path rounds up ((a+b+1)>>1) while the scalar
 * tail truncates ((a+b)>>1), so the last <8 samples of a row may differ
 * by 1 from the SIMD samples -- looks like a deliberate speed trade-off;
 * confirm before "fixing". */
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0 += count;
    dst1 += count;
    src0 += 4*count;
    src1 += 4*count;
    count= - count;
#ifdef PAVGB
    if(count <= -8) {
        /* +7 bias leaves up to 7 samples for the scalar tail. */
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"   /* mm7 = all-ones */
            "psrlw $8, %%mm7 \n\t"        /* mm7 = 0x00FF per word */
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            PAVGB" -28(%2, %0, 4), %%mm0 \n\t"  /* average with second row */
            PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
            PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
            PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"      /* keep even source bytes */
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"        /* isolate 4k+2 bytes */
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"      /* isolate 4k+0 bytes */
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"  /* -> dst1 stream */
            "packuswb %%mm3, %%mm1 \n\t"  /* -> dst0 stream */
            MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    /* Scalar tail (truncating average, see NOTE above). */
    while(count<0) {
        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
        count++;
    }
}
2288 
2289 #if !COMPILE_TEMPLATE_AMD3DNOW
/* De-interleave the two odd bytes of every 4-byte group:
 * dst0[i] = src[4i+1], dst1[i] = src[4i+3] (e.g. U and V from a YUYV
 * row).  The SIMD path extracts the odd bytes with psrlw $8; the scalar
 * tail gets them by incrementing src by one after the asm block.
 * Callers issue EMMS/SFENCE. */
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0+= count;
    dst1+= count;
    src += 4*count;
    count= - count;
    if(count <= -8) {
        /* +7 bias leaves up to 7 samples for the scalar tail. */
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"   /* mm7 = all-ones */
            "psrlw $8, %%mm7 \n\t"        /* mm7 = 0x00FF per word */
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"        /* keep odd source bytes */
            "psrlw $8, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"  /* mm0 = bytes 1,3,5,... */
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"        /* isolate 4k+3 bytes */
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"      /* isolate 4k+1 bytes */
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"  /* -> dst1 stream */
            "packuswb %%mm3, %%mm1 \n\t"  /* -> dst0 stream */
            MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
    src++;  /* scalar tail reads the odd bytes via +0/+2 offsets */
    while(count<0) {
        dst0[count]= src[4*count+0];
        dst1[count]= src[4*count+2];
        count++;
    }
}
2336 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
2337 
/* Like extract_odd2, but averages two source rows first:
 * dst0[i] = avg(src0[4i+1], src1[4i+1]), dst1[i] = avg(..., 4i+3) --
 * vertical chroma averaging for packed YUYV -> 4:2:0.
 * NOTE(review): PAVGB rounds up while the scalar tail truncates with
 * >>1, so the last <8 samples may differ by 1 -- same trade-off as
 * extract_even2avg. */
static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0 += count;
    dst1 += count;
    src0 += 4*count;
    src1 += 4*count;
    count= - count;
#ifdef PAVGB
    if(count <= -8) {
        /* +7 bias leaves up to 7 samples for the scalar tail. */
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"   /* mm7 = all-ones */
            "psrlw $8, %%mm7 \n\t"        /* mm7 = 0x00FF per word */
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            PAVGB" -28(%2, %0, 4), %%mm0 \n\t"  /* average with second row */
            PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
            PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
            PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"        /* keep odd source bytes */
            "psrlw $8, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"        /* isolate 4k+3 bytes */
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"      /* isolate 4k+1 bytes */
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"  /* -> dst1 stream */
            "packuswb %%mm3, %%mm1 \n\t"  /* -> dst0 stream */
            MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    src0++;  /* scalar tail reads odd bytes via +0/+2 offsets */
    src1++;
    while(count<0) {
        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
        count++;
    }
}
2392 
2393 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2394  int width, int height,
2395  int lumStride, int chromStride, int srcStride)
2396 {
2397  int y;
2398  const int chromWidth = AV_CEIL_RSHIFT(width, 1);
2399 
2400  for (y=0; y<height; y++) {
2401  RENAME(extract_even)(src, ydst, width);
2402  if(y&1) {
2403  RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
2404  udst+= chromStride;
2405  vdst+= chromStride;
2406  }
2407 
2408  src += srcStride;
2409  ydst+= lumStride;
2410  }
2411  __asm__(
2412  EMMS" \n\t"
2413  SFENCE" \n\t"
2414  ::: "memory"
2415  );
2416 }
2417 
2418 #if !COMPILE_TEMPLATE_AMD3DNOW
2419 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2420  int width, int height,
2421  int lumStride, int chromStride, int srcStride)
2422 {
2423  int y;
2424  const int chromWidth = AV_CEIL_RSHIFT(width, 1);
2425 
2426  for (y=0; y<height; y++) {
2427  RENAME(extract_even)(src, ydst, width);
2428  RENAME(extract_odd2)(src, udst, vdst, chromWidth);
2429 
2430  src += srcStride;
2431  ydst+= lumStride;
2432  udst+= chromStride;
2433  vdst+= chromStride;
2434  }
2435  __asm__(
2436  EMMS" \n\t"
2437  SFENCE" \n\t"
2438  ::: "memory"
2439  );
2440 }
2441 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
2442 
2443 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2444  int width, int height,
2445  int lumStride, int chromStride, int srcStride)
2446 {
2447  int y;
2448  const int chromWidth = AV_CEIL_RSHIFT(width, 1);
2449 
2450  for (y=0; y<height; y++) {
2451  RENAME(extract_odd)(src, ydst, width);
2452  if(y&1) {
2453  RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
2454  udst+= chromStride;
2455  vdst+= chromStride;
2456  }
2457 
2458  src += srcStride;
2459  ydst+= lumStride;
2460  }
2461  __asm__(
2462  EMMS" \n\t"
2463  SFENCE" \n\t"
2464  ::: "memory"
2465  );
2466 }
2467 
2468 #if !COMPILE_TEMPLATE_AMD3DNOW
2469 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
2470  int width, int height,
2471  int lumStride, int chromStride, int srcStride)
2472 {
2473  int y;
2474  const int chromWidth = AV_CEIL_RSHIFT(width, 1);
2475 
2476  for (y=0; y<height; y++) {
2477  RENAME(extract_odd)(src, ydst, width);
2478  RENAME(extract_even2)(src, udst, vdst, chromWidth);
2479 
2480  src += srcStride;
2481  ydst+= lumStride;
2482  udst+= chromStride;
2483  vdst+= chromStride;
2484  }
2485  __asm__(
2486  EMMS" \n\t"
2487  SFENCE" \n\t"
2488  ::: "memory"
2489  );
2490 }
2491 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
2492 #endif /* !COMPILE_TEMPLATE_SSE2 */
2493 
2495 {
2496 #if !COMPILE_TEMPLATE_SSE2
2497 #if !COMPILE_TEMPLATE_AMD3DNOW
2524 #endif /* !COMPILE_TEMPLATE_AMD3DNOW */
2525 
2526 #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
2528 #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
2529 #if HAVE_7REGS
2530  ff_rgb24toyv12 = RENAME(rgb24toyv12);
2531 #endif /* HAVE_7REGS */
2532 
2535 #endif /* !COMPILE_TEMPLATE_SSE2 */
2536 
2537 #if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
2539 #endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
2540 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
2541 #if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
2543 #endif
2544 #endif
2545 }
yv12toyuy2
static void RENAME() yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 16.
Definition: rgb2rgb_template.c:1162
STORE_BGR24_MMX
#define STORE_BGR24_MMX
Definition: rgb2rgb_template.c:112
yuvPlanartoyuy2
static void RENAME() yuvPlanartoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
Definition: rgb2rgb_template.c:1105
rgb16to32
static void RENAME() rgb16to32(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:993
PACK_RGB32
#define PACK_RGB32
Definition: rgb2rgb_template.c:938
rgb24tobgr16
static void RENAME() rgb24tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:490
rgb24tobgr32
static void RENAME() rgb24tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:68
end
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
b
#define b
Definition: input.c:41
NAMED_CONSTRAINTS_ADD
#define NAMED_CONSTRAINTS_ADD(...)
Definition: asm.h:145
rgb2yuv
static const char rgb2yuv[]
Definition: vf_scale_vulkan.c:65
SFENCE
#define SFENCE
Definition: rgb2rgb_template.c:61
yv12touyvy
static void RENAME() yv12touyvy(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 16 (If this is a problem for anyon...
Definition: rgb2rgb_template.c:1227
rgb24to16
static void RENAME() rgb24to16(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:547
rgb32to15
static void RENAME() rgb32to15(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:385
rgb16to15
static void RENAME() rgb16to15(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:232
rgb24to15
static void RENAME() rgb24to15(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:661
x
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo x
Definition: fate.txt:150
rgb2rgb_init
static av_cold void RENAME() rgb2rgb_init(void)
Definition: rgb2rgb_template.c:2494
extract_even
static void RENAME() extract_even(const uint8_t *src, uint8_t *dst, x86_reg count)
Definition: rgb2rgb_template.c:2113
rgb32to16
static void RENAME() rgb32to16(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:280
ff_rgb24toyv12
void(* ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
Definition: rgb2rgb.c:81
extract_odd
static void RENAME() extract_odd(const uint8_t *src, uint8_t *dst, x86_reg count)
Definition: rgb2rgb_template.c:2150
vu9_to_vu12
static void RENAME() vu9_to_vu12(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride1, int srcStride2, int dstStride1, int dstStride2)
Definition: rgb2rgb_template.c:1934
av_cold
#define av_cold
Definition: attributes.h:90
width
#define width
s
#define s(width, name)
Definition: cbs_vp9.c:257
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:58
g
const char * g
Definition: vf_curves.c:115
interleaveBytes
static void RENAME() interleaveBytes(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride)
Height should be a multiple of 2 and width should be a multiple of 2.
Definition: rgb2rgb_template.c:1826
rgb15tobgr24
static void RENAME() rgb15tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:718
s1
#define s1
Definition: regdef.h:38
extract_even2
static void RENAME() extract_even2(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
Definition: rgb2rgb_template.c:2189
XMM_CLOBBERS
#define XMM_CLOBBERS(...)
Definition: asm.h:98
int32_t
int32_t
Definition: audio_convert.c:194
yuv422ptoyuy2
static void RENAME() yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Width should be a multiple of 16.
Definition: rgb2rgb_template.c:1248
NULL
#define NULL
Definition: coverity.c:32
rgb15to32
static void RENAME() rgb15to32(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:950
src
#define src
Definition: vp8dsp.c:254
yuy2toyv12
static void RENAME() yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride)
Height should be a multiple of 2 and width should be a multiple of 16.
Definition: rgb2rgb_template.c:1259
s2
#define s2
Definition: regdef.h:39
PAVGB
#define PAVGB(a, b)
Definition: postprocess_template.c:87
asm.h
yuyvtoyuv422
static void RENAME() yuyvtoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
Definition: rgb2rgb_template.c:2419
height
#define height
ff_rgb24toyv12_c
void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
Definition: rgb2rgb_template.c:651
attributes.h
EMMS
#define EMMS
Definition: rgb2rgb_template.c:53
r
#define r
Definition: input.c:40
src0
#define src0
Definition: h264pred.c:138
yvu9_to_yuy2
static void RENAME() yvu9_to_yuy2(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, int width, int height, int srcStride1, int srcStride2, int srcStride3, int dstStride)
Definition: rgb2rgb_template.c:2026
src1
#define src1
Definition: h264pred.c:139
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
PREFETCH
#define PREFETCH
Definition: rgb2rgb_template.c:46
RENAME
#define RENAME(name)
Definition: ffv1.h:196
rgb24tobgr24
static void RENAME() rgb24tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:1037
uint8_t
uint8_t
Definition: audio_convert.c:194
yuvPlanartouyvy
static void RENAME() yuvPlanartouyvy(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride, int vertLumPerChroma)
Definition: rgb2rgb_template.c:1170
rgb16tobgr24
static void RENAME() rgb16tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:824
uyvytoyuv422
static void RENAME() uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
Definition: rgb2rgb_template.c:2469
deinterleaveBytes
void(* deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride, int dst1Stride, int dst2Stride)
Definition: rgb2rgb.c:91
w
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo ug o o w
Definition: fate.txt:150
uyvytoyuv420
static void RENAME() uyvytoyuv420(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
Definition: rgb2rgb_template.c:2443
rgb15to16
static void RENAME() rgb15to16(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:189
uyvytoyv12
static void RENAME() uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride)
Height should be a multiple of 2 and width should be a multiple of 16.
Definition: rgb2rgb_template.c:1465
void
typedef void(RENAME(mix_any_func_type))
Definition: rematrix_template.c:52
rgb32tobgr16
static void RENAME() rgb32tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:330
MOVNTQ
#define MOVNTQ
Definition: rgb2rgb_template.c:60
rgb32tobgr15
static void RENAME() rgb32tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:435
rgb32tobgr24
static void RENAME() rgb32tobgr24(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:146
MANGLE
#define MANGLE(a)
Definition: asm.h:127
x86_reg
int x86_reg
Definition: asm.h:72
planar2x
void(* planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride)
Definition: rgb2rgb.c:86
h
h
Definition: vp9dsp_template.c:2038
yuyvtoyuv420
static void RENAME() yuyvtoyuv420(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
Definition: rgb2rgb_template.c:2393
extract_odd2
static void RENAME() extract_odd2(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
Definition: rgb2rgb_template.c:2290
extract_even2avg
static void RENAME() extract_even2avg(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
Definition: rgb2rgb_template.c:2236
rgb24tobgr15
static void RENAME() rgb24tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb_template.c:604
yuv422ptouyvy
static void RENAME() yuv422ptouyvy(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, int width, int height, int lumStride, int chromStride, int dstStride)
Width should be a multiple of 16.
Definition: rgb2rgb_template.c:1238
extract_odd2avg
static void RENAME() extract_odd2avg(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
Definition: rgb2rgb_template.c:2338