28 v8i16 r0, r1, r2, r3, r4, r5, r6, r7, sign;
29 v4i32 r0_r, r0_l, r1_r, r1_l, r2_r, r2_l, r3_r, r3_l,
30 r4_r, r4_l, r5_r, r5_l, r6_r, r6_l, r7_r, r7_l;
31 v4i32
A,
B,
C,
D, Ad, Bd, Cd, Dd,
E,
F,
G,
H;
32 v4i32 Ed, Gd, Add, Bdd, Fd, Hd;
34 v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
35 v4i32 c0,
c1,
c2, c3, c4, c5, c6, c7;
36 v4i32 f0, f1, f2, f3, f4, f5, f6, f7;
39 v16i8
mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
40 v4i32 cnst64277w = {64277, 64277, 64277, 64277};
41 v4i32 cnst60547w = {60547, 60547, 60547, 60547};
42 v4i32 cnst54491w = {54491, 54491, 54491, 54491};
43 v4i32 cnst46341w = {46341, 46341, 46341, 46341};
44 v4i32 cnst36410w = {36410, 36410, 36410, 36410};
45 v4i32 cnst25080w = {25080, 25080, 25080, 25080};
46 v4i32 cnst12785w = {12785, 12785, 12785, 12785};
47 v4i32 cnst8w = {8, 8, 8, 8};
48 v4i32 cnst2048w = {2048, 2048, 2048, 2048};
49 v4i32 cnst128w = {128, 128, 128, 128};
52 LD_SH8(input, 8, r0, r1, r2, r3, r4, r5, r6, r7);
53 sign = __msa_clti_s_h(r0, 0);
54 r0_r = (v4i32) __msa_ilvr_h(sign, r0);
55 r0_l = (v4i32) __msa_ilvl_h(sign, r0);
56 sign = __msa_clti_s_h(r1, 0);
57 r1_r = (v4i32) __msa_ilvr_h(sign, r1);
58 r1_l = (v4i32) __msa_ilvl_h(sign, r1);
59 sign = __msa_clti_s_h(r2, 0);
60 r2_r = (v4i32) __msa_ilvr_h(sign, r2);
61 r2_l = (v4i32) __msa_ilvl_h(sign, r2);
62 sign = __msa_clti_s_h(r3, 0);
63 r3_r = (v4i32) __msa_ilvr_h(sign, r3);
64 r3_l = (v4i32) __msa_ilvl_h(sign, r3);
65 sign = __msa_clti_s_h(r4, 0);
66 r4_r = (v4i32) __msa_ilvr_h(sign, r4);
67 r4_l = (v4i32) __msa_ilvl_h(sign, r4);
68 sign = __msa_clti_s_h(r5, 0);
69 r5_r = (v4i32) __msa_ilvr_h(sign, r5);
70 r5_l = (v4i32) __msa_ilvl_h(sign, r5);
71 sign = __msa_clti_s_h(r6, 0);
72 r6_r = (v4i32) __msa_ilvr_h(sign, r6);
73 r6_l = (v4i32) __msa_ilvl_h(sign, r6);
74 sign = __msa_clti_s_h(r7, 0);
75 r7_r = (v4i32) __msa_ilvr_h(sign, r7);
76 r7_l = (v4i32) __msa_ilvl_h(sign, r7);
79 A = ((r1_r * cnst64277w) >> 16) + ((r7_r * cnst12785w) >> 16);
80 B = ((r1_r * cnst12785w) >> 16) - ((r7_r * cnst64277w) >> 16);
81 C = ((r3_r * cnst54491w) >> 16) + ((r5_r * cnst36410w) >> 16);
82 D = ((r5_r * cnst54491w) >> 16) - ((r3_r * cnst36410w) >> 16);
83 Ad = ((A -
C) * cnst46341w) >> 16;
84 Bd = ((B -
D) * cnst46341w) >> 16;
87 E = ((r0_r + r4_r) * cnst46341w) >> 16;
88 F = ((r0_r - r4_r) * cnst46341w) >> 16;
89 G = ((r2_r * cnst60547w) >> 16) + ((r6_r * cnst25080w) >> 16);
90 H = ((r2_r * cnst25080w) >> 16) - ((r6_r * cnst60547w) >> 16);
107 A = ((r1_l * cnst64277w) >> 16) + ((r7_l * cnst12785w) >> 16);
108 B = ((r1_l * cnst12785w) >> 16) - ((r7_l * cnst64277w) >> 16);
109 C = ((r3_l * cnst54491w) >> 16) + ((r5_l * cnst36410w) >> 16);
110 D = ((r5_l * cnst54491w) >> 16) - ((r3_l * cnst36410w) >> 16);
111 Ad = ((A -
C) * cnst46341w) >> 16;
112 Bd = ((B -
D) * cnst46341w) >> 16;
115 E = ((r0_l + r4_l) * cnst46341w) >> 16;
116 F = ((r0_l - r4_l) * cnst46341w) >> 16;
117 G = ((r2_l * cnst60547w) >> 16) + ((r6_l * cnst25080w) >> 16);
118 H = ((r2_l * cnst25080w) >> 16) - ((r6_l * cnst60547w) >> 16);
136 r0_r, r1_r, r2_r, r3_r);
138 r0_l, r1_l, r2_l, r3_l);
139 A = ((r1_r * cnst64277w) >> 16) + ((r3_l * cnst12785w) >> 16);
140 B = ((r1_r * cnst12785w) >> 16) - ((r3_l * cnst64277w) >> 16);
141 C = ((r3_r * cnst54491w) >> 16) + ((r1_l * cnst36410w) >> 16);
142 D = ((r1_l * cnst54491w) >> 16) - ((r3_r * cnst36410w) >> 16);
143 Ad = ((A -
C) * cnst46341w) >> 16;
144 Bd = ((B -
D) * cnst46341w) >> 16;
147 E = ((r0_r + r0_l) * cnst46341w) >> 16;
149 F = ((r0_r - r0_l) * cnst46341w) >> 16;
155 G = ((r2_r * cnst60547w) >> 16) + ((r2_l * cnst25080w) >> 16);
156 H = ((r2_r * cnst25080w) >> 16) - ((r2_l * cnst60547w) >> 16);
172 LD_SB8(dst, stride, d0, d1, d2, d3, d4, d5, d6, d7);
173 ILVR_B4_SW(zero, d0, zero, d1, zero, d2, zero, d3,
175 ILVR_B4_SW(zero, d4, zero, d5, zero, d6, zero, d7,
177 ILVR_H4_SW(zero, f0, zero, f1, zero, f2, zero, f3,
179 ILVR_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
198 sign_l = __msa_or_v((v16u8)r1_r, (v16u8)r2_r);
199 sign_l = __msa_or_v(sign_l, (v16u8)r3_r);
200 sign_l = __msa_or_v(sign_l, (v16u8)r0_l);
201 sign_l = __msa_or_v(sign_l, (v16u8)r1_l);
202 sign_l = __msa_or_v(sign_l, (v16u8)r2_l);
203 sign_l = __msa_or_v(sign_l, (v16u8)r3_l);
204 sign_t = __msa_ceqi_w((v4i32)sign_l, 0);
205 Add = ((r0_r * cnst46341w) + (8 << 16)) >> 20;
207 Bdd = Add + cnst128w;
235 Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
236 Bd = (v4i32)__msa_and_v((v16u8)Bd, (v16u8)sign_t);
237 Cd = (v4i32)__msa_and_v((v16u8)Cd, (v16u8)sign_t);
238 Dd = (v4i32)__msa_and_v((v16u8)Dd, (v16u8)sign_t);
239 Ed = (v4i32)__msa_and_v((v16u8)Ed, (v16u8)sign_t);
240 Fd = (v4i32)__msa_and_v((v16u8)Fd, (v16u8)sign_t);
241 Gd = (v4i32)__msa_and_v((v16u8)Gd, (v16u8)sign_t);
242 Hd = (v4i32)__msa_and_v((v16u8)Hd, (v16u8)sign_t);
243 sign_t = __msa_ceqi_w(sign_t, 0);
244 A = (v4i32)__msa_and_v((v16u8)
A, (v16u8)sign_t);
245 B = (v4i32)__msa_and_v((v16u8)
B, (v16u8)sign_t);
246 C = (v4i32)__msa_and_v((v16u8)
C, (v16u8)sign_t);
247 D = (v4i32)__msa_and_v((v16u8)
D, (v16u8)sign_t);
248 E = (v4i32)__msa_and_v((v16u8)
E, (v16u8)sign_t);
249 F = (v4i32)__msa_and_v((v16u8)
F, (v16u8)sign_t);
250 G = (v4i32)__msa_and_v((v16u8)
G, (v16u8)sign_t);
251 H = (v4i32)__msa_and_v((v16u8)
H, (v16u8)sign_t);
263 r4_r, r5_r, r6_r, r7_r);
265 r4_l, r5_l, r6_l, r7_l);
266 A = ((r5_r * cnst64277w) >> 16) + ((r7_l * cnst12785w) >> 16);
267 B = ((r5_r * cnst12785w) >> 16) - ((r7_l * cnst64277w) >> 16);
268 C = ((r7_r * cnst54491w) >> 16) + ((r5_l * cnst36410w) >> 16);
269 D = ((r5_l * cnst54491w) >> 16) - ((r7_r * cnst36410w) >> 16);
270 Ad = ((A -
C) * cnst46341w) >> 16;
271 Bd = ((B -
D) * cnst46341w) >> 16;
274 E = ((r4_r + r4_l) * cnst46341w) >> 16;
276 F = ((r4_r - r4_l) * cnst46341w) >> 16;
282 G = ((r6_r * cnst60547w) >> 16) + ((r6_l * cnst25080w) >> 16);
283 H = ((r6_r * cnst25080w) >> 16) - ((r6_l * cnst60547w) >> 16);
299 ILVL_H4_SW(zero, f0, zero, f1, zero, f2, zero, f3,
301 ILVL_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
320 sign_l = __msa_or_v((v16u8)r5_r, (v16u8)r6_r);
321 sign_l = __msa_or_v(sign_l, (v16u8)r7_r);
322 sign_l = __msa_or_v(sign_l, (v16u8)r4_l);
323 sign_l = __msa_or_v(sign_l, (v16u8)r5_l);
324 sign_l = __msa_or_v(sign_l, (v16u8)r6_l);
325 sign_l = __msa_or_v(sign_l, (v16u8)r7_l);
326 sign_t = __msa_ceqi_w((v4i32)sign_l, 0);
327 Add = ((r4_r * cnst46341w) + (8 << 16)) >> 20;
329 Bdd = Add + cnst128w;
357 Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
358 Bd = (v4i32)__msa_and_v((v16u8)Bd, (v16u8)sign_t);
359 Cd = (v4i32)__msa_and_v((v16u8)Cd, (v16u8)sign_t);
360 Dd = (v4i32)__msa_and_v((v16u8)Dd, (v16u8)sign_t);
361 Ed = (v4i32)__msa_and_v((v16u8)Ed, (v16u8)sign_t);
362 Fd = (v4i32)__msa_and_v((v16u8)Fd, (v16u8)sign_t);
363 Gd = (v4i32)__msa_and_v((v16u8)Gd, (v16u8)sign_t);
364 Hd = (v4i32)__msa_and_v((v16u8)Hd, (v16u8)sign_t);
365 sign_t = __msa_ceqi_w(sign_t, 0);
366 A = (v4i32)__msa_and_v((v16u8)
A, (v16u8)sign_t);
367 B = (v4i32)__msa_and_v((v16u8)
B, (v16u8)sign_t);
368 C = (v4i32)__msa_and_v((v16u8)
C, (v16u8)sign_t);
369 D = (v4i32)__msa_and_v((v16u8)
D, (v16u8)sign_t);
370 E = (v4i32)__msa_and_v((v16u8)
E, (v16u8)sign_t);
371 F = (v4i32)__msa_and_v((v16u8)
F, (v16u8)sign_t);
372 G = (v4i32)__msa_and_v((v16u8)
G, (v16u8)sign_t);
373 H = (v4i32)__msa_and_v((v16u8)
H, (v16u8)sign_t);
382 VSHF_B2_SB(r0_r, r4_r, r1_r, r5_r, mask, mask, d0, d1);
383 VSHF_B2_SB(r2_r, r6_r, r3_r, r7_r, mask, mask, d2, d3);
384 VSHF_B2_SB(r0_l, r4_l, r1_l, r5_l, mask, mask, d4, d5);
385 VSHF_B2_SB(r2_l, r6_l, r3_l, r7_l, mask, mask, d6, d7);
389 ST_D1(d1, 0, dst + stride);
390 ST_D1(d2, 0, dst + 2 * stride);
391 ST_D1(d3, 0, dst + 3 * stride);
392 ST_D1(d4, 0, dst + 4 * stride);
393 ST_D1(d5, 0, dst + 5 * stride);
394 ST_D1(d6, 0, dst + 6 * stride);
395 ST_D1(d7, 0, dst + 7 * stride);
400 idct_msa(dest, line_size, block, 1);
401 memset(block, 0,
sizeof(*block) * 64);
406 idct_msa(dest, line_size, block, 2);
407 memset(block, 0,
sizeof(*block) * 64);
412 int i = (block[0] + 15) >> 5;
413 v4i32
dc = {
i,
i,
i, i};
414 v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
415 v4i32 c0,
c1,
c2, c3, c4, c5, c6, c7;
416 v4i32 e0, e1, e2, e3, e4, e5, e6, e7;
417 v4i32 r0, r1, r2, r3, r4, r5, r6, r7;
418 v16i8
mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
421 LD_SB8(dest, line_size, d0, d1, d2, d3, d4, d5, d6, d7);
422 ILVR_B4_SW(zero, d0, zero, d1, zero, d2, zero, d3,
424 ILVR_B4_SW(zero, d4, zero, d5, zero, d6, zero, d7,
427 ILVR_H4_SW(zero, c0, zero, c1, zero, c2, zero, c3,
429 ILVR_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
449 ILVL_H4_SW(zero, c0, zero, c1, zero, c2, zero, c3,
451 ILVL_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
469 VSHF_B2_SB(e0, r0, e1, r1, mask, mask, d0, d1);
470 VSHF_B2_SB(e2, r2, e3, r3, mask, mask, d2, d3);
471 VSHF_B2_SB(e4, r4, e5, r5, mask, mask, d4, d5);
472 VSHF_B2_SB(e6, r6, e7, r7, mask, mask, d6, d7);
476 ST_D1(d1, 0, dest + line_size);
477 ST_D1(d2, 0, dest + 2 * line_size);
478 ST_D1(d3, 0, dest + 3 * line_size);
479 ST_D1(d4, 0, dest + 4 * line_size);
480 ST_D1(d5, 0, dest + 5 * line_size);
481 ST_D1(d6, 0, dest + 6 * line_size);
482 ST_D1(d7, 0, dest + 7 * line_size);
488 int *bounding_values)
491 v4i32 e0, e1, f0, f1, g0, g1;
493 v16i8 d0, d1, d2, d3;
494 v8i16 c0,
c1,
c2, c3;
496 v8i16 cnst3h = {3, 3, 3, 3, 3, 3, 3, 3},
497 cnst4h = {4, 4, 4, 4, 4, 4, 4, 4};
498 v16i8
mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
502 LD_SB4(first_pixel + nstride * 2, stride, d0, d1, d2, d3);
503 ILVR_B4_SH(zero, d0, zero, d1, zero, d2, zero, d3,
505 r0 = (c0 - c3) + (c2 - c1) * cnst3h;
510 for (
int i = 0;
i < 8;
i++)
511 temp_32[
i] = bounding_values[temp_16[
i]];
512 LD_SW2(temp_32, 4, e0, e1);
523 VSHF_B2_SB(f0, f1, g0, g1, mask, mask, d1, d2);
526 ST_D1(d1, 0, first_pixel + nstride);
527 ST_D1(d2, 0, first_pixel);
531 int *bounding_values)
533 v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
534 v8i16 c0,
c1,
c2, c3, c4, c5, c6, c7;
536 v4i32 e0, e1, f0, f1, g0, g1;
538 v8i16 cnst3h = {3, 3, 3, 3, 3, 3, 3, 3},
539 cnst4h = {4, 4, 4, 4, 4, 4, 4, 4};
540 v16i8
mask = {0, 16, 4, 20, 8, 24, 12, 28, 0, 0, 0, 0, 0, 0, 0, 0};
544 LD_SB8(first_pixel - 2, stride, d0, d1, d2, d3, d4, d5, d6, d7);
545 ILVR_B4_SH(zero, d0, zero, d1, zero, d2, zero, d3,
547 ILVR_B4_SH(zero, d4, zero, d5, zero, d6, zero, d7,
550 c0, c1, c2, c3, c4, c5, c6, c7);
551 r0 = (c0 - c3) + (c2 - c1) * cnst3h;
557 for (
int i = 0;
i < 8;
i++)
558 temp_32[
i] = bounding_values[temp_16[
i]];
559 LD_SW2(temp_32, 4, e0, e1);
570 VSHF_B2_SB(f0, g0, f1, g1, mask, mask, d1, d2);
572 ST_H4(d1, 0, 1, 2, 3, first_pixel - 1, stride);
573 ST_H4(d2, 0, 1, 2, 3, first_pixel - 1 + 4 * stride, stride);
580 v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
581 v16i8 c0,
c1,
c2, c3;
582 v4i32
a0,
a1,
a2,
a3, b0, b1, b2, b3;
586 v16i8
mask = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
590 LD_SB8(src1, stride, d0, d1, d2, d3, d4, d5, d6, d7);
591 VSHF_B2_SB(d0, d1, d2, d3, mask, mask, c0, c1);
592 VSHF_B2_SB(d4, d5, d6, d7, mask, mask, c2, c3);
593 a0 = (v4i32) __msa_pckev_d((v2i64)
c1, (v2i64)c0);
594 a2 = (v4i32) __msa_pckod_d((v2i64)
c1, (v2i64)c0);
595 a1 = (v4i32) __msa_pckev_d((v2i64)c3, (v2i64)c2);
596 a3 = (v4i32) __msa_pckod_d((v2i64)c3, (v2i64)c2);
598 LD_SB8(src2, stride, d0, d1, d2, d3, d4, d5, d6, d7);
599 VSHF_B2_SB(d0, d1, d2, d3, mask, mask, c0, c1);
600 VSHF_B2_SB(d4, d5, d6, d7, mask, mask, c2, c3);
601 b0 = (v4i32) __msa_pckev_d((v2i64)
c1, (v2i64)c0);
602 b2 = (v4i32) __msa_pckod_d((v2i64)
c1, (v2i64)c0);
603 b1 = (v4i32) __msa_pckev_d((v2i64)c3, (v2i64)c2);
604 b3 = (v4i32) __msa_pckod_d((v2i64)c3, (v2i64)c2);
606 e0 = (v4i32) __msa_xor_v((v16u8)
a0, (v16u8)b0);
607 e0 = (v4i32) __msa_and_v((v16u8)e0, (v16u8)fmask);
608 t0 = ((v4u32)e0) >> 1;
609 e2 = (v4i32) __msa_and_v((v16u8)
a0, (v16u8)b0);
612 e1 = (v4i32) __msa_xor_v((v16u8)
a1, (v16u8)b1);
613 e1 = (v4i32) __msa_and_v((v16u8)e1, (v16u8)fmask);
614 t1 = ((v4u32)e1) >> 1;
615 e2 = (v4i32) __msa_and_v((v16u8)
a1, (v16u8)b1);
618 f0 = (v4i32) __msa_xor_v((v16u8)
a2, (v16u8)b2);
619 f0 = (v4i32) __msa_and_v((v16u8)f0, (v16u8)fmask);
620 t2 = ((v4u32)f0) >> 1;
621 f2 = (v4i32) __msa_and_v((v16u8)
a2, (v16u8)b2);
624 f1 = (v4i32) __msa_xor_v((v16u8)
a3, (v16u8)b3);
625 f1 = (v4i32) __msa_and_v((v16u8)f1, (v16u8)fmask);
626 t3 = ((v4u32)f1) >> 1;
627 f2 = (v4i32) __msa_and_v((v16u8)
a3, (v16u8)b3);
630 ST_W8(t0, t1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
631 ST_W8(t2, t3, 0, 1, 2, 3, 0, 1, 2, 3, dst + 4, stride);
635 for (i = 0; i <
h; i++) {
638 a =
AV_RN32(&src1[i * stride]);
639 b =
AV_RN32(&src2[i * stride]);
641 a =
AV_RN32(&src1[i * stride + 4]);
642 b =
AV_RN32(&src2[i * stride + 4]);
void ff_vp3_idct_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_vp3_v_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values)
#define ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride)
void ff_put_no_rnd_pixels_l2_msa(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
#define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3)
#define ST_W8(in0, in1, idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7, pdst, stride)
#define ST_D1(in, idx, pdst)
#define CLIP_SW_0_255(in)
#define i(width, name, range_min, range_max)
static const uint16_t mask[17]
#define TRANSPOSE8x8_SH_SH(...)
static uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void ff_vp3_idct_put_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> dc
static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
void ff_vp3_h_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values)