FFmpeg  4.3
sbrdsp_mips.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012
3  * MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  * contributors may be used to endorse or promote products derived from
15  * this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Authors: Darko Laus (darko@mips.com)
30  * Djordje Pesut (djordje@mips.com)
31  * Mirjana Vulin (mvulin@mips.com)
32  *
33  * AAC Spectral Band Replication decoding functions optimized for MIPS
34  *
35  * This file is part of FFmpeg.
36  *
37  * FFmpeg is free software; you can redistribute it and/or
38  * modify it under the terms of the GNU Lesser General Public
39  * License as published by the Free Software Foundation; either
40  * version 2.1 of the License, or (at your option) any later version.
41  *
42  * FFmpeg is distributed in the hope that it will be useful,
43  * but WITHOUT ANY WARRANTY; without even the implied warranty of
44  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
45  * Lesser General Public License for more details.
46  *
47  * You should have received a copy of the GNU Lesser General Public
48  * License along with FFmpeg; if not, write to the Free Software
49  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
50  */
51 
52 /**
53  * @file
54  * Reference: libavcodec/sbrdsp.c
55  */
56 
57 #include "config.h"
58 #include "libavcodec/sbrdsp.h"
59 #include "libavutil/mips/asmdefs.h"
60 
61 #if HAVE_INLINE_ASM
62 #if HAVE_MIPSFPU
/**
 * QMF pre-shuffle, MIPS inline-assembly implementation.
 *
 * Rearranges the buffer in place:
 *   z[64]       =  z[0]
 *   z[65]       =  z[1]
 *   z[64 + 2*k] = -z[64 - k]   for k = 1..31
 *   z[65 + 2*k] =  z[k + 1]    for k = 1..31
 *
 * The floats are moved with integer loads/stores (lw/sw); negation is done
 * by XOR-ing the 32-bit word with 0x80000000, i.e. flipping the sign bit.
 *
 * @param z buffer of floats; z[0]..z[63] are read, z[64]..z[127] are written
 */
63 static void sbr_qmf_pre_shuffle_mips(float *z)
64 {
65  int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6;
/* z1: write cursor, starts at z[66] and advances 10 floats per pass */
66  float *z1 = &z[66];
/* z2: descending read cursor for the negated values (z[59..63] first) */
67  float *z2 = &z[59];
/* z3: ascending read cursor for the copied values (z[2..6] first) */
68  float *z3 = &z[2];
/* z4: loop end marker, &z[126]; reached after 6 passes (k = 1..30) */
69  float *z4 = z1 + 60;
70 
71  /* loop unrolled 5 times */
72  __asm__ volatile (
/* Temp6 = 0x80000000, the sign-bit mask */
73  "lui %[Temp6], 0x8000 \n\t"
74  "1: \n\t"
75  "lw %[Temp1], 0(%[z2]) \n\t"
76  "lw %[Temp2], 4(%[z2]) \n\t"
77  "lw %[Temp3], 8(%[z2]) \n\t"
78  "lw %[Temp4], 12(%[z2]) \n\t"
79  "lw %[Temp5], 16(%[z2]) \n\t"
80  "xor %[Temp1], %[Temp1], %[Temp6] \n\t"
81  "xor %[Temp2], %[Temp2], %[Temp6] \n\t"
82  "xor %[Temp3], %[Temp3], %[Temp6] \n\t"
83  "xor %[Temp4], %[Temp4], %[Temp6] \n\t"
84  "xor %[Temp5], %[Temp5], %[Temp6] \n\t"
85  PTR_ADDIU "%[z2], %[z2], -20 \n\t"
/* negated values go to the even slots, in reversed order */
86  "sw %[Temp1], 32(%[z1]) \n\t"
87  "sw %[Temp2], 24(%[z1]) \n\t"
88  "sw %[Temp3], 16(%[z1]) \n\t"
89  "sw %[Temp4], 8(%[z1]) \n\t"
90  "sw %[Temp5], 0(%[z1]) \n\t"
91  "lw %[Temp1], 0(%[z3]) \n\t"
92  "lw %[Temp2], 4(%[z3]) \n\t"
93  "lw %[Temp3], 8(%[z3]) \n\t"
94  "lw %[Temp4], 12(%[z3]) \n\t"
95  "lw %[Temp5], 16(%[z3]) \n\t"
/* plain copies go to the odd slots, in the same order */
96  "sw %[Temp1], 4(%[z1]) \n\t"
97  "sw %[Temp2], 12(%[z1]) \n\t"
98  "sw %[Temp3], 20(%[z1]) \n\t"
99  "sw %[Temp4], 28(%[z1]) \n\t"
100  "sw %[Temp5], 36(%[z1]) \n\t"
101  PTR_ADDIU "%[z3], %[z3], 20 \n\t"
102  PTR_ADDIU "%[z1], %[z1], 40 \n\t"
103  "bne %[z1], %[z4], 1b \n\t"
/* tail: k = 31 (z[126] = -z[33], z[127] = z[32]) plus z[64] = z[0], z[65] = z[1] */
104  "lw %[Temp1], 132(%[z]) \n\t"
105  "lw %[Temp2], 128(%[z]) \n\t"
106  "lw %[Temp3], 0(%[z]) \n\t"
107  "lw %[Temp4], 4(%[z]) \n\t"
108  "xor %[Temp1], %[Temp1], %[Temp6] \n\t"
109  "sw %[Temp1], 504(%[z]) \n\t"
110  "sw %[Temp2], 508(%[z]) \n\t"
111  "sw %[Temp3], 256(%[z]) \n\t"
112  "sw %[Temp4], 260(%[z]) \n\t"
113 
114  : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
115  [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
116  [Temp5]"=&r"(Temp5), [Temp6]"=&r"(Temp6),
117  [z1]"+r"(z1), [z2]"+r"(z2), [z3]"+r"(z3)
118  : [z4]"r"(z4), [z]"r"(z)
119  : "memory"
120  );
121 }
122 
/**
 * QMF post-shuffle, MIPS inline-assembly implementation.
 *
 * For k = 0..31:
 *   W[k][0] = -z[63 - k]
 *   W[k][1] =  z[k]
 *
 * Values are moved with integer loads/stores; negation is done by XOR-ing
 * the word with 0x80000000 (sign-bit flip).
 *
 * @param W output, 32 pairs of floats
 * @param z input; z[0]..z[63] are read
 */
123 static void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
124 {
125  int Temp1, Temp2, Temp3, Temp4, Temp5;
126  float *W_ptr = (float *)W;
/* z1: ascending read cursor (copied values); also drives loop termination */
127  float *z1 = (float *)z;
/* z2: descending read cursor (negated values), starts at z[60..63] */
128  float *z2 = (float *)&z[60];
/* 32 floats / 4 per pass = 8 passes */
129  float *z_end = z1 + 32;
130 
131  /* loop unrolled 4 times */
132  __asm__ volatile (
/* Temp5 = 0x80000000, the sign-bit mask */
133  "lui %[Temp5], 0x8000 \n\t"
134  "1: \n\t"
135  "lw %[Temp1], 0(%[z2]) \n\t"
136  "lw %[Temp2], 4(%[z2]) \n\t"
137  "lw %[Temp3], 8(%[z2]) \n\t"
138  "lw %[Temp4], 12(%[z2]) \n\t"
139  "xor %[Temp1], %[Temp1], %[Temp5] \n\t"
140  "xor %[Temp2], %[Temp2], %[Temp5] \n\t"
141  "xor %[Temp3], %[Temp3], %[Temp5] \n\t"
142  "xor %[Temp4], %[Temp4], %[Temp5] \n\t"
143  PTR_ADDIU "%[z2], %[z2], -16 \n\t"
/* negated values fill W[..][0] in reversed order */
144  "sw %[Temp1], 24(%[W_ptr]) \n\t"
145  "sw %[Temp2], 16(%[W_ptr]) \n\t"
146  "sw %[Temp3], 8(%[W_ptr]) \n\t"
147  "sw %[Temp4], 0(%[W_ptr]) \n\t"
148  "lw %[Temp1], 0(%[z1]) \n\t"
149  "lw %[Temp2], 4(%[z1]) \n\t"
150  "lw %[Temp3], 8(%[z1]) \n\t"
151  "lw %[Temp4], 12(%[z1]) \n\t"
/* plain copies fill W[..][1] in the same order */
152  "sw %[Temp1], 4(%[W_ptr]) \n\t"
153  "sw %[Temp2], 12(%[W_ptr]) \n\t"
154  "sw %[Temp3], 20(%[W_ptr]) \n\t"
155  "sw %[Temp4], 28(%[W_ptr]) \n\t"
156  PTR_ADDIU "%[z1], %[z1], 16 \n\t"
157  PTR_ADDIU "%[W_ptr],%[W_ptr], 32 \n\t"
158  "bne %[z1], %[z_end], 1b \n\t"
159 
160  : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
161  [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
162  [Temp5]"=&r"(Temp5), [z1]"+r"(z1),
163  [z2]"+r"(z2), [W_ptr]"+r"(W_ptr)
164  : [z_end]"r"(z_end)
165  : "memory"
166  );
167 }
168 
169 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
/**
 * Fold five consecutive 64-float segments into the first one.
 *
 * For k = 0..63:
 *   z[k] = z[k] + z[k + 64] + z[k + 128] + z[k + 192] + z[k + 256]
 * (summed left to right in that order).
 *
 * Each asm statement handles 8 consecutive values of k; the C loop runs it
 * 8 times. Byte offsets 256/512/768/1024 correspond to the 64/128/192/256
 * float strides. Registers $f0-$f15 are used directly as scratch and are
 * listed as clobbers.
 *
 * @param z buffer of floats; z[0]..z[319] are read, z[0]..z[63] are written
 */
170 static void sbr_sum64x5_mips(float *z)
171 {
172  int k;
173  float *z1;
174  float f1, f2, f3, f4, f5, f6, f7, f8;
175  for (k = 0; k < 64; k += 8) {
176 
177  z1 = &z[k];
178 
179  /* loop unrolled 8 times */
180  __asm__ volatile (
/* first stage: f1..f8 = z[k+j] + z[k+j+64], loads interleaved with adds */
181  "lwc1 $f0, 0(%[z1]) \n\t"
182  "lwc1 $f1, 256(%[z1]) \n\t"
183  "lwc1 $f2, 4(%[z1]) \n\t"
184  "lwc1 $f3, 260(%[z1]) \n\t"
185  "lwc1 $f4, 8(%[z1]) \n\t"
186  "add.s %[f1], $f0, $f1 \n\t"
187  "lwc1 $f5, 264(%[z1]) \n\t"
188  "add.s %[f2], $f2, $f3 \n\t"
189  "lwc1 $f6, 12(%[z1]) \n\t"
190  "lwc1 $f7, 268(%[z1]) \n\t"
191  "add.s %[f3], $f4, $f5 \n\t"
192  "lwc1 $f8, 16(%[z1]) \n\t"
193  "lwc1 $f9, 272(%[z1]) \n\t"
194  "add.s %[f4], $f6, $f7 \n\t"
195  "lwc1 $f10, 20(%[z1]) \n\t"
196  "lwc1 $f11, 276(%[z1]) \n\t"
197  "add.s %[f5], $f8, $f9 \n\t"
198  "lwc1 $f12, 24(%[z1]) \n\t"
199  "lwc1 $f13, 280(%[z1]) \n\t"
200  "add.s %[f6], $f10, $f11 \n\t"
201  "lwc1 $f14, 28(%[z1]) \n\t"
202  "lwc1 $f15, 284(%[z1]) \n\t"
203  "add.s %[f7], $f12, $f13 \n\t"
/* second stage: add the segment at +128 floats (byte offset 512) */
204  "lwc1 $f0, 512(%[z1]) \n\t"
205  "lwc1 $f1, 516(%[z1]) \n\t"
206  "add.s %[f8], $f14, $f15 \n\t"
207  "lwc1 $f2, 520(%[z1]) \n\t"
208  "add.s %[f1], %[f1], $f0 \n\t"
209  "add.s %[f2], %[f2], $f1 \n\t"
210  "lwc1 $f3, 524(%[z1]) \n\t"
211  "add.s %[f3], %[f3], $f2 \n\t"
212  "lwc1 $f4, 528(%[z1]) \n\t"
213  "lwc1 $f5, 532(%[z1]) \n\t"
214  "add.s %[f4], %[f4], $f3 \n\t"
215  "lwc1 $f6, 536(%[z1]) \n\t"
216  "add.s %[f5], %[f5], $f4 \n\t"
217  "add.s %[f6], %[f6], $f5 \n\t"
218  "lwc1 $f7, 540(%[z1]) \n\t"
219  "add.s %[f7], %[f7], $f6 \n\t"
/* third stage: add the segment at +192 floats (byte offset 768) */
220  "lwc1 $f0, 768(%[z1]) \n\t"
221  "lwc1 $f1, 772(%[z1]) \n\t"
222  "add.s %[f8], %[f8], $f7 \n\t"
223  "lwc1 $f2, 776(%[z1]) \n\t"
224  "add.s %[f1], %[f1], $f0 \n\t"
225  "add.s %[f2], %[f2], $f1 \n\t"
226  "lwc1 $f3, 780(%[z1]) \n\t"
227  "add.s %[f3], %[f3], $f2 \n\t"
228  "lwc1 $f4, 784(%[z1]) \n\t"
229  "lwc1 $f5, 788(%[z1]) \n\t"
230  "add.s %[f4], %[f4], $f3 \n\t"
231  "lwc1 $f6, 792(%[z1]) \n\t"
232  "add.s %[f5], %[f5], $f4 \n\t"
233  "add.s %[f6], %[f6], $f5 \n\t"
234  "lwc1 $f7, 796(%[z1]) \n\t"
235  "add.s %[f7], %[f7], $f6 \n\t"
/* fourth stage: add the segment at +256 floats (byte offset 1024) */
236  "lwc1 $f0, 1024(%[z1]) \n\t"
237  "lwc1 $f1, 1028(%[z1]) \n\t"
238  "add.s %[f8], %[f8], $f7 \n\t"
239  "lwc1 $f2, 1032(%[z1]) \n\t"
240  "add.s %[f1], %[f1], $f0 \n\t"
241  "add.s %[f2], %[f2], $f1 \n\t"
242  "lwc1 $f3, 1036(%[z1]) \n\t"
243  "add.s %[f3], %[f3], $f2 \n\t"
244  "lwc1 $f4, 1040(%[z1]) \n\t"
245  "lwc1 $f5, 1044(%[z1]) \n\t"
246  "add.s %[f4], %[f4], $f3 \n\t"
247  "lwc1 $f6, 1048(%[z1]) \n\t"
248  "add.s %[f5], %[f5], $f4 \n\t"
249  "add.s %[f6], %[f6], $f5 \n\t"
250  "lwc1 $f7, 1052(%[z1]) \n\t"
251  "add.s %[f7], %[f7], $f6 \n\t"
/* write the eight sums back to z[k..k+7] */
252  "swc1 %[f1], 0(%[z1]) \n\t"
253  "swc1 %[f2], 4(%[z1]) \n\t"
254  "add.s %[f8], %[f8], $f7 \n\t"
255  "swc1 %[f3], 8(%[z1]) \n\t"
256  "swc1 %[f4], 12(%[z1]) \n\t"
257  "swc1 %[f5], 16(%[z1]) \n\t"
258  "swc1 %[f6], 20(%[z1]) \n\t"
259  "swc1 %[f7], 24(%[z1]) \n\t"
260  "swc1 %[f8], 28(%[z1]) \n\t"
261 
262  : [f1]"=&f"(f1), [f2]"=&f"(f2), [f3]"=&f"(f3),
263  [f4]"=&f"(f4), [f5]"=&f"(f5), [f6]"=&f"(f6),
264  [f7]"=&f"(f7), [f8]"=&f"(f8)
265  : [z1]"r"(z1)
266  : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
267  "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
268  "$f12", "$f13", "$f14", "$f15",
269  "memory"
270  );
271  }
272 }
273 
/**
 * Sum of squares of an array of float pairs.
 *
 * Accumulates x[i][0]^2 into sum0 and x[i][1]^2 into sum1, two pairs
 * (four floats) at a time, and returns sum0 + sum1. In total
 * (n >> 1) * 4 floats are consumed, i.e. an odd n is rounded down.
 *
 * The loop is software-pipelined: four values are loaded before the loop,
 * each pass accumulates the previously loaded values while loading the next
 * four, and the last four are accumulated after the loop.
 *
 * NOTE(review): the loop body always runs at least once and exits only on
 * pointer equality with loop_end, so n < 4 would never hit the exit
 * condition - presumably callers always pass a larger even n; confirm.
 *
 * @param x array of float pairs
 * @param n number of pairs
 * @return sum of the squares of all consumed floats
 */
274 static float sbr_sum_square_mips(float (*x)[2], int n)
275 {
276  float sum0 = 0.0f, sum1 = 0.0f;
277  float *p_x;
278  float temp0, temp1, temp2, temp3;
279  float *loop_end;
280  p_x = &x[0][0];
/* address of the last group of four floats */
281  loop_end = p_x + (n >> 1)*4 - 4;
282 
283  __asm__ volatile (
284  ".set push \n\t"
/* noreorder: the instruction after "bne" below is its delay slot */
285  ".set noreorder \n\t"
286  "lwc1 %[temp0], 0(%[p_x]) \n\t"
287  "lwc1 %[temp1], 4(%[p_x]) \n\t"
288  "lwc1 %[temp2], 8(%[p_x]) \n\t"
289  "lwc1 %[temp3], 12(%[p_x]) \n\t"
290  "1: \n\t"
291  PTR_ADDIU "%[p_x], %[p_x], 16 \n\t"
292  "madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t"
293  "lwc1 %[temp0], 0(%[p_x]) \n\t"
294  "madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t"
295  "lwc1 %[temp1], 4(%[p_x]) \n\t"
296  "madd.s %[sum0], %[sum0], %[temp2], %[temp2] \n\t"
297  "lwc1 %[temp2], 8(%[p_x]) \n\t"
298  "madd.s %[sum1], %[sum1], %[temp3], %[temp3] \n\t"
299  "bne %[p_x], %[loop_end], 1b \n\t"
/* branch delay slot: executed whether or not the branch is taken */
300  " lwc1 %[temp3], 12(%[p_x]) \n\t"
/* epilogue: accumulate the final group loaded by the last pass */
301  "madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t"
302  "madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t"
303  "madd.s %[sum0], %[sum0], %[temp2], %[temp2] \n\t"
304  "madd.s %[sum1], %[sum1], %[temp3], %[temp3] \n\t"
305  ".set pop \n\t"
306 
307  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
308  [temp3]"=&f"(temp3), [sum0]"+f"(sum0), [sum1]"+f"(sum1),
309  [p_x]"+r"(p_x)
310  : [loop_end]"r"(loop_end)
311  : "memory"
312  );
313  return sum0 + sum1;
314 }
315 
/**
 * QMF de-interleave butterfly, MIPS inline-assembly implementation.
 *
 * For i = 0..63:
 *   v[i]       = src0[i] - src1[63 - i]
 *   v[127 - i] = src0[i] + src1[63 - i]
 *
 * Each asm statement handles 16 consecutive values of i; the C loop runs it
 * 4 times. src0/v0 walk upwards, src1/v1 walk downwards.
 *
 * @param v    output, 128 floats
 * @param src0 first input, 64 floats, read front to back
 * @param src1 second input, 64 floats, read back to front
 */
316 static void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *src1)
317 {
318  int i;
319  float temp0, temp1, temp2, temp3, temp4, temp5;
320  float temp6, temp7, temp8, temp9, temp10, temp11;
/* v0: ascending output cursor (differences) */
321  float *v0 = v;
/* v1: descending output cursor (sums), starts at the last element */
322  float *v1 = &v[127];
323  float *psrc0 = (float*)src0;
324  float *psrc1 = (float*)&src1[63];
325 
326  for (i = 0; i < 4; i++) {
327 
328  /* loop unrolled 16 times */
329  __asm__ volatile(
/* elements 0..3 of this group of 16 */
330  "lwc1 %[temp0], 0(%[src0]) \n\t"
331  "lwc1 %[temp1], 0(%[src1]) \n\t"
332  "lwc1 %[temp3], 4(%[src0]) \n\t"
333  "lwc1 %[temp4], -4(%[src1]) \n\t"
334  "lwc1 %[temp6], 8(%[src0]) \n\t"
335  "lwc1 %[temp7], -8(%[src1]) \n\t"
336  "lwc1 %[temp9], 12(%[src0]) \n\t"
337  "lwc1 %[temp10], -12(%[src1]) \n\t"
338  "add.s %[temp2], %[temp0], %[temp1] \n\t"
339  "add.s %[temp5], %[temp3], %[temp4] \n\t"
340  "add.s %[temp8], %[temp6], %[temp7] \n\t"
341  "add.s %[temp11], %[temp9], %[temp10] \n\t"
342  "sub.s %[temp0], %[temp0], %[temp1] \n\t"
343  "sub.s %[temp3], %[temp3], %[temp4] \n\t"
344  "sub.s %[temp6], %[temp6], %[temp7] \n\t"
345  "sub.s %[temp9], %[temp9], %[temp10] \n\t"
346  "swc1 %[temp2], 0(%[v1]) \n\t"
347  "swc1 %[temp0], 0(%[v0]) \n\t"
348  "swc1 %[temp5], -4(%[v1]) \n\t"
349  "swc1 %[temp3], 4(%[v0]) \n\t"
350  "swc1 %[temp8], -8(%[v1]) \n\t"
351  "swc1 %[temp6], 8(%[v0]) \n\t"
352  "swc1 %[temp11], -12(%[v1]) \n\t"
353  "swc1 %[temp9], 12(%[v0]) \n\t"
/* elements 4..7 */
354  "lwc1 %[temp0], 16(%[src0]) \n\t"
355  "lwc1 %[temp1], -16(%[src1]) \n\t"
356  "lwc1 %[temp3], 20(%[src0]) \n\t"
357  "lwc1 %[temp4], -20(%[src1]) \n\t"
358  "lwc1 %[temp6], 24(%[src0]) \n\t"
359  "lwc1 %[temp7], -24(%[src1]) \n\t"
360  "lwc1 %[temp9], 28(%[src0]) \n\t"
361  "lwc1 %[temp10], -28(%[src1]) \n\t"
362  "add.s %[temp2], %[temp0], %[temp1] \n\t"
363  "add.s %[temp5], %[temp3], %[temp4] \n\t"
364  "add.s %[temp8], %[temp6], %[temp7] \n\t"
365  "add.s %[temp11], %[temp9], %[temp10] \n\t"
366  "sub.s %[temp0], %[temp0], %[temp1] \n\t"
367  "sub.s %[temp3], %[temp3], %[temp4] \n\t"
368  "sub.s %[temp6], %[temp6], %[temp7] \n\t"
369  "sub.s %[temp9], %[temp9], %[temp10] \n\t"
370  "swc1 %[temp2], -16(%[v1]) \n\t"
371  "swc1 %[temp0], 16(%[v0]) \n\t"
372  "swc1 %[temp5], -20(%[v1]) \n\t"
373  "swc1 %[temp3], 20(%[v0]) \n\t"
374  "swc1 %[temp8], -24(%[v1]) \n\t"
375  "swc1 %[temp6], 24(%[v0]) \n\t"
376  "swc1 %[temp11], -28(%[v1]) \n\t"
377  "swc1 %[temp9], 28(%[v0]) \n\t"
/* elements 8..11 */
378  "lwc1 %[temp0], 32(%[src0]) \n\t"
379  "lwc1 %[temp1], -32(%[src1]) \n\t"
380  "lwc1 %[temp3], 36(%[src0]) \n\t"
381  "lwc1 %[temp4], -36(%[src1]) \n\t"
382  "lwc1 %[temp6], 40(%[src0]) \n\t"
383  "lwc1 %[temp7], -40(%[src1]) \n\t"
384  "lwc1 %[temp9], 44(%[src0]) \n\t"
385  "lwc1 %[temp10], -44(%[src1]) \n\t"
386  "add.s %[temp2], %[temp0], %[temp1] \n\t"
387  "add.s %[temp5], %[temp3], %[temp4] \n\t"
388  "add.s %[temp8], %[temp6], %[temp7] \n\t"
389  "add.s %[temp11], %[temp9], %[temp10] \n\t"
390  "sub.s %[temp0], %[temp0], %[temp1] \n\t"
391  "sub.s %[temp3], %[temp3], %[temp4] \n\t"
392  "sub.s %[temp6], %[temp6], %[temp7] \n\t"
393  "sub.s %[temp9], %[temp9], %[temp10] \n\t"
394  "swc1 %[temp2], -32(%[v1]) \n\t"
395  "swc1 %[temp0], 32(%[v0]) \n\t"
396  "swc1 %[temp5], -36(%[v1]) \n\t"
397  "swc1 %[temp3], 36(%[v0]) \n\t"
398  "swc1 %[temp8], -40(%[v1]) \n\t"
399  "swc1 %[temp6], 40(%[v0]) \n\t"
400  "swc1 %[temp11], -44(%[v1]) \n\t"
401  "swc1 %[temp9], 44(%[v0]) \n\t"
/* elements 12..15 */
402  "lwc1 %[temp0], 48(%[src0]) \n\t"
403  "lwc1 %[temp1], -48(%[src1]) \n\t"
404  "lwc1 %[temp3], 52(%[src0]) \n\t"
405  "lwc1 %[temp4], -52(%[src1]) \n\t"
406  "lwc1 %[temp6], 56(%[src0]) \n\t"
407  "lwc1 %[temp7], -56(%[src1]) \n\t"
408  "lwc1 %[temp9], 60(%[src0]) \n\t"
409  "lwc1 %[temp10], -60(%[src1]) \n\t"
410  "add.s %[temp2], %[temp0], %[temp1] \n\t"
411  "add.s %[temp5], %[temp3], %[temp4] \n\t"
412  "add.s %[temp8], %[temp6], %[temp7] \n\t"
413  "add.s %[temp11], %[temp9], %[temp10] \n\t"
414  "sub.s %[temp0], %[temp0], %[temp1] \n\t"
415  "sub.s %[temp3], %[temp3], %[temp4] \n\t"
416  "sub.s %[temp6], %[temp6], %[temp7] \n\t"
417  "sub.s %[temp9], %[temp9], %[temp10] \n\t"
418  "swc1 %[temp2], -48(%[v1]) \n\t"
419  "swc1 %[temp0], 48(%[v0]) \n\t"
420  "swc1 %[temp5], -52(%[v1]) \n\t"
421  "swc1 %[temp3], 52(%[v0]) \n\t"
422  "swc1 %[temp8], -56(%[v1]) \n\t"
423  "swc1 %[temp6], 56(%[v0]) \n\t"
424  "swc1 %[temp11], -60(%[v1]) \n\t"
425  "swc1 %[temp9], 60(%[v0]) \n\t"
/* advance all four cursors by 16 floats for the next pass */
426  PTR_ADDIU " %[src0], %[src0], 64 \n\t"
427  PTR_ADDIU " %[src1], %[src1], -64 \n\t"
428  PTR_ADDIU " %[v0], %[v0], 64 \n\t"
429  PTR_ADDIU " %[v1], %[v1], -64 \n\t"
430 
431  : [v0]"+r"(v0), [v1]"+r"(v1), [src0]"+r"(psrc0), [src1]"+r"(psrc1),
432  [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
433  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
434  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
435  [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
436  :
437  :"memory"
438  );
439  }
440 }
441 
/**
 * Complex autocorrelation at lags 0, 1 and 2, MIPS inline-asm version.
 *
 * With x[i] = (x[i][0], x[i][1]) = (re, im), the running sums over
 * i = 1..37 are
 *   real_sum_0 += re_i*re_i     + im_i*im_i
 *   real_sum_1 += re_i*re_{i+1} + im_i*im_{i+1}
 *   imag_sum_1 += re_i*im_{i+1} - im_i*re_{i+1}
 *   real_sum_2 += re_i*re_{i+2} + im_i*im_{i+2}
 *   imag_sum_2 += re_i*im_{i+2} - im_i*re_{i+2}
 * The first asm statement handles i = 1, the loop handles i = 2..37 (three
 * values of i per pass, 12 passes), and the last asm statement adds the
 * boundary terms and stores the results:
 *   phi[2][1][0] = real_sum_0 + |x[0]|^2
 *   phi[1][0][0] = real_sum_0 + |x[38]|^2
 *   phi[1][1][0] = real_sum_1 + re_0*re_1   + im_0*im_1
 *   phi[1][1][1] = imag_sum_1 + re_0*im_1   - im_0*re_1
 *   phi[0][0][0] = real_sum_1 + re_38*re_39 + im_38*im_39
 *   phi[0][0][1] = imag_sum_1 + re_38*im_39 - im_38*re_39
 *   phi[0][1][0] = real_sum_2 + re_0*re_2   + im_0*im_2
 *   phi[0][1][1] = imag_sum_2 + re_0*im_2   - im_0*re_2
 * The remaining elements of phi are not written here.
 *
 * @param x   40 complex input samples
 * @param phi output correlation values (see above for the slots written)
 */
442 static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
443 {
444  int i;
445  float real_sum_0 = 0.0f;
446  float real_sum_1 = 0.0f;
447  float real_sum_2 = 0.0f;
448  float imag_sum_1 = 0.0f;
449  float imag_sum_2 = 0.0f;
450  float *p_x, *p_phi;
451  float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
452  float temp7, temp_r, temp_r1, temp_r2, temp_r3, temp_r4;
453  p_x = (float*)&x[0][0];
454  p_phi = &phi[0][0][0];
455 
/* i = 1: loads x[1], x[2], x[3] (byte offsets 8..28 from &x[0]) */
456  __asm__ volatile (
457  "lwc1 %[temp0], 8(%[p_x]) \n\t"
458  "lwc1 %[temp1], 12(%[p_x]) \n\t"
459  "lwc1 %[temp2], 16(%[p_x]) \n\t"
460  "lwc1 %[temp3], 20(%[p_x]) \n\t"
461  "lwc1 %[temp4], 24(%[p_x]) \n\t"
462  "lwc1 %[temp5], 28(%[p_x]) \n\t"
463  "mul.s %[temp_r], %[temp1], %[temp1] \n\t"
464  "mul.s %[temp_r1], %[temp1], %[temp3] \n\t"
465  "mul.s %[temp_r2], %[temp1], %[temp2] \n\t"
466  "mul.s %[temp_r3], %[temp1], %[temp5] \n\t"
467  "mul.s %[temp_r4], %[temp1], %[temp4] \n\t"
/* madd.s d,a,b,c: d = b*c + a;  msub.s d,a,b,c: d = b*c - a */
468  "madd.s %[temp_r], %[temp_r], %[temp0], %[temp0] \n\t"
469  "madd.s %[temp_r1], %[temp_r1], %[temp0], %[temp2] \n\t"
470  "msub.s %[temp_r2], %[temp_r2], %[temp0], %[temp3] \n\t"
471  "madd.s %[temp_r3], %[temp_r3], %[temp0], %[temp4] \n\t"
472  "msub.s %[temp_r4], %[temp_r4], %[temp0], %[temp5] \n\t"
473  "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
474  "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
475  "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
476  "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
477  "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
/* advance by one complex sample */
478  PTR_ADDIU "%[p_x], %[p_x], 8 \n\t"
479 
480  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
481  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
482  [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
483  [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
484  [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1), [temp_r2]"=&f"(temp_r2),
485  [temp_r3]"=&f"(temp_r3), [temp_r4]"=&f"(temp_r4),
486  [p_x]"+r"(p_x), [imag_sum_2]"+f"(imag_sum_2)
487  :
488  : "memory"
489  );
490 
/* i = 2..37: three samples per pass; the three sample registers pairs
 * (temp0/1, temp2/3, temp4/5) rotate roles so each sample is loaded once */
491  for (i = 0; i < 12; i++) {
492  __asm__ volatile (
493  "lwc1 %[temp0], 8(%[p_x]) \n\t"
494  "lwc1 %[temp1], 12(%[p_x]) \n\t"
495  "lwc1 %[temp2], 16(%[p_x]) \n\t"
496  "lwc1 %[temp3], 20(%[p_x]) \n\t"
497  "lwc1 %[temp4], 24(%[p_x]) \n\t"
498  "lwc1 %[temp5], 28(%[p_x]) \n\t"
499  "mul.s %[temp_r], %[temp1], %[temp1] \n\t"
500  "mul.s %[temp_r1], %[temp1], %[temp3] \n\t"
501  "mul.s %[temp_r2], %[temp1], %[temp2] \n\t"
502  "mul.s %[temp_r3], %[temp1], %[temp5] \n\t"
503  "mul.s %[temp_r4], %[temp1], %[temp4] \n\t"
504  "madd.s %[temp_r], %[temp_r], %[temp0], %[temp0] \n\t"
505  "madd.s %[temp_r1], %[temp_r1], %[temp0], %[temp2] \n\t"
506  "msub.s %[temp_r2], %[temp_r2], %[temp0], %[temp3] \n\t"
507  "madd.s %[temp_r3], %[temp_r3], %[temp0], %[temp4] \n\t"
508  "msub.s %[temp_r4], %[temp_r4], %[temp0], %[temp5] \n\t"
509  "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
510  "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
511  "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
512  "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
513  "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
/* second sample of the pass: current = temp2/3, +1 = temp4/5, +2 = temp0/1 */
514  "lwc1 %[temp0], 32(%[p_x]) \n\t"
515  "lwc1 %[temp1], 36(%[p_x]) \n\t"
516  "mul.s %[temp_r], %[temp3], %[temp3] \n\t"
517  "mul.s %[temp_r1], %[temp3], %[temp5] \n\t"
518  "mul.s %[temp_r2], %[temp3], %[temp4] \n\t"
519  "mul.s %[temp_r3], %[temp3], %[temp1] \n\t"
520  "mul.s %[temp_r4], %[temp3], %[temp0] \n\t"
521  "madd.s %[temp_r], %[temp_r], %[temp2], %[temp2] \n\t"
522  "madd.s %[temp_r1], %[temp_r1], %[temp2], %[temp4] \n\t"
523  "msub.s %[temp_r2], %[temp_r2], %[temp2], %[temp5] \n\t"
524  "madd.s %[temp_r3], %[temp_r3], %[temp2], %[temp0] \n\t"
525  "msub.s %[temp_r4], %[temp_r4], %[temp2], %[temp1] \n\t"
526  "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
527  "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
528  "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
529  "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
530  "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
/* third sample of the pass: current = temp4/5, +1 = temp0/1, +2 = temp2/3 */
531  "lwc1 %[temp2], 40(%[p_x]) \n\t"
532  "lwc1 %[temp3], 44(%[p_x]) \n\t"
533  "mul.s %[temp_r], %[temp5], %[temp5] \n\t"
534  "mul.s %[temp_r1], %[temp5], %[temp1] \n\t"
535  "mul.s %[temp_r2], %[temp5], %[temp0] \n\t"
536  "mul.s %[temp_r3], %[temp5], %[temp3] \n\t"
537  "mul.s %[temp_r4], %[temp5], %[temp2] \n\t"
538  "madd.s %[temp_r], %[temp_r], %[temp4], %[temp4] \n\t"
539  "madd.s %[temp_r1], %[temp_r1], %[temp4], %[temp0] \n\t"
540  "msub.s %[temp_r2], %[temp_r2], %[temp4], %[temp1] \n\t"
541  "madd.s %[temp_r3], %[temp_r3], %[temp4], %[temp2] \n\t"
542  "msub.s %[temp_r4], %[temp_r4], %[temp4], %[temp3] \n\t"
543  "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
544  "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
545  "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
546  "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
547  "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
/* advance by three complex samples */
548  PTR_ADDIU "%[p_x], %[p_x], 24 \n\t"
549 
550  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
551  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
552  [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
553  [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
554  [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1),
555  [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
556  [temp_r4]"=&f"(temp_r4), [p_x]"+r"(p_x),
557  [imag_sum_2]"+f"(imag_sum_2)
558  :
559  : "memory"
560  );
561  }
/* Here p_x == &x[37][0] (8 + 12*24 = 296 bytes past &x[0][0]), so the
 * negative offsets below address x[0], x[1], x[2] and offsets 8..20
 * address x[38], x[39]. */
562  __asm__ volatile (
563  "lwc1 %[temp0], -296(%[p_x]) \n\t"
564  "lwc1 %[temp1], -292(%[p_x]) \n\t"
565  "lwc1 %[temp2], 8(%[p_x]) \n\t"
566  "lwc1 %[temp3], 12(%[p_x]) \n\t"
567  "lwc1 %[temp4], -288(%[p_x]) \n\t"
568  "lwc1 %[temp5], -284(%[p_x]) \n\t"
569  "lwc1 %[temp6], -280(%[p_x]) \n\t"
570  "lwc1 %[temp7], -276(%[p_x]) \n\t"
571  "madd.s %[temp_r], %[real_sum_0], %[temp0], %[temp0] \n\t"
572  "madd.s %[temp_r1], %[real_sum_0], %[temp2], %[temp2] \n\t"
573  "madd.s %[temp_r2], %[real_sum_1], %[temp0], %[temp4] \n\t"
574  "madd.s %[temp_r3], %[imag_sum_1], %[temp0], %[temp5] \n\t"
575  "madd.s %[temp_r], %[temp_r], %[temp1], %[temp1] \n\t"
576  "madd.s %[temp_r1], %[temp_r1], %[temp3], %[temp3] \n\t"
577  "madd.s %[temp_r2], %[temp_r2], %[temp1], %[temp5] \n\t"
/* nmsub.s d,a,b,c: d = a - b*c */
578  "nmsub.s %[temp_r3], %[temp_r3], %[temp1], %[temp4] \n\t"
579  "lwc1 %[temp4], 16(%[p_x]) \n\t"
580  "lwc1 %[temp5], 20(%[p_x]) \n\t"
/* byte offsets 40/16/24/28 = phi[2][1][0], phi[1][0][0], phi[1][1][0], phi[1][1][1] */
581  "swc1 %[temp_r], 40(%[p_phi]) \n\t"
582  "swc1 %[temp_r1], 16(%[p_phi]) \n\t"
583  "swc1 %[temp_r2], 24(%[p_phi]) \n\t"
584  "swc1 %[temp_r3], 28(%[p_phi]) \n\t"
585  "madd.s %[temp_r], %[real_sum_1], %[temp2], %[temp4] \n\t"
586  "madd.s %[temp_r1], %[imag_sum_1], %[temp2], %[temp5] \n\t"
587  "madd.s %[temp_r2], %[real_sum_2], %[temp0], %[temp6] \n\t"
588  "madd.s %[temp_r3], %[imag_sum_2], %[temp0], %[temp7] \n\t"
589  "madd.s %[temp_r], %[temp_r], %[temp3], %[temp5] \n\t"
590  "nmsub.s %[temp_r1], %[temp_r1], %[temp3], %[temp4] \n\t"
591  "madd.s %[temp_r2], %[temp_r2], %[temp1], %[temp7] \n\t"
592  "nmsub.s %[temp_r3], %[temp_r3], %[temp1], %[temp6] \n\t"
/* byte offsets 0/4/8/12 = phi[0][0][0], phi[0][0][1], phi[0][1][0], phi[0][1][1] */
593  "swc1 %[temp_r], 0(%[p_phi]) \n\t"
594  "swc1 %[temp_r1], 4(%[p_phi]) \n\t"
595  "swc1 %[temp_r2], 8(%[p_phi]) \n\t"
596  "swc1 %[temp_r3], 12(%[p_phi]) \n\t"
597 
598  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
599  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
600  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp_r]"=&f"(temp_r),
601  [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
602  [real_sum_2]"+f"(real_sum_2), [imag_sum_1]"+f"(imag_sum_1),
603  [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
604  [temp_r1]"=&f"(temp_r1), [p_phi]"+r"(p_phi),
605  [imag_sum_2]"+f"(imag_sum_2)
606  : [p_x]"r"(p_x)
607  : "memory"
608  );
609 }
610 
/**
 * High-frequency generation filter, MIPS inline-assembly implementation.
 *
 * With a[0] = alpha1[0]*bw*bw, a[1] = alpha1[1]*bw*bw,
 *      a[2] = alpha0[0]*bw,    a[3] = alpha0[1]*bw,
 * computes for i = start .. end-1:
 *   X_high[i][0] = X_low[i-2][0]*a[0] - X_low[i-2][1]*a[1]
 *                + X_low[i-1][0]*a[2] - X_low[i-1][1]*a[3] + X_low[i][0]
 *   X_high[i][1] = X_low[i-2][1]*a[0] + X_low[i-2][0]*a[1]
 *                + X_low[i-1][1]*a[2] + X_low[i-1][0]*a[3] + X_low[i][1]
 *
 * X_low[start-2] and X_low[start-1] are read, so they must be valid.
 *
 * @param X_high output complex samples
 * @param X_low  input complex samples
 * @param alpha0 first complex coefficient (scaled by bw)
 * @param alpha1 second complex coefficient (scaled by bw*bw)
 * @param bw     scaling factor applied to the coefficients
 * @param start  first index to compute
 * @param end    one past the last index to compute
 */
611 static void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
612  const float alpha0[2], const float alpha1[2],
613  float bw, int start, int end)
614 {
615  float alpha[4];
616  int i;
617  float *p_x_low = (float*)&X_low[0][0] + 2*start;
618  float *p_x_high = &X_high[0][0] + 2*start;
/* temp4 is declared as an asm output below but is not used by the template */
619  float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
620  float temp7, temp8, temp9, temp10, temp11, temp12;
621 
622  alpha[0] = alpha1[0] * bw * bw;
623  alpha[1] = alpha1[1] * bw * bw;
624  alpha[2] = alpha0[0] * bw;
625  alpha[3] = alpha0[1] * bw;
626 
627  for (i = start; i < end; i++) {
628  __asm__ volatile (
/* temp0/1 = X_low[i-2], temp2/3 = X_low[i-1], temp5/6 = X_low[i] */
629  "lwc1 %[temp0], -16(%[p_x_low]) \n\t"
630  "lwc1 %[temp1], -12(%[p_x_low]) \n\t"
631  "lwc1 %[temp2], -8(%[p_x_low]) \n\t"
632  "lwc1 %[temp3], -4(%[p_x_low]) \n\t"
633  "lwc1 %[temp5], 0(%[p_x_low]) \n\t"
634  "lwc1 %[temp6], 4(%[p_x_low]) \n\t"
/* temp7..temp10 = alpha[0..3], reloaded every iteration */
635  "lwc1 %[temp7], 0(%[alpha]) \n\t"
636  "lwc1 %[temp8], 4(%[alpha]) \n\t"
637  "lwc1 %[temp9], 8(%[alpha]) \n\t"
638  "lwc1 %[temp10], 12(%[alpha]) \n\t"
/* pointers are bumped early; the stores below use -8/-4 to compensate */
639  PTR_ADDIU "%[p_x_high], %[p_x_high], 8 \n\t"
640  PTR_ADDIU "%[p_x_low], %[p_x_low], 8 \n\t"
/* real part */
641  "mul.s %[temp11], %[temp1], %[temp8] \n\t"
642  "msub.s %[temp11], %[temp11], %[temp0], %[temp7] \n\t"
643  "madd.s %[temp11], %[temp11], %[temp2], %[temp9] \n\t"
644  "nmsub.s %[temp11], %[temp11], %[temp3], %[temp10] \n\t"
645  "add.s %[temp11], %[temp11], %[temp5] \n\t"
646  "swc1 %[temp11], -8(%[p_x_high]) \n\t"
/* imaginary part */
647  "mul.s %[temp12], %[temp1], %[temp7] \n\t"
648  "madd.s %[temp12], %[temp12], %[temp0], %[temp8] \n\t"
649  "madd.s %[temp12], %[temp12], %[temp3], %[temp9] \n\t"
650  "madd.s %[temp12], %[temp12], %[temp2], %[temp10] \n\t"
651  "add.s %[temp12], %[temp12], %[temp6] \n\t"
652  "swc1 %[temp12], -4(%[p_x_high]) \n\t"
653 
654  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
655  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
656  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
657  [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
658  [temp12]"=&f"(temp12), [p_x_high]"+r"(p_x_high),
659  [p_x_low]"+r"(p_x_low)
660  : [alpha]"r"(alpha)
661  : "memory"
662  );
663  }
664 }
665 
/**
 * Apply per-band gains to one time slot of the high band.
 *
 * For m = 0 .. m_max-1:
 *   Y[m][0] = X_high[m][ixh][0] * g_filt[m]
 *   Y[m][1] = X_high[m][ixh][1] * g_filt[m]
 *
 * NOTE(review): the asm loop tests its exit condition only after the body,
 * so the body runs at least once - presumably m_max >= 1 is guaranteed by
 * the caller; confirm.
 *
 * @param Y      output complex samples, one per band
 * @param X_high input, indexed [band][time slot][re/im]
 * @param g_filt per-band gain
 * @param m_max  number of bands to process
 * @param ixh    time-slot index into X_high
 */
666 static void sbr_hf_g_filt_mips(float (*Y)[2], const float (*X_high)[40][2],
667  const float *g_filt, int m_max, intptr_t ixh)
668 {
669  const float *p_x, *p_g, *loop_end;
670  float *p_y;
671  float temp0, temp1, temp2;
672 
673  p_g = &g_filt[0];
674  p_y = &Y[0][0];
675  p_x = &X_high[0][ixh][0];
676  loop_end = p_g + m_max;
677 
678  __asm__ volatile(
679  ".set push \n\t"
/* noreorder: the instruction after "bne" below is its delay slot */
680  ".set noreorder \n\t"
681  "1: \n\t"
682  "lwc1 %[temp0], 0(%[p_g]) \n\t"
683  "lwc1 %[temp1], 0(%[p_x]) \n\t"
684  "lwc1 %[temp2], 4(%[p_x]) \n\t"
685  "mul.s %[temp1], %[temp1], %[temp0] \n\t"
686  "mul.s %[temp2], %[temp2], %[temp0] \n\t"
687  PTR_ADDIU "%[p_g], %[p_g], 4 \n\t"
/* 320 bytes = one float[40][2] row, i.e. the next band at the same slot */
688  PTR_ADDIU "%[p_x], %[p_x], 320 \n\t"
689  "swc1 %[temp1], 0(%[p_y]) \n\t"
690  "swc1 %[temp2], 4(%[p_y]) \n\t"
691  "bne %[p_g], %[loop_end], 1b \n\t"
/* branch delay slot: advances the output pointer on every pass */
692  PTR_ADDIU "%[p_y], %[p_y], 8 \n\t"
693  ".set pop \n\t"
694 
695  : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
696  [temp2]"=&f"(temp2), [p_x]"+r"(p_x),
697  [p_y]"+r"(p_y), [p_g]"+r"(p_g)
698  : [loop_end]"r"(loop_end)
699  : "memory"
700  );
701 }
702 
703 static void sbr_hf_apply_noise_0_mips(float (*Y)[2], const float *s_m,
704  const float *q_filt, int noise,
705  int kx, int m_max)
706 {
707  int m;
708 
709  for (m = 0; m < m_max; m++){
710 
711  float *Y1=&Y[m][0];
712  float *ff_table;
713  float y0,y1, temp1, temp2, temp4, temp5;
714  int temp0, temp3;
715  const float *s_m1=&s_m[m];
716  const float *q_filt1= &q_filt[m];
717 
718  __asm__ volatile(
719  "lwc1 %[y0], 0(%[Y1]) \n\t"
720  "lwc1 %[temp1], 0(%[s_m1]) \n\t"
721  "addiu %[noise], %[noise], 1 \n\t"
722  "andi %[noise], %[noise], 0x1ff \n\t"
723  "sll %[temp0], %[noise], 3 \n\t"
724  PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t"
725  "add.s %[y0], %[y0], %[temp1] \n\t"
726  "mfc1 %[temp3], %[temp1] \n\t"
727  "bne %[temp3], $0, 1f \n\t"
728  "lwc1 %[y1], 4(%[Y1]) \n\t"
729  "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
730  "lwc1 %[temp4], 0(%[ff_table]) \n\t"
731  "lwc1 %[temp5], 4(%[ff_table]) \n\t"
732  "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
733  "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
734  "swc1 %[y1], 4(%[Y1]) \n\t"
735  "1: \n\t"
736  "swc1 %[y0], 0(%[Y1]) \n\t"
737 
738  : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
739  [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
740  [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
742  [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
743  : "memory"
744  );
745  }
746 }
747 
748 static void sbr_hf_apply_noise_1_mips(float (*Y)[2], const float *s_m,
749  const float *q_filt, int noise,
750  int kx, int m_max)
751 {
752  float y0,y1,temp1, temp2, temp4, temp5;
753  int temp0, temp3, m;
754  float phi_sign = 1 - 2 * (kx & 1);
755 
756  for (m = 0; m < m_max; m++) {
757 
758  float *ff_table;
759  float *Y1=&Y[m][0];
760  const float *s_m1=&s_m[m];
761  const float *q_filt1= &q_filt[m];
762 
763  __asm__ volatile(
764  "lwc1 %[y1], 4(%[Y1]) \n\t"
765  "lwc1 %[temp1], 0(%[s_m1]) \n\t"
766  "lw %[temp3], 0(%[s_m1]) \n\t"
767  "addiu %[noise], %[noise], 1 \n\t"
768  "andi %[noise], %[noise], 0x1ff \n\t"
769  "sll %[temp0], %[noise], 3 \n\t"
770  PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t"
771  "madd.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t"
772  "bne %[temp3], $0, 1f \n\t"
773  "lwc1 %[y0], 0(%[Y1]) \n\t"
774  "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
775  "lwc1 %[temp4], 0(%[ff_table]) \n\t"
776  "lwc1 %[temp5], 4(%[ff_table]) \n\t"
777  "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
778  "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
779  "swc1 %[y0], 0(%[Y1]) \n\t"
780  "1: \n\t"
781  "swc1 %[y1], 4(%[Y1]) \n\t"
782 
783  : [ff_table] "=&r" (ff_table), [y0] "=&f" (y0), [y1] "=&f" (y1),
784  [temp0] "=&r" (temp0), [temp1] "=&f" (temp1), [temp2] "=&f" (temp2),
785  [temp3] "=&r" (temp3), [temp4] "=&f" (temp4), [temp5] "=&f" (temp5)
787  [Y1] "r" (Y1), [s_m1] "r" (s_m1), [q_filt1] "r" (q_filt1),
788  [phi_sign] "f" (phi_sign)
789  : "memory"
790  );
791  phi_sign = -phi_sign;
792  }
793 }
794 
795 static void sbr_hf_apply_noise_2_mips(float (*Y)[2], const float *s_m,
796  const float *q_filt, int noise,
797  int kx, int m_max)
798 {
799  int m;
800  float *ff_table;
801  float y0,y1, temp0, temp1, temp2, temp3, temp4, temp5;
802 
803  for (m = 0; m < m_max; m++) {
804 
805  float *Y1=&Y[m][0];
806  const float *s_m1=&s_m[m];
807  const float *q_filt1= &q_filt[m];
808 
809  __asm__ volatile(
810  "lwc1 %[y0], 0(%[Y1]) \n\t"
811  "lwc1 %[temp1], 0(%[s_m1]) \n\t"
812  "addiu %[noise], %[noise], 1 \n\t"
813  "andi %[noise], %[noise], 0x1ff \n\t"
814  "sll %[temp0], %[noise], 3 \n\t"
815  PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t"
816  "sub.s %[y0], %[y0], %[temp1] \n\t"
817  "mfc1 %[temp3], %[temp1] \n\t"
818  "bne %[temp3], $0, 1f \n\t"
819  "lwc1 %[y1], 4(%[Y1]) \n\t"
820  "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
821  "lwc1 %[temp4], 0(%[ff_table]) \n\t"
822  "lwc1 %[temp5], 4(%[ff_table]) \n\t"
823  "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
824  "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
825  "swc1 %[y1], 4(%[Y1]) \n\t"
826  "1: \n\t"
827  "swc1 %[y0], 0(%[Y1]) \n\t"
828 
829  : [temp0]"=&r"(temp0), [ff_table]"=&r"(ff_table), [y0]"=&f"(y0),
830  [y1]"=&f"(y1), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
831  [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
833  [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
834  : "memory"
835  );
836  }
837 }
838 
839 static void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m,
840  const float *q_filt, int noise,
841  int kx, int m_max)
842 {
843  float phi_sign = 1 - 2 * (kx & 1);
844  int m;
845 
846  for (m = 0; m < m_max; m++) {
847 
848  float *Y1=&Y[m][0];
849  float *ff_table;
850  float y0,y1, temp1, temp2, temp4, temp5;
851  int temp0, temp3;
852  const float *s_m1=&s_m[m];
853  const float *q_filt1= &q_filt[m];
854 
855  __asm__ volatile(
856  "lwc1 %[y1], 4(%[Y1]) \n\t"
857  "lwc1 %[temp1], 0(%[s_m1]) \n\t"
858  "addiu %[noise], %[noise], 1 \n\t"
859  "andi %[noise], %[noise], 0x1ff \n\t"
860  "sll %[temp0], %[noise], 3 \n\t"
861  PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t"
862  "nmsub.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t"
863  "mfc1 %[temp3], %[temp1] \n\t"
864  "bne %[temp3], $0, 1f \n\t"
865  "lwc1 %[y0], 0(%[Y1]) \n\t"
866  "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
867  "lwc1 %[temp4], 0(%[ff_table]) \n\t"
868  "lwc1 %[temp5], 4(%[ff_table]) \n\t"
869  "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
870  "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
871  "swc1 %[y0], 0(%[Y1]) \n\t"
872  "1: \n\t"
873  "swc1 %[y1], 4(%[Y1]) \n\t"
874 
875  : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
876  [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
877  [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
879  [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1),
880  [phi_sign]"f"(phi_sign)
881  : "memory"
882  );
883  phi_sign = -phi_sign;
884  }
885 }
886 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
887 #endif /* HAVE_MIPSFPU */
888 #endif /* HAVE_INLINE_ASM */
889 
891 {
892 #if HAVE_INLINE_ASM
893 #if HAVE_MIPSFPU
894  s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_mips;
895  s->qmf_post_shuffle = sbr_qmf_post_shuffle_mips;
896 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
897  s->sum64x5 = sbr_sum64x5_mips;
898  s->sum_square = sbr_sum_square_mips;
899  s->qmf_deint_bfly = sbr_qmf_deint_bfly_mips;
900  s->autocorrelate = sbr_autocorrelate_mips;
901  s->hf_gen = sbr_hf_gen_mips;
902  s->hf_g_filt = sbr_hf_g_filt_mips;
903 
904  s->hf_apply_noise[0] = sbr_hf_apply_noise_0_mips;
905  s->hf_apply_noise[1] = sbr_hf_apply_noise_1_mips;
906  s->hf_apply_noise[2] = sbr_hf_apply_noise_2_mips;
907  s->hf_apply_noise[3] = sbr_hf_apply_noise_3_mips;
908 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
909 #endif /* HAVE_MIPSFPU */
910 #endif /* HAVE_INLINE_ASM */
911 }
W
@ W
Definition: vf_addroi.c:26
SBRDSPContext
Definition: sbrdsp.h:28
end
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
asmdefs.h
v0
#define v0
Definition: regdef.h:26
x
FFmpeg Automated Testing Environment ************************************Introduction Using FATE from your FFmpeg source directory Submitting the results to the FFmpeg result aggregation server Uploading new samples to the fate suite FATE makefile targets and variables Makefile targets Makefile variables Examples Introduction **************FATE is an extended regression suite on the client side and a means for results aggregation and presentation on the server side The first part of this document explains how you can use FATE from your FFmpeg source directory to test your ffmpeg binary The second part describes how you can run FATE to submit the results to FFmpeg’s FATE server In any way you can have a look at the publicly viewable FATE results by visiting this as it can be seen if some test on some platform broke with their recent contribution This usually happens on the platforms the developers could not test on The second part of this document describes how you can run FATE to submit your results to FFmpeg’s FATE server If you want to submit your results be sure to check that your combination of OS and compiler is not already listed on the above mentioned website In the third part you can find a comprehensive listing of FATE makefile targets and variables Using FATE from your FFmpeg source directory **********************************************If you want to run FATE on your machine you need to have the samples in place You can get the samples via the build target fate rsync Use this command from the top level source this will cause FATE to fail NOTE To use a custom wrapper to run the pass ‘ target exec’ to ‘configure’ or set the TARGET_EXEC Make variable Submitting the results to the FFmpeg result aggregation server ****************************************************************To submit your results to the server you should run fate through the shell script ‘tests fate sh’ from the FFmpeg sources This script needs to be invoked with a configuration file as 
its first argument tests fate sh path to fate_config A configuration file template with comments describing the individual configuration variables can be found at ‘doc fate_config sh template’ Create a configuration that suits your based on the configuration template The ‘slot’ configuration variable can be any string that is not yet but it is suggested that you name it adhering to the following pattern ‘ARCH OS COMPILER COMPILER VERSION’ The configuration file itself will be sourced in a shell therefore all shell features may be used This enables you to setup the environment as you need it for your build For your first test runs the ‘fate_recv’ variable should be empty or commented out This will run everything as normal except that it will omit the submission of the results to the server The following files should be present in $workdir as specified in the configuration it may help to try out the ‘ssh’ command with one or more ‘ v’ options You should get detailed output concerning your SSH configuration and the authentication process The only thing left is to automate the execution of the fate sh script and the synchronisation of the samples directory Uploading new samples to the fate suite *****************************************If you need a sample uploaded send a mail to samples request This is for developers who have an account on the fate suite server If you upload new please make sure they are as small as space on each network bandwidth and so on benefit from smaller test cases Also keep in mind older checkouts use existing sample that means in practice generally do not remove or overwrite files as it likely would break older checkouts or releases Also all needed samples for a commit should be ideally before the push If you need an account for frequently uploading samples or you wish to help others by doing that send a mail to ffmpeg devel rsync vauL Duo x
Definition: fate.txt:150
s
#define s(width, name)
Definition: cbs_vp9.c:257
ff_sbr_noise_table
const INTFLOAT ff_sbr_noise_table[][2]
Definition: aacsbrdata.h:271
sbrdsp.h
Y
#define Y
Definition: boxblur.h:38
src0
#define src0
Definition: h264pred.c:138
src1
#define src1
Definition: h264pred.c:139
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:47
noise
static int noise(AVBSFContext *ctx, AVPacket *pkt)
Definition: noise_bsf.c:36
config.h
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:48
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_sbrdsp_init_mips
void ff_sbrdsp_init_mips(SBRDSPContext *s)
Definition: sbrdsp_mips.c:890