FFmpeg  4.2.3
vp9_mc_mmi.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 gxw <guxiwei-hf@loongson.cn>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavcodec/vp9dsp.h"
23 #include "vp9dsp_mips.h"
24 
/*
 * Inline-asm fragment: horizontal 8-tap multiply-accumulate for four
 * neighbouring output pixels.
 *
 * Expected on entry (set up by the caller):
 *   ftmp4/ftmp5   - the 8 source pixels of output pixel x+0, widened to
 *                   16 bit (low four / high four)
 *   ftmp6/ftmp7   - same for output pixel x+1
 *   ftmp8/ftmp9   - same for output pixel x+2
 *   ftmp10/ftmp11 - same for output pixel x+3
 *   filter1/filter2 - taps 0..3 / taps 4..7 as 16-bit values
 *   ftmp0         - all zero
 *
 * For each pixel the two pmaddhw results are added and the upper 32-bit
 * word is folded onto the lower one, leaving the full 8-tap sum in the low
 * word. The sums are then paired up:
 *   srcl = { sum(x+0), sum(x+1) },  srch = { sum(x+2), sum(x+3) }
 * The sums are not yet rounded or shifted. ftmp4..ftmp11 are clobbered.
 */
#define GET_DATA_H_MMI \
    /* output pixel x+0 */ \
    "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t" \
    "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t" \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
    "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t" \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
    /* output pixel x+1 */ \
    "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t" \
    "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t" \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \
    "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \
    /* srcl = { sum(x+0), sum(x+1) } */ \
    "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t" \
    /* output pixel x+2 */ \
    "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t" \
    "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t" \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
    "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t" \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
    /* output pixel x+3 */ \
    "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t" \
    "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t" \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \
    "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t" \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \
    /* srch = { sum(x+2), sum(x+3) } */ \
    "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t"
48 
/*
 * Inline-asm fragment: vertical 8-tap multiply-accumulate for four
 * neighbouring output pixels.
 *
 * Expected on entry (set up by the caller):
 *   ftmp4 .. ftmp11 - four pixels from each of 8 consecutive rows, widened
 *                     to 16 bit (ftmp4 = first row, ftmp11 = last row)
 *   filter10/filter32/filter54/filter76 - tap pairs (0,1) (2,3) (4,5) (6,7),
 *                     each pair repeated in both 32-bit words
 *
 * Two adjacent rows are interleaved halfword-wise so that one pmaddhw
 * yields row_a * tap_a + row_b * tap_b per pixel; the four row pairs are
 * accumulated with paddw.
 *   srcl = { sum(x+0), sum(x+1) },  srch = { sum(x+2), sum(x+3) }
 * The sums are not yet rounded or shifted. ftmp12 is used as scratch.
 */
#define GET_DATA_V_MMI \
    /* low half: output pixels x+0 and x+1 */ \
    "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t" \
    "pmaddhw %[srcl], %[srcl], %[filter10] \n\t" \
    "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \
    "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \
    "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \
    /* high half: output pixels x+2 and x+3 */ \
    "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t" \
    "pmaddhw %[srch], %[srch], %[filter10] \n\t" \
    "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \
    "paddw %[srch], %[srch], %[ftmp12] \n\t" \
    "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \
    "paddw %[srch], %[srch], %[ftmp12] \n\t" \
    "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"
72 
73 static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
74  uint8_t *dst, int32_t dst_stride,
75  const uint16_t *filter_x, int32_t w,
76  int32_t h)
77 {
78  double ftmp[15];
79  uint32_t tmp[2];
80  src -= 3;
81  src_stride -= w;
82  dst_stride -= w;
83  __asm__ volatile (
84  "move %[tmp1], %[width] \n\t"
85  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
86  "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
87  "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
88  "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
89  "gsldrc1 %[filter2], 0x08(%[filter]) \n\t"
90  "li %[tmp0], 0x07 \n\t"
91  "dmtc1 %[tmp0], %[ftmp13] \n\t"
92  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
93  "1: \n\t"
94  /* Get 8 data per row */
95  "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
96  "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
97  "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t"
98  "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t"
99  "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t"
100  "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t"
101  "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t"
102  "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t"
103  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
104  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
105  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
106  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
107  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
108  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
109  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
110  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
111  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
112  /* Get raw data */
114  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
115  %[ftmp6], %[tmp0])
116  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
117  %[ftmp6], %[tmp0])
118  "packsswh %[srcl], %[srcl], %[srch] \n\t"
119  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
120  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
121  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
122  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
123  /* Loop count */
124  "bnez %[width], 1b \n\t"
125  "move %[width], %[tmp1] \n\t"
126  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
127  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
128  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
129  "bnez %[height], 1b \n\t"
130  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
131  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
132  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
133  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
134  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
135  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
136  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
137  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
138  [src]"+&r"(src), [width]"+&r"(w),
139  [dst]"+&r"(dst), [height]"+&r"(h),
140  [ftmp13]"=&f"(ftmp[14])
141  : [filter]"r"(filter_x),
142  [src_stride]"r"((mips_reg)src_stride),
143  [dst_stride]"r"((mips_reg)dst_stride)
144  : "memory"
145  );
146 }
147 
148 static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
149  uint8_t *dst, int32_t dst_stride,
150  const int16_t *filter_y, int32_t w,
151  int32_t h)
152 {
153  double ftmp[17];
154  uint32_t tmp[1];
155  ptrdiff_t addr = src_stride;
156  src_stride -= w;
157  dst_stride -= w;
158 
159  __asm__ volatile (
160  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
161  "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t"
162  "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t"
163  "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t"
164  "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t"
165  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
166  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
167  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
168  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
169  "li %[tmp0], 0x07 \n\t"
170  "dmtc1 %[tmp0], %[ftmp13] \n\t"
171  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
172  "1: \n\t"
173  /* Get 8 data per column */
174  "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t"
175  "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t"
176  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
177  "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t"
178  "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t"
179  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
180  "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t"
181  "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t"
182  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
183  "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t"
184  "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t"
185  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
186  "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t"
187  "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t"
188  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
189  "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t"
190  "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t"
191  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
192  "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t"
193  "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t"
194  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
195  "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t"
196  "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t"
197  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
198  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
199  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
200  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
201  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
202  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
203  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
204  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
205  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
206  /* Get raw data */
208  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
209  %[ftmp6], %[tmp0])
210  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
211  %[ftmp6], %[tmp0])
212  "packsswh %[srcl], %[srcl], %[srch] \n\t"
213  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
214  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
215  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
216  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
217  /* Loop count */
218  "bnez %[width], 1b \n\t"
219  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
220  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
221  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
222  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
223  "bnez %[height], 1b \n\t"
224  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
225  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
226  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
227  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
228  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
229  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
230  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
231  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
232  [src]"+&r"(src), [dst]"+&r"(dst),
233  [width]"+&r"(w), [height]"+&r"(h),
234  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
235  : [filter]"r"(filter_y),
236  [src_stride]"r"((mips_reg)src_stride),
237  [dst_stride]"r"((mips_reg)dst_stride),
238  [addr]"r"((mips_reg)addr)
239  : "memory"
240  );
241 }
242 
243 static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
244  uint8_t *dst, int32_t dst_stride,
245  const uint16_t *filter_x, int32_t w,
246  int32_t h)
247 {
248  double ftmp[15];
249  uint32_t tmp[2];
250  src -= 3;
251  src_stride -= w;
252  dst_stride -= w;
253 
254  __asm__ volatile (
255  "move %[tmp1], %[width] \n\t"
256  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
257  "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
258  "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
259  "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
260  "gsldrc1 %[filter2], 0x08(%[filter]) \n\t"
261  "li %[tmp0], 0x07 \n\t"
262  "dmtc1 %[tmp0], %[ftmp13] \n\t"
263  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
264  "1: \n\t"
265  /* Get 8 data per row */
266  "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
267  "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
268  "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t"
269  "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t"
270  "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t"
271  "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t"
272  "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t"
273  "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t"
274  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
276  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
277  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
278  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
279  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
280  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
281  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
282  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
283  /* Get raw data */
285  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
286  %[ftmp6], %[tmp0])
287  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
288  %[ftmp6], %[tmp0])
289  "packsswh %[srcl], %[srcl], %[srch] \n\t"
290  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
291  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
292  "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
293  "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
294  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
295  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
296  "li %[tmp0], 0x10001 \n\t"
297  "dmtc1 %[tmp0], %[ftmp5] \n\t"
298  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
299  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
300  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
301  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
302  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
303  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
304  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
305  /* Loop count */
306  "bnez %[width], 1b \n\t"
307  "move %[width], %[tmp1] \n\t"
308  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
309  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
310  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
311  "bnez %[height], 1b \n\t"
312  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
313  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
314  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
315  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
316  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
317  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
318  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
319  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
320  [src]"+&r"(src), [width]"+&r"(w),
321  [dst]"+&r"(dst), [height]"+&r"(h),
322  [ftmp13]"=&f"(ftmp[14])
323  : [filter]"r"(filter_x),
324  [src_stride]"r"((mips_reg)src_stride),
325  [dst_stride]"r"((mips_reg)dst_stride)
326  : "memory"
327  );
328 }
329 
330 static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
331  uint8_t *dst, int32_t dst_stride,
332  const int16_t *filter_y, int32_t w,
333  int32_t h)
334 {
335  double ftmp[17];
336  uint32_t tmp[1];
337  ptrdiff_t addr = src_stride;
338  src_stride -= w;
339  dst_stride -= w;
340 
341  __asm__ volatile (
342  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
343  "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t"
344  "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t"
345  "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t"
346  "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t"
347  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
348  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
349  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
350  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
351  "li %[tmp0], 0x07 \n\t"
352  "dmtc1 %[tmp0], %[ftmp13] \n\t"
353  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
354  "1: \n\t"
355  /* Get 8 data per column */
356  "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t"
357  "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t"
358  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
359  "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t"
360  "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t"
361  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
362  "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t"
363  "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t"
364  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
365  "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t"
366  "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t"
367  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
368  "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t"
369  "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t"
370  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
371  "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t"
372  "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t"
373  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
374  "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t"
375  "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t"
376  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
377  "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t"
378  "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t"
379  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
380  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
381  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
382  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
383  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
384  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
385  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
386  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
387  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
388  /* Get raw data */
390  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
391  %[ftmp6], %[tmp0])
392  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
393  %[ftmp6], %[tmp0])
394  "packsswh %[srcl], %[srcl], %[srch] \n\t"
395  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
396  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
397  "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
398  "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
399  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
400  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
401  "li %[tmp0], 0x10001 \n\t"
402  "dmtc1 %[tmp0], %[ftmp5] \n\t"
403  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
404  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
405  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
406  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
407  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
408  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
409  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
410  /* Loop count */
411  "bnez %[width], 1b \n\t"
412  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
413  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
414  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
415  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
416  "bnez %[height], 1b \n\t"
417  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
418  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
419  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
420  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
421  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
422  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
423  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
424  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
425  [src]"+&r"(src), [dst]"+&r"(dst),
426  [width]"+&r"(w), [height]"+&r"(h),
427  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
428  : [filter]"r"(filter_y),
429  [src_stride]"r"((mips_reg)src_stride),
430  [dst_stride]"r"((mips_reg)dst_stride),
431  [addr]"r"((mips_reg)addr)
432  : "memory"
433  );
434 }
435 
436 static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
437  uint8_t *dst, int32_t dst_stride,
438  int32_t w, int32_t h)
439 {
440  double ftmp[4];
441  uint32_t tmp[2];
442  src_stride -= w;
443  dst_stride -= w;
444 
445  __asm__ volatile (
446  "move %[tmp1], %[width] \n\t"
447  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
448  "li %[tmp0], 0x10001 \n\t"
449  "dmtc1 %[tmp0], %[ftmp3] \n\t"
450  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
451  "1: \n\t"
452  "gslwlc1 %[ftmp1], 0x07(%[src]) \n\t"
453  "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t"
454  "gslwlc1 %[ftmp2], 0x07(%[dst]) \n\t"
455  "gslwrc1 %[ftmp2], 0x00(%[dst]) \n\t"
456  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
457  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
458  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
459  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
460  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
461  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
462  "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
463  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
464  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
465  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
466  "bnez %[width], 1b \n\t"
467  "move %[width], %[tmp1] \n\t"
468  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
469  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
470  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
471  "bnez %[height], 1b \n\t"
472  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
473  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
474  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
475  [src]"+&r"(src), [dst]"+&r"(dst),
476  [width]"+&r"(w), [height]"+&r"(h)
477  : [src_stride]"r"((mips_reg)src_stride),
478  [dst_stride]"r"((mips_reg)dst_stride)
479  : "memory"
480  );
481 }
482 
483 static const int16_t vp9_subpel_filters_mmi[3][15][8] = {
484  [FILTER_8TAP_REGULAR] = {
485  {0, 1, -5, 126, 8, -3, 1, 0},
486  {-1, 3, -10, 122, 18, -6, 2, 0},
487  {-1, 4, -13, 118, 27, -9, 3, -1},
488  {-1, 4, -16, 112, 37, -11, 4, -1},
489  {-1, 5, -18, 105, 48, -14, 4, -1},
490  {-1, 5, -19, 97, 58, -16, 5, -1},
491  {-1, 6, -19, 88, 68, -18, 5, -1},
492  {-1, 6, -19, 78, 78, -19, 6, -1},
493  {-1, 5, -18, 68, 88, -19, 6, -1},
494  {-1, 5, -16, 58, 97, -19, 5, -1},
495  {-1, 4, -14, 48, 105, -18, 5, -1},
496  {-1, 4, -11, 37, 112, -16, 4, -1},
497  {-1, 3, -9, 27, 118, -13, 4, -1},
498  {0, 2, -6, 18, 122, -10, 3, -1},
499  {0, 1, -3, 8, 126, -5, 1, 0},
500  }, [FILTER_8TAP_SHARP] = {
501  {-1, 3, -7, 127, 8, -3, 1, 0},
502  {-2, 5, -13, 125, 17, -6, 3, -1},
503  {-3, 7, -17, 121, 27, -10, 5, -2},
504  {-4, 9, -20, 115, 37, -13, 6, -2},
505  {-4, 10, -23, 108, 48, -16, 8, -3},
506  {-4, 10, -24, 100, 59, -19, 9, -3},
507  {-4, 11, -24, 90, 70, -21, 10, -4},
508  {-4, 11, -23, 80, 80, -23, 11, -4},
509  {-4, 10, -21, 70, 90, -24, 11, -4},
510  {-3, 9, -19, 59, 100, -24, 10, -4},
511  {-3, 8, -16, 48, 108, -23, 10, -4},
512  {-2, 6, -13, 37, 115, -20, 9, -4},
513  {-2, 5, -10, 27, 121, -17, 7, -3},
514  {-1, 3, -6, 17, 125, -13, 5, -2},
515  {0, 1, -3, 8, 127, -7, 3, -1},
516  }, [FILTER_8TAP_SMOOTH] = {
517  {-3, -1, 32, 64, 38, 1, -3, 0},
518  {-2, -2, 29, 63, 41, 2, -3, 0},
519  {-2, -2, 26, 63, 43, 4, -4, 0},
520  {-2, -3, 24, 62, 46, 5, -4, 0},
521  {-2, -3, 21, 60, 49, 7, -4, 0},
522  {-1, -4, 18, 59, 51, 9, -4, 0},
523  {-1, -4, 16, 57, 53, 12, -4, -1},
524  {-1, -4, 14, 55, 55, 14, -4, -1},
525  {-1, -4, 12, 53, 57, 16, -4, -1},
526  {0, -4, 9, 51, 59, 18, -4, -1},
527  {0, -4, 7, 49, 60, 21, -3, -2},
528  {0, -4, 5, 46, 62, 24, -3, -2},
529  {0, -4, 4, 43, 63, 26, -2, -2},
530  {0, -3, 2, 41, 63, 29, -2, -2},
531  {0, -3, 1, 38, 64, 32, -1, -3},
532  }
533 };
534 
/*
 * Generate the six public 8-tap entry points for one block width (SIZE)
 * and one filter family (TYPE / TYPE_IDX):
 *   ff_{put,avg}_8tap_<TYPE>_<SIZE>{h,v,hv}_mmi
 *
 * mx / my select the horizontal / vertical kernel row via [mx-1] / [my-1];
 * the table has 15 rows per family, so the value used must be in 1..15.
 *
 * The filter pointers are kept as const int16_t * to match the table's
 * element type; convolve_horiz_mmi() declares its taps as const uint16_t *,
 * so the pointer is converted explicitly at those call sites.
 *
 * The hv variants run the horizontal pass into a scratch buffer with a
 * 64-byte stride, starting 3 rows above the block and producing h + 7 rows
 * so the vertical pass has its full 8-row window. The buffers hold up to
 * 64 + 7 = 71 (intermediate) and 64 (final) rows.
 */
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX) \
void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
 \
    convolve_horiz_mmi(src, srcstride, dst, dststride, \
                       (const uint16_t *)filter, SIZE, h); \
} \
 \
void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    src -= (3 * srcstride); \
    convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \
} \
 \
void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                         const uint8_t *src, \
                                         ptrdiff_t srcstride, \
                                         int h, int mx, int my) \
{ \
    const int16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    int tmp_h = h + 7; \
    uint8_t temp[64 * 71]; \
    src -= (3 * srcstride); \
    convolve_horiz_mmi(src, srcstride, temp, 64, \
                       (const uint16_t *)hfilter, SIZE, tmp_h); \
    convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h); \
} \
 \
void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
 \
    convolve_avg_horiz_mmi(src, srcstride, dst, dststride, \
                           (const uint16_t *)filter, SIZE, h); \
} \
 \
void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    src -= (3 * srcstride); \
    convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \
} \
 \
void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                         const uint8_t *src, \
                                         ptrdiff_t srcstride, \
                                         int h, int mx, int my) \
{ \
    const int16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    uint8_t temp1[64 * 64]; \
    uint8_t temp2[64 * 71]; \
    int tmp_h = h + 7; \
    src -= (3 * srcstride); \
    convolve_horiz_mmi(src, srcstride, temp2, 64, \
                       (const uint16_t *)hfilter, SIZE, tmp_h); \
    convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h); \
    convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h); \
}
609 
615 
621 
627 
628 #undef VP9_8TAP_MIPS_MMI_FUNC
static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:243
#define mips_reg
Definition: asmdefs.h:44
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0)
brief: (((value) + (1 << ((n) - 1))) >> (n)) fr_i0: src & dst fr_i1: Operand number fr_t0...
Definition: mmiutils.h:355
#define src
Definition: vp8dsp.c:254
static void filter(int16_t *output, ptrdiff_t out_stride, int16_t *low, ptrdiff_t low_stride, int16_t *high, ptrdiff_t high_stride, int len, int clip)
Definition: cfhd.c:153
uint8_t
static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:330
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)
Definition: vp9_mc_mmi.c:535
#define height
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
Definition: dcadsp.c:358
static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:148
static const int16_t vp9_subpel_filters_mmi[3][15][8]
Definition: vp9_mc_mmi.c:483
#define PTR_SUBU
Definition: asmdefs.h:50
#define width
uint8_t w
Definition: llviddspenc.c:38
#define GET_DATA_V_MMI
Definition: vp9_mc_mmi.c:49
int32_t
#define PTR_ADDIU
Definition: asmdefs.h:48
#define GET_DATA_H_MMI
Definition: vp9_mc_mmi.c:25
static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:73
#define PTR_ADDU
Definition: asmdefs.h:47
static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:436
static uint8_t tmp[11]
Definition: aes_ctr.c:26