OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
84 int source_stride, | 84 int source_stride, |
85 const unsigned char *ref_ptr, | 85 const unsigned char *ref_ptr, |
86 int recon_stride, | 86 int recon_stride, |
87 unsigned int *sse) | 87 unsigned int *sse) |
88 { | 88 { |
89 unsigned int var; | 89 unsigned int var; |
90 int avg; | 90 int avg; |
91 | 91 |
92 vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg)
; | 92 vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg)
; |
93 *sse = var; | 93 *sse = var; |
94 return (var - ((unsigned int)(avg * avg) >> 4)); | 94 return (var - (((unsigned int)avg * avg) >> 4)); |
95 | 95 |
96 } | 96 } |
97 | 97 |
/*
 * Variance of an 8x8 block: src_ptr vs. ref_ptr.
 *
 * src_ptr/source_stride  : source pixels and row stride
 * ref_ptr/recon_stride   : reference (reconstructed) pixels and row stride
 * sse (out)              : sum of squared differences over the block
 * returns                : variance = SSE - (sum^2 / 64)
 */
unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var;   /* sum of squared differences (SSE) */
    int avg;            /* sum of differences */

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;

    /* >>6 divides the squared sum by 64 (8*8 pixels).  Cast avg BEFORE
     * squaring so the multiply happens in unsigned arithmetic: the old
     * form (unsigned int)(avg * avg) squared in signed int first, which
     * is undefined behavior on overflow. */
    return (var - (((unsigned int)avg * avg) >> 6));
}
114 | 114 |
115 unsigned int vp8_mse16x16_mmx( | 115 unsigned int vp8_mse16x16_mmx( |
116 const unsigned char *src_ptr, | 116 const unsigned char *src_ptr, |
117 int source_stride, | 117 int source_stride, |
118 const unsigned char *ref_ptr, | 118 const unsigned char *ref_ptr, |
119 int recon_stride, | 119 int recon_stride, |
120 unsigned int *sse) | 120 unsigned int *sse) |
121 { | 121 { |
(...skipping 24 matching lines...) Expand all Loading... |
146 | 146 |
147 | 147 |
148 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum
0) ; | 148 vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum
0) ; |
149 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &ss
e1, &sum1); | 149 vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &ss
e1, &sum1); |
150 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 *
recon_stride, recon_stride, &sse2, &sum2) ; | 150 vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 *
recon_stride, recon_stride, &sse2, &sum2) ; |
151 vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr +
8 * recon_stride + 8, recon_stride, &sse3, &sum3); | 151 vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr +
8 * recon_stride + 8, recon_stride, &sse3, &sum3); |
152 | 152 |
153 var = sse0 + sse1 + sse2 + sse3; | 153 var = sse0 + sse1 + sse2 + sse3; |
154 avg = sum0 + sum1 + sum2 + sum3; | 154 avg = sum0 + sum1 + sum2 + sum3; |
155 *sse = var; | 155 *sse = var; |
156 return (var - ((unsigned int)(avg * avg) >> 8)); | 156 return (var - (((unsigned int)avg * avg) >> 8)); |
157 } | 157 } |
158 | 158 |
/*
 * Variance of a 16x8 block, computed as two side-by-side 8x8 halves.
 *
 * src_ptr/source_stride  : source pixels and row stride
 * ref_ptr/recon_stride   : reference pixels and row stride
 * sse (out)              : total sum of squared differences
 * returns                : variance = SSE - (sum^2 / 128)
 */
unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    /* Left 8x8 half, then right 8x8 half (offset by 8 columns). */
    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    /* >>7 divides by 128 (16*8 pixels).  Cast before the multiply so the
     * square is computed in unsigned arithmetic; squaring in signed int
     * first is undefined behavior on overflow. */
    return (var - (((unsigned int)avg * avg) >> 7));
}
178 | 178 |
179 | 179 |
/*
 * Variance of an 8x16 block, computed as two stacked 8x8 halves.
 *
 * src_ptr/source_stride  : source pixels and row stride
 * ref_ptr/recon_stride   : reference pixels and row stride
 * sse (out)              : total sum of squared differences
 * returns                : variance = SSE - (sum^2 / 128)
 */
unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    /* Top 8x8 half, then bottom 8x8 half (offset by 8 rows). */
    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride,
                      ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    /* >>7 divides by 128 (8*16 pixels).  Cast avg before squaring so the
     * multiply is unsigned; a signed-int square can overflow, which is
     * undefined behavior. */
    return (var - (((unsigned int)avg * avg) >> 7));
}
200 | 200 |
201 | 201 |
202 unsigned int vp8_sub_pixel_variance4x4_mmx | 202 unsigned int vp8_sub_pixel_variance4x4_mmx |
203 ( | 203 ( |
204 const unsigned char *src_ptr, | 204 const unsigned char *src_ptr, |
205 int src_pixels_per_line, | 205 int src_pixels_per_line, |
206 int xoffset, | 206 int xoffset, |
207 int yoffset, | 207 int yoffset, |
208 const unsigned char *dst_ptr, | 208 const unsigned char *dst_ptr, |
209 int dst_pixels_per_line, | 209 int dst_pixels_per_line, |
210 unsigned int *sse) | 210 unsigned int *sse) |
211 | 211 |
212 { | 212 { |
213 int xsum; | 213 int xsum; |
214 unsigned int xxsum; | 214 unsigned int xxsum; |
215 vp8_filter_block2d_bil4x4_var_mmx( | 215 vp8_filter_block2d_bil4x4_var_mmx( |
216 src_ptr, src_pixels_per_line, | 216 src_ptr, src_pixels_per_line, |
217 dst_ptr, dst_pixels_per_line, | 217 dst_ptr, dst_pixels_per_line, |
218 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | 218 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, |
219 &xsum, &xxsum | 219 &xsum, &xxsum |
220 ); | 220 ); |
221 *sse = xxsum; | 221 *sse = xxsum; |
222 return (xxsum - ((unsigned int)(xsum * xsum) >> 4)); | 222 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); |
223 } | 223 } |
224 | 224 |
225 | 225 |
226 unsigned int vp8_sub_pixel_variance8x8_mmx | 226 unsigned int vp8_sub_pixel_variance8x8_mmx |
227 ( | 227 ( |
228 const unsigned char *src_ptr, | 228 const unsigned char *src_ptr, |
229 int src_pixels_per_line, | 229 int src_pixels_per_line, |
230 int xoffset, | 230 int xoffset, |
231 int yoffset, | 231 int yoffset, |
232 const unsigned char *dst_ptr, | 232 const unsigned char *dst_ptr, |
233 int dst_pixels_per_line, | 233 int dst_pixels_per_line, |
234 unsigned int *sse | 234 unsigned int *sse |
235 ) | 235 ) |
236 { | 236 { |
237 | 237 |
238 int xsum; | 238 int xsum; |
239 unsigned int xxsum; | 239 unsigned int xxsum; |
240 vp8_filter_block2d_bil_var_mmx( | 240 vp8_filter_block2d_bil_var_mmx( |
241 src_ptr, src_pixels_per_line, | 241 src_ptr, src_pixels_per_line, |
242 dst_ptr, dst_pixels_per_line, 8, | 242 dst_ptr, dst_pixels_per_line, 8, |
243 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | 243 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, |
244 &xsum, &xxsum | 244 &xsum, &xxsum |
245 ); | 245 ); |
246 *sse = xxsum; | 246 *sse = xxsum; |
247 return (xxsum - ((unsigned int)(xsum * xsum) >> 6)); | 247 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); |
248 } | 248 } |
249 | 249 |
250 unsigned int vp8_sub_pixel_variance16x16_mmx | 250 unsigned int vp8_sub_pixel_variance16x16_mmx |
251 ( | 251 ( |
252 const unsigned char *src_ptr, | 252 const unsigned char *src_ptr, |
253 int src_pixels_per_line, | 253 int src_pixels_per_line, |
254 int xoffset, | 254 int xoffset, |
255 int yoffset, | 255 int yoffset, |
256 const unsigned char *dst_ptr, | 256 const unsigned char *dst_ptr, |
257 int dst_pixels_per_line, | 257 int dst_pixels_per_line, |
(...skipping 17 matching lines...) Expand all Loading... |
275 src_ptr + 8, src_pixels_per_line, | 275 src_ptr + 8, src_pixels_per_line, |
276 dst_ptr + 8, dst_pixels_per_line, 16, | 276 dst_ptr + 8, dst_pixels_per_line, 16, |
277 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | 277 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, |
278 &xsum1, &xxsum1 | 278 &xsum1, &xxsum1 |
279 ); | 279 ); |
280 | 280 |
281 xsum0 += xsum1; | 281 xsum0 += xsum1; |
282 xxsum0 += xxsum1; | 282 xxsum0 += xxsum1; |
283 | 283 |
284 *sse = xxsum0; | 284 *sse = xxsum0; |
285 return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8)); | 285 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); |
286 | 286 |
287 | 287 |
288 } | 288 } |
289 | 289 |
290 unsigned int vp8_sub_pixel_mse16x16_mmx( | 290 unsigned int vp8_sub_pixel_mse16x16_mmx( |
291 const unsigned char *src_ptr, | 291 const unsigned char *src_ptr, |
292 int src_pixels_per_line, | 292 int src_pixels_per_line, |
293 int xoffset, | 293 int xoffset, |
294 int yoffset, | 294 int yoffset, |
295 const unsigned char *dst_ptr, | 295 const unsigned char *dst_ptr, |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
328 src_ptr + 8, src_pixels_per_line, | 328 src_ptr + 8, src_pixels_per_line, |
329 dst_ptr + 8, dst_pixels_per_line, 8, | 329 dst_ptr + 8, dst_pixels_per_line, 8, |
330 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | 330 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, |
331 &xsum1, &xxsum1 | 331 &xsum1, &xxsum1 |
332 ); | 332 ); |
333 | 333 |
334 xsum0 += xsum1; | 334 xsum0 += xsum1; |
335 xxsum0 += xxsum1; | 335 xxsum0 += xxsum1; |
336 | 336 |
337 *sse = xxsum0; | 337 *sse = xxsum0; |
338 return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7)); | 338 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); |
339 } | 339 } |
340 | 340 |
341 unsigned int vp8_sub_pixel_variance8x16_mmx | 341 unsigned int vp8_sub_pixel_variance8x16_mmx |
342 ( | 342 ( |
343 const unsigned char *src_ptr, | 343 const unsigned char *src_ptr, |
344 int src_pixels_per_line, | 344 int src_pixels_per_line, |
345 int xoffset, | 345 int xoffset, |
346 int yoffset, | 346 int yoffset, |
347 const unsigned char *dst_ptr, | 347 const unsigned char *dst_ptr, |
348 int dst_pixels_per_line, | 348 int dst_pixels_per_line, |
349 unsigned int *sse | 349 unsigned int *sse |
350 ) | 350 ) |
351 { | 351 { |
352 int xsum; | 352 int xsum; |
353 unsigned int xxsum; | 353 unsigned int xxsum; |
354 vp8_filter_block2d_bil_var_mmx( | 354 vp8_filter_block2d_bil_var_mmx( |
355 src_ptr, src_pixels_per_line, | 355 src_ptr, src_pixels_per_line, |
356 dst_ptr, dst_pixels_per_line, 16, | 356 dst_ptr, dst_pixels_per_line, 16, |
357 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | 357 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, |
358 &xsum, &xxsum | 358 &xsum, &xxsum |
359 ); | 359 ); |
360 *sse = xxsum; | 360 *sse = xxsum; |
361 return (xxsum - ((unsigned int)(xsum * xsum) >> 7)); | 361 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); |
362 } | 362 } |
363 | 363 |
364 | 364 |
365 unsigned int vp8_variance_halfpixvar16x16_h_mmx( | 365 unsigned int vp8_variance_halfpixvar16x16_h_mmx( |
366 const unsigned char *src_ptr, | 366 const unsigned char *src_ptr, |
367 int source_stride, | 367 int source_stride, |
368 const unsigned char *ref_ptr, | 368 const unsigned char *ref_ptr, |
369 int recon_stride, | 369 int recon_stride, |
370 unsigned int *sse) | 370 unsigned int *sse) |
371 { | 371 { |
(...skipping 17 matching lines...) Expand all Loading... |
/*
 * 16x16 variance at a half-pixel offset in both x and y.
 *
 * Thin wrapper: delegates to the generic sub-pixel routine with filter
 * phase 4 on each axis (phase 4 presumably corresponds to the half-pel
 * tap of vp8_bilinear_filters_x86_4 — confirm against the table).
 * Output semantics match vp8_sub_pixel_variance16x16_mmx exactly.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    const int half_pel = 4;

    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride,
                                           half_pel, half_pel,
                                           ref_ptr, recon_stride, sse);
}
OLD | NEW |