OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
141 int source_stride, | 141 int source_stride, |
142 const unsigned char *ref_ptr, | 142 const unsigned char *ref_ptr, |
143 int recon_stride, | 143 int recon_stride, |
144 unsigned int *sse) | 144 unsigned int *sse) |
145 { | 145 { |
146 unsigned int var; | 146 unsigned int var; |
147 int avg; | 147 int avg; |
148 | 148 |
149 vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg)
; | 149 vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg)
; |
150 *sse = var; | 150 *sse = var; |
151 return (var - ((unsigned int)(avg * avg) >> 4)); | 151 return (var - (((unsigned int)avg * avg) >> 4)); |
152 | 152 |
153 } | 153 } |
154 | 154 |
/*
 * 8x8 variance wrapper.
 *
 * Calls vp8_get8x8var_sse2() once on the block addressed by src_ptr/ref_ptr
 * with their respective strides, stores the first helper output in *sse and
 * returns that value minus ((second output)^2 >> 6).
 *
 * NOTE(review): the helper outputs are presumably the sum of squared
 * differences and the signed sum of differences; confirm against the
 * helper's definition.
 */
unsigned int vp8_variance8x8_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;

    /* Convert to unsigned BEFORE multiplying: the product is then computed
     * in unsigned arithmetic, which wraps instead of invoking signed-overflow
     * undefined behaviour. */
    return (var - (((unsigned int)avg * avg) >> 6));
}
171 | 171 |
172 | 172 |
/*
 * 16x16 variance wrapper.
 *
 * Calls vp8_get16x16var_sse2() once, stores the first helper output in *sse
 * and returns that value minus ((second output)^2 >> 8).
 *
 * NOTE(review): the helper outputs are presumably the sum of squared
 * differences and the signed sum of differences; confirm against the
 * helper's definition.
 */
unsigned int vp8_variance16x16_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0;
    int sum0;

    vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    *sse = sse0;

    /* Convert to unsigned BEFORE multiplying. Squaring sum0 as a signed int
     * can exceed INT_MAX (undefined behaviour); the unsigned product wraps
     * in a well-defined way instead. */
    return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
}
189 unsigned int vp8_mse16x16_wmt( | 189 unsigned int vp8_mse16x16_wmt( |
190 const unsigned char *src_ptr, | 190 const unsigned char *src_ptr, |
191 int source_stride, | 191 int source_stride, |
192 const unsigned char *ref_ptr, | 192 const unsigned char *ref_ptr, |
193 int recon_stride, | 193 int recon_stride, |
194 unsigned int *sse) | 194 unsigned int *sse) |
195 { | 195 { |
196 | 196 |
197 unsigned int sse0; | 197 unsigned int sse0; |
(...skipping 15 matching lines...) Expand all Loading... |
213 { | 213 { |
214 unsigned int sse0, sse1, var; | 214 unsigned int sse0, sse1, var; |
215 int sum0, sum1, avg; | 215 int sum0, sum1, avg; |
216 | 216 |
217 vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &su
m0) ; | 217 vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &su
m0) ; |
218 vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &s
se1, &sum1); | 218 vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &s
se1, &sum1); |
219 | 219 |
220 var = sse0 + sse1; | 220 var = sse0 + sse1; |
221 avg = sum0 + sum1; | 221 avg = sum0 + sum1; |
222 *sse = var; | 222 *sse = var; |
223 return (var - ((unsigned int)(avg * avg) >> 7)); | 223 return (var - (((unsigned int)avg * avg) >> 7)); |
224 | 224 |
225 } | 225 } |
226 | 226 |
/*
 * 8x16 variance wrapper built from two 8x8 helper calls.
 *
 * The first call covers the block at src_ptr/ref_ptr; the second covers the
 * block starting 8 rows further down (8 * stride in each buffer). The two
 * first outputs are added and stored in *sse; the two second outputs are
 * added, squared, shifted right by 7 and subtracted from that total to form
 * the return value.
 *
 * NOTE(review): the helper outputs are presumably the sum of squared
 * differences and the signed sum of differences; confirm against the
 * helper's definition.
 */
unsigned int vp8_variance8x16_wmt
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    /* Upper 8 rows. */
    vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    /* Lower 8 rows: advance both pointers by 8 lines of their own stride. */
    vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride,
                       ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    /* Convert to unsigned BEFORE multiplying so the square is computed in
     * unsigned arithmetic and cannot trigger signed-overflow undefined
     * behaviour. */
    return (var - (((unsigned int)avg * avg) >> 7));
}
247 | 247 |
248 unsigned int vp8_sub_pixel_variance4x4_wmt | 248 unsigned int vp8_sub_pixel_variance4x4_wmt |
249 ( | 249 ( |
250 const unsigned char *src_ptr, | 250 const unsigned char *src_ptr, |
251 int src_pixels_per_line, | 251 int src_pixels_per_line, |
252 int xoffset, | 252 int xoffset, |
253 int yoffset, | 253 int yoffset, |
254 const unsigned char *dst_ptr, | 254 const unsigned char *dst_ptr, |
255 int dst_pixels_per_line, | 255 int dst_pixels_per_line, |
256 unsigned int *sse | 256 unsigned int *sse |
257 ) | 257 ) |
258 { | 258 { |
259 int xsum; | 259 int xsum; |
260 unsigned int xxsum; | 260 unsigned int xxsum; |
261 vp8_filter_block2d_bil4x4_var_mmx( | 261 vp8_filter_block2d_bil4x4_var_mmx( |
262 src_ptr, src_pixels_per_line, | 262 src_ptr, src_pixels_per_line, |
263 dst_ptr, dst_pixels_per_line, | 263 dst_ptr, dst_pixels_per_line, |
264 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | 264 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, |
265 &xsum, &xxsum | 265 &xsum, &xxsum |
266 ); | 266 ); |
267 *sse = xxsum; | 267 *sse = xxsum; |
268 return (xxsum - ((unsigned int)(xsum * xsum) >> 4)); | 268 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); |
269 } | 269 } |
270 | 270 |
271 | 271 |
272 unsigned int vp8_sub_pixel_variance8x8_wmt | 272 unsigned int vp8_sub_pixel_variance8x8_wmt |
273 ( | 273 ( |
274 const unsigned char *src_ptr, | 274 const unsigned char *src_ptr, |
275 int src_pixels_per_line, | 275 int src_pixels_per_line, |
276 int xoffset, | 276 int xoffset, |
277 int yoffset, | 277 int yoffset, |
278 const unsigned char *dst_ptr, | 278 const unsigned char *dst_ptr, |
(...skipping 28 matching lines...) Expand all Loading... |
307 else | 307 else |
308 { | 308 { |
309 vp8_filter_block2d_bil_var_sse2( | 309 vp8_filter_block2d_bil_var_sse2( |
310 src_ptr, src_pixels_per_line, | 310 src_ptr, src_pixels_per_line, |
311 dst_ptr, dst_pixels_per_line, 8, | 311 dst_ptr, dst_pixels_per_line, 8, |
312 xoffset, yoffset, | 312 xoffset, yoffset, |
313 &xsum, &xxsum); | 313 &xsum, &xxsum); |
314 } | 314 } |
315 | 315 |
316 *sse = xxsum; | 316 *sse = xxsum; |
317 return (xxsum - ((unsigned int)(xsum * xsum) >> 6)); | 317 return (xxsum - (((unsigned int)xsum * xsum) >> 6)); |
318 } | 318 } |
319 | 319 |
320 unsigned int vp8_sub_pixel_variance16x16_wmt | 320 unsigned int vp8_sub_pixel_variance16x16_wmt |
321 ( | 321 ( |
322 const unsigned char *src_ptr, | 322 const unsigned char *src_ptr, |
323 int src_pixels_per_line, | 323 int src_pixels_per_line, |
324 int xoffset, | 324 int xoffset, |
325 int yoffset, | 325 int yoffset, |
326 const unsigned char *dst_ptr, | 326 const unsigned char *dst_ptr, |
327 int dst_pixels_per_line, | 327 int dst_pixels_per_line, |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
369 src_ptr + 8, src_pixels_per_line, | 369 src_ptr + 8, src_pixels_per_line, |
370 dst_ptr + 8, dst_pixels_per_line, 16, | 370 dst_ptr + 8, dst_pixels_per_line, 16, |
371 xoffset, yoffset, | 371 xoffset, yoffset, |
372 &xsum1, &xxsum1 | 372 &xsum1, &xxsum1 |
373 ); | 373 ); |
374 xsum0 += xsum1; | 374 xsum0 += xsum1; |
375 xxsum0 += xxsum1; | 375 xxsum0 += xxsum1; |
376 } | 376 } |
377 | 377 |
378 *sse = xxsum0; | 378 *sse = xxsum0; |
379 return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 8)); | 379 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); |
380 } | 380 } |
381 | 381 |
382 unsigned int vp8_sub_pixel_mse16x16_wmt( | 382 unsigned int vp8_sub_pixel_mse16x16_wmt( |
383 const unsigned char *src_ptr, | 383 const unsigned char *src_ptr, |
384 int src_pixels_per_line, | 384 int src_pixels_per_line, |
385 int xoffset, | 385 int xoffset, |
386 int yoffset, | 386 int yoffset, |
387 const unsigned char *dst_ptr, | 387 const unsigned char *dst_ptr, |
388 int dst_pixels_per_line, | 388 int dst_pixels_per_line, |
389 unsigned int *sse | 389 unsigned int *sse |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
440 vp8_filter_block2d_bil_var_sse2( | 440 vp8_filter_block2d_bil_var_sse2( |
441 src_ptr + 8, src_pixels_per_line, | 441 src_ptr + 8, src_pixels_per_line, |
442 dst_ptr + 8, dst_pixels_per_line, 8, | 442 dst_ptr + 8, dst_pixels_per_line, 8, |
443 xoffset, yoffset, | 443 xoffset, yoffset, |
444 &xsum1, &xxsum1); | 444 &xsum1, &xxsum1); |
445 xsum0 += xsum1; | 445 xsum0 += xsum1; |
446 xxsum0 += xxsum1; | 446 xxsum0 += xxsum1; |
447 } | 447 } |
448 | 448 |
449 *sse = xxsum0; | 449 *sse = xxsum0; |
450 return (xxsum0 - ((unsigned int)(xsum0 * xsum0) >> 7)); | 450 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7)); |
451 } | 451 } |
452 | 452 |
453 unsigned int vp8_sub_pixel_variance8x16_wmt | 453 unsigned int vp8_sub_pixel_variance8x16_wmt |
454 ( | 454 ( |
455 const unsigned char *src_ptr, | 455 const unsigned char *src_ptr, |
456 int src_pixels_per_line, | 456 int src_pixels_per_line, |
457 int xoffset, | 457 int xoffset, |
458 int yoffset, | 458 int yoffset, |
459 const unsigned char *dst_ptr, | 459 const unsigned char *dst_ptr, |
460 int dst_pixels_per_line, | 460 int dst_pixels_per_line, |
(...skipping 27 matching lines...) Expand all Loading... |
488 else | 488 else |
489 { | 489 { |
490 vp8_filter_block2d_bil_var_sse2( | 490 vp8_filter_block2d_bil_var_sse2( |
491 src_ptr, src_pixels_per_line, | 491 src_ptr, src_pixels_per_line, |
492 dst_ptr, dst_pixels_per_line, 16, | 492 dst_ptr, dst_pixels_per_line, 16, |
493 xoffset, yoffset, | 493 xoffset, yoffset, |
494 &xsum, &xxsum); | 494 &xsum, &xxsum); |
495 } | 495 } |
496 | 496 |
497 *sse = xxsum; | 497 *sse = xxsum; |
498 return (xxsum - ((unsigned int)(xsum * xsum) >> 7)); | 498 return (xxsum - (((unsigned int)xsum * xsum) >> 7)); |
499 } | 499 } |
500 | 500 |
501 | 501 |
/*
 * 16x16 variance wrapper around vp8_half_horiz_variance16x_h_sse2().
 *
 * Passes the two pointers, their strides and the constant 16 to the helper,
 * stores the helper's second output in *sse and returns that value minus
 * ((first output)^2 >> 8).
 *
 * NOTE(review): the constant 16 is presumably the block height, and the
 * helper outputs presumably the signed sum of differences and the sum of
 * squared differences; confirm against the helper's definition.
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0;
    unsigned int xxsum0;

    vp8_half_horiz_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    *sse = xxsum0;

    /* Convert to unsigned BEFORE multiplying. Squaring xsum0 as a signed int
     * can exceed INT_MAX (undefined behaviour); the unsigned product wraps
     * in a well-defined way instead. */
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
520 | 520 |
521 | 521 |
/*
 * 16x16 variance wrapper around vp8_half_vert_variance16x_h_sse2().
 *
 * Passes the two pointers, their strides and the constant 16 to the helper,
 * stores the helper's second output in *sse and returns that value minus
 * ((first output)^2 >> 8).
 *
 * NOTE(review): the constant 16 is presumably the block height, and the
 * helper outputs presumably the signed sum of differences and the sum of
 * squared differences; confirm against the helper's definition.
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0;
    unsigned int xxsum0;

    vp8_half_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    *sse = xxsum0;

    /* Convert to unsigned BEFORE multiplying. Squaring xsum0 as a signed int
     * can exceed INT_MAX (undefined behaviour); the unsigned product wraps
     * in a well-defined way instead. */
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
539 | 539 |
540 | 540 |
/*
 * 16x16 variance wrapper around vp8_half_horiz_vert_variance16x_h_sse2().
 *
 * Passes the two pointers, their strides and the constant 16 to the helper,
 * stores the helper's second output in *sse and returns that value minus
 * ((first output)^2 >> 8).
 *
 * NOTE(review): the constant 16 is presumably the block height, and the
 * helper outputs presumably the signed sum of differences and the sum of
 * squared differences; confirm against the helper's definition.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    const unsigned char *dst_ptr,
    int  dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0;
    unsigned int xxsum0;

    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    *sse = xxsum0;

    /* Convert to unsigned BEFORE multiplying. Squaring xsum0 as a signed int
     * can exceed INT_MAX (undefined behaviour); the unsigned product wraps
     * in a well-defined way instead. */
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
OLD | NEW |