Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: third_party/libwebp/dsp/enc_neon.c

Issue 16871017: libwebp-0.3.1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: 0.3.1 final -> no changes since rc2 Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/libwebp/dsp/enc.c ('k') | third_party/libwebp/dsp/enc_sse2.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 Google Inc. All Rights Reserved. 1 // Copyright 2012 Google Inc. All Rights Reserved.
2 // 2 //
3 // This code is licensed under the same terms as WebM: 3 // Use of this source code is governed by a BSD-style license
4 // Software License Agreement: http://www.webmproject.org/license/software/ 4 // that can be found in the COPYING file in the root of the source
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
6 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
7 // 9 //
8 // ARM NEON version of speed-critical encoding functions. 10 // ARM NEON version of speed-critical encoding functions.
9 // 11 //
10 // adapted from libvpx (http://www.webmproject.org/code/) 12 // adapted from libvpx (http://www.webmproject.org/code/)
11 13
12 #include "./dsp.h" 14 #include "./dsp.h"
13 15
14 #if defined(__cplusplus) || defined(c_plusplus) 16 #if defined(__cplusplus) || defined(c_plusplus)
15 extern "C" { 17 extern "C" {
(...skipping 299 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 "vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000 317 "vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000
316 318
317 "vceq.s16 d4, d7, #0 \n" 319 "vceq.s16 d4, d7, #0 \n"
318 320
319 "vshr.s16 d0, d0, #4 \n" 321 "vshr.s16 d0, d0, #4 \n"
320 "vshr.s16 d2, d2, #4 \n" 322 "vshr.s16 d2, d2, #4 \n"
321 323
322 "vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000 324 "vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000
323 "vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000 325 "vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000
324 326
325 "vmvn.s16 d4, d4 \n" 327 "vmvn d4, d4 \n" // !(d1 == 0)
326 // op[4] = (c1*2217 + d1*5352 + 12000)>>16 328 // op[4] = (c1*2217 + d1*5352 + 12000)>>16
327 "vshrn.s32 d1, q11, #16 \n" 329 "vshrn.s32 d1, q11, #16 \n"
328 // op[4] += (d1!=0) 330 // op[4] += (d1!=0)
329 "vsub.s16 d1, d1, d4 \n" 331 "vsub.s16 d1, d1, d4 \n"
330 // op[12]= (d1*2217 - c1*5352 + 51000)>>16 332 // op[12]= (d1*2217 - c1*5352 + 51000)>>16
331 "vshrn.s32 d3, q12, #16 \n" 333 "vshrn.s32 d3, q12, #16 \n"
332 334
333 // set result to out array 335 // set result to out array
334 "vst1.16 {q0, q1}, [%[out]] \n" 336 "vst1.16 {q0, q1}, [%[out]] \n"
335 : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr), 337 : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
(...skipping 20 matching lines...) Expand all
356 "vld1.16 d3[1], [%[in]], %[kStep] \n" 358 "vld1.16 d3[1], [%[in]], %[kStep] \n"
357 "vld1.16 d0[2], [%[in]], %[kStep] \n" 359 "vld1.16 d0[2], [%[in]], %[kStep] \n"
358 "vld1.16 d1[2], [%[in]], %[kStep] \n" 360 "vld1.16 d1[2], [%[in]], %[kStep] \n"
359 "vld1.16 d2[2], [%[in]], %[kStep] \n" 361 "vld1.16 d2[2], [%[in]], %[kStep] \n"
360 "vld1.16 d3[2], [%[in]], %[kStep] \n" 362 "vld1.16 d3[2], [%[in]], %[kStep] \n"
361 "vld1.16 d0[3], [%[in]], %[kStep] \n" 363 "vld1.16 d0[3], [%[in]], %[kStep] \n"
362 "vld1.16 d1[3], [%[in]], %[kStep] \n" 364 "vld1.16 d1[3], [%[in]], %[kStep] \n"
363 "vld1.16 d2[3], [%[in]], %[kStep] \n" 365 "vld1.16 d2[3], [%[in]], %[kStep] \n"
364 "vld1.16 d3[3], [%[in]], %[kStep] \n" 366 "vld1.16 d3[3], [%[in]], %[kStep] \n"
365 367
366 "vaddl.s16 q2, d0, d2 \n" 368 "vaddl.s16 q2, d0, d2 \n" // a0=(in[0*16]+in[2*16])
367 "vshl.s32 q2, q2, #2 \n" // a0=(in[0*16]+in[2*16])<<2 369 "vaddl.s16 q3, d1, d3 \n" // a1=(in[1*16]+in[3*16])
368 "vaddl.s16 q3, d1, d3 \n" 370 "vsubl.s16 q4, d1, d3 \n" // a2=(in[1*16]-in[3*16])
369 "vshl.s32 q3, q3, #2 \n" // a1=(in[1*16]+in[3*16])<<2 371 "vsubl.s16 q5, d0, d2 \n" // a3=(in[0*16]-in[2*16])
370 "vsubl.s16 q4, d1, d3 \n"
371 "vshl.s32 q4, q4, #2 \n" // a2=(in[1*16]-in[3*16])<<2
372 "vsubl.s16 q5, d0, d2 \n"
373 "vshl.s32 q5, q5, #2 \n" // a3=(in[0*16]-in[2*16])<<2
374 372
375 "vceq.s32 q10, q2, #0 \n" 373 "vqadd.s32 q6, q2, q3 \n" // a0 + a1
376 "vmvn.s32 q10, q10 \n" // (a0 != 0)
377 "vqadd.s32 q6, q2, q3 \n" // (a0 + a1)
378 "vqsub.s32 q6, q6, q10 \n" // (a0 + a1) + (a0 != 0)
379 "vqadd.s32 q7, q5, q4 \n" // a3 + a2 374 "vqadd.s32 q7, q5, q4 \n" // a3 + a2
380 "vqsub.s32 q8, q5, q4 \n" // a3 - a2 375 "vqsub.s32 q8, q5, q4 \n" // a3 - a2
381 "vqsub.s32 q9, q2, q3 \n" // a0 - a1 376 "vqsub.s32 q9, q2, q3 \n" // a0 - a1
382 377
383 // Transpose 378 // Transpose
384 // q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7] 379 // q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7]
385 // q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15] 380 // q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15]
386 "vswp d13, d16 \n" // vtrn.64 q0, q2 381 "vswp d13, d16 \n" // vtrn.64 q0, q2
387 "vswp d15, d18 \n" // vtrn.64 q1, q3 382 "vswp d15, d18 \n" // vtrn.64 q1, q3
388 "vtrn.32 q6, q7 \n" 383 "vtrn.32 q6, q7 \n"
389 "vtrn.32 q8, q9 \n" 384 "vtrn.32 q8, q9 \n"
390 385
391 "vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8] 386 "vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8]
392 "vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12] 387 "vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12]
393 "vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12] 388 "vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12]
394 "vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8] 389 "vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8]
395 390
396 "vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1 391 "vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1
397 "vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2 392 "vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2
398 "vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2 393 "vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2
399 "vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1 394 "vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1
400 395
401 "vmov.s32 q0, #3 \n" // q0 = 3 396 "vshrn.s32 d18, q4, #1 \n" // b0 >> 1
402 397 "vshrn.s32 d19, q5, #1 \n" // b1 >> 1
403 "vcgt.s32 q1, q4, #0 \n" // (b0>0) 398 "vshrn.s32 d20, q6, #1 \n" // b2 >> 1
404 "vqsub.s32 q2, q4, q1 \n" // (b0+(b0>0)) 399 "vshrn.s32 d21, q7, #1 \n" // b3 >> 1
405 "vqadd.s32 q3, q2, q0 \n" // (b0+(b0>0)+3)
406 "vshrn.s32 d18, q3, #3 \n" // (b0+(b0>0)+3) >> 3
407
408 "vcgt.s32 q1, q5, #0 \n" // (b1>0)
409 "vqsub.s32 q2, q5, q1 \n" // (b1+(b1>0))
410 "vqadd.s32 q3, q2, q0 \n" // (b1+(b1>0)+3)
411 "vshrn.s32 d19, q3, #3 \n" // (b1+(b1>0)+3) >> 3
412
413 "vcgt.s32 q1, q6, #0 \n" // (b2>0)
414 "vqsub.s32 q2, q6, q1 \n" // (b2+(b2>0))
415 "vqadd.s32 q3, q2, q0 \n" // (b2+(b2>0)+3)
416 "vshrn.s32 d20, q3, #3 \n" // (b2+(b2>0)+3) >> 3
417
418 "vcgt.s32 q1, q7, #0 \n" // (b3>0)
419 "vqsub.s32 q2, q7, q1 \n" // (b3+(b3>0))
420 "vqadd.s32 q3, q2, q0 \n" // (b3+(b3>0)+3)
421 "vshrn.s32 d21, q3, #3 \n" // (b3+(b3>0)+3) >> 3
422 400
423 "vst1.16 {q9, q10}, [%[out]] \n" 401 "vst1.16 {q9, q10}, [%[out]] \n"
424 402
425 : [in] "+r"(in) 403 : [in] "+r"(in)
426 : [kStep] "r"(kStep), [out] "r"(out) 404 : [kStep] "r"(kStep), [out] "r"(out)
427 : "memory", "q0", "q1", "q2", "q3", "q4", "q5", 405 : "memory", "q0", "q1", "q2", "q3", "q4", "q5",
428 "q6", "q7", "q8", "q9", "q10" // clobbered 406 "q6", "q7", "q8", "q9", "q10" // clobbered
429 ) ; 407 ) ;
430 } 408 }
431 409
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after
652 VP8FTransformWHT = FTransformWHT; 630 VP8FTransformWHT = FTransformWHT;
653 631
654 VP8TDisto4x4 = Disto4x4; 632 VP8TDisto4x4 = Disto4x4;
655 VP8TDisto16x16 = Disto16x16; 633 VP8TDisto16x16 = Disto16x16;
656 #endif // WEBP_USE_NEON 634 #endif // WEBP_USE_NEON
657 } 635 }
658 636
659 #if defined(__cplusplus) || defined(c_plusplus) 637 #if defined(__cplusplus) || defined(c_plusplus)
660 } // extern "C" 638 } // extern "C"
661 #endif 639 #endif
OLDNEW
« no previous file with comments | « third_party/libwebp/dsp/enc.c ('k') | third_party/libwebp/dsp/enc_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698