| OLD | NEW |
| 1 // Copyright 2012 Google Inc. All Rights Reserved. | 1 // Copyright 2012 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // This code is licensed under the same terms as WebM: | 3 // Use of this source code is governed by a BSD-style license |
| 4 // Software License Agreement: http://www.webmproject.org/license/software/ | 4 // that can be found in the COPYING file in the root of the source |
| 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ | 5 // tree. An additional intellectual property rights grant can be found |
| | 6 // in the file PATENTS. All contributing project authors may |
| | 7 // be found in the AUTHORS file in the root of the source tree. |
| 6 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
| 7 // | 9 // |
| 8 // ARM NEON version of speed-critical encoding functions. | 10 // ARM NEON version of speed-critical encoding functions. |
| 9 // | 11 // |
| 10 // adapted from libvpx (http://www.webmproject.org/code/) | 12 // adapted from libvpx (http://www.webmproject.org/code/) |
| 11 | 13 |
| 12 #include "./dsp.h" | 14 #include "./dsp.h" |
| 13 | 15 |
| 14 #if defined(__cplusplus) || defined(c_plusplus) | 16 #if defined(__cplusplus) || defined(c_plusplus) |
| 15 extern "C" { | 17 extern "C" { |
| (...skipping 299 matching lines...) |
| 315 "vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000 | 317 "vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000 |
| 316 | 318 |
| 317 "vceq.s16 d4, d7, #0 \n" | 319 "vceq.s16 d4, d7, #0 \n" |
| 318 | 320 |
| 319 "vshr.s16 d0, d0, #4 \n" | 321 "vshr.s16 d0, d0, #4 \n" |
| 320 "vshr.s16 d2, d2, #4 \n" | 322 "vshr.s16 d2, d2, #4 \n" |
| 321 | 323 |
| 322 "vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000 | 324 "vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000 |
| 323 "vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000 | 325 "vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000 |
| 324 | 326 |
| 325 "vmvn.s16 d4, d4 \n" | 327 "vmvn d4, d4 \n" // !(d1 == 0) |
| 326 // op[4] = (c1*2217 + d1*5352 + 12000)>>16 | 328 // op[4] = (c1*2217 + d1*5352 + 12000)>>16 |
| 327 "vshrn.s32 d1, q11, #16 \n" | 329 "vshrn.s32 d1, q11, #16 \n" |
| 328 // op[4] += (d1!=0) | 330 // op[4] += (d1!=0) |
| 329 "vsub.s16 d1, d1, d4 \n" | 331 "vsub.s16 d1, d1, d4 \n" |
| 330 // op[12]= (d1*2217 - c1*5352 + 51000)>>16 | 332 // op[12]= (d1*2217 - c1*5352 + 51000)>>16 |
| 331 "vshrn.s32 d3, q12, #16 \n" | 333 "vshrn.s32 d3, q12, #16 \n" |
| 332 | 334 |
| 333 // set result to out array | 335 // set result to out array |
| 334 "vst1.16 {q0, q1}, [%[out]] \n" | 336 "vst1.16 {q0, q1}, [%[out]] \n" |
| 335 : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr), | 337 : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr), |
| (...skipping 20 matching lines...) |
| 356 "vld1.16 d3[1], [%[in]], %[kStep] \n" | 358 "vld1.16 d3[1], [%[in]], %[kStep] \n" |
| 357 "vld1.16 d0[2], [%[in]], %[kStep] \n" | 359 "vld1.16 d0[2], [%[in]], %[kStep] \n" |
| 358 "vld1.16 d1[2], [%[in]], %[kStep] \n" | 360 "vld1.16 d1[2], [%[in]], %[kStep] \n" |
| 359 "vld1.16 d2[2], [%[in]], %[kStep] \n" | 361 "vld1.16 d2[2], [%[in]], %[kStep] \n" |
| 360 "vld1.16 d3[2], [%[in]], %[kStep] \n" | 362 "vld1.16 d3[2], [%[in]], %[kStep] \n" |
| 361 "vld1.16 d0[3], [%[in]], %[kStep] \n" | 363 "vld1.16 d0[3], [%[in]], %[kStep] \n" |
| 362 "vld1.16 d1[3], [%[in]], %[kStep] \n" | 364 "vld1.16 d1[3], [%[in]], %[kStep] \n" |
| 363 "vld1.16 d2[3], [%[in]], %[kStep] \n" | 365 "vld1.16 d2[3], [%[in]], %[kStep] \n" |
| 364 "vld1.16 d3[3], [%[in]], %[kStep] \n" | 366 "vld1.16 d3[3], [%[in]], %[kStep] \n" |
| 365 | 367 |
| 366 "vaddl.s16 q2, d0, d2 \n" | 368 "vaddl.s16 q2, d0, d2 \n" // a0=(in[0*16]+in[2*16]) |
| 367 "vshl.s32 q2, q2, #2 \n" // a0=(in[0*16]+in[2*16])<<2 | 369 "vaddl.s16 q3, d1, d3 \n" // a1=(in[1*16]+in[3*16]) |
| 368 "vaddl.s16 q3, d1, d3 \n" | 370 "vsubl.s16 q4, d1, d3 \n" // a2=(in[1*16]-in[3*16]) |
| 369 "vshl.s32 q3, q3, #2 \n" // a1=(in[1*16]+in[3*16])<<2 | 371 "vsubl.s16 q5, d0, d2 \n" // a3=(in[0*16]-in[2*16]) |
| 370 "vsubl.s16 q4, d1, d3 \n" | |
| 371 "vshl.s32 q4, q4, #2 \n" // a2=(in[1*16]-in[3*16])<<2 | |
| 372 "vsubl.s16 q5, d0, d2 \n" | |
| 373 "vshl.s32 q5, q5, #2 \n" // a3=(in[0*16]-in[2*16])<<2 | |
| 374 | 372 |
| 375 "vceq.s32 q10, q2, #0 \n" | 373 "vqadd.s32 q6, q2, q3 \n" // a0 + a1 |
| 376 "vmvn.s32 q10, q10 \n" // (a0 != 0) | |
| 377 "vqadd.s32 q6, q2, q3 \n" // (a0 + a1) | |
| 378 "vqsub.s32 q6, q6, q10 \n" // (a0 + a1) + (a0 != 0) | |
| 379 "vqadd.s32 q7, q5, q4 \n" // a3 + a2 | 374 "vqadd.s32 q7, q5, q4 \n" // a3 + a2 |
| 380 "vqsub.s32 q8, q5, q4 \n" // a3 - a2 | 375 "vqsub.s32 q8, q5, q4 \n" // a3 - a2 |
| 381 "vqsub.s32 q9, q2, q3 \n" // a0 - a1 | 376 "vqsub.s32 q9, q2, q3 \n" // a0 - a1 |
| 382 | 377 |
| 383 // Transpose | 378 // Transpose |
| 384 // q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7] | 379 // q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7] |
| 385 // q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15] | 380 // q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15] |
| 386 "vswp d13, d16 \n" // vtrn.64 q0, q2 | 381 "vswp d13, d16 \n" // vtrn.64 q0, q2 |
| 387 "vswp d15, d18 \n" // vtrn.64 q1, q3 | 382 "vswp d15, d18 \n" // vtrn.64 q1, q3 |
| 388 "vtrn.32 q6, q7 \n" | 383 "vtrn.32 q6, q7 \n" |
| 389 "vtrn.32 q8, q9 \n" | 384 "vtrn.32 q8, q9 \n" |
| 390 | 385 |
| 391 "vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8] | 386 "vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8] |
| 392 "vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12] | 387 "vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12] |
| 393 "vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12] | 388 "vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12] |
| 394 "vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8] | 389 "vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8] |
| 395 | 390 |
| 396 "vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1 | 391 "vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1 |
| 397 "vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2 | 392 "vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2 |
| 398 "vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2 | 393 "vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2 |
| 399 "vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1 | 394 "vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1 |
| 400 | 395 |
| 401 "vmov.s32 q0, #3 \n" // q0 = 3 | 396 "vshrn.s32 d18, q4, #1 \n" // b0 >> 1 |
| 402 | 397 "vshrn.s32 d19, q5, #1 \n" // b1 >> 1 |
| 403 "vcgt.s32 q1, q4, #0 \n" // (b0>0) | 398 "vshrn.s32 d20, q6, #1 \n" // b2 >> 1 |
| 404 "vqsub.s32 q2, q4, q1 \n" // (b0+(b0>0)) | 399 "vshrn.s32 d21, q7, #1 \n" // b3 >> 1 |
| 405 "vqadd.s32 q3, q2, q0 \n" // (b0+(b0>0)+3) | |
| 406 "vshrn.s32 d18, q3, #3 \n" // (b0+(b0>0)+3) >> 3 | |
| 407 | |
| 408 "vcgt.s32 q1, q5, #0 \n" // (b1>0) | |
| 409 "vqsub.s32 q2, q5, q1 \n" // (b1+(b1>0)) | |
| 410 "vqadd.s32 q3, q2, q0 \n" // (b1+(b1>0)+3) | |
| 411 "vshrn.s32 d19, q3, #3 \n" // (b1+(b1>0)+3) >> 3 | |
| 412 | |
| 413 "vcgt.s32 q1, q6, #0 \n" // (b2>0) | |
| 414 "vqsub.s32 q2, q6, q1 \n" // (b2+(b2>0)) | |
| 415 "vqadd.s32 q3, q2, q0 \n" // (b2+(b2>0)+3) | |
| 416 "vshrn.s32 d20, q3, #3 \n" // (b2+(b2>0)+3) >> 3 | |
| 417 | |
| 418 "vcgt.s32 q1, q7, #0 \n" // (b3>0) | |
| 419 "vqsub.s32 q2, q7, q1 \n" // (b3+(b3>0)) | |
| 420 "vqadd.s32 q3, q2, q0 \n" // (b3+(b3>0)+3) | |
| 421 "vshrn.s32 d21, q3, #3 \n" // (b3+(b3>0)+3) >> 3 | |
| 422 | 400 |
| 423 "vst1.16 {q9, q10}, [%[out]] \n" | 401 "vst1.16 {q9, q10}, [%[out]] \n" |
| 424 | 402 |
| 425 : [in] "+r"(in) | 403 : [in] "+r"(in) |
| 426 : [kStep] "r"(kStep), [out] "r"(out) | 404 : [kStep] "r"(kStep), [out] "r"(out) |
| 427 : "memory", "q0", "q1", "q2", "q3", "q4", "q5", | 405 : "memory", "q0", "q1", "q2", "q3", "q4", "q5", |
| 428 "q6", "q7", "q8", "q9", "q10" // clobbered | 406 "q6", "q7", "q8", "q9", "q10" // clobbered |
| 429 ) ; | 407 ) ; |
| 430 } | 408 } |
| 431 | 409 |
| (...skipping 220 matching lines...) |
| 652 VP8FTransformWHT = FTransformWHT; | 630 VP8FTransformWHT = FTransformWHT; |
| 653 | 631 |
| 654 VP8TDisto4x4 = Disto4x4; | 632 VP8TDisto4x4 = Disto4x4; |
| 655 VP8TDisto16x16 = Disto16x16; | 633 VP8TDisto16x16 = Disto16x16; |
| 656 #endif // WEBP_USE_NEON | 634 #endif // WEBP_USE_NEON |
| 657 } | 635 } |
| 658 | 636 |
| 659 #if defined(__cplusplus) || defined(c_plusplus) | 637 #if defined(__cplusplus) || defined(c_plusplus) |
| 660 } // extern "C" | 638 } // extern "C" |
| 661 #endif | 639 #endif |
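
For reference, the first hunk only touches the tail of the 4x4 forward transform: the `vceq`/`vmvn`/`vsub` triple is a branch-free way of applying the `op[4] += (d1 != 0)` correction, and the change simply drops the data-type suffix on `vmvn` (a bitwise NOT, where the suffix carries no meaning) and documents the resulting mask. A minimal one-lane sketch of the trick, with a hypothetical scalar helper standing in for one 16-bit lane:

```c
#include <stdint.h>

/* One 16-bit lane of the vceq/vmvn/vsub sequence used to compute
 *   op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0),
 * following the variable names used in the asm comments above. */
static int16_t RoundOp4(int c1, int d1) {
  int16_t op4 = (int16_t)((c1 * 2217 + d1 * 5352 + 12000) >> 16);
  int16_t mask = (d1 == 0) ? -1 : 0;  /* vceq.s16: all-ones where d1 == 0 */
  mask = (int16_t)~mask;              /* vmvn:     all-ones where d1 != 0 */
  op4 -= mask;                        /* vsub.s16: subtracting -1 adds 1  */
  return op4;
}
```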
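
The FTransformWHT hunk is the substantive change: the rewritten asm drops the `<<2` pre-scaling and the `(a0 != 0)` correction from the first pass, and replaces the final `(b + (b > 0) + 3) >> 3` rounding with a plain `b >> 1`. Below is a plain-C model of what the NEW column computes, assuming the usual `FTransformWHT(const int16_t* in, int16_t* out)` signature and a `kStep` of 16 coefficients (both taken from context, not shown in this hunk); the saturating `vqadd`/`vqsub` ops are modeled as ordinary integer adds:

```c
#include <stdint.h>

/* Plain-C model of the rewritten NEON FTransformWHT (NEW column). */
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
  int32_t tmp[16];
  int i;
  for (i = 0; i < 4; ++i, in += 64) {        /* 4 groups of vld1.16 lane loads */
    const int a0 = in[0 * 16] + in[2 * 16];  /* vaddl.s16 q2 */
    const int a1 = in[1 * 16] + in[3 * 16];  /* vaddl.s16 q3 */
    const int a2 = in[1 * 16] - in[3 * 16];  /* vsubl.s16 q4 */
    const int a3 = in[0 * 16] - in[2 * 16];  /* vsubl.s16 q5 */
    tmp[0 + i * 4] = a0 + a1;                /* no '(a0 != 0)' correction anymore */
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  for (i = 0; i < 4; ++i) {                  /* second pass, after the transpose */
    const int a0 = tmp[0 + i] + tmp[8 + i];
    const int a1 = tmp[4 + i] + tmp[12 + i];
    const int a2 = tmp[4 + i] - tmp[12 + i];
    const int a3 = tmp[0 + i] - tmp[8 + i];
    out[ 0 + i] = (int16_t)((a0 + a1) >> 1); /* vshrn.s32 #1 replaces the old  */
    out[ 4 + i] = (int16_t)((a3 + a2) >> 1); /* '(b + (b > 0) + 3) >> 3' round */
    out[ 8 + i] = (int16_t)((a3 - a2) >> 1);
    out[12 + i] = (int16_t)((a0 - a1) >> 1);
  }
}
```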