Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(101)

Side by Side Diff: third_party/libwebp/dsp/enc_sse2.c

Issue 16871017: libwebp-0.3.1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: 0.3.1 final -> no changes since rc2 Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/libwebp/dsp/enc_neon.c ('k') | third_party/libwebp/dsp/lossless.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 Google Inc. All Rights Reserved. 1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // 2 //
3 // This code is licensed under the same terms as WebM: 3 // Use of this source code is governed by a BSD-style license
4 // Software License Agreement: http://www.webmproject.org/license/software/ 4 // that can be found in the COPYING file in the root of the source
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
6 // ----------------------------------------------------------------------------- 8 // -----------------------------------------------------------------------------
7 // 9 //
8 // SSE2 version of speed-critical encoding functions. 10 // SSE2 version of speed-critical encoding functions.
9 // 11 //
10 // Author: Christian Duvivier (cduvivier@google.com) 12 // Author: Christian Duvivier (cduvivier@google.com)
11 13
12 #include "./dsp.h" 14 #include "./dsp.h"
13 15
14 #if defined(__cplusplus) || defined(c_plusplus) 16 #if defined(__cplusplus) || defined(c_plusplus)
15 extern "C" { 17 extern "C" {
(...skipping 430 matching lines...) Expand 10 before | Expand all | Expand 10 after
446 // -> f1 = f1 + 1 - (a3 == 0) 448 // -> f1 = f1 + 1 - (a3 == 0)
447 const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero)); 449 const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
448 450
449 _mm_storel_epi64((__m128i*)&out[ 0], d0); 451 _mm_storel_epi64((__m128i*)&out[ 0], d0);
450 _mm_storel_epi64((__m128i*)&out[ 4], g1); 452 _mm_storel_epi64((__m128i*)&out[ 4], g1);
451 _mm_storel_epi64((__m128i*)&out[ 8], d2); 453 _mm_storel_epi64((__m128i*)&out[ 8], d2);
452 _mm_storel_epi64((__m128i*)&out[12], f3); 454 _mm_storel_epi64((__m128i*)&out[12], f3);
453 } 455 }
454 } 456 }
455 457
458 static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {
459 int16_t tmp[16];
460 int i;
461 for (i = 0; i < 4; ++i, in += 64) {
462 const int a0 = (in[0 * 16] + in[2 * 16]);
463 const int a1 = (in[1 * 16] + in[3 * 16]);
464 const int a2 = (in[1 * 16] - in[3 * 16]);
465 const int a3 = (in[0 * 16] - in[2 * 16]);
466 tmp[0 + i * 4] = a0 + a1;
467 tmp[1 + i * 4] = a3 + a2;
468 tmp[2 + i * 4] = a3 - a2;
469 tmp[3 + i * 4] = a0 - a1;
470 }
471 {
472 const __m128i src0 = _mm_loadl_epi64((__m128i*)&tmp[0]);
473 const __m128i src1 = _mm_loadl_epi64((__m128i*)&tmp[4]);
474 const __m128i src2 = _mm_loadl_epi64((__m128i*)&tmp[8]);
475 const __m128i src3 = _mm_loadl_epi64((__m128i*)&tmp[12]);
476 const __m128i a0 = _mm_add_epi16(src0, src2);
477 const __m128i a1 = _mm_add_epi16(src1, src3);
478 const __m128i a2 = _mm_sub_epi16(src1, src3);
479 const __m128i a3 = _mm_sub_epi16(src0, src2);
480 const __m128i b0 = _mm_srai_epi16(_mm_adds_epi16(a0, a1), 1);
481 const __m128i b1 = _mm_srai_epi16(_mm_adds_epi16(a3, a2), 1);
482 const __m128i b2 = _mm_srai_epi16(_mm_subs_epi16(a3, a2), 1);
483 const __m128i b3 = _mm_srai_epi16(_mm_subs_epi16(a0, a1), 1);
484 _mm_storel_epi64((__m128i*)&out[ 0], b0);
485 _mm_storel_epi64((__m128i*)&out[ 4], b1);
486 _mm_storel_epi64((__m128i*)&out[ 8], b2);
487 _mm_storel_epi64((__m128i*)&out[12], b3);
488 }
489 }
490
456 //------------------------------------------------------------------------------ 491 //------------------------------------------------------------------------------
457 // Metric 492 // Metric
458 493
459 static int SSE_Nx4SSE2(const uint8_t* a, const uint8_t* b, 494 static int SSE_Nx4SSE2(const uint8_t* a, const uint8_t* b,
460 int num_quads, int do_16) { 495 int num_quads, int do_16) {
461 const __m128i zero = _mm_setzero_si128(); 496 const __m128i zero = _mm_setzero_si128();
462 __m128i sum1 = zero; 497 __m128i sum1 = zero;
463 __m128i sum2 = zero; 498 __m128i sum2 = zero;
464 499
465 while (num_quads-- > 0) { 500 while (num_quads-- > 0) {
(...skipping 446 matching lines...) Expand 10 before | Expand all | Expand 10 after
912 // Entry point 947 // Entry point
913 948
914 extern void VP8EncDspInitSSE2(void); 949 extern void VP8EncDspInitSSE2(void);
915 950
916 void VP8EncDspInitSSE2(void) { 951 void VP8EncDspInitSSE2(void) {
917 #if defined(WEBP_USE_SSE2) 952 #if defined(WEBP_USE_SSE2)
918 VP8CollectHistogram = CollectHistogramSSE2; 953 VP8CollectHistogram = CollectHistogramSSE2;
919 VP8EncQuantizeBlock = QuantizeBlockSSE2; 954 VP8EncQuantizeBlock = QuantizeBlockSSE2;
920 VP8ITransform = ITransformSSE2; 955 VP8ITransform = ITransformSSE2;
921 VP8FTransform = FTransformSSE2; 956 VP8FTransform = FTransformSSE2;
957 VP8FTransformWHT = FTransformWHTSSE2;
922 VP8SSE16x16 = SSE16x16SSE2; 958 VP8SSE16x16 = SSE16x16SSE2;
923 VP8SSE16x8 = SSE16x8SSE2; 959 VP8SSE16x8 = SSE16x8SSE2;
924 VP8SSE8x8 = SSE8x8SSE2; 960 VP8SSE8x8 = SSE8x8SSE2;
925 VP8SSE4x4 = SSE4x4SSE2; 961 VP8SSE4x4 = SSE4x4SSE2;
926 VP8TDisto4x4 = Disto4x4SSE2; 962 VP8TDisto4x4 = Disto4x4SSE2;
927 VP8TDisto16x16 = Disto16x16SSE2; 963 VP8TDisto16x16 = Disto16x16SSE2;
928 #endif // WEBP_USE_SSE2 964 #endif // WEBP_USE_SSE2
929 } 965 }
930 966
931 #if defined(__cplusplus) || defined(c_plusplus) 967 #if defined(__cplusplus) || defined(c_plusplus)
932 } // extern "C" 968 } // extern "C"
933 #endif 969 #endif
OLDNEW
« no previous file with comments | « third_party/libwebp/dsp/enc_neon.c ('k') | third_party/libwebp/dsp/lossless.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698