| Index: third_party/libwebp/dsp/enc_sse2.c
|
| diff --git a/third_party/libwebp/dsp/enc_sse2.c b/third_party/libwebp/dsp/enc_sse2.c
|
| index 619e6c5ce78e575652bcdd3ebec230cf6bdfc5cc..032e990762649496fd2120229a52a52053556464 100644
|
| --- a/third_party/libwebp/dsp/enc_sse2.c
|
| +++ b/third_party/libwebp/dsp/enc_sse2.c
|
| @@ -1,8 +1,10 @@
|
| // Copyright 2011 Google Inc. All Rights Reserved.
|
| //
|
| -// This code is licensed under the same terms as WebM:
|
| -// Software License Agreement: http://www.webmproject.org/license/software/
|
| -// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
|
| +// Use of this source code is governed by a BSD-style license
|
| +// that can be found in the COPYING file in the root of the source
|
| +// tree. An additional intellectual property rights grant can be found
|
| +// in the file PATENTS. All contributing project authors may
|
| +// be found in the AUTHORS file in the root of the source tree.
|
| // -----------------------------------------------------------------------------
|
| //
|
| // SSE2 version of speed-critical encoding functions.
|
| @@ -453,6 +455,39 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
|
| }
|
| }
|
|
|
| +static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {  // 4x4 two-pass transform (presumably Walsh-Hadamard, per the name): reads in[16 * k] for k = 0..15, writes out[0..15].
|
| + int16_t tmp[16];  // Intermediate 4x4 result of the first pass, stored row by row.
|
| + int i;
|
| + for (i = 0; i < 4; ++i, in += 64) {  // First pass (scalar): each iteration reads 4 inputs spaced 16 apart and fills tmp[4*i .. 4*i+3].
|
| + const int a0 = (in[0 * 16] + in[2 * 16]);  // Sum of inputs 0 and 2.
|
| + const int a1 = (in[1 * 16] + in[3 * 16]);  // Sum of inputs 1 and 3.
|
| + const int a2 = (in[1 * 16] - in[3 * 16]);  // Difference of inputs 1 and 3.
|
| + const int a3 = (in[0 * 16] - in[2 * 16]);  // Difference of inputs 0 and 2.
|
| + tmp[0 + i * 4] = a0 + a1;  // The int results are narrowed to int16_t on store.
|
| + tmp[1 + i * 4] = a3 + a2;
|
| + tmp[2 + i * 4] = a3 - a2;
|
| + tmp[3 + i * 4] = a0 - a1;
|
| + }
|
| + {  // Second pass (SSE2): combines the four tmp rows lane by lane, 4 values at a time.
|
| + const __m128i src0 = _mm_loadl_epi64((__m128i*)&tmp[0]);  // Each load puts 4 int16 values in the low 64 bits; the upper 64 bits are zeroed.
|
| + const __m128i src1 = _mm_loadl_epi64((__m128i*)&tmp[4]);
|
| + const __m128i src2 = _mm_loadl_epi64((__m128i*)&tmp[8]);
|
| + const __m128i src3 = _mm_loadl_epi64((__m128i*)&tmp[12]);
|
| + const __m128i a0 = _mm_add_epi16(src0, src2);  // Same butterfly as the first pass, using wrapping 16-bit lane adds/subs.
|
| + const __m128i a1 = _mm_add_epi16(src1, src3);
|
| + const __m128i a2 = _mm_sub_epi16(src1, src3);
|
| + const __m128i a3 = _mm_sub_epi16(src0, src2);
|
| + const __m128i b0 = _mm_srai_epi16(_mm_adds_epi16(a0, a1), 1);  // Saturating add/sub followed by an arithmetic right shift by 1.
|
| + const __m128i b1 = _mm_srai_epi16(_mm_adds_epi16(a3, a2), 1);
|
| + const __m128i b2 = _mm_srai_epi16(_mm_subs_epi16(a3, a2), 1);
|
| + const __m128i b3 = _mm_srai_epi16(_mm_subs_epi16(a0, a1), 1);
|
| + _mm_storel_epi64((__m128i*)&out[ 0], b0);  // Each store writes only the low 64 bits, i.e. 4 int16 outputs per row.
|
| + _mm_storel_epi64((__m128i*)&out[ 4], b1);
|
| + _mm_storel_epi64((__m128i*)&out[ 8], b2);
|
| + _mm_storel_epi64((__m128i*)&out[12], b3);
|
| + }
|
| +}
|
| +
|
| //------------------------------------------------------------------------------
|
| // Metric
|
|
|
| @@ -919,6 +954,7 @@ void VP8EncDspInitSSE2(void) {
|
| VP8EncQuantizeBlock = QuantizeBlockSSE2;
|
| VP8ITransform = ITransformSSE2;
|
| VP8FTransform = FTransformSSE2;
|
| + VP8FTransformWHT = FTransformWHTSSE2;
|
| VP8SSE16x16 = SSE16x16SSE2;
|
| VP8SSE16x8 = SSE16x8SSE2;
|
| VP8SSE8x8 = SSE8x8SSE2;
|
|
|