OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/vector_math_testing.h" | 5 #include "media/base/vector_math_testing.h" |
6 | 6 |
7 #include <xmmintrin.h> // NOLINT | 7 #include <xmmintrin.h> // NOLINT |
8 | 8 |
9 namespace media { | 9 namespace media { |
10 namespace vector_math { | 10 namespace vector_math { |
11 | 11 |
// Multiplies each of the first |len| floats in |src| by |scale| and writes the
// products to the matching positions in |dest| (dest[i] = src[i] * scale).
//
// Complete groups of four floats are processed with _mm_load_ps() and
// _mm_store_ps(), which are the aligned-access SSE intrinsics, so |src| and
// |dest| must both be 16-byte aligned. Any leftover elements (len % 4 of them)
// are handled by a scalar loop.
void FMUL_SSE(const float src[], float scale, int len, float dest[]) {
  // |last_index| is the count of leading elements that form whole groups of
  // four; everything from there up to |len| is the scalar tail.
  const int rem = len % 4;
  const int last_index = len - rem;

  // Broadcast |scale| into all four lanes once, outside the loop.
  const __m128 m_scale = _mm_set_ps1(scale);
  for (int i = 0; i < last_index; i += 4)
    _mm_store_ps(dest + i, _mm_mul_ps(_mm_load_ps(src + i), m_scale));

  // Handle any remaining values that wouldn't fit in an SSE pass.
  for (int i = last_index; i < len; ++i)
    dest[i] = src[i] * scale;
}
| 23 |
// Multiply-accumulate: adds src[i] * scale to dest[i] for each of the first
// |len| elements (dest[i] += src[i] * scale).
//
// Complete groups of four floats are processed with _mm_load_ps() and
// _mm_store_ps(), which are the aligned-access SSE intrinsics, so |src| and
// |dest| must both be 16-byte aligned. Any leftover elements (len % 4 of them)
// are handled by a scalar loop.
void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
  // |last_index| is the count of leading elements that form whole groups of
  // four; everything from there up to |len| is the scalar tail.
  const int rem = len % 4;
  const int last_index = len - rem;

  // Broadcast |scale| into all four lanes once, outside the loop.
  const __m128 m_scale = _mm_set_ps1(scale);
  for (int i = 0; i < last_index; i += 4) {
    // dest[i..i+3] = dest[i..i+3] + src[i..i+3] * scale
    _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i),
                 _mm_mul_ps(_mm_load_ps(src + i), m_scale)));
  }

  // Handle any remaining values that wouldn't fit in an SSE pass.
  for (int i = last_index; i < len; ++i)
    dest[i] += src[i] * scale;
}
25 | 37 |
26 } // namespace vector_math | 38 } // namespace vector_math |
27 } // namespace media | 39 } // namespace media |
OLD | NEW |