OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef SKIA_EXT_CONVOLVER_H_ | 5 #ifndef SKIA_EXT_CONVOLVER_H_ |
6 #define SKIA_EXT_CONVOLVER_H_ | 6 #define SKIA_EXT_CONVOLVER_H_ |
7 | 7 |
8 #include <cmath> | 8 #include <cmath> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
12 #include "base/cpu.h" | 12 #include "base/cpu.h" |
13 #include "third_party/skia/include/core/SkTypes.h" | 13 #include "third_party/skia/include/core/SkTypes.h" |
14 | 14 |
15 #if defined(ARCH_CPU_X86_FAMILY) | 15 // We can build SSE2 optimized versions for all x86 CPUs |
16 // TODO(hclam): SSE2 is disabled on Linux 32-bits because GCC requires -msse2. | 16 // except when building for the iOS simulator. |
17 // We should refactor the code in .cc and enable this. | 17 #if defined(ARCH_CPU_X86_FAMILY) && !defined(OS_IOS) |
18 #if defined(ARCH_CPU_X86_64) || defined(OS_MACOSX) || defined(COMPILER_MSVC) | |
19 #define SIMD_SSE2 1 | 18 #define SIMD_SSE2 1 |
20 #endif | 19 #define SIMD_PADDING 8 // 8 * int16 |
21 #endif | 20 #endif |
22 | 21 |
23 // avoid confusion with Mac OS X's math library (Carbon) | 22 // avoid confusion with Mac OS X's math library (Carbon) |
24 #if defined(__APPLE__) | 23 #if defined(__APPLE__) |
25 #undef FloatToFixed | 24 #undef FloatToFixed |
26 #undef FixedToFloat | 25 #undef FixedToFloat |
27 #endif | 26 #endif |
28 | 27 |
29 namespace skia { | 28 namespace skia { |
30 | 29 |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
101 const FilterInstance& filter = filters_[value_offset]; | 100 const FilterInstance& filter = filters_[value_offset]; |
102 *filter_offset = filter.offset; | 101 *filter_offset = filter.offset; |
103 *filter_length = filter.length; | 102 *filter_length = filter.length; |
104 if (filter.length == 0) { | 103 if (filter.length == 0) { |
105 return NULL; | 104 return NULL; |
106 } | 105 } |
107 return &filter_values_[filter.data_location]; | 106 return &filter_values_[filter.data_location]; |
108 } | 107 } |
109 | 108 |
110 | 109 |
111 inline void PaddingForSIMD(int padding_count) { | 110 inline void PaddingForSIMD() { |
112 // Padding |padding_count| of more dummy coefficients after the coefficients | 111 // Appends |SIMD_PADDING| more dummy coefficients after the coefficients |
113 // of last filter to prevent SIMD instructions which load 8 or 16 bytes | 112 // of last filter to prevent SIMD instructions which load 8 or 16 bytes |
114 // together to access invalid memory areas. We are not trying to align the | 113 // together to access invalid memory areas. We are not trying to align the |
115 // coefficients right now due to the opaqueness of <vector> implementation. | 114 // coefficients right now due to the opaqueness of <vector> implementation. |
116 // This has to be done after all |AddFilter| calls. | 115 // This has to be done after all |AddFilter| calls. |
117 for (int i = 0; i < padding_count; ++i) | 116 #ifdef SIMD_PADDING |
| 117 for (int i = 0; i < SIMD_PADDING; ++i) |
118 filter_values_.push_back(static_cast<Fixed>(0)); | 118 filter_values_.push_back(static_cast<Fixed>(0)); |
| 119 #endif |
119 } | 120 } |
120 | 121 |
121 private: | 122 private: |
122 struct FilterInstance { | 123 struct FilterInstance { |
123 // Offset within filter_values for this instance of the filter. | 124 // Offset within filter_values for this instance of the filter. |
124 int data_location; | 125 int data_location; |
125 | 126 |
126 // Distance from the left of the filter to the center. IN PIXELS | 127 // Distance from the left of the filter to the center. IN PIXELS |
127 int offset; | 128 int offset; |
128 | 129 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
160 // | 161 // |
161 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order | 162 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order |
162 // (this is ARGB when loaded into 32-bit words on a little-endian machine). | 163 // (this is ARGB when loaded into 32-bit words on a little-endian machine). |
163 SK_API void BGRAConvolve2D(const unsigned char* source_data, | 164 SK_API void BGRAConvolve2D(const unsigned char* source_data, |
164 int source_byte_row_stride, | 165 int source_byte_row_stride, |
165 bool source_has_alpha, | 166 bool source_has_alpha, |
166 const ConvolutionFilter1D& xfilter, | 167 const ConvolutionFilter1D& xfilter, |
167 const ConvolutionFilter1D& yfilter, | 168 const ConvolutionFilter1D& yfilter, |
168 int output_byte_row_stride, | 169 int output_byte_row_stride, |
169 unsigned char* output, | 170 unsigned char* output, |
170 bool use_sse2); | 171 bool use_simd_if_possible); |
171 } // namespace skia | 172 } // namespace skia |
172 | 173 |
173 #endif // SKIA_EXT_CONVOLVER_H_ | 174 #endif // SKIA_EXT_CONVOLVER_H_ |
OLD | NEW |