Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(147)

Side by Side Diff: media/base/sinc_resampler.cc

Issue 12530005: Don't use magic statics in SincResampler for thread safe init. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Remove typedef types. Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/sinc_resampler.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_): 5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_):
6 // 6 //
7 // |----------------|-----------------------------------------|----------------| 7 // |----------------|-----------------------------------------|----------------|
8 // 8 //
9 // kBlockSize + kKernelSize / 2 9 // kBlockSize + kKernelSize / 2
10 // <---------------------------------------------------------> 10 // <--------------------------------------------------------->
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
50 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb) 50 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)
51 : io_sample_rate_ratio_(io_sample_rate_ratio), 51 : io_sample_rate_ratio_(io_sample_rate_ratio),
52 virtual_source_idx_(0), 52 virtual_source_idx_(0),
53 buffer_primed_(false), 53 buffer_primed_(false),
54 read_cb_(read_cb), 54 read_cb_(read_cb),
55 // Create input buffers with a 16-byte alignment for SSE optimizations. 55 // Create input buffers with a 16-byte alignment for SSE optimizations.
56 kernel_storage_(static_cast<float*>( 56 kernel_storage_(static_cast<float*>(
57 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), 57 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
58 input_buffer_(static_cast<float*>( 58 input_buffer_(static_cast<float*>(
59 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))), 59 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))),
60 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)
61 convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C),
62 #endif
60 // Setup various region pointers in the buffer (see diagram above). 63 // Setup various region pointers in the buffer (see diagram above).
61 r0_(input_buffer_.get() + kKernelSize / 2), 64 r0_(input_buffer_.get() + kKernelSize / 2),
62 r1_(input_buffer_.get()), 65 r1_(input_buffer_.get()),
63 r2_(r0_), 66 r2_(r0_),
64 r3_(r0_ + kBlockSize - kKernelSize / 2), 67 r3_(r0_ + kBlockSize - kKernelSize / 2),
65 r4_(r0_ + kBlockSize), 68 r4_(r0_ + kBlockSize),
66 r5_(r0_ + kKernelSize / 2) { 69 r5_(r0_ + kKernelSize / 2) {
67 // Ensure kKernelSize is a multiple of 32 for easy SSE optimizations; causes 70 // Ensure kKernelSize is a multiple of 32 for easy SSE optimizations; causes
68 // r0_ and r5_ (used for input) to always be 16-byte aligned by virtue of 71 // r0_ and r5_ (used for input) to always be 16-byte aligned by virtue of
69 // input_buffer_ being 16-byte aligned. 72 // input_buffer_ being 16-byte aligned.
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
129 double x = (i - subsample_offset) / kKernelSize; 132 double x = (i - subsample_offset) / kKernelSize;
130 double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2 133 double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2
131 * cos(4.0 * M_PI * x); 134 * cos(4.0 * M_PI * x);
132 135
133 // Window the sinc() function and store at the correct offset. 136 // Window the sinc() function and store at the correct offset.
134 kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window; 137 kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window;
135 } 138 }
136 } 139 }
137 } 140 }
138 141
142 // If we know the minimum architecture avoid function hopping for CPU detection.
143 #if defined(ARCH_CPU_X86_FAMILY)
144 #if defined(__SSE__)
145 #define CONVOLVE_FUNC Convolve_SSE
146 #else
147 // X86 CPU detection required. |convolve_proc_| will be set upon construction.
148 // TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed.
149 #define CONVOLVE_FUNC convolve_proc_
150 #endif
151 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
152 #define CONVOLVE_FUNC Convolve_NEON
153 #else
154 // Unknown architecture.
155 #define CONVOLVE_FUNC Convolve_C
156 #endif
157
139 void SincResampler::Resample(float* destination, int frames) { 158 void SincResampler::Resample(float* destination, int frames) {
140 int remaining_frames = frames; 159 int remaining_frames = frames;
141 160
142 // Step (1) -- Prime the input buffer at the start of the input stream. 161 // Step (1) -- Prime the input buffer at the start of the input stream.
143 if (!buffer_primed_) { 162 if (!buffer_primed_) {
144 read_cb_.Run(r0_, kBlockSize + kKernelSize / 2); 163 read_cb_.Run(r0_, kBlockSize + kKernelSize / 2);
145 buffer_primed_ = true; 164 buffer_primed_ = true;
146 } 165 }
147 166
148 // Step (2) -- Resample! 167 // Step (2) -- Resample!
149 while (remaining_frames) { 168 while (remaining_frames) {
150 while (virtual_source_idx_ < kBlockSize) { 169 while (virtual_source_idx_ < kBlockSize) {
151 // |virtual_source_idx_| lies in between two kernel offsets so figure out 170 // |virtual_source_idx_| lies in between two kernel offsets so figure out
152 // what they are. 171 // what they are.
153 int source_idx = static_cast<int>(virtual_source_idx_); 172 int source_idx = static_cast<int>(virtual_source_idx_);
154 double subsample_remainder = virtual_source_idx_ - source_idx; 173 double subsample_remainder = virtual_source_idx_ - source_idx;
155 174
156 double virtual_offset_idx = subsample_remainder * kKernelOffsetCount; 175 double virtual_offset_idx = subsample_remainder * kKernelOffsetCount;
157 int offset_idx = static_cast<int>(virtual_offset_idx); 176 int offset_idx = static_cast<int>(virtual_offset_idx);
158 177
159 // We'll compute "convolutions" for the two kernels which straddle 178 // We'll compute "convolutions" for the two kernels which straddle
160 // |virtual_source_idx_|. 179 // |virtual_source_idx_|.
161 float* k1 = kernel_storage_.get() + offset_idx * kKernelSize; 180 float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;
162 float* k2 = k1 + kKernelSize; 181 float* k2 = k1 + kKernelSize;
163 182
183 // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be
184 // true so long as kKernelSize is a multiple of 16.
185 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
186 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
187
164 // Initialize input pointer based on quantized |virtual_source_idx_|. 188 // Initialize input pointer based on quantized |virtual_source_idx_|.
165 float* input_ptr = r1_ + source_idx; 189 float* input_ptr = r1_ + source_idx;
166 190
167 // Figure out how much to weight each kernel's "convolution". 191 // Figure out how much to weight each kernel's "convolution".
168 double kernel_interpolation_factor = virtual_offset_idx - offset_idx; 192 double kernel_interpolation_factor = virtual_offset_idx - offset_idx;
169 *destination++ = Convolve( 193 *destination++ = CONVOLVE_FUNC(
170 input_ptr, k1, k2, kernel_interpolation_factor); 194 input_ptr, k1, k2, kernel_interpolation_factor);
171 195
172 // Advance the virtual index. 196 // Advance the virtual index.
173 virtual_source_idx_ += io_sample_rate_ratio_; 197 virtual_source_idx_ += io_sample_rate_ratio_;
174 198
175 if (!--remaining_frames) 199 if (!--remaining_frames)
176 return; 200 return;
177 } 201 }
178 202
179 // Wrap back around to the start. 203 // Wrap back around to the start.
180 virtual_source_idx_ -= kBlockSize; 204 virtual_source_idx_ -= kBlockSize;
181 205
182 // Step (3) Copy r3_ to r1_ and r4_ to r2_. 206 // Step (3) Copy r3_ to r1_ and r4_ to r2_.
183 // This wraps the last input frames back to the start of the buffer. 207 // This wraps the last input frames back to the start of the buffer.
184 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * (kKernelSize / 2)); 208 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));
185 memcpy(r2_, r4_, sizeof(*input_buffer_.get()) * (kKernelSize / 2)); 209 memcpy(r2_, r4_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));
186 210
187 // Step (4) 211 // Step (4)
188 // Refresh the buffer with more input. 212 // Refresh the buffer with more input.
189 read_cb_.Run(r5_, kBlockSize); 213 read_cb_.Run(r5_, kBlockSize);
190 } 214 }
191 } 215 }
192 216
217 #undef CONVOLVE_FUNC
218
193 int SincResampler::ChunkSize() const { 219 int SincResampler::ChunkSize() const {
194 return kBlockSize / io_sample_rate_ratio_; 220 return kBlockSize / io_sample_rate_ratio_;
195 } 221 }
196 222
197 void SincResampler::Flush() { 223 void SincResampler::Flush() {
198 virtual_source_idx_ = 0; 224 virtual_source_idx_ = 0;
199 buffer_primed_ = false; 225 buffer_primed_ = false;
200 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize); 226 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize);
201 } 227 }
202 228
203 float SincResampler::Convolve(const float* input_ptr, const float* k1,
204 const float* k2,
205 double kernel_interpolation_factor) {
206 // Ensure |k1|, |k2| are 16-byte aligned for SSE usage. Should always be true
207 // so long as kKernelSize is a multiple of 16.
208 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
209 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
210
211 // Rely on function level static initialization to keep ConvolveProc selection
212 // thread safe.
213 typedef float (*ConvolveProc)(const float* src, const float* k1,
214 const float* k2,
215 double kernel_interpolation_factor);
216 #if defined(ARCH_CPU_X86_FAMILY)
217 #if defined(__SSE__)
218 static const ConvolveProc kConvolveProc = Convolve_SSE;
219 #else
220 static const ConvolveProc kConvolveProc =
221 base::CPU().has_sse() ? Convolve_SSE : Convolve_C;
222 #endif
223 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
224 static const ConvolveProc kConvolveProc = Convolve_NEON;
225 #else
226 static const ConvolveProc kConvolveProc = Convolve_C;
227 #endif
228
229 return kConvolveProc(input_ptr, k1, k2, kernel_interpolation_factor);
230 }
231
232 float SincResampler::Convolve_C(const float* input_ptr, const float* k1, 229 float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
233 const float* k2, 230 const float* k2,
234 double kernel_interpolation_factor) { 231 double kernel_interpolation_factor) {
235 float sum1 = 0; 232 float sum1 = 0;
236 float sum2 = 0; 233 float sum2 = 0;
237 234
238 // Generate a single output sample. Unrolling this loop hurt performance in 235 // Generate a single output sample. Unrolling this loop hurt performance in
239 // local testing. 236 // local testing.
240 int n = kKernelSize; 237 int n = kKernelSize;
241 while (n--) { 238 while (n--) {
(...skipping 29 matching lines...) Expand all
271 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)), 268 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
272 m_sums2, vmovq_n_f32(kernel_interpolation_factor)); 269 m_sums2, vmovq_n_f32(kernel_interpolation_factor));
273 270
274 // Sum components together. 271 // Sum components together.
275 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1)); 272 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
276 return vget_lane_f32(vpadd_f32(m_half, m_half), 0); 273 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
277 } 274 }
278 #endif 275 #endif
279 276
280 } // namespace media 277 } // namespace media
OLDNEW
« no previous file with comments | « media/base/sinc_resampler.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698