OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 | 8 |
9 #include "SkBitmapProcState.h" | 9 #include "SkBitmapProcState.h" |
10 #include "SkColorPriv.h" | 10 #include "SkColorPriv.h" |
11 #include "SkPaint.h" | 11 #include "SkPaint.h" |
12 #include "SkTypes.h" | 12 #include "SkTypes.h" |
13 #include "SkUtils.h" | 13 #include "SkUtils.h" |
| 14 #include "SkUtilsArm.h" |
| 15 |
| 16 #include "SkConvolver.h" |
14 | 17 |
15 #if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) | 18 #if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) |
16 void SI8_D16_nofilter_DX_arm( | 19 void SI8_D16_nofilter_DX_arm( |
17 const SkBitmapProcState& s, | 20 const SkBitmapProcState& s, |
18 const uint32_t* SK_RESTRICT xy, | 21 const uint32_t* SK_RESTRICT xy, |
19 int count, | 22 int count, |
20 uint16_t* SK_RESTRICT colors) SK_ATTRIBUTE_OPTIMIZE_O1; | 23 uint16_t* SK_RESTRICT colors) SK_ATTRIBUTE_OPTIMIZE_O1; |
21 | 24 |
22 void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s, | 25 void SI8_D16_nofilter_DX_arm(const SkBitmapProcState& s, |
23 const uint32_t* SK_RESTRICT xy, | 26 const uint32_t* SK_RESTRICT xy, |
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
212 fShaderProc32 = NULL; | 215 fShaderProc32 = NULL; |
213 } | 216 } |
214 } | 217 } |
215 #endif | 218 #endif |
216 break; | 219 break; |
217 default: | 220 default: |
218 break; | 221 break; |
219 } | 222 } |
220 } | 223 } |
221 | 224 |
| 225 ///////////////////////////////////// |
| 226 |
| 227 /* FUNCTIONS BELOW ARE SCALAR STUBS INTENDED FOR ARM DEVELOPERS TO REPLACE */ |
| 228 |
| 229 ///////////////////////////////////// |
| 230 |
| 231 |
// Clamps |a| to the range [0, 255] and returns it as an 8-bit value.
static inline unsigned char ClampTo8(int a) {
    // One unsigned comparison covers both bounds: a negative |a| converts to
    // a large unsigned value, so only 0..255 pass. This avoids the second
    // check in the common (in-range) case.
    if (static_cast<unsigned>(a) < 256) {
        return static_cast<unsigned char>(a);
    }
    // Out of range: saturate toward whichever end |a| overshot.
    return (a < 0) ? 0 : 255;
}
| 241 |
| 242 // Convolves horizontally along a single row. The row data is given in |
| 243 // |srcData| and continues for the numValues() of the filter. |
| 244 void convolveHorizontally_arm(const unsigned char* srcData, |
| 245 const SkConvolutionFilter1D& filter, |
| 246 unsigned char* outRow, |
| 247 bool hasAlpha) { |
| 248 // Loop over each pixel on this row in the output image. |
| 249 int numValues = filter.numValues(); |
| 250 for (int outX = 0; outX < numValues; outX++) { |
| 251 // Get the filter that determines the current output pixel. |
| 252 int filterOffset, filterLength; |
| 253 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = |
| 254 filter.FilterForValue(outX, &filterOffset, &filterLength); |
| 255 |
| 256 // Compute the first pixel in this row that the filter affects. It will |
| 257 // touch |filterLength| pixels (4 bytes each) after this. |
| 258 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; |
| 259 |
| 260 // Apply the filter to the row to get the destination pixel in |accum|. |
| 261 int accum[4] = {0}; |
| 262 for (int filterX = 0; filterX < filterLength; filterX++) { |
| 263 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[fil
terX]; |
| 264 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; |
| 265 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; |
| 266 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; |
| 267 if (hasAlpha) { |
| 268 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; |
| 269 } |
| 270 } |
| 271 |
| 272 // Bring this value back in range. All of the filter scaling factors |
| 273 // are in fixed point with kShiftBits bits of fractional part. |
| 274 accum[0] >>= SkConvolutionFilter1D::kShiftBits; |
| 275 accum[1] >>= SkConvolutionFilter1D::kShiftBits; |
| 276 accum[2] >>= SkConvolutionFilter1D::kShiftBits; |
| 277 if (hasAlpha) { |
| 278 accum[3] >>= SkConvolutionFilter1D::kShiftBits; |
| 279 } |
| 280 |
| 281 // Store the new pixel. |
| 282 outRow[outX * 4 + 0] = ClampTo8(accum[0]); |
| 283 outRow[outX * 4 + 1] = ClampTo8(accum[1]); |
| 284 outRow[outX * 4 + 2] = ClampTo8(accum[2]); |
| 285 if (hasAlpha) { |
| 286 outRow[outX * 4 + 3] = ClampTo8(accum[3]); |
| 287 } |
| 288 } |
| 289 } |
| 290 |
| 291 // Does vertical convolution to produce one output row. The filter values and |
| 292 // length are given in the first two parameters. These are applied to each |
| 293 // of the rows pointed to in the |sourceDataRows| array, with each row |
| 294 // being |pixelWidth| wide. |
| 295 // |
| 296 // The output must have room for |pixelWidth * 4| bytes. |
| 297 template<bool hasAlpha> |
| 298 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* f
ilterValues, |
| 299 int filterLength, |
| 300 unsigned char* const* sourceDataRows, |
| 301 int pixelWidth, |
| 302 unsigned char* outRow) { |
| 303 // We go through each column in the output and do a vertical convolution
, |
| 304 // generating one output pixel each time. |
| 305 for (int outX = 0; outX < pixelWidth; outX++) { |
| 306 // Compute the number of bytes over in each row that the current col
umn |
| 307 // we're convolving starts at. The pixel will cover the next 4 bytes
. |
| 308 int byteOffset = outX * 4; |
| 309 |
| 310 // Apply the filter to one column of pixels. |
| 311 int accum[4] = {0}; |
| 312 for (int filterY = 0; filterY < filterLength; filterY++) { |
| 313 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterY]; |
| 314 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; |
| 315 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; |
| 316 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; |
| 317 if (hasAlpha) { |
| 318 accum[3] += curFilter * sourceDataRows[filterY][byteOffset +
3]; |
| 319 } |
| 320 } |
| 321 |
| 322 // Bring this value back in range. All of the filter scaling factors |
| 323 // are in fixed point with kShiftBits bits of precision. |
| 324 accum[0] >>= SkConvolutionFilter1D::kShiftBits; |
| 325 accum[1] >>= SkConvolutionFilter1D::kShiftBits; |
| 326 accum[2] >>= SkConvolutionFilter1D::kShiftBits; |
| 327 if (hasAlpha) { |
| 328 accum[3] >>= SkConvolutionFilter1D::kShiftBits; |
| 329 } |
| 330 |
| 331 // Store the new pixel. |
| 332 outRow[byteOffset + 0] = ClampTo8(accum[0]); |
| 333 outRow[byteOffset + 1] = ClampTo8(accum[1]); |
| 334 outRow[byteOffset + 2] = ClampTo8(accum[2]); |
| 335 if (hasAlpha) { |
| 336 unsigned char alpha = ClampTo8(accum[3]); |
| 337 |
| 338 // Make sure the alpha channel doesn't come out smaller than any
of the |
| 339 // color channels. We use premultipled alpha channels, so this s
hould |
| 340 // never happen, but rounding errors will cause this from time t
o time. |
| 341 // These "impossible" colors will cause overflows (and hence ran
dom pixel |
| 342 // values) when the resulting bitmap is drawn to the screen. |
| 343 // |
| 344 // We only need to do this when generating the final output row
(here). |
| 345 int maxColorChannel = SkTMax(outRow[byteOffset + 0], |
| 346 SkTMax(outRow[byteOffset + 1], |
| 347 outRow[byteOffset + 2])); |
| 348 if (alpha < maxColorChannel) { |
| 349 outRow[byteOffset + 3] = maxColorChannel; |
| 350 } else { |
| 351 outRow[byteOffset + 3] = alpha; |
| 352 } |
| 353 } else { |
| 354 // No alpha channel, the image is opaque. |
| 355 outRow[byteOffset + 3] = 0xff; |
| 356 } |
| 357 } |
| 358 } |
| 359 |
| 360 void convolveVertically_arm(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, |
| 361 int filterLength, |
| 362 unsigned char* const* sourceDataRows, |
| 363 int pixelWidth, |
| 364 unsigned char* outRow, |
| 365 bool sourceHasAlpha) { |
| 366 if (sourceHasAlpha) { |
| 367 convolveVertically_arm<true>(filterValues, filterLength, |
| 368 sourceDataRows, pixelWidth, |
| 369 outRow); |
| 370 } else { |
| 371 convolveVertically_arm<false>(filterValues, filterLength, |
| 372 sourceDataRows, pixelWidth, |
| 373 outRow); |
| 374 } |
| 375 } |
| 376 |
| 377 // Convolves horizontally along four rows. The row data is given in |
| 378 // |src_data| and continues for the num_values() of the filter. |
| 379 // The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please |
| 380 // refer to that function for detailed comments. |
| 381 void convolve4RowsHorizontally_arm(const unsigned char* src_data[4], |
| 382 const SkConvolutionFilter1D& filter, |
| 383 unsigned char* out_row[4]) { |
| 384 } |
| 385 |
| 386 /////////////////////////// |
| 387 |
| 388 /* STOP REWRITING FUNCTIONS HERE, BUT DON'T FORGET TO EDIT THE |
| 389 PLATFORM CONVOLUTION PROCS BELOW */ |
| 390 |
| 391 /////////////////////////// |
| 392 |
| 393 void applySIMDPadding_arm(SkConvolutionFilter1D *filter) { |
| 394 // Padding |paddingCount| of more dummy coefficients after the coefficients |
| 395 // of last filter to prevent SIMD instructions which load 8 or 16 bytes |
| 396 // together to access invalid memory areas. We are not trying to align the |
| 397 // coefficients right now due to the opaqueness of <vector> implementation. |
| 398 // This has to be done after all |AddFilter| calls. |
| 399 for (int i = 0; i < 8; ++i) { |
| 400 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); |
| 401 } |
| 402 } |
| 403 |
222 void SkBitmapProcState::platformConvolutionProcs() { | 404 void SkBitmapProcState::platformConvolutionProcs() { |
223 // no specialization for ARM here yet. | 405 if (sk_cpu_arm_has_neon()) { |
224 } | 406 fConvolutionProcs->fExtraHorizontalReads = 3; |
| 407 fConvolutionProcs->fConvolveVertically = &convolveVertically_arm; |
| 408 |
| 409 // next line is commented out because the four-row convolution function
above is |
| 410 // just a no-op. Please see the comment above its definition, and the S
SE implementation |
| 411 // in SkBitmapProcState_opts_SSE2.cpp for guidance on its semantics. |
| 412 // leaving it as NULL will just cause the convolution system to not atte
mpt |
| 413 // to operate on four rows at once, which is correct but not performance
-optimal. |
| 414 |
| 415 // fConvolutionProcs->fConvolve4RowsHorizontally = &convolve4RowsHorizon
tally_arm; |
| 416 |
| 417 fConvolutionProcs->fConvolve4RowsHorizontally = NULL; |
| 418 |
| 419 fConvolutionProcs->fConvolveHorizontally = &convolveHorizontally_arm; |
| 420 fConvolutionProcs->fApplySIMDPadding = &applySIMDPadding_arm; |
| 421 } |
| 422 } |
OLD | NEW |