| OLD | NEW |
| 1 | 1 |
| 2 /* | 2 /* |
| 3 * Copyright 2006 The Android Open Source Project | 3 * Copyright 2006 The Android Open Source Project |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
| 6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 | 9 |
| 10 #include "SkBlitRow.h" | 10 #include "SkBlitRow.h" |
| (...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 383 int height = clip.height(); | 383 int height = clip.height(); |
| 384 size_t deviceRB = fDevice.rowBytes() - (width << 1); | 384 size_t deviceRB = fDevice.rowBytes() - (width << 1); |
| 385 unsigned maskRB = mask.fRowBytes - width; | 385 unsigned maskRB = mask.fRowBytes - width; |
| 386 uint32_t expanded32 = fExpandedRaw16; | 386 uint32_t expanded32 = fExpandedRaw16; |
| 387 | 387 |
| 388 #ifdef SK_USE_NEON | 388 #ifdef SK_USE_NEON |
| 389 #define UNROLL 8 | 389 #define UNROLL 8 |
| 390 do { | 390 do { |
| 391 int w = width; | 391 int w = width; |
| 392 if (w >= UNROLL) { | 392 if (w >= UNROLL) { |
| 393 uint32x4_t color; /* can use same one */ | 393 uint32x4_t color, dev_lo, dev_hi; |
| 394 uint32x4_t dev_lo, dev_hi; | 394 uint32x4_t wn1, wn2, tmp; |
| 395 uint32x4_t t1; | 395 uint32x4_t vmask_g16, vmask_ng16; |
| 396 uint32x4_t wn1, wn2; | 396 uint16x8_t valpha, vdev; |
| 397 uint16x4_t odev_lo, odev_hi; | 397 uint16x4_t odev_lo, odev_hi, valpha_lo, valpha_hi; |
| 398 uint16x4_t alpha_lo, alpha_hi; | |
| 399 uint16x8_t alpha_full; | |
| 400 | 398 |
| 399 // prepare constants |
| 400 vmask_g16 = vdupq_n_u32(SK_G16_MASK_IN_PLACE); |
| 401 vmask_ng16 = vdupq_n_u32(~SK_G16_MASK_IN_PLACE); |
| 401 color = vdupq_n_u32(expanded32); | 402 color = vdupq_n_u32(expanded32); |
| 402 | 403 |
| 403 do { | 404 do { |
| 404 /* alpha is 8x8, widen and split to get pair of 16x4's */ | 405 // alpha is 8x8, widen and split to get a pair of 16x4 |
| 405 alpha_full = vmovl_u8(vld1_u8(alpha)); | 406 valpha = vaddw_u8(vdupq_n_u16(1), vld1_u8(alpha)); |
| 406 alpha_full = vaddq_u16(alpha_full, vshrq_n_u16(alpha_full,7)); | 407 valpha = vshrq_n_u16(valpha, 3); |
| 407 alpha_full = vshrq_n_u16(alpha_full, 3); | 408 valpha_lo = vget_low_u16(valpha); |
| 408 alpha_lo = vget_low_u16(alpha_full); | 409 valpha_hi = vget_high_u16(valpha); |
| 409 alpha_hi = vget_high_u16(alpha_full); | |
| 410 | 410 |
| 411 dev_lo = vmovl_u16(vld1_u16(device)); | 411 // load pixels |
| 412 dev_hi = vmovl_u16(vld1_u16(device+4)); | 412 vdev = vld1q_u16(device); |
| 413 dev_lo = vmovl_u16(vget_low_u16(vdev)); |
| 414 dev_hi = vmovl_u16(vget_high_u16(vdev)); |
| 413 | 415 |
| 414 /* unpack in 32 bits */ | 416 // unpack them in 32 bits |
| 415 dev_lo = vorrq_u32( | 417 dev_lo = (dev_lo & vmask_ng16) | vshlq_n_u32(dev_lo & vmask_g16,
16); |
| 416 vandq_u32(dev_lo, vdupq_n_u32(0x0000F81F)), | 418 dev_hi = (dev_hi & vmask_ng16) | vshlq_n_u32(dev_hi & vmask_g16,
16); |
| 417 vshlq_n_u32(vandq_u32(dev_lo, | |
| 418 vdupq_n_u32(0x000007E0)
), | |
| 419 16) | |
| 420 ); | |
| 421 dev_hi = vorrq_u32( | |
| 422 vandq_u32(dev_hi, vdupq_n_u32(0x0000F81F)), | |
| 423 vshlq_n_u32(vandq_u32(dev_hi, | |
| 424 vdupq_n_u32(0x000007E0)
), | |
| 425 16) | |
| 426 ); | |
| 427 | 419 |
| 428 /* blend the two */ | 420 // blend with color |
| 429 t1 = vmulq_u32(vsubq_u32(color, dev_lo), vmovl_u16(alpha_lo)); | 421 tmp = (color - dev_lo) * vmovl_u16(valpha_lo); |
| 430 t1 = vshrq_n_u32(t1, 5); | 422 tmp = vshrq_n_u32(tmp, 5); |
| 431 dev_lo = vaddq_u32(dev_lo, t1); | 423 dev_lo += tmp; |
| 432 | 424 |
| 433 t1 = vmulq_u32(vsubq_u32(color, dev_hi), vmovl_u16(alpha_hi)); | 425 tmp = vmulq_u32(color - dev_hi, vmovl_u16(valpha_hi)); |
| 434 t1 = vshrq_n_u32(t1, 5); | 426 tmp = vshrq_n_u32(tmp, 5); |
| 435 dev_hi = vaddq_u32(dev_hi, t1); | 427 dev_hi += tmp; |
| 436 | 428 |
| 437 /* re-compact and store */ | 429 // re-compact |
| 438 wn1 = vandq_u32(dev_lo, vdupq_n_u32(0x0000F81F)), | 430 wn1 = dev_lo & vmask_ng16; |
| 439 wn2 = vshrq_n_u32(dev_lo, 16); | 431 wn2 = vshrq_n_u32(dev_lo, 16) & vmask_g16; |
| 440 wn2 = vandq_u32(wn2, vdupq_n_u32(0x000007E0)); | 432 odev_lo = vmovn_u32(wn1 | wn2); |
| 441 odev_lo = vmovn_u32(vorrq_u32(wn1, wn2)); | |
| 442 | 433 |
| 443 wn1 = vandq_u32(dev_hi, vdupq_n_u32(0x0000F81F)), | 434 wn1 = dev_hi & vmask_ng16; |
| 444 wn2 = vshrq_n_u32(dev_hi, 16); | 435 wn2 = vshrq_n_u32(dev_hi, 16) & vmask_g16; |
| 445 wn2 = vandq_u32(wn2, vdupq_n_u32(0x000007E0)); | 436 odev_hi = vmovn_u32(wn1 | wn2); |
| 446 odev_hi = vmovn_u32(vorrq_u32(wn1, wn2)); | |
| 447 | 437 |
| 448 vst1_u16(device, odev_lo); | 438 // store |
| 449 vst1_u16(device+4, odev_hi); | 439 vst1q_u16(device, vcombine_u16(odev_lo, odev_hi)); |
| 450 | 440 |
| 451 device += UNROLL; | 441 device += UNROLL; |
| 452 alpha += UNROLL; | 442 alpha += UNROLL; |
| 453 w -= UNROLL; | 443 w -= UNROLL; |
| 454 } while (w >= UNROLL); | 444 } while (w >= UNROLL); |
| 455 } | 445 } |
| 456 | 446 |
| 457 /* residuals (which is everything if we have no neon) */ | 447 // residuals |
| 458 while (w > 0) { | 448 while (w > 0) { |
| 459 *device = blend_compact(expanded32, SkExpand_rgb_16(*device), | 449 *device = blend_compact(expanded32, SkExpand_rgb_16(*device), |
| 460 SkAlpha255To256(*alpha++) >> 3); | 450 SkAlpha255To256(*alpha++) >> 3); |
| 461 device += 1; | 451 device += 1; |
| 462 --w; | 452 --w; |
| 463 } | 453 } |
| 464 device = (uint16_t*)((char*)device + deviceRB); | 454 device = (uint16_t*)((char*)device + deviceRB); |
| 465 alpha += maskRB; | 455 alpha += maskRB; |
| 466 } while (--height != 0); | 456 } while (--height != 0); |
| 467 #undef UNROLL | 457 #undef UNROLL |
| (...skipping 588 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1056 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Opaque_Blitter, storage, | 1046 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Opaque_Blitter, storage, |
| 1057 storageSize, (device, paint)); | 1047 storageSize, (device, paint)); |
| 1058 } else { | 1048 } else { |
| 1059 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Blitter, storage, | 1049 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Blitter, storage, |
| 1060 storageSize, (device, paint)); | 1050 storageSize, (device, paint)); |
| 1061 } | 1051 } |
| 1062 } | 1052 } |
| 1063 | 1053 |
| 1064 return blitter; | 1054 return blitter; |
| 1065 } | 1055 } |
| OLD | NEW |