| Index: src/opts/SkBlitRow_opts_arm_neon.cpp
|
| diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp
|
| index 00086c37898915d25ed463df09fdb05970346efd..1e7f6f321d776829e352916b1d805dbad39f5aa0 100644
|
| --- a/src/opts/SkBlitRow_opts_arm_neon.cpp
|
| +++ b/src/opts/SkBlitRow_opts_arm_neon.cpp
|
| @@ -426,6 +426,13 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
|
| uint8x8_t src_raw, dst_raw, dst_final;
|
| uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
|
|
|
| + /* The two prefetches below may make the code slightly
|
| + * slower for small values of count but are worth having
|
| + * in the general case.
|
| + */
|
| + __builtin_prefetch(src+32);
|
| + __builtin_prefetch(dst+32);
|
| +
|
| /* get the source */
|
| src_raw = vreinterpret_u8_u32(vld1_u32(src));
|
| #if UNROLL > 2
|
| @@ -447,14 +454,7 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
|
|
|
| /* get the alphas spread out properly */
|
| alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
|
| -#if 1
|
| - /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
|
| - /* we collapsed (255-a)+1 ... */
|
| alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
|
| -#else
|
| - alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow);
|
| - alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7));
|
| -#endif
|
|
|
| /* spread the dest */
|
| dst_wide = vmovl_u8(dst_raw);
|
| @@ -476,14 +476,7 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
|
| uint16x8_t alpha_wide;
|
|
|
| alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
|
| -#if 1
|
| - /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
|
| - /* we collapsed (255-a)+1 ... */
|
| alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
|
| -#else
|
| - alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow);
|
| - alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7));
|
| -#endif
|
|
|
| /* spread the dest */
|
| dst_wide = vmovl_u8(dst_raw_2);
|
|
|