Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1724)

Unified Diff: src/core/SkBlitter_RGB16.cpp

Issue 18666005: ARM Skia NEON patches - 11 - Blitter_RGB16 (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/core/SkBlitter_RGB16.cpp
diff --git a/src/core/SkBlitter_RGB16.cpp b/src/core/SkBlitter_RGB16.cpp
index 615a6a39dbe80fc7d5fefdc75e514a75b15fa296..86f0eb61576a89e8a0cc4bc4f2184865834f8fc2 100644
--- a/src/core/SkBlitter_RGB16.cpp
+++ b/src/core/SkBlitter_RGB16.cpp
@@ -390,63 +390,53 @@ void SkRGB16_Opaque_Blitter::blitMask(const SkMask& mask,
do {
int w = width;
if (w >= UNROLL) {
- uint32x4_t color; /* can use same one */
- uint32x4_t dev_lo, dev_hi;
- uint32x4_t t1;
- uint32x4_t wn1, wn2;
- uint16x4_t odev_lo, odev_hi;
- uint16x4_t alpha_lo, alpha_hi;
- uint16x8_t alpha_full;
-
+ uint32x4_t color, dev_lo, dev_hi;
+ uint32x4_t wn1, wn2, tmp;
+ uint32x4_t vmask_g16, vmask_ng16;
+ uint16x8_t valpha, vdev;
+ uint16x4_t odev_lo, odev_hi, valpha_lo, valpha_hi;
+
+ // prepare constants
+ vmask_g16 = vdupq_n_u32(SK_G16_MASK_IN_PLACE);
+ vmask_ng16 = vdupq_n_u32(~SK_G16_MASK_IN_PLACE);
color = vdupq_n_u32(expanded32);
do {
- /* alpha is 8x8, widen and split to get pair of 16x4's */
- alpha_full = vmovl_u8(vld1_u8(alpha));
- alpha_full = vaddq_u16(alpha_full, vshrq_n_u16(alpha_full,7));
- alpha_full = vshrq_n_u16(alpha_full, 3);
- alpha_lo = vget_low_u16(alpha_full);
- alpha_hi = vget_high_u16(alpha_full);
-
- dev_lo = vmovl_u16(vld1_u16(device));
- dev_hi = vmovl_u16(vld1_u16(device+4));
-
- /* unpack in 32 bits */
- dev_lo = vorrq_u32(
- vandq_u32(dev_lo, vdupq_n_u32(0x0000F81F)),
- vshlq_n_u32(vandq_u32(dev_lo,
- vdupq_n_u32(0x000007E0)),
- 16)
- );
- dev_hi = vorrq_u32(
- vandq_u32(dev_hi, vdupq_n_u32(0x0000F81F)),
- vshlq_n_u32(vandq_u32(dev_hi,
- vdupq_n_u32(0x000007E0)),
- 16)
- );
-
- /* blend the two */
- t1 = vmulq_u32(vsubq_u32(color, dev_lo), vmovl_u16(alpha_lo));
- t1 = vshrq_n_u32(t1, 5);
- dev_lo = vaddq_u32(dev_lo, t1);
-
- t1 = vmulq_u32(vsubq_u32(color, dev_hi), vmovl_u16(alpha_hi));
- t1 = vshrq_n_u32(t1, 5);
- dev_hi = vaddq_u32(dev_hi, t1);
-
- /* re-compact and store */
- wn1 = vandq_u32(dev_lo, vdupq_n_u32(0x0000F81F)),
- wn2 = vshrq_n_u32(dev_lo, 16);
- wn2 = vandq_u32(wn2, vdupq_n_u32(0x000007E0));
- odev_lo = vmovn_u32(vorrq_u32(wn1, wn2));
-
- wn1 = vandq_u32(dev_hi, vdupq_n_u32(0x0000F81F)),
- wn2 = vshrq_n_u32(dev_hi, 16);
- wn2 = vandq_u32(wn2, vdupq_n_u32(0x000007E0));
- odev_hi = vmovn_u32(vorrq_u32(wn1, wn2));
-
- vst1_u16(device, odev_lo);
- vst1_u16(device+4, odev_hi);
+ // alpha is 8x8, widen and split to get a pair of 16x4
+ valpha = vaddw_u8(vdupq_n_u16(1), vld1_u8(alpha));
+ valpha = vshrq_n_u16(valpha, 3);
+ valpha_lo = vget_low_u16(valpha);
+ valpha_hi = vget_high_u16(valpha);
+
+ // load pixels
+ vdev = vld1q_u16(device);
+ dev_lo = vmovl_u16(vget_low_u16(vdev));
+ dev_hi = vmovl_u16(vget_high_u16(vdev));
+
+ // unpack them in 32 bits
+ dev_lo = (dev_lo & vmask_ng16) | vshlq_n_u32(dev_lo & vmask_g16, 16);
+ dev_hi = (dev_hi & vmask_ng16) | vshlq_n_u32(dev_hi & vmask_g16, 16);
+
+ // blend with color
+ tmp = (color - dev_lo) * vmovl_u16(valpha_lo);
+ tmp = vshrq_n_u32(tmp, 5);
+ dev_lo += tmp;
+
+ tmp = vmulq_u32(color - dev_hi, vmovl_u16(valpha_hi));
+ tmp = vshrq_n_u32(tmp, 5);
+ dev_hi += tmp;
+
+ // re-compact
+ wn1 = dev_lo & vmask_ng16;
+ wn2 = vshrq_n_u32(dev_lo, 16) & vmask_g16;
+ odev_lo = vmovn_u32(wn1 | wn2);
+
+ wn1 = dev_hi & vmask_ng16;
+ wn2 = vshrq_n_u32(dev_hi, 16) & vmask_g16;
+ odev_hi = vmovn_u32(wn1 | wn2);
+
+ // store
+ vst1q_u16(device, vcombine_u16(odev_lo, odev_hi));
device += UNROLL;
alpha += UNROLL;
@@ -454,7 +444,7 @@ void SkRGB16_Opaque_Blitter::blitMask(const SkMask& mask,
} while (w >= UNROLL);
}
- /* residuals (which is everything if we have no neon) */
+ // residuals
while (w > 0) {
*device = blend_compact(expanded32, SkExpand_rgb_16(*device),
SkAlpha255To256(*alpha++) >> 3);
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698