Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(150)

Side by Side Diff: src/core/SkBlitter_RGB16.cpp

Issue 18666005: ARM Skia NEON patches - 11 - Blitter_RGB16 (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2006 The Android Open Source Project 3 * Copyright 2006 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #include "SkBlitRow.h" 10 #include "SkBlitRow.h"
(...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after
383 int height = clip.height(); 383 int height = clip.height();
384 size_t deviceRB = fDevice.rowBytes() - (width << 1); 384 size_t deviceRB = fDevice.rowBytes() - (width << 1);
385 unsigned maskRB = mask.fRowBytes - width; 385 unsigned maskRB = mask.fRowBytes - width;
386 uint32_t expanded32 = fExpandedRaw16; 386 uint32_t expanded32 = fExpandedRaw16;
387 387
388 #ifdef SK_USE_NEON 388 #ifdef SK_USE_NEON
389 #define UNROLL 8 389 #define UNROLL 8
390 do { 390 do {
391 int w = width; 391 int w = width;
392 if (w >= UNROLL) { 392 if (w >= UNROLL) {
393 uint32x4_t color; /* can use same one */ 393 uint32x4_t color, dev_lo, dev_hi;
394 uint32x4_t dev_lo, dev_hi; 394 uint32x4_t wn1, wn2, tmp;
395 uint32x4_t t1; 395 uint32x4_t vmask_g16, vmask_ng16;
396 uint32x4_t wn1, wn2; 396 uint16x8_t valpha, vdev;
397 uint16x4_t odev_lo, odev_hi; 397 uint16x4_t odev_lo, odev_hi, valpha_lo, valpha_hi;
398 uint16x4_t alpha_lo, alpha_hi;
399 uint16x8_t alpha_full;
400 398
399 // prepare constants
400 vmask_g16 = vdupq_n_u32(SK_G16_MASK_IN_PLACE);
401 vmask_ng16 = vdupq_n_u32(~SK_G16_MASK_IN_PLACE);
401 color = vdupq_n_u32(expanded32); 402 color = vdupq_n_u32(expanded32);
402 403
403 do { 404 do {
404 /* alpha is 8x8, widen and split to get pair of 16x4's */ 405 // alpha is 8x8, widen and split to get a pair of 16x4
405 alpha_full = vmovl_u8(vld1_u8(alpha)); 406 valpha = vaddw_u8(vdupq_n_u16(1), vld1_u8(alpha));
406 alpha_full = vaddq_u16(alpha_full, vshrq_n_u16(alpha_full,7)); 407 valpha = vshrq_n_u16(valpha, 3);
407 alpha_full = vshrq_n_u16(alpha_full, 3); 408 valpha_lo = vget_low_u16(valpha);
408 alpha_lo = vget_low_u16(alpha_full); 409 valpha_hi = vget_high_u16(valpha);
409 alpha_hi = vget_high_u16(alpha_full);
410 410
411 dev_lo = vmovl_u16(vld1_u16(device)); 411 // load pixels
412 dev_hi = vmovl_u16(vld1_u16(device+4)); 412 vdev = vld1q_u16(device);
413 dev_lo = vmovl_u16(vget_low_u16(vdev));
414 dev_hi = vmovl_u16(vget_high_u16(vdev));
413 415
414 /* unpack in 32 bits */ 416 // unpack them in 32 bits
415 dev_lo = vorrq_u32( 417 dev_lo = (dev_lo & vmask_ng16) | vshlq_n_u32(dev_lo & vmask_g16, 16);
416 vandq_u32(dev_lo, vdupq_n_u32(0x0000F81F)), 418 dev_hi = (dev_hi & vmask_ng16) | vshlq_n_u32(dev_hi & vmask_g16, 16);
417 vshlq_n_u32(vandq_u32(dev_lo,
418 vdupq_n_u32(0x000007E0) ),
419 16)
420 );
421 dev_hi = vorrq_u32(
422 vandq_u32(dev_hi, vdupq_n_u32(0x0000F81F)),
423 vshlq_n_u32(vandq_u32(dev_hi,
424 vdupq_n_u32(0x000007E0) ),
425 16)
426 );
427 419
428 /* blend the two */ 420 // blend with color
429 t1 = vmulq_u32(vsubq_u32(color, dev_lo), vmovl_u16(alpha_lo)); 421 tmp = (color - dev_lo) * vmovl_u16(valpha_lo);
430 t1 = vshrq_n_u32(t1, 5); 422 tmp = vshrq_n_u32(tmp, 5);
431 dev_lo = vaddq_u32(dev_lo, t1); 423 dev_lo += tmp;
432 424
433 t1 = vmulq_u32(vsubq_u32(color, dev_hi), vmovl_u16(alpha_hi)); 425 tmp = vmulq_u32(color - dev_hi, vmovl_u16(valpha_hi));
434 t1 = vshrq_n_u32(t1, 5); 426 tmp = vshrq_n_u32(tmp, 5);
435 dev_hi = vaddq_u32(dev_hi, t1); 427 dev_hi += tmp;
436 428
437 /* re-compact and store */ 429 // re-compact
438 wn1 = vandq_u32(dev_lo, vdupq_n_u32(0x0000F81F)), 430 wn1 = dev_lo & vmask_ng16;
439 wn2 = vshrq_n_u32(dev_lo, 16); 431 wn2 = vshrq_n_u32(dev_lo, 16) & vmask_g16;
440 wn2 = vandq_u32(wn2, vdupq_n_u32(0x000007E0)); 432 odev_lo = vmovn_u32(wn1 | wn2);
441 odev_lo = vmovn_u32(vorrq_u32(wn1, wn2));
442 433
443 wn1 = vandq_u32(dev_hi, vdupq_n_u32(0x0000F81F)), 434 wn1 = dev_hi & vmask_ng16;
444 wn2 = vshrq_n_u32(dev_hi, 16); 435 wn2 = vshrq_n_u32(dev_hi, 16) & vmask_g16;
445 wn2 = vandq_u32(wn2, vdupq_n_u32(0x000007E0)); 436 odev_hi = vmovn_u32(wn1 | wn2);
446 odev_hi = vmovn_u32(vorrq_u32(wn1, wn2));
447 437
448 vst1_u16(device, odev_lo); 438 // store
449 vst1_u16(device+4, odev_hi); 439 vst1q_u16(device, vcombine_u16(odev_lo, odev_hi));
450 440
451 device += UNROLL; 441 device += UNROLL;
452 alpha += UNROLL; 442 alpha += UNROLL;
453 w -= UNROLL; 443 w -= UNROLL;
454 } while (w >= UNROLL); 444 } while (w >= UNROLL);
455 } 445 }
456 446
457 /* residuals (which is everything if we have no neon) */ 447 // residuals
458 while (w > 0) { 448 while (w > 0) {
459 *device = blend_compact(expanded32, SkExpand_rgb_16(*device), 449 *device = blend_compact(expanded32, SkExpand_rgb_16(*device),
460 SkAlpha255To256(*alpha++) >> 3); 450 SkAlpha255To256(*alpha++) >> 3);
461 device += 1; 451 device += 1;
462 --w; 452 --w;
463 } 453 }
464 device = (uint16_t*)((char*)device + deviceRB); 454 device = (uint16_t*)((char*)device + deviceRB);
465 alpha += maskRB; 455 alpha += maskRB;
466 } while (--height != 0); 456 } while (--height != 0);
467 #undef UNROLL 457 #undef UNROLL
(...skipping 588 matching lines...) Expand 10 before | Expand all | Expand 10 after
1056 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Opaque_Blitter, storage, 1046 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Opaque_Blitter, storage,
1057 storageSize, (device, paint)); 1047 storageSize, (device, paint));
1058 } else { 1048 } else {
1059 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Blitter, storage, 1049 SK_PLACEMENT_NEW_ARGS(blitter, SkRGB16_Blitter, storage,
1060 storageSize, (device, paint)); 1050 storageSize, (device, paint));
1061 } 1051 }
1062 } 1052 }
1063 1053
1064 return blitter; 1054 return blitter;
1065 } 1055 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698