Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(313)

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 18614010: ARM Skia NEON patches - 14 - S32A_Blend (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Fix uninitialized variable warnings Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBlitRow_opts_arm.h" 8 #include "SkBlitRow_opts_arm.h"
9 9
10 #include "SkBlitMask.h" 10 #include "SkBlitMask.h"
(...skipping 749 matching lines...) Expand 10 before | Expand all | Expand 10 after
760 src += 1; 760 src += 1;
761 dst += 1; 761 dst += 1;
762 } while (--count > 0); 762 } while (--count > 0);
763 } 763 }
764 #endif 764 #endif
765 765
766 #undef UNROLL 766 #undef UNROLL
767 } 767 }
768 } 768 }
769 769
// Blends `count` 32-bit pixels from `src` into `dst` using NEON, with the
// source additionally scaled by a global `alpha` (asserted to be <= 255).
//
// For every 8-bit channel the code computes
//     dst = ((dst * dst_scale) >> 8) + ((src * alpha256) >> 8)
// with
//     alpha256  = SkAlpha255To256(alpha)
//     dst_scale = 256 - ((src_a * alpha256) >> 8)
// where src_a is byte 3 of the source pixel as it is laid out in memory.
//
// Returns immediately when count <= 0. An odd count is handled by blending a
// single pixel first; the remainder is processed two pixels per iteration.
//
// NOTE(review): reading alpha from byte 3 presumably relies on
// SK_A32_SHIFT == 24 on a little-endian target; no guard is visible inside
// this function -- confirm.
// NOTE(review): the 16-bit products below cannot overflow only if
// alpha256 <= 256 (255 * 256 = 65280); SkAlpha255To256 is defined elsewhere --
// confirm its range.
770 void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
771 const SkPMColor* SK_RESTRICT src,
772 int count, U8CPU alpha) {
773
774 SkASSERT(255 >= alpha);
775
776 if (count <= 0) {
777 return;
778 }
779
780 unsigned alpha256 = SkAlpha255To256(alpha);
781
782 // First deal with odd counts: blend a single pixel so the remaining count is even
783 if (count & 1) {
784 uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres;
785 uint16x8_t vdst_wide, vsrc_wide;
786 unsigned dst_scale;
787
788 // Load one pixel into 32-bit lane 0 of each vector; the other lane keeps the zero from vdup_n_u8(0)
789 vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));
790 vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));
791
792 // Calc dst_scale = 256 - ((src byte 3 * alpha256) >> 8), in scalar arithmetic
793 dst_scale = vget_lane_u8(vsrc, 3);
794 dst_scale *= alpha256;
795 dst_scale >>= 8;
796 dst_scale = 256 - dst_scale;
797
798 // Process src: widen each byte to 16 bits, then multiply by alpha256
799 vsrc_wide = vmovl_u8(vsrc);
800 vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256);
801
802 // Process dst: widen each byte to 16 bits, then multiply by dst_scale
803 vdst_wide = vmovl_u8(vdst);
804 vdst_wide = vmulq_n_u16(vdst_wide, dst_scale);
805
806 // Combine: shift each product right by 8 while narrowing to 8 bits, then add the two vectors lane by lane
807 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);
808
// Write back only 32-bit lane 0, i.e. the single pixel blended above.
809 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);
810 dst++;
811 src++;
812 count--;
813 }
814
815 if (count) {
// Table indices for vtbl1_u8: output lanes 0-3 take input byte 3 and lanes
// 4-7 take input byte 7, so byte 3 of each of the two loaded pixels is
// replicated across that pixel's four channel lanes.
816 uint8x8_t alpha_mask;
817 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
818 alpha_mask = vld1_u8(alpha_mask_setup);
819
820 do {
821
822 uint8x8_t vsrc, vdst, vres, vsrc_alphas;
823 uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale;
824
// Prefetch 32 elements ahead of the current src/dst positions.
825 __builtin_prefetch(src+32);
826 __builtin_prefetch(dst+32);
827
828 // Load two pixels (two 32-bit lanes) from each of src and dst
829 vsrc = vreinterpret_u8_u32(vld1_u32(src));
830 vdst = vreinterpret_u8_u32(vld1_u32(dst));
831
832 // Prepare src_scale: alpha256 replicated into all eight 16-bit lanes
833 vsrc_scale = vdupq_n_u16(alpha256);
834
835 // Calc dst_scale per lane: 256 - ((src byte 3 of that pixel * alpha256) >> 8)
836 vsrc_alphas = vtbl1_u8(vsrc, alpha_mask);
837 vdst_scale = vmovl_u8(vsrc_alphas);
838 vdst_scale *= vsrc_scale;
839 vdst_scale = vshrq_n_u16(vdst_scale, 8);
840 vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale);
841
842 // Process src: widen each byte to 16 bits, then multiply lane-wise by src_scale
843 vsrc_wide = vmovl_u8(vsrc);
844 vsrc_wide *= vsrc_scale;
845
846 // Process dst: widen each byte to 16 bits, then multiply lane-wise by dst_scale
847 vdst_wide = vmovl_u8(vdst);
848 vdst_wide *= vdst_scale;
849
850 // Combine: shift each product right by 8 while narrowing to 8 bits, then add the two vectors lane by lane
851 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);
852
853 vst1_u32(dst, vreinterpret_u32_u8(vres));
854
// Advance past the two pixels just written. count is positive and even here
// (any odd pixel was consumed above), so stepping by 2 lands exactly on 0.
855 src += 2;
856 dst += 2;
857 count -= 2;
858 } while(count);
859 }
860 }
861
770 /////////////////////////////////////////////////////////////////////////////// 862 ///////////////////////////////////////////////////////////////////////////////
771 863
772 #undef DEBUG_OPAQUE_DITHER 864 #undef DEBUG_OPAQUE_DITHER
773 865
774 #if defined(DEBUG_OPAQUE_DITHER) 866 #if defined(DEBUG_OPAQUE_DITHER)
775 static void showme8(char *str, void *p, int len) 867 static void showme8(char *str, void *p, int len)
776 { 868 {
777 static char buf[256]; 869 static char buf[256];
778 char tbuf[32]; 870 char tbuf[32];
779 int i; 871 int i;
(...skipping 507 matching lines...) Expand 10 before | Expand all | Expand 10 after
1287 * the performance will almost certainly be worse. However, for many 1379 * the performance will almost certainly be worse. However, for many
1288 * common cases the performance is equivalent or better than the standard 1380 * common cases the performance is equivalent or better than the standard
1289 * case where we do not inspect the src alpha. 1381 * case where we do not inspect the src alpha.
1290 */ 1382 */
1291 #if SK_A32_SHIFT == 24 1383 #if SK_A32_SHIFT == 24
1292 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor 1384 // This proc assumes the alpha value occupies bits 24-31 of each SkPMColor
1293 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1385 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,
1294 #else 1386 #else
1295 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1387 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,
1296 #endif 1388 #endif
1297 S32A_Blend_BlitRow32_arm // S32A_Blend 1389 S32A_Blend_BlitRow32_neon // S32A_Blend
1298 }; 1390 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698