src/opts/SkBitmapProcState_filter_neon.h - Issue 18996003: ARM Skia NEON patches - 02 - Tune filter clobber list

Side by Side Diff: src/opts/SkBitmapProcState_filter_neon.h

Issue 18996003: ARM Skia NEON patches - 02 - Tune filter clobber list (Closed) Base URL: https://skia.googlecode.com/svn/trunk

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « no previous file | no next file » | no next file with comments »

OLD	NEW
1	1

2 /*	2 /*

3 * Copyright 2012 The Android Open Source Project	3 * Copyright 2012 The Android Open Source Project

4 *	4 *

5 * Use of this source code is governed by a BSD-style license that can be	5 * Use of this source code is governed by a BSD-style license that can be

6 * found in the LICENSE file.	6 * found in the LICENSE file.

7 */	7 */

8	8

9	9

10 #include "SkColorPriv.h"	10 #include "SkColorPriv.h"

(...skipping 29 matching lines...) Expand all Loading...
40 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x	40 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x

41	41

42 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x	42 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x

43 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x	43 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x

44 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x)	44 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x)

45 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x)	45 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x)

46 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8	46 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8

47 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result	47 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result

48 :	48 :

49 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)	49 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)

50 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"	50 : "cc", "memory", "d0", "d1", "d3", "d4", "d5", "d6", "d7", "d16"

51 );	51 );

52 }	52 }

53	53

54 static inline void Filter_32_alpha_neon(unsigned x, unsigned y,	54 static inline void Filter_32_alpha_neon(unsigned x, unsigned y,

55 SkPMColor a00, SkPMColor a01,	55 SkPMColor a00, SkPMColor a01,

56 SkPMColor a10, SkPMColor a11,	56 SkPMColor a10, SkPMColor a11,

57 SkPMColor *dst, uint16_t scale) {	57 SkPMColor *dst, uint16_t scale) {

58 asm volatile(	58 asm volatile(

59 "vdup.8 d0, %[y] \n\t" // duplicate y into d0	59 "vdup.8 d0, %[y] \n\t" // duplicate y into d0

60 "vmov.u8 d16, #16 \n\t" // set up constant in d16	60 "vmov.u8 d16, #16 \n\t" // set up constant in d16

(...skipping 15 matching lines...) Expand all Loading...
76 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x	76 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x

77 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x)	77 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x)

78 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x)	78 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x)

79 "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3	79 "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3

80 "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8	80 "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8

81 "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale	81 "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale

82 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8	82 "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8

83 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result	83 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result

84 :	84 :

85 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)	85 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)

86 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"	86 : "cc", "memory", "d0", "d1", "d3", "d4", "d5", "d6", "d7", "d16"

87 );	87 );

88 }	88 }

	89

OLD	NEW