OLD | NEW |
1 | 1 |
2 /* | 2 /* |
3 * Copyright 2009 The Android Open Source Project | 3 * Copyright 2009 The Android Open Source Project |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
7 */ | 7 */ |
8 | 8 |
9 | 9 |
10 #include <emmintrin.h> | 10 #include <emmintrin.h> |
11 #include "SkUtils_opts_SSE2.h" | 11 #include "SkUtils_opts_SSE2.h" |
12 | 12 |
13 void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count) | 13 void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count) |
14 { | 14 { |
15 SkASSERT(dst != NULL && count >= 0); | 15 SkASSERT(dst != NULL && count >= 0); |
16 | 16 |
17 // dst must be 2-byte aligned. | 17 // dst must be 2-byte aligned. |
18 SkASSERT((((size_t) dst) & 0x01) == 0); | 18 SkASSERT((((size_t) dst) & 0x01) == 0); |
19 | 19 |
20 if (count >= 32) { | 20 if (count >= 32) { |
21 while (((size_t)dst) & 0x0F) { | 21 while (((size_t)dst) & 0x0F) { |
22 *dst++ = value; | 22 *dst++ = value; |
23 --count; | 23 --count; |
24 } | 24 } |
25 __m128i *d = reinterpret_cast<__m128i*>(dst); | 25 __m128i *d = reinterpret_cast<__m128i*>(dst); |
26 __m128i value_wide = _mm_set1_epi16(value); | 26 __m128i value_wide = _mm_set1_epi16(value); |
27 while (count >= 32) { | 27 while (count >= 32) { |
28 _mm_store_si128(d++, value_wide); | 28 _mm_store_si128(d , value_wide); |
29 _mm_store_si128(d++, value_wide); | 29 _mm_store_si128(d + 1, value_wide); |
30 _mm_store_si128(d++, value_wide); | 30 _mm_store_si128(d + 2, value_wide); |
31 _mm_store_si128(d++, value_wide); | 31 _mm_store_si128(d + 3, value_wide); |
| 32 d += 4; |
32 count -= 32; | 33 count -= 32; |
33 } | 34 } |
34 dst = reinterpret_cast<uint16_t*>(d); | 35 dst = reinterpret_cast<uint16_t*>(d); |
35 } | 36 } |
36 while (count > 0) { | 37 while (count > 0) { |
37 *dst++ = value; | 38 *dst++ = value; |
38 --count; | 39 --count; |
39 } | 40 } |
40 } | 41 } |
41 | 42 |
42 void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) | 43 void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) |
43 { | 44 { |
44 SkASSERT(dst != NULL && count >= 0); | 45 SkASSERT(dst != NULL && count >= 0); |
45 | 46 |
46 // dst must be 4-byte aligned. | 47 // dst must be 4-byte aligned. |
47 SkASSERT((((size_t) dst) & 0x03) == 0); | 48 SkASSERT((((size_t) dst) & 0x03) == 0); |
48 | 49 |
49 if (count >= 16) { | 50 if (count >= 16) { |
50 while (((size_t)dst) & 0x0F) { | 51 while (((size_t)dst) & 0x0F) { |
51 *dst++ = value; | 52 *dst++ = value; |
52 --count; | 53 --count; |
53 } | 54 } |
54 __m128i *d = reinterpret_cast<__m128i*>(dst); | 55 __m128i *d = reinterpret_cast<__m128i*>(dst); |
55 __m128i value_wide = _mm_set1_epi32(value); | 56 __m128i value_wide = _mm_set1_epi32(value); |
56 while (count >= 16) { | 57 while (count >= 16) { |
57 _mm_store_si128(d++, value_wide); | 58 _mm_store_si128(d , value_wide); |
58 _mm_store_si128(d++, value_wide); | 59 _mm_store_si128(d + 1, value_wide); |
59 _mm_store_si128(d++, value_wide); | 60 _mm_store_si128(d + 2, value_wide); |
60 _mm_store_si128(d++, value_wide); | 61 _mm_store_si128(d + 3, value_wide); |
| 62 d += 4; |
61 count -= 16; | 63 count -= 16; |
62 } | 64 } |
63 dst = reinterpret_cast<uint32_t*>(d); | 65 dst = reinterpret_cast<uint32_t*>(d); |
64 } | 66 } |
65 while (count > 0) { | 67 while (count > 0) { |
66 *dst++ = value; | 68 *dst++ = value; |
67 --count; | 69 --count; |
68 } | 70 } |
69 } | 71 } |
OLD | NEW |