| OLD | NEW |
| 1 | 1 |
| 2 /* | 2 /* |
| 3 * Copyright 2009 The Android Open Source Project | 3 * Copyright 2009 The Android Open Source Project |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
| 6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 | 9 |
| 10 #include <emmintrin.h> | 10 #include <emmintrin.h> |
| 11 #include "SkUtils_opts_SSE2.h" | 11 #include "SkUtils_opts_SSE2.h" |
| 12 | 12 |
| 13 void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count) | 13 void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count) |
| 14 { | 14 { |
| 15 SkASSERT(dst != NULL && count >= 0); | 15 SkASSERT(dst != NULL && count >= 0); |
| 16 | 16 |
| 17 // dst must be 2-byte aligned. | 17 // dst must be 2-byte aligned. |
| 18 SkASSERT((((size_t) dst) & 0x01) == 0); | 18 SkASSERT((((size_t) dst) & 0x01) == 0); |
| 19 | 19 |
| 20 if (count >= 32) { | 20 if (count >= 32) { |
| 21 while (((size_t)dst) & 0x0F) { | 21 while (((size_t)dst) & 0x0F) { |
| 22 *dst++ = value; | 22 *dst++ = value; |
| 23 --count; | 23 --count; |
| 24 } | 24 } |
| 25 __m128i *d = reinterpret_cast<__m128i*>(dst); | 25 __m128i *d = reinterpret_cast<__m128i*>(dst); |
| 26 __m128i value_wide = _mm_set1_epi16(value); | 26 __m128i value_wide = _mm_set1_epi16(value); |
| 27 while (count >= 32) { | 27 while (count >= 32) { |
| 28 _mm_store_si128(d++, value_wide); | 28 _mm_store_si128(d , value_wide); |
| 29 _mm_store_si128(d++, value_wide); | 29 _mm_store_si128(d + 1, value_wide); |
| 30 _mm_store_si128(d++, value_wide); | 30 _mm_store_si128(d + 2, value_wide); |
| 31 _mm_store_si128(d++, value_wide); | 31 _mm_store_si128(d + 3, value_wide); |
| 32 d += 4; |
| 32 count -= 32; | 33 count -= 32; |
| 33 } | 34 } |
| 34 dst = reinterpret_cast<uint16_t*>(d); | 35 dst = reinterpret_cast<uint16_t*>(d); |
| 35 } | 36 } |
| 36 while (count > 0) { | 37 while (count > 0) { |
| 37 *dst++ = value; | 38 *dst++ = value; |
| 38 --count; | 39 --count; |
| 39 } | 40 } |
| 40 } | 41 } |
| 41 | 42 |
| 42 void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) | 43 void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) |
| 43 { | 44 { |
| 44 SkASSERT(dst != NULL && count >= 0); | 45 SkASSERT(dst != NULL && count >= 0); |
| 45 | 46 |
| 46 // dst must be 4-byte aligned. | 47 // dst must be 4-byte aligned. |
| 47 SkASSERT((((size_t) dst) & 0x03) == 0); | 48 SkASSERT((((size_t) dst) & 0x03) == 0); |
| 48 | 49 |
| 49 if (count >= 16) { | 50 if (count >= 16) { |
| 50 while (((size_t)dst) & 0x0F) { | 51 while (((size_t)dst) & 0x0F) { |
| 51 *dst++ = value; | 52 *dst++ = value; |
| 52 --count; | 53 --count; |
| 53 } | 54 } |
| 54 __m128i *d = reinterpret_cast<__m128i*>(dst); | 55 __m128i *d = reinterpret_cast<__m128i*>(dst); |
| 55 __m128i value_wide = _mm_set1_epi32(value); | 56 __m128i value_wide = _mm_set1_epi32(value); |
| 56 while (count >= 16) { | 57 while (count >= 16) { |
| 57 _mm_store_si128(d++, value_wide); | 58 _mm_store_si128(d , value_wide); |
| 58 _mm_store_si128(d++, value_wide); | 59 _mm_store_si128(d + 1, value_wide); |
| 59 _mm_store_si128(d++, value_wide); | 60 _mm_store_si128(d + 2, value_wide); |
| 60 _mm_store_si128(d++, value_wide); | 61 _mm_store_si128(d + 3, value_wide); |
| 62 d += 4; |
| 61 count -= 16; | 63 count -= 16; |
| 62 } | 64 } |
| 63 dst = reinterpret_cast<uint32_t*>(d); | 65 dst = reinterpret_cast<uint32_t*>(d); |
| 64 } | 66 } |
| 65 while (count > 0) { | 67 while (count > 0) { |
| 66 *dst++ = value; | 68 *dst++ = value; |
| 67 --count; | 69 --count; |
| 68 } | 70 } |
| 69 } | 71 } |
| OLD | NEW |