OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 %define k4k5 [rsp + 16*2] | 48 %define k4k5 [rsp + 16*2] |
49 %define k6k7 [rsp + 16*3] | 49 %define k6k7 [rsp + 16*3] |
50 %define krd [rsp + 16*4] | 50 %define krd [rsp + 16*4] |
51 | 51 |
52 mov rdx, arg(5) ;filter ptr | 52 mov rdx, arg(5) ;filter ptr |
53 mov rsi, arg(0) ;src_ptr | 53 mov rsi, arg(0) ;src_ptr |
54 mov rdi, arg(2) ;output_ptr | 54 mov rdi, arg(2) ;output_ptr |
55 mov rcx, 0x0400040 | 55 mov rcx, 0x0400040 |
56 | 56 |
57 movdqa xmm4, [rdx] ;load filters | 57 movdqa xmm4, [rdx] ;load filters |
58 movd xmm5, rcx | 58 movq xmm5, rcx |
59 packsswb xmm4, xmm4 | 59 packsswb xmm4, xmm4 |
60 pshuflw xmm0, xmm4, 0b ;k0_k1 | 60 pshuflw xmm0, xmm4, 0b ;k0_k1 |
61 pshuflw xmm1, xmm4, 01010101b ;k2_k3 | 61 pshuflw xmm1, xmm4, 01010101b ;k2_k3 |
62 pshuflw xmm2, xmm4, 10101010b ;k4_k5 | 62 pshuflw xmm2, xmm4, 10101010b ;k4_k5 |
63 pshuflw xmm3, xmm4, 11111111b ;k6_k7 | 63 pshuflw xmm3, xmm4, 11111111b ;k6_k7 |
64 | 64 |
65 punpcklqdq xmm0, xmm0 | 65 punpcklqdq xmm0, xmm0 |
66 punpcklqdq xmm1, xmm1 | 66 punpcklqdq xmm1, xmm1 |
67 punpcklqdq xmm2, xmm2 | 67 punpcklqdq xmm2, xmm2 |
68 punpcklqdq xmm3, xmm3 | 68 punpcklqdq xmm3, xmm3 |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 %define k4k5 [rsp + 16*2] | 166 %define k4k5 [rsp + 16*2] |
167 %define k6k7 [rsp + 16*3] | 167 %define k6k7 [rsp + 16*3] |
168 %define krd [rsp + 16*4] | 168 %define krd [rsp + 16*4] |
169 | 169 |
170 mov rdx, arg(5) ;filter ptr | 170 mov rdx, arg(5) ;filter ptr |
171 mov rsi, arg(0) ;src_ptr | 171 mov rsi, arg(0) ;src_ptr |
172 mov rdi, arg(2) ;output_ptr | 172 mov rdi, arg(2) ;output_ptr |
173 mov rcx, 0x0400040 | 173 mov rcx, 0x0400040 |
174 | 174 |
175 movdqa xmm4, [rdx] ;load filters | 175 movdqa xmm4, [rdx] ;load filters |
176 movd xmm5, rcx | 176 movq xmm5, rcx |
177 packsswb xmm4, xmm4 | 177 packsswb xmm4, xmm4 |
178 pshuflw xmm0, xmm4, 0b ;k0_k1 | 178 pshuflw xmm0, xmm4, 0b ;k0_k1 |
179 pshuflw xmm1, xmm4, 01010101b ;k2_k3 | 179 pshuflw xmm1, xmm4, 01010101b ;k2_k3 |
180 pshuflw xmm2, xmm4, 10101010b ;k4_k5 | 180 pshuflw xmm2, xmm4, 10101010b ;k4_k5 |
181 pshuflw xmm3, xmm4, 11111111b ;k6_k7 | 181 pshuflw xmm3, xmm4, 11111111b ;k6_k7 |
182 | 182 |
183 punpcklqdq xmm0, xmm0 | 183 punpcklqdq xmm0, xmm0 |
184 punpcklqdq xmm1, xmm1 | 184 punpcklqdq xmm1, xmm1 |
185 punpcklqdq xmm2, xmm2 | 185 punpcklqdq xmm2, xmm2 |
186 punpcklqdq xmm3, xmm3 | 186 punpcklqdq xmm3, xmm3 |
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
316 %define k4k5 [rsp + 16*2] | 316 %define k4k5 [rsp + 16*2] |
317 %define k6k7 [rsp + 16*3] | 317 %define k6k7 [rsp + 16*3] |
318 %define krd [rsp + 16*4] | 318 %define krd [rsp + 16*4] |
319 | 319 |
320 mov rdx, arg(5) ;filter ptr | 320 mov rdx, arg(5) ;filter ptr |
321 mov rsi, arg(0) ;src_ptr | 321 mov rsi, arg(0) ;src_ptr |
322 mov rdi, arg(2) ;output_ptr | 322 mov rdi, arg(2) ;output_ptr |
323 mov rcx, 0x0400040 | 323 mov rcx, 0x0400040 |
324 | 324 |
325 movdqa xmm4, [rdx] ;load filters | 325 movdqa xmm4, [rdx] ;load filters |
326 movd xmm5, rcx | 326 movq xmm5, rcx |
327 packsswb xmm4, xmm4 | 327 packsswb xmm4, xmm4 |
328 pshuflw xmm0, xmm4, 0b ;k0_k1 | 328 pshuflw xmm0, xmm4, 0b ;k0_k1 |
329 pshuflw xmm1, xmm4, 01010101b ;k2_k3 | 329 pshuflw xmm1, xmm4, 01010101b ;k2_k3 |
330 pshuflw xmm2, xmm4, 10101010b ;k4_k5 | 330 pshuflw xmm2, xmm4, 10101010b ;k4_k5 |
331 pshuflw xmm3, xmm4, 11111111b ;k6_k7 | 331 pshuflw xmm3, xmm4, 11111111b ;k6_k7 |
332 | 332 |
333 punpcklqdq xmm0, xmm0 | 333 punpcklqdq xmm0, xmm0 |
334 punpcklqdq xmm1, xmm1 | 334 punpcklqdq xmm1, xmm1 |
335 punpcklqdq xmm2, xmm2 | 335 punpcklqdq xmm2, xmm2 |
336 punpcklqdq xmm3, xmm3 | 336 punpcklqdq xmm3, xmm3 |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
423 %define k4k5 [rsp + 16*2] | 423 %define k4k5 [rsp + 16*2] |
424 %define k6k7 [rsp + 16*3] | 424 %define k6k7 [rsp + 16*3] |
425 %define krd [rsp + 16*4] | 425 %define krd [rsp + 16*4] |
426 | 426 |
427 mov rdx, arg(5) ;filter ptr | 427 mov rdx, arg(5) ;filter ptr |
428 mov rsi, arg(0) ;src_ptr | 428 mov rsi, arg(0) ;src_ptr |
429 mov rdi, arg(2) ;output_ptr | 429 mov rdi, arg(2) ;output_ptr |
430 mov rcx, 0x0400040 | 430 mov rcx, 0x0400040 |
431 | 431 |
432 movdqa xmm4, [rdx] ;load filters | 432 movdqa xmm4, [rdx] ;load filters |
433 movd xmm5, rcx | 433 movq xmm5, rcx |
434 packsswb xmm4, xmm4 | 434 packsswb xmm4, xmm4 |
435 pshuflw xmm0, xmm4, 0b ;k0_k1 | 435 pshuflw xmm0, xmm4, 0b ;k0_k1 |
436 pshuflw xmm1, xmm4, 01010101b ;k2_k3 | 436 pshuflw xmm1, xmm4, 01010101b ;k2_k3 |
437 pshuflw xmm2, xmm4, 10101010b ;k4_k5 | 437 pshuflw xmm2, xmm4, 10101010b ;k4_k5 |
438 pshuflw xmm3, xmm4, 11111111b ;k6_k7 | 438 pshuflw xmm3, xmm4, 11111111b ;k6_k7 |
439 | 439 |
440 punpcklqdq xmm0, xmm0 | 440 punpcklqdq xmm0, xmm0 |
441 punpcklqdq xmm1, xmm1 | 441 punpcklqdq xmm1, xmm1 |
442 punpcklqdq xmm2, xmm2 | 442 punpcklqdq xmm2, xmm2 |
443 punpcklqdq xmm3, xmm3 | 443 punpcklqdq xmm3, xmm3 |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
541 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 | 541 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 |
542 align 16 | 542 align 16 |
543 shuf_t2t3: | 543 shuf_t2t3: |
544 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 | 544 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 |
545 align 16 | 545 align 16 |
546 shuf_t4t5: | 546 shuf_t4t5: |
547 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 | 547 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 |
548 align 16 | 548 align 16 |
549 shuf_t6t7: | 549 shuf_t6t7: |
550 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 | 550 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 |
OLD | NEW |