Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(472)

Unified Diff: simd/jdmrgss2-64.asm

Issue 10700197: Update libjpeg-turbo to r856. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libjpeg_turbo/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « simd/jdmrgss2.asm ('k') | simd/jsimdcfg.inc.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: simd/jdmrgss2-64.asm
===================================================================
--- simd/jdmrgss2-64.asm (revision 144411)
+++ simd/jdmrgss2-64.asm (working copy)
@@ -1,7 +1,7 @@
;
; jdmrgss2-64.asm - merged upsampling/color conversion (64-bit SSE2)
;
-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright 2009 D. R. Commander
;
; Based on
@@ -12,7 +12,7 @@
; This file should be assembled with NASM (Netwide Assembler),
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
-; NASM is available from http://nasm.sourceforge.net/ for
+; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]
@@ -252,17 +252,13 @@
movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
jmp short .out0
.out1: ; --(unaligned)-----------------
- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF
- add rdi, byte SIZEOF_XMMWORD ; outptr
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
.out0:
+ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
sub rcx, byte SIZEOF_XMMWORD
jz near .endcolumn
@@ -275,26 +271,23 @@
jmp near .columnloop
.column_st32:
- pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE
cmp rcx, byte 2*SIZEOF_XMMWORD
jb short .column_st16
- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD
- add rdi, byte SIZEOF_XMMWORD ; outptr
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmF
sub rcx, byte 2*SIZEOF_XMMWORD
jmp short .column_st15
.column_st16:
cmp rcx, byte SIZEOF_XMMWORD
jb short .column_st15
- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD
.column_st15:
-%ifdef STRICT_MEMORY_ACCESS
; Store the lower 8 bytes of xmmA to the output when it has enough
; space.
cmp rcx, byte SIZEOF_MMWORD
@@ -328,47 +321,6 @@
test rcx, rcx
jz short .endcolumn
mov BYTE [rdi], al
-%else
- mov rax,rcx
- xor rcx, byte 0x0F
- shl rcx, 2
- movd xmmB,ecx
- psrlq xmmH,4
- pcmpeqb xmmE,xmmE
- psrlq xmmH,xmmB
- psrlq xmmE,xmmB
- punpcklbw xmmE,xmmH
- ; ----------------
- mov rcx,rdi
- and rcx, byte SIZEOF_XMMWORD-1
- jz short .adj0
- add rax,rcx
- cmp rax, byte SIZEOF_XMMWORD
- ja short .adj0
- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
- movdqa xmmG,xmmA
- movdqa xmmC,xmmE
- pslldq xmmA, SIZEOF_XMMWORD/2
- pslldq xmmE, SIZEOF_XMMWORD/2
- movd xmmD,ecx
- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
- jb short .adj1
- movd xmmF,ecx
- psllq xmmA,xmmF
- psllq xmmE,xmmF
- jmp short .adj0
-.adj1: neg rcx
- movd xmmF,ecx
- psrlq xmmA,xmmF
- psrlq xmmE,xmmF
- psllq xmmG,xmmD
- psllq xmmC,xmmD
- por xmmA,xmmG
- por xmmE,xmmC
-.adj0: ; ----------------
- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
-%endif ; STRICT_MEMORY_ACCESS ; ---------------
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -413,19 +365,14 @@
movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
jmp short .out0
.out1: ; --(unaligned)-----------------
- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH
- add rdi, byte SIZEOF_XMMWORD ; outptr
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+ movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
.out0:
+ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
sub rcx, byte SIZEOF_XMMWORD
jz near .endcolumn
@@ -438,25 +385,22 @@
jmp near .columnloop
.column_st32:
- pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
cmp rcx, byte SIZEOF_XMMWORD/2
jb short .column_st16
- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
- add rdi, byte SIZEOF_XMMWORD ; outptr
- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD
- add rdi, byte SIZEOF_XMMWORD ; outptr
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+ add rdi, byte 2*SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmC
movdqa xmmD,xmmH
sub rcx, byte SIZEOF_XMMWORD/2
.column_st16:
cmp rcx, byte SIZEOF_XMMWORD/4
jb short .column_st15
- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
add rdi, byte SIZEOF_XMMWORD ; outptr
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD/4
.column_st15:
-%ifdef STRICT_MEMORY_ACCESS
; Store two pixels (8 bytes) of xmmA to the output when it has enough
; space.
cmp rcx, byte SIZEOF_XMMWORD/8
@@ -471,47 +415,6 @@
test rcx, rcx
jz short .endcolumn
movd DWORD [rdi], xmmA
-%else
- cmp rcx, byte SIZEOF_XMMWORD/16
- jb near .endcolumn
- mov rax,rcx
- xor rcx, byte 0x03
- inc rcx
- shl rcx, 4
- movd xmmF,ecx
- psrlq xmmE,xmmF
- punpcklbw xmmE,xmmE
- ; ----------------
- mov rcx,rdi
- and rcx, byte SIZEOF_XMMWORD-1
- jz short .adj0
- lea rax, [rcx+rax*4] ; RGB_PIXELSIZE
- cmp rax, byte SIZEOF_XMMWORD
- ja short .adj0
- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
- movdqa xmmB,xmmA
- movdqa xmmG,xmmE
- pslldq xmmA, SIZEOF_XMMWORD/2
- pslldq xmmE, SIZEOF_XMMWORD/2
- movd xmmC,ecx
- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
- jb short .adj1
- movd xmmH,ecx
- psllq xmmA,xmmH
- psllq xmmE,xmmH
- jmp short .adj0
-.adj1: neg rcx
- movd xmmH,ecx
- psrlq xmmA,xmmH
- psrlq xmmE,xmmH
- psllq xmmB,xmmC
- psllq xmmG,xmmC
- por xmmA,xmmB
- por xmmE,xmmG
-.adj0: ; ----------------
- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
-%endif ; STRICT_MEMORY_ACCESS ; ---------------
%endif ; RGB_PIXELSIZE ; ---------------
« no previous file with comments | « simd/jdmrgss2.asm ('k') | simd/jsimdcfg.inc.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698