Chromium Code Reviews

Side by Side Diff: simd/jdclrss2.asm

Issue 10700197: Update libjpeg-turbo to r856. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libjpeg_turbo/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Annotate | Revision Log
« no previous file with comments | « README.chromium ('k') | simd/jdclrss2-64.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 ; 1 ;
2 ; jdclrss2.asm - colorspace conversion (SSE2) 2 ; jdclrss2.asm - colorspace conversion (SSE2)
3 ; 3 ;
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 4 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 ; 5 ;
6 ; Based on 6 ; Based on
7 ; x86 SIMD extension for IJG JPEG library 7 ; x86 SIMD extension for IJG JPEG library
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
10 ; 10 ;
11 ; This file should be assembled with NASM (Netwide Assembler), 11 ; This file should be assembled with NASM (Netwide Assembler),
12 ; can *not* be assembled with Microsoft's MASM or any compatible 12 ; can *not* be assembled with Microsoft's MASM or any compatible
13 ; assembler (including Borland's Turbo Assembler). 13 ; assembler (including Borland's Turbo Assembler).
14 ; NASM is available from http://nasm.sourceforge.net/ or 14 ; NASM is available from http://nasm.sourceforge.net/ or
(...skipping 240 matching lines...)
255 255
256 cmp ecx, byte SIZEOF_XMMWORD 256 cmp ecx, byte SIZEOF_XMMWORD
257 jb short .column_st32 257 jb short .column_st32
258 258
259 test edi, SIZEOF_XMMWORD-1 259 test edi, SIZEOF_XMMWORD-1
260 jnz short .out1 260 jnz short .out1
261 ; --(aligned)------------------- 261 ; --(aligned)-------------------
262 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA 262 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
263 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD 263 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
264 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF 264 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
265 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
266 jmp short .out0 265 jmp short .out0
267 .out1: ; --(unaligned)----------------- 266 .out1: ; --(unaligned)-----------------
268 » pcmpeqb xmmH,xmmH» » » ; xmmH=(all 1's) 267 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
269 » maskmovdqu xmmA,xmmH» » » ; movntdqu XMMWORD [edi], xmmA 268 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
270 » add» edi, byte SIZEOF_XMMWORD» ; outptr 269 » movdqu» XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
271 » maskmovdqu xmmD,xmmH» » » ; movntdqu XMMWORD [edi], xmmD
272 » add» edi, byte SIZEOF_XMMWORD» ; outptr
273 » maskmovdqu xmmF,xmmH» » » ; movntdqu XMMWORD [edi], xmmF
274 » add» edi, byte SIZEOF_XMMWORD» ; outptr
275 .out0: 270 .out0:
271 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
276 sub ecx, byte SIZEOF_XMMWORD 272 sub ecx, byte SIZEOF_XMMWORD
277 jz near .nextrow 273 jz near .nextrow
278 274
279 add esi, byte SIZEOF_XMMWORD ; inptr0 275 add esi, byte SIZEOF_XMMWORD ; inptr0
280 add ebx, byte SIZEOF_XMMWORD ; inptr1 276 add ebx, byte SIZEOF_XMMWORD ; inptr1
281 add edx, byte SIZEOF_XMMWORD ; inptr2 277 add edx, byte SIZEOF_XMMWORD ; inptr2
282 jmp near .columnloop 278 jmp near .columnloop
283 alignx 16,7 279 alignx 16,7
284 280
285 .column_st32: 281 .column_st32:
286 pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
287 lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE 282 lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
288 cmp ecx, byte 2*SIZEOF_XMMWORD 283 cmp ecx, byte 2*SIZEOF_XMMWORD
289 jb short .column_st16 284 jb short .column_st16
290 » maskmovdqu xmmA,xmmH» » » ; movntdqu XMMWORD [edi], xmmA 285 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
291 » add» edi, byte SIZEOF_XMMWORD» ; outptr 286 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
292 » maskmovdqu xmmD,xmmH» » » ; movntdqu XMMWORD [edi], xmmD 287 » add» edi, byte 2*SIZEOF_XMMWORD» ; outptr
293 » add» edi, byte SIZEOF_XMMWORD» ; outptr
294 movdqa xmmA,xmmF 288 movdqa xmmA,xmmF
295 sub ecx, byte 2*SIZEOF_XMMWORD 289 sub ecx, byte 2*SIZEOF_XMMWORD
296 jmp short .column_st15 290 jmp short .column_st15
297 .column_st16: 291 .column_st16:
298 cmp ecx, byte SIZEOF_XMMWORD 292 cmp ecx, byte SIZEOF_XMMWORD
299 jb short .column_st15 293 jb short .column_st15
300 » maskmovdqu xmmA,xmmH» » » ; movntdqu XMMWORD [edi], xmmA 294 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
301 add edi, byte SIZEOF_XMMWORD ; outptr 295 add edi, byte SIZEOF_XMMWORD ; outptr
302 movdqa xmmA,xmmD 296 movdqa xmmA,xmmD
303 sub ecx, byte SIZEOF_XMMWORD 297 sub ecx, byte SIZEOF_XMMWORD
304 .column_st15: 298 .column_st15:
305 %ifdef STRICT_MEMORY_ACCESS
306 ; Store the lower 8 bytes of xmmA to the output when it has enough 299 ; Store the lower 8 bytes of xmmA to the output when it has enough
307 ; space. 300 ; space.
308 cmp ecx, byte SIZEOF_MMWORD 301 cmp ecx, byte SIZEOF_MMWORD
309 jb short .column_st7 302 jb short .column_st7
310 movq MMWORD [edi], xmmA 303 movq MMWORD [edi], xmmA
311 add edi, byte SIZEOF_MMWORD 304 add edi, byte SIZEOF_MMWORD
312 sub ecx, byte SIZEOF_MMWORD 305 sub ecx, byte SIZEOF_MMWORD
313 psrldq xmmA, SIZEOF_MMWORD 306 psrldq xmmA, SIZEOF_MMWORD
314 .column_st7: 307 .column_st7:
315 ; Store the lower 4 bytes of xmmA to the output when it has enough 308 ; Store the lower 4 bytes of xmmA to the output when it has enough
(...skipping 13 matching lines...)
329 mov WORD [edi], ax 322 mov WORD [edi], ax
330 add edi, byte SIZEOF_WORD 323 add edi, byte SIZEOF_WORD
331 sub ecx, byte SIZEOF_WORD 324 sub ecx, byte SIZEOF_WORD
332 shr eax, 16 325 shr eax, 16
333 .column_st1: 326 .column_st1:
334 ; Store the lower 1 byte of eax to the output when it has enough 327 ; Store the lower 1 byte of eax to the output when it has enough
335 ; space. 328 ; space.
336 test ecx, ecx 329 test ecx, ecx
337 jz short .nextrow 330 jz short .nextrow
338 mov BYTE [edi], al 331 mov BYTE [edi], al
339 %else
340 mov eax,ecx
341 xor ecx, byte 0x0F
342 shl ecx, 2
343 movd xmmB,ecx
344 psrlq xmmH,4
345 pcmpeqb xmmE,xmmE
346 psrlq xmmH,xmmB
347 psrlq xmmE,xmmB
348 punpcklbw xmmE,xmmH
349 ; ----------------
350 mov ecx,edi
351 and ecx, byte SIZEOF_XMMWORD-1
352 jz short .adj0
353 add eax,ecx
354 cmp eax, byte SIZEOF_XMMWORD
355 ja short .adj0
356 and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
357 shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
358 movdqa xmmG,xmmA
359 movdqa xmmC,xmmE
360 pslldq xmmA, SIZEOF_XMMWORD/2
361 pslldq xmmE, SIZEOF_XMMWORD/2
362 movd xmmD,ecx
363 sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
364 jb short .adj1
365 movd xmmF,ecx
366 psllq xmmA,xmmF
367 psllq xmmE,xmmF
368 jmp short .adj0
369 .adj1: neg ecx
370 movd xmmF,ecx
371 psrlq xmmA,xmmF
372 psrlq xmmE,xmmF
373 psllq xmmG,xmmD
374 psllq xmmC,xmmD
375 por xmmA,xmmG
376 por xmmE,xmmC
377 .adj0: ; ----------------
378 maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
379 %endif ; STRICT_MEMORY_ACCESS ; ---------------
380 332
381 %else ; RGB_PIXELSIZE == 4 ; ----------- 333 %else ; RGB_PIXELSIZE == 4 ; -----------
382 334
383 %ifdef RGBX_FILLER_0XFF 335 %ifdef RGBX_FILLER_0XFF
384 pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) 336 pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
385 pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) 337 pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
386 %else 338 %else
387 pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) 339 pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
388 pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) 340 pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
389 %endif 341 %endif
(...skipping 24 matching lines...)
414 cmp ecx, byte SIZEOF_XMMWORD 366 cmp ecx, byte SIZEOF_XMMWORD
415 jb short .column_st32 367 jb short .column_st32
416 368
417 test edi, SIZEOF_XMMWORD-1 369 test edi, SIZEOF_XMMWORD-1
418 jnz short .out1 370 jnz short .out1
419 ; --(aligned)------------------- 371 ; --(aligned)-------------------
420 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA 372 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
421 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD 373 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
422 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC 374 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
423 movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH 375 movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
424 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
425 jmp short .out0 376 jmp short .out0
426 .out1: ; --(unaligned)----------------- 377 .out1: ; --(unaligned)-----------------
427 » pcmpeqb xmmE,xmmE» » » ; xmmE=(all 1's) 378 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
428 » maskmovdqu xmmA,xmmE» » » ; movntdqu XMMWORD [edi], xmmA 379 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
429 » add» edi, byte SIZEOF_XMMWORD» ; outptr 380 » movdqu» XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
430 » maskmovdqu xmmD,xmmE» » » ; movntdqu XMMWORD [edi], xmmD 381 » movdqu» XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
431 » add» edi, byte SIZEOF_XMMWORD» ; outptr
432 » maskmovdqu xmmC,xmmE» » » ; movntdqu XMMWORD [edi], xmmC
433 » add» edi, byte SIZEOF_XMMWORD» ; outptr
434 » maskmovdqu xmmH,xmmE» » » ; movntdqu XMMWORD [edi], xmmH
435 » add» edi, byte SIZEOF_XMMWORD» ; outptr
436 .out0: 382 .out0:
383 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
437 sub ecx, byte SIZEOF_XMMWORD 384 sub ecx, byte SIZEOF_XMMWORD
438 jz near .nextrow 385 jz near .nextrow
439 386
440 add esi, byte SIZEOF_XMMWORD ; inptr0 387 add esi, byte SIZEOF_XMMWORD ; inptr0
441 add ebx, byte SIZEOF_XMMWORD ; inptr1 388 add ebx, byte SIZEOF_XMMWORD ; inptr1
442 add edx, byte SIZEOF_XMMWORD ; inptr2 389 add edx, byte SIZEOF_XMMWORD ; inptr2
443 jmp near .columnloop 390 jmp near .columnloop
444 alignx 16,7 391 alignx 16,7
445 392
446 .column_st32: 393 .column_st32:
447 pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
448 cmp ecx, byte SIZEOF_XMMWORD/2 394 cmp ecx, byte SIZEOF_XMMWORD/2
449 jb short .column_st16 395 jb short .column_st16
450 » maskmovdqu xmmA,xmmE» » » ; movntdqu XMMWORD [edi], xmmA 396 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
451 » add» edi, byte SIZEOF_XMMWORD» ; outptr 397 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
452 » maskmovdqu xmmD,xmmE» » » ; movntdqu XMMWORD [edi], xmmD 398 » add» edi, byte 2*SIZEOF_XMMWORD» ; outptr
453 » add» edi, byte SIZEOF_XMMWORD» ; outptr
454 movdqa xmmA,xmmC 399 movdqa xmmA,xmmC
455 movdqa xmmD,xmmH 400 movdqa xmmD,xmmH
456 sub ecx, byte SIZEOF_XMMWORD/2 401 sub ecx, byte SIZEOF_XMMWORD/2
457 .column_st16: 402 .column_st16:
458 cmp ecx, byte SIZEOF_XMMWORD/4 403 cmp ecx, byte SIZEOF_XMMWORD/4
459 jb short .column_st15 404 jb short .column_st15
460 » maskmovdqu xmmA,xmmE» » » ; movntdqu XMMWORD [edi], xmmA 405 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
461 add edi, byte SIZEOF_XMMWORD ; outptr 406 add edi, byte SIZEOF_XMMWORD ; outptr
462 movdqa xmmA,xmmD 407 movdqa xmmA,xmmD
463 sub ecx, byte SIZEOF_XMMWORD/4 408 sub ecx, byte SIZEOF_XMMWORD/4
464 .column_st15: 409 .column_st15:
465 %ifdef STRICT_MEMORY_ACCESS
466 ; Store two pixels (8 bytes) of xmmA to the output when it has enough 410 ; Store two pixels (8 bytes) of xmmA to the output when it has enough
467 ; space. 411 ; space.
468 cmp ecx, byte SIZEOF_XMMWORD/8 412 cmp ecx, byte SIZEOF_XMMWORD/8
469 jb short .column_st7 413 jb short .column_st7
470 movq MMWORD [edi], xmmA 414 movq MMWORD [edi], xmmA
471 add edi, byte SIZEOF_XMMWORD/8*4 415 add edi, byte SIZEOF_XMMWORD/8*4
472 sub ecx, byte SIZEOF_XMMWORD/8 416 sub ecx, byte SIZEOF_XMMWORD/8
473 psrldq xmmA, SIZEOF_XMMWORD/8*4 417 psrldq xmmA, SIZEOF_XMMWORD/8*4
474 .column_st7: 418 .column_st7:
475 ; Store one pixel (4 bytes) of xmmA to the output when it has enough 419 ; Store one pixel (4 bytes) of xmmA to the output when it has enough
476 ; space. 420 ; space.
477 test ecx, ecx 421 test ecx, ecx
478 jz short .nextrow 422 jz short .nextrow
479 movd DWORD [edi], xmmA 423 movd DWORD [edi], xmmA
480 %else
481 cmp ecx, byte SIZEOF_XMMWORD/16
482 jb short .nextrow
483 mov eax,ecx
484 xor ecx, byte 0x03
485 inc ecx
486 shl ecx, 4
487 movd xmmF,ecx
488 psrlq xmmE,xmmF
489 punpcklbw xmmE,xmmE
490 ; ----------------
491 mov ecx,edi
492 and ecx, byte SIZEOF_XMMWORD-1
493 jz short .adj0
494 lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
495 cmp eax, byte SIZEOF_XMMWORD
496 ja short .adj0
497 and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
498 shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
499 movdqa xmmB,xmmA
500 movdqa xmmG,xmmE
501 pslldq xmmA, SIZEOF_XMMWORD/2
502 pslldq xmmE, SIZEOF_XMMWORD/2
503 movd xmmC,ecx
504 sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
505 jb short .adj1
506 movd xmmH,ecx
507 psllq xmmA,xmmH
508 psllq xmmE,xmmH
509 jmp short .adj0
510 .adj1: neg ecx
511 movd xmmH,ecx
512 psrlq xmmA,xmmH
513 psrlq xmmE,xmmH
514 psllq xmmB,xmmC
515 psllq xmmG,xmmC
516 por xmmA,xmmB
517 por xmmE,xmmG
518 .adj0: ; ----------------
519 maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
520 %endif ; STRICT_MEMORY_ACCESS ; ---------------
521 424
522 %endif ; RGB_PIXELSIZE ; --------------- 425 %endif ; RGB_PIXELSIZE ; ---------------
523 426
524 alignx 16,7 427 alignx 16,7
525 428
526 .nextrow: 429 .nextrow:
527 pop ecx 430 pop ecx
528 pop esi 431 pop esi
529 pop ebx 432 pop ebx
530 pop edx 433 pop edx
(...skipping 16 matching lines...)
547 ; pop ecx ; need not be preserved 450 ; pop ecx ; need not be preserved
548 pop ebx 451 pop ebx
549 mov esp,ebp ; esp <- aligned ebp 452 mov esp,ebp ; esp <- aligned ebp
550 pop esp ; esp <- original ebp 453 pop esp ; esp <- original ebp
551 pop ebp 454 pop ebp
552 ret 455 ret
553 456
554 ; For some reason, the OS X linker does not honor the request to align the 457 ; For some reason, the OS X linker does not honor the request to align the
555 ; segment unless we do this. 458 ; segment unless we do this.
556 align 16 459 align 16
OLD NEW
« no previous file with comments | « README.chromium ('k') | simd/jdclrss2-64.asm » ('j') | no next file with comments »

Powered by Google App Engine