Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(239)

Side by Side Diff: simd/jdclrss2.asm

Issue 10700197: Update libjpeg-turbo to r856. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libjpeg_turbo/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « README.chromium ('k') | simd/jdclrss2-64.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 ; 1 ;
2 ; jdclrss2.asm - colorspace conversion (SSE2) 2 ; jdclrss2.asm - colorspace conversion (SSE2)
3 ; 3 ;
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 4 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 ; 5 ;
6 ; Based on 6 ; Based on
7 ; x86 SIMD extension for IJG JPEG library 7 ; x86 SIMD extension for IJG JPEG library
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
10 ; 10 ;
11 ; This file should be assembled with NASM (Netwide Assembler), 11 ; This file should be assembled with NASM (Netwide Assembler),
12 ; can *not* be assembled with Microsoft's MASM or any compatible 12 ; can *not* be assembled with Microsoft's MASM or any compatible
13 ; assembler (including Borland's Turbo Assembler). 13 ; assembler (including Borland's Turbo Assembler).
14 ; NASM is available from http://nasm.sourceforge.net/ or 14 ; NASM is available from http://nasm.sourceforge.net/ or
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
255 255
256 cmp ecx, byte SIZEOF_XMMWORD 256 cmp ecx, byte SIZEOF_XMMWORD
257 jb short .column_st32 257 jb short .column_st32
258 258
259 test edi, SIZEOF_XMMWORD-1 259 test edi, SIZEOF_XMMWORD-1
260 jnz short .out1 260 jnz short .out1
261 ; --(aligned)------------------- 261 ; --(aligned)-------------------
262 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA 262 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
263 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD 263 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
264 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF 264 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
265 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
266 jmp short .out0 265 jmp short .out0
267 .out1: ; --(unaligned)----------------- 266 .out1: ; --(unaligned)-----------------
268 » pcmpeqb xmmH,xmmH» » » ; xmmH=(all 1's) 267 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
269 » maskmovdqu xmmA,xmmH» » » ; movntdqu XMMWORD [edi], xmmA 268 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
270 » add» edi, byte SIZEOF_XMMWORD» ; outptr 269 » movdqu» XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
271 » maskmovdqu xmmD,xmmH» » » ; movntdqu XMMWORD [edi], xmmD
272 » add» edi, byte SIZEOF_XMMWORD» ; outptr
273 » maskmovdqu xmmF,xmmH» » » ; movntdqu XMMWORD [edi], xmmF
274 » add» edi, byte SIZEOF_XMMWORD» ; outptr
275 .out0: 270 .out0:
271 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
276 sub ecx, byte SIZEOF_XMMWORD 272 sub ecx, byte SIZEOF_XMMWORD
277 jz near .nextrow 273 jz near .nextrow
278 274
279 add esi, byte SIZEOF_XMMWORD ; inptr0 275 add esi, byte SIZEOF_XMMWORD ; inptr0
280 add ebx, byte SIZEOF_XMMWORD ; inptr1 276 add ebx, byte SIZEOF_XMMWORD ; inptr1
281 add edx, byte SIZEOF_XMMWORD ; inptr2 277 add edx, byte SIZEOF_XMMWORD ; inptr2
282 jmp near .columnloop 278 jmp near .columnloop
283 alignx 16,7 279 alignx 16,7
284 280
285 .column_st32: 281 .column_st32:
286 pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
287 lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE 282 lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
288 cmp ecx, byte 2*SIZEOF_XMMWORD 283 cmp ecx, byte 2*SIZEOF_XMMWORD
289 jb short .column_st16 284 jb short .column_st16
290 » maskmovdqu xmmA,xmmH» » » ; movntdqu XMMWORD [edi], xmmA 285 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
291 » add» edi, byte SIZEOF_XMMWORD» ; outptr 286 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
292 » maskmovdqu xmmD,xmmH» » » ; movntdqu XMMWORD [edi], xmmD 287 » add» edi, byte 2*SIZEOF_XMMWORD» ; outptr
293 » add» edi, byte SIZEOF_XMMWORD» ; outptr
294 movdqa xmmA,xmmF 288 movdqa xmmA,xmmF
295 sub ecx, byte 2*SIZEOF_XMMWORD 289 sub ecx, byte 2*SIZEOF_XMMWORD
296 jmp short .column_st15 290 jmp short .column_st15
297 .column_st16: 291 .column_st16:
298 cmp ecx, byte SIZEOF_XMMWORD 292 cmp ecx, byte SIZEOF_XMMWORD
299 jb short .column_st15 293 jb short .column_st15
300 » maskmovdqu xmmA,xmmH» » » ; movntdqu XMMWORD [edi], xmmA 294 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
301 add edi, byte SIZEOF_XMMWORD ; outptr 295 add edi, byte SIZEOF_XMMWORD ; outptr
302 movdqa xmmA,xmmD 296 movdqa xmmA,xmmD
303 sub ecx, byte SIZEOF_XMMWORD 297 sub ecx, byte SIZEOF_XMMWORD
304 .column_st15: 298 .column_st15:
305 %ifdef STRICT_MEMORY_ACCESS
306 ; Store the lower 8 bytes of xmmA to the output when it has enough 299 ; Store the lower 8 bytes of xmmA to the output when it has enough
307 ; space. 300 ; space.
308 cmp ecx, byte SIZEOF_MMWORD 301 cmp ecx, byte SIZEOF_MMWORD
309 jb short .column_st7 302 jb short .column_st7
310 movq MMWORD [edi], xmmA 303 movq MMWORD [edi], xmmA
311 add edi, byte SIZEOF_MMWORD 304 add edi, byte SIZEOF_MMWORD
312 sub ecx, byte SIZEOF_MMWORD 305 sub ecx, byte SIZEOF_MMWORD
313 psrldq xmmA, SIZEOF_MMWORD 306 psrldq xmmA, SIZEOF_MMWORD
314 .column_st7: 307 .column_st7:
315 ; Store the lower 4 bytes of xmmA to the output when it has enough 308 ; Store the lower 4 bytes of xmmA to the output when it has enough
(...skipping 13 matching lines...) Expand all
329 mov WORD [edi], ax 322 mov WORD [edi], ax
330 add edi, byte SIZEOF_WORD 323 add edi, byte SIZEOF_WORD
331 sub ecx, byte SIZEOF_WORD 324 sub ecx, byte SIZEOF_WORD
332 shr eax, 16 325 shr eax, 16
333 .column_st1: 326 .column_st1:
334 ; Store the lower 1 byte of eax to the output when it has enough 327 ; Store the lower 1 byte of eax to the output when it has enough
335 ; space. 328 ; space.
336 test ecx, ecx 329 test ecx, ecx
337 jz short .nextrow 330 jz short .nextrow
338 mov BYTE [edi], al 331 mov BYTE [edi], al
339 %else
340 mov eax,ecx
341 xor ecx, byte 0x0F
342 shl ecx, 2
343 movd xmmB,ecx
344 psrlq xmmH,4
345 pcmpeqb xmmE,xmmE
346 psrlq xmmH,xmmB
347 psrlq xmmE,xmmB
348 punpcklbw xmmE,xmmH
349 ; ----------------
350 mov ecx,edi
351 and ecx, byte SIZEOF_XMMWORD-1
352 jz short .adj0
353 add eax,ecx
354 cmp eax, byte SIZEOF_XMMWORD
355 ja short .adj0
356 and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
357 shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
358 movdqa xmmG,xmmA
359 movdqa xmmC,xmmE
360 pslldq xmmA, SIZEOF_XMMWORD/2
361 pslldq xmmE, SIZEOF_XMMWORD/2
362 movd xmmD,ecx
363 sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
364 jb short .adj1
365 movd xmmF,ecx
366 psllq xmmA,xmmF
367 psllq xmmE,xmmF
368 jmp short .adj0
369 .adj1: neg ecx
370 movd xmmF,ecx
371 psrlq xmmA,xmmF
372 psrlq xmmE,xmmF
373 psllq xmmG,xmmD
374 psllq xmmC,xmmD
375 por xmmA,xmmG
376 por xmmE,xmmC
377 .adj0: ; ----------------
378 maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
379 %endif ; STRICT_MEMORY_ACCESS ; ---------------
380 332
381 %else ; RGB_PIXELSIZE == 4 ; ----------- 333 %else ; RGB_PIXELSIZE == 4 ; -----------
382 334
383 %ifdef RGBX_FILLER_0XFF 335 %ifdef RGBX_FILLER_0XFF
384 pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) 336 pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
385 pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) 337 pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
386 %else 338 %else
387 pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) 339 pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
388 pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) 340 pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
389 %endif 341 %endif
(...skipping 24 matching lines...) Expand all
414 cmp ecx, byte SIZEOF_XMMWORD 366 cmp ecx, byte SIZEOF_XMMWORD
415 jb short .column_st32 367 jb short .column_st32
416 368
417 test edi, SIZEOF_XMMWORD-1 369 test edi, SIZEOF_XMMWORD-1
418 jnz short .out1 370 jnz short .out1
419 ; --(aligned)------------------- 371 ; --(aligned)-------------------
420 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA 372 movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
421 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD 373 movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
422 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC 374 movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
423 movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH 375 movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
424 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
425 jmp short .out0 376 jmp short .out0
426 .out1: ; --(unaligned)----------------- 377 .out1: ; --(unaligned)-----------------
427 » pcmpeqb xmmE,xmmE» » » ; xmmE=(all 1's) 378 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
428 » maskmovdqu xmmA,xmmE» » » ; movntdqu XMMWORD [edi], xmmA 379 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
429 » add» edi, byte SIZEOF_XMMWORD» ; outptr 380 » movdqu» XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
430 » maskmovdqu xmmD,xmmE» » » ; movntdqu XMMWORD [edi], xmmD 381 » movdqu» XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
431 » add» edi, byte SIZEOF_XMMWORD» ; outptr
432 » maskmovdqu xmmC,xmmE» » » ; movntdqu XMMWORD [edi], xmmC
433 » add» edi, byte SIZEOF_XMMWORD» ; outptr
434 » maskmovdqu xmmH,xmmE» » » ; movntdqu XMMWORD [edi], xmmH
435 » add» edi, byte SIZEOF_XMMWORD» ; outptr
436 .out0: 382 .out0:
383 add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
437 sub ecx, byte SIZEOF_XMMWORD 384 sub ecx, byte SIZEOF_XMMWORD
438 jz near .nextrow 385 jz near .nextrow
439 386
440 add esi, byte SIZEOF_XMMWORD ; inptr0 387 add esi, byte SIZEOF_XMMWORD ; inptr0
441 add ebx, byte SIZEOF_XMMWORD ; inptr1 388 add ebx, byte SIZEOF_XMMWORD ; inptr1
442 add edx, byte SIZEOF_XMMWORD ; inptr2 389 add edx, byte SIZEOF_XMMWORD ; inptr2
443 jmp near .columnloop 390 jmp near .columnloop
444 alignx 16,7 391 alignx 16,7
445 392
446 .column_st32: 393 .column_st32:
447 pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
448 cmp ecx, byte SIZEOF_XMMWORD/2 394 cmp ecx, byte SIZEOF_XMMWORD/2
449 jb short .column_st16 395 jb short .column_st16
450 » maskmovdqu xmmA,xmmE» » » ; movntdqu XMMWORD [edi], xmmA 396 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
451 » add» edi, byte SIZEOF_XMMWORD» ; outptr 397 » movdqu» XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
452 » maskmovdqu xmmD,xmmE» » » ; movntdqu XMMWORD [edi], xmmD 398 » add» edi, byte 2*SIZEOF_XMMWORD» ; outptr
453 » add» edi, byte SIZEOF_XMMWORD» ; outptr
454 movdqa xmmA,xmmC 399 movdqa xmmA,xmmC
455 movdqa xmmD,xmmH 400 movdqa xmmD,xmmH
456 sub ecx, byte SIZEOF_XMMWORD/2 401 sub ecx, byte SIZEOF_XMMWORD/2
457 .column_st16: 402 .column_st16:
458 cmp ecx, byte SIZEOF_XMMWORD/4 403 cmp ecx, byte SIZEOF_XMMWORD/4
459 jb short .column_st15 404 jb short .column_st15
460 » maskmovdqu xmmA,xmmE» » » ; movntdqu XMMWORD [edi], xmmA 405 » movdqu» XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
461 add edi, byte SIZEOF_XMMWORD ; outptr 406 add edi, byte SIZEOF_XMMWORD ; outptr
462 movdqa xmmA,xmmD 407 movdqa xmmA,xmmD
463 sub ecx, byte SIZEOF_XMMWORD/4 408 sub ecx, byte SIZEOF_XMMWORD/4
464 .column_st15: 409 .column_st15:
465 %ifdef STRICT_MEMORY_ACCESS
466 ; Store two pixels (8 bytes) of xmmA to the output when it has enough 410 ; Store two pixels (8 bytes) of xmmA to the output when it has enough
467 ; space. 411 ; space.
468 cmp ecx, byte SIZEOF_XMMWORD/8 412 cmp ecx, byte SIZEOF_XMMWORD/8
469 jb short .column_st7 413 jb short .column_st7
470 movq MMWORD [edi], xmmA 414 movq MMWORD [edi], xmmA
471 add edi, byte SIZEOF_XMMWORD/8*4 415 add edi, byte SIZEOF_XMMWORD/8*4
472 sub ecx, byte SIZEOF_XMMWORD/8 416 sub ecx, byte SIZEOF_XMMWORD/8
473 psrldq xmmA, SIZEOF_XMMWORD/8*4 417 psrldq xmmA, SIZEOF_XMMWORD/8*4
474 .column_st7: 418 .column_st7:
475 ; Store one pixel (4 bytes) of xmmA to the output when it has enough 419 ; Store one pixel (4 bytes) of xmmA to the output when it has enough
476 ; space. 420 ; space.
477 test ecx, ecx 421 test ecx, ecx
478 jz short .nextrow 422 jz short .nextrow
479 movd DWORD [edi], xmmA 423 movd DWORD [edi], xmmA
480 %else
481 cmp ecx, byte SIZEOF_XMMWORD/16
482 jb short .nextrow
483 mov eax,ecx
484 xor ecx, byte 0x03
485 inc ecx
486 shl ecx, 4
487 movd xmmF,ecx
488 psrlq xmmE,xmmF
489 punpcklbw xmmE,xmmE
490 ; ----------------
491 mov ecx,edi
492 and ecx, byte SIZEOF_XMMWORD-1
493 jz short .adj0
494 lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
495 cmp eax, byte SIZEOF_XMMWORD
496 ja short .adj0
497 and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
498 shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
499 movdqa xmmB,xmmA
500 movdqa xmmG,xmmE
501 pslldq xmmA, SIZEOF_XMMWORD/2
502 pslldq xmmE, SIZEOF_XMMWORD/2
503 movd xmmC,ecx
504 sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
505 jb short .adj1
506 movd xmmH,ecx
507 psllq xmmA,xmmH
508 psllq xmmE,xmmH
509 jmp short .adj0
510 .adj1: neg ecx
511 movd xmmH,ecx
512 psrlq xmmA,xmmH
513 psrlq xmmE,xmmH
514 psllq xmmB,xmmC
515 psllq xmmG,xmmC
516 por xmmA,xmmB
517 por xmmE,xmmG
518 .adj0: ; ----------------
519 maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
520 %endif ; STRICT_MEMORY_ACCESS ; ---------------
521 424
522 %endif ; RGB_PIXELSIZE ; --------------- 425 %endif ; RGB_PIXELSIZE ; ---------------
523 426
524 alignx 16,7 427 alignx 16,7
525 428
526 .nextrow: 429 .nextrow:
527 pop ecx 430 pop ecx
528 pop esi 431 pop esi
529 pop ebx 432 pop ebx
530 pop edx 433 pop edx
(...skipping 16 matching lines...) Expand all
547 ; pop ecx ; need not be preserved 450 ; pop ecx ; need not be preserved
548 pop ebx 451 pop ebx
549 mov esp,ebp ; esp <- aligned ebp 452 mov esp,ebp ; esp <- aligned ebp
550 pop esp ; esp <- original ebp 453 pop esp ; esp <- original ebp
551 pop ebp 454 pop ebp
552 ret 455 ret
553 456
554 ; For some reason, the OS X linker does not honor the request to align the 457 ; For some reason, the OS X linker does not honor the request to align the
555 ; segment unless we do this. 458 ; segment unless we do this.
556 align 16 459 align 16
OLDNEW
« no previous file with comments | « README.chromium ('k') | simd/jdclrss2-64.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698