OLD | NEW |
1 /* | 1 /* |
2 * jsimd_i386.c | 2 * jsimd_i386.c |
3 * | 3 * |
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 * Copyright 2009-2011 D. R. Commander | 5 * Copyright 2009-2011 D. R. Commander |
6 * | 6 * |
7 * Based on the x86 SIMD extension for IJG JPEG library, | 7 * Based on the x86 SIMD extension for IJG JPEG library, |
8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 * | 10 * |
11 * This file contains the interface between the "normal" portions | 11 * This file contains the interface between the "normal" portions |
12 * of the library and the SIMD implementations when running on a | 12 * of the library and the SIMD implementations when running on a |
13 * 32-bit x86 architecture. | 13 * 32-bit x86 architecture. |
14 */ | 14 */ |
15 | 15 |
16 #define JPEG_INTERNALS | 16 #define JPEG_INTERNALS |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
54 if ((env != NULL) && (strcmp(env, "1") == 0)) | 54 if ((env != NULL) && (strcmp(env, "1") == 0)) |
55 simd_support &= JSIMD_3DNOW|JSIMD_MMX; | 55 simd_support &= JSIMD_3DNOW|JSIMD_MMX; |
56 env = getenv("JSIMD_FORCESSE"); | 56 env = getenv("JSIMD_FORCESSE"); |
57 if ((env != NULL) && (strcmp(env, "1") == 0)) | 57 if ((env != NULL) && (strcmp(env, "1") == 0)) |
58 simd_support &= JSIMD_SSE|JSIMD_MMX; | 58 simd_support &= JSIMD_SSE|JSIMD_MMX; |
59 env = getenv("JSIMD_FORCESSE2"); | 59 env = getenv("JSIMD_FORCESSE2"); |
60 if ((env != NULL) && (strcmp(env, "1") == 0)) | 60 if ((env != NULL) && (strcmp(env, "1") == 0)) |
61 simd_support &= JSIMD_SSE2; | 61 simd_support &= JSIMD_SSE2; |
62 } | 62 } |
63 | 63 |
| 64 #ifndef JPEG_DECODE_ONLY |
64 GLOBAL(int) | 65 GLOBAL(int) |
65 jsimd_can_rgb_ycc (void) | 66 jsimd_can_rgb_ycc (void) |
66 { | 67 { |
67 init_simd(); | 68 init_simd(); |
68 | 69 |
69 /* The code is optimised for these values only */ | 70 /* The code is optimised for these values only */ |
70 if (BITS_IN_JSAMPLE != 8) | 71 if (BITS_IN_JSAMPLE != 8) |
71 return 0; | 72 return 0; |
72 if (sizeof(JDIMENSION) != 4) | 73 if (sizeof(JDIMENSION) != 4) |
73 return 0; | 74 return 0; |
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 75 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
75 return 0; | 76 return 0; |
76 | 77 |
77 if ((simd_support & JSIMD_SSE2) && | 78 if ((simd_support & JSIMD_SSE2) && |
78 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 79 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
79 return 1; | 80 return 1; |
80 if (simd_support & JSIMD_MMX) | 81 if (simd_support & JSIMD_MMX) |
81 return 1; | 82 return 1; |
82 | 83 |
83 return 0; | 84 return 0; |
84 } | 85 } |
| 86 #endif |
85 | 87 |
86 GLOBAL(int) | 88 GLOBAL(int) |
87 jsimd_can_rgb_gray (void) | 89 jsimd_can_rgb_gray (void) |
88 { | 90 { |
89 init_simd(); | 91 init_simd(); |
90 | 92 |
91 /* The code is optimised for these values only */ | 93 /* The code is optimised for these values only */ |
92 if (BITS_IN_JSAMPLE != 8) | 94 if (BITS_IN_JSAMPLE != 8) |
93 return 0; | 95 return 0; |
94 if (sizeof(JDIMENSION) != 4) | 96 if (sizeof(JDIMENSION) != 4) |
(...skipping 25 matching lines...) Expand all Loading... |
120 | 122 |
121 if ((simd_support & JSIMD_SSE2) && | 123 if ((simd_support & JSIMD_SSE2) && |
122 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 124 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
123 return 1; | 125 return 1; |
124 if (simd_support & JSIMD_MMX) | 126 if (simd_support & JSIMD_MMX) |
125 return 1; | 127 return 1; |
126 | 128 |
127 return 0; | 129 return 0; |
128 } | 130 } |
129 | 131 |
| 132 #ifndef JPEG_DECODE_ONLY |
130 GLOBAL(void) | 133 GLOBAL(void) |
131 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 134 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
132 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 135 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
133 JDIMENSION output_row, int num_rows) | 136 JDIMENSION output_row, int num_rows) |
134 { | 137 { |
135 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 138 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
136 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 139 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
137 | 140 |
138 switch(cinfo->in_color_space) | 141 switch(cinfo->in_color_space) |
139 { | 142 { |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
172 } | 175 } |
173 | 176 |
174 if ((simd_support & JSIMD_SSE2) && | 177 if ((simd_support & JSIMD_SSE2) && |
175 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 178 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
176 sse2fct(cinfo->image_width, input_buf, | 179 sse2fct(cinfo->image_width, input_buf, |
177 output_buf, output_row, num_rows); | 180 output_buf, output_row, num_rows); |
178 else if (simd_support & JSIMD_MMX) | 181 else if (simd_support & JSIMD_MMX) |
179 mmxfct(cinfo->image_width, input_buf, | 182 mmxfct(cinfo->image_width, input_buf, |
180 output_buf, output_row, num_rows); | 183 output_buf, output_row, num_rows); |
181 } | 184 } |
| 185 #endif |
182 | 186 |
183 GLOBAL(void) | 187 GLOBAL(void) |
184 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 188 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
185 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 189 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
186 JDIMENSION output_row, int num_rows) | 190 JDIMENSION output_row, int num_rows) |
187 { | 191 { |
188 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 192 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
189 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 193 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
190 | 194 |
191 switch(cinfo->in_color_space) | 195 switch(cinfo->in_color_space) |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
279 | 283 |
280 if ((simd_support & JSIMD_SSE2) && | 284 if ((simd_support & JSIMD_SSE2) && |
281 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 285 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
282 sse2fct(cinfo->output_width, input_buf, | 286 sse2fct(cinfo->output_width, input_buf, |
283 input_row, output_buf, num_rows); | 287 input_row, output_buf, num_rows); |
284 else if (simd_support & JSIMD_MMX) | 288 else if (simd_support & JSIMD_MMX) |
285 mmxfct(cinfo->output_width, input_buf, | 289 mmxfct(cinfo->output_width, input_buf, |
286 input_row, output_buf, num_rows); | 290 input_row, output_buf, num_rows); |
287 } | 291 } |
288 | 292 |
| 293 #ifndef JPEG_DECODE_ONLY |
289 GLOBAL(int) | 294 GLOBAL(int) |
290 jsimd_can_h2v2_downsample (void) | 295 jsimd_can_h2v2_downsample (void) |
291 { | 296 { |
292 init_simd(); | 297 init_simd(); |
293 | 298 |
294 /* The code is optimised for these values only */ | 299 /* The code is optimised for these values only */ |
295 if (BITS_IN_JSAMPLE != 8) | 300 if (BITS_IN_JSAMPLE != 8) |
296 return 0; | 301 return 0; |
297 if (sizeof(JDIMENSION) != 4) | 302 if (sizeof(JDIMENSION) != 4) |
298 return 0; | 303 return 0; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
344 { | 349 { |
345 if (simd_support & JSIMD_SSE2) | 350 if (simd_support & JSIMD_SSE2) |
346 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, | 351 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, |
347 compptr->v_samp_factor, compptr->width_in_blocks, | 352 compptr->v_samp_factor, compptr->width_in_blocks, |
348 input_data, output_data); | 353 input_data, output_data); |
349 else if (simd_support & JSIMD_MMX) | 354 else if (simd_support & JSIMD_MMX) |
350 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, | 355 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, |
351 compptr->v_samp_factor, compptr->width_in_blocks, | 356 compptr->v_samp_factor, compptr->width_in_blocks, |
352 input_data, output_data); | 357 input_data, output_data); |
353 } | 358 } |
| 359 #endif |
354 | 360 |
355 GLOBAL(int) | 361 GLOBAL(int) |
356 jsimd_can_h2v2_upsample (void) | 362 jsimd_can_h2v2_upsample (void) |
357 { | 363 { |
358 init_simd(); | 364 init_simd(); |
359 | 365 |
360 /* The code is optimised for these values only */ | 366 /* The code is optimised for these values only */ |
361 if (BITS_IN_JSAMPLE != 8) | 367 if (BITS_IN_JSAMPLE != 8) |
362 return 0; | 368 return 0; |
363 if (sizeof(JDIMENSION) != 4) | 369 if (sizeof(JDIMENSION) != 4) |
(...skipping 21 matching lines...) Expand all Loading... |
385 if (simd_support & JSIMD_SSE2) | 391 if (simd_support & JSIMD_SSE2) |
386 return 1; | 392 return 1; |
387 if (simd_support & JSIMD_MMX) | 393 if (simd_support & JSIMD_MMX) |
388 return 1; | 394 return 1; |
389 | 395 |
390 return 0; | 396 return 0; |
391 } | 397 } |
392 | 398 |
393 GLOBAL(void) | 399 GLOBAL(void) |
394 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 400 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
395 jpeg_component_info * compptr, | 401 jpeg_component_info * compptr, |
396 JSAMPARRAY input_data, | 402 JSAMPARRAY input_data, |
397 JSAMPARRAY * output_data_ptr) | 403 JSAMPARRAY * output_data_ptr) |
398 { | 404 { |
399 if (simd_support & JSIMD_SSE2) | 405 if (simd_support & JSIMD_SSE2) |
400 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, | 406 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, |
401 cinfo->output_width, input_data, output_data_ptr); | 407 cinfo->output_width, input_data, output_data_ptr); |
402 else if (simd_support & JSIMD_MMX) | 408 else if (simd_support & JSIMD_MMX) |
403 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, | 409 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, |
404 cinfo->output_width, input_data, output_data_ptr); | 410 cinfo->output_width, input_data, output_data_ptr); |
405 } | 411 } |
406 | 412 |
407 GLOBAL(void) | 413 GLOBAL(void) |
408 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 414 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
409 jpeg_component_info * compptr, | 415 jpeg_component_info * compptr, |
410 JSAMPARRAY input_data, | 416 JSAMPARRAY input_data, |
411 JSAMPARRAY * output_data_ptr) | 417 JSAMPARRAY * output_data_ptr) |
412 { | 418 { |
413 if (simd_support & JSIMD_SSE2) | 419 if (simd_support & JSIMD_SSE2) |
414 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, | 420 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, |
415 cinfo->output_width, input_data, output_data_ptr); | 421 cinfo->output_width, input_data, output_data_ptr); |
416 else if (simd_support & JSIMD_MMX) | 422 else if (simd_support & JSIMD_MMX) |
417 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, | 423 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, |
418 cinfo->output_width, input_data, output_data_ptr); | 424 cinfo->output_width, input_data, output_data_ptr); |
419 } | 425 } |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
453 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 459 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
454 return 1; | 460 return 1; |
455 if (simd_support & JSIMD_MMX) | 461 if (simd_support & JSIMD_MMX) |
456 return 1; | 462 return 1; |
457 | 463 |
458 return 0; | 464 return 0; |
459 } | 465 } |
460 | 466 |
461 GLOBAL(void) | 467 GLOBAL(void) |
462 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 468 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
463 jpeg_component_info * compptr, | 469 jpeg_component_info * compptr, |
464 JSAMPARRAY input_data, | 470 JSAMPARRAY input_data, |
465 JSAMPARRAY * output_data_ptr) | 471 JSAMPARRAY * output_data_ptr) |
466 { | 472 { |
467 if ((simd_support & JSIMD_SSE2) && | 473 if ((simd_support & JSIMD_SSE2) && |
468 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 474 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
469 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 475 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
470 compptr->downsampled_width, input_data, output_data_ptr); | 476 compptr->downsampled_width, input_data, output_data_ptr); |
471 else if (simd_support & JSIMD_MMX) | 477 else if (simd_support & JSIMD_MMX) |
472 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, | 478 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
473 compptr->downsampled_width, input_data, output_data_ptr); | 479 compptr->downsampled_width, input_data, output_data_ptr); |
474 } | 480 } |
475 | 481 |
476 GLOBAL(void) | 482 GLOBAL(void) |
477 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 483 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
478 jpeg_component_info * compptr, | 484 jpeg_component_info * compptr, |
479 JSAMPARRAY input_data, | 485 JSAMPARRAY input_data, |
480 JSAMPARRAY * output_data_ptr) | 486 JSAMPARRAY * output_data_ptr) |
481 { | 487 { |
482 if ((simd_support & JSIMD_SSE2) && | 488 if ((simd_support & JSIMD_SSE2) && |
483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 489 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
484 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 490 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
485 compptr->downsampled_width, input_data, output_data_ptr); | 491 compptr->downsampled_width, input_data, output_data_ptr); |
486 else if (simd_support & JSIMD_MMX) | 492 else if (simd_support & JSIMD_MMX) |
487 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, | 493 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
488 compptr->downsampled_width, input_data, output_data_ptr); | 494 compptr->downsampled_width, input_data, output_data_ptr); |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
629 | 635 |
630 if ((simd_support & JSIMD_SSE2) && | 636 if ((simd_support & JSIMD_SSE2) && |
631 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) | 637 IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) |
632 sse2fct(cinfo->output_width, input_buf, | 638 sse2fct(cinfo->output_width, input_buf, |
633 in_row_group_ctr, output_buf); | 639 in_row_group_ctr, output_buf); |
634 else if (simd_support & JSIMD_MMX) | 640 else if (simd_support & JSIMD_MMX) |
635 mmxfct(cinfo->output_width, input_buf, | 641 mmxfct(cinfo->output_width, input_buf, |
636 in_row_group_ctr, output_buf); | 642 in_row_group_ctr, output_buf); |
637 } | 643 } |
638 | 644 |
| 645 #ifndef JPEG_DECODE_ONLY |
639 GLOBAL(int) | 646 GLOBAL(int) |
640 jsimd_can_convsamp (void) | 647 jsimd_can_convsamp (void) |
641 { | 648 { |
642 init_simd(); | 649 init_simd(); |
643 | 650 |
644 /* The code is optimised for these values only */ | 651 /* The code is optimised for these values only */ |
645 if (DCTSIZE != 8) | 652 if (DCTSIZE != 8) |
646 return 0; | 653 return 0; |
647 if (BITS_IN_JSAMPLE != 8) | 654 if (BITS_IN_JSAMPLE != 8) |
648 return 0; | 655 return 0; |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
848 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | 855 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
849 FAST_FLOAT * workspace) | 856 FAST_FLOAT * workspace) |
850 { | 857 { |
851 if (simd_support & JSIMD_SSE2) | 858 if (simd_support & JSIMD_SSE2) |
852 jsimd_quantize_float_sse2(coef_block, divisors, workspace); | 859 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
853 else if (simd_support & JSIMD_SSE) | 860 else if (simd_support & JSIMD_SSE) |
854 jsimd_quantize_float_sse(coef_block, divisors, workspace); | 861 jsimd_quantize_float_sse(coef_block, divisors, workspace); |
855 else if (simd_support & JSIMD_3DNOW) | 862 else if (simd_support & JSIMD_3DNOW) |
856 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); | 863 jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
857 } | 864 } |
| 865 #endif |
858 | 866 |
859 GLOBAL(int) | 867 GLOBAL(int) |
860 jsimd_can_idct_2x2 (void) | 868 jsimd_can_idct_2x2 (void) |
861 { | 869 { |
862 init_simd(); | 870 init_simd(); |
863 | 871 |
864 /* The code is optimised for these values only */ | 872 /* The code is optimised for these values only */ |
865 if (DCTSIZE != 8) | 873 if (DCTSIZE != 8) |
866 return 0; | 874 return 0; |
867 if (sizeof(JCOEF) != 2) | 875 if (sizeof(JCOEF) != 2) |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1039 jsimd_idct_float_sse2(compptr->dct_table, coef_block, | 1047 jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
1040 output_buf, output_col); | 1048 output_buf, output_col); |
1041 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) | 1049 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) |
1042 jsimd_idct_float_sse(compptr->dct_table, coef_block, | 1050 jsimd_idct_float_sse(compptr->dct_table, coef_block, |
1043 output_buf, output_col); | 1051 output_buf, output_col); |
1044 else if (simd_support & JSIMD_3DNOW) | 1052 else if (simd_support & JSIMD_3DNOW) |
1045 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, | 1053 jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
1046 output_buf, output_col); | 1054 output_buf, output_col); |
1047 } | 1055 } |
1048 | 1056 |
OLD | NEW |