OLD | NEW |
1 /* | 1 /* |
2 * jsimd_x86_64.c | 2 * jsimd_x86_64.c |
3 * | 3 * |
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 * Copyright 2009-2011 D. R. Commander | 5 * Copyright 2009-2011 D. R. Commander |
6 * | 6 * |
7 * Based on the x86 SIMD extension for IJG JPEG library, | 7 * Based on the x86 SIMD extension for IJG JPEG library, |
8 * Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 * | 10 * |
11 * This file contains the interface between the "normal" portions | 11 * This file contains the interface between the "normal" portions |
12 * of the library and the SIMD implementations when running on a | 12 * of the library and the SIMD implementations when running on a |
13 * x86_64 architecture. | 13 * x86_64 architecture. |
14 */ | 14 */ |
15 | 15 |
16 #define JPEG_INTERNALS | 16 #define JPEG_INTERNALS |
17 #include "../jinclude.h" | 17 #include "../jinclude.h" |
18 #include "../jpeglib.h" | 18 #include "../jpeglib.h" |
19 #include "../jsimd.h" | 19 #include "../jsimd.h" |
20 #include "../jdct.h" | 20 #include "../jdct.h" |
21 #include "../jsimddct.h" | 21 #include "../jsimddct.h" |
22 #include "jsimd.h" | 22 #include "jsimd.h" |
23 | 23 |
24 /* | 24 /* |
25 * In the PIC cases, we have no guarantee that constants will keep | 25 * In the PIC cases, we have no guarantee that constants will keep |
26 * their alignment. This macro allows us to verify it at runtime. | 26 * their alignment. This macro allows us to verify it at runtime. |
27 */ | 27 */ |
28 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) | 28 #define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) |
29 | 29 |
30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ | 30 #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
31 | 31 |
| 32 #ifndef JPEG_DECODE_ONLY |
32 GLOBAL(int) | 33 GLOBAL(int) |
33 jsimd_can_rgb_ycc (void) | 34 jsimd_can_rgb_ycc (void) |
34 { | 35 { |
35 /* The code is optimised for these values only */ | 36 /* The code is optimised for these values only */ |
36 if (BITS_IN_JSAMPLE != 8) | 37 if (BITS_IN_JSAMPLE != 8) |
37 return 0; | 38 return 0; |
38 if (sizeof(JDIMENSION) != 4) | 39 if (sizeof(JDIMENSION) != 4) |
39 return 0; | 40 return 0; |
40 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 41 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
41 return 0; | 42 return 0; |
42 | 43 |
43 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) | 44 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) |
44 return 0; | 45 return 0; |
45 | 46 |
46 return 1; | 47 return 1; |
47 } | 48 } |
| 49 #endif |
48 | 50 |
49 GLOBAL(int) | 51 GLOBAL(int) |
50 jsimd_can_rgb_gray (void) | 52 jsimd_can_rgb_gray (void) |
51 { | 53 { |
52 /* The code is optimised for these values only */ | 54 /* The code is optimised for these values only */ |
53 if (BITS_IN_JSAMPLE != 8) | 55 if (BITS_IN_JSAMPLE != 8) |
54 return 0; | 56 return 0; |
55 if (sizeof(JDIMENSION) != 4) | 57 if (sizeof(JDIMENSION) != 4) |
56 return 0; | 58 return 0; |
57 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 59 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
(...skipping 15 matching lines...) Expand all Loading... |
73 return 0; | 75 return 0; |
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) | 76 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
75 return 0; | 77 return 0; |
76 | 78 |
77 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) | 79 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) |
78 return 0; | 80 return 0; |
79 | 81 |
80 return 1; | 82 return 1; |
81 } | 83 } |
82 | 84 |
| 85 #ifndef JPEG_DECODE_ONLY |
83 GLOBAL(void) | 86 GLOBAL(void) |
84 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, | 87 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
85 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 88 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
86 JDIMENSION output_row, int num_rows) | 89 JDIMENSION output_row, int num_rows) |
87 { | 90 { |
88 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 91 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
89 | 92 |
90 switch(cinfo->in_color_space) | 93 switch(cinfo->in_color_space) |
91 { | 94 { |
92 case JCS_EXT_RGB: | 95 case JCS_EXT_RGB: |
(...skipping 18 matching lines...) Expand all Loading... |
111 case JCS_EXT_ARGB: | 114 case JCS_EXT_ARGB: |
112 sse2fct=jsimd_extxrgb_ycc_convert_sse2; | 115 sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
113 break; | 116 break; |
114 default: | 117 default: |
115 sse2fct=jsimd_rgb_ycc_convert_sse2; | 118 sse2fct=jsimd_rgb_ycc_convert_sse2; |
116 break; | 119 break; |
117 } | 120 } |
118 | 121 |
119 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); | 122 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
120 } | 123 } |
| 124 #endif |
121 | 125 |
122 GLOBAL(void) | 126 GLOBAL(void) |
123 jsimd_rgb_gray_convert (j_compress_ptr cinfo, | 127 jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
124 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, | 128 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
125 JDIMENSION output_row, int num_rows) | 129 JDIMENSION output_row, int num_rows) |
126 { | 130 { |
127 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); | 131 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
128 | 132 |
129 switch(cinfo->in_color_space) | 133 switch(cinfo->in_color_space) |
130 { | 134 { |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
190 sse2fct=jsimd_ycc_extxrgb_convert_sse2; | 194 sse2fct=jsimd_ycc_extxrgb_convert_sse2; |
191 break; | 195 break; |
192 default: | 196 default: |
193 sse2fct=jsimd_ycc_rgb_convert_sse2; | 197 sse2fct=jsimd_ycc_rgb_convert_sse2; |
194 break; | 198 break; |
195 } | 199 } |
196 | 200 |
197 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); | 201 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
198 } | 202 } |
199 | 203 |
| 204 #ifndef JPEG_DECODE_ONLY |
200 GLOBAL(int) | 205 GLOBAL(int) |
201 jsimd_can_h2v2_downsample (void) | 206 jsimd_can_h2v2_downsample (void) |
202 { | 207 { |
203 /* The code is optimised for these values only */ | 208 /* The code is optimised for these values only */ |
204 if (BITS_IN_JSAMPLE != 8) | 209 if (BITS_IN_JSAMPLE != 8) |
205 return 0; | 210 return 0; |
206 if (sizeof(JDIMENSION) != 4) | 211 if (sizeof(JDIMENSION) != 4) |
207 return 0; | 212 return 0; |
208 | 213 |
209 return 1; | 214 return 1; |
(...skipping 25 matching lines...) Expand all Loading... |
235 GLOBAL(void) | 240 GLOBAL(void) |
236 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, | 241 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
237 JSAMPARRAY input_data, JSAMPARRAY output_data) | 242 JSAMPARRAY input_data, JSAMPARRAY output_data) |
238 { | 243 { |
239 jsimd_h2v1_downsample_sse2(cinfo->image_width, | 244 jsimd_h2v1_downsample_sse2(cinfo->image_width, |
240 cinfo->max_v_samp_factor, | 245 cinfo->max_v_samp_factor, |
241 compptr->v_samp_factor, | 246 compptr->v_samp_factor, |
242 compptr->width_in_blocks, | 247 compptr->width_in_blocks, |
243 input_data, output_data); | 248 input_data, output_data); |
244 } | 249 } |
| 250 #endif |
245 | 251 |
246 GLOBAL(int) | 252 GLOBAL(int) |
247 jsimd_can_h2v2_upsample (void) | 253 jsimd_can_h2v2_upsample (void) |
248 { | 254 { |
249 /* The code is optimised for these values only */ | 255 /* The code is optimised for these values only */ |
250 if (BITS_IN_JSAMPLE != 8) | 256 if (BITS_IN_JSAMPLE != 8) |
251 return 0; | 257 return 0; |
252 if (sizeof(JDIMENSION) != 4) | 258 if (sizeof(JDIMENSION) != 4) |
253 return 0; | 259 return 0; |
254 | 260 |
255 return 1; | 261 return 1; |
256 } | 262 } |
257 | 263 |
258 GLOBAL(int) | 264 GLOBAL(int) |
259 jsimd_can_h2v1_upsample (void) | 265 jsimd_can_h2v1_upsample (void) |
260 { | 266 { |
261 /* The code is optimised for these values only */ | 267 /* The code is optimised for these values only */ |
262 if (BITS_IN_JSAMPLE != 8) | 268 if (BITS_IN_JSAMPLE != 8) |
263 return 0; | 269 return 0; |
264 if (sizeof(JDIMENSION) != 4) | 270 if (sizeof(JDIMENSION) != 4) |
265 return 0; | 271 return 0; |
266 | 272 |
267 return 1; | 273 return 1; |
268 } | 274 } |
269 | 275 |
270 GLOBAL(void) | 276 GLOBAL(void) |
271 jsimd_h2v2_upsample (j_decompress_ptr cinfo, | 277 jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
272 jpeg_component_info * compptr, | 278 jpeg_component_info * compptr, |
273 JSAMPARRAY input_data, | 279 JSAMPARRAY input_data, |
274 JSAMPARRAY * output_data_ptr) | 280 JSAMPARRAY * output_data_ptr) |
275 { | 281 { |
276 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, | 282 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, |
277 cinfo->output_width, | 283 cinfo->output_width, |
278 input_data, output_data_ptr); | 284 input_data, output_data_ptr); |
279 } | 285 } |
280 | 286 |
281 GLOBAL(void) | 287 GLOBAL(void) |
282 jsimd_h2v1_upsample (j_decompress_ptr cinfo, | 288 jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
283 jpeg_component_info * compptr, | 289 jpeg_component_info * compptr, |
284 JSAMPARRAY input_data, | 290 JSAMPARRAY input_data, |
285 JSAMPARRAY * output_data_ptr) | 291 JSAMPARRAY * output_data_ptr) |
286 { | 292 { |
287 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, | 293 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, |
288 cinfo->output_width, | 294 cinfo->output_width, |
289 input_data, output_data_ptr); | 295 input_data, output_data_ptr); |
290 } | 296 } |
291 | 297 |
292 GLOBAL(int) | 298 GLOBAL(int) |
293 jsimd_can_h2v2_fancy_upsample (void) | 299 jsimd_can_h2v2_fancy_upsample (void) |
(...skipping 20 matching lines...) Expand all Loading... |
314 return 0; | 320 return 0; |
315 | 321 |
316 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) | 322 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
317 return 0; | 323 return 0; |
318 | 324 |
319 return 1; | 325 return 1; |
320 } | 326 } |
321 | 327 |
322 GLOBAL(void) | 328 GLOBAL(void) |
323 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, | 329 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
324 jpeg_component_info * compptr, | 330 jpeg_component_info * compptr, |
325 JSAMPARRAY input_data, | 331 JSAMPARRAY input_data, |
326 JSAMPARRAY * output_data_ptr) | 332 JSAMPARRAY * output_data_ptr) |
327 { | 333 { |
328 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 334 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
329 compptr->downsampled_width, | 335 compptr->downsampled_width, |
330 input_data, output_data_ptr); | 336 input_data, output_data_ptr); |
331 } | 337 } |
332 | 338 |
333 GLOBAL(void) | 339 GLOBAL(void) |
334 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, | 340 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
335 jpeg_component_info * compptr, | 341 jpeg_component_info * compptr, |
336 JSAMPARRAY input_data, | 342 JSAMPARRAY input_data, |
337 JSAMPARRAY * output_data_ptr) | 343 JSAMPARRAY * output_data_ptr) |
338 { | 344 { |
339 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, | 345 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
340 compptr->downsampled_width, | 346 compptr->downsampled_width, |
341 input_data, output_data_ptr); | 347 input_data, output_data_ptr); |
342 } | 348 } |
343 | 349 |
344 GLOBAL(int) | 350 GLOBAL(int) |
345 jsimd_can_h2v2_merged_upsample (void) | 351 jsimd_can_h2v2_merged_upsample (void) |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
444 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; | 450 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
445 break; | 451 break; |
446 default: | 452 default: |
447 sse2fct=jsimd_h2v1_merged_upsample_sse2; | 453 sse2fct=jsimd_h2v1_merged_upsample_sse2; |
448 break; | 454 break; |
449 } | 455 } |
450 | 456 |
451 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); | 457 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
452 } | 458 } |
453 | 459 |
| 460 #ifndef JPEG_DECODE_ONLY |
454 GLOBAL(int) | 461 GLOBAL(int) |
455 jsimd_can_convsamp (void) | 462 jsimd_can_convsamp (void) |
456 { | 463 { |
457 /* The code is optimised for these values only */ | 464 /* The code is optimised for these values only */ |
458 if (DCTSIZE != 8) | 465 if (DCTSIZE != 8) |
459 return 0; | 466 return 0; |
460 if (BITS_IN_JSAMPLE != 8) | 467 if (BITS_IN_JSAMPLE != 8) |
461 return 0; | 468 return 0; |
462 if (sizeof(JDIMENSION) != 4) | 469 if (sizeof(JDIMENSION) != 4) |
463 return 0; | 470 return 0; |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
594 { | 601 { |
595 jsimd_quantize_sse2(coef_block, divisors, workspace); | 602 jsimd_quantize_sse2(coef_block, divisors, workspace); |
596 } | 603 } |
597 | 604 |
598 GLOBAL(void) | 605 GLOBAL(void) |
599 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, | 606 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
600 FAST_FLOAT * workspace) | 607 FAST_FLOAT * workspace) |
601 { | 608 { |
602 jsimd_quantize_float_sse2(coef_block, divisors, workspace); | 609 jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
603 } | 610 } |
| 611 #endif |
604 | 612 |
605 GLOBAL(int) | 613 GLOBAL(int) |
606 jsimd_can_idct_2x2 (void) | 614 jsimd_can_idct_2x2 (void) |
607 { | 615 { |
608 /* The code is optimised for these values only */ | 616 /* The code is optimised for these values only */ |
609 if (DCTSIZE != 8) | 617 if (DCTSIZE != 8) |
610 return 0; | 618 return 0; |
611 if (sizeof(JCOEF) != 2) | 619 if (sizeof(JCOEF) != 2) |
612 return 0; | 620 return 0; |
613 if (BITS_IN_JSAMPLE != 8) | 621 if (BITS_IN_JSAMPLE != 8) |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
744 | 752 |
745 GLOBAL(void) | 753 GLOBAL(void) |
746 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, | 754 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
747 JCOEFPTR coef_block, JSAMPARRAY output_buf, | 755 JCOEFPTR coef_block, JSAMPARRAY output_buf, |
748 JDIMENSION output_col) | 756 JDIMENSION output_col) |
749 { | 757 { |
750 jsimd_idct_float_sse2(compptr->dct_table, coef_block, | 758 jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
751 output_buf, output_col); | 759 output_buf, output_col); |
752 } | 760 } |
753 | 761 |
OLD | NEW |