OLD | NEW |
(Empty) | |
| 1 /////////////////////////////////////////////////////////////////////////////// |
| 2 // |
| 3 /// \file 01_compress_easy.c |
| 4 /// \brief Compress from stdin to stdout in multi-call mode |
| 5 /// |
| 6 /// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE |
| 7 /// |
| 8 /// Example: ./01_compress_easy 6 < foo > foo.xz |
| 9 // |
| 10 // Author: Lasse Collin |
| 11 // |
| 12 // This file has been put into the public domain. |
| 13 // You can do whatever you want with this file. |
| 14 // |
| 15 /////////////////////////////////////////////////////////////////////////////// |
| 16 |
| 17 #include <stdbool.h> |
| 18 #include <stdlib.h> |
| 19 #include <stdio.h> |
| 20 #include <string.h> |
| 21 #include <errno.h> |
| 22 #include <lzma.h> |
| 23 |
| 24 |
/// Print the command line usage to stderr and terminate the program
/// with EXIT_FAILURE. This function never returns.
///
/// \param      argv0   Program name to show in the usage line
static void
show_usage_and_exit(const char *argv0)
{
	// Only the first line depends on the program name; the rest of
	// the help text is fixed and can be written as-is.
	fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n", argv0);
	fputs("PRESET is a number 0-9 and can optionally be "
			"followed by `e' to indicate extreme preset\n",
			stderr);

	exit(EXIT_FAILURE);
}
| 34 |
| 35 |
| 36 static uint32_t |
| 37 get_preset(int argc, char **argv) |
| 38 { |
| 39 // One argument whose first char must be 0-9. |
| 40 if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9') |
| 41 show_usage_and_exit(argv[0]); |
| 42 |
| 43 // Calculate the preste level 0-9. |
| 44 uint32_t preset = argv[1][0] - '0'; |
| 45 |
| 46 // If there is a second char, it must be 'e'. It will set |
| 47 // the LZMA_PRESET_EXTREME flag. |
| 48 if (argv[1][1] != '\0') { |
| 49 if (argv[1][1] != 'e' || argv[1][2] != '\0') |
| 50 show_usage_and_exit(argv[0]); |
| 51 |
| 52 preset |= LZMA_PRESET_EXTREME; |
| 53 } |
| 54 |
| 55 return preset; |
| 56 } |
| 57 |
| 58 |
| 59 static bool |
| 60 init_encoder(lzma_stream *strm, uint32_t preset) |
| 61 { |
| 62 // Initialize the encoder using a preset. Set the integrity to check |
| 63 // to CRC64, which is the default in the xz command line tool. If |
| 64 // the .xz file needs to be decompressed with XZ Embedded, use |
| 65 // LZMA_CHECK_CRC32 instead. |
| 66 lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64); |
| 67 |
| 68 // Return successfully if the initialization went fine. |
| 69 if (ret == LZMA_OK) |
| 70 return true; |
| 71 |
| 72 // Something went wrong. The possible errors are documented in |
| 73 // lzma/container.h (src/liblzma/api/lzma/container.h in the source |
| 74 // package or e.g. /usr/include/lzma/container.h depending on the |
| 75 // install prefix). |
| 76 const char *msg; |
| 77 switch (ret) { |
| 78 case LZMA_MEM_ERROR: |
| 79 msg = "Memory allocation failed"; |
| 80 break; |
| 81 |
| 82 case LZMA_OPTIONS_ERROR: |
| 83 msg = "Specified preset is not supported"; |
| 84 break; |
| 85 |
| 86 case LZMA_UNSUPPORTED_CHECK: |
| 87 msg = "Specified integrity check is not supported"; |
| 88 break; |
| 89 |
| 90 default: |
| 91 // This is most likely LZMA_PROG_ERROR indicating a bug in |
| 92 // this program or in liblzma. It is inconvenient to have a |
| 93 // separate error message for errors that should be impossible |
| 94 // to occur, but knowing the error code is important for |
| 95 // debugging. That's why it is good to print the error code |
| 96 // at least when there is no good error message to show. |
| 97 msg = "Unknown error, possibly a bug"; |
| 98 break; |
| 99 } |
| 100 |
| 101 fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n", |
| 102 msg, ret); |
| 103 return false; |
| 104 } |
| 105 |
| 106 |
| 107 static bool |
| 108 compress(lzma_stream *strm, FILE *infile, FILE *outfile) |
| 109 { |
| 110 // This will be LZMA_RUN until the end of the input file is reached. |
| 111 // This tells lzma_code() when there will be no more input. |
| 112 lzma_action action = LZMA_RUN; |
| 113 |
| 114 // Buffers to temporarily hold uncompressed input |
| 115 // and compressed output. |
| 116 uint8_t inbuf[BUFSIZ]; |
| 117 uint8_t outbuf[BUFSIZ]; |
| 118 |
| 119 // Initialize the input and output pointers. Initializing next_in |
| 120 // and avail_in isn't really necessary when we are going to encode |
| 121 // just one file since LZMA_STREAM_INIT takes care of initializing |
| 122 // those already. But it doesn't hurt much and it will be needed |
| 123 // if encoding more than one file like we will in 02_decompress.c. |
| 124 // |
| 125 // While we don't care about strm->total_in or strm->total_out in this |
| 126 // example, it is worth noting that initializing the encoder will |
| 127 // always reset total_in and total_out to zero. But the encoder |
| 128 // initialization doesn't touch next_in, avail_in, next_out, or |
| 129 // avail_out. |
| 130 strm->next_in = NULL; |
| 131 strm->avail_in = 0; |
| 132 strm->next_out = outbuf; |
| 133 strm->avail_out = sizeof(outbuf); |
| 134 |
| 135 // Loop until the file has been successfully compressed or until |
| 136 // an error occurs. |
| 137 while (true) { |
| 138 // Fill the input buffer if it is empty. |
| 139 if (strm->avail_in == 0 && !feof(infile)) { |
| 140 strm->next_in = inbuf; |
| 141 strm->avail_in = fread(inbuf, 1, sizeof(inbuf), |
| 142 infile); |
| 143 |
| 144 if (ferror(infile)) { |
| 145 fprintf(stderr, "Read error: %s\n", |
| 146 strerror(errno)); |
| 147 return false; |
| 148 } |
| 149 |
| 150 // Once the end of the input file has been reached, |
| 151 // we need to tell lzma_code() that no more input |
| 152 // will be coming and that it should finish the |
| 153 // encoding. |
| 154 if (feof(infile)) |
| 155 action = LZMA_FINISH; |
| 156 } |
| 157 |
| 158 // Tell liblzma do the actual encoding. |
| 159 // |
| 160 // This reads up to strm->avail_in bytes of input starting |
| 161 // from strm->next_in. avail_in will be decremented and |
| 162 // next_in incremented by an equal amount to match the |
| 163 // number of input bytes consumed. |
| 164 // |
| 165 // Up to strm->avail_out bytes of compressed output will be |
| 166 // written starting from strm->next_out. avail_out and next_out |
| 167 // will be incremented by an equal amount to match the number |
| 168 // of output bytes written. |
| 169 // |
| 170 // The encoder has to do internal buffering, which means that |
| 171 // it may take quite a bit of input before the same data is |
| 172 // available in compressed form in the output buffer. |
| 173 lzma_ret ret = lzma_code(strm, action); |
| 174 |
| 175 // If the output buffer is full or if the compression finished |
| 176 // successfully, write the data from the output bufffer to |
| 177 // the output file. |
| 178 if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { |
| 179 // When lzma_code() has returned LZMA_STREAM_END, |
| 180 // the output buffer is likely to be only partially |
| 181 // full. Calculate how much new data there is to |
| 182 // be written to the output file. |
| 183 size_t write_size = sizeof(outbuf) - strm->avail_out; |
| 184 |
| 185 if (fwrite(outbuf, 1, write_size, outfile) |
| 186 != write_size) { |
| 187 fprintf(stderr, "Write error: %s\n", |
| 188 strerror(errno)); |
| 189 return false; |
| 190 } |
| 191 |
| 192 // Reset next_out and avail_out. |
| 193 strm->next_out = outbuf; |
| 194 strm->avail_out = sizeof(outbuf); |
| 195 } |
| 196 |
| 197 // Normally the return value of lzma_code() will be LZMA_OK |
| 198 // until everything has been encoded. |
| 199 if (ret != LZMA_OK) { |
| 200 // Once everything has been encoded successfully, the |
| 201 // return value of lzma_code() will be LZMA_STREAM_END. |
| 202 // |
| 203 // It is important to check for LZMA_STREAM_END. Do not |
| 204 // assume that getting ret != LZMA_OK would mean that |
| 205 // everything has gone well. |
| 206 if (ret == LZMA_STREAM_END) |
| 207 return true; |
| 208 |
| 209 // It's not LZMA_OK nor LZMA_STREAM_END, |
| 210 // so it must be an error code. See lzma/base.h |
| 211 // (src/liblzma/api/lzma/base.h in the source package |
| 212 // or e.g. /usr/include/lzma/base.h depending on the |
| 213 // install prefix) for the list and documentation of |
| 214 // possible values. Most values listen in lzma_ret |
| 215 // enumeration aren't possible in this example. |
| 216 const char *msg; |
| 217 switch (ret) { |
| 218 case LZMA_MEM_ERROR: |
| 219 msg = "Memory allocation failed"; |
| 220 break; |
| 221 |
| 222 case LZMA_DATA_ERROR: |
| 223 // This error is returned if the compressed |
| 224 // or uncompressed size get near 8 EiB |
| 225 // (2^63 bytes) because that's where the .xz |
| 226 // file format size limits currently are. |
| 227 // That is, the possibility of this error |
| 228 // is mostly theoretical unless you are doing |
| 229 // something very unusual. |
| 230 // |
| 231 // Note that strm->total_in and strm->total_out |
| 232 // have nothing to do with this error. Changing |
| 233 // those variables won't increase or decrease |
| 234 // the chance of getting this error. |
| 235 msg = "File size limits exceeded"; |
| 236 break; |
| 237 |
| 238 default: |
| 239 // This is most likely LZMA_PROG_ERROR, but |
| 240 // if this program is buggy (or liblzma has |
| 241 // a bug), it may be e.g. LZMA_BUF_ERROR or |
| 242 // LZMA_OPTIONS_ERROR too. |
| 243 // |
| 244 // It is inconvenient to have a separate |
| 245 // error message for errors that should be |
| 246 // impossible to occur, but knowing the error |
| 247 // code is important for debugging. That's why |
| 248 // it is good to print the error code at least |
| 249 // when there is no good error message to show. |
| 250 msg = "Unknown error, possibly a bug"; |
| 251 break; |
| 252 } |
| 253 |
| 254 fprintf(stderr, "Encoder error: %s (error code %u)\n", |
| 255 msg, ret); |
| 256 return false; |
| 257 } |
| 258 } |
| 259 } |
| 260 |
| 261 |
| 262 extern int |
| 263 main(int argc, char **argv) |
| 264 { |
| 265 // Get the preset number from the command line. |
| 266 uint32_t preset = get_preset(argc, argv); |
| 267 |
| 268 // Initialize a lzma_stream structure. When it is allocated on stack, |
| 269 // it is simplest to use LZMA_STREAM_INIT macro like below. When it |
| 270 // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr)) |
| 271 // works (as long as NULL pointers are represented with zero bits |
| 272 // as they are on practically all computers today). |
| 273 lzma_stream strm = LZMA_STREAM_INIT; |
| 274 |
| 275 // Initialize the encoder. If it succeeds, compress from |
| 276 // stdin to stdout. |
| 277 bool success = init_encoder(&strm, preset); |
| 278 if (success) |
| 279 success = compress(&strm, stdin, stdout); |
| 280 |
| 281 // Free the memory allocated for the encoder. If we were encoding |
| 282 // multiple files, this would only need to be done after the last |
| 283 // file. See 02_decompress.c for handling of multiple files. |
| 284 // |
| 285 // It is OK to call lzma_end() multiple times or when it hasn't been |
| 286 // actually used except initialized with LZMA_STREAM_INIT. |
| 287 lzma_end(&strm); |
| 288 |
| 289 // Close stdout to catch possible write errors that can occur |
| 290 // when pending data is flushed from the stdio buffers. |
| 291 if (fclose(stdout)) { |
| 292 fprintf(stderr, "Write error: %s\n", strerror(errno)); |
| 293 success = false; |
| 294 } |
| 295 |
| 296 return success ? EXIT_SUCCESS : EXIT_FAILURE; |
| 297 } |
OLD | NEW |