001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023 024import org.apache.commons.compress.compressors.CompressorOutputStream; 025 026/** 027 * An output stream that compresses into the BZip2 format into another stream. 028 * 029 * <p> 030 * The compression requires large amounts of memory. Thus you should call the 031 * {@link #close() close()} method as soon as possible, to force 032 * {@code BZip2CompressorOutputStream} to release the allocated memory. 033 * </p> 034 * 035 * <p> You can shrink the amount of allocated memory and maybe raise 036 * the compression speed by choosing a lower blocksize, which in turn 037 * may cause a lower compression ratio. You can avoid unnecessary 038 * memory allocation by avoiding using a blocksize which is bigger 039 * than the size of the input. </p> 040 * 041 * <p> You can compute the memory usage for compressing by the 042 * following formula: </p> 043 * 044 * <pre> 045 * <code>400k + (9 * blocksize)</code>. 046 * </pre> 047 * 048 * <p> To get the memory required for decompression by {@link 049 * BZip2CompressorInputStream} use </p> 050 * 051 * <pre> 052 * <code>65k + (5 * blocksize)</code>. 053 * </pre> 054 * 055 * <table style="width:100%" border="1"> 056 * <caption>Memory usage by blocksize</caption> 057 * <tr> 058 * <th colspan="3">Memory usage by blocksize</th> 059 * </tr> 060 * <tr> 061 * <th style="text-align: right">Blocksize</th> <th style="text-align: right">Compression<br> 062 * memory usage</th> <th style="text-align: right">Decompression<br> 063 * memory usage</th> 064 * </tr> 065 * <tr> 066 * <td style="text-align: right">100k</td> 067 * <td style="text-align: right">1300k</td> 068 * <td style="text-align: right">565k</td> 069 * </tr> 070 * <tr> 071 * <td style="text-align: right">200k</td> 072 * <td style="text-align: right">2200k</td> 073 * <td style="text-align: right">1065k</td> 074 * </tr> 075 * <tr> 076 * <td style="text-align: right">300k</td> 077 * <td style="text-align: right">3100k</td> 078 * <td style="text-align: right">1565k</td> 079 * </tr> 080 * <tr> 081 * <td style="text-align: right">400k</td> 082 * <td style="text-align: right">4000k</td> 083 * <td style="text-align: right">2065k</td> 084 * </tr> 085 * <tr> 086 * <td style="text-align: right">500k</td> 087 * <td style="text-align: right">4900k</td> 088 * <td style="text-align: right">2565k</td> 089 * </tr> 090 * <tr> 091 * <td style="text-align: right">600k</td> 092 * <td style="text-align: right">5800k</td> 093 * <td style="text-align: right">3065k</td> 094 * </tr> 095 * <tr> 096 * <td style="text-align: right">700k</td> 097 * <td style="text-align: right">6700k</td> 098 * <td style="text-align: right">3565k</td> 099 * </tr> 100 * <tr> 101 * <td style="text-align: right">800k</td> 102 * <td style="text-align: right">7600k</td> 103 * <td style="text-align: right">4065k</td> 104 * </tr> 105 * <tr> 106 * <td style="text-align: right">900k</td> 107 * <td style="text-align: right">8500k</td> 108 * <td style="text-align: right">4565k</td> 109 * </tr> 110 * </table> 111 * 112 * <p> 113 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the 114 * bzipped input is smaller than one block. 115 * </p> 116 * 117 * <p> 118 * Instances of this class are not threadsafe. 119 * </p> 120 * 121 * <p> 122 * TODO: Update to BZip2 1.0.1 123 * </p> 124 * @NotThreadSafe 125 */ 126public class BZip2CompressorOutputStream extends CompressorOutputStream 127 implements BZip2Constants { 128 129 /** 130 * The minimum supported blocksize {@code == 1}. 131 */ 132 public static final int MIN_BLOCKSIZE = 1; 133 134 /** 135 * The maximum supported blocksize {@code == 9}. 136 */ 137 public static final int MAX_BLOCKSIZE = 9; 138 139 private static final int GREATER_ICOST = 15; 140 private static final int LESSER_ICOST = 0; 141 142 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, 143 final Data dat, final int alphaSize, 144 final int maxLen) { 145 /* 146 * Nodes and heap entries run from 1. Entry 0 for both the heap and 147 * nodes is a sentinel. 148 */ 149 final int[] heap = dat.heap; 150 final int[] weight = dat.weight; 151 final int[] parent = dat.parent; 152 153 for (int i = alphaSize; --i >= 0;) { 154 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 155 } 156 157 for (boolean tooLong = true; tooLong;) { 158 tooLong = false; 159 160 int nNodes = alphaSize; 161 int nHeap = 0; 162 heap[0] = 0; 163 weight[0] = 0; 164 parent[0] = -2; 165 166 for (int i = 1; i <= alphaSize; i++) { 167 parent[i] = -1; 168 nHeap++; 169 heap[nHeap] = i; 170 171 int zz = nHeap; 172 final int tmp = heap[zz]; 173 while (weight[tmp] < weight[heap[zz >> 1]]) { 174 heap[zz] = heap[zz >> 1]; 175 zz >>= 1; 176 } 177 heap[zz] = tmp; 178 } 179 180 while (nHeap > 1) { 181 final int n1 = heap[1]; 182 heap[1] = heap[nHeap]; 183 nHeap--; 184 185 int yy = 0; 186 int zz = 1; 187 int tmp = heap[1]; 188 189 while (true) { 190 yy = zz << 1; 191 192 if (yy > nHeap) { 193 break; 194 } 195 196 if ((yy < nHeap) 197 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 198 yy++; 199 } 200 201 if (weight[tmp] < weight[heap[yy]]) { 202 break; 203 } 204 205 heap[zz] = heap[yy]; 206 zz = yy; 207 } 208 209 heap[zz] = tmp; 210 211 final int n2 = heap[1]; 212 heap[1] = heap[nHeap]; 213 nHeap--; 214 215 yy = 0; 216 zz = 1; 217 tmp = heap[1]; 218 219 while (true) { 220 yy = zz << 1; 221 222 if (yy > nHeap) { 223 break; 224 } 225 226 if ((yy < nHeap) 227 && (weight[heap[yy + 1]] < weight[heap[yy]])) { 228 yy++; 229 } 230 231 if (weight[tmp] < weight[heap[yy]]) { 232 break; 233 } 234 235 heap[zz] = heap[yy]; 236 zz = yy; 237 } 238 239 heap[zz] = tmp; 240 nNodes++; 241 parent[n1] = parent[n2] = nNodes; 242 243 final int weight_n1 = weight[n1]; 244 final int weight_n2 = weight[n2]; 245 weight[nNodes] = ((weight_n1 & 0xffffff00) 246 + (weight_n2 & 0xffffff00)) 247 | (1 + (((weight_n1 & 0x000000ff) 248 > (weight_n2 & 0x000000ff)) 249 ? (weight_n1 & 0x000000ff) 250 : (weight_n2 & 0x000000ff))); 251 252 parent[nNodes] = -1; 253 nHeap++; 254 heap[nHeap] = nNodes; 255 256 tmp = 0; 257 zz = nHeap; 258 tmp = heap[zz]; 259 final int weight_tmp = weight[tmp]; 260 while (weight_tmp < weight[heap[zz >> 1]]) { 261 heap[zz] = heap[zz >> 1]; 262 zz >>= 1; 263 } 264 heap[zz] = tmp; 265 266 } 267 268 for (int i = 1; i <= alphaSize; i++) { 269 int j = 0; 270 int k = i; 271 272 for (int parent_k; (parent_k = parent[k]) >= 0;) { 273 k = parent_k; 274 j++; 275 } 276 277 len[i - 1] = (byte) j; 278 if (j > maxLen) { 279 tooLong = true; 280 } 281 } 282 283 if (tooLong) { 284 for (int i = 1; i < alphaSize; i++) { 285 int j = weight[i] >> 8; 286 j = 1 + (j >> 1); 287 weight[i] = j << 8; 288 } 289 } 290 } 291 } 292 293 /** 294 * Index of the last char in the block, so the block size == last + 1. 295 */ 296 private int last; 297 298 /** 299 * Always: in the range 0 .. 9. The current block size is 100000 * this 300 * number. 301 */ 302 private final int blockSize100k; 303 304 private int bsBuff; 305 private int bsLive; 306 private final CRC crc = new CRC(); 307 308 private int nInUse; 309 310 private int nMTF; 311 312 private int currentChar = -1; 313 private int runLength; 314 315 private int blockCRC; 316 private int combinedCRC; 317 private final int allowableBlockSize; 318 319 /** 320 * All memory intensive stuff. 321 */ 322 private Data data; 323 private BlockSort blockSorter; 324 325 private OutputStream out; 326 private volatile boolean closed; 327 328 /** 329 * Chooses a blocksize based on the given length of the data to compress. 330 * 331 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and 332 * {@link #MAX_BLOCKSIZE} both inclusive. For a negative 333 * {@code inputLength} this method returns {@code MAX_BLOCKSIZE} 334 * always. 335 * 336 * @param inputLength 337 * The length of the data which will be compressed by 338 * {@code BZip2CompressorOutputStream}. 339 */ 340 public static int chooseBlockSize(final long inputLength) { 341 return (inputLength > 0) ? (int) Math 342 .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; 343 } 344 345 /** 346 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 347 * 348 * @param out 349 * the destination stream. 350 * 351 * @throws IOException 352 * if an I/O error occurs in the specified stream. 353 * @throws NullPointerException 354 * if <code>out == null</code>. 355 */ 356 public BZip2CompressorOutputStream(final OutputStream out) 357 throws IOException { 358 this(out, MAX_BLOCKSIZE); 359 } 360 361 /** 362 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 363 * 364 * @param out 365 * the destination stream. 366 * @param blockSize 367 * the blockSize as 100k units. 368 * 369 * @throws IOException 370 * if an I/O error occurs in the specified stream. 371 * @throws IllegalArgumentException 372 * if <code>(blockSize < 1) || (blockSize > 9)</code>. 373 * @throws NullPointerException 374 * if <code>out == null</code>. 375 * 376 * @see #MIN_BLOCKSIZE 377 * @see #MAX_BLOCKSIZE 378 */ 379 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 380 if (blockSize < 1) { 381 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 382 } 383 if (blockSize > 9) { 384 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 385 } 386 387 this.blockSize100k = blockSize; 388 this.out = out; 389 390 /* 20 is just a paranoia constant */ 391 this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; 392 init(); 393 } 394 395 @Override 396 public void write(final int b) throws IOException { 397 if (closed) { 398 throw new IOException("Closed"); 399 } 400 write0(b); 401 } 402 403 /** 404 * Writes the current byte to the buffer, run-length encoding it 405 * if it has been repeated at least four times (the first step 406 * RLEs sequences of four identical bytes). 407 * 408 * <p>Flushes the current block before writing data if it is 409 * full.</p> 410 * 411 * <p>"write to the buffer" means adding to data.buffer starting 412 * two steps "after" this.last - initially starting at index 1 413 * (not 0) - and updating this.last to point to the last index 414 * written minus 1.</p> 415 */ 416 private void writeRun() throws IOException { 417 final int lastShadow = this.last; 418 419 if (lastShadow < this.allowableBlockSize) { 420 final int currentCharShadow = this.currentChar; 421 final Data dataShadow = this.data; 422 dataShadow.inUse[currentCharShadow] = true; 423 final byte ch = (byte) currentCharShadow; 424 425 int runLengthShadow = this.runLength; 426 this.crc.updateCRC(currentCharShadow, runLengthShadow); 427 428 switch (runLengthShadow) { 429 case 1: 430 dataShadow.block[lastShadow + 2] = ch; 431 this.last = lastShadow + 1; 432 break; 433 434 case 2: 435 dataShadow.block[lastShadow + 2] = ch; 436 dataShadow.block[lastShadow + 3] = ch; 437 this.last = lastShadow + 2; 438 break; 439 440 case 3: { 441 final byte[] block = dataShadow.block; 442 block[lastShadow + 2] = ch; 443 block[lastShadow + 3] = ch; 444 block[lastShadow + 4] = ch; 445 this.last = lastShadow + 3; 446 } 447 break; 448 449 default: { 450 runLengthShadow -= 4; 451 dataShadow.inUse[runLengthShadow] = true; 452 final byte[] block = dataShadow.block; 453 block[lastShadow + 2] = ch; 454 block[lastShadow + 3] = ch; 455 block[lastShadow + 4] = ch; 456 block[lastShadow + 5] = ch; 457 block[lastShadow + 6] = (byte) runLengthShadow; 458 this.last = lastShadow + 5; 459 } 460 break; 461 462 } 463 } else { 464 endBlock(); 465 initBlock(); 466 writeRun(); 467 } 468 } 469 470 /** 471 * Overridden to warn about an unclosed stream. 472 */ 473 @Override 474 protected void finalize() throws Throwable { 475 if (!closed) { 476 System.err.println("Unclosed BZip2CompressorOutputStream detected, will *not* close it"); 477 } 478 super.finalize(); 479 } 480 481 482 public void finish() throws IOException { 483 if (!closed) { 484 closed = true; 485 try { 486 if (this.runLength > 0) { 487 writeRun(); 488 } 489 this.currentChar = -1; 490 endBlock(); 491 endCompression(); 492 } finally { 493 this.out = null; 494 this.blockSorter = null; 495 this.data = null; 496 } 497 } 498 } 499 500 @Override 501 public void close() throws IOException { 502 if (!closed) { 503 try (OutputStream outShadow = this.out) { 504 finish(); 505 } 506 } 507 } 508 509 @Override 510 public void flush() throws IOException { 511 final OutputStream outShadow = this.out; 512 if (outShadow != null) { 513 outShadow.flush(); 514 } 515 } 516 517 /** 518 * Writes magic bytes like BZ on the first position of the stream 519 * and bytes indicating the file-format, which is 520 * huffmanised, followed by a digit indicating blockSize100k. 521 * @throws IOException if the magic bytes could not been written 522 */ 523 private void init() throws IOException { 524 bsPutUByte('B'); 525 bsPutUByte('Z'); 526 527 this.data = new Data(this.blockSize100k); 528 this.blockSorter = new BlockSort(this.data); 529 530 // huffmanised magic bytes 531 bsPutUByte('h'); 532 bsPutUByte('0' + this.blockSize100k); 533 534 this.combinedCRC = 0; 535 initBlock(); 536 } 537 538 private void initBlock() { 539 // blockNo++; 540 this.crc.initializeCRC(); 541 this.last = -1; 542 // ch = 0; 543 544 final boolean[] inUse = this.data.inUse; 545 for (int i = 256; --i >= 0;) { 546 inUse[i] = false; 547 } 548 549 } 550 551 private void endBlock() throws IOException { 552 this.blockCRC = this.crc.getFinalCRC(); 553 this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); 554 this.combinedCRC ^= this.blockCRC; 555 556 // empty block at end of file 557 if (this.last == -1) { 558 return; 559 } 560 561 /* sort the block and establish posn of original string */ 562 blockSort(); 563 564 /* 565 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 566 * :-). A 32 bit value does not really give a strong enough guarantee 567 * that the value will not appear by chance in the compressed 568 * datastream. Worst-case probability of this event, for a 900k block, 569 * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 570 * bits. For a compressed file of size 100Gb -- about 100000 blocks -- 571 * only a 48-bit marker will do. NB: normal compression/ decompression 572 * donot rely on these statistical properties. They are only important 573 * when trying to recover blocks from damaged files. 574 */ 575 bsPutUByte(0x31); 576 bsPutUByte(0x41); 577 bsPutUByte(0x59); 578 bsPutUByte(0x26); 579 bsPutUByte(0x53); 580 bsPutUByte(0x59); 581 582 /* Now the block's CRC, so it is in a known place. */ 583 bsPutInt(this.blockCRC); 584 585 /* Now a single bit indicating no randomisation. */ 586 bsW(1, 0); 587 588 /* Finally, block's contents proper. */ 589 moveToFrontCodeAndSend(); 590 } 591 592 private void endCompression() throws IOException { 593 /* 594 * Now another magic 48-bit number, 0x177245385090, to indicate the end 595 * of the last block. (sqrt(pi), if you want to know. I did want to use 596 * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me 597 * to feel statistically comfortable. Call me paranoid.) 598 */ 599 bsPutUByte(0x17); 600 bsPutUByte(0x72); 601 bsPutUByte(0x45); 602 bsPutUByte(0x38); 603 bsPutUByte(0x50); 604 bsPutUByte(0x90); 605 606 bsPutInt(this.combinedCRC); 607 bsFinishedWithStream(); 608 } 609 610 /** 611 * Returns the blocksize parameter specified at construction time. 612 * @return the blocksize parameter specified at construction time 613 */ 614 public final int getBlockSize() { 615 return this.blockSize100k; 616 } 617 618 @Override 619 public void write(final byte[] buf, int offs, final int len) 620 throws IOException { 621 if (offs < 0) { 622 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 623 } 624 if (len < 0) { 625 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 626 } 627 if (offs + len > buf.length) { 628 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 629 + len + ") > buf.length(" 630 + buf.length + ")."); 631 } 632 if (closed) { 633 throw new IOException("Stream closed"); 634 } 635 636 for (final int hi = offs + len; offs < hi;) { 637 write0(buf[offs++]); 638 } 639 } 640 641 /** 642 * Keeps track of the last bytes written and implicitly performs 643 * run-length encoding as the first step of the bzip2 algorithm. 644 */ 645 private void write0(int b) throws IOException { 646 if (this.currentChar != -1) { 647 b &= 0xff; 648 if (this.currentChar == b) { 649 if (++this.runLength > 254) { 650 writeRun(); 651 this.currentChar = -1; 652 this.runLength = 0; 653 } 654 // else nothing to do 655 } else { 656 writeRun(); 657 this.runLength = 1; 658 this.currentChar = b; 659 } 660 } else { 661 this.currentChar = b & 0xff; 662 this.runLength++; 663 } 664 } 665 666 private static void hbAssignCodes(final int[] code, final byte[] length, 667 final int minLen, final int maxLen, 668 final int alphaSize) { 669 int vec = 0; 670 for (int n = minLen; n <= maxLen; n++) { 671 for (int i = 0; i < alphaSize; i++) { 672 if ((length[i] & 0xff) == n) { 673 code[i] = vec; 674 vec++; 675 } 676 } 677 vec <<= 1; 678 } 679 } 680 681 private void bsFinishedWithStream() throws IOException { 682 while (this.bsLive > 0) { 683 final int ch = this.bsBuff >> 24; 684 this.out.write(ch); // write 8-bit 685 this.bsBuff <<= 8; 686 this.bsLive -= 8; 687 } 688 } 689 690 private void bsW(final int n, final int v) throws IOException { 691 final OutputStream outShadow = this.out; 692 int bsLiveShadow = this.bsLive; 693 int bsBuffShadow = this.bsBuff; 694 695 while (bsLiveShadow >= 8) { 696 outShadow.write(bsBuffShadow >> 24); // write 8-bit 697 bsBuffShadow <<= 8; 698 bsLiveShadow -= 8; 699 } 700 701 this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); 702 this.bsLive = bsLiveShadow + n; 703 } 704 705 private void bsPutUByte(final int c) throws IOException { 706 bsW(8, c); 707 } 708 709 private void bsPutInt(final int u) throws IOException { 710 bsW(8, (u >> 24) & 0xff); 711 bsW(8, (u >> 16) & 0xff); 712 bsW(8, (u >> 8) & 0xff); 713 bsW(8, u & 0xff); 714 } 715 716 private void sendMTFValues() throws IOException { 717 final byte[][] len = this.data.sendMTFValues_len; 718 final int alphaSize = this.nInUse + 2; 719 720 for (int t = N_GROUPS; --t >= 0;) { 721 final byte[] len_t = len[t]; 722 for (int v = alphaSize; --v >= 0;) { 723 len_t[v] = GREATER_ICOST; 724 } 725 } 726 727 /* Decide how many coding tables to use */ 728 // assert (this.nMTF > 0) : this.nMTF; 729 final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 730 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; 731 732 /* Generate an initial set of coding tables */ 733 sendMTFValues0(nGroups, alphaSize); 734 735 /* 736 * Iterate up to N_ITERS times to improve the tables. 737 */ 738 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 739 740 /* Compute MTF values for the selectors. */ 741 sendMTFValues2(nGroups, nSelectors); 742 743 /* Assign actual codes for the tables. */ 744 sendMTFValues3(nGroups, alphaSize); 745 746 /* Transmit the mapping table. */ 747 sendMTFValues4(); 748 749 /* Now the selectors. */ 750 sendMTFValues5(nGroups, nSelectors); 751 752 /* Now the coding tables. */ 753 sendMTFValues6(nGroups, alphaSize); 754 755 /* And finally, the block data proper */ 756 sendMTFValues7(); 757 } 758 759 private void sendMTFValues0(final int nGroups, final int alphaSize) { 760 final byte[][] len = this.data.sendMTFValues_len; 761 final int[] mtfFreq = this.data.mtfFreq; 762 763 int remF = this.nMTF; 764 int gs = 0; 765 766 for (int nPart = nGroups; nPart > 0; nPart--) { 767 final int tFreq = remF / nPart; 768 int ge = gs - 1; 769 int aFreq = 0; 770 771 for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { 772 aFreq += mtfFreq[++ge]; 773 } 774 775 if ((ge > gs) && (nPart != nGroups) && (nPart != 1) 776 && (((nGroups - nPart) & 1) != 0)) { 777 aFreq -= mtfFreq[ge--]; 778 } 779 780 final byte[] len_np = len[nPart - 1]; 781 for (int v = alphaSize; --v >= 0;) { 782 if ((v >= gs) && (v <= ge)) { 783 len_np[v] = LESSER_ICOST; 784 } else { 785 len_np[v] = GREATER_ICOST; 786 } 787 } 788 789 gs = ge + 1; 790 remF -= aFreq; 791 } 792 } 793 794 private int sendMTFValues1(final int nGroups, final int alphaSize) { 795 final Data dataShadow = this.data; 796 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 797 final int[] fave = dataShadow.sendMTFValues_fave; 798 final short[] cost = dataShadow.sendMTFValues_cost; 799 final char[] sfmap = dataShadow.sfmap; 800 final byte[] selector = dataShadow.selector; 801 final byte[][] len = dataShadow.sendMTFValues_len; 802 final byte[] len_0 = len[0]; 803 final byte[] len_1 = len[1]; 804 final byte[] len_2 = len[2]; 805 final byte[] len_3 = len[3]; 806 final byte[] len_4 = len[4]; 807 final byte[] len_5 = len[5]; 808 final int nMTFShadow = this.nMTF; 809 810 int nSelectors = 0; 811 812 for (int iter = 0; iter < N_ITERS; iter++) { 813 for (int t = nGroups; --t >= 0;) { 814 fave[t] = 0; 815 final int[] rfreqt = rfreq[t]; 816 for (int i = alphaSize; --i >= 0;) { 817 rfreqt[i] = 0; 818 } 819 } 820 821 nSelectors = 0; 822 823 for (int gs = 0; gs < this.nMTF;) { 824 /* Set group start & end marks. */ 825 826 /* 827 * Calculate the cost of this group as coded by each of the 828 * coding tables. 829 */ 830 831 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 832 833 if (nGroups == N_GROUPS) { 834 // unrolled version of the else-block 835 836 short cost0 = 0; 837 short cost1 = 0; 838 short cost2 = 0; 839 short cost3 = 0; 840 short cost4 = 0; 841 short cost5 = 0; 842 843 for (int i = gs; i <= ge; i++) { 844 final int icv = sfmap[i]; 845 cost0 += len_0[icv] & 0xff; 846 cost1 += len_1[icv] & 0xff; 847 cost2 += len_2[icv] & 0xff; 848 cost3 += len_3[icv] & 0xff; 849 cost4 += len_4[icv] & 0xff; 850 cost5 += len_5[icv] & 0xff; 851 } 852 853 cost[0] = cost0; 854 cost[1] = cost1; 855 cost[2] = cost2; 856 cost[3] = cost3; 857 cost[4] = cost4; 858 cost[5] = cost5; 859 860 } else { 861 for (int t = nGroups; --t >= 0;) { 862 cost[t] = 0; 863 } 864 865 for (int i = gs; i <= ge; i++) { 866 final int icv = sfmap[i]; 867 for (int t = nGroups; --t >= 0;) { 868 cost[t] += len[t][icv] & 0xff; 869 } 870 } 871 } 872 873 /* 874 * Find the coding table which is best for this group, and 875 * record its identity in the selector table. 876 */ 877 int bt = -1; 878 for (int t = nGroups, bc = 999999999; --t >= 0;) { 879 final int cost_t = cost[t]; 880 if (cost_t < bc) { 881 bc = cost_t; 882 bt = t; 883 } 884 } 885 886 fave[bt]++; 887 selector[nSelectors] = (byte) bt; 888 nSelectors++; 889 890 /* 891 * Increment the symbol frequencies for the selected table. 892 */ 893 final int[] rfreq_bt = rfreq[bt]; 894 for (int i = gs; i <= ge; i++) { 895 rfreq_bt[sfmap[i]]++; 896 } 897 898 gs = ge + 1; 899 } 900 901 /* 902 * Recompute the tables based on the accumulated frequencies. 903 */ 904 for (int t = 0; t < nGroups; t++) { 905 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 906 } 907 } 908 909 return nSelectors; 910 } 911 912 private void sendMTFValues2(final int nGroups, final int nSelectors) { 913 // assert (nGroups < 8) : nGroups; 914 915 final Data dataShadow = this.data; 916 final byte[] pos = dataShadow.sendMTFValues2_pos; 917 918 for (int i = nGroups; --i >= 0;) { 919 pos[i] = (byte) i; 920 } 921 922 for (int i = 0; i < nSelectors; i++) { 923 final byte ll_i = dataShadow.selector[i]; 924 byte tmp = pos[0]; 925 int j = 0; 926 927 while (ll_i != tmp) { 928 j++; 929 final byte tmp2 = tmp; 930 tmp = pos[j]; 931 pos[j] = tmp2; 932 } 933 934 pos[0] = tmp; 935 dataShadow.selectorMtf[i] = (byte) j; 936 } 937 } 938 939 private void sendMTFValues3(final int nGroups, final int alphaSize) { 940 final int[][] code = this.data.sendMTFValues_code; 941 final byte[][] len = this.data.sendMTFValues_len; 942 943 for (int t = 0; t < nGroups; t++) { 944 int minLen = 32; 945 int maxLen = 0; 946 final byte[] len_t = len[t]; 947 for (int i = alphaSize; --i >= 0;) { 948 final int l = len_t[i] & 0xff; 949 if (l > maxLen) { 950 maxLen = l; 951 } 952 if (l < minLen) { 953 minLen = l; 954 } 955 } 956 957 // assert (maxLen <= 20) : maxLen; 958 // assert (minLen >= 1) : minLen; 959 960 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 961 } 962 } 963 964 private void sendMTFValues4() throws IOException { 965 final boolean[] inUse = this.data.inUse; 966 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 967 968 for (int i = 16; --i >= 0;) { 969 inUse16[i] = false; 970 final int i16 = i * 16; 971 for (int j = 16; --j >= 0;) { 972 if (inUse[i16 + j]) { 973 inUse16[i] = true; 974 break; 975 } 976 } 977 } 978 979 for (int i = 0; i < 16; i++) { 980 bsW(1, inUse16[i] ? 1 : 0); 981 } 982 983 final OutputStream outShadow = this.out; 984 int bsLiveShadow = this.bsLive; 985 int bsBuffShadow = this.bsBuff; 986 987 for (int i = 0; i < 16; i++) { 988 if (inUse16[i]) { 989 final int i16 = i * 16; 990 for (int j = 0; j < 16; j++) { 991 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 992 while (bsLiveShadow >= 8) { 993 outShadow.write(bsBuffShadow >> 24); // write 8-bit 994 bsBuffShadow <<= 8; 995 bsLiveShadow -= 8; 996 } 997 if (inUse[i16 + j]) { 998 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 999 } 1000 bsLiveShadow++; 1001 } 1002 } 1003 } 1004 1005 this.bsBuff = bsBuffShadow; 1006 this.bsLive = bsLiveShadow; 1007 } 1008 1009 private void sendMTFValues5(final int nGroups, final int nSelectors) 1010 throws IOException { 1011 bsW(3, nGroups); 1012 bsW(15, nSelectors); 1013 1014 final OutputStream outShadow = this.out; 1015 final byte[] selectorMtf = this.data.selectorMtf; 1016 1017 int bsLiveShadow = this.bsLive; 1018 int bsBuffShadow = this.bsBuff; 1019 1020 for (int i = 0; i < nSelectors; i++) { 1021 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1022 // inlined: bsW(1, 1); 1023 while (bsLiveShadow >= 8) { 1024 outShadow.write(bsBuffShadow >> 24); 1025 bsBuffShadow <<= 8; 1026 bsLiveShadow -= 8; 1027 } 1028 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); 1029 bsLiveShadow++; 1030 } 1031 1032 // inlined: bsW(1, 0); 1033 while (bsLiveShadow >= 8) { 1034 outShadow.write(bsBuffShadow >> 24); 1035 bsBuffShadow <<= 8; 1036 bsLiveShadow -= 8; 1037 } 1038 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1039 bsLiveShadow++; 1040 } 1041 1042 this.bsBuff = bsBuffShadow; 1043 this.bsLive = bsLiveShadow; 1044 } 1045 1046 private void sendMTFValues6(final int nGroups, final int alphaSize) 1047 throws IOException { 1048 final byte[][] len = this.data.sendMTFValues_len; 1049 final OutputStream outShadow = this.out; 1050 1051 int bsLiveShadow = this.bsLive; 1052 int bsBuffShadow = this.bsBuff; 1053 1054 for (int t = 0; t < nGroups; t++) { 1055 final byte[] len_t = len[t]; 1056 int curr = len_t[0] & 0xff; 1057 1058 // inlined: bsW(5, curr); 1059 while (bsLiveShadow >= 8) { 1060 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1061 bsBuffShadow <<= 8; 1062 bsLiveShadow -= 8; 1063 } 1064 bsBuffShadow |= curr << (32 - bsLiveShadow - 5); 1065 bsLiveShadow += 5; 1066 1067 for (int i = 0; i < alphaSize; i++) { 1068 final int lti = len_t[i] & 0xff; 1069 while (curr < lti) { 1070 // inlined: bsW(2, 2); 1071 while (bsLiveShadow >= 8) { 1072 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1073 bsBuffShadow <<= 8; 1074 bsLiveShadow -= 8; 1075 } 1076 bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); 1077 bsLiveShadow += 2; 1078 1079 curr++; /* 10 */ 1080 } 1081 1082 while (curr > lti) { 1083 // inlined: bsW(2, 3); 1084 while (bsLiveShadow >= 8) { 1085 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1086 bsBuffShadow <<= 8; 1087 bsLiveShadow -= 8; 1088 } 1089 bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); 1090 bsLiveShadow += 2; 1091 1092 curr--; /* 11 */ 1093 } 1094 1095 // inlined: bsW(1, 0); 1096 while (bsLiveShadow >= 8) { 1097 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1098 bsBuffShadow <<= 8; 1099 bsLiveShadow -= 8; 1100 } 1101 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1102 bsLiveShadow++; 1103 } 1104 } 1105 1106 this.bsBuff = bsBuffShadow; 1107 this.bsLive = bsLiveShadow; 1108 } 1109 1110 private void sendMTFValues7() throws IOException { 1111 final Data dataShadow = this.data; 1112 final byte[][] len = dataShadow.sendMTFValues_len; 1113 final int[][] code = dataShadow.sendMTFValues_code; 1114 final OutputStream outShadow = this.out; 1115 final byte[] selector = dataShadow.selector; 1116 final char[] sfmap = dataShadow.sfmap; 1117 final int nMTFShadow = this.nMTF; 1118 1119 int selCtr = 0; 1120 1121 int bsLiveShadow = this.bsLive; 1122 int bsBuffShadow = this.bsBuff; 1123 1124 for (int gs = 0; gs < nMTFShadow;) { 1125 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1126 final int selector_selCtr = selector[selCtr] & 0xff; 1127 final int[] code_selCtr = code[selector_selCtr]; 1128 final byte[] len_selCtr = len[selector_selCtr]; 1129 1130 while (gs <= ge) { 1131 final int sfmap_i = sfmap[gs]; 1132 1133 // 1134 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1135 // code_selCtr[sfmap_i]); 1136 // 1137 while (bsLiveShadow >= 8) { 1138 outShadow.write(bsBuffShadow >> 24); 1139 bsBuffShadow <<= 8; 1140 bsLiveShadow -= 8; 1141 } 1142 final int n = len_selCtr[sfmap_i] & 0xFF; 1143 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); 1144 bsLiveShadow += n; 1145 1146 gs++; 1147 } 1148 1149 gs = ge + 1; 1150 selCtr++; 1151 } 1152 1153 this.bsBuff = bsBuffShadow; 1154 this.bsLive = bsLiveShadow; 1155 } 1156 1157 private void moveToFrontCodeAndSend() throws IOException { 1158 bsW(24, this.data.origPtr); 1159 generateMTFValues(); 1160 sendMTFValues(); 1161 } 1162 1163 private void blockSort() { 1164 blockSorter.blockSort(data, last); 1165 } 1166 1167 /* 1168 * Performs Move-To-Front on the Burrows-Wheeler transformed 1169 * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB 1170 * run-length-encoded form. 1171 * 1172 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 1173 */ 1174 private void generateMTFValues() { 1175 final int lastShadow = this.last; 1176 final Data dataShadow = this.data; 1177 final boolean[] inUse = dataShadow.inUse; 1178 final byte[] block = dataShadow.block; 1179 final int[] fmap = dataShadow.fmap; 1180 final char[] sfmap = dataShadow.sfmap; 1181 final int[] mtfFreq = dataShadow.mtfFreq; 1182 final byte[] unseqToSeq = dataShadow.unseqToSeq; 1183 final byte[] yy = dataShadow.generateMTFValues_yy; 1184 1185 // make maps 1186 int nInUseShadow = 0; 1187 for (int i = 0; i < 256; i++) { 1188 if (inUse[i]) { 1189 unseqToSeq[i] = (byte) nInUseShadow; 1190 nInUseShadow++; 1191 } 1192 } 1193 this.nInUse = nInUseShadow; 1194 1195 final int eob = nInUseShadow + 1; 1196 1197 for (int i = eob; i >= 0; i--) { 1198 mtfFreq[i] = 0; 1199 } 1200 1201 for (int i = nInUseShadow; --i >= 0;) { 1202 yy[i] = (byte) i; 1203 } 1204 1205 int wr = 0; 1206 int zPend = 0; 1207 1208 for (int i = 0; i <= lastShadow; i++) { 1209 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 1210 byte tmp = yy[0]; 1211 int j = 0; 1212 1213 while (ll_i != tmp) { 1214 j++; 1215 final byte tmp2 = tmp; 1216 tmp = yy[j]; 1217 yy[j] = tmp2; 1218 } 1219 yy[0] = tmp; 1220 1221 if (j == 0) { 1222 zPend++; 1223 } else { 1224 if (zPend > 0) { 1225 zPend--; 1226 while (true) { 1227 if ((zPend & 1) == 0) { 1228 sfmap[wr] = RUNA; 1229 wr++; 1230 mtfFreq[RUNA]++; 1231 } else { 1232 sfmap[wr] = RUNB; 1233 wr++; 1234 mtfFreq[RUNB]++; 1235 } 1236 1237 if (zPend < 2) { 1238 break; 1239 } 1240 zPend = (zPend - 2) >> 1; 1241 } 1242 zPend = 0; 1243 } 1244 sfmap[wr] = (char) (j + 1); 1245 wr++; 1246 mtfFreq[j + 1]++; 1247 } 1248 } 1249 1250 if (zPend > 0) { 1251 zPend--; 1252 while (true) { 1253 if ((zPend & 1) == 0) { 1254 sfmap[wr] = RUNA; 1255 wr++; 1256 mtfFreq[RUNA]++; 1257 } else { 1258 sfmap[wr] = RUNB; 1259 wr++; 1260 mtfFreq[RUNB]++; 1261 } 1262 1263 if (zPend < 2) { 1264 break; 1265 } 1266 zPend = (zPend - 2) >> 1; 1267 } 1268 } 1269 1270 sfmap[wr] = (char) eob; 1271 mtfFreq[eob]++; 1272 this.nMTF = wr + 1; 1273 } 1274 1275 static final class Data { 1276 1277 // with blockSize 900k 1278 /* maps unsigned byte => "does it occur in block" */ 1279 final boolean[] inUse = new boolean[256]; // 256 byte 1280 final byte[] unseqToSeq = new byte[256]; // 256 byte 1281 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 1282 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 1283 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 1284 1285 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 1286 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 1287 // byte 1288 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 1289 // byte 1290 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 1291 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 1292 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 1293 // byte 1294 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 1295 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 1296 1297 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 1298 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 1299 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 1300 1301 // ------------ 1302 // 333408 byte 1303 1304 /* holds the RLEd block of original data starting at index 1. 1305 * After sorting the last byte added to the buffer is at index 1306 * 0. */ 1307 final byte[] block; // 900021 byte 1308 /* maps index in Burrows-Wheeler transformed block => index of 1309 * byte in original block */ 1310 final int[] fmap; // 3600000 byte 1311 final char[] sfmap; // 3600000 byte 1312 // ------------ 1313 // 8433529 byte 1314 // ============ 1315 1316 /** 1317 * Index of original line in Burrows-Wheeler table. 1318 * 1319 * <p>This is the index in fmap that points to the last byte 1320 * of the original data.</p> 1321 */ 1322 int origPtr; 1323 1324 Data(final int blockSize100k) { 1325 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; 1326 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; 1327 this.fmap = new int[n]; 1328 this.sfmap = new char[2 * n]; 1329 } 1330 1331 } 1332 1333}