/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;

/**
 * The TarFile provides random access to UNIX archives.
 *
 * <p>All entries are parsed eagerly in the constructor; afterwards
 * {@link #getEntries()} returns the full entry list and
 * {@link #getInputStream(TarArchiveEntry)} gives random access to any
 * entry's data via the underlying {@link SeekableByteChannel}.</p>
 *
 * @since 1.21
 */
public class TarFile implements Closeable {

    private static final int SMALL_BUFFER_SIZE = 256;

    // scratch buffer used when draining GNU long-name/long-link entries
    private final byte[] smallBuf = new byte[SMALL_BUFFER_SIZE];

    private final SeekableByteChannel archive;

    /**
     * The encoding of the tar file
     */
    private final ZipEncoding zipEncoding;

    private final LinkedList<TarArchiveEntry> entries = new LinkedList<>();

    private final int blockSize;

    private final boolean lenient;

    private final int recordSize;

    // reused for every readRecord() call; see readRecord()
    private final ByteBuffer recordBuffer;

    // the global sparse headers, this is only used in PAX Format 0.X
    private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();

    private boolean hasHitEOF;

    /**
     * The meta-data about the current entry
     */
    private TarArchiveEntry currEntry;

    // the global PAX header
    private Map<String, String> globalPaxHeaders = new HashMap<>();

    // per-entry-name list of streams (zero-fill + archive-backed) composing a sparse entry's data
    private final Map<String, List<InputStream>> sparseInputStreams = new HashMap<>();

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content) throws IOException {
        this(new SeekableInMemoryByteChannel(content));
    }

    /**
     * Constructor for TarFile.
     *
     * @param content  the content to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final String encoding) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final byte[] content, final boolean lenient) throws IOException {
        this(new SeekableInMemoryByteChannel(content), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive) throws IOException {
        this(archive.toPath());
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive  the file of the archive to use
     * @param encoding the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final String encoding) throws IOException {
        this(archive.toPath(), encoding);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archive the file of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final File archive, final boolean lenient) throws IOException {
        this(archive.toPath(), lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param encoding    the encoding to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final String encoding) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding, false);
    }

    /**
     * Constructor for TarFile.
     *
     * @param archivePath the path of the archive to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final Path archivePath, final boolean lenient) throws IOException {
        this(Files.newByteChannel(archivePath), TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, lenient);
    }

    /**
     * Constructor for TarFile.
     *
     * @param content the content to use
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel content) throws IOException {
        this(content, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, null, false);
    }

    /**
     * Constructor for TarFile. Eagerly scans the whole archive and collects
     * all entries; the channel is left positioned after the last entry read.
     *
     * @param archive    the seekable byte channel to use
     * @param blockSize  the blocks size to use
     * @param recordSize the record size to use
     * @param encoding   the encoding to use
     * @param lenient when set to true illegal values for group/userid, mode, device numbers and timestamp will be
     * ignored and the fields set to {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an
     * exception instead.
     * @throws IOException when reading the tar archive fails
     */
    public TarFile(final SeekableByteChannel archive, final int blockSize, final int recordSize, final String encoding, final boolean lenient) throws IOException {
        this.archive = archive;
        this.hasHitEOF = false;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordSize = recordSize;
        this.recordBuffer = ByteBuffer.allocate(this.recordSize);
        this.blockSize = blockSize;
        this.lenient = lenient;

        TarArchiveEntry entry;
        while ((entry = getNextTarEntry()) != null) {
            entries.add(entry);
        }
    }

    /**
     * Get the next entry in this tar archive. This will skip
     * to the end of the current entry, if there is one, and
     * place the position of the channel at the header of the
     * next entry, and read the header and instantiate a new
     * TarEntry from the header bytes and return that entry.
     * If there are no more entries in the archive, null will
     * be returned to indicate that the end of the archive has
     * been reached.
     *
     * @return The next TarEntry in the archive, or null if there is no next entry.
     * @throws IOException when reading the next TarEntry fails
     */
    private TarArchiveEntry getNextTarEntry() throws IOException {
        if (isAtEOF()) {
            return null;
        }

        if (currEntry != null) {
            // Skip to the end of the entry
            repositionForwardTo(currEntry.getDataOffset() + currEntry.getSize());
            throwExceptionIfPositionIsNotInArchive();
            skipRecordPadding();
        }

        final ByteBuffer headerBuf = getRecord();
        if (null == headerBuf) {
            /* hit EOF */
            currEntry = null;
            return null;
        }

        try {
            // archive.position() is now just past the header, i.e. at the entry's data
            currEntry = new TarArchiveEntry(headerBuf.array(), zipEncoding, lenient, archive.position());
        } catch (final IllegalArgumentException e) {
            throw new IOException("Error detected parsing the header", e);
        }

        if (currEntry.isGNULongLinkEntry()) {
            final byte[] longLinkData = getLongNameData();
            if (longLinkData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long link entry name not followed by
                // entry
                return null;
            }
            currEntry.setLinkName(zipEncoding.decode(longLinkData));
        }

        if (currEntry.isGNULongNameEntry()) {
            final byte[] longNameData = getLongNameData();
            if (longNameData == null) {
                // Bugzilla: 40334
                // Malformed tar file - long entry name not followed by
                // entry
                return null;
            }

            // COMPRESS-509 : the name of directories should end with '/'
            final String name = zipEncoding.decode(longNameData);
            currEntry.setName(name);
            if (currEntry.isDirectory() && !name.endsWith("/")) {
                currEntry.setName(name + "/");
            }
        }

        if (currEntry.isGlobalPaxHeader()) { // Process Global Pax headers
            readGlobalPaxHeaders();
        }

        try {
            if (currEntry.isPaxHeader()) { // Process Pax headers
                paxHeaders();
            } else if (!globalPaxHeaders.isEmpty()) {
                applyPaxHeadersToCurrentEntry(globalPaxHeaders, globalSparseHeaders);
            }
        } catch (NumberFormatException e) {
            throw new IOException("Error detected parsing the pax header", e);
        }

        if (currEntry.isOldGNUSparse()) { // Process sparse files
            readOldGNUSparse();
        }

        return currEntry;
    }

    /**
     * Adds the sparse chunks from the current entry to the sparse chunks,
     * including any additional sparse entries following the current entry.
     *
     * @throws IOException when reading the sparse entry fails
     */
    private void readOldGNUSparse() throws IOException {
        if (currEntry.isExtended()) {
            TarArchiveSparseEntry entry;
            do {
                final ByteBuffer headerBuf = getRecord();
                if (headerBuf == null) {
                    throw new IOException("premature end of tar archive. Didn't find extended_header after header with extended flag.");
                }
                entry = new TarArchiveSparseEntry(headerBuf.array());
                currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
                // each extended header record pushes the entry's data one record further back
                currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
            } while (entry.isExtended());
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Build the input streams consisting of all-zero input streams and non-zero input streams.
     * When reading from the non-zero input streams, the data is actually read from the original input stream.
     * The size of each input stream is introduced by the sparse headers.
     *
     * @implNote Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
     * 0 size input streams because they are meaningless.
     *
     * @throws IOException when a sparse header describes an impossible layout
     */
    private void buildSparseInputStreams() throws IOException {
        final List<InputStream> streams = new ArrayList<>();

        final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();

        // Stream doesn't need to be closed at all as it doesn't use any resources
        final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR
        // logical offset into the extracted entry
        long offset = 0;
        // zero bytes are synthesized, not stored in the archive; this counter translates
        // logical (extracted) offsets back to physical offsets inside the archive
        long numberOfZeroBytesInSparseEntry = 0;
        for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
            final long zeroBlockSize = sparseHeader.getOffset() - offset;
            if (zeroBlockSize < 0) {
                // sparse header says to move backwards inside of the extracted entry
                throw new IOException("Corrupted struct sparse detected");
            }

            // only store the zero block if it is not empty
            if (zeroBlockSize > 0) {
                streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize));
                numberOfZeroBytesInSparseEntry += zeroBlockSize;
            }

            // only store the input streams with non-zero size
            if (sparseHeader.getNumbytes() > 0) {
                final long start =
                    currEntry.getDataOffset() + sparseHeader.getOffset() - numberOfZeroBytesInSparseEntry;
                if (start + sparseHeader.getNumbytes() < start) {
                    // possible integer overflow
                    throw new IOException("Unreadable TAR archive, sparse block offset or length too big");
                }
                streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
            }

            offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
        }

        sparseInputStreams.put(currEntry.getName(), streams);
    }

    /**
     * Update the current entry with the read pax headers
     * @param headers Headers read from the pax header
     * @param sparseHeaders Sparse headers read from pax header
     * @throws IOException if the pax headers contain invalid values
     */
    private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders)
        throws IOException {
        currEntry.updateEntryFromPaxHeaders(headers);
        currEntry.setSparseHeaders(sparseHeaders);
    }

    /**
     * <p>
     * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes)
     * may appear multiple times, and they look like:
     * <pre>
     * GNU.sparse.size=size
     * GNU.sparse.numblocks=numblocks
     * repeat numblocks times
     *   GNU.sparse.offset=offset
     *   GNU.sparse.numbytes=numbytes
     * end repeat
     * </pre>
     *
     * <p>
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     * <pre>
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * </pre>
     *
     * <p>
     * For PAX Format 1.X:
     * <br>
     * The sparse map itself is stored in the file data block, preceding the actual file data.
     * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary.
     * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers
     * giving the offset and size of the data block it describes.
     * @throws IOException when reading or parsing the pax headers fails
     */
    private void paxHeaders() throws IOException {
        List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final Map<String, String> headers;
        try (final InputStream input = getInputStream(currEntry)) {
            headers = TarUtils.parsePaxHeaders(input, sparseHeaders, globalPaxHeaders, currEntry.getSize());
        }

        // for 0.1 PAX Headers
        if (headers.containsKey("GNU.sparse.map")) {
            sparseHeaders = new ArrayList<>(TarUtils.parseFromPAX01SparseHeaders(headers.get("GNU.sparse.map")));
        }
        getNextTarEntry(); // Get the actual file entry
        if (currEntry == null) {
            throw new IOException("premature end of tar archive. Didn't find any entry after PAX header.");
        }
        applyPaxHeadersToCurrentEntry(headers, sparseHeaders);

        // for 1.0 PAX Format, the sparse map is stored in the file data block
        if (currEntry.isPaxGNU1XSparse()) {
            try (final InputStream input = getInputStream(currEntry)) {
                sparseHeaders = TarUtils.parsePAX1XSparseHeaders(input, recordSize);
            }
            currEntry.setSparseHeaders(sparseHeaders);
            // data of the entry is after the pax gnu entry. So we need to update the data position once again
            currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
        }

        // sparse headers are all done reading, we need to build
        // sparse input streams using these sparse headers
        buildSparseInputStreams();
    }

    /**
     * Reads a global PAX header entry, merges it into {@code globalPaxHeaders}
     * and advances to the entry that follows it.
     *
     * @throws IOException if the header cannot be parsed or no entry follows it
     */
    private void readGlobalPaxHeaders() throws IOException {
        try (InputStream input = getInputStream(currEntry)) {
            globalPaxHeaders = TarUtils.parsePaxHeaders(input, globalSparseHeaders, globalPaxHeaders,
                currEntry.getSize());
        }
        getNextTarEntry(); // Get the actual file entry

        if (currEntry == null) {
            throw new IOException("Error detected parsing the pax header");
        }
    }

    /**
     * Get the next entry in this tar archive as longname data.
     *
     * @return The next entry in the archive as longname data, or null.
     * @throws IOException on error
     */
    private byte[] getLongNameData() throws IOException {
        final ByteArrayOutputStream longName = new ByteArrayOutputStream();
        int length;
        try (final InputStream in = getInputStream(currEntry)) {
            while ((length = in.read(smallBuf)) >= 0) {
                longName.write(smallBuf, 0, length);
            }
        }
        getNextTarEntry();
        if (currEntry == null) {
            // Bugzilla: 40334
            // Malformed tar file - long entry name not followed by entry
            return null;
        }
        byte[] longNameData = longName.toByteArray();
        // remove trailing null terminator(s)
        length = longNameData.length;
        while (length > 0 && longNameData[length - 1] == 0) {
            --length;
        }
        if (length != longNameData.length) {
            final byte[] l = new byte[length];
            System.arraycopy(longNameData, 0, l, 0, length);
            longNameData = l;
        }
        return longNameData;
    }

    /**
     * The last record block should be written at the full size, so skip any
     * additional space used to fill a record after an entry
     *
     * @throws IOException when skipping the padding of the record fails
     */
    private void skipRecordPadding() throws IOException {
        if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
            final long numRecords = (currEntry.getSize() / recordSize) + 1;
            final long padding = (numRecords * recordSize) - currEntry.getSize();
            repositionForwardBy(padding);
            throwExceptionIfPositionIsNotInArchive();
        }
    }

    /**
     * Moves the channel to {@code newPosition}, refusing to seek backwards.
     *
     * @param newPosition the absolute position to move to
     * @throws IOException if the new position is before the current one
     */
    private void repositionForwardTo(final long newPosition) throws IOException {
        final long currPosition = archive.position();
        if (newPosition < currPosition) {
            throw new IOException("trying to move backwards inside of the archive");
        }
        archive.position(newPosition);
    }

    /**
     * Moves the channel forward by {@code offset} bytes.
     *
     * @param offset the number of bytes to skip
     * @throws IOException if the resulting position would be before the current one
     */
    private void repositionForwardBy(final long offset) throws IOException {
        repositionForwardTo(archive.position() + offset);
    }

    /**
     * Checks if the current position of the SeekableByteChannel is in the archive.
     * @throws IOException If the position is not in the archive
     */
    private void throwExceptionIfPositionIsNotInArchive() throws IOException {
        if (archive.size() < archive.position()) {
            throw new IOException("Truncated TAR archive");
        }
    }

    /**
     * Get the next record in this tar archive, reading it from the current
     * position of the channel.
     *
     * <p>If the record read is an EOF record, null will be returned to
     * indicate that the end of the archive has been reached; the
     * {@code hasHitEOF} marker will be set to true and the remainder of
     * the trailing zero-filled block is consumed.</p>
     *
     * @return The next header record in the archive, or null if there is no next record.
     * @throws IOException when reading the next record fails
     */
    private ByteBuffer getRecord() throws IOException {
        ByteBuffer headerBuf = readRecord();
        setAtEOF(isEOFRecord(headerBuf));
        if (isAtEOF() && headerBuf != null) {
            // Consume rest
            tryToConsumeSecondEOFRecord();
            consumeRemainderOfLastBlock();
            headerBuf = null;
        }
        return headerBuf;
    }

    /**
     * Tries to read the next record resetting the position in the
     * archive if it is not a EOF record.
     *
     * <p>This is meant to protect against cases where a tar
     * implementation has written only one EOF record when two are
     * expected. Actually this won't help since a non-conforming
     * implementation likely won't fill full blocks consisting of - by
     * default - ten records either so we probably have already read
     * beyond the archive anyway.</p>
     *
     * @throws IOException if reading the record or resetting the position in the archive fails
     */
    private void tryToConsumeSecondEOFRecord() throws IOException {
        boolean shouldReset = true;
        try {
            shouldReset = !isEOFRecord(readRecord());
        } finally {
            if (shouldReset) {
                archive.position(archive.position() - recordSize);
            }
        }
    }

    /**
     * This method is invoked once the end of the archive is hit, it
     * tries to consume the remaining bytes under the assumption that
     * the tool creating this archive has padded the last block.
     *
     * @throws IOException if repositioning the channel fails
     */
    private void consumeRemainderOfLastBlock() throws IOException {
        final long bytesReadOfLastBlock = archive.position() % blockSize;
        if (bytesReadOfLastBlock > 0) {
            repositionForwardBy(blockSize - bytesReadOfLastBlock);
        }
    }

    /**
     * Read a record from the input stream and return the data.
     *
     * <p>Note: the returned buffer is the shared {@code recordBuffer}, so its
     * contents are only valid until the next call to this method.</p>
     *
     * @return The record data or null if EOF has been hit.
     * @throws IOException if reading from the archive fails
     */
    private ByteBuffer readRecord() throws IOException {
        recordBuffer.rewind();
        final int readNow = archive.read(recordBuffer);
        if (readNow != recordSize) {
            return null;
        }
        return recordBuffer;
    }

    /**
     * Get all TAR Archive Entries from the TarFile
     *
     * @return All entries from the tar file (a fresh copy; modifying it does not affect this TarFile)
     */
    public List<TarArchiveEntry> getEntries() {
        return new ArrayList<>(entries);
    }

    // a record is an EOF record when it is missing entirely or consists only of zero bytes
    private boolean isEOFRecord(final ByteBuffer headerBuf) {
        return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize);
    }

    protected final boolean isAtEOF() {
        return hasHitEOF;
    }

    protected final void setAtEOF(final boolean b) {
        hasHitEOF = b;
    }

    private boolean isDirectory() {
        return currEntry != null && currEntry.isDirectory();
    }

    /**
     * Gets the input stream for the provided Tar Archive Entry.
     * @param entry Entry to get the input stream from
     * @return Input stream of the provided entry
     * @throws IOException Corrupted TAR archive. Can't read entry.
     */
    public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
        try {
            return new BoundedTarEntryInputStream(entry, archive);
        } catch (RuntimeException ex) {
            throw new IOException("Corrupted TAR archive. Can't read entry", ex);
        }
    }

    /**
     * Closes the underlying channel. Streams previously obtained from
     * {@link #getInputStream(TarArchiveEntry)} read from that channel and
     * therefore become unusable after this call.
     */
    @Override
    public void close() throws IOException {
        archive.close();
    }

    /**
     * InputStream over a single entry's data, backed by the shared channel.
     * For sparse entries the data is assembled from the streams registered in
     * {@code sparseInputStreams}; for regular entries it is read directly from
     * the channel at the entry's data offset.
     */
    private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {

        private final SeekableByteChannel channel;

        private final TarArchiveEntry entry;

        // logical offset into the (possibly sparse) extracted entry
        private long entryOffset;

        // index into the entry's sparse stream list; advances as streams are exhausted
        private int currentSparseInputStreamIndex;

        BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
            super(entry.getDataOffset(), entry.getRealSize());
            // reject entries whose claimed data would extend past the end of the archive
            if (channel.size() - entry.getSize() < entry.getDataOffset()) {
                throw new IOException("entry size exceeds archive size");
            }
            this.entry = entry;
            this.channel = channel;
        }

        @Override
        protected int read(final long pos, final ByteBuffer buf) throws IOException {
            if (entryOffset >= entry.getRealSize()) {
                return -1;
            }

            final int totalRead;
            if (entry.isSparse()) {
                totalRead = readSparse(entryOffset, buf, buf.limit());
            } else {
                totalRead = readArchive(pos, buf);
            }

            if (totalRead == -1) {
                // a premature EOF while data was still expected means the archive is truncated
                if (buf.array().length > 0) {
                    throw new IOException("Truncated TAR archive");
                }
                setAtEOF(true);
            } else {
                entryOffset += totalRead;
                buf.flip();
            }
            return totalRead;
        }

        private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
            // if there are no actual input streams, just read from the original archive
            final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
            if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
                return readArchive(entry.getDataOffset() + pos, buf);
            }

            if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
                return -1;
            }

            final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
            final byte[] bufArray = new byte[numToRead];
            final int readLen = currentInputStream.read(bufArray);
            if (readLen != -1) {
                buf.put(bufArray, 0, readLen);
            }

            // if the current input stream is the last input stream,
            // just return the number of bytes read from current input stream
            if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
                return readLen;
            }

            // if EOF of current input stream is meet, open a new input stream and recursively call read
            if (readLen == -1) {
                currentSparseInputStreamIndex++;
                return readSparse(pos, buf, numToRead);
            }

            // if the rest data of current input stream is not long enough, open a new input stream
            // and recursively call read
            if (readLen < numToRead) {
                currentSparseInputStreamIndex++;
                final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
                if (readLenOfNext == -1) {
                    return readLen;
                }

                return readLen + readLenOfNext;
            }

            // if the rest data of current input stream is enough(which means readLen == len), just return readLen
            return readLen;
        }

        private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
            channel.position(pos);
            return channel.read(buf);
        }
    }
}