001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.ByteArrayInputStream; 022import java.io.Closeable; 023import java.io.EOFException; 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.SequenceInputStream; 028import java.nio.Buffer; 029import java.nio.ByteBuffer; 030import java.nio.channels.FileChannel; 031import java.nio.channels.SeekableByteChannel; 032import java.nio.file.Files; 033import java.nio.file.StandardOpenOption; 034import java.util.Arrays; 035import java.util.Collections; 036import java.util.Comparator; 037import java.util.Enumeration; 038import java.util.EnumSet; 039import java.util.HashMap; 040import java.util.LinkedList; 041import java.util.List; 042import java.util.Map; 043import java.util.zip.Inflater; 044import java.util.zip.ZipException; 045 046import org.apache.commons.compress.archivers.EntryStreamOffsets; 047import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 048import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 049import 
org.apache.commons.compress.utils.BoundedArchiveInputStream; 050import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 051import org.apache.commons.compress.utils.CountingInputStream; 052import org.apache.commons.compress.utils.IOUtils; 053import org.apache.commons.compress.utils.InputStreamStatistics; 054 055import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 056import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 057import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 058import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 059import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 060 061/** 062 * Replacement for <code>java.util.zip.ZipFile</code>. 063 * 064 * <p>This class adds support for file name encodings other than UTF-8 065 * (which is required to work on ZIP files created by native zip tools) 066 * and is able to skip a preamble like the one found in self 067 * extracting archives. Furthermore it returns instances of 068 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 069 * instead of <code>java.util.zip.ZipEntry</code>.</p> 070 * 071 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 072 * have to reimplement all methods anyway. Like 073 * <code>java.util.zip.ZipFile</code>, it uses SeekableByteChannel under the 074 * covers and supports compressed and uncompressed entries. 
As of Apache Commons Compress 1.3 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
 *
 * <ul>
 *   <li>There is no getName method.</li>
 *   <li>entries has been renamed to getEntries.</li>
 *   <li>getEntries and getEntry return
 *   <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
 *   instances.</li>
 *   <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
public class ZipFile implements Closeable {
    /** Initial capacity handed to the {@code HashMap} backing {@link #nameMap}. */
    private static final int HASH_SIZE = 509;
    /**
     * Mask selecting the low four bits of a value; combined with
     * {@link #BYTE_SHIFT} to derive the platform from the "version made by"
     * field of a central directory entry.
     */
    static final int NIBLET_MASK = 0x0f;
    /** Number of bits in a byte, used as a shift distance. */
    static final int BYTE_SHIFT = 8;
    // Index constants 0..3; not referenced in the part of the file reviewed
    // here - NOTE(review): confirm they are still used before removing.
    private static final int POS_0 = 0;
    private static final int POS_1 = 1;
    private static final int POS_2 = 2;
    private static final int POS_3 = 3;
    /**
     * A single zero byte; appended after the compressed data of DEFLATED
     * entries as the padding byte a "nowrap" {@code Inflater} asks for.
     */
    private static final byte[] ONE_ZERO_BYTE = new byte[1];

    /**
     * List of entries in the order they appear inside the central
     * directory.
     */
    private final List<ZipArchiveEntry> entries =
        new LinkedList<>();

    /**
     * Maps an entry name to the list of all entries carrying that name
     * (an archive may contain several entries with the same name).
     */
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap =
        new HashMap<>(HASH_SIZE);

    /**
     * The encoding to use for file names and the file comment.
     *
     * <p>For a list of possible values see <a
     * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
     * Defaults to UTF-8.</p>
     */
    private final String encoding;

    /**
     * The zip encoding to use for file names and the file comment;
     * obtained from {@link #encoding} via {@code ZipEncodingHelper}.
     */
    private final ZipEncoding zipEncoding;

    /**
     * File name of actual source.
*/
    private final String archiveName;

    /**
     * The actual data source.
     */
    private final SeekableByteChannel archive;

    /**
     * Whether to look for and use Unicode extra fields.
     */
    private final boolean useUnicodeExtraFields;

    /**
     * Whether the file is closed.
     *
     * <p>Starts out as {@code true}; the private constructor clears it
     * once the archive has been read successfully, {@link #close()}
     * sets it again and {@code finalize()} reads it.</p>
     */
    private volatile boolean closed = true;

    /**
     * Whether the zip archive is a split zip archive, i.e. whether the
     * channel handed to the constructor is a
     * {@code ZipSplitReadOnlySeekableByteChannel}.
     */
    private final boolean isSplitZipArchive;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // each ByteBuffer below wraps the byte array of the same name, so data
    // read into the buffer can be decoded straight from the array
    private final byte[] dwordBuf = new byte[DWORD];
    private final byte[] wordBuf = new byte[WORD];
    private final byte[] cfhBuf = new byte[CFH_LEN];
    private final byte[] shortBuf = new byte[SHORT];
    private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
    private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
    private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
    private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);

    // Location of the central directory. The disk number / relative offset
    // pair is consulted for split archives, the plain offset otherwise (see
    // sanityCheckLFHOffset). centralDirectoryStartOffset is recorded by
    // populateFromCentralDirectory; NOTE(review): the other two are assigned
    // outside the part of the file reviewed here.
    private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset;
    private long centralDirectoryStartOffset;

    /**
     * Opens the given file for reading, assuming "UTF8" for file names.
     *
     * @param f the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f) throws IOException {
        this(f, ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given file for reading, assuming "UTF8" for file names.
     *
     * @param name name of the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name) throws IOException {
        this(new File(name), ZipEncodingHelper.UTF8);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names, scanning unicode extra fields.
     *
     * @param name name of the archive.
* @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final String name, final String encoding) throws IOException {
        // Unicode extra fields are always honoured by this overload
        this(new File(name), encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names and scanning for unicode extra fields.
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding) throws IOException {
        // Unicode extra fields are always honoured by this overload
        this(f, encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     * <p>Local file headers are not ignored by this overload.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // ignoreLocalFileHeader = false
        this(f, encoding, useUnicodeExtraFields, false);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     *
     * <p>By default the central directory record and all local file headers of the archive will be read immediately
     * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
     * can be set to {@code true} which restricts parsing to the central directory.
Unfortunately the local file header
     * may contain information not present inside of the central directory which will not be available when the argument
     * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also
     * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
     * true}.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     * @param ignoreLocalFileHeader whether to ignore information
     * stored inside the local file header (see the notes in this method's javadoc)
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.19
     */
    public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields,
                   final boolean ignoreLocalFileHeader)
        throws IOException {
        // The channel is opened read-only here, so closeOnError is passed as
        // true: if reading the archive fails the channel is closed again.
        // The file's absolute path serves as the archive name.
        this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)),
             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, ignoreLocalFileHeader);
    }

    /**
     * Opens the given channel for reading, assuming "UTF8" for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The archive name is reported as "unknown archive" and Unicode
     * extra fields are used.</p>
     *
     * @param channel the archive.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel)
            throws IOException {
        this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
*
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The archive name is reported as "unknown archive" and Unicode
     * extra fields are used.</p>
     *
     * @param channel the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String encoding)
        throws IOException {
        this(channel, "unknown archive", encoding, true);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>The channel is not closed by this constructor if reading the
     * archive fails, and local file headers are not ignored.</p>
     *
     * @param channel the archive.
     * @param archiveName name of the archive, used for error messages only.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     * @since 1.13
     */
    public ZipFile(final SeekableByteChannel channel, final String archiveName,
                   final String encoding, final boolean useUnicodeExtraFields)
        throws IOException {
        // closeOnError = false, ignoreLocalFileHeader = false
        this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
    }

    /**
     * Opens the given channel for reading, assuming the specified
     * encoding for file names.
     *
     * <p>{@link
     * org.apache.commons.compress.utils.SeekableInMemoryByteChannel}
     * allows you to read from an in-memory archive.</p>
     *
     * <p>By default the central directory record and all local file headers of the archive will be read immediately
     * which may take a considerable amount of time when the archive is big.
The {@code ignoreLocalFileHeader} parameter 337 * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header 338 * may contain information not present inside of the central directory which will not be available when the argument 339 * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 340 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also 341 * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code 342 * true}.</p> 343 * 344 * @param channel the archive. 345 * @param archiveName name of the archive, used for error messages only. 346 * @param encoding the encoding to use for file names, use null 347 * for the platform's default encoding 348 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 349 * Extra Fields (if present) to set the file names. 350 * @param ignoreLocalFileHeader whether to ignore information 351 * stored inside the local file header (see the notes in this method's javadoc) 352 * 353 * @throws IOException if an error occurs while reading the file. 
354 * @since 1.19 355 */ 356 public ZipFile(final SeekableByteChannel channel, final String archiveName, 357 final String encoding, final boolean useUnicodeExtraFields, 358 final boolean ignoreLocalFileHeader) 359 throws IOException { 360 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 361 } 362 363 private ZipFile(final SeekableByteChannel channel, final String archiveName, 364 final String encoding, final boolean useUnicodeExtraFields, 365 final boolean closeOnError, final boolean ignoreLocalFileHeader) 366 throws IOException { 367 isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel); 368 369 this.archiveName = archiveName; 370 this.encoding = encoding; 371 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 372 this.useUnicodeExtraFields = useUnicodeExtraFields; 373 archive = channel; 374 boolean success = false; 375 try { 376 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 377 populateFromCentralDirectory(); 378 if (!ignoreLocalFileHeader) { 379 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 380 } 381 fillNameMap(); 382 success = true; 383 } catch (final IOException e) { 384 throw new IOException("Error on ZipFile " + archiveName, e); 385 } finally { 386 closed = !success; 387 if (!success && closeOnError) { 388 IOUtils.closeQuietly(archive); 389 } 390 } 391 } 392 393 /** 394 * The encoding to use for file names and the file comment. 395 * 396 * @return null if using the platform's default character encoding. 397 */ 398 public String getEncoding() { 399 return encoding; 400 } 401 402 /** 403 * Closes the archive. 404 * @throws IOException if an error occurs closing the archive. 405 */ 406 @Override 407 public void close() throws IOException { 408 // this flag is only written here and read in finalize() which 409 // can never be run in parallel. 410 // no synchronization needed. 
411 closed = true; 412 413 archive.close(); 414 } 415 416 /** 417 * close a zipfile quietly; throw no io fault, do nothing 418 * on a null parameter 419 * @param zipfile file to close, can be null 420 */ 421 public static void closeQuietly(final ZipFile zipfile) { 422 IOUtils.closeQuietly(zipfile); 423 } 424 425 /** 426 * Returns all entries. 427 * 428 * <p>Entries will be returned in the same order they appear 429 * within the archive's central directory.</p> 430 * 431 * @return all entries as {@link ZipArchiveEntry} instances 432 */ 433 public Enumeration<ZipArchiveEntry> getEntries() { 434 return Collections.enumeration(entries); 435 } 436 437 /** 438 * Returns all entries in physical order. 439 * 440 * <p>Entries will be returned in the same order their contents 441 * appear within the archive.</p> 442 * 443 * @return all entries as {@link ZipArchiveEntry} instances 444 * 445 * @since 1.1 446 */ 447 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 448 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY); 449 Arrays.sort(allEntries, offsetComparator); 450 return Collections.enumeration(Arrays.asList(allEntries)); 451 } 452 453 /** 454 * Returns a named entry - or {@code null} if no entry by 455 * that name exists. 456 * 457 * <p>If multiple entries with the same name exist the first entry 458 * in the archive's central directory by that name is 459 * returned.</p> 460 * 461 * @param name name of the entry. 462 * @return the ZipArchiveEntry corresponding to the given name - or 463 * {@code null} if not present. 464 */ 465 public ZipArchiveEntry getEntry(final String name) { 466 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 467 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 468 } 469 470 /** 471 * Returns all named entries in the same order they appear within 472 * the archive's central directory. 473 * 474 * @param name name of the entry. 
475 * @return the Iterable<ZipArchiveEntry> corresponding to the 476 * given name 477 * @since 1.6 478 */ 479 public Iterable<ZipArchiveEntry> getEntries(final String name) { 480 final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 481 return entriesOfThatName != null ? entriesOfThatName 482 : Collections.emptyList(); 483 } 484 485 /** 486 * Returns all named entries in the same order their contents 487 * appear within the archive. 488 * 489 * @param name name of the entry. 490 * @return the Iterable<ZipArchiveEntry> corresponding to the 491 * given name 492 * @since 1.6 493 */ 494 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 495 ZipArchiveEntry[] entriesOfThatName = ZipArchiveEntry.EMPTY_ZIP_ARCHIVE_ENTRY_ARRAY; 496 if (nameMap.containsKey(name)) { 497 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 498 Arrays.sort(entriesOfThatName, offsetComparator); 499 } 500 return Arrays.asList(entriesOfThatName); 501 } 502 503 /** 504 * Whether this class is able to read the given entry. 505 * 506 * <p>May return false if it is set up to use encryption or a 507 * compression method that hasn't been implemented yet.</p> 508 * @since 1.1 509 * @param ze the entry 510 * @return whether this class is able to read the given entry. 511 */ 512 public boolean canReadEntryData(final ZipArchiveEntry ze) { 513 return ZipUtil.canHandleEntryData(ze); 514 } 515 516 /** 517 * Expose the raw stream of the archive entry (compressed form). 518 * 519 * <p>This method does not relate to how/if we understand the payload in the 520 * stream, since we really only intend to move it on to somewhere else.</p> 521 * 522 * @param ze The entry to get the stream for 523 * @return The raw input stream containing (possibly) compressed data. 
524 * @since 1.11 525 */ 526 public InputStream getRawInputStream(final ZipArchiveEntry ze) { 527 if (!(ze instanceof Entry)) { 528 return null; 529 } 530 final long start = ze.getDataOffset(); 531 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 532 return null; 533 } 534 return createBoundedInputStream(start, ze.getCompressedSize()); 535 } 536 537 538 /** 539 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 540 * Compression and all other attributes will be as in this file. 541 * <p>This method transfers entries based on the central directory of the zip file.</p> 542 * 543 * @param target The zipArchiveOutputStream to write the entries to 544 * @param predicate A predicate that selects which entries to write 545 * @throws IOException on error 546 */ 547 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) 548 throws IOException { 549 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 550 while (src.hasMoreElements()) { 551 final ZipArchiveEntry entry = src.nextElement(); 552 if (predicate.test( entry)) { 553 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 554 } 555 } 556 } 557 558 /** 559 * Returns an InputStream for reading the contents of the given entry. 560 * 561 * @param ze the entry to get the stream for. 562 * @return a stream to read the entry from. The returned stream 563 * implements {@link InputStreamStatistics}. 
564 * @throws IOException if unable to create an input stream from the zipentry 565 */ 566 public InputStream getInputStream(final ZipArchiveEntry ze) 567 throws IOException { 568 if (!(ze instanceof Entry)) { 569 return null; 570 } 571 // cast validity is checked just above 572 ZipUtil.checkRequestedFeatures(ze); 573 final long start = getDataOffset(ze); 574 575 // doesn't get closed if the method is not supported - which 576 // should never happen because of the checkRequestedFeatures 577 // call above 578 final InputStream is = 579 new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR 580 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 581 case STORED: 582 return new StoredStatisticsStream(is); 583 case UNSHRINKING: 584 return new UnshrinkingInputStream(is); 585 case IMPLODING: 586 try { 587 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 588 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 589 } catch (final IllegalArgumentException ex) { 590 throw new IOException("bad IMPLODE data", ex); 591 } 592 case DEFLATED: 593 final Inflater inflater = new Inflater(true); 594 // Inflater with nowrap=true has this odd contract for a zero padding 595 // byte following the data stream; this used to be zlib's requirement 596 // and has been fixed a long time ago, but the contract persists so 597 // we comply. 
598 // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 599 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), 600 inflater) { 601 @Override 602 public void close() throws IOException { 603 try { 604 super.close(); 605 } finally { 606 inflater.end(); 607 } 608 } 609 }; 610 case BZIP2: 611 return new BZip2CompressorInputStream(is); 612 case ENHANCED_DEFLATED: 613 return new Deflate64CompressorInputStream(is); 614 case AES_ENCRYPTED: 615 case EXPANDING_LEVEL_1: 616 case EXPANDING_LEVEL_2: 617 case EXPANDING_LEVEL_3: 618 case EXPANDING_LEVEL_4: 619 case JPEG: 620 case LZMA: 621 case PKWARE_IMPLODING: 622 case PPMD: 623 case TOKENIZATION: 624 case UNKNOWN: 625 case WAVPACK: 626 case XZ: 627 default: 628 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze); 629 } 630 } 631 632 /** 633 * <p> 634 * Convenience method to return the entry's content as a String if isUnixSymlink() 635 * returns true for it, otherwise returns null. 636 * </p> 637 * 638 * <p>This method assumes the symbolic link's file name uses the 639 * same encoding that as been specified for this ZipFile.</p> 640 * 641 * @param entry ZipArchiveEntry object that represents the symbolic link 642 * @return entry's content as a String 643 * @throws IOException problem with content's input stream 644 * @since 1.5 645 */ 646 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 647 if (entry != null && entry.isUnixSymlink()) { 648 try (InputStream in = getInputStream(entry)) { 649 return zipEncoding.decode(IOUtils.toByteArray(in)); 650 } 651 } 652 return null; 653 } 654 655 /** 656 * Ensures that the close method of this zipfile is called when 657 * there are no more references to it. 
658 * @see #close() 659 */ 660 @Override 661 protected void finalize() throws Throwable { 662 try { 663 if (!closed) { 664 System.err.println("Cleaning up unclosed ZipFile for archive " 665 + archiveName); 666 close(); 667 } 668 } finally { 669 super.finalize(); 670 } 671 } 672 673 /** 674 * Length of a "central directory" entry structure without file 675 * name, extra fields or comment. 676 */ 677 private static final int CFH_LEN = 678 /* version made by */ SHORT 679 /* version needed to extract */ + SHORT 680 /* general purpose bit flag */ + SHORT 681 /* compression method */ + SHORT 682 /* last mod file time */ + SHORT 683 /* last mod file date */ + SHORT 684 /* crc-32 */ + WORD 685 /* compressed size */ + WORD 686 /* uncompressed size */ + WORD 687 /* file name length */ + SHORT 688 /* extra field length */ + SHORT 689 /* file comment length */ + SHORT 690 /* disk number start */ + SHORT 691 /* internal file attributes */ + SHORT 692 /* external file attributes */ + WORD 693 /* relative offset of local header */ + WORD; 694 695 private static final long CFH_SIG = 696 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 697 698 /** 699 * Reads the central directory of the given archive and populates 700 * the internal tables with ZipArchiveEntry instances. 701 * 702 * <p>The ZipArchiveEntrys will know all data that can be obtained from 703 * the central directory alone, but not the data that requires the 704 * local file header or additional data to be read.</p> 705 * 706 * @return a map of zipentries that didn't have the language 707 * encoding flag set when read. 
*/
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory()
        throws IOException {
        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag =
            new HashMap<>();

        // remember where the central directory starts; sanityCheckLFHOffset
        // compares local file header offsets against this position
        positionAtCentralDirectory();
        centralDirectoryStartOffset = archive.position();

        // read the first four-byte signature
        // NOTE(review): the cast to Buffer presumably keeps the call bound to
        // Buffer.rewind() for older Java runtimes - confirm
        ((Buffer)wordBbuf).rewind();
        IOUtils.readFully(archive, wordBbuf);
        long sig = ZipLong.getValue(wordBuf);

        if (sig != CFH_SIG && startsWithLocalFileHeader()) {
            throw new IOException("Central directory is empty, can't expand"
                                  + " corrupt archive.");
        }

        // one iteration per central directory entry; the loop ends at the
        // first signature that is not a central file header signature
        while (sig == CFH_SIG) {
            readCentralDirectoryEntry(noUTF8Flag);
            ((Buffer)wordBbuf).rewind();
            IOUtils.readFully(archive, wordBbuf);
            sig = ZipLong.getValue(wordBuf);
        }
        return noUTF8Flag;
    }

    /**
     * Reads an individual entry of the central directory, creates a
     * ZipArchiveEntry from it and adds it to the list of entries.
     *
     * <p>Expects the channel to be positioned right behind the entry's
     * signature. The fixed-size part is read into {@code cfhBuf} and
     * decoded field by field, followed by the variable-length file
     * name, extra data and comment.</p>
     *
     * @param noUTF8Flag map used to collect entries that don't have
     * their UTF-8 flag set and whose name will be set by data read
     * from the local file header later. The current entry may be
     * added to this map.
     * @throws IOException if the entry cannot be read completely or
     * contains invalid data.
     */
    private void
        readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag)
        throws IOException {
        ((Buffer)cfhBbuf).rewind();
        IOUtils.readFully(archive, cfhBbuf);
        // running offset into cfhBuf; advanced by the width of each field
        int off = 0;
        final Entry ze = new Entry();

        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // the platform is taken from the low four bits of the upper byte
        ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += SHORT; // version required

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // entries flagged as UTF-8 are decoded as such, all others with the
        // encoding configured for this ZipFile
        final ZipEncoding entryEncoding =
            hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        if (hasUTF8Flag) {
            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }
        ze.setGeneralPurposeBit(gpFlag);
        // same two bytes as parsed into gpFlag above, stored unparsed
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += SHORT;

        //noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        // last mod file time and last mod file date are read as one word
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        long size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        ze.setCompressedSize(size);
        off += WORD;

        size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        ze.setSize(size);
        off += WORD;

        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += WORD;

        // variable-length part, read from the channel: the file name first;
        // fewer bytes than announced means the archive is truncated
        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
        if (fileName.length < fileNameLen) {
            throw new EOFException();
        }
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset - the last field of the fixed-size part in cfhBuf
        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
        // data offset will be filled later
        entries.add(ze);

        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            ze.setCentralDirectoryExtra(cdExtraData);
        } catch (RuntimeException ex) {
            // report unparseable extra data as a ZipException, keeping the cause
            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
            z.initCause(ex);
            throw z;
        }

        // both calls rely on the extra fields and the local header offset set above
        setSizesAndOffsetFromZip64Extra(ze);
        sanityCheckLFHOffset(ze);

        final byte[] comment = IOUtils.readRange(archive, commentLen);
        if (comment.length < commentLen) {
            throw new EOFException();
        }
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw name and comment bytes of entries without the UTF-8 flag
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }

        ze.setStreamContiguous(true);
    }

    /**
     * Rejects entries whose disk number or local file header offset is
     * negative, or whose local file header would start after the
     * central directory.
     *
     * <p>For split archives the entry's disk number and offset are
     * compared with the disk and relative offset at which the central
     * directory starts; otherwise the offset is compared with the
     * central directory's start offset.</p>
     *
     * @param ze the entry to check
     * @throws IOException if one of the checks fails
     */
    private void sanityCheckLFHOffset(final ZipArchiveEntry ze) throws IOException {
        if (ze.getDiskNumberStart() < 0) {
            throw new IOException("broken archive, entry with negative disk number");
        }
        if (ze.getLocalHeaderOffset() < 0) {
            throw new IOException("broken archive, entry with negative local file header offset");
        }
        if (isSplitZipArchive) {
            if (ze.getDiskNumberStart() > centralDirectoryStartDiskNumber) {
                throw new IOException("local file header for " + ze.getName() + " starts on a later disk than central directory");
            }
            // same disk as the central directory: compare offsets within that disk
            if (ze.getDiskNumberStart() == centralDirectoryStartDiskNumber
                && ze.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) {
                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
            }
        } else {
            if (ze.getLocalHeaderOffset() > centralDirectoryStartOffset) {
                throw new IOException("local file header for " + ze.getName() + " starts after central directory");
            }
        }
    }

    /**
     * If the entry holds a Zip64
extended information extra field, 887 * read sizes from there if the entry's sizes are set to 888 * 0xFFFFFFFFF, do the same for the offset of the local file 889 * header. 890 * 891 * <p>Ensures the Zip64 extra either knows both compressed and 892 * uncompressed size or neither of both as the internal logic in 893 * ExtraFieldUtils forces the field to create local header data 894 * even if they are never used - and here a field with only one 895 * size would be invalid.</p> 896 */ 897 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) 898 throws IOException { 899 final ZipExtraField extra = 900 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 901 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 902 throw new ZipException("archive contains unparseable zip64 extra field"); 903 } 904 final Zip64ExtendedInformationExtraField z64 = 905 (Zip64ExtendedInformationExtraField) extra; 906 if (z64 != null) { 907 final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 908 final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 909 final boolean hasRelativeHeaderOffset = 910 ze.getLocalHeaderOffset() == ZIP64_MAGIC; 911 final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT; 912 z64.reparseCentralDirectoryData(hasUncompressedSize, 913 hasCompressedSize, 914 hasRelativeHeaderOffset, 915 hasDiskStart); 916 917 if (hasUncompressedSize) { 918 final long size = z64.getSize().getLongValue(); 919 if (size < 0) { 920 throw new IOException("broken archive, entry with negative size"); 921 } 922 ze.setSize(size); 923 } else if (hasCompressedSize) { 924 z64.setSize(new ZipEightByteInteger(ze.getSize())); 925 } 926 927 if (hasCompressedSize) { 928 final long size = z64.getCompressedSize().getLongValue(); 929 if (size < 0) { 930 throw new IOException("broken archive, entry with negative compressed size"); 931 } 932 ze.setCompressedSize(size); 933 } else if (hasUncompressedSize) { 934 
z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 935 } 936 937 if (hasRelativeHeaderOffset) { 938 ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 939 } 940 941 if (hasDiskStart) { 942 ze.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 943 } 944 } 945 } 946 947 /** 948 * Length of the "End of central directory record" - which is 949 * supposed to be the last structure of the archive - without file 950 * comment. 951 */ 952 static final int MIN_EOCD_SIZE = 953 /* end of central dir signature */ WORD 954 /* number of this disk */ + SHORT 955 /* number of the disk with the */ 956 /* start of the central directory */ + SHORT 957 /* total number of entries in */ 958 /* the central dir on this disk */ + SHORT 959 /* total number of entries in */ 960 /* the central dir */ + SHORT 961 /* size of the central directory */ + WORD 962 /* offset of start of central */ 963 /* directory with respect to */ 964 /* the starting disk number */ + WORD 965 /* zipfile comment length */ + SHORT; 966 967 /** 968 * Maximum length of the "End of central directory record" with a 969 * file comment. 970 */ 971 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 972 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 973 974 /** 975 * Offset of the field that holds the location of the first 976 * central directory entry inside the "End of central directory 977 * record" relative to the start of the "End of central directory 978 * record". 
979 */ 980 private static final int CFD_LOCATOR_OFFSET = 981 /* end of central dir signature */ WORD 982 /* number of this disk */ + SHORT 983 /* number of the disk with the */ 984 /* start of the central directory */ + SHORT 985 /* total number of entries in */ 986 /* the central dir on this disk */ + SHORT 987 /* total number of entries in */ 988 /* the central dir */ + SHORT 989 /* size of the central directory */ + WORD; 990 991 /** 992 * Offset of the field that holds the disk number of the first 993 * central directory entry inside the "End of central directory 994 * record" relative to the start of the "End of central directory 995 * record". 996 */ 997 private static final int CFD_DISK_OFFSET = 998 /* end of central dir signature */ WORD 999 /* number of this disk */ + SHORT; 1000 1001 /** 1002 * Offset of the field that holds the location of the first 1003 * central directory entry inside the "End of central directory 1004 * record" relative to the "number of the disk with the start 1005 * of the central directory". 1006 */ 1007 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 1008 /* total number of entries in */ 1009 /* the central dir on this disk */ + SHORT 1010 /* total number of entries in */ 1011 /* the central dir */ + SHORT 1012 /* size of the central directory */ + WORD; 1013 1014 /** 1015 * Length of the "Zip64 end of central directory locator" - which 1016 * should be right in front of the "end of central directory 1017 * record" if one is present at all. 
1018 */ 1019 private static final int ZIP64_EOCDL_LENGTH = 1020 /* zip64 end of central dir locator sig */ WORD 1021 /* number of the disk with the start */ 1022 /* start of the zip64 end of */ 1023 /* central directory */ + WORD 1024 /* relative offset of the zip64 */ 1025 /* end of central directory record */ + DWORD 1026 /* total number of disks */ + WORD; 1027 1028 /** 1029 * Offset of the field that holds the location of the "Zip64 end 1030 * of central directory record" inside the "Zip64 end of central 1031 * directory locator" relative to the start of the "Zip64 end of 1032 * central directory locator". 1033 */ 1034 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 1035 /* zip64 end of central dir locator sig */ WORD 1036 /* number of the disk with the start */ 1037 /* start of the zip64 end of */ 1038 /* central directory */ + WORD; 1039 1040 /** 1041 * Offset of the field that holds the location of the first 1042 * central directory entry inside the "Zip64 end of central 1043 * directory record" relative to the start of the "Zip64 end of 1044 * central directory record". 1045 */ 1046 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 1047 /* zip64 end of central dir */ 1048 /* signature */ WORD 1049 /* size of zip64 end of central */ 1050 /* directory record */ + DWORD 1051 /* version made by */ + SHORT 1052 /* version needed to extract */ + SHORT 1053 /* number of this disk */ + WORD 1054 /* number of the disk with the */ 1055 /* start of the central directory */ + WORD 1056 /* total number of entries in the */ 1057 /* central directory on this disk */ + DWORD 1058 /* total number of entries in the */ 1059 /* central directory */ + DWORD 1060 /* size of the central directory */ + DWORD; 1061 1062 /** 1063 * Offset of the field that holds the disk number of the first 1064 * central directory entry inside the "Zip64 end of central 1065 * directory record" relative to the start of the "Zip64 end of 1066 * central directory record". 
1067 */ 1068 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 1069 /* zip64 end of central dir */ 1070 /* signature */ WORD 1071 /* size of zip64 end of central */ 1072 /* directory record */ + DWORD 1073 /* version made by */ + SHORT 1074 /* version needed to extract */ + SHORT 1075 /* number of this disk */ + WORD; 1076 1077 /** 1078 * Offset of the field that holds the location of the first 1079 * central directory entry inside the "Zip64 end of central 1080 * directory record" relative to the "number of the disk 1081 * with the start of the central directory". 1082 */ 1083 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 1084 /* total number of entries in the */ 1085 /* central directory on this disk */ DWORD 1086 /* total number of entries in the */ 1087 /* central directory */ + DWORD 1088 /* size of the central directory */ + DWORD; 1089 1090 /** 1091 * Searches for either the "Zip64 end of central directory 1092 * locator" or the "End of central dir record", parses 1093 * it and positions the stream at the first central directory 1094 * record. 
1095 */ 1096 private void positionAtCentralDirectory() 1097 throws IOException { 1098 positionAtEndOfCentralDirectoryRecord(); 1099 boolean found = false; 1100 final boolean searchedForZip64EOCD = 1101 archive.position() > ZIP64_EOCDL_LENGTH; 1102 if (searchedForZip64EOCD) { 1103 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1104 ((Buffer)wordBbuf).rewind(); 1105 IOUtils.readFully(archive, wordBbuf); 1106 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 1107 wordBuf); 1108 } 1109 if (!found) { 1110 // not a ZIP64 archive 1111 if (searchedForZip64EOCD) { 1112 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 1113 } 1114 positionAtCentralDirectory32(); 1115 } else { 1116 positionAtCentralDirectory64(); 1117 } 1118 } 1119 1120 /** 1121 * Parses the "Zip64 end of central directory locator", 1122 * finds the "Zip64 end of central directory record" using the 1123 * parsed information, parses that and positions the stream at the 1124 * first central directory record. 1125 * 1126 * Expects stream to be positioned right behind the "Zip64 1127 * end of central directory locator"'s signature. 
1128 */ 1129 private void positionAtCentralDirectory64() 1130 throws IOException { 1131 if (isSplitZipArchive) { 1132 ((Buffer)wordBbuf).rewind(); 1133 IOUtils.readFully(archive, wordBbuf); 1134 final long diskNumberOfEOCD = ZipLong.getValue(wordBuf); 1135 1136 ((Buffer)dwordBbuf).rewind(); 1137 IOUtils.readFully(archive, dwordBbuf); 1138 final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf); 1139 ((ZipSplitReadOnlySeekableByteChannel) archive) 1140 .position(diskNumberOfEOCD, relativeOffsetOfEOCD); 1141 } else { 1142 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 1143 - WORD /* signature has already been read */); 1144 ((Buffer)dwordBbuf).rewind(); 1145 IOUtils.readFully(archive, dwordBbuf); 1146 archive.position(ZipEightByteInteger.getLongValue(dwordBuf)); 1147 } 1148 1149 ((Buffer)wordBbuf).rewind(); 1150 IOUtils.readFully(archive, wordBbuf); 1151 if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 1152 throw new ZipException("Archive's ZIP64 end of central " 1153 + "directory locator is corrupt."); 1154 } 1155 1156 if (isSplitZipArchive) { 1157 skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET 1158 - WORD /* signature has already been read */); 1159 ((Buffer)wordBbuf).rewind(); 1160 IOUtils.readFully(archive, wordBbuf); 1161 centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf); 1162 1163 skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET); 1164 1165 ((Buffer)dwordBbuf).rewind(); 1166 IOUtils.readFully(archive, dwordBbuf); 1167 centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf); 1168 ((ZipSplitReadOnlySeekableByteChannel) archive) 1169 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1170 } else { 1171 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 1172 - WORD /* signature has already been read */); 1173 ((Buffer)dwordBbuf).rewind(); 1174 IOUtils.readFully(archive, dwordBbuf); 1175 centralDirectoryStartDiskNumber = 0; 1176 centralDirectoryStartRelativeOffset = 
ZipEightByteInteger.getLongValue(dwordBuf); 1177 archive.position(centralDirectoryStartRelativeOffset); 1178 } 1179 } 1180 1181 /** 1182 * Parses the "End of central dir record" and positions 1183 * the stream at the first central directory record. 1184 * 1185 * Expects stream to be positioned at the beginning of the 1186 * "End of central dir record". 1187 */ 1188 private void positionAtCentralDirectory32() 1189 throws IOException { 1190 if (isSplitZipArchive) { 1191 skipBytes(CFD_DISK_OFFSET); 1192 ((Buffer)shortBbuf).rewind(); 1193 IOUtils.readFully(archive, shortBbuf); 1194 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1195 1196 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1197 1198 ((Buffer)wordBbuf).rewind(); 1199 IOUtils.readFully(archive, wordBbuf); 1200 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1201 ((ZipSplitReadOnlySeekableByteChannel) archive) 1202 .position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1203 } else { 1204 skipBytes(CFD_LOCATOR_OFFSET); 1205 ((Buffer)wordBbuf).rewind(); 1206 IOUtils.readFully(archive, wordBbuf); 1207 centralDirectoryStartDiskNumber = 0; 1208 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1209 archive.position(centralDirectoryStartRelativeOffset); 1210 } 1211 } 1212 1213 /** 1214 * Searches for the and positions the stream at the start of the 1215 * "End of central dir record". 1216 */ 1217 private void positionAtEndOfCentralDirectoryRecord() 1218 throws IOException { 1219 final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 1220 ZipArchiveOutputStream.EOCD_SIG); 1221 if (!found) { 1222 throw new ZipException("Archive is not a ZIP archive"); 1223 } 1224 } 1225 1226 /** 1227 * Searches the archive backwards from minDistance to maxDistance 1228 * for the given signature, positions the RandomaccessFile right 1229 * at the signature if it has been found. 
1230 */ 1231 private boolean tryToLocateSignature(final long minDistanceFromEnd, 1232 final long maxDistanceFromEnd, 1233 final byte[] sig) throws IOException { 1234 boolean found = false; 1235 long off = archive.size() - minDistanceFromEnd; 1236 final long stopSearching = 1237 Math.max(0L, archive.size() - maxDistanceFromEnd); 1238 if (off >= 0) { 1239 for (; off >= stopSearching; off--) { 1240 archive.position(off); 1241 try { 1242 ((Buffer)wordBbuf).rewind(); 1243 IOUtils.readFully(archive, wordBbuf); 1244 ((Buffer)wordBbuf).flip(); 1245 } catch (final EOFException ex) { // NOSONAR 1246 break; 1247 } 1248 int curr = wordBbuf.get(); 1249 if (curr == sig[POS_0]) { 1250 curr = wordBbuf.get(); 1251 if (curr == sig[POS_1]) { 1252 curr = wordBbuf.get(); 1253 if (curr == sig[POS_2]) { 1254 curr = wordBbuf.get(); 1255 if (curr == sig[POS_3]) { 1256 found = true; 1257 break; 1258 } 1259 } 1260 } 1261 } 1262 } 1263 } 1264 if (found) { 1265 archive.position(off); 1266 } 1267 return found; 1268 } 1269 1270 /** 1271 * Skips the given number of bytes or throws an EOFException if 1272 * skipping failed. 1273 */ 1274 private void skipBytes(final int count) throws IOException { 1275 final long currentPosition = archive.position(); 1276 final long newPosition = currentPosition + count; 1277 if (newPosition > archive.size()) { 1278 throw new EOFException(); 1279 } 1280 archive.position(newPosition); 1281 } 1282 1283 /** 1284 * Number of bytes in local file header up to the "length of 1285 * file name" entry. 
1286 */ 1287 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 1288 /* local file header signature */ WORD 1289 /* version needed to extract */ + SHORT 1290 /* general purpose bit flag */ + SHORT 1291 /* compression method */ + SHORT 1292 /* last mod file time */ + SHORT 1293 /* last mod file date */ + SHORT 1294 /* crc-32 */ + WORD 1295 /* compressed size */ + WORD 1296 /* uncompressed size */ + (long) WORD; 1297 1298 /** 1299 * Walks through all recorded entries and adds the data available 1300 * from the local file header. 1301 * 1302 * <p>Also records the offsets for the data to read from the 1303 * entries.</p> 1304 */ 1305 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> 1306 entriesWithoutUTF8Flag) 1307 throws IOException { 1308 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1309 // entries is filled in populateFromCentralDirectory and 1310 // never modified 1311 final Entry ze = (Entry) zipArchiveEntry; 1312 final int[] lens = setDataOffset(ze); 1313 final int fileNameLen = lens[0]; 1314 final int extraFieldLen = lens[1]; 1315 skipBytes(fileNameLen); 1316 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1317 if (localExtraData.length < extraFieldLen) { 1318 throw new EOFException(); 1319 } 1320 try { 1321 ze.setExtra(localExtraData); 1322 } catch (RuntimeException ex) { 1323 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1324 z.initCause(ex); 1325 throw z; 1326 } 1327 1328 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1329 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1330 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 1331 nc.comment); 1332 } 1333 } 1334 } 1335 1336 private void fillNameMap() { 1337 for (final ZipArchiveEntry ze : entries) { 1338 // entries is filled in populateFromCentralDirectory and 1339 // never modified 1340 final String name = ze.getName(); 1341 LinkedList<ZipArchiveEntry> entriesOfThatName = 
nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 1342 entriesOfThatName.addLast(ze); 1343 } 1344 } 1345 1346 private int[] setDataOffset(final ZipArchiveEntry ze) throws IOException { 1347 long offset = ze.getLocalHeaderOffset(); 1348 if (isSplitZipArchive) { 1349 ((ZipSplitReadOnlySeekableByteChannel) archive) 1350 .position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1351 // the offset should be updated to the global offset 1352 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1353 } else { 1354 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1355 } 1356 ((Buffer)wordBbuf).rewind(); 1357 IOUtils.readFully(archive, wordBbuf); 1358 ((Buffer)wordBbuf).flip(); 1359 wordBbuf.get(shortBuf); 1360 final int fileNameLen = ZipShort.getValue(shortBuf); 1361 wordBbuf.get(shortBuf); 1362 final int extraFieldLen = ZipShort.getValue(shortBuf); 1363 ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH 1364 + SHORT + SHORT + fileNameLen + extraFieldLen); 1365 if (ze.getDataOffset() + ze.getCompressedSize() > centralDirectoryStartOffset) { 1366 throw new IOException("data for " + ze.getName() + " overlaps with central directory."); 1367 } 1368 return new int[] { fileNameLen, extraFieldLen }; 1369 } 1370 1371 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 1372 final long s = ze.getDataOffset(); 1373 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 1374 setDataOffset(ze); 1375 return ze.getDataOffset(); 1376 } 1377 return s; 1378 } 1379 1380 /** 1381 * Checks whether the archive starts with a LFH. If it doesn't, 1382 * it may be an empty archive. 
1383 */ 1384 private boolean startsWithLocalFileHeader() throws IOException { 1385 archive.position(0); 1386 ((Buffer)wordBbuf).rewind(); 1387 IOUtils.readFully(archive, wordBbuf); 1388 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1389 } 1390 1391 /** 1392 * Creates new BoundedInputStream, according to implementation of 1393 * underlying archive channel. 1394 */ 1395 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 1396 if (start < 0 || remaining < 0 || start + remaining < start) { 1397 throw new IllegalArgumentException("Corrupted archive, stream boundaries" 1398 + " are out of range"); 1399 } 1400 return archive instanceof FileChannel ? 1401 new BoundedFileChannelInputStream(start, remaining) : 1402 new BoundedSeekableByteChannelInputStream(start, remaining, archive); 1403 } 1404 1405 /** 1406 * Lock-free implementation of BoundedInputStream. The 1407 * implementation uses positioned reads on the underlying archive 1408 * file channel and therefore performs significantly faster in 1409 * concurrent environment. 1410 */ 1411 private class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 1412 private final FileChannel archive; 1413 1414 BoundedFileChannelInputStream(final long start, final long remaining) { 1415 super(start, remaining); 1416 archive = (FileChannel) ZipFile.this.archive; 1417 } 1418 1419 @Override 1420 protected int read(final long pos, final ByteBuffer buf) throws IOException { 1421 final int read = archive.read(buf, pos); 1422 ((Buffer)buf).flip(); 1423 return read; 1424 } 1425 } 1426 1427 private static final class NameAndComment { 1428 private final byte[] name; 1429 private final byte[] comment; 1430 private NameAndComment(final byte[] name, final byte[] comment) { 1431 this.name = name; 1432 this.comment = comment; 1433 } 1434 } 1435 1436 /** 1437 * Compares two ZipArchiveEntries based on their offset within the archive. 
1438 * 1439 * <p>Won't return any meaningful results if one of the entries 1440 * isn't part of the archive at all.</p> 1441 * 1442 * @since 1.1 1443 */ 1444 private final Comparator<ZipArchiveEntry> offsetComparator = 1445 Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 1446 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 1447 1448 /** 1449 * Extends ZipArchiveEntry to store the offset within the archive. 1450 */ 1451 private static class Entry extends ZipArchiveEntry { 1452 1453 Entry() { 1454 } 1455 1456 @Override 1457 public int hashCode() { 1458 return 3 * super.hashCode() 1459 + (int) getLocalHeaderOffset()+(int)(getLocalHeaderOffset()>>32); 1460 } 1461 1462 @Override 1463 public boolean equals(final Object other) { 1464 if (super.equals(other)) { 1465 // super.equals would return false if other were not an Entry 1466 final Entry otherEntry = (Entry) other; 1467 return getLocalHeaderOffset() 1468 == otherEntry.getLocalHeaderOffset() 1469 && super.getDataOffset() 1470 == otherEntry.getDataOffset() 1471 && super.getDiskNumberStart() 1472 == otherEntry.getDiskNumberStart(); 1473 } 1474 return false; 1475 } 1476 } 1477 1478 private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 1479 StoredStatisticsStream(final InputStream in) { 1480 super(in); 1481 } 1482 1483 @Override 1484 public long getCompressedCount() { 1485 return super.getBytesRead(); 1486 } 1487 1488 @Override 1489 public long getUncompressedCount() { 1490 return getCompressedCount(); 1491 } 1492 } 1493}