BinaryDeltaInputStream.java

  1. /*
  2.  * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
  3.  *
  4.  * This program and the accompanying materials are made available under the
  5.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  6.  * https://www.eclipse.org/org/documents/edl-v10.php.
  7.  *
  8.  * SPDX-License-Identifier: BSD-3-Clause
  9.  */
  10. package org.eclipse.jgit.util.io;

  11. import java.io.EOFException;
  12. import java.io.IOException;
  13. import java.io.InputStream;
  14. import java.io.StreamCorruptedException;
  15. import java.text.MessageFormat;

  16. import org.eclipse.jgit.internal.JGitText;

  17. /**
  18.  * An {@link InputStream} that applies a binary delta to a base on the fly.
  19.  * <p>
  20.  * Delta application to a base needs random access to the base data. The delta
  21.  * is expressed as a sequence of copy and insert instructions. A copy
  22.  * instruction has the form "COPY fromOffset length" and says "copy length bytes
  23.  * from the base, starting at offset fromOffset, to the result". An insert
  24.  * instruction has the form "INSERT length" followed by length bytes and says
  25.  * "copy the next length bytes from the delta to the result".
  26.  * </p>
  27.  * <p>
  28.  * These instructions are generated using a content-defined chunking algorithm
  29.  * (currently C git uses the standard Rabin variant; but there are others that
  30.  * could be used) that identifies equal chunks. It is entirely possible that a
  31.  * later copy instruction has a fromOffset that is before the fromOffset of an
  32.  * earlier copy instruction.
  33.  * </p>
  34.  * <p>
  35.  * This makes it impossible to stream the base.
  36.  * </p>
  37.  * <p>
  38.  * JGit is limited to 2GB maximum size for the base since array indices are
  39.  * signed 32bit values.
  40.  *
  41.  * @since 5.12
  42.  */
  43. public class BinaryDeltaInputStream extends InputStream {

  44.     private final byte[] base;

  45.     private final InputStream delta;

  46.     private long resultLength;

  47.     private long toDeliver = -1;

  48.     private int fromBase;

  49.     private int fromDelta;

  50.     private int baseOffset = -1;

  51.     /**
  52.      * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
  53.      * to {@code base}.
  54.      *
  55.      * @param base
  56.      *            data to apply the delta to
  57.      * @param delta
  58.      *            {@link InputStream} delivering the delta to apply
  59.      */
  60.     public BinaryDeltaInputStream(byte[] base, InputStream delta) {
  61.         this.base = base;
  62.         this.delta = delta;
  63.     }

  64.     @Override
  65.     public int read() throws IOException {
  66.         int b = readNext();
  67.         if (b >= 0) {
  68.             toDeliver--;
  69.         }
  70.         return b;
  71.     }

  72.     @Override
  73.     public int read(byte[] b, int off, int len) throws IOException {
  74.         return super.read(b, off, len);
  75.     }

  76.     private void initialize() throws IOException {
  77.         long baseSize = readVarInt(delta);
  78.         if (baseSize > Integer.MAX_VALUE || baseSize < 0
  79.                 || (int) baseSize != base.length) {
  80.             throw new IOException(MessageFormat.format(
  81.                     JGitText.get().binaryDeltaBaseLengthMismatch,
  82.                     Integer.valueOf(base.length), Long.valueOf(baseSize)));
  83.         }
  84.         resultLength = readVarInt(delta);
  85.         if (resultLength < 0) {
  86.             throw new StreamCorruptedException(
  87.                     JGitText.get().binaryDeltaInvalidResultLength);
  88.         }
  89.         toDeliver = resultLength;
  90.         baseOffset = 0;
  91.     }

  92.     private int readNext() throws IOException {
  93.         if (baseOffset < 0) {
  94.             initialize();
  95.         }
  96.         if (fromBase > 0) {
  97.             fromBase--;
  98.             return base[baseOffset++] & 0xFF;
  99.         } else if (fromDelta > 0) {
  100.             fromDelta--;
  101.             return delta.read();
  102.         }
  103.         int command = delta.read();
  104.         if (command < 0) {
  105.             return -1;
  106.         }
  107.         if ((command & 0x80) != 0) {
  108.             // Decode offset and length to read from base
  109.             long copyOffset = 0;
  110.             for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
  111.                 if ((command & i) != 0) {
  112.                     copyOffset |= ((long) next(delta)) << shift;
  113.                 }
  114.             }
  115.             int copySize = 0;
  116.             for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
  117.                 if ((command & i) != 0) {
  118.                     copySize |= next(delta) << shift;
  119.                 }
  120.             }
  121.             if (copySize == 0) {
  122.                 copySize = 0x10000;
  123.             }
  124.             if (copyOffset > base.length - copySize) {
  125.                 throw new StreamCorruptedException(MessageFormat.format(
  126.                         JGitText.get().binaryDeltaInvalidOffset,
  127.                         Long.valueOf(copyOffset), Integer.valueOf(copySize)));
  128.             }
  129.             baseOffset = (int) copyOffset;
  130.             fromBase = copySize;
  131.             return readNext();
  132.         } else if (command != 0) {
  133.             // The next 'command' bytes come from the delta
  134.             fromDelta = command - 1;
  135.             return delta.read();
  136.         } else {
  137.             // Zero is reserved
  138.             throw new StreamCorruptedException(
  139.                     JGitText.get().unsupportedCommand0);
  140.         }
  141.     }

  142.     private int next(InputStream in) throws IOException {
  143.         int b = in.read();
  144.         if (b < 0) {
  145.             throw new EOFException();
  146.         }
  147.         return b;
  148.     }

  149.     private long readVarInt(InputStream in) throws IOException {
  150.         long val = 0;
  151.         int shift = 0;
  152.         int b;
  153.         do {
  154.             b = next(in);
  155.             val |= ((long) (b & 0x7f)) << shift;
  156.             shift += 7;
  157.         } while ((b & 0x80) != 0);
  158.         return val;
  159.     }

  160.     /**
  161.      * Tells the expected size of the final result.
  162.      *
  163.      * @return the size
  164.      * @throws IOException
  165.      *             if the size cannot be determined from {@code delta}
  166.      */
  167.     public long getExpectedResultSize() throws IOException {
  168.         if (baseOffset < 0) {
  169.             initialize();
  170.         }
  171.         return resultLength;
  172.     }

  173.     /**
  174.      * Tells whether the delta has been fully consumed, and the expected number
  175.      * of bytes for the combined result have been read from this
  176.      * {@link BinaryDeltaInputStream}.
  177.      *
  178.      * @return whether delta application was successful
  179.      */
  180.     public boolean isFullyConsumed() {
  181.         try {
  182.             return toDeliver == 0 && delta.read() < 0;
  183.         } catch (IOException e) {
  184.             return toDeliver == 0;
  185.         }
  186.     }

  187.     @Override
  188.     public void close() throws IOException {
  189.         delta.close();
  190.     }
  191. }