BinaryDeltaInputStream.java
- /*
- * Copyright (C) 2021 Thomas Wolf <thomas.wolf@paranor.ch> and others
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Distribution License v. 1.0 which is available at
- * https://www.eclipse.org/org/documents/edl-v10.php.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
- package org.eclipse.jgit.util.io;
- import java.io.EOFException;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.StreamCorruptedException;
- import java.text.MessageFormat;
- import org.eclipse.jgit.internal.JGitText;
- /**
- * An {@link InputStream} that applies a binary delta to a base on the fly.
- * <p>
- * Delta application to a base needs random access to the base data. The delta
- * is expressed as a sequence of copy and insert instructions. A copy
- * instruction has the form "COPY fromOffset length" and says "copy length bytes
- * from the base, starting at offset fromOffset, to the result". An insert
- * instruction has the form "INSERT length" followed by length bytes and says
- * "copy the next length bytes from the delta to the result".
- * </p>
- * <p>
- * These instructions are generated using a content-defined chunking algorithm
- * (currently C git uses the standard Rabin variant; but there are others that
- * could be used) that identifies equal chunks. It is entirely possible that a
- * later copy instruction has a fromOffset that is before the fromOffset of an
- * earlier copy instruction.
- * </p>
- * <p>
- * This makes it impossible to stream the base.
- * </p>
- * <p>
- * JGit is limited to 2GB maximum size for the base since array indices are
- * signed 32bit values.
- *
- * @since 5.12
- */
- public class BinaryDeltaInputStream extends InputStream {
- private final byte[] base;
- private final InputStream delta;
- private long resultLength;
- private long toDeliver = -1;
- private int fromBase;
- private int fromDelta;
- private int baseOffset = -1;
- /**
- * Creates a new {@link BinaryDeltaInputStream} that applies {@code delta}
- * to {@code base}.
- *
- * @param base
- * data to apply the delta to
- * @param delta
- * {@link InputStream} delivering the delta to apply
- */
- public BinaryDeltaInputStream(byte[] base, InputStream delta) {
- this.base = base;
- this.delta = delta;
- }
- @Override
- public int read() throws IOException {
- int b = readNext();
- if (b >= 0) {
- toDeliver--;
- }
- return b;
- }
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- return super.read(b, off, len);
- }
- private void initialize() throws IOException {
- long baseSize = readVarInt(delta);
- if (baseSize > Integer.MAX_VALUE || baseSize < 0
- || (int) baseSize != base.length) {
- throw new IOException(MessageFormat.format(
- JGitText.get().binaryDeltaBaseLengthMismatch,
- Integer.valueOf(base.length), Long.valueOf(baseSize)));
- }
- resultLength = readVarInt(delta);
- if (resultLength < 0) {
- throw new StreamCorruptedException(
- JGitText.get().binaryDeltaInvalidResultLength);
- }
- toDeliver = resultLength;
- baseOffset = 0;
- }
- private int readNext() throws IOException {
- if (baseOffset < 0) {
- initialize();
- }
- if (fromBase > 0) {
- fromBase--;
- return base[baseOffset++] & 0xFF;
- } else if (fromDelta > 0) {
- fromDelta--;
- return delta.read();
- }
- int command = delta.read();
- if (command < 0) {
- return -1;
- }
- if ((command & 0x80) != 0) {
- // Decode offset and length to read from base
- long copyOffset = 0;
- for (int i = 1, shift = 0; i < 0x10; i *= 2, shift += 8) {
- if ((command & i) != 0) {
- copyOffset |= ((long) next(delta)) << shift;
- }
- }
- int copySize = 0;
- for (int i = 0x10, shift = 0; i < 0x80; i *= 2, shift += 8) {
- if ((command & i) != 0) {
- copySize |= next(delta) << shift;
- }
- }
- if (copySize == 0) {
- copySize = 0x10000;
- }
- if (copyOffset > base.length - copySize) {
- throw new StreamCorruptedException(MessageFormat.format(
- JGitText.get().binaryDeltaInvalidOffset,
- Long.valueOf(copyOffset), Integer.valueOf(copySize)));
- }
- baseOffset = (int) copyOffset;
- fromBase = copySize;
- return readNext();
- } else if (command != 0) {
- // The next 'command' bytes come from the delta
- fromDelta = command - 1;
- return delta.read();
- } else {
- // Zero is reserved
- throw new StreamCorruptedException(
- JGitText.get().unsupportedCommand0);
- }
- }
- private int next(InputStream in) throws IOException {
- int b = in.read();
- if (b < 0) {
- throw new EOFException();
- }
- return b;
- }
- private long readVarInt(InputStream in) throws IOException {
- long val = 0;
- int shift = 0;
- int b;
- do {
- b = next(in);
- val |= ((long) (b & 0x7f)) << shift;
- shift += 7;
- } while ((b & 0x80) != 0);
- return val;
- }
- /**
- * Tells the expected size of the final result.
- *
- * @return the size
- * @throws IOException
- * if the size cannot be determined from {@code delta}
- */
- public long getExpectedResultSize() throws IOException {
- if (baseOffset < 0) {
- initialize();
- }
- return resultLength;
- }
- /**
- * Tells whether the delta has been fully consumed, and the expected number
- * of bytes for the combined result have been read from this
- * {@link BinaryDeltaInputStream}.
- *
- * @return whether delta application was successful
- */
- public boolean isFullyConsumed() {
- try {
- return toDeliver == 0 && delta.read() < 0;
- } catch (IOException e) {
- return toDeliver == 0;
- }
- }
- @Override
- public void close() throws IOException {
- delta.close();
- }
- }