| // Copyright 2016 Google Inc. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package com.google.archivepatcher.generator.bsdiff; |
| |
| import java.io.IOException; |
| |
| /** |
| * Implementation of matcher used by BsDiff. Exact matches between newData[a ... a + len - 1] |
| * and oldData[b ... b + len - 1] are valid if: |
| * <ul> |
| * <li>|len| > mMinimumMatchLength</li> |
| * <li>The number of matches between newData[a ... a + len - 1] and |
| * oldData[previous_b ... previous_b + len - 1] < |len| - mMinimumMatchLength where |
| * |previous_b| is the |b| value of the previous match if there was one and zero otherwise.</li> |
| * </ul> |
| */ |
| class BsDiffMatcher implements Matcher { |
| private final RandomAccessObject mOldData; |
| private final RandomAccessObject mNewData; |
| |
| /** |
| * Contains order of the sorted suffixes of |oldData|. The element at mGroupArray[i] contains the |
| * position of oldData[i ... oldData.length - 1] in the sorted list of suffixes of |oldData|. |
| */ |
| private final RandomAccessObject mGroupArray; |
| |
| /** |
| * The index in |oldData| of the first byte of the match. Zero if no matches have been found yet. |
| */ |
| private int mOldPos; |
| |
| /** |
| * The index in |newData| of the first byte of the match. Zero if no matches have been found yet. |
| * The next match will be searched starting at |mNewPos| + |mMatchLen|. |
| */ |
| private int mNewPos; |
| |
| /** |
| * Minimum match length in bytes. |
| */ |
| private final int mMinimumMatchLength; |
| |
| /** |
| * A limit on how many total match lengths encountered, to exit the match extension loop in next() |
| * and prevent O(n^2) behavior. |
| */ |
| private final long mTotalMatchLenBudget = 1L << 26; // ~64 million. |
| |
| /** |
| * The number of bytes, |n|, which match between newData[mNewPos ... mNewPos + n] and |
| * oldData[mOldPos ... mOldPos + n]. |
| */ |
| private int mMatchLen; |
| |
| /** |
| * Create a standard BsDiffMatcher. |
| * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the |
| * oldData and newData. This can have a significant effect on both the generated patch size and |
| * the amount of time and memory required to apply the patch. |
| */ |
| BsDiffMatcher( |
| RandomAccessObject oldData, |
| RandomAccessObject newData, |
| RandomAccessObject groupArray, |
| int minimumMatchLength) { |
| mOldData = oldData; |
| mNewData = newData; |
| mGroupArray = groupArray; |
| mOldPos = 0; |
| mMinimumMatchLength = minimumMatchLength; |
| } |
| |
| @Override |
| public Matcher.NextMatch next() throws IOException, InterruptedException { |
| RandomAccessObject oldData = mOldData; |
| RandomAccessObject newData = mNewData; |
| |
| // The offset between between the indices in |oldData| and |newData| |
| // of the previous match. |
| int previousOldOffset = mOldPos - mNewPos; |
| |
| // Look for a new match starting from the end of the previous match. |
| mNewPos += mMatchLen; |
| |
| // The number of matching bytes in the forward extension of the previous match: |
| // oldData[mNewPos + previousOldOffset ... mNewPos + previousOldOffset + mMatchLen - 1] |
| // and newData[mNewPos ... mNewPos + mMatchLen - 1]. |
| int numMatches = 0; |
| |
| // The size of the range for which |numMatches| has been computed. |
| int matchesCacheSize = 0; |
| |
| // Sum over all match lengths encountered, to exit loop if we take too long to compute. |
| long totalMatchLen = 0; |
| |
| while (mNewPos < newData.length()) { |
| if (Thread.interrupted()) { |
| throw new InterruptedException(); |
| } |
| BsDiff.Match match = |
| BsDiff.searchForMatch(mGroupArray, oldData, newData, mNewPos, 0, (int) oldData.length()); |
| mOldPos = match.start; |
| mMatchLen = match.length; |
| totalMatchLen += mMatchLen; |
| |
| // Update |numMatches| for the new value of |matchLen|. |
| for (; matchesCacheSize < mMatchLen; ++matchesCacheSize) { |
| int oldIndex = mNewPos + previousOldOffset + matchesCacheSize; |
| int newIndex = mNewPos + matchesCacheSize; |
| if (oldIndex < oldData.length()) { |
| oldData.seek(oldIndex); |
| newData.seek(newIndex); |
| |
| if (oldData.readByte() == newData.readByte()) { |
| ++numMatches; |
| } |
| } |
| } |
| |
| // Also return if we've been trying to extend a large match for a long time. |
| if (mMatchLen > numMatches + mMinimumMatchLength || totalMatchLen >= mTotalMatchLenBudget) { |
| return Matcher.NextMatch.of(true, mOldPos, mNewPos); |
| } |
| |
| if (mMatchLen == 0) { |
| ++mNewPos; |
| } else if (mMatchLen == numMatches) { |
| // This seems to be an optimization because it is unlikely to find a valid match in the |
| // range mNewPos = [ mNewPos ... mNewPos + mMatchLen - 1 ] especially for large values of |
| // |mMatchLen|. |
| mNewPos += numMatches; |
| numMatches = 0; |
| matchesCacheSize = 0; |
| } else { |
| // Update |numMatches| for the value of |mNewPos| in the next iteration of the loop. In the |
| // next iteration of the loop, the new value of |numMatches| will be at least |
| // |numMatches - 1| because |
| // oldData[mNewPos + previousOldOffset + 1 ... mNewPos + previousOldOffset + mMatchLen - 1] |
| // matches newData[mNewPos + 1 ... mNewPos + mMatchLen - 1]. |
| if (mNewPos + previousOldOffset < oldData.length()) { |
| oldData.seek(mNewPos + previousOldOffset); |
| newData.seek(mNewPos); |
| |
| if (oldData.readByte() == newData.readByte()) { |
| --numMatches; |
| } |
| } |
| ++mNewPos; |
| --matchesCacheSize; |
| } |
| } |
| |
| return Matcher.NextMatch.of(false, 0, 0); |
| } |
| } |