blob: d20f5e40806c88c7d2a3cafb44251af9481f2c34 [file] [log] [blame]
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.java.util.jar.pack;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import static com.sun.java.util.jar.pack.Constants.*;
/**
* Population-based coding.
* See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
* @author John Rose
*/
// This tactic alone reduces the final zipped rt.jar by about a percent.
class PopulationCoding implements CodingMethod {
Histogram vHist; // histogram of all values
int[] fValues; // list of favored values
int fVlen; // inclusive max index
long[] symtab; // int map of favored value -> token [1..#fValues]
CodingMethod favoredCoding;
CodingMethod tokenCoding;
CodingMethod unfavoredCoding;
int L = -1; //preferred L value for tokenCoding
public void setFavoredValues(int[] fValues, int fVlen) {
// Note: {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
// This is because zero is an exceptional favored value index.
assert(fValues[0] == 0); // must be empty
assert(this.fValues == null); // do not do this twice
this.fValues = fValues;
this.fVlen = fVlen;
if (L >= 0) {
setL(L); // reassert
}
}
public void setFavoredValues(int[] fValues) {
int lfVlen = fValues.length-1;
setFavoredValues(fValues, lfVlen);
}
public void setHistogram(Histogram vHist) {
this.vHist = vHist;
}
public void setL(int L) {
this.L = L;
if (L >= 0 && fValues != null && tokenCoding == null) {
tokenCoding = fitTokenCoding(fVlen, L);
assert(tokenCoding != null);
}
}
public static Coding fitTokenCoding(int fVlen, int L) {
// Find the smallest B s.t. (B,H,0) covers fVlen.
if (fVlen < 256)
// H/L do not matter when B==1
return BandStructure.BYTE1;
Coding longest = BandStructure.UNSIGNED5.setL(L);
if (!longest.canRepresentUnsigned(fVlen))
return null; // failure; L is too sharp and fVlen too large
Coding tc = longest;
for (Coding shorter = longest; ; ) {
shorter = shorter.setB(shorter.B()-1);
if (shorter.umax() < fVlen)
break;
tc = shorter; // shorten it by reducing B
}
return tc;
}
public void setFavoredCoding(CodingMethod favoredCoding) {
this.favoredCoding = favoredCoding;
}
public void setTokenCoding(CodingMethod tokenCoding) {
this.tokenCoding = tokenCoding;
this.L = -1;
if (tokenCoding instanceof Coding && fValues != null) {
Coding tc = (Coding) tokenCoding;
if (tc == fitTokenCoding(fVlen, tc.L()))
this.L = tc.L();
// Otherwise, it's a non-default coding.
}
}
public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
this.unfavoredCoding = unfavoredCoding;
}
public int favoredValueMaxLength() {
if (L == 0)
return Integer.MAX_VALUE;
else
return BandStructure.UNSIGNED5.setL(L).umax();
}
public void resortFavoredValues() {
Coding tc = (Coding) tokenCoding;
// Make a local copy before reordering.
fValues = BandStructure.realloc(fValues, 1+fVlen);
// Resort favoredValues within each byte-size cadre.
int fillp = 1; // skip initial zero
for (int n = 1; n <= tc.B(); n++) {
int nmax = tc.byteMax(n);
if (nmax > fVlen)
nmax = fVlen;
if (nmax < tc.byteMin(n))
break;
int low = fillp;
int high = nmax+1;
if (high == low) continue;
assert(high > low)
: high+"!>"+low;
assert(tc.getLength(low) == n)
: n+" != len("+(low)+") == "+
tc.getLength(low);
assert(tc.getLength(high-1) == n)
: n+" != len("+(high-1)+") == "+
tc.getLength(high-1);
int midTarget = low + (high-low)/2;
int mid = low;
// Divide the values into cadres, and sort within each.
int prevCount = -1;
int prevLimit = low;
for (int i = low; i < high; i++) {
int val = fValues[i];
int count = vHist.getFrequency(val);
if (prevCount != count) {
if (n == 1) {
// For the single-byte encoding, keep strict order
// among frequency groups.
Arrays.sort(fValues, prevLimit, i);
} else if (Math.abs(mid - midTarget) >
Math.abs(i - midTarget)) {
// Find a single inflection point
// close to the middle of the byte-size cadre.
mid = i;
}
prevCount = count;
prevLimit = i;
}
}
if (n == 1) {
Arrays.sort(fValues, prevLimit, high);
} else {
// Sort up to the midpoint, if any.
Arrays.sort(fValues, low, mid);
Arrays.sort(fValues, mid, high);
}
assert(tc.getLength(low) == tc.getLength(mid));
assert(tc.getLength(low) == tc.getLength(high-1));
fillp = nmax+1;
}
assert(fillp == fValues.length);
// Reset symtab.
symtab = null;
}
public int getToken(int value) {
if (symtab == null)
symtab = makeSymtab();
int pos = Arrays.binarySearch(symtab, (long)value << 32);
if (pos < 0) pos = -pos-1;
if (pos < symtab.length && value == (int)(symtab[pos] >>> 32))
return (int)symtab[pos];
else
return 0;
}
public int[][] encodeValues(int[] values, int start, int end) {
// Compute token sequence.
int[] tokens = new int[end-start];
int nuv = 0;
for (int i = 0; i < tokens.length; i++) {
int val = values[start+i];
int tok = getToken(val);
if (tok != 0)
tokens[i] = tok;
else
nuv += 1;
}
// Compute unfavored value sequence.
int[] unfavoredValues = new int[nuv];
nuv = 0; // reset
for (int i = 0; i < tokens.length; i++) {
if (tokens[i] != 0) continue; // already covered
int val = values[start+i];
unfavoredValues[nuv++] = val;
}
assert(nuv == unfavoredValues.length);
return new int[][]{ tokens, unfavoredValues };
}
private long[] makeSymtab() {
long[] lsymtab = new long[fVlen];
for (int token = 1; token <= fVlen; token++) {
lsymtab[token-1] = ((long)fValues[token] << 32) | token;
}
// Index by value:
Arrays.sort(lsymtab);
return lsymtab;
}
private Coding getTailCoding(CodingMethod c) {
while (c instanceof AdaptiveCoding)
c = ((AdaptiveCoding)c).tailCoding;
return (Coding) c;
}
// CodingMethod methods.
public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
int[][] vals = encodeValues(a, start, end);
writeSequencesTo(out, vals[0], vals[1]);
}
void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
getTailCoding(favoredCoding).writeTo(out, computeSentinelValue());
tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
if (uValues.length > 0)
unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
}
int computeSentinelValue() {
Coding fc = getTailCoding(favoredCoding);
if (fc.isDelta()) {
// repeat the last favored value, using delta=0
return 0;
} else {
// else repeat the shorter of the min or last value
int min = fValues[1];
int last = min;
// (remember that fVlen is an inclusive limit in fValues)
for (int i = 2; i <= fVlen; i++) {
last = fValues[i];
min = moreCentral(min, last);
}
int endVal;
if (fc.getLength(min) <= fc.getLength(last))
return min;
else
return last;
}
}
public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
// Parameters are fCode, L, uCode.
setFavoredValues(readFavoredValuesFrom(in, end-start));
// Read the tokens. Read them into the final array, for the moment.
tokenCoding.readArrayFrom(in, a, start, end);
// Decode the favored tokens.
int headp = 0, tailp = -1;
int uVlen = 0;
for (int i = start; i < end; i++) {
int tok = a[i];
if (tok == 0) {
// Make a linked list, and decode in a second pass.
if (tailp < 0) {
headp = i;
} else {
a[tailp] = i;
}
tailp = i;
uVlen += 1;
} else {
a[i] = fValues[tok];
}
}
// Walk the linked list of "zero" locations, decoding unfavored vals.
int[] uValues = new int[uVlen];
if (uVlen > 0)
unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
for (int i = 0; i < uVlen; i++) {
int nextp = a[headp];
a[headp] = uValues[i];
headp = nextp;
}
}
int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
int[] lfValues = new int[1000]; // realloc as needed
// The set uniqueValuesForDebug records all favored values.
// As each new value is added, we assert that the value
// was not already in the set.
Set<Integer> uniqueValuesForDebug = null;
assert((uniqueValuesForDebug = new HashSet<>()) != null);
int fillp = 1;
maxForDebug += fillp;
int min = Integer.MIN_VALUE; // farthest from the center
//int min2 = Integer.MIN_VALUE; // emulate buggy 150.7 spec.
int last = 0;
CodingMethod fcm = favoredCoding;
while (fcm instanceof AdaptiveCoding) {
AdaptiveCoding ac = (AdaptiveCoding) fcm;
int len = ac.headLength;
while (fillp + len > lfValues.length) {
lfValues = BandStructure.realloc(lfValues);
}
int newFillp = fillp + len;
ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp);
while (fillp < newFillp) {
int val = lfValues[fillp++];
assert(uniqueValuesForDebug.add(val));
assert(fillp <= maxForDebug);
last = val;
min = moreCentral(min, val);
//min2 = moreCentral2(min2, val, min);
}
fcm = ac.tailCoding;
}
Coding fc = (Coding) fcm;
if (fc.isDelta()) {
for (long state = 0;;) {
// Read a new value:
state += fc.readFrom(in);
int val;
if (fc.isSubrange())
val = fc.reduceToUnsignedRange(state);
else
val = (int)state;
state = val;
if (fillp > 1 && (val == last || val == min)) //|| val == min2
break;
if (fillp == lfValues.length)
lfValues = BandStructure.realloc(lfValues);
lfValues[fillp++] = val;
assert(uniqueValuesForDebug.add(val));
assert(fillp <= maxForDebug);
last = val;
min = moreCentral(min, val);
//min2 = moreCentral(min2, val);
}
} else {
for (;;) {
int val = fc.readFrom(in);
if (fillp > 1 && (val == last || val == min)) //|| val == min2
break;
if (fillp == lfValues.length)
lfValues = BandStructure.realloc(lfValues);
lfValues[fillp++] = val;
assert(uniqueValuesForDebug.add(val));
assert(fillp <= maxForDebug);
last = val;
min = moreCentral(min, val);
//min2 = moreCentral2(min2, val, min);
}
}
return BandStructure.realloc(lfValues, fillp);
}
private static int moreCentral(int x, int y) {
int kx = (x >> 31) ^ (x << 1);
int ky = (y >> 31) ^ (y << 1);
// bias kx/ky to get an unsigned comparison:
kx -= Integer.MIN_VALUE;
ky -= Integer.MIN_VALUE;
int xy = (kx < ky? x: y);
// assert that this ALU-ish version is the same:
assert(xy == moreCentralSlow(x, y));
return xy;
}
// private static int moreCentral2(int x, int y, int min) {
// // Strict implementation of buggy 150.7 specification.
// // The bug is that the spec. says absolute-value ties are broken
// // in favor of positive numbers, but the suggested implementation
// // (also mentioned in the spec.) breaks ties in favor of negatives.
// if (x + y == 0) return (x > y? x : y);
// return min;
// }
private static int moreCentralSlow(int x, int y) {
int ax = x;
if (ax < 0) ax = -ax;
if (ax < 0) return y; //x is MIN_VALUE
int ay = y;
if (ay < 0) ay = -ay;
if (ay < 0) return x; //y is MIN_VALUE
if (ax < ay) return x;
if (ax > ay) return y;
// At this point the absolute values agree, and the negative wins.
return x < y ? x : y;
}
static final int[] LValuesCoded
= { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
public byte[] getMetaCoding(Coding dflt) {
int K = fVlen;
int LCoded = 0;
if (tokenCoding instanceof Coding) {
Coding tc = (Coding) tokenCoding;
if (tc.B() == 1) {
LCoded = 1;
} else if (L >= 0) {
assert(L == tc.L());
for (int i = 1; i < LValuesCoded.length; i++) {
if (LValuesCoded[i] == L) { LCoded = i; break; }
}
}
}
CodingMethod tokenDflt = null;
if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
// A simple L value is enough to recover the tokenCoding.
tokenDflt = tokenCoding;
}
int FDef = (favoredCoding == dflt)?1:0;
int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
int TDef = (tokenCoding == tokenDflt)?1:0;
int TDefL = (TDef == 1) ? LCoded : 0;
assert(TDef == ((TDefL>0)?1:0));
ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
try {
if (FDef == 0) bytes.write(favoredCoding.getMetaCoding(dflt));
if (TDef == 0) bytes.write(tokenCoding.getMetaCoding(dflt));
if (UDef == 0) bytes.write(unfavoredCoding.getMetaCoding(dflt));
} catch (IOException ee) {
throw new RuntimeException(ee);
}
return bytes.toByteArray();
}
public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
int op = bytes[pos++] & 0xFF;
if (op < _meta_pop || op >= _meta_limit) return pos-1; // backup
op -= _meta_pop;
int FDef = op % 2;
int UDef = (op / 2) % 2;
int TDefL = (op / 4);
int TDef = (TDefL > 0)?1:0;
int L = LValuesCoded[TDefL];
CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt};
if (FDef == 0)
pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode);
if (TDef == 0)
pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode);
if (UDef == 0)
pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode);
PopulationCoding pop = new PopulationCoding();
pop.L = L; // might be -1
pop.favoredCoding = FCode[0];
pop.tokenCoding = TCode[0]; // might be null!
pop.unfavoredCoding = UCode[0];
res[0] = pop;
return pos;
}
private String keyString(CodingMethod m) {
if (m instanceof Coding)
return ((Coding)m).keyString();
if (m == null)
return "none";
return m.toString();
}
public String toString() {
PropMap p200 = Utils.currentPropMap();
boolean verbose
= (p200 != null &&
p200.getBoolean(Utils.COM_PREFIX+"verbose.pop"));
StringBuilder res = new StringBuilder(100);
res.append("pop(").append("fVlen=").append(fVlen);
if (verbose && fValues != null) {
res.append(" fV=[");
for (int i = 1; i <= fVlen; i++) {
res.append(i==1?"":",").append(fValues[i]);
}
res.append(";").append(computeSentinelValue());
res.append("]");
}
res.append(" fc=").append(keyString(favoredCoding));
res.append(" tc=").append(keyString(tokenCoding));
res.append(" uc=").append(keyString(unfavoredCoding));
res.append(")");
return res.toString();
}
}