| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.commons.compress.archivers.zip; |
| |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.CodingErrorAction; |
| |
| /** |
| * A ZipEncoding, which uses a java.nio {@link |
| * java.nio.charset.Charset Charset} to encode names. |
| * <p>The methods of this class are reentrant.</p> |
| * @Immutable |
| */ |
| class NioZipEncoding implements ZipEncoding, CharsetAccessor { |
| |
| private final Charset charset; |
| private final boolean useReplacement; |
| private static final char REPLACEMENT = '?'; |
| private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT }; |
| private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT); |
| private static final char[] HEX_CHARS = new char[] { |
| '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' |
| }; |
| |
| |
| /** |
| * Construct an NioZipEncoding using the given charset. |
| * @param charset The character set to use. |
| * @param useReplacement should invalid characters be replaced, or reported. |
| */ |
| NioZipEncoding(final Charset charset, boolean useReplacement) { |
| this.charset = charset; |
| this.useReplacement = useReplacement; |
| } |
| |
| @Override |
| public Charset getCharset() { |
| return charset; |
| } |
| |
| /** |
| * @see ZipEncoding#canEncode(java.lang.String) |
| */ |
| @Override |
| public boolean canEncode(final String name) { |
| final CharsetEncoder enc = newEncoder(); |
| |
| return enc.canEncode(name); |
| } |
| |
| /** |
| * @see ZipEncoding#encode(java.lang.String) |
| */ |
| @Override |
| public ByteBuffer encode(final String name) { |
| final CharsetEncoder enc = newEncoder(); |
| |
| final CharBuffer cb = CharBuffer.wrap(name); |
| CharBuffer tmp = null; |
| ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining())); |
| |
| while (cb.remaining() > 0) { |
| final CoderResult res = enc.encode(cb, out, false); |
| |
| if (res.isUnmappable() || res.isMalformed()) { |
| |
| // write the unmappable characters in utf-16 |
| // pseudo-URL encoding style to ByteBuffer. |
| |
| int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length()); |
| if (spaceForSurrogate > out.remaining()) { |
| // if the destination buffer isn't over sized, assume that the presence of one |
| // unmappable character makes it likely that there will be more. Find all the |
| // un-encoded characters and allocate space based on those estimates. |
| int charCount = 0; |
| for (int i = cb.position() ; i < cb.limit(); i++) { |
| charCount += !enc.canEncode(cb.get(i)) ? 6 : 1; |
| } |
| int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount); |
| out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining()); |
| } |
| if (tmp == null) { |
| tmp = CharBuffer.allocate(6); |
| } |
| for (int i = 0; i < res.length(); ++i) { |
| out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out); |
| } |
| |
| } else if (res.isOverflow()) { |
| int increment = estimateIncrementalEncodingSize(enc, cb.remaining()); |
| out = ZipEncodingHelper.growBufferBy(out, increment); |
| } |
| } |
| // tell the encoder we are done |
| enc.encode(cb, out, true); |
| // may have caused underflow, but that's been ignored traditionally |
| |
| out.limit(out.position()); |
| out.rewind(); |
| return out; |
| } |
| |
| /** |
| * @see |
| * ZipEncoding#decode(byte[]) |
| */ |
| @Override |
| public String decode(final byte[] data) throws IOException { |
| return newDecoder() |
| .decode(ByteBuffer.wrap(data)).toString(); |
| } |
| |
| private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) { |
| ByteBuffer o = out; |
| while (cb.hasRemaining()) { |
| CoderResult result = enc.encode(cb, o, false); |
| if (result.isOverflow()) { |
| int increment = estimateIncrementalEncodingSize(enc, cb.remaining()); |
| o = ZipEncodingHelper.growBufferBy(o, increment); |
| } |
| } |
| return o; |
| } |
| |
| private static CharBuffer encodeSurrogate(CharBuffer cb, char c) { |
| cb.position(0).limit(6); |
| cb.put('%'); |
| cb.put('U'); |
| |
| cb.put(HEX_CHARS[(c >> 12) & 0x0f]); |
| cb.put(HEX_CHARS[(c >> 8) & 0x0f]); |
| cb.put(HEX_CHARS[(c >> 4) & 0x0f]); |
| cb.put(HEX_CHARS[c & 0x0f]); |
| cb.flip(); |
| return cb; |
| } |
| |
| private CharsetEncoder newEncoder() { |
| if (useReplacement) { |
| return charset.newEncoder() |
| .onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE) |
| .replaceWith(REPLACEMENT_BYTES); |
| } else { |
| return charset.newEncoder() |
| .onMalformedInput(CodingErrorAction.REPORT) |
| .onUnmappableCharacter(CodingErrorAction.REPORT); |
| } |
| } |
| |
| private CharsetDecoder newDecoder() { |
| if (!useReplacement) { |
| return this.charset.newDecoder() |
| .onMalformedInput(CodingErrorAction.REPORT) |
| .onUnmappableCharacter(CodingErrorAction.REPORT); |
| } else { |
| return charset.newDecoder() |
| .onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE) |
| .replaceWith(REPLACEMENT_STRING); |
| } |
| } |
| |
| /** |
| * Estimate the initial encoded size (in bytes) for a character buffer. |
| * <p> |
| * The estimate assumes that one character consumes uses the maximum length encoding, |
| * whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at |
| * the expense of a couple of extra bytes for UTF-8 encoded ASCII. |
| * </p> |
| * |
| * @param enc encoder to use for estimates |
| * @param charChount number of characters in string |
| * @return estimated size in bytes. |
| */ |
| private static int estimateInitialBufferSize(CharsetEncoder enc, int charChount) { |
| float first = enc.maxBytesPerChar(); |
| float rest = (charChount - 1) * enc.averageBytesPerChar(); |
| return (int) Math.ceil(first + rest); |
| } |
| |
| /** |
| * Estimate the size needed for remaining characters |
| * |
| * @param enc encoder to use for estimates |
| * @param charCount number of characters remaining |
| * @return estimated size in bytes. |
| */ |
| private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) { |
| return (int) Math.ceil(charCount * enc.averageBytesPerChar()); |
| } |
| |
| } |