blob: de8239713461233325e86feba95ca1866a289e0b [file] [log] [blame]
/* ====================================================================
* Copyright (c) 2006 J.T. Beetstra
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* ====================================================================
*/
package com.beetstra.jutf7;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
/**
* <p>
* The CharsetEncoder used to encode both variants of the UTF-7 charset and the
* modified-UTF-7 charset.
* </p>
* <p>
* <strong>Please note this class does not behave strictly according to the
* specification in Sun Java VMs before 1.6.</strong> This is done to get around
* a bug in the implementation of
* {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}. Unfortunately,
* that method cannot be overridden.
* </p>
*
* @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">JDK
* bug 6221056</a>
* @author Jaap Beetstra
*/
class UTF7StyleCharsetEncoder extends CharsetEncoder {
    /** Average number of bytes per character reported to the superclass. */
    private static final float AVG_BYTES_PER_CHAR = 1.5f;
    /** Maximum number of bytes per character reported to the superclass. */
    private static final float MAX_BYTES_PER_CHAR = 5.0f;
    /** Free bytes that must be available before another character is consumed. */
    private static final int MIN_ROOM_PER_CHAR = 4;
    /** Free bytes that must be available before a pending base 64 run is closed. */
    private static final int MIN_ROOM_FOR_FLUSH = 2;

    private final UTF7StyleCharset cs;
    private final Base64Util base64;
    private final byte shift;
    private final byte unshift;
    private final boolean strict;

    /** <code>true</code> while the output is inside a base 64 run. */
    private boolean base64mode;
    /** Number of bits (always fewer than six between calls) held in <code>sextet</code>. */
    private int bitsToOutput;
    /** Partially filled sextet, aligned to the high bits of a six-bit value. */
    private int sextet;

    /**
     * Whether {@link #encodeLoop(CharBuffer, ByteBuffer)} should report a fake
     * overflow to make old JDKs call flush. Set once at class initialisation:
     * <code>true</code> only on a "Sun Microsystems Inc." VM whose
     * specification version is 1.4 or 1.5.
     */
    static boolean useUglyHackToForceCallToFlushInJava5;

    static {
        String specVersion = System.getProperty("java.specification.version");
        String vmVendor = System.getProperty("java.vm.vendor");
        boolean beforeJava6 = "1.4".equals(specVersion) || "1.5".equals(specVersion);
        boolean sunVm = "Sun Microsystems Inc.".equals(vmVendor);
        useUglyHackToForceCallToFlushInJava5 = beforeJava6 && sunVm;
    }

    /**
     * Creates an encoder for the given charset.
     *
     * @param cs The charset this encoder belongs to; it supplies the shift
     *        and unshift bytes and decides which characters are written
     *        directly
     * @param base64 The base 64 alphabet used inside shifted runs
     * @param strict If <code>true</code>, every base 64 run is closed with an
     *        explicit unshift byte, even where it could be omitted
     */
    UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
        super(cs, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
        this.cs = cs;
        this.base64 = base64;
        this.strict = strict;
        this.shift = cs.shift();
        this.unshift = cs.unshift();
    }

    /*
     * (non-Javadoc)
     * @see java.nio.charset.CharsetEncoder#implReset()
     */
    protected void implReset() {
        clearState();
    }

    /**
     * {@inheritDoc}
     * <p>
     * If a base 64 run is still open, this writes the pending partial sextet
     * (if any) followed by the unshift byte. When fewer than two bytes are
     * free in the output buffer, <code>CoderResult.OVERFLOW</code> is returned
     * and nothing is written, as the specification requires. Note, however,
     * that calling flush again on JDKs before Java 6 triggers a bug in
     * {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} which makes it
     * throw an IllegalStateException (that method is <code>final</code> and so
     * cannot be overridden).
     * </p>
     *
     * @see <a
     *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6227608">
     *      JDK bug 6227608</a>
     * @param out The output byte buffer
     * @return A coder-result object describing the reason for termination
     */
    protected CoderResult implFlush(ByteBuffer out) {
        if (!base64mode) {
            // Nothing is pending outside a base 64 run.
            return CoderResult.UNDERFLOW;
        }
        if (out.remaining() < MIN_ROOM_FOR_FLUSH) {
            return CoderResult.OVERFLOW;
        }
        if (bitsToOutput != 0) {
            out.put(base64.getChar(sextet));
        }
        out.put(unshift);
        return CoderResult.UNDERFLOW;
    }

    /**
     * {@inheritDoc}
     * <p>
     * A character is only consumed while at least four bytes are free in the
     * output buffer; otherwise <code>CoderResult.OVERFLOW</code> is returned
     * with the character left in the input buffer.
     * </p>
     * <p>
     * This method may also return <code>CoderResult.OVERFLOW</code> although
     * enough space is available. That is done on purpose, to force the broken
     * implementation of
     * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} in old JDKs to
     * call flush (the buggy method is <code>final</code> and so cannot be
     * overridden).
     * </p>
     * <p>
     * String.getBytes(), on the other hand, fails when it receives
     * CoderResult.OVERFLOW, because it assumes it has allocated enough bytes
     * (maxBytesPerChar * nr_of_chars). For that reason the fake overflow is
     * suppressed when the output limit equals exactly that product. A static
     * flag records whether a broken Java version is in use.
     * </p>
     * <p>
     * The trailing bytes cannot simply be written here, because more
     * characters may follow than those present in the input buffer.
     * </p>
     *
     * @see <a
     *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">
     *      JDK bug 6221056</a>
     * @param in The input character buffer
     * @param out The output byte buffer
     * @return A coder-result object describing the reason for termination
     */
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        while (in.hasRemaining()) {
            if (out.remaining() < MIN_ROOM_PER_CHAR) {
                return CoderResult.OVERFLOW;
            }
            char ch = in.get();
            if (cs.canEncodeDirectly(ch)) {
                // Close any open base 64 run, then copy the character as-is.
                leaveBase64Mode(out, ch);
                out.put((byte) ch);
                continue;
            }
            if (ch == shift && !base64mode) {
                // A literal shift character outside a run: shift + unshift.
                out.put(shift);
                out.put(unshift);
                continue;
            }
            encodeBase64(ch, out);
        }
        /*
         * <HACK type="ugly"> Needed to trick JDK 1.5 and earlier into flushing
         * when Charset.encode(String), Charset.encode(CharBuffer) or
         * CharsetEncoder.encode(CharBuffer) is used. Without it, the last few
         * bytes may be missing.
         */
        boolean fakeOverflow = base64mode
                && useUglyHackToForceCallToFlushInJava5
                && out.limit() != MAX_BYTES_PER_CHAR * in.limit();
        /* </HACK> */
        return fakeOverflow ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
    }

    /**
     * <p>
     * Writes the bytes necessary to leave <i>base 64 mode</i> and clears the
     * base 64 state. Does nothing when no base 64 run is open. The unshift
     * byte itself is only written when the following character is part of
     * the base 64 alphabet, equals the unshift byte, or when the encoder is
     * strict.
     * </p>
     *
     * @param out The output byte buffer
     * @param ch The directly encoded character that will follow the run
     */
    private void leaveBase64Mode(ByteBuffer out, char ch) {
        if (base64mode) {
            if (bitsToOutput != 0) {
                out.put(base64.getChar(sextet));
            }
            boolean explicitUnshift = base64.contains(ch) || ch == unshift || strict;
            if (explicitUnshift) {
                out.put(unshift);
            }
            clearState();
        }
    }

    /**
     * <p>
     * Writes the bytes necessary to encode a character in <i>base 64 mode</i>,
     * opening a run with the shift byte first if none is open. Every sextet
     * that is fully determined is written; the remaining bits are kept in the
     * fields <code>sextet</code> and <code>bitsToOutput</code> until the next
     * character, an unshift or a flush completes them.
     * </p>
     *
     * @param ch The character to encode
     * @param out The output byte buffer
     */
    private void encodeBase64(char ch, ByteBuffer out) {
        if (!base64mode) {
            out.put(shift);
            base64mode = true;
        }
        // The 16 bits of this character join whatever was carried over.
        bitsToOutput += 16;
        while (bitsToOutput >= 6) {
            bitsToOutput -= 6;
            sextet = (sextet + (ch >> bitsToOutput)) & 0x3F;
            out.put(base64.getChar(sextet));
            sextet = 0;
        }
        // Keep the leftover low bits, aligned to the top of the next sextet.
        sextet = (ch << (6 - bitsToOutput)) & 0x3F;
    }

    /** Returns the encoder to its initial state, outside any base 64 run. */
    private void clearState() {
        base64mode = false;
        sextet = 0;
        bitsToOutput = 0;
    }
}