ojluni/src/main/java/sun/nio/cs/ext/COMPOUND_TEXT_Decoder.java - platform/libcore - Git at Google

 /*
  * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */

 package sun.nio.cs.ext;
 import java.io.ByteArrayOutputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.*;

 /**
  * An algorithmic conversion from COMPOUND_TEXT to Unicode.
  */

 public class COMPOUND_TEXT_Decoder extends CharsetDecoder {

     private static final int NORMAL_BYTES             =  0;
     private static final int NONSTANDARD_BYTES        =  1;
     private static final int VERSION_SEQUENCE_V       =  2;
     private static final int VERSION_SEQUENCE_TERM    =  3;
     private static final int ESCAPE_SEQUENCE          =  4;
     private static final int CHARSET_NGIIF            =  5;
     private static final int CHARSET_NLIIF            =  6;
     private static final int CHARSET_NLIF             =  7;
     private static final int CHARSET_NRIIF            =  8;
     private static final int CHARSET_NRIF             =  9;
     private static final int CHARSET_NONSTANDARD_FOML = 10;
     private static final int CHARSET_NONSTANDARD_OML  = 11;
     private static final int CHARSET_NONSTANDARD_ML   = 12;
     private static final int CHARSET_NONSTANDARD_L    = 13;
     private static final int CHARSET_NONSTANDARD      = 14;
     private static final int CHARSET_LIIF             = 15;
     private static final int CHARSET_LIF              = 16;
     private static final int CHARSET_RIIF             = 17;
     private static final int CHARSET_RIF              = 18;
     private static final int CONTROL_SEQUENCE_PIF     = 19;
     private static final int CONTROL_SEQUENCE_IF      = 20;
     private static final int EXTENSION_ML             = 21;
     private static final int EXTENSION_L              = 22;
     private static final int EXTENSION                = 23;
     private static final int ESCAPE_SEQUENCE_OTHER    = 24;

     private static final String ERR_LATIN1 = "ISO8859_1 unsupported";
     private static final String ERR_ILLSTATE = "Illegal state";
     private static final String ERR_ESCBYTE =
         "Illegal byte in 0x1B escape sequence";
     private static final String ERR_ENCODINGBYTE =
         "Illegal byte in non-standard character set name";
     private static final String ERR_CTRLBYTE =
         "Illegal byte in 0x9B control sequence";
     private static final String ERR_CTRLPI =
         "P following I in 0x9B control sequence";
     private static final String ERR_VERSTART =
         "Versioning escape sequence can only appear at start of byte stream";
     private static final String ERR_VERMANDATORY =
         "Cannot parse mandatory extensions";
     private static final String ERR_ENCODING = "Unknown encoding: ";
     private static final String ERR_FLUSH =
         "Escape sequence, control sequence, or ML extension not terminated";

     private int state = NORMAL_BYTES ;
     private int ext_count, ext_offset;
     private boolean versionSequenceAllowed = true;
     private byte[] byteBuf = new byte[1];
     private ByteBuffer inBB = ByteBuffer.allocate(16);
     private ByteArrayOutputStream queue = new ByteArrayOutputStream(),
         encodingQueue = new ByteArrayOutputStream();

     private CharsetDecoder glDecoder, grDecoder, nonStandardDecoder,
         lastDecoder;
     private boolean glHigh = false, grHigh = true;


     public COMPOUND_TEXT_Decoder(Charset cs) {
         super(cs, 1.0f, 1.0f);
         try {
             // Initial state in ISO 2022 designates Latin-1 charset.
             glDecoder = Charset.forName("ASCII").newDecoder();
             grDecoder = Charset.forName("ISO8859_1").newDecoder();
         } catch (IllegalArgumentException e) {
             error(ERR_LATIN1);
         }
         initDecoder(glDecoder);
         initDecoder(grDecoder);
     }

     protected CoderResult decodeLoop(ByteBuffer src, CharBuffer des) {
         CoderResult cr = CoderResult.UNDERFLOW;
         byte[] input = src.array();
         int inOff = src.arrayOffset() + src.position();
         int inEnd = src.arrayOffset() + src.limit();

         try {
             while (inOff < inEnd && cr.isUnderflow()) {
                 // Byte parsing is done with shorts instead of bytes because
                 // Java bytes are signed, while COMPOUND_TEXT bytes are not. If
                 // we used the Java byte type, the > and < tests during parsing
                 // would not work correctly.
                 cr = handleByte((short)(input[inOff] & 0xFF), des);
                 inOff++;
             }
             return cr;
         } finally {
             src.position(inOff - src.arrayOffset());
         }
     }

     private CoderResult handleByte(short newByte, CharBuffer cb) {
         CoderResult cr = CoderResult.UNDERFLOW;
         switch (state) {
         case NORMAL_BYTES:
             cr= normalBytes(newByte, cb);
             break;
         case NONSTANDARD_BYTES:
             cr = nonStandardBytes(newByte, cb);
             break;
         case VERSION_SEQUENCE_V:
         case VERSION_SEQUENCE_TERM:
             cr = versionSequence(newByte);
             break;
         case ESCAPE_SEQUENCE:
             cr = escapeSequence(newByte);
             break;
         case CHARSET_NGIIF:
             cr = charset94N(newByte);
             break;
         case CHARSET_NLIIF:
         case CHARSET_NLIF:
             cr = charset94NL(newByte, cb);
             break;
         case CHARSET_NRIIF:
         case CHARSET_NRIF:
             cr = charset94NR(newByte, cb);
             break;
         case CHARSET_NONSTANDARD_FOML:
         case CHARSET_NONSTANDARD_OML:
         case CHARSET_NONSTANDARD_ML:
         case CHARSET_NONSTANDARD_L:
         case CHARSET_NONSTANDARD:
             cr = charsetNonStandard(newByte, cb);
             break;
         case CHARSET_LIIF:
         case CHARSET_LIF:
             cr = charset9496L(newByte, cb);
             break;
         case CHARSET_RIIF:
         case CHARSET_RIF:
             cr = charset9496R(newByte, cb);
             break;
         case CONTROL_SEQUENCE_PIF:
         case CONTROL_SEQUENCE_IF:
             cr = controlSequence(newByte);
             break;
         case EXTENSION_ML:
         case EXTENSION_L:
         case EXTENSION:
             cr = extension(newByte);
             break;
         case ESCAPE_SEQUENCE_OTHER:
             cr = escapeSequenceOther(newByte);
             break;
         default:
             error(ERR_ILLSTATE);
         }
         return cr;
     }

     private CoderResult normalBytes(short newByte, CharBuffer cb) {
         CoderResult cr = CoderResult.UNDERFLOW;
         if ((newByte >= 0x00 && newByte <= 0x1F) || // C0
             (newByte >= 0x80 && newByte <= 0x9F)) { // C1
             char newChar;

             switch (newByte) {
             case 0x1B:
                 state = ESCAPE_SEQUENCE;
                 queue.write(newByte);
                 return cr;
             case 0x9B:
                 state = CONTROL_SEQUENCE_PIF;
                 versionSequenceAllowed = false;
                 queue.write(newByte);
                 return cr;
             case 0x09:
                 versionSequenceAllowed = false;
                 newChar = '\t';
                 break;
             case 0x0A:
                 versionSequenceAllowed = false;
                 newChar = '\n';
                 break;
             default:
                 versionSequenceAllowed = false;
                 return cr;
             }
             if (!cb.hasRemaining())
                 return CoderResult.OVERFLOW;
             else
                 cb.put(newChar);
         } else {
             CharsetDecoder decoder;
             boolean high;
             versionSequenceAllowed = false;

             if (newByte >= 0x20 && newByte <= 0x7F) {
                 decoder = glDecoder;
                 high = glHigh;
             } else /* if (newByte >= 0xA0 && newByte <= 0xFF) */ {
                 decoder = grDecoder;
                 high = grHigh;
             }
             if (lastDecoder != null && decoder != lastDecoder) {
                 cr = flushDecoder(lastDecoder, cb);
             }
             lastDecoder = decoder;

             if (decoder != null) {
                 byte b = (byte)newByte;
                 if (high) {
                     b |= 0x80;
                 } else {
                     b &= 0x7F;
                 }
                 inBB.put(b);
                 inBB.flip();
                 cr = decoder.decode(inBB, cb, false);
                 if (!inBB.hasRemaining() || cr.isMalformed()) {
                     inBB.clear();
                 } else {
                   int pos = inBB.limit();
                   inBB.clear();
                   inBB.position(pos);
                 }
             } else if (cb.remaining() < replacement().length()) {
                 cb.put(replacement());
             } else {
                 return CoderResult.OVERFLOW;
             }
         }
         return cr;
     }

     private CoderResult nonStandardBytes(short newByte, CharBuffer cb)
     {
         CoderResult cr = CoderResult.UNDERFLOW;
         if (nonStandardDecoder != null) {
             //byteBuf[0] = (byte)newByte;
             inBB.put((byte)newByte);
             inBB.flip();
             cr = nonStandardDecoder.decode(inBB, cb, false);
             if (!inBB.hasRemaining()) {
                 inBB.clear();
             } else {
                 int pos = inBB.limit();
                 inBB.clear();
                 inBB.position(pos);
             }
         } else if (cb.remaining() < replacement().length()) {
             cb.put(replacement());
         } else {
             return CoderResult.OVERFLOW;
         }

         ext_offset++;
         if (ext_offset >= ext_count) {
             ext_offset = ext_count = 0;
             state = NORMAL_BYTES;
             cr = flushDecoder(nonStandardDecoder, cb);
             nonStandardDecoder = null;
         }
         return cr;
     }

     private CoderResult escapeSequence(short newByte) {
         switch (newByte) {
         case 0x23:
             state = VERSION_SEQUENCE_V;
             break;
         case 0x24:
             state = CHARSET_NGIIF;
             versionSequenceAllowed = false;
             break;
         case 0x25:
             state = CHARSET_NONSTANDARD_FOML;
             versionSequenceAllowed = false;
             break;
         case 0x28:
             state = CHARSET_LIIF;
             versionSequenceAllowed = false;
             break;
         case 0x29:
         case 0x2D:
             state = CHARSET_RIIF;
             versionSequenceAllowed = false;
             break;
         default:
             // escapeSequenceOther will write to queue if appropriate
             return escapeSequenceOther(newByte);
         }

         queue.write(newByte);
         return CoderResult.UNDERFLOW;
     }

     /**
      * Test for unknown, but valid, escape sequences.
      */
     private CoderResult escapeSequenceOther(short newByte) {
         if (newByte >= 0x20 && newByte <= 0x2F) {
             // {I}
             state = ESCAPE_SEQUENCE_OTHER;
             versionSequenceAllowed = false;
             queue.write(newByte);
         } else if (newByte >= 0x30 && newByte <= 0x7E) {
             // F -- end of sequence
             state = NORMAL_BYTES;
             versionSequenceAllowed = false;
             queue.reset();
         } else {
             return malformedInput(ERR_ESCBYTE);
         }
         return CoderResult.UNDERFLOW;
     }

     /**
      * Parses directionality, as well as unknown, but valid, control sequences.
      */
     private CoderResult controlSequence(short newByte) {
         if (newByte >= 0x30 && newByte <= 0x3F) {
             // {P}
             if (state == CONTROL_SEQUENCE_IF) {
                 // P no longer allowed
                 return malformedInput(ERR_CTRLPI);
             }
             queue.write(newByte);
         } else if (newByte >= 0x20 && newByte <= 0x2F) {
             // {I}
             state = CONTROL_SEQUENCE_IF;
             queue.write(newByte);
         } else if (newByte >= 0x40 && newByte <= 0x7E) {
             // F -- end of sequence
             state = NORMAL_BYTES;
             queue.reset();
         } else {
             return malformedInput(ERR_CTRLBYTE);
         }
         return CoderResult.UNDERFLOW;
     }

     private CoderResult versionSequence(short newByte) {
         if (state == VERSION_SEQUENCE_V) {
             if (newByte >= 0x20 && newByte <= 0x2F) {
                 state = VERSION_SEQUENCE_TERM;
                 queue.write(newByte);
             } else {
                 return escapeSequenceOther(newByte);
             }
         } else /* if (state == VERSION_SEQUENCE_TERM) */ {
             switch (newByte) {
             case 0x30:
                 if (!versionSequenceAllowed) {
                     return malformedInput(ERR_VERSTART);
                 }

                 // OK to ignore extensions
                 versionSequenceAllowed = false;
                 state = NORMAL_BYTES;
                 queue.reset();
                 break;
             case 0x31:
                 return malformedInput((versionSequenceAllowed)
                                ? ERR_VERMANDATORY : ERR_VERSTART);
             default:
                 return escapeSequenceOther(newByte);
             }
         }
         return CoderResult.UNDERFLOW;
     }

     private CoderResult charset94N(short newByte) {
         switch (newByte) {
         case 0x28:
             state = CHARSET_NLIIF;
             break;
         case 0x29:
             state = CHARSET_NRIIF;
             break;
         default:
             // escapeSequenceOther will write byte if appropriate
             return escapeSequenceOther(newByte);
         }

         queue.write(newByte);
         return CoderResult.UNDERFLOW;
     }

     private CoderResult charset94NL(short newByte, CharBuffer cb) {
         if (newByte >= 0x21 &&
             newByte <= (state == CHARSET_NLIIF ? 0x23 : 0x2F)) {
             // {I}
             state = CHARSET_NLIF;
             queue.write(newByte);
         } else if (newByte >= 0x40 && newByte <= 0x7E) {
             // F
             return switchDecoder(newByte, cb);
         } else {
             return escapeSequenceOther(newByte);
         }
         return CoderResult.UNDERFLOW;
     }

     private CoderResult charset94NR(short newByte, CharBuffer cb)
     {
         if (newByte >= 0x21 &&
             newByte <= (state == CHARSET_NRIIF ? 0x23 : 0x2F)) {
             // {I}
             state = CHARSET_NRIF;
             queue.write(newByte);
         } else if (newByte >= 0x40 && newByte <= 0x7E) {
             // F
             return switchDecoder(newByte, cb);
         } else {
             return escapeSequenceOther(newByte);
         }
         return CoderResult.UNDERFLOW;
     }

     private CoderResult charset9496L(short newByte, CharBuffer cb) {
         if (newByte >= 0x21 &&
             newByte <= (state == CHARSET_LIIF ? 0x23 : 0x2F)) {
             // {I}
             state = CHARSET_LIF;
             queue.write(newByte);
             return CoderResult.UNDERFLOW;
         } else if (newByte >= 0x40 && newByte <= 0x7E) {
             // F
             return switchDecoder(newByte, cb);
         } else {
             return escapeSequenceOther(newByte);
         }
     }

     private CoderResult charset9496R(short newByte, CharBuffer cb) {
         if (newByte >= 0x21 &&
             newByte <= (state == CHARSET_RIIF ? 0x23 : 0x2F)) {
             // {I}
             state = CHARSET_RIF;
             queue.write(newByte);
             return CoderResult.UNDERFLOW;
         } else if (newByte >= 0x40 && newByte <= 0x7E) {
             // F
             return switchDecoder(newByte, cb);
         } else {
             return escapeSequenceOther(newByte);
         }
     }

     private CoderResult charsetNonStandard(short newByte, CharBuffer cb) {
         switch (state) {
         case CHARSET_NONSTANDARD_FOML:
             if (newByte == 0x2F) {
                 state = CHARSET_NONSTANDARD_OML;
                 queue.write(newByte);
             } else {
                 return escapeSequenceOther(newByte);
             }
             break;
         case CHARSET_NONSTANDARD_OML:
             if (newByte >= 0x30 && newByte <= 0x34) {
                 state = CHARSET_NONSTANDARD_ML;
                 queue.write(newByte);
             } else if (newByte >= 0x35 && newByte <= 0x3F) {
                 state = EXTENSION_ML;
                 queue.write(newByte);
             } else {
                 return escapeSequenceOther(newByte);
             }
             break;
         case CHARSET_NONSTANDARD_ML:
             ext_count = (newByte & 0x7F) * 0x80;
             state = CHARSET_NONSTANDARD_L;
             break;
         case CHARSET_NONSTANDARD_L:
             ext_count = ext_count + (newByte & 0x7F);
             state = (ext_count > 0) ? CHARSET_NONSTANDARD : NORMAL_BYTES;
             break;
         case CHARSET_NONSTANDARD:
             if (newByte == 0x3F || newByte == 0x2A) {
                 queue.reset(); // In this case, only current byte is bad.
                 return malformedInput(ERR_ENCODINGBYTE);
             }
             ext_offset++;
             if (ext_offset >= ext_count) {
                 ext_offset = ext_count = 0;
                 state = NORMAL_BYTES;
                 queue.reset();
                 encodingQueue.reset();
             } else if (newByte == 0x02) {
                 // encoding name terminator
                 return switchDecoder((short)0, cb);
             } else {
                 encodingQueue.write(newByte);
             }
             break;
         default:
             error(ERR_ILLSTATE);
         }
         return CoderResult.UNDERFLOW;
     }

     private CoderResult extension(short newByte) {
         switch (state) {
         case EXTENSION_ML:
             ext_count = (newByte & 0x7F) * 0x80;
             state = EXTENSION_L;
             break;
         case EXTENSION_L:
             ext_count = ext_count + (newByte & 0x7F);
             state = (ext_count > 0) ? EXTENSION : NORMAL_BYTES;
             break;
         case EXTENSION:
             // Consume 'count' bytes. Don't bother putting them on the queue.
             // There may be too many and we can't do anything with them anyway.
             ext_offset++;
             if (ext_offset >= ext_count) {
                 ext_offset = ext_count = 0;
                 state = NORMAL_BYTES;
                 queue.reset();
             }
             break;
         default:
             error(ERR_ILLSTATE);
         }
         return CoderResult.UNDERFLOW;
     }

     /**
      * Preconditions:
      *   1. 'queue' contains ControlSequence.escSequence
      *   2. 'encodingQueue' contains ControlSequence.encoding
      */
     private CoderResult switchDecoder(short lastByte, CharBuffer cb) {
         CoderResult cr = CoderResult.UNDERFLOW;
         CharsetDecoder decoder = null;
         boolean high = false;
         byte[] escSequence;
         byte[] encoding = null;

         if (lastByte != 0) {
             queue.write(lastByte);
         }

         escSequence = queue.toByteArray();
         queue.reset();

         if (state == CHARSET_NONSTANDARD) {
             encoding = encodingQueue.toByteArray();
             encodingQueue.reset();
             decoder = CompoundTextSupport.
                 getNonStandardDecoder(escSequence, encoding);
         } else {
             decoder = CompoundTextSupport.getStandardDecoder(escSequence);
             high = CompoundTextSupport.getHighBit(escSequence);
         }
         if (decoder != null) {
             initDecoder(decoder);
         } else if (unmappableCharacterAction() == CodingErrorAction.REPORT) {
             int badInputLength = 1;
             if (encoding != null) {
                 badInputLength = encoding.length;
             } else if (escSequence.length > 0) {
                 badInputLength = escSequence.length;
             }
             return CoderResult.unmappableForLength(badInputLength);
         }

         if (state == CHARSET_NLIIF || state == CHARSET_NLIF ||
             state == CHARSET_LIIF || state == CHARSET_LIF)
         {
             if (lastDecoder == glDecoder) {
                 cr = flushDecoder(glDecoder, cb);
             }
             glDecoder = lastDecoder = decoder;
             glHigh = high;
             state = NORMAL_BYTES;
         } else if (state == CHARSET_NRIIF || state == CHARSET_NRIF ||
                    state == CHARSET_RIIF || state == CHARSET_RIF) {
             if (lastDecoder == grDecoder) {
                 cr = flushDecoder(grDecoder, cb);
             }
             grDecoder = lastDecoder = decoder;
             grHigh = high;
             state = NORMAL_BYTES;
         } else if (state == CHARSET_NONSTANDARD) {
             if (lastDecoder != null) {
                 cr = flushDecoder(lastDecoder, cb);
                 lastDecoder = null;
             }
             nonStandardDecoder = decoder;
             state = NONSTANDARD_BYTES;
         } else {
             error(ERR_ILLSTATE);
         }
         return cr;
     }

     private ByteBuffer fbb= ByteBuffer.allocate(0);
     private CoderResult flushDecoder(CharsetDecoder dec, CharBuffer cb) {
         dec.decode(fbb, cb, true);
         CoderResult cr = dec.flush(cb);
         dec.reset();  //reuse
         return cr;
     }

     private CoderResult malformedInput(String msg) {
         int badInputLength = queue.size() + 1 /* current byte */ ;
         queue.reset();
         //TBD: nowhere to put the msg in CoderResult
         return CoderResult.malformedForLength(badInputLength);
     }

     private void error(String msg) {
         // For now, throw InternalError. Convert to 'assert' keyword later.
         throw new InternalError(msg);
     }

     protected CoderResult implFlush(CharBuffer out) {
         CoderResult cr = CoderResult.UNDERFLOW;
         if (lastDecoder != null)
           cr = flushDecoder(lastDecoder, out);
         if (state != NORMAL_BYTES)
             //TBD message ERR_FLUSH;
             cr = CoderResult.malformedForLength(0);
         reset();
         return cr;
     }

     /**
      * Resets the decoder.
      * Call this method to reset the decoder to its initial state
      */
     protected void implReset() {
         state = NORMAL_BYTES;
         ext_count = ext_offset = 0;
         versionSequenceAllowed = true;
         queue.reset();
         encodingQueue.reset();
         nonStandardDecoder = lastDecoder = null;
         glHigh = false;
         grHigh = true;
         try {
             // Initial state in ISO 2022 designates Latin-1 charset.
             glDecoder = Charset.forName("ASCII").newDecoder();
             grDecoder = Charset.forName("ISO8859_1").newDecoder();
         } catch (IllegalArgumentException e) {
             error(ERR_LATIN1);
         }
         initDecoder(glDecoder);
         initDecoder(grDecoder);
     }

     protected void implOnMalformedInput(CodingErrorAction newAction) {
         if (glDecoder != null)
             glDecoder.onMalformedInput(newAction);
         if (grDecoder != null)
             grDecoder.onMalformedInput(newAction);
         if (nonStandardDecoder != null)
             nonStandardDecoder.onMalformedInput(newAction);
     }

     protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
         if (glDecoder != null)
             glDecoder.onUnmappableCharacter(newAction);
         if (grDecoder != null)
             grDecoder.onUnmappableCharacter(newAction);
         if (nonStandardDecoder != null)
             nonStandardDecoder.onUnmappableCharacter(newAction);
     }

     protected void implReplaceWith(String newReplacement) {
         if (glDecoder != null)
             glDecoder.replaceWith(newReplacement);
         if (grDecoder != null)
             grDecoder.replaceWith(newReplacement);
         if (nonStandardDecoder != null)
             nonStandardDecoder.replaceWith(newReplacement);
     }

     private void initDecoder(CharsetDecoder dec) {
         dec.onUnmappableCharacter(CodingErrorAction.REPLACE)
             .replaceWith(replacement());
     }
 }
	/*
	* Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	package sun.nio.cs.ext;
	import java.io.ByteArrayOutputStream;
	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.*;

	/**
	* An algorithmic conversion from COMPOUND_TEXT to Unicode.
	*/

	public class COMPOUND_TEXT_Decoder extends CharsetDecoder {

	private static final int NORMAL_BYTES = 0;
	private static final int NONSTANDARD_BYTES = 1;
	private static final int VERSION_SEQUENCE_V = 2;
	private static final int VERSION_SEQUENCE_TERM = 3;
	private static final int ESCAPE_SEQUENCE = 4;
	private static final int CHARSET_NGIIF = 5;
	private static final int CHARSET_NLIIF = 6;
	private static final int CHARSET_NLIF = 7;
	private static final int CHARSET_NRIIF = 8;
	private static final int CHARSET_NRIF = 9;
	private static final int CHARSET_NONSTANDARD_FOML = 10;
	private static final int CHARSET_NONSTANDARD_OML = 11;
	private static final int CHARSET_NONSTANDARD_ML = 12;
	private static final int CHARSET_NONSTANDARD_L = 13;
	private static final int CHARSET_NONSTANDARD = 14;
	private static final int CHARSET_LIIF = 15;
	private static final int CHARSET_LIF = 16;
	private static final int CHARSET_RIIF = 17;
	private static final int CHARSET_RIF = 18;
	private static final int CONTROL_SEQUENCE_PIF = 19;
	private static final int CONTROL_SEQUENCE_IF = 20;
	private static final int EXTENSION_ML = 21;
	private static final int EXTENSION_L = 22;
	private static final int EXTENSION = 23;
	private static final int ESCAPE_SEQUENCE_OTHER = 24;

	private static final String ERR_LATIN1 = "ISO8859_1 unsupported";
	private static final String ERR_ILLSTATE = "Illegal state";
	private static final String ERR_ESCBYTE =
	"Illegal byte in 0x1B escape sequence";
	private static final String ERR_ENCODINGBYTE =
	"Illegal byte in non-standard character set name";
	private static final String ERR_CTRLBYTE =
	"Illegal byte in 0x9B control sequence";
	private static final String ERR_CTRLPI =
	"P following I in 0x9B control sequence";
	private static final String ERR_VERSTART =
	"Versioning escape sequence can only appear at start of byte stream";
	private static final String ERR_VERMANDATORY =
	"Cannot parse mandatory extensions";
	private static final String ERR_ENCODING = "Unknown encoding: ";
	private static final String ERR_FLUSH =
	"Escape sequence, control sequence, or ML extension not terminated";

	private int state = NORMAL_BYTES ;
	private int ext_count, ext_offset;
	private boolean versionSequenceAllowed = true;
	private byte[] byteBuf = new byte[1];
	private ByteBuffer inBB = ByteBuffer.allocate(16);
	private ByteArrayOutputStream queue = new ByteArrayOutputStream(),
	encodingQueue = new ByteArrayOutputStream();

	private CharsetDecoder glDecoder, grDecoder, nonStandardDecoder,
	lastDecoder;
	private boolean glHigh = false, grHigh = true;


	public COMPOUND_TEXT_Decoder(Charset cs) {
	super(cs, 1.0f, 1.0f);
	try {
	// Initial state in ISO 2022 designates Latin-1 charset.
	glDecoder = Charset.forName("ASCII").newDecoder();
	grDecoder = Charset.forName("ISO8859_1").newDecoder();
	} catch (IllegalArgumentException e) {
	error(ERR_LATIN1);
	}
	initDecoder(glDecoder);
	initDecoder(grDecoder);
	}

	protected CoderResult decodeLoop(ByteBuffer src, CharBuffer des) {
	CoderResult cr = CoderResult.UNDERFLOW;
	byte[] input = src.array();
	int inOff = src.arrayOffset() + src.position();
	int inEnd = src.arrayOffset() + src.limit();

	try {
	while (inOff < inEnd && cr.isUnderflow()) {
	// Byte parsing is done with shorts instead of bytes because
	// Java bytes are signed, while COMPOUND_TEXT bytes are not. If
	// we used the Java byte type, the > and < tests during parsing
	// would not work correctly.
	cr = handleByte((short)(input[inOff] & 0xFF), des);
	inOff++;
	}
	return cr;
	} finally {
	src.position(inOff - src.arrayOffset());
	}
	}

	private CoderResult handleByte(short newByte, CharBuffer cb) {
	CoderResult cr = CoderResult.UNDERFLOW;
	switch (state) {
	case NORMAL_BYTES:
	cr= normalBytes(newByte, cb);
	break;
	case NONSTANDARD_BYTES:
	cr = nonStandardBytes(newByte, cb);
	break;
	case VERSION_SEQUENCE_V:
	case VERSION_SEQUENCE_TERM:
	cr = versionSequence(newByte);
	break;
	case ESCAPE_SEQUENCE:
	cr = escapeSequence(newByte);
	break;
	case CHARSET_NGIIF:
	cr = charset94N(newByte);
	break;
	case CHARSET_NLIIF:
	case CHARSET_NLIF:
	cr = charset94NL(newByte, cb);
	break;
	case CHARSET_NRIIF:
	case CHARSET_NRIF:
	cr = charset94NR(newByte, cb);
	break;
	case CHARSET_NONSTANDARD_FOML:
	case CHARSET_NONSTANDARD_OML:
	case CHARSET_NONSTANDARD_ML:
	case CHARSET_NONSTANDARD_L:
	case CHARSET_NONSTANDARD:
	cr = charsetNonStandard(newByte, cb);
	break;
	case CHARSET_LIIF:
	case CHARSET_LIF:
	cr = charset9496L(newByte, cb);
	break;
	case CHARSET_RIIF:
	case CHARSET_RIF:
	cr = charset9496R(newByte, cb);
	break;
	case CONTROL_SEQUENCE_PIF:
	case CONTROL_SEQUENCE_IF:
	cr = controlSequence(newByte);
	break;
	case EXTENSION_ML:
	case EXTENSION_L:
	case EXTENSION:
	cr = extension(newByte);
	break;
	case ESCAPE_SEQUENCE_OTHER:
	cr = escapeSequenceOther(newByte);
	break;
	default:
	error(ERR_ILLSTATE);
	}
	return cr;
	}

	private CoderResult normalBytes(short newByte, CharBuffer cb) {
	CoderResult cr = CoderResult.UNDERFLOW;
	if ((newByte >= 0x00 && newByte <= 0x1F) \|\| // C0
	(newByte >= 0x80 && newByte <= 0x9F)) { // C1
	char newChar;

	switch (newByte) {
	case 0x1B:
	state = ESCAPE_SEQUENCE;
	queue.write(newByte);
	return cr;
	case 0x9B:
	state = CONTROL_SEQUENCE_PIF;
	versionSequenceAllowed = false;
	queue.write(newByte);
	return cr;
	case 0x09:
	versionSequenceAllowed = false;
	newChar = '\t';
	break;
	case 0x0A:
	versionSequenceAllowed = false;
	newChar = '\n';
	break;
	default:
	versionSequenceAllowed = false;
	return cr;
	}
	if (!cb.hasRemaining())
	return CoderResult.OVERFLOW;
	else
	cb.put(newChar);
	} else {
	CharsetDecoder decoder;
	boolean high;
	versionSequenceAllowed = false;

	if (newByte >= 0x20 && newByte <= 0x7F) {
	decoder = glDecoder;
	high = glHigh;
	} else /* if (newByte >= 0xA0 && newByte <= 0xFF) */ {
	decoder = grDecoder;
	high = grHigh;
	}
	if (lastDecoder != null && decoder != lastDecoder) {
	cr = flushDecoder(lastDecoder, cb);
	}
	lastDecoder = decoder;

	if (decoder != null) {
	byte b = (byte)newByte;
	if (high) {
	b \|= 0x80;
	} else {
	b &= 0x7F;
	}
	inBB.put(b);
	inBB.flip();
	cr = decoder.decode(inBB, cb, false);
	if (!inBB.hasRemaining() \|\| cr.isMalformed()) {
	inBB.clear();
	} else {
	int pos = inBB.limit();
	inBB.clear();
	inBB.position(pos);
	}
	} else if (cb.remaining() < replacement().length()) {
	cb.put(replacement());
	} else {
	return CoderResult.OVERFLOW;
	}
	}
	return cr;
	}

	private CoderResult nonStandardBytes(short newByte, CharBuffer cb)
	{
	CoderResult cr = CoderResult.UNDERFLOW;
	if (nonStandardDecoder != null) {
	//byteBuf[0] = (byte)newByte;
	inBB.put((byte)newByte);
	inBB.flip();
	cr = nonStandardDecoder.decode(inBB, cb, false);
	if (!inBB.hasRemaining()) {
	inBB.clear();
	} else {
	int pos = inBB.limit();
	inBB.clear();
	inBB.position(pos);
	}
	} else if (cb.remaining() < replacement().length()) {
	cb.put(replacement());
	} else {
	return CoderResult.OVERFLOW;
	}

	ext_offset++;
	if (ext_offset >= ext_count) {
	ext_offset = ext_count = 0;
	state = NORMAL_BYTES;
	cr = flushDecoder(nonStandardDecoder, cb);
	nonStandardDecoder = null;
	}
	return cr;
	}

	private CoderResult escapeSequence(short newByte) {
	switch (newByte) {
	case 0x23:
	state = VERSION_SEQUENCE_V;
	break;
	case 0x24:
	state = CHARSET_NGIIF;
	versionSequenceAllowed = false;
	break;
	case 0x25:
	state = CHARSET_NONSTANDARD_FOML;
	versionSequenceAllowed = false;
	break;
	case 0x28:
	state = CHARSET_LIIF;
	versionSequenceAllowed = false;
	break;
	case 0x29:
	case 0x2D:
	state = CHARSET_RIIF;
	versionSequenceAllowed = false;
	break;
	default:
	// escapeSequenceOther will write to queue if appropriate
	return escapeSequenceOther(newByte);
	}

	queue.write(newByte);
	return CoderResult.UNDERFLOW;
	}

	/**
	* Test for unknown, but valid, escape sequences.
	*/
	private CoderResult escapeSequenceOther(short newByte) {
	if (newByte >= 0x20 && newByte <= 0x2F) {
	// {I}
	state = ESCAPE_SEQUENCE_OTHER;
	versionSequenceAllowed = false;
	queue.write(newByte);
	} else if (newByte >= 0x30 && newByte <= 0x7E) {
	// F -- end of sequence
	state = NORMAL_BYTES;
	versionSequenceAllowed = false;
	queue.reset();
	} else {
	return malformedInput(ERR_ESCBYTE);
	}
	return CoderResult.UNDERFLOW;
	}

	/**
	* Parses directionality, as well as unknown, but valid, control sequences.
	*/
	private CoderResult controlSequence(short newByte) {
	if (newByte >= 0x30 && newByte <= 0x3F) {
	// {P}
	if (state == CONTROL_SEQUENCE_IF) {
	// P no longer allowed
	return malformedInput(ERR_CTRLPI);
	}
	queue.write(newByte);
	} else if (newByte >= 0x20 && newByte <= 0x2F) {
	// {I}
	state = CONTROL_SEQUENCE_IF;
	queue.write(newByte);
	} else if (newByte >= 0x40 && newByte <= 0x7E) {
	// F -- end of sequence
	state = NORMAL_BYTES;
	queue.reset();
	} else {
	return malformedInput(ERR_CTRLBYTE);
	}
	return CoderResult.UNDERFLOW;
	}

	private CoderResult versionSequence(short newByte) {
	if (state == VERSION_SEQUENCE_V) {
	if (newByte >= 0x20 && newByte <= 0x2F) {
	state = VERSION_SEQUENCE_TERM;
	queue.write(newByte);
	} else {
	return escapeSequenceOther(newByte);
	}
	} else /* if (state == VERSION_SEQUENCE_TERM) */ {
	switch (newByte) {
	case 0x30:
	if (!versionSequenceAllowed) {
	return malformedInput(ERR_VERSTART);
	}

	// OK to ignore extensions
	versionSequenceAllowed = false;
	state = NORMAL_BYTES;
	queue.reset();
	break;
	case 0x31:
	return malformedInput((versionSequenceAllowed)
	? ERR_VERMANDATORY : ERR_VERSTART);
	default:
	return escapeSequenceOther(newByte);
	}
	}
	return CoderResult.UNDERFLOW;
	}

	private CoderResult charset94N(short newByte) {
	switch (newByte) {
	case 0x28:
	state = CHARSET_NLIIF;
	break;
	case 0x29:
	state = CHARSET_NRIIF;
	break;
	default:
	// escapeSequenceOther will write byte if appropriate
	return escapeSequenceOther(newByte);
	}

	queue.write(newByte);
	return CoderResult.UNDERFLOW;
	}

	private CoderResult charset94NL(short newByte, CharBuffer cb) {
	if (newByte >= 0x21 &&
	newByte <= (state == CHARSET_NLIIF ? 0x23 : 0x2F)) {
	// {I}
	state = CHARSET_NLIF;
	queue.write(newByte);
	} else if (newByte >= 0x40 && newByte <= 0x7E) {
	// F
	return switchDecoder(newByte, cb);
	} else {
	return escapeSequenceOther(newByte);
	}
	return CoderResult.UNDERFLOW;
	}

	private CoderResult charset94NR(short newByte, CharBuffer cb)
	{
	if (newByte >= 0x21 &&
	newByte <= (state == CHARSET_NRIIF ? 0x23 : 0x2F)) {
	// {I}
	state = CHARSET_NRIF;
	queue.write(newByte);
	} else if (newByte >= 0x40 && newByte <= 0x7E) {
	// F
	return switchDecoder(newByte, cb);
	} else {
	return escapeSequenceOther(newByte);
	}
	return CoderResult.UNDERFLOW;
	}

	private CoderResult charset9496L(short newByte, CharBuffer cb) {
	if (newByte >= 0x21 &&
	newByte <= (state == CHARSET_LIIF ? 0x23 : 0x2F)) {
	// {I}
	state = CHARSET_LIF;
	queue.write(newByte);
	return CoderResult.UNDERFLOW;
	} else if (newByte >= 0x40 && newByte <= 0x7E) {
	// F
	return switchDecoder(newByte, cb);
	} else {
	return escapeSequenceOther(newByte);
	}
	}

	private CoderResult charset9496R(short newByte, CharBuffer cb) {
	if (newByte >= 0x21 &&
	newByte <= (state == CHARSET_RIIF ? 0x23 : 0x2F)) {
	// {I}
	state = CHARSET_RIF;
	queue.write(newByte);
	return CoderResult.UNDERFLOW;
	} else if (newByte >= 0x40 && newByte <= 0x7E) {
	// F
	return switchDecoder(newByte, cb);
	} else {
	return escapeSequenceOther(newByte);
	}
	}

	private CoderResult charsetNonStandard(short newByte, CharBuffer cb) {
	switch (state) {
	case CHARSET_NONSTANDARD_FOML:
	if (newByte == 0x2F) {
	state = CHARSET_NONSTANDARD_OML;
	queue.write(newByte);
	} else {
	return escapeSequenceOther(newByte);
	}
	break;
	case CHARSET_NONSTANDARD_OML:
	if (newByte >= 0x30 && newByte <= 0x34) {
	state = CHARSET_NONSTANDARD_ML;
	queue.write(newByte);
	} else if (newByte >= 0x35 && newByte <= 0x3F) {
	state = EXTENSION_ML;
	queue.write(newByte);
	} else {
	return escapeSequenceOther(newByte);
	}
	break;
	case CHARSET_NONSTANDARD_ML:
	ext_count = (newByte & 0x7F) * 0x80;
	state = CHARSET_NONSTANDARD_L;
	break;
	case CHARSET_NONSTANDARD_L:
	ext_count = ext_count + (newByte & 0x7F);
	state = (ext_count > 0) ? CHARSET_NONSTANDARD : NORMAL_BYTES;
	break;
	case CHARSET_NONSTANDARD:
	if (newByte == 0x3F \|\| newByte == 0x2A) {
	queue.reset(); // In this case, only current byte is bad.
	return malformedInput(ERR_ENCODINGBYTE);
	}
	ext_offset++;
	if (ext_offset >= ext_count) {
	ext_offset = ext_count = 0;
	state = NORMAL_BYTES;
	queue.reset();
	encodingQueue.reset();
	} else if (newByte == 0x02) {
	// encoding name terminator
	return switchDecoder((short)0, cb);
	} else {
	encodingQueue.write(newByte);
	}
	break;
	default:
	error(ERR_ILLSTATE);
	}
	return CoderResult.UNDERFLOW;
	}

	private CoderResult extension(short newByte) {
	switch (state) {
	case EXTENSION_ML:
	ext_count = (newByte & 0x7F) * 0x80;
	state = EXTENSION_L;
	break;
	case EXTENSION_L:
	ext_count = ext_count + (newByte & 0x7F);
	state = (ext_count > 0) ? EXTENSION : NORMAL_BYTES;
	break;
	case EXTENSION:
	// Consume 'count' bytes. Don't bother putting them on the queue.
	// There may be too many and we can't do anything with them anyway.
	ext_offset++;
	if (ext_offset >= ext_count) {
	ext_offset = ext_count = 0;
	state = NORMAL_BYTES;
	queue.reset();
	}
	break;
	default:
	error(ERR_ILLSTATE);
	}
	return CoderResult.UNDERFLOW;
	}

	/**
	* Preconditions:
	* 1. 'queue' contains ControlSequence.escSequence
	* 2. 'encodingQueue' contains ControlSequence.encoding
	*/
	private CoderResult switchDecoder(short lastByte, CharBuffer cb) {
	CoderResult cr = CoderResult.UNDERFLOW;
	CharsetDecoder decoder = null;
	boolean high = false;
	byte[] escSequence;
	byte[] encoding = null;

	if (lastByte != 0) {
	queue.write(lastByte);
	}

	escSequence = queue.toByteArray();
	queue.reset();

	if (state == CHARSET_NONSTANDARD) {
	encoding = encodingQueue.toByteArray();
	encodingQueue.reset();
	decoder = CompoundTextSupport.
	getNonStandardDecoder(escSequence, encoding);
	} else {
	decoder = CompoundTextSupport.getStandardDecoder(escSequence);
	high = CompoundTextSupport.getHighBit(escSequence);
	}
	if (decoder != null) {
	initDecoder(decoder);
	} else if (unmappableCharacterAction() == CodingErrorAction.REPORT) {
	int badInputLength = 1;
	if (encoding != null) {
	badInputLength = encoding.length;
	} else if (escSequence.length > 0) {
	badInputLength = escSequence.length;
	}
	return CoderResult.unmappableForLength(badInputLength);
	}

	if (state == CHARSET_NLIIF \|\| state == CHARSET_NLIF \|\|
	state == CHARSET_LIIF \|\| state == CHARSET_LIF)
	{
	if (lastDecoder == glDecoder) {
	cr = flushDecoder(glDecoder, cb);
	}
	glDecoder = lastDecoder = decoder;
	glHigh = high;
	state = NORMAL_BYTES;
	} else if (state == CHARSET_NRIIF \|\| state == CHARSET_NRIF \|\|
	state == CHARSET_RIIF \|\| state == CHARSET_RIF) {
	if (lastDecoder == grDecoder) {
	cr = flushDecoder(grDecoder, cb);
	}
	grDecoder = lastDecoder = decoder;
	grHigh = high;
	state = NORMAL_BYTES;
	} else if (state == CHARSET_NONSTANDARD) {
	if (lastDecoder != null) {
	cr = flushDecoder(lastDecoder, cb);
	lastDecoder = null;
	}
	nonStandardDecoder = decoder;
	state = NONSTANDARD_BYTES;
	} else {
	error(ERR_ILLSTATE);
	}
	return cr;
	}

	private ByteBuffer fbb= ByteBuffer.allocate(0);
	private CoderResult flushDecoder(CharsetDecoder dec, CharBuffer cb) {
	dec.decode(fbb, cb, true);
	CoderResult cr = dec.flush(cb);
	dec.reset(); //reuse
	return cr;
	}

	private CoderResult malformedInput(String msg) {
	int badInputLength = queue.size() + 1 /* current byte */ ;
	queue.reset();
	//TBD: nowhere to put the msg in CoderResult
	return CoderResult.malformedForLength(badInputLength);
	}

	private void error(String msg) {
	// For now, throw InternalError. Convert to 'assert' keyword later.
	throw new InternalError(msg);
	}

	protected CoderResult implFlush(CharBuffer out) {
	CoderResult cr = CoderResult.UNDERFLOW;
	if (lastDecoder != null)
	cr = flushDecoder(lastDecoder, out);
	if (state != NORMAL_BYTES)
	//TBD message ERR_FLUSH;
	cr = CoderResult.malformedForLength(0);
	reset();
	return cr;
	}

	/**
	* Resets the decoder.
	* Call this method to reset the decoder to its initial state
	*/
	protected void implReset() {
	state = NORMAL_BYTES;
	ext_count = ext_offset = 0;
	versionSequenceAllowed = true;
	queue.reset();
	encodingQueue.reset();
	nonStandardDecoder = lastDecoder = null;
	glHigh = false;
	grHigh = true;
	try {
	// Initial state in ISO 2022 designates Latin-1 charset.
	glDecoder = Charset.forName("ASCII").newDecoder();
	grDecoder = Charset.forName("ISO8859_1").newDecoder();
	} catch (IllegalArgumentException e) {
	error(ERR_LATIN1);
	}
	initDecoder(glDecoder);
	initDecoder(grDecoder);
	}

	protected void implOnMalformedInput(CodingErrorAction newAction) {
	if (glDecoder != null)
	glDecoder.onMalformedInput(newAction);
	if (grDecoder != null)
	grDecoder.onMalformedInput(newAction);
	if (nonStandardDecoder != null)
	nonStandardDecoder.onMalformedInput(newAction);
	}

	protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
	if (glDecoder != null)
	glDecoder.onUnmappableCharacter(newAction);
	if (grDecoder != null)
	grDecoder.onUnmappableCharacter(newAction);
	if (nonStandardDecoder != null)
	nonStandardDecoder.onUnmappableCharacter(newAction);
	}

	protected void implReplaceWith(String newReplacement) {
	if (glDecoder != null)
	glDecoder.replaceWith(newReplacement);
	if (grDecoder != null)
	grDecoder.replaceWith(newReplacement);
	if (nonStandardDecoder != null)
	nonStandardDecoder.replaceWith(newReplacement);
	}

	private void initDecoder(CharsetDecoder dec) {
	dec.onUnmappableCharacter(CodingErrorAction.REPLACE)
	.replaceWith(replacement());
	}
	}