| package com.fasterxml.jackson.core.json; |
| |
| import java.io.*; |
| import java.util.Arrays; |
| |
| import com.fasterxml.jackson.core.*; |
| import com.fasterxml.jackson.core.base.ParserBase; |
| import com.fasterxml.jackson.core.io.CharTypes; |
| import com.fasterxml.jackson.core.io.IOContext; |
| import com.fasterxml.jackson.core.sym.*; |
| import com.fasterxml.jackson.core.util.*; |
| |
| import static com.fasterxml.jackson.core.JsonTokenId.*; |
| |
| /** |
| * This is a concrete implementation of {@link JsonParser}, which is |
| * based on a {@link java.io.InputStream} as the input source. |
| *<p> |
| * Note: non-final since version 2.3. |
| */ |
| public class UTF8StreamJsonParser |
| extends ParserBase |
| { |
// Byte value of the ASCII line-feed character.
final static byte BYTE_LF = (byte) '\n';

// This is the main input-code lookup table, fetched eagerly
private final static int[] _icUTF8 = CharTypes.getInputCodeUtf8();

// Latin-1 encoding is not supported as such, but its 8-bit code table is used
// for pre-processing tasks, to keep the first pass simple and fast.
protected final static int[] _icLatin1 = CharTypes.getInputCodeLatin1();

// White-space processing is done all the time, pre-fetch as well
// private final static int[] _icWS = CharTypes.getInputCodeWS();
| |
| /* |
| /********************************************************** |
| /* Configuration |
| /********************************************************** |
| */ |
| |
/**
 * Codec used for data binding when (if) requested; typically a full
 * <code>ObjectMapper</code>, but that abstraction is not part of the core
 * package. Read and replaced through {@link #getCodec} and {@link #setCodec}.
 */
protected ObjectCodec _objectCodec;

/**
 * Symbol table that contains field names encountered so far; released
 * when the parser's buffers are released.
 */
final protected BytesToNameCanonicalizer _symbols;
| |
| /* |
| /********************************************************** |
| /* Parsing state |
| /********************************************************** |
| */ |
| |
/**
 * Temporary buffer used for name parsing; starts out with room for 16 ints.
 */
protected int[] _quadBuffer = new int[16];

/**
 * Flag that indicates that the current token has not yet
 * been fully processed, and needs to be finished for
 * some access (or skipped to obtain the next token).
 * Within this class only String values are left incomplete.
 */
protected boolean _tokenIncomplete = false;

/**
 * Temporary storage for partially parsed name bytes.
 */
private int _quad1;
| |
| /* |
| /********************************************************** |
| /* Input buffering (from former 'StreamBasedParserBase') |
| /********************************************************** |
| */ |
| |
// Stream that further input is read from; reset to null once input has been closed.
protected InputStream _inputStream;

/*
/**********************************************************
/* Current input data
/**********************************************************
 */

/**
 * Current buffer from which data is read; generally data is read into
 * buffer from input source, but in some cases pre-loaded buffer
 * is handed to the parser.
 */
protected byte[] _inputBuffer;

/**
 * Flag that indicates whether the input buffer is recyclable (and
 * needs to be returned to recycler once we are done) or not.
 *<p>
 * If it is not, it also means that parser can NOT modify underlying
 * buffer.
 */
protected boolean _bufferRecyclable;
| |
| /* |
| /********************************************************** |
| /* Life-cycle |
| /********************************************************** |
| */ |
| |
/**
 * Creates a parser that reads from the given (possibly pre-filled) buffer
 * and, once that is exhausted, from the given stream.
 *
 * @param ctxt I/O context, handed to the base class
 * @param features parser feature flags, handed to the base class
 * @param in stream that further input is loaded from
 * @param codec codec exposed through {@link #getCodec}
 * @param sym symbol table used for field names
 * @param inputBuffer buffer that input bytes are read from
 * @param start index of the first unread byte in <code>inputBuffer</code>
 * @param end index one past the last available byte in <code>inputBuffer</code>
 * @param bufferRecyclable whether <code>inputBuffer</code> is to be handed back
 *   to the I/O context when buffers are released
 */
public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
        ObjectCodec codec, BytesToNameCanonicalizer sym,
        byte[] inputBuffer, int start, int end,
        boolean bufferRecyclable)
{
    super(ctxt, features);

    // Collaborators
    _objectCodec = codec;
    _symbols = sym;

    // Input source and its buffer
    _inputStream = in;
    _inputBuffer = inputBuffer;
    _bufferRecyclable = bufferRecyclable;

    // Readable window within the buffer
    _inputPtr = start;
    _inputEnd = end;

    // Location tracking: the row starts at the initial offset, and that
    // offset must not be counted as processed input
    _currInputRowStart = start;
    _currInputProcessed = -start;
}
| |
| @Override |
| public ObjectCodec getCodec() { |
| return _objectCodec; |
| } |
| |
| @Override |
| public void setCodec(ObjectCodec c) { |
| _objectCodec = c; |
| } |
| |
| /* |
| /********************************************************** |
| /* Overrides for life-cycle |
| /********************************************************** |
| */ |
| |
| @Override |
| public int releaseBuffered(OutputStream out) throws IOException |
| { |
| int count = _inputEnd - _inputPtr; |
| if (count < 1) { |
| return 0; |
| } |
| // let's just advance ptr to end |
| int origPtr = _inputPtr; |
| out.write(_inputBuffer, origPtr, count); |
| return count; |
| } |
| |
| @Override |
| public Object getInputSource() { |
| return _inputStream; |
| } |
| |
| /* |
| /********************************************************** |
| /* Overrides, low-level reading |
| /********************************************************** |
| */ |
| |
| @Override |
| protected final boolean loadMore() |
| throws IOException |
| { |
| _currInputProcessed += _inputEnd; |
| _currInputRowStart -= _inputEnd; |
| |
| if (_inputStream != null) { |
| int count = _inputStream.read(_inputBuffer, 0, _inputBuffer.length); |
| if (count > 0) { |
| _inputPtr = 0; |
| _inputEnd = count; |
| return true; |
| } |
| // End of input |
| _closeInput(); |
| // Should never return 0, so let's fail |
| if (count == 0) { |
| throw new IOException("InputStream.read() returned 0 characters when trying to read "+_inputBuffer.length+" bytes"); |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Helper method that will try to load at least specified number bytes in |
| * input buffer, possible moving existing data around if necessary |
| */ |
| protected final boolean _loadToHaveAtLeast(int minAvailable) |
| throws IOException |
| { |
| // No input stream, no leading (either we are closed, or have non-stream input source) |
| if (_inputStream == null) { |
| return false; |
| } |
| // Need to move remaining data in front? |
| int amount = _inputEnd - _inputPtr; |
| if (amount > 0 && _inputPtr > 0) { |
| _currInputProcessed += _inputPtr; |
| _currInputRowStart -= _inputPtr; |
| System.arraycopy(_inputBuffer, _inputPtr, _inputBuffer, 0, amount); |
| _inputEnd = amount; |
| } else { |
| _inputEnd = 0; |
| } |
| _inputPtr = 0; |
| while (_inputEnd < minAvailable) { |
| int count = _inputStream.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd); |
| if (count < 1) { |
| // End of input |
| _closeInput(); |
| // Should never return 0, so let's fail |
| if (count == 0) { |
| throw new IOException("InputStream.read() returned 0 characters when trying to read "+amount+" bytes"); |
| } |
| return false; |
| } |
| _inputEnd += count; |
| } |
| return true; |
| } |
| |
| @Override |
| protected void _closeInput() throws IOException |
| { |
| /* 25-Nov-2008, tatus: As per [JACKSON-16] we are not to call close() |
| * on the underlying InputStream, unless we "own" it, or auto-closing |
| * feature is enabled. |
| */ |
| if (_inputStream != null) { |
| if (_ioContext.isResourceManaged() || isEnabled(Feature.AUTO_CLOSE_SOURCE)) { |
| _inputStream.close(); |
| } |
| _inputStream = null; |
| } |
| } |
| |
| /** |
| * Method called to release internal buffers owned by the base |
| * reader. This may be called along with {@link #_closeInput} (for |
| * example, when explicitly closing this reader instance), or |
| * separately (if need be). |
| */ |
| @Override |
| protected void _releaseBuffers() throws IOException |
| { |
| super._releaseBuffers(); |
| // Merge found symbols, if any: |
| _symbols.release(); |
| if (_bufferRecyclable) { |
| byte[] buf = _inputBuffer; |
| if (buf != null) { |
| _inputBuffer = null; |
| _ioContext.releaseReadIOBuffer(buf); |
| } |
| } |
| } |
| |
| /* |
| /********************************************************** |
| /* Public API, data access |
| /********************************************************** |
| */ |
| |
| @Override |
| public String getText() |
| throws IOException, JsonParseException |
| { |
| if (_currToken == JsonToken.VALUE_STRING) { |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); // only strings can be incomplete |
| } |
| return _textBuffer.contentsAsString(); |
| } |
| return _getText2(_currToken); |
| } |
| |
| // // // Let's override default impls for improved performance |
| |
| // @since 2.1 |
| @Override |
| public String getValueAsString() throws IOException, JsonParseException |
| { |
| if (_currToken == JsonToken.VALUE_STRING) { |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); // only strings can be incomplete |
| } |
| return _textBuffer.contentsAsString(); |
| } |
| return super.getValueAsString(null); |
| } |
| |
| // @since 2.1 |
| @Override |
| public String getValueAsString(String defValue) throws IOException, JsonParseException |
| { |
| if (_currToken == JsonToken.VALUE_STRING) { |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); // only strings can be incomplete |
| } |
| return _textBuffer.contentsAsString(); |
| } |
| return super.getValueAsString(defValue); |
| } |
| |
| protected final String _getText2(JsonToken t) |
| { |
| if (t == null) { |
| return null; |
| } |
| switch (t.id()) { |
| case ID_FIELD_NAME: |
| return _parsingContext.getCurrentName(); |
| |
| case ID_STRING: |
| // fall through |
| case ID_NUMBER_INT: |
| case ID_NUMBER_FLOAT: |
| return _textBuffer.contentsAsString(); |
| default: |
| return t.asString(); |
| } |
| } |
| |
| @Override |
| public char[] getTextCharacters() |
| throws IOException, JsonParseException |
| { |
| if (_currToken != null) { // null only before/after document |
| switch (_currToken.id()) { |
| |
| case ID_FIELD_NAME: |
| if (!_nameCopied) { |
| String name = _parsingContext.getCurrentName(); |
| int nameLen = name.length(); |
| if (_nameCopyBuffer == null) { |
| _nameCopyBuffer = _ioContext.allocNameCopyBuffer(nameLen); |
| } else if (_nameCopyBuffer.length < nameLen) { |
| _nameCopyBuffer = new char[nameLen]; |
| } |
| name.getChars(0, nameLen, _nameCopyBuffer, 0); |
| _nameCopied = true; |
| } |
| return _nameCopyBuffer; |
| |
| case ID_STRING: |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); // only strings can be incomplete |
| } |
| // fall through |
| case ID_NUMBER_INT: |
| case ID_NUMBER_FLOAT: |
| return _textBuffer.getTextBuffer(); |
| |
| default: |
| return _currToken.asCharArray(); |
| } |
| } |
| return null; |
| } |
| |
| @Override |
| public int getTextLength() |
| throws IOException, JsonParseException |
| { |
| if (_currToken != null) { // null only before/after document |
| switch (_currToken.id()) { |
| |
| case ID_FIELD_NAME: |
| return _parsingContext.getCurrentName().length(); |
| case ID_STRING: |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); // only strings can be incomplete |
| } |
| // fall through |
| case ID_NUMBER_INT: |
| case ID_NUMBER_FLOAT: |
| return _textBuffer.size(); |
| |
| default: |
| return _currToken.asCharArray().length; |
| } |
| } |
| return 0; |
| } |
| |
| @Override |
| public int getTextOffset() throws IOException, JsonParseException |
| { |
| // Most have offset of 0, only some may have other values: |
| if (_currToken != null) { |
| switch (_currToken.id()) { |
| case ID_FIELD_NAME: |
| return 0; |
| case ID_STRING: |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); // only strings can be incomplete |
| } |
| // fall through |
| case ID_NUMBER_INT: |
| case ID_NUMBER_FLOAT: |
| return _textBuffer.getTextOffset(); |
| default: |
| } |
| } |
| return 0; |
| } |
| |
| @Override |
| public byte[] getBinaryValue(Base64Variant b64variant) |
| throws IOException, JsonParseException |
| { |
| if (_currToken != JsonToken.VALUE_STRING && |
| (_currToken != JsonToken.VALUE_EMBEDDED_OBJECT || _binaryValue == null)) { |
| _reportError("Current token ("+_currToken+") not VALUE_STRING or VALUE_EMBEDDED_OBJECT, can not access as binary"); |
| } |
| /* To ensure that we won't see inconsistent data, better clear up |
| * state... |
| */ |
| if (_tokenIncomplete) { |
| try { |
| _binaryValue = _decodeBase64(b64variant); |
| } catch (IllegalArgumentException iae) { |
| throw _constructError("Failed to decode VALUE_STRING as base64 ("+b64variant+"): "+iae.getMessage()); |
| } |
| /* let's clear incomplete only now; allows for accessing other |
| * textual content in error cases |
| */ |
| _tokenIncomplete = false; |
| } else { // may actually require conversion... |
| if (_binaryValue == null) { |
| @SuppressWarnings("resource") |
| ByteArrayBuilder builder = _getByteArrayBuilder(); |
| _decodeBase64(getText(), builder, b64variant); |
| _binaryValue = builder.toByteArray(); |
| } |
| } |
| return _binaryValue; |
| } |
| |
| @Override |
| public int readBinaryValue(Base64Variant b64variant, OutputStream out) |
| throws IOException, JsonParseException |
| { |
| // if we have already read the token, just use whatever we may have |
| if (!_tokenIncomplete || _currToken != JsonToken.VALUE_STRING) { |
| byte[] b = getBinaryValue(b64variant); |
| out.write(b); |
| return b.length; |
| } |
| // otherwise do "real" incremental parsing... |
| byte[] buf = _ioContext.allocBase64Buffer(); |
| try { |
| return _readBinary(b64variant, out, buf); |
| } finally { |
| _ioContext.releaseBase64Buffer(buf); |
| } |
| } |
| |
/**
 * Incrementally decodes the base64 content of the current (not yet read)
 * String value straight from the input buffer, writing decoded bytes to
 * the given stream through the given work buffer. Decoding stops at the
 * closing double quote, after which the token is marked as complete.
 *
 * @param b64variant Base64 variant used to decode characters and padding
 * @param out Stream that decoded bytes are written to
 * @param buffer Work buffer that decoded bytes are collected in before
 *   being written out
 * @return Total number of decoded bytes written to <code>out</code>
 */
protected int _readBinary(Base64Variant b64variant, OutputStream out,
        byte[] buffer)
    throws IOException, JsonParseException
{
    int outputPtr = 0;
    // Highest fill level at which a full 3-byte group still fits in the buffer
    final int outputEnd = buffer.length - 3;
    int outputCount = 0;

    while (true) {
        // first, we'll skip preceding white space, if any
        int ch;
        do {
            if (_inputPtr >= _inputEnd) {
                loadMoreGuaranteed();
            }
            ch = (int) _inputBuffer[_inputPtr++] & 0xFF;
        } while (ch <= INT_SPACE);
        int bits = b64variant.decodeBase64Char(ch);
        if (bits < 0) { // reached the end, fair and square?
            if (ch == INT_QUOTE) {
                break;
            }
            bits = _decodeBase64Escape(b64variant, ch, 0);
            if (bits < 0) { // white space to skip
                continue;
            }
        }

        // enough room? If not, flush
        if (outputPtr > outputEnd) {
            outputCount += outputPtr;
            out.write(buffer, 0, outputPtr);
            outputPtr = 0;
        }

        // Accumulates 6 bits per base64 character of the current group
        int decodedData = bits;

        // then second base64 char; can't get padding yet, nor ws

        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        ch = _inputBuffer[_inputPtr++] & 0xFF;
        bits = b64variant.decodeBase64Char(ch);
        if (bits < 0) {
            bits = _decodeBase64Escape(b64variant, ch, 1);
        }
        decodedData = (decodedData << 6) | bits;

        // third base64 char; can be padding, but not ws
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        ch = _inputBuffer[_inputPtr++] & 0xFF;
        bits = b64variant.decodeBase64Char(ch);

        // First branch: can get padding (-> 1 byte)
        if (bits < 0) {
            if (bits != Base64Variant.BASE64_VALUE_PADDING) {
                // as per [JACKSON-631], could also just be 'missing' padding
                if (ch == '"' && !b64variant.usesPadding()) {
                    // Value ends after 2 characters: 12 bits read, keep the top 8
                    decodedData >>= 4;
                    buffer[outputPtr++] = (byte) decodedData;
                    break;
                }
                bits = _decodeBase64Escape(b64variant, ch, 2);
            }
            if (bits == Base64Variant.BASE64_VALUE_PADDING) {
                // Ok, must get padding
                if (_inputPtr >= _inputEnd) {
                    loadMoreGuaranteed();
                }
                ch = _inputBuffer[_inputPtr++] & 0xFF;
                if (!b64variant.usesPaddingChar(ch)) {
                    throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'");
                }
                // Got 12 bits, only need 8, need to shift
                decodedData >>= 4;
                buffer[outputPtr++] = (byte) decodedData;
                continue;
            }
        }
        // Nope, 2 or 3 bytes
        decodedData = (decodedData << 6) | bits;
        // fourth and last base64 char; can be padding, but not ws
        if (_inputPtr >= _inputEnd) {
            loadMoreGuaranteed();
        }
        ch = _inputBuffer[_inputPtr++] & 0xFF;
        bits = b64variant.decodeBase64Char(ch);
        if (bits < 0) {
            if (bits != Base64Variant.BASE64_VALUE_PADDING) {
                // as per [JACKSON-631], could also just be 'missing' padding
                if (ch == '"' && !b64variant.usesPadding()) {
                    // Value ends after 3 characters: 18 bits read, keep the top 16
                    decodedData >>= 2;
                    buffer[outputPtr++] = (byte) (decodedData >> 8);
                    buffer[outputPtr++] = (byte) decodedData;
                    break;
                }
                bits = _decodeBase64Escape(b64variant, ch, 3);
            }
            if (bits == Base64Variant.BASE64_VALUE_PADDING) {
                /* With padding we only get 2 bytes; but we have
                 * to shift it a bit so it is identical to triplet
                 * case with partial output.
                 * 3 chars gives 3x6 == 18 bits, of which 2 are
                 * dummies, need to discard:
                 */
                decodedData >>= 2;
                buffer[outputPtr++] = (byte) (decodedData >> 8);
                buffer[outputPtr++] = (byte) decodedData;
                continue;
            }
        }
        // otherwise, our triplet is now complete
        decodedData = (decodedData << 6) | bits;
        buffer[outputPtr++] = (byte) (decodedData >> 16);
        buffer[outputPtr++] = (byte) (decodedData >> 8);
        buffer[outputPtr++] = (byte) decodedData;
    }
    // Closing quote has been consumed, so the String token is fully read now
    _tokenIncomplete = false;
    // Write out whatever is still held in the work buffer
    if (outputPtr > 0) {
        outputCount += outputPtr;
        out.write(buffer, 0, outputPtr);
    }
    return outputCount;
}
| |
| // As per [Issue#108], must ensure we call the right method |
| @Override |
| public JsonLocation getTokenLocation() |
| { |
| return new JsonLocation(_ioContext.getSourceReference(), |
| getTokenCharacterOffset(), -1L, // bytes, chars |
| getTokenLineNr(), |
| getTokenColumnNr()); |
| } |
| |
| // As per [Issue#108], must ensure we call the right method |
| @Override |
| public JsonLocation getCurrentLocation() |
| { |
| int col = _inputPtr - _currInputRowStart + 1; // 1-based |
| return new JsonLocation(_ioContext.getSourceReference(), |
| _currInputProcessed + _inputPtr, -1L, // bytes, chars |
| _currInputRow, col); |
| } |
| |
| /* |
| /********************************************************** |
| /* Public API, traversal, basic |
| /********************************************************** |
| */ |
| |
/**
 * Advances the parser to the next token. Inside an Object, a field name is
 * always read together with the start of its value: the name is returned
 * right away and the value token is kept in <code>_nextToken</code> until
 * the following call.
 *
 * @return Next token from the stream, if any found, or null
 *   to indicate end-of-input
 */
@Override
public JsonToken nextToken() throws IOException
{
    _numTypesValid = NR_UNKNOWN;
    /* First: field names are special -- we will always tokenize
     * (part of) value along with field name to simplify
     * state handling. If so, can and need to use secondary token:
     */
    if (_currToken == JsonToken.FIELD_NAME) {
        return _nextAfterName();
    }
    if (_tokenIncomplete) {
        _skipString(); // only strings can be partial
    }
    int i = _skipWSOrEnd();
    if (i < 0) { // end-of-input
        // Close/release things like input source, symbol table and recyclable buffers
        close();
        return (_currToken = null);
    }

    // First, need to ensure we know the starting location of token
    // after skipping leading white space; the character in 'i' has already
    // been consumed, hence the "- 1" adjustments
    _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
    _tokenInputRow = _currInputRow;
    _tokenInputCol = _inputPtr - _currInputRowStart - 1;

    // finally: clear any data retained so far
    _binaryValue = null;

    // Closing scope?
    if (i == INT_RBRACKET) {
        if (!_parsingContext.inArray()) {
            _reportMismatchedEndMarker(i, '}');
        }
        _parsingContext = _parsingContext.getParent();
        return (_currToken = JsonToken.END_ARRAY);
    }
    if (i == INT_RCURLY) {
        if (!_parsingContext.inObject()) {
            _reportMismatchedEndMarker(i, ']');
        }
        _parsingContext = _parsingContext.getParent();
        return (_currToken = JsonToken.END_OBJECT);
    }

    // Nope: do we then expect a comma?
    if (_parsingContext.expectComma()) {
        if (i != INT_COMMA) {
            _reportUnexpectedChar(i, "was expecting comma to separate "+_parsingContext.getTypeDesc()+" entries");
        }
        i = _skipWS();
    }

    /* And should we now have a name? Always true for
     * Object contexts, since the intermediate 'expect-value'
     * state is never retained.
     */
    if (!_parsingContext.inObject()) {
        return _nextTokenNotInObject(i);
    }
    // So first parse the field name itself:
    Name n = _parseName(i);
    _parsingContext.setCurrentName(n.getName());
    _currToken = JsonToken.FIELD_NAME;

    i = _skipColon();

    // Ok: we must have a value... what is it? Strings are very common, check first:
    if (i == INT_QUOTE) {
        // String contents are left unread until they are actually asked for
        _tokenIncomplete = true;
        _nextToken = JsonToken.VALUE_STRING;
        return _currToken;
    }
    JsonToken t;

    switch (i) {
    case '-':
        /* Should we have separate handling for plus? Although
         * it is not allowed per se, it may be erroneously used,
         * and could be indicated by a more specific error message.
         */
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        t = _parseNumber(i);
        break;
    case 'f':
        _matchToken("false", 1);
        t = JsonToken.VALUE_FALSE;
        break;
    case 'n':
        _matchToken("null", 1);
        t = JsonToken.VALUE_NULL;
        break;
    case 't':
        _matchToken("true", 1);
        t = JsonToken.VALUE_TRUE;
        break;
    case '[':
        // child context is only created once this token is actually returned
        t = JsonToken.START_ARRAY;
        break;
    case '{':
        t = JsonToken.START_OBJECT;
        break;

    default:
        t = _handleUnexpectedValue(i);
    }
    // Value token is held back; this call returns the field name
    _nextToken = t;
    return _currToken;
}
| |
| private final JsonToken _nextTokenNotInObject(int i) throws IOException |
| { |
| if (i == INT_QUOTE) { |
| _tokenIncomplete = true; |
| return (_currToken = JsonToken.VALUE_STRING); |
| } |
| switch (i) { |
| case '[': |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| return (_currToken = JsonToken.START_ARRAY); |
| case '{': |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| return (_currToken = JsonToken.START_OBJECT); |
| case 't': |
| _matchToken("true", 1); |
| return (_currToken = JsonToken.VALUE_TRUE); |
| case 'f': |
| _matchToken("false", 1); |
| return (_currToken = JsonToken.VALUE_FALSE); |
| case 'n': |
| _matchToken("null", 1); |
| return (_currToken = JsonToken.VALUE_NULL); |
| case '-': |
| /* Should we have separate handling for plus? Although |
| * it is not allowed per se, it may be erroneously used, |
| * and could be indicated by a more specific error message. |
| */ |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| return (_currToken = _parseNumber(i)); |
| } |
| return (_currToken = _handleUnexpectedValue(i)); |
| } |
| |
| private final JsonToken _nextAfterName() |
| { |
| _nameCopied = false; // need to invalidate if it was copied |
| JsonToken t = _nextToken; |
| _nextToken = null; |
| // Also: may need to start new context? |
| if (t == JsonToken.START_ARRAY) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } else if (t == JsonToken.START_OBJECT) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| return (_currToken = t); |
| } |
| |
| /* |
| /********************************************************** |
| /* Public API, traversal, nextXxxValue/nextFieldName |
| /********************************************************** |
| */ |
| |
/**
 * Advances to the next token and reports whether it is a field name equal
 * to the given name. When the name's quoted UTF-8 bytes are found verbatim
 * in the input buffer, followed by a closing quote, the name is accepted
 * without being decoded; otherwise it is parsed the regular way and compared.
 *
 * @param str Field name to compare against
 * @return True if the next token is a field name equal to <code>str</code>;
 *   false for any other token, or at end of input
 */
@Override
public boolean nextFieldName(SerializableString str) throws IOException
{
    // // // Note: most of code below is copied from nextToken()

    _numTypesValid = NR_UNKNOWN;
    if (_currToken == JsonToken.FIELD_NAME) { // can't have name right after name
        _nextAfterName();
        return false;
    }
    if (_tokenIncomplete) {
        _skipString();
    }
    int i = _skipWSOrEnd();
    if (i < 0) { // end-of-input
        close();
        _currToken = null;
        return false;
    }
    // Token start location; the character in 'i' has already been consumed
    _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
    _tokenInputRow = _currInputRow;
    _tokenInputCol = _inputPtr - _currInputRowStart - 1;

    // finally: clear any data retained so far
    _binaryValue = null;

    // Closing scope?
    if (i == INT_RBRACKET) {
        if (!_parsingContext.inArray()) {
            _reportMismatchedEndMarker(i, '}');
        }
        _parsingContext = _parsingContext.getParent();
        _currToken = JsonToken.END_ARRAY;
        return false;
    }
    if (i == INT_RCURLY) {
        if (!_parsingContext.inObject()) {
            _reportMismatchedEndMarker(i, ']');
        }
        _parsingContext = _parsingContext.getParent();
        _currToken = JsonToken.END_OBJECT;
        return false;
    }

    // Nope: do we then expect a comma?
    if (_parsingContext.expectComma()) {
        if (i != INT_COMMA) {
            _reportUnexpectedChar(i, "was expecting comma to separate "+_parsingContext.getTypeDesc()+" entries");
        }
        i = _skipWS();
    }

    // Outside of an Object there can be no field name: read the value token instead
    if (!_parsingContext.inObject()) {
        _nextTokenNotInObject(i);
        return false;
    }

    // // // This part differs, name parsing
    if (i == INT_QUOTE) {
        // when doing literal match, must consider escaping:
        byte[] nameBytes = str.asQuotedUTF8();
        final int len = nameBytes.length;
        // Fast path is only tried when the whole name plus the byte that
        // follows it are already in the buffer
        if ((_inputPtr + len) < _inputEnd) { // maybe...
            // first check length match by looking for the closing quote
            final int end = _inputPtr+len;
            if (_inputBuffer[end] == INT_QUOTE) {
                int offset = 0;
                final int ptr = _inputPtr;
                while (true) {
                    if (offset == len) { // yes, match!
                        _inputPtr = end+1; // skip current value first
                        // First part is simple; setting of name
                        _parsingContext.setCurrentName(str.getValue());
                        _currToken = JsonToken.FIELD_NAME;
                        // But then we also must handle following value etc
                        _isNextTokenNameYes();
                        return true;
                    }
                    if (nameBytes[offset] != _inputBuffer[ptr+offset]) {
                        break;
                    }
                    ++offset;
                }
            }
        }
    }
    // No literal match: parse the name the regular way and compare
    return _isNextTokenNameMaybe(i, str);
}
| |
| private final void _isNextTokenNameYes() throws IOException |
| { |
| // very first thing: common case, colon, value, no white space |
| int i = _skipColon(); |
| switch (i) { |
| case '"': |
| _tokenIncomplete = true; |
| _nextToken = JsonToken.VALUE_STRING; |
| return; |
| case '[': |
| _nextToken = JsonToken.START_ARRAY; |
| return; |
| case '{': |
| _nextToken = JsonToken.START_OBJECT; |
| return; |
| case 't': |
| _matchToken("true", 1); |
| _nextToken = JsonToken.VALUE_TRUE; |
| return; |
| case 'f': |
| _matchToken("false", 1); |
| _nextToken = JsonToken.VALUE_FALSE; |
| return; |
| case 'n': |
| _matchToken("null", 1); |
| _nextToken = JsonToken.VALUE_NULL; |
| return; |
| case '-': |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| _nextToken = _parseNumber(i); |
| return; |
| } |
| _nextToken = _handleUnexpectedValue(i); |
| } |
| |
| private final boolean _isNextTokenNameMaybe(int i, SerializableString str) throws IOException |
| { |
| // // // and this is back to standard nextToken() |
| |
| Name n = _parseName(i); |
| final boolean match; |
| { |
| String nameStr = n.getName(); |
| _parsingContext.setCurrentName(nameStr); |
| match = nameStr.equals(str.getValue()); |
| } |
| _currToken = JsonToken.FIELD_NAME; |
| i = _skipWS(); |
| if (i != INT_COLON) { |
| _reportUnexpectedChar(i, "was expecting a colon to separate field name and value"); |
| } |
| i = _skipWS(); |
| |
| // Ok: we must have a value... what is it? Strings are very common, check first: |
| if (i == INT_QUOTE) { |
| _tokenIncomplete = true; |
| _nextToken = JsonToken.VALUE_STRING; |
| return match; |
| } |
| JsonToken t; |
| |
| switch (i) { |
| case '[': |
| t = JsonToken.START_ARRAY; |
| break; |
| case '{': |
| t = JsonToken.START_OBJECT; |
| break; |
| case 't': |
| _matchToken("true", 1); |
| t = JsonToken.VALUE_TRUE; |
| break; |
| case 'f': |
| _matchToken("false", 1); |
| t = JsonToken.VALUE_FALSE; |
| break; |
| case 'n': |
| _matchToken("null", 1); |
| t = JsonToken.VALUE_NULL; |
| break; |
| |
| case '-': |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| |
| t = _parseNumber(i); |
| break; |
| default: |
| t = _handleUnexpectedValue(i); |
| } |
| _nextToken = t; |
| return match; |
| } |
| |
| @Override |
| public String nextTextValue() throws IOException |
| { |
| // two distinct cases; either got name and we know next type, or 'other' |
| if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' |
| _nameCopied = false; |
| JsonToken t = _nextToken; |
| _nextToken = null; |
| _currToken = t; |
| if (t == JsonToken.VALUE_STRING) { |
| if (_tokenIncomplete) { |
| _tokenIncomplete = false; |
| _finishString(); |
| } |
| return _textBuffer.contentsAsString(); |
| } |
| if (t == JsonToken.START_ARRAY) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } else if (t == JsonToken.START_OBJECT) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| return null; |
| } |
| // !!! TODO: optimize this case as well |
| return (nextToken() == JsonToken.VALUE_STRING) ? getText() : null; |
| } |
| |
| @Override |
| public int nextIntValue(int defaultValue) throws IOException |
| { |
| // two distinct cases; either got name and we know next type, or 'other' |
| if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' |
| _nameCopied = false; |
| JsonToken t = _nextToken; |
| _nextToken = null; |
| _currToken = t; |
| if (t == JsonToken.VALUE_NUMBER_INT) { |
| return getIntValue(); |
| } |
| if (t == JsonToken.START_ARRAY) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } else if (t == JsonToken.START_OBJECT) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| return defaultValue; |
| } |
| // !!! TODO: optimize this case as well |
| return (nextToken() == JsonToken.VALUE_NUMBER_INT) ? getIntValue() : defaultValue; |
| } |
| |
| @Override |
| public long nextLongValue(long defaultValue) throws IOException |
| { |
| // two distinct cases; either got name and we know next type, or 'other' |
| if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' |
| _nameCopied = false; |
| JsonToken t = _nextToken; |
| _nextToken = null; |
| _currToken = t; |
| if (t == JsonToken.VALUE_NUMBER_INT) { |
| return getLongValue(); |
| } |
| if (t == JsonToken.START_ARRAY) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } else if (t == JsonToken.START_OBJECT) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| return defaultValue; |
| } |
| // !!! TODO: optimize this case as well |
| return (nextToken() == JsonToken.VALUE_NUMBER_INT) ? getLongValue() : defaultValue; |
| } |
| |
| @Override |
| public Boolean nextBooleanValue() throws IOException |
| { |
| // two distinct cases; either got name and we know next type, or 'other' |
| if (_currToken == JsonToken.FIELD_NAME) { // mostly copied from '_nextAfterName' |
| _nameCopied = false; |
| JsonToken t = _nextToken; |
| _nextToken = null; |
| _currToken = t; |
| if (t == JsonToken.VALUE_TRUE) { |
| return Boolean.TRUE; |
| } |
| if (t == JsonToken.VALUE_FALSE) { |
| return Boolean.FALSE; |
| } |
| if (t == JsonToken.START_ARRAY) { |
| _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol); |
| } else if (t == JsonToken.START_OBJECT) { |
| _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol); |
| } |
| return null; |
| } |
| |
| switch (nextToken().id()) { |
| case ID_TRUE: |
| return Boolean.TRUE; |
| case ID_FALSE: |
| return Boolean.FALSE; |
| default: |
| return null; |
| } |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, number parsing |
| /********************************************************** |
| */ |
| |
| /** |
| * Initial parsing method for number values. It needs to be able |
| * to parse enough input to be able to determine whether the |
| * value is to be considered a simple integer value, or a more |
| * generic decimal value: latter of which needs to be expressed |
| * as a floating point number. The basic rule is that if the number |
| * has no fractional or exponential part, it is an integer; otherwise |
| * a floating point number. |
| *<p> |
| * Because much of input has to be processed in any case, no partial |
| * parsing is done: all input text will be stored for further |
| * processing. However, actual numeric value conversion will be |
| * deferred, since it is usually the most complicated and costliest |
| * part of processing. |
| */ |
| protected JsonToken _parseNumber(int c) throws IOException |
| { |
| char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); |
| int outPtr = 0; |
| boolean negative = (c == INT_MINUS); |
| |
| // Need to prepend sign? |
| if (negative) { |
| outBuf[outPtr++] = '-'; |
| // Must have something after sign too |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| // Note: must be followed by a digit |
| if (c < INT_0 || c > INT_9) { |
| return _handleInvalidNumberStart(c, true); |
| } |
| } |
| |
| // One special case: if first char is 0, must not be followed by a digit |
| if (c == INT_0) { |
| c = _verifyNoLeadingZeroes(); |
| } |
| |
| // Ok: we can first just add digit we saw first: |
| outBuf[outPtr++] = (char) c; |
| int intLen = 1; |
| |
| // And then figure out how far we can read without further checks |
| // for either input or output |
| int end = _inputPtr + outBuf.length; |
| if (end > _inputEnd) { |
| end = _inputEnd; |
| } |
| |
| // With this, we have a nice and tight loop: |
| while (true) { |
| if (_inputPtr >= end) { |
| // Long enough to be split across boundary, so: |
| return _parseNumber2(outBuf, outPtr, negative, intLen); |
| } |
| c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| if (c < INT_0 || c > INT_9) { |
| break; |
| } |
| ++intLen; |
| outBuf[outPtr++] = (char) c; |
| } |
| if (c == '.' || c == 'e' || c == 'E') { |
| return _parseFloat(outBuf, outPtr, c, negative, intLen); |
| } |
| |
| --_inputPtr; // to push back trailing char (comma etc) |
| _textBuffer.setCurrentLength(outPtr); |
| // As per #105, need separating space between root values; check here |
| if (_parsingContext.inRoot()) { |
| _verifyRootSpace(c); |
| } |
| |
| // And there we have it! |
| return resetInt(negative, intLen); |
| } |
| |
| /** |
| * Method called to handle parsing when input is split across buffer boundary |
| * (or output is longer than segment used to store it) |
| */ |
| private final JsonToken _parseNumber2(char[] outBuf, int outPtr, boolean negative, |
| int intPartLength) throws IOException |
| { |
| // Ok, parse the rest |
| while (true) { |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| _textBuffer.setCurrentLength(outPtr); |
| return resetInt(negative, intPartLength); |
| } |
| int c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| if (c > INT_9 || c < INT_0) { |
| if (c == INT_PERIOD || c == INT_e || c == INT_E) { |
| return _parseFloat(outBuf, outPtr, c, negative, intPartLength); |
| } |
| break; |
| } |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| outBuf[outPtr++] = (char) c; |
| ++intPartLength; |
| } |
| --_inputPtr; // to push back trailing char (comma etc) |
| _textBuffer.setCurrentLength(outPtr); |
| // As per #105, need separating space between root values; check here |
| if (_parsingContext.inRoot()) { |
| _verifyRootSpace(_inputBuffer[_inputPtr++] & 0xFF); |
| } |
| |
| // And there we have it! |
| return resetInt(negative, intPartLength); |
| |
| } |
| |
| /** |
| * Method called when we have seen one zero, and want to ensure |
| * it is not followed by another |
| */ |
| private final int _verifyNoLeadingZeroes() throws IOException |
| { |
| // Ok to have plain "0" |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| return INT_0; |
| } |
| int ch = _inputBuffer[_inputPtr] & 0xFF; |
| // if not followed by a number (probably '.'); return zero as is, to be included |
| if (ch < INT_0 || ch > INT_9) { |
| return INT_0; |
| } |
| // [JACKSON-358]: we may want to allow them, after all... |
| if (!isEnabled(Feature.ALLOW_NUMERIC_LEADING_ZEROS)) { |
| reportInvalidNumber("Leading zeroes not allowed"); |
| } |
| // if so, just need to skip either all zeroes (if followed by number); or all but one (if non-number) |
| ++_inputPtr; // Leading zero to be skipped |
| if (ch == INT_0) { |
| while (_inputPtr < _inputEnd || loadMore()) { |
| ch = _inputBuffer[_inputPtr] & 0xFF; |
| if (ch < INT_0 || ch > INT_9) { // followed by non-number; retain one zero |
| return INT_0; |
| } |
| ++_inputPtr; // skip previous zeroes |
| if (ch != INT_0) { // followed by other number; return |
| break; |
| } |
| } |
| } |
| return ch; |
| } |
| |
| private final JsonToken _parseFloat(char[] outBuf, int outPtr, int c, |
| boolean negative, int integerPartLength) throws IOException |
| { |
| int fractLen = 0; |
| boolean eof = false; |
| |
| // And then see if we get other parts |
| if (c == INT_PERIOD) { // yes, fraction |
| outBuf[outPtr++] = (char) c; |
| |
| fract_loop: |
| while (true) { |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| eof = true; |
| break fract_loop; |
| } |
| c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| if (c < INT_0 || c > INT_9) { |
| break fract_loop; |
| } |
| ++fractLen; |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| outBuf[outPtr++] = (char) c; |
| } |
| // must be followed by sequence of ints, one minimum |
| if (fractLen == 0) { |
| reportUnexpectedNumberChar(c, "Decimal point not followed by a digit"); |
| } |
| } |
| |
| int expLen = 0; |
| if (c == INT_e || c == INT_E) { // exponent? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| outBuf[outPtr++] = (char) c; |
| // Not optional, can require that we get one more char |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| // Sign indicator? |
| if (c == '-' || c == '+') { |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| outBuf[outPtr++] = (char) c; |
| // Likewise, non optional: |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| } |
| |
| exp_loop: |
| while (c <= INT_9 && c >= INT_0) { |
| ++expLen; |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| outBuf[outPtr++] = (char) c; |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| eof = true; |
| break exp_loop; |
| } |
| c = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| } |
| // must be followed by sequence of ints, one minimum |
| if (expLen == 0) { |
| reportUnexpectedNumberChar(c, "Exponent indicator not followed by a digit"); |
| } |
| } |
| |
| // Ok; unless we hit end-of-input, need to push last char read back |
| if (!eof) { |
| --_inputPtr; |
| // As per #105, need separating space between root values; check here |
| if (_parsingContext.inRoot()) { |
| _verifyRootSpace(c); |
| } |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| |
| // And there we have it! |
| return resetFloat(negative, integerPartLength, fractLen, expLen); |
| } |
| |
| /** |
| * Method called to ensure that a root-value is followed by a space |
| * token. |
| *<p> |
| * NOTE: caller MUST ensure there is at least one character available; |
| * and that input pointer is AT given char (not past) |
| */ |
| private final void _verifyRootSpace(int ch) throws IOException |
| { |
| // caller had pushed it back, before calling; reset |
| ++_inputPtr; |
| // TODO? Handle UTF-8 char decoding for error reporting |
| switch (ch) { |
| case ' ': |
| case '\t': |
| return; |
| case '\r': |
| _skipCR(); |
| return; |
| case '\n': |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| return; |
| } |
| _reportMissingRootWS(ch); |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, secondary parsing |
| /********************************************************** |
| */ |
| |
| protected Name _parseName(int i) throws IOException |
| { |
| if (i != INT_QUOTE) { |
| return _handleOddName(i); |
| } |
| // First: can we optimize out bounds checks? |
| if ((_inputPtr + 9) > _inputEnd) { // Need 8 chars, plus one trailing (quote) |
| return slowParseName(); |
| } |
| |
| // If so, can also unroll loops nicely |
| /* 25-Nov-2008, tatu: This may seem weird, but here we do |
| * NOT want to worry about UTF-8 decoding. Rather, we'll |
| * assume that part is ok (if not it will get caught |
| * later on), and just handle quotes and backslashes here. |
| */ |
| final byte[] input = _inputBuffer; |
| final int[] codes = _icLatin1; |
| |
| int q = input[_inputPtr++] & 0xFF; |
| |
| if (codes[q] == 0) { |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] == 0) { |
| q = (q << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] == 0) { |
| q = (q << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] == 0) { |
| q = (q << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] == 0) { |
| _quad1 = q; |
| return parseMediumName(i); |
| } |
| if (i == INT_QUOTE) { // 4 byte/char case or broken |
| return findName(q, 4); |
| } |
| return parseName(q, i, 4); |
| } |
| if (i == INT_QUOTE) { // 3 byte/char case or broken |
| return findName(q, 3); |
| } |
| return parseName(q, i, 3); |
| } |
| if (i == INT_QUOTE) { // 2 byte/char case or broken |
| return findName(q, 2); |
| } |
| return parseName(q, i, 2); |
| } |
| if (i == INT_QUOTE) { // one byte/char case or broken |
| return findName(q, 1); |
| } |
| return parseName(q, i, 1); |
| } |
| if (q == INT_QUOTE) { // special case, "" |
| return BytesToNameCanonicalizer.getEmptyName(); |
| } |
| return parseName(0, q, 0); // quoting or invalid char |
| } |
| |
| protected final Name parseMediumName(int q2) throws IOException |
| { |
| final byte[] input = _inputBuffer; |
| final int[] codes = _icLatin1; |
| |
| // Ok, got 5 name bytes so far |
| int i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { // 5 bytes |
| return findName(_quad1, q2, 1); |
| } |
| return parseName(_quad1, q2, i, 1); // quoting or invalid char |
| } |
| q2 = (q2 << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { // 6 bytes |
| return findName(_quad1, q2, 2); |
| } |
| return parseName(_quad1, q2, i, 2); |
| } |
| q2 = (q2 << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { // 7 bytes |
| return findName(_quad1, q2, 3); |
| } |
| return parseName(_quad1, q2, i, 3); |
| } |
| q2 = (q2 << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { // 8 bytes |
| return findName(_quad1, q2, 4); |
| } |
| return parseName(_quad1, q2, i, 4); |
| } |
| return parseLongName(i, q2); |
| } |
| |
| protected final Name parseLongName(int q, final int q2) throws IOException |
| { |
| _quadBuffer[0] = _quad1; |
| _quadBuffer[1] = q2; |
| |
| // As explained above, will ignore UTF-8 encoding at this point |
| final byte[] input = _inputBuffer; |
| final int[] codes = _icLatin1; |
| int qlen = 2; |
| |
| while ((_inputPtr + 4) <= _inputEnd) { |
| int i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { |
| return findName(_quadBuffer, qlen, q, 1); |
| } |
| return parseEscapedName(_quadBuffer, qlen, q, i, 1); |
| } |
| |
| q = (q << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { |
| return findName(_quadBuffer, qlen, q, 2); |
| } |
| return parseEscapedName(_quadBuffer, qlen, q, i, 2); |
| } |
| |
| q = (q << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { |
| return findName(_quadBuffer, qlen, q, 3); |
| } |
| return parseEscapedName(_quadBuffer, qlen, q, i, 3); |
| } |
| |
| q = (q << 8) | i; |
| i = input[_inputPtr++] & 0xFF; |
| if (codes[i] != 0) { |
| if (i == INT_QUOTE) { |
| return findName(_quadBuffer, qlen, q, 4); |
| } |
| return parseEscapedName(_quadBuffer, qlen, q, i, 4); |
| } |
| |
| // Nope, no end in sight. Need to grow quad array etc |
| if (qlen >= _quadBuffer.length) { |
| _quadBuffer = growArrayBy(_quadBuffer, qlen); |
| } |
| _quadBuffer[qlen++] = q; |
| q = i; |
| } |
| |
| /* Let's offline if we hit buffer boundary (otherwise would |
| * need to [try to] align input, which is bit complicated |
| * and may not always be possible) |
| */ |
| return parseEscapedName(_quadBuffer, qlen, 0, q, 0); |
| } |
| |
| /** |
| * Method called when not even first 8 bytes are guaranteed |
| * to come consequtively. Happens rarely, so this is offlined; |
| * plus we'll also do full checks for escaping etc. |
| */ |
| protected Name slowParseName() throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(": was expecting closing '\"' for name"); |
| } |
| } |
| int i = _inputBuffer[_inputPtr++] & 0xFF; |
| if (i == INT_QUOTE) { // special case, "" |
| return BytesToNameCanonicalizer.getEmptyName(); |
| } |
| return parseEscapedName(_quadBuffer, 0, 0, i, 0); |
| } |
| |
| private final Name parseName(int q1, int ch, int lastQuadBytes) throws IOException { |
| return parseEscapedName(_quadBuffer, 0, q1, ch, lastQuadBytes); |
| } |
| |
| private final Name parseName(int q1, int q2, int ch, int lastQuadBytes) throws IOException { |
| _quadBuffer[0] = q1; |
| return parseEscapedName(_quadBuffer, 1, q2, ch, lastQuadBytes); |
| } |
| |
| /** |
| * Slower parsing method which is generally branched to when |
| * an escape sequence is detected (or alternatively for long |
| * names, or ones crossing input buffer boundary). In any case, |
| * needs to be able to handle more exceptional cases, gets |
| * slower, and hance is offlined to a separate method. |
| */ |
| protected final Name parseEscapedName(int[] quads, int qlen, int currQuad, int ch, |
| int currQuadBytes) throws IOException |
| { |
| /* 25-Nov-2008, tatu: This may seem weird, but here we do not want to worry about |
| * UTF-8 decoding yet. Rather, we'll assume that part is ok (if not it will get |
| * caught later on), and just handle quotes and backslashes here. |
| */ |
| final int[] codes = _icLatin1; |
| |
| while (true) { |
| if (codes[ch] != 0) { |
| if (ch == INT_QUOTE) { // we are done |
| break; |
| } |
| // Unquoted white space? |
| if (ch != INT_BACKSLASH) { |
| // As per [JACKSON-208], call can now return: |
| _throwUnquotedSpace(ch, "name"); |
| } else { |
| // Nope, escape sequence |
| ch = _decodeEscaped(); |
| } |
| /* Oh crap. May need to UTF-8 (re-)encode it, if it's |
| * beyond 7-bit ascii. Gets pretty messy. |
| * If this happens often, may want to use different name |
| * canonicalization to avoid these hits. |
| */ |
| if (ch > 127) { |
| // Ok, we'll need room for first byte right away |
| if (currQuadBytes >= 4) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = 0; |
| currQuadBytes = 0; |
| } |
| if (ch < 0x800) { // 2-byte |
| currQuad = (currQuad << 8) | (0xc0 | (ch >> 6)); |
| ++currQuadBytes; |
| // Second byte gets output below: |
| } else { // 3 bytes; no need to worry about surrogates here |
| currQuad = (currQuad << 8) | (0xe0 | (ch >> 12)); |
| ++currQuadBytes; |
| // need room for middle byte? |
| if (currQuadBytes >= 4) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = 0; |
| currQuadBytes = 0; |
| } |
| currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f)); |
| ++currQuadBytes; |
| } |
| // And same last byte in both cases, gets output below: |
| ch = 0x80 | (ch & 0x3f); |
| } |
| } |
| // Ok, we have one more byte to add at any rate: |
| if (currQuadBytes < 4) { |
| ++currQuadBytes; |
| currQuad = (currQuad << 8) | ch; |
| } else { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = ch; |
| currQuadBytes = 1; |
| } |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in field name"); |
| } |
| } |
| ch = _inputBuffer[_inputPtr++] & 0xFF; |
| } |
| |
| if (currQuadBytes > 0) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| } |
| Name name = _symbols.findName(quads, qlen); |
| if (name == null) { |
| name = addName(quads, qlen, currQuadBytes); |
| } |
| return name; |
| } |
| |
| /** |
| * Method called when we see non-white space character other |
| * than double quote, when expecting a field name. |
| * In standard mode will just throw an expection; but |
| * in non-standard modes may be able to parse name. |
| */ |
| protected Name _handleOddName(int ch) throws IOException |
| { |
| // [JACKSON-173]: allow single quotes |
| if (ch == '\'' && isEnabled(Feature.ALLOW_SINGLE_QUOTES)) { |
| return _parseAposName(); |
| } |
| // [JACKSON-69]: allow unquoted names if feature enabled: |
| if (!isEnabled(Feature.ALLOW_UNQUOTED_FIELD_NAMES)) { |
| _reportUnexpectedChar(ch, "was expecting double-quote to start field name"); |
| } |
| /* Also: note that although we use a different table here, |
| * it does NOT handle UTF-8 decoding. It'll just pass those |
| * high-bit codes as acceptable for later decoding. |
| */ |
| final int[] codes = CharTypes.getInputCodeUtf8JsNames(); |
| // Also: must start with a valid character... |
| if (codes[ch] != 0) { |
| _reportUnexpectedChar(ch, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name"); |
| } |
| |
| /* Ok, now; instead of ultra-optimizing parsing here (as with |
| * regular JSON names), let's just use the generic "slow" |
| * variant. Can measure its impact later on if need be |
| */ |
| int[] quads = _quadBuffer; |
| int qlen = 0; |
| int currQuad = 0; |
| int currQuadBytes = 0; |
| |
| while (true) { |
| // Ok, we have one more byte to add at any rate: |
| if (currQuadBytes < 4) { |
| ++currQuadBytes; |
| currQuad = (currQuad << 8) | ch; |
| } else { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = ch; |
| currQuadBytes = 1; |
| } |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in field name"); |
| } |
| } |
| ch = _inputBuffer[_inputPtr] & 0xFF; |
| if (codes[ch] != 0) { |
| break; |
| } |
| ++_inputPtr; |
| } |
| |
| if (currQuadBytes > 0) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| } |
| Name name = _symbols.findName(quads, qlen); |
| if (name == null) { |
| name = addName(quads, qlen, currQuadBytes); |
| } |
| return name; |
| } |
| |
| /* Parsing to support [JACKSON-173]. Plenty of duplicated code; |
| * main reason being to try to avoid slowing down fast path |
| * for valid JSON -- more alternatives, more code, generally |
| * bit slower execution. |
| */ |
| protected Name _parseAposName() throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(": was expecting closing '\'' for name"); |
| } |
| } |
| int ch = _inputBuffer[_inputPtr++] & 0xFF; |
| if (ch == '\'') { // special case, '' |
| return BytesToNameCanonicalizer.getEmptyName(); |
| } |
| int[] quads = _quadBuffer; |
| int qlen = 0; |
| int currQuad = 0; |
| int currQuadBytes = 0; |
| |
| // Copied from parseEscapedFieldName, with minor mods: |
| |
| final int[] codes = _icLatin1; |
| |
| while (true) { |
| if (ch == '\'') { |
| break; |
| } |
| // additional check to skip handling of double-quotes |
| if (ch != '"' && codes[ch] != 0) { |
| if (ch != '\\') { |
| // Unquoted white space? |
| // As per [JACKSON-208], call can now return: |
| _throwUnquotedSpace(ch, "name"); |
| } else { |
| // Nope, escape sequence |
| ch = _decodeEscaped(); |
| } |
| /* Oh crap. May need to UTF-8 (re-)encode it, if it's |
| * beyond 7-bit ascii. Gets pretty messy. |
| * If this happens often, may want to use different name |
| * canonicalization to avoid these hits. |
| */ |
| if (ch > 127) { |
| // Ok, we'll need room for first byte right away |
| if (currQuadBytes >= 4) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = 0; |
| currQuadBytes = 0; |
| } |
| if (ch < 0x800) { // 2-byte |
| currQuad = (currQuad << 8) | (0xc0 | (ch >> 6)); |
| ++currQuadBytes; |
| // Second byte gets output below: |
| } else { // 3 bytes; no need to worry about surrogates here |
| currQuad = (currQuad << 8) | (0xe0 | (ch >> 12)); |
| ++currQuadBytes; |
| // need room for middle byte? |
| if (currQuadBytes >= 4) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = 0; |
| currQuadBytes = 0; |
| } |
| currQuad = (currQuad << 8) | (0x80 | ((ch >> 6) & 0x3f)); |
| ++currQuadBytes; |
| } |
| // And same last byte in both cases, gets output below: |
| ch = 0x80 | (ch & 0x3f); |
| } |
| } |
| // Ok, we have one more byte to add at any rate: |
| if (currQuadBytes < 4) { |
| ++currQuadBytes; |
| currQuad = (currQuad << 8) | ch; |
| } else { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| currQuad = ch; |
| currQuadBytes = 1; |
| } |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in field name"); |
| } |
| } |
| ch = _inputBuffer[_inputPtr++] & 0xFF; |
| } |
| |
| if (currQuadBytes > 0) { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = currQuad; |
| } |
| Name name = _symbols.findName(quads, qlen); |
| if (name == null) { |
| name = addName(quads, qlen, currQuadBytes); |
| } |
| return name; |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, symbol (name) handling |
| /********************************************************** |
| */ |
| |
| private final Name findName(int q1, int lastQuadBytes) |
| throws JsonParseException |
| { |
| // Usually we'll find it from the canonical symbol table already |
| Name name = _symbols.findName(q1); |
| if (name != null) { |
| return name; |
| } |
| // If not, more work. We'll need add stuff to buffer |
| _quadBuffer[0] = q1; |
| return addName(_quadBuffer, 1, lastQuadBytes); |
| } |
| |
| private final Name findName(int q1, int q2, int lastQuadBytes) |
| throws JsonParseException |
| { |
| // Usually we'll find it from the canonical symbol table already |
| Name name = _symbols.findName(q1, q2); |
| if (name != null) { |
| return name; |
| } |
| // If not, more work. We'll need add stuff to buffer |
| _quadBuffer[0] = q1; |
| _quadBuffer[1] = q2; |
| return addName(_quadBuffer, 2, lastQuadBytes); |
| } |
| |
| private final Name findName(int[] quads, int qlen, int lastQuad, int lastQuadBytes) |
| throws JsonParseException |
| { |
| if (qlen >= quads.length) { |
| _quadBuffer = quads = growArrayBy(quads, quads.length); |
| } |
| quads[qlen++] = lastQuad; |
| Name name = _symbols.findName(quads, qlen); |
| if (name == null) { |
| return addName(quads, qlen, lastQuadBytes); |
| } |
| return name; |
| } |
| |
| /** |
| * This is the main workhorse method used when we take a symbol |
| * table miss. It needs to demultiplex individual bytes, decode |
| * multi-byte chars (if any), and then construct Name instance |
| * and add it to the symbol table. |
| */ |
| private final Name addName(int[] quads, int qlen, int lastQuadBytes) |
| throws JsonParseException |
| { |
| /* Ok: must decode UTF-8 chars. No other validation is |
| * needed, since unescaping has been done earlier as necessary |
| * (as well as error reporting for unescaped control chars) |
| */ |
| // 4 bytes per quad, except last one maybe less |
| int byteLen = (qlen << 2) - 4 + lastQuadBytes; |
| |
| /* And last one is not correctly aligned (leading zero bytes instead |
| * need to shift a bit, instead of trailing). Only need to shift it |
| * for UTF-8 decoding; need revert for storage (since key will not |
| * be aligned, to optimize lookup speed) |
| */ |
| int lastQuad; |
| |
| if (lastQuadBytes < 4) { |
| lastQuad = quads[qlen-1]; |
| // 8/16/24 bit left shift |
| quads[qlen-1] = (lastQuad << ((4 - lastQuadBytes) << 3)); |
| } else { |
| lastQuad = 0; |
| } |
| |
| // Need some working space, TextBuffer works well: |
| char[] cbuf = _textBuffer.emptyAndGetCurrentSegment(); |
| int cix = 0; |
| |
| for (int ix = 0; ix < byteLen; ) { |
| int ch = quads[ix >> 2]; // current quad, need to shift+mask |
| int byteIx = (ix & 3); |
| ch = (ch >> ((3 - byteIx) << 3)) & 0xFF; |
| ++ix; |
| |
| if (ch > 127) { // multi-byte |
| int needed; |
| if ((ch & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF) |
| ch &= 0x1F; |
| needed = 1; |
| } else if ((ch & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF) |
| ch &= 0x0F; |
| needed = 2; |
| } else if ((ch & 0xF8) == 0xF0) { // 4 bytes; double-char with surrogates and all... |
| ch &= 0x07; |
| needed = 3; |
| } else { // 5- and 6-byte chars not valid xml chars |
| _reportInvalidInitial(ch); |
| needed = ch = 1; // never really gets this far |
| } |
| if ((ix + needed) > byteLen) { |
| _reportInvalidEOF(" in field name"); |
| } |
| |
| // Ok, always need at least one more: |
| int ch2 = quads[ix >> 2]; // current quad, need to shift+mask |
| byteIx = (ix & 3); |
| ch2 = (ch2 >> ((3 - byteIx) << 3)); |
| ++ix; |
| |
| if ((ch2 & 0xC0) != 0x080) { |
| _reportInvalidOther(ch2); |
| } |
| ch = (ch << 6) | (ch2 & 0x3F); |
| if (needed > 1) { |
| ch2 = quads[ix >> 2]; |
| byteIx = (ix & 3); |
| ch2 = (ch2 >> ((3 - byteIx) << 3)); |
| ++ix; |
| |
| if ((ch2 & 0xC0) != 0x080) { |
| _reportInvalidOther(ch2); |
| } |
| ch = (ch << 6) | (ch2 & 0x3F); |
| if (needed > 2) { // 4 bytes? (need surrogates on output) |
| ch2 = quads[ix >> 2]; |
| byteIx = (ix & 3); |
| ch2 = (ch2 >> ((3 - byteIx) << 3)); |
| ++ix; |
| if ((ch2 & 0xC0) != 0x080) { |
| _reportInvalidOther(ch2 & 0xFF); |
| } |
| ch = (ch << 6) | (ch2 & 0x3F); |
| } |
| } |
| if (needed > 2) { // surrogate pair? once again, let's output one here, one later on |
| ch -= 0x10000; // to normalize it starting with 0x0 |
| if (cix >= cbuf.length) { |
| cbuf = _textBuffer.expandCurrentSegment(); |
| } |
| cbuf[cix++] = (char) (0xD800 + (ch >> 10)); |
| ch = 0xDC00 | (ch & 0x03FF); |
| } |
| } |
| if (cix >= cbuf.length) { |
| cbuf = _textBuffer.expandCurrentSegment(); |
| } |
| cbuf[cix++] = (char) ch; |
| } |
| |
| // Ok. Now we have the character array, and can construct the String |
| String baseName = new String(cbuf, 0, cix); |
| // And finally, un-align if necessary |
| if (lastQuadBytes < 4) { |
| quads[qlen-1] = lastQuad; |
| } |
| return _symbols.addName(baseName, quads, qlen); |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, String value parsing |
| /********************************************************** |
| */ |
| |
| @Override |
| protected void _finishString() throws IOException |
| { |
| // First, single tight loop for ASCII content, not split across input buffer boundary: |
| int ptr = _inputPtr; |
| if (ptr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| ptr = _inputPtr; |
| } |
| int outPtr = 0; |
| char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); |
| final int[] codes = _icUTF8; |
| |
| final int max = Math.min(_inputEnd, (ptr + outBuf.length)); |
| final byte[] inputBuffer = _inputBuffer; |
| while (ptr < max) { |
| int c = (int) inputBuffer[ptr] & 0xFF; |
| if (codes[c] != 0) { |
| if (c == INT_QUOTE) { |
| _inputPtr = ptr+1; |
| _textBuffer.setCurrentLength(outPtr); |
| return; |
| } |
| break; |
| } |
| ++ptr; |
| outBuf[outPtr++] = (char) c; |
| } |
| _inputPtr = ptr; |
| _finishString2(outBuf, outPtr); |
| } |
| |
| private final void _finishString2(char[] outBuf, int outPtr) |
| throws IOException |
| { |
| int c; |
| |
| // Here we do want to do full decoding, hence: |
| final int[] codes = _icUTF8; |
| final byte[] inputBuffer = _inputBuffer; |
| |
| main_loop: |
| while (true) { |
| // Then the tight ASCII non-funny-char loop: |
| ascii_loop: |
| while (true) { |
| int ptr = _inputPtr; |
| if (ptr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| ptr = _inputPtr; |
| } |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| final int max = Math.min(_inputEnd, (ptr + (outBuf.length - outPtr))); |
| while (ptr < max) { |
| c = (int) inputBuffer[ptr++] & 0xFF; |
| if (codes[c] != 0) { |
| _inputPtr = ptr; |
| break ascii_loop; |
| } |
| outBuf[outPtr++] = (char) c; |
| } |
| _inputPtr = ptr; |
| } |
| // Ok: end marker, escape or multi-byte? |
| if (c == INT_QUOTE) { |
| break main_loop; |
| } |
| |
| switch (codes[c]) { |
| case 1: // backslash |
| c = _decodeEscaped(); |
| break; |
| case 2: // 2-byte UTF |
| c = _decodeUtf8_2(c); |
| break; |
| case 3: // 3-byte UTF |
| if ((_inputEnd - _inputPtr) >= 2) { |
| c = _decodeUtf8_3fast(c); |
| } else { |
| c = _decodeUtf8_3(c); |
| } |
| break; |
| case 4: // 4-byte UTF |
| c = _decodeUtf8_4(c); |
| // Let's add first part right away: |
| outBuf[outPtr++] = (char) (0xD800 | (c >> 10)); |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| c = 0xDC00 | (c & 0x3FF); |
| // And let the other char output down below |
| break; |
| default: |
| if (c < INT_SPACE) { |
| // As per [JACKSON-208], call can now return: |
| _throwUnquotedSpace(c, "string value"); |
| } else { |
| // Is this good enough error message? |
| _reportInvalidChar(c); |
| } |
| } |
| // Need more room? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| // Ok, let's add char to output: |
| outBuf[outPtr++] = (char) c; |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| } |
| |
| /** |
| * Method called to skim through rest of unparsed String value, |
| * if it is not needed. This can be done bit faster if contents |
| * need not be stored for future access. |
| */ |
| protected void _skipString() throws IOException |
| { |
| _tokenIncomplete = false; |
| |
| // Need to be fully UTF-8 aware here: |
| final int[] codes = _icUTF8; |
| final byte[] inputBuffer = _inputBuffer; |
| |
| main_loop: |
| while (true) { |
| int c; |
| |
| ascii_loop: |
| while (true) { |
| int ptr = _inputPtr; |
| int max = _inputEnd; |
| if (ptr >= max) { |
| loadMoreGuaranteed(); |
| ptr = _inputPtr; |
| max = _inputEnd; |
| } |
| while (ptr < max) { |
| c = (int) inputBuffer[ptr++] & 0xFF; |
| if (codes[c] != 0) { |
| _inputPtr = ptr; |
| break ascii_loop; |
| } |
| } |
| _inputPtr = ptr; |
| } |
| // Ok: end marker, escape or multi-byte? |
| if (c == INT_QUOTE) { |
| break main_loop; |
| } |
| |
| switch (codes[c]) { |
| case 1: // backslash |
| _decodeEscaped(); |
| break; |
| case 2: // 2-byte UTF |
| _skipUtf8_2(c); |
| break; |
| case 3: // 3-byte UTF |
| _skipUtf8_3(c); |
| break; |
| case 4: // 4-byte UTF |
| _skipUtf8_4(c); |
| break; |
| default: |
| if (c < INT_SPACE) { |
| // As per [JACKSON-208], call can now return: |
| _throwUnquotedSpace(c, "string value"); |
| } else { |
| // Is this good enough error message? |
| _reportInvalidChar(c); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Method for handling cases where first non-space character |
| * of an expected value token is not legal for standard JSON content. |
| */ |
| protected JsonToken _handleUnexpectedValue(int c) |
| throws IOException |
| { |
| // Most likely an error, unless we are to allow single-quote-strings |
| switch (c) { |
| case ']': |
| case '}': |
| // Error: neither is valid at this point; valid closers have |
| // been handled earlier |
| _reportUnexpectedChar(c, "expected a value"); |
| case '\'': |
| if (isEnabled(Feature.ALLOW_SINGLE_QUOTES)) { |
| return _handleApos(); |
| } |
| break; |
| case 'N': |
| _matchToken("NaN", 1); |
| if (isEnabled(Feature.ALLOW_NON_NUMERIC_NUMBERS)) { |
| return resetAsNaN("NaN", Double.NaN); |
| } |
| _reportError("Non-standard token 'NaN': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow"); |
| break; |
| case 'I': |
| _matchToken("Infinity", 1); |
| if (isEnabled(Feature.ALLOW_NON_NUMERIC_NUMBERS)) { |
| return resetAsNaN("Infinity", Double.POSITIVE_INFINITY); |
| } |
| _reportError("Non-standard token 'Infinity': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow"); |
| break; |
| case '+': // note: '-' is taken as number |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOFInValue(); |
| } |
| } |
| return _handleInvalidNumberStart(_inputBuffer[_inputPtr++] & 0xFF, false); |
| } |
| // [Issue#77] Try to decode most likely token |
| if (Character.isJavaIdentifierStart(c)) { |
| _reportInvalidToken(""+((char) c), "('true', 'false' or 'null')"); |
| } |
| // but if it doesn't look like a token: |
| _reportUnexpectedChar(c, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')"); |
| return null; |
| } |
| |
| protected JsonToken _handleApos() |
| throws IOException |
| { |
| int c = 0; |
| // Otherwise almost verbatim copy of _finishString() |
| int outPtr = 0; |
| char[] outBuf = _textBuffer.emptyAndGetCurrentSegment(); |
| |
| // Here we do want to do full decoding, hence: |
| final int[] codes = _icUTF8; |
| final byte[] inputBuffer = _inputBuffer; |
| |
| main_loop: |
| while (true) { |
| // Then the tight ascii non-funny-char loop: |
| ascii_loop: |
| while (true) { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| int max = _inputEnd; |
| { |
| int max2 = _inputPtr + (outBuf.length - outPtr); |
| if (max2 < max) { |
| max = max2; |
| } |
| } |
| while (_inputPtr < max) { |
| c = (int) inputBuffer[_inputPtr++] & 0xFF; |
| if (c == '\'' || codes[c] != 0) { |
| break ascii_loop; |
| } |
| outBuf[outPtr++] = (char) c; |
| } |
| } |
| |
| // Ok: end marker, escape or multi-byte? |
| if (c == '\'') { |
| break main_loop; |
| } |
| |
| switch (codes[c]) { |
| case 1: // backslash |
| if (c != '\'') { // marked as special, isn't here |
| c = _decodeEscaped(); |
| } |
| break; |
| case 2: // 2-byte UTF |
| c = _decodeUtf8_2(c); |
| break; |
| case 3: // 3-byte UTF |
| if ((_inputEnd - _inputPtr) >= 2) { |
| c = _decodeUtf8_3fast(c); |
| } else { |
| c = _decodeUtf8_3(c); |
| } |
| break; |
| case 4: // 4-byte UTF |
| c = _decodeUtf8_4(c); |
| // Let's add first part right away: |
| outBuf[outPtr++] = (char) (0xD800 | (c >> 10)); |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| c = 0xDC00 | (c & 0x3FF); |
| // And let the other char output down below |
| break; |
| default: |
| if (c < INT_SPACE) { |
| _throwUnquotedSpace(c, "string value"); |
| } |
| // Is this good enough error message? |
| _reportInvalidChar(c); |
| } |
| // Need more room? |
| if (outPtr >= outBuf.length) { |
| outBuf = _textBuffer.finishCurrentSegment(); |
| outPtr = 0; |
| } |
| // Ok, let's add char to output: |
| outBuf[outPtr++] = (char) c; |
| } |
| _textBuffer.setCurrentLength(outPtr); |
| |
| return JsonToken.VALUE_STRING; |
| } |
| |
| /** |
| * Method called if expected numeric value (due to leading sign) does not |
| * look like a number |
| */ |
| protected JsonToken _handleInvalidNumberStart(int ch, boolean neg) |
| throws IOException |
| { |
| while (ch == 'I') { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOFInValue(); |
| } |
| } |
| ch = _inputBuffer[_inputPtr++]; |
| String match; |
| if (ch == 'N') { |
| match = neg ? "-INF" :"+INF"; |
| } else if (ch == 'n') { |
| match = neg ? "-Infinity" :"+Infinity"; |
| } else { |
| break; |
| } |
| _matchToken(match, 3); |
| if (isEnabled(Feature.ALLOW_NON_NUMERIC_NUMBERS)) { |
| return resetAsNaN(match, neg ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY); |
| } |
| _reportError("Non-standard token '"+match+"': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow"); |
| } |
| reportUnexpectedNumberChar(ch, "expected digit (0-9) to follow minus sign, for valid numeric value"); |
| return null; |
| } |
| |
| protected final void _matchToken(String matchStr, int i) throws IOException |
| { |
| final int len = matchStr.length(); |
| if ((_inputPtr + len) >= _inputEnd) { |
| _matchToken2(matchStr, i); |
| return; |
| } |
| do { |
| if (_inputBuffer[_inputPtr] != matchStr.charAt(i)) { |
| _reportInvalidToken(matchStr.substring(0, i)); |
| } |
| ++_inputPtr; |
| } while (++i < len); |
| |
| int ch = _inputBuffer[_inputPtr] & 0xFF; |
| if (ch >= '0' && ch != ']' && ch != '}') { // expected/allowed chars |
| _checkMatchEnd(matchStr, i, ch); |
| } |
| } |
| |
| private final void _matchToken2(String matchStr, int i) throws IOException |
| { |
| final int len = matchStr.length(); |
| do { |
| if (((_inputPtr >= _inputEnd) && !loadMore()) |
| || (_inputBuffer[_inputPtr] != matchStr.charAt(i))) { |
| _reportInvalidToken(matchStr.substring(0, i)); |
| } |
| ++_inputPtr; |
| } while (++i < len); |
| |
| // but let's also ensure we either get EOF, or non-alphanum char... |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| return; |
| } |
| int ch = _inputBuffer[_inputPtr] & 0xFF; |
| if (ch >= '0' && ch != ']' && ch != '}') { // expected/allowed chars |
| _checkMatchEnd(matchStr, i, ch); |
| } |
| } |
| |
| private final void _checkMatchEnd(String matchStr, int i, int ch) throws IOException { |
| // but actually only alphanums are problematic |
| char c = (char) _decodeCharForError(ch); |
| if (Character.isJavaIdentifierPart(c)) { |
| _reportInvalidToken(matchStr.substring(0, i)); |
| } |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, ws skipping, escape/unescape |
| /********************************************************** |
| */ |
| |
| private final int _skipWS() throws IOException |
| { |
| /* |
| final int[] codes = _icWS; |
| while (_inputPtr < _inputEnd || loadMore()) { |
| final int i = _inputBuffer[_inputPtr++] & 0xFF; |
| switch (codes[i]) { |
| case 0: // done! |
| return i; |
| case 1: // white space, skip |
| continue; |
| case 2: // 2/3/4-byte UTF: done |
| case 3: |
| case 4: |
| return i; |
| case INT_LF: |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| break; |
| case INT_CR: |
| _skipCR(); |
| break; |
| case '/': |
| _skipComment(); |
| break; |
| case '#': |
| if (!_skipYAMLComment()) { |
| return i; |
| } |
| break; |
| default: // e.g. -1 |
| // Is this good enough error message? |
| if (i < 32) { |
| _throwInvalidSpace(i); |
| } |
| _reportInvalidChar(i); |
| } |
| } |
| */ |
| while (_inputPtr < _inputEnd || loadMore()) { |
| int i = _inputBuffer[_inputPtr++] & 0xFF; |
| if (i > INT_SPACE) { |
| if (i == INT_SLASH) { |
| _skipComment(); |
| continue; |
| } |
| if (i == INT_HASH) { |
| if (_skipYAMLComment()) { |
| continue; |
| } |
| } |
| return i; |
| } else if (i != INT_SPACE) { |
| if (i == INT_LF) { |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| } else if (i == INT_CR) { |
| _skipCR(); |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| throw _constructError("Unexpected end-of-input within/between "+_parsingContext.getTypeDesc()+" entries"); |
| } |
| |
| private final int _skipWSOrEnd() throws IOException |
| { |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| int i = _inputBuffer[_inputPtr++] & 0xFF; |
| if (i > INT_SPACE) { |
| if (i == INT_SLASH) { |
| _skipComment(); |
| continue; |
| } |
| if (i == INT_HASH) { |
| if (_skipYAMLComment()) { |
| continue; |
| } |
| } |
| return i; |
| } else if (i != INT_SPACE) { |
| if (i == INT_LF) { |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| } else if (i == INT_CR) { |
| _skipCR(); |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| // We ran out of input... |
| _handleEOF(); |
| return -1; |
| |
| /* |
| final int[] codes = _icWS; |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| final int i = _inputBuffer[_inputPtr++] & 0xFF; |
| switch (codes[i]) { |
| case 0: // done! |
| return i; |
| case 1: // skip |
| continue; |
| case INT_LF: |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| break; |
| case INT_CR: |
| _skipCR(); |
| break; |
| case INT_SLASH: |
| _skipComment(); |
| break; |
| case '#': |
| if (!_skipYAMLComment()) { |
| return i; |
| } |
| break; |
| // case 2: // 2-byte UTF |
| // case 3: // 3-byte UTF |
| // case 4: // 4-byte UTF |
| default: // e.g. -1 |
| _reportInvalidChar(i); |
| } |
| } |
| // We ran out of input... |
| _handleEOF(); |
| return -1; |
| */ |
| } |
| |
| private final int _skipColon() throws IOException |
| { |
| if ((_inputPtr + 4) >= _inputEnd) { |
| return _skipColon2(false); |
| } |
| // Fast path: colon with optional single-space/tab before and/or after: |
| int i = _inputBuffer[_inputPtr]; |
| if (i == INT_COLON) { // common case, no leading space |
| i = _inputBuffer[++_inputPtr]; |
| if (i > INT_SPACE) { // nor trailing |
| if (i == INT_SLASH || i == INT_HASH) { |
| return _skipColon2(true); |
| } |
| ++_inputPtr; |
| return i; |
| } |
| if (i == INT_SPACE || i == INT_TAB) { |
| i = (int) _inputBuffer[++_inputPtr]; |
| if (i > INT_SPACE) { |
| if (i == INT_SLASH || i == INT_HASH) { |
| return _skipColon2(true); |
| } |
| ++_inputPtr; |
| return i; |
| } |
| } |
| return _skipColon2(true); // true -> skipped colon |
| } |
| if (i == INT_SPACE || i == INT_TAB) { |
| i = _inputBuffer[++_inputPtr]; |
| } |
| if (i == INT_COLON) { |
| i = _inputBuffer[++_inputPtr]; |
| if (i > INT_SPACE) { |
| if (i == INT_SLASH || i == INT_HASH) { |
| return _skipColon2(true); |
| } |
| ++_inputPtr; |
| return i; |
| } |
| if (i == INT_SPACE || i == INT_TAB) { |
| i = (int) _inputBuffer[++_inputPtr]; |
| if (i > INT_SPACE) { |
| if (i == INT_SLASH || i == INT_HASH) { |
| return _skipColon2(true); |
| } |
| ++_inputPtr; |
| return i; |
| } |
| } |
| return _skipColon2(true); |
| } |
| return _skipColon2(false); |
| } |
| |
| private final int _skipColon2(boolean gotColon) throws IOException |
| { |
| while (_inputPtr < _inputEnd || loadMore()) { |
| int i = _inputBuffer[_inputPtr++] & 0xFF; |
| |
| if (i > INT_SPACE) { |
| if (i == INT_SLASH) { |
| _skipComment(); |
| continue; |
| } |
| if (i == INT_HASH) { |
| if (_skipYAMLComment()) { |
| continue; |
| } |
| } |
| if (gotColon) { |
| return i; |
| } |
| if (i != INT_COLON) { |
| if (i < INT_SPACE) { |
| _throwInvalidSpace(i); |
| } |
| _reportUnexpectedChar(i, "was expecting a colon to separate field name and value"); |
| } |
| gotColon = true; |
| } else if (i != INT_SPACE) { |
| if (i == INT_LF) { |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| } else if (i == INT_CR) { |
| _skipCR(); |
| } else if (i != INT_TAB) { |
| _throwInvalidSpace(i); |
| } |
| } |
| } |
| throw _constructError("Unexpected end-of-input within/between "+_parsingContext.getTypeDesc()+" entries"); |
| } |
| |
| private final void _skipComment() throws IOException |
| { |
| if (!isEnabled(Feature.ALLOW_COMMENTS)) { |
| _reportUnexpectedChar('/', "maybe a (non-standard) comment? (not recognized as one since Feature 'ALLOW_COMMENTS' not enabled for parser)"); |
| } |
| // First: check which comment (if either) it is: |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| _reportInvalidEOF(" in a comment"); |
| } |
| int c = _inputBuffer[_inputPtr++] & 0xFF; |
| if (c == '/') { |
| _skipLine(); |
| } else if (c == '*') { |
| _skipCComment(); |
| } else { |
| _reportUnexpectedChar(c, "was expecting either '*' or '/' for a comment"); |
| } |
| } |
| |
| private final void _skipCComment() throws IOException |
| { |
| // Need to be UTF-8 aware here to decode content (for skipping) |
| final int[] codes = CharTypes.getInputCodeComment(); |
| |
| // Ok: need the matching '*/' |
| main_loop: |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| int i = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| int code = codes[i]; |
| if (code != 0) { |
| switch (code) { |
| case '*': |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| break main_loop; |
| } |
| if (_inputBuffer[_inputPtr] == INT_SLASH) { |
| ++_inputPtr; |
| return; |
| } |
| break; |
| case INT_LF: |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| break; |
| case INT_CR: |
| _skipCR(); |
| break; |
| case 2: // 2-byte UTF |
| _skipUtf8_2(i); |
| break; |
| case 3: // 3-byte UTF |
| _skipUtf8_3(i); |
| break; |
| case 4: // 4-byte UTF |
| _skipUtf8_4(i); |
| break; |
| default: // e.g. -1 |
| // Is this good enough error message? |
| _reportInvalidChar(i); |
| } |
| } |
| } |
| _reportInvalidEOF(" in a comment"); |
| } |
| |
| private final boolean _skipYAMLComment() throws IOException |
| { |
| if (!isEnabled(Feature.ALLOW_YAML_COMMENTS)) { |
| return false; |
| } |
| _skipLine(); |
| return true; |
| } |
| |
| /** |
| * Method for skipping contents of an input line; usually for CPP |
| * and YAML style comments. |
| */ |
| private final void _skipLine() throws IOException |
| { |
| // Ok: need to find EOF or linefeed |
| final int[] codes = CharTypes.getInputCodeComment(); |
| while ((_inputPtr < _inputEnd) || loadMore()) { |
| int i = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| int code = codes[i]; |
| if (code != 0) { |
| switch (code) { |
| case INT_LF: |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| return; |
| case INT_CR: |
| _skipCR(); |
| return; |
| case '*': // nop for these comments |
| break; |
| case 2: // 2-byte UTF |
| _skipUtf8_2(i); |
| break; |
| case 3: // 3-byte UTF |
| _skipUtf8_3(i); |
| break; |
| case 4: // 4-byte UTF |
| _skipUtf8_4(i); |
| break; |
| default: // e.g. -1 |
| if (code < 0) { |
| // Is this good enough error message? |
| _reportInvalidChar(i); |
| } |
| } |
| } |
| } |
| } |
| |
| @Override |
| protected char _decodeEscaped() throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in character escape sequence"); |
| } |
| } |
| int c = (int) _inputBuffer[_inputPtr++]; |
| |
| switch (c) { |
| // First, ones that are mapped |
| case 'b': |
| return '\b'; |
| case 't': |
| return '\t'; |
| case 'n': |
| return '\n'; |
| case 'f': |
| return '\f'; |
| case 'r': |
| return '\r'; |
| |
| // And these are to be returned as they are |
| case '"': |
| case '/': |
| case '\\': |
| return (char) c; |
| |
| case 'u': // and finally hex-escaped |
| break; |
| |
| default: |
| return _handleUnrecognizedCharacterEscape((char) _decodeCharForError(c)); |
| } |
| |
| // Ok, a hex escape. Need 4 characters |
| int value = 0; |
| for (int i = 0; i < 4; ++i) { |
| if (_inputPtr >= _inputEnd) { |
| if (!loadMore()) { |
| _reportInvalidEOF(" in character escape sequence"); |
| } |
| } |
| int ch = (int) _inputBuffer[_inputPtr++]; |
| int digit = CharTypes.charToHex(ch); |
| if (digit < 0) { |
| _reportUnexpectedChar(ch, "expected a hex-digit for character escape sequence"); |
| } |
| value = (value << 4) | digit; |
| } |
| return (char) value; |
| } |
| |
| protected int _decodeCharForError(int firstByte) throws IOException |
| { |
| int c = firstByte; |
| if (c < 0) { // if >= 0, is ascii and fine as is |
| int needed; |
| |
| // Ok; if we end here, we got multi-byte combination |
| if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF) |
| c &= 0x1F; |
| needed = 1; |
| } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF) |
| c &= 0x0F; |
| needed = 2; |
| } else if ((c & 0xF8) == 0xF0) { |
| // 4 bytes; double-char with surrogates and all... |
| c &= 0x07; |
| needed = 3; |
| } else { |
| _reportInvalidInitial(c & 0xFF); |
| needed = 1; // never gets here |
| } |
| |
| int d = nextByte(); |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF); |
| } |
| c = (c << 6) | (d & 0x3F); |
| |
| if (needed > 1) { // needed == 1 means 2 bytes total |
| d = nextByte(); // 3rd byte |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF); |
| } |
| c = (c << 6) | (d & 0x3F); |
| if (needed > 2) { // 4 bytes? (need surrogates) |
| d = nextByte(); |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF); |
| } |
| c = (c << 6) | (d & 0x3F); |
| } |
| } |
| } |
| return c; |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods,UTF8 decoding |
| /********************************************************** |
| */ |
| |
| private final int _decodeUtf8_2(int c) throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| int d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| return ((c & 0x1F) << 6) | (d & 0x3F); |
| } |
| |
| private final int _decodeUtf8_3(int c1) throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| c1 &= 0x0F; |
| int d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| int c = (c1 << 6) | (d & 0x3F); |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| c = (c << 6) | (d & 0x3F); |
| return c; |
| } |
| |
| private final int _decodeUtf8_3fast(int c1) throws IOException |
| { |
| c1 &= 0x0F; |
| int d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| int c = (c1 << 6) | (d & 0x3F); |
| d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| c = (c << 6) | (d & 0x3F); |
| return c; |
| } |
| |
| /** |
| * @return Character value <b>minus 0x10000</c>; this so that caller |
| * can readily expand it to actual surrogates |
| */ |
| private final int _decodeUtf8_4(int c) throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| int d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| c = ((c & 0x07) << 6) | (d & 0x3F); |
| |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| c = (c << 6) | (d & 0x3F); |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| |
| /* note: won't change it to negative here, since caller |
| * already knows it'll need a surrogate |
| */ |
| return ((c << 6) | (d & 0x3F)) - 0x10000; |
| } |
| |
| private final void _skipUtf8_2(int c) throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| c = (int) _inputBuffer[_inputPtr++]; |
| if ((c & 0xC0) != 0x080) { |
| _reportInvalidOther(c & 0xFF, _inputPtr); |
| } |
| } |
| |
| /* Alas, can't heavily optimize skipping, since we still have to |
| * do validity checks... |
| */ |
| private final void _skipUtf8_3(int c) throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| //c &= 0x0F; |
| c = (int) _inputBuffer[_inputPtr++]; |
| if ((c & 0xC0) != 0x080) { |
| _reportInvalidOther(c & 0xFF, _inputPtr); |
| } |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| c = (int) _inputBuffer[_inputPtr++]; |
| if ((c & 0xC0) != 0x080) { |
| _reportInvalidOther(c & 0xFF, _inputPtr); |
| } |
| } |
| |
| private final void _skipUtf8_4(int c) throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| int d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| d = (int) _inputBuffer[_inputPtr++]; |
| if ((d & 0xC0) != 0x080) { |
| _reportInvalidOther(d & 0xFF, _inputPtr); |
| } |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, input loading |
| /********************************************************** |
| */ |
| |
| /** |
| * We actually need to check the character value here |
| * (to see if we have \n following \r). |
| */ |
| protected final void _skipCR() throws IOException |
| { |
| if (_inputPtr < _inputEnd || loadMore()) { |
| if (_inputBuffer[_inputPtr] == BYTE_LF) { |
| ++_inputPtr; |
| } |
| } |
| ++_currInputRow; |
| _currInputRowStart = _inputPtr; |
| } |
| |
| private int nextByte() throws IOException |
| { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| return _inputBuffer[_inputPtr++] & 0xFF; |
| } |
| |
| /* |
| /********************************************************** |
| /* Internal methods, error reporting |
| /********************************************************** |
| */ |
| |
| protected void _reportInvalidToken(String matchedPart) throws IOException |
| { |
| _reportInvalidToken(matchedPart, "'null', 'true', 'false' or NaN"); |
| } |
| |
| protected void _reportInvalidToken(String matchedPart, String msg) throws IOException |
| { |
| StringBuilder sb = new StringBuilder(matchedPart); |
| |
| /* Let's just try to find what appears to be the token, using |
| * regular Java identifier character rules. It's just a heuristic, |
| * nothing fancy here (nor fast). |
| */ |
| while (true) { |
| if (_inputPtr >= _inputEnd && !loadMore()) { |
| break; |
| } |
| int i = (int) _inputBuffer[_inputPtr++]; |
| char c = (char) _decodeCharForError(i); |
| if (!Character.isJavaIdentifierPart(c)) { |
| break; |
| } |
| sb.append(c); |
| } |
| _reportError("Unrecognized token '"+sb.toString()+"': was expecting "+msg); |
| } |
| |
| protected void _reportInvalidChar(int c) |
| throws JsonParseException |
| { |
| // Either invalid WS or illegal UTF-8 start char |
| if (c < INT_SPACE) { |
| _throwInvalidSpace(c); |
| } |
| _reportInvalidInitial(c); |
| } |
| |
| protected void _reportInvalidInitial(int mask) |
| throws JsonParseException |
| { |
| _reportError("Invalid UTF-8 start byte 0x"+Integer.toHexString(mask)); |
| } |
| |
| protected void _reportInvalidOther(int mask) |
| throws JsonParseException |
| { |
| _reportError("Invalid UTF-8 middle byte 0x"+Integer.toHexString(mask)); |
| } |
| |
| protected void _reportInvalidOther(int mask, int ptr) |
| throws JsonParseException |
| { |
| _inputPtr = ptr; |
| _reportInvalidOther(mask); |
| } |
| |
| public static int[] growArrayBy(int[] arr, int more) |
| { |
| if (arr == null) { |
| return new int[more]; |
| } |
| return Arrays.copyOf(arr, arr.length + more); |
| } |
| |
| /* |
| /********************************************************** |
| /* Binary access |
| /********************************************************** |
| */ |
| |
| /** |
| * Efficient handling for incremental parsing of base64-encoded |
| * textual content. |
| */ |
| @SuppressWarnings("resource") |
| protected final byte[] _decodeBase64(Base64Variant b64variant) throws IOException |
| { |
| ByteArrayBuilder builder = _getByteArrayBuilder(); |
| |
| //main_loop: |
| while (true) { |
| // first, we'll skip preceding white space, if any |
| int ch; |
| do { |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = (int) _inputBuffer[_inputPtr++] & 0xFF; |
| } while (ch <= INT_SPACE); |
| int bits = b64variant.decodeBase64Char(ch); |
| if (bits < 0) { // reached the end, fair and square? |
| if (ch == INT_QUOTE) { |
| return builder.toByteArray(); |
| } |
| bits = _decodeBase64Escape(b64variant, ch, 0); |
| if (bits < 0) { // white space to skip |
| continue; |
| } |
| } |
| int decodedData = bits; |
| |
| // then second base64 char; can't get padding yet, nor ws |
| |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++] & 0xFF; |
| bits = b64variant.decodeBase64Char(ch); |
| if (bits < 0) { |
| bits = _decodeBase64Escape(b64variant, ch, 1); |
| } |
| decodedData = (decodedData << 6) | bits; |
| |
| // third base64 char; can be padding, but not ws |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++] & 0xFF; |
| bits = b64variant.decodeBase64Char(ch); |
| |
| // First branch: can get padding (-> 1 byte) |
| if (bits < 0) { |
| if (bits != Base64Variant.BASE64_VALUE_PADDING) { |
| // as per [JACKSON-631], could also just be 'missing' padding |
| if (ch == '"' && !b64variant.usesPadding()) { |
| decodedData >>= 4; |
| builder.append(decodedData); |
| return builder.toByteArray(); |
| } |
| bits = _decodeBase64Escape(b64variant, ch, 2); |
| } |
| if (bits == Base64Variant.BASE64_VALUE_PADDING) { |
| // Ok, must get padding |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++] & 0xFF; |
| if (!b64variant.usesPaddingChar(ch)) { |
| throw reportInvalidBase64Char(b64variant, ch, 3, "expected padding character '"+b64variant.getPaddingChar()+"'"); |
| } |
| // Got 12 bits, only need 8, need to shift |
| decodedData >>= 4; |
| builder.append(decodedData); |
| continue; |
| } |
| } |
| // Nope, 2 or 3 bytes |
| decodedData = (decodedData << 6) | bits; |
| // fourth and last base64 char; can be padding, but not ws |
| if (_inputPtr >= _inputEnd) { |
| loadMoreGuaranteed(); |
| } |
| ch = _inputBuffer[_inputPtr++] & 0xFF; |
| bits = b64variant.decodeBase64Char(ch); |
| if (bits < 0) { |
| if (bits != Base64Variant.BASE64_VALUE_PADDING) { |
| // as per [JACKSON-631], could also just be 'missing' padding |
| if (ch == '"' && !b64variant.usesPadding()) { |
| decodedData >>= 2; |
| builder.appendTwoBytes(decodedData); |
| return builder.toByteArray(); |
| } |
| bits = _decodeBase64Escape(b64variant, ch, 3); |
| } |
| if (bits == Base64Variant.BASE64_VALUE_PADDING) { |
| /* With padding we only get 2 bytes; but we have |
| * to shift it a bit so it is identical to triplet |
| * case with partial output. |
| * 3 chars gives 3x6 == 18 bits, of which 2 are |
| * dummies, need to discard: |
| */ |
| decodedData >>= 2; |
| builder.appendTwoBytes(decodedData); |
| continue; |
| } |
| } |
| // otherwise, our triplet is now complete |
| decodedData = (decodedData << 6) | bits; |
| builder.appendThreeBytes(decodedData); |
| } |
| } |
| } |