yet more work on non-blocking parser

commit: e20608536cdb735f60e4c38be5dae411a00d564a [log] [tgz]
author: Tatu Saloranta <tatu.saloranta@iki.fi> Tue May 30 22:00:35 2017 -0700
committer: Tatu Saloranta <tatu.saloranta@iki.fi> Tue May 30 22:00:35 2017 -0700
tree: 89eb6a869e5135c83f379afa475ac2306de4a3c5
parent: aa804c3f7805185b7eb46e0d1752cc79dc8df182 [diff]
diff --git a/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParser.java
index 0c17dba..ebd0715 100644
--- a/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParser.java
+++ b/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParser.java

@@ -234,9 +234,9 @@
 
         // Field name states
         case MINOR_FIELD_NAME:
-            _finishFieldName(_inputBuffer[_inputPtr++] & 0xFF);
+            return _parseEscapedName(_quadLength,  _pending32, _pendingBytes);
         case MINOR_FIELD_NAME_ESCAPE:
-            _finishFieldWithEscape(_inputBuffer[_inputPtr++] & 0xFF);
+            return _finishFieldWithEscape(_inputBuffer[_inputPtr++] & 0xFF);
             
         // Value states
             
@@ -1076,7 +1076,7 @@
      * Method that handles initial token type recognition for token
      * that has to be either FIELD_NAME or END_OBJECT.
      */
-    protected final JsonToken _startFieldName(int ch) throws IOException
+    private final JsonToken _startFieldName(int ch) throws IOException
     {
         // First: any leading white space?
         if (ch <= 0x0020) {
@@ -1091,43 +1091,19 @@
             if (ch == INT_RCURLY) {
                 return _closeObjectScope();
             }
-VersionUtil.throwInternal();
-//            return _handleOddName(ch);
+            return _handleOddName(ch);
         }
-        if (_inputPtr >= _inputEnd) {
-            _minorState = MINOR_FIELD_NAME;
-            return (_currToken = JsonToken.NOT_AVAILABLE);
-        }
-        ch = _inputBuffer[_inputPtr++];
-        if (ch == INT_QUOTE) { // special case, ""
-            _majorState = MAJOR_OBJECT_VALUE;
-            _parsingContext.setCurrentName("");
-            return (_currToken = JsonToken.FIELD_NAME);
-        }
-        ch &= 0xFF;
-
         // First: can we optimize out bounds checks?
-        String n;
         if ((_inputPtr + 13) <= _inputEnd) { // Need up to 12 chars, plus one trailing (quote)
-            n = _fastParseName(ch);
-            if (n == null) {
-                n = _parseEscapedName(_quadBuffer, 0, 0, ch, 0);
+            String n = _fastParseName();
+            if (n != null) {
+                return _fieldComplete(n);
             }
-        } else {
-            n = _parseEscapedName(_quadBuffer, 0, 0, ch, 0);
         }
-        if (n == null) {
-// !!! TODO: name parsing
-            // note: called method should have set minor state
-VersionUtil.throwInternal();
-            return (_currToken = JsonToken.NOT_AVAILABLE);
-        }
-        _majorState = MAJOR_OBJECT_VALUE;
-        _parsingContext.setCurrentName(n);
-        return (_currToken = JsonToken.FIELD_NAME);
+        return _parseEscapedName(0, 0, 0);
     }
 
-    protected final JsonToken _startFieldNameAfterComma(int ch) throws IOException
+    private final JsonToken _startFieldNameAfterComma(int ch) throws IOException
     {
         // First: any leading white space?
         if (ch <= 0x0020) {
@@ -1164,46 +1140,16 @@
                     return _closeObjectScope();
                 }
             }
-VersionUtil.throwInternal();
-//            return _handleOddName(ch);
+            return _handleOddName(ch);
         }
-        if (_inputPtr >= _inputEnd) {
-            _minorState = MINOR_FIELD_NAME;
-            return (_currToken = JsonToken.NOT_AVAILABLE);
-        }
-        ch = _inputBuffer[_inputPtr++];
-        if (ch == INT_QUOTE) { // special case, ""
-            _majorState = MAJOR_OBJECT_VALUE;
-            _parsingContext.setCurrentName("");
-            return (_currToken = JsonToken.FIELD_NAME);
-        }
-        ch &= 0xFF;
-
         // First: can we optimize out bounds checks?
-        String n;
         if ((_inputPtr + 13) <= _inputEnd) { // Need up to 12 chars, plus one trailing (quote)
-            n = _fastParseName(ch);
-            if (n == null) {
-                n = _parseEscapedName(_quadBuffer, 0, 0, ch, 0);
+            String n = _fastParseName();
+            if (n != null) {
+                return _fieldComplete(n);
             }
-        } else {
-            n = _parseEscapedName(_quadBuffer, 0, 0, ch, 0);
         }
-        if (n == null) {
-// !!! TODO: name parsing
-            // note: called method should have set minor state
-VersionUtil.throwInternal();
-            return (_currToken = JsonToken.NOT_AVAILABLE);
-        }
-        _majorState = MAJOR_OBJECT_VALUE;
-        _parsingContext.setCurrentName(n);
-        return (_currToken = JsonToken.FIELD_NAME);
-    }
-
-    protected final JsonToken _finishFieldName(int ch) throws IOException
-    {
-        VersionUtil.throwInternal();
-        return _currToken;
+        return _parseEscapedName(0, 0, 0);
     }
 
     protected final JsonToken _finishFieldWithEscape(int ch) throws IOException
@@ -1218,7 +1164,7 @@
     /**********************************************************************
      */
 
-    private final String _fastParseName(int q0) throws IOException
+    private final String _fastParseName() throws IOException
     {
         // If so, can also unroll loops nicely
         // This may seem weird, but here we do NOT want to worry about UTF-8
@@ -1229,6 +1175,7 @@
         final int[] codes = _icLatin1;
         int ptr = _inputPtr;
 
+        int q0 = input[ptr++] & 0xFF;
         if (codes[q0] == 0) {
             int i = input[ptr++] & 0xFF;
             if (codes[i] == 0) {
@@ -1268,6 +1215,10 @@
             }
             return null;
         }
+        if (q0 == INT_QUOTE) {
+            _inputPtr = ptr;
+            return "";
+        }
         return null;
     }
 
@@ -1357,22 +1308,6 @@
         return null;
     }
 
-    /*
-    private String _slowParseName() throws IOException
-    {
-        if (_inputPtr >= _inputEnd) {
-            if (!_loadMore()) {
-                _reportInvalidEOF(": was expecting closing '\"' for name", JsonToken.FIELD_NAME);
-            }
-        }
-        int i = _inputBuffer[_inputPtr++] & 0xFF;
-        if (i == INT_QUOTE) { // special case, ""
-            return "";
-        }
-        return _parseEscapedName(_quadBuffer, 0, 0, i, 0);
-    }
-    */
-
     /**
      * Slower parsing method which is generally branched to when
      * an escape sequence is detected (or alternatively for long
@@ -1380,22 +1315,31 @@
      * Needs to be able to handle more exceptional cases, gets slower,
      * and hence is offlined to a separate method.
      */
-    private final String _parseEscapedName(int[] quads, int qlen, int currQuad, int ch,
-            int currQuadBytes) throws IOException
+    private final JsonToken _parseEscapedName(int qlen, int currQuad, int currQuadBytes)
+            throws IOException
     {
-        // 25-Nov-2008, tatu: This may seem weird, but here we do not want to worry about
-        //   UTF-8 decoding yet. Rather, we'll assume that part is ok (if not it will get
-        //   caught later on), and just handle quotes and backslashes here.
+        // This may seem weird, but here we do not want to worry about
+        // UTF-8 decoding yet. Rather, we'll assume that part is ok (if not it will get
+        // caught later on), and just handle quotes and backslashes here.
+        int[] quads = _quadBuffer;
         final int[] codes = _icLatin1;
 
         while (true) {
+            if (_inputPtr >= _inputEnd) {
+                _quadLength = qlen;
+                _pending32 = currQuad;
+                _pendingBytes = currQuadBytes;
+                _minorState = MINOR_FIELD_NAME;
+                return (_currToken = JsonToken.NOT_AVAILABLE);
+            }
+            int ch = _inputBuffer[_inputPtr++] & 0xFF;
             if (codes[ch] != 0) {
                 if (ch == INT_QUOTE) { // we are done
                     break;
                 }
                 // Unquoted white space?
                 if (ch != INT_BACKSLASH) {
-                    // As per [JACKSON-208], call can now return:
+                    // Call can actually now return (if unquoted linefeeds allowed)
                     _throwUnquotedSpace(ch, "name");
                 } else {
                     // Nope, escape sequence
@@ -1449,12 +1393,6 @@
                 currQuad = ch;
                 currQuadBytes = 1;
             }
-            if (_inputPtr >= _inputEnd) {
-                if (!_loadMore()) {
-                    _reportInvalidEOF(" in field name", JsonToken.FIELD_NAME);
-                }
-            }
-            ch = _inputBuffer[_inputPtr++] & 0xFF;
         }
 
         if (currQuadBytes > 0) {
@@ -1462,12 +1400,16 @@
                 _quadBuffer = quads = growArrayBy(quads, quads.length);
             }
             quads[qlen++] = _padLastQuad(currQuad, currQuadBytes);
+        } else {
+            if (qlen == 0) { // rare, but may happen
+                return _fieldComplete("");
+            }
         }
         String name = _symbols.findName(quads, qlen);
         if (name == null) {
             name = _addName(quads, qlen, currQuadBytes);
         }
-        return name;
+        return _fieldComplete(name);
     }
 
     /**
@@ -1476,33 +1418,29 @@
      * In standard mode will just throw an exception; but
      * in non-standard modes may be able to parse name.
      */
-    private String _handleOddName(int ch) throws IOException
+    private JsonToken _handleOddName(int ch) throws IOException
     {
         // First: may allow single quotes
         if (ch == '\'' && isEnabled(Feature.ALLOW_SINGLE_QUOTES)) {
             return _parseAposName();
         }
-        // [JACKSON-69]: allow unquoted names if feature enabled:
+        // allow unquoted names if feature enabled:
         if (!isEnabled(Feature.ALLOW_UNQUOTED_FIELD_NAMES)) {
          // !!! TODO: Decode UTF-8 characters properly...
 //            char c = (char) _decodeCharForError(ch);
             char c = (char) ch;
             _reportUnexpectedChar(c, "was expecting double-quote to start field name");
         }
-        /* Also: note that although we use a different table here,
-         * it does NOT handle UTF-8 decoding. It'll just pass those
-         * high-bit codes as acceptable for later decoding.
-         */
+        // Also: note that although we use a different table here, it does NOT handle UTF-8
+        // decoding. It'll just pass those high-bit codes as acceptable for later decoding.
         final int[] codes = CharTypes.getInputCodeUtf8JsNames();
         // Also: must start with a valid character...
         if (codes[ch] != 0) {
             _reportUnexpectedChar(ch, "was expecting either valid name character (for unquoted name) or double-quote (for quoted) to start field name");
         }
 
-        /* Ok, now; instead of ultra-optimizing parsing here (as with
-         * regular JSON names), let's just use the generic "slow"
-         * variant. Can measure its impact later on if need be
-         */
+        // Ok, now; instead of ultra-optimizing parsing here (as with regular JSON names),
+        // let's just use the generic "slow" variant. Can measure its impact later on if need be.
         int[] quads = _quadBuffer;
         int qlen = 0;
         int currQuad = 0;
@@ -1543,7 +1481,7 @@
         if (name == null) {
             name = _addName(quads, qlen, currQuadBytes);
         }
-        return name;
+        return _fieldComplete(name);
     }
 
     /* Parsing to support [JACKSON-173]. Plenty of duplicated code;
@@ -1551,7 +1489,7 @@
      * for valid JSON -- more alternatives, more code, generally
      * bit slower execution.
      */
-    private String _parseAposName() throws IOException
+    private JsonToken _parseAposName() throws IOException
     {
         if (_inputPtr >= _inputEnd) {
             if (!_loadMore()) {
@@ -1560,7 +1498,7 @@
         }
         int ch = _inputBuffer[_inputPtr++] & 0xFF;
         if (ch == '\'') { // special case, ''
-            return "";
+            return _fieldComplete("");
         }
         int[] quads = _quadBuffer;
         int qlen = 0;
@@ -1647,7 +1585,7 @@
         if (name == null) {
             name = _addName(quads, qlen, currQuadBytes);
         }
-        return name;
+        return _fieldComplete(name);
     }
 
     /*

diff --git a/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java b/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java
index 1c62cc1..5508308 100644
--- a/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java
+++ b/src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java

@@ -121,11 +121,14 @@
      */
     protected int[] _quadBuffer = new int[8];
 
-    /**
-     * Quads used for hash calculation
-     */
-    protected int _quad1, _quad2;
+    protected int _quadLength;
 
+    protected int _quad1;
+
+    protected int _pending32;
+
+    protected int _pendingBytes;
+    
     /*
     /**********************************************************************
     /* Additional parsing state
@@ -163,18 +166,12 @@
      * Temporary buffer for holding content if input not contiguous (but can
      * fit in buffer)
      */
-    protected byte[] _inputCopy;
+//    protected byte[] _inputCopy;
 
     /**
      * Number of bytes buffered in <code>_inputCopy</code>
      */
-    protected int _inputCopyLen;
-
-    /**
-     * Temporary storage for 32-bit values (int, float), as well as length markers
-     * for length-prefixed values.
-     */
-    protected int _pending32;
+//    protected int _inputCopyLen;
 
     /*
     /**********************************************************************
@@ -188,7 +185,7 @@
         super(ctxt, parserFeatures);
         _symbols = sym;
         // We don't need a lot; for most things maximum known a-priori length below 70 bytes
-        _inputCopy = ctxt.allocReadIOBuffer(500);
+//        _inputCopy = ctxt.allocReadIOBuffer(500);
 
         _currToken = null;
         _majorState = MAJOR_INITIAL;
@@ -643,99 +640,6 @@
     }
 
     /*
-    // Helper method for trying to find specified encoded UTF-8 byte sequence
-    // from symbol table; if successful avoids actual decoding to String
-    protected final String _findDecodedFromSymbols(byte[] inBuf, int inPtr, int len) throws IOException
-    {
-        // First: maybe we already have this name decoded?
-        if (len < 5) {
-            int q = inBuf[inPtr] & 0xFF;
-            if (--len > 0) {
-                q = (q << 8) + (inBuf[++inPtr] & 0xFF);
-                if (--len > 0) {
-                    q = (q << 8) + (inBuf[++inPtr] & 0xFF);
-                    if (--len > 0) {
-                        q = (q << 8) + (inBuf[++inPtr] & 0xFF);
-                    }
-                }
-            }
-            _quad1 = q;
-            return _symbols.findName(q);
-        }
-        if (len < 9) {
-            // First quadbyte is easy
-            int q1 = (inBuf[inPtr] & 0xFF) << 8;
-            q1 += (inBuf[++inPtr] & 0xFF);
-            q1 <<= 8;
-            q1 += (inBuf[++inPtr] & 0xFF);
-            q1 <<= 8;
-            q1 += (inBuf[++inPtr] & 0xFF);
-            int q2 = (inBuf[++inPtr] & 0xFF);
-            len -= 5;
-            if (len > 0) {
-                q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
-                if (--len > 0) {
-                    q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
-                    if (--len > 0) {
-                        q2 = (q2 << 8) + (inBuf[++inPtr] & 0xFF);
-                    }
-                }
-            }
-            _quad1 = q1;
-            _quad2 = q2;
-            return _symbols.findName(q1, q2);
-        }
-        return _findDecodedLonger(inBuf, inPtr, len);
-    }
-    
-    // Method for locating names longer than 8 bytes (in UTF-8)
-    private final String _findDecodedLonger(byte[] inBuf, int inPtr, int len) throws IOException
-    {
-        // first, need enough buffer to store bytes as ints:
-        {
-            int bufLen = (len + 3) >> 2;
-            if (bufLen > _quadBuffer.length) {
-                _quadBuffer = Arrays.copyOf(_quadBuffer, bufLen+4);
-            }
-        }
-        // then decode, full quads first
-        int offset = 0;
-        do {
-            int q = (inBuf[inPtr++] & 0xFF) << 8;
-            q |= inBuf[inPtr++] & 0xFF;
-            q <<= 8;
-            q |= inBuf[inPtr++] & 0xFF;
-            q <<= 8;
-            q |= inBuf[inPtr++] & 0xFF;
-            _quadBuffer[offset++] = q;
-        } while ((len -= 4) > 3);
-        // and then leftovers
-        if (len > 0) {
-            int q = inBuf[inPtr] & 0xFF;
-            if (--len > 0) {
-                q = (q << 8) + (inBuf[++inPtr] & 0xFF);
-                if (--len > 0) {
-                    q = (q << 8) + (inBuf[++inPtr] & 0xFF);
-                }
-            }
-            _quadBuffer[offset++] = q;
-        }
-        return _symbols.findName(_quadBuffer, offset);
-    }
-
-    protected final String _addDecodedToSymbols(int len, String name)
-    {
-        if (len < 5) {
-            return _symbols.addName(name, _quad1);
-        }
-        if (len < 9) {
-            return _symbols.addName(name, _quad1, _quad2);
-        }
-        int qlen = (len + 3) >> 2;
-        return _symbols.addName(name, _quadBuffer, qlen);
-    }
-*/
-    /*
     /**********************************************************************
     /* Internal methods, state changes
     /**********************************************************************
@@ -754,6 +658,13 @@
         return (_currToken = null);
     }
 
+    protected final JsonToken _fieldComplete(String name) throws IOException
+    {
+        _majorState = MAJOR_OBJECT_VALUE;
+        _parsingContext.setCurrentName(name);
+        return (_currToken = JsonToken.FIELD_NAME);
+    }
+
     protected final JsonToken _valueComplete(JsonToken t) throws IOException
     {
         _majorState = _majorStateAfterValue;

diff --git a/src/test/java/com/fasterxml/jackson/core/async/AsyncRootValuesTest.java b/src/test/java/com/fasterxml/jackson/core/async/AsyncRootValuesTest.java
index 454f7a9..81984ed 100644
--- a/src/test/java/com/fasterxml/jackson/core/async/AsyncRootValuesTest.java
+++ b/src/test/java/com/fasterxml/jackson/core/async/AsyncRootValuesTest.java

@@ -142,14 +142,12 @@
 
         JsonFactory f = JSON_F;
         _testMixedRootSequence(f, input, 0, 100);
-/* !!! TODO
         _testMixedRootSequence(f, input, 0, 3);
         _testMixedRootSequence(f, input, 0, 1);
 
         _testMixedRootSequence(f, input, 1, 100);
         _testMixedRootSequence(f, input, 1, 3);
         _testMixedRootSequence(f, input, 1, 1);
-        */
     }
 
     private void _testMixedRootSequence(JsonFactory f,

diff --git a/src/test/java/com/fasterxml/jackson/core/async/AsyncSimpleNestedTest.java b/src/test/java/com/fasterxml/jackson/core/async/AsyncSimpleNestedTest.java
index 4faba60..439b79d 100644
--- a/src/test/java/com/fasterxml/jackson/core/async/AsyncSimpleNestedTest.java
+++ b/src/test/java/com/fasterxml/jackson/core/async/AsyncSimpleNestedTest.java

@@ -21,15 +21,12 @@
 
         JsonFactory f = JSON_F;
         _testStuffInObject(f, data, 0, 100);
-/* !!! TODO        
-        
         _testStuffInObject(f, data, 0, 3);
         _testStuffInObject(f, data, 0, 1);
 
         _testStuffInObject(f, data, 1, 100);
         _testStuffInObject(f, data, 1, 3);
         _testStuffInObject(f, data, 1, 1);
-        */
     }
 
     private void _testStuffInObject(JsonFactory f,
@@ -84,17 +81,15 @@
     public void testStuffInArray() throws Exception
     {
         byte[] data = _jsonDoc(aposToQuotes("[true,{'moreStuff':0},[null],{'extraOrdinary':23}]"));
-
         JsonFactory f = JSON_F;
+
         _testStuffInArray(f, data, 0, 100);
-/* !!! TODO        
         _testStuffInArray(f, data, 0, 3);
         _testStuffInArray(f, data, 0, 1);
 
-        _testStuffInArray(f, data, 1, 100);
-        _testStuffInArray(f, data, 1, 3);
-        _testStuffInArray(f, data, 1, 1);
-        */
+        _testStuffInArray(f, data, 3, 100);
+        _testStuffInArray(f, data, 3, 3);
+        _testStuffInArray(f, data, 3, 1);
     }
 
     private void _testStuffInArray(JsonFactory f,
commit	e20608536cdb735f60e4c38be5dae411a00d564a	[log] [tgz]
author	Tatu Saloranta <tatu.saloranta@iki.fi>	Tue May 30 22:00:35 2017 -0700
committer	Tatu Saloranta <tatu.saloranta@iki.fi>	Tue May 30 22:00:35 2017 -0700
tree	89eb6a869e5135c83f379afa475ac2306de4a3c5
parent	aa804c3f7805185b7eb46e0d1752cc79dc8df182 [diff]