Reapply an upstream OkHttp change to Android for HttpUrl

Manually reapplied change from upstream to address HttpUrl encoding issues.

Upstream details:
Comment:
Don't percent-encode non-ASCII characters in fragments

Fixes https://github.com/square/okhttp/issues/1635
SHA: f4feb0adfcd8e209f90d4fffb6facf5c07f57110

Bug: 27590872
(cherry picked from commit 469258d7cc4690e91c3950020c4aea106f216b21)

Change-Id: I35e688e30d307a1c3f8b0d0b4ca18b799e28ab7d
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java
index 4f04af9..d2fc31d 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/HttpUrlTest.java
@@ -466,8 +466,45 @@
     new UrlComponentEncodingTester()
         .override(Encoding.IDENTITY, ' ', '"', '#', '<', '>', '?', '`')
         .skipForUri('%', ' ', '"', '#', '<', '>', '\\', '^', '`', '{', '|', '}')
+        .identityForNonAscii()
         .test(Component.FRAGMENT);
-    // TODO(jwilson): don't percent-encode non-ASCII characters. (But do encode control characters!)
+  }
+
+  @Test public void fragmentNonAscii() throws Exception {
+    HttpUrl url = HttpUrl.parse("http://host/#Σ");
+    assertEquals("http://host/#Σ", url.toString());
+    assertEquals("Σ", url.fragment());
+    assertEquals("Σ", url.encodedFragment());
+    assertEquals("http://host/#Σ", url.uri().toString());
+  }
+
+  @Test public void fragmentNonAsciiThatOffendsJavaNetUri() throws Exception {
+    HttpUrl url = HttpUrl.parse("http://host/#\u0080");
+    assertEquals("http://host/#\u0080", url.toString());
+    assertEquals("\u0080", url.fragment());
+    assertEquals("\u0080", url.encodedFragment());
+    try {
+      url.uri();
+      fail();
+    } catch (IllegalStateException expected) {
+      // Possibly a bug in java.net.URI. Many non-ASCII code points work, this one doesn't!
+    }
+  }
+
+  @Test public void fragmentPercentEncodedNonAscii() throws Exception {
+    HttpUrl url = HttpUrl.parse("http://host/#%C2%80");
+    assertEquals("http://host/#%C2%80", url.toString());
+    assertEquals("\u0080", url.fragment());
+    assertEquals("%C2%80", url.encodedFragment());
+    assertEquals("http://host/#%C2%80", url.uri().toString());
+  }
+
+  @Test public void fragmentPercentEncodedPartialCodePoint() throws Exception {
+    HttpUrl url = HttpUrl.parse("http://host/#%80");
+    assertEquals("http://host/#%80", url.toString());
+    assertEquals("\ufffd", url.fragment()); // Unicode replacement character.
+    assertEquals("%80", url.encodedFragment());
+    assertEquals("http://host/#%80", url.uri().toString());
   }
 
   @Test public void relativePath() throws Exception {
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java b/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java
index 99e9791..22502eb 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/UrlComponentEncodingTester.java
@@ -27,6 +27,10 @@
 
 /** Tests how each code point is encoded and decoded in the context of each URL component. */
 class UrlComponentEncodingTester {
+  private static final int UNICODE_2 = 0x07ff; // Arbitrary code point that's 2 bytes in UTF-8.
+  private static final int UNICODE_3 = 0xffff; // Arbitrary code point that's 3 bytes in UTF-8.
+  private static final int UNICODE_4 = 0x10ffff; // Arbitrary code point that's 4 bytes in UTF-8.
+
   /**
    * The default encode set for the ASCII range. The specific rules vary per-component: for example,
    * '?' may be identity-encoded in a fragment, but must be percent-encoded in a path.
@@ -164,6 +168,9 @@
     map.put((int)  '}', Encoding.IDENTITY);
     map.put((int)  '~', Encoding.IDENTITY);
     map.put(      0x7f, Encoding.PERCENT); // Delete
+    map.put( UNICODE_2, Encoding.PERCENT);
+    map.put( UNICODE_3, Encoding.PERCENT);
+    map.put( UNICODE_4, Encoding.PERCENT);
     defaultEncodings = Collections.unmodifiableMap(map);
   }
 
@@ -181,6 +188,13 @@
     return this;
   }
 
+  public UrlComponentEncodingTester identityForNonAscii() {
+    encodings.put(UNICODE_2, Encoding.IDENTITY);
+    encodings.put(UNICODE_3, Encoding.IDENTITY);
+    encodings.put(UNICODE_4, Encoding.IDENTITY);
+    return this;
+  }
+
   /**
    * Configure a character to be skipped but only for conversion to and from {@code java.net.URI}.
    * That class is more strict than the others.
diff --git a/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java b/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java
index e45761c..2c619c2 100644
--- a/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java
+++ b/okhttp-tests/src/test/java/com/squareup/okhttp/WebPlatformUrlTest.java
@@ -57,8 +57,6 @@
       "Parsing: <http://f:00000000000000/c> against <http://example.org/foo/bar>",
       "Parsing: <http://f:\n/c> against <http://example.org/foo/bar>",
       "Parsing: <http://f:999999/c> against <http://example.org/foo/bar>",
-      "Parsing: <#β> against <http://example.org/foo/bar>",
-      "Parsing: <http://www.google.com/foo?bar=baz# »> against <about:blank>",
       "Parsing: <http://192.0x00A80001> against <about:blank>",
       // This test fails on Java 7 but passes on Java 8. See HttpUrlTest.hostWithTrailingDot().
       "Parsing: <http://%30%78%63%30%2e%30%32%35%30.01%2e> against <http://other.com/>",
diff --git a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java
index 6f4b93c..f5134d9 100644
--- a/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java
+++ b/okhttp/src/main/java/com/squareup/okhttp/FormEncodingBuilder.java
@@ -33,10 +33,10 @@
       content.writeByte('&');
     }
     HttpUrl.canonicalize(content, name, 0, name.length(),
-        HttpUrl.FORM_ENCODE_SET, false, true);
+        HttpUrl.FORM_ENCODE_SET, false, true, true);
     content.writeByte('=');
     HttpUrl.canonicalize(content, value, 0, value.length(),
-        HttpUrl.FORM_ENCODE_SET, false, true);
+        HttpUrl.FORM_ENCODE_SET, false, true, true);
     return this;
   }
 
@@ -46,10 +46,10 @@
       content.writeByte('&');
     }
     HttpUrl.canonicalize(content, name, 0, name.length(),
-        HttpUrl.FORM_ENCODE_SET, true, true);
+        HttpUrl.FORM_ENCODE_SET, true, true, true);
     content.writeByte('=');
     HttpUrl.canonicalize(content, value, 0, value.length(),
-        HttpUrl.FORM_ENCODE_SET, true, true);
+        HttpUrl.FORM_ENCODE_SET, true, true, true);
     return this;
   }
 
diff --git a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java
index 76764c2..68b09ef 100644
--- a/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java
+++ b/okhttp/src/main/java/com/squareup/okhttp/HttpUrl.java
@@ -686,25 +686,25 @@
 
     public Builder username(String username) {
       if (username == null) throw new IllegalArgumentException("username == null");
-      this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false);
+      this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, true);
       return this;
     }
 
     public Builder encodedUsername(String encodedUsername) {
       if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null");
-      this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false);
+      this.encodedUsername = canonicalize(encodedUsername, USERNAME_ENCODE_SET, true, false, true);
       return this;
     }
 
     public Builder password(String password) {
       if (password == null) throw new IllegalArgumentException("password == null");
-      this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false);
+      this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, true);
       return this;
     }
 
     public Builder encodedPassword(String encodedPassword) {
       if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null");
-      this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false);
+      this.encodedPassword = canonicalize(encodedPassword, PASSWORD_ENCODE_SET, true, false, true);
       return this;
     }
 
@@ -747,7 +747,7 @@
     public Builder setPathSegment(int index, String pathSegment) {
       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
       String canonicalPathSegment = canonicalize(
-          pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false);
+          pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, true);
       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
         throw new IllegalArgumentException("unexpected path segment: " + pathSegment);
       }
@@ -760,7 +760,7 @@
         throw new IllegalArgumentException("encodedPathSegment == null");
       }
       String canonicalPathSegment = canonicalize(encodedPathSegment,
-          0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false);
+          0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, true);
       encodedPathSegments.set(index, canonicalPathSegment);
       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
         throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);
@@ -787,14 +787,15 @@
 
     public Builder query(String query) {
       this.encodedQueryNamesAndValues = query != null
-          ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true))
+          ? queryStringToNamesAndValues(canonicalize(query, QUERY_ENCODE_SET, false, true, true))
           : null;
       return this;
     }
 
     public Builder encodedQuery(String encodedQuery) {
       this.encodedQueryNamesAndValues = encodedQuery != null
-          ? queryStringToNamesAndValues(canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true))
+          ? queryStringToNamesAndValues(
+              canonicalize(encodedQuery, QUERY_ENCODE_SET, true, true, true))
           : null;
       return this;
     }
@@ -803,9 +804,10 @@
     public Builder addQueryParameter(String name, String value) {
       if (name == null) throw new IllegalArgumentException("name == null");
       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
-      encodedQueryNamesAndValues.add(canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true));
+      encodedQueryNamesAndValues.add(
+          canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true));
       encodedQueryNamesAndValues.add(value != null
-          ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true)
+          ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, true, true)
           : null);
       return this;
     }
@@ -815,9 +817,9 @@
       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
       encodedQueryNamesAndValues.add(
-          canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true));
+          canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true));
       encodedQueryNamesAndValues.add(encodedValue != null
-          ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true)
+          ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, true, true)
           : null);
       return this;
     }
@@ -837,7 +839,7 @@
     public Builder removeAllQueryParameters(String name) {
       if (name == null) throw new IllegalArgumentException("name == null");
       if (encodedQueryNamesAndValues == null) return this;
-      String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true);
+      String nameToRemove = canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, true, true);
       removeAllCanonicalQueryParameters(nameToRemove);
       return this;
     }
@@ -846,7 +848,7 @@
       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
       if (encodedQueryNamesAndValues == null) return this;
       removeAllCanonicalQueryParameters(
-          canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true));
+          canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, true, true));
       return this;
     }
 
@@ -865,14 +867,14 @@
 
     public Builder fragment(String fragment) {
       this.encodedFragment = fragment != null
-          ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false)
+          ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false)
           : null;
       return this;
     }
 
     public Builder encodedFragment(String encodedFragment) {
       this.encodedFragment = encodedFragment != null
-          ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false)
+          ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false)
           : null;
       return this;
     }
@@ -885,19 +887,20 @@
       for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {
         String pathSegment = encodedPathSegments.get(i);
         encodedPathSegments.set(i,
-            canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, false));
+            canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, false, true));
       }
       if (encodedQueryNamesAndValues != null) {
         for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {
           String component = encodedQueryNamesAndValues.get(i);
           if (component != null) {
             encodedQueryNamesAndValues.set(i,
-                canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true));
+                canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true));
           }
         }
       }
       if (encodedFragment != null) {
-        encodedFragment = canonicalize(encodedFragment, FRAGMENT_ENCODE_SET_URI, true, false);
+        encodedFragment = canonicalize(
+            encodedFragment, FRAGMENT_ENCODE_SET_URI, true, false, false);
       }
       return this;
     }
@@ -1010,19 +1013,19 @@
                 int passwordColonOffset = delimiterOffset(
                     input, pos, componentDelimiterOffset, ":");
                 String canonicalUsername = canonicalize(
-                    input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false);
+                    input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, true);
                 this.encodedUsername = hasUsername
                     ? this.encodedUsername + "%40" + canonicalUsername
                     : canonicalUsername;
                 if (passwordColonOffset != componentDelimiterOffset) {
                   hasPassword = true;
                   this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
-                      componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false);
+                      componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true);
                 }
                 hasUsername = true;
               } else {
                 this.encodedPassword = this.encodedPassword + "%40" + canonicalize(
-                    input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false);
+                    input, pos, componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, true);
               }
               pos = componentDelimiterOffset + 1;
               break;
@@ -1069,14 +1072,14 @@
       if (pos < limit && input.charAt(pos) == '?') {
         int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#");
         this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
-            input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true));
+            input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, true, true));
         pos = queryDelimiterOffset;
       }
 
       // Fragment.
       if (pos < limit && input.charAt(pos) == '#') {
         this.encodedFragment = canonicalize(
-            input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false);
+            input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false);
       }
 
       return ParseResult.SUCCESS;
@@ -1113,7 +1116,7 @@
     private void push(String input, int pos, int limit, boolean addTrailingSlash,
         boolean alreadyEncoded) {
       String segment = canonicalize(
-          input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false);
+          input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, true);
       if (isDot(segment)) {
         return; // Skip '.' path segments.
       }
@@ -1464,7 +1467,7 @@
     private static int parsePort(String input, int pos, int limit) {
       try {
         // Canonicalize the port string to skip '\n' etc.
-        String portString = canonicalize(input, pos, limit, "", false, false);
+        String portString = canonicalize(input, pos, limit, "", false, false, true);
         int i = Integer.parseInt(portString);
         if (i > 0 && i <= 65535) return i;
         return -1;
@@ -1553,21 +1556,23 @@
    *
    * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
    * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded
+   * @param asciiOnly true to encode all non-ASCII codepoints.
    */
   static String canonicalize(String input, int pos, int limit, String encodeSet,
-      boolean alreadyEncoded, boolean plusIsSpace) {
+      boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) {
     int codePoint;
     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
       codePoint = input.codePointAt(i);
       if (codePoint < 0x20
-          || codePoint >= 0x7f
+          || codePoint == 0x7f
+          || (codePoint >= 0x80 && asciiOnly)
           || encodeSet.indexOf(codePoint) != -1
           || (codePoint == '%' && !alreadyEncoded)
           || (codePoint == '+' && plusIsSpace)) {
         // Slow path: the character at i requires encoding!
         Buffer out = new Buffer();
         out.writeUtf8(input, pos, i);
-        canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace);
+        canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, plusIsSpace, asciiOnly);
         return out.readUtf8();
       }
     }
@@ -1577,7 +1582,7 @@
   }
 
   static void canonicalize(Buffer out, String input, int pos, int limit,
-      String encodeSet, boolean alreadyEncoded, boolean plusIsSpace) {
+      String encodeSet, boolean alreadyEncoded, boolean plusIsSpace, boolean asciiOnly) {
     Buffer utf8Buffer = null; // Lazily allocated.
     int codePoint;
     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
@@ -1589,7 +1594,8 @@
         // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
         out.writeUtf8(alreadyEncoded ? "+" : "%2B");
       } else if (codePoint < 0x20
-          || codePoint >= 0x7f
+          || codePoint == 0x7f
+          || (codePoint >= 0x80 && asciiOnly)
           || encodeSet.indexOf(codePoint) != -1
           || (codePoint == '%' && !alreadyEncoded)) {
         // Percent encode this character.
@@ -1611,8 +1617,7 @@
   }
 
   static String canonicalize(
-      String input, String encodeSet, boolean alreadyEncoded, boolean plusIsSpace) {
-    return canonicalize(
-        input, 0, input.length(), encodeSet, alreadyEncoded, plusIsSpace);
+      String input, String encodeSet, boolean alreadyEncoded, boolean query, boolean asciiOnly) {
+    return canonicalize(input, 0, input.length(), encodeSet, alreadyEncoded, query, asciiOnly);
   }
 }