| /* |
| * Copyright (C) 2009 The Guava Authors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
| * in compliance with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software distributed under the License |
| * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
| * or implied. See the License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| package com.google.common.escape; |
| |
| import static com.google.common.base.Preconditions.checkNotNull; |
| |
| import com.google.common.annotations.GwtCompatible; |
| import com.google.errorprone.annotations.CanIgnoreReturnValue; |
| import java.util.HashMap; |
| import java.util.Map; |
| import javax.annotation.CheckForNull; |
| import org.checkerframework.checker.nullness.qual.Nullable; |
| |
| /** |
| * Static utility methods pertaining to {@link Escaper} instances. |
| * |
| * @author Sven Mawson |
| * @author David Beaumont |
| * @since 15.0 |
| */ |
| @GwtCompatible |
| @ElementTypesAreNonnullByDefault |
| public final class Escapers { |
| private Escapers() {} |
| |
| /** |
| * Returns an {@link Escaper} that does no escaping, passing all character data through unchanged. |
| */ |
| public static Escaper nullEscaper() { |
| return NULL_ESCAPER; |
| } |
| |
| // An Escaper that efficiently performs no escaping. |
| // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier. |
| private static final Escaper NULL_ESCAPER = |
| new CharEscaper() { |
| @Override |
| public String escape(String string) { |
| return checkNotNull(string); |
| } |
| |
| @Override |
| @CheckForNull |
| protected char[] escape(char c) { |
| // TODO: Fix tests not to call this directly and make it throw an error. |
| return null; |
| } |
| }; |
| |
| /** |
| * Returns a builder for creating simple, fast escapers. A builder instance can be reused and each |
| * escaper that is created will be a snapshot of the current builder state. Builders are not |
| * thread safe. |
| * |
| * <p>The initial state of the builder is such that: |
| * |
| * <ul> |
| * <li>There are no replacement mappings |
| * <li>{@code safeMin == Character.MIN_VALUE} |
| * <li>{@code safeMax == Character.MAX_VALUE} |
| * <li>{@code unsafeReplacement == null} |
| * </ul> |
| * |
| * <p>For performance reasons escapers created by this builder are not Unicode aware and will not |
| * validate the well-formedness of their input. |
| */ |
| public static Builder builder() { |
| return new Builder(); |
| } |
| |
| /** |
| * A builder for simple, fast escapers. |
| * |
| * <p>Typically an escaper needs to deal with the escaping of high valued characters or code |
| * points. In these cases it is necessary to extend either {@link ArrayBasedCharEscaper} or {@link |
| * ArrayBasedUnicodeEscaper} to provide the desired behavior. However this builder is suitable for |
| * creating escapers that replace a relative small set of characters. |
| * |
| * @author David Beaumont |
| * @since 15.0 |
| */ |
| public static final class Builder { |
| private final Map<Character, String> replacementMap = new HashMap<>(); |
| private char safeMin = Character.MIN_VALUE; |
| private char safeMax = Character.MAX_VALUE; |
| @CheckForNull private String unsafeReplacement = null; |
| |
| // The constructor is exposed via the builder() method above. |
| private Builder() {} |
| |
| /** |
| * Sets the safe range of characters for the escaper. Characters in this range that have no |
| * explicit replacement are considered 'safe' and remain unescaped in the output. If {@code |
| * safeMax < safeMin} then the safe range is empty. |
| * |
| * @param safeMin the lowest 'safe' character |
| * @param safeMax the highest 'safe' character |
| * @return the builder instance |
| */ |
| @CanIgnoreReturnValue |
| public Builder setSafeRange(char safeMin, char safeMax) { |
| this.safeMin = safeMin; |
| this.safeMax = safeMax; |
| return this; |
| } |
| |
| /** |
| * Sets the replacement string for any characters outside the 'safe' range that have no explicit |
| * replacement. If {@code unsafeReplacement} is {@code null} then no replacement will occur, if |
| * it is {@code ""} then the unsafe characters are removed from the output. |
| * |
| * @param unsafeReplacement the string to replace unsafe characters |
| * @return the builder instance |
| */ |
| @CanIgnoreReturnValue |
| public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) { |
| this.unsafeReplacement = unsafeReplacement; |
| return this; |
| } |
| |
| /** |
| * Adds a replacement string for the given input character. The specified character will be |
| * replaced by the given string whenever it occurs in the input, irrespective of whether it lies |
| * inside or outside the 'safe' range. |
| * |
| * @param c the character to be replaced |
| * @param replacement the string to replace the given character |
| * @return the builder instance |
| * @throws NullPointerException if {@code replacement} is null |
| */ |
| @CanIgnoreReturnValue |
| public Builder addEscape(char c, String replacement) { |
| checkNotNull(replacement); |
| // This can replace an existing character (the builder is re-usable). |
| replacementMap.put(c, replacement); |
| return this; |
| } |
| |
| /** Returns a new escaper based on the current state of the builder. */ |
| public Escaper build() { |
| return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) { |
| @CheckForNull |
| private final char[] replacementChars = |
| unsafeReplacement != null ? unsafeReplacement.toCharArray() : null; |
| |
| @Override |
| @CheckForNull |
| protected char[] escapeUnsafe(char c) { |
| return replacementChars; |
| } |
| }; |
| } |
| } |
| |
| /** |
| * Returns a {@link UnicodeEscaper} equivalent to the given escaper instance. If the escaper is |
| * already a UnicodeEscaper then it is simply returned, otherwise it is wrapped in a |
| * UnicodeEscaper. |
| * |
| * <p>When a {@link CharEscaper} escaper is wrapped by this method it acquires extra behavior with |
| * respect to the well-formedness of Unicode character sequences and will throw {@link |
| * IllegalArgumentException} when given bad input. |
| * |
| * @param escaper the instance to be wrapped |
| * @return a UnicodeEscaper with the same behavior as the given instance |
| * @throws NullPointerException if escaper is null |
| * @throws IllegalArgumentException if escaper is not a UnicodeEscaper or a CharEscaper |
| */ |
| static UnicodeEscaper asUnicodeEscaper(Escaper escaper) { |
| checkNotNull(escaper); |
| if (escaper instanceof UnicodeEscaper) { |
| return (UnicodeEscaper) escaper; |
| } else if (escaper instanceof CharEscaper) { |
| return wrap((CharEscaper) escaper); |
| } |
| // In practice this shouldn't happen because it would be very odd not to |
| // extend either CharEscaper or UnicodeEscaper for non trivial cases. |
| throw new IllegalArgumentException( |
| "Cannot create a UnicodeEscaper from: " + escaper.getClass().getName()); |
| } |
| |
| /** |
| * Returns a string that would replace the given character in the specified escaper, or {@code |
| * null} if no replacement should be made. This method is intended for use in tests through the |
| * {@code EscaperAsserts} class; production users of {@link CharEscaper} should limit themselves |
| * to its public interface. |
| * |
| * @param c the character to escape if necessary |
| * @return the replacement string, or {@code null} if no escaping was needed |
| */ |
| @CheckForNull |
| public static String computeReplacement(CharEscaper escaper, char c) { |
| return stringOrNull(escaper.escape(c)); |
| } |
| |
| /** |
| * Returns a string that would replace the given character in the specified escaper, or {@code |
| * null} if no replacement should be made. This method is intended for use in tests through the |
| * {@code EscaperAsserts} class; production users of {@link UnicodeEscaper} should limit |
| * themselves to its public interface. |
| * |
| * @param cp the Unicode code point to escape if necessary |
| * @return the replacement string, or {@code null} if no escaping was needed |
| */ |
| @CheckForNull |
| public static String computeReplacement(UnicodeEscaper escaper, int cp) { |
| return stringOrNull(escaper.escape(cp)); |
| } |
| |
| @CheckForNull |
| private static String stringOrNull(@CheckForNull char[] in) { |
| return (in == null) ? null : new String(in); |
| } |
| |
| /** Private helper to wrap a CharEscaper as a UnicodeEscaper. */ |
| private static UnicodeEscaper wrap(CharEscaper escaper) { |
| return new UnicodeEscaper() { |
| @Override |
| @CheckForNull |
| protected char[] escape(int cp) { |
| // If a code point maps to a single character, just escape that. |
| if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { |
| return escaper.escape((char) cp); |
| } |
| // Convert the code point to a surrogate pair and escape them both. |
| // Note: This code path is horribly slow and typically allocates 4 new |
| // char[] each time it is invoked. However this avoids any |
| // synchronization issues and makes the escaper thread safe. |
| char[] surrogateChars = new char[2]; |
| Character.toChars(cp, surrogateChars, 0); |
| char[] hiChars = escaper.escape(surrogateChars[0]); |
| char[] loChars = escaper.escape(surrogateChars[1]); |
| |
| // If either hiChars or lowChars are non-null, the CharEscaper is trying |
| // to escape the characters of a surrogate pair separately. This is |
| // uncommon and applies only to escapers that assume UCS-2 rather than |
| // UTF-16. See: http://en.wikipedia.org/wiki/UTF-16/UCS-2 |
| if (hiChars == null && loChars == null) { |
| // We expect this to be the common code path for most escapers. |
| return null; |
| } |
| // Combine the characters and/or escaped sequences into a single array. |
| int hiCount = hiChars != null ? hiChars.length : 1; |
| int loCount = loChars != null ? loChars.length : 1; |
| char[] output = new char[hiCount + loCount]; |
| if (hiChars != null) { |
| // TODO: Is this faster than System.arraycopy() for small arrays? |
| for (int n = 0; n < hiChars.length; ++n) { |
| output[n] = hiChars[n]; |
| } |
| } else { |
| output[0] = surrogateChars[0]; |
| } |
| if (loChars != null) { |
| for (int n = 0; n < loChars.length; ++n) { |
| output[hiCount + n] = loChars[n]; |
| } |
| } else { |
| output[hiCount] = surrogateChars[1]; |
| } |
| return output; |
| } |
| }; |
| } |
| } |