// blob: 23b28b4af149f03644e359d386193b85900b2d35
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.jimfs;
import static com.google.common.jimfs.PathNormalization.CASE_FOLD_ASCII;
import static com.google.common.jimfs.PathNormalization.CASE_FOLD_UNICODE;
import static com.google.common.jimfs.PathNormalization.NFC;
import static com.google.common.jimfs.PathNormalization.NFD;
import static com.google.common.jimfs.TestUtils.assertNotEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import com.google.common.collect.ImmutableSet;
import java.util.regex.Pattern;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Tests for {@link PathNormalization}.
 *
 * <p>Each test first assigns the set of normalizations under test to {@link #normalizations}; the
 * private assertion helpers at the bottom of the class then read that field when calling
 * {@code PathNormalization.normalize} or {@code PathNormalization.compilePattern}.
 *
 * @author Colin Decker
 */
@RunWith(JUnit4.class)
public class PathNormalizationTest {

  /** The normalizations applied by the assertion helpers; assigned at the start of each test. */
  private ImmutableSet<PathNormalization> normalizations;

  // ---------------------------------------------------------------------------------------------
  // Test data
  // ---------------------------------------------------------------------------------------------

  /**
   * Rows of strings that are all expected to normalize to the same string under
   * {@code CASE_FOLD_UNICODE}.
   *
   * <p>NOTE(review): the "efficient" and "flour" rows hold the same string in their first three
   * columns and the "poſt" row appears twice; possibly some characters (e.g. ligatures) were lost
   * in a re-encoding of this file -- verify against upstream.
   */
  private static final String[][] CASE_FOLD_TEST_DATA = {
    {"foo", "fOo", "foO", "Foo", "FOO"},
    {"efficient", "efficient", "efficient", "Efficient", "EFFICIENT"},
    {"flour", "flour", "flour", "Flour", "FLOUR"},
    {"poſt", "post", "poſt", "Poſt", "POST"},
    {"poſt", "post", "poſt", "Poſt", "POST"},
    {"ſtop", "stop", "ſtop", "Stop", "STOP"},
    {"tschüß", "tschüss", "tschüß", "Tschüß", "TSCHÜSS"},
    {"weiß", "weiss", "weiß", "Weiß", "WEISS"},
    {"WEIẞ", "weiss", "weiß", "Weiß", "WEIẞ"},
    {"στιγμας", "στιγμασ", "στιγμας", "Στιγμας", "ΣΤΙΓΜΑΣ"},
    {"ᾲ στο διάολο", "ὰι στο διάολο", "ᾲ στο διάολο", "Ὰͅ Στο Διάολο", "ᾺΙ ΣΤΟ ΔΙΆΟΛΟ"},
    {"Henry Ⅷ", "henry ⅷ", "henry ⅷ", "Henry Ⅷ", "HENRY Ⅷ"},
    {"I Work At Ⓚ", "i work at ⓚ", "i work at ⓚ", "I Work At Ⓚ", "I WORK AT Ⓚ"},
    {"ʀᴀʀᴇ", "ʀᴀʀᴇ", "ʀᴀʀᴇ", "Ʀᴀʀᴇ", "ƦᴀƦᴇ"},
    {"Ὰͅ", "ὰι", "ᾲ", "Ὰͅ", "ᾺΙ"}
  };

  /** Rows of strings expected to normalize equal under either {@code NFC} or {@code NFD}. */
  private static final String[][] NORMALIZE_TEST_DATA = {
    {"\u00c5", "\u212b"}, // two forms of Å (one code point each)
    {"Am\u00e9lie", "Ame\u0301lie"} // two forms of Amélie (one composed, one decomposed)
  };

  /**
   * Rows of strings expected to normalize equal when {@code NFC} or {@code NFD} is combined with
   * {@code CASE_FOLD_UNICODE}.
   */
  private static final String[][] NORMALIZE_CASE_FOLD_TEST_DATA = {
    {"\u00c5", "\u00e5", "\u212b"},
    {"Am\u00e9lie", "Am\u00c9lie", "Ame\u0301lie", "AME\u0301LIE"}
  };

  /**
   * Rows used with {@code CASE_FOLD_ASCII}: the tests below expect the strings in a row to stay
   * distinct under {@code NFC} but to become equal under {@code NFD}.
   */
  private static final String[][] NORMALIZED_CASE_INSENSITIVE_ASCII_TEST_DATA = {
    {"\u00e5", "\u212b"},
    {"Am\u00e9lie", "AME\u0301LIE"}
  };

  // ---------------------------------------------------------------------------------------------
  // Tests of PathNormalization.normalize
  // ---------------------------------------------------------------------------------------------

  @Test
  public void testNone() {
    normalizations = ImmutableSet.of();

    assertNormalizedEqual("foo", "foo");
    assertNormalizedUnequal("Foo", "foo");
    assertNormalizedUnequal("\u00c5", "\u212b");
    assertNormalizedUnequal("Am\u00e9lie", "Ame\u0301lie");
  }

  @Test
  public void testCaseFold() {
    normalizations = ImmutableSet.of(CASE_FOLD_UNICODE);

    assertAllPairsNormalizedEqual(CASE_FOLD_TEST_DATA);
  }

  @Test
  public void testCaseInsensitiveAscii() {
    normalizations = ImmutableSet.of(CASE_FOLD_ASCII);

    assertRowPairsNormalizedEqual(new String[] {"foo", "FOO", "fOo", "Foo"});

    assertNormalizedUnequal("weiß", "weiss");
  }

  @Test
  public void testNormalizeNfc() {
    normalizations = ImmutableSet.of(NFC);

    assertAllPairsNormalizedEqual(NORMALIZE_TEST_DATA);
  }

  @Test
  public void testNormalizeNfd() {
    normalizations = ImmutableSet.of(NFD);

    assertAllPairsNormalizedEqual(NORMALIZE_TEST_DATA);
  }

  @Test
  public void testNormalizeNfcCaseFold() {
    normalizations = ImmutableSet.of(NFC, CASE_FOLD_UNICODE);

    assertAllPairsNormalizedEqual(NORMALIZE_CASE_FOLD_TEST_DATA);
  }

  @Test
  public void testNormalizeNfdCaseFold() {
    normalizations = ImmutableSet.of(NFD, CASE_FOLD_UNICODE);

    assertAllPairsNormalizedEqual(NORMALIZE_CASE_FOLD_TEST_DATA);
  }

  @Test
  public void testNormalizeNfcCaseFoldAscii() {
    normalizations = ImmutableSet.of(NFC, CASE_FOLD_ASCII);

    // Only distinct pairs are compared here: a string always normalizes equal to itself.
    for (String[] row : NORMALIZED_CASE_INSENSITIVE_ASCII_TEST_DATA) {
      for (int first = 0; first < row.length; first++) {
        for (int second = first + 1; second < row.length; second++) {
          assertNormalizedUnequal(row[first], row[second]);
        }
      }
    }
  }

  @Test
  public void testNormalizeNfdCaseFoldAscii() {
    normalizations = ImmutableSet.of(NFD, CASE_FOLD_ASCII);

    for (String[] row : NORMALIZED_CASE_INSENSITIVE_ASCII_TEST_DATA) {
      for (int first = 0; first < row.length; first++) {
        for (int second = first + 1; second < row.length; second++) {
          // since decomposition happens before case folding, the strings are equal when the
          // decomposed ASCII letter is folded
          assertNormalizedEqual(row[first], row[second]);
        }
      }
    }
  }

  // ---------------------------------------------------------------------------------------------
  // Tests of PathNormalization.compilePattern
  // ---------------------------------------------------------------------------------------------

  // regex patterns offer loosely similar matching, but that's all

  @Test
  public void testNone_pattern() {
    normalizations = ImmutableSet.of();

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternDoesNotMatch("foo", "FOO");
    assertNormalizedPatternDoesNotMatch("FOO", "foo");
  }

  @Test
  public void testCaseFold_pattern() {
    normalizations = ImmutableSet.of(CASE_FOLD_UNICODE);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternMatches("foo", "FOO");
    assertNormalizedPatternMatches("FOO", "foo");
    assertNormalizedPatternMatches("Am\u00e9lie", "AM\u00c9LIE");
    assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE");

    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternDoesNotMatch("AM\u00c9LIE", "AME\u0301LIE");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE");
  }

  @Test
  public void testCaseFoldAscii_pattern() {
    normalizations = ImmutableSet.of(CASE_FOLD_ASCII);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternMatches("foo", "FOO");
    assertNormalizedPatternMatches("FOO", "foo");
    assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE");

    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AM\u00c9LIE");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternDoesNotMatch("AM\u00c9LIE", "AME\u0301LIE");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE");
  }

  @Test
  public void testNormalizeNfc_pattern() {
    normalizations = ImmutableSet.of(NFC);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternDoesNotMatch("foo", "FOO");
    assertNormalizedPatternDoesNotMatch("FOO", "foo");
    assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE");
  }

  @Test
  public void testNormalizeNfd_pattern() {
    normalizations = ImmutableSet.of(NFD);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternDoesNotMatch("foo", "FOO");
    assertNormalizedPatternDoesNotMatch("FOO", "foo");
    assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AME\u0301LIE");
  }

  @Test
  public void testNormalizeNfcCaseFold_pattern() {
    normalizations = ImmutableSet.of(NFC, CASE_FOLD_UNICODE);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternMatches("foo", "FOO");
    assertNormalizedPatternMatches("FOO", "foo");
    assertNormalizedPatternMatches("Am\u00e9lie", "AM\u00c9LIE");
    assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE");
    assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE");
    assertNormalizedPatternMatches("Am\u00e9lie", "AME\u0301LIE");
  }

  @Test
  public void testNormalizeNfdCaseFold_pattern() {
    normalizations = ImmutableSet.of(NFD, CASE_FOLD_UNICODE);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternMatches("foo", "FOO");
    assertNormalizedPatternMatches("FOO", "foo");
    assertNormalizedPatternMatches("Am\u00e9lie", "AM\u00c9LIE");
    assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE");
    assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE");
    assertNormalizedPatternMatches("Am\u00e9lie", "AME\u0301LIE");
  }

  @Test
  public void testNormalizeNfcCaseFoldAscii_pattern() {
    normalizations = ImmutableSet.of(NFC, CASE_FOLD_ASCII);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternMatches("foo", "FOO");
    assertNormalizedPatternMatches("FOO", "foo");

    // these are all a bit fuzzy as when CASE_INSENSITIVE is present but not UNICODE_CASE, ASCII
    // only strings are expected
    assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AM\u00c9LIE");
    assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE");
  }

  @Test
  public void testNormalizeNfdCaseFoldAscii_pattern() {
    normalizations = ImmutableSet.of(NFD, CASE_FOLD_ASCII);

    assertNormalizedPatternMatches("foo", "foo");
    assertNormalizedPatternMatches("foo", "FOO");
    assertNormalizedPatternMatches("FOO", "foo");

    // these are all a bit fuzzy as when CASE_INSENSITIVE is present but not UNICODE_CASE, ASCII
    // only strings are expected
    assertNormalizedPatternMatches("Ame\u0301lie", "AME\u0301LIE");
    assertNormalizedPatternDoesNotMatch("Am\u00e9lie", "AM\u00c9LIE");
    assertNormalizedPatternMatches("Am\u00e9lie", "Ame\u0301lie");
    assertNormalizedPatternMatches("AM\u00c9LIE", "AME\u0301LIE");
  }

  // ---------------------------------------------------------------------------------------------
  // Assertion helpers
  // ---------------------------------------------------------------------------------------------

  /**
   * Asserts, for every row of the given table, that all strings in the row normalize to the same
   * string using the current normalizer.
   *
   * @param rows table whose rows each hold strings expected to be equivalent after normalization
   */
  private void assertAllPairsNormalizedEqual(String[][] rows) {
    for (String[] row : rows) {
      assertRowPairsNormalizedEqual(row);
    }
  }

  /**
   * Asserts that every pair of strings in the given row normalizes to the same string using the
   * current normalizer. Each string is also compared with itself.
   *
   * @param row strings expected to be equivalent after normalization
   */
  private void assertRowPairsNormalizedEqual(String[] row) {
    for (int i = 0; i < row.length; i++) {
      // j starts at i, so each element is paired with itself as well as with every later element
      for (int j = i; j < row.length; j++) {
        assertNormalizedEqual(row[i], row[j]);
      }
    }
  }

  /** Asserts that the given strings normalize to the same string using the current normalizer. */
  private void assertNormalizedEqual(String first, String second) {
    // The message names the raw inputs: in the data-driven tests the normalized values alone do
    // not reveal which pair of table entries failed.
    assertEquals(
        "'" + first + "' and '" + second + "' should normalize equally using " + normalizations,
        PathNormalization.normalize(first, normalizations),
        PathNormalization.normalize(second, normalizations));
  }

  /** Asserts that the given strings normalize to different strings using the current normalizer. */
  private void assertNormalizedUnequal(String first, String second) {
    assertNotEquals(
        PathNormalization.normalize(first, normalizations),
        PathNormalization.normalize(second, normalizations));
  }

  /**
   * Asserts that the given strings match when one is compiled as a regex pattern using the current
   * normalizer and matched against the other. The check is performed in both directions.
   */
  private void assertNormalizedPatternMatches(String first, String second) {
    Pattern pattern = PathNormalization.compilePattern(first, normalizations);
    assertTrue(
        "pattern '" + pattern + "' does not match '" + second + "'",
        pattern.matcher(second).matches());

    pattern = PathNormalization.compilePattern(second, normalizations);
    assertTrue(
        "pattern '" + pattern + "' does not match '" + first + "'",
        pattern.matcher(first).matches());
  }

  /**
   * Asserts that the given strings do not match when one is compiled as a regex pattern using the
   * current normalizer and matched against the other. The check is performed in both directions.
   */
  private void assertNormalizedPatternDoesNotMatch(String first, String second) {
    Pattern pattern = PathNormalization.compilePattern(first, normalizations);
    assertFalse(
        "pattern '" + pattern + "' should not match '" + second + "'",
        pattern.matcher(second).matches());

    pattern = PathNormalization.compilePattern(second, normalizations);
    assertFalse(
        "pattern '" + pattern + "' should not match '" + first + "'",
        pattern.matcher(first).matches());
  }
}