/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
/**
 * This is a series of unit tests for snippet creation and highlighting.
 *
 * You can run this entire test case with:
 *   runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
 */
| package com.android.emailcommon.utility; |
| |
| import android.test.AndroidTestCase; |
| import android.text.SpannableStringBuilder; |
| import android.text.style.BackgroundColorSpan; |
| |
| public class TextUtilitiesTests extends AndroidTestCase { |
| |
| public void testPlainSnippet() { |
| // Test the simplest cases |
| assertEquals("", TextUtilities.makeSnippetFromPlainText(null)); |
| assertEquals("", TextUtilities.makeSnippetFromPlainText("")); |
| |
| // Test handling leading, trailing, and duplicated whitespace |
| // Just test common whitespace characters; we calls Character.isWhitespace() internally, so |
| // other whitespace should be fine as well |
| assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n")); |
| char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER; |
| assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c)); |
| assertEquals("foo bar", |
| TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c)); |
| |
| // Handle duplicated - and = |
| assertEquals("Foo-Bar=Bletch", |
| TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch")); |
| |
| // We shouldn't muck with HTML entities |
| assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >")); |
| } |
| |
| public void testHtmlSnippet() { |
| // Test the simplest cases |
| assertEquals("", TextUtilities.makeSnippetFromHtmlText(null)); |
| assertEquals("", TextUtilities.makeSnippetFromHtmlText("")); |
| |
| // Test handling leading, trailing, and duplicated whitespace |
| // Just test common whitespace characters; we calls Character.isWhitespace() internally, so |
| // other whitespace should be fine as well |
| assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n")); |
| char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER; |
| assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c)); |
| assertEquals("foo bar", |
| TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c)); |
| |
| // Handle duplicated - and = |
| assertEquals("Foo-Bar=Bletch", |
| TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch")); |
| |
| // We should catch HTML entities in these tests |
| assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >")); |
| assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&<> "")); |
| // Test for decimal and hex entities |
| assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC")); |
| assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC")); |
| |
| // Test for stripping simple tags |
| assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>")); |
| // TODO: Add tests here if/when we find problematic HTML |
| } |
| |
| public void testStripHtmlEntityEdgeCases() { |
| int[] skipCount = new int[1]; |
| // Bare & isn't an entity |
| char c = TextUtilities.stripHtmlEntity("&", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| // Also not legal |
| c = TextUtilities.stripHtmlEntity("&;", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| // This is an entity, but shouldn't be found |
| c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| // This is too long for an entity, even though it starts like a valid one |
| c = TextUtilities.stripHtmlEntity(" andmore;", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| // Illegal decimal entities |
| c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| c = TextUtilities.stripHtmlEntity("B", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| // Illegal hex entities |
| c = TextUtilities.stripHtmlEntity("઼", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| // Illegal hex entities |
| c = TextUtilities.stripHtmlEntity("G", 0, skipCount); |
| assertEquals(c, '&'); |
| assertEquals(0, skipCount[0]); |
| } |
| |
| public void testStripContent() { |
| assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( |
| "<html><style foo=\"bar\">Not</style>Visible</html>")); |
| assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( |
| "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>")); |
| assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText( |
| "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>")); |
| assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( |
| "<html>Visible<style foo=\"bar\">Not")); |
| assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( |
| "<html>Visible<style foo=\"bar\">Not</style>AgainVisible")); |
| assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( |
| "<html>Visible<style foo=\"bar\"/>AgainVisible")); |
| assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( |
| "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible")); |
| } |
| |
| /** |
| * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position |
| * for the tag named 'tag' and then check whether the calculated end position matches the known |
| * correct position. HTML text not containing an ampersand should generate a calculated end of |
| * -1 |
| * @param text the HTML text to test |
| */ |
| private void findTagEnd(String text, String tag) { |
| int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0); |
| int knownEnd = text.indexOf('@') + 2; |
| if (knownEnd == 1) { |
| // indexOf will return -1, so we'll get 1 as knownEnd |
| assertEquals(-1, calculatedEnd); |
| } else { |
| assertEquals(calculatedEnd, knownEnd); |
| } |
| } |
| |
| public void testFindTagEnd() { |
| // Test with <tag ... /> |
| findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag"); |
| // Test with <tag ...> ... </tag> |
| findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag"); |
| // Test with incomplete tag |
| findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag"); |
| // Test with space at end of tag |
| findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag "); |
| } |
| |
| private void assertHighlightUnchanged(String str) { |
| assertEquals(str, TextUtilities.highlightTermsInHtml(str, null)); |
| } |
| |
| public void testHighlightNoTerm() { |
| // With no search terms, the html should be unchanged |
| assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>"); |
| assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"); |
| assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not"); |
| assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible"); |
| assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible"); |
| assertHighlightUnchanged( |
| "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"); |
| } |
| |
| public void testHighlightSingleTermHtml() { |
| String str = "<html><style foo=\"bar\">Not</style>Visible</html>"; |
| // Test that tags aren't highlighted |
| assertEquals(str, TextUtilities.highlightTermsInHtml( |
| "<html><style foo=\"bar\">Not</style>Visible</html>", "style")); |
| // Test that non-tags are |
| assertEquals("<html><style foo=\"bar\">Not</style><span " + |
| "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING + |
| "\">Visi</span>ble</html>", |
| TextUtilities.highlightTermsInHtml(str, "Visi")); |
| assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" + |
| " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING + |
| "\">gain</span>Visible", |
| TextUtilities.highlightTermsInHtml( |
| "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain")); |
| } |
| |
| public void brokentestHighlightSingleTermText() { |
| // Sprinkle text with a few HTML characters to make sure they're ignored |
| String text = "This< should be visibl>e"; |
| // We should find this, because search terms are case insensitive |
| SpannableStringBuilder ssb = |
| (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi"); |
| BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); |
| assertEquals(1, spans.length); |
| BackgroundColorSpan span = spans[0]; |
| assertEquals(text.indexOf("visi"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span)); |
| // Heh; this next test fails.. we use the search term! |
| assertEquals(text, ssb.toString()); |
| |
| // Multiple instances of the term |
| text = "The research word should be a search result"; |
| ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search"); |
| spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); |
| assertEquals(2, spans.length); |
| span = spans[0]; |
| assertEquals(text.indexOf("search word"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span)); |
| span = spans[1]; |
| assertEquals(text.indexOf("search result"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span)); |
| assertEquals(text, ssb.toString()); |
| } |
| |
| public void brokentestHighlightTwoTermText() { |
| String text = "This should be visible"; |
| // We should find this, because search terms are case insensitive |
| SpannableStringBuilder ssb = |
| (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should"); |
| BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); |
| assertEquals(2, spans.length); |
| BackgroundColorSpan span = spans[0]; |
| assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span)); |
| span = spans[1]; |
| assertEquals(text.indexOf("visi"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span)); |
| assertEquals(text, ssb.toString()); |
| } |
| |
| public void brokentestHighlightDuplicateTermText() { |
| String text = "This should be visible"; |
| // We should find this, because search terms are case insensitive |
| SpannableStringBuilder ssb = |
| (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should"); |
| BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); |
| assertEquals(1, spans.length); |
| BackgroundColorSpan span = spans[0]; |
| assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span)); |
| } |
| |
| public void brokentestHighlightOverlapTermText() { |
| String text = "This shoulder is visible"; |
| // We should find this, because search terms are case insensitive |
| SpannableStringBuilder ssb = |
| (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould"); |
| BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); |
| assertEquals(1, spans.length); |
| BackgroundColorSpan span = spans[0]; |
| assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span)); |
| } |
| |
| |
| public void brokentestHighlightOverlapTermText2() { |
| String text = "The shoulders are visible"; |
| // We should find this, because search terms are case insensitive |
| SpannableStringBuilder ssb = |
| (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders"); |
| BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); |
| assertEquals(2, spans.length); |
| BackgroundColorSpan span = spans[0]; |
| assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span)); |
| span = spans[1]; |
| // Just the 's' should be caught in the 2nd span |
| assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span)); |
| assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span)); |
| assertEquals(text, ssb.toString()); |
| } |
| // For debugging large HTML samples |
| |
| // private String readLargeSnippet(String fn) { |
| // File file = mContext.getFileStreamPath(fn); |
| // StringBuffer sb = new StringBuffer(); |
| // BufferedReader reader = null; |
| // try { |
| // String text; |
| // reader = new BufferedReader(new FileReader(file)); |
| // while ((text = reader.readLine()) != null) { |
| // sb.append(text); |
| // sb.append(" "); |
| // } |
| // } catch (IOException e) { |
| // } |
| // return sb.toString(); |
| // } |
| } |