src/tests/org/owasp/html/HtmlSanitizerTest.java - platform/external/owasp/sanitizer - Git at Google

 // Copyright (c) 2011, Mike Samuel
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
 // are met:
 //
 // Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 // Redistributions in binary form must reproduce the above copyright
 // notice, this list of conditions and the following disclaimer in the
 // documentation and/or other materials provided with the distribution.
 // Neither the name of the OWASP nor the names of its contributors may
 // be used to endorse or promote products derived from this software
 // without specific prior written permission.
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.

 package org.owasp.html;

 import junit.framework.TestCase;

 import javax.annotation.Nullable;

 import org.junit.Test;


 /**
  * Exercises {@code HtmlSanitizer} by running HTML snippets through the policy
  * built in {@code sanitize(String)} and comparing the rendered output with
  * the expected markup.
  *
  * <p>The shared policy allows a small set of elements and attributes and
  * rewrites {@code id} and {@code class} values with a {@code p-} prefix, so
  * the expected strings below reflect that policy rather than the sanitizer's
  * defaults.
  *
  * <p>NOTE(review): the class both extends {@code TestCase} and annotates its
  * methods with {@code @Test}; presumably test discovery relies on the
  * {@code test*} naming convention -- confirm against the build's runner.
  */
 public class HtmlSanitizerTest extends TestCase {

   /** Empty and {@code null} inputs both produce the empty string. */
   @Test
   public static final void testEmpty() throws Exception {
     assertEquals("", sanitize(""));
     assertEquals("", sanitize(null));
   }

   /** Plain text passes through unchanged. */
   @Test
   public static final void testSimpleText() throws Exception {
     assertEquals("hello world", sanitize("hello world"));
   }

   /** Already-escaped angle brackets stay escaped. */
   @Test
   public static final void testEntities1() throws Exception {
     assertEquals("&lt;hello world&gt;", sanitize("&lt;hello world&gt;"));
   }

   /** Allowed, properly nested elements are emitted as-is. */
   @Test
   public static final void testEntities2() throws Exception {
     assertEquals("<b>hello <i>world</i></b>",
                  sanitize("<b>hello <i>world</i></b>"));
   }

   /** Elements that the policy does not list are dropped. */
   @Test
   public static final void testUnknownTagsRemoved() throws Exception {
     assertEquals("<b>hello <i>world</i></b>",
                  sanitize("<b>hello <bogus></bogus><i>world</i></b>"));
   }

   /** A script element is removed from otherwise allowed markup. */
   @Test
   public static final void testUnsafeTagsRemoved() throws Exception {
     assertEquals("<b>hello <i>world</i></b>",
                  sanitize("<b>hello <i>world</i>"
                           + "<script src=foo.js></script></b>"));
   }

   /** An event-handler attribute is stripped while its element is kept. */
   @Test
   public static final void testUnsafeAttributesRemoved() throws Exception {
     assertEquals(
         "<b>hello <i>world</i></b>",
         sanitize("<b>hello <i onclick=\"takeOverWorld(this)\">world</i></b>"));
   }

   /** Stray less-than and ampersand characters in text are escaped. */
   @Test
   public static final void testCruftEscaped() throws Exception {
     assertEquals("<b>hello <i>world&lt;</i></b> &amp; tomorrow the universe",
                  sanitize(
                      "<b>hello <i>world<</i></b> & tomorrow the universe"));
   }

   /** Junk that trails the attributes inside a start tag is discarded. */
   @Test
   public static final void testTagCruftRemoved() throws Exception {
     assertEquals("<b id=\"p-foo\">hello <i>world&lt;</i></b>",
                  sanitize("<b id=\"foo\" / -->hello <i>world<</i></b>"));
   }

   /** The policy prefixes every id and class token with {@code p-}. */
   @Test
   public static final void testIdsAndClassesPrefixed() throws Exception {
     assertEquals(
         "<b id=\"p-foo\" class=\"p-boo p-bar p-baz\">"
         + "hello <i>world&lt;</i></b>",
         sanitize(
             "<b id=\"foo\" class=\"boo bar baz\">hello <i>world<</i></b>"));
   }

   /** Markup-significant characters in attribute values are escaped. */
   @Test
   public static final void testSpecialCharsInAttributes() throws Exception {
     assertEquals(
         "<b title=\"a&lt;b &amp;&amp; c&gt;b\">bar</b>",
         sanitize("<b title=\"a<b && c>b\">bar</b>"));
   }

   /** Elements left open at end of input are closed in the output. */
   @Test
   public static final void testUnclosedTags() throws Exception {
     assertEquals("<div id=\"p-foo\">Bar<br />Baz</div>",
                  sanitize("<div id=\"foo\">Bar<br>Baz"));
   }

   /** End tags without a matching open element are dropped. */
   @Test
   public static final void testUnopenedTags() throws Exception {
     assertEquals("Foo<b>Bar</b>Baz",
                  sanitize("Foo<b></select>Bar</b></b>Baz</select>"));
   }

   /** An end tag carrying attributes for a disallowed element emits nothing. */
   @Test
   public static final void testUnsafeEndTags() throws Exception {
     assertEquals(
         "",
         sanitize(
             "</meta http-equiv=\"refesh\""
             + " content=\"1;URL=http://evilgadget.com\">"));
   }

   /** An explicit end tag for an input element folds into {@code <input />}. */
   @Test
   public static final void testEmptyEndTags() throws Exception {
     assertEquals("<input />", sanitize("<input></input>"));
   }

   /**
    * Neither {@code src} (absent from the policy's attribute list) nor the
    * upper-case {@code ONLOAD} handler survives.
    */
   @Test
   public static final void testOnLoadStripped() throws Exception {
     assertEquals(
         "<img />",
         sanitize("<img src=http://foo.com/bar ONLOAD=alert(1)>"));
   }

   /** Attributes on an end tag are ignored along with the unmatched tag. */
   @Test
   public static final void testClosingTagParameters() throws Exception {
     assertEquals(
         "<p>Hello world</p>",
         sanitize("<p>Hello world</b style=\"width:expression(alert(1))\">"));
   }

   /** A new list item implicitly closes the one before it. */
   @Test
   public static final void testOptionalEndTags() throws Exception {
     // Should not be
     //     "<ol> <li>A</li> <li>B<li>C </li></li></ol>"
     // The difference is significant because in the first, the item contains no
     // space after 'A', but in the third, the item contains 'C' and a space.
     assertEquals(
         "<ol><li>A</li><li>B</li><li>C </li></ol>",
         sanitize("<ol> <li>A</li> <li>B<li>C </ol>"));
   }

   /**
    * Document-structure elements (html, head, title, body) are dropped; the
    * title text is dropped with them while body content is kept.
    */
   @Test
   public static final void testFoldingOfHtmlAndBodyTags() throws Exception {
     assertEquals(
         "<p>P 1</p>",
         sanitize("<html><head><title>Foo</title></head>"
                  + "<body><p>P 1</p></body></html>"));
     assertEquals(
         "Hello",
         sanitize("<body bgcolor=\"blue\">Hello</body>"));
     assertEquals(
         "<p>Foo</p><p>One</p><p>Two</p>Three<p>Four</p>",
         sanitize(
             "<html>"
             + "<head>"
             + "<title>Blah</title>"
             + "<p>Foo</p>"
             + "</head>"
             + "<body>"
             + "<p>One"
             + "<p>Two</p>"
             + "Three"
             + "<p>Four</p>"
             + "</body>"
             + "</html>"));
   }

   /** Valueless attributes get their name as value; empty values stay empty. */
   @Test
   public static final void testEmptyAndValuelessAttributes() throws Exception {
     assertEquals(
         "<input checked=\"checked\" type=\"checkbox\" id=\"\" class=\"\" />",
         sanitize("<input checked type=checkbox id=\"\" class=>"));
   }

   /** SGML short-tag syntax is not interpreted, but the output stays safe. */
   @Test
   public static final void testSgmlShortTags() throws Exception {
     // We make no attempt to correctly handle SGML short tags since they are
     // not implemented consistently across browsers, and have been removed from
     // HTML 5.
     //
     // According to http://www.w3.org/QA/2007/10/shorttags.html
     //      Shorttags - the odd side of HTML 4.01
     //      ...
     //      It uses an ill-known feature of SGML called shorthand markup, which
     //      was authorized in HTML up to HTML 4.01. But what used to be a "cool"
     //      feature for SGML experts becomes a liability in HTML, where the
     //      construct is more likely to appear as a typo than as a conscious
     //      choice.
     //
     //      All could be fine if this form typo-that-happens-to-be-legal was
     //      properly implemented in contemporary HTML user-agents. It is not.
     assertEquals("<p></p>", sanitize("<p/b/"));  // Short-tag discarded.
     assertEquals("<p></p>", sanitize("<p<b>"));  // Discard <b attribute
     assertEquals(
         // This behavior for short tags is not ideal, but it is safe.
         "<p href=\"/\">first part of the text&lt;/&gt; second part</p>",
         sanitize("<p<a href=\"/\">first part of the text</> second part"));
   }

   /**
    * A NUL inside a quoted attribute value is removed without ending the
    * value, so the text after it cannot become a separate attribute.
    */
   @Test
   public static final void testNul() throws Exception {
     assertEquals(
         "<a title="
         + "\"harmless  SCRIPT&#61;javascript:alert(1) ignored&#61;ignored\">"
         + "</a>",
         sanitize(
             "<A TITLE="
             + "\"harmless\0  SCRIPT=javascript:alert(1) ignored=ignored\">"
             ));
   }

   /** An attribute name with a trailing digit is not mistaken for an allowed one. */
   @Test
   public static final void testDigitsInAttrNames() throws Exception {
     // See bug 614 for details.
     assertEquals(
         "<div>Hello</div>",
         sanitize(
             "<div style1=\"expression(\'alert(1)\")\">Hello</div>"
             ));
   }

   /** Supplementary code points are emitted as one numeric entity. */
   @Test
   public static final void testSupplementaryCodepointEncoding()
       throws Exception {
     // &#xd87e;&#xdc1a; is not appropriate.
     // &#x2f81a; is appropriate as is the unencoded form.
     assertEquals(
         "&#x2f81a; | &#x2f81a; | &#x2f81a;",
         sanitize("&#x2F81A; | \ud87e\udc1a | &#xd87e;&#xdc1a;"));
   }

   /**
    * Feeds 20000 unclosed div tags and checks that the output nesting depth is
    * capped (between 50 and 1000) and that the emitted tags are balanced.
    */
   @Test
   public static final void testDeeplyNestedTagsDoS() throws Exception {
     String sanitized = sanitize(stringRepeatedTimes("<div>", 20000));
     int n = sanitized.length() / "<div></div>".length();
     assertTrue("" + n, 50 <= n && n <= 1000);
     int middle = n * "<div>".length();
     // Expected value goes first so that a failure message labels the
     // values correctly.
     assertEquals(stringRepeatedTimes("<div>", n),
                  sanitized.substring(0, middle));
     assertEquals(stringRepeatedTimes("</div>", n),
                  sanitized.substring(middle));
   }

   /** Back-ticks in attribute values are encoded and a trailing space added. */
   @Test
   public static final void testInnerHTMLIE8() throws Exception {
     // Apparently, in quirks mode, IE8 does a poor job producing innerHTML
     // values.  Given
     //     <div attr="``foo=bar">
     // we encode &#96; but if JavaScript does:
     //    nodeA.innerHTML = nodeB.innerHTML;
     // and nodeB contains the DIV above, then IE8 will produce
     //     <div attr=``foo=bar>
     // as the value of nodeB.innerHTML and assign it to nodeA.
     // IE8's HTML parser treats `` as a blank attribute value and foo=bar
     // becomes a separate attribute.
     // Adding a space at the end of the attribute prevents this by forcing
     // IE8 to put double quotes around the attribute when computing
     // nodeB.innerHTML.
     assertEquals(
         "<div title=\"&#96;&#96;onmouseover&#61;alert(1337) \"></div>",
         sanitize("<div title=\"``onmouseover=alert(1337)\">"));
   }

   /**
    * The no* elements are parsed as ordinary markup rather than raw text,
    * whereas the content of an (unlisted) xmp element comes out escaped.
    */
   @Test
   public static final void testNabobsOfNegativism() throws Exception {
     // Treating <noscript> as raw-text gains us nothing security-wise.
     assertEquals("<noscript></noscript>",
                  sanitize("<noscript><evil></noscript>"));
     assertEquals("<noscript>I <b>&lt;3</b> Ponies</noscript>",
                  sanitize("<noscript>I <b><3</b> Ponies</noscript>"));
     assertEquals("<noscript>I <b>&lt;3</b> Ponies</noscript>",
                  sanitize("<NOSCRIPT>I <b><3</b> Ponies</noscript><evil>"));
     assertEquals("<noframes>I <b>&lt;3</b> Ponies</noframes>",
                  sanitize("<noframes>I <b><3</b> Ponies</noframes><evil>"));
     assertEquals("<noembed>I <b>&lt;3</b> Ponies</noembed>",
                  sanitize("<noembed>I <b><3</b> Ponies</noembed><evil>"));
     assertEquals("<noxss>I <b>&lt;3</b> Ponies</noxss>",
                  sanitize("<noxss>I <b><3</b> Ponies</noxss><evil>"));
     assertEquals(
         "&lt;noscript&gt;I &lt;b&gt;&lt;3&lt;/b&gt; Ponies&lt;/noscript&gt;",
         sanitize("<xmp><noscript>I <b><3</b> Ponies</noscript></xmp>"));
   }

   /** NULs in text, whether literal or written as {@code &#0;}, are dropped. */
   @Test
   public static final void testNULs() throws Exception {
     assertEquals("<b>Hello, </b>", sanitize("<b>Hello, \u0000</b>"));
     assertEquals("<b>Hello, </b>", sanitize("<b>Hello, \u0000"));
     assertEquals("",               sanitize("\u0000"));
     assertEquals("<b>Hello, </b>", sanitize("<b>Hello, &#0;</b>"));
     assertEquals("",               sanitize("&#0;"));
   }

   /** Constructs that open with a question mark are dropped from the output. */
   @Test
   public static final void testQMarkMeta() throws Exception {
     assertEquals(
         "Hello, <b>World</b>!",
         sanitize(
             ""
             // An XML Prologue.
             // HTML5 treats it as ignorable content via the bogus comment state.
             + "<?xml version=\"1\" ?>"
             + "Hello, "
             // An XML Processing instruction.
             // HTML5 treats it as ignorable content via the bogus comment state.
             + "<?processing instruction?>"
             + "<b>World"
             // Appears in HTML copied from outlook.
             + "<?xml:namespace prefix = o ns = "
             + "\"urn:schemas-microsoft-com:office:office\" />"
             + "</b>!"));
   }

   /** Everything inside an iframe element is removed, including a script. */
   @Test
   public static final void testScriptInIframe() throws Exception {
     assertEquals(
         "<iframe></iframe>",
         sanitize(
             "<iframe>\n"
             + "  <script>alert(Hi)</script>\n"
             + "</iframe>"));
   }

   /**
    * Sanitizes {@code html} with the test policy and returns the rendered
    * markup.
    *
    * <p>Any error message reported by the renderer fails the running test.
    *
    * @param html the markup to sanitize; may be {@code null}.
    * @return the text the renderer appended to its output buffer.
    */
   private static String sanitize(@Nullable String html) throws Exception {
     StringBuilder sb = new StringBuilder();
     HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
         sb,
         new Handler<String>() {
           public void handle(String errorMessage) {
             fail(errorMessage);
           }
         });

     HtmlSanitizer.Policy policy = new HtmlPolicyBuilder()
         // Allow these tags.
         .allowElements(
             "a", "b", "br", "div", "i", "iframe", "img", "input", "li",
             "ol", "p", "span", "ul", "noscript", "noframes", "noembed", "noxss")
         // And these attributes.
         .allowAttributes(
             "dir", "checked", "class", "href", "id", "target", "title", "type")
         .globally()
         // Cleanup IDs and CLASSes and prefix them with p- to move to a separate
         // name-space.
         .allowAttributes("id", "class")
         .matching(
             new AttributePolicy() {
               public String apply(
                   String elementName, String attributeName, String value) {
                 // Prefix each token that starts with a letter, then collapse
                 // the whitespace the first replacement introduced.
                 return value.replaceAll("(?:^|\\s)([a-zA-Z])", " p-$1")
                     .replaceAll("\\s+", " ")
                     .trim();
               }
             })
         .globally()
         // Don't throw out useless <img> and <input> elements to ease debugging.
         .allowWithoutAttributes("img", "input")
         .build(renderer);

     HtmlSanitizer.sanitize(html, policy);

     return sb.toString();
   }

   /**
    * Returns {@code s} concatenated with itself {@code n} times.
    *
    * @param s the text to repeat.
    * @param n the number of copies; callers in this file pass positive counts.
    */
   private static final String stringRepeatedTimes(String s, int n) {
     StringBuilder sb = new StringBuilder(s.length() * n);
     while (--n >= 0) {
       sb.append(s);
     }
     return sb.toString();
   }
 }
	// Copyright (c) 2011, Mike Samuel
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions
	// are met:
	//
	// Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// Redistributions in binary form must reproduce the above copyright
	// notice, this list of conditions and the following disclaimer in the
	// documentation and/or other materials provided with the distribution.
	// Neither the name of the OWASP nor the names of its contributors may
	// be used to endorse or promote products derived from this software
	// without specific prior written permission.
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
	// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
	// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.

	package org.owasp.html;

	import junit.framework.TestCase;

	import javax.annotation.Nullable;

	import org.junit.Test;


	public class HtmlSanitizerTest extends TestCase {

	@Test
	public static final void testEmpty() throws Exception {
	assertEquals("", sanitize(""));
	assertEquals("", sanitize(null));
	}

	@Test
	public static final void testSimpleText() throws Exception {
	assertEquals("hello world", sanitize("hello world"));
	}

	@Test
	public static final void testEntities1() throws Exception {
	assertEquals("<hello world>", sanitize("<hello world>"));
	}

	@Test
	public static final void testEntities2() throws Exception {
	assertEquals("<b>hello <i>world</i></b>",
	sanitize("<b>hello <i>world</i></b>"));
	}

	@Test
	public static final void testUnknownTagsRemoved() throws Exception {
	assertEquals("<b>hello <i>world</i></b>",
	sanitize("<b>hello <bogus></bogus><i>world</i></b>"));
	}

	@Test
	public static final void testUnsafeTagsRemoved() throws Exception {
	assertEquals("<b>hello <i>world</i></b>",
	sanitize("<b>hello <i>world</i>"
	+ "<script src=foo.js></script></b>"));
	}

	@Test
	public static final void testUnsafeAttributesRemoved() throws Exception {
	assertEquals(
	"<b>hello <i>world</i></b>",
	sanitize("<b>hello <i onclick=\"takeOverWorld(this)\">world</i></b>"));
	}

	@Test
	public static final void testCruftEscaped() throws Exception {
	assertEquals("<b>hello <i>world<</i></b> & tomorrow the universe",
	sanitize(
	"<b>hello <i>world<</i></b> & tomorrow the universe"));
	}

	@Test
	public static final void testTagCruftRemoved() throws Exception {
	assertEquals("<b id=\"p-foo\">hello <i>world<</i></b>",
	sanitize("<b id=\"foo\" / -->hello <i>world<</i></b>"));
	}

	@Test
	public static final void testIdsAndClassesPrefixed() throws Exception {
	assertEquals(
	"<b id=\"p-foo\" class=\"p-boo p-bar p-baz\">"
	+ "hello <i>world<</i></b>",
	sanitize(
	"<b id=\"foo\" class=\"boo bar baz\">hello <i>world<</i></b>"));
	}

	@Test
	public static final void testSpecialCharsInAttributes() throws Exception {
	assertEquals(
	"<b title=\"a<b && c>b\">bar</b>",
	sanitize("<b title=\"a<b && c>b\">bar</b>"));
	}

	@Test
	public static final void testUnclosedTags() throws Exception {
	assertEquals("<div id=\"p-foo\">Bar<br />Baz</div>",
	sanitize("<div id=\"foo\">Bar<br>Baz"));
	}

	@Test
	public static final void testUnopenedTags() throws Exception {
	assertEquals("Foo<b>Bar</b>Baz",
	sanitize("Foo<b></select>Bar</b></b>Baz</select>"));
	}

	@Test
	public static final void testUnsafeEndTags() throws Exception {
	assertEquals(
	"",
	sanitize(
	"</meta http-equiv=\"refesh\""
	+ " content=\"1;URL=http://evilgadget.com\">"));
	}

	@Test
	public static final void testEmptyEndTags() throws Exception {
	assertEquals("<input />", sanitize("<input></input>"));
	}

	@Test
	public static final void testOnLoadStripped() throws Exception {
	assertEquals(
	"<img />",
	sanitize("<img src=http://foo.com/bar ONLOAD=alert(1)>"));
	}

	@Test
	public static final void testClosingTagParameters() throws Exception {
	assertEquals(
	"<p>Hello world</p>",
	sanitize("<p>Hello world</b style=\"width:expression(alert(1))\">"));
	}

	@Test
	public static final void testOptionalEndTags() throws Exception {
	// Should not be
	// "<ol> <li>A</li> <li>B<li>C </li></li></ol>"
	// The difference is significant because in the first, the item contains no
	// space after 'A", but in the third, the item contains 'C' and a space.
	assertEquals(
	"<ol><li>A</li><li>B</li><li>C </li></ol>",
	sanitize("<ol> <li>A</li> <li>B<li>C </ol>"));
	}

	@Test
	public static final void testFoldingOfHtmlAndBodyTags() throws Exception {
	assertEquals(
	"<p>P 1</p>",
	sanitize("<html><head><title>Foo</title></head>"
	+ "<body><p>P 1</p></body></html>"));
	assertEquals(
	"Hello",
	sanitize("<body bgcolor=\"blue\">Hello</body>"));
	assertEquals(
	"<p>Foo</p><p>One</p><p>Two</p>Three<p>Four</p>",
	sanitize(
	"<html>"
	+ "<head>"
	+ "<title>Blah</title>"
	+ "<p>Foo</p>"
	+ "</head>"
	+ "<body>"
	+ "<p>One"
	+ "<p>Two</p>"
	+ "Three"
	+ "<p>Four</p>"
	+ "</body>"
	+ "</html>"));
	}

	@Test
	public static final void testEmptyAndValuelessAttributes() throws Exception {
	assertEquals(
	"<input checked=\"checked\" type=\"checkbox\" id=\"\" class=\"\" />",
	sanitize("<input checked type=checkbox id=\"\" class=>"));
	}

	@Test
	public static final void testSgmlShortTags() throws Exception {
	// We make no attempt to correctly handle SGML short tags since they are
	// not implemented consistently across browsers, and have been removed from
	// HTML 5.
	//
	// According to http://www.w3.org/QA/2007/10/shorttags.html
	// Shorttags - the odd side of HTML 4.01
	// ...
	// It uses an ill-known feature of SGML called shorthand markup, which
	// was authorized in HTML up to HTML 4.01. But what used to be a "cool"
	// feature for SGML experts becomes a liability in HTML, where the
	// construct is more likely to appear as a typo than as a conscious
	// choice.
	//
	// All could be fine if this form typo-that-happens-to-be-legal was
	// properly implemented in contemporary HTML user-agents. It is not.
	assertEquals("<p></p>", sanitize("<p/b/")); // Short-tag discarded.
	assertEquals("<p></p>", sanitize("<p<b>")); // Discard <b attribute
	assertEquals(
	// This behavior for short tags is not ideal, but it is safe.
	"<p href=\"/\">first part of the text</> second part</p>",
	sanitize("<p<a href=\"/\">first part of the text</> second part"));
	}

	@Test
	public static final void testNul() throws Exception {
	assertEquals(
	"<a title="
	+ "\"harmless SCRIPT=javascript:alert(1) ignored=ignored\">"
	+ "</a>",
	sanitize(
	"<A TITLE="
	+ "\"harmless\0 SCRIPT=javascript:alert(1) ignored=ignored\">"
	));
	}

	@Test
	public static final void testDigitsInAttrNames() throws Exception {
	// See bug 614 for details.
	assertEquals(
	"<div>Hello</div>",
	sanitize(
	"<div style1=\"expression(\'alert(1)\")\">Hello</div>"
	));
	}

	@Test
	public static final void testSupplementaryCodepointEncoding()
	throws Exception {
	// &#xd87e;&#xdc1a; is not appropriate.
	// 冬 is appropriate as is the unencoded form.
	assertEquals(
	"冬 \| 冬 \| 冬",
	sanitize("冬 \| \ud87e\udc1a \| &#xd87e;&#xdc1a;"));
	}

	@Test
	public static final void testDeeplyNestedTagsDoS() throws Exception {
	String sanitized = sanitize(stringRepeatedTimes("<div>", 20000));
	int n = sanitized.length() / "<div></div>".length();
	assertTrue("" + n, 50 <= n && n <= 1000);
	int middle = n * "<div>".length();
	assertEquals(sanitized.substring(0, middle),
	stringRepeatedTimes("<div>", n));
	assertEquals(sanitized.substring(middle),
	stringRepeatedTimes("</div>", n));
	}

	@Test
	public static final void testInnerHTMLIE8() throws Exception {
	// Apparently, in quirks mode, IE8 does a poor job producing innerHTML
	// values. Given
	// <div attr="``foo=bar">
	// we encode ` but if JavaScript does:
	// nodeA.innerHTML = nodeB.innerHTML;
	// and nodeB contains the DIV above, then IE8 will produce
	// <div attr=``foo=bar>
	// as the value of nodeB.innerHTML and assign it to nodeA.
	// IE8's HTML parser treats `` as a blank attribute value and foo=bar
	// becomes a separate attribute.
	// Adding a space at the end of the attribute prevents this by forcing
	// IE8 to put double quotes around the attribute when computing
	// nodeB.innerHTML.
	assertEquals(
	"<div title=\"``onmouseover=alert(1337) \"></div>",
	sanitize("<div title=\"``onmouseover=alert(1337)\">"));
	}

	@Test
	public static final void testNabobsOfNegativism() throws Exception {
	// Treating <noscript> as raw-text gains us nothing security-wise.
	assertEquals("<noscript></noscript>",
	sanitize("<noscript><evil></noscript>"));
	assertEquals("<noscript>I <b><3</b> Ponies</noscript>",
	sanitize("<noscript>I <b><3</b> Ponies</noscript>"));
	assertEquals("<noscript>I <b><3</b> Ponies</noscript>",
	sanitize("<NOSCRIPT>I <b><3</b> Ponies</noscript><evil>"));
	assertEquals("<noframes>I <b><3</b> Ponies</noframes>",
	sanitize("<noframes>I <b><3</b> Ponies</noframes><evil>"));
	assertEquals("<noembed>I <b><3</b> Ponies</noembed>",
	sanitize("<noembed>I <b><3</b> Ponies</noembed><evil>"));
	assertEquals("<noxss>I <b><3</b> Ponies</noxss>",
	sanitize("<noxss>I <b><3</b> Ponies</noxss><evil>"));
	assertEquals(
	"<noscript>I <b><3</b> Ponies</noscript>",
	sanitize("<xmp><noscript>I <b><3</b> Ponies</noscript></xmp>"));
	}

	@Test
	public static final void testNULs() throws Exception {
	assertEquals("<b>Hello, </b>", sanitize("<b>Hello, \u0000</b>"));
	assertEquals("<b>Hello, </b>", sanitize("<b>Hello, \u0000"));
	assertEquals("", sanitize("\u0000"));
	assertEquals("<b>Hello, </b>", sanitize("<b>Hello, </b>"));
	assertEquals("", sanitize(""));
	}

	@Test
	public static final void testQMarkMeta() throws Exception {
	assertEquals(
	"Hello, <b>World</b>!",
	sanitize(
	""
	// An XML Prologue.
	// HTML5 treats it as ignorable content via the bogus comment state.
	+ "<?xml version=\"1\" ?>"
	+ "Hello, "
	// An XML Processing instruction.
	// HTML5 treats it as ignorable content via the bogus comment state.
	+ "<?processing instruction?>"
	+ "<b>World"
	// Appears in HTML copied from outlook.
	+ "<?xml:namespace prefix = o ns = "
	+ "\"urn:schemas-microsoft-com:office:office\" />"
	+ "</b>!"));
	}

	@Test
	public static final void testScriptInIframe() throws Exception {
	assertEquals(
	"<iframe></iframe>",
	sanitize(
	"<iframe>\n"
	+ " <script>alert(Hi)</script>\n"
	+ "</iframe>"));
	}

	private static String sanitize(@Nullable String html) throws Exception {
	StringBuilder sb = new StringBuilder();
	HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
	sb,
	new Handler<String>() {
	public void handle(String errorMessage) {
	fail(errorMessage);
	}
	});

	HtmlSanitizer.Policy policy = new HtmlPolicyBuilder()
	// Allow these tags.
	.allowElements(
	"a", "b", "br", "div", "i", "iframe", "img", "input", "li",
	"ol", "p", "span", "ul", "noscript", "noframes", "noembed", "noxss")
	// And these attributes.
	.allowAttributes(
	"dir", "checked", "class", "href", "id", "target", "title", "type")
	.globally()
	// Cleanup IDs and CLASSes and prefix them with p- to move to a separate
	// name-space.
	.allowAttributes("id", "class")
	.matching(
	new AttributePolicy() {
	public String apply(
	String elementName, String attributeName, String value) {
	return value.replaceAll("(?:^\|\\s)([a-zA-Z])", " p-$1")
	.replaceAll("\\s+", " ")
	.trim();
	}
	})
	.globally()
	// Don't throw out useless <img> and <input> elements to ease debugging.
	.allowWithoutAttributes("img", "input")
	.build(renderer);

	HtmlSanitizer.sanitize(html, policy);

	return sb.toString();
	}

	private static final String stringRepeatedTimes(String s, int n) {
	StringBuilder sb = new StringBuilder(s.length() * n);
	while (--n >= 0) {
	sb.append(s);
	}
	return sb.toString();
	}
	}