// blob: 8a30c9a497001e464a06fea80e6b70a5df4fb1e7 [file] [log] [blame]
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;
import java.io.File;
import java.io.StringReader;
import java.util.List;
import java.util.ListIterator;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
/**
 * Command-line micro-benchmark that times three ways of processing one HTML
 * file:
 * <ul>
 *   <li>{@code h} &mdash; parsing it with {@link HtmlDocumentBuilder},
 *   <li>{@code s} &mdash; sanitizing it with a hand-written
 *       {@link HtmlSanitizer.Policy},
 *   <li>{@code p} &mdash; sanitizing it with a policy produced by
 *       {@link HtmlPolicyBuilder}.
 * </ul>
 *
 * <p>Usage: {@code Benchmark <html-file> [hsp]}.  The first argument is the
 * path of the file to process (read as UTF-8).  The optional second argument
 * selects which measurements run by the letters it contains; when it is
 * omitted all three run.
 *
 * <p>Each selected operation is first run {@link #ITERATIONS} times untimed
 * and then {@link #ITERATIONS} times timed.  The elapsed
 * {@link System#nanoTime()} difference of each timed run is written to
 * {@code System.err}.
 */
public class Benchmark {

  /** Number of passes per operation, for both the warm-up and the timed run. */
  private static final int ITERATIONS = 100;

  /**
   * Entry point.
   *
   * @param args {@code args[0]} is the path of the HTML file; the optional
   *     {@code args[1]} selects measurements by containing any of the letters
   *     {@code h}, {@code s} and {@code p}.
   * @throws IllegalArgumentException if no file argument is supplied.
   * @throws Exception if reading, parsing or sanitizing the input fails.
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      // Fail with a usage hint instead of a bare index-out-of-bounds error.
      throw new IllegalArgumentException(
          "Usage: " + Benchmark.class.getName() + " <html-file> [hsp]");
    }
    String html = Files.toString(new File(args[0]), Charsets.UTF_8);

    boolean timeLibhtmlparser = true;
    boolean timeSanitize = true;
    boolean timePolicyBuilder = true;
    if (args.length > 1) {
      String selection = args[1];
      timeLibhtmlparser = selection.contains("h");
      timeSanitize = selection.contains("s");
      timePolicyBuilder = selection.contains("p");
    }

    // Defeat optimizations: every result feeds into n, and n is inspected
    // below.  A long is used because the sum of the output lengths over all
    // passes can exceed Integer.MAX_VALUE for multi-megabyte inputs, which
    // would wrap an int negative and trip the check below.
    long n = 0;

    // Untimed warm-up passes.
    if (timeLibhtmlparser) {
      n += parseRepeatedly(html);
    }
    if (timeSanitize) {
      n += sanitizeRepeatedly(html);
    }
    if (timePolicyBuilder) {
      n += sanitizeUsingPolicyBuilderRepeatedly(html);
    }

    // Timed passes.  The elapsed values are only read when the matching
    // measurement is enabled.
    long parseNanos = -1;
    if (timeLibhtmlparser) {
      long start = System.nanoTime();
      n += parseRepeatedly(html);
      parseNanos = System.nanoTime() - start;
    }

    long sanitizeNanos = -1;
    if (timeSanitize) {
      long start = System.nanoTime();
      n += sanitizeRepeatedly(html);
      sanitizeNanos = System.nanoTime() - start;
    }

    long policyBuilderNanos = -1;
    if (timePolicyBuilder) {
      long start = System.nanoTime();
      n += sanitizeUsingPolicyBuilderRepeatedly(html);
      policyBuilderNanos = System.nanoTime() - start;
    }

    // Defeat optimization by using n.
    if (n < 0) {
      throw new AssertionError("Oh noes underflow");
    }

    // Report only after all measurements are done.
    if (timeLibhtmlparser) {
      System.err.println(String.format(
          "Tree parse : %12d", parseNanos));
    }
    if (timeSanitize) {
      System.err.println(String.format(
          "Full sanitize custom : %12d", sanitizeNanos));
    }
    if (timePolicyBuilder) {
      System.err.println(String.format(
          "Full sanitize w/ PB : %12d", policyBuilderNanos));
    }
  }

  /**
   * Runs {@link #parseUsingLibhtmlparser} {@link #ITERATIONS} times.
   *
   * @return the sum of the per-pass results, so the work is observably used.
   */
  private static long parseRepeatedly(String html) throws Exception {
    long sum = 0;
    for (int i = ITERATIONS; --i >= 0;) {
      sum += parseUsingLibhtmlparser(html);
    }
    return sum;
  }

  /**
   * Runs {@link #sanitize} {@link #ITERATIONS} times.
   *
   * @return the total length of the sanitized outputs.
   */
  private static long sanitizeRepeatedly(String html) throws Exception {
    long sum = 0;
    for (int i = ITERATIONS; --i >= 0;) {
      sum += sanitize(html).length();
    }
    return sum;
  }

  /**
   * Runs {@link #sanitizeUsingPolicyBuilder} {@link #ITERATIONS} times.
   *
   * @return the total length of the sanitized outputs.
   */
  private static long sanitizeUsingPolicyBuilderRepeatedly(String html)
      throws Exception {
    long sum = 0;
    for (int i = ITERATIONS; --i >= 0;) {
      sum += sanitizeUsingPolicyBuilder(html).length();
    }
    return sum;
  }

  /**
   * Parses {@code html} with a fresh {@link HtmlDocumentBuilder}.
   *
   * @return a small value derived from the identity hash code of the parsed
   *     node; it exists only so that the caller has a result to consume.
   */
  private static int parseUsingLibhtmlparser(String html) throws Exception {
    HtmlDocumentBuilder parser = new HtmlDocumentBuilder();
    Node node = parser.parse(new InputSource(new StringReader(html)));
    return System.identityHashCode(node) >> 24;
  }

  /**
   * Sanitizes {@code html} with a hand-written policy that forwards text,
   * forwards {@code <a>} tags with only their {@code href} attribute, and
   * does not forward any other tag.
   *
   * @return the rendered output.
   */
  private static String sanitize(String html) throws Exception {
    StringBuilder sb = new StringBuilder(html.length());

    final HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
        sb, new Handler<String>() {
          // Anything the renderer passes to this handler aborts the run.
          public void handle(String x) {
            throw new AssertionError(x);
          }
        });

    HtmlSanitizer.sanitize(html, new HtmlSanitizer.Policy() {
      public void openDocument() {
        renderer.openDocument();
      }

      public void closeDocument() {
        renderer.closeDocument();
      }

      public void text(String textChunk) {
        renderer.text(textChunk);
      }

      public void openTag(String elementName, List<String> attrs) {
        if ("a".equals(elementName)) {
          // The loop consumes attrs two entries at a time: a name followed by
          // its value.  A pair is kept when the name is "href" and removed
          // from the list otherwise.
          for (ListIterator<String> it = attrs.listIterator(); it.hasNext();) {
            String name = it.next();
            if ("href".equals(name)) {
              it.next();  // Step over the value, keeping the pair.
            } else {
              it.remove();  // Drop the name ...
              it.next();
              it.remove();  // ... and its value.
            }
          }
          renderer.openTag(elementName, attrs);
        }
      }

      public void closeTag(String elementName) {
        if ("a".equals(elementName)) {
          renderer.closeTag(elementName);
        }
      }
    });

    return sb.toString();
  }

  /**
   * Builder shared by all calls to {@link #sanitizeUsingPolicyBuilder}.  It is
   * created on first use and accessed without synchronization.
   */
  private static HtmlPolicyBuilder policyBuilder;

  /**
   * Sanitizes {@code html} with a policy built by {@link HtmlPolicyBuilder}
   * that allows standard URL protocols, {@code <a>} elements, and the
   * {@code href} attribute on {@code <a>}.
   *
   * @return the rendered output.
   */
  private static String sanitizeUsingPolicyBuilder(String html)
      throws Exception {
    if (policyBuilder == null) {
      policyBuilder = new HtmlPolicyBuilder()
          .allowStandardUrlProtocols()
          .allowElements("a")
          .allowAttributes("href").onElements("a");
    }

    StringBuilder sb = new StringBuilder(html.length());

    HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
        sb, new Handler<String>() {
          // Anything the renderer passes to this handler aborts the run.
          public void handle(String x) {
            throw new AssertionError(x);
          }
        });

    // A new policy is built around the fresh renderer on every call.
    HtmlSanitizer.sanitize(html, policyBuilder.build(renderer));
    return sb.toString();
  }
}