blob: 6435d61d9edeab65db4ce0772054a093b3ab3804 [file] [log] [blame]
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;
import javax.annotation.Nullable;
import com.google.common.collect.ImmutableSet;
/**
* An attribute policy for attributes whose values are URLs that requires that
* the value have no protocol or have an allowed protocol.
*
* <p>
* URLs with protocols must match the protocol set passed to the constructor.
* URLs without protocols but which specify an origin different from the
* containing page (e.g. {@code //example.org}) are only allowed if the
* {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy}
* allows both {@code http} and {@code https} which are normally used to serve
* HTML.
* Same-origin URLs, URLs without any protocol or authority part are always
* allowed.
* </p>
*
* <p>
* This class assumes that URLs are either hierarchical, or are opaque, but
* do not look like they contain an authority portion.
* </p>
*
* @author Mike Samuel <mikesamuel@gmail.com>
*/
@TCB
public class FilterUrlByProtocolAttributePolicy implements AttributePolicy {
private final ImmutableSet<String> protocols;
public FilterUrlByProtocolAttributePolicy(
Iterable<? extends String> protocols) {
this.protocols = ImmutableSet.copyOf(protocols);
}
public @Nullable String apply(
String elementName, String attributeName, String s) {
protocol_loop:
for (int i = 0, n = s.length(); i < n; ++i) {
switch (s.charAt(i)) {
case '/': case '#': case '?': // No protocol.
// Check for domain relative URLs like //www.evil.org/
if (s.startsWith("//")
// or the protocols by which HTML is normally served are OK.
&& !allowProtocolRelativeUrls()) {
return null;
}
break protocol_loop;
case ':':
String protocol = Strings.toLowerCase(s.substring(0, i));
if (!protocols.contains(protocol)) { return null; }
break protocol_loop;
}
}
return normalizeUri(s);
}
protected boolean allowProtocolRelativeUrls() {
return protocols.contains("http") && protocols.contains("https");
}
/** Percent encodes anything that looks like a colon, or a parenthesis. */
static String normalizeUri(String s) {
int n = s.length();
boolean colonsIrrelevant = false;
for (int i = 0; i < n; ++i) {
char ch = s.charAt(i);
switch (ch) {
case '/': case '#': case '?': case ':':
colonsIrrelevant = true;
break;
case '(': case ')': case '\uff1a':
StringBuilder sb = new StringBuilder(n + 16);
int pos = 0;
for (; i < n; ++i) {
ch = s.charAt(i);
switch (ch) {
case '(':
sb.append(s, pos, i).append("%28");
pos = i + 1;
break;
case ')':
sb.append(s, pos, i).append("%29");
pos = i + 1;
break;
default:
if (ch > 0x100 && !colonsIrrelevant) {
// Other colon like characters.
// TODO: do we need to encode non-colon characters if we're
// not dealing with URLs that haven't been copy/pasted into
// the URL bar?
// Is it safe to assume UTF-8 here?
switch (ch) {
case '\u0589':
sb.append(s, pos, i).append("%d6%89");
pos = i + 1;
break;
case '\u05c3':
sb.append(s, pos, i).append("%d7%83");
pos = i + 1;
break;
case '\u2236':
sb.append(s, pos, i).append("%e2%88%b6");
pos = i + 1;
break;
case '\uff1a':
sb.append(s, pos, i).append("%ef%bc%9a");
pos = i + 1;
break;
}
}
break;
}
}
return sb.append(s, pos, n).toString();
}
}
return s;
}
@Override
public boolean equals(Object o) {
return o != null && this.getClass() == o.getClass()
&& protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols);
}
@Override
public int hashCode() {
return protocols.hashCode();
}
}