blob: 38e88a3bbd63f642bb5cb7e49bfcb3e4f1a98d3b [file] [log] [blame]
// Copyright 2013-2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
use std::cmp;
use std::fmt::{self, Formatter};
use std::io;
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs};
use std::vec;
use parser::{ParseResult, ParseError};
use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET};
use idna;
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum HostInternal {
None,
Domain,
Ipv4(Ipv4Addr),
Ipv6(Ipv6Addr),
}
#[cfg(feature = "heapsize")]
known_heap_size!(0, HostInternal);
#[cfg(feature="serde")]
impl ::serde::Serialize for HostInternal {
fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: ::serde::Serializer {
// This doesn’t use `derive` because that involves
// large dependencies (that take a long time to build), and
// either Macros 1.1 which are not stable yet or a cumbersome build script.
//
// Implementing `Serializer` correctly for an enum is tricky,
// so let’s use existing enums that already do.
use std::net::IpAddr;
match *self {
HostInternal::None => None,
HostInternal::Domain => Some(None),
HostInternal::Ipv4(addr) => Some(Some(IpAddr::V4(addr))),
HostInternal::Ipv6(addr) => Some(Some(IpAddr::V6(addr))),
}.serialize(serializer)
}
}
#[cfg(feature="serde")]
impl ::serde::Deserialize for HostInternal {
fn deserialize<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: ::serde::Deserializer {
use std::net::IpAddr;
Ok(match ::serde::Deserialize::deserialize(deserializer)? {
None => HostInternal::None,
Some(None) => HostInternal::Domain,
Some(Some(IpAddr::V4(addr))) => HostInternal::Ipv4(addr),
Some(Some(IpAddr::V6(addr))) => HostInternal::Ipv6(addr),
})
}
}
impl<S> From<Host<S>> for HostInternal {
fn from(host: Host<S>) -> HostInternal {
match host {
Host::Domain(_) => HostInternal::Domain,
Host::Ipv4(address) => HostInternal::Ipv4(address),
Host::Ipv6(address) => HostInternal::Ipv6(address),
}
}
}
/// The host name of an URL.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Host<S=String> {
/// A DNS domain name, as '.' dot-separated labels.
/// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
/// a special URL, or percent encoded for non-special URLs. Hosts for
/// non-special URLs are also called opaque hosts.
Domain(S),
/// An IPv4 address.
/// `Url::host_str` returns the serialization of this address,
/// as four decimal integers separated by `.` dots.
Ipv4(Ipv4Addr),
/// An IPv6 address.
/// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
/// in the format per [RFC 5952 *A Recommendation
/// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
/// lowercase hexadecimal with maximal `::` compression.
Ipv6(Ipv6Addr),
}
#[cfg(feature="serde")]
impl<S: ::serde::Serialize> ::serde::Serialize for Host<S> {
fn serialize<R>(&self, serializer: &mut R) -> Result<(), R::Error> where R: ::serde::Serializer {
use std::net::IpAddr;
match *self {
Host::Domain(ref s) => Ok(s),
Host::Ipv4(addr) => Err(IpAddr::V4(addr)),
Host::Ipv6(addr) => Err(IpAddr::V6(addr)),
}.serialize(serializer)
}
}
#[cfg(feature="serde")]
impl<S: ::serde::Deserialize> ::serde::Deserialize for Host<S> {
fn deserialize<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: ::serde::Deserializer {
use std::net::IpAddr;
Ok(match ::serde::Deserialize::deserialize(deserializer)? {
Ok(s) => Host::Domain(s),
Err(IpAddr::V4(addr)) => Host::Ipv4(addr),
Err(IpAddr::V6(addr)) => Host::Ipv6(addr),
})
}
}
#[cfg(feature = "heapsize")]
impl<S: HeapSizeOf> HeapSizeOf for Host<S> {
fn heap_size_of_children(&self) -> usize {
match *self {
Host::Domain(ref s) => s.heap_size_of_children(),
_ => 0,
}
}
}
impl<'a> Host<&'a str> {
/// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
pub fn to_owned(&self) -> Host<String> {
match *self {
Host::Domain(domain) => Host::Domain(domain.to_owned()),
Host::Ipv4(address) => Host::Ipv4(address),
Host::Ipv6(address) => Host::Ipv6(address),
}
}
}
impl Host<String> {
/// Parse a host: either an IPv6 address in [] square brackets, or a domain.
///
/// <https://url.spec.whatwg.org/#host-parsing>
pub fn parse(input: &str) -> Result<Self, ParseError> {
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(ParseError::InvalidIpv6Address)
}
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
}
let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
let domain = idna::domain_to_ascii(&domain)?;
if domain.find(|c| matches!(c,
'\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
)).is_some() {
return Err(ParseError::InvalidDomainCharacter)
}
if let Some(address) = parse_ipv4addr(&domain)? {
Ok(Host::Ipv4(address))
} else {
Ok(Host::Domain(domain.into()))
}
}
// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(ParseError::InvalidIpv6Address)
}
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
}
if input.find(|c| matches!(c,
'\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
)).is_some() {
return Err(ParseError::InvalidDomainCharacter)
}
let s = utf8_percent_encode(input, SIMPLE_ENCODE_SET).to_string();
Ok(Host::Domain(s))
}
}
impl<S: AsRef<str>> fmt::Display for Host<S> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match *self {
Host::Domain(ref domain) => domain.as_ref().fmt(f),
Host::Ipv4(ref addr) => addr.fmt(f),
Host::Ipv6(ref addr) => {
f.write_str("[")?;
write_ipv6(addr, f)?;
f.write_str("]")
}
}
}
}
/// This mostly exists because coherence rules don’t allow us to implement
/// `ToSocketAddrs for (Host<S>, u16)`.
#[derive(Clone, Debug)]
pub struct HostAndPort<S=String> {
pub host: Host<S>,
pub port: u16,
}
impl<'a> HostAndPort<&'a str> {
/// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
pub fn to_owned(&self) -> HostAndPort<String> {
HostAndPort {
host: self.host.to_owned(),
port: self.port
}
}
}
impl<S: AsRef<str>> fmt::Display for HostAndPort<S> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
self.host.fmt(f)?;
f.write_str(":")?;
self.port.fmt(f)
}
}
impl<S: AsRef<str>> ToSocketAddrs for HostAndPort<S> {
type Iter = SocketAddrs;
fn to_socket_addrs(&self) -> io::Result<Self::Iter> {
let port = self.port;
match self.host {
Host::Domain(ref domain) => Ok(SocketAddrs {
// FIXME: use std::net::lookup_host when it’s stable.
state: SocketAddrsState::Domain((domain.as_ref(), port).to_socket_addrs()?)
}),
Host::Ipv4(address) => Ok(SocketAddrs {
state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port)))
}),
Host::Ipv6(address) => Ok(SocketAddrs {
state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0)))
}),
}
}
}
/// Socket addresses for an URL.
#[derive(Debug)]
pub struct SocketAddrs {
state: SocketAddrsState
}
#[derive(Debug)]
enum SocketAddrsState {
Domain(vec::IntoIter<SocketAddr>),
One(SocketAddr),
Done,
}
impl Iterator for SocketAddrs {
type Item = SocketAddr;
fn next(&mut self) -> Option<SocketAddr> {
match self.state {
SocketAddrsState::Domain(ref mut iter) => iter.next(),
SocketAddrsState::One(s) => {
self.state = SocketAddrsState::Done;
Some(s)
}
SocketAddrsState::Done => None
}
}
}
fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result {
let segments = addr.segments();
let (compress_start, compress_end) = longest_zero_sequence(&segments);
let mut i = 0;
while i < 8 {
if i == compress_start {
f.write_str(":")?;
if i == 0 {
f.write_str(":")?;
}
if compress_end < 8 {
i = compress_end;
} else {
break;
}
}
write!(f, "{:x}", segments[i as usize])?;
if i < 7 {
f.write_str(":")?;
}
i += 1;
}
Ok(())
}
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
let mut longest = -1;
let mut longest_length = -1;
let mut start = -1;
macro_rules! finish_sequence(
($end: expr) => {
if start >= 0 {
let length = $end - start;
if length > longest_length {
longest = start;
longest_length = length;
}
}
};
);
for i in 0..8 {
if pieces[i as usize] == 0 {
if start < 0 {
start = i;
}
} else {
finish_sequence!(i);
start = -1;
}
}
finish_sequence!(8);
// https://url.spec.whatwg.org/#concept-ipv6-serializer
// step 3: ignore lone zeroes
if longest_length < 2 {
(-1, -2)
} else {
(longest, longest + longest_length)
}
}
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
let mut r = 10;
if input.starts_with("0x") || input.starts_with("0X") {
input = &input[2..];
r = 16;
} else if input.len() >= 2 && input.starts_with('0') {
input = &input[1..];
r = 8;
}
// At the moment we can't know the reason why from_str_radix fails
// https://github.com/rust-lang/rust/issues/22639
// So instead we check if the input looks like a real number and only return
// an error when it's an overflow.
let valid_number = match r {
8 => input.chars().all(|c| c >= '0' && c <='7'),
10 => input.chars().all(|c| c >= '0' && c <='9'),
16 => input.chars().all(|c| (c >= '0' && c <='9') || (c >='a' && c <= 'f') || (c >= 'A' && c <= 'F')),
_ => false
};
if !valid_number {
return Ok(None);
}
if input.is_empty() {
return Ok(Some(0));
}
if input.starts_with('+') {
return Ok(None);
}
match u32::from_str_radix(input, r) {
Ok(number) => Ok(Some(number)),
Err(_) => Err(()),
}
}
/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
if input.is_empty() {
return Ok(None)
}
let mut parts: Vec<&str> = input.split('.').collect();
if parts.last() == Some(&"") {
parts.pop();
}
if parts.len() > 4 {
return Ok(None);
}
let mut numbers: Vec<u32> = Vec::new();
let mut overflow = false;
for part in parts {
if part == "" {
return Ok(None);
}
match parse_ipv4number(part) {
Ok(Some(n)) => numbers.push(n),
Ok(None) => return Ok(None),
Err(()) => overflow = true
};
}
if overflow {
return Err(ParseError::InvalidIpv4Address);
}
let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
// Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
return Err(ParseError::InvalidIpv4Address);
}
if numbers.iter().any(|x| *x > 255) {
return Err(ParseError::InvalidIpv4Address);
}
for (counter, n) in numbers.iter().enumerate() {
ipv4 += n << (8 * (3 - counter as u32))
}
Ok(Some(Ipv4Addr::from(ipv4)))
}
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
let input = input.as_bytes();
let len = input.len();
let mut is_ip_v4 = false;
let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
let mut piece_pointer = 0;
let mut compress_pointer = None;
let mut i = 0;
if len < 2 {
return Err(ParseError::InvalidIpv6Address)
}
if input[0] == b':' {
if input[1] != b':' {
return Err(ParseError::InvalidIpv6Address)
}
i = 2;
piece_pointer = 1;
compress_pointer = Some(1);
}
while i < len {
if piece_pointer == 8 {
return Err(ParseError::InvalidIpv6Address)
}
if input[i] == b':' {
if compress_pointer.is_some() {
return Err(ParseError::InvalidIpv6Address)
}
i += 1;
piece_pointer += 1;
compress_pointer = Some(piece_pointer);
continue
}
let start = i;
let end = cmp::min(len, start + 4);
let mut value = 0u16;
while i < end {
match (input[i] as char).to_digit(16) {
Some(digit) => {
value = value * 0x10 + digit as u16;
i += 1;
},
None => break
}
}
if i < len {
match input[i] {
b'.' => {
if i == start {
return Err(ParseError::InvalidIpv6Address)
}
i = start;
if piece_pointer > 6 {
return Err(ParseError::InvalidIpv6Address)
}
is_ip_v4 = true;
},
b':' => {
i += 1;
if i == len {
return Err(ParseError::InvalidIpv6Address)
}
},
_ => return Err(ParseError::InvalidIpv6Address)
}
}
if is_ip_v4 {
break
}
pieces[piece_pointer] = value;
piece_pointer += 1;
}
if is_ip_v4 {
if piece_pointer > 6 {
return Err(ParseError::InvalidIpv6Address)
}
let mut numbers_seen = 0;
while i < len {
if numbers_seen > 0 {
if numbers_seen < 4 && (i < len && input[i] == b'.') {
i += 1
} else {
return Err(ParseError::InvalidIpv6Address)
}
}
let mut ipv4_piece = None;
while i < len {
let digit = match input[i] {
c @ b'0' ... b'9' => c - b'0',
_ => break
};
match ipv4_piece {
None => ipv4_piece = Some(digit as u16),
Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
Some(ref mut v) => {
*v = *v * 10 + digit as u16;
if *v > 255 {
return Err(ParseError::InvalidIpv6Address)
}
}
}
i += 1;
}
pieces[piece_pointer] = if let Some(v) = ipv4_piece {
pieces[piece_pointer] * 0x100 + v
} else {
return Err(ParseError::InvalidIpv6Address)
};
numbers_seen += 1;
if numbers_seen == 2 || numbers_seen == 4 {
piece_pointer += 1;
}
}
if numbers_seen != 4 {
return Err(ParseError::InvalidIpv6Address)
}
}
if i < len {
return Err(ParseError::InvalidIpv6Address)
}
match compress_pointer {
Some(compress_pointer) => {
let mut swaps = piece_pointer - compress_pointer;
piece_pointer = 7;
while swaps > 0 {
pieces.swap(piece_pointer, compress_pointer + swaps - 1);
swaps -= 1;
piece_pointer -= 1;
}
}
_ => if piece_pointer != 8 {
return Err(ParseError::InvalidIpv6Address)
}
}
Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3],
pieces[4], pieces[5], pieces[6], pieces[7]))
}