blob: ed9e60b050dc336acd1c7baf1bb8966cd9b6e4f3 [file] [log] [blame]
// These functions are necessarily inefficient, because they must revert
// encoding conversions performed by the standard library. However, there is
// currently no better alternative.
use std::borrow::Cow;
use std::error::Error;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
use std::ops::Not;
use std::os::windows::ffi::OsStrExt;
use std::os::windows::ffi::OsStringExt;
use std::result;
use std::str;
if_raw_str! {
pub(super) mod raw;
}
mod wtf8;
use wtf8::DecodeWide;
#[cfg(test)]
mod tests;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum EncodingError {
Byte(u8),
CodePoint(u32),
End(),
}
impl EncodingError {
fn position(&self) -> Cow<'_, str> {
match self {
Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)),
Self::CodePoint(code_point) => {
Cow::Owned(format!("code point U+{:04X}", code_point))
}
Self::End() => Cow::Borrowed("end of string"),
}
}
}
impl Display for EncodingError {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"byte sequence is not representable in the platform encoding; \
error at {}",
self.position(),
)
}
}
impl Error for EncodingError {}
type Result<T> = result::Result<T, EncodingError>;
fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
let mut encoder = wtf8::encode_wide(string);
// Collecting an iterator into a result ignores the size hint:
// https://github.com/rust-lang/rust/issues/48994
let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
for wchar in &mut encoder {
encoded_string.push(wchar?);
}
debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8());
Ok(encoder
.is_still_utf8()
.not()
.then(|| OsStringExt::from_wide(&encoded_string)))
}
fn to_bytes(os_string: &OsStr) -> Vec<u8> {
let encoder = OsStrExt::encode_wide(os_string);
let mut string = Vec::with_capacity(encoder.size_hint().0);
string.extend(DecodeWide::new(encoder));
string
}
pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
from_bytes(string).map(|os_string| {
os_string.map(Cow::Owned).unwrap_or_else(|| {
// SAFETY: This slice was validated to be UTF-8.
Cow::Borrowed(OsStr::new(unsafe {
str::from_utf8_unchecked(string)
}))
})
})
}
pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
Cow::Owned(to_bytes(os_string))
}
pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
from_bytes(&string).map(|os_string| {
os_string.unwrap_or_else(|| {
// SAFETY: This slice was validated to be UTF-8.
unsafe { String::from_utf8_unchecked(string) }.into()
})
})
}
pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
to_bytes(&os_string)
}