blob: 287f8dac9cbd17770fc7ae67a905827b8898e954 [file] [log] [blame]
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Data stored as as [`ZeroTrieSimpleAscii`]
// This is a valid separator as `DataLocale` will never produce it.
const ID_SEPARATOR: u8 = 0x1E;
pub use icu_provider::DynamicDataMarker;
use icu_provider::{
prelude::{zerofrom::ZeroFrom, *},
ule::MaybeAsVarULE,
};
pub use zerotrie::ZeroTrieSimpleAscii;
use zerovec::VarZeroSlice;
#[cfg(feature = "export")]
use icu_provider::export::ExportMarker;
#[cfg(feature = "export")]
pub(crate) fn bake(
marker_bake: &databake::TokenStream,
bakes_to_ids: &[(
&DataPayload<ExportMarker>,
&std::collections::BTreeSet<DataIdentifierCow>,
)],
ctx: &databake::CrateEnv,
) -> (databake::TokenStream, usize) {
use databake::*;
// Safety invariant upheld: the only values being added to the trie are `baked_index`
// values, which come from `bakes`
let trie = ZeroTrieSimpleAscii::from_iter(bakes_to_ids.iter().enumerate().flat_map(
|(bake_index, (_, ids))| {
ids.iter().map(move |id| {
let mut encoded = id.locale.to_string().into_bytes();
if !id.marker_attributes.is_empty() {
encoded.push(ID_SEPARATOR);
encoded.extend_from_slice(id.marker_attributes.as_bytes());
}
(encoded, bake_index)
})
},
));
let baked_trie = trie.as_borrowed_slice().bake(&Default::default());
let baked_trie = quote! {
const TRIE: icu_provider_baked::zerotrie::ZeroTrieSimpleAscii<&'static [u8]> = icu_provider_baked:: #baked_trie;
};
let payloads = bakes_to_ids
.iter()
.map(|(payload, _)| *payload)
.collect::<Vec<_>>();
let maybe_vzv_tokens = DataPayload::tokenize_encoded_seq(&payloads, ctx);
let (baked_values, value_store_ty) = if let Some(vzv_tokens) = maybe_vzv_tokens {
(
quote! {
const VALUES: &'static zerovec::VarZeroSlice<<<#marker_bake as icu_provider_baked::zerotrie::DynamicDataMarker>::DataStruct as icu_provider::ule::MaybeAsVarULE>::EncodedStruct> = #vzv_tokens;
},
quote! {
icu_provider_baked::zerotrie::DataForVarULEs
},
)
} else {
let bakes = payloads.iter().map(|payload| payload.tokenize(ctx));
(
quote! {
const VALUES: &'static [<#marker_bake as icu_provider_baked::zerotrie::DynamicDataMarker>::DataStruct] = &[#(#bakes,)*];
},
quote! {
icu_provider_baked::zerotrie::Data
},
)
};
(
quote! {
// Safety invariant upheld: see above
#value_store_ty<#marker_bake> = {
#baked_trie
#baked_values
unsafe {
#value_store_ty::from_trie_and_values_unchecked(TRIE, VALUES)
}
}
},
core::mem::size_of::<Data<icu_provider::hello_world::HelloWorldV1>>()
+ trie.as_borrowed_slice().borrows_size(),
)
}
fn get_index(
trie: ZeroTrieSimpleAscii<&'static [u8]>,
id: DataIdentifierBorrowed,
attributes_prefix_match: bool,
) -> Option<usize> {
use writeable::Writeable;
let mut cursor = trie.cursor();
let _is_ascii = id.locale.write_to(&mut cursor);
if !id.marker_attributes.is_empty() {
cursor.step(ID_SEPARATOR);
id.marker_attributes.write_to(&mut cursor).ok()?;
loop {
if let Some(v) = cursor.take_value() {
break Some(v);
}
if !attributes_prefix_match || cursor.probe(0).is_none() {
break None;
}
}
} else {
cursor.take_value()
}
}
#[cfg(feature = "alloc")]
#[allow(clippy::type_complexity)]
fn iter(
trie: &'static ZeroTrieSimpleAscii<&'static [u8]>,
) -> core::iter::FilterMap<
zerotrie::ZeroTrieStringIterator<'static>,
fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
> {
use alloc::borrow::ToOwned;
trie.iter().filter_map(move |(s, _)| {
if let Some((locale, attrs)) = s.split_once(ID_SEPARATOR as char) {
Some(DataIdentifierCow::from_owned(
DataMarkerAttributes::try_from_str(attrs).ok()?.to_owned(),
locale.parse().ok()?,
))
} else {
s.parse().ok().map(DataIdentifierCow::from_locale)
}
})
}
pub struct Data<M: DataMarker> {
// Unsafe invariant: actual values contained MUST be valid indices into `values`
trie: ZeroTrieSimpleAscii<&'static [u8]>,
values: &'static [M::DataStruct],
}
impl<M: DataMarker> Data<M> {
/// Construct from a trie and values
///
/// # Safety
/// The actual values contained in the trie must be valid indices into `values`
pub const unsafe fn from_trie_and_values_unchecked(
trie: ZeroTrieSimpleAscii<&'static [u8]>,
values: &'static [M::DataStruct],
) -> Self {
Self { trie, values }
}
}
impl<M: DataMarker> super::DataStore<M> for Data<M> {
fn get(
&self,
id: DataIdentifierBorrowed,
attributes_prefix_match: bool,
) -> Option<DataPayload<M>> {
get_index(self.trie, id, attributes_prefix_match)
// Safety: Allowed since `i` came from the trie and the field safety invariant
.map(|i| unsafe { self.values.get_unchecked(i) })
.map(DataPayload::from_static_ref)
}
#[cfg(feature = "alloc")]
type IterReturn = core::iter::FilterMap<
zerotrie::ZeroTrieStringIterator<'static>,
fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
>;
#[cfg(feature = "alloc")]
fn iter(&'static self) -> Self::IterReturn {
iter(&self.trie)
}
}
pub struct DataForVarULEs<M: DataMarker>
where
M::DataStruct: MaybeAsVarULE,
M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
{
// Unsafe invariant: actual values contained MUST be valid indices into `values`
trie: ZeroTrieSimpleAscii<&'static [u8]>,
values: &'static VarZeroSlice<<M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
}
impl<M: DataMarker> DataForVarULEs<M>
where
M::DataStruct: MaybeAsVarULE,
M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
{
/// Construct from a trie and values
///
/// # Safety
/// The actual values contained in the trie must be valid indices into `values`
pub const unsafe fn from_trie_and_values_unchecked(
trie: ZeroTrieSimpleAscii<&'static [u8]>,
values: &'static VarZeroSlice<<M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
) -> Self {
Self { trie, values }
}
}
impl<M: DataMarker> super::DataStore<M> for DataForVarULEs<M>
where
M::DataStruct: MaybeAsVarULE,
M::DataStruct: ZeroFrom<'static, <M::DataStruct as MaybeAsVarULE>::EncodedStruct>,
{
fn get(
&self,
id: DataIdentifierBorrowed,
attributes_prefix_match: bool,
) -> Option<DataPayload<M>> {
get_index(self.trie, id, attributes_prefix_match)
// Safety: Allowed since `i` came from the trie and the field safety invariant
.map(|i| unsafe { self.values.get_unchecked(i) })
.map(M::DataStruct::zero_from)
.map(DataPayload::from_owned)
}
#[cfg(feature = "alloc")]
type IterReturn = core::iter::FilterMap<
zerotrie::ZeroTrieStringIterator<'static>,
fn((alloc::string::String, usize)) -> Option<DataIdentifierCow<'static>>,
>;
#[cfg(feature = "alloc")]
fn iter(&'static self) -> Self::IterReturn {
iter(&self.trie)
}
}