| // This file is part of ICU4X. For terms of use, please see the file |
| // called LICENSE at the top level of the ICU4X source tree |
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| |
| //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. |
| //! |
| //! <div class="stab unstable"> |
| //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
| //! including in SemVer minor releases. While the serde representation of data structs is guaranteed |
| //! to be stable, their Rust representation might not be. Use with caution. |
| //! </div> |
| //! |
| //! Read more about data providers: [`icu_provider`] |
| |
| // Provider structs must be stable |
| #![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] |
| |
| use icu_provider::prelude::*; |
| |
| use crate::provider::data::CaseMapData; |
| use crate::provider::exceptions::CaseMapExceptions; |
| use icu_collections::codepointtrie::CodePointTrie; |
| #[cfg(feature = "datagen")] |
| use icu_collections::codepointtrie::CodePointTrieHeader; |
| |
| pub mod data; |
| pub mod exception_helpers; |
| pub mod exceptions; |
| #[cfg(feature = "datagen")] |
| mod exceptions_builder; |
| mod unfold; |
| |
| #[cfg(feature = "compiled_data")] |
| #[derive(Debug)] |
| /// Baked data |
| /// |
| /// <div class="stab unstable"> |
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
| /// including in SemVer minor releases. In particular, the `DataProvider` implementations are only |
| /// guaranteed to match with this version's `*_unstable` providers. Use with caution. |
| /// </div> |
| pub struct Baked; |
| |
| #[cfg(feature = "compiled_data")] |
| #[allow(unused_imports)] |
| const _: () = { |
| use icu_casemap_data::*; |
| pub mod icu { |
| pub use crate as casemap; |
| pub use icu_collections as collections; |
| } |
| make_provider!(Baked); |
| impl_case_map_v1_marker!(Baked); |
| impl_case_map_unfold_v1_marker!(Baked); |
| }; |
| |
| #[cfg(feature = "datagen")] |
| /// The latest minimum set of markers required by this component. |
| pub const MARKERS: &[DataMarkerInfo] = &[CaseMapUnfoldV1Marker::INFO, CaseMapV1Marker::INFO]; |
| |
| pub use self::unfold::{CaseMapUnfoldV1, CaseMapUnfoldV1Marker}; |
| |
| /// This type contains all of the casemapping data |
| /// |
| /// The methods in the provider module are primarily about accessing its data, |
| /// however the full algorithms are also implemented as methods on this type in |
| /// the `internals` module of this crate. |
| /// |
| /// <div class="stab unstable"> |
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed |
| /// to be stable, their Rust representation might not be. Use with caution. |
| /// </div> |
| #[icu_provider::data_struct(marker(CaseMapV1Marker, "props/casemap@1", singleton))] |
| #[derive(Debug, PartialEq, Clone)] |
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] |
| #[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))] |
| #[yoke(prove_covariance_manually)] |
| /// CaseMapper provides low-level access to the data necessary to |
| /// convert characters and strings to upper, lower, or title case. |
| pub struct CaseMapV1<'data> { |
| /// Case mapping data |
| pub trie: CodePointTrie<'data, CaseMapData>, |
| /// Exceptions to the case mapping data |
| pub exceptions: CaseMapExceptions<'data>, |
| } |
| |
| #[cfg(feature = "serde")] |
| impl<'de> serde::Deserialize<'de> for CaseMapV1<'de> { |
| fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> { |
| #[derive(serde::Deserialize)] |
| pub struct Raw<'data> { |
| #[serde(borrow)] |
| pub trie: CodePointTrie<'data, CaseMapData>, |
| #[serde(borrow)] |
| pub exceptions: CaseMapExceptions<'data>, |
| } |
| |
| let Raw { trie, exceptions } = Raw::deserialize(deserializer)?; |
| let result = Self { trie, exceptions }; |
| debug_assert!(result.validate().is_ok()); |
| Ok(result) |
| } |
| } |
| |
| impl CaseMapV1<'_> { |
| /// Creates a new CaseMapV1 using data exported by the |
| // `icuexportdata` tool in ICU4C. Validates that the data is |
| // consistent. |
| #[cfg(feature = "datagen")] |
| pub fn try_from_icu( |
| trie_header: CodePointTrieHeader, |
| trie_index: &[u16], |
| trie_data: &[u16], |
| exceptions: &[u16], |
| ) -> Result<Self, DataError> { |
| use self::exceptions_builder::CaseMapExceptionsBuilder; |
| use zerovec::ZeroVec; |
| let exceptions_builder = CaseMapExceptionsBuilder::new(exceptions); |
| let (exceptions, idx_map) = exceptions_builder.build()?; |
| |
| let trie_index = ZeroVec::alloc_from_slice(trie_index); |
| |
| #[allow(clippy::unwrap_used)] // datagen only |
| let trie_data = trie_data |
| .iter() |
| .map(|&i| { |
| CaseMapData::try_from_icu_integer(i) |
| .unwrap() |
| .with_updated_exception(&idx_map) |
| }) |
| .collect::<ZeroVec<_>>(); |
| |
| let trie = CodePointTrie::try_new(trie_header, trie_index, trie_data) |
| .map_err(|_| DataError::custom("Casemapping data does not form valid trie"))?; |
| |
| let result = Self { trie, exceptions }; |
| result.validate().map_err(DataError::custom)?; |
| Ok(result) |
| } |
| |
| /// Given an existing CaseMapper, validates that the data is |
| /// consistent. A CaseMapper created by the ICU transformer has |
| /// already been validated. Calling this function is only |
| /// necessary if you are concerned about data corruption after |
| /// deserializing. |
| #[cfg(any(feature = "serde", feature = "datagen"))] |
| #[allow(unused)] // is only used in debug mode for serde |
| pub(crate) fn validate(&self) -> Result<(), &'static str> { |
| // First, validate that exception data is well-formed. |
| let valid_exception_indices = self.exceptions.validate()?; |
| |
| let validate_delta = |c: char, delta: i32| -> Result<(), &'static str> { |
| let new_c = |
| u32::try_from(c as i32 + delta).map_err(|_| "Delta larger than character")?; |
| char::from_u32(new_c).ok_or("Invalid delta")?; |
| Ok(()) |
| }; |
| |
| for i in 0..char::MAX as u32 { |
| if let Some(c) = char::from_u32(i) { |
| let data = self.lookup_data(c); |
| if data.has_exception() { |
| let idx = data.exception_index(); |
| let exception = self.exceptions.get(idx); |
| // Verify that the exception index points to a valid exception header. |
| if !valid_exception_indices.contains(&idx) { |
| return Err("Invalid exception index in trie data"); |
| } |
| exception.validate()?; |
| } else { |
| validate_delta(c, data.delta() as i32)?; |
| } |
| } |
| } |
| Ok(()) |
| } |
| |
| pub(crate) fn lookup_data(&self, c: char) -> CaseMapData { |
| self.trie.get32(c as u32) |
| } |
| } |