| #![warn(unsafe_op_in_unsafe_fn)] |
| use core::{ffi::CStr, marker::PhantomData, mem::MaybeUninit, ops::ControlFlow}; |
| |
| use crate::{ |
| adler32::adler32, |
| allocate::Allocator, |
| c_api::{gz_header, internal_state, z_checksum, z_stream}, |
| crc32::{crc32, Crc32Fold}, |
| read_buf::ReadBuf, |
| trace, |
| weak_slice::{WeakArrayMut, WeakSliceMut}, |
| DeflateFlush, ReturnCode, ADLER32_INITIAL_VALUE, CRC32_INITIAL_VALUE, MAX_WBITS, MIN_WBITS, |
| }; |
| |
| use self::{ |
| algorithm::CONFIGURATION_TABLE, |
| hash_calc::{HashCalcVariant, RollHashCalc, StandardHashCalc}, |
| pending::Pending, |
| trees_tbl::STATIC_LTREE, |
| window::Window, |
| }; |
| |
| mod algorithm; |
| mod compare256; |
| mod hash_calc; |
| mod longest_match; |
| mod pending; |
| mod slide_hash; |
| mod trees_tbl; |
| mod window; |
| |
| // Position relative to the current window |
| pub(crate) type Pos = u16; |
| |
| // SAFETY: This struct must have the same layout as [`z_stream`], so that casts and transmutations |
| // between the two can work without UB. |
| #[repr(C)] |
| pub struct DeflateStream<'a> { |
| pub(crate) next_in: *mut crate::c_api::Bytef, |
| pub(crate) avail_in: crate::c_api::uInt, |
| pub(crate) total_in: crate::c_api::z_size, |
| pub(crate) next_out: *mut crate::c_api::Bytef, |
| pub(crate) avail_out: crate::c_api::uInt, |
| pub(crate) total_out: crate::c_api::z_size, |
| pub(crate) msg: *const core::ffi::c_char, |
| pub(crate) state: &'a mut State<'a>, |
| pub(crate) alloc: Allocator<'a>, |
| pub(crate) data_type: core::ffi::c_int, |
| pub(crate) adler: crate::c_api::z_checksum, |
| pub(crate) reserved: crate::c_api::uLong, |
| } |
| |
| impl<'a> DeflateStream<'a> { |
| // z_stream and DeflateStream must have the same layout. Do our best to check if this is true. |
| // (imperfect check, but should catch most mistakes.) |
| const _S: () = assert!(core::mem::size_of::<z_stream>() == core::mem::size_of::<Self>()); |
| const _A: () = assert!(core::mem::align_of::<z_stream>() == core::mem::align_of::<Self>()); |
| |
| /// # Safety |
| /// |
| /// Behavior is undefined if any of the following conditions are violated: |
| /// |
| /// - `strm` satisfies the conditions of [`pointer::as_mut`] |
| /// - if not `NULL`, `strm` was initialized using [`init`] or similar |
| /// |
| /// [`pointer::as_mut`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.as_mut |
| #[inline(always)] |
| pub unsafe fn from_stream_mut(strm: *mut z_stream) -> Option<&'a mut Self> { |
| { |
| // Safety: ptr points to a valid value of type z_stream (if non-null) |
| let stream = unsafe { strm.as_ref() }?; |
| |
| if stream.zalloc.is_none() || stream.zfree.is_none() { |
| return None; |
| } |
| |
| if stream.state.is_null() { |
| return None; |
| } |
| } |
| |
| // SAFETY: DeflateStream has an equivalent layout as z_stream |
| unsafe { strm.cast::<DeflateStream>().as_mut() } |
| } |
| |
| /// # Safety |
| /// |
| /// Behavior is undefined if any of the following conditions are violated: |
| /// |
| /// - `strm` satisfies the conditions of [`pointer::as_ref`] |
| /// - if not `NULL`, `strm` was initialized using [`init`] or similar |
| /// |
| /// [`pointer::as_ref`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.as_ref |
| #[inline(always)] |
| pub unsafe fn from_stream_ref(strm: *const z_stream) -> Option<&'a Self> { |
| { |
| // Safety: ptr points to a valid value of type z_stream (if non-null) |
| let stream = unsafe { strm.as_ref() }?; |
| |
| if stream.zalloc.is_none() || stream.zfree.is_none() { |
| return None; |
| } |
| |
| if stream.state.is_null() { |
| return None; |
| } |
| } |
| |
| // SAFETY: DeflateStream has an equivalent layout as z_stream |
| unsafe { strm.cast::<DeflateStream>().as_ref() } |
| } |
| |
| fn as_z_stream_mut(&mut self) -> &mut z_stream { |
| // SAFETY: a valid &mut DeflateStream is also a valid &mut z_stream |
| unsafe { &mut *(self as *mut DeflateStream as *mut z_stream) } |
| } |
| |
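| /// Returns the number of bytes and bits of output that have been generated but |
| /// not yet consumed (cf. zlib's `deflatePending`). |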
| pub fn pending(&self) -> (usize, u8) { |
| ( |
| self.state.bit_writer.pending.pending, |
| self.state.bit_writer.bits_used, |
| ) |
| } |
| } |
| |
| /// number of elements in hash table |
| pub(crate) const HASH_SIZE: usize = 65536; |
| /// log2(HASH_SIZE) |
| const HASH_BITS: usize = 16; |
| |
| /// Maximum value for memLevel in deflateInit2 |
| const MAX_MEM_LEVEL: i32 = 9; |
| const DEF_MEM_LEVEL: i32 = if MAX_MEM_LEVEL > 8 { 8 } else { MAX_MEM_LEVEL }; |
| |
| #[repr(i32)] |
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] |
| #[cfg_attr(feature = "__internal-fuzz", derive(arbitrary::Arbitrary))] |
| pub enum Method { |
| #[default] |
| Deflated = 8, |
| } |
| |
| impl TryFrom<i32> for Method { |
| type Error = (); |
| |
| fn try_from(value: i32) -> Result<Self, Self::Error> { |
| match value { |
| 8 => Ok(Self::Deflated), |
| _ => Err(()), |
| } |
| } |
| } |
| |
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] |
| #[cfg_attr(feature = "__internal-fuzz", derive(arbitrary::Arbitrary))] |
| pub struct DeflateConfig { |
| pub level: i32, |
| pub method: Method, |
| pub window_bits: i32, |
| pub mem_level: i32, |
| pub strategy: Strategy, |
| } |
| |
| #[cfg(any(test, feature = "__internal-test"))] |
| impl quickcheck::Arbitrary for DeflateConfig { |
| fn arbitrary(g: &mut quickcheck::Gen) -> Self { |
| let mem_levels: Vec<_> = (1..=9).collect(); |
| let levels: Vec<_> = (0..=9).collect(); |
| |
| let mut window_bits = Vec::new(); |
| window_bits.extend(9..=15); // zlib |
| window_bits.extend(9 + 16..=15 + 16); // gzip |
| window_bits.extend(-15..=-9); // raw |
| |
| Self { |
| level: *g.choose(&levels).unwrap(), |
| method: Method::Deflated, |
| window_bits: *g.choose(&window_bits).unwrap(), |
| mem_level: *g.choose(&mem_levels).unwrap(), |
| strategy: *g |
| .choose(&[ |
| Strategy::Default, |
| Strategy::Filtered, |
| Strategy::HuffmanOnly, |
| Strategy::Rle, |
| Strategy::Fixed, |
| ]) |
| .unwrap(), |
| } |
| } |
| } |
| |
| impl DeflateConfig { |
| pub fn new(level: i32) -> Self { |
| Self { |
| level, |
| ..Self::default() |
| } |
| } |
| } |
| |
| impl Default for DeflateConfig { |
| fn default() -> Self { |
| Self { |
| level: crate::c_api::Z_DEFAULT_COMPRESSION, |
| method: Method::Deflated, |
| window_bits: MAX_WBITS, |
| mem_level: DEF_MEM_LEVEL, |
| strategy: Strategy::Default, |
| } |
| } |
| } |
| |
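| /// Initializes `stream` for compression with the given `config`. |
| /// |
| /// A minimal usage sketch (hypothetical caller code; assumes a zeroed `z_stream` |
| /// and that a default allocator feature is enabled): |
| /// |
| /// ```ignore |
| /// let mut strm = z_stream::default(); |
| /// let config = DeflateConfig::new(6); // compression level 6 |
| /// assert_eq!(init(&mut strm, config), ReturnCode::Ok); |
| /// ``` |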
| pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode { |
| let DeflateConfig { |
| mut level, |
| method: _, |
| mut window_bits, |
| mem_level, |
| strategy, |
| } = config; |
| |
| /* Todo: ignore strm->next_in if we use it as window */ |
| stream.msg = core::ptr::null_mut(); |
| |
| // For safety we must make sure that the alloc and free functions are consistent. |
| // This is a (slight) deviation from stock zlib: in this crate we pick the Rust |
| // allocator as the default, but `libz-rs-sys` always explicitly sets an allocator, |
| // and can configure the C allocator. |
| #[cfg(feature = "rust-allocator")] |
| if stream.zalloc.is_none() || stream.zfree.is_none() { |
| stream.configure_default_rust_allocator() |
| } |
| |
| #[cfg(feature = "c-allocator")] |
| if stream.zalloc.is_none() || stream.zfree.is_none() { |
| stream.configure_default_c_allocator() |
| } |
| |
| if stream.zalloc.is_none() || stream.zfree.is_none() { |
| return ReturnCode::StreamError; |
| } |
| |
| if level == crate::c_api::Z_DEFAULT_COMPRESSION { |
| level = 6; |
| } |
| |
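| // `window_bits` encodes the wrapper format in addition to the window size: |
| // negative values (-15..=-9) request a raw deflate stream, 9..=15 a zlib |
| // wrapper, and 25..=31 (i.e. 9..=15 plus 16) a gzip wrapper. `wrap` records |
| // the choice: 0 = raw, 1 = zlib, 2 = gzip. |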
| let wrap = if window_bits < 0 { |
| if window_bits < -MAX_WBITS { |
| return ReturnCode::StreamError; |
| } |
| window_bits = -window_bits; |
| |
| 0 |
| } else if window_bits > MAX_WBITS { |
| window_bits -= 16; |
| 2 |
| } else { |
| 1 |
| }; |
| |
| if (!(1..=MAX_MEM_LEVEL).contains(&mem_level)) |
| || !(MIN_WBITS..=MAX_WBITS).contains(&window_bits) |
| || !(0..=9).contains(&level) |
| || (window_bits == 8 && wrap != 1) |
| { |
| return ReturnCode::StreamError; |
| } |
| |
| let window_bits = if window_bits == 8 { |
| 9 /* until 256-byte window bug fixed */ |
| } else { |
| window_bits as usize |
| }; |
| |
| let alloc = Allocator { |
| zalloc: stream.zalloc.unwrap(), |
| zfree: stream.zfree.unwrap(), |
| opaque: stream.opaque, |
| _marker: PhantomData, |
| }; |
| |
| // allocated here to have the same order as zlib |
| let Some(state_allocation) = alloc.allocate_raw::<State>() else { |
| return ReturnCode::MemError; |
| }; |
| |
| let w_size = 1 << window_bits; |
| let window = Window::new_in(&alloc, window_bits); |
| |
| let prev = alloc.allocate_slice_raw::<u16>(w_size); |
| let head = alloc.allocate_raw::<[u16; HASH_SIZE]>(); |
| |
| let lit_bufsize = 1 << (mem_level + 6); // 16K elements by default |
| let pending = Pending::new_in(&alloc, 4 * lit_bufsize); |
| |
| // zlib-ng overlays the pending_buf and sym_buf. We cannot really do that safely |
| let sym_buf = ReadBuf::new_in(&alloc, 3 * lit_bufsize); |
| |
| // if any allocation failed, clean up allocations that did succeed |
| let (window, prev, head, pending, sym_buf) = match (window, prev, head, pending, sym_buf) { |
| (Some(window), Some(prev), Some(head), Some(pending), Some(sym_buf)) => { |
| (window, prev, head, pending, sym_buf) |
| } |
| (window, prev, head, pending, sym_buf) => { |
| // SAFETY: these pointers/structures are discarded after deallocation. |
| unsafe { |
| if let Some(mut sym_buf) = sym_buf { |
| alloc.deallocate(sym_buf.as_mut_ptr(), sym_buf.capacity()) |
| } |
| if let Some(mut pending) = pending { |
| pending.drop_in(&alloc); |
| } |
| if let Some(head) = head { |
| alloc.deallocate(head.as_ptr(), 1) |
| } |
| if let Some(prev) = prev { |
| alloc.deallocate(prev.as_ptr(), w_size) |
| } |
| if let Some(mut window) = window { |
| window.drop_in(&alloc); |
| } |
| |
| alloc.deallocate(state_allocation.as_ptr(), 1); |
| } |
| |
| return ReturnCode::MemError; |
| } |
| }; |
| |
| // zero initialize the memory |
| let prev = prev.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0 |
| unsafe { prev.write_bytes(0, w_size) }; |
| let prev = unsafe { WeakSliceMut::from_raw_parts_mut(prev, w_size) }; |
| |
| // zero out head's first element |
| let head = head.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0 |
| unsafe { head.write_bytes(0, 1) }; |
| let head = unsafe { WeakArrayMut::<u16, HASH_SIZE>::from_ptr(head) }; |
| |
| let state = State { |
| status: Status::Init, |
| |
| // window |
| w_size, |
| |
| // allocated values |
| window, |
| prev, |
| head, |
| bit_writer: BitWriter::from_pending(pending), |
| |
| // |
| lit_bufsize, |
| |
| // |
| sym_buf, |
| |
| // |
| level: level as i8, // set to zero again for testing? |
| strategy, |
| |
| // these fields are not set explicitly at this point |
| last_flush: 0, |
| wrap, |
| strstart: 0, |
| block_start: 0, |
| block_open: 0, |
| window_size: 0, |
| insert: 0, |
| matches: 0, |
| opt_len: 0, |
| static_len: 0, |
| lookahead: 0, |
| ins_h: 0, |
| max_chain_length: 0, |
| max_lazy_match: 0, |
| good_match: 0, |
| nice_match: 0, |
| |
| // |
| l_desc: TreeDesc::EMPTY, |
| d_desc: TreeDesc::EMPTY, |
| bl_desc: TreeDesc::EMPTY, |
| |
| // |
| crc_fold: Crc32Fold::new(), |
| gzhead: None, |
| gzindex: 0, |
| |
| // |
| match_start: 0, |
| prev_match: 0, |
| match_available: false, |
| prev_length: 0, |
| |
| // just provide a valid default; gets set properly later |
| hash_calc_variant: HashCalcVariant::Standard, |
| _cache_line_0: (), |
| _cache_line_1: (), |
| _cache_line_2: (), |
| _cache_line_3: (), |
| _padding_0: [0; 16], |
| }; |
| |
| unsafe { state_allocation.as_ptr().write(state) }; // FIXME: write is stable for NonNull since 1.80.0 |
| |
| stream.state = state_allocation.as_ptr() as *mut internal_state; |
| |
| let Some(stream) = (unsafe { DeflateStream::from_stream_mut(stream) }) else { |
| if cfg!(debug_assertions) { |
| unreachable!("we should have initialized the stream properly"); |
| } |
| return ReturnCode::StreamError; |
| }; |
| |
| reset(stream) |
| } |
| |
| pub fn params(stream: &mut DeflateStream, level: i32, strategy: Strategy) -> ReturnCode { |
| let level = if level == crate::c_api::Z_DEFAULT_COMPRESSION { |
| 6 |
| } else { |
| level |
| }; |
| |
| if !(0..=9).contains(&level) { |
| return ReturnCode::StreamError; |
| } |
| |
| let level = level as i8; |
| |
| let func = CONFIGURATION_TABLE[stream.state.level as usize].func; |
| |
| let state = &mut stream.state; |
| |
| // FIXME: use fn_addr_eq when it's available in our MSRV. The comparison returning false here |
| // is not functionally incorrect, but would be inconsistent with zlib-ng. |
| #[allow(unpredictable_function_pointer_comparisons)] |
| if (strategy != state.strategy || func != CONFIGURATION_TABLE[level as usize].func) |
| && state.last_flush != -2 |
| { |
| // Flush the last buffer. |
| let err = deflate(stream, DeflateFlush::Block); |
| if err == ReturnCode::StreamError { |
| return err; |
| } |
| |
| let state = &mut stream.state; |
| |
| if stream.avail_in != 0 |
| || ((state.strstart as isize - state.block_start) + state.lookahead as isize) != 0 |
| { |
| return ReturnCode::BufError; |
| } |
| } |
| |
| let state = &mut stream.state; |
| |
| if state.level != level { |
| if state.level == 0 && state.matches != 0 { |
| if state.matches == 1 { |
| self::slide_hash::slide_hash(state); |
| } else { |
| state.head.as_mut_slice().fill(0); |
| } |
| state.matches = 0; |
| } |
| |
| lm_set_level(state, level); |
| } |
| |
| state.strategy = strategy; |
| |
| ReturnCode::Ok |
| } |
| |
| pub fn set_dictionary(stream: &mut DeflateStream, mut dictionary: &[u8]) -> ReturnCode { |
| let state = &mut stream.state; |
| |
| let wrap = state.wrap; |
| |
| if wrap == 2 || (wrap == 1 && state.status != Status::Init) || state.lookahead != 0 { |
| return ReturnCode::StreamError; |
| } |
| |
| // when using zlib wrappers, compute Adler-32 for provided dictionary |
| if wrap == 1 { |
| stream.adler = adler32(stream.adler as u32, dictionary) as z_checksum; |
| } |
| |
| // avoid computing Adler-32 in read_buf |
| state.wrap = 0; |
| |
| // if dictionary would fill window, just replace the history |
| if dictionary.len() >= state.window.capacity() { |
| if wrap == 0 { |
| // clear the hash table |
| state.head.as_mut_slice().fill(0); |
| |
| state.strstart = 0; |
| state.block_start = 0; |
| state.insert = 0; |
| } else { |
| /* already empty otherwise */ |
| } |
| |
| // use the tail |
| dictionary = &dictionary[dictionary.len() - state.w_size..]; |
| } |
| |
| // insert dictionary into window and hash |
| let avail = stream.avail_in; |
| let next = stream.next_in; |
| stream.avail_in = dictionary.len() as _; |
| stream.next_in = dictionary.as_ptr() as *mut u8; |
| fill_window(stream); |
| |
| while stream.state.lookahead >= STD_MIN_MATCH { |
| let str = stream.state.strstart; |
| let n = stream.state.lookahead - (STD_MIN_MATCH - 1); |
| stream.state.insert_string(str, n); |
| stream.state.strstart = str + n; |
| stream.state.lookahead = STD_MIN_MATCH - 1; |
| fill_window(stream); |
| } |
| |
| let state = &mut stream.state; |
| |
| state.strstart += state.lookahead; |
| state.block_start = state.strstart as _; |
| state.insert = state.lookahead; |
| state.lookahead = 0; |
| state.prev_length = 0; |
| state.match_available = false; |
| |
| // restore the state |
| stream.next_in = next; |
| stream.avail_in = avail; |
| state.wrap = wrap; |
| |
| ReturnCode::Ok |
| } |
| |
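| /// Inserts `bits` low-order bits of `value` into the output stream, ahead of any |
| /// pending compressed data (cf. zlib's `deflatePrime`). Bits are emitted least |
| /// significant first; e.g. a (hypothetical) `prime(stream, 3, 0b101)` queues the |
| /// bits 1, 0, 1. |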
| pub fn prime(stream: &mut DeflateStream, mut bits: i32, value: i32) -> ReturnCode { |
| // our logic actually supports up to 32 bits. |
| debug_assert!(bits <= 16, "zlib only supports up to 16 bits here"); |
| |
| let mut value64 = value as u64; |
| |
| let state = &mut stream.state; |
| |
| if bits < 0 |
| || bits > BitWriter::BIT_BUF_SIZE as i32 |
| || bits > (core::mem::size_of_val(&value) << 3) as i32 |
| { |
| return ReturnCode::BufError; |
| } |
| |
| let mut put; |
| |
| loop { |
| put = BitWriter::BIT_BUF_SIZE - state.bit_writer.bits_used; |
| let put = Ord::min(put as i32, bits); |
| |
| if state.bit_writer.bits_used == 0 { |
| state.bit_writer.bit_buffer = value64; |
| } else { |
| state.bit_writer.bit_buffer |= |
| (value64 & ((1 << put) - 1)) << state.bit_writer.bits_used; |
| } |
| |
| state.bit_writer.bits_used += put as u8; |
| state.bit_writer.flush_bits(); |
| value64 >>= put; |
| bits -= put; |
| |
| if bits == 0 { |
| break; |
| } |
| } |
| |
| ReturnCode::Ok |
| } |
| |
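| /// Clones `source` into `dest`, duplicating all heap allocations so that the two |
| /// streams can afterwards be used independently (cf. zlib's `deflateCopy`). |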
| pub fn copy<'a>( |
| dest: &mut MaybeUninit<DeflateStream<'a>>, |
| source: &mut DeflateStream<'a>, |
| ) -> ReturnCode { |
| // SAFETY: source and dest are both mutable references, so guaranteed not to overlap. |
| // dest being a reference to maybe uninitialized memory makes a copy of 1 DeflateStream valid. |
| unsafe { |
| core::ptr::copy_nonoverlapping(source, dest.as_mut_ptr(), 1); |
| } |
| |
| let alloc = &source.alloc; |
| |
| // allocated here to have the same order as zlib |
| let Some(state_allocation) = alloc.allocate_raw::<State>() else { |
| return ReturnCode::MemError; |
| }; |
| |
| let source_state = &source.state; |
| |
| let window = source_state.window.clone_in(alloc); |
| |
| let prev = alloc.allocate_slice_raw::<u16>(source_state.w_size); |
| let head = alloc.allocate_raw::<[u16; HASH_SIZE]>(); |
| |
| let pending = source_state.bit_writer.pending.clone_in(alloc); |
| let sym_buf = source_state.sym_buf.clone_in(alloc); |
| |
| // if any allocation failed, clean up allocations that did succeed |
| let (window, prev, head, pending, sym_buf) = match (window, prev, head, pending, sym_buf) { |
| (Some(window), Some(prev), Some(head), Some(pending), Some(sym_buf)) => { |
| (window, prev, head, pending, sym_buf) |
| } |
| (window, prev, head, pending, sym_buf) => { |
| // SAFETY: this access is in-bounds |
| let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) }; |
| unsafe { core::ptr::write(field_ptr as *mut *mut State, core::ptr::null_mut()) }; |
| |
| // SAFETY: it is an assumption on DeflateStream that (de)allocation does not cause UB. |
| unsafe { |
| if let Some(mut sym_buf) = sym_buf { |
| alloc.deallocate(sym_buf.as_mut_ptr(), sym_buf.capacity()) |
| } |
| if let Some(mut pending) = pending { |
| pending.drop_in(alloc); |
| } |
| if let Some(head) = head { |
| alloc.deallocate(head.as_ptr(), HASH_SIZE) |
| } |
| if let Some(prev) = prev { |
| alloc.deallocate(prev.as_ptr(), source_state.w_size) |
| } |
| if let Some(mut window) = window { |
| window.drop_in(alloc); |
| } |
| |
| alloc.deallocate(state_allocation.as_ptr(), 1); |
| } |
| |
| return ReturnCode::MemError; |
| } |
| }; |
| |
| let prev = unsafe { |
| let prev = prev.as_ptr(); |
| prev.copy_from_nonoverlapping(source_state.prev.as_ptr(), source_state.prev.len()); |
| WeakSliceMut::from_raw_parts_mut(prev, source_state.prev.len()) |
| }; |
| |
| // FIXME: write_bytes is stable for NonNull since 1.80.0 |
| let head = unsafe { |
| let head = head.as_ptr(); |
| head.write_bytes(0, 1); |
| head.cast::<u16>().write(source_state.head.as_slice()[0]); |
| WeakArrayMut::from_ptr(head) |
| }; |
| |
| let mut bit_writer = BitWriter::from_pending(pending); |
| bit_writer.bits_used = source_state.bit_writer.bits_used; |
| bit_writer.bit_buffer = source_state.bit_writer.bit_buffer; |
| |
| let dest_state = State { |
| status: source_state.status, |
| bit_writer, |
| last_flush: source_state.last_flush, |
| wrap: source_state.wrap, |
| strategy: source_state.strategy, |
| level: source_state.level, |
| good_match: source_state.good_match, |
| nice_match: source_state.nice_match, |
| l_desc: source_state.l_desc.clone(), |
| d_desc: source_state.d_desc.clone(), |
| bl_desc: source_state.bl_desc.clone(), |
| prev_match: source_state.prev_match, |
| match_available: source_state.match_available, |
| strstart: source_state.strstart, |
| match_start: source_state.match_start, |
| prev_length: source_state.prev_length, |
| max_chain_length: source_state.max_chain_length, |
| max_lazy_match: source_state.max_lazy_match, |
| block_start: source_state.block_start, |
| block_open: source_state.block_open, |
| window, |
| sym_buf, |
| lit_bufsize: source_state.lit_bufsize, |
| window_size: source_state.window_size, |
| matches: source_state.matches, |
| opt_len: source_state.opt_len, |
| static_len: source_state.static_len, |
| insert: source_state.insert, |
| w_size: source_state.w_size, |
| lookahead: source_state.lookahead, |
| prev, |
| head, |
| ins_h: source_state.ins_h, |
| hash_calc_variant: source_state.hash_calc_variant, |
| crc_fold: source_state.crc_fold, |
| gzhead: None, |
| gzindex: source_state.gzindex, |
| _cache_line_0: (), |
| _cache_line_1: (), |
| _cache_line_2: (), |
| _cache_line_3: (), |
| _padding_0: source_state._padding_0, |
| }; |
| |
| // write the cloned state into state_ptr |
| unsafe { state_allocation.as_ptr().write(dest_state) }; // FIXME: write is stable for NonNull since 1.80.0 |
| |
| // insert the state_ptr into `dest` |
| let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) }; |
| unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation.as_ptr()) }; |
| |
| // update the gzhead field (it contains a mutable reference, so we need to be careful) |
| let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.gzhead) }; |
| unsafe { core::ptr::copy(&source_state.gzhead, field_ptr, 1) }; |
| |
| ReturnCode::Ok |
| } |
| |
| /// # Returns |
| /// |
| /// - Err when deflate is not done. A common cause is insufficient output space |
| /// - Ok otherwise |
| pub fn end<'a>(stream: &'a mut DeflateStream) -> Result<&'a mut z_stream, &'a mut z_stream> { |
| let status = stream.state.status; |
| |
| let alloc = stream.alloc; |
| |
| // deallocate in reverse order of allocations |
| unsafe { |
| // SAFETY: we make sure that these fields are not used (by invalidating the state pointer) |
| stream.state.sym_buf.drop_in(&alloc); |
| stream.state.bit_writer.pending.drop_in(&alloc); |
| alloc.deallocate(stream.state.head.as_mut_ptr(), 1); |
| if !stream.state.prev.is_empty() { |
| alloc.deallocate(stream.state.prev.as_mut_ptr(), stream.state.prev.len()); |
| } |
| stream.state.window.drop_in(&alloc); |
| } |
| |
| let stream = stream.as_z_stream_mut(); |
| let state = core::mem::replace(&mut stream.state, core::ptr::null_mut()); |
| |
| // SAFETY: `state` is not used later |
| unsafe { |
| alloc.deallocate(state as *mut State, 1); |
| } |
| |
| match status { |
| Status::Busy => Err(stream), |
| _ => Ok(stream), |
| } |
| } |
| |
| pub fn reset(stream: &mut DeflateStream) -> ReturnCode { |
| let ret = reset_keep(stream); |
| |
| if ret == ReturnCode::Ok { |
| lm_init(stream.state); |
| } |
| |
| ret |
| } |
| |
| fn reset_keep(stream: &mut DeflateStream) -> ReturnCode { |
| stream.total_in = 0; |
| stream.total_out = 0; |
| stream.msg = core::ptr::null_mut(); |
| stream.data_type = crate::c_api::Z_UNKNOWN; |
| |
| let state = &mut stream.state; |
| |
| state.bit_writer.pending.reset_keep(); |
| |
| // can be made negative by deflate(..., Z_FINISH); |
| state.wrap = state.wrap.abs(); |
| |
| state.status = match state.wrap { |
| 2 => Status::GZip, |
| _ => Status::Init, |
| }; |
| |
| stream.adler = match state.wrap { |
| 2 => { |
| state.crc_fold = Crc32Fold::new(); |
| CRC32_INITIAL_VALUE as _ |
| } |
| _ => ADLER32_INITIAL_VALUE as _, |
| }; |
| |
| state.last_flush = -2; |
| |
| state.zng_tr_init(); |
| |
| ReturnCode::Ok |
| } |
| |
| fn lm_init(state: &mut State) { |
| state.window_size = 2 * state.w_size; |
| |
| // zlib uses CLEAR_HASH here |
| state.head.as_mut_slice().fill(0); |
| |
| // Set the default configuration parameters: |
| lm_set_level(state, state.level); |
| |
| state.strstart = 0; |
| state.block_start = 0; |
| state.lookahead = 0; |
| state.insert = 0; |
| state.prev_length = 0; |
| state.match_available = false; |
| state.match_start = 0; |
| state.ins_h = 0; |
| } |
| |
| fn lm_set_level(state: &mut State, level: i8) { |
| state.max_lazy_match = CONFIGURATION_TABLE[level as usize].max_lazy; |
| state.good_match = CONFIGURATION_TABLE[level as usize].good_length; |
| state.nice_match = CONFIGURATION_TABLE[level as usize].nice_length; |
| state.max_chain_length = CONFIGURATION_TABLE[level as usize].max_chain; |
| |
| state.hash_calc_variant = HashCalcVariant::for_max_chain_length(state.max_chain_length); |
| state.level = level; |
| } |
| |
| pub fn tune( |
| stream: &mut DeflateStream, |
| good_length: usize, |
| max_lazy: usize, |
| nice_length: usize, |
| max_chain: usize, |
| ) -> ReturnCode { |
| stream.state.good_match = good_length as u16; |
| stream.state.max_lazy_match = max_lazy as u16; |
| stream.state.nice_match = nice_length as u16; |
| stream.state.max_chain_length = max_chain as u16; |
| |
| ReturnCode::Ok |
| } |
| |
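| /// A pair of 16-bit fields that mirrors zlib's `ct_data` union: `a` is either a |
| /// frequency count (while the tree is being built) or the bit string of the code |
| /// (once codes are assigned), and `b` is either the parent ("dad") index or the |
| /// code length. |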
| #[repr(C)] |
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| pub(crate) struct Value { |
| a: u16, |
| b: u16, |
| } |
| |
| impl Value { |
| pub(crate) const fn new(a: u16, b: u16) -> Self { |
| Self { a, b } |
| } |
| |
| pub(crate) fn freq_mut(&mut self) -> &mut u16 { |
| &mut self.a |
| } |
| |
| pub(crate) fn code_mut(&mut self) -> &mut u16 { |
| &mut self.a |
| } |
| |
| pub(crate) fn dad_mut(&mut self) -> &mut u16 { |
| &mut self.b |
| } |
| |
| pub(crate) fn len_mut(&mut self) -> &mut u16 { |
| &mut self.b |
| } |
| |
| #[inline(always)] |
| pub(crate) const fn freq(self) -> u16 { |
| self.a |
| } |
| |
| #[inline(always)] |
| pub(crate) const fn code(self) -> u16 { |
| self.a |
| } |
| |
| #[inline(always)] |
| pub(crate) const fn dad(self) -> u16 { |
| self.b |
| } |
| |
| #[inline(always)] |
| pub(crate) const fn len(self) -> u16 { |
| self.b |
| } |
| } |
| |
| /// number of length codes, not counting the special END_BLOCK code |
| pub(crate) const LENGTH_CODES: usize = 29; |
| |
| /// number of literal bytes 0..255 |
| const LITERALS: usize = 256; |
| |
| /// number of Literal or Length codes, including the END_BLOCK code |
| pub(crate) const L_CODES: usize = LITERALS + 1 + LENGTH_CODES; |
| |
| /// number of distance codes |
| pub(crate) const D_CODES: usize = 30; |
| |
| /// number of codes used to transfer the bit lengths |
| const BL_CODES: usize = 19; |
| |
| /// maximum heap size |
| const HEAP_SIZE: usize = 2 * L_CODES + 1; |
| |
| /// all codes must not exceed MAX_BITS bits |
| const MAX_BITS: usize = 15; |
| |
| /// Bit length codes must not exceed MAX_BL_BITS bits |
| const MAX_BL_BITS: usize = 7; |
| |
| pub(crate) const DIST_CODE_LEN: usize = 512; |
| |
| struct BitWriter<'a> { |
| pub(crate) pending: Pending<'a>, // output still pending |
| pub(crate) bit_buffer: u64, |
| pub(crate) bits_used: u8, |
| |
| /// total bit length of compressed file (NOTE: zlib-ng uses a 32-bit integer here) |
| #[cfg(feature = "ZLIB_DEBUG")] |
| compressed_len: usize, |
| /// bit length of compressed data sent (NOTE: zlib-ng uses a 32-bit integer here) |
| #[cfg(feature = "ZLIB_DEBUG")] |
| bits_sent: usize, |
| } |
| |
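| /// Looks up the Huffman code for a match length in `ltree` (`lc` is the match |
| /// length minus STD_MIN_MATCH) and appends the extra length bits, if any. |
| /// Returns the combined bit string and its length in bits. |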
| #[inline] |
| const fn encode_len(ltree: &[Value], lc: u8) -> (u64, usize) { |
| let mut lc = lc as usize; |
| |
| /* Send the length code, len is the match length - STD_MIN_MATCH */ |
| let code = self::trees_tbl::LENGTH_CODE[lc] as usize; |
| let c = code + LITERALS + 1; |
| assert!(c < L_CODES, "bad l_code"); |
| // send_code_trace(s, c); |
| |
| let lnode = ltree[c]; |
| let mut match_bits: u64 = lnode.code() as u64; |
| let mut match_bits_len = lnode.len() as usize; |
| let extra = StaticTreeDesc::EXTRA_LBITS[code] as usize; |
| if extra != 0 { |
| lc -= self::trees_tbl::BASE_LENGTH[code] as usize; |
| match_bits |= (lc as u64) << match_bits_len; |
| match_bits_len += extra; |
| } |
| |
| (match_bits, match_bits_len) |
| } |
| |
| #[inline] |
| const fn encode_dist(dtree: &[Value], mut dist: u16) -> (u64, usize) { |
| dist -= 1; /* dist is now the match distance - 1 */ |
| let code = State::d_code(dist as usize) as usize; |
| assert!(code < D_CODES, "bad d_code"); |
| // send_code_trace(s, code); |
| |
| /* Send the distance code */ |
| let dnode = dtree[code]; |
| let mut match_bits = dnode.code() as u64; |
| let mut match_bits_len = dnode.len() as usize; |
| let extra = StaticTreeDesc::EXTRA_DBITS[code] as usize; |
| if extra != 0 { |
| dist -= self::trees_tbl::BASE_DIST[code]; |
| match_bits |= (dist as u64) << match_bits_len; |
| match_bits_len += extra; |
| } |
| |
| (match_bits, match_bits_len) |
| } |
| |
| impl<'a> BitWriter<'a> { |
| pub(crate) const BIT_BUF_SIZE: u8 = 64; |
| |
| fn from_pending(pending: Pending<'a>) -> Self { |
| Self { |
| pending, |
| bit_buffer: 0, |
| bits_used: 0, |
| |
| #[cfg(feature = "ZLIB_DEBUG")] |
| compressed_len: 0, |
| #[cfg(feature = "ZLIB_DEBUG")] |
| bits_sent: 0, |
| } |
| } |
| |
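| /// Moves all complete bytes out of the bit buffer into the pending buffer. |
| /// Worked example: with `bits_used = 13`, one byte is written out |
| /// (`keep_bytes = 13 / 8 = 1`), `removed = (13 - 7).next_multiple_of(8) = 8` |
| /// bits are shifted away, and 5 bits stay buffered. |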
| fn flush_bits(&mut self) { |
| debug_assert!(self.bits_used <= 64); |
| let removed = self.bits_used.saturating_sub(7).next_multiple_of(8); |
| let keep_bytes = self.bits_used / 8; // number of complete bytes in the bit buffer |
| |
| let src = &self.bit_buffer.to_le_bytes(); |
| self.pending.extend(&src[..keep_bytes as usize]); |
| |
| self.bits_used -= removed; |
| self.bit_buffer = self.bit_buffer.checked_shr(removed as u32).unwrap_or(0); |
| } |
| |
| fn emit_align(&mut self) { |
| debug_assert!(self.bits_used <= 64); |
| let keep_bytes = self.bits_used.div_ceil(8); |
| let src = &self.bit_buffer.to_le_bytes(); |
| self.pending.extend(&src[..keep_bytes as usize]); |
| |
| self.bits_used = 0; |
| self.bit_buffer = 0; |
| |
| self.sent_bits_align(); |
| } |
| |
| fn send_bits_trace(&self, _value: u64, _len: u8) { |
| trace!(" l {:>2} v {:>4x} ", _len, _value); |
| } |
| |
| fn cmpr_bits_add(&mut self, _len: usize) { |
| #[cfg(feature = "ZLIB_DEBUG")] |
| { |
| self.compressed_len += _len; |
| } |
| } |
| |
| fn cmpr_bits_align(&mut self) { |
| #[cfg(feature = "ZLIB_DEBUG")] |
| { |
| self.compressed_len = self.compressed_len.next_multiple_of(8); |
| } |
| } |
| |
| fn sent_bits_add(&mut self, _len: usize) { |
| #[cfg(feature = "ZLIB_DEBUG")] |
| { |
| self.bits_sent += _len; |
| } |
| } |
| |
| fn sent_bits_align(&mut self) { |
| #[cfg(feature = "ZLIB_DEBUG")] |
| { |
| self.bits_sent = self.bits_sent.next_multiple_of(8); |
| } |
| } |
| |
| #[inline(always)] |
| fn send_bits(&mut self, val: u64, len: u8) { |
| debug_assert!(len <= 64); |
| debug_assert!(self.bits_used <= 64); |
| |
| let total_bits = len + self.bits_used; |
| |
| self.send_bits_trace(val, len); |
| self.sent_bits_add(len as usize); |
| |
| if total_bits < Self::BIT_BUF_SIZE { |
| self.bit_buffer |= val << self.bits_used; |
| self.bits_used = total_bits; |
| } else { |
| self.send_bits_overflow(val, total_bits); |
| } |
| } |
| |
| fn send_bits_overflow(&mut self, val: u64, total_bits: u8) { |
| if self.bits_used == Self::BIT_BUF_SIZE { |
| self.pending.extend(&self.bit_buffer.to_le_bytes()); |
| self.bit_buffer = val; |
| self.bits_used = total_bits - Self::BIT_BUF_SIZE; |
| } else { |
| self.bit_buffer |= val << self.bits_used; |
| self.pending.extend(&self.bit_buffer.to_le_bytes()); |
| self.bit_buffer = val >> (Self::BIT_BUF_SIZE - self.bits_used); |
| self.bits_used = total_bits - Self::BIT_BUF_SIZE; |
| } |
| } |
| |
| fn send_code(&mut self, code: usize, tree: &[Value]) { |
| let node = tree[code]; |
| self.send_bits(node.code() as u64, node.len() as u8) |
| } |
| |
| /// Send one empty static block to give enough lookahead for inflate. |
| /// This takes 10 bits, of which 7 may remain in the bit buffer. |
| pub fn align(&mut self) { |
| self.emit_tree(BlockType::StaticTrees, false); |
| self.emit_end_block(&STATIC_LTREE, false); |
| self.flush_bits(); |
| } |
| |
| pub(crate) fn emit_tree(&mut self, block_type: BlockType, is_last_block: bool) { |
| let header_bits = ((block_type as u64) << 1) | (is_last_block as u64); |
| self.send_bits(header_bits, 3); |
| trace!("\n--- Emit Tree: Last: {}\n", is_last_block as u8); |
| } |
| |
| pub(crate) fn emit_end_block_and_align(&mut self, ltree: &[Value], is_last_block: bool) { |
| self.emit_end_block(ltree, is_last_block); |
| |
| if is_last_block { |
| self.emit_align(); |
| } |
| } |
| |
| fn emit_end_block(&mut self, ltree: &[Value], _is_last_block: bool) { |
| const END_BLOCK: usize = 256; |
| self.send_code(END_BLOCK, ltree); |
| |
| trace!( |
| "\n+++ Emit End Block: Last: {} Pending: {} Total Out: {}\n", |
| _is_last_block as u8, |
| self.pending.pending().len(), |
| "<unknown>" |
| ); |
| } |
| |
| pub(crate) fn emit_lit(&mut self, ltree: &[Value], c: u8) -> u16 { |
| self.send_code(c as usize, ltree); |
| |
| #[cfg(feature = "ZLIB_DEBUG")] |
| if let Some(c) = char::from_u32(c as u32) { |
| if isgraph(c as u8) { |
| trace!(" '{}' ", c); |
| } |
| } |
| |
| ltree[c as usize].len() |
| } |
| |
| pub(crate) fn emit_dist( |
| &mut self, |
| ltree: &[Value], |
| dtree: &[Value], |
| lc: u8, |
| dist: u16, |
| ) -> usize { |
| let (mut match_bits, mut match_bits_len) = encode_len(ltree, lc); |
| |
| let (dist_match_bits, dist_match_bits_len) = encode_dist(dtree, dist); |
| |
| match_bits |= dist_match_bits << match_bits_len; |
| match_bits_len += dist_match_bits_len; |
| |
| self.send_bits(match_bits, match_bits_len as u8); |
| |
| match_bits_len |
| } |
| |
| pub(crate) fn emit_dist_static(&mut self, lc: u8, dist: u16) -> usize { |
| let precomputed_len = trees_tbl::STATIC_LTREE_ENCODINGS[lc as usize]; |
| let mut match_bits = precomputed_len.code() as u64; |
| let mut match_bits_len = precomputed_len.len() as usize; |
| |
| let dtree = self::trees_tbl::STATIC_DTREE.as_slice(); |
| let (dist_match_bits, dist_match_bits_len) = encode_dist(dtree, dist); |
| |
| match_bits |= dist_match_bits << match_bits_len; |
| match_bits_len += dist_match_bits_len; |
| |
| self.send_bits(match_bits, match_bits_len as u8); |
| |
| match_bits_len |
| } |
| |
| fn compress_block_help(&mut self, sym_buf: &[u8], ltree: &[Value], dtree: &[Value]) { |
| for chunk in sym_buf.chunks_exact(3) { |
| let [dist_low, dist_high, lc] = *chunk else { |
| unreachable!("out of bound access on the symbol buffer"); |
| }; |
| |
| match u16::from_le_bytes([dist_low, dist_high]) { |
| 0 => self.emit_lit(ltree, lc) as usize, |
| dist => self.emit_dist(ltree, dtree, lc, dist), |
| }; |
| } |
| |
| self.emit_end_block(ltree, false) |
| } |
| |
| fn send_tree(&mut self, tree: &[Value], bl_tree: &[Value], max_code: usize) { |
| /* tree: the tree to be scanned */ |
| /* max_code: the largest code of non-zero frequency */ |
| let mut prevlen: isize = -1; /* last emitted length */ |
| let mut curlen; /* length of current code */ |
| let mut nextlen = tree[0].len(); /* length of next code */ |
| let mut count = 0; /* repeat count of the current code */ |
| let mut max_count = 7; /* max repeat count */ |
| let mut min_count = 4; /* min repeat count */ |
| |
| /* tree[max_code+1].Len = -1; */ |
| /* guard already set */ |
| if nextlen == 0 { |
| max_count = 138; |
| min_count = 3; |
| } |
| |
| for n in 0..=max_code { |
| curlen = nextlen; |
| nextlen = tree[n + 1].len(); |
| count += 1; |
| if count < max_count && curlen == nextlen { |
| continue; |
| } else if count < min_count { |
| loop { |
| self.send_code(curlen as usize, bl_tree); |
| |
| count -= 1; |
| if count == 0 { |
| break; |
| } |
| } |
| } else if curlen != 0 { |
| if curlen as isize != prevlen { |
| self.send_code(curlen as usize, bl_tree); |
| count -= 1; |
| } |
| assert!((3..=6).contains(&count), " 3_6?"); |
| self.send_code(REP_3_6, bl_tree); |
| self.send_bits(count - 3, 2); |
| } else if count <= 10 { |
| self.send_code(REPZ_3_10, bl_tree); |
| self.send_bits(count - 3, 3); |
| } else { |
| self.send_code(REPZ_11_138, bl_tree); |
| self.send_bits(count - 11, 7); |
| } |
| |
| count = 0; |
| prevlen = curlen as isize; |
| |
| if nextlen == 0 { |
| max_count = 138; |
| min_count = 3; |
| } else if curlen == nextlen { |
| max_count = 6; |
| min_count = 3; |
| } else { |
| max_count = 7; |
| min_count = 4; |
| } |
| } |
| } |
| } |
| |
| #[repr(C, align(64))] |
| pub(crate) struct State<'a> { |
| status: Status, |
| |
| last_flush: i8, /* value of flush param for previous deflate call */ |
| |
| pub(crate) wrap: i8, /* bit 0 true for zlib, bit 1 true for gzip */ |
| |
| pub(crate) strategy: Strategy, |
| pub(crate) level: i8, |
| |
| /// Whether or not a block is currently open for the QUICK deflation scheme. |
| /// 0 if the block is closed, 1 if there is an active block, or 2 if there |
| /// is an active block and it is the last block. |
| pub(crate) block_open: u8, |
| |
| pub(crate) hash_calc_variant: HashCalcVariant, |
| |
| pub(crate) match_available: bool, /* set if previous match exists */ |
| |
| /// Use a faster search when the previous match is longer than this |
| pub(crate) good_match: u16, |
| |
| /// Stop searching when current match exceeds this |
| pub(crate) nice_match: u16, |
| |
| pub(crate) match_start: Pos, /* start of matching string */ |
| pub(crate) prev_match: Pos, /* previous match */ |
| pub(crate) strstart: usize, /* start of string to insert */ |
| |
| pub(crate) window: Window<'a>, |
| pub(crate) w_size: usize, /* LZ77 window size (32K by default) */ |
| |
| pub(crate) lookahead: usize, /* number of valid bytes ahead in window */ |
| |
| _cache_line_0: (), |
| |
| /// prev[N], where N is an offset in the current window, contains the offset in the window |
| /// of the previous 4-byte sequence that hashes to the same value as the 4-byte sequence |
| /// starting at N. Together with head, prev forms a chained hash table that can be used |
| /// to find earlier strings in the window that are potential matches for new input being |
| /// deflated. |
| pub(crate) prev: WeakSliceMut<'a, u16>, |
| /// head[H] contains the offset of the last 4-character sequence seen so far in |
| /// the current window that hashes to H (as calculated using the hash_calc_variant). |
| pub(crate) head: WeakArrayMut<'a, u16, HASH_SIZE>, |
| |
| /// Length of the best match at previous step. Matches not greater than this |
| /// are discarded. This is used in the lazy match evaluation. |
| pub(crate) prev_length: u16, |
| |
| /// To speed up deflation, hash chains are never searched beyond this length. |
| /// A higher limit improves compression ratio but degrades the speed. |
| pub(crate) max_chain_length: u16, |
| |
| // TODO untangle this mess! zlib uses the same field differently based on compression level |
| // we should just have 2 fields for clarity! |
| // |
| // Insert new strings in the hash table only if the match length is not |
| // greater than this length. This saves time but degrades compression. |
| // max_insert_length is used only for compression levels <= 3. |
| // define max_insert_length max_lazy_match |
| /// Attempt to find a better match only when the current match is strictly smaller |
| /// than this value. This mechanism is used only for compression levels >= 4. |
| pub(crate) max_lazy_match: u16, |
| |
| /// number of string matches in current block |
| /// NOTE: this is a saturating 8-bit counter, to help keep the struct compact. The code that |
| /// makes decisions based on this field only cares whether the count is greater than 2, so |
| /// an 8-bit counter is sufficient. |
| pub(crate) matches: u8, |
| |
| /// Window position at the beginning of the current output block. Gets |
| /// negative when the window is moved backwards. |
| pub(crate) block_start: isize, |
| |
| pub(crate) sym_buf: ReadBuf<'a>, |
| |
| _cache_line_1: (), |
| |
| /// Size of match buffer for literals/lengths. There are 4 reasons for |
| /// limiting lit_bufsize to 64K: |
| /// - frequencies can be kept in 16 bit counters |
| /// - if compression is not successful for the first block, all input |
| /// data is still in the window so we can still emit a stored block even |
| /// when input comes from standard input. (This can also be done for |
| /// all blocks if lit_bufsize is not greater than 32K.) |
| /// - if compression is not successful for a file smaller than 64K, we can |
| /// even emit a stored file instead of a stored block (saving 5 bytes). |
| /// This is applicable only for zip (not gzip or zlib). |
| /// - creating new Huffman trees less frequently may not provide fast |
| /// adaptation to changes in the input data statistics. (Take for |
| /// example a binary file with poorly compressible code followed by |
| /// a highly compressible string table.) Smaller buffer sizes give |
| /// fast adaptation but have of course the overhead of transmitting |
| /// trees more frequently. |
| /// - I can't count above 4 |
| lit_bufsize: usize, |
| |
| /// Actual size of window: 2*w_size, except when the user input buffer is directly used as sliding window. |
| pub(crate) window_size: usize, |
| |
| bit_writer: BitWriter<'a>, |
| |
| _cache_line_2: (), |
| |
| /// bit length of current block with optimal trees |
| opt_len: usize, |
| /// bit length of current block with static trees |
| static_len: usize, |
| |
| /// bytes at end of window left to insert |
| pub(crate) insert: usize, |
| |
| /// hash index of string to be inserted |
| pub(crate) ins_h: u32, |
| |
| gzhead: Option<&'a mut gz_header>, |
| gzindex: usize, |
| |
| _padding_0: [u8; 16], |
| |
| _cache_line_3: (), |
| |
| crc_fold: crate::crc32::Crc32Fold, |
| |
| l_desc: TreeDesc<HEAP_SIZE>, /* literal and length tree */ |
| d_desc: TreeDesc<{ 2 * D_CODES + 1 }>, /* distance tree */ |
| bl_desc: TreeDesc<{ 2 * BL_CODES + 1 }>, /* Huffman tree for bit lengths */ |
| } |
| |
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] |
| #[cfg_attr(feature = "__internal-fuzz", derive(arbitrary::Arbitrary))] |
| pub enum Strategy { |
| #[default] |
| Default = 0, |
| Filtered = 1, |
| HuffmanOnly = 2, |
| Rle = 3, |
| Fixed = 4, |
| } |
| |
| impl TryFrom<i32> for Strategy { |
| type Error = (); |
| |
| fn try_from(value: i32) -> Result<Self, Self::Error> { |
| match value { |
| 0 => Ok(Strategy::Default), |
| 1 => Ok(Strategy::Filtered), |
| 2 => Ok(Strategy::HuffmanOnly), |
| 3 => Ok(Strategy::Rle), |
| 4 => Ok(Strategy::Fixed), |
| _ => Err(()), |
| } |
| } |
| } |
| |
| #[derive(Debug, PartialEq, Eq)] |
| enum DataType { |
| Binary = 0, |
| Text = 1, |
| Unknown = 2, |
| } |
| |
| impl<'a> State<'a> { |
| pub const BIT_BUF_SIZE: u8 = BitWriter::BIT_BUF_SIZE; |
| |
| // log2(w_size) (in the range MIN_WBITS..=MAX_WBITS) |
| pub(crate) fn w_bits(&self) -> u32 { |
| self.w_size.trailing_zeros() |
| } |
| |
| pub(crate) fn w_mask(&self) -> usize { |
| self.w_size - 1 |
| } |
| |
| pub(crate) fn max_dist(&self) -> usize { |
| self.w_size - MIN_LOOKAHEAD |
| } |
| |
| // TODO untangle this mess! zlib uses the same field differently based on compression level |
| // we should just have 2 fields for clarity! |
| pub(crate) fn max_insert_length(&self) -> usize { |
| self.max_lazy_match as usize |
| } |
| |
| /// Total size of the pending buffer. In zlib-ng, `pending` shares memory with `sym_buf` |
| /// (we allocate them separately), so this is not the number of bytes that are actually |
| /// in `pending`! |
| pub(crate) fn pending_buf_size(&self) -> usize { |
| self.lit_bufsize * 4 |
| } |
| |
| #[inline(always)] |
| pub(crate) fn update_hash(&self, h: u32, val: u32) -> u32 { |
| match self.hash_calc_variant { |
| HashCalcVariant::Standard => StandardHashCalc::update_hash(h, val), |
| HashCalcVariant::Roll => RollHashCalc::update_hash(h, val), |
| } |
| } |
| |
| #[inline(always)] |
| pub(crate) fn quick_insert_string(&mut self, string: usize) -> u16 { |
| match self.hash_calc_variant { |
| HashCalcVariant::Standard => StandardHashCalc::quick_insert_string(self, string), |
| HashCalcVariant::Roll => RollHashCalc::quick_insert_string(self, string), |
| } |
| } |
| |
| #[inline(always)] |
| pub(crate) fn insert_string(&mut self, string: usize, count: usize) { |
| match self.hash_calc_variant { |
| HashCalcVariant::Standard => StandardHashCalc::insert_string(self, string, count), |
| HashCalcVariant::Roll => RollHashCalc::insert_string(self, string, count), |
| } |
| } |
| |
| #[inline(always)] |
| pub(crate) fn tally_lit(&mut self, unmatched: u8) -> bool { |
| Self::tally_lit_help(&mut self.sym_buf, &mut self.l_desc, unmatched) |
| } |
| |
| #[inline(always)] |
| pub(crate) fn tally_lit_help( |
| sym_buf: &mut ReadBuf<'a>, |
| l_desc: &mut TreeDesc<HEAP_SIZE>, |
| unmatched: u8, |
| ) -> bool { |
| sym_buf.push_lit(unmatched); |
| |
| *l_desc.dyn_tree[unmatched as usize].freq_mut() += 1; |
| |
| assert!( |
| unmatched as usize <= STD_MAX_MATCH - STD_MIN_MATCH, |
| "zng_tr_tally: bad literal" |
| ); |
| |
| // signal that the current block should be flushed |
| sym_buf.len() == sym_buf.capacity() - 3 |
| } |
| |
| const fn d_code(dist: usize) -> u8 { |
| let index = if dist < 256 { dist } else { 256 + (dist >> 7) }; |
| self::trees_tbl::DIST_CODE[index] |
| } |
| |
| #[inline(always)] |
| pub(crate) fn tally_dist(&mut self, mut dist: usize, len: usize) -> bool { |
| self.sym_buf.push_dist(dist as u16, len as u8); |
| |
| self.matches = self.matches.saturating_add(1); |
| dist -= 1; |
| |
| assert!( |
| dist < self.max_dist() && Self::d_code(dist) < D_CODES as u8, |
| "tally_dist: bad match" |
| ); |
| |
| let index = self::trees_tbl::LENGTH_CODE[len] as usize + LITERALS + 1; |
| *self.l_desc.dyn_tree[index].freq_mut() += 1; |
| |
| *self.d_desc.dyn_tree[Self::d_code(dist) as usize].freq_mut() += 1; |
| |
| // signal that the current block should be flushed |
| self.sym_buf.len() == self.sym_buf.capacity() - 3 |
| } |
| |
| fn detect_data_type(dyn_tree: &[Value]) -> DataType { |
| // set bits 0..6, 14..25, and 28..31 |
| // 0xf3ffc07f = binary 11110011111111111100000001111111 |
| const NON_TEXT: u64 = 0xf3ffc07f; |
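| // The clear bits (7..=13 and 26..=27) are "tolerated" control bytes: on their |
| // own they do not mark the stream as binary, and 9 (TAB), 10 (LF) and 13 (CR) |
| // count as positive evidence of text below. |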
| let mut mask = NON_TEXT; |
| |
| /* Check for non-textual bytes. */ |
| for value in &dyn_tree[0..32] { |
| if (mask & 1) != 0 && value.freq() != 0 { |
| return DataType::Binary; |
| } |
| |
| mask >>= 1; |
| } |
| |
| /* Check for textual bytes. */ |
| if dyn_tree[9].freq() != 0 || dyn_tree[10].freq() != 0 || dyn_tree[13].freq() != 0 { |
| return DataType::Text; |
| } |
| |
| if dyn_tree[32..LITERALS].iter().any(|v| v.freq() != 0) { |
| return DataType::Text; |
| } |
| |
| // There are no explicit text or non-text bytes; the stream is either empty or |
| // contains only tolerated bytes. |
| DataType::Binary |
| } |
| |
| fn compress_block_static_trees(&mut self) { |
| let ltree = self::trees_tbl::STATIC_LTREE.as_slice(); |
| for chunk in self.sym_buf.filled().chunks_exact(3) { |
| let [dist_low, dist_high, lc] = *chunk else { |
| unreachable!("out of bound access on the symbol buffer"); |
| }; |
| |
| match u16::from_le_bytes([dist_low, dist_high]) { |
| 0 => self.bit_writer.emit_lit(ltree, lc) as usize, |
| dist => self.bit_writer.emit_dist_static(lc, dist), |
| }; |
| } |
| |
| self.bit_writer.emit_end_block(ltree, false) |
| } |
| |
| fn compress_block_dynamic_trees(&mut self) { |
| self.bit_writer.compress_block_help( |
| self.sym_buf.filled(), |
| &self.l_desc.dyn_tree, |
| &self.d_desc.dyn_tree, |
| ); |
| } |
| |
| fn header(&self) -> u16 { |
| // preset dictionary flag in zlib header |
| const PRESET_DICT: u16 = 0x20; |
| |
| // The deflate compression method (the only one supported in this version) |
| const Z_DEFLATED: u16 = 8; |
| |
| let dict = match self.strstart { |
| 0 => 0, |
| _ => PRESET_DICT, |
| }; |
| |
| let h = ((Z_DEFLATED + ((self.w_bits() as u16 - 8) << 4)) << 8) |
| | (self.level_flags() << 6) |
| | dict; |
| |
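| // RFC 1950 requires the 16-bit zlib header to be a multiple of 31; the |
| // expression below fills in the FCHECK bits accordingly. Worked example: a |
| // 32K window at level 6 without a dictionary gives h = 0x7880, and |
| // 0x7880 + 31 - (0x7880 % 31) = 0x789c, the familiar `78 9c` header bytes. |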
| h + 31 - (h % 31) |
| } |
| |
| fn level_flags(&self) -> u16 { |
| if self.strategy >= Strategy::HuffmanOnly || self.level < 2 { |
| 0 |
| } else if self.level < 6 { |
| 1 |
| } else if self.level == 6 { |
| 2 |
| } else { |
| 3 |
| } |
| } |
| |
| fn zng_tr_init(&mut self) { |
| self.l_desc.stat_desc = &StaticTreeDesc::L; |
| |
| self.d_desc.stat_desc = &StaticTreeDesc::D; |
| |
| self.bl_desc.stat_desc = &StaticTreeDesc::BL; |
| |
| self.bit_writer.bit_buffer = 0; |
| self.bit_writer.bits_used = 0; |
| |
| #[cfg(feature = "ZLIB_DEBUG")] |
| { |
| self.bit_writer.compressed_len = 0; |
| self.bit_writer.bits_sent = 0; |
| } |
| |
| // Initialize the first block of the first file: |
| self.init_block(); |
| } |
| |
| /// initializes a new block |
| fn init_block(&mut self) { |
| // Initialize the trees. |
| // TODO would a memset also work here? |
| |
| for value in &mut self.l_desc.dyn_tree[..L_CODES] { |
| *value.freq_mut() = 0; |
| } |
| |
| for value in &mut self.d_desc.dyn_tree[..D_CODES] { |
| *value.freq_mut() = 0; |
| } |
| |
| for value in &mut self.bl_desc.dyn_tree[..BL_CODES] { |
| *value.freq_mut() = 0; |
| } |
| |
| // end of block literal code |
| const END_BLOCK: usize = 256; |
| |
| *self.l_desc.dyn_tree[END_BLOCK].freq_mut() = 1; |
| self.opt_len = 0; |
| self.static_len = 0; |
| self.sym_buf.clear(); |
| self.matches = 0; |
| } |
| } |
| |
| #[repr(u8)] |
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| enum Status { |
| Init = 1, |
| |
| GZip = 4, |
| Extra = 5, |
| Name = 6, |
| Comment = 7, |
| Hcrc = 8, |
| |
| Busy = 2, |
| Finish = 3, |
| } |
| |
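| /// Ranks flush values so that their "strength" can be compared numerically. For |
| /// example, `rank_flush(5)` (Z_BLOCK) is `2 * 5 - 9 = 1`, which places Z_BLOCK |
| /// between Z_NO_FLUSH (rank 0) and Z_PARTIAL_FLUSH (rank 2). |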
| const fn rank_flush(f: i8) -> i8 { |
| // rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH |
| ((f) * 2) - (if (f) > 4 { 9 } else { 0 }) |
| } |
| |
| #[derive(Debug)] |
| pub(crate) enum BlockState { |
| /// block not completed, need more input or more output |
| NeedMore = 0, |
| /// block flush performed |
| BlockDone = 1, |
| /// finish started, need only more output at next deflate |
| FinishStarted = 2, |
| /// finish done, accept no more input or output |
| FinishDone = 3, |
| } |
| |
| // Maximum stored block length in deflate format (not including header). |
| pub(crate) const MAX_STORED: usize = 65535; // i.e. u16::MAX |
| |
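| /// Copies up to `size` bytes from the stream's input into the window at `offset`, |
| /// updating the running checksum (CRC-32 for gzip streams, Adler-32 for zlib |
| /// streams) as it goes. Returns the number of bytes consumed. |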
| pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: usize) -> usize { |
| let len = Ord::min(stream.avail_in as usize, size); |
| |
| if len == 0 { |
| return 0; |
| } |
| |
| stream.avail_in -= len as u32; |
| |
| if stream.state.wrap == 2 { |
| // we likely cannot fuse the crc32 and the copy here because the input can be changed by |
| // a concurrent thread. Therefore it cannot be converted into a slice! |
| let window = &mut stream.state.window; |
| // SAFETY: len is bounded by avail_in, so this copy is in bounds. |
| unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) }; |
| |
| let data = &stream.state.window.filled()[offset..][..len]; |
| stream.state.crc_fold.fold(data, CRC32_INITIAL_VALUE); |
| } else if stream.state.wrap == 1 { |
| // we likely cannot fuse the adler32 and the copy here because the input can be changed by |
| // a concurrent thread. Therefore it cannot be converted into a slice! |
| let window = &mut stream.state.window; |
| // SAFETY: len is bounded by avail_in, so this copy is in bounds. |
| unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) }; |
| |
| let data = &stream.state.window.filled()[offset..][..len]; |
| stream.adler = adler32(stream.adler as u32, data) as _; |
| } else { |
| let window = &mut stream.state.window; |
| // SAFETY: len is bounded by avail_in, so this copy is in bounds. |
| unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) }; |
| } |
| |
| stream.next_in = stream.next_in.wrapping_add(len); |
| stream.total_in += len as crate::c_api::z_size; |
| |
| len |
| } |
| |
| pub(crate) enum BlockType { |
| StoredBlock = 0, |
| StaticTrees = 1, |
| DynamicTrees = 2, |
| } |
| |
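| /// Emits a stored (uncompressed) block: a 3-bit block header, padding to a byte |
| /// boundary, the 16-bit length `LEN` and its one's complement `NLEN`, and then |
| /// the raw bytes (RFC 1951, section 3.2.4). |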
| pub(crate) fn zng_tr_stored_block( |
| state: &mut State, |
| window_range: core::ops::Range<usize>, |
| is_last: bool, |
| ) { |
| // send block type |
| state.bit_writer.emit_tree(BlockType::StoredBlock, is_last); |
| |
| // align on byte boundary |
| state.bit_writer.emit_align(); |
| |
| state.bit_writer.cmpr_bits_align(); |
| |
| let input_block: &[u8] = &state.window.filled()[window_range]; |
| let stored_len = input_block.len() as u16; |
| |
| state.bit_writer.pending.extend(&stored_len.to_le_bytes()); |
| state |
| .bit_writer |
| .pending |
| .extend(&(!stored_len).to_le_bytes()); |
| |
| state.bit_writer.cmpr_bits_add(32); |
| state.bit_writer.sent_bits_add(32); |
| if stored_len > 0 { |
| state.bit_writer.pending.extend(input_block); |
| state.bit_writer.cmpr_bits_add((stored_len << 3) as usize); |
| state.bit_writer.sent_bits_add((stored_len << 3) as usize); |
| } |
| } |
| |
| /// The minimum match length mandated by the deflate standard |
| pub(crate) const STD_MIN_MATCH: usize = 3; |
| /// The maximum match length mandated by the deflate standard |
| pub(crate) const STD_MAX_MATCH: usize = 258; |
| |
| /// The minimum wanted match length, affects deflate_quick, deflate_fast, deflate_medium and deflate_slow |
| pub(crate) const WANT_MIN_MATCH: usize = 4; |
| |
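| /// Minimum amount of lookahead, except at the end of the input file. |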
| pub(crate) const MIN_LOOKAHEAD: usize = STD_MAX_MATCH + STD_MIN_MATCH + 1; |
| |
| #[inline] |
| pub(crate) fn fill_window(stream: &mut DeflateStream) { |
| debug_assert!(stream.state.lookahead < MIN_LOOKAHEAD); |
| |
| let wsize = stream.state.w_size; |
| |
| loop { |
| let state = &mut *stream.state; |
| let mut more = state.window_size - state.lookahead - state.strstart; |
| |
| // If the window is almost full and there is insufficient lookahead, |
| // move the upper half to the lower one to make room in the upper half. |
| if state.strstart >= wsize + state.max_dist() { |
| // shift the window to the left |
| let (old, new) = state.window.filled_mut()[..2 * wsize].split_at_mut(wsize); |
| old.copy_from_slice(new); |
| |
| state.match_start = state.match_start.saturating_sub(wsize as u16); |
| if state.match_start == 0 { |
| state.prev_length = 0; |
| } |
| |
| state.strstart -= wsize; /* we now have strstart >= MAX_DIST */ |
| state.block_start -= wsize as isize; |
| state.insert = Ord::min(state.insert, state.strstart); |
| |
| self::slide_hash::slide_hash(state); |
| |
| more += wsize; |
| } |
| |
| if stream.avail_in == 0 { |
| break; |
| } |
| |
| // If there was no sliding: |
| // strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && |
| // more == window_size - lookahead - strstart |
| // => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) |
| // => more >= window_size - 2*WSIZE + 2 |
| // In the BIG_MEM or MMAP case (not yet supported), |
| // window_size == input_size + MIN_LOOKAHEAD && |
| // strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. |
| // Otherwise, window_size == 2*WSIZE so more >= 2. |
| // If there was sliding, more >= WSIZE. So in all cases, more >= 2. |
| assert!(more >= 2, "more < 2"); |
| |
| let n = read_buf_window(stream, stream.state.strstart + stream.state.lookahead, more); |
| |
| let state = &mut *stream.state; |
| state.lookahead += n; |
| |
| // Initialize the hash value now that we have some input: |
| if state.lookahead + state.insert >= STD_MIN_MATCH { |
| let string = state.strstart - state.insert; |
| if state.max_chain_length > 1024 { |
| let v0 = state.window.filled()[string] as u32; |
| let v1 = state.window.filled()[string + 1] as u32; |
| state.ins_h = state.update_hash(v0, v1); |
| } else if string >= 1 { |
| state.quick_insert_string(string + 2 - STD_MIN_MATCH); |
| } |
| let mut count = state.insert; |
| if state.lookahead == 1 { |
| count -= 1; |
| } |
| if count > 0 { |
| state.insert_string(string, count); |
| state.insert -= count; |
| } |
| } |
| |
| // If the whole input has less than STD_MIN_MATCH bytes, ins_h is garbage, |
| // but this is not important since only literal bytes will be emitted. |
| |
| if !(stream.state.lookahead < MIN_LOOKAHEAD && stream.avail_in != 0) { |
| break; |
| } |
| } |
| |
| assert!( |
| stream.state.strstart <= stream.state.window_size - MIN_LOOKAHEAD, |
| "not enough room for search" |
| ); |
| } |
| |
| pub(crate) struct StaticTreeDesc { |
| /// static tree or NULL |
| pub(crate) static_tree: &'static [Value], |
| /// extra bits for each code or NULL |
| extra_bits: &'static [u8], |
| /// base index for extra_bits |
| extra_base: usize, |
| /// max number of elements in the tree |
| elems: usize, |
| /// max bit length for the codes |
| max_length: u16, |
| } |
| |
| impl StaticTreeDesc { |
| const EMPTY: Self = Self { |
| static_tree: &[], |
| extra_bits: &[], |
| extra_base: 0, |
| elems: 0, |
| max_length: 0, |
| }; |
| |
| /// extra bits for each length code |
| const EXTRA_LBITS: [u8; LENGTH_CODES] = [ |
| 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, |
| ]; |
| |
| /// extra bits for each distance code |
| const EXTRA_DBITS: [u8; D_CODES] = [ |
| 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, |
| 13, 13, |
| ]; |
| |
| /// extra bits for each bit length code |
| const EXTRA_BLBITS: [u8; BL_CODES] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7]; |
| |
| /// The lengths of the bit length codes are sent in order of decreasing |
| /// probability, to avoid transmitting the lengths for unused bit length codes. |
| const BL_ORDER: [u8; BL_CODES] = [ |
| 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, |
| ]; |
| |
| pub(crate) const L: Self = Self { |
| static_tree: &self::trees_tbl::STATIC_LTREE, |
| extra_bits: &Self::EXTRA_LBITS, |
| extra_base: LITERALS + 1, |
| elems: L_CODES, |
| max_length: MAX_BITS as u16, |
| }; |
| |
| pub(crate) const D: Self = Self { |
| static_tree: &self::trees_tbl::STATIC_DTREE, |
| extra_bits: &Self::EXTRA_DBITS, |
| extra_base: 0, |
| elems: D_CODES, |
| max_length: MAX_BITS as u16, |
| }; |
| |
| pub(crate) const BL: Self = Self { |
| static_tree: &[], |
| extra_bits: &Self::EXTRA_BLBITS, |
| extra_base: 0, |
| elems: BL_CODES, |
| max_length: MAX_BL_BITS as u16, |
| }; |
| } |
| |
| #[derive(Clone)] |
| pub(crate) struct TreeDesc<const N: usize> { |
| dyn_tree: [Value; N], |
| max_code: usize, |
| stat_desc: &'static StaticTreeDesc, |
| } |
| |
| impl<const N: usize> TreeDesc<N> { |
| const EMPTY: Self = Self { |
| dyn_tree: [Value::new(0, 0); N], |
| max_code: 0, |
| stat_desc: &StaticTreeDesc::EMPTY, |
| }; |
| } |
| |
| fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) { |
| let tree = &mut desc.dyn_tree; |
| let stree = desc.stat_desc.static_tree; |
| let elements = desc.stat_desc.elems; |
| |
| let mut heap = Heap::new(); |
| let mut max_code = heap.initialize(&mut tree[..elements]); |
| |
| // The pkzip format requires that at least one distance code exists, |
| // and that at least one bit should be sent even if there is only one |
| // possible code. So to avoid special checks later on we force at least |
| // two codes of non zero frequency. |
| while heap.heap_len < 2 { |
| heap.heap_len += 1; |
| let node = if max_code < 2 { |
| max_code += 1; |
| max_code |
| } else { |
| 0 |
| }; |
| |
| debug_assert!(node >= 0); |
| let node = node as usize; |
| |
| heap.heap[heap.heap_len] = node as u32; |
| *tree[node].freq_mut() = 1; |
| heap.depth[node] = 0; |
| state.opt_len -= 1; |
| if !stree.is_empty() { |
| state.static_len -= stree[node].len() as usize; |
| } |
| /* node is 0 or 1 so it does not have extra bits */ |
| } |
| |
| debug_assert!(max_code >= 0); |
| let max_code = max_code as usize; |
| desc.max_code = max_code; |
| |
| // The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, |
| // establish sub-heaps of increasing lengths: |
| let mut n = heap.heap_len / 2; |
| while n >= 1 { |
| heap.pqdownheap(tree, n); |
| n -= 1; |
| } |
| |
| heap.construct_huffman_tree(tree, elements); |
| |
| // At this point, the fields freq and dad are set. We can now |
| // generate the bit lengths. |
| let bl_count = gen_bitlen(state, &mut heap, desc); |
| |
| // The field len is now set, we can generate the bit codes |
| gen_codes(&mut desc.dyn_tree, max_code, &bl_count); |
| } |
| |
| fn gen_bitlen<const N: usize>( |
| state: &mut State, |
| heap: &mut Heap, |
| desc: &mut TreeDesc<N>, |
| ) -> [u16; MAX_BITS + 1] { |
| let tree = &mut desc.dyn_tree; |
| let max_code = desc.max_code; |
| let stree = desc.stat_desc.static_tree; |
| let extra = desc.stat_desc.extra_bits; |
| let base = desc.stat_desc.extra_base; |
| let max_length = desc.stat_desc.max_length; |
| |
| let mut bl_count = [0u16; MAX_BITS + 1]; |
| |
| // In a first pass, compute the optimal bit lengths (which may |
| // overflow in the case of the bit length tree). |
| *tree[heap.heap[heap.heap_max] as usize].len_mut() = 0; /* root of the heap */ |
| |
| // number of elements with bit length too large |
| let mut overflow: i32 = 0; |
| |
| for h in heap.heap_max + 1..HEAP_SIZE { |
| let n = heap.heap[h] as usize; |
| let mut bits = tree[tree[n].dad() as usize].len() + 1; |
| |
| if bits > max_length { |
| bits = max_length; |
| overflow += 1; |
| } |
| |
|         // We overwrite tree[n].dad, which is no longer needed |
| *tree[n].len_mut() = bits; |
| |
| // not a leaf node |
| if n > max_code { |
| continue; |
| } |
| |
| bl_count[bits as usize] += 1; |
| let mut xbits = 0; |
| if n >= base { |
| xbits = extra[n - base] as usize; |
| } |
| |
| let f = tree[n].freq() as usize; |
| state.opt_len += f * (bits as usize + xbits); |
| |
| if !stree.is_empty() { |
| state.static_len += f * (stree[n].len() as usize + xbits); |
| } |
| } |
| |
| if overflow == 0 { |
| return bl_count; |
| } |
| |
| /* Find the first bit length which could increase: */ |
| loop { |
| let mut bits = max_length as usize - 1; |
| while bl_count[bits] == 0 { |
| bits -= 1; |
| } |
| bl_count[bits] -= 1; /* move one leaf down the tree */ |
| bl_count[bits + 1] += 2; /* move one overflow item as its brother */ |
| bl_count[max_length as usize] -= 1; |
| /* The brother of the overflow item also moves one step up, |
| * but this does not affect bl_count[max_length] |
| */ |
| overflow -= 2; |
| |
| if overflow <= 0 { |
| break; |
| } |
| } |
| |
|     // Now recompute all bit lengths, scanning in increasing frequency. |
|     // (It is simpler to reconstruct all lengths instead of fixing only the |
|     // wrong ones. This idea is taken from 'ar' written by Haruhiko Okumura.) |
| let mut h = HEAP_SIZE; |
| for bits in (1..=max_length).rev() { |
| let mut n = bl_count[bits as usize]; |
| while n != 0 { |
| h -= 1; |
| let m = heap.heap[h] as usize; |
| if m > max_code { |
| continue; |
| } |
| |
| if tree[m].len() != bits { |
| // Tracev((stderr, "code %d bits %d->%u\n", m, tree[m].Len, bits)); |
| state.opt_len += (bits * tree[m].freq()) as usize; |
| state.opt_len -= (tree[m].len() * tree[m].freq()) as usize; |
| *tree[m].len_mut() = bits; |
| } |
| |
| n -= 1; |
| } |
| } |
| bl_count |
| } |
| |
| /// Checks whether a symbol is a printing character (excluding space) |
| #[allow(unused)] |
| fn isgraph(c: u8) -> bool { |
| (c > 0x20) && (c <= 0x7E) |
| } |
| |
| fn gen_codes(tree: &mut [Value], max_code: usize, bl_count: &[u16]) { |
| /* tree: the tree to decorate */ |
| /* max_code: largest code with non zero frequency */ |
| /* bl_count: number of codes at each bit length */ |
| let mut next_code = [0; MAX_BITS + 1]; /* next code value for each bit length */ |
| let mut code = 0; /* running code value */ |
| |
| /* The distribution counts are first used to generate the code values |
| * without bit reversal. |
| */ |
| for bits in 1..=MAX_BITS { |
| code = (code + bl_count[bits - 1]) << 1; |
| next_code[bits] = code; |
| } |
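|     // Illustrative example: for a complete code with lengths {1, 2, 3, 3} we get |
|     // bl_count = [_, 1, 1, 2], and the loop above computes |
|     // next_code = [_, 0b0, 0b10, 0b110], the first canonical code of each bit |
|     // length. Codes of equal length are then handed out consecutively (and |
|     // bit-reversed) in the loop below. |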
| |
| /* Check that the bit counts in bl_count are consistent. The last code |
| * must be all ones. |
| */ |
| assert!( |
| code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, |
| "inconsistent bit counts" |
| ); |
| |
| trace!("\ngen_codes: max_code {max_code} "); |
| |
| for n in 0..=max_code { |
| let len = tree[n].len(); |
| if len == 0 { |
| continue; |
| } |
| |
| /* Now reverse the bits */ |
| assert!((1..=15).contains(&len), "code length must be 1-15"); |
| *tree[n].code_mut() = next_code[len as usize].reverse_bits() >> (16 - len); |
| next_code[len as usize] += 1; |
| |
| if tree != self::trees_tbl::STATIC_LTREE.as_slice() { |
| trace!( |
| "\nn {:>3} {} l {:>2} c {:>4x} ({:x}) ", |
| n, |
| if isgraph(n as u8) { |
| char::from_u32(n as u32).unwrap() |
| } else { |
| ' ' |
| }, |
| len, |
| tree[n].code(), |
| next_code[len as usize] - 1 |
| ); |
| } |
| } |
| } |
| |
| /// repeat previous bit length 3-6 times (2 bits of repeat count) |
| const REP_3_6: usize = 16; |
| |
| /// repeat a zero length 3-10 times (3 bits of repeat count) |
| const REPZ_3_10: usize = 17; |
| |
| /// repeat a zero length 11-138 times (7 bits of repeat count) |
| const REPZ_11_138: usize = 18; |
| |
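| // Illustrative example: scanning the code length sequence `5, 5, 5, 5` followed by |
| // eleven zeros increments freq[5] once, freq[REP_3_6] once (covering the three |
| // repeats of 5), and freq[REPZ_11_138] once (covering the run of zeros), instead |
| // of recording fifteen individual lengths. |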
| fn scan_tree(bl_desc: &mut TreeDesc<{ 2 * BL_CODES + 1 }>, tree: &mut [Value], max_code: usize) { |
| /* tree: the tree to be scanned */ |
| /* max_code: and its largest code of non zero frequency */ |
| let mut prevlen = -1isize; /* last emitted length */ |
| let mut curlen: isize; /* length of current code */ |
| let mut nextlen = tree[0].len(); /* length of next code */ |
| let mut count = 0; /* repeat count of the current code */ |
| let mut max_count = 7; /* max repeat count */ |
| let mut min_count = 4; /* min repeat count */ |
| |
| if nextlen == 0 { |
| max_count = 138; |
| min_count = 3; |
| } |
| |
| *tree[max_code + 1].len_mut() = 0xffff; /* guard */ |
| |
| let bl_tree = &mut bl_desc.dyn_tree; |
| |
| for n in 0..=max_code { |
| curlen = nextlen as isize; |
| nextlen = tree[n + 1].len(); |
| count += 1; |
| if count < max_count && curlen == nextlen as isize { |
| continue; |
| } else if count < min_count { |
| *bl_tree[curlen as usize].freq_mut() += count; |
| } else if curlen != 0 { |
| if curlen != prevlen { |
| *bl_tree[curlen as usize].freq_mut() += 1; |
| } |
| *bl_tree[REP_3_6].freq_mut() += 1; |
| } else if count <= 10 { |
| *bl_tree[REPZ_3_10].freq_mut() += 1; |
| } else { |
| *bl_tree[REPZ_11_138].freq_mut() += 1; |
| } |
| |
| count = 0; |
| prevlen = curlen; |
| |
| if nextlen == 0 { |
| max_count = 138; |
| min_count = 3; |
| } else if curlen == nextlen as isize { |
| max_count = 6; |
| min_count = 3; |
| } else { |
| max_count = 7; |
| min_count = 4; |
| } |
| } |
| } |
| |
| fn send_all_trees(state: &mut State, lcodes: usize, dcodes: usize, blcodes: usize) { |
| assert!( |
| lcodes >= 257 && dcodes >= 1 && blcodes >= 4, |
| "not enough codes" |
| ); |
| assert!( |
| lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, |
| "too many codes" |
| ); |
| |
| trace!("\nbl counts: "); |
| state.bit_writer.send_bits(lcodes as u64 - 257, 5); /* not +255 as stated in appnote.txt */ |
| state.bit_writer.send_bits(dcodes as u64 - 1, 5); |
| state.bit_writer.send_bits(blcodes as u64 - 4, 4); /* not -3 as stated in appnote.txt */ |
| |
| for rank in 0..blcodes { |
| trace!("\nbl code {:>2} ", StaticTreeDesc::BL_ORDER[rank]); |
| state.bit_writer.send_bits( |
| state.bl_desc.dyn_tree[StaticTreeDesc::BL_ORDER[rank] as usize].len() as u64, |
| 3, |
| ); |
| } |
| trace!("\nbl tree: sent {}", state.bit_writer.bits_sent); |
| |
| // literal tree |
| state |
| .bit_writer |
| .send_tree(&state.l_desc.dyn_tree, &state.bl_desc.dyn_tree, lcodes - 1); |
| trace!("\nlit tree: sent {}", state.bit_writer.bits_sent); |
| |
| // distance tree |
| state |
| .bit_writer |
| .send_tree(&state.d_desc.dyn_tree, &state.bl_desc.dyn_tree, dcodes - 1); |
| trace!("\ndist tree: sent {}", state.bit_writer.bits_sent); |
| } |
| |
| /// Construct the Huffman tree for the bit lengths and return the index in |
| /// bl_order of the last bit length code to send. |
| fn build_bl_tree(state: &mut State) -> usize { |
| /* Determine the bit length frequencies for literal and distance trees */ |
| |
| scan_tree( |
| &mut state.bl_desc, |
| &mut state.l_desc.dyn_tree, |
| state.l_desc.max_code, |
| ); |
| |
| scan_tree( |
| &mut state.bl_desc, |
| &mut state.d_desc.dyn_tree, |
| state.d_desc.max_code, |
| ); |
| |
| /* Build the bit length tree: */ |
| { |
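|         // Temporarily move `bl_desc` out of `state`, so that `state` can be passed |
|         // to `build_tree` mutably without aliasing the descriptor being built. |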
| let mut tmp = TreeDesc::EMPTY; |
| core::mem::swap(&mut tmp, &mut state.bl_desc); |
| build_tree(state, &mut tmp); |
| core::mem::swap(&mut tmp, &mut state.bl_desc); |
| } |
| |
| /* opt_len now includes the length of the tree representations, except |
| * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. |
| */ |
| |
| /* Determine the number of bit length codes to send. The pkzip format |
| * requires that at least 4 bit length codes be sent. (appnote.txt says |
| * 3 but the actual value used is 4.) |
| */ |
| let mut max_blindex = BL_CODES - 1; |
| while max_blindex >= 3 { |
| let index = StaticTreeDesc::BL_ORDER[max_blindex] as usize; |
| if state.bl_desc.dyn_tree[index].len() != 0 { |
| break; |
| } |
| |
| max_blindex -= 1; |
| } |
| |
| /* Update opt_len to include the bit length tree and counts */ |
| state.opt_len += 3 * (max_blindex + 1) + 5 + 5 + 4; |
| trace!( |
| "\ndyn trees: dyn {}, stat {}", |
| state.opt_len, |
| state.static_len |
| ); |
| |
| max_blindex |
| } |
| |
| fn zng_tr_flush_block( |
| stream: &mut DeflateStream, |
| window_offset: Option<usize>, |
| stored_len: u32, |
| last: bool, |
| ) { |
| /* window_offset: offset of the input block into the window */ |
| /* stored_len: length of input block */ |
| /* last: one if this is the last block for a file */ |
| |
| let mut opt_lenb; |
| let static_lenb; |
| let mut max_blindex = 0; |
| |
| let state = &mut stream.state; |
| |
| if state.sym_buf.is_empty() { |
| opt_lenb = 0; |
| static_lenb = 0; |
| state.static_len = 7; |
| } else if state.level > 0 { |
| if stream.data_type == DataType::Unknown as i32 { |
| stream.data_type = State::detect_data_type(&state.l_desc.dyn_tree) as i32; |
| } |
| |
| { |
| let mut tmp = TreeDesc::EMPTY; |
| core::mem::swap(&mut tmp, &mut state.l_desc); |
| |
| build_tree(state, &mut tmp); |
| core::mem::swap(&mut tmp, &mut state.l_desc); |
| |
| trace!( |
| "\nlit data: dyn {}, stat {}", |
| state.opt_len, |
| state.static_len |
| ); |
| } |
| |
| { |
| let mut tmp = TreeDesc::EMPTY; |
| core::mem::swap(&mut tmp, &mut state.d_desc); |
| build_tree(state, &mut tmp); |
| core::mem::swap(&mut tmp, &mut state.d_desc); |
| |
| trace!( |
| "\ndist data: dyn {}, stat {}", |
| state.opt_len, |
| state.static_len |
| ); |
| } |
| |
| // Build the bit length tree for the above two trees, and get the index |
| // in bl_order of the last bit length code to send. |
| max_blindex = build_bl_tree(state); |
| |
| // Determine the best encoding. Compute the block lengths in bytes. |
| opt_lenb = (state.opt_len + 3 + 7) >> 3; |
| static_lenb = (state.static_len + 3 + 7) >> 3; |
| |
| trace!( |
| "\nopt {}({}) stat {}({}) stored {} lit {} ", |
| opt_lenb, |
| state.opt_len, |
| static_lenb, |
| state.static_len, |
| stored_len, |
| state.sym_buf.len() / 3 |
| ); |
| |
| if static_lenb <= opt_lenb || state.strategy == Strategy::Fixed { |
| opt_lenb = static_lenb; |
| } |
| } else { |
| assert!(window_offset.is_some(), "lost buf"); |
| /* force a stored block */ |
| opt_lenb = stored_len as usize + 5; |
| static_lenb = stored_len as usize + 5; |
| } |
| |
| if stored_len as usize + 4 <= opt_lenb && window_offset.is_some() { |
|         /* 4: two words for the lengths. |
|          * The `window_offset.is_some()` check is only necessary if LIT_BUFSIZE > WSIZE. |
|          * Otherwise we can't have processed more than WSIZE input bytes since |
|          * the last block flush, because compression would have been |
|          * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to |
|          * transform a block into a stored block. |
|          */ |
| let window_offset = window_offset.unwrap(); |
| let range = window_offset..window_offset + stored_len as usize; |
| zng_tr_stored_block(state, range, last); |
| } else if static_lenb == opt_lenb { |
| state.bit_writer.emit_tree(BlockType::StaticTrees, last); |
| state.compress_block_static_trees(); |
| // cmpr_bits_add(s, s.static_len); |
| } else { |
| state.bit_writer.emit_tree(BlockType::DynamicTrees, last); |
| send_all_trees( |
| state, |
| state.l_desc.max_code + 1, |
| state.d_desc.max_code + 1, |
| max_blindex + 1, |
| ); |
| |
| state.compress_block_dynamic_trees(); |
| } |
| |
| // TODO |
| // This check is made mod 2^32, for files larger than 512 MB and unsigned long implemented on 32 bits. |
| // assert_eq!(state.compressed_len, state.bits_sent, "bad compressed size"); |
| |
| state.init_block(); |
| if last { |
| state.bit_writer.emit_align(); |
| } |
| |
| // Tracev((stderr, "\ncomprlen {}(%lu) ", s->compressed_len>>3, s->compressed_len-7*last)); |
| } |
| |
| pub(crate) fn flush_block_only(stream: &mut DeflateStream, is_last: bool) { |
| zng_tr_flush_block( |
| stream, |
| (stream.state.block_start >= 0).then_some(stream.state.block_start as usize), |
| (stream.state.strstart as isize - stream.state.block_start) as u32, |
| is_last, |
| ); |
| |
| stream.state.block_start = stream.state.strstart as isize; |
| flush_pending(stream) |
| } |
| |
| fn flush_bytes(stream: &mut DeflateStream, mut bytes: &[u8]) -> ControlFlow<ReturnCode> { |
| let mut state = &mut stream.state; |
| |
| // we'll be using the pending buffer as temporary storage |
| let mut beg = state.bit_writer.pending.pending().len(); /* start of bytes to update crc */ |
| |
| while state.bit_writer.pending.remaining() < bytes.len() { |
| let copy = state.bit_writer.pending.remaining(); |
| |
| state.bit_writer.pending.extend(&bytes[..copy]); |
| |
| stream.adler = crc32( |
| stream.adler as u32, |
| &state.bit_writer.pending.pending()[beg..], |
| ) as z_checksum; |
| |
| state.gzindex += copy; |
| flush_pending(stream); |
| state = &mut stream.state; |
| |
| // could not flush all the pending output |
| if !state.bit_writer.pending.pending().is_empty() { |
| state.last_flush = -1; |
| return ControlFlow::Break(ReturnCode::Ok); |
| } |
| |
| beg = 0; |
| bytes = &bytes[copy..]; |
| } |
| |
| state.bit_writer.pending.extend(bytes); |
| |
| stream.adler = crc32( |
| stream.adler as u32, |
| &state.bit_writer.pending.pending()[beg..], |
| ) as z_checksum; |
| state.gzindex = 0; |
| |
| ControlFlow::Continue(()) |
| } |
| |
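| /// Compresses as much data as possible, stopping when the input is exhausted or |
| /// the output buffer is full, mirroring zlib's `deflate`. The caller sets |
| /// `next_in`, `avail_in`, `next_out` and `avail_out` and calls this function in a |
| /// loop; see [`compress_with_flush`] below for an example of such a loop. |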
| pub fn deflate(stream: &mut DeflateStream, flush: DeflateFlush) -> ReturnCode { |
| if stream.next_out.is_null() |
| || (stream.avail_in != 0 && stream.next_in.is_null()) |
| || (stream.state.status == Status::Finish && flush != DeflateFlush::Finish) |
| { |
| let err = ReturnCode::StreamError; |
| stream.msg = err.error_message(); |
| return err; |
| } |
| |
| if stream.avail_out == 0 { |
| let err = ReturnCode::BufError; |
| stream.msg = err.error_message(); |
| return err; |
| } |
| |
| let old_flush = stream.state.last_flush; |
| stream.state.last_flush = flush as i8; |
| |
| /* Flush as much pending output as possible */ |
| if !stream.state.bit_writer.pending.pending().is_empty() { |
| flush_pending(stream); |
| if stream.avail_out == 0 { |
| /* Since avail_out is 0, deflate will be called again with |
| * more output space, but possibly with both pending and |
| * avail_in equal to zero. There won't be anything to do, |
| * but this is not an error situation so make sure we |
| * return OK instead of BUF_ERROR at next call of deflate: |
| */ |
| stream.state.last_flush = -1; |
| return ReturnCode::Ok; |
| } |
| |
| /* Make sure there is something to do and avoid duplicate consecutive |
| * flushes. For repeated and useless calls with Z_FINISH, we keep |
| * returning Z_STREAM_END instead of Z_BUF_ERROR. |
| */ |
| } else if stream.avail_in == 0 |
| && rank_flush(flush as i8) <= rank_flush(old_flush) |
| && flush != DeflateFlush::Finish |
| { |
| let err = ReturnCode::BufError; |
| stream.msg = err.error_message(); |
| return err; |
| } |
| |
| /* User must not provide more input after the first FINISH: */ |
| if stream.state.status == Status::Finish && stream.avail_in != 0 { |
| let err = ReturnCode::BufError; |
| stream.msg = err.error_message(); |
| return err; |
| } |
| |
| /* Write the header */ |
| if stream.state.status == Status::Init && stream.state.wrap == 0 { |
| stream.state.status = Status::Busy; |
| } |
| |
| if stream.state.status == Status::Init { |
| let header = stream.state.header(); |
| stream |
| .state |
| .bit_writer |
| .pending |
| .extend(&header.to_be_bytes()); |
| |
| /* Save the adler32 of the preset dictionary: */ |
| if stream.state.strstart != 0 { |
| let adler = stream.adler as u32; |
| stream.state.bit_writer.pending.extend(&adler.to_be_bytes()); |
| } |
| |
| stream.adler = ADLER32_INITIAL_VALUE as _; |
| stream.state.status = Status::Busy; |
| |
| // compression must start with an empty pending buffer |
| flush_pending(stream); |
| |
| if !stream.state.bit_writer.pending.pending().is_empty() { |
| stream.state.last_flush = -1; |
| |
| return ReturnCode::Ok; |
| } |
| } |
| |
| if stream.state.status == Status::GZip { |
| /* gzip header */ |
| stream.state.crc_fold = Crc32Fold::new(); |
| |
| stream.state.bit_writer.pending.extend(&[31, 139, 8]); |
| |
| let extra_flags = if stream.state.level == 9 { |
| 2 |
| } else if stream.state.strategy >= Strategy::HuffmanOnly || stream.state.level < 2 { |
| 4 |
| } else { |
| 0 |
| }; |
| |
| match &stream.state.gzhead { |
| None => { |
| let bytes = [0, 0, 0, 0, 0, extra_flags, gz_header::OS_CODE]; |
| stream.state.bit_writer.pending.extend(&bytes); |
| stream.state.status = Status::Busy; |
| |
| /* Compression must start with an empty pending buffer */ |
| flush_pending(stream); |
| if !stream.state.bit_writer.pending.pending().is_empty() { |
| stream.state.last_flush = -1; |
| return ReturnCode::Ok; |
| } |
| } |
| Some(gzhead) => { |
| stream.state.bit_writer.pending.extend(&[gzhead.flags()]); |
| let bytes = (gzhead.time as u32).to_le_bytes(); |
| stream.state.bit_writer.pending.extend(&bytes); |
| stream |
| .state |
| .bit_writer |
| .pending |
| .extend(&[extra_flags, gzhead.os as u8]); |
| |
| if !gzhead.extra.is_null() { |
| let bytes = (gzhead.extra_len as u16).to_le_bytes(); |
| stream.state.bit_writer.pending.extend(&bytes); |
| } |
| |
| if gzhead.hcrc != 0 { |
| stream.adler = crc32( |
| stream.adler as u32, |
| stream.state.bit_writer.pending.pending(), |
| ) as z_checksum |
| } |
| |
| stream.state.gzindex = 0; |
| stream.state.status = Status::Extra; |
| } |
| } |
| } |
| |
| if stream.state.status == Status::Extra { |
| if let Some(gzhead) = stream.state.gzhead.as_ref() { |
| if !gzhead.extra.is_null() { |
| let gzhead_extra = gzhead.extra; |
| |
| let extra = unsafe { |
| core::slice::from_raw_parts( |
| // SAFETY: gzindex is always less than extra_len, and the user |
| // guarantees the pointer is valid for extra_len. |
| gzhead_extra.add(stream.state.gzindex), |
| (gzhead.extra_len & 0xffff) as usize - stream.state.gzindex, |
| ) |
| }; |
| |
| if let ControlFlow::Break(err) = flush_bytes(stream, extra) { |
| return err; |
| } |
| } |
| } |
| stream.state.status = Status::Name; |
| } |
| |
| if stream.state.status == Status::Name { |
| if let Some(gzhead) = stream.state.gzhead.as_ref() { |
| if !gzhead.name.is_null() { |
| // SAFETY: user satisfies precondition that gzhead.name is a C string. |
| let gzhead_name = unsafe { CStr::from_ptr(gzhead.name.cast()) }; |
| let bytes = gzhead_name.to_bytes_with_nul(); |
| if let ControlFlow::Break(err) = flush_bytes(stream, bytes) { |
| return err; |
| } |
| } |
| stream.state.status = Status::Comment; |
| } |
| } |
| |
| if stream.state.status == Status::Comment { |
| if let Some(gzhead) = stream.state.gzhead.as_ref() { |
| if !gzhead.comment.is_null() { |
|                 // SAFETY: user satisfies precondition that gzhead.comment is a C string. |
| let gzhead_comment = unsafe { CStr::from_ptr(gzhead.comment.cast()) }; |
| let bytes = gzhead_comment.to_bytes_with_nul(); |
| if let ControlFlow::Break(err) = flush_bytes(stream, bytes) { |
| return err; |
| } |
| } |
| stream.state.status = Status::Hcrc; |
| } |
| } |
| |
| if stream.state.status == Status::Hcrc { |
| if let Some(gzhead) = stream.state.gzhead.as_ref() { |
| if gzhead.hcrc != 0 { |
| let bytes = (stream.adler as u16).to_le_bytes(); |
| if let ControlFlow::Break(err) = flush_bytes(stream, &bytes) { |
| return err; |
| } |
| } |
| } |
| |
| stream.state.status = Status::Busy; |
| |
| // compression must start with an empty pending buffer |
| flush_pending(stream); |
| if !stream.state.bit_writer.pending.pending().is_empty() { |
| stream.state.last_flush = -1; |
| return ReturnCode::Ok; |
| } |
| } |
| |
| // Start a new block or continue the current one. |
| let state = &mut stream.state; |
| if stream.avail_in != 0 |
| || state.lookahead != 0 |
| || (flush != DeflateFlush::NoFlush && state.status != Status::Finish) |
| { |
| let bstate = self::algorithm::run(stream, flush); |
| |
| let state = &mut stream.state; |
| |
| if matches!(bstate, BlockState::FinishStarted | BlockState::FinishDone) { |
| state.status = Status::Finish; |
| } |
| |
| match bstate { |
| BlockState::NeedMore | BlockState::FinishStarted => { |
| if stream.avail_out == 0 { |
| state.last_flush = -1; /* avoid BUF_ERROR next call, see above */ |
| } |
| return ReturnCode::Ok; |
| /* If flush != Z_NO_FLUSH && avail_out == 0, the next call |
| * of deflate should use the same flush parameter to make sure |
| * that the flush is complete. So we don't have to output an |
| * empty block here, this will be done at next call. This also |
| * ensures that for a very small output buffer, we emit at most |
| * one empty block. |
| */ |
| } |
| BlockState::BlockDone => { |
| match flush { |
| DeflateFlush::NoFlush => unreachable!("condition of inner surrounding if"), |
| DeflateFlush::PartialFlush => { |
| state.bit_writer.align(); |
| } |
| DeflateFlush::SyncFlush => { |
| // add an empty stored block that is marked as not final. This is useful for |
| // parallel deflate where we want to make sure the intermediate blocks are not |
| // marked as "last block". |
| zng_tr_stored_block(state, 0..0, false); |
| } |
| DeflateFlush::FullFlush => { |
| // add an empty stored block that is marked as not final. This is useful for |
| // parallel deflate where we want to make sure the intermediate blocks are not |
| // marked as "last block". |
| zng_tr_stored_block(state, 0..0, false); |
| |
| state.head.as_mut_slice().fill(0); // forget history |
| |
| if state.lookahead == 0 { |
| state.strstart = 0; |
| state.block_start = 0; |
| state.insert = 0; |
| } |
| } |
| DeflateFlush::Block => { /* fall through */ } |
| DeflateFlush::Finish => unreachable!("condition of outer surrounding if"), |
| } |
| |
| flush_pending(stream); |
| |
| if stream.avail_out == 0 { |
| stream.state.last_flush = -1; /* avoid BUF_ERROR at next call, see above */ |
| return ReturnCode::Ok; |
| } |
| } |
| BlockState::FinishDone => { /* do nothing */ } |
| } |
| } |
| |
| if flush != DeflateFlush::Finish { |
| return ReturnCode::Ok; |
| } |
| |
| // write the trailer |
| if stream.state.wrap == 2 { |
| let crc_fold = core::mem::take(&mut stream.state.crc_fold); |
| stream.adler = crc_fold.finish() as z_checksum; |
| |
| let adler = stream.adler as u32; |
| stream.state.bit_writer.pending.extend(&adler.to_le_bytes()); |
| |
| let total_in = stream.total_in as u32; |
| stream |
| .state |
| .bit_writer |
| .pending |
| .extend(&total_in.to_le_bytes()); |
| } else if stream.state.wrap == 1 { |
| let adler = stream.adler as u32; |
| stream.state.bit_writer.pending.extend(&adler.to_be_bytes()); |
| } |
| |
| flush_pending(stream); |
| |
| // If avail_out is zero, the application will call deflate again to flush the rest. |
| if stream.state.wrap > 0 { |
| stream.state.wrap = -stream.state.wrap; /* write the trailer only once! */ |
| } |
| |
| if stream.state.bit_writer.pending.pending().is_empty() { |
| assert_eq!(stream.state.bit_writer.bits_used, 0, "bi_buf not flushed"); |
| return ReturnCode::StreamEnd; |
| } |
| ReturnCode::Ok |
| } |
| |
| pub(crate) fn flush_pending(stream: &mut DeflateStream) { |
| let state = &mut stream.state; |
| |
| state.bit_writer.flush_bits(); |
| |
| let pending = state.bit_writer.pending.pending(); |
| let len = Ord::min(pending.len(), stream.avail_out as usize); |
| |
| if len == 0 { |
| return; |
| } |
| |
| trace!("\n[FLUSH {len} bytes]"); |
| // SAFETY: len is min(pending, stream.avail_out), so we won't overrun next_out. |
| unsafe { core::ptr::copy_nonoverlapping(pending.as_ptr(), stream.next_out, len) }; |
| |
| stream.next_out = stream.next_out.wrapping_add(len); |
| stream.total_out += len as crate::c_api::z_size; |
| stream.avail_out -= len as crate::c_api::uInt; |
| |
| state.bit_writer.pending.advance(len); |
| } |
| |
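| /// Compresses `input` into `output` in a single call, returning the written prefix |
| /// of `output` together with a [`ReturnCode`]. |
| /// |
| /// A minimal usage sketch (marked `ignore` since the public crate path is not |
| /// spelled out in this module): |
| /// |
| /// ```ignore |
| /// let input = b"Hello World!"; |
| /// // `compress_bound` gives an output size that is always sufficient |
| /// let mut output = vec![0u8; compress_bound(input.len())]; |
| /// let (compressed, err) = compress_slice(&mut output, input, DeflateConfig::default()); |
| /// assert_eq!(err, ReturnCode::Ok); |
| /// assert!(!compressed.is_empty()); |
| /// ``` |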
| pub fn compress_slice<'a>( |
| output: &'a mut [u8], |
| input: &[u8], |
| config: DeflateConfig, |
| ) -> (&'a mut [u8], ReturnCode) { |
| // SAFETY: a [u8] is a valid [MaybeUninit<u8>]. |
| let output_uninit = unsafe { |
| core::slice::from_raw_parts_mut(output.as_mut_ptr() as *mut MaybeUninit<u8>, output.len()) |
| }; |
| |
| compress(output_uninit, input, config) |
| } |
| |
| pub fn compress<'a>( |
| output: &'a mut [MaybeUninit<u8>], |
| input: &[u8], |
| config: DeflateConfig, |
| ) -> (&'a mut [u8], ReturnCode) { |
| compress_with_flush(output, input, config, DeflateFlush::Finish) |
| } |
| |
| pub fn compress_slice_with_flush<'a>( |
| output: &'a mut [u8], |
| input: &[u8], |
| config: DeflateConfig, |
| flush: DeflateFlush, |
| ) -> (&'a mut [u8], ReturnCode) { |
| // SAFETY: a [u8] is a valid [MaybeUninit<u8>], and `compress_with_flush` never uninitializes previously initialized memory. |
| let output_uninit = unsafe { |
| core::slice::from_raw_parts_mut(output.as_mut_ptr() as *mut MaybeUninit<u8>, output.len()) |
| }; |
| |
| compress_with_flush(output_uninit, input, config, flush) |
| } |
| |
| pub fn compress_with_flush<'a>( |
| output: &'a mut [MaybeUninit<u8>], |
| input: &[u8], |
| config: DeflateConfig, |
| final_flush: DeflateFlush, |
| ) -> (&'a mut [u8], ReturnCode) { |
| let mut stream = z_stream { |
| next_in: input.as_ptr() as *mut u8, |
| avail_in: 0, // for special logic in the first iteration |
| total_in: 0, |
| next_out: output.as_mut_ptr() as *mut u8, |
|         avail_out: 0, // for special logic in the first iteration |
| total_out: 0, |
| msg: core::ptr::null_mut(), |
| state: core::ptr::null_mut(), |
| zalloc: None, |
| zfree: None, |
| opaque: core::ptr::null_mut(), |
| data_type: 0, |
| adler: 0, |
| reserved: 0, |
| }; |
| |
| let err = init(&mut stream, config); |
| if err != ReturnCode::Ok { |
| return (&mut [], err); |
| } |
| |
| let max = core::ffi::c_uint::MAX as usize; |
| |
| let mut left = output.len(); |
| let mut source_len = input.len(); |
| |
| loop { |
| if stream.avail_out == 0 { |
| stream.avail_out = Ord::min(left, max) as _; |
| left -= stream.avail_out as usize; |
| } |
| |
| if stream.avail_in == 0 { |
| stream.avail_in = Ord::min(source_len, max) as _; |
| source_len -= stream.avail_in as usize; |
| } |
| |
| let flush = if source_len > 0 { |
| DeflateFlush::NoFlush |
| } else { |
| final_flush |
| }; |
| |
| let err = if let Some(stream) = unsafe { DeflateStream::from_stream_mut(&mut stream) } { |
| deflate(stream, flush) |
| } else { |
| ReturnCode::StreamError |
| }; |
| |
| if err != ReturnCode::Ok { |
| break; |
| } |
| } |
| |
| // SAFETY: we have now initialized these bytes |
| let output_slice = unsafe { |
| core::slice::from_raw_parts_mut(output.as_mut_ptr() as *mut u8, stream.total_out as usize) |
| }; |
| |
|     // `end` may return DataError if there was insufficient output space |
| let return_code = if let Some(stream) = unsafe { DeflateStream::from_stream_mut(&mut stream) } { |
| match end(stream) { |
| Ok(_) => ReturnCode::Ok, |
| Err(_) => ReturnCode::DataError, |
| } |
| } else { |
| ReturnCode::Ok |
| }; |
| |
| (output_slice, return_code) |
| } |
| |
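| /// Returns an upper bound on the compressed size of `source_len` bytes, assuming |
| /// the default (zlib-wrapped) stream configuration; see [`bound`] for the |
| /// stream-aware variant. For example, with the constants defined below, |
| /// `compress_bound(0)` evaluates to 1 + 1 + 0 + 3 + 6 = 11 bytes. |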
| pub const fn compress_bound(source_len: usize) -> usize { |
| compress_bound_help(source_len, ZLIB_WRAPLEN) |
| } |
| |
| const fn compress_bound_help(source_len: usize, wrap_len: usize) -> usize { |
|     source_len // The source size itself |
| // Always at least one byte for any input |
| .wrapping_add(if source_len == 0 { 1 } else { 0 }) |
| // One extra byte for lengths less than 9 |
| .wrapping_add(if source_len < 9 { 1 } else { 0 }) |
| // Source encoding overhead, padded to next full byte |
| .wrapping_add(deflate_quick_overhead(source_len)) |
| // Deflate block overhead bytes |
| .wrapping_add(DEFLATE_BLOCK_OVERHEAD) |
| // none, zlib or gzip wrapper |
| .wrapping_add(wrap_len) |
| } |
| |
| /// heap used to build the Huffman trees |
| /// |
| /// The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. |
| /// The same heap array is used to build all trees. |
| #[derive(Clone)] |
| struct Heap { |
| heap: [u32; 2 * L_CODES + 1], |
| |
| /// number of elements in the heap |
| heap_len: usize, |
| |
| /// element of the largest frequency |
| heap_max: usize, |
| |
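|     /// depth of each subtree, used as a tie breaker for trees of equal frequency |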
| depth: [u8; 2 * L_CODES + 1], |
| } |
| |
| impl Heap { |
| // an empty heap |
| fn new() -> Self { |
| Self { |
| heap: [0; 2 * L_CODES + 1], |
| heap_len: 0, |
| heap_max: 0, |
| depth: [0; 2 * L_CODES + 1], |
| } |
| } |
| |
| /// Construct the initial heap, with least frequent element in |
| /// heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. |
| fn initialize(&mut self, tree: &mut [Value]) -> isize { |
| let mut max_code = -1; |
| |
| self.heap_len = 0; |
| self.heap_max = HEAP_SIZE; |
| |
| for (n, node) in tree.iter_mut().enumerate() { |
| if node.freq() > 0 { |
| self.heap_len += 1; |
| self.heap[self.heap_len] = n as u32; |
| max_code = n as isize; |
| self.depth[n] = 0; |
| } else { |
| *node.len_mut() = 0; |
| } |
| } |
| |
| max_code |
| } |
| |
|     /// Index within the heap array of the least frequent node in the Huffman tree |
| const SMALLEST: usize = 1; |
| |
| fn pqdownheap(&mut self, tree: &[Value], mut k: usize) { |
| /* tree: the tree to restore */ |
| /* k: node to move down */ |
| |
| // Given the index $i of a node in the tree, pack the node's frequency and depth |
| // into a single integer. The heap ordering logic uses a primary sort on frequency |
| // and a secondary sort on depth, so packing both into one integer makes it |
| // possible to sort with fewer comparison operations. |
| macro_rules! freq_and_depth { |
| ($i:expr) => { |
| (tree[$i as usize].freq() as u32) << 8 | self.depth[$i as usize] as u32 |
| }; |
| } |
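|         // e.g. freq = 3 and depth = 2 pack to 0x0302: comparing packed values |
|         // compares frequencies first and falls back to depth only on a tie. |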
| |
| let v = self.heap[k]; |
| let v_val = freq_and_depth!(v); |
| let mut j = k << 1; /* left son of k */ |
| |
| while j <= self.heap_len { |
| /* Set j to the smallest of the two sons: */ |
| let mut j_val = freq_and_depth!(self.heap[j]); |
| if j < self.heap_len { |
| let j1_val = freq_and_depth!(self.heap[j + 1]); |
| if j1_val <= j_val { |
| j += 1; |
| j_val = j1_val; |
| } |
| } |
| |
| /* Exit if v is smaller than both sons */ |
| if v_val <= j_val { |
| break; |
| } |
| |
| /* Exchange v with the smallest son */ |
| self.heap[k] = self.heap[j]; |
| k = j; |
| |
| /* And continue down the tree, setting j to the left son of k */ |
| j <<= 1; |
| } |
| |
| self.heap[k] = v; |
| } |
| |
| /// Remove the smallest element from the heap and recreate the heap with |
| /// one less element. Updates heap and heap_len. |
| fn pqremove(&mut self, tree: &[Value]) -> u32 { |
| let top = self.heap[Self::SMALLEST]; |
| self.heap[Self::SMALLEST] = self.heap[self.heap_len]; |
| self.heap_len -= 1; |
| |
| self.pqdownheap(tree, Self::SMALLEST); |
| |
| top |
| } |
| |
| /// Construct the Huffman tree by repeatedly combining the least two frequent nodes. |
| fn construct_huffman_tree(&mut self, tree: &mut [Value], mut node: usize) { |
| loop { |
| let n = self.pqremove(tree) as usize; /* n = node of least frequency */ |
| let m = self.heap[Heap::SMALLEST] as usize; /* m = node of next least frequency */ |
| |
| self.heap_max -= 1; |
| self.heap[self.heap_max] = n as u32; /* keep the nodes sorted by frequency */ |
| self.heap_max -= 1; |
| self.heap[self.heap_max] = m as u32; |
| |
| /* Create a new node father of n and m */ |
| *tree[node].freq_mut() = tree[n].freq() + tree[m].freq(); |
| self.depth[node] = Ord::max(self.depth[n], self.depth[m]) + 1; |
| |
| *tree[n].dad_mut() = node as u16; |
| *tree[m].dad_mut() = node as u16; |
| |
| /* and insert the new node in the heap */ |
| self.heap[Heap::SMALLEST] = node as u32; |
| node += 1; |
| |
| self.pqdownheap(tree, Heap::SMALLEST); |
| |
| if self.heap_len < 2 { |
| break; |
| } |
| } |
| |
| self.heap_max -= 1; |
| self.heap[self.heap_max] = self.heap[Heap::SMALLEST]; |
| } |
| } |
| |
| /// # Safety |
| /// |
| /// The caller must guarantee: |
| /// |
| /// * If `head` is `Some` |
| /// - `head.extra` is `NULL` or is readable for at least `head.extra_len` bytes |
| /// - `head.name` is `NULL` or satisfies the requirements of [`core::ffi::CStr::from_ptr`] |
| /// - `head.comment` is `NULL` or satisfies the requirements of [`core::ffi::CStr::from_ptr`] |
| pub unsafe fn set_header<'a>( |
| stream: &mut DeflateStream<'a>, |
| head: Option<&'a mut gz_header>, |
| ) -> ReturnCode { |
| if stream.state.wrap != 2 { |
| ReturnCode::StreamError as _ |
| } else { |
| stream.state.gzhead = head; |
| ReturnCode::Ok as _ |
| } |
| } |
| |
| // zlib format overhead |
| const ZLIB_WRAPLEN: usize = 6; |
| // gzip format overhead |
| const GZIP_WRAPLEN: usize = 18; |
| |
| const DEFLATE_HEADER_BITS: usize = 3; |
| const DEFLATE_EOBS_BITS: usize = 15; |
| const DEFLATE_PAD_BITS: usize = 6; |
| const DEFLATE_BLOCK_OVERHEAD: usize = |
| (DEFLATE_HEADER_BITS + DEFLATE_EOBS_BITS + DEFLATE_PAD_BITS) >> 3; |
| |
| const DEFLATE_QUICK_LIT_MAX_BITS: usize = 9; |
| const fn deflate_quick_overhead(x: usize) -> usize { |
| let sum = x |
| .wrapping_mul(DEFLATE_QUICK_LIT_MAX_BITS - 8) |
| .wrapping_add(7); |
| |
| // imitate zlib-ng rounding behavior (on windows, c_ulong is 32 bits) |
| (sum as core::ffi::c_ulong >> 3) as usize |
| } |
| |
| /// For the default windowBits of 15 and memLevel of 8, this function returns |
| /// a close to exact, as well as small, upper bound on the compressed size. |
| /// These values are hardcoded here for a reason: if the defaults are ever |
| /// changed, then this function needs to be changed as well. The return |
| /// value for 15 and 8 only works for those exact settings. |
| /// |
| /// For any setting other than those defaults for windowBits and memLevel, |
| /// the value returned is a conservative worst case for the maximum expansion |
| /// resulting from using fixed blocks instead of stored blocks, which deflate |
| /// can emit on compressed data for some combinations of the parameters. |
| /// |
| /// This function could be more sophisticated to provide closer upper bounds for |
| /// every combination of windowBits and memLevel. But even the conservative |
| /// upper bound of about 14% expansion does not seem onerous for output buffer |
| /// allocation. |
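| /// |
| /// For example, `bound(None, n)` returns the conservative estimate plus the zlib |
| /// wrapper length, while `bound(Some(stream), n)` accounts for the stream's actual |
| /// wrapper: raw, zlib, or gzip (including any gzip header fields). |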
| pub fn bound(stream: Option<&mut DeflateStream>, source_len: usize) -> usize { |
| // on windows, c_ulong is only a 32-bit integer |
| let mask = core::ffi::c_ulong::MAX as usize; |
| |
| // conservative upper bound for compressed data |
| let comp_len = source_len |
| .wrapping_add((source_len.wrapping_add(7) & mask) >> 3) |
| .wrapping_add((source_len.wrapping_add(63) & mask) >> 6) |
| .wrapping_add(5); |
| |
| let Some(stream) = stream else { |
| // return conservative bound plus zlib wrapper |
| return comp_len.wrapping_add(6); |
| }; |
| |
| /* compute wrapper length */ |
| let wrap_len = match stream.state.wrap { |
| 0 => { |
| // raw deflate |
| 0 |
| } |
| 1 => { |
| // zlib wrapper |
| if stream.state.strstart != 0 { |
| ZLIB_WRAPLEN + 4 |
| } else { |
| ZLIB_WRAPLEN |
| } |
| } |
| 2 => { |
| // gzip wrapper |
| let mut gz_wrap_len = GZIP_WRAPLEN; |
| |
| if let Some(header) = &stream.state.gzhead { |
| if !header.extra.is_null() { |
| gz_wrap_len += 2 + header.extra_len as usize; |
| } |
| |
| let mut c_string = header.name; |
| if !c_string.is_null() { |
| loop { |
| gz_wrap_len += 1; |
| // SAFETY: user guarantees header.name is a valid C string. |
| unsafe { |
| if *c_string == 0 { |
| break; |
| } |
| c_string = c_string.add(1); |
| } |
| } |
| } |
| |
| let mut c_string = header.comment; |
| if !c_string.is_null() { |
| loop { |
| gz_wrap_len += 1; |
| // SAFETY: user guarantees header.comment is a valid C string. |
| unsafe { |
| if *c_string == 0 { |
| break; |
| } |
| c_string = c_string.add(1); |
| } |
| } |
| } |
| |
| if header.hcrc != 0 { |
| gz_wrap_len += 2; |
| } |
| } |
| |
| gz_wrap_len |
| } |
| _ => { |
| // default |
| ZLIB_WRAPLEN |
| } |
| }; |
| |
| if stream.state.w_bits() != MAX_WBITS as u32 || HASH_BITS < 15 { |
| if stream.state.level == 0 { |
| /* upper bound for stored blocks with length 127 (memLevel == 1) ~4% overhead plus a small constant */ |
| source_len |
| .wrapping_add(source_len >> 5) |
| .wrapping_add(source_len >> 7) |
| .wrapping_add(source_len >> 11) |
| .wrapping_add(7) |
| .wrapping_add(wrap_len) |
| } else { |
| comp_len.wrapping_add(wrap_len) |
| } |
| } else { |
| compress_bound_help(source_len, wrap_len) |
| } |
| } |
| |
| /// # Safety |
| /// |
| /// `dictionary` is either `NULL`, or writable for at least the window size (`w_size`) bytes. |
| pub unsafe fn get_dictionary(stream: &DeflateStream<'_>, dictionary: *mut u8) -> usize { |
| let s = &stream.state; |
| let len = Ord::min(s.strstart + s.lookahead, s.w_size); |
| |
| if !dictionary.is_null() && len > 0 { |
| unsafe { |
| core::ptr::copy_nonoverlapping( |
| s.window.as_ptr().add(s.strstart + s.lookahead - len), |
| dictionary, |
| len, |
| ); |
| } |
| } |
| |
| len |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use crate::{ |
| inflate::{uncompress_slice, InflateConfig, InflateStream}, |
| InflateFlush, |
| }; |
| |
| use super::*; |
| |
| use core::{ffi::CStr, sync::atomic::AtomicUsize}; |
| |
| #[test] |
| fn detect_data_type_basic() { |
| let empty = || [Value::new(0, 0); LITERALS]; |
| |
| assert_eq!(State::detect_data_type(&empty()), DataType::Binary); |
| |
| let mut binary = empty(); |
| binary[0] = Value::new(1, 0); |
| assert_eq!(State::detect_data_type(&binary), DataType::Binary); |
| |
| let mut text = empty(); |
| text[b'\r' as usize] = Value::new(1, 0); |
| assert_eq!(State::detect_data_type(&text), DataType::Text); |
| |
| let mut text = empty(); |
| text[b'a' as usize] = Value::new(1, 0); |
| assert_eq!(State::detect_data_type(&text), DataType::Text); |
| |
| let mut non_text = empty(); |
| non_text[7] = Value::new(1, 0); |
| assert_eq!(State::detect_data_type(&non_text), DataType::Binary); |
| } |
| |
| #[test] |
| fn from_stream_mut() { |
| unsafe { |
| assert!(DeflateStream::from_stream_mut(core::ptr::null_mut()).is_none()); |
| |
| let mut stream = z_stream::default(); |
| assert!(DeflateStream::from_stream_mut(&mut stream).is_none()); |
| |
| // state is still NULL |
| assert!(DeflateStream::from_stream_mut(&mut stream).is_none()); |
| |
| init(&mut stream, DeflateConfig::default()); |
| let stream = DeflateStream::from_stream_mut(&mut stream); |
| assert!(stream.is_some()); |
| |
| assert!(end(stream.unwrap()).is_ok()); |
| } |
| } |
| |
| unsafe extern "C" fn fail_nth_allocation<const N: usize>( |
| opaque: crate::c_api::voidpf, |
| items: crate::c_api::uInt, |
| size: crate::c_api::uInt, |
| ) -> crate::c_api::voidpf { |
| let count = unsafe { &*(opaque as *const AtomicUsize) }; |
| |
| if count.fetch_add(1, core::sync::atomic::Ordering::Relaxed) != N { |
|             // Must use the C allocator internally: (de)allocation behavior is |
|             // selected based on the function pointer values, and because we don't |
|             // use the rust allocator directly, the allocation logic will store the |
|             // pointer to the start of the allocation at the start of the allocation. |
| unsafe { (crate::allocate::Allocator::C.zalloc)(opaque, items, size) } |
| } else { |
| core::ptr::null_mut() |
| } |
| } |
| |
| #[test] |
| fn init_invalid_allocator() { |
| { |
| let atomic = AtomicUsize::new(0); |
| let mut stream = z_stream { |
| zalloc: Some(fail_nth_allocation::<0>), |
| zfree: Some(crate::allocate::Allocator::C.zfree), |
| opaque: &atomic as *const _ as *const core::ffi::c_void as *mut _, |
| ..z_stream::default() |
| }; |
| assert_eq!( |
| init(&mut stream, DeflateConfig::default()), |
| ReturnCode::MemError |
| ); |
| } |
| |
| { |
| let atomic = AtomicUsize::new(0); |
| let mut stream = z_stream { |
| zalloc: Some(fail_nth_allocation::<3>), |
| zfree: Some(crate::allocate::Allocator::C.zfree), |
| opaque: &atomic as *const _ as *const core::ffi::c_void as *mut _, |
| ..z_stream::default() |
| }; |
| assert_eq!( |
| init(&mut stream, DeflateConfig::default()), |
| ReturnCode::MemError |
| ); |
| } |
| |
| { |
| let atomic = AtomicUsize::new(0); |
| let mut stream = z_stream { |
| zalloc: Some(fail_nth_allocation::<5>), |
| zfree: Some(crate::allocate::Allocator::C.zfree), |
| opaque: &atomic as *const _ as *const core::ffi::c_void as *mut _, |
| ..z_stream::default() |
| }; |
| assert_eq!( |
| init(&mut stream, DeflateConfig::default()), |
| ReturnCode::MemError |
| ); |
| } |
| } |
| |
| mod copy_invalid_allocator { |
| use super::*; |
| |
| #[test] |
| fn fail_0() { |
| let mut stream = z_stream::default(); |
| |
| let atomic = AtomicUsize::new(0); |
| stream.opaque = &atomic as *const _ as *const core::ffi::c_void as *mut _; |
| stream.zalloc = Some(fail_nth_allocation::<6>); |
| stream.zfree = Some(crate::allocate::Allocator::C.zfree); |
| |
| // init performs 6 allocations; we don't want those to fail |
| assert_eq!(init(&mut stream, DeflateConfig::default()), ReturnCode::Ok); |
| |
| let Some(stream) = (unsafe { DeflateStream::from_stream_mut(&mut stream) }) else { |
| unreachable!() |
| }; |
| |
| let mut stream_copy = MaybeUninit::<DeflateStream>::zeroed(); |
| |
| assert_eq!(copy(&mut stream_copy, stream), ReturnCode::MemError); |
| |
| assert!(end(stream).is_ok()); |
| } |
| |
| #[test] |
| fn fail_3() { |
| let mut stream = z_stream::default(); |
| |
| let atomic = AtomicUsize::new(0); |
| stream.zalloc = Some(fail_nth_allocation::<{ 6 + 3 }>); |
| stream.zfree = Some(crate::allocate::Allocator::C.zfree); |
| stream.opaque = &atomic as *const _ as *const core::ffi::c_void as *mut _; |
| |
| // init performs 6 allocations; we don't want those to fail |
| assert_eq!(init(&mut stream, DeflateConfig::default()), ReturnCode::Ok); |
| |
| let Some(stream) = (unsafe { DeflateStream::from_stream_mut(&mut stream) }) else { |
| unreachable!() |
| }; |
| |
| let mut stream_copy = MaybeUninit::<DeflateStream>::zeroed(); |
| |
| assert_eq!(copy(&mut stream_copy, stream), ReturnCode::MemError); |
| |
| assert!(end(stream).is_ok()); |
| } |
| |
| #[test] |
| fn fail_5() { |
| let mut stream = z_stream::default(); |
| |
| let atomic = AtomicUsize::new(0); |
| stream.zalloc = Some(fail_nth_allocation::<{ 6 + 5 }>); |
| stream.zfree = Some(crate::allocate::Allocator::C.zfree); |
| stream.opaque = &atomic as *const _ as *const core::ffi::c_void as *mut _; |
| |
| // init performs 6 allocations; we don't want those to fail |
| assert_eq!(init(&mut stream, DeflateConfig::default()), ReturnCode::Ok); |
| |
| let Some(stream) = (unsafe { DeflateStream::from_stream_mut(&mut stream) }) else { |
| unreachable!() |
| }; |
| |
| let mut stream_copy = MaybeUninit::<DeflateStream>::zeroed(); |
| |
| assert_eq!(copy(&mut stream_copy, stream), ReturnCode::MemError); |
| |
| assert!(end(stream).is_ok()); |
| } |
| } |
| |
| mod invalid_deflate_config { |
| use super::*; |
| |
| #[test] |
| fn sanity_check() { |
| let mut stream = z_stream::default(); |
| assert_eq!(init(&mut stream, DeflateConfig::default()), ReturnCode::Ok); |
| |
| assert!(stream.zalloc.is_some()); |
| assert!(stream.zfree.is_some()); |
| |
| // this should be the default level |
| let stream = unsafe { DeflateStream::from_stream_mut(&mut stream) }.unwrap(); |
| assert_eq!(stream.state.level, 6); |
| |
| assert!(end(stream).is_ok()); |
| } |
| |
| #[test] |
| fn window_bits_correction() { |
| // window_bits of 8 gets turned into 9 internally |
| let mut stream = z_stream::default(); |
| let config = DeflateConfig { |
| window_bits: 8, |
| ..Default::default() |
| }; |
| assert_eq!(init(&mut stream, config), ReturnCode::Ok); |
| let stream = unsafe { DeflateStream::from_stream_mut(&mut stream) }.unwrap(); |
| assert_eq!(stream.state.w_bits(), 9); |
| |
| assert!(end(stream).is_ok()); |
| } |
| |
| #[test] |
| fn window_bits_too_low() { |
| let mut stream = z_stream::default(); |
| let config = DeflateConfig { |
| window_bits: -16, |
| ..Default::default() |
| }; |
| assert_eq!(init(&mut stream, config), ReturnCode::StreamError); |
| } |
| |
| #[test] |
| fn window_bits_too_high() { |
| // window bits too high |
| let mut stream = z_stream::default(); |
| let config = DeflateConfig { |
| window_bits: 42, |
| ..Default::default() |
| }; |
| assert_eq!(init(&mut stream, config), ReturnCode::StreamError); |
| } |
| } |
| |
| #[test] |
| fn end_data_error() { |
| let mut stream = z_stream::default(); |
| assert_eq!(init(&mut stream, DeflateConfig::default()), ReturnCode::Ok); |
| let stream = unsafe { DeflateStream::from_stream_mut(&mut stream) }.unwrap(); |
| |
|         // now deflate into too little output space |
| let input = b"Hello World\n"; |
| stream.next_in = input.as_ptr() as *mut u8; |
| stream.avail_in = input.len() as _; |
| let output = &mut [0, 0, 0]; |
| stream.next_out = output.as_mut_ptr(); |
| stream.avail_out = output.len() as _; |
| |
| // the deflate is fine |
| assert_eq!(deflate(stream, DeflateFlush::NoFlush), ReturnCode::Ok); |
| |
| // but end is not |
| assert!(end(stream).is_err()); |
| } |
| |
| #[test] |
| fn test_reset_keep() { |
| let mut stream = z_stream::default(); |
| assert_eq!(init(&mut stream, DeflateConfig::default()), ReturnCode::Ok); |
| let stream = unsafe { DeflateStream::from_stream_mut(&mut stream) }.unwrap(); |
| |
|         // deflate with plenty of output space |
| let input = b"Hello World\n"; |
| stream.next_in = input.as_ptr() as *mut u8; |
| stream.avail_in = input.len() as _; |
| |
| let output = &mut [0; 1024]; |
| stream.next_out = output.as_mut_ptr(); |
| stream.avail_out = output.len() as _; |
| assert_eq!(deflate(stream, DeflateFlush::Finish), ReturnCode::StreamEnd); |
| |
| assert_eq!(reset_keep(stream), ReturnCode::Ok); |
| |
| let output = &mut [0; 1024]; |
| stream.next_out = output.as_mut_ptr(); |
| stream.avail_out = output.len() as _; |
| assert_eq!(deflate(stream, DeflateFlush::Finish), ReturnCode::StreamEnd); |
| |
| assert!(end(stream).is_ok()); |
| } |
| |
| #[test] |
| fn hello_world_huffman_only() { |
| const EXPECTED: &[u8] = &[ |
| 0x78, 0x01, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x08, 0xcf, 0x2f, 0xca, 0x49, 0x51, |
| 0xe4, 0x02, 0x00, 0x20, 0x91, 0x04, 0x48, |
| ]; |
| |
| let input = "Hello World!\n"; |
| |
| let mut output = vec![0; 128]; |
| |
| let config = DeflateConfig { |
| level: 6, |
| method: Method::Deflated, |
| window_bits: crate::MAX_WBITS, |
| mem_level: DEF_MEM_LEVEL, |
| strategy: Strategy::HuffmanOnly, |
| }; |
| |
| let (output, err) = compress_slice(&mut output, input.as_bytes(), config); |
| |
| assert_eq!(err, ReturnCode::Ok); |
| |
| assert_eq!(output.len(), EXPECTED.len()); |
| |
| assert_eq!(EXPECTED, output); |
| } |
| |
| #[test] |
| fn hello_world_quick() { |
| const EXPECTED: &[u8] = &[ |
| 0x78, 0x01, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x08, 0xcf, 0x2f, 0xca, 0x49, 0x51, |
| 0xe4, 0x02, 0x00, 0x20, 0x91, 0x04, 0x48, |
| ]; |
| |
| let input = "Hello World!\n"; |
| |
| let mut output = vec![0; 128]; |
| |
| let config = DeflateConfig { |
| level: 1, |
| method: Method::Deflated, |
| window_bits: crate::MAX_WBITS, |
| mem_level: DEF_MEM_LEVEL, |
| strategy: Strategy::Default, |
| }; |
| |
| let (output, err) = compress_slice(&mut output, input.as_bytes(), config); |
| |
| assert_eq!(err, ReturnCode::Ok); |
| |
| assert_eq!(output.len(), EXPECTED.len()); |
| |
| assert_eq!(EXPECTED, output); |
| } |
| |
| #[test] |
| fn hello_world_quick_random() { |
| const EXPECTED: &[u8] = &[ |
| 0x78, 0x01, 0x53, 0xe1, 0x50, 0x51, 0xe1, 0x52, 0x51, 0x51, 0x01, 0x00, 0x03, 0xec, |
| 0x00, 0xeb, |
| ]; |
| |
| let input = "$\u{8}$$\n$$$"; |
| |
| let mut output = vec![0; 128]; |
| |
| let config = DeflateConfig { |
| level: 1, |
| method: Method::Deflated, |
| window_bits: crate::MAX_WBITS, |
| mem_level: DEF_MEM_LEVEL, |
| strategy: Strategy::Default, |
| }; |
| |
| let (output, err) = compress_slice(&mut output, input.as_bytes(), config); |
| |
| assert_eq!(err, ReturnCode::Ok); |
| |
| assert_eq!(output.len(), EXPECTED.len()); |
| |
| assert_eq!(EXPECTED, output); |
| } |
| |
| fn fuzz_based_test(input: &[u8], config: DeflateConfig, expected: &[u8]) { |
| let mut output_rs = [0; 1 << 17]; |
| let (output_rs, err) = compress_slice(&mut output_rs, input, config); |
| assert_eq!(err, ReturnCode::Ok); |
| |
| assert_eq!(output_rs, expected); |
| } |
| |
| #[test] |
| fn simple_rle() { |
| fuzz_based_test( |
| "\0\0\0\0\u{6}".as_bytes(), |
| DeflateConfig { |
| level: -1, |
| method: Method::Deflated, |
| window_bits: 11, |
| mem_level: 4, |
| strategy: Strategy::Rle, |
| }, |
| &[56, 17, 99, 0, 2, 54, 0, 0, 11, 0, 7], |
| ) |
| } |
| |
| #[test] |
| fn fill_window_out_of_bounds() { |
| const INPUT: &[u8] = &[ |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x6a, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1d, 0x1d, 0x1d, 0x1d, 0x63, |
| 0x63, 0x63, 0x63, 0x63, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, |
| 0x1d, 0x27, 0x0, 0x0, 0x0, 0x1d, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, |
| 0x71, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x31, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x1d, 0x1d, 0x0, 0x0, 0x0, 0x0, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, |
| 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x48, 0x50, |
| 0x50, 0x50, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2c, 0x0, 0x0, 0x0, 0x0, 0x4a, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x70, 0x71, 0x71, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, 0x71, 0x6a, 0x0, 0x0, 0x0, 0x0, |
| 0x71, 0x0, 0x71, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x31, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x0, 0x4a, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x70, 0x71, 0x71, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, 0x71, |
| 0x6a, 0x0, 0x0, 0x0, 0x0, 0x71, 0x0, 0x71, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x31, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1d, 0x1d, 0x0, 0x0, 0x0, 0x0, |
| 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, 0x50, |
| 0x50, 0x50, 0x50, 0x50, 0x48, 0x50, 0x0, 0x0, 0x71, 0x71, 0x71, 0x71, 0x3b, 0x3f, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x50, 0x50, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x2c, 0x0, 0x0, 0x0, 0x0, 0x4a, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x70, 0x71, 0x71, 0x0, 0x0, 0x0, 0x6, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x70, 0x71, 0x71, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x71, 0x71, 0x71, 0x71, 0x3b, 0x3f, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x3b, 0x3f, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x20, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x71, 0x75, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x10, 0x0, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x3b, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x76, 0x71, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x71, 0x71, 0x10, 0x0, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, |
| 0x71, 0x3b, 0x71, 0x71, 0x71, 0x71, 0x71, 0x71, 0x76, 0x71, 0x34, 0x34, 0x34, 0x34, |
| 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, |
| 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x0, 0x0, 0x0, 0x0, 0x0, 0x34, 0x34, 0x34, 0x34, |
| 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, |
| 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, |
| 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, |
| 0x34, 0x34, 0x30, 0x34, 0x34, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x0, 0x71, 0x0, 0x0, 0x0, 0x0, 0x6, |
| ]; |
| |
| fuzz_based_test( |
| INPUT, |
| DeflateConfig { |
| level: -1, |
| method: Method::Deflated, |
| window_bits: 9, |
| mem_level: 1, |
| strategy: Strategy::HuffmanOnly, |
| }, |
| &[ |
| 0x18, 0x19, 0x4, 0xc1, 0x21, 0x1, 0xc4, 0x0, 0x10, 0x3, 0xb0, 0x18, 0x29, 0x1e, |
| 0x7e, 0x17, 0x83, 0xf5, 0x70, 0x6c, 0xac, 0xfe, 0xc9, 0x27, 0xdb, 0xb6, 0x6f, 0xdb, |
| 0xb6, 0x6d, 0xdb, 0x80, 0x24, 0xb9, 0xbb, 0xbb, 0x24, 0x49, 0x92, 0x24, 0xf, 0x2, |
| 0xd8, 0x36, 0x0, 0xf0, 0x3, 0x0, 0x0, 0x24, 0xd0, 0xb6, 0x6d, 0xdb, 0xb6, 0x6d, |
| 0xdb, 0xbe, 0x6d, 0xf9, 0x13, 0x4, 0xc7, 0x4, 0x0, 0x80, 0x30, 0x0, 0xc3, 0x22, |
| 0x68, 0xf, 0x36, 0x90, 0xc2, 0xb5, 0xfa, 0x7f, 0x48, 0x80, 0x81, 0xb, 0x40, 0x55, |
| 0x55, 0x55, 0xd5, 0x16, 0x80, 0xaa, 0x7, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0xe, 0x7c, 0x82, 0xe0, 0x98, 0x0, 0x0, 0x0, 0x4, 0x60, 0x10, 0xf9, 0x8c, 0xe2, |
| 0xe5, 0xfa, 0x3f, 0x2, 0x54, 0x55, 0x55, 0x65, 0x0, 0xa8, 0xaa, 0xaa, 0xaa, 0xba, |
| 0x2, 0x50, 0xb5, 0x90, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x78, 0x82, 0xe0, 0xd0, |
| 0x8a, 0x41, 0x0, 0x0, 0xa2, 0x58, 0x54, 0xb7, 0x60, 0x83, 0x9a, 0x6a, 0x4, 0x96, |
| 0x87, 0xba, 0x51, 0xf8, 0xfb, 0x9b, 0x26, 0xfc, 0x0, 0x1c, 0x7, 0x6c, 0xdb, 0xb6, |
| 0x6d, 0xdb, 0xb6, 0x6d, 0xf7, 0xa8, 0x3a, 0xaf, 0xaa, 0x6a, 0x3, 0xf8, 0xc2, 0x3, |
| 0x40, 0x55, 0x55, 0x55, 0xd5, 0x5b, 0xf8, 0x80, 0xaa, 0x7a, 0xb, 0x0, 0x7f, 0x82, |
| 0xe0, 0x98, 0x0, 0x40, 0x18, 0x0, 0x82, 0xd8, 0x49, 0x40, 0x2, 0x22, 0x7e, 0xeb, |
| 0x80, 0xa6, 0xc, 0xa0, 0x9f, 0xa4, 0x2a, 0x38, 0xf, 0x0, 0x0, 0xe7, 0x1, 0xdc, |
| 0x55, 0x95, 0x17, 0x0, 0x0, 0xae, 0x0, 0x38, 0xc0, 0x67, 0xdb, 0x36, 0x80, 0x2b, |
| 0x0, 0xe, 0xf0, 0xd9, 0xf6, 0x13, 0x4, 0xc7, 0x4, 0x0, 0x0, 0x30, 0xc, 0x83, 0x22, |
| 0x69, 0x7, 0xc6, 0xea, 0xff, 0x19, 0x0, 0x0, 0x80, 0xaa, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 0x0, 0x8e, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0x6a, |
| 0xf5, 0x63, 0x60, 0x60, 0x3, 0x0, 0xee, 0x8a, 0x88, 0x67, |
| ], |
| ) |
| } |
| |
| #[test] |
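| // no custom gz_header is set, so deflate emits the default 10-byte gzip header. note the
| // xfl byte of 2 (signalling best compression, since the level is 9) followed by the os code.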
| fn gzip_no_header() { |
| let config = DeflateConfig { |
| level: 9, |
| method: Method::Deflated, |
| window_bits: 31, // gzip |
| ..Default::default() |
| }; |
| |
| let input = b"Hello World!"; |
| let os = gz_header::OS_CODE; |
| |
| fuzz_based_test( |
| input, |
| config, |
| &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 2, os, 243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, |
| 81, 4, 0, 163, 28, 41, 28, 12, 0, 0, 0, |
| ], |
| ) |
| } |
| |
| #[test] |
| #[rustfmt::skip] |
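| // fuzz-derived input, compressed with level 0 (stored blocks) inside a gzip wrapper; the
| // expected output checks that the stored bytes still feed the crc32 and length trailer.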
| fn gzip_stored_block_checksum() { |
| fuzz_based_test( |
| &[ |
| 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 9, 0, |
| ], |
| DeflateConfig { |
| level: 0, |
| method: Method::Deflated, |
| window_bits: 26, |
| mem_level: 6, |
| strategy: Strategy::Default, |
| }, |
| &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 4, gz_header::OS_CODE, 1, 18, 0, 237, 255, 27, 27, 27, 27, 27, 27, 27, |
| 27, 27, 27, 27, 27, 27, 27, 27, 27, 9, 0, 60, 101, 156, 55, 18, 0, 0, 0, |
| ], |
| ) |
| } |
| |
| #[test] |
| fn gzip_header_pending_flush() { |
| let extra = "aaaaaaaaaaaaaaaaaaaa\0"; |
| let name = "bbbbbbbbbbbbbbbbbbbb\0"; |
| let comment = "cccccccccccccccccccc\0"; |
| |
| let mut header = gz_header { |
| text: 0, |
| time: 0, |
| xflags: 0, |
| os: 0, |
| extra: extra.as_ptr() as *mut _, |
| extra_len: extra.len() as _, |
| extra_max: 0, |
| name: name.as_ptr() as *mut _, |
| name_max: 0, |
| comment: comment.as_ptr() as *mut _, |
| comm_max: 0, |
| hcrc: 1, |
| done: 0, |
| }; |
| |
| let config = DeflateConfig { |
| window_bits: 31, |
| mem_level: 1, |
| ..Default::default() |
| }; |
| |
| let mut stream = z_stream::default(); |
| assert_eq!(init(&mut stream, config), ReturnCode::Ok); |
| |
| let Some(stream) = (unsafe { DeflateStream::from_stream_mut(&mut stream) }) else { |
| unreachable!() |
| }; |
| |
| unsafe { set_header(stream, Some(&mut header)) }; |
| |
| let input = b"Hello World\n"; |
| stream.next_in = input.as_ptr() as *mut _; |
| stream.avail_in = input.len() as _; |
| |
| let mut output = [0u8; 1024]; |
| stream.next_out = output.as_mut_ptr(); |
| stream.avail_out = 100; |
| |
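| // mem_level = 1 gives lit_bufsize = 1 << (1 + 6) = 128 bytes, and the pending buffer is
| // sized at 4 * lit_bufsize = 512 bytes.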
| assert_eq!(stream.state.bit_writer.pending.capacity(), 512); |
| |
| // only 12 bytes of pending space remain, so writing the gzip header's name field forces a
| // flush of the pending buffer. but there is insufficient output space to flush it (only
| // 100 bytes)
| stream.state.bit_writer.pending.extend(&[0; 500]); |
| |
| assert_eq!(deflate(stream, DeflateFlush::Finish), ReturnCode::Ok); |
| |
| // now try that again but with sufficient output space |
| stream.avail_out = output.len() as _; |
| assert_eq!(deflate(stream, DeflateFlush::Finish), ReturnCode::StreamEnd); |
| |
| let n = stream.total_out as usize; |
| |
| assert!(end(stream).is_ok()); |
| |
| let output_rs = &mut output[..n]; |
| |
| assert_eq!(output_rs.len(), 500 + 99); |
| } |
| |
| #[test] |
| fn gzip_with_header() { |
| // this test is here mostly so we get some MIRI action on the gzip header. A test that |
| // compares behavior with zlib-ng is in the libz-rs-sys test suite |
| |
| let extra = "some extra stuff\0"; |
| let name = "nomen est omen\0"; |
| let comment = "such comment\0"; |
| |
| let mut header = gz_header { |
| text: 0, |
| time: 0, |
| xflags: 0, |
| os: 0, |
| extra: extra.as_ptr() as *mut _, |
| extra_len: extra.len() as _, |
| extra_max: 0, |
| name: name.as_ptr() as *mut _, |
| name_max: 0, |
| comment: comment.as_ptr() as *mut _, |
| comm_max: 0, |
| hcrc: 1, |
| done: 0, |
| }; |
| |
| let config = DeflateConfig { |
| window_bits: 31, |
| ..Default::default() |
| }; |
| |
| let mut stream = z_stream::default(); |
| assert_eq!(init(&mut stream, config), ReturnCode::Ok); |
| |
| let Some(stream) = (unsafe { DeflateStream::from_stream_mut(&mut stream) }) else { |
| unreachable!() |
| }; |
| |
| unsafe { set_header(stream, Some(&mut header)) }; |
| |
| let input = b"Hello World\n"; |
| stream.next_in = input.as_ptr() as *mut _; |
| stream.avail_in = input.len() as _; |
| |
| let mut output = [0u8; 256]; |
| stream.next_out = output.as_mut_ptr(); |
| stream.avail_out = output.len() as _; |
| |
| assert_eq!(deflate(stream, DeflateFlush::Finish), ReturnCode::StreamEnd); |
| |
| let n = stream.total_out as usize; |
| |
| assert!(end(stream).is_ok()); |
| |
| let output_rs = &mut output[..n]; |
| |
| assert_eq!(output_rs.len(), 81); |
| |
| { |
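| // inflate the stream we just produced and check that the extra, name, and comment
| // header fields survive the roundtrip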
| let mut stream = z_stream::default(); |
| |
| let config = InflateConfig { |
| window_bits: config.window_bits, |
| }; |
| |
| assert_eq!(crate::inflate::init(&mut stream, config), ReturnCode::Ok); |
| |
| let Some(stream) = (unsafe { InflateStream::from_stream_mut(&mut stream) }) else { |
| unreachable!(); |
| }; |
| |
| stream.next_in = output_rs.as_mut_ptr() as _; |
| stream.avail_in = output_rs.len() as _; |
| |
| let mut output = [0u8; 12]; |
| stream.next_out = output.as_mut_ptr(); |
| stream.avail_out = output.len() as _; |
| |
| let mut extra_buf = [0u8; 64]; |
| let mut name_buf = [0u8; 64]; |
| let mut comment_buf = [0u8; 64]; |
| |
| let mut header = gz_header { |
| text: 0, |
| time: 0, |
| xflags: 0, |
| os: 0, |
| extra: extra_buf.as_mut_ptr(), |
| extra_len: 0, |
| extra_max: extra_buf.len() as _, |
| name: name_buf.as_mut_ptr(), |
| name_max: name_buf.len() as _, |
| comment: comment_buf.as_mut_ptr(), |
| comm_max: comment_buf.len() as _, |
| hcrc: 0, |
| done: 0, |
| }; |
| |
| assert_eq!( |
| unsafe { crate::inflate::get_header(stream, Some(&mut header)) }, |
| ReturnCode::Ok |
| ); |
| |
| assert_eq!( |
| unsafe { crate::inflate::inflate(stream, InflateFlush::Finish) }, |
| ReturnCode::StreamEnd |
| ); |
| |
| crate::inflate::end(stream); |
| |
| assert!(!header.comment.is_null()); |
| assert_eq!( |
| unsafe { CStr::from_ptr(header.comment.cast()) } |
| .to_str() |
| .unwrap(), |
| comment.trim_end_matches('\0') |
| ); |
| |
| assert!(!header.name.is_null()); |
| assert_eq!( |
| unsafe { CStr::from_ptr(header.name.cast()) } |
| .to_str() |
| .unwrap(), |
| name.trim_end_matches('\0') |
| ); |
| |
| assert!(!header.extra.is_null()); |
| assert_eq!( |
| unsafe { CStr::from_ptr(header.extra.cast()) } |
| .to_str() |
| .unwrap(), |
| extra.trim_end_matches('\0') |
| ); |
| } |
| } |
| |
| #[test] |
| fn insufficient_compress_space() { |
| const DATA: &[u8] = include_bytes!("deflate/test-data/inflate_buf_error.dat"); |
| |
| fn helper(deflate_buf: &mut [u8]) -> ReturnCode { |
| let config = DeflateConfig { |
| level: 0, |
| method: Method::Deflated, |
| window_bits: 10, |
| mem_level: 6, |
| strategy: Strategy::Default, |
| }; |
| |
| let (output, err) = compress_slice(deflate_buf, DATA, config); |
| assert_eq!(err, ReturnCode::Ok); |
| |
| let config = InflateConfig { |
| window_bits: config.window_bits, |
| }; |
| |
| let mut uncompr = [0; 1 << 17]; |
| let (uncompr, err) = uncompress_slice(&mut uncompr, output, config); |
| |
| if err == ReturnCode::Ok { |
| assert_eq!(DATA, uncompr); |
| } |
| |
| err |
| } |
| |
| let mut output = [0; 1 << 17]; |
| |
| // this is too little space: level 0 stores the data uncompressed, so the output needs
| // slightly more room than the original input
| assert_eq!(helper(&mut output[..1 << 16]), ReturnCode::DataError); |
| |
| // this is sufficient space |
| assert_eq!(helper(&mut output), ReturnCode::Ok); |
| } |
| |
| fn test_flush(flush: DeflateFlush, expected: &[u8]) { |
| let input = b"Hello World!\n"; |
| |
| let config = DeflateConfig { |
| level: 6,
| method: Method::Deflated,
| window_bits: 16 + crate::MAX_WBITS, // use a gzip wrapper
| mem_level: DEF_MEM_LEVEL, |
| strategy: Strategy::Default, |
| }; |
| |
| let mut output_rs = vec![0; 128]; |
| |
| // with the flush modes that we test here, the deflate process still has `Status::Busy`, |
| // and the `deflateEnd` function will return `DataError`. |
| let expected_err = ReturnCode::DataError; |
| |
| let (rs, err) = compress_slice_with_flush(&mut output_rs, input, config, flush); |
| assert_eq!(expected_err, err); |
| |
| assert_eq!(rs, expected); |
| } |
| |
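| // the four tests below pin down the exact bytes each flush mode produces for the same input.
| // `SyncFlush` and `FullFlush` end on a byte boundary with the empty stored block
| // `0, 0, 255, 255`; `PartialFlush` instead ends with a short empty fixed-huffman block; and
| // `Block` stops at the end of the deflate block without aligning to a byte boundary.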
| #[test] |
| #[rustfmt::skip] |
| fn sync_flush() { |
| test_flush( |
| DeflateFlush::SyncFlush, |
| &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 0, gz_header::OS_CODE, 242, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, |
| 81, 228, 2, 0, 0, 0, 255, 255, |
| ], |
| ) |
| } |
| |
| #[test] |
| #[rustfmt::skip] |
| fn partial_flush() { |
| test_flush( |
| DeflateFlush::PartialFlush, |
| &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 0, gz_header::OS_CODE, 242, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, |
| 81, 228, 2, 8, |
| ], |
| ); |
| } |
| |
| #[test] |
| #[rustfmt::skip] |
| fn full_flush() { |
| test_flush( |
| DeflateFlush::FullFlush, |
| &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 0, gz_header::OS_CODE, 242, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, |
| 81, 228, 2, 0, 0, 0, 255, 255, |
| ], |
| ); |
| } |
| |
| #[test] |
| #[rustfmt::skip] |
| fn block_flush() { |
| test_flush( |
| DeflateFlush::Block, |
| &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 0, gz_header::OS_CODE, 242, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, |
| 81, 228, 2, |
| ], |
| ); |
| } |
| |
| #[test] |
| // splits the input into two, deflates the halves separately, and then joins the deflated byte
| // streams into something that can be correctly inflated again. This is the basic idea behind
| // pigz, and allows for parallel compression.
| fn split_deflate() { |
| let input = "Hello World!\n"; |
| |
| let (input1, input2) = input.split_at(6); |
| |
| let mut output1 = vec![0; 128]; |
| let mut output2 = vec![0; 128]; |
| |
| let config = DeflateConfig { |
| level: 6,
| method: Method::Deflated,
| window_bits: 16 + crate::MAX_WBITS, // use a gzip wrapper
| mem_level: DEF_MEM_LEVEL, |
| strategy: Strategy::Default, |
| }; |
| |
| // see also the docs on `SyncFlush`: it makes sure that everything is flushed, that the
| // output ends on a byte boundary, and that the final block does not have the "last block"
| // bit set.
| let (prefix, err) = compress_slice_with_flush( |
| &mut output1, |
| input1.as_bytes(), |
| config, |
| DeflateFlush::SyncFlush, |
| ); |
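| // `SyncFlush` does not finish the deflate stream, so the `deflateEnd` call inside the
| // slice helper reports `DataError` (compare `test_flush` above); the bytes written so far
| // are still valid.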
| assert_eq!(err, ReturnCode::DataError); |
| |
| let (output2, err) = compress_slice_with_flush( |
| &mut output2, |
| input2.as_bytes(), |
| config, |
| DeflateFlush::Finish, |
| ); |
| assert_eq!(err, ReturnCode::Ok); |
| |
| let inflate_config = crate::inflate::InflateConfig { |
| window_bits: 16 + 15, |
| }; |
| |
| // cuts off the 8-byte gzip trailer from the back: 4 bytes of crc32, then 4 bytes of length
| let (suffix, end) = output2.split_at(output2.len() - 8); |
| let (crc2, len2) = end.split_at(4); |
| let crc2 = u32::from_le_bytes(crc2.try_into().unwrap()); |
| |
| // cuts off the gzip header (10 bytes) from the front |
| let suffix = &suffix[10..]; |
| |
| let mut result: Vec<u8> = Vec::new(); |
| result.extend(prefix.iter()); |
| result.extend(suffix); |
| |
| // it would be more proper to use `stream.total_in` here, but the slice helpers hide the
| // stream, so we cheat a bit and use the input length directly
| let len1 = input1.len() as u32; |
| let len2 = u32::from_le_bytes(len2.try_into().unwrap()); |
| assert_eq!(len2 as usize, input2.len()); |
| |
| let crc1 = crate::crc32(0, input1.as_bytes()); |
| let crc = crate::crc32_combine(crc1, crc2, len2 as u64); |
| |
| // combined crc of the parts should be the crc of the whole |
| let crc_cheating = crate::crc32(0, input.as_bytes()); |
| assert_eq!(crc, crc_cheating); |
| |
| // write the trailer |
| result.extend(crc.to_le_bytes()); |
| result.extend((len1 + len2).to_le_bytes()); |
| |
| let mut output = vec![0; 128]; |
| let (output, err) = crate::inflate::uncompress_slice(&mut output, &result, inflate_config); |
| assert_eq!(err, ReturnCode::Ok); |
| |
| assert_eq!(output, input.as_bytes()); |
| } |
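| 
| // a minimal sketch of the property that `split_deflate` relies on, using only `crate::crc32`
| // and `crate::crc32_combine` as they are used above: combining the crc32 values of the two
| // halves of an input must equal the crc32 of the whole input.
| #[test]
| fn crc32_combine_of_halves_matches_whole() {
| let data = b"Hello World!\n";
| 
| let (front, back) = data.split_at(6);
| 
| // checksum each half independently, as two parallel compressors would
| let crc_front = crate::crc32(0, front);
| let crc_back = crate::crc32(0, back);
| 
| // combining requires the length of the second part
| let combined = crate::crc32_combine(crc_front, crc_back, back.len() as u64);
| 
| assert_eq!(combined, crate::crc32(0, data));
| }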
| |
| #[test] |
| fn inflate_window_copy_slice() { |
| let uncompressed = [ |
| 9, 126, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 76, 33, 8, 2, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, |
| 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 76, 33, 8, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 12, 10, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 14, 0, 0, 0, 0, 0, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 9, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 12, 28, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 12, 10, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 14, 0, 0, 0, 0, 0, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 9, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, |
| 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 12, 28, 0, 2, 0, 0, 0, 63, 1, 0, 12, 2, |
| 36, 0, 28, 0, 0, 0, 1, 0, 0, 63, 63, 13, 0, 0, 0, 0, 0, 0, 0, 63, 63, 63, 63, 0, 0, 0, |
| 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 12, 33, 2, 0, 0, 8, |
| 0, 4, 0, 0, 0, 12, 10, 41, 12, 10, 47, |
| ]; |
| |
| let compressed = &[ |
| 31, 139, 8, 0, 0, 0, 0, 0, 4, 3, 181, 193, 49, 14, 194, 32, 24, 128, 209, 175, 192, 0, |
| 228, 151, 232, 206, 66, 226, 226, 96, 60, 2, 113, 96, 235, 13, 188, 139, 103, 23, 106, |
| 104, 108, 100, 49, 169, 239, 185, 39, 11, 199, 7, 51, 39, 171, 248, 118, 226, 63, 52, |
| 157, 120, 86, 102, 78, 86, 209, 104, 58, 241, 84, 129, 166, 12, 4, 154, 178, 229, 202, |
| 30, 36, 130, 166, 19, 79, 21, 104, 202, 64, 160, 41, 91, 174, 236, 65, 34, 10, 200, 19, |
| 162, 206, 68, 96, 130, 156, 15, 188, 229, 138, 197, 157, 161, 35, 3, 87, 126, 245, 0, |
| 28, 224, 64, 146, 2, 139, 1, 196, 95, 196, 223, 94, 10, 96, 92, 33, 86, 2, 0, 0, |
| ]; |
| |
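| // window_bits 25 = 16 + 9: a gzip wrapper with a small 512-byte window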
| let config = InflateConfig { window_bits: 25 }; |
| |
| let mut dest_vec_rs = vec![0u8; uncompressed.len()]; |
| let (output_rs, error) = |
| crate::inflate::uncompress_slice(&mut dest_vec_rs, compressed, config); |
| |
| assert_eq!(ReturnCode::Ok, error); |
| assert_eq!(output_rs, uncompressed); |
| } |
| |
| #[test] |
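| // fuzz-derived regression input, named for a difference between the hash implementations
| // (see `HashCalcVariant`); the expected output pins the current behavior.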
| fn hash_calc_difference() { |
| let input = [ |
| 0, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, |
| 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 64, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 102, 102, 102, 102, |
| 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, |
| 102, 102, 102, 102, 102, 102, 102, 102, 112, 102, 102, 102, 102, 102, 102, 102, 102, |
| 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 0, |
| 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 64, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, |
| 50, 0, |
| ]; |
| |
| let config = DeflateConfig { |
| level: 6, |
| method: Method::Deflated, |
| window_bits: 9, |
| mem_level: 8, |
| strategy: Strategy::Default, |
| }; |
| |
| let expected = [ |
| 24, 149, 99, 96, 96, 96, 96, 208, 6, 17, 112, 138, 129, 193, 128, 1, 29, 24, 50, 208, |
| 1, 200, 146, 169, 79, 24, 74, 59, 96, 147, 52, 71, 22, 70, 246, 88, 26, 94, 80, 128, |
| 83, 6, 162, 219, 144, 76, 183, 210, 5, 8, 67, 105, 36, 159, 35, 128, 57, 118, 97, 100, |
| 160, 197, 192, 192, 96, 196, 0, 0, 3, 228, 25, 128, |
| ]; |
| |
| fuzz_based_test(&input, config, &expected); |
| } |
| |
| #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] |
| mod _cache_lines { |
| use super::State; |
| // FIXME: once the zlib-rs Minimum Supported Rust Version is >= 1.77, switch to
| // core::mem::offset_of and move this _cache_lines module up a level, from the tests module
| // into super::
| use memoffset::offset_of; |
| |
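| // `State` groups its hot fields so that each `_cache_line_N` marker starts exactly at a
| // 64-byte cache line boundary; these asserts catch accidental layout changes.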
| const _: () = assert!(offset_of!(State, status) == 0); |
| const _: () = assert!(offset_of!(State, _cache_line_0) == 64); |
| const _: () = assert!(offset_of!(State, _cache_line_1) == 128); |
| const _: () = assert!(offset_of!(State, _cache_line_2) == 192); |
| const _: () = assert!(offset_of!(State, _cache_line_3) == 256); |
| } |
| } |