| // Copyright (c) 2017 The vulkano developers |
| // Licensed under the Apache License, Version 2.0 |
| // <LICENSE-APACHE or |
| // https://www.apache.org/licenses/LICENSE-2.0> or the MIT |
| // license <LICENSE-MIT or https://opensource.org/licenses/MIT>, |
| // at your option. All files in the project carrying such |
| // notice may not be copied, modified, or distributed except |
| // according to those terms. |
| |
| use crate::buffer::sys::BufferCreationError; |
| use crate::buffer::sys::UnsafeBuffer; |
| use crate::buffer::traits::BufferAccess; |
| use crate::buffer::traits::BufferInner; |
| use crate::buffer::traits::TypedBufferAccess; |
| use crate::buffer::BufferUsage; |
| use crate::device::Device; |
| use crate::device::DeviceOwned; |
| use crate::device::Queue; |
| use crate::memory::pool::AllocFromRequirementsFilter; |
| use crate::memory::pool::AllocLayout; |
| use crate::memory::pool::MappingRequirement; |
| use crate::memory::pool::MemoryPool; |
| use crate::memory::pool::MemoryPoolAlloc; |
| use crate::memory::pool::PotentialDedicatedAllocation; |
| use crate::memory::pool::StdMemoryPool; |
| use crate::memory::DedicatedAlloc; |
| use crate::memory::DeviceMemoryAllocError; |
| use crate::sync::AccessError; |
| use crate::sync::Sharing; |
| use crate::DeviceSize; |
| use crate::OomError; |
| use std::cmp; |
| use std::hash::Hash; |
| use std::hash::Hasher; |
| use std::iter; |
| use std::marker::PhantomData; |
| use std::mem; |
| use std::ptr; |
| use std::sync::atomic::AtomicU64; |
| use std::sync::atomic::Ordering; |
| use std::sync::Arc; |
| use std::sync::Mutex; |
| use std::sync::MutexGuard; |
| |
| // TODO: Add `CpuBufferPoolSubbuffer::read` to read the content of a subbuffer. |
| // But that's hard to do because we must prevent `increase_gpu_lock` from working while a |
| // buffer is locked. |
| |
| /// Ring buffer from which "sub-buffers" can be individually allocated. |
| /// |
| /// This buffer is especially suitable when you want to upload or download some data regularly |
| /// (for example, at each frame for a video game). |
| /// |
| /// # Usage |
| /// |
| /// A `CpuBufferPool` is similar to a ring buffer. You start by creating an empty pool, then you |
| /// grab elements from the pool and use them, and if the pool is full it will automatically grow |
| /// in size. |
| /// |
| /// Unlike a `Vec`, elements automatically free themselves when they are dropped (i.e. usually |
| /// when you call `cleanup_finished()` on a future, or when you drop that future). |
| /// |
| /// # Arc-like |
| /// |
| /// The `CpuBufferPool` struct internally contains an `Arc`. Cloning a `CpuBufferPool` is cheap, |
| /// and all the clones share the same underlying buffer. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use vulkano::buffer::CpuBufferPool; |
| /// use vulkano::command_buffer::AutoCommandBufferBuilder; |
| /// use vulkano::command_buffer::CommandBufferUsage; |
| /// use vulkano::command_buffer::PrimaryCommandBuffer; |
| /// use vulkano::sync::GpuFuture; |
| /// # let device: std::sync::Arc<vulkano::device::Device> = return; |
| /// # let queue: std::sync::Arc<vulkano::device::Queue> = return; |
| /// |
| /// // Create the ring buffer. |
| /// let buffer = CpuBufferPool::upload(device.clone()); |
| /// |
| /// for n in 0 .. 25u32 { |
| /// // Each loop iteration grabs a new entry from the ring buffer and stores `data` in it. |
| /// let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0]; |
| /// let sub_buffer = buffer.next(data).unwrap(); |
| /// |
| /// // You can then use `sub_buffer` as if it was an entirely separate buffer. |
| /// AutoCommandBufferBuilder::primary(device.clone(), queue.family(), CommandBufferUsage::OneTimeSubmit) |
| /// .unwrap() |
| /// // For the sake of the example we just call `update_buffer` on the buffer, even though |
| /// // it is pointless to do that. |
| /// .update_buffer(sub_buffer.clone(), &[0.2, 0.3, 0.4, 0.5]) |
| /// .unwrap() |
| /// .build().unwrap() |
| /// .execute(queue.clone()) |
| /// .unwrap() |
| /// .then_signal_fence_and_flush() |
| /// .unwrap(); |
| /// } |
| /// ``` |
| /// |
| pub struct CpuBufferPool<T, A = Arc<StdMemoryPool>> |
| where |
| A: MemoryPool, |
| { |
| // The device of the pool. |
| device: Arc<Device>, |
| |
| // The memory pool to use for allocations. |
| pool: A, |
| |
| // Current buffer from which elements are grabbed. |
| current_buffer: Mutex<Option<Arc<ActualBuffer<A>>>>, |
| |
| // Buffer usage. |
| usage: BufferUsage, |
| |
| // Makes the struct generic over `T` even though no `T` is stored directly. |
| marker: PhantomData<Box<T>>, |
| } |
| |
| // One buffer of the pool. |
| struct ActualBuffer<A> |
| where |
| A: MemoryPool, |
| { |
| // Inner content. |
| inner: UnsafeBuffer, |
| |
| // The memory held by the buffer. |
| memory: PotentialDedicatedAllocation<A::Alloc>, |
| |
| // List of the chunks that are reserved. |
| chunks_in_use: Mutex<Vec<ActualBufferChunk>>, |
| |
| // Element index at which the ring buffer will try to allocate the next chunk. |
| next_index: AtomicU64, |
| |
| // Number of elements in the buffer. |
| capacity: DeviceSize, |
| } |
| |
| // Access pattern of one subbuffer. |
| #[derive(Debug)] |
| struct ActualBufferChunk { |
| // First element number within the actual buffer. |
| index: DeviceSize, |
| |
| // Number of occupied elements within the actual buffer. |
| len: DeviceSize, |
| |
| // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer. |
| num_cpu_accesses: usize, |
| |
| // Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer and that have been |
| // GPU-locked. |
| num_gpu_accesses: usize, |
| } |
| |
| /// A subbuffer allocated from a `CpuBufferPool`. |
| /// |
| /// When this object is destroyed, the subbuffer is automatically reclaimed by the pool. |
| pub struct CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| buffer: Arc<ActualBuffer<A>>, |
| |
| // Index of the subbuffer within `buffer`. In number of elements. |
| index: DeviceSize, |
| |
| // Number of bytes to add to `index * mem::size_of::<T>()` to obtain the start of the data in |
| // the buffer. Necessary for alignment purposes. |
| align_offset: DeviceSize, |
| |
| // Size of the subbuffer in number of elements, as requested by the user. |
| // If this is 0, then no entry was added to `chunks_in_use`. |
| requested_len: DeviceSize, |
| |
| // Makes the struct generic over `T` even though no `T` is stored directly. |
| marker: PhantomData<Box<T>>, |
| } |
| |
| /// A subbuffer allocated from a `CpuBufferPool`. |
| /// |
| /// When this object is destroyed, the subbuffer is automatically reclaimed by the pool. |
| pub struct CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| // This struct is just a wrapper around `CpuBufferPoolChunk`. |
| chunk: CpuBufferPoolChunk<T, A>, |
| } |
| |
| impl<T> CpuBufferPool<T> { |
| /// Builds a `CpuBufferPool`. |
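| /// |
| /// # Example |
| /// |
| /// A minimal sketch of passing a custom `BufferUsage` (flags combined with struct update |
| /// syntax); the hidden `device` handle is assumed to come from your existing setup: |
| /// |
| /// ``` |
| /// use vulkano::buffer::{BufferUsage, CpuBufferPool}; |
| /// # let device: std::sync::Arc<vulkano::device::Device> = return; |
| /// |
| /// // Subbuffers from this pool can be used as vertex buffers and as transfer sources. |
| /// let usage = BufferUsage { transfer_source: true, ..BufferUsage::vertex_buffer() }; |
| /// let pool = CpuBufferPool::<[f32; 2]>::new(device, usage); |
| /// ``` |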
| #[inline] |
| pub fn new(device: Arc<Device>, usage: BufferUsage) -> CpuBufferPool<T> { |
| let pool = Device::standard_pool(&device); |
| |
| CpuBufferPool { |
| device, |
| pool, |
| current_buffer: Mutex::new(None), |
| usage, |
| marker: PhantomData, |
| } |
| } |
| |
| /// Builds a `CpuBufferPool` meant for simple uploads. |
| /// |
| /// Shortcut for a pool that can only be used as a transfer source, with exclusive queue |
| /// family access. |
| #[inline] |
| pub fn upload(device: Arc<Device>) -> CpuBufferPool<T> { |
| CpuBufferPool::new(device, BufferUsage::transfer_source()) |
| } |
| |
| /// Builds a `CpuBufferPool` meant for simple downloads. |
| /// |
| /// Shortcut for a pool that can only be used as a transfer destination, with exclusive queue |
| /// family access. |
| #[inline] |
| pub fn download(device: Arc<Device>) -> CpuBufferPool<T> { |
| CpuBufferPool::new(device, BufferUsage::transfer_destination()) |
| } |
| |
| /// Builds a `CpuBufferPool` meant for usage as a uniform buffer. |
| /// |
| /// Shortcut for a pool that can only be used as a uniform buffer, with exclusive queue |
| /// family access. |
| #[inline] |
| pub fn uniform_buffer(device: Arc<Device>) -> CpuBufferPool<T> { |
| CpuBufferPool::new(device, BufferUsage::uniform_buffer()) |
| } |
| |
| /// Builds a `CpuBufferPool` meant for usage as a vertex buffer. |
| /// |
| /// Shortcut for a pool that can only be used as a vertex buffer, with exclusive queue |
| /// family access. |
| #[inline] |
| pub fn vertex_buffer(device: Arc<Device>) -> CpuBufferPool<T> { |
| CpuBufferPool::new(device, BufferUsage::vertex_buffer()) |
| } |
| |
| /// Builds a `CpuBufferPool` meant for usage as an indirect buffer. |
| /// |
| /// Shortcut for a pool that can only be used as an indirect buffer, with exclusive queue |
| /// family access. |
| #[inline] |
| pub fn indirect_buffer(device: Arc<Device>) -> CpuBufferPool<T> { |
| CpuBufferPool::new(device, BufferUsage::indirect_buffer()) |
| } |
| } |
| |
| impl<T, A> CpuBufferPool<T, A> |
| where |
| A: MemoryPool, |
| { |
| /// Returns the current capacity of the pool, in number of elements. |
| pub fn capacity(&self) -> DeviceSize { |
| match *self.current_buffer.lock().unwrap() { |
| None => 0, |
| Some(ref buf) => buf.capacity, |
| } |
| } |
| |
| /// Ensures that the capacity of the pool is at least `capacity` elements, allocating memory |
| /// if it is not. |
| /// |
| /// Since this can involve a memory allocation, a `DeviceMemoryAllocError` can be returned. |
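| /// |
| /// # Example |
| /// |
| /// A minimal sketch; the hidden `device` handle is assumed to come from your setup: |
| /// |
| /// ``` |
| /// use vulkano::buffer::CpuBufferPool; |
| /// # let device: std::sync::Arc<vulkano::device::Device> = return; |
| /// |
| /// let pool = CpuBufferPool::<u8>::upload(device); |
| /// // Make room for 128 elements up front so the first uses don't reallocate. |
| /// pool.reserve(128).unwrap(); |
| /// ``` |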
| pub fn reserve(&self, capacity: DeviceSize) -> Result<(), DeviceMemoryAllocError> { |
| let mut cur_buf = self.current_buffer.lock().unwrap(); |
| |
| // Check current capacity. |
| match *cur_buf { |
| Some(ref buf) if buf.capacity >= capacity => { |
| return Ok(()); |
| } |
| _ => (), |
| }; |
| |
| self.reset_buf(&mut cur_buf, capacity) |
| } |
| |
| /// Grants access to a new subbuffer and puts `data` in it. |
| /// |
| /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will |
| /// automatically be allocated. |
| /// |
| /// > **Note**: You can think of it like a `Vec`. If you insert an element and the `Vec` is not |
| /// > large enough, a new chunk of memory is automatically allocated. |
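| /// |
| /// # Example |
| /// |
| /// A minimal sketch; the hidden `device` handle is assumed to come from your setup: |
| /// |
| /// ``` |
| /// use vulkano::buffer::CpuBufferPool; |
| /// # let device: std::sync::Arc<vulkano::device::Device> = return; |
| /// |
| /// let pool = CpuBufferPool::<[f32; 4]>::uniform_buffer(device); |
| /// // One subbuffer per frame; the slot is reclaimed once the returned subbuffer |
| /// // (and any future that uses it) has been dropped. |
| /// let sub_buffer = pool.next([0.25, 0.5, 0.75, 1.0]).unwrap(); |
| /// ``` |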
| #[inline] |
| pub fn next(&self, data: T) -> Result<CpuBufferPoolSubbuffer<T, A>, DeviceMemoryAllocError> { |
| Ok(CpuBufferPoolSubbuffer { |
| chunk: self.chunk(iter::once(data))?, |
| }) |
| } |
| |
| /// Grants access to a new subbuffer and puts `data` in it. |
| /// |
| /// If no subbuffer is available (because they are still in use by the GPU), a new buffer will |
| /// automatically be allocated. |
| /// |
| /// > **Note**: You can think of it like a `Vec`. If you insert elements and the `Vec` is not |
| /// > large enough, a new chunk of memory is automatically allocated. |
| /// |
| /// # Panic |
| /// |
| /// Panics if the length reported by the iterator doesn't match the actual number of elements. |
| /// |
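| /// # Example |
| /// |
| /// A minimal sketch; the hidden `device` handle is assumed to come from your setup: |
| /// |
| /// ``` |
| /// use vulkano::buffer::CpuBufferPool; |
| /// # let device: std::sync::Arc<vulkano::device::Device> = return; |
| /// |
| /// let pool = CpuBufferPool::<u32>::upload(device); |
| /// // Upload several values into a single subbuffer; any exact-size iterator works. |
| /// let sub_buffer = pool.chunk(vec![1, 2, 3, 4]).unwrap(); |
| /// ``` |
| /// |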
| pub fn chunk<I>(&self, data: I) -> Result<CpuBufferPoolChunk<T, A>, DeviceMemoryAllocError> |
| where |
| I: IntoIterator<Item = T>, |
| I::IntoIter: ExactSizeIterator, |
| { |
| let data = data.into_iter(); |
| |
| let mut mutex = self.current_buffer.lock().unwrap(); |
| |
| let data = match self.try_next_impl(&mut mutex, data) { |
| Ok(n) => return Ok(n), |
| Err(d) => d, |
| }; |
| |
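| // The current buffer is full or too small: pick a new capacity, either double the existing |
| // capacity (if the data would already fit in it) or twice the requested length. |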
| let next_capacity = match *mutex { |
| Some(ref b) if (data.len() as DeviceSize) < b.capacity => 2 * b.capacity, |
| _ => 2 * data.len() as DeviceSize, |
| }; |
| |
| self.reset_buf(&mut mutex, next_capacity)?; |
| |
| match self.try_next_impl(&mut mutex, data) { |
| Ok(n) => Ok(n), |
| Err(_) => unreachable!(), |
| } |
| } |
| |
| /// Grants access to a new subbuffer and puts `data` in it. |
| /// |
| /// Returns `None` if no subbuffer is available. |
| /// |
| /// A `CpuBufferPool` starts out empty, so `try_next` will return `None` until the pool has |
| /// allocated its first buffer (for example via `next`, `chunk` or `reserve`). |
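| /// |
| /// # Example |
| /// |
| /// A minimal sketch; the hidden `device` handle is assumed to come from your setup: |
| /// |
| /// ``` |
| /// use vulkano::buffer::CpuBufferPool; |
| /// # let device: std::sync::Arc<vulkano::device::Device> = return; |
| /// |
| /// let pool = CpuBufferPool::<u32>::upload(device); |
| /// // `try_next` never allocates, so fall back to `next` when it returns `None`. |
| /// let sub_buffer = match pool.try_next(42) { |
| /// Some(sub_buffer) => sub_buffer, |
| /// None => pool.next(42).unwrap(), |
| /// }; |
| /// ``` |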
| #[inline] |
| pub fn try_next(&self, data: T) -> Option<CpuBufferPoolSubbuffer<T, A>> { |
| let mut mutex = self.current_buffer.lock().unwrap(); |
| self.try_next_impl(&mut mutex, iter::once(data)) |
| .map(|c| CpuBufferPoolSubbuffer { chunk: c }) |
| .ok() |
| } |
| |
| // Creates a new buffer and sets it as current. The capacity is in number of elements. |
| // |
| // `cur_buf_mutex` must be an active lock of `self.current_buffer`. |
| fn reset_buf( |
| &self, |
| cur_buf_mutex: &mut MutexGuard<Option<Arc<ActualBuffer<A>>>>, |
| capacity: DeviceSize, |
| ) -> Result<(), DeviceMemoryAllocError> { |
| unsafe { |
| let (buffer, mem_reqs) = { |
| let size_bytes = match (mem::size_of::<T>() as DeviceSize).checked_mul(capacity) { |
| Some(s) => s, |
| None => { |
| return Err(DeviceMemoryAllocError::OomError( |
| OomError::OutOfDeviceMemory, |
| )) |
| } |
| }; |
| |
| match UnsafeBuffer::new( |
| self.device.clone(), |
| size_bytes as DeviceSize, |
| self.usage, |
| Sharing::Exclusive::<iter::Empty<_>>, |
| None, |
| ) { |
| Ok(b) => b, |
| Err(BufferCreationError::AllocError(err)) => return Err(err), |
| Err(_) => unreachable!(), // We don't use sparse binding, therefore the other |
| // errors can't happen |
| } |
| }; |
| |
| let mem = MemoryPool::alloc_from_requirements( |
| &self.pool, |
| &mem_reqs, |
| AllocLayout::Linear, |
| MappingRequirement::Map, |
| DedicatedAlloc::Buffer(&buffer), |
| |_| AllocFromRequirementsFilter::Allowed, |
| )?; |
| debug_assert!((mem.offset() % mem_reqs.alignment) == 0); |
| debug_assert!(mem.mapped_memory().is_some()); |
| buffer.bind_memory(mem.memory(), mem.offset())?; |
| |
| **cur_buf_mutex = Some(Arc::new(ActualBuffer { |
| inner: buffer, |
| memory: mem, |
| chunks_in_use: Mutex::new(vec![]), |
| next_index: AtomicU64::new(0), |
| capacity, |
| })); |
| |
| Ok(()) |
| } |
| } |
| |
| // Tries to lock a subbuffer from the current buffer. |
| // |
| // `cur_buf_mutex` must be an active lock of `self.current_buffer`. |
| // |
| // Returns `data` wrapped inside an `Err` if there is no slot available in the current buffer. |
| // |
| // # Panic |
| // |
| // Panics if the length reported by the iterator doesn't match the actual number of elements. |
| // |
| fn try_next_impl<I>( |
| &self, |
| cur_buf_mutex: &mut MutexGuard<Option<Arc<ActualBuffer<A>>>>, |
| mut data: I, |
| ) -> Result<CpuBufferPoolChunk<T, A>, I> |
| where |
| I: ExactSizeIterator<Item = T>, |
| { |
| // Grab the current buffer. Return `Err` if the pool wasn't "initialized" yet. |
| let current_buffer = match cur_buf_mutex.clone() { |
| Some(b) => b, |
| None => return Err(data), |
| }; |
| |
| let mut chunks_in_use = current_buffer.chunks_in_use.lock().unwrap(); |
| debug_assert!(!chunks_in_use.iter().any(|c| c.len == 0)); |
| |
| // Number of elements requested by the user. |
| let requested_len = data.len() as DeviceSize; |
| |
| // We special-case requests for 0 elements: polluting the list of allocated chunks with |
| // zero-length chunks would make deallocation troublesome. |
| if requested_len == 0 { |
| assert!( |
| data.next().is_none(), |
| "Expected iterator passed to CpuBufferPool::chunk to be empty" |
| ); |
| return Ok(CpuBufferPoolChunk { |
| // TODO: remove .clone() once non-lexical borrows land |
| buffer: current_buffer.clone(), |
| index: 0, |
| align_offset: 0, |
| requested_len: 0, |
| marker: PhantomData, |
| }); |
| } |
| |
| // Find a suitable offset and length, or return early if none is available. |
| let (index, occupied_len, align_offset) = { |
| let (tentative_index, tentative_len, tentative_align_offset) = { |
| // The only place that touches `next_index` is this code, and we hold a mutex lock |
| // on the buffer, so `next_index` can't be accessed concurrently. |
| // TODO: ^ eventually should be put inside the mutex |
| let idx = current_buffer.next_index.load(Ordering::SeqCst); |
| |
| // Find the required alignment in bytes. |
| let align_bytes = cmp::max( |
| if self.usage.uniform_buffer { |
| self.device() |
| .physical_device() |
| .properties() |
| .min_uniform_buffer_offset_alignment |
| } else { |
| 1 |
| }, |
| if self.usage.storage_buffer { |
| self.device() |
| .physical_device() |
| .properties() |
| .min_storage_buffer_offset_alignment |
| } else { |
| 1 |
| }, |
| ); |
| |
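| // `tentative_align_offset` is the padding in bytes needed so that the subbuffer starts |
| // on a multiple of `align_bytes`; `additional_len` is that padding rounded up to a |
| // whole number of elements. |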
| let tentative_align_offset = (align_bytes |
| - ((idx * mem::size_of::<T>() as DeviceSize) % align_bytes)) |
| % align_bytes; |
| let additional_len = if tentative_align_offset == 0 { |
| 0 |
| } else { |
| 1 + (tentative_align_offset - 1) / mem::size_of::<T>() as DeviceSize |
| }; |
| |
| (idx, requested_len + additional_len, tentative_align_offset) |
| }; |
| |
| // Find out whether any chunk in use overlaps this range. |
| if tentative_index + tentative_len <= current_buffer.capacity |
| && !chunks_in_use.iter().any(|c| { |
| (c.index >= tentative_index && c.index < tentative_index + tentative_len) |
| || (c.index <= tentative_index && c.index + c.len > tentative_index) |
| }) |
| { |
| (tentative_index, tentative_len, tentative_align_offset) |
| } else { |
| // Impossible to allocate at `tentative_index`. Let's try 0 instead. |
| if requested_len <= current_buffer.capacity |
| && !chunks_in_use.iter().any(|c| c.index < requested_len) |
| { |
| (0, requested_len, 0) |
| } else { |
| // Buffer is full. Return. |
| return Err(data); |
| } |
| } |
| }; |
| |
| // Write `data` in the memory. |
| unsafe { |
| let mem_off = current_buffer.memory.offset(); |
| let range_start = index * mem::size_of::<T>() as DeviceSize + align_offset + mem_off; |
| let range_end = (index + requested_len) * mem::size_of::<T>() as DeviceSize |
| + align_offset |
| + mem_off; |
| let mut mapping = current_buffer |
| .memory |
| .mapped_memory() |
| .unwrap() |
| .read_write::<[T]>(range_start..range_end); |
| |
| let mut written = 0; |
| for (o, i) in mapping.iter_mut().zip(data) { |
| ptr::write(o, i); |
| written += 1; |
| } |
| assert_eq!( |
| written, requested_len, |
| "Iterator passed to CpuBufferPool::chunk has a mismatch between reported \ |
| length and actual number of elements" |
| ); |
| } |
| |
| // Mark the chunk as in use. |
| current_buffer |
| .next_index |
| .store(index + occupied_len, Ordering::SeqCst); |
| chunks_in_use.push(ActualBufferChunk { |
| index, |
| len: occupied_len, |
| num_cpu_accesses: 1, |
| num_gpu_accesses: 0, |
| }); |
| |
| Ok(CpuBufferPoolChunk { |
| // TODO: remove .clone() once non-lexical borrows land |
| buffer: current_buffer.clone(), |
| index, |
| align_offset, |
| requested_len, |
| marker: PhantomData, |
| }) |
| } |
| } |
| |
| // We can't automatically derive `Clone`, because the compiler would add a `T: Clone` requirement. |
| impl<T, A> Clone for CpuBufferPool<T, A> |
| where |
| A: MemoryPool + Clone, |
| { |
| fn clone(&self) -> Self { |
| let buf = self.current_buffer.lock().unwrap(); |
| |
| CpuBufferPool { |
| device: self.device.clone(), |
| pool: self.pool.clone(), |
| current_buffer: Mutex::new(buf.clone()), |
| usage: self.usage.clone(), |
| marker: PhantomData, |
| } |
| } |
| } |
| |
| unsafe impl<T, A> DeviceOwned for CpuBufferPool<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn device(&self) -> &Arc<Device> { |
| &self.device |
| } |
| } |
| |
| impl<T, A> Clone for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| fn clone(&self) -> CpuBufferPoolChunk<T, A> { |
| let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); |
| let chunk = chunks_in_use_lock |
| .iter_mut() |
| .find(|c| c.index == self.index) |
| .unwrap(); |
| |
| debug_assert!(chunk.num_cpu_accesses >= 1); |
| chunk.num_cpu_accesses = chunk |
| .num_cpu_accesses |
| .checked_add(1) |
| .expect("Overflow in CPU accesses"); |
| |
| CpuBufferPoolChunk { |
| buffer: self.buffer.clone(), |
| index: self.index, |
| align_offset: self.align_offset, |
| requested_len: self.requested_len, |
| marker: PhantomData, |
| } |
| } |
| } |
| |
| unsafe impl<T, A> BufferAccess for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn inner(&self) -> BufferInner { |
| BufferInner { |
| buffer: &self.buffer.inner, |
| offset: self.index * mem::size_of::<T>() as DeviceSize + self.align_offset, |
| } |
| } |
| |
| #[inline] |
| fn size(&self) -> DeviceSize { |
| self.requested_len * mem::size_of::<T>() as DeviceSize |
| } |
| |
| #[inline] |
| fn conflict_key(&self) -> (u64, u64) { |
| ( |
| self.buffer.inner.key(), |
| // Ensure the special-cased empty buffers don't collide with a regular buffer starting at 0. |
| if self.requested_len == 0 { |
| u64::MAX |
| } else { |
| self.index |
| }, |
| ) |
| } |
| |
| #[inline] |
| fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> { |
| if self.requested_len == 0 { |
| return Ok(()); |
| } |
| |
| let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); |
| let chunk = chunks_in_use_lock |
| .iter_mut() |
| .find(|c| c.index == self.index) |
| .unwrap(); |
| |
| if chunk.num_gpu_accesses != 0 { |
| return Err(AccessError::AlreadyInUse); |
| } |
| |
| chunk.num_gpu_accesses = 1; |
| Ok(()) |
| } |
| |
| #[inline] |
| unsafe fn increase_gpu_lock(&self) { |
| if self.requested_len == 0 { |
| return; |
| } |
| |
| let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); |
| let chunk = chunks_in_use_lock |
| .iter_mut() |
| .find(|c| c.index == self.index) |
| .unwrap(); |
| |
| debug_assert!(chunk.num_gpu_accesses >= 1); |
| chunk.num_gpu_accesses = chunk |
| .num_gpu_accesses |
| .checked_add(1) |
| .expect("Overflow in GPU accesses"); |
| } |
| |
| #[inline] |
| unsafe fn unlock(&self) { |
| if self.requested_len == 0 { |
| return; |
| } |
| |
| let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); |
| let chunk = chunks_in_use_lock |
| .iter_mut() |
| .find(|c| c.index == self.index) |
| .unwrap(); |
| |
| debug_assert!(chunk.num_gpu_accesses >= 1); |
| chunk.num_gpu_accesses -= 1; |
| } |
| } |
| |
| impl<T, A> Drop for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| fn drop(&mut self) { |
| // If `requested_len` is 0, then no entry was added to `chunks_in_use`. |
| if self.requested_len == 0 { |
| return; |
| } |
| |
| let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); |
| let chunk_num = chunks_in_use_lock |
| .iter_mut() |
| .position(|c| c.index == self.index) |
| .unwrap(); |
| |
| if chunks_in_use_lock[chunk_num].num_cpu_accesses >= 2 { |
| chunks_in_use_lock[chunk_num].num_cpu_accesses -= 1; |
| } else { |
| debug_assert_eq!(chunks_in_use_lock[chunk_num].num_gpu_accesses, 0); |
| chunks_in_use_lock.remove(chunk_num); |
| } |
| } |
| } |
| |
| unsafe impl<T, A> TypedBufferAccess for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| type Content = [T]; |
| } |
| |
| unsafe impl<T, A> DeviceOwned for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn device(&self) -> &Arc<Device> { |
| self.buffer.inner.device() |
| } |
| } |
| |
| impl<T, A> PartialEq for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn eq(&self, other: &Self) -> bool { |
| self.inner() == other.inner() && self.size() == other.size() |
| } |
| } |
| |
| impl<T, A> Eq for CpuBufferPoolChunk<T, A> where A: MemoryPool {} |
| |
| impl<T, A> Hash for CpuBufferPoolChunk<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn hash<H: Hasher>(&self, state: &mut H) { |
| self.inner().hash(state); |
| self.size().hash(state); |
| } |
| } |
| |
| impl<T, A> Clone for CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| fn clone(&self) -> CpuBufferPoolSubbuffer<T, A> { |
| CpuBufferPoolSubbuffer { |
| chunk: self.chunk.clone(), |
| } |
| } |
| } |
| |
| unsafe impl<T, A> BufferAccess for CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn inner(&self) -> BufferInner { |
| self.chunk.inner() |
| } |
| |
| #[inline] |
| fn size(&self) -> DeviceSize { |
| self.chunk.size() |
| } |
| |
| #[inline] |
| fn conflict_key(&self) -> (u64, u64) { |
| self.chunk.conflict_key() |
| } |
| |
| #[inline] |
| fn try_gpu_lock(&self, e: bool, q: &Queue) -> Result<(), AccessError> { |
| self.chunk.try_gpu_lock(e, q) |
| } |
| |
| #[inline] |
| unsafe fn increase_gpu_lock(&self) { |
| self.chunk.increase_gpu_lock() |
| } |
| |
| #[inline] |
| unsafe fn unlock(&self) { |
| self.chunk.unlock() |
| } |
| } |
| |
| unsafe impl<T, A> TypedBufferAccess for CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| type Content = T; |
| } |
| |
| unsafe impl<T, A> DeviceOwned for CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn device(&self) -> &Arc<Device> { |
| self.chunk.buffer.inner.device() |
| } |
| } |
| |
| impl<T, A> PartialEq for CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn eq(&self, other: &Self) -> bool { |
| self.inner() == other.inner() && self.size() == other.size() |
| } |
| } |
| |
| impl<T, A> Eq for CpuBufferPoolSubbuffer<T, A> where A: MemoryPool {} |
| |
| impl<T, A> Hash for CpuBufferPoolSubbuffer<T, A> |
| where |
| A: MemoryPool, |
| { |
| #[inline] |
| fn hash<H: Hasher>(&self, state: &mut H) { |
| self.inner().hash(state); |
| self.size().hash(state); |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use crate::buffer::CpuBufferPool; |
| use std::mem; |
| |
| #[test] |
| fn basic_create() { |
| let (device, _) = gfx_dev_and_queue!(); |
| let _ = CpuBufferPool::<u8>::upload(device); |
| } |
| |
| #[test] |
| fn reserve() { |
| let (device, _) = gfx_dev_and_queue!(); |
| |
| let pool = CpuBufferPool::<u8>::upload(device); |
| assert_eq!(pool.capacity(), 0); |
| |
| pool.reserve(83).unwrap(); |
| assert_eq!(pool.capacity(), 83); |
| } |
| |
| #[test] |
| fn capacity_increase() { |
| let (device, _) = gfx_dev_and_queue!(); |
| |
| let pool = CpuBufferPool::upload(device); |
| assert_eq!(pool.capacity(), 0); |
| |
| pool.next(12).unwrap(); |
| let first_cap = pool.capacity(); |
| assert!(first_cap >= 1); |
| |
| for _ in 0..first_cap + 5 { |
| mem::forget(pool.next(12).unwrap()); |
| } |
| |
| assert!(pool.capacity() > first_cap); |
| } |
| |
| #[test] |
| fn reuse_subbuffers() { |
| let (device, _) = gfx_dev_and_queue!(); |
| |
| let pool = CpuBufferPool::upload(device); |
| assert_eq!(pool.capacity(), 0); |
| |
| let mut capacity = None; |
| for _ in 0..64 { |
| pool.next(12).unwrap(); |
| |
| let new_cap = pool.capacity(); |
| assert!(new_cap >= 1); |
| match capacity { |
| None => capacity = Some(new_cap), |
| Some(c) => assert_eq!(c, new_cap), |
| } |
| } |
| } |
| |
| #[test] |
| fn chunk_loopback() { |
| let (device, _) = gfx_dev_and_queue!(); |
| |
| let pool = CpuBufferPool::<u8>::upload(device); |
| pool.reserve(5).unwrap(); |
| |
| let a = pool.chunk(vec![0, 0]).unwrap(); |
| let b = pool.chunk(vec![0, 0]).unwrap(); |
| assert_eq!(b.index, 2); |
| drop(a); |
| |
| let c = pool.chunk(vec![0, 0]).unwrap(); |
| assert_eq!(c.index, 0); |
| |
| assert_eq!(pool.capacity(), 5); |
| } |
| |
| #[test] |
| fn chunk_0_elems_doesnt_pollute() { |
| let (device, _) = gfx_dev_and_queue!(); |
| |
| let pool = CpuBufferPool::<u8>::upload(device); |
| |
| let _ = pool.chunk(vec![]).unwrap(); |
| let _ = pool.chunk(vec![0, 0]).unwrap(); |
| } |
| } |