pub struct CudaMemPool { /* private fields */ }
A CUDA memory caching allocator.
Maintains per-dtype free lists keyed by element count. Reuses buffers
when possible, falling back to cudaMalloc on cache miss.
Implementations§
impl CudaMemPool
pub fn alloc_f32(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<f32>, DriverError>
pub fn alloc_f32( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<f32>, DriverError>
Allocate n elements of type f32, reusing a cached buffer if available.
The returned buffer content is undefined (not zeroed).
pub fn alloc_zeros_f32(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<f32>, DriverError>
pub fn alloc_zeros_f32( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<f32>, DriverError>
Allocate n elements of type f32 and zero them.
pub fn alloc_f64(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<f64>, DriverError>
pub fn alloc_f64( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<f64>, DriverError>
Allocate n elements of type f64.
pub fn alloc_zeros_f64( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<f64>, DriverError>
pub fn alloc_u16(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<u16>, DriverError>
pub fn alloc_u16( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<u16>, DriverError>
Allocate n elements of type u16 (used for F16/BF16 storage).
pub fn alloc_zeros_u16( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<u16>, DriverError>
pub fn alloc_u8(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<u8>, DriverError>
pub fn alloc_u8( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<u8>, DriverError>
Allocate n elements of type u8.
pub fn alloc_zeros_u8( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<u8>, DriverError>
pub fn alloc_u32(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<u32>, DriverError>
pub fn alloc_u32( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<u32>, DriverError>
Allocate n elements of type u32.
pub fn alloc_zeros_u32( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<u32>, DriverError>
pub fn alloc_i64(
&self,
dev: &Arc<CudaDevice>,
n: usize,
) -> Result<CudaSlice<i64>, DriverError>
pub fn alloc_i64( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<i64>, DriverError>
Allocate n elements of type i64.
pub fn alloc_zeros_i64( &self, dev: &Arc<CudaDevice>, n: usize, ) -> Result<CudaSlice<i64>, DriverError>
pub fn reclaim_f32(&self, s: CudaSlice<f32>)
pub fn reclaim_f64(&self, s: CudaSlice<f64>)
pub fn reclaim_u16(&self, s: CudaSlice<u16>)
pub fn reclaim_u8(&self, s: CudaSlice<u8>)
pub fn reclaim_u32(&self, s: CudaSlice<u32>)
pub fn reclaim_i64(&self, s: CudaSlice<i64>)
pub fn reclaim_storage(&self, storage: CudaStorage)
pub fn reclaim_storage(&self, storage: CudaStorage)
Reclaim all buffers inside a CudaStorage, returning them to the pool.
pub fn empty_cache(&self)
pub fn empty_cache(&self)
Release all cached buffers back to the CUDA driver. This actually frees GPU memory.
pub fn reset_stats(&self)
pub fn reset_stats(&self)
Reset hit/miss counters.
Trait Implementations§
Auto Trait Implementations§
impl !Freeze for CudaMemPool
impl RefUnwindSafe for CudaMemPool
impl Unpin for CudaMemPool
impl UnwindSafe for CudaMemPool
Blanket Implementations§
impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.