pub struct CudaDevice { /* private fields */ }
A CUDA device handle. Contains the cudarc device and a cuBLAS handle for matrix multiplication. Clonable (uses Arc internally).
Implementations
impl CudaDevice
pub fn new(ordinal: usize) -> Result<Self>
Create a new CUDA device for the given GPU ordinal (0, 1, …). Compiles all Shrew CUDA kernels on first creation.
pub fn pool(&self) -> &CudaMemPool
Get the memory pool.
pub fn empty_cache(&self)
Release all cached GPU memory back to the CUDA driver.
pub fn pool_stats(&self) -> PoolStats
Return pool statistics (cached bytes, hits, misses, etc.).
pub fn reclaim(&self, storage: CudaStorage)
Reclaim a CudaStorage buffer into the pool for future reuse.
pub fn pool_alloc_f32(&self, n: usize) -> Result<CudaSlice<f32>>
Allocate n elements from the pool (contents undefined).
pub fn pool_alloc_f64(&self, n: usize) -> Result<CudaSlice<f64>>
pub fn pool_alloc_u16(&self, n: usize) -> Result<CudaSlice<u16>>
pub fn pool_alloc_u8(&self, n: usize) -> Result<CudaSlice<u8>>
pub fn pool_alloc_u32(&self, n: usize) -> Result<CudaSlice<u32>>
pub fn pool_alloc_i64(&self, n: usize) -> Result<CudaSlice<i64>>
pub fn pool_alloc_zeros_f32(&self, n: usize) -> Result<CudaSlice<f32>>
Allocate n elements from the pool, zeroed.
pub fn pool_alloc_zeros_f64(&self, n: usize) -> Result<CudaSlice<f64>>
pub fn pool_alloc_zeros_u16(&self, n: usize) -> Result<CudaSlice<u16>>
pub fn pool_alloc_zeros_u8(&self, n: usize) -> Result<CudaSlice<u8>>
pub fn pool_alloc_zeros_u32(&self, n: usize) -> Result<CudaSlice<u32>>
pub fn pool_alloc_zeros_i64(&self, n: usize) -> Result<CudaSlice<i64>>
Trait Implementations
impl BackendDevice for CudaDevice
impl Clone for CudaDevice
impl Debug for CudaDevice
impl Send for CudaDevice
impl Sync for CudaDevice
Auto Trait Implementations
impl Freeze for CudaDevice
impl RefUnwindSafe for CudaDevice
impl Unpin for CudaDevice
impl UnwindSafe for CudaDevice
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.