Struct CudaBackend

Source

pub struct CudaBackend;

Expand description

The CUDA GPU backend, re-exported here (requires the `cuda` feature and the NVIDIA CUDA Toolkit). This is a zero-sized marker type.

Trait Implementations§

Source §

impl Backend for CudaBackend

Source §

type Device = CudaDevice

The device type for this backend.

Source §

type Storage = CudaStorage

The storage type for this backend.

Source §

fn zeros( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Allocate storage filled with zeros.

Source §

fn ones( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Allocate storage filled with ones.

Source §

fn full( shape: &Shape, val: f64, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Allocate storage filled with a constant value.

Source §

fn from_f64_slice( data: &[f64], dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Create storage from a flat f64 slice, converting to the target dtype.

Source §

fn rand_uniform( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Create storage with random uniform values in [0, 1).

Source §

fn rand_normal( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Create storage with random normal values (mean=0, std=1).

Source §

fn binary_op( op: BinaryOp, lhs: &CudaStorage, lhs_layout: &Layout, rhs: &CudaStorage, rhs_layout: &Layout, ) -> Result<CudaStorage, Error>

Apply a binary op element-wise: result[i] = op(lhs[i], rhs[i]). The layouts handle broadcasting and non-contiguous access.

Source §

fn unary_op( op: UnaryOp, input: &CudaStorage, layout: &Layout, ) -> Result<CudaStorage, Error>

Apply a unary op element-wise: result[i] = op(input[i]).

Source §

fn reduce_op( op: ReduceOp, input: &CudaStorage, layout: &Layout, dims: &[usize], _keep_dim: bool, ) -> Result<CudaStorage, Error>

Reduce along specific dimensions. If dims is empty, reduce over all elements.

Source §

fn matmul( lhs: &CudaStorage, lhs_layout: &Layout, rhs: &CudaStorage, rhs_layout: &Layout, ) -> Result<CudaStorage, Error>

General matrix multiply: C = A @ B. Supports batched matmul for tensors with rank > 2.

Source §

fn to_contiguous( input: &CudaStorage, layout: &Layout, ) -> Result<CudaStorage, Error>

Make a contiguous copy of the storage following the given layout. If the layout is already contiguous, this may just clone the storage.

Source §

fn to_f64_vec(input: &CudaStorage, layout: &Layout) -> Result<Vec<f64>, Error>

Copy data from this storage to a Vec on the host (for inspection).

Source §

fn cmp_op( op: CmpOp, lhs: &CudaStorage, lhs_layout: &Layout, rhs: &CudaStorage, rhs_layout: &Layout, ) -> Result<CudaStorage, Error>

Element-wise comparison, returns a u8 storage (0 or 1).

Source §

fn affine( input: &CudaStorage, layout: &Layout, mul: f64, add: f64, ) -> Result<CudaStorage, Error>

Affine transform: result = input * mul + add. Used for normalization and other fused operations.

Source §

fn index_select( input: &CudaStorage, input_layout: &Layout, indices: &CudaStorage, indices_layout: &Layout, dim: usize, ) -> Result<CudaStorage, Error>

Gather elements along a dimension using an index tensor.

Source §

fn powf( input: &CudaStorage, layout: &Layout, exponent: f64, ) -> Result<CudaStorage, Error>

Element-wise power: result[i] = input[i] ^ exponent.

Source §

fn clamp( input: &CudaStorage, layout: &Layout, min: f64, max: f64, ) -> Result<CudaStorage, Error>

Element-wise clamp: result[i] = clamp(input[i], min, max).

Source §

fn where_cond( mask: &CudaStorage, mask_layout: &Layout, on_true: &CudaStorage, on_true_layout: &Layout, on_false: &CudaStorage, on_false_layout: &Layout, ) -> Result<CudaStorage, Error>

Element-wise conditional: result[i] = if mask[i] != 0 { on_true[i] } else { on_false[i] }.

Source §

fn gather( input: &CudaStorage, input_layout: &Layout, index: &CudaStorage, index_layout: &Layout, dim: usize, ) -> Result<CudaStorage, Error>

Gather elements along dim using an index tensor. Read more

Source §

fn cat( inputs: &[(&CudaStorage, &Layout)], out_shape: &Shape, dim: usize, ) -> Result<CudaStorage, Error>

Concatenate multiple storages along dim into a single contiguous storage. Each entry is (storage, layout) so non-contiguous inputs are handled correctly. out_shape is the pre-validated output shape.

Source §

fn cast( input: &CudaStorage, layout: &Layout, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

Cast storage to a different dtype on-device (no host round-trip). Read more

Source §

impl Clone for CudaBackend

Source §

fn clone(&self) -> CudaBackend

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for CudaBackend

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl UnwindSafe for CudaBackend

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

§

impl<T> Pointable for T

§

const ALIGN: usize

The alignment of pointer.

§

type Init = T

The type for initializers.

§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more

§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

CudaBackend

Struct CudaBackend Copy item path

Trait Implementations§

impl Backend for CudaBackend

type Device = CudaDevice

type Storage = CudaStorage

fn zeros( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

fn ones( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

fn full( shape: &Shape, val: f64, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

fn from_f64_slice( data: &[f64], dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

fn rand_uniform( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

fn rand_normal( shape: &Shape, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

fn binary_op( op: BinaryOp, lhs: &CudaStorage, lhs_layout: &Layout, rhs: &CudaStorage, rhs_layout: &Layout, ) -> Result<CudaStorage, Error>

fn unary_op( op: UnaryOp, input: &CudaStorage, layout: &Layout, ) -> Result<CudaStorage, Error>

fn reduce_op( op: ReduceOp, input: &CudaStorage, layout: &Layout, dims: &[usize], _keep_dim: bool, ) -> Result<CudaStorage, Error>

fn matmul( lhs: &CudaStorage, lhs_layout: &Layout, rhs: &CudaStorage, rhs_layout: &Layout, ) -> Result<CudaStorage, Error>

fn to_contiguous( input: &CudaStorage, layout: &Layout, ) -> Result<CudaStorage, Error>

fn to_f64_vec(input: &CudaStorage, layout: &Layout) -> Result<Vec<f64>, Error>

fn cmp_op( op: CmpOp, lhs: &CudaStorage, lhs_layout: &Layout, rhs: &CudaStorage, rhs_layout: &Layout, ) -> Result<CudaStorage, Error>

fn affine( input: &CudaStorage, layout: &Layout, mul: f64, add: f64, ) -> Result<CudaStorage, Error>

fn index_select( input: &CudaStorage, input_layout: &Layout, indices: &CudaStorage, indices_layout: &Layout, dim: usize, ) -> Result<CudaStorage, Error>

fn powf( input: &CudaStorage, layout: &Layout, exponent: f64, ) -> Result<CudaStorage, Error>

fn clamp( input: &CudaStorage, layout: &Layout, min: f64, max: f64, ) -> Result<CudaStorage, Error>

fn where_cond( mask: &CudaStorage, mask_layout: &Layout, on_true: &CudaStorage, on_true_layout: &Layout, on_false: &CudaStorage, on_false_layout: &Layout, ) -> Result<CudaStorage, Error>

fn gather( input: &CudaStorage, input_layout: &Layout, index: &CudaStorage, index_layout: &Layout, dim: usize, ) -> Result<CudaStorage, Error>

fn cat( inputs: &[(&CudaStorage, &Layout)], out_shape: &Shape, dim: usize, ) -> Result<CudaStorage, Error>

fn cast( input: &CudaStorage, layout: &Layout, dtype: DType, device: &CudaDevice, ) -> Result<CudaStorage, Error>

impl Clone for CudaBackend

fn clone(&self) -> CudaBackend

fn clone_from(&mut self, source: &Self)

impl Debug for CudaBackend

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Auto Trait Implementations§

impl Freeze for CudaBackend

impl RefUnwindSafe for CudaBackend

impl Send for CudaBackend

impl Sync for CudaBackend

impl Unpin for CudaBackend

impl UnwindSafe for CudaBackend

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct CudaBackend

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,