class nbla::Cuda

class Cuda : public BackendBase

Singleton class for storing some handles or configs for CUDA Computation.

Public Functions

cublasHandle_t cublas_handle(int device = -1)

Get cuBLAS handle of a specified device.

cusolverDnHandle_t cusolverdn_handle(int device = -1)

Get cuSOLVER Dn handle of a specified device.

cutensorHandle_t cutensor_handle(int device = -1)

Get cuTENSOR handle of a specified device.

bool cutensor_available(int device = -1)

Get cuTENSOR availability.

std::shared_ptr<cudaEvent_t> cuda_event(unsigned int flags, int device = -1)

Get or create a CUDA event.

curandGenerator_t &curand_generator()

Get cuRAND global generator.

vector<string> array_classes() const

Available array class list used in CUDA Function implementations.

void _set_array_classes(const vector<string> &a)

Set array class list.

Note

This function is dangerous to call. End users should not call it.

void register_array_class(const string &name)

Register array class to available list by name.

shared_ptr<Allocator> caching_allocator()

Get a caching allocator.

shared_ptr<Allocator> naive_allocator()

Get a no-cache allocator.

shared_ptr<Allocator> unified_allocator()

Get a caching unified-memory allocator.

shared_ptr<Allocator> pinned_allocator()

Get a caching pinned-host-memory allocator.

shared_ptr<Allocator> virtual_caching_allocator()

Get a caching virtual-memory allocator.

void free_unused_host_caches()

Free all unused host memory caches.

void device_synchronize(const string &device)

Synchronize host to device.

void default_stream_synchronize(const string &device)

Synchronize host to default stream of device.

shared_ptr<cudaStream_t> get_stream(unsigned int flag, CudaStreamId streamId, int device = -1)

Get auxiliary stream.

void create_lms_streams(int device = -1)

Create non-blocking streams for data transfer.

void set_vma_chunk_size(size_t size)

Change the chunk size of physical memory used by the virtual memory allocator.

shared_ptr<cudaDeviceProp> get_device_properties(int device = -1)

Get CUDA device properties.

Public Members

cudaStream_t stream_HtoD = 0

Non-blocking streams for data transfer.