class nbla::Cuda
-
class Cuda : public BackendBase
Singleton class for storing some handles or configs for CUDA Computation.
Public Functions
-
cublasHandle_t cublas_handle(int device = -1)
Get cuBLAS handle of a specified device.
-
cusolverDnHandle_t cusolverdn_handle(int device = -1)
Get cuSOLVER Dn handle of a specified device.
-
cutensorHandle_t cutensor_handle(int device = -1)
Get cuTENSOR handle of a specified device.
-
bool cutensor_available(int device = -1)
Get cuTENSOR availability.
-
std::shared_ptr<cudaEvent_t> cuda_event(unsigned int flags, int device = -1)
Get or create a CUDA event.
-
curandGenerator_t &curand_generator()
Get cuRAND global generator.
-
vector<string> array_classes() const
Available array class list used in CUDA Function implementations.
-
void _set_array_classes(const vector<string> &a)
Set array class list.
Note
Dangerous to call. End users shouldn’t call.
-
void register_array_class(const string &name)
Register array class to available list by name.
-
void free_unused_host_caches()
Free all unused host memory caches.
-
void device_synchronize(const string &device)
Synchronize host to device.
-
void default_stream_synchronize(const string &device)
Synchronize host to default stream of device.
-
shared_ptr<cudaStream_t> get_stream(unsigned int flag, CudaStreamId streamId, int device = -1)
Get auxiliary stream.
-
void create_lms_streams(int device = -1)
Create non-blocking streams for data transfer.
-
void set_vma_chunk_size(size_t size)
Change a chunk size of physical memory used in virtual memory allocator.
-
shared_ptr<cudaDeviceProp> get_device_properties(int device = -1)
Get CUDA device properties.
Public Members
-
cudaStream_t stream_HtoD = 0
Non-blocking streams for data transfer.
-
cublasHandle_t cublas_handle(int device = -1)