class nbla::Communicator

class Communicator

Communicator interface which is extended to implement a new Communicator class.

Communicator exchanges gradients.

Subclassed by nbla::DataParallelCommunicator< T >, nbla::MultiProcessDataParallelCommunicator< T >

Public Functions

explicit Communicator(const Context &ctx)

Constructor takes at least context and parameters.

Parameters:

ctx – Context

virtual ~Communicator() = 0

Name of Communicator class, usually class name.

void add_context_and_parameters(const pair<Context, vector<pair<string, VariablePtr>>> &ctx_params)

Add context and parameters.

Parameters:

ctx_params – pair<Context, vector of pair<name, VariablePtr>>

void remove_context_parameters(const pair<Context, vector<string>> &ctx_keys)

Remove previously registered parameters.

void clear_context_parameters()

Clear all parameters.

virtual void init()

Initall or initrank, depending on multi-threads or multi-processes.

This function MUST be called after all parameters to be communicated have been added by the add_context_and_parameters method.

virtual void barrier()

Synchronize all processes in the specified group.

virtual void abort()

Abort all processes in the specified group.

virtual string new_group(pair<string, vector<int>> name_ranks_pair)

Create group.

virtual unordered_map<string, vector<int>> list_groups()

List groups.

virtual vector<int> find_group(const string &group)

Find a group by name.

void check_array_class(Context ctx, VariablePtr vp)

Check difference of the array classes.

Check the difference between the array class of the context and that of the synced_array. If they differ, an error occurs.

virtual void reduce(const vector<NdArrayPtr> &ndarray_list, int dst, bool division = false, bool inplace = false, const string &group = "world")

reduce over parameters added.

Parameters:
  • ndarray_list – Vector of NdArrayPtr.

  • dst – Destination rank.

  • division – Divide the reduced value.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual void reduce(NdArrayPtr ndarray, int dst, bool division = false, bool inplace = false, const string &group = "world")

reduce over parameters added.

Parameters:
  • ndarray – NdArrayPtr.

  • dst – Destination rank.

  • division – Divide the reduced value.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual void allreduce(bool division = false, bool inplace = false)

allreduce over parameters added.

Deprecated. Use all_reduce.

Currently, allreduce is applied to gradient regions.

Parameters:
  • division – Divide the reduced value.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual void all_reduce(const vector<NdArrayPtr> &ndarray_list, bool division = false, bool inplace = false, const string &group = "world")

all_reduce over parameters added.

Parameters:
  • ndarray_list – Vector of NdArrayPtr

  • division – Divide the reduced value.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual void all_reduce(NdArrayPtr ndarray, bool division = false, bool inplace = false, const string &group = "world")

all_reduce over parameters added.

Parameters:
  • ndarray – NdArrayPtr

  • division – Divide the reduced value.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual CommunicatorBackwardCallbackPtr all_reduce_callback(const vector<NdArrayPtr> &ndarray_list, size_t pack_size, bool division = false, const string &group = "world", float scale_grad = 1.0, bool keep_dtype = false)

all_reduce over parameters added.

Parameters:
  • ndarray_list – Vector of NdArrayPtr

  • pack_size – The number of values contained in the packed data.

  • division – Divide the reduced value.

  • scale_grad – Scaling gradient before allreduce. See Python documentation for more details.

  • keep_dtype – Keep dtype of arrays unchanged. See Python documentation for more details.

virtual CommunicatorBackwardCallbackPtr all_reduce_callback(NdArrayPtr ndarray, size_t pack_size, bool division = false, const string &group = "world", float scale_grad = 1.0, bool keep_dtype = false)

all_reduce over parameters added.

Parameters:
  • ndarray – NdArrayPtr

  • pack_size – The number of values contained in the packed data.

  • division – Divide the reduced value.

  • scale_grad – Scaling gradient before allreduce. See Python documentation for more details.

  • keep_dtype – Keep dtype of arrays unchanged. See Python documentation for more details.

virtual void reduce_scatter(const vector<NdArrayPtr> &ndarray_list, NdArrayPtr ndarray, bool division = false, const string &group = "world")

reducescatter.

Parameters:
  • ndarray_list – Vector of NdArrayPtr

  • ndarray – NdArrayPtr

  • division – Divide the reduced value.

  • group – Name of a group.

virtual void bcast(const vector<NdArrayPtr> &ndarray_list, int src, bool inplace = false, const string &group = "world")

broadcast.

Parameters:
  • ndarray_list – Vector of NdArrayPtr.

  • src – Source rank.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual void bcast(NdArrayPtr ndarray, int src, bool inplace = false, const string &group = "world")

broadcast.

Parameters:
  • ndarray – NdArrayPtr.

  • src – Source rank.

  • inplace – Pack the arrays into one large array if false.

  • group – Name of a group.

virtual void all_gather(NdArrayPtr ndarray, const vector<NdArrayPtr> &ndarray_list, const string &group = "world")

all_gather.

Parameters:
  • ndarray – data to be sent.

  • ndarray_list – Vector of NdArrayPtr to receive data.

  • group – Name of a group.

virtual void reduce_async(bool division = false)

reduce asynchronously.

Parameters:

division – Divide the reduced value.

virtual void allreduce_async(bool division = false, bool inplace = true)

allreduce asynchronously.

Parameters:
  • division – Divide the reduced value.

  • inplace – Pack the arrays into one large array if false.

virtual void reducescatter_async(bool division = false)

reducescatter asynchronously.

Parameters:

division – Divide the reduced value.

virtual void bcast_async()

broadcast asynchronously.

virtual void allgather_async()

allgather asynchronously.

vector<string> allowed_array_classes()

Get array classes that are allowed to be specified by Context.