шаблоны — Как избежать дублирования кода в следующем примере? C++ / CUDA

Question

шаблоны — Как избежать дублирования кода в следующем примере? C++ / CUDA

ПРАВКА: Этот код работает, но в нём, похоже, много дублирующихся фрагментов, и я не могу найти способ этого избежать.

В классе MatrixDevice я хочу вызывать функции ядра из kernel.cu.
Я сократил класс MatrixDevice, чтобы показать только саму идею того, как я это делаю.

В MatrixDevice у меня есть несколько функций, которые прибавляют к MatrixDevice другой MatrixDevice или число. Это должно работать для разных типов (в этом примере — float и double), что с шаблонами не было бы проблемой, но мне приходится объявлять перегруженные функции MatrixCudaOperations как extern, потому что я не могу включить файл .cu в файл .h / .cpp.

matrixdevice.h

// Host-side entry points implemented in kernel.cu. They are declared as plain
// (non-template) overloads, one per element type and per kind of right-hand
// operand, so that this header never has to include the .cu file.
//   a         - left-hand input buffer
//   b         - right-hand operand: a second buffer, or a single scalar value
//   result    - output buffer
//   rows/cols - matrix dimensions forwarded to the kernel launch
//   operation - which operation to perform (e.g. EOperation::ADD)
extern void MatrixCudaOperations(const float* a, const float* b, float* result, size_t rows, size_t cols, EOperation operation);
extern void MatrixCudaOperations(const float* a, float b, float* result, size_t rows, size_t cols, EOperation operation);
extern void MatrixCudaOperations(const double* a, const double* b, double* result, size_t rows, size_t cols, EOperation operation);
extern void MatrixCudaOperations(const double* a, double b, double* result, size_t rows, size_t cols, EOperation operation);

// Reduced matrix class: only the members needed to show how the Add family
// forwards to MatrixCudaOperations are listed.
template<class T>
class MatrixDevice{

    T* data;      // element buffer passed to MatrixCudaOperations (presumably device memory - confirm)
    size_t rows;
    size_t cols;

    // The free Add overloads that write into a caller-supplied result read and
    // write `data`/`rows`/`cols` directly, so they must be friends.
    template<class U>
    friend void Add(const MatrixDevice<U> &a, const MatrixDevice<U> &b, MatrixDevice<U> &result);
    template<class U>
    friend void Add(const MatrixDevice<U> &a, U &b, MatrixDevice<U> &result);

public:
    // In-place addition. Public so the free Add(a, b) helpers - and any other
    // caller - can use them; with the class default (private) they were
    // uncallable from outside the class.
    MatrixDevice& Add(const MatrixDevice &other);
    // NOTE(review): taking T by non-const reference rejects literals and
    // temporaries; the signature is kept so the out-of-class definition matches.
    MatrixDevice& Add(T &other);
};

//Operations with MatrixDevice
// Add another MatrixDevice to this one, in place.
// This matrix's buffer is passed both as the left-hand operand and as the
// result buffer, so the outcome overwrites the current contents.
// NOTE(review): other's rows/cols are not compared with this matrix's here;
// presumably the caller guarantees equal dimensions - confirm.
// Returns *this so calls can be chained.
template<class T>
MatrixDevice<T>& MatrixDevice<T>::Add(const MatrixDevice<T> &other){
    MatrixCudaOperations(data, other.data, data, rows, cols, EOperation::ADD);
    return *this;
}

// Add two MatrixDevice objects and return the result as a new MatrixDevice.
// `a` is copied, `b` is added to the copy through the member Add, and the
// copy is returned; neither argument is modified through this function.
// NOTE(review): relies on MatrixDevice's copy constructor giving `result` its
// own buffer; the reduced class shown does not declare one - confirm.
template<class T>
MatrixDevice<T> Add(const MatrixDevice<T> &a, const MatrixDevice<T> &b){
    MatrixDevice<T> result(a);
    result.Add(b);
    return result;
}

//Add two MatrixDevice to result MatrixDevice
template<class T>
void Add(const MatrixDevice &a, const MatrixDevice &b, MatrixDevice &result){
MatrixCudaOperations(a.data, b.data, result.data, a.rows, a.cols, EOperation::ADD);
}//Operations with Number

// Add a single number of type T to this matrix, in place.
// This matrix's buffer is passed both as the input and as the result buffer;
// `other` is handed to MatrixCudaOperations by value as the scalar operand.
// Returns *this so calls can be chained.
template<class T>
MatrixDevice<T>& MatrixDevice<T>::Add(T &other){
    MatrixCudaOperations(data, other, data, rows, cols, EOperation::ADD);
    return *this;
}

// Add a number of type T to a MatrixDevice and return the result as a new
// MatrixDevice. `a` is copied, `b` is added to the copy through the member
// Add, and the copy is returned.
// NOTE(review): relies on MatrixDevice's copy constructor giving `result` its
// own buffer; the reduced class shown does not declare one - confirm.
template<class T>
MatrixDevice<T> Add(const MatrixDevice<T> &a, T &b){
    MatrixDevice<T> result(a);
    result.Add(b);
    return result;
}

//Add T number with MatrixDevice to result MatrixDevice
template<class T>
void Add(const MatrixDevice &a, T &b, MatrixDevice &result){
MatrixCudaOperations(a.data, b, result.data, a.rows, a.cols, EOperation::ADD);
}

В файле с ядрами я определяю перегрузки функции MatrixCudaOperations, и код во всех этих функциях одинаков.
Я пробовал сделать это с помощью шаблонов, но это не сработало, поскольку в классе MatrixDevice мне нужно extern-объявление.

kernel.cu

// CUDA kernel template for the matrix + matrix case: takes two read-only
// input buffers and one output buffer, all with element type T.
// The kernel body is elided in this listing.
template<class T> __global__
void d_Add(const T* a, const T* b, T* result){
//code
}

// CUDA kernel template for the matrix + scalar case: takes a read-only input
// buffer, a single value of type T passed by value, and an output buffer.
// The kernel body is elided in this listing.
template<class T> __global__
void d_Add(const T* a, T b, T* result){
//code
}

namespace {

// Shared host-side launcher behind every MatrixCudaOperations overload, so the
// dispatch on `operation` is written once instead of four times.
//   T          - element type of the buffers (float or double in this file)
//   Rhs        - type of the right-hand operand: `const T*` for a second
//                matrix, `T` for a scalar; overload resolution on the kernel
//                name then selects the matching d_Add template
//   a, result  - input and output buffers, forwarded to the kernel unchanged
//   rows, cols - used only to size the launch
//   operation  - selects which kernel is launched
template<class T, class Rhs>
void LaunchMatrixOperation(const T* a, Rhs b, T* result, size_t rows, size_t cols, EOperation operation){
    // One block of rows x cols threads, exactly as in the original overloads.
    // NOTE(review): a single block is limited by the device's maximum number of
    // threads per block, so larger matrices will not launch; the kernel body is
    // not shown here, so the launch geometry is deliberately left unchanged.
    dim3 blocksize(rows, cols);

    switch(operation){
    // Qualified name matches the header's EOperation::ADD and is valid for
    // both scoped and unscoped enums.
    case EOperation::ADD:
        d_Add<<<1,blocksize>>>(a, b, result);
        break;
    //other cases, subtract, multiply...
    }
}

} // namespace

// The non-template overloads are kept because the header declares exactly
// these four signatures as extern; each one only forwards to the launcher.

// float matrix (op) float matrix
void MatrixCudaOperations(const float* a, const float* b, float* result, size_t rows, size_t cols, EOperation operation){
    LaunchMatrixOperation(a, b, result, rows, cols, operation);
}

// float matrix (op) float scalar
void MatrixCudaOperations(const float* a, float b, float* result, size_t rows, size_t cols, EOperation operation){
    LaunchMatrixOperation(a, b, result, rows, cols, operation);
}

// double matrix (op) double matrix
void MatrixCudaOperations(const double* a, const double* b, double* result, size_t rows, size_t cols, EOperation operation){
    LaunchMatrixOperation(a, b, result, rows, cols, operation);
}

// double matrix (op) double scalar
void MatrixCudaOperations(const double* a, double b, double* result, size_t rows, size_t cols, EOperation operation){
    LaunchMatrixOperation(a, b, result, rows, cols, operation);
}

0

c++cuda refactoring replication templates

Решение

Другие решения

Других решений пока нет …

Источник

Accepted Answer

Начиная сверху.

// Forward declaration: the helpers below only need the name MatrixDevice<T>;
// the member access is resolved when the templates are instantiated.
template<class T>
class MatrixDevice;

// Scalar operand: hand the value straight back by const reference.
template<class T>
static const T& to_matrix_data(const T& value)
{
    return value;
}

// Matrix operand: expose the matrix's `data` member as a pointer-to-const.
template<class T>
static const T* to_matrix_data(const MatrixDevice<T>& matrix)
{
    return matrix.data;
}

// Computes `src + rhs` and writes the outcome into `target`.
//   target - matrix whose buffer receives the result
//   src    - left-hand matrix; its rows/cols size the operation
//   rhs    - right-hand operand: either a MatrixDevice<T> or a scalar;
//            to_matrix_data<T> turns it into the pointer or value that the
//            matching MatrixCudaOperations overload expects
// The declared MatrixCudaOperations overloads take six arguments, so the
// dimensions must be passed explicitly; the original call omitted them.
template<class T, class Rhs>
void AddInto(MatrixDevice<T>& target, MatrixDevice<T> const& src, Rhs const& rhs) {
    MatrixCudaOperations(src.data, to_matrix_data<T>(rhs), target.data, src.rows, src.cols, EOperation::ADD );
}

// Matrix class whose addition operators are both built on the free function
// AddInto, so the forwarding to MatrixCudaOperations is written only once.
template<class T>
class MatrixDevice{
    T* data;
    size_t rows;
    size_t cols;

    // The free helpers read the private members directly.
    template<class U>
    friend U const* to_matrix_data( MatrixDevice<U> const& m );
    template<class U, class Rhs>
    friend void AddInto(MatrixDevice<U>& target, MatrixDevice<U> const& src, Rhs const& rhs);

public:
    // Increment-by: adds `other` (a matrix or a scalar) to this matrix in place.
    // The trailing `&` restricts the operator to lvalues.
    // Returns *this so calls can be chained.
    template<class Rhs>
    MatrixDevice& operator+=(const Rhs &other)& {
        AddInto( *this, *this, other );
        return *this;
    }

    // Add: `lhs` is taken by value, incremented, and returned, so the caller's
    // left operand is left untouched.
    template<class Rhs>
    friend MatrixDevice operator+(MatrixDevice lhs, Rhs const& rhs) {
        lhs += rhs;
        return lhs;
    }
};

Использовать слово Add для трёх разных операций — плохая идея. Одна из них — приращение (+=), другая — сложение с записью в готовый результат (AddInto), третья — обычное сложение (+).

Поэтому я написал свободную шаблонную функцию AddInto, а затем реализовал через неё приращение и сложение.

Мой вариант сложения стоит как минимум одного дополнительного перемещения (move) по сравнению с вашим, но, судя по внутреннему устройству вашей матрицы, это перемещение бесплатно.

1