33 #include <immintrin.h>
54 nAllocatedElements(0),
70 #pragma omp parallel for schedule (static)
86 cudaMemcpyHostToDevice));
99 cudaMemcpyDeviceToHost));
122 throw std::bad_alloc();
126 checkCudaErrors(cudaHostRegister(hostData, sizeInBytes, cudaHostRegisterPortable));
130 throw std::bad_alloc();
Abstract base class. The common ancestor defining the common interface and allowing derived classes t...
size_t nAllocatedElements
Total number of allocated elements (the array size).
#define checkCudaErrors(val)
Macro checking CUDA errors and printing the file name and line. Inspired by CUDA common checking rout...
virtual void CopyFromDevice()
Copy data from GPU -> CPU (Device -> Host).
size_t * deviceData
Raw GPU matrix data.
virtual void AllocateMemory()
Memory allocation (both on CPU and GPU).
The header file containing a class responsible for printing out info and error messages (stdout...
virtual void ZeroMatrix()
Zero all elements of the matrix (NUMA first touch).
The header file containing the structure with 3D dimension sizes.
size_t * hostData
Raw CPU matrix data.
virtual void CopyToDevice()
Copy data from CPU -> GPU (Host -> Device).
The header file containing the base class for 64b-wide integers implemented as size_t datatype...
const int DATA_ALIGNMENT
Memory alignment for SSE, SSE2, SSE3, SSE4 (16B).
TBaseIndexMatrix()
Default constructor.
virtual void FreeMemory()
Memory deallocation (both on CPU and GPU).