35 #include <immintrin.h>
68 rootObjectName(rootObjectName),
69 sourceMatrix(sourceMatrix),
131 throw std::bad_alloc();
163 hostBuffer[i] = -1 * std::numeric_limits<float>::max();
173 hostBuffer[i] = std::numeric_limits<float>::max();
182 cudaHostRegisterPortable | cudaHostRegisterMapped));
196 throw std::bad_alloc();
237 cudaMemcpyHostToDevice));
250 cudaMemcpyDeviceToHost));
size_t GetStartTimeIndex() const
Get start time index for sensor recording.
TBaseOutputHDF5Stream()
Default constructor not allowed.
The header file of the class saving RealMatrix data into the output HDF5 file.
virtual void CopyDataFromDevice()
Copy data deviceBuffer -> hostBuffer.
calculate root mean square
static TParameters & GetInstance()
Get instance of the singleton class.
void PostProcessingRMS(float *samplingBuffer, const float scalingCoeff, const size_t nSamples)
Kernel to calculate post-processing for RMS.
const TReduceOperator reduceOp
Reduce operator.
The header file containing the parameters of the simulation.
The header file of cuda kernels used for data sampling (output streams).
const std::string TMatrixName
Datatype for matrix names.
#define checkCudaErrors(val)
Macro checking cuda errors and printing the file name and line. Inspired by CUDA common checking rout...
virtual void PostProcess()
Apply post-processing on the buffer and flush it to the file.
virtual void AllocateMemory()
A generic function to allocate memory - not used in the base class.
The header file containing a class responsible for printing out info and error messages (stdout...
TReduceOperator
How to aggregate data.
float * deviceBuffer
Temporary buffer on the GPU side - only for aggregated quantities.
store actual data (time series)
virtual void FreeMemory()
A generic function to free memory - not used in the base class.
The class for real matrices.
float * hostBuffer
Temporary buffer for storing data on the host (CPU) side.
virtual void CopyDataToDevice()
Copy data hostBuffer -> deviceBuffer.
size_t bufferSize
Buffer size.
const int DATA_ALIGNMENT
Memory alignment for SSE, SSE2, SSE3, SSE4 (16 B).
size_t Get_nt() const
Get Nt value.
Class wrapping the HDF5 routines.