34 #include <cuda_runtime.h>
91 template <TBaseOutputHDF5Stream::TReduceOperator reduceOp>
93 const float* sourceData,
94 const size_t* sensorData,
95 const size_t nSamples)
101 case TBaseOutputHDF5Stream::TReduceOperator::NONE:
103 samplingBuffer[i] = sourceData[sensorData[i]];
106 case TBaseOutputHDF5Stream::TReduceOperator::RMS:
108 samplingBuffer[i] += (sourceData[sensorData[i]] * sourceData[sensorData[i]]);
111 case TBaseOutputHDF5Stream::TReduceOperator::MAX:
113 samplingBuffer[i] = max(samplingBuffer[i], sourceData[sensorData[i]]);
116 case TBaseOutputHDF5Stream::TReduceOperator::MIN:
118 samplingBuffer[i] = min(samplingBuffer[i], sourceData[sensorData[i]]);
134 template<TBaseOutputHDF5Stream::TReduceOperator reduceOp>
136 const float* sourceData,
137 const size_t* sensorData,
138 const size_t nSamples)
140 CUDASampleIndex<reduceOp>
154 void OutputStreamsCUDAKernels::SampleIndex<TBaseOutputHDF5Stream::TReduceOperator::NONE>
155 (
float* samplingBuffer,
156 const float* sourceData,
157 const size_t* sensorData,
158 const size_t nSamples);
161 void OutputStreamsCUDAKernels::SampleIndex<TBaseOutputHDF5Stream::TReduceOperator::RMS>
162 (
float* samplingBuffer,
163 const float* sourceData,
164 const size_t* sensorData,
165 const size_t nSamples);
168 void OutputStreamsCUDAKernels::SampleIndex<TBaseOutputHDF5Stream::TReduceOperator::MAX>
169 (
float* samplingBuffer,
170 const float* sourceData,
171 const size_t* sensorData,
172 const size_t nSamples);
175 void OutputStreamsCUDAKernels::SampleIndex<TBaseOutputHDF5Stream::TReduceOperator::MIN>
176 (
float* samplingBuffer,
177 const float* sourceData,
178 const size_t* sensorData,
179 const size_t nSamples);
196 const dim3& topLeftCorner,
197 const dim3& bottomRightCorner,
198 const dim3& matrixSize)
202 dim3 cuboidSize(bottomRightCorner.x - topLeftCorner.x + 1,
203 bottomRightCorner.y - topLeftCorner.y + 1,
204 bottomRightCorner.z - topLeftCorner.z + 1);
207 size_t slabSize = cuboidSize.x * cuboidSize.y;
208 localPosition.z = cuboidIdx / slabSize;
209 localPosition.y = (cuboidIdx % slabSize) / cuboidSize.x;
210 localPosition.x = (cuboidIdx % slabSize) % cuboidSize.x;
213 dim3 globalPosition(localPosition);
214 globalPosition.z += topLeftCorner.z;
215 globalPosition.y += topLeftCorner.y;
216 globalPosition.x += topLeftCorner.x;
219 return (globalPosition.z * matrixSize.x * matrixSize.y +
220 globalPosition.y * matrixSize.x +
235 template <TBaseOutputHDF5Stream::TReduceOperator reduceOp>
237 const float* sourceData,
238 const dim3 topLeftCorner,
239 const dim3 bottomRightCorner,
240 const dim3 matrixSize,
241 const size_t nSamples)
249 case TBaseOutputHDF5Stream::TReduceOperator::NONE:
251 samplingBuffer[i] = sourceData[Position];
254 case TBaseOutputHDF5Stream::TReduceOperator::RMS:
256 samplingBuffer[i] += (sourceData[Position] * sourceData[Position]);
259 case TBaseOutputHDF5Stream::TReduceOperator::MAX:
261 samplingBuffer[i] = max(samplingBuffer[i], sourceData[Position]);
264 case TBaseOutputHDF5Stream::TReduceOperator::MIN:
266 samplingBuffer[i] = min(samplingBuffer[i], sourceData[Position]);
286 template<TBaseOutputHDF5Stream::TReduceOperator reduceOp>
288 const float* sourceData,
289 const dim3 topLeftCorner,
290 const dim3 bottomRightCorner,
291 const dim3 matrixSize,
292 const size_t nSamples)
294 CUDASampleCuboid<reduceOp>
310 void OutputStreamsCUDAKernels::SampleCuboid<TBaseOutputHDF5Stream::TReduceOperator::NONE>
311 (
float* samplingBuffer,
312 const float* sourceData,
313 const dim3 topLeftCorner,
314 const dim3 bottomRightCorner,
315 const dim3 matrixSize,
316 const size_t nSamples);
318 void OutputStreamsCUDAKernels::SampleCuboid<TBaseOutputHDF5Stream::TReduceOperator::RMS>
319 (
float* samplingBuffer,
320 const float* sourceData,
321 const dim3 topLeftCorner,
322 const dim3 bottomRightCorner,
323 const dim3 matrixSize,
324 const size_t nSamples);
326 void OutputStreamsCUDAKernels::SampleCuboid<TBaseOutputHDF5Stream::TReduceOperator::MAX>
327 (
float* samplingBuffer,
328 const float* sourceData,
329 const dim3 topLeftCorner,
330 const dim3 bottomRightCorner,
331 const dim3 matrixSize,
332 const size_t nSamples);
334 void OutputStreamsCUDAKernels::SampleCuboid<TBaseOutputHDF5Stream::TReduceOperator::MIN>
335 (
float* samplingBuffer,
336 const float* sourceData,
337 const dim3 topLeftCorner,
338 const dim3 bottomRightCorner,
339 const dim3 matrixSize,
340 const size_t nSamples);
354 template <TBaseOutputHDF5Stream::TReduceOperator reduceOp>
356 const float* sourceData,
357 const size_t nSamples)
363 case TBaseOutputHDF5Stream::TReduceOperator::RMS:
365 samplingBuffer[i] += (sourceData[i] * sourceData[i]);
368 case TBaseOutputHDF5Stream::TReduceOperator::MAX:
370 samplingBuffer[i] = max(samplingBuffer[i], sourceData[i]);
373 case TBaseOutputHDF5Stream::TReduceOperator::MIN:
375 samplingBuffer[i] = min(samplingBuffer[i], sourceData[i]);
391 template<TBaseOutputHDF5Stream::TReduceOperator reduceOp>
393 const float* sourceData,
394 const size_t nSamples)
396 CUDASampleAll<reduceOp>
409 void OutputStreamsCUDAKernels::SampleAll<TBaseOutputHDF5Stream::TReduceOperator::RMS>
410 (
float* samplingBuffer,
411 const float* sourceData,
412 const size_t nSamples);
414 void OutputStreamsCUDAKernels::SampleAll<TBaseOutputHDF5Stream::TReduceOperator::MAX>
415 (
float* samplingBuffer,
416 const float* sourceData,
417 const size_t nSamples);
419 void OutputStreamsCUDAKernels::SampleAll<TBaseOutputHDF5Stream::TReduceOperator::MIN>
420 (
float* samplingBuffer,
421 const float* sourceData,
422 const size_t nSamples);
437 const float scalingCoeff,
438 const size_t nSamples)
442 samplingBuffer[i] = sqrt(samplingBuffer[i] * scalingCoeff);
456 const float scalingCoeff,
457 const size_t nSamples)
459 CUDAPostProcessingRMS<<<GetSamplerGridSize(),GetSamplerBlockSize()>>>
460 (samplingBuffer, scalingCoeff, nSamples);
int GetSamplerGridSize1D() const
Get Number of blocks for the 1D data sampling kernels.
__global__ void CUDASampleIndex(float *samplingBuffer, const float *sourceData, const size_t *sensorData, const size_t nSamples)
__global__ void CUDAPostProcessingRMS(float *samplingBuffer, const float scalingCoeff, const size_t nSamples)
The header file of the class saving RealMatrix data into the output HDF5 file.
void SampleIndex(float *samplingBuffer, const float *sourceData, const size_t *sensorData, const size_t nSamples)
Kernel to sample quantities using an index sensor mask.
static TParameters & GetInstance()
Get instance of the singleton class.
__device__ unsigned int GetIndex()
Get global 1D coordinate for 1D CUDA block.
void PostProcessingRMS(float *samplingBuffer, const float scalingCoeff, const size_t nSamples)
Kernel to calculate post-processing for RMS.
The header file containing the parameters of the simulation.
The header file of cuda kernels used for data sampling (output streams).
#define checkCudaErrors(val)
Macro checking cuda errors and printing the file name and line. Inspired by CUDA common checking rout...
The header file containing a class responsible for printing out info and error messages (stdout...
const TCUDAParameters & GetCudaParameters() const
Get class with CUDA Parameters (runtime setup), cosnt version.
int GetSamplerBlockSize1D() const
Get number of threads for the 1D data sampling kernels.
The header file with CUDA utility functions. These routines are to be inlined.
__global__ void CUDASampleCuboid(float *samplingBuffer, const float *sourceData, const dim3 topLeftCorner, const dim3 bottomRightCorner, const dim3 matrixSize, const size_t nSamples)
__device__ unsigned int GetStride()
Get x-stride for 3D CUDA block (for processing multiple grid points by a single thread).
int GetSamplerBlockSize()
void SampleCuboid(float *samplingBuffer, const float *sourceData, const dim3 topLeftCorner, const dim3 bottomRightCorner, const dim3 matrixSize, const size_t nSamples)
Kernel to sample quantities inside one cuboid.
__global__ void CUDASampleAll(float *samplingBuffer, const float *sourceData, const size_t nSamples)
__device__ size_t TransformCoordinates(const size_t cuboidIdx, const dim3 &topLeftCorner, const dim3 &bottomRightCorner, const dim3 &matrixSize)
void SampleAll(float *samplingBuffer, const float *sourceData, const size_t nSamples)
Kernel to sample of the quantity on the whole domain.