37 #include <immintrin.h>
82 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
83 for (
size_t i = 0; i < BufferSize; i++)
85 StoreBuffer[i] = sqrt(StoreBuffer[i] * ScalingCoeff);
121 StoreBuffer = (
float *) _mm_malloc(BufferSize *
sizeof (
float),
DATA_ALIGNMENT);
135 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
136 for (
size_t i = 0; i < BufferSize; i++)
138 StoreBuffer[i] = 0.0f;
146 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
147 for (
size_t i = 0; i < BufferSize; i++)
149 StoreBuffer[i] = 0.0f;
157 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
158 for (
size_t i = 0; i < BufferSize; i++)
160 StoreBuffer[i] = -1 * std::numeric_limits<float>::max();
168 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
169 for (
size_t i = 0; i < BufferSize; i++)
171 StoreBuffer[i] = std::numeric_limits<float>::max();
188 _mm_free(StoreBuffer);
217 const char * HDF5_ObjectName,
221 float * BufferToReuse)
223 SensorMask(SensorMask),
224 HDF5_DatasetId(H5I_BADID),
279 THDF5_File::hdf5_mdt_real);
282 THDF5_File::hdf5_mdt_float);
348 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
369 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
372 StoreBuffer[i] += (SourceData[SensorData[i]] * SourceData[SensorData[i]]);
379 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
390 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
481 const char * HDF5_GroupName,
485 float * BufferToReuse)
487 SensorMask(SensorMask),
488 HDF5_GroupId(H5I_BADID),
523 size_t ActualPositionInBuffer = 0;
525 for (
size_t CuboidIndex = 0; CuboidIndex < NumberOfCuboids; CuboidIndex++)
573 size_t ActualPositionInBuffer = 0;
578 for (
size_t CuboidIndex = 0; CuboidIndex < NumberOfCuboids; CuboidIndex++)
583 char HDF5_DatasetName[32] =
"";
585 sprintf(HDF5_DatasetName,
"%ld",CuboidIndex + 1);
633 for (
size_t CuboidIndex = 0; CuboidIndex <
CuboidsInfo.size(); CuboidIndex++)
681 size_t CuboidInBufferStart = 0;
689 size_t cuboid_XY_plane_size = (BottomRightCorner.
Y - TopLeftCorner.
Y + 1) *
690 (BottomRightCorner.
X - TopLeftCorner.
X + 1);
691 size_t cuboid_X_plane_size = (BottomRightCorner.
X - TopLeftCorner.
X + 1);
693 #pragma omp parallel for collapse(3) \
694 if ((BottomRightCorner - TopLeftCorner).GetElementCount() > MinGridpointsToSampleInParallel)
695 for (
size_t z = TopLeftCorner.
Z; z <= BottomRightCorner.
Z; z++)
696 for (
size_t y = TopLeftCorner.
Y; y <= BottomRightCorner.
Y; y++)
697 for (
size_t x = TopLeftCorner.
X; x <= BottomRightCorner.
X; x++)
699 const size_t StoreBufferIndex = CuboidInBufferStart +
700 (z - TopLeftCorner.
Z) * cuboid_XY_plane_size +
701 (y - TopLeftCorner.
Y) * cuboid_X_plane_size +
702 (x - TopLeftCorner.
X);
704 const size_t SourceIndex = z * XY_Size + y * X_Size + x;
707 StoreBuffer[StoreBufferIndex] += (SourceData[SourceIndex] * SourceData[SourceIndex]);
710 CuboidInBufferStart += (BottomRightCorner - TopLeftCorner).GetElementCount();
718 size_t CuboidInBufferStart = 0;
726 size_t cuboid_XY_plane_size = (BottomRightCorner.
Y - TopLeftCorner.
Y + 1) *
727 (BottomRightCorner.
X - TopLeftCorner.
X + 1);
728 size_t cuboid_X_plane_size = (BottomRightCorner.
X - TopLeftCorner.
X + 1);
730 #pragma omp parallel for collapse(3) \
731 if ((BottomRightCorner - TopLeftCorner).GetElementCount() > MinGridpointsToSampleInParallel)
732 for (
size_t z = TopLeftCorner.
Z; z <= BottomRightCorner.
Z; z++)
733 for (
size_t y = TopLeftCorner.
Y; y <= BottomRightCorner.
Y; y++)
734 for (
size_t x = TopLeftCorner.
X; x <= BottomRightCorner.
X; x++)
736 const size_t StoreBufferIndex = CuboidInBufferStart +
737 (z - TopLeftCorner.
Z) * cuboid_XY_plane_size +
738 (y - TopLeftCorner.
Y) * cuboid_X_plane_size +
739 (x - TopLeftCorner.
X);
741 const size_t SourceIndex = z * XY_Size + y * X_Size + x;
744 if (
StoreBuffer[StoreBufferIndex] < SourceData[SourceIndex])
746 StoreBuffer[StoreBufferIndex] = SourceData[SourceIndex];
749 CuboidInBufferStart += (BottomRightCorner - TopLeftCorner).GetElementCount();
756 size_t CuboidInBufferStart = 0;
764 size_t cuboid_XY_plane_size = (BottomRightCorner.
Y - TopLeftCorner.
Y + 1) *
765 (BottomRightCorner.
X - TopLeftCorner.
X + 1);
766 size_t cuboid_X_plane_size = (BottomRightCorner.
X - TopLeftCorner.
X + 1);
768 #pragma omp parallel for collapse(3) \
769 if ((BottomRightCorner - TopLeftCorner).GetElementCount() > MinGridpointsToSampleInParallel)
770 for (
size_t z = TopLeftCorner.
Z; z <= BottomRightCorner.
Z; z++)
771 for (
size_t y = TopLeftCorner.
Y; y <= BottomRightCorner.
Y; y++)
772 for (
size_t x = TopLeftCorner.
X; x <= BottomRightCorner.
X; x++)
774 const size_t StoreBufferIndex = CuboidInBufferStart +
775 (z - TopLeftCorner.
Z) * cuboid_XY_plane_size +
776 (y - TopLeftCorner.
Y) * cuboid_X_plane_size +
777 (x - TopLeftCorner.
X);
778 const size_t SourceIndex = z * XY_Size + y * X_Size + x;
781 if (
StoreBuffer[StoreBufferIndex] > SourceData[SourceIndex])
783 StoreBuffer[StoreBufferIndex] = SourceData[SourceIndex];
786 CuboidInBufferStart += (BottomRightCorner - TopLeftCorner).GetElementCount();
829 for (
size_t CuboidIndex = 0; CuboidIndex <
CuboidsInfo.size(); CuboidIndex++)
858 size_t NumberOfSampledTimeSteps = (
ReductionOp == roNONE)
865 NumberOfSampledTimeSteps
870 size_t NumberOfSlabs = 1;
875 while (NumberOfSlabs * CuboidSize.
X * CuboidSize.
Y <
ChunkSize_4MB) NumberOfSlabs++;
876 CuboidChunkSize.
Z = NumberOfSlabs;
880 char HDF5_DatasetName[32] =
"";
882 sprintf(HDF5_DatasetName,
"%ld",Index+1);
893 THDF5_File::hdf5_mdt_real);
896 THDF5_File::hdf5_mdt_float);
898 return HDF5_DatasetId;
914 for (
size_t CuboidIndex = 0; CuboidIndex <
CuboidsInfo.size(); CuboidIndex++)
948 const char * HDF5_DatasetName,
951 float * BufferToReuse)
953 HDF5_DatasetId(H5I_BADID),
990 THDF5_File::hdf5_mdt_real);
993 THDF5_File::hdf5_mdt_float);
1083 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
1086 StoreBuffer[i] += (SourceData[i] * SourceData[i]);
1093 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
1103 #pragma omp parallel for if (BufferSize > MinGridpointsToSampleInParallel)
size_t Z
Z dimension size.
virtual float * GetRawData()
Get raw data out of the class (for direct kernel access).
size_t SampledTimeStep
Time step to store (N/A for aggregated).
size_t GetTotalNumberOfElementsInAllCuboids() const
Get the total number of elements to be sampled within all cuboids.
hid_t CreateGroup(const hid_t ParentGroup, const char *GroupName)
Create a HDF5 group at a specified place in the file tree.
virtual void FlushBufferToFile()
Flush the buffer to the file.
std::vector< TCuboidInfo > CuboidsInfo
Vector keeping handles and positions of all cuboids.
TReductionOperator
How to aggregate data.
TCuboidOutputHDF5Stream(THDF5_File &HDF5_File, const char *HDF5_GroupName, const TRealMatrix &SourceMatrix, const TIndexMatrix &SensorMask, const TReductionOperator ReductionOp, float *BufferToReuse=NULL)
Constructor - links the HDF5 File, SourceMatrix, and SensorMask together.
size_t GetStartTimeIndex() const
Get start time index for sensor recording.
char * HDF5_RootObjectName
Dataset name.
virtual void Reopen()
Reopen the output stream after restart and reload data.
virtual size_t GetTotalElementCount() const
Get element count of the matrix.
const TReductionOperator ReductionOp
Reduction operator.
hid_t OpenGroup(const hid_t ParentGroup, const char *GroupName)
Open a HDF5 group at a specified place in the file tree.
const TIndexMatrix & SensorMask
Sensor mask to sample data.
TDimensionSizes GetBottomRightCorner(const size_t &index) const
Get the bottom right corner of the index-th cuboid.
bool BufferReuse
If true, the container reuses an existing buffer, e.g. Temp_1_RS3D, Temp_2_RS3D, Temp_3_RS3D.
virtual void Close()
Close stream (apply post-processing if necessary, flush data and close).
void WriteCuboidToHyperSlab(const hid_t HDF5_Dataset_id, const TDimensionSizes &HyperslabPosition, const TDimensionSizes &CuboidPosition, const TDimensionSizes &CuboidSize, const TDimensionSizes &MatrixDimensions, const float *MatrixData)
Write a cuboid selected inside MatrixData into a Hyperslab.
virtual size_t GetTotalElementCount() const
Get total element count of the matrix.
virtual size_t * GetRawData()
Get raw data out of the class (for direct kernel access).
TIndexOutputHDF5Stream(THDF5_File &HDF5_File, const char *HDF5_ObjectName, const TRealMatrix &SourceMatrix, const TIndexMatrix &SensorMask, const TReductionOperator ReductionOp, float *BufferToReuse=NULL)
Constructor - links the HDF5 dataset, SourceMatrix, and SensorMask together.
virtual void Sample()
Sample data into buffer and apply reduction, or flush to disk (no sensor mask here).
size_t X
X dimension size.
virtual void Reopen()
Reopen the output stream after restart and reload data.
virtual void Sample()
Sample data into buffer, apply reduction or flush to disk - based on a sensor mask.
The header file containing the parameters of the simulation.
const TIndexMatrix & SensorMask
Sensor mask to sample data.
virtual void PostProcess()
Apply post-processing on the buffer and flush it to the file.
hid_t HDF5_CuboidId
ID of the dataset storing the given cuboid.
void WriteHyperSlab(const hid_t HDF5_Dataset_id, const TDimensionSizes &Position, const TDimensionSizes &Size, const float *Data)
Write a hyper-slab into the dataset - float dataset.
virtual void AllocateMemory()
A generic function to allocate memory - not used in the base class.
size_t SampledTimeStep
Time step to store (N/A for aggregated).
virtual TDimensionSizes GetDimensionSizes() const
Get dimension sizes of the matrix.
float * StoreBuffer
Temporary buffer for storing data - only if BufferReuse = false!
size_t Get_t_index() const
Get simulation time step.
const TRealMatrix & SourceMatrix
Source matrix to be sampled.
virtual void Checkpoint()
Checkpoint the stream.
virtual void FlushBufferToFile()
Flush the buffer to the file.
virtual ~TWholeDomainOutputHDF5Stream()
Destructor.
void WriteMatrixDataType(const hid_t ParentGroup, const char *DatasetName, const THDF5_MatrixDataType &MatrixDataType)
Write matrix data type into the dataset under a specified group.
The header file of classes responsible for storing output quantities into the output HDF5 file...
void WriteMatrixDomainType(const hid_t ParentGroup, const char *DatasetName, const THDF5_MatrixDomainType &MatrixDomainType)
Write matrix domain type into the dataset under the root group.
Class storing all parameters of the simulation.
TDimensionSizes GetTopLeftCorner(const size_t &index) const
Get the top left corner of the index-th cuboid.
virtual void PostProcess()
Apply post-processing on the buffer and flush it to the file.
hid_t GetRootGroup() const
Get handle to the root group.
virtual void Create()
Create a HDF5 stream and allocate data for it.
The header file containing all error messages of the project.
hid_t CreateFloatDataset(const hid_t ParentGroup, const char *DatasetName, const TDimensionSizes &DimensionSizes, const TDimensionSizes &ChunkSizes, const size_t CompressionLevel)
Create the HDF5 dataset at a specified place in the file tree (3D/4D).
size_t GetElementCount() const
Get element count, in 3D only spatial domain, in 4D with time.
hid_t OpenDataset(const hid_t ParentGroup, const char *DatasetName)
Open the HDF5 dataset at a specified place in the file tree.
Abstract base class for output data streams (sampled data).
virtual void Checkpoint()
Checkpoint the stream and close.
virtual void Close()
Close stream (apply post-processing if necessary, flush data and close).
virtual void FreeMemory()
A generic function to free memory - not used in the base class.
The class for real matrices.
size_t Y
Y dimension size.
static const size_t ChunkSize_4MB
Chunk size of 4MB expressed as a number of float elements.
This structure holds information about a HDF5 dataset (one cuboid), namely its HDF5_ID and its starting position in a lineup buffer.
void ReadCompleteDataset(const hid_t ParentGroup, const char *DatasetName, const TDimensionSizes &DimensionSizes, float *Data)
Read data from the dataset under a specified group - float dataset.
size_t Get_Nt() const
Get Nt value.
virtual void Checkpoint()
Checkpoint the stream and close.
The class for 64b unsigned integers (indices). It is used for sensor_mask_index or sensor_corners_mask...
virtual void PostProcess()
Apply post-processing on the buffer and flush it to the file.
size_t T
Number of time steps (for time series datasets).
void CloseGroup(const hid_t Group)
Close HDF5 group.
size_t SampledTimeStep
Time step to store (N/A for aggregated).
virtual void Sample()
Sample data into buffer and apply reduction, or flush to disk - based on a sensor mask...
virtual void Close()
Close stream (apply post-processing if necessary, flush data and close).
size_t GetCompressionLevel() const
Get compression level.
size_t BufferSize
Buffer size.
size_t StartingPossitionInBuffer
Having a single buffer for all cuboids, where this one starts.
hid_t HDF5_GroupId
Handle to a HDF5 group.
struct TDimensionSizes GetDimensionSizes() const
Get dimension sizes of the matrix.
const int DATA_ALIGNMENT
Memory alignment for SSE, SSE2, SSE3, SSE4 (16B).
hid_t HDF5_DatasetId
Handle to a HDF5 dataset.
virtual void Reopen()
Reopen the output stream after restart and reload data.
virtual hid_t CreateCuboidDataset(const size_t Index)
Create a new dataset for a given cuboid specified by index (order).
virtual void Create()
Create a HDF5 stream and allocate data for it.
hid_t HDF5_DatasetId
Handle to a HDF5 dataset.
const char *const Matrix_ERR_FMT_NotEnoughMemory
Matrix class error message.
void CloseDataset(const hid_t HDF5_Dataset_id)
Close the HDF5 dataset.
virtual void PostProcess()
Apply post-processing on the buffer and flush it to the file.
virtual void FlushBufferToFile()
Flush the buffer to the file.
THDF5_File & HDF5_File
HDF5 file handle.
virtual ~TIndexOutputHDF5Stream()
Destructor.
TWholeDomainOutputHDF5Stream(THDF5_File &HDF5_File, const char *HDF5_DatasetName, const TRealMatrix &SourceMatrix, const TReductionOperator ReductionOp, float *BufferToReuse=NULL)
Constructor - links the HDF5 File and SourceMatrix together (no sensor mask is used for the whole domain).
Class wrapping the HDF5 routines.
virtual void Create()
Create a HDF5 stream and allocate data for it.
Structure with 4D dimension sizes (3 in space and 1 in time).
virtual ~TCuboidOutputHDF5Stream()
Destructor.
static TParameters * GetInstance()
Get instance of the singleton class.