kspaceFirstOrder3D-CUDA  1.1
The CUDA/C++ implementation of the k-wave toolbox for the time-domain simulation of acoustic wave fields in 3D
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BaseIndexMatrix.cpp
Go to the documentation of this file.
1 /**
2  * @file BaseIndexMatrix.cpp
3  *
4  * @author Jiri Jaros \n
5  * Faculty of Information Technology \n
6  * Brno University of Technology \n
7  * jarosjir@fit.vutbr.cz
8  *
9  * @brief The implementation file containing the base class for index matrices holding 64b-wide
10  * integers, implemented with the size_t datatype.
11  *
12  * @version kspaceFirstOrder3D 3.4
13  *
14  * @date 26 July 2011, 14:17 (created) \n
15  * 29 July 2016, 16:51 (revised)
16  *
17  * @section License
18  * This file is part of the C++ extension of the k-Wave Toolbox
19  * (http://www.k-wave.org).\n Copyright (C) 2016 Jiri Jaros and Bradley Treeby.
20  *
21  * This file is part of the k-Wave. k-Wave is free software: you can redistribute it and/or modify
22  * it under the terms of the GNU Lesser General Public License as published by the Free Software
23  * Foundation, either version 3 of the License, or (at your option) any later version.
24  *
25  * k-Wave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
26  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
27  * General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License along with k-Wave.
30  * If not, see http://www.gnu.org/licenses/.
31  */
32 
33 #include <immintrin.h>
34 
36 #include <Utils/DimensionSizes.h>
37 #include <Logger/Logger.h>
38 
39 
40 //------------------------------------------------------------------------------------------------//
41 //------------------------------------------ Constants -------------------------------------------//
42 //------------------------------------------------------------------------------------------------//
43 
44 //------------------------------------------------------------------------------------------------//
45 //--------------------------------------- Public methods -----------------------------------------//
46 //------------------------------------------------------------------------------------------------//
47 
48 
49 /**
50  * Default constructor
51  */
53  nElements(0),
54  nAllocatedElements(0),
55  dimensionSizes(),
56  rowSize(0),
57  slabSize(0),
58  hostData(nullptr),
59  deviceData(nullptr)
60 {
61 
62 }// end of TBaseIndexMatrix
63 //--------------------------------------------------------------------------------------------------
64 
65 /**
66  * Zero all allocated elements.
67  */
69 {
70  #pragma omp parallel for schedule (static)
71  for (size_t i = 0; i < nAllocatedElements; i++)
72  {
73  hostData[i] = size_t(0);
74  }
75 }// end of ZeroMatrix
76 //--------------------------------------------------------------------------------------------------
77 
78 /**
79  * Copy data from CPU -> GPU (Host -> Device).
80  */
82 {
83  checkCudaErrors(cudaMemcpy(deviceData,
84  hostData,
85  nAllocatedElements * sizeof(size_t),
86  cudaMemcpyHostToDevice));
87 
88 }// end of CopyToDevice
89 //--------------------------------------------------------------------------------------------------
90 
91 /**
92  * Copy data from GPU -> CPU (Device -> Host).
93  */
95 {
96  checkCudaErrors(cudaMemcpy(hostData,
97  deviceData,
98  nAllocatedElements * sizeof(size_t),
99  cudaMemcpyDeviceToHost));
100 }// end of CopyFromDevice
101 //--------------------------------------------------------------------------------------------------
102 
103 //------------------------------------------------------------------------------------------------//
104 //-------------------------------------- Protected methods ---------------------------------------//
105 //------------------------------------------------------------------------------------------------//
106 
107 /**
108  * Memory allocation based on the total number of elements. \n
109  *
110  * CPU memory is aligned by the DATA_ALIGNMENT and then registered as pinned and zeroed.
111  * The GPU memory is allocated on GPU but not zeroed (no reason).
112  */
114 {
115  //size of memory to allocate
116  size_t sizeInBytes = nAllocatedElements * sizeof(size_t);
117 
118  hostData = static_cast<size_t*>(_mm_malloc(sizeInBytes, DATA_ALIGNMENT));
119 
120  if (!hostData)
121  {
122  throw std::bad_alloc();
123  }
124 
125  // Register Host memory (pin in memory)
126  checkCudaErrors(cudaHostRegister(hostData, sizeInBytes, cudaHostRegisterPortable));
127 
128  if ((cudaMalloc<size_t>(&deviceData, sizeInBytes) != cudaSuccess) || (!deviceData))
129  {
130  throw std::bad_alloc();
131  }
132 }// end of AllocateMemory
133 //--------------------------------------------------------------------------------------------------
134 
135 /**
136  * Free memory.
137  */
139 {
140  if (hostData)
141  {
142  cudaHostUnregister(hostData);
143  _mm_free(hostData);
144  }
145  hostData = nullptr;
146 
147  // Free GPU memory
148  if (deviceData)
149  {
150  checkCudaErrors(cudaFree(deviceData));
151  }
152  deviceData = nullptr;
153 }// end of FreeMemory
154 //--------------------------------------------------------------------------------------------------
155 
156 //------------------------------------------------------------------------------------------------//
157 //--------------------------------------- Private methods ----------------------------------------//
158 //------------------------------------------------------------------------------------------------//
Abstract base class. The common ancestor defining the common interface and allowing derived classes t...
Definition: BaseMatrix.h:48
size_t nAllocatedElements
Total number of allocated elements (the array size).
#define checkCudaErrors(val)
Macro checking cuda errors and printing the file name and line. Inspired by CUDA common checking rout...
Definition: Logger.h:209
virtual void CopyFromDevice()
Copy data from GPU -> CPU (Device -> Host).
size_t * deviceData
Raw GPU matrix data.
virtual void AllocateMemory()
Memory allocation (both on CPU and GPU)
The header file containing a class responsible for printing out info and error messages (stdout...
virtual void ZeroMatrix()
Zero all elements of the matrix (NUMA first touch).
The header file containing the structure with 3D dimension sizes.
size_t * hostData
Raw CPU matrix data.
virtual void CopyToDevice()
Copy data from CPU -> GPU (Host -> Device).
The header file containing the base class for 64b-wide integers implemented as size_t datatype...
const int DATA_ALIGNMENT
memory alignment for SSE, SSE2, SSE3, SSE4 (16B)
TBaseIndexMatrix()
Default constructor.
virtual void FreeMemory()
Memory deallocation (both on CPU and GPU)