33 #include <cuda_runtime.h>
52 deviceIdx(DEFAULT_DEVICE_IDX),
53 solverBlockSize1D(UNDEFINDED_SIZE), solverGridSize1D(UNDEFINDED_SIZE),
54 solverTransposeBlockSize(UNDEFINDED_SIZE), solverTransposeGirdSize(UNDEFINDED_SIZE),
55 samplerBlockSize1D(UNDEFINDED_SIZE), samplerGridSize1D(UNDEFINDED_SIZE),
102 cudaError_t lastError;
106 bool deviceFound =
false;
108 for (
int testDevice = 0; testDevice < nDevices; testDevice++)
111 cudaSetDevice(testDevice);
113 lastError = cudaGetLastError();
116 if (lastError == cudaSuccess)
120 lastError = cudaGetLastError();
122 if (cudaCodeVersionOK && (lastError == cudaSuccess))
125 this->deviceIdx = testDevice;
131 lastError = cudaDeviceReset();
146 if ((this->deviceIdx > nDevices - 1) || (this->deviceIdx < 0))
154 cudaSetDevice(this->deviceIdx);
156 lastError = cudaGetLastError();
159 lastError = cudaGetLastError();
161 if ((lastError != cudaSuccess) || (!cudaCodeVersionOK))
163 lastError = cudaDeviceReset();
214 if ((
size_t(
solverGridSize1D) *
size_t(solverBlockSize1D)) > fullDims.GetElementCount())
216 solverGridSize1D = int((fullDims.GetElementCount() + size_t(solverBlockSize1D) - 1 ) /
size_t(solverBlockSize1D));
263 constantsToTransfer.
nx = fullDimSizes.
nx;
264 constantsToTransfer.
ny = fullDimSizes.
ny;
265 constantsToTransfer.
nz = fullDimSizes.
nz;
267 constantsToTransfer.
slabSize = fullDimSizes.
nx * fullDimSizes.
ny;
269 constantsToTransfer.
nxComplex = reducedDimSizes.
nx;
270 constantsToTransfer.
nyComplex = reducedDimSizes.
ny;
271 constantsToTransfer.
nzComplex = reducedDimSizes.
nz;
280 constantsToTransfer.
dt = params.
Get_dt();
281 constantsToTransfer.
dt2 = params.
Get_dt() * 2.0f;
323 int cudaRuntimeVersion;
324 int cudaDriverVersion;
326 if (cudaRuntimeGetVersion(&cudaRuntimeVersion) != cudaSuccess)
331 if (cudaDriverGetVersion(&cudaDriverVersion) != cudaSuccess)
336 if (cudaDriverVersion < cudaRuntimeVersion)
339 cudaRuntimeVersion / 1000, (cudaRuntimeVersion % 100) / 10,
340 cudaDriverVersion / 1000, (cudaDriverVersion % 100) / 10));
size_t nx
number of elements in the x direction
TDimensionSizes GetReducedDimensionSizes() const
Reduced dimension sizes of the simulation (complex classes).
float & Get_c0_scalar()
Get c0_scalar value.
int samplerGridSize1D
Number of blocks for the 1D data sampling kernels.
unsigned int u_source_index_size
size of the u source index
size_t Get_sensor_mask_index_size() const
Get sensor_mask_index_size value.
dim3 solverTransposeBlockSize
Block size for the transposition kernels.
size_t Get_u_source_index_size() const
Get u_source_index_size value.
int deviceIdx
Index of the device the code is being run on.
dim3 solverTransposeGirdSize
Grid size for the transposition kernels.
TErrorMessage ERR_FMT_NO_FREE_DEVICE
CUDATuner error message.
unsigned int nx
size of X dimension.
float fftDivider
normalization constant for 3D FFT.
int GetCUDACodeVersion()
Get the CUDA architecture and GPU code version the code was compiled with.
float BonA_scalar
BonA value for homogeneous case.
static const int DEFAULT_DEVICE_IDX
Default Device Index - no default GPU.
The header file for the class for storing constants residing in CUDA constant memory.
Structure for CUDA parameters to be placed in constant memory. Only 32b values are used...
unsigned int slabSize
2D Slab size
float & Get_BonA_scalar()
Get BonA_scalar value.
unsigned int nxComplex
size of complex X dimension.
The header file for the class for setting CUDA kernel parameters.
static TParameters & GetInstance()
Get instance of the singleton class.
int samplerBlockSize1D
Number of threads for the 1D data sampling kernels.
void SetUpDeviceConstants() const
Upload useful simulation constants into device constant memory.
float rho0_sgx_scalar
dt / rho0_sgx in homogeneous case
int solverBlockSize1D
Number of threads for the 1D block used by kSpaceSolver.
size_t Get_u_source_many() const
Get u_source_many value.
The header file containing the parameters of the simulation.
TErrorMessage ERR_FMT_BAD_DEVICE_IDX
CUDATuner error message.
#define checkCudaErrors(val)
Macro checking CUDA errors and printing the file name and line. Inspired by CUDA common checking routines.
unsigned int nzComplex
size of complex Z dimension.
unsigned int nz
size of Z dimension.
std::string GetDeviceName() const
Get the name of the device used.
void SelectDevice(const int DeviceIdx=DEFAULT_DEVICE_IDX)
Select cuda device for execution.
float fftDividerX
normalization constant for 1D FFT over X.
float rho0_scalar
rho0 in homogeneous case
TDimensionSizes GetFullDimensionSizes() const
Full dimension sizes of the simulation (real classes).
The header file containing a class responsible for printing out info and error messages (stdout...
unsigned int p_source_many
p source many
unsigned int u_source_mode
u source mode
TCUDAParameters()
Default constructor - only friend class can create an instance.
TErrorMessage ERR_FMT_DEVICE_IS_BUSY
CUDATuner error message.
Class storing all parameters of the simulation.
size_t Get_p_source_mode() const
Get p_source_mode value.
unsigned int nElementsComplex
complex number of elements.
size_t Get_p_source_many() const
Get p_source_many value.
unsigned int p_source_index_size
size of the p_source mask
size_t GetElementCount() const
Get element count, in 3D only spatial domain, in 4D with time.
size_t ny
number of elements in the y direction
float Get_dt() const
Get dt value.
float & Get_absorb_eta_scalar()
Get absorb_eta_scalar value.
TErrorMessage ERR_FMT_INSUFFICIENT_CUDA_DRIVER
CUDAParameters error message.
float & Get_rho0_sgy_scalar()
Get rho0_sgy_scalar value.
float & Get_rho0_scalar()
Get rho0_scalar value.
unsigned int slabSizeComplex
complex slab size.
float fftDividerY
normalization constant for 1D FFT over Y.
float rho0_sgy_scalar
dt / rho0_sgy in homogeneous case
unsigned int p_source_mode
p source mode
unsigned int u_source_many
u source many
unsigned int nyComplex
size of complex Y dimension.
Namespace for all CUDA kernels used in the 3D solver.
static std::string FormatMessage(const std::string &format, Args...args)
C++-11 replacement for sprintf that works with std::string instead of char *.
unsigned int nElements
total number of elements.
cudaDeviceProp deviceProperties
Device properties of the selected GPU.
size_t Get_u_source_mode() const
Get u_source_mode value.
unsigned int ny
size of Y dimension.
int solverGridSize1D
Number of blocks for the 1D grid used by kSpaceSolver.
size_t Get_p_source_index_size() const
Get p_source_index_size value.
float & Get_absorb_tau_scalar()
Get absorb_tau_scalar value.
float rho0_sgz_scalar
dt / rho0_sgz in homogeneous case
float fftDividerZ
normalization constant for 1D FFT over Z.
void CheckCUDAVersion()
Check whether the CUDA driver version installed is sufficient for the code.
void SetKernelConfiguration()
Set kernel configurations based on the simulation parameters.
size_t nz
number of elements in the z direction
float dt_rho0_scalar
dt * rho0 in homogeneous case
float absorb_tau_scalar
Absorb_tau value for homogeneous case.
float absorb_eta_scalar
Absorb_eta value for homogeneous case.
bool CheckCUDACodeVersion()
Check whether the code was compiled for a given SM model.
float & Get_rho0_sgx_scalar()
Get rho0_sgx_scalar value.
TErrorMessage ERR_FM_CANNOT_READ_CUDA_VERSION
CUDAParameters error message.
TSensorMaskType Get_sensor_mask_type() const
Get sensor mask type (linear or corners).
Structure with 4D dimension sizes (3 in space and 1 in time).
TErrorMessage ERR_FMT_GPU_NOT_SUPPORTED
CUDAParameters error message.
__host__ void SetUpCUDADeviceConstatns()
Set constant memory.
float & Get_rho0_sgz_scalar()
Get rho0_sgz_scalar value.