kspaceFirstOrder3D-CUDA  1.1
The CUDA/C++ implementation of the k-wave toolbox for the time-domain simulation of acoustic wave fields in 3D
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CUFFTComplexMatrix.cpp
Go to the documentation of this file.
1 /**
2  * @file CUFFTComplexMatrix.cpp
3  *
4  * @author Jiri Jaros \n
5  * Faculty of Information Technology \n
6  * Brno University of Technology \n
7  * jarosjir@fit.vutbr.cz
8  *
9  * @brief The implementation file containing the class that implements 3D FFT using the cuFFT
10  * interface.
11  *
12  * @version kspaceFirstOrder3D 3.4
13  *
14  * @date 09 August 2011, 13:10 (created) \n
15  * 10 August 2016, 11:56 (revised)
16  *
17  * @section License
18  * This file is part of the C++ extension of the k-Wave Toolbox
19  * (http://www.k-wave.org).\n Copyright (C) 2016 Jiri Jaros and Bradley Treeby.
20  *
21  * This file is part of the k-Wave. k-Wave is free software: you can redistribute it and/or modify
22  * it under the terms of the GNU Lesser General Public License as published by the Free Software
23  * Foundation, either version 3 of the License, or (at your option) any later version.
24  *
25  * k-Wave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
26  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
27  * General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License along with k-Wave.
30  * If not, see http://www.gnu.org/licenses/.
31  */
32 
33 
34 #include <string>
35 #include <stdexcept>
36 #include <cufft.h>
37 
40 #include <Logger/Logger.h>
42 
43 
44 //------------------------------------------------------------------------------------------------//
45 //------------------------------------------ Constants -------------------------------------------//
46 //------------------------------------------------------------------------------------------------//
47 
48 
49 
50 //------------------------------------------------------------------------------------------------//
51 //----------------------------------- Static Member Variables ------------------------------------//
52 //------------------------------------------------------------------------------------------------//
53 cufftHandle TCUFFTComplexMatrix::cufftPlan_3D_R2C = static_cast<cufftHandle>(NULL);
54 cufftHandle TCUFFTComplexMatrix::cufftPlan_3D_C2R = static_cast<cufftHandle>(NULL);
55 
56 cufftHandle TCUFFTComplexMatrix::cufftPlan_1DX_R2C = static_cast<cufftHandle>(NULL);
57 cufftHandle TCUFFTComplexMatrix::cufftPlan_1DY_R2C = static_cast<cufftHandle>(NULL);
58 cufftHandle TCUFFTComplexMatrix::cufftPlan_1DZ_R2C = static_cast<cufftHandle>(NULL);
59 cufftHandle TCUFFTComplexMatrix::cufftPlan_1DX_C2R = static_cast<cufftHandle>(NULL);
60 cufftHandle TCUFFTComplexMatrix::cufftPlan_1DY_C2R = static_cast<cufftHandle>(NULL);
61 cufftHandle TCUFFTComplexMatrix::cufftPlan_1DZ_C2R = static_cast<cufftHandle>(NULL);
62 
63 
64 
65 
66 /**
67  * Table translating cuFFT result codes into error message formats. ThrowCUFFTException() looks
67  * the code up here and falls back to ERR_FMT_CUFFT_UNKNOWN_ERROR when the code is not present.
67  * The table is emptied by DestroyAllPlansAndStaticData().
68  */
69 std::map<cufftResult, TErrorMessage> TCUFFTComplexMatrix::cuFFTErrorMessages
70 {
71  {CUFFT_INVALID_PLAN , ERR_FMT_CUFFT_INVALID_PLAN},
72  {CUFFT_ALLOC_FAILED , ERR_FMT_CUFFT_ALLOC_FAILED},
73  {CUFFT_INVALID_TYPE , ERR_FMT_CUFFT_INVALID_TYPE},
74  {CUFFT_INVALID_VALUE , ERR_FMT_CUFFT_INVALID_VALUE},
75  {CUFFT_INTERNAL_ERROR , ERR_FMT_CUFFT_INVALID_VALUE}, // NOTE(review): reuses the INVALID_VALUE text - looks like a copy/paste slip; confirm whether a dedicated internal-error message exists
76  {CUFFT_EXEC_FAILED , ERR_FMT_CUFFT_EXEC_FAILED},
77  {CUFFT_SETUP_FAILED , eRR_FMT_CUFFT_SETUP_FAILED},
78  {CUFFT_INVALID_SIZE , ERR_FMT_CUFFT_INVALID_SIZE},
79  {CUFFT_UNALIGNED_DATA , ERR_FMT_CUFFT_UNALIGNED_DATA},
80  {CUFFT_INCOMPLETE_PARAMETER_LIST, ERR_FMT_CUFFT_INCOMPLETE_PARAMETER_LIST},
81  {CUFFT_INVALID_DEVICE , ERR_FMT_CUFFT_INVALID_DEVICE},
82  {CUFFT_PARSE_ERROR , ERR_FMT_CUFFT_PARSE_ERROR},
83  {CUFFT_NO_WORKSPACE , ERR_FMT_CUFFT_NO_WORKSPACE},
84  {CUFFT_NOT_IMPLEMENTED , eRR_FMT_CUFFT_NOT_IMPLEMENTED},
85  {CUFFT_LICENSE_ERROR , ERR_FMT_CUFFT_LICENSE_ERROR}
86 };
87 //--------------------------------------------------------------------------------------------------
88 
89 
90 //------------------------------------------------------------------------------------------------//
91 //--------------------------------------- Public methods -----------------------------------------//
92 //------------------------------------------------------------------------------------------------//
93 
94 /**
95  * Create a cuFFT plan for 3D Real-to-Complex. \n
96  * This version doesn't need any scratch place for planning.
97  *
98  * @param [in] inMatrixDims - The dimension sizes of the input matrix
99  * @throw runtime_error if the plan can't be created.
100  */
102 {
103  cufftResult cufftError;
104  cufftError = cufftPlan3d(&cufftPlan_3D_R2C,
105  static_cast<int>(inMatrixDims.nz),
106  static_cast<int>(inMatrixDims.ny),
107  static_cast<int>(inMatrixDims.nx),
108  CUFFT_R2C);
109 
110  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Plan_3D_R2C");
111 
112  // be careful, this feature is deprecated in CUDA 6.5
113  cufftError = cufftSetCompatibilityMode(cufftPlan_3D_R2C, CUFFT_COMPATIBILITY_NATIVE);
114 
115  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_3D_R2C_Plan");
116 }// end of CreateFFTPlan3D_RealToComplex
117 //-------------------------------------------------------------------------------------------------
118 
119 /**
120  * Create cuFFT plan for Complex-to-Real. \n
121  * This version doesn't need any scratch place for planning.
122  *
123  * @param [in] outMatrixDims - the dimension sizes of the output matrix
124  * @throw runtime_error if the plan can't be created.
125  */
127 {
128  cufftResult_t cufftError;
129  cufftError = cufftPlan3d(&cufftPlan_3D_C2R,
130  static_cast<int>(outMatrixDims.nz),
131  static_cast<int>(outMatrixDims.ny),
132  static_cast<int>(outMatrixDims.nx),
133  CUFFT_C2R);
134 
135  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_3D_C2R");
136 
137 
138  // be careful, this feature is deprecated in CUDA 6.5
139  cufftError = cufftSetCompatibilityMode(cufftPlan_3D_C2R, CUFFT_COMPATIBILITY_NATIVE);
140 
141  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_3D_C2R_Plan");
142 }//end of CreateFFTPlan3D_ComplexToReal
143 //--------------------------------------------------------------------------------------------------
144 
145 
146 /**
147  * Create cuFFT plan for 1DX Real-to-Complex. \n
148  * This version doesn't need any scratch place for planning. All 1D transforms are done in a
149  * single batch (no transpose needed).
150  *
151  * @param [in] inMatrixDims - The dimension sizes of the input matrix
152  * @throw runtime_error if the plan can't be created.
153  */
155 {
156  cufftResult_t cufftError;
157 
158  // set dimensions
159  const int nx = static_cast<int> (inMatrixDims.nx);
160  const int ny = static_cast<int> (inMatrixDims.ny);
161  const int nz = static_cast<int> (inMatrixDims.nz);
162  const int nx_2 = ((nx / 2) + 1);
163 
164  // set up rank and strides
165  int rank = 1;
166  int n[] = {nx};
167 
168  int inembed[] = {nx};
169  int istride = 1;
170  int idist = nx;
171 
172  int onembed[] = {nx_2};
173  int ostride = 1;
174  int odist = nx_2;
175 
176  int batch = ny * nz;
177 
178  // plan the FFT
179  cufftError = cufftPlanMany(&cufftPlan_1DX_R2C, rank, n,
180  inembed, istride, idist,
181  onembed, ostride, odist,
182  CUFFT_R2C, batch);
183 
184  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_1DX_R2C");
185 
186  // be careful, this feature is deprecated in CUDA 7.0.
187  // It is necessary to use this compatibility level, otherwise we would have to use an
188  // out-of-place transform - (inplace transform corrupts data)
189  cufftError = cufftSetCompatibilityMode(cufftPlan_1DX_R2C, CUFFT_COMPATIBILITY_NATIVE);
190 
191  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_1DX_R2C_Plan");
192 }//end of Create_FFT_Plan_1DX_R2C
193 //--------------------------------------------------------------------------------------------------
194 
195 
196 /**
197  * Create cuFFT plan for 1DY Real-to-Complex. \n
198  * This version doesn't need any scratch place for planning. All 1D transforms are done in a single
199  * batch. Data has to be transposed before the transform.
200  *
201  * @param [in] inMatrixDims - The dimension sizes of the input matrix
202  * @throw runtime_error if the plan can't be created.
203  */
205 {
206  cufftResult_t cufftError;
207 
208  // set dimensions
209  const int nx = static_cast<int> (inMatrixDims.nx);
210  const int ny = static_cast<int> (inMatrixDims.ny);
211  const int nz = static_cast<int> (inMatrixDims.nz);
212  const int ny_2 = ((ny / 2) + 1);
213 
214  // set up rank and strides
215  int rank = 1;
216  int n[] = {ny};
217 
218  int inembed[] = {ny};
219  int istride = 1;
220  int idist = ny;
221 
222  int onembed[] = {ny_2};
223  int ostride = 1;
224  int odist = ny_2;
225 
226  int batch = nx * nz;
227 
228  cufftError = cufftPlanMany(&cufftPlan_1DY_R2C, rank, n,
229  inembed, istride, idist,
230  onembed, ostride, odist,
231  CUFFT_R2C, batch);
232 
233  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_1DY_R2C");
234 
235  // be careful, this feature is deprecated in CUDA 7.0.
236  // It is necessary to use this compatibility level, otherwise we would have to use an
237  // out-of-place transform - (inplace transforms corrupts data)
238  cufftError = cufftSetCompatibilityMode(cufftPlan_1DY_R2C, CUFFT_COMPATIBILITY_NATIVE);
239 
240  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_1DY_R2C_Plan");
241 }//end of Create_FFT_Plan_1DY_R2C
242 //--------------------------------------------------------------------------------------------------
243 
244 /**
245  * Create cuFFT plan for 1DZ Real-to-Complex. \n
246  * This version doesn't need any scratch place for planning. All 1D transforms are done in a single
247  * batch. Data has to be transposed before the transform.
248  *
249  * @param [in] inMatrixDims - The dimension sizes of the input matrix
250  * @throw runtime_error if the plan can't be created.
251  */
253 {
254  cufftResult_t cufftError;
255 
256  const int nx = static_cast<int> (inMatrixDims.nx);
257  const int ny = static_cast<int> (inMatrixDims.ny);
258  const int nz = static_cast<int> (inMatrixDims.nz);
259  const int nz_2 = ((nz / 2) + 1);
260 
261  // set up rank and strides
262  int rank = 1;
263  int n[] = {nz};
264 
265  int inembed[] = {nz};
266  int istride = 1;
267  int idist = nz;
268 
269  int onembed[] = {nz_2};
270  int ostride = 1;
271  int odist = nz_2;
272 
273  int batch = nx * ny;
274 
275  cufftError = cufftPlanMany(&cufftPlan_1DZ_R2C, rank, n,
276  inembed, istride, idist,
277  onembed, ostride, odist,
278  CUFFT_R2C, batch);
279 
280  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_1DZ_R2C");
281 
282  // be careful, this feature is deprecated in CUDA 7.0.
283  // It is necessary to use this compatibility level, otherwise we would have to use an
284  // out-of-place transform - (inplace transforms corrupts data)
285  cufftError = cufftSetCompatibilityMode(cufftPlan_1DZ_R2C, CUFFT_COMPATIBILITY_NATIVE);
286 
287  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_1DZ_R2C_Plan");
288 }//end of Create_FFT_Plan_1DZ_R2C
289 //--------------------------------------------------------------------------------------------------
290 
291 
292 
293 /**
294  * Create cuFFT plan for 1DX Complex-to-Real. \n
295  * This version doesn't need any scratch place for planning. All 1D transforms are done in a
296  * single batch (no transpose needed).
297  *
298  * @param [in] outMatrixDims - The dimension sizes of the output matrix
299  * @throw runtime_error if the plan can't be created.
300  */
302 {
303  cufftResult_t cufftError;
304 
305  // set dimensions
306  const int nx = static_cast<int> (outMatrixDims.nx);
307  const int ny = static_cast<int> (outMatrixDims.ny);
308  const int nz = static_cast<int> (outMatrixDims.nz);
309  const int nx_2 = ((nx / 2) + 1);
310 
311  // set up rank and strides
312  int rank = 1;
313  int n[] = {nx};
314 
315  int inembed[] = {nx_2};
316  int istride = 1;
317  int idist = nx_2;
318 
319  int onembed[] = {nx};
320  int ostride = 1;
321  int odist = nx;
322 
323  int batch = ny * nz;
324 
325  cufftError = cufftPlanMany(&cufftPlan_1DX_C2R, rank, n,
326  inembed, istride, idist,
327  onembed, ostride, odist,
328  CUFFT_C2R, batch);
329 
330  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_1DX_C2R");
331 
332  // be careful, this feature is deprecated in CUDA 7.0.
333  // It is necessary to use this compatibility level, otherwise we would have to use an
334  // out-of-place transform - (inplace transforms corrupts data)
335  cufftError = cufftSetCompatibilityMode(cufftPlan_1DX_C2R, CUFFT_COMPATIBILITY_NATIVE);
336 
337  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_1DX_C2R_Plan");
338 }//end of Create_FFT_Plan_1DX_C2R
339 //-------------------------------------------------------------------------------------------------
340 
341 
342 /**
343  * Create cuFFT plan for 1DY Complex-to-Real. \n
344  * This version doesn't need any scratch place for planning. All 1D transforms are done in a single
345  * batch. Data has to be transposed before the transform.
346  *
347  * @param [in] outMatrixDims - The dimension sizes of the output matrix
348  * @throw runtime_error if the plan can't be created.
349  */
351 {
352  cufftResult_t cufftError;
353  // set dimensions
354  const int nx = static_cast<int> (outMatrixDims.nx);
355  const int ny = static_cast<int> (outMatrixDims.ny);
356  const int nz = static_cast<int> (outMatrixDims.nz);
357  const int ny_2 = ((ny / 2) + 1);
358 
359  // set up rank and strides
360  int rank = 1;
361  int n[] = {ny};
362 
363  int inembed[] = {ny_2};
364  int istride = 1;
365  int idist = ny_2;
366 
367  int onembed[] = {ny};
368  int ostride = 1;
369  int odist = ny;
370 
371  int batch = nx * nz;
372 
373  cufftError = cufftPlanMany(&cufftPlan_1DY_C2R, rank, n,
374  inembed, istride, idist,
375  onembed, ostride, odist,
376  CUFFT_C2R, batch);
377 
378  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_1DY_C2R");
379 
380  // be careful, this feature is deprecated in CUDA 7.0.
381  // It is necessary to use this compatibility level, otherwise we would have to use an
382  // out-of-place transform - (inplace transforms corrupts data)
383  cufftError = cufftSetCompatibilityMode(cufftPlan_1DY_C2R, CUFFT_COMPATIBILITY_NATIVE);
384 
385  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_1DX_C2R_Plan");
386 }//end of Create_FFT_Plan_1DY_R2C
387 //--------------------------------------------------------------------------------------------------
388 
389 
390 /**
391  * Create cuFFT plan for 1DZ Complex-to-Real. \n
392  * This version doesn't need any scratch place for planning. All 1D transforms are done in a single
393  * batch. Data has to be transposed before the transform.
394  *
395  * @param [in] outMatrixDims - The dimension sizes of the output matrix
396  * @throw runtime_error if the plan can't be created.
397  */
399 {
400  cufftResult_t cufftError;
401 
402  // set dimensions
403  const int nx = static_cast<int> (outMatrixDims.nx);
404  const int ny = static_cast<int> (outMatrixDims.ny);
405  const int nz = static_cast<int> (outMatrixDims.nz);
406  const int nz_2 = ((nz / 2) + 1);
407 
408  // set up rank and strides
409  int rank = 1;
410  int n[] = {nz};
411 
412  int inembed[] = {nz_2};
413  int istride = 1;
414  int idist = nz_2;
415 
416  int onembed[] = {nz};
417  int ostride = 1;
418  int odist = nz;
419 
420  int batch = nx * ny;
421 
422  cufftError = cufftPlanMany(&cufftPlan_1DZ_C2R, rank, n,
423  inembed, istride, idist,
424  onembed, ostride, odist,
425  CUFFT_C2R, batch);
426 
427  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "CUDA_FFT_1DZ_C2R");
428 
429  // be careful, this feature is deprecated in CUDA 7.0.
430  // It is necessary to use this compatibility level, otherwise we would have to use an
431  // out-of-place transform - (inplace transforms corrupts data)
432  cufftError = cufftSetCompatibilityMode(cufftPlan_1DZ_C2R, CUFFT_COMPATIBILITY_NATIVE);
433 
434  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "SetCompatibilty_1DZ_R2C_Plan");
435 }//end of Create_FFT_Plan_1DX_R2C
436 //--------------------------------------------------------------------------------------------------
437 
438 
439 /**
440  * Destroy all static plans created by the application.
441  * @throw runtime_error if a plan can't be destroyed.
442  */
444 {
445  cufftResult_t cufftError;
446 
447  if (cufftPlan_3D_R2C)
448  {
449  cufftError = cufftDestroy(cufftPlan_3D_R2C);
450  cufftPlan_3D_R2C = static_cast<cufftHandle>(NULL);
451  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_3D_R2C_Plan");
452  }
453 
454  if (cufftPlan_3D_C2R)
455  {
456  cufftError = cufftDestroy(cufftPlan_3D_C2R);
457  cufftPlan_3D_C2R = static_cast<cufftHandle>(NULL);
458  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_3D_C2R_Plan");
459  }
460 
461  if (cufftPlan_1DX_R2C)
462  {
463  cufftError = cufftDestroy(cufftPlan_1DX_R2C);
464  cufftPlan_1DX_R2C = static_cast<cufftHandle>(NULL);
465  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_1DX_R2C_Plan");
466  }
467 
468  if (cufftPlan_1DY_R2C)
469  {
470  cufftError = cufftDestroy(cufftPlan_1DY_R2C);
471  cufftPlan_1DY_R2C = static_cast<cufftHandle>(NULL);
472  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_1DY_R2C_Plan");
473  }
474 
475  if (cufftPlan_1DZ_R2C)
476  {
477  cufftError = cufftDestroy(cufftPlan_1DZ_R2C);
478  cufftPlan_1DZ_R2C = static_cast<cufftHandle>(NULL);
479  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_1DZ_R2C_Plan");
480  }
481 
482  if (cufftPlan_1DX_C2R)
483  {
484  cufftError = cufftDestroy(cufftPlan_1DX_C2R);
485  cufftPlan_1DX_C2R = static_cast<cufftHandle>(NULL);
486  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_1DX_C2R_Plan");
487  }
488 
489  if (cufftPlan_1DY_C2R)
490  {
491  cufftError = cufftDestroy(cufftPlan_1DY_C2R);
492  cufftPlan_1DY_C2R = static_cast<cufftHandle>(NULL);
493  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_1DY_C2R_Plan");
494  }
495 
496  if (cufftPlan_1DZ_C2R)
497  {
498  cufftError = cufftDestroy(cufftPlan_1DZ_C2R);
499  cufftPlan_1DZ_C2R = static_cast<cufftHandle>(NULL);
500  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Destroy_1DZ_C2R_Plan");
501  }
502 
503  cuFFTErrorMessages.clear();
504 }// end of DestroyAllPlans
505 //--------------------------------------------------------------------------------------------------
506 
507 
508 /**
509  * Compute forward out-of-place 3D Real-to-Complex FFT.
510  *
511  * @param [in] inMatrix - Input data for the forward FFT
512  * @throw runtime_error if the plan is not valid.
513  */
515 {
516  //Compute forward cuFFT (if the plan does not exist, it also returns error)
517  cufftResult_t cufftError = cufftExecR2C(cufftPlan_3D_R2C,
518  static_cast<cufftReal*>(inMatrix.GetDeviceData()),
519  reinterpret_cast<cufftComplex*>(deviceData));
520 
521  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_3D_R2C");
522 }// end of Compute_FFT_3D_R2C
523 //--------------------------------------------------------------------------------------------------
524 
525 
526 
527 /**
528  * Compute inverse out-of-place 3D Complex-to-Real FFT.
529  *
530  * @param [out] outMatrix - output of the inverse FFT.
531  * @throw runtime_error if the plan is not valid.
532  */
534 {
535  //Compute forward cuFFT (if the plan does not exist, it also returns error)
536  cufftResult_t cufftError = cufftExecC2R(cufftPlan_3D_C2R,
537  reinterpret_cast<cufftComplex*>(deviceData),
538  static_cast<cufftReal*>(outMatrix.GetDeviceData()));
539 
540  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_3D_RCR");
541 }// end of Compute_FFT_3D_C2R
542 //--------------------------------------------------------------------------------------------------
543 
544 /**
545  * Compute forward out-of-place 1DX Real-to-Complex FFT.
546  *
547  * @param [in] inMatrix - Input data for the forward FFT.
548  * @throw runtime_error if the plan is not valid.
549  */
551 {
552  //Compute forward cuFFT (if the plan does not exist, it also returns error)
553  cufftResult_t cufftError = cufftExecR2C(cufftPlan_1DX_R2C,
554  static_cast<cufftReal*>(inMatrix.GetDeviceData()),
555  reinterpret_cast<cufftComplex*>(deviceData));
556 
557  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_1DX_R2C");
558 }// end of Compute_FFT_1DX_R2C
559 //--------------------------------------------------------------------------------------------------
560 
561 
562 /**
563  * Compute forward out-of-place 1DY Real-to-Complex FFT. The matrix is first X<->Y transposed
564  * followed by the 1D FFT. The matrix is left in the transposed format.
565  *
566  * @param [in] inMatrix - Input data for the forward FFT.
567  * @throw runtime_error if the plan is not valid.
568  */
570 {
571  /// Transpose a real 3D matrix in the X-Y direction
572  dim3 dimSizes(inMatrix.GetDimensionSizes().nx,
573  inMatrix.GetDimensionSizes().ny,
574  inMatrix.GetDimensionSizes().nz);
575 
577  inMatrix.GetDeviceData(),
578  dimSizes);
579 
580  // Compute forward cuFFT (if the plan does not exist, it also returns error).
581  // the FFT is calculated in-place (may be a bit slower than out-of-place, however
582  // it does not request additional transfers and memory).
583  cufftResult_t cufftError = cufftExecR2C(cufftPlan_1DY_R2C,
584  static_cast<cufftReal*>(deviceData),
585  reinterpret_cast<cufftComplex*>(deviceData));
586 
587  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_1DY_R2C");
588 }// end of Compute_FFT_1DY_R2C
589 //--------------------------------------------------------------------------------------------------
590 
591 /**
592  * Compute forward out-of-place 1DZ Real-to-Complex FFT. The matrix is first X<->Z transposed
593  * followed by the 1D FFT. The matrix is left in the transposed format.
594  *
595  * @param [in] inMatrix - Input data for the forward FFT.
596  * @throw runtime_error if the plan is not valid.
597  */
599 {
600  /// Transpose a real 3D matrix in the X-Z direction
601  dim3 dimSizes(inMatrix.GetDimensionSizes().nx,
602  inMatrix.GetDimensionSizes().ny,
603  inMatrix.GetDimensionSizes().nz);
604 
606  inMatrix.GetDeviceData(),
607  dimSizes);
608 
609  // Compute forward cuFFT (if the plan does not exist, it also returns error).
610  // the FFT is calculated in-place (may be a bit slower than out-of-place, however
611  // it does not request additional transfers and memory).
612  cufftResult_t cufftError = cufftExecR2C(cufftPlan_1DZ_R2C,
613  static_cast<cufftReal*>(deviceData),
614  reinterpret_cast<cufftComplex*>(deviceData));
615 
616  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_1DZ_R2C");
617 }// end of Compute_FFT_1DZ_R2C
618 //--------------------------------------------------------------------------------------------------
619 
620 /**
621  * Compute inverse out-of-place 1DX Complex-to-Real FFT.
622  *
623  * @param [out] outMatrix - Output data for the inverse FFT.
624  * @throw runtime_error if the plan is not valid.
625  */
627 {
628  //Compute inverse cuFFT (if the plan does not exist, it also returns error)
629  cufftResult_t cufftError = cufftExecC2R(cufftPlan_1DX_C2R,
630  reinterpret_cast<cufftComplex*>(deviceData),
631  static_cast<cufftReal*> (outMatrix.GetDeviceData()));
632 
633  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_1DX_C2R");
634 }// end of Compute_FFT_1DX_C2R
635 //--------------------------------------------------------------------------------------------------
636 
637 
638 /**
639  * Compute inverse out-of-place 1DY Complex-to-Real FFT. The matrix is required to be in the
640  * transposed layout. After the FFT is calculated, a Y<->X transpose follows. The matrix is
641  * returned in the normal (z, y, x) layout.
642  *
643  * @param [out] outMatrix - Output data for the inverse FFT.
644  * @throw runtime_error if the plan is not valid.
645  */
647 {
648  // Compute inverse cuFFT (if the plan does not exist, it also returns error).
649  // the FFT is calculated in-place (may be a bit slower than out-of-place, however
650  // it does not request additional transfers and memory).
651  cufftResult_t cufftError = cufftExecC2R(cufftPlan_1DY_C2R,
652  reinterpret_cast<cufftComplex*>(deviceData),
653  static_cast<cufftReal*>(deviceData));
654 
655  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_1DY_C2R");
656 
657  /// Transpose a real 3D matrix back in the X-Y direction
658  dim3 dimSizes(outMatrix.GetDimensionSizes().ny,
659  outMatrix.GetDimensionSizes().nx,
660  outMatrix.GetDimensionSizes().nz);
661 
663  deviceData,
664  dimSizes);
665 }// end of Compute_FFT_1DY_C2R
666 //--------------------------------------------------------------------------------------------------
667 
668 
669 /**
670  * Compute inverse out-of-place 1DZ Complex-to-Real FFT. The matrix is required to be in the
671  * transposed layout. After the FFT is calculated, a Z<->X transpose follows. The matrix is
672  * returned in the normal (z, y, x) layout.
673  *
674  * @param [out] outMatrix - Output data for the inverse FFT.
675  * @throw runtime_error if the plan is not valid.
676  */
678 {
679  // Compute inverse cuFFT (if the plan does not exist, it also returns error).
680  // the FFT is calculated in-place (may be a bit slower than out-of-place, however
681  // it does not request additional transfers and memory).
682  cufftResult_t cufftError = cufftExecC2R(cufftPlan_1DZ_C2R,
683  reinterpret_cast<cufftComplex*>(deviceData),
684  static_cast<cufftReal*>(deviceData));
685 
686  if (cufftError != CUFFT_SUCCESS) ThrowCUFFTException(cufftError, "Execute_FFT_1DZ_C2R");
687 
688  /// Transpose a real 3D matrix in the Z<->X direction
689  dim3 DimSizes(outMatrix.GetDimensionSizes().nz,
690  outMatrix.GetDimensionSizes().ny,
691  outMatrix.GetDimensionSizes().nx);
692 
694  GetDeviceData(),
695  DimSizes);
696 }// end of Compute_FFT_1DZ_C2R
697 //--------------------------------------------------------------------------------------------------
698 
699 
700 //------------------------------------------------------------------------------------------------//
701 //-------------------------------------- Protected methods ---------------------------------------//
702 //------------------------------------------------------------------------------------------------//
703 
704 
705 //------------------------------------------------------------------------------------------------//
706 //--------------------------------------- Private methods ----------------------------------------//
707 //------------------------------------------------------------------------------------------------//
708 
709 /**
710  * Throw cuda FFT exception
711  *
712  * @param [in] cufftError - CUDA FFT error code
713  * @param [in] transformTypeName - CUDA transform type name
714  * @throw runtime error if error occurs
715  */
716 void TCUFFTComplexMatrix::ThrowCUFFTException(const cufftResult cufftError,
717  const std::string& transformTypeName)
718 {
719  std::string errMsg;
720  if (cuFFTErrorMessages.find(cufftError) != cuFFTErrorMessages.end())
721  {
722  errMsg = TLogger::FormatMessage(cuFFTErrorMessages[cufftError], transformTypeName.c_str());
723  }
724  else // unknown error
725  {
726  errMsg = TLogger::FormatMessage(ERR_FMT_CUFFT_UNKNOWN_ERROR, transformTypeName.c_str());
727  }
728 
729  // Throw exception
730  throw std::runtime_error(errMsg);
731 }// end of GetCuFFTErrorMessage
732 //--------------------------------------------------------------------------------------------------
void Compute_FFT_1DY_R2C(TRealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the Y dimension.
static cufftHandle cufftPlan_1DZ_C2R
cuFFT plan for the 1D Complex-to-Real transform in the Z dimension.
size_t nx
number of elements in the x direction
TErrorMessage ERR_FMT_CUFFT_INVALID_PLAN
CUDA FFT error message.
static void Create_FFT_Plan_1DZ_R2C(const TDimensionSizes &inMatrixDims)
Create static cuFFT plan for Real-to-Complex in the Z dimension.
TErrorMessage eRR_FMT_CUFFT_SETUP_FAILED
CUDA FFT error message.
void Compute_FFT_3D_C2R(TRealMatrix &outMatrix)
Compute 3D out-of-place Complex-to-Real FFT.
static void DestroyAllPlansAndStaticData()
Destroy all static plans and error messages.
static void Create_FFT_Plan_1DX_R2C(const TDimensionSizes &inMatrixDims)
Create static cuFFT plan for Real-to-Complex in the X dimension.
TErrorMessage ERR_FMT_CUFFT_INVALID_TYPE
CUDA FFT error message.
void Compute_FFT_1DZ_R2C(TRealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the Z dimension.
The header file containing the class for real matrices.
static void Create_FFT_Plan_1DY_C2R(const TDimensionSizes &outMatrixDims)
Create static cuFFT plan for Complex-to-Real in the Y dimension.
void Compute_FFT_1DY_C2R(TRealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the Y dimension.
TErrorMessage eRR_FMT_CUFFT_NOT_IMPLEMENTED
CUDA FFT error message.
static void Create_FFT_Plan_1DZ_C2R(const TDimensionSizes &outMatrixDims)
Create static cuFFT plan for Complex-to-Real in the Z dimension.
static cufftHandle cufftPlan_1DX_C2R
cuFFT plan for the 1D Complex-to-Real transform in the X dimension.
static void Create_FFT_Plan_3D_C2R(const TDimensionSizes &outMatrixDims)
Create static cuFFT plan for Complex-to-Real.
TErrorMessage ERR_FMT_CUFFT_INVALID_SIZE
CUDA FFT error message.
static cufftHandle cufftPlan_1DY_R2C
cuFFT plan for the 1D Real-to-Complex transform in the Y dimension.
static cufftHandle cufftPlan_3D_R2C
cuFFT plan for the 3D Real-to-Complex transform.
TErrorMessage ERR_FMT_CUFFT_UNALIGNED_DATA
CUDA FFT error message.
static void Create_FFT_Plan_1DX_C2R(const TDimensionSizes &outMatrixDims)
Create static cuFFT plan for Complex-to-Real in the X dimension.
void Compute_FFT_1DX_R2C(TRealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the X dimension.
virtual TDimensionSizes GetDimensionSizes() const
Get dimension sizes of the matrix.
static void Create_FFT_Plan_1DY_R2C(const TDimensionSizes &inMatrixDims)
Create static cuFFT plan for Real-to-Complex in the Y dimension.
TErrorMessage ERR_FMT_CUFFT_LICENSE_ERROR
CUDA FFT error message.
The header file containing a class responsible for printing out info and error messages (stdout...
void Compute_FFT_3D_R2C(TRealMatrix &inMatrix)
Compute 3D out-of-place Real-to-Complex FFT.
void Compute_FFT_1DX_C2R(TRealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the X dimension.
float * deviceData
Raw GPU matrix data.
The header file containing the class that implements 3D FFT using the cuFFT interface.
TErrorMessage ERR_FMT_CUFFT_PARSE_ERROR
CUDA FFT error message.
static cufftHandle cufftPlan_3D_C2R
cuFFT plan for the 3D Complex-to-Real transform.
size_t ny
number of elements in the y direction
static void Create_FFT_Plan_3D_R2C(const TDimensionSizes &inMatrixDims)
Create static cuFFT plan for Real-to-Complex.
TErrorMessage ERR_FMT_CUFFT_EXEC_FAILED
CUDA FFT error message.
The class for real matrices.
Definition: RealMatrix.h:45
void Compute_FFT_1DZ_C2R(TRealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the Z dimension.
TErrorMessage ERR_FMT_CUFFT_INVALID_DEVICE
CUDA FFT error message.
static void ThrowCUFFTException(const cufftResult cufftError, const std::string &transformTypeName)
Throw an exception with a given error message.
TErrorMessage ERR_FMT_CUFFT_ALLOC_FAILED
CUDA FFT error message.
TErrorMessage ERR_FMT_CUFFT_NO_WORKSPACE
CUDA FFT error message.
static cufftHandle cufftPlan_1DY_C2R
cuFFT plan for the 1D Complex-to-Real transform in the Y dimension.
Name space for all CUDA kernels used in the 3D solver.
static std::string FormatMessage(const std::string &format, Args...args)
C++-11 replacement for sprintf that works with std::string instead of char *.
Definition: Logger.h:126
void TrasposeReal3DMatrixXY(float *outputMatrix, const float *inputMatrix, const dim3 &dimSizes)
Transpose a real 3D matrix in the X-Y direction.
virtual float * GetDeviceData()
Get raw GPU data out of the class (for direct GPU kernel access).
static cufftHandle cufftPlan_1DX_R2C
cuFFT plan for the 1D Real-to-Complex transform in the X dimension.
TErrorMessage ERR_FMT_CUFFT_INCOMPLETE_PARAMETER_LIST
CUDA FFT error message.
TErrorMessage ERR_FMT_CUFFT_INVALID_VALUE
CUDA FFT error message.
static std::map< cufftResult, TErrorMessage > cuFFTErrorMessages
size_t nz
number of elements in the z direction
TErrorMessage ERR_FMT_CUFFT_UNKNOWN_ERROR
CUDA FFT error message.
void TrasposeReal3DMatrixXZ(float *outputMatrix, const float *inputMatrix, const dim3 &dimSizes)
Transpose a real 3D matrix in the X-Z direction.
static cufftHandle cufftPlan_1DZ_R2C
cuFFT plan for the 1D Real-to-Complex transform in the Z dimension.
Structure with 4D dimension sizes (3 in space and 1 in time).