kspaceFirstOrder3D-OMP  1.2
The C++ implementation of the k-wave toolbox for the time-domain simulation of acoustic wave fields in 3D
KSpaceFirstOrder3DSolver.cpp
Go to the documentation of this file.
1 /**
2  * @file KSpaceFirstOrder3DSolver.cpp
3  *
4  * @author Jiri Jaros \n
5  * Faculty of Information Technology \n
6  * Brno University of Technology \n
7  * jarosjir@fit.vutbr.cz
8  *
9  * @brief The implementation file containing the main class of the project responsible for the entire simulation.
10  *
11  * @version kspaceFirstOrder3D 2.16
12  *
13  * @date 12 July 2012, 10:27 (created) \n
14  * 04 September 2017, 10:59 (revised)
15  *
16  * @copyright Copyright (C) 2017 Jiri Jaros and Bradley Treeby.
17  *
18  * This file is part of the C++ extension of the [k-Wave Toolbox](http://www.k-wave.org).
19  *
20  * This file is part of the k-Wave. k-Wave is free software: you can redistribute it and/or modify it under the terms
21  * of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the
22  * License, or (at your option) any later version.
23  *
24  * k-Wave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
25  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
26  * more details.
27  *
28  * You should have received a copy of the GNU Lesser General Public License along with k-Wave.
29  * If not, see [http://www.gnu.org/licenses/](http://www.gnu.org/licenses/).
30  */
31 
32 // Linux build
33 #ifdef __linux__
34  #include <sys/resource.h>
35 #endif
36 
37 // Windows build
38 #ifdef _WIN64
39  #define _USE_MATH_DEFINES
40  #include <Windows.h>
41  #include <Psapi.h>
42  #pragma comment(lib, "Psapi.lib")
43 #endif
44 
45 #ifdef _OPENMP
46  #include <omp.h>
47 #endif
48 
49 #include <immintrin.h>
50 #include <cmath>
51 #include <ctime>
52 #include <limits>
53 
57 
59 #include <Logger/Logger.h>
60 
61 using std::ios;
62 
63 //--------------------------------------------------------------------------------------------------------------------//
64 //---------------------------------------------------- Constants -----------------------------------------------------//
65 //--------------------------------------------------------------------------------------------------------------------//
66 
67 //--------------------------------------------------------------------------------------------------------------------//
68 //------------------------------------------------- Public methods ---------------------------------------------------//
69 //--------------------------------------------------------------------------------------------------------------------//
70 
71 /**
72  * Constructor of the class.
73  */
75  mMatrixContainer(), mOutputStreamContainer(),
76  mParameters(Parameters::getInstance()),
77  mActPercent(0l),
78  mTotalTime(), mPreProcessingTime(), mDataLoadTime (), mSimulationTime(),
79  mPostProcessingTime(), mIterationTime()
80 {
81  mTotalTime.start();
82 
83  //Switch off HDF5 error messages
84  H5Eset_auto(H5E_DEFAULT, NULL, NULL);
85 }// end of KSpaceFirstOrder3DSolver
86 //----------------------------------------------------------------------------------------------------------------------
87 
88 
89 /**
90  * Destructor of the class.
91  */
93 {
94  freeMemory();
95 }// end of KSpaceFirstOrder3DSolver
96 //----------------------------------------------------------------------------------------------------------------------
97 
98 /**
99  * The method allocates the matrix container, creates all matrices and creates all output streams
100  * (however not allocating memory).
101  */
103 {
106 
107  // create container, then all matrices
110 
111  // add output streams into container
112  //@todo Think about moving under LoadInputData routine...
114 
116 }// end of allocateMemory
117 //----------------------------------------------------------------------------------------------------------------------
118 
119 /**
120  * The method frees all memory allocated by the class.
121  */
123 {
126 }// end of freeMemory
127 //----------------------------------------------------------------------------------------------------------------------
128 
129 /**
130  * Load data from the input file provided by the Parameter class and creates the output time series streams.
131  */
133 {
136  // Start timer
138 
139  // get handles
140  Hdf5File& inputFile = mParameters.getInputFile(); // file is opened (in Parameters)
141  Hdf5File& outputFile = mParameters.getOutputFile();
142  Hdf5File& checkpointFile = mParameters.getCheckpointFile();
143 
144  // Load data from disk
148 
149  // Load data from disk
151 
152  // close the input file since we don't need it anymore.
153  inputFile.close();
154 
156 
157  // The simulation does not use checkpointing or this is the first turn
158  bool recoverFromCheckpoint = (mParameters.isCheckpointEnabled() &&
160 
161  if (recoverFromCheckpoint)
162  {
163  //------------------------------------- Read data from the checkpoint file ---------------------------------------//
166 
167  // Open checkpoint file
168  checkpointFile.open(mParameters.getCheckpointFileName());
169 
170  // Check the checkpoint file
172 
173  // read the actual value of t_index
174  size_t checkpointedTimeIndex;
175  checkpointFile.readScalarValue(checkpointFile.getRootGroup(), kTimeIndexName, checkpointedTimeIndex);
176  mParameters.setTimeIndex(checkpointedTimeIndex);
177 
178  // Read necessary matrices from the checkpoint file
180 
181  checkpointFile.close();
183 
184  //--------------------------------------- Read data from the output file -----------------------------------------//
187 
188  // Reopen output file for RW access
189  outputFile.open(mParameters.getOutputFileName(), H5F_ACC_RDWR);
190  //Read file header of the output file
192  // Check the checkpoint file
193  checkOutputFile();
194  // Restore elapsed time
196 
199  }
200  else
201  { //------------------------------------ First round of multi-leg simulation ---------------------------------------//
202  // Create the output file
205 
206  outputFile.create(mParameters.getOutputFileName());
208 
209  // Create the steams, link them with the sampled matrices, however DO NOT allocate memory!
211  }
212 
213  // Stop timer
216  {
218  }
219 }// end of loadInputData
220 //----------------------------------------------------------------------------------------------------------------------
221 
222 
223 /**
224 * This method computes k-space First Order 3D simulation.
225  */
227 {
228  // fft initialisation and preprocessing
229  try
230  {
232 
233  // initilaise all FFTW plans
235 
236  // preprocessing phase generating necessary variables
237  preProcessing();
238 
240  }
241  catch (const std::exception& e)
242  {
245 
247  }
248 
249  // Logger header for simulation
253 
254  // Main loop
255  try
256  {
258 
259  computeMainLoop();
260 
262 
264  }
265  catch (const std::exception& e)
266  {
269  }
270 
271  // Post processing region
273  try
274  {
276  { // Checkpoint
282 
284  {
286  }
287 
289 
291  {
293  }
294  }
295  else
296  { // Finish
301 
302  postProcessing();
303 
304  // if checkpointing is enabled and the checkpoint file was created in the past, delete it
306  {
307  std::remove(mParameters.getCheckpointFileName().c_str());
308  }
310  }
311  }
312  catch (const std::exception &e)
313  {
316 
318  }
320 
321  // Final data written
322  try
323  {
326 
328  }
329  catch (const std::exception &e)
330  {
333  }
334 }// end of compute()
335 //----------------------------------------------------------------------------------------------------------------------
336 
337 /**
338  * Get peak memory usage.
339  */
341 {
342  // Linux build
343  #ifdef __linux__
344  struct rusage mem_usage;
345  getrusage(RUSAGE_SELF, &mem_usage);
346 
347  return mem_usage.ru_maxrss >> 10;
348  #endif
349 
350  // Windows build
351  #ifdef _WIN64
352  HANDLE hProcess;
353  PROCESS_MEMORY_COUNTERS pmc;
354 
355  hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ,
356  FALSE,
357  GetCurrentProcessId());
358 
359  GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc));
360  CloseHandle(hProcess);
361 
362  return pmc.PeakWorkingSetSize >> 20;
363  #endif
364 }// end of getMemoryUsage
365 //----------------------------------------------------------------------------------------------------------------------
366 
367 /**
368  * Get release code version.
369  */
371 {
372  return std::string(kOutFmtKWaveVersion);
373 }// end of getCodeName
374 //----------------------------------------------------------------------------------------------------------------------
375 
376 
377 /**
378  * Print full code name and the license.
379  */
381 {
382  Logger::log(Logger::LogLevel::kBasic, kOutFmtBuildNoDataTime, 10, 11, __DATE__, 8, 8, __TIME__);
383 
384  if (mParameters.getGitHash() != "")
385  {
387  }
389 
390 
391  // OS detection
392  #ifdef __linux__
394  #elif __APPLE__
396  #elif _WIN32
398  #endif
399 
400  // Compiler detections
401  #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
403  #endif
404  #ifdef __INTEL_COMPILER
406  #endif
407  #ifdef _MSC_VER
409  #endif
410 
411  // instruction set
412  #if (defined (__AVX2__))
414  #elif (defined (__AVX__))
416  #elif (defined (__SSE4_2__))
418  #elif (defined (__SSE4_1__))
420  #elif (defined (__SSE3__))
422  #elif (defined (__SSE2__))
424  #endif
425 
427 
428 }// end of printFullCodeNameAndLicense
429 //----------------------------------------------------------------------------------------------------------------------
430 
431 /**
432  * Set processor affinity.
433  */
435 {
436  // Linux Build
437  #ifdef __linux__
438  //GNU compiler
439  #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
440  setenv("OMP_PROC_BIND","TRUE", 1);
441  #endif
442 
443  #ifdef __INTEL_COMPILER
444  setenv("KMP_AFFINITY","none", 1);
445  #endif
446  #endif
447 
448  // Windows build is always compiled by the Intel Compiler
449  #ifdef _WIN64
450  _putenv_s("KMP_AFFINITY","none");
451  #endif
452 }//end of setProcessorAffinity
453 //----------------------------------------------------------------------------------------------------------------------
454 
455 
456 //--------------------------------------------------------------------------------------------------------------------//
457 //------------------------------------------------ Protected methods -------------------------------------------------//
458 //--------------------------------------------------------------------------------------------------------------------//
459 
460 /**
461  * Initialize FFTW plans.
462  */
464 {
465 
466  // initialization of FFTW library
467  #ifdef _OPENMP
468  fftwf_init_threads();
469  fftwf_plan_with_nthreads(mParameters.getNumberOfThreads());
470  #endif
471 
472  // The simulation does not use checkpointing or this is the first turn
473  bool recoverFromPrevState = (mParameters.isCheckpointEnabled() &&
475 
476 
477  #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
478  // import FFTW wisdom if it is here
479  if (recoverFromPrevState)
480  {
481 
482 
485  // import FFTW wisdom
486  try
487  {
488  // try to find the wisdom in the file that has the same name as the checkpoint file (different extension)
491  }
492  catch (const std::runtime_error& e)
493  {
495  }
496  }
497  #endif
498 
501 
502  // create real to complex plans
506 
507  // create real to complex plans
511 
512  // if necessary, create 1D shift plans.
513  // in this case, the matrix has a bit bigger dimensions to be able to store
514  // shifted matrices.
515  if (Parameters::getInstance().getStoreVelocityNonStaggeredRawFlag())
516  {
517  // X shifts
520 
521  // Y shifts
524 
525  // Z shifts
528  }// end u_non_staggered
529 
531 }// end of InitializeFftwPlans
532 //----------------------------------------------------------------------------------------------------------------------
533 
534 /**
535  * Compute pre-processing phase.
536  */
538 {
541 
542  // get the correct sensor mask and recompute indices
544  {
546  }
547 
549  {
551  }
552 
553  if ((mParameters.getTransducerSourceFlag() != 0) ||
557  )
558  {
560  }
561 
563  {
565  }
566 
568  {
570  }
571 
572 
573  // compute dt / rho0_sg...
575  { // non-uniform grid cannot be pre-calculated :-(
576  // rho is matrix
578  {
580  }
581  else
582  {
586  }
587  }
588 
589  // generate different matrices
590  if (mParameters.getAbsorbingFlag() != 0)
591  {
594  }
595  else
596  {
597  generateKappa();
598  }
599 
600  // calculate c^2. It has to be after kappa gen... because of c modification
601  computeC2();
602 
604 }// end of preProcessing
605 //----------------------------------------------------------------------------------------------------------------------
606 
607 /**
608  * Compute the main time loop of KSpaceFirstOrder3D.
609  */
611 {
612  mActPercent = 0;
613  // set ActPercent to correspond the t_index after recovery
614  if (mParameters.getTimeIndex() > 0)
615  {
617  }
618 
619  // Progress header
621 
623 
625  {
626  const size_t timeIndex = mParameters.getTimeIndex();
627 
628  // compute velocity
629  computeVelocity();
630  // add in the velocity source term
632 
633  // add in the transducer source term (t = t1) to ux
634  if (mParameters.getTransducerSourceFlag() > timeIndex)
635  {
636  // transducer source is added only to the x component of the particle velocity
638  }
639 
640  // compute gradient of velocity
642 
644  {
646  }
647  else
648  {
650  }
651 
652 
653  // add in the source pressure term
655 
657  {
659  }
660  else
661  {
663  }
664 
665  // calculate initial pressure
666  if ((timeIndex == 0) && (mParameters.getInitialPressureSourceFlag() == 1)) addInitialPressureSource();
667 
668  storeSensorData();
669  printStatistics();
671  }// time loop
672 }// end of computeMainLoop
673 //----------------------------------------------------------------------------------------------------------------------
674 
675 /**
676  * Post processing the quantities, closing the output streams and storing the sensor mask.
677  */
679 {
681  {
683  }// p_final
684 
686  {
690  }// u_final
691 
692  // Apply post-processing and close
695 
696 
697  // store sensor mask if wanted
699  {
701  {
705  }
707  {
711  }
712  }
713 }// end of postProcessing
714 //----------------------------------------------------------------------------------------------------------------------
715 
716 /**
717  * Store sensor data.
718  */
720 {
721  // Unless the time for sampling has come, exit
723  {
725  {
727  }
729  }
730 }// end of storeSensorData
731 //----------------------------------------------------------------------------------------------------------------------
732 
733 /**
734  * Write statistics and the header into the output file.
735  */
737 {
738  // write timeIndex into the output file
742 
743  // Write scalars
745  Hdf5FileHeader& fileHeader = mParameters.getFileHeader();
746 
747  // Write File header
748  fileHeader.setCodeName(getCodeName());
749  fileHeader.setMajorFileVersion();
750  fileHeader.setMinorFileVersion();
751  fileHeader.setActualCreationTime();
753  fileHeader.setHostName();
754 
755  fileHeader.setMemoryConsumption(getMemoryUsage());
756 
757  // Stop total timer here
758  mTotalTime.stop();
764 
765  fileHeader.setNumberOfCores();
767 }// end of writeOutputDataInfo
768 //----------------------------------------------------------------------------------------------------------------------
769 
770 /**
771  * Save checkpoint data into the checkpoint file, flush aggregated outputs into the output file.
772  */
774 {
775  #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
778  // export FFTW wisdom
779  try
780  {
783  }
784  catch (const std::runtime_error& e)
785  {
787  }
788  #endif
789 
790  // Create Checkpoint file
791  Hdf5File& checkpointFile = mParameters.getCheckpointFile();
792  // if it happens and the file is opened (from the recovery, close it)
793  if (checkpointFile.isOpen()) checkpointFile.close();
794 
797 
798  // Create the new file (overwrite the old one)
799  checkpointFile.create(mParameters.getCheckpointFileName());
800 
801 
802  //------------------------------------------------ Store Matrices --------------------------------------------------//
803  // Store all necessary matrices in Checkpoint file
805 
806  // Write t_index
807  checkpointFile.writeScalarValue(checkpointFile.getRootGroup(), kTimeIndexName, mParameters.getTimeIndex());
808 
809  // store basic dimension sizes (Nx, Ny, Nz) - Nt is not necessary
810  checkpointFile.writeScalarValue(checkpointFile.getRootGroup(), kNxName, mParameters.getFullDimensionSizes().nx);
811  checkpointFile.writeScalarValue(checkpointFile.getRootGroup(), kNyName, mParameters.getFullDimensionSizes().ny);
812  checkpointFile.writeScalarValue(checkpointFile.getRootGroup(), kNzName, mParameters.getFullDimensionSizes().nz);
813 
814 
815  // Write checkpoint file header
817 
819  fileHeader.setCodeName(getCodeName());
820  fileHeader.setActualCreationTime();
821 
822  fileHeader.writeHeaderToCheckpointFile(checkpointFile);
823 
824  // Close the checkpoint file
825  checkpointFile.close();
827 
828  // checkpoint output streams only if necessary (t_index > start_index) - here we're at step + 1
830  {
833 
835 
837  }
839 }// end of saveCheckpointData
840 //----------------------------------------------------------------------------------------------------------------------
841 
842 
843  /**
844  * Compute new values of acoustic velocity in all three dimensions (UxSgx, UySgy, UzSgz).
845  *
846  * <b>Matlab code:</b> \n
847  *
848  * \verbatim
849  p_k = fftn(p);
850  ux_sgx = bsxfun(@times, pml_x_sgx, ...
851  bsxfun(@times, pml_x_sgx, ux_sgx) ...
852  - dt .* rho0_sgx_inv .* real(ifftn( bsxfun(@times, ddx_k_shift_pos, kappa .* fftn(p)) )) ...
853  );
854  uy_sgy = bsxfun(@times, pml_y_sgy, ...
855  bsxfun(@times, pml_y_sgy, uy_sgy) ...
856  - dt .* rho0_sgy_inv .* real(ifftn( bsxfun(@times, ddy_k_shift_pos, kappa .* fftn(p)) )) ...
857  );
858  uz_sgz = bsxfun(@times, pml_z_sgz, ...
859  bsxfun(@times, pml_z_sgz, uz_sgz) ...
860  - dt .* rho0_sgz_inv .* real(ifftn( bsxfun(@times, ddz_k_shift_pos, kappa .* fftn(p)) )) ...
861  );
862  \endverbatim
863  */
865  {
866  // bsxfun(@times, ddx_k_shift_pos, kappa .* fftn(p)), for all 3 dims
868 
872 
874  { // scalars
876  {
878  }
879  else
880  {
882  }
883  }
884  else
885  {// matrices
887 
888  }
889 }// end of computeVelocity
890 //----------------------------------------------------------------------------------------------------------------------
891 
892  /**
893  * Compute new values for duxdx, duydy, duzdz.
894  */
896 {
900 
901  const DimensionSizes& reducedDimensionSizes = mParameters.getReducedDimensionSizes();
902  const float divider = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().nElements());
903 
904  const float* kappa = getKappa().getData();
905 
906  FloatComplex* tempFftX = getTempFftwX().getComplexData();
907  FloatComplex* tempFftY = getTempFftwY().getComplexData();
908  FloatComplex* tempFftZ = getTempFftwZ().getComplexData();
909 
910  FloatComplex* ddxKShiftNeg = getDdxKShiftNeg().getComplexData();
911  FloatComplex* ddyKShiftNeg = getDdyKShiftNeg().getComplexData();
912  FloatComplex* ddzKShiftNeg = getDdzKShiftNeg().getComplexData();
913 
914  #pragma omp parallel for schedule(static)
915  for (size_t z = 0; z < reducedDimensionSizes.nz; z++)
916  {
917  for (size_t y = 0; y < reducedDimensionSizes.ny; y++)
918  {
919  #pragma omp simd
920  for (size_t x = 0; x < reducedDimensionSizes.nx; x++)
921  {
922  const size_t i = get1DIndex(z, y, x, reducedDimensionSizes);
923  const float eKappa = divider * kappa[i];
924 
925  tempFftX[i] *= ddxKShiftNeg[x] * eKappa;
926  tempFftY[i] *= ddyKShiftNeg[y] * eKappa;
927  tempFftZ[i] *= ddzKShiftNeg[z] * eKappa;
928  } // x
929  } // y
930  } // z
931 
932 
936 
937  //------------------------------------------------- Non linear grid -------------------------------------------------//
939  {
940  float* duxdx = getDuxdx().getData();
941  float* duydy = getDuydy().getData();
942  float* duzdz = getDuzdz().getData();
943 
944  const float* duxdxn = getDxudxn().getData();
945  const float* duydyn = getDyudyn().getData();
946  const float* duzdzn = getDzudzn().getData();
947 
948  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
949 
950  #pragma omp parallel for schedule(static)
951  for (size_t z = 0; z < dimensionSizes.nz; z++)
952  {
953  for (size_t y = 0; y < dimensionSizes.ny; y++)
954  {
955  #pragma omp simd
956  for (size_t x = 0; x < dimensionSizes.nx; x++)
957  {
958  const size_t i = get1DIndex(z, y, x, dimensionSizes);
959  duxdx[i] *= duxdxn[x];
960  duydy[i] *= duydyn[y];
961  duzdz[i] *= duzdzn[z];
962  } // x
963  } // y
964  } // z
965  }// nonlinear
966 }// end of computeVelocityGradient
967 //----------------------------------------------------------------------------------------------------------------------
968 
969 /**
970  * Calculate new values of acoustic density for nonlinear case (rhoX, rhoy and rhoZ).
971  *
972  * <b>Matlab code:</b> \n
973  *
974  *\verbatim
975  rho0_plus_rho = 2 .* (rhox + rhoy + rhoz) + rho0;
976  rhox = bsxfun(@times, pml_x, bsxfun(@times, pml_x, rhox) - dt .* rho0_plus_rho .* duxdx);
977  rhoy = bsxfun(@times, pml_y, bsxfun(@times, pml_y, rhoy) - dt .* rho0_plus_rho .* duydy);
978  rhoz = bsxfun(@times, pml_z, bsxfun(@times, pml_z, rhoz) - dt .* rho0_plus_rho .* duzdz);
979  \endverbatim
980  */
982 {
983  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
984 
985  const float dt = mParameters.getDt();
986 
987  float* rhoX = getRhoX().getData();
988  float* rhoY = getRhoY().getData();
989  float* rhoZ = getRhoZ().getData();
990 
991  const float* pmlX = getPmlX().getData();
992  const float* pmlY = getPmlY().getData();
993  const float* pmlZ = getPmlZ().getData();
994 
995  const float* duxdx = getDuxdx().getData();
996  const float* duydy = getDuydy().getData();
997  const float* duzdz = getDuzdz().getData();
998 
999  //----------------------------------------------- rho0 is scalar -------------------------------------------------//
1001  {
1002  const float rho0 = mParameters.getRho0Scalar();
1003 
1004  #pragma omp parallel for schedule(static)
1005  for (size_t z = 0; z < dimensionSizes.nz; z++)
1006  {
1007  for (size_t y = 0; y < dimensionSizes.ny; y++)
1008  {
1009  #pragma omp simd
1010  for (size_t x = 0; x < dimensionSizes.nx; x++)
1011  {
1012  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1013  const float sumRhosDt = (2.0f * (rhoX[i] + rhoY[i] + rhoZ[i]) + rho0) * dt;
1014 
1015  rhoX[i] = pmlX[x] * ((pmlX[x] * rhoX[i]) - sumRhosDt * duxdx[i]);
1016  rhoY[i] = pmlY[y] * ((pmlY[y] * rhoY[i]) - sumRhosDt * duydy[i]);
1017  rhoZ[i] = pmlZ[z] * ((pmlZ[z] * rhoZ[i]) - sumRhosDt * duzdz[i]);
1018  }// x
1019  }// y
1020  }// z
1021  }
1022  else
1023  { //---------------------------------------------- rho0 is matrix ------------------------------------------------//
1024  // rho0 is a matrix
1025  const float* rho0 = getRho0().getData();
1026 
1027  #pragma omp parallel for schedule(static)
1028  for (size_t z = 0; z < dimensionSizes.nz; z++)
1029  {
1030  for (size_t y = 0; y < dimensionSizes.ny; y++)
1031  {
1032  #pragma omp simd
1033  for (size_t x = 0; x < dimensionSizes.nx; x++)
1034  {
1035  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1036  const float sumRhosDt = (2.0f * (rhoX[i] + rhoY[i] + rhoZ[i]) + rho0[i]) * dt;
1037 
1038  rhoX[i] = pmlX[x] * ((pmlX[x] * rhoX[i]) - sumRhosDt * duxdx[i]);
1039  rhoY[i] = pmlY[y] * ((pmlY[y] * rhoY[i]) - sumRhosDt * duydy[i]);
1040  rhoZ[i] = pmlZ[z] * ((pmlZ[z] * rhoZ[i]) - sumRhosDt * duzdz[i]);
1041  } // x
1042  }// y
1043  }// z
1044  } // end rho is matrix
1045 }// end of computeDensityNonliner
1046 //----------------------------------------------------------------------------------------------------------------------
1047 
1048 /**
1049  * Calculate new values of acoustic density for linear case (rhoX, rhoy and rhoZ).
1050  *
1051  * <b>Matlab code:</b> \n
1052  *
1053  *\verbatim
1054  rhox = bsxfun(@times, pml_x, bsxfun(@times, pml_x, rhox) - dt .* rho0 .* duxdx);
1055  rhoy = bsxfun(@times, pml_y, bsxfun(@times, pml_y, rhoy) - dt .* rho0 .* duydy);
1056  rhoz = bsxfun(@times, pml_z, bsxfun(@times, pml_z, rhoz) - dt .* rho0 .* duzdz);
1057 \endverbatim
1058  *
1059  */
1061 {
1062  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
1063  const float dt = mParameters.getDt();
1064 
1065  float* rhoX = getRhoX().getData();
1066  float* rhoY = getRhoY().getData();
1067  float* rhoZ = getRhoZ().getData();
1068 
1069  const float* pmlX = getPmlX().getData();
1070  const float* pmlY = getPmlY().getData();
1071  const float* pmlZ = getPmlZ().getData();
1072 
1073  const float* duxdx = getDuxdx().getData();
1074  const float* duydy = getDuydy().getData();
1075  const float* duzdz = getDuzdz().getData();
1076 
1077  //----------------------------------------------- rho0 is scalar -------------------------------------------------//
1079  { // rho0 is a scalar
1080  const float dtRho0 = mParameters.getRho0Scalar() * dt;
1081 
1082  #pragma omp parallel for schedule(static)
1083  for (size_t z = 0; z < dimensionSizes.nz; z++)
1084  {
1085  for (size_t y = 0; y < dimensionSizes.ny; y++)
1086  {
1087  #pragma omp simd
1088  for (size_t x = 0; x < dimensionSizes.nx; x++)
1089  {
1090  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1091 
1092  rhoX[i] = pmlX[x] * (((pmlX[x] * rhoX[i]) - (dtRho0 * duxdx[i])));
1093  rhoY[i] = pmlY[y] * (((pmlY[y] * rhoY[i]) - (dtRho0 * duydy[i])));
1094  rhoZ[i] = pmlZ[z] * (((pmlZ[z] * rhoZ[i]) - (dtRho0 * duzdz[i])));
1095  } // x
1096  }// y
1097  }// z
1098  }
1099  else
1100  { //---------------------------------------------- rho0 is matrix ------------------------------------------------//
1101  // rho0 is a matrix
1102  const float* rho0 = getRho0().getData();
1103 
1104  #pragma omp parallel for schedule(static)
1105  for (size_t z = 0; z < dimensionSizes.nz; z++)
1106  {
1107  for (size_t y = 0; y < dimensionSizes.ny; y++)
1108  {
1109  #pragma omp simd
1110  for (size_t x = 0; x < dimensionSizes.nx; x++)
1111  {
1112  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1113  const float dtRho0 = dt * rho0[i];
1114 
1115  rhoX[i] = pmlX[x] * (((pmlX[x] * rhoX[i]) - (dtRho0 * duxdx[i])));
1116  rhoY[i] = pmlY[y] * (((pmlY[y] * rhoY[i]) - (dtRho0 * duydy[i])));
1117  rhoZ[i] = pmlZ[z] * (((pmlZ[z] * rhoZ[i]) - (dtRho0 * duzdz[i])));
1118  } // x
1119  }// y
1120  }// z
1121  } // end rho is a matrix
1122 }// end of computeDensityLinear
1123 //----------------------------------------------------------------------------------------------------------------------
1124 
1125 /**
1126  * Compute acoustic pressure for non-linear case.
1127  *
1128  * <b>Matlab code:</b> \n
1129  *
1130  *\verbatim
1131  case 'lossless'
1132  % calculate p using a nonlinear adiabatic equation of state
1133  p = c.^2 .* (rhox + rhoy + rhoz + medium.BonA .* (rhox + rhoy + rhoz).^2 ./ (2 .* rho0));
1134 
1135  case 'absorbing'
1136  % calculate p using a nonlinear absorbing equation of state
1137  p = c.^2 .* (...
1138  (rhox + rhoy + rhoz) ...
1139  + absorb_tau .* real(ifftn( absorb_nabla1 .* fftn(rho0 .* (duxdx + duydy + duzdz)) ))...
1140  - absorb_eta .* real(ifftn( absorb_nabla2 .* fftn(rhox + rhoy + rhoz) ))...
1141  + medium.BonA .*(rhox + rhoy + rhoz).^2 ./ (2 .* rho0) ...
1142  );
1143 
1144  \endverbatim
1145  */
1147 {
1149  { // absorbing case
1150 
1151  RealMatrix& densitySum = getTemp1Real3D();
1152  RealMatrix& nonlinearTerm = getTemp2Real3D();
1153  RealMatrix& velocitGradientSum = getTemp3Real3D();
1154 
1155  // reusing of the temp variables
1156  RealMatrix& absorbTauTerm = velocitGradientSum;
1157  RealMatrix& absorbEtaTerm = densitySum;
1158 
1159  // different templated variants of computePressureTermsNonlinear
1161  {
1163  {
1164  computePressureTermsNonlinear<true, true>(densitySum, nonlinearTerm, velocitGradientSum);
1165  }
1166  else
1167  {
1168  computePressureTermsNonlinear<true, false>(densitySum, nonlinearTerm, velocitGradientSum);
1169  }
1170  }
1171  else
1172  {
1174  {
1175  computePressureTermsNonlinear<false, true>(densitySum, nonlinearTerm, velocitGradientSum);
1176  }
1177  else
1178  {
1179  computePressureTermsNonlinear<false, false>(densitySum, nonlinearTerm, velocitGradientSum);
1180  }
1181  }
1182 
1183  // ifftn( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz))
1184  getTempFftwX().computeR2CFft3D(velocitGradientSum);
1185  getTempFftwY().computeR2CFft3D(densitySum);
1186 
1188 
1189  getTempFftwX().computeC2RFft3D(absorbTauTerm);
1190  getTempFftwY().computeC2RFft3D(absorbEtaTerm);
1191 
1192  // different templated variants of sumPressureTermsNonlinear
1194  {
1196  {
1197  sumPressureTermsNonlinear<true, true>(absorbTauTerm, absorbEtaTerm, nonlinearTerm);
1198  }
1199  else
1200  {
1201  sumPressureTermsNonlinear<true, false>(absorbTauTerm, absorbEtaTerm, nonlinearTerm);
1202  }
1203  }
1204  else
1205  {
1206  sumPressureTermsNonlinear<false, false>(absorbTauTerm, absorbEtaTerm, nonlinearTerm);
1207  }
1208 
1209  }
1210  else
1211  { //------------------------------------------------ lossless case--------------------------------------------------//
1213  {
1215  {
1217  {
1218  sumPressureTermsNonlinearLossless<true, true, true>();
1219  }
1220  else
1221  {
1222  sumPressureTermsNonlinearLossless<true, true, false>();
1223  }
1224  }
1225  else
1226  {
1228  {
1229  sumPressureTermsNonlinearLossless<true, false, true>();
1230  }
1231  else
1232  {
1233  sumPressureTermsNonlinearLossless<true, false, false>();
1234  }
1235  }
1236  }
1237  else
1238  {
1240  {
1242  {
1243  sumPressureTermsNonlinearLossless<false, true, true>();
1244  }
1245  else
1246  {
1247  sumPressureTermsNonlinearLossless<false, true, false>();
1248  }
1249  }
1250  else
1251  {
1253  {
1254  sumPressureTermsNonlinearLossless<false, false, true>();
1255  }
1256  else
1257  {
1258  sumPressureTermsNonlinearLossless<false, false, false>();
1259  }
1260  }
1261  }
1262  }
1263 }// end of computePressureNonlinear
1264 //----------------------------------------------------------------------------------------------------------------------
1265 
1266 /**
1267  * Compute new p for linear case.
1268  *
1269  * <b>Matlab code:</b> \n
1270  *
1271  *\verbatim
1272  case 'lossless'
1273 
1274  % calculate p using a linear adiabatic equation of state
1275  p = c.^2 .* (rhox + rhoy + rhoz);
1276 
1277  case 'absorbing'
1278 
1279  % calculate p using a linear absorbing equation of state
1280  p = c.^2 .* ( ...
1281  (rhox + rhoy + rhoz) ...
1282  + absorb_tau .* real(ifftn( absorb_nabla1 .* fftn(rho0 .* (duxdx + duydy + duzdz)) )) ...
1283  - absorb_eta .* real(ifftn( absorb_nabla2 .* fftn(rhox + rhoy + rhoz) )) ...
1284  );
1285  \endverbatim
1286  */
1288  {
1289  // rhox + rhoy + rhoz
1291  { // absorbing case
1292 
1293  RealMatrix& densitySum = getTemp1Real3D();
1294  RealMatrix& velocityGradientTerm = getTemp2Real3D();
1295 
1296  RealMatrix& absorbTauTerm = getTemp2Real3D();
1297  RealMatrix& absorbEtaTerm = getTemp3Real3D();
1298 
1299  computePressureTermsLinear(densitySum, velocityGradientTerm);
1300 
1301  // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz))
1302 
1303  getTempFftwX().computeR2CFft3D(velocityGradientTerm);
1304  getTempFftwY().computeR2CFft3D(densitySum);
1305 
1307 
1308  getTempFftwX().computeC2RFft3D(absorbTauTerm);
1309  getTempFftwY().computeC2RFft3D(absorbEtaTerm);
1310 
1312  {
1314  {
1315  sumPressureTermsLinear<true, true>(absorbTauTerm, absorbEtaTerm, densitySum);
1316  }
1317  else
1318  {
1319  sumPressureTermsLinear<true, false>(absorbTauTerm, absorbEtaTerm, densitySum);
1320  }
1321  }
1322  else
1323  {
1324  sumPressureTermsLinear<false, false>(absorbTauTerm, absorbEtaTerm, densitySum);
1325  }
1326  }
1327  else
1328  {
1329  // lossless case
1331  }
1332  }// end of computePressureLinear
1333 //----------------------------------------------------------------------------------------------------------------------
1334 
1335 /**
1336  * Add u source to the particle velocity.
1337  */
1339 {
1340  const size_t timeIndex = mParameters.getTimeIndex();
1341 
1342  if (mParameters.getVelocityXSourceFlag() > timeIndex)
1343  {
1345  }
1346 
1347  if (mParameters.getVelocityYSourceFlag() > timeIndex)
1348  {
1350  }
1351 
1352  if (mParameters.getVelocityZSourceFlag() > timeIndex)
1353  {
1355  }
1356 }// end of addVelocitySource
1357 //----------------------------------------------------------------------------------------------------------------------
1358 
1359 /**
1360  * Add in velocity source terms.
1361  */
1363  const RealMatrix& velocitySourceInput,
1364  const IndexMatrix& velocitySourceIndex)
1365 {
1366 
1367  const size_t timeIndex = mParameters.getTimeIndex();
1368  const size_t sourceSize = velocitySourceIndex.size();
1369  const size_t index2D = (mParameters.getVelocitySourceMany() != 0) ? timeIndex * sourceSize : timeIndex;
1370 
1371  const bool velocitySourceMode = mParameters.getVelocitySourceMode();
1372  const bool velocitySourceMany = mParameters.getVelocitySourceMany();
1373 
1374  if (velocitySourceMode == 0)
1375  {
1376  #pragma omp parallel for if (sourceSize > 16384)
1377  for (size_t i = 0; i < sourceSize; i++)
1378  {
1379  const size_t signalIndex = (velocitySourceMany != 0) ? index2D + i : index2D;
1380  velocityMatrix[velocitySourceIndex[i]] = velocitySourceInput[signalIndex];
1381  }
1382  }// end of Dirichlet
1383 
1384  if (velocitySourceMode == 1)
1385  {
1386  #pragma omp parallel for if (sourceSize > 16384)
1387  for (size_t i = 0; i < sourceSize; i++)
1388  {
1389  const size_t signalIndex = (velocitySourceMany != 0) ? index2D + i : index2D;
1390  velocityMatrix[velocitySourceIndex[i]] += velocitySourceInput[signalIndex];
1391  }
1392  }// end of add
1393 }// end of computeVelocitySourceTerm
1394 //----------------------------------------------------------------------------------------------------------------------
1395 
1396  /**
1397  * Add in pressure source.
1398  */
1400 {
1401  const size_t timeIndex = mParameters.getTimeIndex();
1402 
1403  if (mParameters.getPressureSourceFlag() > timeIndex)
1404  {
1405  float* rhox = getRhoX().getData();
1406  float* rhoy = getRhoY().getData();
1407  float* rhoz = getRhoZ().getData();
1408 
1409  const float* sourceInput = getPressureSourceInput().getData();
1410  const size_t* sourceIndex = getPressureSourceIndex().getData();
1411 
1412  const bool isManyFlag = (mParameters.getPressureSourceMany() != 0);
1413  const size_t sourceSize = getPressureSourceIndex().size();
1414  const size_t index2D = (isManyFlag) ? timeIndex * sourceSize : timeIndex;
1415 
1416  // replacement
1418  {
1419  #pragma omp parallel for if (sourceSize > 16384)
1420  for (size_t i = 0; i < sourceSize; i++)
1421  {
1422  const size_t signalIndex = (isManyFlag) ? index2D + i : index2D;
1423 
1424  rhox[sourceIndex[i]] = sourceInput[signalIndex];
1425  rhoy[sourceIndex[i]] = sourceInput[signalIndex];
1426  rhoz[sourceIndex[i]] = sourceInput[signalIndex];
1427  }
1428  }
1429  // Addition
1430  else
1431  {
1432  #pragma omp parallel for if (sourceSize > 16384)
1433  for (size_t i = 0; i < sourceSize; i++)
1434  {
1435  const size_t signalIndex = (isManyFlag) ? index2D + i : index2D;
1436 
1437  rhox[sourceIndex[i]] += sourceInput[signalIndex];
1438  rhoy[sourceIndex[i]] += sourceInput[signalIndex];
1439  rhoz[sourceIndex[i]] += sourceInput[signalIndex];
1440  }
1441  }// type of replacement
1442  }// if do at all
1443 }// end of addPressureSource
1444 //----------------------------------------------------------------------------------------------------------------------
1445 
1446 /**
1447  * Calculate p0 source when necessary.
1448  *
1449  * <b>Matlab code:</b> \n
1450  *
1451  *\verbatim
1452  % add the initial pressure to rho as a mass source
1453  p = source.p0;
1454  rhox = source.p0 ./ (3 .* c.^2);
1455  rhoy = source.p0 ./ (3 .* c.^2);
1456  rhoz = source.p0 ./ (3 .* c.^2);
1457 
1458  % compute u(t = t1 + dt/2) based on the assumption u(dt/2) = -u(-dt/2)
1459  % which forces u(t = t1) = 0
1460  ux_sgx = dt .* rho0_sgx_inv .* real(ifftn( bsxfun(@times, ddx_k_shift_pos, kappa .* fftn(p)) )) / 2;
1461  uy_sgy = dt .* rho0_sgy_inv .* real(ifftn( bsxfun(@times, ddy_k_shift_pos, kappa .* fftn(p)) )) / 2;
1462  uz_sgz = dt .* rho0_sgz_inv .* real(ifftn( bsxfun(@times, ddz_k_shift_pos, kappa .* fftn(p)) )) / 2;
1463  \endverbatim
1464  */
1466 {
1468 
1469  const float* sourceInput = getInitialPressureSourceInput().getData();
1470 
1471  const bool c0ScalarFlag = mParameters.getC0ScalarFlag();
1472  const float c2Scalar = (c0ScalarFlag) ? mParameters.getC2Scalar() : 0;
1473  const float* c2Matrix = (c0ScalarFlag) ? nullptr : getC2().getData();
1474 
1475  float* rhoX = getRhoX().getData();
1476  float* rhoY = getRhoY().getData();
1477  float* rhoZ = getRhoZ().getData();
1478 
1479  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
1480 
1481  #pragma omp parallel for simd schedule(static)
1482  for (size_t i = 0; i < nElements; i++)
1483  {
1484  const float tmp = sourceInput[i] / (3.0f * ((c0ScalarFlag) ? c2Scalar : c2Matrix[i]));
1485  rhoX[i] = tmp;
1486  rhoY[i] = tmp;
1487  rhoZ[i] = tmp;
1488  }
1489 
1490  //------------------------------------------------------------------------//
1491  //-- compute u(t = t1 + dt/2) based on the assumption u(dt/2) = -u(-dt/2) --//
1492  //-- which forces u(t = t1) = 0 --//
1493  //------------------------------------------------------------------------//
1495 
1497  {
1499  { // non uniform grid, homogeneous case
1501  }
1502  else
1503  { //uniform grid, heterogeneous
1505  }
1506  }
1507  else
1508  { // homogeneous, unifrom grid
1509  // divide the matrix by 2 and multiply with st./rho0_sg
1511  }
1512 }// end of addInitialPressureSource
1513 //----------------------------------------------------------------------------------------------------------------------
1514 
1515 /**
1516  * Add transducer data source to velocity x component.
1517  */
1519 {
1520  float* uxSgx = getUxSgx().getData();
1521 
1522  const size_t* velocitySourceIndex = getVelocitySourceIndex().getData();
1523  const float* transducerSourceInput = getTransducerSourceInput().getData();
1524  const size_t* delayMask = getDelayMask().getData();
1525 
1526  const size_t timeIndex = mParameters.getTimeIndex();
1527  const size_t sourceSize = getVelocitySourceIndex().size();
1528 
1529  #pragma omp parallel for schedule(static) if (sourceSize > 16384)
1530  for (size_t i = 0; i < sourceSize; i++)
1531  {
1532  uxSgx[velocitySourceIndex[i]] += transducerSourceInput[delayMask[i] + timeIndex];
1533  }
1534 }// end of addTransducerSource
1535 //----------------------------------------------------------------------------------------------------------------------
1536 
1537 /**
1538  * Generate kappa matrix for lossless medium.
1539  */
1541 {
1542  const float dx2Rec = 1.0f / (mParameters.getDx() * mParameters.getDx());
1543  const float dy2Rec = 1.0f / (mParameters.getDy() * mParameters.getDy());
1544  const float dz2Rec = 1.0f / (mParameters.getDz() * mParameters.getDz());
1545 
1546  const float cRefDtPi = mParameters.getCRef() * mParameters.getDt() * static_cast<float>(M_PI);
1547 
1548  const float nxRec = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().nx);
1549  const float nyRec = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().ny);
1550  const float nzRec = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().nz);
1551 
1552  const DimensionSizes& reducedDimensionSizes = mParameters.getReducedDimensionSizes();
1553 
1554  float* kappa = getKappa().getData();
1555 
1556  #pragma omp parallel for schedule (static)
1557  for (size_t z = 0; z < reducedDimensionSizes.nz; z++)
1558  {
1559  const float zf = static_cast<float>(z);
1560  float zPart = 0.5f - fabs(0.5f - zf * nzRec);
1561  zPart = (zPart * zPart) * dz2Rec;
1562 
1563  for (size_t y = 0; y < reducedDimensionSizes.ny; y++)
1564  {
1565  const float yf = static_cast<float>(y);
1566  float yPart = 0.5f - fabs(0.5f - yf * nyRec);
1567  yPart = (yPart * yPart) * dy2Rec;
1568 
1569  const float yzPart = zPart + yPart;
1570  for (size_t x = 0; x < reducedDimensionSizes.nx; x++)
1571  {
1572  const float xf = static_cast<float>(x);
1573  float xPart = 0.5f - fabs(0.5f - xf * nxRec);
1574  xPart = (xPart * xPart) * dx2Rec;
1575 
1576  float k = cRefDtPi * sqrt(xPart + yzPart);
1577 
1578  const size_t i = get1DIndex(z, y, x, reducedDimensionSizes);
1579 
1580  // kappa element
1581  kappa[i] = (k == 0.0f) ? 1.0f : sin(k) / k;
1582  }//x
1583  }//y
1584  }// z
1585 }// end of generateKappa
1586 //----------------------------------------------------------------------------------------------------------------------
1587 
1588 /**
1589  * Generate kappa, absorb_nabla1, absorb_nabla2 for absorbing medium.
1590  */
1592 {
1593  const float dxSqRec = 1.0f / (mParameters.getDx() * mParameters.getDx());
1594  const float dySqRec = 1.0f / (mParameters.getDy() * mParameters.getDy());
1595  const float dzSqRec = 1.0f / (mParameters.getDz() * mParameters.getDz());
1596 
1597  const float cRefDt2 = mParameters.getCRef() * mParameters.getDt() * 0.5f;
1598  const float pi2 = static_cast<float>(M_PI) * 2.0f;
1599 
1600  const size_t nx = mParameters.getFullDimensionSizes().nx;
1601  const size_t ny = mParameters.getFullDimensionSizes().ny;
1602  const size_t nz = mParameters.getFullDimensionSizes().nz;
1603 
1604  const float nxRec = 1.0f / static_cast<float>(nx);
1605  const float nyRec = 1.0f / static_cast<float>(ny);
1606  const float nzRec = 1.0f / static_cast<float>(nz);
1607 
1608  const DimensionSizes& reducedDimensionSizes = mParameters.getReducedDimensionSizes();
1609 
1610  float* kappa = getKappa().getData();
1611  float* absorbNabla1 = getAbsorbNabla1().getData();
1612  float* absorbNabla2 = getAbsorbNabla2().getData();
1613  const float alphaPower = mParameters.getAlphaPower();
1614 
1615  #pragma omp parallel for schedule (static)
1616  for (size_t z = 0; z < reducedDimensionSizes.nz; z++)
1617  {
1618  const float zf = static_cast<float>(z);
1619  float zPart = 0.5f - fabs(0.5f - zf * nzRec);
1620  zPart = (zPart * zPart) * dzSqRec;
1621 
1622  for (size_t y = 0; y < reducedDimensionSizes.ny; y++)
1623  {
1624  const float yf = static_cast<float>(y);
1625  float yPart = 0.5f - fabs(0.5f - yf * nyRec);
1626  yPart = (yPart * yPart) * dySqRec;
1627 
1628  const float yzPart = zPart + yPart;
1629 
1630  for (size_t x = 0; x < reducedDimensionSizes.nx; x++)
1631  {
1632  const float xf = static_cast<float>(x);
1633  float xPart = 0.5f - fabs(0.5f - xf * nxRec);
1634  xPart = (xPart * xPart) * dxSqRec;
1635 
1636  float k = pi2 * sqrt(xPart + yzPart);
1637  float cRefK = cRefDt2 * k;
1638 
1639  const size_t i = get1DIndex(z, y, x, reducedDimensionSizes);
1640 
1641  kappa[i] = (cRefK == 0.0f) ? 1.0f : sin(cRefK) / cRefK;
1642 
1643  absorbNabla1[i] = pow(k, alphaPower - 2);
1644  absorbNabla2[i] = pow(k, alphaPower - 1);
1645 
1646  if (absorbNabla1[i] == std::numeric_limits<float>::infinity()) absorbNabla1[i] = 0.0f;
1647  if (absorbNabla2[i] == std::numeric_limits<float>::infinity()) absorbNabla2[i] = 0.0f;
1648  }//x
1649  }//y
1650  }// z
1651 
1652 }// end of generateKappaAndNablas
1653 //----------------------------------------------------------------------------------------------------------------------
1654 
1655 /**
1656  * Generate absorbTau and absorbEta in for heterogenous medium.
1657  */
1659 {
1661  { // scalar values
1662  const float alphaPower = mParameters.getAlphaPower();
1663  const float tanPi2AlphaPower = tan(static_cast<float> (M_PI_2) * alphaPower);
1664  const float alphaNeperCoeff = (100.0f * pow(1.0e-6f / (2.0f * static_cast<float>(M_PI)), alphaPower)) /
1665  (20.0f * static_cast<float>(M_LOG10E));
1666 
1667  const float alphaCoeff2 = 2.0f * mParameters.getAlphaCoeffScalar() * alphaNeperCoeff;
1668 
1669  mParameters.setAbsorbTauScalar((-alphaCoeff2) * pow(mParameters.getC0Scalar(), alphaPower - 1));
1670  mParameters.setAbsorbEtaScalar( alphaCoeff2 * pow(mParameters.getC0Scalar(), alphaPower) * tanPi2AlphaPower);
1671  }
1672  else
1673  { // matrix
1674 
1675  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
1676 
1677  float* absorbTau = getAbsorbTau().getData();
1678  float* absorbEta = getAbsorbEta().getData();
1679 
1680  const bool alphaCoeffScalarFlag = mParameters.getAlphaCoeffScalarFlag();
1681  const float alphaCoeffScalar = (alphaCoeffScalarFlag) ? mParameters.getAlphaCoeffScalar() : 0;
1682  const float* alphaCoeffMatrix = (alphaCoeffScalarFlag) ? nullptr : getTemp1Real3D().getData();
1683 
1684 
1685  const bool c0ScalarFlag = mParameters.getC0ScalarFlag();
1686  const float c0Scalar = (c0ScalarFlag) ? mParameters.getC0Scalar() : 0;
1687  // here c2 still holds just c0!
1688  const float* cOMatrix = (c0ScalarFlag) ? nullptr : getC2().getData();
1689 
1690 
1691  const float alphaPower = mParameters.getAlphaPower();
1692  const float tanPi2AlphaPower = tan(static_cast<float>(M_PI_2) * alphaPower);
1693 
1694  //alpha = 100*alpha.*(1e-6/(2*pi)).^y./
1695  // (20*log10(exp(1)));
1696  const float alphaNeperCoeff = (100.0f * pow(1.0e-6f / (2.0f * static_cast<float>(M_PI)), alphaPower)) /
1697  (20.0f * static_cast<float>(M_LOG10E));
1698 
1699 
1700  #pragma omp parallel for schedule (static)
1701  for (size_t z = 0; z < dimensionSizes.nz; z++)
1702  {
1703  for (size_t y = 0; y < dimensionSizes.ny; y++)
1704  {
1705  for (size_t x = 0; x < dimensionSizes.nx; x++)
1706  {
1707  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1708 
1709  const float alphaCoeff2 = 2.0f * alphaNeperCoeff *
1710  ((alphaCoeffScalarFlag) ? alphaCoeffScalar : alphaCoeffMatrix[i]);
1711 
1712  absorbTau[i] = (-alphaCoeff2) * pow(((c0ScalarFlag) ? c0Scalar : cOMatrix[i]), alphaPower - 1);
1713  absorbEta[i] = alphaCoeff2 * pow(((c0ScalarFlag) ? c0Scalar : cOMatrix[i]),
1714  alphaPower) * tanPi2AlphaPower;
1715 
1716  }//x
1717  }//y
1718  }// z
1719  } // matrix
1720 }// end of generateTauAndEta
1721 //----------------------------------------------------------------------------------------------------------------------
1722 
1723 /**
1724  * Prepare dt./ rho0 for non-uniform grid.
1725  */
1727 {
1728  float* dtRho0Sgx = getDtRho0Sgx().getData();
1729  float* dtRho0Sgy = getDtRho0Sgy().getData();
1730  float* dtRho0Sgz = getDtRho0Sgz().getData();
1731 
1732  const float dt = mParameters.getDt();
1733 
1734  const float* duxdxnSgx = getDxudxnSgx().getData();
1735  const float* duydynSgy = getDyudynSgy().getData();
1736  const float* duzdznSgz = getDzudznSgz().getData();
1737 
1738  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
1739 
1740  #pragma omp parallel for schedule(static)
1741  for (size_t z = 0; z < dimensionSizes.nz; z++)
1742  {
1743  for (size_t y = 0; y < dimensionSizes.ny; y++)
1744  {
1745  #pragma omp simd
1746  for (size_t x = 0; x < dimensionSizes.nx; x++)
1747  {
1748  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1749 
1750  dtRho0Sgx[i] = (dt * duxdxnSgx[x]) / dtRho0Sgx[i];
1751  dtRho0Sgy[i] = (dt * duydynSgy[y]) / dtRho0Sgy[i];
1752  dtRho0Sgz[i] = (dt * duzdznSgz[z]) / dtRho0Sgz[i];
1753  } // x
1754  } // y
1755  } // z
1756 
1757 }// end of generateInitialDenisty
1758 //----------------------------------------------------------------------------------------------------------------------
1759 
1760 /**
1761  * Compute c^2.
1762  */
1764 {
1766  {
1767  float* c2 = getC2().getData();
1768 
1769  #pragma omp parallel for simd schedule(static) aligned(c2)
1770  for (size_t i=0; i < getC2().size(); i++)
1771  {
1772  c2[i] = c2[i] * c2[i];
1773  }
1774  }// matrix
1775 }// computeC2
1776 //----------------------------------------------------------------------------------------------------------------------
1777 
1778 /**
1779  * Compute acoustic velocity for initial pressure problem.
1780  */
1782 {
1786 
1787  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
1788  const float divider = 1.0f / (2.0f * static_cast<float>(nElements));
1789 
1790  float* uxSgx = getUxSgx().getData();
1791  float* uySgy = getUySgy().getData();
1792  float* uzSgz = getUzSgz().getData();
1793 
1794  const float* dtRho0Sgx = getDtRho0Sgx().getData();
1795  const float* dtRho0Sgy = getDtRho0Sgy().getData();
1796  const float* dtRho0Sgz = getDtRho0Sgz().getData();
1797 
1798 
1799  #pragma omp parallel for simd schedule(static) \
1800  aligned(uxSgx, uySgy, uzSgz, dtRho0Sgx, dtRho0Sgy, dtRho0Sgz)
1801  for (size_t i = 0; i < nElements; i++)
1802  {
1803  uxSgx[i] *= dtRho0Sgx[i] * divider;
1804  uySgy[i] *= dtRho0Sgy[i] * divider;
1805  uzSgz[i] *= dtRho0Sgz[i] * divider;
1806  }
1807 }// end of computeInitialVelocityHeterogeneous
1808 //----------------------------------------------------------------------------------------------------------------------
1809 
1810 /**
1811  * Compute velocity for the initial pressure problem, homogeneous medium, uniform grid.
1812  */
1814 {
1818 
1819  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
1820  const float dividerX = 1.0f / (2.0f * static_cast<float>(nElements)) * mParameters.getDtRho0SgxScalar();
1821  const float dividerY = 1.0f / (2.0f * static_cast<float>(nElements)) * mParameters.getDtRho0SgyScalar();
1822  const float dividerZ = 1.0f / (2.0f * static_cast<float>(nElements)) * mParameters.getDtRho0SgzScalar();
1823 
1824  float* uxSgx = getUxSgx().getData();
1825  float* uySgy = getUySgy().getData();
1826  float* uzSgz = getUzSgz().getData();
1827 
1828  #pragma omp parallel for simd schedule(static) aligned(uxSgx, uySgy, uzSgz)
1829  for (size_t i = 0; i < nElements; i++)
1830  {
1831  uxSgx[i] *= dividerX;
1832  uySgy[i] *= dividerY;
1833  uzSgz[i] *= dividerZ;
1834  }
1835 }// end of computeInitialVelocityHomogeneousUniform
1836 //----------------------------------------------------------------------------------------------------------------------
1837 
1838 /**
1839  * Compute acoustic velocity for initial pressure problem, homogenous medium, nonuniform grid.
1840  */
1842 {
1846 
1847  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
1848  const size_t nElements = dimensionSizes.nElements();
1849 
1850  const float dividerX = 1.0f / (2.0f * static_cast<float>(nElements)) * mParameters.getDtRho0SgxScalar();
1851  const float dividerY = 1.0f / (2.0f * static_cast<float>(nElements)) * mParameters.getDtRho0SgyScalar();
1852  const float dividerZ = 1.0f / (2.0f * static_cast<float>(nElements)) * mParameters.getDtRho0SgzScalar();
1853 
1854  const float* dxudxnSgx = getDxudxnSgx().getData();
1855  const float* dyudynSgy = getDyudynSgy().getData();
1856  const float* dzudznSgz = getDzudznSgz().getData();
1857 
1858  float* uxSgx = getUxSgx().getData();
1859  float* uySgy = getUySgy().getData();
1860  float* uzSgz = getUzSgz().getData();
1861 
1862 
1863  #pragma omp parallel for schedule(static)
1864  for (size_t z = 0; z < dimensionSizes.nz; z++)
1865  {
1866  for (size_t y = 0; y < dimensionSizes.ny; y++)
1867  {
1868  #pragma omp simd
1869  for (size_t x = 0; x < dimensionSizes.nx; x++)
1870  {
1871  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1872  uxSgx[i] *= dividerX * dxudxnSgx[x];
1873  uySgy[i] *= dividerY * dyudynSgy[y];
1874  uzSgz[i] *= dividerZ * dzudznSgz[z] ;
1875  } // x
1876  } // y
1877  } // z
1878 }// end of computeInitialVelocityHomogeneousNonuniform
1879 //----------------------------------------------------------------------------------------------------------------------
1880 
1881 /**
1882  * Compute acoustic velocity for heterogeneous medium and a uniform grid, x direction.
1883  */
1885 {
1886  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
1887  const size_t nElements = dimensionSizes.nElements();
1888  const float divider = 1.0f / static_cast<float>(nElements);
1889 
1890  const float* ifftX = getTemp1Real3D().getData();
1891  const float* ifftY = getTemp2Real3D().getData();
1892  const float* ifftZ = getTemp3Real3D().getData();
1893 
1894  const float* dtRho0Sgx = getDtRho0Sgx().getData();
1895  const float* dtRho0Sgy = getDtRho0Sgy().getData();
1896  const float* dtRho0Sgz = getDtRho0Sgz().getData();
1897 
1898  const float* pmlX = getPmlXSgx().getData();
1899  const float* pmlY = getPmlYSgy().getData();
1900  const float* pmlZ = getPmlZSgz().getData();
1901 
1902  float* uxSgx = getUxSgx().getData();
1903  float* uySgy = getUySgy().getData();
1904  float* uzSgz = getUzSgz().getData();
1905 
1906  // long loops are replicated for every dimension to save SIMD registers
1907  #pragma omp parallel for schedule(static)
1908  for (size_t z = 0; z < dimensionSizes.nz; z++)
1909  {
1910  for (size_t y = 0; y < dimensionSizes.ny; y++)
1911  {
1912  #pragma omp simd
1913  for (size_t x = 0; x < dimensionSizes.nx; x++)
1914  {
1915  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1916 
1917  uxSgx[i] = (uxSgx[i] * pmlX[x] - divider * ifftX[i] * dtRho0Sgx[i]) * pmlX[x];
1918  } // x
1919  } // y
1920  } // z
1921 
1922  #pragma omp parallel for schedule(static)
1923  for (size_t z = 0; z < dimensionSizes.nz; z++)
1924  {
1925  for (size_t y = 0; y < dimensionSizes.ny; y++)
1926  {
1927  const float ePmlY = pmlY[y];
1928  #pragma omp simd
1929  for (size_t x = 0; x < dimensionSizes.nx; x++)
1930  {
1931  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1932 
1933  uySgy[i] = (uySgy[i] * ePmlY - divider * ifftY[i] * dtRho0Sgy[i]) * ePmlY;
1934  } // x
1935  } // y
1936  } // z
1937 
1938  #pragma omp parallel for schedule(static)
1939  for (size_t z = 0; z < dimensionSizes.nz; z++)
1940  {
1941  const float ePmlZ = pmlZ[z];
1942  for (size_t y = 0; y < dimensionSizes.ny; y++)
1943  {
1944  #pragma omp simd
1945  for (size_t x = 0; x < dimensionSizes.nx; x++)
1946  {
1947  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1948 
1949  uzSgz[i] = (uzSgz[i] * ePmlZ - divider * ifftZ[i] * dtRho0Sgz[i]) * ePmlZ;
1950  } // x
1951  } // y
1952  } // z
1953 }// end of computeVelocityHeterogeneous
1954 //----------------------------------------------------------------------------------------------------------------------
1955 
1956 /**
1957  * Compute acoustic velocity for homogeneous medium and a uniform grid.
1958  */
1960 {
1961  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
1962  const size_t nElements = dimensionSizes.nElements();
1963 
1964  const float dividerX = mParameters.getDtRho0SgxScalar() / static_cast<float>(nElements);
1965  const float dividerY = mParameters.getDtRho0SgyScalar() / static_cast<float>(nElements);
1966  const float dividerZ = mParameters.getDtRho0SgzScalar() / static_cast<float>(nElements);
1967 
1968  const float* ifftX = getTemp1Real3D().getData();
1969  const float* ifftY = getTemp2Real3D().getData();
1970  const float* ifftZ = getTemp3Real3D().getData();
1971 
1972  const float* pmlX = getPmlXSgx().getData();
1973  const float* pmlY = getPmlYSgy().getData();
1974  const float* pmlZ = getPmlZSgz().getData();
1975 
1976  float* uxSgx = getUxSgx().getData();
1977  float* uySgy = getUySgy().getData();
1978  float* uzSgz = getUzSgz().getData();
1979 
1980  // long loops are replicated for every dimension to save SIMD registers
1981  #pragma omp parallel for schedule(static)
1982  for (size_t z = 0; z < dimensionSizes.nz; z++)
1983  {
1984  for (size_t y = 0; y < dimensionSizes.ny; y++)
1985  {
1986  #pragma omp simd
1987  for (size_t x = 0; x < dimensionSizes.nx; x++)
1988  {
1989  const size_t i = get1DIndex(z, y, x, dimensionSizes);
1990 
1991  uxSgx[i] = (uxSgx[i] * pmlX[x] - dividerX * ifftX[i]) * pmlX[x];
1992  } // x
1993  } // y
1994  } // z
1995 
1996  #pragma omp parallel for schedule(static)
1997  for (size_t z = 0; z < dimensionSizes.nz; z++)
1998  {
1999  for (size_t y = 0; y < dimensionSizes.ny; y++)
2000  {
2001  #pragma omp simd
2002  for (size_t x = 0; x < dimensionSizes.nx; x++)
2003  {
2004  const size_t i = get1DIndex(z, y, x, dimensionSizes);
2005 
2006  uySgy[i] = (uySgy[i] * pmlY[y] - dividerY * ifftY[i]) * pmlY[y];
2007  } // x
2008  } // y
2009  } // z
2010 
2011  #pragma omp parallel for schedule(static)
2012  for (size_t z = 0; z < dimensionSizes.nz; z++)
2013  {
2014  for (size_t y = 0; y < dimensionSizes.ny; y++)
2015  {
2016  #pragma omp simd
2017  for (size_t x = 0; x < dimensionSizes.nx; x++)
2018  {
2019  const size_t i = get1DIndex(z, y, x, dimensionSizes);
2020 
2021  uzSgz[i] = (uzSgz[i] * pmlZ[z] - dividerZ * ifftZ[i]) * pmlZ[z];
2022  } // x
2023  } // y
2024  } // z
2025 }// end of computeVelocityXHomogeneousUniform
2026 //----------------------------------------------------------------------------------------------------------------------
2027 
2028 /**
2029  * Compute acoustic velocity for homogenous medium and nonuniform grid, x direction.
2030  */
2032 {
2033  const DimensionSizes& dimensionSizes = mParameters.getFullDimensionSizes();
2034  const size_t nElements = dimensionSizes.nElements();
2035 
2036  const float dividerX = mParameters.getDtRho0SgxScalar() / static_cast<float>(nElements);
2037  const float dividerY = mParameters.getDtRho0SgyScalar() / static_cast<float>(nElements);
2038  const float dividerZ = mParameters.getDtRho0SgzScalar() / static_cast<float>(nElements);
2039 
2040  const float* ifftX = getTemp1Real3D().getData();
2041  const float* ifftY = getTemp2Real3D().getData();
2042  const float* ifftZ = getTemp3Real3D().getData();
2043 
2044  const float* dxudxnSgx = getDxudxnSgx().getData();
2045  const float* dyudynSgy = getDyudynSgy().getData();
2046  const float* dzudznSgz = getDzudznSgz().getData();
2047 
2048  const float* pmlX = getPmlXSgx().getData();
2049  const float* pmlY = getPmlYSgy().getData();
2050  const float* pmlZ = getPmlZSgz().getData();
2051 
2052  float* uxSgx = getUxSgx().getData();
2053  float* uySgy = getUySgy().getData();
2054  float* uzSgz = getUzSgz().getData();
2055 
2056  // long loops are replicated for every dimension to save SIMD registers
2057  #pragma omp parallel for schedule(static)
2058  for (size_t z = 0; z < dimensionSizes.nz; z++)
2059  {
2060  for (size_t y = 0; y < dimensionSizes.ny; y++)
2061  {
2062  #pragma omp simd
2063  for (size_t x = 0; x < dimensionSizes.nx; x++)
2064  {
2065  const size_t i = get1DIndex(z, y, x, dimensionSizes);
2066 
2067  uxSgx[i] = (uxSgx[i] * pmlX[x] - (dividerX * dxudxnSgx[x] * ifftX[i])) * pmlX[x];
2068  } // x
2069  } // y
2070  } // z
2071 
2072  #pragma omp parallel for schedule(static)
2073  for (size_t z = 0; z < dimensionSizes.nz; z++)
2074  {
2075  for (size_t y = 0; y < dimensionSizes.ny; y++)
2076  {
2077  #pragma omp simd
2078  for (size_t x = 0; x < dimensionSizes.nx; x++)
2079  {
2080  const size_t i = get1DIndex(z, y, x, dimensionSizes);
2081 
2082  uySgy[i] = (uySgy[i] * pmlY[y] - (dividerY * dyudynSgy[y] * ifftY[i])) * pmlY[y];
2083  } // x
2084  } // y
2085  } // z
2086 
2087  #pragma omp parallel for schedule(static)
2088  for (size_t z = 0; z < dimensionSizes.nz; z++)
2089  {
2090  for (size_t y = 0; y < dimensionSizes.ny; y++)
2091  {
2092  #pragma omp simd
2093  for (size_t x = 0; x < dimensionSizes.nx; x++)
2094  {
2095  const size_t i = get1DIndex(z, y, x, dimensionSizes);
2096 
2097  uzSgz[i] = (uzSgz[i] * pmlZ[z] - (dividerZ * dzudznSgz[z] * ifftZ[i])) * pmlZ[z];
2098  } // x
2099  } // y
2100  } // z
2101 }// end of computeVelocityHomogeneousNonuniform
2102 //----------------------------------------------------------------------------------------------------------------------
2103 
2104 /**
2105  * Compute part of the new velocity term - gradient of pressure.
2106  * <b>Matlab code:</b> \n
2107  *
2108  *\verbatim
2109  bsxfun(\@times, ddx_k_shift_pos, kappa .* fftn(p))
2110  \endverbatim
2111  */
2113 {
2114  // Compute FFT of pressure
2116 
2120 
2121  const FloatComplex* ddxKShiftPos = getDdxKShiftPos().getComplexData();
2122  const FloatComplex* ddyKShiftPos = getDdyKShiftPos().getComplexData();
2123  const FloatComplex* ddzKShiftPos = getDdzKShiftPos().getComplexData();
2124 
2125  const float* kappa = getKappa().getData();
2126 
2127  const DimensionSizes& reducedDimensionSizes= mParameters.getReducedDimensionSizes();
2128 
2129  #pragma omp parallel for schedule(static)
2130  for (size_t z = 0; z < reducedDimensionSizes.nz; z++)
2131  {
2132  for (size_t y = 0; y < reducedDimensionSizes.ny; y++)
2133  {
2134  #pragma omp simd
2135  for (size_t x = 0; x < reducedDimensionSizes.nx; x++)
2136  {
2137  const size_t i = get1DIndex(z, y, x, reducedDimensionSizes);
2138 
2139  const FloatComplex eKappa = ifftX[i] * kappa[i];
2140 
2141  ifftX[i] = eKappa * ddxKShiftPos[x];
2142  ifftY[i] = eKappa * ddyKShiftPos[y];
2143  ifftZ[i] = eKappa * ddzKShiftPos[z];
2144  } // x
2145  } // y
2146  } // z
2147 }// end of computePressureGradient
2148 //----------------------------------------------------------------------------------------------------------------------
2149 
2150 /**
2151  * Calculate three temporary sums in the new pressure formula non-linear absorbing case.
2152  */
2153 template<bool bOnAScalarFlag, bool rho0ScalarFlag>
2155  RealMatrix& nonlinearTerm,
2156  RealMatrix& velocityGradientSum)
2157 {
2158  const float* rhoX = getRhoX().getData();
2159  const float* rhoY = getRhoY().getData();
2160  const float* rhoZ = getRhoZ().getData();
2161 
2162  const float* duxdx = getDuxdx().getData();
2163  const float* duydy = getDuydy().getData();
2164  const float* duzdz = getDuzdz().getData();
2165 
2166  const float bOnAScalar = (bOnAScalarFlag) ? mParameters.getBOnAScalar() : 0;
2167  const float* bOnAMatrix = (bOnAScalarFlag) ? nullptr : getBOnA().getData();
2168 
2169  const float rho0Scalar = (rho0ScalarFlag) ? mParameters.getRho0Scalar() : 0;
2170  const float* rho0Matrix = (rho0ScalarFlag) ? nullptr : getRho0().getData();
2171 
2172 
2173  float* eDensitySum = densitySum.getData();
2174  float* eNonlinearTerm = nonlinearTerm.getData();
2175  float* eVelocityGradientSum = velocityGradientSum.getData();
2176 
2177  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
2178 
2179  #pragma omp parallel for simd schedule(static) \
2180  aligned(eDensitySum, eNonlinearTerm, eVelocityGradientSum, \
2181  rhoX, rhoY, rhoZ, bOnAMatrix, rho0Matrix, duxdx, duydy, duzdz)
2182  for (size_t i = 0; i < nElements ; i++)
2183  {
2184  const float rhoSum = rhoX[i] + rhoY[i] + rhoZ[i];
2185  const float bOnA = (bOnAScalarFlag) ? bOnAScalar : bOnAMatrix[i];
2186  const float rho0 = (rho0ScalarFlag) ? rho0Scalar : rho0Matrix[i];
2187 
2188  eDensitySum[i] = rhoSum;
2189  eNonlinearTerm[i] = (bOnA * rhoSum * rhoSum) / (2.0f * rho0) + rhoSum;
2190  eVelocityGradientSum[i] = rho0 * (duxdx[i] + duydy[i] + duzdz[i]);
2191  }
2192 } // end of computePressureTermsNonlinear
2193 //----------------------------------------------------------------------------------------------------------------------
2194 
2195  /**
2196  * Calculate two temporary sums in the new pressure formula, linear absorbing case.
2197  */
2199  RealMatrix& velocityGradientSum)
2200 {
2201  const size_t size = mParameters.getFullDimensionSizes().nElements();
2202 
2203  const float* rhoX = getRhoX().getData();
2204  const float* rhoY = getRhoY().getData();
2205  const float* rhoZ = getRhoZ().getData();
2206 
2207  const float* duxdx = getDuxdx().getData();
2208  const float* duydy = getDuydy().getData();
2209  const float* duzdz = getDuzdz().getData();
2210 
2211  float* pDensitySum = densitySum.getData();
2212  float* pVelocityGradientSum = velocityGradientSum.getData();
2213 
2214  #pragma omp parallel for simd schedule(static) aligned (pDensitySum, rhoX, rhoY, rhoZ)
2215  for (size_t i = 0; i < size; i++)
2216  {
2217  pDensitySum[i] = rhoX[i] + rhoY[i] + rhoZ[i];
2218  }
2219 
2221  { // scalar
2222  const float eRho0 = mParameters.getRho0Scalar();
2223  #pragma omp parallel for simd schedule(static) aligned (pDensitySum, duxdx, duydy, duzdz)
2224  for (size_t i = 0; i < size; i++)
2225  {
2226  pVelocityGradientSum[i] = eRho0 * (duxdx[i] + duydy[i] + duzdz[i]);
2227  }
2228  }
2229  else
2230  { // matrix
2231  const float* rho0 = getRho0().getData();
2232  #pragma omp parallel for simd schedule(static) aligned (pDensitySum, rho0, duxdx, duydy, duzdz)
2233  for (size_t i = 0; i < size; i++)
2234  {
2235  pVelocityGradientSum[i] = rho0[i] * (duxdx[i] + duydy[i] + duzdz[i]);
2236  }
2237  }
2238 }// end of computePressureTermsLinear
2239 //----------------------------------------------------------------------------------------------------------------------
2240 
2241 
2242  /**
2243  * Compute absorbing term with abosrbNabla1 and absorbNabla2.
2244  */
2246  FftwComplexMatrix& fftPart2)
2247 {
2248  const size_t nElements = mParameters.getReducedDimensionSizes().nElements();
2249 
2250  FloatComplex* pFftPart1 = fftPart1.getComplexData();
2251  FloatComplex* pFftPart2 = fftPart2.getComplexData();
2252 
2253  float* absorbNabla1 = getAbsorbNabla1().getData();
2254  float* absorbNabla2 = getAbsorbNabla2().getData();
2255 
2256  #pragma omp parallel for simd schedule(static) aligned(pFftPart1, pFftPart2, absorbNabla1, absorbNabla2)
2257  for (size_t i = 0; i < nElements; i++)
2258  {
2259  pFftPart1[i] *= absorbNabla1[i];
2260  pFftPart2[i] *= absorbNabla2[i];
2261  }
2262 } // end of computeAbsorbtionTerm
2263 //----------------------------------------------------------------------------------------------------------------------
2264 
2265  /**
2266  * @brief Sum sub-terms to calculate new pressure, after FFTs, non-linear case.
2267  */
2268 template<bool c0ScalarFlag, bool areTauAndEtaScalars>
2270  const RealMatrix& absorbEtaTerm,
2271  const RealMatrix& nonlinearTerm)
2272 {
2273  const float* pAbsorbTauTerm = absorbTauTerm.getData();
2274  const float* pAbsorbEtaTerm = absorbEtaTerm.getData();
2275 
2276  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
2277  const float divider = 1.0f / static_cast<float>(nElements);
2278 
2279  const float c2Scalar = (c0ScalarFlag) ? mParameters.getC2Scalar() : 0;
2280  const float* c2Matrix = (c0ScalarFlag) ? nullptr : getC2().getData();
2281 
2282  const float absorbTauScalar = (areTauAndEtaScalars) ? mParameters.getAbsorbTauScalar() : 0;
2283  const float* absorbTauMatrix = (areTauAndEtaScalars) ? nullptr : getAbsorbTau().getData();
2284 
2285  const float absorbEtaScalar = (areTauAndEtaScalars) ? mParameters.getAbsorbEtaScalar() : 0;
2286  const float* absorbEtaMatrix = (areTauAndEtaScalars) ? nullptr : getAbsorbEta().getData();;
2287 
2288  const float* bOnA = nonlinearTerm.getData();
2289  float* p = getP().getData();
2290 
2291  #pragma omp parallel for simd schedule(static) \
2292  aligned(p, c2Matrix, pAbsorbTauTerm, absorbTauMatrix, pAbsorbEtaTerm, absorbEtaMatrix)
2293  for (size_t i = 0; i < nElements; i++)
2294  {
2295  const float c2 = (c0ScalarFlag) ? c2Scalar : c2Matrix[i];
2296  const float absorbTau = (areTauAndEtaScalars) ? absorbTauScalar : absorbTauMatrix[i];
2297  const float absorbEta = (areTauAndEtaScalars) ? absorbEtaScalar : absorbEtaMatrix[i];
2298 
2299  p[i] = c2 *(bOnA[i] + (divider * ((pAbsorbTauTerm[i] * absorbTau) - (pAbsorbEtaTerm[i] * absorbEta))));
2300  }
2301 }// end of sumPressureTermsNonlinear
2302 //----------------------------------------------------------------------------------------------------------------------
2303 
2304  /**
2305  * Sum sub-terms to calculate new pressure, after FFTs, linear case.
2306  */
2307 template<bool c0ScalarFlag, bool areTauAndEtaScalars>
2309  const RealMatrix& absorbEtaTerm,
2310  const RealMatrix& densitySum)
2311 {
2312  const float* pAbsorbTauTerm = absorbTauTerm.getData();
2313  const float* pAbsorbEtaTerm = absorbEtaTerm.getData();
2314 
2315  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
2316  const float divider = 1.0f / static_cast<float>(nElements);
2317 
2318  const float c2Scalar = (c0ScalarFlag) ? mParameters.getC2Scalar() : 0;
2319  const float* c2Matrix = (c0ScalarFlag) ? nullptr : getC2().getData();
2320 
2321  const float absorbTauScalar = (areTauAndEtaScalars) ? mParameters.getAbsorbTauScalar() : 0;
2322  const float* absorbTauMatrix = (areTauAndEtaScalars) ? nullptr : getAbsorbTau().getData();
2323 
2324  const float absorbEtaScalar = (areTauAndEtaScalars) ? mParameters.getAbsorbEtaScalar() : 0;
2325  const float* absorbEtaMatrix = (areTauAndEtaScalars) ? nullptr : getAbsorbEta().getData();;
2326 
2327  const float* pDenistySum = densitySum.getData();
2328  float* p = getP().getData();
2329 
2330  #pragma omp parallel for simd schedule(static) \
2331  aligned (p, pDenistySum, c2Matrix, absorbTauMatrix, absorbEtaMatrix, pAbsorbTauTerm, pAbsorbEtaTerm)
2332  for (size_t i = 0; i < nElements; i++)
2333  {
2334  const float c2 = (c0ScalarFlag) ? c2Scalar : c2Matrix[i];
2335  const float absorbTau = (areTauAndEtaScalars) ? absorbTauScalar : absorbTauMatrix[i];
2336  const float absorbEta = (areTauAndEtaScalars) ? absorbEtaScalar : absorbEtaMatrix[i];
2337 
2338  p[i] = c2 * (pDenistySum[i] + (divider * ((pAbsorbTauTerm[i] * absorbTau) - (pAbsorbEtaTerm[i] * absorbEta))));
2339  }
2340 }// end of sumPressureTermsLinear
2341 //----------------------------------------------------------------------------------------------------------------------
2342 
2343 /**
2344  * Sum sub-terms for new p, nonlinear lossless case.
2345  */
2346 template<bool c0ScalarFlag, bool nonlinearFlag, bool rho0ScalarFlag>
2348 {
2349  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
2350 
2351  float* p = getP().getData();
2352 
2353  const float* rhoX = getRhoX().getData();
2354  const float* rhoY = getRhoY().getData();
2355  const float* rhoZ = getRhoZ().getData();
2356 
2357  const float c2Scalar = (c0ScalarFlag) ? mParameters.getC2Scalar() : 0;
2358  const float* c2Matrix = (c0ScalarFlag) ? nullptr : getC2().getData();
2359 
2360  const float bOnAScalar = (nonlinearFlag) ? mParameters.getBOnAScalar(): 0;
2361  const float* bOnAMatrix = (nonlinearFlag) ? nullptr : getBOnA().getData();
2362 
2363  const float rho0Scalar = (rho0ScalarFlag) ? mParameters.getRho0Scalar() : 0;
2364  const float* rho0Matrix = (rho0ScalarFlag) ? nullptr : getRho0().getData();
2365 
2366  #pragma omp parallel for simd schedule (static)
2367  for (size_t i = 0; i < nElements; i++)
2368  {
2369  const float c2 = (c0ScalarFlag) ? c2Scalar : c2Matrix[i];
2370  const float bOnA = (nonlinearFlag) ? bOnAScalar : bOnAMatrix[i];
2371  const float rho0 = (rho0ScalarFlag) ? rho0Scalar : rho0Matrix[i];
2372 
2373  const float sumDensity = rhoX[i] + rhoY[i] + rhoZ[i];
2374 
2375  p[i] = c2 * (sumDensity + (bOnA * (sumDensity * sumDensity) / (2.0f * rho0)));
2376  }
2377 }// end of sumPressureTermsNonlinearLossless
2378 //----------------------------------------------------------------------------------------------------------------------
2379 
2380  /**
2381  * Sum sub-terms for new pressure, linear lossless case.
2382  */
2384 {
2385  const float* rhoX = getRhoX().getData();
2386  const float* rhoY = getRhoY().getData();
2387  const float* rhoZ = getRhoZ().getData();
2388  float* p = getP().getData();
2389 
2390  const size_t nElements = mParameters.getFullDimensionSizes().nElements();
2391 
2393  {
2394  const float c2 = mParameters.getC2Scalar();
2395 
2396  #pragma omp parallel for simd schedule(static) aligned(p, rhoX, rhoY, rhoZ)
2397  for (size_t i = 0; i < nElements; i++)
2398  {
2399  p[i] = c2 * (rhoX[i] + rhoY[i] + rhoZ[i]);
2400  }
2401  }
2402  else
2403  {
2404  const float* c2 = getC2().getData();
2405 
2406  #pragma omp parallel for simd schedule(static) aligned(p, c2, rhoX, rhoY, rhoZ)
2407  for (size_t i = 0; i < nElements; i++)
2408  {
2409  p[i] = c2[i] * (rhoX[i] + rhoY[i] + rhoZ[i]);
2410  }
2411  }
2412 
2413 }// end of sumPressureTermsLinearLossless()
2414 //----------------------------------------------------------------------------------------------------------------------
2415 
2416 /**
2417  * Calculated shifted velocities.
2418  *
2419  * <b>Matlab code:</b> \n
2420  *
2421  * \verbatim
2422  ux_shifted = real(ifft(bsxfun(\@times, x_shift_neg, fft(ux_sgx, [], 1)), [], 1));
2423  uy_shifted = real(ifft(bsxfun(\@times, y_shift_neg, fft(uy_sgy, [], 2)), [], 2));
2424  uz_shifted = real(ifft(bsxfun(\@times, z_shift_neg, fft(uz_sgz, [], 3)), [], 3));
2425  \endverbatim
2426  */
2427 
2429 {
2430  const FloatComplex* xShiftNegR = getXShiftNegR().getComplexData();
2431  const FloatComplex* yShiftNegR = getYShiftNegR().getComplexData();
2432  const FloatComplex* zShiftNegR = getZShiftNegR().getComplexData();
2433 
2434  FloatComplex* tempFftShift = getTempFftwShift().getComplexData();
2435 
2436 
2437  // sizes of frequency spaces
2439  xShiftDims.nx = xShiftDims.nx / 2 + 1;
2440 
2442  yShiftDims.ny = yShiftDims.ny / 2 + 1;
2443 
2445  zShiftDims.nz = zShiftDims.nz / 2 + 1;
2446 
2447  // normalization constants for FFTs
2448  const float dividerX = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().nx);
2449  const float dividerY = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().ny);
2450  const float dividerZ = 1.0f / static_cast<float>(mParameters.getFullDimensionSizes().nz);
2451 
2452  //-------------------------------------------------- ux_shifted ----------------------------------------------------//
2454 
2455  #pragma omp parallel for schedule(static)
2456  for (size_t z = 0; z < xShiftDims.nz; z++)
2457  {
2458  for (size_t y = 0; y < xShiftDims.ny; y++)
2459  {
2460  #pragma omp simd
2461  for (size_t x = 0; x < xShiftDims.nx; x++)
2462  {
2463  const size_t i = get1DIndex(z, y, x, xShiftDims);
2464 
2465  tempFftShift[i] = tempFftShift[i] * xShiftNegR[x] * dividerX;
2466  } // x
2467  } // y
2468  }//z*/
2470 
2471 
2472  //-------------------------------------------------- uy shifted ----------------------------------------------------//
2474 
2475  #pragma omp parallel for schedule(static)
2476  for (size_t z = 0; z < yShiftDims.nz; z++)
2477  {
2478  for (size_t y = 0; y < yShiftDims.ny; y++)
2479  {
2480  #pragma omp simd
2481  for (size_t x = 0; x < yShiftDims.nx; x++)
2482  {
2483  const size_t i = get1DIndex(z, y, x, yShiftDims);
2484 
2485  tempFftShift[i] = (tempFftShift[i] * yShiftNegR[y]) * dividerY;
2486  } // x
2487  } // y
2488  }//z
2490 
2491 
2492  //-------------------------------------------------- uz_shifted ----------------------------------------------------//
2494  #pragma omp parallel for schedule(static)
2495  for (size_t z = 0; z < zShiftDims.nz; z++)
2496  {
2497  for (size_t y = 0; y < zShiftDims.ny; y++)
2498  {
2499  #pragma omp simd
2500  for (size_t x = 0; x < zShiftDims.nx; x++)
2501  {
2502  const size_t i = get1DIndex(z, y, x, zShiftDims);
2503 
2504  tempFftShift[i] = (tempFftShift[i] * zShiftNegR[z]) * dividerZ;
2505  } // x
2506  } // y
2507  }//z
2509 }// end of computeShiftedVelocity
2510 //----------------------------------------------------------------------------------------------------------------------
2511 
2512 /**
2513  * Print progress statistics.
2514  */
2516 {
2517  const size_t nt = mParameters.getNt();
2518  const size_t timeIndex = mParameters.getTimeIndex();
2519 
2520 
2521  if (timeIndex > (mActPercent * nt * 0.01f))
2522  {
2524 
2525  mIterationTime.stop();
2526 
2527  const double elTime = mIterationTime.getElapsedTime();
2529  const double toGo = ((elTimeWithLegs / static_cast<double>((timeIndex + 1)) * nt)) - elTimeWithLegs;
2530 
2531  struct tm *current;
2532  time_t now;
2533  time(&now);
2534  now += toGo;
2535  current = localtime(&now);
2536 
2539  static_cast<size_t>(((timeIndex) / (nt * 0.01f))),'%',
2540  elTime, toGo,
2541  current->tm_mday, current->tm_mon+1, current->tm_year-100,
2542  current->tm_hour, current->tm_min, current->tm_sec);
2544  }
2545 }// end of printStatistics
2546 //----------------------------------------------------------------------------------------------------------------------
2547 
2548 /**
2549  * Is time to checkpoint?
2550  */
2552 {
2553  if (!mParameters.isCheckpointEnabled()) return false;
2554 
2555  mTotalTime.stop();
2556 
2557  return (mTotalTime.getElapsedTime() > static_cast<float>(mParameters.getCheckpointInterval()));
2558 
2559 }// end of isTimeToCheckpoint
2560 //----------------------------------------------------------------------------------------------------------------------
2561 
2562 /**
2563  * Was the loop interrupted to checkpoint?
2564  */
2566 {
2567  return (mParameters.getTimeIndex() != mParameters.getNt());
2568 }// end of isCheckpointInterruption
2569 //----------------------------------------------------------------------------------------------------------------------
2570 
2571 /**
2572  * Check the output file has the correct format and version.
2573  */
2575 {
2576  // The header has already been read
2577  Hdf5FileHeader& fileHeader = mParameters.getFileHeader();
2578  Hdf5File& outputFile = mParameters.getOutputFile();
2579 
2580  // test file type
2581  if (fileHeader.getFileType() != Hdf5FileHeader::FileType::kOutput)
2582  {
2584  }
2585 
2586  // test file major version
2587  if (!fileHeader.checkMajorFileVersion())
2588  {
2591  fileHeader.getFileMajorVersion().c_str()));
2592  }
2593 
2594  // test file minor version
2595  if (!fileHeader.checkMinorFileVersion())
2596  {
2599  fileHeader.getFileMinorVersion().c_str()));
2600  }
2601 
2602 
2603  // Check dimension sizes
2604  DimensionSizes outputDimSizes;
2605  outputFile.readScalarValue(outputFile.getRootGroup(),
2606  kNxName,
2607  outputDimSizes.nx);
2608 
2609  outputFile.readScalarValue(outputFile.getRootGroup(), kNyName, outputDimSizes.ny);
2610 
2611  outputFile.readScalarValue(outputFile.getRootGroup(), kNzName, outputDimSizes.nz);
2612 
2613  if (mParameters.getFullDimensionSizes() != outputDimSizes)
2614  {
2616  outputDimSizes.nx,
2617  outputDimSizes.ny,
2618  outputDimSizes.nz,
2622  }
2623 }// end of checkOutputFile
2624 //----------------------------------------------------------------------------------------------------------------------
2625 
2626 
2627 /**
2628  * Check the file type and the version of the checkpoint file.
2629  */
2631 {
2632  // read the header and check the file version
2633  Hdf5FileHeader fileHeader;
2634  Hdf5File& checkpointFile = mParameters.getCheckpointFile();
2635 
2636  fileHeader.readHeaderFromCheckpointFile(checkpointFile);
2637 
2638  // test file type
2640  {
2642  mParameters.getCheckpointFileName().c_str()));
2643  }
2644 
2645  // test file major version
2646  if (!fileHeader.checkMajorFileVersion())
2647  {
2650  fileHeader.getFileMajorVersion().c_str()));
2651  }
2652 
2653  // test file minor version
2654  if (!fileHeader.checkMinorFileVersion())
2655  {
2658  fileHeader.getFileMinorVersion().c_str()));
2659  }
2660 
2661 
2662  // Check dimension sizes
2663  DimensionSizes checkpointDimSizes;
2664  checkpointFile.readScalarValue(checkpointFile.getRootGroup(), kNxName, checkpointDimSizes.nx);
2665  checkpointFile.readScalarValue(checkpointFile.getRootGroup(), kNyName, checkpointDimSizes.ny);
2666  checkpointFile.readScalarValue(checkpointFile.getRootGroup(), kNzName, checkpointDimSizes.nz);
2667 
2668  if (mParameters.getFullDimensionSizes() != checkpointDimSizes)
2669  {
2671  checkpointDimSizes.nx,
2672  checkpointDimSizes.ny,
2673  checkpointDimSizes.nz,
2677  }
2678 }// end of checkCheckpointFile
2679 //----------------------------------------------------------------------------------------------------------------------
2680 
2681 /**
2682  * Restore cumulated elapsed time from the output file.
2683  */
2685 {
2686  double totalTime, dataLoadTime, preProcessingTime, simulationTime, postProcessingTime;
2687 
2688  // Get execution times stored in the output file header
2690  dataLoadTime,
2691  preProcessingTime,
2692  simulationTime,
2693  postProcessingTime);
2694 
2700 
2701 }// end of loadElapsedTimeFromOutputFile
2702 //----------------------------------------------------------------------------------------------------------------------
2703 
2704 inline size_t KSpaceFirstOrder3DSolver::get1DIndex(const size_t z,
2705  const size_t y,
2706  const size_t x,
2707  const DimensionSizes& dimensionSizes)
2708 {
2709  return (z * dimensionSizes.ny + y) * dimensionSizes.nx + x;
2710 }// end of get1DIndex
2711 //----------------------------------------------------------------------------------------------------------------------
2712 
2713 //--------------------------------------------------------------------------------------------------------------------//
2714 //------------------------------------------------- Private methods --------------------------------------------------//
2715 //--------------------------------------------------------------------------------------------------------------------//
2716 
ComplexMatrix & getDdyKShiftNeg()
Get negative Fourier shift in y.
IndexMatrix & getDelayMask()
Get delay mask for many types sources.
void saveScalarsToOutputFile()
Save scalar values into the output HDF5 file.
Definition: Parameters.cpp:391
RealMatrix & getVelocityZSourceInput()
Get Velocity source input data in z direction.
bool getStoreVelocityFinalAllFlag() const
Is –u_final set?
Definition: Parameters.h:580
ErrorMessage kErrFmtBadMajorFileVersion
Command line parameters error message.
MatrixName kUyFinalName
uy_final variable name
Definition: MatrixNames.h:318
ComplexMatrix & getDdyKShiftPos()
Get positive Fourier shift in y.
The class for real matrices.
Definition: RealMatrix.h:47
float getDt() const
Get time step size.
Definition: Parameters.h:222
void generateKappa()
Generate kappa matrix for lossless media.
void create(const std::string &fileName, unsigned int flags=H5F_ACC_TRUNC)
Create the HDF5 file.
Definition: Hdf5File.cpp:93
float getRho0Scalar() const
Get value of homogeneous medium density.
Definition: Parameters.h:271
bool isCheckpointInterruption() const
Was the loop interrupted to checkpoint?
RealMatrix & getDuydy()
Get velocity gradient on in y direction.
void sumPressureTermsLinear(const RealMatrix &absorbTauTerm, const RealMatrix &absorbEtaTerm, const RealMatrix &densitySum)
Sum sub-terms to calculate new pressure, after FFTs, linear case.
void recomputeIndicesToMatlab()
Recompute indices C++ -> MATLAB.
void setMemoryConsumption(const size_t totalMemory)
Set memory consumption.
void writeScalarValue(const hid_t parentGroup, MatrixName &datasetName, const T value)
Write the scalar value under a specified group.
Definition: Hdf5File.cpp:559
Hdf5File & getCheckpointFile()
Get checkpoint file handle.
Definition: Parameters.h:163
RealMatrix & getRhoX()
Get density matrix in x direction.
void postProcessStreams()
Post-process all streams and flush them to the file.
void computePressureTermsLinear(RealMatrix &densitySum, RealMatrix &velocityGradientSum)
Calculate two temporary sums in the new pressure formula before taking the FFT, linear absorbing case...
float getDx() const
Get spatial displacement in x.
Definition: Parameters.h:227
RealMatrix & getAbsorbNabla2()
Get absorbing coefficient Nabla2.
void addTransducerSource()
Add transducer data source to velocity x component.
void createC2RFftPlan1DY(RealMatrix &outMatrix)
Create FFTW plan for Complex-to-Real in the y dimension.
double getCumulatedSimulationTime() const
Get simulation time (time loop) accumulated over all legs.
RealMatrix & getPmlX()
Get PML in x.
RealMatrix & getUzSgz()
Get velocity matrix on staggered grid in z direction.
void createC2RFftPlan3D(RealMatrix &outMatrix)
Create FFTW plan for 3D Complex-to-Real.
size_t getCompressionLevel() const
Get compression level.
Definition: Parameters.h:127
size_t getVelocitySourceMode() const
Get velocity source mode.
Definition: Parameters.h:472
OutputMessage kOutFmtSSE3
Print version output message.
RealMatrix & getDtRho0Sgz()
Get dt * rho0Sgz matrix (time step size * ambient velocity on staggered grid in z direction)...
Class implementing 3D and 1D Real-To-Complex and Complex-To-Real transforms using FFTW interface...
void saveCheckpointData()
Save checkpoint data and flush aggregated outputs into the output file.
virtual void writeData(Hdf5File &file, MatrixName &matrixName, const size_t compressionLevel)
Write data into HDF5 file.
Definition: RealMatrix.cpp:94
OutputMessage kOutFmtLoadingFftwWisdom
Output message.
OutputMessage kOutFmtCreatingCheckpoint
Output message.
hid_t getRootGroup() const
Get handle to the root group of the file.
Definition: Hdf5File.h:608
virtual void writeData(Hdf5File &file, MatrixName &matrixName, const size_t compressionLevel)
Write data into HDF5 file.
Definition: IndexMatrix.cpp:94
size_t nz
Number of elements in the z direction.
void computeC2RFft1DX(RealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the x dimension.
bool getC0ScalarFlag() const
Is sound speed in the medium homogeneous (scalar value)?
Definition: Parameters.h:249
float getAlphaPower() const
Get alpha power value for the absorption law.
Definition: Parameters.h:335
RealMatrix & getRho0()
Get ambient density matrix.
RealMatrix & getAbsorbEta()
Get absorbing coefficient Eta.
size_t getNt() const
Get total number of time steps.
Definition: Parameters.h:203
OutputMessage kOutFmtReadingOutputFile
Output message.
MatrixName kTimeIndexName
t_index name
Definition: MatrixNames.h:49
OutputMessage kOutFmtCompResourcesHeader
Output message.
MatrixName kUxFinalName
ux_final variable name
Definition: MatrixNames.h:316
OutputMessage kOutFmtSimulatoinFinalSeparator
Output message.
ErrorMessage kErrFmtPathDelimiters
delimiters for linux paths
Definition: ErrorMessages.h:53
OutputMessage kOutFmtFailed
Output message - failed message.
bool getRho0ScalarFlag() const
Is density in the medium homogeneous (scalar value)?
Definition: Parameters.h:266
ComplexMatrix & getDdzKShiftPos()
Get positive Fourier shift in z.
float getAbsorbEtaScalar() const
Get absorb eta coefficient for homogeneous medium (scalar value)?
Definition: Parameters.h:340
void open(const std::string &fileName, unsigned int flags=H5F_ACC_RDONLY)
Open the HDF5 file.
Definition: Hdf5File.cpp:117
Hdf5FileHeader & getFileHeader()
Get file header handle.
Definition: Parameters.h:168
TimeMeasure mTotalTime
Total time of the simulation.
RealMatrix & getRhoZ()
Get density matrix in z direction.
size_t getTransducerSourceFlag() const
Get transducer source flag.
Definition: Parameters.h:419
std::string getCheckpointFileName() const
Get checkpoint file name.
Definition: Parameters.h:184
void computeVelocityHomogeneousUniform()
Compute acoustic velocity for homogeneous medium and a uniform grid.
static void log(const LogLevel queryLevel, const std::string &format, Args ... args)
Log desired activity for a given log level, version with string format.
Definition: Logger.h:97
RealMatrix & getUyShifted()
Get velocity shifted on normal grid in y direction.
MatrixContainer mMatrixContainer
Matrix container with all the matrix classes.
TimeMeasure mPostProcessingTime
Post-processing time of the simulation.
RealMatrix & getTransducerSourceInput()
Get transducer source input data (signal).
OutputMessage kOutFmtElapsedTime
Output message.
OutputMessage kOutFmtPreProcessing
Output message.
void generateKappaAndNablas()
Generate kappa matrix, absorbNabla1, absorbNabla2 for absorbing medium.
static std::string wordWrapString(const std::string &inputString, const std::string &delimiters, const int indentation=0, const int lineSize=65)
Definition: Logger.cpp:111
RealMatrix & getAbsorbNabla1()
Get absorbing coefficient Nabla1.
size_t getNonLinearFlag() const
Is the wave propagation nonlinear?
Definition: Parameters.h:319
float getDtRho0SgzScalar() const
Get value of dt / rho0Sgz.
Definition: Parameters.h:301
double getCumulatedPreProcessingTime() const
Get pre-processing time accumulated over all legs.
TimeMeasure mPreProcessingTime
Pre-processing time of the simulation.
void computeAbsorbtionTerm(FftwComplexMatrix &fftPart1, FftwComplexMatrix &fftPart2)
Compute absorbing term with abosrbNabla1 and absorbNabla2.
virtual void loadInputData()
Load simulation data.
void setMinorFileVersion()
Set minor file version.
OutputMessage kOutFmtDataLoading
Output message.
void createC2RFftPlan1DX(RealMatrix &outMatrix)
Create FFTW plan for Complex-to-Real in the x dimension.
TimeMeasure mIterationTime
Iteration time of the simulation.
RealMatrix & getTemp2Real3D()
Get second real 3D temporary matrix.
void loadDataFromCheckpointFile()
Load selected matrices from the checkpoint HDF5 file.
Class storing all parameters of the simulation.
Definition: Parameters.h:50
void computeInitialVelocityHeterogeneous()
Compute velocity for the initial pressure problem, heterogeneous medium, uniform grid.
void postProcessing()
Post processing, and closing the output streams.
void addInitialPressureSource()
Calculate initial pressure source.
RealMatrix & getAbsorbTau()
Get absorbing coefficient Tau.
MatrixName kNzName
Nz variable name.
Definition: MatrixNames.h:74
void computePressureNonlinear()
Compute acoustic pressure for nonlinear case.
void setHostName()
Set host name.
virtual void compute()
This method computes k-space First Order 3D simulation.
Hdf5File & getOutputFile()
Get output file handle.
Definition: Parameters.h:158
double getElapsedTime() const
Get elapsed time.
Definition: TimeMeasure.h:119
RealMatrix & getTemp3Real3D()
Get third real 3D temporary matrix.
DimensionSizes getFullDimensionSizes() const
Get full dimension sizes of the simulation (real classes).
Definition: Parameters.h:191
void computeC2RFft1DZ(RealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the z dimension.
OutputMessage kOutFmtSimulationEndSeparator
Output message.
IndexMatrix & getSensorMaskIndex()
Get linear sensor mask (spatial geometry of the sensor).
void storeSensorData()
Store sensor data.
void createR2CFftPlan1DY(RealMatrix &inMatrix)
Create an FFTW plan for 1D Real-to-Complex in the y dimension.
double getCumulatedDataLoadTime() const
Get simulation time (time loop) accumulated over all legs.
std::string getCodeName() const
Get code name - release code version.
void writeHeaderToOutputFile(Hdf5File &outputFile)
Write header into the output file.
size_t mActPercent
Percentage of the simulation done.
virtual size_t * getData()
Get raw data out of the class (for direct kernel access).
static void exportWisdom()
Export wisdom to the file.
RealMatrix & getBOnA()
Get B on A (nonlinear coefficient).
OutputMessage kOutFmtStoringFftwWisdom
Output message.
static bool canAccess(const std::string &fileName)
Can I access the file.
Definition: Hdf5File.cpp:146
void setAbsorbTauScalar(const float absorbTau)
Set absorb tau coefficient for homogeneous medium (scalar value).
Definition: Parameters.h:355
void setMajorFileVersion()
Set major file version.
RealMatrix & getUzShifted()
Get velocity shifted on normal grid in z direction.
static std::string getFileMajorVersion()
Get string representing of current Major version of the file.
size_t getCheckpointInterval() const
Get checkpoint interval.
Definition: Parameters.h:145
void SetElapsedTimeOverPreviousLegs(const double elapsedTime)
Set elapsed time in previous legs of the simulation.
Definition: TimeMeasure.h:149
float getDtRho0SgyScalar() const
Get value of dt / rho0Sgy.
Definition: Parameters.h:291
virtual size_t size() const
Size of the matrix.
static std::string getFileMinorVersion()
Get string representing of current Minor version of the file.
size_t getNumberOfThreads() const
Get number of CPU threads to use.
Definition: Parameters.h:121
FftwComplexMatrix & getTempFftwZ()
Get temporary matrix for 1D fft in z.
void setCodeName(const std::string &codeName)
Set code name.
double getElapsedTimeOverPreviousLegs() const
Get time spent in previous legs.
Definition: TimeMeasure.h:139
void computeVelocityHomogeneousNonuniform()
Compute acoustic velocity for homogenous medium and nonuniform grid.
virtual size_t size() const
Size of the matrix.
RealMatrix & getDtRho0Sgx()
Get dt * rho0Sgx matrix (time step size * ambient velocity on staggered grid in x direction)...
void computeC2RFft3D(RealMatrix &outMatrix)
Compute forward out-of-place 3D Complex-to-Real FFT.
size_t getPressureSourceFlag() const
Get pressure source flag.
Definition: Parameters.h:408
float getC2Scalar() const
Get scalar value of sound speed squared.
Definition: Parameters.h:259
The header file containing the main class of the project responsible for the entire simulation...
void getExecutionTimes(double &totalTime, double &loadTime, double &preProcessingTime, double &simulationTime, double &postprocessingTime)
Get execution times stored in the output file header.
std::complex< float > FloatComplex
Datatype for complex single precision numbers.
Definition: ComplexMatrix.h:43
void readScalarValue(const hid_t parentGroup, MatrixName &datasetName, T &value)
Read the scalar value under a specified group.
Definition: Hdf5File.cpp:646
void reopenStreams()
Reopen streams after checkpoint file (datasets).
static void flush(const LogLevel queryLevel)
Flush output messages.
Definition: Logger.cpp:97
RealMatrix & getC2()
Get the c^2 matrix from the container.
void readHeaderFromCheckpointFile(Hdf5File &checkpointFile)
Read the file header form the checkpoint file.
void addStreams(MatrixContainer &matrixContainer)
Add all streams in simulation in the container, set all streams records here!
bool checkMinorFileVersion()
Check minor file version.
virtual void freeMemory()
Memory deallocation.
OutputMessage kOutFmtSSE2
Print version output message.
void computeDensityLinear()
Compute new values of acoustic density for linear case.
size_t getNonUniformGridFlag() const
Enable non uniform grid? - not implemented yet.
Definition: Parameters.h:309
double getCumulatedTotalTime() const
Get total simulation time accumulated over all legs.
static void errorAndTerminate(const std::string &errorMessage)
Log an error and terminate the execution.
Definition: Logger.cpp:84
void freeMatrices()
Destroy and free all matrices.
OutputMessage kOutFmtIntelCompiler
Print version output message.
OutputMessage kOutFmtBuildNoDataTime
Print version output message.
bool getStoreVelocityNonStaggeredRawFlag() const
Is –u_non_staggered_raw set?
Definition: Parameters.h:547
void freeStreams()
Free all streams - destroy them.
size_t get1DIndex(const size_t z, const size_t y, const size_t x, const DimensionSizes &dimensionSizes)
Compute 1D index using 3 spatial coordinates and the size of the matrix.
RealMatrix & getUxSgx()
Get velocity matrix on staggered grid in x direction.
void checkCheckpointFile()
Check the file type and the version of the checkpoint file.
RealMatrix & getPmlYSgy()
Get PML on staggered grid y.
OutputMessage kOutFmtStoringCheckpointData
Output message.
void computeVelocitySourceTerm(RealMatrix &velocityMatrix, const RealMatrix &velocitySourceInput, const IndexMatrix &velocitySourceIndex)
Add in velocity source terms.
ErrorMessage kErrFmtBadMinorFileVersion
Command line parameters error message.
RealMatrix & getPmlZSgz()
Get PML on staggered grid z.
FftwComplexMatrix & getTempFftwX()
Get temporary matrix for 1D fft in x.
void computeVelocityGradient()
Compute new values of acoustic velocity gradients.
OutputMessage kOutFmtCheckpointHeader
Output message.
RealMatrix & getP()
Get pressure matrix.
MatrixName kUzFinalName
uz_final variable name
Definition: MatrixNames.h:320
FftwComplexMatrix & getTempFftwY()
Get temporary matrix for 1D fft in y.
size_t getSamplingStartTimeIndex() const
Get start time index when sensor data collection begins.
Definition: Parameters.h:499
void computeR2CFft3D(RealMatrix &inMatrix)
Compute forward out-of-place 3D Real-to-Complex FFT.
void createMatrices()
Create all matrix objects in the container.
void incrementTimeIndex()
Increment simulation time step.
Definition: Parameters.h:215
The header file containing a class responsible for printing out info and error messages (stdout...
void start()
Take start timestamp.
Definition: TimeMeasure.h:91
size_t getVelocityXSourceFlag() const
Get velocity in x source flag.
Definition: Parameters.h:425
Class wrapping the HDF5 routines.
Definition: Hdf5File.h:490
void sampleStreams()
Sample all streams.
float getDz() const
Get spatial displacement in z.
Definition: Parameters.h:237
void addVelocitySource()
Add in all velocity sources.
void computeC2()
Calculate square of velocity.
RealMatrix & getDuxdx()
Get velocity gradient on in x direction.
OutputMessage kOutFmtVersionGitHash
Print version output message.
ComplexMatrix & getYShiftNegR()
Get negative shift for non-staggered velocity in y.
void setActualCreationTime()
Set creation time.
static std::string formatMessage(const std::string &format, Args ... args)
C++-11 replacement for sprintf that works with std::string instead of char*.
Definition: Logger.h:157
TimeMeasure mSimulationTime
Simulation time of the simulation.
void createStreams()
Create all streams - opens the datasets.
Class for HDF5 file header.
void setTimeIndex(const size_t timeIndex)
Set simulation time step - should be used only when recovering from checkpoint.
Definition: Parameters.h:213
float getDtRho0SgxScalar() const
Get value of dt / rho0Sgx.
Definition: Parameters.h:281
void stop()
Take stop timestamp.
Definition: TimeMeasure.h:103
bool getBOnAScalarFlag() const
Is nonlinear coefficient homogeneous in the medium (scalar value)?
Definition: Parameters.h:361
void computePressureLinear()
Compute acoustic pressure for linear case.
OutputStreamContainer mOutputStreamContainer
Output stream container.
virtual void copyData(const BaseFloatMatrix &src)
Copy data from other matrix with the same size.
float getBOnAScalar() const
Get nonlinear coefficient for homogenous medium.
Definition: Parameters.h:366
ComplexMatrix & getDdxKShiftNeg()
Get negative Fourier shift in x.
RealMatrix & getPmlZ()
Get PML in z.
Structure with 4D dimension sizes (3 in space and 1 in time).
float getCRef() const
Get reference sound speed.
Definition: Parameters.h:244
float getDy() const
Get spatial displacement in y.
Definition: Parameters.h:232
virtual FloatComplex * getComplexData()
Get raw complex data out of the class (for direct kernel access).
Definition: ComplexMatrix.h:96
size_t getInitialPressureSourceFlag() const
Get initial pressure source flag (p0).
Definition: Parameters.h:413
virtual ~KSpaceFirstOrder3DSolver()
Destructor.
void setExecutionTimes(const double totalTime, const double loadTime, const double preProcessingTime, const double simulationTime, const double postprocessingTime)
Set execution times in file header.
RealMatrix & getDxudxn()
Non uniform grid acoustic velocity in x.
OutputMessage kOutFmtKWaveVersion
Output message.
Cuboid corners sensor mask.
void createC2RFftPlan1DZ(RealMatrix &outMatrix)
Create FFTW plan for Complex-to-Real in the z dimension.
Full level of verbosity.
void readHeaderFromOutputFile(Hdf5File &outputFile)
Read header from output file (necessary for checkpoint-restart).
OutputMessage kOutFmtPostProcessing
Output message.
bool getCopySensorMaskFlag() const
Is –copy_mask set set?
Definition: Parameters.h:586
OutputMessage kOutFmtCheckpointTimeSteps
Output message.
void writeHeaderToCheckpointFile(Hdf5File &checkpointFile)
Write header to the output file (only a subset of all possible fields are written).
SensorMaskType getSensorMaskType() const
Get sensor mask type (linear or corners).
Definition: Parameters.h:484
IndexMatrix & getSensorMaskCorners()
Get cuboid corners sensor mask. (Spatial geometry of multiple sensors).
void setAbsorbEtaScalar(const float absrobEta)
Set absorb eta coefficient for homogeneous medium (scalar value).
Definition: Parameters.h:345
static LogLevel getLevel()
Definition: Logger.h:87
OutputMessage kOutFmtMacOsBuild
Print version output message.
TimeMeasure mDataLoadTime
Data load time of the simulation.
Hdf5FileHeader::FileType getFileType()
Get File type.
RealMatrix & getKappa()
Get the kappa matrix from the container.
OutputMessage kOutFmtFftPlans
Output message.
size_t ny
Number of elements in the y direction.
void checkOutputFile()
Check the output file has the correct format and version.
OutputMessage kOutFmtSimulationProgress
Output message.
void computeDensityNonliner()
Compute new values of acoustic density for nonlinear case.
RealMatrix & getDyudyn()
Non uniform grid acoustic velocity in y.
float getC0Scalar() const
Get scalar value of sound speed.
Definition: Parameters.h:254
RealMatrix & getDzudzn()
Non uniform grid acoustic velocity in z.
size_t getTimeIndex() const
Get actual simulation time step.
Definition: Parameters.h:208
IndexMatrix & getVelocitySourceIndex()
Get velocity source geometry data.
void computeInitialVelocityHomogeneousUniform()
Compute velocity for the initial pressure problem, homogeneous medium, uniform grid.
OutputMessage kOutFmtCurrentMemory
Output message.
virtual size_t getMemoryUsage() const
Get memory usage in MB on the host side.
MatrixName kSensorMaskIndexName
sensor_mask_index variable name
Definition: MatrixNames.h:165
bool getStorePressureFinalAllFlag() const
Is –p_final set?
Definition: Parameters.h:535
OutputMessage kOutFmtSimulationHeader
Output message.
IndexMatrix & getPressureSourceIndex()
Get pressure source geometry data.
size_t getProgressPrintInterval() const
Get progress print interval.
Definition: Parameters.h:133
Hdf5File & getInputFile()
Get input file handle.
Definition: Parameters.h:153
ErrorMessage kErrFmtBadCheckpointFileFormat
KSpaceFirstOrder3DSolver error message.
OutputMessage kOutFmtWindowsBuild
Print version output message.
double getCumulatedPostProcessingTime() const
Get post-processing time accumulated over all legs.
void computeR2CFft1DX(RealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the x dimension.
MatrixName kNxName
Nx variable name.
Definition: MatrixNames.h:70
size_t nElements() const
Get the number of elements, in 3D only spatial domain, in 4D with time.
void generateTauAndEta()
Generate absorbTau, absorbEta for heterogenous medium.
void close()
Close the HDF5 file.
Definition: Hdf5File.cpp:161
OutputMessage kOutFmtVisualStudioCompiler
Print version output message.
bool getAlphaCoeffScalarFlag() const
Is alpha absorption coefficient homogeneous (scalar value)?
Definition: Parameters.h:325
virtual void allocateMemory()
Memory allocation.
Basic (default) level of verbosity.
void recomputeIndicesToCPP()
Recompute indices MATALAB->C++.
void storeDataIntoCheckpointFile()
Store selected matrices into the checkpoint file.
RealMatrix & getInitialPressureSourceInput()
Get initial pressure source input data (whole matrix).
void printFullCodeNameAndLicense() const
Print the code name and license.
Parameters & mParameters
Global parameters of the simulation.
virtual float * getData()
Get raw data out of the class (for direct kernel access).
OutputMessage kOutFmtAVX2
Print version output message.
void computePressureGradient()
Compute part of the new velocity term - gradient of pressure.
OutputMessage kOutFmtCreatingOutputFile
Output message.
OutputMessage kOutFmtLastSeparator
Output message -last separator.
RealMatrix & getPmlY()
Get PML in y.
void computeR2CFft1DZ(RealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the z dimension.
void computeVelocityHeterogeneous()
Compute acoustic velocity for heterogeneous medium and a uniform grid.
void createR2CFftPlan1DX(RealMatrix &inMatrix)
Create an FFTW plan for 1D Real-to-Complex in the x dimension.
MatrixName kNyName
Ny variable name.
Definition: MatrixNames.h:72
RealMatrix & getDtRho0Sgy()
Get dt * rho0Sgy matrix (time step size * ambient velocity on staggered grid in y direction)...
RealMatrix & GetVelocityYSourceInput()
Get Velocity source input data in y direction.
RealMatrix & getDyudynSgy()
Non uniform grid acoustic velocity on staggered grid x.
std::string getOutputFileName() const
Get output file name.
Definition: Parameters.h:179
The header file containing the matrix container.
size_t getVelocityYSourceFlag() const
Get velocity in y source flag.
Definition: Parameters.h:431
OutputMessage kOutFmtSSE42
Print version output message.
OutputMessage kOutFmtReadingInputFile
Output message.
MatrixName kSensorMaskCornersName
sensor_mask_corners variable name
Definition: MatrixNames.h:169
OutputMessage kOutFmtLinuxBuild
Print version output message.
size_t nx
Number of elements in the x direction.
float getAlphaCoeffScalar() const
Get value of alpha absorption coefficient.
Definition: Parameters.h:330
void loadDataFromInputFile()
Load all marked matrices from the input HDF5 file.
size_t getPressureSourceMany() const
Get number of time series in the pressure source.
Definition: Parameters.h:466
virtual void scalarDividedBy(const float scalar)
Calculate matrix = scalar / matrix.
size_t getPressureSourceMode() const
Get pressure source mode.
Definition: Parameters.h:461
OutputMessage kOutFmtReadingCheckpointFile
Output message.
RealMatrix & getDzudznSgz()
Non uniform grid acoustic velocity on staggered grid x.
void sumPressureTermsLinearLossless()
Sum sub-terms for new pressure, linear lossless case.
size_t getVelocityZSourceFlag() const
Get velocity in z source flag.
Definition: Parameters.h:437
The header file containing the class that implements 3D FFT using the FFTW interface.
RealMatrix & getPressureSourceInput()
Get pressure source input data (signal).
OutputMessage kOutFmtSeparator
Output message - separator.
bool checkMajorFileVersion()
Check major file version.
void computeMainLoop()
Compute the main time loop of the kspaceFirstOrder3D.
The header file defining the output stream container.
void setProcessorAffinity()
Set processor affinity.
RealMatrix & getRhoY()
Get density matrix in y direction.
void computeVelocity()
Compute new values of acoustic velocity.
size_t getVelocitySourceMany() const
Get number of time series in the velocity sources.
Definition: Parameters.h:477
static Parameters & getInstance()
Get instance of the singleton class.
Definition: Parameters.cpp:84
size_t getAbsorbingFlag() const
Is the simulation absrobing or lossless?
Definition: Parameters.h:314
RealMatrix & getUySgy()
Get velocity matrix on staggered grid in y direction.
void computeC2RFft1DY(RealMatrix &outMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the y dimension.
static void importWisdom()
Import wisdom from the file.
float getAbsorbTauScalar() const
Get absorb tau coefficient for homogeneous medium.
Definition: Parameters.h:350
OutputMessage kOutFmtStoringSensorData
Output message.
void loadElapsedTimeFromOutputFile()
Reads the header of the output file and sets the cumulative elapsed time from the first log...
RealMatrix & GetVelocityXSourceInput()
Get Velocity source input data in x direction.
RealMatrix & getUxShifted()
Get velocity shifted on normal grid in x direction.
void createR2CFftPlan3D(RealMatrix &inMatrix)
Create FFTW plan for 3D Real-to-Complex.
ComplexMatrix & getDdxKShiftPos()
Get positive Fourier shift in x.
ComplexMatrix & getXShiftNegR()
Get negative shift for non-staggered velocity in x.
ErrorMessage kErrFmtOutputDimensionsMismatch
KSpaceFirstOrder3DSolver error message.
void addPressureSource()
Add in pressure source.
void setFileType(const Hdf5FileHeader::FileType fileType)
Set File type.
RealMatrix & getPmlXSgx()
Get PML on staggered grid x.
void printStatistics()
Print progress statistics.
RealMatrix & getTemp1Real3D()
Get first real 3D temporary matrix.
OutputMessage kOutFmtMemoryAllocation
Output message.
void closeStreams()
Close all streams.
ErrorMessage kErrFmtCheckpointDimensionsMismatch
KSpaceFirstOrder3DSolver error message.
void checkpointStreams()
Checkpoint streams.
The class for 64b unsigned integers (indices). It is used for linear and cuboid corners masks to get ...
Definition: IndexMatrix.h:47
void computePressureTermsNonlinear(RealMatrix &densitySum, RealMatrix &nonlinearTerm, RealMatrix &velocityGradientSum)
Calculate three temporary sums in the new pressure formula before taking the FFT, nonlinear absorbing...
ErrorMessage kErrFmtBadOutputFileFormat
KSpaceFirstOrder3DSolver error message.
void computeInitialVelocityHomogeneousNonuniform()
Compute acoustic velocity for initial pressure problem, homogenous medium, nonuniform grid...
void sumPressureTermsNonlinear(const RealMatrix &absorbTauTerm, const RealMatrix &absorbEtaTerm, const RealMatrix &nonlinearTerm)
Sum sub-terms to calculate new pressure, after FFTs, nonlinear case.
std::string getGitHash() const
Get git hash of the code.
Definition: Parameters.cpp:471
RealMatrix & getDxudxnSgx()
Non uniform grid acoustic velocity on staggered grid x.
OutputMessage kOutFmtSSE41
Print version output message.
void createR2CFftPlan1DZ(RealMatrix &inMatrix)
Create an FFTW plan for 1D Real-to-Complex in the z dimension.
void sumPressureTermsNonlinearLossless()
Sum sub-terms for new pressure, linear lossless case.
FftwComplexMatrix & getTempFftwShift()
Get temporary matrix for fft shift.
void writeOutputDataInfo()
Write statistics and header into the output file.
bool isCheckpointEnabled() const
Is checkpoint enabled?
Definition: Parameters.h:140
OutputMessage kOutFmtGnuCompiler
Print version output message.
RealMatrix & getDuzdz()
Get velocity gradient on in z direction.
void InitializeFftwPlans()
Initialize FFTW plans.
bool isOpen() const
Is the file opened?
Definition: Hdf5File.h:560
void preProcessing()
Compute pre-processing phase.
OutputMessage kOutFmtLicense
Print version output message.
OutputMessage kOutFmtNoDone
Output message - finish line without done.
void addMatrices()
Populate the container based on the simulation type.
OutputMessage kOutFmtDone
Output message - Done with two spaces.
ComplexMatrix & getZShiftNegR()
Get negative shift for non-staggered velocity in z.
MatrixName kPressureFinalName
p_final variable name
Definition: MatrixNames.h:280
void computeR2CFft1DY(RealMatrix &inMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the y dimension.
void setNumberOfCores()
Set number of cores.
OutputMessage kOutFmtAVX
Print version output message.
void generateInitialDenisty()
Calculate dt ./ rho0 for nonuniform grids.
ComplexMatrix & getDdzKShiftNeg()
Get negative Fourier shift in z.
DimensionSizes getReducedDimensionSizes() const
Get reduced dimension sizes of the simulation (complex classes).
Definition: Parameters.h:197
void computeShiftedVelocity()
compute shifted velocity for –u_non_staggered flag.
bool isTimeToCheckpoint()
Is time to checkpoint (save actual state on disk).