kspaceFirstOrder3D-OMP  1.1
The C++ implementation of the k-wave toolbox for the time-domain simulation of acoustic wave fields in 3D
 All Classes Files Functions Variables Typedefs Enumerations Friends Pages
KSpaceFirstOrder3DSolver.cpp
Go to the documentation of this file.
1 /**
2  * @file KSpaceFirstOrder3DSolver.cpp
3  * @author Jiri Jaros \n
4  * Faculty of Information Technology\n
5  * Brno University of Technology \n
6  * jarosjir@fit.vutbr.cz
7  *
8  * @brief The implementation file containing the main class of the project
9  * responsible for the entire simulation.
10  *
11  * @version kspaceFirstOrder3D 2.15
12  * @date 12 July 2012, 10:27 (created)\n
13  * 29 September 2014, 17:23 (revised)
14  *
15  * @section License
16  * This file is part of the C++ extension of the k-Wave Toolbox (http://www.k-wave.org).\n
17  * Copyright (C) 2014 Jiri Jaros and Bradley Treeby.
18  *
19  * This file is part of k-Wave. k-Wave is free software: you can redistribute it
20  * and/or modify it under the terms of the GNU Lesser General Public License as
21  * published by the Free Software Foundation, either version 3 of the License,
22  * or (at your option) any later version.
23  *
24  * k-Wave is distributed in the hope that it will be useful, but
25  * WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
27  * See the GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with k-Wave. If not, see <http://www.gnu.org/licenses/>.
31  */
32 
33 // Linux build
34 #ifdef __linux__
35  #include <sys/resource.h>
36  #include <cmath>
37 #endif
38 
39 // Windows build
40 #ifdef _WIN64
41  #define _USE_MATH_DEFINES
42  #include <cmath>
43  #include <Windows.h>
44  #include <Psapi.h>
45  #pragma comment(lib, "Psapi.lib")
46 #endif
47 
48 #ifdef _OPENMP
49  #include <omp.h>
50 #endif
51 
52 #include <iostream>
53 #include <sstream>
54 #include <cstdio>
55 #include <limits>
56 
57 #include <immintrin.h>
58 #include <time.h>
59 
61 
62 #include <Utils/ErrorMessages.h>
63 
66 
67 using namespace std;
68 
69 //----------------------------------------------------------------------------//
70 // Constants //
71 //----------------------------------------------------------------------------//
72 
73 
74 
75 
76 //----------------------------------------------------------------------------//
77 // Public methods //
78 //----------------------------------------------------------------------------//
79 
80 
81 /**
82  * Constructor of the class.
83  */
85  MatrixContainer(), OutputStreamContainer(),
86  ActPercent(0),
87  Parameters(NULL),
88  TotalTime(), PreProcessingTime(), DataLoadTime (), SimulationTime(),
89  PostProcessingTime(), IterationTime()
90 {
91  TotalTime.Start();
93 
94  //Switch off HDF5 error messages
95  H5Eset_auto( H5E_DEFAULT, NULL, NULL);
96 }// end of TKSpaceFirstOrder3DSolver
97 //------------------------------------------------------------------------------
98 
99 
100 /**
101  * Destructor of the class.
102  */
104 {
105  FreeMemory();
106 }// end of TKSpace3DSolver
107 //------------------------------------------------------------------------------
108 
109 /**
110  * The method allocates the matrix container, creates all matrices and
111  * creates all output streams (however not allocating memory).
112  */
114 {
115  // create container, then all matrices
118 
119  // add output streams into container
120  //@todo Think about moving under LoadInputData routine...
122 }// end of AllocateMemory
123 //------------------------------------------------------------------------------
124 
125 /**
126  * The method frees all memory allocated by the class.
127  */
129 {
132 }// end of FreeMemory
133 //------------------------------------------------------------------------------
134 
135 /**
136  * Load data from the input file provided by the Parameter class and creates
137  * the output time series streams.
138  */
140 {
141  // Start timer
143 
144  // get handles
145  THDF5_File& HDF5_InputFile = Parameters->HDF5_InputFile; // file is opened (in Parameters)
146  THDF5_File& HDF5_OutputFile = Parameters->HDF5_OutputFile;
147  THDF5_File& HDF5_CheckpointFile = Parameters->HDF5_CheckpointFile;
148 
149  // Load data from disk
151 
152  // close the input file
153  HDF5_InputFile.Close();
154 
155  // The simulation does not use checkpointing or this is the first turn
156  bool RecoverFromPrevState = (Parameters->IsCheckpointEnabled() &&
158 
159  //-------------------- Read data from the checkpoint file -----------------//
160  if (RecoverFromPrevState)
161  {
162  // Open checkpoint file
163  HDF5_CheckpointFile.Open(Parameters->GetCheckpointFileName().c_str());
164 
165  // Check the checkpoint file
167 
168  // read the actual value of t_index
169  size_t new_t_index;
170  HDF5_CheckpointFile.ReadScalarValue(HDF5_CheckpointFile.GetRootGroup(),
171  t_index_Name,
172  new_t_index);
173  Parameters->Set_t_index(new_t_index);
174 
175  // Read necessary matrices from the checkpoint file
176  MatrixContainer.LoadDataFromCheckpointHDF5File(HDF5_CheckpointFile);
177 
178  HDF5_CheckpointFile.Close();
179 
180 
181  //------------- Read data from the output file -------------------------//
182 
183  // Reopen output file for RW access
184  HDF5_OutputFile.Open(Parameters->GetOutputFileName().c_str(), H5F_ACC_RDWR);
185  //Read file header of the output file
187  // Check the checkpoint file
188  CheckOutputFile();
189  // Restore elapsed time
191 
193  }
194  else
195  { //-------------------- First round of multi-leg simulation ---------------//
196  // Create the output file
197  HDF5_OutputFile.Create(Parameters->GetOutputFileName().c_str());
198 
199 
200  // Create the steams, link them with the sampled matrices, however DO NOT allocate memory!
202 
203  }
204 
205  // Stop timer
206  DataLoadTime.Stop();
207 }// end of LoadInputData
208 //------------------------------------------------------------------------------
209 
210 
211 /**
212  * This method computes k-space First Order 3D simulation.
213  * It launches calculation on a given dataset going through
214  * FFT initialization, pre-processing, main loop and post-processing phases.
215  *
216  */
218 {
220 
221  fprintf(stdout,"FFT plans creation.........."); fflush(stdout);
222 
224 
225  fprintf(stdout,"Done \n");
226  fprintf(stdout,"Pre-processing phase........"); fflush(stdout);
227 
228 
230 
231  fprintf(stdout,"Done \n");
232  fprintf(stdout,"Current memory in use:%8ldMB\n", ShowMemoryUsageInMB());
234 
235  fprintf(stdout,"Elapsed time: %8.2fs\n",PreProcessingTime.GetElapsedTime());
236 
238 
240 
242 
244 
246  { // Checkpoint
247  fprintf(stdout,"-------------------------------------------------------------\n");
248  fprintf(stdout,".............. Interrupted to checkpoint! ...................\n");
249  fprintf(stdout,"Number of time steps completed: %10ld\n", Parameters->Get_t_index());
250  fprintf(stdout,"Elapsed time: %8.2fs\n",SimulationTime.GetElapsedTime());
251  fprintf(stdout,"-------------------------------------------------------------\n");
252  fprintf(stdout,"Checkpoint in progress......"); fflush(stdout);
253 
255  }
256  else
257  { // Finish
258  fprintf(stdout,"-------------------------------------------------------------\n");
259  fprintf(stdout,"Elapsed time: %8.2fs\n",SimulationTime.GetElapsedTime());
260  fprintf(stdout,"-------------------------------------------------------------\n");
261  fprintf(stdout,"Post-processing phase......."); fflush(stdout);
262 
263  PostPorcessing();
264 
265  // if checkpointing is enabled and the checkpoint file was created created in the past, delete it
267  {
268  std::remove(Parameters->GetCheckpointFileName().c_str());
270  }
271  }
272 
274 
275  fprintf(stdout,"Done \n");
276  fprintf(stdout,"Elapsed time: %8.2fs\n",PostProcessingTime.GetElapsedTime());
277 
279 
281 }// end of Compute()
282 //------------------------------------------------------------------------------
283 
284 
285 
286 
287 /**
288  * Print parameters of the simulation.
289  * @param [in,out] file - where to print the parameters
290  */
292 {
293  fprintf(file,"Domain dims: [%4ld, %4ld,%4ld]\n",
297 
298  fprintf(file,"Simulation time steps: %8ld\n", Parameters->Get_Nt());
299 }// end of PrintParametersOfSimulation
300 //------------------------------------------------------------------------------
301 
302 
303 
304 /**
305  * Get peak memory usage.
306  * @return Peak memory usage in MBs.
307  *
308  */
310 {
311  // Linux build
312  #ifdef __linux__
313  struct rusage mem_usage;
314  getrusage(RUSAGE_SELF, &mem_usage);
315 
316  return mem_usage.ru_maxrss >> 10;
317  #endif
318 
319  // Windows build
320  #ifdef _WIN64
321  HANDLE hProcess;
322  PROCESS_MEMORY_COUNTERS pmc;
323 
324  hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ,
325  FALSE,
326  GetCurrentProcessId());
327 
328  GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc));
329  CloseHandle(hProcess);
330 
331  return pmc.PeakWorkingSetSize >> 20;
332  #endif
333 }// end of ShowMemoryUsageInMB
334 //------------------------------------------------------------------------------
335 
336 
337 /**
338  * Print Full code name and the license.
339  * @param [in] file - file to print the data (stdout)
340  */
342 {
343  fprintf(file,"\n");
344  fprintf(file,"+----------------------------------------------------+\n");
345  fprintf(file,"| Build name: kspaceFirstOrder3D v2.15 |\n");
346  fprintf(file,"| Build date: %*.*s |\n", 10,11,__DATE__);
347  fprintf(file,"| Build time: %*.*s |\n", 8,8,__TIME__);
348  #if (defined (__KWAVE_GIT_HASH__))
349  fprintf(file,"| Git hash: %s |\n",__KWAVE_GIT_HASH__);
350  #endif
351  fprintf(file,"| |\n");
352 
353  // OS detection
354  #ifdef __linux__
355  fprintf(file,"| Operating system: Linux x64 |\n");
356  #endif
357  #ifdef _WIN64
358  fprintf(file,"| Operating system: Windows x64 |\n");
359  #endif
360 
361  // Compiler detections
362  #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
363  fprintf(file,"| Compiler name: GNU C++ %.19s |\n", __VERSION__);
364  #endif
365  #ifdef __INTEL_COMPILER
366  fprintf(file,"| Compiler name: Intel C++ %d |\n", __INTEL_COMPILER);
367  #endif
368 
369  // instruction set
370  #if (defined (__AVX2__))
371  fprintf(file,"| Instruction set: Intel AVX 2 |\n");
372  #elif (defined (__AVX__))
373  fprintf(file,"| Instruction set: Intel AVX |\n");
374  #elif (defined (__SSE4_2__))
375  fprintf(file,"| Instruction set: Intel SSE 4.2 |\n");
376  #elif (defined (__SSE4_1__))
377  fprintf(file,"| Instruction set: Intel SSE 4.1 |\n");
378  #elif (defined (__SSE3__))
379  fprintf(file,"| Instruction set: Intel SSE 3 |\n");
380  #elif (defined (__SSE2__))
381  fprintf(file,"| Instruction set: Intel SSE 2 |\n");
382  #endif
383 
384  fprintf(file,"| |\n");
385  fprintf(file,"| Copyright (C) 2014 Jiri Jaros and Bradley Treeby |\n");
386  fprintf(file,"| http://www.k-wave.org |\n");
387  fprintf(file,"+----------------------------------------------------+\n");
388  fprintf(file,"\n");
389 }// end of GetFullCodeAndLincence
390 //------------------------------------------------------------------------------
391 
392 /**
393  * Set processor affinity.
394  */
396 {
397  // Linux Build
398  #ifdef __linux__
399  //GNU compiler
400  #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
401  setenv("OMP_PROC_BIND","TRUE",1);
402  #endif
403 
404  #ifdef __INTEL_COMPILER
405  setenv("KMP_AFFINITY","none",1);
406  #endif
407  #endif
408 
409  // Windows build is always compiled by the Intel Compiler
410  #ifdef _WIN64
411  _putenv_s("KMP_AFFINITY","none");
412  #endif
413 }//end of SetProcessorAffinity
414 //------------------------------------------------------------------------------
415 
416 
417 //----------------------------------------------------------------------------//
418 // Protected methods //
419 //----------------------------------------------------------------------------//
420 
421 /**
422  * Initialize FFTW plans.
423  *
424  */
426 {
427  // initialization of FFTW library
428  #ifdef _OPENMP
429  fftwf_init_threads();
430  fftwf_plan_with_nthreads(Parameters->GetNumberOfThreads());
431  #endif
432 
433  // The simulation does not use checkpointing or this is the first turn
434  bool RecoverFromPrevState = (Parameters->IsCheckpointEnabled() &&
436 
437  // import FFTW wisdom if it is here
438  if (RecoverFromPrevState)
439  {
440  // try to find the wisdom in the file that has the same name as the checkpoint file (different extension)
442  }
443 
444  // create real to complex plans
448 
449  // create real to complex plans
453 
454  // if necessary, create 1D shift plans.
455  // in this case, the matrix has a bit bigger dimensions to be able to store
456  // shifted matrices.
457  if (TParameters::GetInstance()->IsStore_u_non_staggered_raw())
458  {
459  // X shifts
462 
463  // Y shifts
466 
467  // Z shifts
470  }// end u_non_staggered
471 }// end of InitializeFFTWPlans
472 //------------------------------------------------------------------------------
473 
474 
475 
476 /**
477  * Compute pre-processing phase \n
478  * Initialize all indices, pre-compute constants such as c^2, rho0_sg* x dt
479  * and create kappa, absorb_eta, absorb_tau, absorb_nabla1, absorb_nabla2 matrices.
480  */
482 {
483  // get the correct sensor mask and recompute indices
484  if (Parameters->Get_sensor_mask_type() == TParameters::smt_index)
485  {
487  }
488 
489  if (Parameters->Get_sensor_mask_type() == TParameters::smt_corners)
490  {
492  }
493 
494 
495  if ((Parameters->Get_transducer_source_flag() != 0) ||
496  (Parameters->Get_ux_source_flag() != 0) ||
497  (Parameters->Get_uy_source_flag() != 0) ||
499  )
500  {
502  }
503 
505  {
507  }
508 
509  if (Parameters->Get_p_source_flag() != 0)
510  {
512  }
513 
514 
515  // compute dt / rho0_sg...
517  { // rho is scalar
521  }
522  else
523  { // non-uniform grid cannot be pre-calculated :-(
524  // rho is matrix
526  {
528  }
529  else
530  {
534  }
535  }
536 
537  // generate different matrices
538  if (Parameters->Get_absorbing_flag() != 0)
539  {
542  }
543  else
544  {
545  Generate_kappa();
546  }
547 
548  // calculate c^2. It has to be after kappa gen... because of c modification
549  Compute_c2();
550 }// end of PreProcessingPhase
551 //------------------------------------------------------------------------------
552 
553 
554 /**
555  * Generate kappa matrix for lossless case.
556  *
557  */
559 {
560  #pragma omp parallel
561  {
562  const float dx_sq_rec = 1.0f / (Parameters->Get_dx()*Parameters->Get_dx());
563  const float dy_sq_rec = 1.0f / (Parameters->Get_dy()*Parameters->Get_dy());
564  const float dz_sq_rec = 1.0f / (Parameters->Get_dz()*Parameters->Get_dz());
565 
566  const float c_ref_dt_pi = Parameters->Get_c_ref() * Parameters->Get_dt() * float(M_PI);
567 
568  const float Nx_rec = 1.0f / (float) Parameters->GetFullDimensionSizes().X;
569  const float Ny_rec = 1.0f / (float) Parameters->GetFullDimensionSizes().Y;
570  const float Nz_rec = 1.0f / (float) Parameters->GetFullDimensionSizes().Z;
571 
572 
573  const size_t X_Size = Parameters->GetReducedDimensionSizes().X;
574  const size_t Y_Size = Parameters->GetReducedDimensionSizes().Y;
575  const size_t Z_Size = Parameters->GetReducedDimensionSizes().Z;
576 
577  float * kappa = Get_kappa().GetRawData();
578 
579  #pragma omp for schedule (static)
580  for (size_t z = 0; z < Z_Size; z++)
581  {
582  const float z_f = (float) z;
583  float z_part = 0.5f - fabs(0.5f - z_f * Nz_rec );
584  z_part = (z_part * z_part) * dz_sq_rec;
585 
586  for (size_t y = 0; y < Y_Size; y++)
587  {
588  const float y_f = (float) y;
589  float y_part = 0.5f - fabs(0.5f - y_f * Ny_rec);
590  y_part = (y_part * y_part) * dy_sq_rec;
591 
592  const float yz_part = z_part + y_part;
593  for (size_t x = 0; x < X_Size; x++)
594  {
595  const float x_f = (float) x;
596  float x_part = 0.5f - fabs(0.5f - x_f * Nx_rec);
597  x_part = (x_part * x_part) * dx_sq_rec;
598 
599  float k = c_ref_dt_pi * sqrtf(x_part + yz_part);
600 
601  // kappa element
602  kappa[(z*Y_Size + y) * X_Size + x ] = (k == 0.0f) ? 1.0f : sin(k)/k;
603  }//x
604  }//y
605  }// z
606  }// parallel
607 }// end of Generate_kappa
608 //------------------------------------------------------------------------------
609 
610 /**
611  * Generate kappa, absorb_nabla1, absorb_nabla2 for absorbing media.
612  *
613  */
615 {
616  #pragma omp parallel
617  {
618  const float dx_sq_rec = 1.0f / (Parameters->Get_dx()*Parameters->Get_dx());
619  const float dy_sq_rec = 1.0f / (Parameters->Get_dy()*Parameters->Get_dy());
620  const float dz_sq_rec = 1.0f / (Parameters->Get_dz()*Parameters->Get_dz());
621 
622  const float c_ref_dt_2 = Parameters->Get_c_ref() * Parameters->Get_dt() * 0.5f;
623  const float pi_2 = float(M_PI) * 2.0f;
624 
625  const size_t Nx = Parameters->GetFullDimensionSizes().X;
626  const size_t Ny = Parameters->GetFullDimensionSizes().Y;
627  const size_t Nz = Parameters->GetFullDimensionSizes().Z;
628 
629  const float Nx_rec = 1.0f / (float) Nx;
630  const float Ny_rec = 1.0f / (float) Ny;
631  const float Nz_rec = 1.0f / (float) Nz;
632 
633  const size_t X_Size = Parameters->GetReducedDimensionSizes().X;
634  const size_t Y_Size = Parameters->GetReducedDimensionSizes().Y;
635  const size_t Z_Size = Parameters->GetReducedDimensionSizes().Z;
636 
637  float * kappa = Get_kappa().GetRawData();
638  float * absorb_nabla1 = Get_absorb_nabla1().GetRawData();
639  float * absorb_nabla2 = Get_absorb_nabla2().GetRawData();
640  const float alpha_power = Parameters->Get_alpha_power();
641 
642  #pragma omp for schedule (static)
643  for (size_t z = 0; z < Z_Size; z++)
644  {
645  const float z_f = (float) z;
646  float z_part = 0.5f - fabs(0.5f - z_f * Nz_rec );
647  z_part = (z_part * z_part) * dz_sq_rec;
648 
649  for (size_t y = 0; y < Y_Size; y++)
650  {
651  const float y_f = (float) y;
652  float y_part = 0.5f - fabs(0.5f - y_f * Ny_rec);
653  y_part = (y_part * y_part) * dy_sq_rec;
654 
655  const float yz_part = z_part + y_part;
656 
657  size_t i = (z*Y_Size + y) * X_Size;
658 
659  for (size_t x = 0; x < X_Size; x++)
660  {
661  const float x_f = (float) x;
662 
663  float x_part = 0.5f - fabs(0.5f - x_f * Nx_rec);
664  x_part = (x_part * x_part) * dx_sq_rec;
665 
666 
667  float k = pi_2 * sqrt(x_part + yz_part);
668  float c_ref_k = c_ref_dt_2 * k;
669 
670  absorb_nabla1[i] = pow(k, alpha_power - 2);
671  absorb_nabla2[i] = pow(k, alpha_power - 1);
672 
673  kappa[i] = (c_ref_k == 0.0f) ? 1.0f : sin(c_ref_k)/c_ref_k;
674 
675  if (absorb_nabla1[i] == std::numeric_limits<float>::infinity()) absorb_nabla1[i] = 0.0f;
676  if (absorb_nabla2[i] == std::numeric_limits<float>::infinity()) absorb_nabla2[i] = 0.0f;
677 
678  i++;
679  }//x
680  }//y
681  }// z
682  }// parallel
683 }// end of Generate_kappa_absorb_nabla1_absorb_nabla2
684 //------------------------------------------------------------------------------
685 
686 /**
687  * Generate absorb_tau and absorb_eta in for heterogenous media.
688  */
690 {
691  // test for scalars
693  {
694  const float alpha_power = Parameters->Get_alpha_power();
695  const float tan_pi_y_2 = tan(float(M_PI_2)* alpha_power);
696  const float alpha_db_neper_coeff = (100.0f * pow(1.0e-6f / (2.0f * (float) M_PI), alpha_power)) /
697  (20.0f * (float) M_LOG10E);
698 
699  const float alpha_coeff_2 = 2.0f * Parameters->Get_alpha_coeff_scallar() * alpha_db_neper_coeff;
700 
701  Parameters->Get_absorb_tau_scalar() = (-alpha_coeff_2) * pow(Parameters->Get_c0_scalar(),alpha_power-1);
702  Parameters->Get_absorb_eta_scalar() = alpha_coeff_2 * pow(Parameters->Get_c0_scalar(),alpha_power) * tan_pi_y_2;
703  }
704  else
705  {
706  #pragma omp parallel
707  {
708  const size_t Z_Size = Parameters->GetFullDimensionSizes().Z;
709  const size_t Y_Size = Parameters->GetFullDimensionSizes().Y;
710  const size_t X_Size = Parameters->GetFullDimensionSizes().X;
711 
712  float * absorb_tau = Get_absorb_tau().GetRawData();
713  float * absorb_eta = Get_absorb_eta().GetRawData();
714 
715  float * alpha_coeff;
716  size_t alpha_shift;
717 
719  {
720  alpha_coeff = &(Parameters->Get_alpha_coeff_scallar());
721  alpha_shift = 0;
722  }
723  else
724  {
725  alpha_coeff = Get_Temp_1_RS3D().GetRawData();
726  alpha_shift = 1;
727  }
728 
729  float * c0;
730  size_t c0_shift;
732  {
733  c0 = &(Parameters->Get_c0_scalar());
734  c0_shift = 0;
735  }
736  else
737  {
738  c0 = Get_c2().GetRawData();
739  c0_shift = 1;
740  }
741 
742  const float alpha_power = Parameters->Get_alpha_power();
743  const float tan_pi_y_2 = tan(float(M_PI_2)* alpha_power);
744 
745  //alpha = 100*alpha.*(1e-6/(2*pi)).^y./
746  // (20*log10(exp(1)));
747  const float alpha_db_neper_coeff = (100.0f * pow(1.0e-6f / (2.0f * (float) M_PI), alpha_power)) /
748  (20.0f * (float) M_LOG10E);
749 
750  #pragma omp for schedule (static)
751  for (size_t z = 0; z < Z_Size; z++)
752  {
753  for (size_t y = 0; y < Y_Size; y++)
754  {
755  size_t i = (z*Y_Size + y) * X_Size;
756  for (size_t x = 0; x < X_Size; x++)
757  {
758  const float alpha_coeff_2 = 2.0f * alpha_coeff[i * alpha_shift] * alpha_db_neper_coeff;
759  absorb_tau[i] = (-alpha_coeff_2) * pow(c0[i * c0_shift],alpha_power-1);
760  absorb_eta[i] = alpha_coeff_2 * pow(c0[i * c0_shift],alpha_power) * tan_pi_y_2;
761  i++;
762  }//x
763  }//y
764  }// z
765  }// parallel
766  } // absorb_tau and aborb_eta = matrics
767 }// end of Generate_absorb_tau_absorb_eta_matrix
768 //------------------------------------------------------------------------------
769 
770 /**
771  * Prepare dt./ rho0 for non-uniform grid.
772  *
773  */
775 {
776  #pragma omp parallel
777  {
778  float * dt_rho0_sgx = Get_dt_rho0_sgx().GetRawData();
779  float * dt_rho0_sgy = Get_dt_rho0_sgy().GetRawData();
780  float * dt_rho0_sgz = Get_dt_rho0_sgz().GetRawData();
781 
782  const float dt = Parameters->Get_dt();
783 
784  const float * duxdxn_sgx = Get_dxudxn_sgx().GetRawData();
785  const float * duydyn_sgy = Get_dyudyn_sgy().GetRawData();
786  const float * duzdzn_sgz = Get_dzudzn_sgz().GetRawData();
787 
788  const size_t Z_Size = Get_dt_rho0_sgx().GetDimensionSizes().Z;
789  const size_t Y_Size = Get_dt_rho0_sgx().GetDimensionSizes().Y;
790  const size_t X_Size = Get_dt_rho0_sgx().GetDimensionSizes().X;
791 
792  const size_t SliceSize = (X_Size * Y_Size );
793 
794  #pragma omp for schedule (static)
795  for (size_t z = 0; z < Z_Size; z++)
796  {
797  register size_t i = z * SliceSize;
798  for (size_t y = 0; y < Y_Size; y++)
799  {
800  for (size_t x = 0; x < X_Size; x++)
801  {
802  dt_rho0_sgx[i] = (dt * duxdxn_sgx[x]) / dt_rho0_sgx[i];
803  i++;
804  } // x
805  } // y
806  } // z
807 
808  #pragma omp for schedule (static)
809  for (size_t z = 0; z < Z_Size; z++)
810  {
811  register size_t i = z * SliceSize;
812  for (size_t y = 0; y < Y_Size; y++)
813  {
814  const float duydyn_el = duydyn_sgy[y];
815  for (size_t x = 0; x < X_Size; x++)
816  {
817  dt_rho0_sgy[i] = (dt * duydyn_el) / dt_rho0_sgy[i];
818  i++;
819  } // x
820  } // y
821  } // z
822 
823  #pragma omp for schedule (static)
824  for (size_t z = 0; z < Z_Size; z++)
825  {
826  register size_t i = z* SliceSize;
827  const float duzdzn_el = duzdzn_sgz[z];
828  for (size_t y = 0; y < Y_Size; y++)
829  {
830  for (size_t x = 0; x < X_Size; x++)
831  {
832  dt_rho0_sgz[i] = (dt * duzdzn_el) / dt_rho0_sgz[i];
833  i++;
834  } // x
835  } // y
836  } // z
837  } // parallel
838 }// end of Calculate_dt_rho0_non_uniform
839 //------------------------------------------------------------------------------
840 
841 /**
842  * Calculate p0 source when necessary.
843  *
844  */
846 {
848 
849  const float * p0 = Get_p0_source_input().GetRawData();
850 
851  float * c2;
852  size_t c2_shift;
853 
855  {
856  c2 = &Parameters->Get_c0_scalar();
857  c2_shift = 0;
858  }
859  else
860  {
861  c2 = Get_c2().GetRawData();
862  c2_shift = 1;
863  }
864 
865  float * rhox = Get_rhox().GetRawData();
866  float * rhoy = Get_rhoy().GetRawData();
867  float * rhoz = Get_rhoz().GetRawData();
868 
869  // add the initial pressure to rho as a mass source
870  float tmp;
871 
872  #pragma omp parallel for schedule (static) private(tmp)
873  for (size_t i = 0; i < Get_rhox().GetTotalElementCount(); i++)
874  {
875  tmp = p0[i] / (3.0f* c2[i * c2_shift]);
876  rhox[i] = tmp;
877  rhoy[i] = tmp;
878  rhoz[i] = tmp;
879  }
880 
881  //------------------------------------------------------------------------//
882  //-- compute u(t = t1 + dt/2) based on the assumption u(dt/2) = -u(-dt/2) --//
883  //-- which forces u(t = t1) = 0 --//
884  //------------------------------------------------------------------------//
887  Get_kappa(),
889  );
890 
892  {
894  { // non uniform grid
896  Get_dxudxn_sgx(),
897  Get_FFT_X_temp());
899  Get_dyudyn_sgy(),
900  Get_FFT_Y_temp());
902  Get_dzudzn_sgz(),
903  Get_FFT_Z_temp());
904  }
905  else
906  { //uniform grid, heterogeneous
910  }
911  }
912  else
913  { // homogeneous, non-unifrom grid
914  // divide the matrix by 2 and multiply with st./rho0_sg
918  }
919 }// end of Calculate_p0_source
920 //------------------------------------------------------------------------------
921 
922 /**
923  * Compute c^2.
924  *
925  */
927 {
929  { //scalar
930  float c = Parameters->Get_c0_scalar();
931  Parameters->Get_c0_scalar() = c * c;
932  }
933  else
934  {
935  float * c2 = Get_c2().GetRawData();
936 
937  #pragma omp parallel for schedule (static)
938  for (size_t i=0; i< Get_c2().GetTotalElementCount(); i++)
939  {
940  c2[i] = c2[i] * c2[i];
941  }
942  }// matrix
943 }// ComputeC2
944 //------------------------------------------------------------------------------
945 
946 /**
947  * Compute part of the new velocity term - gradient of p
948  * represented by:
949  * bsxfun(\@times, ddx_k_shift_pos, kappa .* p_k)
950  *
951  * @param [in] X_Matrix - 3D pressure matrix
952  * @param [out] FFT_X - matrix to store input for iFFT (p) /dx
953  * @param [out] FFT_Y - matrix to store input for iFFT (p) /dy
954  * @param [out] FFT_Z - matrix to store input for iFFT (p) /dz
955  *
956  * @param [in] kappa - Real matrix of kappa
957  *
958  * @param [in] ddx - precomputed value of ddx_k_shift_pos
959  * @param [in] ddy - precomputed value of ddy_k_shift_pos
960  * @param [in] ddz - precomputed value of ddz_k_shift_pos
961  */
963  TFFTWComplexMatrix& FFT_X,
964  TFFTWComplexMatrix& FFT_Y,
965  TFFTWComplexMatrix& FFT_Z,
966  const TRealMatrix& kappa,
967  const TComplexMatrix& ddx,
968  const TComplexMatrix& ddy,
969  const TComplexMatrix& ddz)
970 {
971  // Compute FFT of X
972  FFT_X.Compute_FFT_3D_R2C(X_Matrix);
973 
974  #pragma omp parallel
975  {
976  float * p_k_x_data = FFT_X.GetRawData();
977  float * p_k_y_data = FFT_Y.GetRawData();
978  float * p_k_z_data = FFT_Z.GetRawData();
979 
980  const float * kappa_data = kappa.GetRawData();
981  const float * ddx_data = ddx.GetRawData();
982  const float * ddy_data = ddy.GetRawData();
983  const float * ddz_data = ddz.GetRawData();
984 
985  const size_t Z_Size = FFT_X.GetDimensionSizes().Z;
986  const size_t Y_Size = FFT_X.GetDimensionSizes().Y;
987  const size_t X_Size = FFT_X.GetDimensionSizes().X;
988 
989  const size_t SliceSize = (X_Size * Y_Size ) << 1;
990 
991  #pragma omp for schedule (static)
992  for (size_t z = 0; z < Z_Size; z++)
993  {
994  register size_t i = z * SliceSize;
995  const size_t z2 = z<<1;
996  for (size_t y = 0; y < Y_Size; y++)
997  {
998  const size_t y2 = y<<1;
999  for (size_t x = 0; x < X_Size; x++)
1000  {
1001  // kappa ./ p_k
1002  const float kappa_el = kappa_data[i>>1];
1003  const float p_k_el_re = p_k_x_data[i] * kappa_el;
1004  const float p_k_el_im = p_k_x_data[i+1] * kappa_el;
1005  const size_t x2 = x<<1;
1006 
1007  //bsxfun(ddx...)
1008  p_k_x_data[i] = p_k_el_re * ddx_data[x2] - p_k_el_im * ddx_data[x2+1];
1009  p_k_x_data[i+1] = p_k_el_re * ddx_data[x2+1] + p_k_el_im * ddx_data[x2];
1010 
1011  //bsxfun(ddy...)
1012  p_k_y_data[i] = p_k_el_re * ddy_data[y2] - p_k_el_im * ddy_data[y2+1];
1013  p_k_y_data[i+1] = p_k_el_re * ddy_data[y2+1] + p_k_el_im * ddy_data[y2];
1014 
1015  //bsxfun(ddz...)
1016  p_k_z_data[i] = p_k_el_re * ddz_data[z2] - p_k_el_im * ddz_data[z2+1];
1017  p_k_z_data[i+1] = p_k_el_re * ddz_data[z2+1] + p_k_el_im * ddz_data[z2];
1018 
1019  i +=2;
1020  } // x
1021  } // y
1022  } // z
1023  }// parallel
1024 }// end of KSpaceFirstOrder3DSolver
1025 //------------------------------------------------------------------------------
1026 
1027 
1028 /**
1029  * Compute new values for duxdx, duydy, duzdz.
1030  *
1031  */
1033 {
1037 
1038  #pragma omp parallel
1039  {
1040  float * Temp_FFT_X_Data = Get_FFT_X_temp().GetRawData();
1041  float * Temp_FFT_Y_Data = Get_FFT_Y_temp().GetRawData();
1042  float * Temp_FFT_Z_Data = Get_FFT_Z_temp().GetRawData();
1043 
1044  const float * kappa = Get_kappa().GetRawData();
1045 
1046  const size_t FFT_Z_dim = Get_FFT_X_temp().GetDimensionSizes().Z;
1047  const size_t FFT_Y_dim = Get_FFT_X_temp().GetDimensionSizes().Y;
1048  const size_t FFT_X_dim = Get_FFT_X_temp().GetDimensionSizes().X;
1049 
1050  const size_t SliceSize = (FFT_X_dim * FFT_Y_dim) << 1;
1051  const float Divider = 1.0f / Get_ux_sgx().GetTotalElementCount();
1052 
1056 
1057 
1058  #pragma omp for schedule (static)
1059  for (size_t z = 0; z < FFT_Z_dim; z++)
1060  {
1061  register size_t i = z * SliceSize;
1062 
1063  const float ddz_neg_re = ddz[z].real;
1064  const float ddz_neg_im = ddz[z].imag;
1065  for (size_t y = 0; y < FFT_Y_dim; y++)
1066  {
1067  const float ddy_neg_re = ddy[y].real;
1068  const float ddy_neg_im = ddy[y].imag;
1069  for (size_t x = 0; x < FFT_X_dim; x++)
1070  {
1071  float FFT_el_x_re = Temp_FFT_X_Data[i];
1072  float FFT_el_x_im = Temp_FFT_X_Data[i+1];
1073 
1074  float FFT_el_y_re = Temp_FFT_Y_Data[i];
1075  float FFT_el_y_im = Temp_FFT_Y_Data[i+1];
1076 
1077  float FFT_el_z_re = Temp_FFT_Z_Data[i];
1078  float FFT_el_z_im = Temp_FFT_Z_Data[i+1];
1079 
1080  const float kappa_el = kappa[i >> 1];
1081 
1082  FFT_el_x_re *= kappa_el;
1083  FFT_el_x_im *= kappa_el;
1084 
1085  FFT_el_y_re *= kappa_el;
1086  FFT_el_y_im *= kappa_el;
1087 
1088  FFT_el_z_re *= kappa_el;
1089  FFT_el_z_im *= kappa_el;
1090 
1091 
1092  Temp_FFT_X_Data[i] = ((FFT_el_x_re * ddx[x].real) -
1093  (FFT_el_x_im * ddx[x].imag)
1094  ) * Divider;
1095  Temp_FFT_X_Data[i + 1] = ((FFT_el_x_im * ddx[x].real) +
1096  (FFT_el_x_re * ddx[x].imag)
1097  )* Divider;
1098 
1099  Temp_FFT_Y_Data[i] = ((FFT_el_y_re * ddy_neg_re) -
1100  (FFT_el_y_im * ddy_neg_im)
1101  ) * Divider;
1102  Temp_FFT_Y_Data[i + 1] = ((FFT_el_y_im * ddy_neg_re) +
1103  (FFT_el_y_re * ddy_neg_im)
1104  )* Divider;
1105 
1106  Temp_FFT_Z_Data[i] = ((FFT_el_z_re * ddz_neg_re) -
1107  (FFT_el_z_im * ddz_neg_im)
1108  ) * Divider;
1109  Temp_FFT_Z_Data[i + 1] = ((FFT_el_z_im * ddz_neg_re) +
1110  (FFT_el_z_re * ddz_neg_im)
1111  )* Divider;
1112 
1113  i+=2;
1114  } // x
1115  } // y
1116  } // z
1117  } // parallel;
1118 
1122 
1123  //-------------------------------------------------------------------------//
1124  //--------------------- Non linear grid -----------------------------------//
1125  //-------------------------------------------------------------------------//
1127  {
1128  #pragma omp parallel
1129  {
1130  float * duxdx = Get_duxdx().GetRawData();
1131  float * duydy = Get_duydy().GetRawData();
1132  float * duzdz = Get_duzdz().GetRawData();
1133 
1134  const float * duxdxn = Get_dxudxn().GetRawData();
1135  const float * duydyn = Get_dyudyn().GetRawData();
1136  const float * duzdzn = Get_dzudzn().GetRawData();
1137 
1138  const size_t Z_Size = Get_duxdx().GetDimensionSizes().Z;
1139  const size_t Y_Size = Get_duxdx().GetDimensionSizes().Y;
1140  const size_t X_Size = Get_duxdx().GetDimensionSizes().X;
1141 
1142  const size_t SliceSize = (X_Size * Y_Size );
1143 
1144  #pragma omp for schedule (static)
1145  for (size_t z = 0; z < Z_Size; z++)
1146  {
1147  register size_t i = z* SliceSize;
1148  for (size_t y = 0; y < Y_Size; y++)
1149  {
1150  for (size_t x = 0; x < X_Size; x++)
1151  {
1152  duxdx[i] *= duxdxn[x];
1153  i++;
1154  } // x
1155  } // y
1156  } // z
1157 
1158  #pragma omp for schedule (static)
1159  for (size_t z = 0; z < Z_Size; z++)
1160  {
1161  register size_t i = z * SliceSize;
1162  for (size_t y = 0; y < Y_Size; y++)
1163  {
1164  const float dyudyn_el = duydyn[y];
1165  for (size_t x = 0; x < X_Size; x++)
1166  {
1167  duydy[i] *= dyudyn_el;
1168  i++;
1169  } // x
1170  } // y
1171  } // z
1172 
1173  #pragma omp for schedule (static)
1174  for (size_t z = 0; z < Z_Size; z++)
1175  {
1176  const float duzdzn_el = duzdzn[z];
1177  register size_t i = z * SliceSize;
1178  for (size_t y = 0; y < Y_Size; y++)
1179  {
1180  for (size_t x = 0; x < X_Size; x++)
1181  {
1182  duzdz[i] *= duzdzn_el;
1183  i++;
1184  } // x
1185  } // y
1186  } // z
1187  } // parallel
1188  }// nonlinear
1189 }// end of Compute_duxyz
1190 //------------------------------------------------------------------------------
1191 
1192 
1193 /**
1194  * Calculate new values of rhox, rhoy and rhoz for non-linear case.
1195  *
1196  */
1198 {
1199  const size_t Z_Size = Get_rhox().GetDimensionSizes().Z;
1200  const size_t Y_Size = Get_rhox().GetDimensionSizes().Y;
1201  const size_t X_Size = Get_rhox().GetDimensionSizes().X;
1202 
1203  const float dt2 = 2.0f * Parameters->Get_dt();
1204  const float dt_el = Parameters->Get_dt();
1205  const size_t SliceSize = Y_Size * X_Size;
1206 
1207  #pragma omp parallel
1208  {
1209  float * rhox_data = Get_rhox().GetRawData();
1210  float * rhoy_data = Get_rhoy().GetRawData();
1211  float * rhoz_data = Get_rhoz().GetRawData();
1212 
1213  const float * pml_x_data = Get_pml_x().GetRawData();
1214  const float * duxdx_data = Get_duxdx().GetRawData();
1215  const float * duydy_data = Get_duydy().GetRawData();
1216  const float * duzdz_data = Get_duzdz().GetRawData();
1217 
1218  // rho0 is a scalar
1219  if (Parameters->Get_rho0_scalar())
1220  {
1221  const float dt_rho0 = Parameters->Get_rho0_scalar() * dt_el;
1222 
1223  #pragma omp for schedule (static)
1224  for (size_t z = 0; z < Z_Size; z++)
1225  {
1226  register size_t i = z * SliceSize;
1227  for (size_t y = 0; y < Y_Size; y++)
1228  {
1229  for (size_t x = 0; x < X_Size; x++)
1230  {
1231  const float pml_x = pml_x_data[x];
1232  float du = duxdx_data[i];
1233 
1234  rhox_data[i] = pml_x * (
1235  ((pml_x * rhox_data[i]) - (dt_rho0 * du))/
1236  (1.0f + (dt2 * du))
1237  );
1238  i++;
1239  } // x
1240  }// y
1241  }// z
1242 
1243  #pragma omp for schedule (static)
1244  for (size_t z = 0; z < Z_Size; z++)
1245  {
1246  register size_t i = z * SliceSize;
1247  for (size_t y = 0; y < Y_Size; y++)
1248  {
1249  const float pml_y = Get_pml_y()[y];
1250  for (size_t x = 0; x < X_Size; x++)
1251  {
1252  float du = duydy_data[i];
1253 
1254  rhoy_data[i] = pml_y * (
1255  ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/
1256  (1.0f + (dt2 * du))
1257  );
1258  i++;
1259  } // x
1260  }// y
1261  }// z
1262 
1263 
1264  #pragma omp for schedule (static)
1265  for (size_t z = 0; z < Z_Size; z++)
1266  {
1267  register size_t i = z * SliceSize;
1268  const float pml_z = Get_pml_z()[z];
1269  for (size_t y = 0; y < Y_Size; y++)
1270  {
1271  for (size_t x = 0; x < X_Size; x++)
1272  {
1273  float du = duzdz_data[i];
1274 
1275  rhoz_data[i] = pml_z * (
1276  ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/
1277  (1.0f + (dt2 * du))
1278  );
1279  i++;
1280  } // x
1281  }// y
1282  }// z
1283  }
1284  else
1285  { //----------------------------------------------------------------//
1286  // rho0 is a matrix
1287  const float * rho0_data = Get_rho0().GetRawData();
1288 
1289  #pragma omp for schedule (static)
1290  for (size_t z = 0; z < Z_Size; z++)
1291  {
1292  register size_t i = z * SliceSize;
1293  for (size_t y = 0; y < Y_Size; y++)
1294  {
1295  for (size_t x = 0; x < X_Size; x++)
1296  {
1297  const float pml_x = pml_x_data[x];
1298  const float dt_rho0 = dt_el * rho0_data[i];
1299  float du = duxdx_data[i];
1300 
1301  rhox_data[i] = pml_x * (
1302  ((pml_x * rhox_data[i]) - (dt_rho0 * du))/
1303  (1.0f + (dt2 * du))
1304  );
1305  i++;
1306  } // x
1307  }// y
1308  }// z
1309 
1310  #pragma omp for schedule (static)
1311  for (size_t z = 0; z < Z_Size; z++)
1312  {
1313  register size_t i = z * SliceSize;
1314  for (size_t y = 0; y < Y_Size; y++)
1315  {
1316  const float pml_y = Get_pml_y()[y];
1317  for (size_t x = 0; x < X_Size; x++)
1318  {
1319  const float dt_rho0 = dt_el * rho0_data[i];
1320  float du = duydy_data[i];
1321 
1322  rhoy_data[i] = pml_y * (
1323  ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/
1324  (1.0f + (dt2 * du))
1325  );
1326  i++;
1327  } // x
1328  }// y
1329  }// z
1330 
1331  #pragma omp for schedule (static)
1332  for (size_t z = 0; z < Z_Size; z++)
1333  {
1334  register size_t i = z * SliceSize;
1335  const float pml_z = Get_pml_z()[z];
1336  for (size_t y = 0; y < Y_Size; y++)
1337  {
1338  for (size_t x = 0; x < X_Size; x++)
1339  {
1340  const float dt_rho0 = dt_el * rho0_data[i];
1341  float du = duzdz_data[i];
1342 
1343  rhoz_data[i] = pml_z * (
1344  ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/
1345  (1.0f + (dt2 * du))
1346  );
1347  i++;
1348  } // x
1349  }// y
1350  }// z
1351  } // end rho is matrix
1352  }// parallel
1353 }// end of Compute_rhoxyz_nonlinear
1354 //------------------------------------------------------------------------------
1355 
1356 
1357 /**
1358  * Calculate new values of rhox, rhoy and rhoz for linear case.
1359  *
1360  */
1362 {
1363  const size_t Z_Size = Get_rhox().GetDimensionSizes().Z;
1364  const size_t Y_Size = Get_rhox().GetDimensionSizes().Y;
1365  const size_t X_Size = Get_rhox().GetDimensionSizes().X;
1366 
1367  const float dt_el = Parameters->Get_dt();
1368  const size_t SliceSize = Y_Size * X_Size;
1369 
1370  #pragma omp parallel
1371  {
1372  float * rhox_data = Get_rhox().GetRawData();
1373  float * rhoy_data = Get_rhoy().GetRawData();
1374  float * rhoz_data = Get_rhoz().GetRawData();
1375 
1376  const float * pml_x_data = Get_pml_x().GetRawData();
1377  const float * duxdx_data = Get_duxdx().GetRawData();
1378  const float * duydy_data = Get_duydy().GetRawData();
1379  const float * duzdz_data = Get_duzdz().GetRawData();
1380 
1381 
1382  if (Parameters->Get_rho0_scalar())
1383  { // rho0 is a scalar
1384  const float dt_rho0 = Parameters->Get_rho0_scalar() * dt_el;
1385 
1386  #pragma omp for schedule (static)
1387  for (size_t z = 0; z < Z_Size; z++)
1388  {
1389  register size_t i = z * SliceSize;
1390  for (size_t y = 0; y < Y_Size; y++)
1391  {
1392  for (size_t x = 0; x < X_Size; x++)
1393  {
1394  const float pml_x = pml_x_data[x];
1395 
1396  rhox_data[i] = pml_x * (
1397  ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i]))
1398  );
1399  i++;
1400  } // x
1401  }// y
1402  }// z
1403 
1404  #pragma omp for schedule (static)
1405  for (size_t z = 0; z < Z_Size; z++)
1406  {
1407  register size_t i = z * SliceSize;
1408  for (size_t y = 0; y < Y_Size; y++)
1409  {
1410  const float pml_y = Get_pml_y()[y];
1411  for (size_t x = 0; x < X_Size; x++)
1412  {
1413  rhoy_data[i] = pml_y * (
1414  ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i]))
1415  );
1416  i++;
1417  } // x
1418  }// y
1419  }// z
1420 
1421  #pragma omp for schedule (static)
1422  for (size_t z = 0; z < Z_Size; z++)
1423  {
1424  register size_t i = z * SliceSize;
1425  const float pml_z = Get_pml_z()[z];
1426 
1427  for (size_t y = 0; y < Y_Size; y++)
1428  {
1429  for (size_t x = 0; x < X_Size; x++)
1430  {
1431  rhoz_data[i] = pml_z * (
1432  ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i]))
1433  );
1434  i++;
1435  } // x
1436  }// y
1437  }// z
1438 
1439  }
1440  else
1441  { //-----------------------------------------------------//
1442  // rho0 is a matrix
1443  const float * rho0_data = Get_rho0().GetRawData();
1444 
1445  #pragma omp for schedule (static)
1446  for (size_t z = 0; z < Z_Size; z++)
1447  {
1448  register size_t i = z * SliceSize;
1449  for (size_t y = 0; y < Y_Size; y++)
1450  {
1451  for (size_t x = 0; x < X_Size; x++)
1452  {
1453  const float pml_x = pml_x_data[x];
1454  const float dt_rho0 = dt_el * rho0_data[i];
1455 
1456  rhox_data[i] = pml_x * (
1457  ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i]))
1458  );
1459 
1460  i++;
1461  } // x
1462  }// y
1463  }// z
1464 
1465  #pragma omp for schedule (static)
1466  for (size_t z = 0; z < Z_Size; z++)
1467  {
1468  register size_t i = z * SliceSize;
1469  for (size_t y = 0; y < Y_Size; y++)
1470  {
1471  const float pml_y = Get_pml_y()[y];
1472  for (size_t x = 0; x < X_Size; x++)
1473  {
1474  const float dt_rho0 = dt_el * rho0_data[i];
1475 
1476  rhoy_data[i] = pml_y * (
1477  ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i]))
1478  );
1479  i++;
1480 
1481  } // x
1482  }// y
1483  }// z
1484 
1485 
1486  #pragma omp for schedule (static)
1487  for (size_t z = 0; z < Z_Size; z++)
1488  {
1489  register size_t i = z * SliceSize;
1490  const float pml_z = Get_pml_z()[z];
1491 
1492  for (size_t y = 0; y < Y_Size; y++)
1493  {
1494  for (size_t x = 0; x < X_Size; x++)
1495  {
1496  const float dt_rho0 = dt_el * rho0_data[i];
1497 
1498  rhoz_data[i] = pml_z * (
1499  ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i]))
1500  );
1501  i++;
1502  } // x
1503  }// y
1504  }// z
1505 
1506  } // end rho is a matrix
1507  }// parallel
1508 }// end of Compute_rhoxyz_linear
1509 //------------------------------------------------------------------------------
1510 
1511 
1512 /**
1513  * Calculate three temporary sums in the new pressure formula \n
1514  * non-linear absorbing case, SSE2 version.
1515  * @param [out] RHO_Temp - rhox_sgx + rhoy_sgy + rhoz_sgz
1516  * @param [out] BonA_Temp - BonA + rho ^2 / 2 rho0 + (rhox_sgx + rhoy_sgy + rhoz_sgz)
1517  * @param [out] Sum_du - rho0* (duxdx + duydy + duzdz)
1518  */
1520  TRealMatrix & BonA_Temp,
1521  TRealMatrix & Sum_du)
1522 {
1523  // step of 4
1524  const size_t TotalElementCount_4 = (RHO_Temp.GetTotalElementCount() >> 2) << 2;
1525 
1526  const float * rhox_data = Get_rhox().GetRawData();
1527  const float * rhoy_data = Get_rhoy().GetRawData();
1528  const float * rhoz_data = Get_rhoz().GetRawData();
1529 
1530  const float * dux_data = Get_duxdx().GetRawData();
1531  const float * duy_data = Get_duydy().GetRawData();
1532  const float * duz_data = Get_duzdz().GetRawData();
1533 
1534 
1535  // set BonA to be either scalar or a matrix
1536  float * BonA;
1537  size_t BonA_shift;
1538  const bool BonA_flag = Parameters->Get_BonA_scalar_flag();
1539 
1540  if (BonA_flag)
1541  {
1542  BonA = &Parameters->Get_BonA_scalar();
1543  BonA_shift = 0;
1544  }
1545  else
1546  {
1547  BonA = Get_BonA().GetRawData();
1548  BonA_shift = 1;
1549  }
1550 
1551 
1552  // set rho0A to be either scalar or a matrix
1553  float * rho0_data;
1554  size_t rho0_shift;
1555  const bool rho0_flag = Parameters->Get_rho0_scalar_flag();
1556 
1557  if (rho0_flag)
1558  {
1559  rho0_data = &Parameters->Get_rho0_scalar();
1560  rho0_shift = 0;
1561  }
1562  else
1563  {
1564  rho0_data = Get_rho0().GetRawData();
1565  rho0_shift = 1;
1566  }
1567 
1568  // compute loop
1569  #pragma omp parallel
1570  {
1571  float * RHO_Temp_Data = RHO_Temp.GetRawData();
1572  float * BonA_Temp_Data = BonA_Temp.GetRawData();
1573  float * SumDU_Temp_Data= Sum_du.GetRawData();
1574 
1575 
1576  const __m128 Two_SSE = _mm_set1_ps(2.0f);
1577  __m128 BonA_SSE = _mm_set1_ps(Parameters->Get_BonA_scalar());
1578  __m128 rho0_SSE = _mm_set1_ps(Parameters->Get_rho0_scalar());
1579 
1580 
1581  #pragma omp for schedule (static) nowait
1582  for (size_t i = 0; i < TotalElementCount_4; i+=4)
1583  {
1584  if (!BonA_flag) BonA_SSE = _mm_load_ps(&BonA[i]);
1585 
1586  __m128 xmm1 = _mm_load_ps(&rhox_data[i]);
1587  __m128 xmm2 = _mm_load_ps(&rhoy_data[i]);
1588  __m128 xmm3 = _mm_load_ps(&rhoz_data[i]);
1589 
1590  if (!rho0_flag) rho0_SSE = _mm_load_ps(&rho0_data[i]);
1591 
1592  __m128 rho_xyz_sq_SSE;
1593  __m128 rho_xyz_el_SSE;
1594 
1595  // register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1596  rho_xyz_el_SSE = _mm_add_ps(xmm1, xmm2);
1597  rho_xyz_el_SSE = _mm_add_ps(xmm3, rho_xyz_el_SSE);
1598 
1599  // RHO_Temp_Data[i] = rho_xyz_el;
1600  _mm_stream_ps(&RHO_Temp_Data[i], rho_xyz_el_SSE);
1601 
1602  // BonA_Temp_Data[i] = ((BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])) + rho_xyz_el;
1603  rho_xyz_sq_SSE = _mm_mul_ps(rho_xyz_el_SSE, rho_xyz_el_SSE);// (rho_xyz_el * rho_xyz_el)
1604 
1605  xmm1 = _mm_mul_ps(rho_xyz_sq_SSE, BonA_SSE); //((BonA * (rho_xyz_el * rho_xyz_el))
1606  xmm2 = _mm_mul_ps(Two_SSE, rho0_SSE); // (2.0f * rho0_data[i])
1607  xmm3 = _mm_div_ps(xmm1, xmm2); // (BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])
1608 
1609  xmm1 = _mm_add_ps(xmm3, rho_xyz_el_SSE); // + rho_xyz_el
1610 
1611  _mm_stream_ps(&BonA_Temp_Data[i], xmm1); //bypass cache
1612 
1613  xmm1 = _mm_load_ps(&dux_data[i]); //dudx
1614  xmm2 = _mm_load_ps(&duy_data[i]); //dudu
1615  xmm3 = _mm_load_ps(&duz_data[i]); //dudz
1616 
1617  __m128 xmm_acc = _mm_add_ps(xmm1, xmm2);
1618  xmm_acc = _mm_add_ps(xmm_acc, xmm3);
1619  xmm_acc = _mm_mul_ps(xmm_acc, rho0_SSE);
1620 
1621  _mm_stream_ps(&SumDU_Temp_Data[i],xmm_acc);
1622 
1623  // BonA_Temp_Data[i] = ((BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])) + rho_xyz_el;
1624  }
1625 
1626  // non SSE code, in OpenMP only the last thread does this
1627  #ifdef _OPENMP
1628  if (omp_get_thread_num() == omp_get_num_threads() -1)
1629  #endif
1630  {
1631  for (size_t i = TotalElementCount_4; i < RHO_Temp.GetTotalElementCount() ; i++)
1632  {
1633  register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1634 
1635  RHO_Temp_Data[i] = rho_xyz_el;
1636  BonA_Temp_Data[i] = ((BonA[i * BonA_shift] * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i* rho0_shift])) + rho_xyz_el;
1637  SumDU_Temp_Data[i] = rho0_data[i * rho0_shift] * (dux_data[i] + duy_data[i] + duz_data[i]);
1638  }
1639  }
1640 
1641  }// parallel
1642  } // end of Calculate_SumRho_BonA_SumDu_SSE2
1643 //------------------------------------------------------------------------------
1644 
1645 
1646 
1647  /**
1648  * Calculate two temporary sums in the new pressure formula, linear absorbing case.
1649  * @param [out] Sum_rhoxyz - rhox_sgx + rhoy_sgy + rhoz_sgz
1650  * @param [out] Sum_rho0_du - rho0* (duxdx + duydy + duzdz);
1651  */
1653  TRealMatrix & Sum_rho0_du)
1654  {
1655  const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
1656 
1657  #pragma omp parallel
1658  {
1659  const float * rhox_data = Get_rhox().GetRawData();
1660  const float * rhoy_data = Get_rhoy().GetRawData();
1661  const float * rhoz_data = Get_rhoz().GetRawData();
1662 
1663  const float * dux_data = Get_duxdx().GetRawData();
1664  const float * duy_data = Get_duydy().GetRawData();
1665  const float * duz_data = Get_duzdz().GetRawData();
1666 
1667  const float * rho0_data = NULL;
1668 
1669  const float rho0_data_el = Parameters->Get_rho0_scalar();
1671  {
1672  rho0_data = Get_rho0().GetRawData();
1673  }
1674 
1675 
1676  float * Sum_rhoxyz_Data = Sum_rhoxyz.GetRawData();
1677  float * Sum_rho0_du_Data = Sum_rho0_du.GetRawData();
1678 
1679  #pragma omp for schedule (static)
1680  for (size_t i = 0; i < TotalElementCount; i++)
1681  {
1682  Sum_rhoxyz_Data[i] = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1683  }
1684 
1685 
1687  { // scalar
1688  #pragma omp for schedule (static) nowait
1689  for (size_t i = 0; i < TotalElementCount; i++)
1690  {
1691  Sum_rho0_du_Data[i] = rho0_data_el * (dux_data[i] + duy_data[i] + duz_data[i]);
1692  }
1693  }
1694  else
1695  { // matrix
1696  #pragma omp for schedule (static) nowait
1697  for (size_t i = 0; i < TotalElementCount; i++)
1698  {
1699  Sum_rho0_du_Data[i] = rho0_data[i] * (dux_data[i] + duy_data[i] + duz_data[i]);
1700  }
1701  }
1702  } // parallel
1703 }// end of Calculate_SumRho_SumRhoDu
1704 //------------------------------------------------------------------------------
1705 
1706 
1707  /**
1708  * Compute absorbing term with abosrb_nabla1 and absorb_nabla2, SSE2 version. \n
1709  * Calculate absorb_nabla1 .* fft_1 \n
1710  * Calculate absorb_nabla2 .* fft_2 \n
1711  *
1712  * @param [in,out] FFT_1
1713  * @param [in,out] FFT_2
1714  */
1716  TFFTWComplexMatrix& FFT_2)
1717 {
1718  const float * nabla1 = Get_absorb_nabla1().GetRawData();
1719  const float * nabla2 = Get_absorb_nabla2().GetRawData();
1720 
1721  const size_t TotalElementCount = FFT_1.GetTotalElementCount();
1722  const size_t TotalElementCount_SSE = (FFT_1.GetTotalElementCount() >> 1) << 1;
1723 
1724  #pragma omp parallel
1725  {
1726  float * FFT_1_data = FFT_1.GetRawData();
1727  float * FFT_2_data = FFT_2.GetRawData();
1728 
1729  #pragma omp for schedule (static) nowait
1730  for (size_t i = 0; i < TotalElementCount_SSE; i+=2)
1731  {
1732  __m128 xmm_nabla1 = _mm_set_ps(nabla1[i+1], nabla1[i+1], nabla1[i], nabla1[i]);
1733  __m128 xmm_FFT_1 = _mm_load_ps(&FFT_1_data[2*i]);
1734 
1735  xmm_FFT_1 = _mm_mul_ps(xmm_nabla1, xmm_FFT_1);
1736  _mm_store_ps(&FFT_1_data[2*i], xmm_FFT_1);
1737  }
1738 
1739  #pragma omp for schedule (static)
1740  for (size_t i = 0; i < TotalElementCount; i+=2)
1741  {
1742  __m128 xmm_nabla2 = _mm_set_ps(nabla2[i+1], nabla2[i+1], nabla2[i], nabla2[i]);
1743  __m128 xmm_FFT_2 = _mm_load_ps(&FFT_2_data[2*i]);
1744 
1745  xmm_FFT_2 = _mm_mul_ps(xmm_nabla2, xmm_FFT_2);
1746  _mm_store_ps(&FFT_2_data[2*i], xmm_FFT_2);
1747  }
1748 
1749  //-- non SSE code --//
1750  #ifdef _OPENMP
1751  if (omp_get_thread_num() == omp_get_num_threads() -1)
1752  #endif
1753  {
1754  for (size_t i = TotalElementCount_SSE; i < TotalElementCount ; i++)
1755  {
1756  FFT_1_data[(i<<1)] *= nabla1[i];
1757  FFT_1_data[(i<<1)+1] *= nabla1[i];
1758 
1759  FFT_2_data[(i<<1)] *= nabla2[i];
1760  FFT_2_data[(i<<1)+1] *= nabla2[i];
1761  }
1762  }
1763 
1764  }// parallel
1765  } // end of Compute_Absorb_nabla1_2_SSE2
1766 //------------------------------------------------------------------------------
1767 
1768 
1769  /**
1770  * Sum sub-terms to calculate new pressure, non-linear case.
1771  * @param [in] Absorb_tau_temp -
1772  * @param [in] Absorb_eta_temp - BonA + rho ^2 / 2 rho0 + (rhox_sgx + rhoy_sgy + rhoz_sgz)
1773  * @param [in] BonA_temp - rho0* (duxdx + duydy + duzdz)
1774  */
1776  TRealMatrix& Absorb_eta_temp,
1777  TRealMatrix& BonA_temp)
1778 {
1779  float * tau_data;
1780  float * eta_data;
1781  float * c2_data;
1782 
1783  size_t c2_shift;
1784  size_t tau_eta_shift;
1785 
1786  const float * Absorb_tau_data = Absorb_tau_temp.GetRawData();
1787  const float * Absorb_eta_data = Absorb_eta_temp.GetRawData();
1788 
1789  const size_t TotalElementCount = Get_p().GetTotalElementCount();
1790  const float Divider = 1.0f / (float) TotalElementCount;
1791 
1792  // c2 scalar
1794  {
1795  c2_data = &Parameters->Get_c0_scalar();
1796  c2_shift = 0;
1797  }
1798  else
1799  {
1800  c2_data = Get_c2().GetRawData();
1801  c2_shift = 1;
1802  }
1803 
1804  // eta and tau scalars
1806  {
1807  tau_data = &Parameters->Get_absorb_tau_scalar();
1808  eta_data = &Parameters->Get_absorb_eta_scalar();
1809  tau_eta_shift = 0;
1810  }
1811  else
1812  {
1813  tau_data = Get_absorb_tau().GetRawData();
1814  eta_data = Get_absorb_eta().GetRawData();
1815  tau_eta_shift = 1;
1816  }
1817 
1818  #pragma omp parallel
1819  {
1820  const float * BonA_data = BonA_temp.GetRawData();
1821  float * p_data = Get_p().GetRawData();
1822 
1823  #pragma omp for schedule (static)
1824  for (size_t i = 0; i < TotalElementCount; i++)
1825  {
1826  p_data[i] = (c2_data[i * c2_shift]) *(
1827  BonA_data[i] +
1828  (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) -
1829  (Absorb_eta_data[i] * eta_data[i * tau_eta_shift])
1830  ))
1831  );
1832  }
1833 
1834  }// parallel
1835 }// end of Sum_Subterms_nonlinear
1836 //------------------------------------------------------------------------------
1837 
1838 
1839  /**
1840  * Sum sub-terms to calculate new pressure, linear case.
1841  * @param [in] Absorb_tau_temp - sub-term with absorb_tau
1842  * @param [in] Absorb_eta_temp - sub-term with absorb_eta
1843  * @param [in] Sum_rhoxyz - rhox_sgx + rhoy_sgy + rhoz_sgz
1844  */
1846  TRealMatrix& Absorb_eta_temp,
1847  TRealMatrix& Sum_rhoxyz)
1848 {
1849  const float * tau_data = NULL;
1850  const float * eta_data = NULL;
1851  const float * c2_data = NULL;
1852 
1853  size_t c2_shift = 0;
1854  size_t tau_eta_shift = 0;
1855 
1856  const float * Absorb_tau_data = Absorb_tau_temp.GetRawData();
1857  const float * Absorb_eta_data = Absorb_eta_temp.GetRawData();
1858 
1859  const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
1860  const float Divider = 1.0f / (float) TotalElementCount;
1861 
1862  // c2 scalar
1864  {
1865  c2_data = &Parameters->Get_c0_scalar();
1866  c2_shift = 0;
1867  }
1868  else
1869  {
1870  c2_data = Get_c2().GetRawData();
1871  c2_shift = 1;
1872  }
1873 
1874  // eta and tau scalars
1876  {
1877  tau_data = &Parameters->Get_absorb_tau_scalar();
1878  eta_data = &Parameters->Get_absorb_eta_scalar();
1879  tau_eta_shift = 0;
1880  }
1881  else
1882  {
1883  tau_data = Get_absorb_tau().GetRawData();
1884  eta_data = Get_absorb_eta().GetRawData();
1885  tau_eta_shift = 1;
1886  }
1887 
1888  #pragma omp parallel
1889  {
1890  const float * Sum_rhoxyz_Data = Sum_rhoxyz.GetRawData();
1891  float * p_data = Get_p().GetRawData();
1892 
1893  #pragma omp for schedule (static)
1894  for (size_t i = 0; i < TotalElementCount; i++)
1895  {
1896  p_data[i] = (c2_data[i * c2_shift]) *
1897  (Sum_rhoxyz_Data[i] +
1898  (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) -
1899  (Absorb_eta_data[i] * eta_data[i * tau_eta_shift])
1900  ))
1901  );
1902  }
1903  }// parallel
1904 }// end of Sum_Subterms_linear
1905 //------------------------------------------------------------------------------
1906 
1907 
1908 
1909 /**
1910  * Sum sub-terms for new p, non-linear lossless case.
1911  *
1912  */
1914  {
1915  #pragma omp parallel
1916  {
1917  const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
1918  float * p_data = Get_p().GetRawData();
1919 
1920  const float * rhox_data = Get_rhox().GetRawData();
1921  const float * rhoy_data = Get_rhoy().GetRawData();
1922  const float * rhoz_data = Get_rhoz().GetRawData();
1923 
1924  float * c2_data;
1925  size_t c2_shift;
1926 
1928  {
1929  c2_data = &Parameters->Get_c0_scalar();
1930  c2_shift = 0;
1931  }
1932  else
1933  {
1934  c2_data = Get_c2().GetRawData();
1935  c2_shift = 1;
1936  }
1937 
1938  float * BonA_data;
1939  size_t BonA_shift;
1940 
1942  {
1943  BonA_data = &Parameters->Get_BonA_scalar();
1944  BonA_shift = 0;
1945  }
1946  else
1947  {
1948  BonA_data = Get_BonA().GetRawData();
1949  BonA_shift = 1;
1950  }
1951 
1952  float * rho0_data;
1953  size_t rho0_shift;
1954 
1956  {
1957  rho0_data = &Parameters->Get_rho0_scalar();
1958  rho0_shift = 0;
1959  }
1960  else
1961  {
1962  rho0_data = Get_rho0().GetRawData();
1963  rho0_shift = 1;
1964  }
1965 
1966 
1967  #pragma omp for schedule (static)
1968  for (size_t i = 0; i < TotalElementCount; i++)
1969  {
1970  const float sum_rho = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1971 
1972  p_data[i] = c2_data[i * c2_shift] *
1973  (sum_rho +
1974  (BonA_data[i * BonA_shift] * (sum_rho * sum_rho) /
1975  (2.0f * rho0_data[i * rho0_shift]))
1976  );
1977  }
1978  }// parallel
1979  }// end of Sum_new_p_nonlinear_lossless
1980 //------------------------------------------------------------------------------
1981 
1982 
1983  /**
1984  * Sum sub-terms for new p, linear lossless case.
1985  *
1986  */
1988 {
1989  #pragma omp parallel
1990  {
1991  const float * rhox = Get_rhox().GetRawData();
1992  const float * rhoy = Get_rhoy().GetRawData();
1993  const float * rhoz = Get_rhoz().GetRawData();
1994  float * p_data = Get_p().GetRawData();
1995  const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount();
1996 
1998  {
1999  const float c2_element = Parameters->Get_c0_scalar();
2000 
2001  #pragma omp for schedule (static)
2002  for (size_t i = 0; i < TotalElementCount; i++)
2003  {
2004  p_data[i] = c2_element * (rhox[i] + rhoy[i] + rhoz[i]);
2005  }
2006  }
2007  else
2008  {
2009  const float * c2_data = Get_c2().GetRawData();
2010 
2011  #pragma omp for schedule (static)
2012  for (size_t i = 0; i < TotalElementCount; i++)
2013  {
2014  p_data[i] = (c2_data[i]) * (rhox[i] + rhoy[i] + rhoz[i]);
2015  }
2016  }
2017  }// parallel
2018 }// end of Sum_new_p_linear_lossless()
2019 //------------------------------------------------------------------------------
2020 
2021 
2022 /**
2023  * Compute new p for non-linear case.
2024  */
2026 {
2027  // rhox + rhoy + rhoz
2029  { // absorbing case
2030 
2031  TRealMatrix & Sum_rhoxyz = Get_Temp_1_RS3D();
2032  TRealMatrix & BonA_rho_rhoxyz = Get_Temp_2_RS3D();
2033  TRealMatrix & Sum_du = Get_Temp_3_RS3D();
2034 
2035  Calculate_SumRho_BonA_SumDu_SSE2(Sum_rhoxyz, BonA_rho_rhoxyz, Sum_du);
2036 
2037  // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz))
2039  Get_FFT_Y_temp().Compute_FFT_3D_R2C(Sum_rhoxyz);
2040 
2042 
2043  TRealMatrix& Absorb_tau_temp = Sum_du;
2044  TRealMatrix& Absorb_eta_temp = Sum_rhoxyz;
2045 
2046  Get_FFT_X_temp().Compute_FFT_3D_C2R(Absorb_tau_temp);
2047  Get_FFT_Y_temp().Compute_FFT_3D_C2R(Absorb_eta_temp);
2048 
2049  Sum_Subterms_nonlinear(Absorb_tau_temp, Absorb_eta_temp, BonA_rho_rhoxyz);
2050  }
2051  else
2052  {
2054  }
2055 }// end of Compute_new_p_nonlinear
2056 //------------------------------------------------------------------------------
2057 
2058 
2059 /**
2060  * Compute new p for linear case.
2061  */
2063  {
2064  // rhox + rhoy + rhoz
2066  { // absorbing case
2067 
2068  TRealMatrix& Sum_rhoxyz = Get_Temp_1_RS3D();
2069  TRealMatrix& Sum_rho0_du = Get_Temp_2_RS3D();
2070 
2071  Calculate_SumRho_SumRhoDu(Sum_rhoxyz, Sum_rho0_du);
2072 
2073  // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz))
2074 
2075  Get_FFT_X_temp().Compute_FFT_3D_R2C(Sum_rho0_du);
2076  Get_FFT_Y_temp().Compute_FFT_3D_R2C(Sum_rhoxyz);
2077 
2079 
2080  TRealMatrix& Absorb_tau_temp = Get_Temp_2_RS3D(); // override Sum_rho0_dx
2081  TRealMatrix& Absorb_eta_temp = Get_Temp_3_RS3D();
2082 
2083  Get_FFT_X_temp().Compute_FFT_3D_C2R(Absorb_tau_temp);
2084  Get_FFT_Y_temp().Compute_FFT_3D_C2R(Absorb_eta_temp);
2085 
2086  Sum_Subterms_linear(Absorb_tau_temp, Absorb_eta_temp, Sum_rhoxyz);
2087  }
2088  else
2089  {
2090  // lossless case
2092  }
2093  }// end of Compute_new_p_linear
2094 //------------------------------------------------------------------------------
2095 
2096 
2097 
2098 
2099  /*
2100  * Compute new values of ux_sgx, uy_sgy, uz_sgz.
2101  */
2103  {
2104 
2107  Get_kappa(),
2109 
2113 
2114  #pragma omp parallel
2115  {
2117  { // scalars
2119  {
2129  }
2130  else
2131  {
2134  Get_pml_x_sgx());
2137  Get_pml_y_sgy());
2140  Get_pml_z_sgz());
2141  }
2142  }
2143  else
2144  {// matrices
2146  Get_dt_rho0_sgx(),
2147  Get_pml_x_sgx());
2149  Get_dt_rho0_sgy(),
2150  Get_pml_y_sgy());
2152  Get_dt_rho0_sgz(),
2153  Get_pml_z_sgz());
2154  }
2155  } // parallel
2156 }// end of Compute_uxyz()
2157 //------------------------------------------------------------------------------
2158 
2159 /**
2160  * Add u source to the particle velocity.
2161  */
2163 {
2164  const size_t t_index = Parameters->Get_t_index();
2165 
2166  if (Parameters->Get_ux_source_flag() > t_index)
2167  {
2170  t_index,
2173  }
2174 
2175  if (Parameters->Get_uy_source_flag() > t_index)
2176  {
2179  t_index,
2182  }
2183 
2184  if (Parameters->Get_uz_source_flag() > t_index)
2185  {
2188  t_index,
2191  }
2192 }// end of Add_u_source
2193 //------------------------------------------------------------------------------
2194 
2195 
2196 
2197  /**
2198  * Add in pressure source.
2199  *
2200  */
2202 {
2203  const size_t t_index = Parameters->Get_t_index();
2204 
2205  if (Parameters->Get_p_source_flag() > t_index)
2206  {
2207  float * rhox = Get_rhox().GetRawData();
2208  float * rhoy = Get_rhoy().GetRawData();
2209  float * rhoz = Get_rhoz().GetRawData();
2210 
2211  float * p_source_input = Get_p_source_input().GetRawData();
2212  size_t * p_source_index = Get_p_source_index().GetRawData();
2213 
2214 
2215  size_t index2D = t_index;
2216 
2217  if (Parameters->Get_p_source_many() != 0)
2218  { // is 2D
2219  index2D = t_index * Get_p_source_index().GetTotalElementCount();
2220  }
2221 
2222  // replacement
2223  if (Parameters->Get_p_source_mode() == 0)
2224  {
2225  for (size_t i = 0; i < Get_p_source_index().GetTotalElementCount(); i++)
2226  {
2227  rhox[p_source_index[i]] = p_source_input[index2D];
2228  rhoy[p_source_index[i]] = p_source_input[index2D];
2229  rhoz[p_source_index[i]] = p_source_input[index2D];
2230 
2231  if (Parameters->Get_p_source_many() != 0) index2D++;
2232  }
2233  }
2234  // Addition
2235  else
2236  {
2237 
2238  for (size_t i = 0; i < Get_p_source_index().GetTotalElementCount(); i++)
2239  {
2240  rhox[p_source_index[i]] += p_source_input[index2D];
2241  rhoy[p_source_index[i]] += p_source_input[index2D];
2242  rhoz[p_source_index[i]] += p_source_input[index2D];
2243 
2244  if (Parameters->Get_p_source_many() != 0) index2D++;
2245  }
2246  }// type of replacement
2247  }// if do at all
2248 }// end of Add_p_source
2249 //------------------------------------------------------------------------------
2250 
2251 
2252 /**
2253  * Calculated shifted velocities.
2254  * \n
2255  * ux_shifted = real(ifft(bsxfun(\@times, x_shift_neg, fft(ux_sgx, [], 1)), [], 1)); \n
2256  * uy_shifted = real(ifft(bsxfun(\@times, y_shift_neg, fft(uy_sgy, [], 2)), [], 2)); \n
2257  * uz_shifted = real(ifft(bsxfun(\@times, z_shift_neg, fft(uz_sgz, [], 3)), [], 3)); \n
2258  */
2260 {
2261  const TFloatComplex * x_shift_neg_r = (TFloatComplex *) Get_x_shift_neg_r().GetRawData();
2262  const TFloatComplex * y_shift_neg_r = (TFloatComplex *) Get_y_shift_neg_r().GetRawData();
2263  const TFloatComplex * z_shift_neg_r = (TFloatComplex *) Get_z_shift_neg_r().GetRawData();
2264 
2265  TFloatComplex * FFT_shift_temp = (TFloatComplex *) Get_FFT_shift_temp().GetRawData();
2266 
2267 
2268  // sizes of frequency spaces
2270  XShiftDims.X = XShiftDims.X/2 + 1;
2271 
2273  YShiftDims.Y = YShiftDims.Y/2 + 1;
2274 
2276  ZShiftDims.Z = ZShiftDims.Z/2 + 1;
2277 
2278  // normalization constants for FFTs
2279  const float DividerX = 1.0f / (float) Parameters->GetFullDimensionSizes().X;
2280  const float DividerY = 1.0f / (float) Parameters->GetFullDimensionSizes().Y;
2281  const float DividerZ = 1.0f / (float) Parameters->GetFullDimensionSizes().Z;
2282 
2283  //-------------------------- ux_shifted --------------------------------------
2285 
2286  #pragma omp parallel for schedule (static)
2287  for (size_t z = 0; z < XShiftDims.Z; z++)
2288  {
2289  register size_t i = z * XShiftDims.Y * XShiftDims.X;
2290  for (size_t y = 0; y < XShiftDims.Y; y++)
2291  {
2292  for (size_t x = 0; x < XShiftDims.X; x++)
2293  {
2294  TFloatComplex Temp;
2295 
2296  Temp.real = ((FFT_shift_temp[i].real * x_shift_neg_r[x].real) -
2297  (FFT_shift_temp[i].imag * x_shift_neg_r[x].imag)
2298  ) * DividerX;
2299 
2300 
2301  Temp.imag = ((FFT_shift_temp[i].imag * x_shift_neg_r[x].real) +
2302  (FFT_shift_temp[i].real * x_shift_neg_r[x].imag)
2303  ) * DividerX;
2304 
2305  FFT_shift_temp[i] = Temp;
2306 
2307  i++;
2308  } // x
2309  } // y
2310  }//z*/
2312 
2313 
2314  //-------------------------- uy_shifted --------------------------------------
2316 
2317  #pragma omp parallel for schedule (static)
2318  for (size_t z = 0; z < YShiftDims.Z; z++)
2319  {
2320  register size_t i = z * YShiftDims.Y * YShiftDims.X;
2321  for (size_t y = 0; y < YShiftDims.Y; y++)
2322  {
2323  for (size_t x = 0; x < YShiftDims.X; x++)
2324  {
2325  TFloatComplex Temp;
2326 
2327  Temp.real = ((FFT_shift_temp[i].real * y_shift_neg_r[y].real) -
2328  (FFT_shift_temp[i].imag * y_shift_neg_r[y].imag)) *
2329  DividerY;
2330 
2331 
2332  Temp.imag = ((FFT_shift_temp[i].imag * y_shift_neg_r[y].real) +
2333  (FFT_shift_temp[i].real * y_shift_neg_r[y].imag)
2334  ) * DividerY;
2335 
2336  FFT_shift_temp[i] = Temp;
2337 
2338  i++;
2339  } // x
2340  } // y
2341  }//z
2343 
2344 
2345  //-------------------------- uz_shifted --------------------------------------
2347  #pragma omp parallel for schedule (static)
2348  for (size_t z = 0; z < ZShiftDims.Z; z++)
2349  {
2350  register size_t i = z * ZShiftDims.Y * ZShiftDims.X;
2351  for (size_t y = 0; y < ZShiftDims.Y; y++)
2352  {
2353  for (size_t x = 0; x < ZShiftDims.X; x++)
2354  {
2355  TFloatComplex Temp;
2356 
2357  Temp.real = ((FFT_shift_temp[i].real * z_shift_neg_r[z].real) -
2358  (FFT_shift_temp[i].imag * z_shift_neg_r[z].imag)) *
2359  DividerZ;
2360 
2361 
2362  Temp.imag = ((FFT_shift_temp[i].imag * z_shift_neg_r[z].real) +
2363  (FFT_shift_temp[i].real * z_shift_neg_r[z].imag)
2364  ) * DividerZ;
2365 
2366  FFT_shift_temp[i] = Temp;
2367 
2368  i++;
2369  } // x
2370  } // y
2371  }//z
2373 }// end of Calculate_shifted_velocity()
2374 //------------------------------------------------------------------------------
2375 
2376 
2377 /**
2378  * Compute the main time loop of KSpaceFirstOrder3D.
2379  *
2380  */
2382 {
2383 
2384  ActPercent = 0;
2385  // set ActPercent to correspond the t_index after recovery
2386  if (Parameters->Get_t_index() > 0)
2387  {
2388  ActPercent = (Parameters->Get_t_index() / (Parameters->Get_Nt() / 100));
2389  }
2390 
2391  PrintOtputHeader();
2392 
2393  IterationTime.Start();
2394 
2396  {
2397  const size_t t_index = Parameters->Get_t_index();
2398 
2399  Compute_uxyz();
2400  // add in the velocity u source term
2401  Add_u_source();
2402 
2403  // add in the transducer source term (t = t1) to ux
2404  if (Parameters->Get_transducer_source_flag() > t_index)
2405  {
2407  }
2408 
2409  Compute_duxyz();
2410 
2412  {
2414  }
2415  else
2416  {
2418  }
2419 
2420 
2421  // add in the source pressure term
2422  Add_p_source();
2423 
2425  {
2427  }
2428  else
2429  {
2431  }
2432 
2433  // calculate initial pressure
2434  if ((t_index == 0) && (Parameters->Get_p0_source_flag() == 1)) Calculate_p0_source();
2435 
2436  StoreSensorData();
2437  PrintStatisitcs();
2439 
2440  }// time loop
2441 }// end of Compute_Main_Loop()
2442 //------------------------------------------------------------------------------
2443 
2444 
2445 /**
2446  * Print progress statistics.
2447  *
2448  */
2450 {
2451  const float Nt = (float) Parameters->Get_Nt();
2452  const size_t t_index = Parameters->Get_t_index();
2453 
2454 
2455  if (t_index > (ActPercent * Nt * 0.01f) )
2456  {
2458 
2459  IterationTime.Stop();
2460 
2461  const double ElTime = IterationTime.GetElapsedTime();
2463  const double ToGo = ((ElTimeWithLegs / (float) (t_index + 1)) * Nt) - ElTimeWithLegs;
2464 
2465  struct tm *current;
2466  time_t now;
2467  time(&now);
2468  now += ToGo;
2469  current = localtime(&now);
2470 
2471  fprintf(stdout, "%5li%c %9.3fs %9.3fs %02i/%02i/%02i %02i:%02i:%02i\n",
2472  (size_t) ((t_index) / (Nt * 0.01f)),'%',
2473  ElTime, ToGo,
2474  current->tm_mday, current->tm_mon+1, current->tm_year-100,
2475  current->tm_hour, current->tm_min, current->tm_sec
2476  );
2477 
2478  fflush(stdout);
2479  }
2480 }// end of KSpaceFirstOrder3DSolver
2481 //------------------------------------------------------------------------------
2482 
2483 /**
2484  * Print the header of the progress statistics.
2485  */
2487 {
2488  fprintf(stdout,"-------------------------------------------------------------\n");
2489  fprintf(stdout,"....................... Simulation ..........................\n");
2490  fprintf(stdout,"Progress...ElapsedTime........TimeToGo......TimeOfTermination\n");
2491 }// end of PrintOtputHeader
2492 //------------------------------------------------------------------------------
2493 
2494 /**
2495  * Is time to checkpoint
2496  * @return true if it is necessary to stop to checkpoint?
2497  */
2499 {
2500  if (!Parameters->IsCheckpointEnabled()) return false;
2501 
2502  TotalTime.Stop();
2503 
2504  return (TotalTime.GetElapsedTime() > (float) Parameters->GetCheckpointInterval());
2505 
2506 }// end of IsTimeToCheckpoint
2507 //------------------------------------------------------------------------------
2508 
2509 
2510 /**
2511  * Post processing the quantities, closing the output streams and storing the
2512  * sensor mask.
2513  *
2514  */
2516 {
2517  if (Parameters->IsStore_p_final())
2518  {
2520  }// p_final
2521 
2522  if (Parameters->IsStore_u_final())
2523  {
2527  }// u_final
2528 
2529  // Apply post-processing and close
2532 
2533 
2534  // store sensor mask if wanted
2536  {
2537  if (Parameters->Get_sensor_mask_type() == TParameters::smt_index)
2538  {
2542  }
2543  if (Parameters->Get_sensor_mask_type() == TParameters::smt_corners)
2544  {
2548  }
2549  }
2550 }// end of PostPorcessing
2551 //------------------------------------------------------------------------------
2552 
2553 
2554 /**
2555  * Store sensor data.
2556  *
2557  */
2559 {
2560  // Unless the time for sampling has come, exit
2562  {
2564  {
2566  }
2568  }
2569 }// end of StoreData
2570 //------------------------------------------------------------------------------
2571 
2572 /**
2573  * Save checkpoint data into the checkpoint file, flush aggregated outputs into
2574  * the output file.
2575  */
2577 {
2578  // export FFTW wisdom
2580 
2581 
2582  // Create Checkpoint file
2583  THDF5_File & HDF5_CheckpointFile = Parameters->HDF5_CheckpointFile;
2584  // if it happens and the file is opened (from the recovery, close it)
2585  if (HDF5_CheckpointFile.IsOpened()) HDF5_CheckpointFile.Close();
2586  // Create the new file (overwrite the old one)
2587  HDF5_CheckpointFile.Create(Parameters->GetCheckpointFileName().c_str());
2588 
2589 
2590  //--------------------- Store Matrices ------------------------------//
2591 
2592  // Store all necessary matrices in Checkpoint file
2593  MatrixContainer.StoreDataIntoCheckpointHDF5File(HDF5_CheckpointFile);
2594 
2595  // Write t_index
2596  HDF5_CheckpointFile.WriteScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2597  t_index_Name,
2598  Parameters->Get_t_index());
2599  // store basic dimension sizes (Nx, Ny, Nz) - Nt is not necessary
2600  HDF5_CheckpointFile.WriteScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2601  Nx_Name,
2603  HDF5_CheckpointFile.WriteScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2604  Ny_Name,
2606  HDF5_CheckpointFile.WriteScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2607  Nz_Name,
2609 
2610 
2611  // Write checkpoint file header
2612  THDF5_FileHeader CheckpointFileHeader = Parameters->HDF5_FileHeader;
2613 
2614  CheckpointFileHeader.SetFileType(THDF5_FileHeader::hdf5_ft_checkpoint);
2615  CheckpointFileHeader.SetCodeName(GetCodeName());
2616  CheckpointFileHeader.SetActualCreationTime();
2617 
2618  CheckpointFileHeader.WriteHeaderToCheckpointFile(HDF5_CheckpointFile);
2619 
2620  // Close the checkpoint file
2621  HDF5_CheckpointFile.Close();
2622 
2623  // checkpoint only if necessary (t_index >= start_index)
2625  {
2627  }
2629 }// end of SaveCheckpointData()
2630 //------------------------------------------------------------------------------
2631 
2632 
2633 /**
2634  * Write statistics and the header into the output file.
2635  */
2637 {
2638  // write t_index into the output file
2640  t_index_Name,
2641  Parameters->Get_t_index());
2642 
2643  // Write scalars
2645  THDF5_FileHeader & HDF5_FileHeader = Parameters->HDF5_FileHeader;
2646 
2647  // Write File header
2648 
2649  HDF5_FileHeader.SetCodeName(GetCodeName());
2650  HDF5_FileHeader.SetMajorFileVersion();
2651  HDF5_FileHeader.SetMinorFileVersion();
2652  HDF5_FileHeader.SetActualCreationTime();
2653  HDF5_FileHeader.SetFileType(THDF5_FileHeader::hdf5_ft_output);
2654  HDF5_FileHeader.SetHostName();
2655 
2656  HDF5_FileHeader.SetMemoryConsumption(ShowMemoryUsageInMB());
2657 
2658  // Stop total timer here
2659  TotalTime.Stop();
2660  HDF5_FileHeader.SetExecutionTimes(GetCumulatedTotalTime(),
2665 
2666  HDF5_FileHeader.SetNumberOfCores();
2668 }// end of WriteOutputDataInfo
2669 //------------------------------------------------------------------------------
2670 
2671 
2672 /**
2673  *
2674  * Restore cumulated elapsed time form Output file (header stored in TParameters)
2675  * Open the header, read this and store into TTimeMeasure classes.
2676  */
2678 {
2679  double ElapsedTotalTime, ElapsedDataLoadTime, ElapsedPreProcessingTime,
2680  ElapsedSimulationTime, ElapsedPostProcessingTime;
2681 
2682  /// Get execution times stored in the output file header
2683  Parameters->HDF5_FileHeader.GetExecutionTimes(ElapsedTotalTime,
2684  ElapsedDataLoadTime,
2685  ElapsedPreProcessingTime,
2686  ElapsedSimulationTime,
2687  ElapsedPostProcessingTime);
2688 
2694 
2695 }// end of RestoreCumulatedElapsedFromOutputFile
2696 //------------------------------------------------------------------------------
2697 
2698 
2699 /**
2700  * Check the output file has the correct format and version.
2701  * @throw ios::failure if an error happens
2702  */
2704 {
2705  // The header has already been read
2706  THDF5_FileHeader & OutputFileHeader = Parameters->HDF5_FileHeader;
2707  THDF5_File & OutputFile = Parameters->HDF5_OutputFile;
2708 
2709  // test file type
2710  if (OutputFileHeader.GetFileType() != THDF5_FileHeader::hdf5_ft_output)
2711  {
2712  char ErrorMessage[256] = "";
2713  sprintf(ErrorMessage,
2715  Parameters->GetOutputFileName().c_str());
2716  throw ios::failure(ErrorMessage);
2717  }
2718 
2719  // test file major version
2720  if (!OutputFileHeader.CheckMajorFileVersion())
2721  {
2722  char ErrorMessage[256] = "";
2723  sprintf(ErrorMessage,
2725  Parameters->GetCheckpointFileName().c_str(),
2726  OutputFileHeader.GetCurrentHDF5_MajorVersion().c_str());
2727  throw ios::failure(ErrorMessage);
2728  }
2729 
2730  // test file minor version
2731  if (!OutputFileHeader.CheckMinorFileVersion())
2732  {
2733  char ErrorMessage[256] = "";
2734  sprintf(ErrorMessage,
2736  Parameters->GetCheckpointFileName().c_str(),
2737  OutputFileHeader.GetCurrentHDF5_MinorVersion().c_str());
2738  throw ios::failure(ErrorMessage);
2739  }
2740 
2741 
2742  // Check dimension sizes
2743  TDimensionSizes OutputDimSizes;
2744  OutputFile.ReadScalarValue(OutputFile.GetRootGroup(),
2745  Nx_Name,
2746  OutputDimSizes.X);
2747 
2748  OutputFile.ReadScalarValue(OutputFile.GetRootGroup(),
2749  Ny_Name,
2750  OutputDimSizes.Y);
2751 
2752  OutputFile.ReadScalarValue(OutputFile.GetRootGroup(),
2753  Nz_Name,
2754  OutputDimSizes.Z);
2755 
2756  if (Parameters->GetFullDimensionSizes() != OutputDimSizes)
2757  {
2758  char ErrorMessage[256] = "";
2759  sprintf(ErrorMessage,
2761  OutputDimSizes.X,
2762  OutputDimSizes.Y,
2763  OutputDimSizes.Z,
2767 
2768  throw ios::failure(ErrorMessage);
2769  }
2770 }// end of CheckOutputFile
2771 //------------------------------------------------------------------------------
2772 
2773 
2774 /**
2775  * Check the file type and the version of the checkpoint file.
2776  * @throw ios::failure if an error happens
2777  *
2778  */
2780 {
2781  // read the header and check the file version
2782  THDF5_FileHeader CheckpointFileHeader;
2783  THDF5_File & HDF5_CheckpointFile = Parameters->HDF5_CheckpointFile;
2784 
2785  CheckpointFileHeader.ReadHeaderFromCheckpointFile(HDF5_CheckpointFile);
2786 
2787  // test file type
2788  if (CheckpointFileHeader.GetFileType() != THDF5_FileHeader::hdf5_ft_checkpoint)
2789  {
2790  char ErrorMessage[256] = "";
2791  sprintf(ErrorMessage,
2793  Parameters->GetCheckpointFileName().c_str());
2794  throw ios::failure(ErrorMessage);
2795  }
2796 
2797  // test file major version
2798  if (!CheckpointFileHeader.CheckMajorFileVersion())
2799  {
2800  char ErrorMessage[256] = "";
2801  sprintf(ErrorMessage,
2803  Parameters->GetCheckpointFileName().c_str(),
2804  CheckpointFileHeader.GetCurrentHDF5_MajorVersion().c_str());
2805  throw ios::failure(ErrorMessage);
2806  }
2807 
2808  // test file minor version
2809  if (!CheckpointFileHeader.CheckMinorFileVersion())
2810  {
2811  char ErrorMessage[256] = "";
2812  sprintf(ErrorMessage,
2814  Parameters->GetCheckpointFileName().c_str(),
2815  CheckpointFileHeader.GetCurrentHDF5_MinorVersion().c_str());
2816  throw ios::failure(ErrorMessage);
2817  }
2818 
2819 
2820  // Check dimension sizes
2821  TDimensionSizes CheckpointDimSizes;
2822  HDF5_CheckpointFile.ReadScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2823  Nx_Name,
2824  CheckpointDimSizes.X);
2825 
2826  HDF5_CheckpointFile.ReadScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2827  Ny_Name,
2828  CheckpointDimSizes.Y);
2829 
2830  HDF5_CheckpointFile.ReadScalarValue(HDF5_CheckpointFile.GetRootGroup(),
2831  Nz_Name,
2832  CheckpointDimSizes.Z);
2833 
2834  if (Parameters->GetFullDimensionSizes() != CheckpointDimSizes)
2835  {
2836  char ErrorMessage[256] = "";
2837  sprintf(ErrorMessage,
2839  CheckpointDimSizes.X,
2840  CheckpointDimSizes.Y,
2841  CheckpointDimSizes.Z,
2845 
2846  throw ios::failure(ErrorMessage);
2847  }
2848 }// end of CheckCheckpointFile
2849 //------------------------------------------------------------------------------
2850 
2851 //----------------------------------------------------------------------------//
2852 // Private methods //
2853 //----------------------------------------------------------------------------//
Class for HDF5 header.
Definition: HDF5_File.h:749
size_t Z
Z dimension size.
void PrintFullNameCodeAndLicense(FILE *file)
Print the code name and license.
TRealMatrix & Get_absorb_nabla1()
Get the absorb_nabla1 matrix from the container.
TDimensionSizes GetReducedDimensionSizes() const
Reduced dimension sizes of the simulation (complex classes).
Definition: Parameters.h:86
float & Get_c0_scalar()
Get c0_scalar value.
Definition: Parameters.h:177
TRealMatrix & Get_dt_rho0_sgy()
Get the dt.*rho0_sgy matrix from the container.
void Compute_dt_rho_sg_mul_ifft_div_2(const TRealMatrix &dt_rho_0_sgx, TFFTWComplexMatrix &FFT)
Compute dt ./ rho0_sgx .* ifft (FFT).
virtual float * GetRawData()
Get raw data out of the class (for direct kernel access).
float Get_dz() const
Get dz value.
Definition: Parameters.h:104
static void ExportWisdom()
Export wisdom to the file.
TTimeMeasure TotalTime
Total time of the simulation.
void AddStreamsIntoContainer(TMatrixContainer &MatrixContainer)
Create all streams in container (no file manipulation).
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_CheckpointDimensionsDoNotMatch
KSpaceFirstOrder3DSolver error message.
TFFTWComplexMatrix & Get_FFT_X_temp()
Get the FFT_X_temp from the container.
void Compute_rhoxyz_linear()
Compute new values of rhox, rhoy, rhoz for linear case.
void InitializeFFTWPlans()
Initialize FFT plans.
void Compute_uz_sgz_normalize(const TRealMatrix &FFT_p, const TRealMatrix &dt_rho0, const TRealMatrix &pml)
Compute a new value for uz_sgz, default case.
void StoreDataIntoCheckpointHDF5File(THDF5_File &HDF5_File)
Store selected matrices into the checkpoint file.
TRealMatrix & Get_ux_source_input()
Get the ux_source_input matrix from the container.
const char *const Parameters_ERR_FMT_IncorrectMajorHDF5FileVersion
Command line parameters error message.
TMatrixContainer MatrixContainer
Matrix container with all the matrix classes.
void Compute_new_p_nonlinear()
Calculate new p, non-linear case.
void SaveScalarsToHDF5File(THDF5_File &HDF5_OutputFile)
Save scalars into the output HDF5 file.
Definition: Parameters.cpp:362
size_t GetStartTimeIndex() const
Get start time index for sensor recording.
Definition: Parameters.h:215
TRealMatrix & Get_Temp_3_RS3D()
Get the Temp_3_RS3D matrix from the container.
TRealMatrix & Get_absorb_tau()
Get the absorb_tau matrix from the container.
TComplexMatrix & Get_x_shift_neg_r()
Get the x_shift_neg_r matrix from the container.
void Compute_FFT_1DY_R2C(TRealMatrix &InMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the Y dimension.
void Calculate_SumRho_SumRhoDu(TRealMatrix &Sum_rhoxyz, TRealMatrix &Sum_rho0_du)
Calculate two temporary sums in the new pressure formula, linear absorbing case.
THDF5_File HDF5_OutputFile
Handle to the output HDF5 file.
Definition: Parameters.h:264
float Get_c_ref() const
Get c_ref value.
Definition: Parameters.h:107
void SetMinorFileVersion()
Set minor file version.
Definition: HDF5_File.h:850
void Compute_uxyz()
compute new values of for ux_sgx, uy_sgy, uz_sgz.
void Compute_ux_sgx_normalize_scalar_nonuniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &dxudxn_sgx, const TRealMatrix &pml)
Compute a new value of ux_sgx, scalar, non-uniform case.
TRealMatrix & Get_duydy()
Get the duydy matrix from the container.
void PrintOtputHeader()
Print the header of the progress statistics.
size_t GetNumberOfThreads() const
Get number of threads.
Definition: Parameters.h:210
TRealMatrix & Get_pml_x()
Get the pml_x matrix from the container.
size_t GetVerboseInterval() const
Get verbose interval.
Definition: Parameters.h:212
const char *const uy_final_Name
uy_final variable name
Definition: MatrixNames.h:304
float & Get_BonA_scalar()
Get BonA_scalar value.
Definition: Parameters.h:187
bool IsCheckpointInterruption() const
Was the loop interrupted to checkpoint?
bool IsTimeToCheckpoint()
Is time to checkpoint (save actual state on disk)?
virtual size_t GetTotalElementCount() const
Get element count of the matrix.
void SetMajorFileVersion()
Set major file version.
Definition: HDF5_File.h:845
TRealMatrix & Get_dzudzn_sgz()
Get the dzudzn_sgz matrix from the container.
void Create_FFT_Plan_1DZ_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real in the Z dimension.
void Compute_rhoxyz_nonlinear()
Compute new values of rhox, rhoy, rhoz for non-linear case.
const char *const p_final_Name
p_final variable name
Definition: MatrixNames.h:265
void WriteScalarValue(const hid_t ParentGroup, const char *DatasetName, const float Value)
Write the scalar value under a specified group - float value.
Definition: HDF5_File.cpp:794
void SetProcessorAffinity()
Set processor affinity.
virtual void WriteDataToHDF5File(THDF5_File &HDF5_File, const char *MatrixName, const size_t CompressionLevel)
Write data into the HDF5 file.
Definition: RealMatrix.cpp:114
TComplexMatrix & Get_ddz_k_shift_neg()
Get the ddz_k_shift_neg matrix from the container.
TRealMatrix & Get_kappa()
Get the kappa matrix from the container.
TTimeMeasure SimulationTime
Simulation time of the simulation.
virtual void FreeMemory()
Memory deallocation.
TTimeMeasure PostProcessingTime
Post-processing time of the simulation.
const char *const uz_final_Name
uz_final variable name
Definition: MatrixNames.h:306
TRealMatrix & Get_pml_z()
Get the pml_z matrix from the container.
TComplexMatrix & Get_ddz_k_shift_pos()
Get the ddz_k_shift_pos matrix from the container.
void WriteHeaderToOutputFile(THDF5_File &OutputFile)
Write header to the output file.
Definition: HDF5_File.cpp:1451
void Add_p_source()
Add in pressure source.
TRealMatrix & Get_dxudxn_sgx()
Get the dxudxn_sgx matrix from the container.
size_t Get_ux_source_flag() const
Get ux_source_flag value.
Definition: Parameters.h:127
void Compute_FFT_1DX_C2R(TRealMatrix &OutMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the X dimension.
void Create_FFT_Plan_1DY_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex in the Y dimension.
void Compute_uy_sgy_normalize(const TRealMatrix &FFT_p, const TRealMatrix &dt_rho0, const TRealMatrix &pml)
Compute a new value of uy_sgy, default case.
TRealMatrix & Get_pml_z_sgz()
Get the pml_z_sgz matrix from the container.
TOutputStreamContainer OutputStreamContainer
Output stream container.
Tuxyz_sgxyzMatrix & Get_ux_sgx()
Get the ux_sgx matrix from the container.
TSenosrMaskType Get_sensor_mask_type() const
Get sensor mask type (linear or corners).
Definition: Parameters.h:156
void PrintStatisitcs()
Print progress statistics.
TParameters * Parameters
Global parameters of the simulation.
void Create_FFT_Plan_3D_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex.
void FreeAllStreams()
Free all streams - destroy them.
TRealMatrix & Get_c2()
Get the c^2 matrix from the container.
void Set_t_index(const size_t new_t_index)
Set simulation time step – should be used only when recovering from checkpoint.
Definition: Parameters.h:93
void Compute_FFT_1DZ_C2R(TRealMatrix &OutMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the Z dimension.
Tuxyz_sgxyzMatrix & Get_uy_sgy()
Get the uy_sgy matrix from the container.
THDF5_FileHeader HDF5_FileHeader
Handle to file header.
Definition: Parameters.h:269
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_IncorrectCheckpointFileFormat
KSpaceFirstOrder3DSolver error message.
const char *const Ny_Name
Ny variable name.
Definition: MatrixNames.h:66
Tuxyz_sgxyzMatrix & Get_uy_shifted()
Get the uy_shifted matrix from the container.
void Add_u_source(const TRealMatrix &u_source_input, const TIndexMatrix &u_source_index, const size_t t_index, const size_t u_source_mode, const size_t u_source_many)
Add in velocity source terms.
static bool IsAccessible(const char *FileName)
Does the file exist? static method.
Definition: HDF5_File.h:550
void Compute_uz_sgz_normalize_scalar_uniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &pml)
Compute a new value for uz_sgz, scalar, uniform case.
bool IsOpened() const
Is the file opened?
Definition: HDF5_File.h:539
THDF5_FileHeader::THDF5_FileType GetFileType()
Get File type.
Definition: HDF5_File.cpp:1514
void ReadScalarValue(const hid_t ParentGroup, const char *DatasetName, float &Value)
Read the scalar value under a specified group - float value.
Definition: HDF5_File.cpp:922
virtual size_t GetTotalElementCount() const
Get total element count of the matrix.
TRealMatrix & Get_dyudyn_sgy()
Get the dyudyn_sgy matrix from the container.
TRealMatrix & Get_p_source_input()
Get the p_source_input matrix from the container.
void Compute_duxyz()
Compute new values of for duxdx, duydy, dzdz.
string GetCheckpointFileName() const
Get checkpoint filename.
Definition: Parameters.h:205
virtual size_t * GetRawData()
Get raw data out of the class (for direct kernel access).
bool Get_c0_scalar_flag() const
Get c0_scalar_flag value.
Definition: Parameters.h:175
void Close()
Close file.
Definition: HDF5_File.cpp:169
float imag
imaginary part
Definition: ComplexMatrix.h:54
void Add_u_source()
Add u source to the particle velocity.
virtual size_t ShowMemoryUsageInMB()
Get memory usage in MB.
void Calculate_SumRho_BonA_SumDu_SSE2(TRealMatrix &RHO_Temp, TRealMatrix &BonA_Temp, TRealMatrix &Sum_du)
Calculate three temporary sums in the new pressure formula, non-linear absorbing case, SSE2 version.
TRealMatrix & Get_transducer_source_input()
Get the transducer_source_input matrix from the container.
size_t X
X dimension size.
size_t Get_u_source_many() const
Get u_source_many value.
Definition: Parameters.h:133
void PostPorcessing()
Post processing, and closing the output streams.
bool Get_BonA_scalar_flag() const
Get BonA_scalar_flag value.
Definition: Parameters.h:185
static string GetCurrentHDF5_MajorVersion()
Get string version of current Major version.
Definition: HDF5_File.h:829
TComplexMatrix & Get_z_shift_neg_r()
Get the y_shift_neg_r from the container.
Structure for complex values.
Definition: ComplexMatrix.h:49
The header file containing the main class of the project responsible for the entire simulation...
THDF5_File HDF5_InputFile
Handle to the input HDF5 file.
Definition: Parameters.h:258
void CheckCheckpointFile()
Check the checkpoint file has the correct format and version.
void RecomputeIndicesToMatlab()
Recompute indices C++ -> MATLAB.
Tuxyz_sgxyzMatrix & Get_uz_sgz()
Get the uz_sgz matrix from the container.
void Create_FFT_Plan_1DX_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex in the X dimension.
TRealMatrix & Get_p0_source_input()
Get the p0_source_input from the container.
void AddTransducerSource(const TIndexMatrix &u_source_index, TIndexMatrix &delay_mask, const TRealMatrix &transducer_signal)
Add transducer data source to X component.
TRealMatrix & Get_p()
Get the p matrix from the container.
TComplexMatrix & Get_ddy_k_shift_pos()
Get the ddy_k_shift_pos matrix from the container.
TTimeMeasure DataLoadTime
Data load time of the simulation.
TRealMatrix & Get_BonA()
Get the BonA matrix from the container.
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_IncorrectOutputFileFormat
KSpaceFirstOrder3DSolver error message.
TRealMatrix & Get_dxudxn()
Get the dxudxn matrix from the container.
void SetExecutionTimes(const double TotalTime, const double LoadTime, const double PreProcessingTime, const double SimulationTime, const double PostprocessingTime)
Set execution times.
Definition: HDF5_File.cpp:1634
void Generate_kappa()
Generate kappa matrix for non-absorbing media.
Class implementing 3D Real-To-Complex and Complex-To-Real transforms using FFTW interface.
double GetCumulatedElapsedTimeOverPreviousLegs() const
Get time spent in previous legs.
Definition: TimeMeasure.h:141
virtual TDimensionSizes GetDimensionSizes() const
Get dimension sizes of the matrix.
void Sum_Subterms_linear(TRealMatrix &Absorb_tau_temp, TRealMatrix &Absorb_eta_temp, TRealMatrix &Sum_rhoxyz)
Sum sub-terms to calculate new pressure, linear case.
void Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_x(const float dt_rho_0_sgx, const TRealMatrix &dxudxn_sgx, TFFTWComplexMatrix &FFT)
Compute dt ./ rho0_sgx .* ifft (FFT), if rho0_sgx is scalar, non uniform grid, x component.
const char *const Nx_Name
Nx variable name.
Definition: MatrixNames.h:64
void Compute_new_p_linear()
Calculate new p linear-case, absorbing.
TRealMatrix & Get_dzudzn()
Get the dzudzn matrix from the container.
void Compute_Absorb_nabla1_2_SSE2(TFFTWComplexMatrix &FFT_1, TFFTWComplexMatrix &FFT_2)
Compute absorbing term with abosrb_nabla1 and absorb_nabla2, SSE2 version.
size_t Get_t_index() const
Get simulation time step.
Definition: Parameters.h:91
void Compute_MainLoop()
Compute the main time loop of the kspaceFirstOrder3D.
void Create_FFT_Plan_1DZ_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex in the Z dimension.
void FreeAllMatrices()
Free all matrices - destroy them.
void Calculate_p0_source()
Calculate p0_source.
TRealMatrix & Get_absorb_nabla2()
Get the absorb_nabla2 matrix from the container.
TDimensionSizes GetFullDimensionSizes() const
Full dimension sizes of the simulation (real classes).
Definition: Parameters.h:84
void SetHostName()
Set host name.
Definition: HDF5_File.cpp:1586
void Compute_FFT_3D_C2R(TRealMatrix &OutMatrix)
Compute 3D out-of-place Complex-to-Real FFT.
void Compute_FFT_1DX_R2C(TRealMatrix &InMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the X dimension.
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_OutputDimensionsDoNotMatch
KSpaceFirstOrder3DSolver error message.
void StoreSensorData()
Store sensor data.
static void DeleteStoredWisdom()
Destroy wisdom in the file (delete it).
TRealMatrix & Get_rho0()
Get the rho0 matrix from the container.
void WriteOutputDataInfo()
Write statistics and header into the output file.
void ReadHeaderFromCheckpointFile(THDF5_File &CheckpointFile)
Read Header from checkpoint file (necessary for checkpoint-restart).
Definition: HDF5_File.cpp:1413
void RestoreCumulatedElapsedFromOutputFile()
Reads the header of the output file and sets the cumulative elapsed time from the first log...
TRealMatrix & Get_uz_source_input()
Get the uz_source_input matrix from the container.
void ReadHeaderFromOutputFile(THDF5_File &OutputFile)
Read Header from output file (necessary for checkpoint-restart).
Definition: HDF5_File.cpp:1374
Tuxyz_sgxyzMatrix & Get_uz_shifted()
Get the uz_shifted matrix from the container.
TIndexMatrix & Get_sensor_mask_index()
Get the sensor_mask_index matrix from the container.
void Start()
Get start timestamp.
Definition: TimeMeasure.h:94
bool IsStore_u_non_staggered_raw() const
Is –u_non_staggered_raw set?
Definition: Parameters.h:242
TFFTWComplexMatrix & Get_FFT_shift_temp()
Get the FFT_shift_temp the container.
void Create_FFT_Plan_1DY_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real in the Y dimension.
TIndexMatrix & Get_delay_mask()
Get the delay_mask matrix from the container.
size_t Get_uy_source_flag() const
Get uy_source_flag value.
Definition: Parameters.h:129
size_t Get_p_source_mode() const
Get p_source_mode value.
Definition: Parameters.h:144
void Compute_ddx_kappa_fft_p(TRealMatrix &X_Matrix, TFFTWComplexMatrix &FFT_X, TFFTWComplexMatrix &FFT_Y, TFFTWComplexMatrix &FFT_Z, const TRealMatrix &kappa, const TComplexMatrix &ddx, const TComplexMatrix &ddy, const TComplexMatrix &ddz)
Compute part of the new velocity - gradient of p.
TRealMatrix & Get_dt_rho0_sgz()
Get the dt.*rho0_sgz matrix from the container.
void Compute_FFT_1DY_C2R(TRealMatrix &OutMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the Y dimension.
hid_t GetRootGroup() const
Get handle to the root group.
Definition: HDF5_File.h:581
TIndexMatrix & Get_p_source_index()
Get the p_source_index matrix from the container.
void Generate_kappa_absorb_nabla1_absorb_nabla2()
Generate kappa matrix, absorb_nabla1, absorb_nabla2 for absorbing media.
void SampleStreams()
Sample all streams.
void LoadDataFromInputHDF5File(THDF5_File &HDF5_File)
Load all matrices from the HDF5 file.
bool IsStore_p_final() const
Is –p_final specified at the command line?
Definition: Parameters.h:237
TRealMatrix & Get_dyudyn()
Get the dyudyn matrix from the container.
TRealMatrix & Get_pml_y_sgy()
Get the pml_y_sgy matrix from the container.
TRealMatrix & Get_pml_y()
Get the pml_y matrix from the container.
TIndexMatrix & Get_u_source_index()
Get the u_source_index matrix from the container.
double GetCumulatedTotalTime() const
Get total simulation time cumulated over all legs.
The header file containing all error messages of the project.
void Calculate_dt_rho0_non_uniform()
Calculate dt ./ rho0 for non-uniform grids.
TIndexMatrix & Get_sensor_mask_corners()
Get the sensor_mask_corners matrix from the container.
size_t Get_p0_source_flag() const
Get p0_source_flag value.
Definition: Parameters.h:140
size_t Get_p_source_many() const
Get p_source_many value.
Definition: Parameters.h:142
static void ImportWisdom()
Import wisdom from the file.
void ReopenStreams()
Reopen streams after checkpoint file (datasets).
void Compute_uy_sgy_normalize_scalar_nonuniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &dyudyn_sgy, const TRealMatrix &pml)
Compute a new value of uy_sgy, scalar, non-uniform case.
void Open(const char *FileName, unsigned int Flags=H5F_ACC_RDONLY)
Open the file.
Definition: HDF5_File.cpp:136
TRealMatrix & Get_duxdx()
Get the duxdx matrix from the container.
size_t GetElementCount() const
Get element count, in 3D only spatial domain, in 4D with time.
float Get_dt() const
Get dt value.
Definition: Parameters.h:98
float Get_dx() const
Get dx value.
Definition: Parameters.h:100
TRealMatrix & Get_Temp_1_RS3D()
Get the Temp_1_RS3D matrix from the container.
const char *const t_index_Name
t_index name
Definition: MatrixNames.h:43
void Compute_uz_sgz_normalize_scalar_nonuniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &dzudzn_sgz, const TRealMatrix &pml)
Compute a new value for uz_sgz, scalar, non-uniform case.
virtual void LoadInputData()
Load simulation data from the input file.
virtual void CopyData(const TBaseFloatMatrix &src)
Copy data from other matrix with the same size.
float Get_dy() const
Get dy value.
Definition: Parameters.h:102
size_t Get_nonlinear_flag() const
Get nonlinear_flag value.
Definition: Parameters.h:151
TFFTWComplexMatrix & Get_FFT_Z_temp()
Get the FFT_Z_temp from the container.
The class for real matrices.
Definition: RealMatrix.h:48
virtual void PrintParametersOfSimulation(FILE *file)
Print parameters of the simulation.
void Sum_Subterms_nonlinear(TRealMatrix &Absorb_tau_temp, TRealMatrix &Absorb_eta_temp, TRealMatrix &BonA_temp)
Sum sub-terms to calculate new pressure, non-linear case.
void RecomputeIndicesToCPP()
Recompute indices MATALAB->C++.
virtual ~TKSpaceFirstOrder3DSolver()
Destructor.
float & Get_absorb_eta_scalar()
Get absorb_eta_scalar value.
Definition: Parameters.h:180
size_t Y
Y dimension size.
TRealMatrix & Get_dt_rho0_sgx()
Get the dt.*rho0_sgx matrix from the container.
float & Get_rho0_sgy_scalar()
Get rho0_sgy_scalar value.
Definition: Parameters.h:196
float & Get_rho0_scalar()
Get rho0_scalar value.
Definition: Parameters.h:192
size_t Get_uz_source_flag() const
Get uz_source_flag value.
Definition: Parameters.h:131
void Compute_FFT_1DZ_R2C(TRealMatrix &InMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the Z dimension.
void SetActualCreationTime()
Set creation time.
Definition: HDF5_File.cpp:1565
double GetCumulatedPostProcessingTime() const
Get post-processing time cumulated over all legs.
static string GetCurrentHDF5_MinorVersion()
Get string version of current Minor version.
Definition: HDF5_File.h:839
size_t Get_Nt() const
Get Nt value.
Definition: Parameters.h:89
const char *const sensor_mask_corners_Name
sensor_mask_corners variable name
Definition: MatrixNames.h:156
void CheckpointStreams()
Checkpoint streams.
bool CheckMajorFileVersion()
Check major file version.
Definition: HDF5_File.h:863
bool CheckMinorFileVersion()
Check minor file version.
Definition: HDF5_File.h:872
void CheckOutputFile()
Check the output file has the correct format and version.
void WriteHeaderToCheckpointFile(THDF5_File &CheckpointFile)
Write header to the output file.
Definition: HDF5_File.cpp:1473
size_t Get_p_source_flag() const
Get p_source_flag value.
Definition: Parameters.h:138
size_t Get_transducer_source_flag() const
Get transducer_source_flag value.
Definition: Parameters.h:153
void Calculate_shifted_velocity()
Calculate ux_shifted, uy_shifted and uz_shifted.
virtual void WriteDataToHDF5File(THDF5_File &HDF5_File, const char *MatrixName, const size_t CompressionLevel)
Write data into the HDF5 file.
size_t GetCompressionLevel() const
Get compression level.
Definition: Parameters.h:208
bool Get_alpha_coeff_scallar_flag() const
Get alpha_coeff_scallar_flag value.
Definition: Parameters.h:170
float & Get_alpha_coeff_scallar()
Get alpha_coeff_scallar value.
Definition: Parameters.h:172
TTimeMeasure PreProcessingTime
Pre-processing time of the simulation.
void Create_FFT_Plan_3D_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real.
string GetCodeName()
Get code name.
The header file containing the matrix container.
const char *const ux_final_Name
ux_final variable name
Definition: MatrixNames.h:302
double GetCumulatedSimulationTime() const
Get simulation time (time loop) cumulated over all legs.
TFFTWComplexMatrix & Get_FFT_Y_temp()
Get the FFT_Y_temp from the container.
void Stop()
Get stop timestamp.
Definition: TimeMeasure.h:106
TTimeMeasure IterationTime
Iteration time of the simulation.
void Create(const char *FileName, unsigned int Flags=H5F_ACC_TRUNC)
Create the file, overwrite if exist.
Definition: HDF5_File.cpp:103
void SetNumberOfCores()
Set number of cores.
Definition: HDF5_File.cpp:1686
TRealMatrix & Get_duzdz()
Get the duzdz matrix from the container.
void Compute_FFT_3D_R2C(TRealMatrix &InMatrix)
Compute 3D out-of-place Real-to-Complex FFT.
bool IsStore_u_final() const
Is –u_final specified at the command line.
Definition: Parameters.h:255
TRealMatrix & Get_Temp_2_RS3D()
Get the Temp_2_RS3D matrix from the container.
const char *const Parameters_ERR_FMT_IncorrectMinorHDF5FileVersion
Command line parameters error message.
float real
real part
Definition: ComplexMatrix.h:52
size_t Get_u_source_mode() const
Get u_source_mode value.
Definition: Parameters.h:135
virtual void Compute()
Compute the 3D kspace first order simulation.
void SetFileType(const THDF5_FileHeader::THDF5_FileType FileType)
Set file type.
Definition: HDF5_File.cpp:1532
TRealMatrix & Get_rhoz()
Get the rhoz matrix from the container.
TComplexMatrix & Get_ddx_k_shift_neg()
Get the ddx_k_shift_neg matrix from the container.
double GetCumulatedDataLoadTime() const
Get data load time cumulated over all legs.
void Compute_uy_sgy_normalize_scalar_uniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &pml)
Compute a new value of uy_sgy, scalar, uniform case.
void Generate_absorb_tau_absorb_eta_matrix()
Generate absorb_tau, absorb_eta for heterogenous media.
float & Get_absorb_tau_scalar()
Get absorb_tau_scalar value.
Definition: Parameters.h:182
void SetMemoryConsumption(size_t TotalMemory)
Set memory consumption.
Definition: HDF5_File.cpp:1613
void Sum_new_p_nonlinear_lossless()
Sum sub-terms for new p, linear lossless case.
void CreateStreams()
Create all streams - opens the datasets.
TComplexMatrix & Get_ddy_k_shift_neg()
Get the ddy_k_shift_neg matrix from the container.
bool Get_rho0_scalar_flag() const
Get rho0_scalar_flag value.
Definition: Parameters.h:190
virtual void AllocateMemory()
Memory allocation.
void SaveCheckpointData()
Save checkpoint data.
double GetCumulatedPreProcessingTime() const
Get pre-processing time cumulated over all legs.
TComplexMatrix & Get_ddx_k_shift_pos()
Get the ddx_k_shift_pos matrix from the container.
Tuxyz_sgxyzMatrix & Get_ux_shifted()
Get the ux_shifted matrix from the container.
float Get_alpha_power() const
Get alpha_power value.
Definition: Parameters.h:109
size_t Get_absorbing_flag() const
Get absorbing_flag value.
Definition: Parameters.h:149
void AddMatricesIntoContainer()
Set all matrices recored - populate the container.
void LoadDataFromCheckpointHDF5File(THDF5_File &HDF5_File)
Load all matrices from the HDF5 file.
TRealMatrix & Get_pml_x_sgx()
Get the pml_x_sgx matrix from the container.
size_t Get_nonuniform_grid_flag() const
Get nonuniform_grid_flag value.
Definition: Parameters.h:147
string GetOutputFileName() const
Get output file name.
Definition: Parameters.h:203
TRealMatrix & Get_rhox()
Get the rhox matrix from the container.
const char *const Nz_Name
Nz variable name.
Definition: MatrixNames.h:68
The class for complex matrices.
Definition: ComplexMatrix.h:64
const char *const sensor_mask_index_Name
sensor_mask_index variable name
Definition: MatrixNames.h:152
void SetCodeName(const string &CodeName)
Set code name.
Definition: HDF5_File.h:816
size_t ActPercent
Percentage of the simulation done.
virtual void ScalarDividedBy(const float scalar)
Divide scalar/ matrix_element[i].
double GetElapsedTime() const
Get elapsed time.
Definition: TimeMeasure.h:122
void Sum_new_p_linear_lossless()
Sum sub-terms for new p, linear lossless case.
void CloseStreams()
Close all streams.
void Increment_t_index()
Increment simulation time step.
Definition: Parameters.h:95
TComplexMatrix & Get_y_shift_neg_r()
Get the y_shift_neg_r from the container.
void Create_FFT_Plan_1DX_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real in the X dimension.
TRealMatrix & Get_rhoy()
Get the rhoy matrix from the container.
float & Get_rho0_sgx_scalar()
Get rho0_sgx_scalar value.
Definition: Parameters.h:194
void SetCumulatedElapsedTimeOverPreviousLegs(const double ElapsedTime)
Set elapsed time in previous legs of the simulation.
Definition: TimeMeasure.h:151
THDF5_File HDF5_CheckpointFile
Handle to the checkpoint HDF5 file.
Definition: Parameters.h:266
bool IsCopySensorMask() const
is –copy_mask set?
Definition: Parameters.h:258
void CreateAllObjects()
Create instances of all objects in the container.
void Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_z(const float dt_rho_0_sgz, const TRealMatrix &dzudzn_sgz, TFFTWComplexMatrix &FFT)
Compute dt./rho0_sgx .* ifft (FFT), if rho0_sgx is scalar, non uniform grid, z component.
TRealMatrix & Get_absorb_eta()
Get the absorb_eta matrix from the container.
bool IsCheckpointEnabled() const
Is checkpoint enabled.
Definition: Parameters.h:218
Class wrapping the HDF5 routines.
Definition: HDF5_File.h:506
Structure with 4D dimension sizes (3 in space and 1 in time).
void Compute_ux_sgx_normalize_scalar_uniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &pml)
Compute a new value of ux_sgx, scalar, uniform case.
size_t GetCheckpointInterval() const
Get checkpoint interval.
Definition: Parameters.h:220
void GetExecutionTimes(double &TotalTime, double &LoadTime, double &PreProcessingTime, double &SimulationTime, double &PostprocessingTime)
Get execution times stored in the output file header.
Definition: HDF5_File.cpp:1668
void Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_y(const float dt_rho_0_sgy, const TRealMatrix &dyudyn_sgy, TFFTWComplexMatrix &FFT)
Compute dt./rho0_sgx .* ifft (FFT), if rho0_sgx is scalar, non uniform grid, y component.
float & Get_rho0_sgz_scalar()
Get rho0_sgz_scalar value.
Definition: Parameters.h:198
void PostProcessStreams()
Post-process all streams and flush them to the file.
void Compute_ux_sgx_normalize(const TRealMatrix &FFT_p, const TRealMatrix &dt_rho0, const TRealMatrix &pml)
Compute a new value of ux_sgx, default case.
The header file containing the class that implements 3D FFT using the FFTW interface.
static TParameters * GetInstance()
Get instance of the singleton class.
Definition: Parameters.cpp:74
TRealMatrix & Get_uy_source_input()
Get the uy_source_input matrix from the container.
void PreProcessingPhase()
Compute pre-processing phase.