35 #include <sys/resource.h>
41 #define _USE_MATH_DEFINES
45 #pragma comment(lib, "Psapi.lib")
57 #include <immintrin.h>
85 MatrixContainer(), OutputStreamContainer(),
88 TotalTime(), PreProcessingTime(), DataLoadTime (), SimulationTime(),
89 PostProcessingTime(), IterationTime()
95 H5Eset_auto( H5E_DEFAULT, NULL, NULL);
153 HDF5_InputFile.
Close();
160 if (RecoverFromPrevState)
178 HDF5_CheckpointFile.
Close();
221 fprintf(stdout,
"FFT plans creation.........."); fflush(stdout);
225 fprintf(stdout,
"Done \n");
226 fprintf(stdout,
"Pre-processing phase........"); fflush(stdout);
231 fprintf(stdout,
"Done \n");
247 fprintf(stdout,
"-------------------------------------------------------------\n");
248 fprintf(stdout,
".............. Interrupted to checkpoint! ...................\n");
251 fprintf(stdout,
"-------------------------------------------------------------\n");
252 fprintf(stdout,
"Checkpoint in progress......"); fflush(stdout);
258 fprintf(stdout,
"-------------------------------------------------------------\n");
260 fprintf(stdout,
"-------------------------------------------------------------\n");
261 fprintf(stdout,
"Post-processing phase......."); fflush(stdout);
275 fprintf(stdout,
"Done \n");
293 fprintf(file,
"Domain dims: [%4ld, %4ld,%4ld]\n",
313 struct rusage mem_usage;
314 getrusage(RUSAGE_SELF, &mem_usage);
316 return mem_usage.ru_maxrss >> 10;
322 PROCESS_MEMORY_COUNTERS pmc;
324 hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ,
326 GetCurrentProcessId());
328 GetProcessMemoryInfo(hProcess, &pmc,
sizeof(pmc));
329 CloseHandle(hProcess);
331 return pmc.PeakWorkingSetSize >> 20;
344 fprintf(file,
"+----------------------------------------------------+\n");
345 fprintf(file,
"| Build name: kspaceFirstOrder3D v2.15 |\n");
346 fprintf(file,
"| Build date: %*.*s |\n", 10,11,__DATE__);
347 fprintf(file,
"| Build time: %*.*s |\n", 8,8,__TIME__);
348 #if (defined (__KWAVE_GIT_HASH__))
349 fprintf(file,
"| Git hash: %s |\n",__KWAVE_GIT_HASH__);
351 fprintf(file,
"| |\n");
355 fprintf(file,
"| Operating system: Linux x64 |\n");
358 fprintf(file,
"| Operating system: Windows x64 |\n");
362 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
363 fprintf(file,
"| Compiler name: GNU C++ %.19s |\n", __VERSION__);
365 #ifdef __INTEL_COMPILER
366 fprintf(file,
"| Compiler name: Intel C++ %d |\n", __INTEL_COMPILER);
370 #if (defined (__AVX2__))
371 fprintf(file,
"| Instruction set: Intel AVX 2 |\n");
372 #elif (defined (__AVX__))
373 fprintf(file,
"| Instruction set: Intel AVX |\n");
374 #elif (defined (__SSE4_2__))
375 fprintf(file,
"| Instruction set: Intel SSE 4.2 |\n");
376 #elif (defined (__SSE4_1__))
377 fprintf(file,
"| Instruction set: Intel SSE 4.1 |\n");
378 #elif (defined (__SSE3__))
379 fprintf(file,
"| Instruction set: Intel SSE 3 |\n");
380 #elif (defined (__SSE2__))
381 fprintf(file,
"| Instruction set: Intel SSE 2 |\n");
384 fprintf(file,
"| |\n");
385 fprintf(file,
"| Copyright (C) 2014 Jiri Jaros and Bradley Treeby |\n");
386 fprintf(file,
"| http://www.k-wave.org |\n");
387 fprintf(file,
"+----------------------------------------------------+\n");
400 #if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__) || defined(__INTEL_COMPILER))
401 setenv(
"OMP_PROC_BIND",
"TRUE",1);
404 #ifdef __INTEL_COMPILER
405 setenv(
"KMP_AFFINITY",
"none",1);
411 _putenv_s(
"KMP_AFFINITY",
"none");
429 fftwf_init_threads();
438 if (RecoverFromPrevState)
579 #pragma omp for schedule (static)
580 for (
size_t z = 0; z < Z_Size; z++)
582 const float z_f = (float) z;
583 float z_part = 0.5f - fabs(0.5f - z_f * Nz_rec );
584 z_part = (z_part * z_part) * dz_sq_rec;
586 for (
size_t y = 0; y < Y_Size; y++)
588 const float y_f = (float) y;
589 float y_part = 0.5f - fabs(0.5f - y_f * Ny_rec);
590 y_part = (y_part * y_part) * dy_sq_rec;
592 const float yz_part = z_part + y_part;
593 for (
size_t x = 0; x < X_Size; x++)
595 const float x_f = (float) x;
596 float x_part = 0.5f - fabs(0.5f - x_f * Nx_rec);
597 x_part = (x_part * x_part) * dx_sq_rec;
599 float k = c_ref_dt_pi * sqrtf(x_part + yz_part);
602 kappa[(z*Y_Size + y) * X_Size + x ] = (k == 0.0f) ? 1.0f : sin(k)/k;
623 const float pi_2 = float(M_PI) * 2.0f;
629 const float Nx_rec = 1.0f / (float) Nx;
630 const float Ny_rec = 1.0f / (float) Ny;
631 const float Nz_rec = 1.0f / (float) Nz;
642 #pragma omp for schedule (static)
643 for (
size_t z = 0; z < Z_Size; z++)
645 const float z_f = (float) z;
646 float z_part = 0.5f - fabs(0.5f - z_f * Nz_rec );
647 z_part = (z_part * z_part) * dz_sq_rec;
649 for (
size_t y = 0; y < Y_Size; y++)
651 const float y_f = (float) y;
652 float y_part = 0.5f - fabs(0.5f - y_f * Ny_rec);
653 y_part = (y_part * y_part) * dy_sq_rec;
655 const float yz_part = z_part + y_part;
657 size_t i = (z*Y_Size + y) * X_Size;
659 for (
size_t x = 0; x < X_Size; x++)
661 const float x_f = (float) x;
663 float x_part = 0.5f - fabs(0.5f - x_f * Nx_rec);
664 x_part = (x_part * x_part) * dx_sq_rec;
667 float k = pi_2 * sqrt(x_part + yz_part);
668 float c_ref_k = c_ref_dt_2 * k;
670 absorb_nabla1[i] = pow(k, alpha_power - 2);
671 absorb_nabla2[i] = pow(k, alpha_power - 1);
673 kappa[i] = (c_ref_k == 0.0f) ? 1.0f : sin(c_ref_k)/c_ref_k;
675 if (absorb_nabla1[i] == std::numeric_limits<float>::infinity()) absorb_nabla1[i] = 0.0f;
676 if (absorb_nabla2[i] == std::numeric_limits<float>::infinity()) absorb_nabla2[i] = 0.0f;
695 const float tan_pi_y_2 = tan(
float(M_PI_2)* alpha_power);
696 const float alpha_db_neper_coeff = (100.0f * pow(1.0e-6f / (2.0f * (
float) M_PI), alpha_power)) /
697 (20.0f * (
float) M_LOG10E);
743 const float tan_pi_y_2 = tan(
float(M_PI_2)* alpha_power);
747 const float alpha_db_neper_coeff = (100.0f * pow(1.0e-6f / (2.0f * (
float) M_PI), alpha_power)) /
748 (20.0f * (
float) M_LOG10E);
750 #pragma omp for schedule (static)
751 for (
size_t z = 0; z < Z_Size; z++)
753 for (
size_t y = 0; y < Y_Size; y++)
755 size_t i = (z*Y_Size + y) * X_Size;
756 for (
size_t x = 0; x < X_Size; x++)
758 const float alpha_coeff_2 = 2.0f * alpha_coeff[i * alpha_shift] * alpha_db_neper_coeff;
759 absorb_tau[i] = (-alpha_coeff_2) * pow(c0[i * c0_shift],alpha_power-1);
760 absorb_eta[i] = alpha_coeff_2 * pow(c0[i * c0_shift],alpha_power) * tan_pi_y_2;
792 const size_t SliceSize = (X_Size * Y_Size );
794 #pragma omp for schedule (static)
795 for (
size_t z = 0; z < Z_Size; z++)
797 register size_t i = z * SliceSize;
798 for (
size_t y = 0; y < Y_Size; y++)
800 for (
size_t x = 0; x < X_Size; x++)
802 dt_rho0_sgx[i] = (dt * duxdxn_sgx[x]) / dt_rho0_sgx[i];
808 #pragma omp for schedule (static)
809 for (
size_t z = 0; z < Z_Size; z++)
811 register size_t i = z * SliceSize;
812 for (
size_t y = 0; y < Y_Size; y++)
814 const float duydyn_el = duydyn_sgy[y];
815 for (
size_t x = 0; x < X_Size; x++)
817 dt_rho0_sgy[i] = (dt * duydyn_el) / dt_rho0_sgy[i];
823 #pragma omp for schedule (static)
824 for (
size_t z = 0; z < Z_Size; z++)
826 register size_t i = z* SliceSize;
827 const float duzdzn_el = duzdzn_sgz[z];
828 for (
size_t y = 0; y < Y_Size; y++)
830 for (
size_t x = 0; x < X_Size; x++)
832 dt_rho0_sgz[i] = (dt * duzdzn_el) / dt_rho0_sgz[i];
872 #pragma omp parallel for schedule (static) private(tmp)
875 tmp = p0[i] / (3.0f* c2[i * c2_shift]);
937 #pragma omp parallel for schedule (static)
940 c2[i] = c2[i] * c2[i];
980 const float * kappa_data = kappa.
GetRawData();
989 const size_t SliceSize = (X_Size * Y_Size ) << 1;
991 #pragma omp for schedule (static)
992 for (
size_t z = 0; z < Z_Size; z++)
994 register size_t i = z * SliceSize;
995 const size_t z2 = z<<1;
996 for (
size_t y = 0; y < Y_Size; y++)
998 const size_t y2 = y<<1;
999 for (
size_t x = 0; x < X_Size; x++)
1002 const float kappa_el = kappa_data[i>>1];
1003 const float p_k_el_re = p_k_x_data[i] * kappa_el;
1004 const float p_k_el_im = p_k_x_data[i+1] * kappa_el;
1005 const size_t x2 = x<<1;
1008 p_k_x_data[i] = p_k_el_re * ddx_data[x2] - p_k_el_im * ddx_data[x2+1];
1009 p_k_x_data[i+1] = p_k_el_re * ddx_data[x2+1] + p_k_el_im * ddx_data[x2];
1012 p_k_y_data[i] = p_k_el_re * ddy_data[y2] - p_k_el_im * ddy_data[y2+1];
1013 p_k_y_data[i+1] = p_k_el_re * ddy_data[y2+1] + p_k_el_im * ddy_data[y2];
1016 p_k_z_data[i] = p_k_el_re * ddz_data[z2] - p_k_el_im * ddz_data[z2+1];
1017 p_k_z_data[i+1] = p_k_el_re * ddz_data[z2+1] + p_k_el_im * ddz_data[z2];
1038 #pragma omp parallel
1050 const size_t SliceSize = (FFT_X_dim * FFT_Y_dim) << 1;
1058 #pragma omp for schedule (static)
1059 for (
size_t z = 0; z < FFT_Z_dim; z++)
1061 register size_t i = z * SliceSize;
1063 const float ddz_neg_re = ddz[z].
real;
1064 const float ddz_neg_im = ddz[z].
imag;
1065 for (
size_t y = 0; y < FFT_Y_dim; y++)
1067 const float ddy_neg_re = ddy[y].
real;
1068 const float ddy_neg_im = ddy[y].
imag;
1069 for (
size_t x = 0; x < FFT_X_dim; x++)
1071 float FFT_el_x_re = Temp_FFT_X_Data[i];
1072 float FFT_el_x_im = Temp_FFT_X_Data[i+1];
1074 float FFT_el_y_re = Temp_FFT_Y_Data[i];
1075 float FFT_el_y_im = Temp_FFT_Y_Data[i+1];
1077 float FFT_el_z_re = Temp_FFT_Z_Data[i];
1078 float FFT_el_z_im = Temp_FFT_Z_Data[i+1];
1080 const float kappa_el = kappa[i >> 1];
1082 FFT_el_x_re *= kappa_el;
1083 FFT_el_x_im *= kappa_el;
1085 FFT_el_y_re *= kappa_el;
1086 FFT_el_y_im *= kappa_el;
1088 FFT_el_z_re *= kappa_el;
1089 FFT_el_z_im *= kappa_el;
1092 Temp_FFT_X_Data[i] = ((FFT_el_x_re * ddx[x].
real) -
1093 (FFT_el_x_im * ddx[x].imag)
1095 Temp_FFT_X_Data[i + 1] = ((FFT_el_x_im * ddx[x].
real) +
1096 (FFT_el_x_re * ddx[x].imag)
1099 Temp_FFT_Y_Data[i] = ((FFT_el_y_re * ddy_neg_re) -
1100 (FFT_el_y_im * ddy_neg_im)
1102 Temp_FFT_Y_Data[i + 1] = ((FFT_el_y_im * ddy_neg_re) +
1103 (FFT_el_y_re * ddy_neg_im)
1106 Temp_FFT_Z_Data[i] = ((FFT_el_z_re * ddz_neg_re) -
1107 (FFT_el_z_im * ddz_neg_im)
1109 Temp_FFT_Z_Data[i + 1] = ((FFT_el_z_im * ddz_neg_re) +
1110 (FFT_el_z_re * ddz_neg_im)
1128 #pragma omp parallel
1142 const size_t SliceSize = (X_Size * Y_Size );
1144 #pragma omp for schedule (static)
1145 for (
size_t z = 0; z < Z_Size; z++)
1147 register size_t i = z* SliceSize;
1148 for (
size_t y = 0; y < Y_Size; y++)
1150 for (
size_t x = 0; x < X_Size; x++)
1152 duxdx[i] *= duxdxn[x];
1158 #pragma omp for schedule (static)
1159 for (
size_t z = 0; z < Z_Size; z++)
1161 register size_t i = z * SliceSize;
1162 for (
size_t y = 0; y < Y_Size; y++)
1164 const float dyudyn_el = duydyn[y];
1165 for (
size_t x = 0; x < X_Size; x++)
1167 duydy[i] *= dyudyn_el;
1173 #pragma omp for schedule (static)
1174 for (
size_t z = 0; z < Z_Size; z++)
1176 const float duzdzn_el = duzdzn[z];
1177 register size_t i = z * SliceSize;
1178 for (
size_t y = 0; y < Y_Size; y++)
1180 for (
size_t x = 0; x < X_Size; x++)
1182 duzdz[i] *= duzdzn_el;
1205 const size_t SliceSize = Y_Size * X_Size;
1207 #pragma omp parallel
1223 #pragma omp for schedule (static)
1224 for (
size_t z = 0; z < Z_Size; z++)
1226 register size_t i = z * SliceSize;
1227 for (
size_t y = 0; y < Y_Size; y++)
1229 for (
size_t x = 0; x < X_Size; x++)
1231 const float pml_x = pml_x_data[x];
1232 float du = duxdx_data[i];
1234 rhox_data[i] = pml_x * (
1235 ((pml_x * rhox_data[i]) - (dt_rho0 * du))/
1243 #pragma omp for schedule (static)
1244 for (
size_t z = 0; z < Z_Size; z++)
1246 register size_t i = z * SliceSize;
1247 for (
size_t y = 0; y < Y_Size; y++)
1250 for (
size_t x = 0; x < X_Size; x++)
1252 float du = duydy_data[i];
1254 rhoy_data[i] = pml_y * (
1255 ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/
1264 #pragma omp for schedule (static)
1265 for (
size_t z = 0; z < Z_Size; z++)
1267 register size_t i = z * SliceSize;
1269 for (
size_t y = 0; y < Y_Size; y++)
1271 for (
size_t x = 0; x < X_Size; x++)
1273 float du = duzdz_data[i];
1275 rhoz_data[i] = pml_z * (
1276 ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/
1289 #pragma omp for schedule (static)
1290 for (
size_t z = 0; z < Z_Size; z++)
1292 register size_t i = z * SliceSize;
1293 for (
size_t y = 0; y < Y_Size; y++)
1295 for (
size_t x = 0; x < X_Size; x++)
1297 const float pml_x = pml_x_data[x];
1298 const float dt_rho0 = dt_el * rho0_data[i];
1299 float du = duxdx_data[i];
1301 rhox_data[i] = pml_x * (
1302 ((pml_x * rhox_data[i]) - (dt_rho0 * du))/
1310 #pragma omp for schedule (static)
1311 for (
size_t z = 0; z < Z_Size; z++)
1313 register size_t i = z * SliceSize;
1314 for (
size_t y = 0; y < Y_Size; y++)
1317 for (
size_t x = 0; x < X_Size; x++)
1319 const float dt_rho0 = dt_el * rho0_data[i];
1320 float du = duydy_data[i];
1322 rhoy_data[i] = pml_y * (
1323 ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/
1331 #pragma omp for schedule (static)
1332 for (
size_t z = 0; z < Z_Size; z++)
1334 register size_t i = z * SliceSize;
1336 for (
size_t y = 0; y < Y_Size; y++)
1338 for (
size_t x = 0; x < X_Size; x++)
1340 const float dt_rho0 = dt_el * rho0_data[i];
1341 float du = duzdz_data[i];
1343 rhoz_data[i] = pml_z * (
1344 ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/
1368 const size_t SliceSize = Y_Size * X_Size;
1370 #pragma omp parallel
1386 #pragma omp for schedule (static)
1387 for (
size_t z = 0; z < Z_Size; z++)
1389 register size_t i = z * SliceSize;
1390 for (
size_t y = 0; y < Y_Size; y++)
1392 for (
size_t x = 0; x < X_Size; x++)
1394 const float pml_x = pml_x_data[x];
1396 rhox_data[i] = pml_x * (
1397 ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i]))
1404 #pragma omp for schedule (static)
1405 for (
size_t z = 0; z < Z_Size; z++)
1407 register size_t i = z * SliceSize;
1408 for (
size_t y = 0; y < Y_Size; y++)
1411 for (
size_t x = 0; x < X_Size; x++)
1413 rhoy_data[i] = pml_y * (
1414 ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i]))
1421 #pragma omp for schedule (static)
1422 for (
size_t z = 0; z < Z_Size; z++)
1424 register size_t i = z * SliceSize;
1427 for (
size_t y = 0; y < Y_Size; y++)
1429 for (
size_t x = 0; x < X_Size; x++)
1431 rhoz_data[i] = pml_z * (
1432 ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i]))
1445 #pragma omp for schedule (static)
1446 for (
size_t z = 0; z < Z_Size; z++)
1448 register size_t i = z * SliceSize;
1449 for (
size_t y = 0; y < Y_Size; y++)
1451 for (
size_t x = 0; x < X_Size; x++)
1453 const float pml_x = pml_x_data[x];
1454 const float dt_rho0 = dt_el * rho0_data[i];
1456 rhox_data[i] = pml_x * (
1457 ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i]))
1465 #pragma omp for schedule (static)
1466 for (
size_t z = 0; z < Z_Size; z++)
1468 register size_t i = z * SliceSize;
1469 for (
size_t y = 0; y < Y_Size; y++)
1472 for (
size_t x = 0; x < X_Size; x++)
1474 const float dt_rho0 = dt_el * rho0_data[i];
1476 rhoy_data[i] = pml_y * (
1477 ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i]))
1486 #pragma omp for schedule (static)
1487 for (
size_t z = 0; z < Z_Size; z++)
1489 register size_t i = z * SliceSize;
1492 for (
size_t y = 0; y < Y_Size; y++)
1494 for (
size_t x = 0; x < X_Size; x++)
1496 const float dt_rho0 = dt_el * rho0_data[i];
1498 rhoz_data[i] = pml_z * (
1499 ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i]))
1569 #pragma omp parallel
1571 float * RHO_Temp_Data = RHO_Temp.
GetRawData();
1572 float * BonA_Temp_Data = BonA_Temp.
GetRawData();
1573 float * SumDU_Temp_Data= Sum_du.
GetRawData();
1576 const __m128 Two_SSE = _mm_set1_ps(2.0f);
1581 #pragma omp for schedule (static) nowait
1582 for (
size_t i = 0; i < TotalElementCount_4; i+=4)
1584 if (!BonA_flag) BonA_SSE = _mm_load_ps(&BonA[i]);
1586 __m128 xmm1 = _mm_load_ps(&rhox_data[i]);
1587 __m128 xmm2 = _mm_load_ps(&rhoy_data[i]);
1588 __m128 xmm3 = _mm_load_ps(&rhoz_data[i]);
1590 if (!rho0_flag) rho0_SSE = _mm_load_ps(&rho0_data[i]);
1592 __m128 rho_xyz_sq_SSE;
1593 __m128 rho_xyz_el_SSE;
1596 rho_xyz_el_SSE = _mm_add_ps(xmm1, xmm2);
1597 rho_xyz_el_SSE = _mm_add_ps(xmm3, rho_xyz_el_SSE);
1600 _mm_stream_ps(&RHO_Temp_Data[i], rho_xyz_el_SSE);
1603 rho_xyz_sq_SSE = _mm_mul_ps(rho_xyz_el_SSE, rho_xyz_el_SSE);
1605 xmm1 = _mm_mul_ps(rho_xyz_sq_SSE, BonA_SSE);
1606 xmm2 = _mm_mul_ps(Two_SSE, rho0_SSE);
1607 xmm3 = _mm_div_ps(xmm1, xmm2);
1609 xmm1 = _mm_add_ps(xmm3, rho_xyz_el_SSE);
1611 _mm_stream_ps(&BonA_Temp_Data[i], xmm1);
1613 xmm1 = _mm_load_ps(&dux_data[i]);
1614 xmm2 = _mm_load_ps(&duy_data[i]);
1615 xmm3 = _mm_load_ps(&duz_data[i]);
1617 __m128 xmm_acc = _mm_add_ps(xmm1, xmm2);
1618 xmm_acc = _mm_add_ps(xmm_acc, xmm3);
1619 xmm_acc = _mm_mul_ps(xmm_acc, rho0_SSE);
1621 _mm_stream_ps(&SumDU_Temp_Data[i],xmm_acc);
1628 if (omp_get_thread_num() == omp_get_num_threads() -1)
1633 register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1635 RHO_Temp_Data[i] = rho_xyz_el;
1636 BonA_Temp_Data[i] = ((BonA[i * BonA_shift] * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i* rho0_shift])) + rho_xyz_el;
1637 SumDU_Temp_Data[i] = rho0_data[i * rho0_shift] * (dux_data[i] + duy_data[i] + duz_data[i]);
1657 #pragma omp parallel
1667 const float * rho0_data = NULL;
1676 float * Sum_rhoxyz_Data = Sum_rhoxyz.
GetRawData();
1677 float * Sum_rho0_du_Data = Sum_rho0_du.
GetRawData();
1679 #pragma omp for schedule (static)
1680 for (
size_t i = 0; i < TotalElementCount; i++)
1682 Sum_rhoxyz_Data[i] = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1688 #pragma omp for schedule (static) nowait
1689 for (
size_t i = 0; i < TotalElementCount; i++)
1691 Sum_rho0_du_Data[i] = rho0_data_el * (dux_data[i] + duy_data[i] + duz_data[i]);
1696 #pragma omp for schedule (static) nowait
1697 for (
size_t i = 0; i < TotalElementCount; i++)
1699 Sum_rho0_du_Data[i] = rho0_data[i] * (dux_data[i] + duy_data[i] + duz_data[i]);
1724 #pragma omp parallel
1729 #pragma omp for schedule (static) nowait
1730 for (
size_t i = 0; i < TotalElementCount_SSE; i+=2)
1732 __m128 xmm_nabla1 = _mm_set_ps(nabla1[i+1], nabla1[i+1], nabla1[i], nabla1[i]);
1733 __m128 xmm_FFT_1 = _mm_load_ps(&FFT_1_data[2*i]);
1735 xmm_FFT_1 = _mm_mul_ps(xmm_nabla1, xmm_FFT_1);
1736 _mm_store_ps(&FFT_1_data[2*i], xmm_FFT_1);
1739 #pragma omp for schedule (static)
1740 for (
size_t i = 0; i < TotalElementCount; i+=2)
1742 __m128 xmm_nabla2 = _mm_set_ps(nabla2[i+1], nabla2[i+1], nabla2[i], nabla2[i]);
1743 __m128 xmm_FFT_2 = _mm_load_ps(&FFT_2_data[2*i]);
1745 xmm_FFT_2 = _mm_mul_ps(xmm_nabla2, xmm_FFT_2);
1746 _mm_store_ps(&FFT_2_data[2*i], xmm_FFT_2);
1751 if (omp_get_thread_num() == omp_get_num_threads() -1)
1754 for (
size_t i = TotalElementCount_SSE; i < TotalElementCount ; i++)
1756 FFT_1_data[(i<<1)] *= nabla1[i];
1757 FFT_1_data[(i<<1)+1] *= nabla1[i];
1759 FFT_2_data[(i<<1)] *= nabla2[i];
1760 FFT_2_data[(i<<1)+1] *= nabla2[i];
1784 size_t tau_eta_shift;
1786 const float * Absorb_tau_data = Absorb_tau_temp.
GetRawData();
1787 const float * Absorb_eta_data = Absorb_eta_temp.
GetRawData();
1790 const float Divider = 1.0f / (float) TotalElementCount;
1818 #pragma omp parallel
1820 const float * BonA_data = BonA_temp.
GetRawData();
1823 #pragma omp for schedule (static)
1824 for (
size_t i = 0; i < TotalElementCount; i++)
1826 p_data[i] = (c2_data[i * c2_shift]) *(
1828 (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) -
1829 (Absorb_eta_data[i] * eta_data[i * tau_eta_shift])
1849 const float * tau_data = NULL;
1850 const float * eta_data = NULL;
1851 const float * c2_data = NULL;
1853 size_t c2_shift = 0;
1854 size_t tau_eta_shift = 0;
1856 const float * Absorb_tau_data = Absorb_tau_temp.
GetRawData();
1857 const float * Absorb_eta_data = Absorb_eta_temp.
GetRawData();
1860 const float Divider = 1.0f / (float) TotalElementCount;
1888 #pragma omp parallel
1890 const float * Sum_rhoxyz_Data = Sum_rhoxyz.
GetRawData();
1893 #pragma omp for schedule (static)
1894 for (
size_t i = 0; i < TotalElementCount; i++)
1896 p_data[i] = (c2_data[i * c2_shift]) *
1897 (Sum_rhoxyz_Data[i] +
1898 (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) -
1899 (Absorb_eta_data[i] * eta_data[i * tau_eta_shift])
1915 #pragma omp parallel
1967 #pragma omp for schedule (static)
1968 for (
size_t i = 0; i < TotalElementCount; i++)
1970 const float sum_rho = rhox_data[i] + rhoy_data[i] + rhoz_data[i];
1972 p_data[i] = c2_data[i * c2_shift] *
1974 (BonA_data[i * BonA_shift] * (sum_rho * sum_rho) /
1975 (2.0f * rho0_data[i * rho0_shift]))
1989 #pragma omp parallel
2001 #pragma omp for schedule (static)
2002 for (
size_t i = 0; i < TotalElementCount; i++)
2004 p_data[i] = c2_element * (rhox[i] + rhoy[i] + rhoz[i]);
2011 #pragma omp for schedule (static)
2012 for (
size_t i = 0; i < TotalElementCount; i++)
2014 p_data[i] = (c2_data[i]) * (rhox[i] + rhoy[i] + rhoz[i]);
2114 #pragma omp parallel
2215 size_t index2D = t_index;
2227 rhox[p_source_index[i]] = p_source_input[index2D];
2228 rhoy[p_source_index[i]] = p_source_input[index2D];
2229 rhoz[p_source_index[i]] = p_source_input[index2D];
2240 rhox[p_source_index[i]] += p_source_input[index2D];
2241 rhoy[p_source_index[i]] += p_source_input[index2D];
2242 rhoz[p_source_index[i]] += p_source_input[index2D];
2270 XShiftDims.
X = XShiftDims.
X/2 + 1;
2273 YShiftDims.
Y = YShiftDims.
Y/2 + 1;
2276 ZShiftDims.
Z = ZShiftDims.
Z/2 + 1;
2286 #pragma omp parallel for schedule (static)
2287 for (
size_t z = 0; z < XShiftDims.
Z; z++)
2289 register size_t i = z * XShiftDims.
Y * XShiftDims.
X;
2290 for (
size_t y = 0; y < XShiftDims.
Y; y++)
2292 for (
size_t x = 0; x < XShiftDims.
X; x++)
2296 Temp.
real = ((FFT_shift_temp[i].
real * x_shift_neg_r[x].
real) -
2297 (FFT_shift_temp[i].imag * x_shift_neg_r[x].imag)
2301 Temp.
imag = ((FFT_shift_temp[i].
imag * x_shift_neg_r[x].
real) +
2302 (FFT_shift_temp[i].real * x_shift_neg_r[x].imag)
2305 FFT_shift_temp[i] = Temp;
2317 #pragma omp parallel for schedule (static)
2318 for (
size_t z = 0; z < YShiftDims.
Z; z++)
2320 register size_t i = z * YShiftDims.
Y * YShiftDims.
X;
2321 for (
size_t y = 0; y < YShiftDims.
Y; y++)
2323 for (
size_t x = 0; x < YShiftDims.
X; x++)
2327 Temp.
real = ((FFT_shift_temp[i].
real * y_shift_neg_r[y].
real) -
2328 (FFT_shift_temp[i].imag * y_shift_neg_r[y].imag)) *
2332 Temp.
imag = ((FFT_shift_temp[i].
imag * y_shift_neg_r[y].
real) +
2333 (FFT_shift_temp[i].real * y_shift_neg_r[y].imag)
2336 FFT_shift_temp[i] = Temp;
2347 #pragma omp parallel for schedule (static)
2348 for (
size_t z = 0; z < ZShiftDims.
Z; z++)
2350 register size_t i = z * ZShiftDims.
Y * ZShiftDims.
X;
2351 for (
size_t y = 0; y < ZShiftDims.
Y; y++)
2353 for (
size_t x = 0; x < ZShiftDims.
X; x++)
2357 Temp.
real = ((FFT_shift_temp[i].
real * z_shift_neg_r[z].
real) -
2358 (FFT_shift_temp[i].imag * z_shift_neg_r[z].imag)) *
2362 Temp.
imag = ((FFT_shift_temp[i].
imag * z_shift_neg_r[z].
real) +
2363 (FFT_shift_temp[i].real * z_shift_neg_r[z].imag)
2366 FFT_shift_temp[i] = Temp;
2463 const double ToGo = ((ElTimeWithLegs / (float) (t_index + 1)) * Nt) - ElTimeWithLegs;
2469 current = localtime(&now);
2471 fprintf(stdout,
"%5li%c %9.3fs %9.3fs %02i/%02i/%02i %02i:%02i:%02i\n",
2472 (
size_t) ((t_index) / (Nt * 0.01f)),
'%',
2474 current->tm_mday, current->tm_mon+1, current->tm_year-100,
2475 current->tm_hour, current->tm_min, current->tm_sec
2488 fprintf(stdout,
"-------------------------------------------------------------\n");
2489 fprintf(stdout,
"....................... Simulation ..........................\n");
2490 fprintf(stdout,
"Progress...ElapsedTime........TimeToGo......TimeOfTermination\n");
2585 if (HDF5_CheckpointFile.
IsOpened()) HDF5_CheckpointFile.
Close();
2614 CheckpointFileHeader.
SetFileType(THDF5_FileHeader::hdf5_ft_checkpoint);
2621 HDF5_CheckpointFile.
Close();
2653 HDF5_FileHeader.
SetFileType(THDF5_FileHeader::hdf5_ft_output);
2679 double ElapsedTotalTime, ElapsedDataLoadTime, ElapsedPreProcessingTime,
2680 ElapsedSimulationTime, ElapsedPostProcessingTime;
2684 ElapsedDataLoadTime,
2685 ElapsedPreProcessingTime,
2686 ElapsedSimulationTime,
2687 ElapsedPostProcessingTime);
2710 if (OutputFileHeader.
GetFileType() != THDF5_FileHeader::hdf5_ft_output)
2712 char ErrorMessage[256] =
"";
2713 sprintf(ErrorMessage,
2716 throw ios::failure(ErrorMessage);
2722 char ErrorMessage[256] =
"";
2723 sprintf(ErrorMessage,
2727 throw ios::failure(ErrorMessage);
2733 char ErrorMessage[256] =
"";
2734 sprintf(ErrorMessage,
2738 throw ios::failure(ErrorMessage);
2758 char ErrorMessage[256] =
"";
2759 sprintf(ErrorMessage,
2768 throw ios::failure(ErrorMessage);
2788 if (CheckpointFileHeader.
GetFileType() != THDF5_FileHeader::hdf5_ft_checkpoint)
2790 char ErrorMessage[256] =
"";
2791 sprintf(ErrorMessage,
2794 throw ios::failure(ErrorMessage);
2800 char ErrorMessage[256] =
"";
2801 sprintf(ErrorMessage,
2805 throw ios::failure(ErrorMessage);
2811 char ErrorMessage[256] =
"";
2812 sprintf(ErrorMessage,
2816 throw ios::failure(ErrorMessage);
2824 CheckpointDimSizes.
X);
2828 CheckpointDimSizes.
Y);
2832 CheckpointDimSizes.
Z);
2836 char ErrorMessage[256] =
"";
2837 sprintf(ErrorMessage,
2839 CheckpointDimSizes.
X,
2840 CheckpointDimSizes.
Y,
2841 CheckpointDimSizes.
Z,
2846 throw ios::failure(ErrorMessage);
size_t Z
Z dimension size.
void PrintFullNameCodeAndLicense(FILE *file)
Print the code name and license.
TRealMatrix & Get_absorb_nabla1()
Get the absorb_nabla1 matrix from the container.
TDimensionSizes GetReducedDimensionSizes() const
Reduced dimension sizes of the simulation (complex classes).
float & Get_c0_scalar()
Get c0_scalar value.
TRealMatrix & Get_dt_rho0_sgy()
Get the dt.*rho0_sgy matrix from the container.
void Compute_dt_rho_sg_mul_ifft_div_2(const TRealMatrix &dt_rho_0_sgx, TFFTWComplexMatrix &FFT)
Compute dt ./ rho0_sgx .* ifft (FFT).
virtual float * GetRawData()
Get raw data out of the class (for direct kernel access).
float Get_dz() const
Get dz value.
static void ExportWisdom()
Export wisdom to the file.
TTimeMeasure TotalTime
Total time of the simulation.
void AddStreamsIntoContainer(TMatrixContainer &MatrixContainer)
Create all streams in container (no file manipulation).
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_CheckpointDimensionsDoNotMatch
KSpaceFirstOrder3DSolver error message.
TFFTWComplexMatrix & Get_FFT_X_temp()
Get the FFT_X_temp from the container.
void Compute_rhoxyz_linear()
Compute new values of rhox, rhoy, rhoz for linear case.
void InitializeFFTWPlans()
Initialize FFT plans.
void Compute_uz_sgz_normalize(const TRealMatrix &FFT_p, const TRealMatrix &dt_rho0, const TRealMatrix &pml)
Compute a new value for uz_sgz, default case.
void StoreDataIntoCheckpointHDF5File(THDF5_File &HDF5_File)
Store selected matrices into the checkpoint file.
TRealMatrix & Get_ux_source_input()
Get the ux_source_input matrix from the container.
const char *const Parameters_ERR_FMT_IncorrectMajorHDF5FileVersion
Command line parameters error message.
TMatrixContainer MatrixContainer
Matrix container with all the matrix classes.
void Compute_new_p_nonlinear()
Calculate new p, non-linear case.
void SaveScalarsToHDF5File(THDF5_File &HDF5_OutputFile)
Save scalars into the output HDF5 file.
size_t GetStartTimeIndex() const
Get start time index for sensor recording.
TRealMatrix & Get_Temp_3_RS3D()
Get the Temp_3_RS3D matrix from the container.
TRealMatrix & Get_absorb_tau()
Get the absorb_tau matrix from the container.
TComplexMatrix & Get_x_shift_neg_r()
Get the x_shift_neg_r matrix from the container.
void Compute_FFT_1DY_R2C(TRealMatrix &InMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the Y dimension.
void Calculate_SumRho_SumRhoDu(TRealMatrix &Sum_rhoxyz, TRealMatrix &Sum_rho0_du)
Calculate two temporary sums in the new pressure formula, linear absorbing case.
THDF5_File HDF5_OutputFile
Handle to the output HDF5 file.
float Get_c_ref() const
Get c_ref value.
void Compute_uxyz()
compute new values of for ux_sgx, uy_sgy, uz_sgz.
void Compute_ux_sgx_normalize_scalar_nonuniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &dxudxn_sgx, const TRealMatrix &pml)
Compute a new value of ux_sgx, scalar, non-uniform case.
TRealMatrix & Get_duydy()
Get the duydy matrix from the container.
void PrintOtputHeader()
Print the header of the progress statistics.
size_t GetNumberOfThreads() const
Get number of threads.
TRealMatrix & Get_pml_x()
Get the pml_x matrix from the container.
size_t GetVerboseInterval() const
Get verbose interval.
const char *const uy_final_Name
uy_final variable name
float & Get_BonA_scalar()
Get BonA_scalar value.
bool IsCheckpointInterruption() const
Was the loop interrupted to checkpoint?
bool IsTimeToCheckpoint()
Is time to checkpoint (save actual state on disk)?
virtual size_t GetTotalElementCount() const
Get element count of the matrix.
TRealMatrix & Get_dzudzn_sgz()
Get the dzudzn_sgz matrix from the container.
void Create_FFT_Plan_1DZ_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real in the Z dimension.
void Compute_rhoxyz_nonlinear()
Compute new values of rhox, rhoy, rhoz for non-linear case.
const char *const p_final_Name
p_final variable name
void WriteScalarValue(const hid_t ParentGroup, const char *DatasetName, const float Value)
Write the scalar value under a specified group - float value.
void SetProcessorAffinity()
Set processor affinity.
virtual void WriteDataToHDF5File(THDF5_File &HDF5_File, const char *MatrixName, const size_t CompressionLevel)
Write data into the HDF5 file.
TComplexMatrix & Get_ddz_k_shift_neg()
Get the ddz_k_shift_neg matrix from the container.
TRealMatrix & Get_kappa()
Get the kappa matrix from the container.
TTimeMeasure SimulationTime
Simulation time of the simulation.
virtual void FreeMemory()
Memory deallocation.
TTimeMeasure PostProcessingTime
Post-processing time of the simulation.
const char *const uz_final_Name
uz_final variable name
TRealMatrix & Get_pml_z()
Get the pml_z matrix from the container.
TComplexMatrix & Get_ddz_k_shift_pos()
Get the ddz_k_shift_pos matrix from the container.
void Add_p_source()
Add in pressure source.
TRealMatrix & Get_dxudxn_sgx()
Get the dxudxn_sgx matrix from the container.
size_t Get_ux_source_flag() const
Get ux_source_flag value.
void Compute_FFT_1DX_C2R(TRealMatrix &OutMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the X dimension.
void Create_FFT_Plan_1DY_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex in the Y dimension.
void Compute_uy_sgy_normalize(const TRealMatrix &FFT_p, const TRealMatrix &dt_rho0, const TRealMatrix &pml)
Compute a new value of uy_sgy, default case.
TRealMatrix & Get_pml_z_sgz()
Get the pml_z_sgz matrix from the container.
TOutputStreamContainer OutputStreamContainer
Output stream container.
Tuxyz_sgxyzMatrix & Get_ux_sgx()
Get the ux_sgx matrix from the container.
TSenosrMaskType Get_sensor_mask_type() const
Get sensor mask type (linear or corners).
void PrintStatisitcs()
Print progress statistics.
TParameters * Parameters
Global parameters of the simulation.
void Create_FFT_Plan_3D_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex.
void FreeAllStreams()
Free all streams - destroy them.
TRealMatrix & Get_c2()
Get the c^2 matrix from the container.
void Set_t_index(const size_t new_t_index)
Set simulation time step – should be used only when recovering from checkpoint.
void Compute_FFT_1DZ_C2R(TRealMatrix &OutMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the Z dimension.
Tuxyz_sgxyzMatrix & Get_uy_sgy()
Get the uy_sgy matrix from the container.
THDF5_FileHeader HDF5_FileHeader
Handle to file header.
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_IncorrectCheckpointFileFormat
KSpaceFirstOrder3DSolver error message.
const char *const Ny_Name
Ny variable name.
Tuxyz_sgxyzMatrix & Get_uy_shifted()
Get the uy_shifted matrix from the container.
void Add_u_source(const TRealMatrix &u_source_input, const TIndexMatrix &u_source_index, const size_t t_index, const size_t u_source_mode, const size_t u_source_many)
Add in velocity source terms.
static bool IsAccessible(const char *FileName)
Does the file exist? static method.
void Compute_uz_sgz_normalize_scalar_uniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &pml)
Compute a new value for uz_sgz, scalar, uniform case.
bool IsOpened() const
Is the file opened?
void ReadScalarValue(const hid_t ParentGroup, const char *DatasetName, float &Value)
Read the scalar value under a specified group - float value.
virtual size_t GetTotalElementCount() const
Get total element count of the matrix.
TRealMatrix & Get_dyudyn_sgy()
Get the dyudyn_sgy matrix from the container.
TKSpaceFirstOrder3DSolver()
Constructor.
TRealMatrix & Get_p_source_input()
Get the p_source_input matrix from the container.
void Compute_duxyz()
Compute new values of for duxdx, duydy, dzdz.
string GetCheckpointFileName() const
Get checkpoint filename.
virtual size_t * GetRawData()
Get raw data out of the class (for direct kernel access).
bool Get_c0_scalar_flag() const
Get c0_scalar_flag value.
void Compute_c2()
Calculate c^2.
void Add_u_source()
Add u source to the particle velocity.
virtual size_t ShowMemoryUsageInMB()
Get memory usage in MB.
void Calculate_SumRho_BonA_SumDu_SSE2(TRealMatrix &RHO_Temp, TRealMatrix &BonA_Temp, TRealMatrix &Sum_du)
Calculate three temporary sums in the new pressure formula, non-linear absorbing case, SSE2 version.
TRealMatrix & Get_transducer_source_input()
Get the transducer_source_input matrix from the container.
size_t X
X dimension size.
size_t Get_u_source_many() const
Get u_source_many value.
void PostPorcessing()
Post processing, and closing the output streams.
bool Get_BonA_scalar_flag() const
Get BonA_scalar_flag value.
TComplexMatrix & Get_z_shift_neg_r()
Get the y_shift_neg_r from the container.
Structure for complex values.
The header file containing the main class of the project responsible for the entire simulation...
THDF5_File HDF5_InputFile
Handle to the input HDF5 file.
void CheckCheckpointFile()
Check the checkpoint file has the correct format and version.
void RecomputeIndicesToMatlab()
Recompute indices C++ -> MATLAB.
Tuxyz_sgxyzMatrix & Get_uz_sgz()
Get the uz_sgz matrix from the container.
void Create_FFT_Plan_1DX_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex in the X dimension.
TRealMatrix & Get_p0_source_input()
Get the p0_source_input from the container.
void AddTransducerSource(const TIndexMatrix &u_source_index, TIndexMatrix &delay_mask, const TRealMatrix &transducer_signal)
Add transducer data source to X component.
TRealMatrix & Get_p()
Get the p matrix from the container.
TComplexMatrix & Get_ddy_k_shift_pos()
Get the ddy_k_shift_pos matrix from the container.
TTimeMeasure DataLoadTime
Data load time of the simulation.
TRealMatrix & Get_BonA()
Get the BonA matrix from the container.
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_IncorrectOutputFileFormat
KSpaceFirstOrder3DSolver error message.
TRealMatrix & Get_dxudxn()
Get the dxudxn matrix from the container.
void Generate_kappa()
Generate kappa matrix for non-absorbing media.
Class implementing 3D Real-To-Complex and Complex-To-Real transforms using FFTW interface.
double GetCumulatedElapsedTimeOverPreviousLegs() const
Get time spent in previous legs.
virtual TDimensionSizes GetDimensionSizes() const
Get dimension sizes of the matrix.
void Sum_Subterms_linear(TRealMatrix &Absorb_tau_temp, TRealMatrix &Absorb_eta_temp, TRealMatrix &Sum_rhoxyz)
Sum sub-terms to calculate new pressure, linear case.
void Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_x(const float dt_rho_0_sgx, const TRealMatrix &dxudxn_sgx, TFFTWComplexMatrix &FFT)
Compute dt ./ rho0_sgx .* ifft (FFT), if rho0_sgx is scalar, non uniform grid, x component.
const char *const Nx_Name
Nx variable name.
void Compute_new_p_linear()
Calculate new p linear-case, absorbing.
TRealMatrix & Get_dzudzn()
Get the dzudzn matrix from the container.
void Compute_Absorb_nabla1_2_SSE2(TFFTWComplexMatrix &FFT_1, TFFTWComplexMatrix &FFT_2)
Compute absorbing term with abosrb_nabla1 and absorb_nabla2, SSE2 version.
size_t Get_t_index() const
Get simulation time step.
void Compute_MainLoop()
Compute the main time loop of the kspaceFirstOrder3D.
void Create_FFT_Plan_1DZ_R2C(TRealMatrix &InMatrix)
Create FFTW plan for Real-to-Complex in the Z dimension.
void FreeAllMatrices()
Free all matrices - destroy them.
void Calculate_p0_source()
Calculate p0_source.
TRealMatrix & Get_absorb_nabla2()
Get the absorb_nabla2 matrix from the container.
TDimensionSizes GetFullDimensionSizes() const
Full dimension sizes of the simulation (real classes).
void Compute_FFT_3D_C2R(TRealMatrix &OutMatrix)
Compute 3D out-of-place Complex-to-Real FFT.
void Compute_FFT_1DX_R2C(TRealMatrix &InMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the X dimension.
const char *const KSpaceFirstOrder3DSolver_ERR_FMT_OutputDimensionsDoNotMatch
KSpaceFirstOrder3DSolver error message.
void StoreSensorData()
Store sensor data.
static void DeleteStoredWisdom()
Destroy wisdom in the file (delete it).
TRealMatrix & Get_rho0()
Get the rho0 matrix from the container.
void WriteOutputDataInfo()
Write statistics and header into the output file.
void RestoreCumulatedElapsedFromOutputFile()
Reads the header of the output file and sets the cumulative elapsed time from the first log...
TRealMatrix & Get_uz_source_input()
Get the uz_source_input matrix from the container.
Tuxyz_sgxyzMatrix & Get_uz_shifted()
Get the uz_shifted matrix from the container.
TIndexMatrix & Get_sensor_mask_index()
Get the sensor_mask_index matrix from the container.
void Start()
Get start timestamp.
bool IsStore_u_non_staggered_raw() const
Is –u_non_staggered_raw set?
TFFTWComplexMatrix & Get_FFT_shift_temp()
Get the FFT_shift_temp the container.
void Create_FFT_Plan_1DY_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real in the Y dimension.
TIndexMatrix & Get_delay_mask()
Get the delay_mask matrix from the container.
size_t Get_uy_source_flag() const
Get uy_source_flag value.
size_t Get_p_source_mode() const
Get p_source_mode value.
void Compute_ddx_kappa_fft_p(TRealMatrix &X_Matrix, TFFTWComplexMatrix &FFT_X, TFFTWComplexMatrix &FFT_Y, TFFTWComplexMatrix &FFT_Z, const TRealMatrix &kappa, const TComplexMatrix &ddx, const TComplexMatrix &ddy, const TComplexMatrix &ddz)
Compute part of the new velocity - gradient of p.
TRealMatrix & Get_dt_rho0_sgz()
Get the dt.*rho0_sgz matrix from the container.
void Compute_FFT_1DY_C2R(TRealMatrix &OutMatrix)
Compute 1D out-of-place Complex-to-Real FFT in the Y dimension.
hid_t GetRootGroup() const
Get handle to the root group.
TIndexMatrix & Get_p_source_index()
Get the p_source_index matrix from the container.
void Generate_kappa_absorb_nabla1_absorb_nabla2()
Generate kappa matrix, absorb_nabla1, absorb_nabla2 for absorbing media.
void SampleStreams()
Sample all streams.
void LoadDataFromInputHDF5File(THDF5_File &HDF5_File)
Load all matrices from the HDF5 file.
bool IsStore_p_final() const
Is –p_final specified at the command line?
TRealMatrix & Get_dyudyn()
Get the dyudyn matrix from the container.
TRealMatrix & Get_pml_y_sgy()
Get the pml_y_sgy matrix from the container.
TRealMatrix & Get_pml_y()
Get the pml_y matrix from the container.
TIndexMatrix & Get_u_source_index()
Get the u_source_index matrix from the container.
double GetCumulatedTotalTime() const
Get total simulation time cumulated over all legs.
The header file containing all error messages of the project.
void Calculate_dt_rho0_non_uniform()
Calculate dt ./ rho0 for non-uniform grids.
TIndexMatrix & Get_sensor_mask_corners()
Get the sensor_mask_corners matrix from the container.
size_t Get_p0_source_flag() const
Get p0_source_flag value.
size_t Get_p_source_many() const
Get p_source_many value.
static void ImportWisdom()
Import wisdom from the file.
void ReopenStreams()
Reopen streams after checkpoint file (datasets).
void Compute_uy_sgy_normalize_scalar_nonuniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &dyudyn_sgy, const TRealMatrix &pml)
Compute a new value of uy_sgy, scalar, non-uniform case.
void Open(const char *FileName, unsigned int Flags=H5F_ACC_RDONLY)
Open the file.
TRealMatrix & Get_duxdx()
Get the duxdx matrix from the container.
size_t GetElementCount() const
Get element count, in 3D only spatial domain, in 4D with time.
float Get_dt() const
Get dt value.
float Get_dx() const
Get dx value.
TRealMatrix & Get_Temp_1_RS3D()
Get the Temp_1_RS3D matrix from the container.
const char *const t_index_Name
t_index name
void Compute_uz_sgz_normalize_scalar_nonuniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &dzudzn_sgz, const TRealMatrix &pml)
Compute a new value for uz_sgz, scalar, non-uniform case.
virtual void LoadInputData()
Load simulation data from the input file.
virtual void CopyData(const TBaseFloatMatrix &src)
Copy data from other matrix with the same size.
float Get_dy() const
Get dy value.
size_t Get_nonlinear_flag() const
Get nonlinear_flag value.
TFFTWComplexMatrix & Get_FFT_Z_temp()
Get the FFT_Z_temp from the container.
The class for real matrices.
virtual void PrintParametersOfSimulation(FILE *file)
Print parameters of the simulation.
void Sum_Subterms_nonlinear(TRealMatrix &Absorb_tau_temp, TRealMatrix &Absorb_eta_temp, TRealMatrix &BonA_temp)
Sum sub-terms to calculate new pressure, non-linear case.
void RecomputeIndicesToCPP()
Recompute indices MATALAB->C++.
virtual ~TKSpaceFirstOrder3DSolver()
Destructor.
float & Get_absorb_eta_scalar()
Get absorb_eta_scalar value.
size_t Y
Y dimension size.
TRealMatrix & Get_dt_rho0_sgx()
Get the dt.*rho0_sgx matrix from the container.
float & Get_rho0_sgy_scalar()
Get rho0_sgy_scalar value.
float & Get_rho0_scalar()
Get rho0_scalar value.
size_t Get_uz_source_flag() const
Get uz_source_flag value.
void Compute_FFT_1DZ_R2C(TRealMatrix &InMatrix)
Compute 1D out-of-place Real-to-Complex FFT in the Z dimension.
double GetCumulatedPostProcessingTime() const
Get post-processing time cumulated over all legs.
size_t Get_Nt() const
Get Nt value.
const char *const sensor_mask_corners_Name
sensor_mask_corners variable name
void CheckpointStreams()
Checkpoint streams.
void CheckOutputFile()
Check the output file has the correct format and version.
size_t Get_p_source_flag() const
Get p_source_flag value.
size_t Get_transducer_source_flag() const
Get transducer_source_flag value.
void Calculate_shifted_velocity()
Calculate ux_shifted, uy_shifted and uz_shifted.
virtual void WriteDataToHDF5File(THDF5_File &HDF5_File, const char *MatrixName, const size_t CompressionLevel)
Write data into the HDF5 file.
size_t GetCompressionLevel() const
Get compression level.
bool Get_alpha_coeff_scallar_flag() const
Get alpha_coeff_scallar_flag value.
float & Get_alpha_coeff_scallar()
Get alpha_coeff_scallar value.
TTimeMeasure PreProcessingTime
Pre-processing time of the simulation.
void Create_FFT_Plan_3D_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real.
string GetCodeName()
Get code name.
The header file containing the matrix container.
const char *const ux_final_Name
ux_final variable name
double GetCumulatedSimulationTime() const
Get simulation time (time loop) cumulated over all legs.
TFFTWComplexMatrix & Get_FFT_Y_temp()
Get the FFT_Y_temp from the container.
void Stop()
Get stop timestamp.
TTimeMeasure IterationTime
Iteration time of the simulation.
void Create(const char *FileName, unsigned int Flags=H5F_ACC_TRUNC)
Create the file, overwrite if exist.
TRealMatrix & Get_duzdz()
Get the duzdz matrix from the container.
void Compute_FFT_3D_R2C(TRealMatrix &InMatrix)
Compute 3D out-of-place Real-to-Complex FFT.
bool IsStore_u_final() const
Is –u_final specified at the command line.
TRealMatrix & Get_Temp_2_RS3D()
Get the Temp_2_RS3D matrix from the container.
const char *const Parameters_ERR_FMT_IncorrectMinorHDF5FileVersion
Command line parameters error message.
size_t Get_u_source_mode() const
Get u_source_mode value.
virtual void Compute()
Compute the 3D kspace first order simulation.
TRealMatrix & Get_rhoz()
Get the rhoz matrix from the container.
TComplexMatrix & Get_ddx_k_shift_neg()
Get the ddx_k_shift_neg matrix from the container.
double GetCumulatedDataLoadTime() const
Get data load time cumulated over all legs.
void Compute_uy_sgy_normalize_scalar_uniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &pml)
Compute a new value of uy_sgy, scalar, uniform case.
void Generate_absorb_tau_absorb_eta_matrix()
Generate absorb_tau, absorb_eta for heterogenous media.
float & Get_absorb_tau_scalar()
Get absorb_tau_scalar value.
void Sum_new_p_nonlinear_lossless()
Sum sub-terms for new p, linear lossless case.
void CreateStreams()
Create all streams - opens the datasets.
TComplexMatrix & Get_ddy_k_shift_neg()
Get the ddy_k_shift_neg matrix from the container.
bool Get_rho0_scalar_flag() const
Get rho0_scalar_flag value.
virtual void AllocateMemory()
Memory allocation.
void SaveCheckpointData()
Save checkpoint data.
double GetCumulatedPreProcessingTime() const
Get pre-processing time cumulated over all legs.
TComplexMatrix & Get_ddx_k_shift_pos()
Get the ddx_k_shift_pos matrix from the container.
Tuxyz_sgxyzMatrix & Get_ux_shifted()
Get the ux_shifted matrix from the container.
float Get_alpha_power() const
Get alpha_power value.
size_t Get_absorbing_flag() const
Get absorbing_flag value.
void AddMatricesIntoContainer()
Set all matrices recored - populate the container.
void LoadDataFromCheckpointHDF5File(THDF5_File &HDF5_File)
Load all matrices from the HDF5 file.
TRealMatrix & Get_pml_x_sgx()
Get the pml_x_sgx matrix from the container.
size_t Get_nonuniform_grid_flag() const
Get nonuniform_grid_flag value.
string GetOutputFileName() const
Get output file name.
TRealMatrix & Get_rhox()
Get the rhox matrix from the container.
const char *const Nz_Name
Nz variable name.
The class for complex matrices.
const char *const sensor_mask_index_Name
sensor_mask_index variable name
size_t ActPercent
Percentage of the simulation done.
virtual void ScalarDividedBy(const float scalar)
Divide scalar/ matrix_element[i].
double GetElapsedTime() const
Get elapsed time.
void Sum_new_p_linear_lossless()
Sum sub-terms for new p, linear lossless case.
void CloseStreams()
Close all streams.
void Increment_t_index()
Increment simulation time step.
TComplexMatrix & Get_y_shift_neg_r()
Get the y_shift_neg_r from the container.
void Create_FFT_Plan_1DX_C2R(TRealMatrix &OutMatrix)
Create FFTW plan for Complex-to-Real in the X dimension.
TRealMatrix & Get_rhoy()
Get the rhoy matrix from the container.
float & Get_rho0_sgx_scalar()
Get rho0_sgx_scalar value.
void SetCumulatedElapsedTimeOverPreviousLegs(const double ElapsedTime)
Set elapsed time in previous legs of the simulation.
THDF5_File HDF5_CheckpointFile
Handle to the checkpoint HDF5 file.
bool IsCopySensorMask() const
is –copy_mask set?
void CreateAllObjects()
Create instances of all objects in the container.
void Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_z(const float dt_rho_0_sgz, const TRealMatrix &dzudzn_sgz, TFFTWComplexMatrix &FFT)
Compute dt./rho0_sgx .* ifft (FFT), if rho0_sgx is scalar, non uniform grid, z component.
TRealMatrix & Get_absorb_eta()
Get the absorb_eta matrix from the container.
bool IsCheckpointEnabled() const
Is checkpoint enabled.
Class wrapping the HDF5 routines.
Structure with 4D dimension sizes (3 in space and 1 in time).
void Compute_ux_sgx_normalize_scalar_uniform(const TRealMatrix &FFT_p, const float dt_rho0, const TRealMatrix &pml)
Compute a new value of ux_sgx, scalar, uniform case.
size_t GetCheckpointInterval() const
Get checkpoint interval.
void Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_y(const float dt_rho_0_sgy, const TRealMatrix &dyudyn_sgy, TFFTWComplexMatrix &FFT)
Compute dt./rho0_sgx .* ifft (FFT), if rho0_sgx is scalar, non uniform grid, y component.
float & Get_rho0_sgz_scalar()
Get rho0_sgz_scalar value.
void PostProcessStreams()
Post-process all streams and flush them to the file.
void Compute_ux_sgx_normalize(const TRealMatrix &FFT_p, const TRealMatrix &dt_rho0, const TRealMatrix &pml)
Compute a new value of ux_sgx, default case.
The header file containing the class that implements 3D FFT using the FFTW interface.
static TParameters * GetInstance()
Get instance of the singleton class.
TRealMatrix & Get_uy_source_input()
Get the uy_source_input matrix from the container.
void PreProcessingPhase()
Compute pre-processing phase.