![]() |
kspaceFirstOrder3D-OMP 1.0
The C++ implementation of the k-wave toolbox for the time-domain simulation of acoustic wave fields in 3D
|
00001 00033 #include <iostream> 00034 #include <omp.h> 00035 #include <sys/resource.h> 00036 #include <emmintrin.h> 00037 #include <math.h> 00038 #include <time.h> 00039 #include <sstream> 00040 00041 #include <KSpaceSolver/KSpaceFirstOrder3DSolver.h> 00042 00043 #include <Utils/ErrorMessages.h> 00044 00045 #include <MatrixClasses/FFTWComplexMatrix.h> 00046 #include <MatrixClasses/MatrixContainer.h> 00047 00048 using namespace std; 00049 00050 //----------------------------------------------------------------------------// 00051 // Constants // 00052 //----------------------------------------------------------------------------// 00053 00054 00055 00056 00057 //----------------------------------------------------------------------------// 00058 // Public methods // 00059 //----------------------------------------------------------------------------// 00060 00061 00066 TKSpaceFirstOrder3DSolver::TKSpaceFirstOrder3DSolver(): 00067 p_sensor_raw_OutputStream (NULL), 00068 ux_sensor_raw_OutputStream(NULL), 00069 uy_sensor_raw_OutputStream(NULL), 00070 uz_sensor_raw_OutputStream(NULL), 00071 00072 MatrixContainer(), 00073 t_index(0), ActPercent(0), 00074 Parameters(NULL), 00075 TotalTime(), PreProcessingTime(), DataLoadTime (), SimulationTime(), 00076 PostProcessingTime(), IterationTime() 00077 00078 { 00079 TotalTime.Start(); 00080 00081 Parameters = TParameters::GetInstance(); 00082 00083 }// end of TKSpace3DSolver 00084 //------------------------------------------------------------------------------ 00085 00086 00091 TKSpaceFirstOrder3DSolver::~TKSpaceFirstOrder3DSolver(){ 00092 FreeMemory(); 00093 }// end of TKSpace3DSolver 00094 //------------------------------------------------------------------------------ 00095 00096 00101 void TKSpaceFirstOrder3DSolver::AllocateMemory(){ 00102 00103 00104 // create container, then all matrices 00105 MatrixContainer.AddMatricesIntoContainer(); 00106 MatrixContainer.CreateAllObjects(); 00107 00108 00109 //------------------------ Output matrices ---------------------------// 00110 00111 if (Parameters->IsStore_p_raw()) { 00112 p_sensor_raw_OutputStream = new TOutputHDF5Stream(); 00113 if (!p_sensor_raw_OutputStream) throw bad_alloc(); 00114 } 00115 00116 if (Parameters->IsStore_u_raw()){ 00117 00118 ux_sensor_raw_OutputStream = new TOutputHDF5Stream(); 00119 if (!ux_sensor_raw_OutputStream) throw bad_alloc(); 00120 00121 uy_sensor_raw_OutputStream = new TOutputHDF5Stream(); 00122 if (!uy_sensor_raw_OutputStream) throw bad_alloc(); 00123 00124 uz_sensor_raw_OutputStream = new TOutputHDF5Stream(); 00125 if (!uz_sensor_raw_OutputStream) throw bad_alloc(); 00126 }// GetSaveUxyz 00127 00128 00129 }// end of AllocateMemory 00130 //------------------------------------------------------------------------------ 00131 00136 void TKSpaceFirstOrder3DSolver::FreeMemory(){ 00137 00138 00139 MatrixContainer.FreeAllMatrices(); 00140 00141 //-- output matrices --// 00142 if (p_sensor_raw_OutputStream){ 00143 delete p_sensor_raw_OutputStream; 00144 p_sensor_raw_OutputStream = NULL; 00145 } 00146 00147 00148 if (ux_sensor_raw_OutputStream){ 00149 delete ux_sensor_raw_OutputStream; 00150 ux_sensor_raw_OutputStream = NULL; 00151 } 00152 00153 if (uy_sensor_raw_OutputStream){ 00154 delete uy_sensor_raw_OutputStream; 00155 uy_sensor_raw_OutputStream = NULL; 00156 } 00157 00158 if (uz_sensor_raw_OutputStream){ 00159 delete uz_sensor_raw_OutputStream; 00160 uz_sensor_raw_OutputStream = NULL; 00161 } 00162 00163 }// end of FreeMemory 00164 //------------------------------------------------------------------------------ 00165 00171 void TKSpaceFirstOrder3DSolver::LoadInputData(){ 00172 00173 DataLoadTime.Start(); 00174 00175 //-- open and load input file 00176 00177 THDF5_File& HDF5_InputFile = Parameters->HDF5_InputFile; // file is opened (in Parameters) 00178 THDF5_File& HDF5_OutputFile = Parameters->HDF5_OutputFile; 00179 00180 00181 MatrixContainer.LoadMatricesDataFromDisk(HDF5_InputFile); 00182 00183 HDF5_InputFile.Close(); 00184 00185 00186 00187 //--------------- outputs ------------// 00188 00189 HDF5_OutputFile.Create(Parameters->GetOutputFileName().c_str()); 00190 00191 00192 TDimensionSizes TotalSizes(Get_sensor_mask_ind().GetTotalElementCount(),Parameters->Get_Nt() - Parameters->GetStartTimeIndex(), 1); 00193 TDimensionSizes ChunkSizes(Get_sensor_mask_ind().GetTotalElementCount(),1, 1); 00194 00195 00196 if (Parameters->IsStore_p_raw()) { 00197 p_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile, 00198 MatrixContainer[p].HDF5MatrixName.c_str(), 00199 TotalSizes, ChunkSizes, Parameters->GetCompressionLevel()); 00200 } 00201 00202 00203 if (Parameters->IsStore_u_raw()) { 00204 00205 ux_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,ux_Name,TotalSizes, ChunkSizes, Parameters->GetCompressionLevel()); 00206 uy_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,uy_Name,TotalSizes, ChunkSizes, Parameters->GetCompressionLevel()); 00207 uz_sensor_raw_OutputStream->CreateStream(HDF5_OutputFile,uz_Name,TotalSizes, ChunkSizes, Parameters->GetCompressionLevel()); 00208 } 00209 00210 00211 00212 DataLoadTime.Stop(); 00213 00214 }// end of LoadInputData 00215 //------------------------------------------------------------------------------ 00216 00217 00224 void TKSpaceFirstOrder3DSolver::Compute(){ 00225 00226 PreProcessingTime.Start(); 00227 00228 fprintf(stdout,"FFT plans creation.........."); fflush(stdout); 00229 00230 InitializeFFTWPlans( ); 00231 00232 fprintf(stdout,"Done \n"); 00233 fprintf(stdout,"Pre-processing phase........"); fflush(stdout); 00234 00235 00236 PreProcessingPhase( ); 00237 00238 00239 fprintf(stdout,"Done \n"); 00240 fprintf(stdout,"Current memory in use:%8ldMB\n", ShowMemoryUsageInMB()); 00241 PreProcessingTime.Stop(); 00242 00243 fprintf(stdout,"Elapsed time: %8.2fs\n",PreProcessingTime.GetElapsedTime()); 00244 00245 00246 SimulationTime.Start(); 00247 00248 Compute_MainLoop(); 00249 00250 SimulationTime.Stop(); 00251 00252 fprintf(stdout,"-------------------------------------------------------------\n"); 00253 fprintf(stdout,"Elapsed time: %8.2fs\n",SimulationTime.GetElapsedTime()); 00254 fprintf(stdout,"-------------------------------------------------------------\n"); 00255 00256 fprintf(stdout,"Post-processing phase......."); fflush(stdout); 00257 PostProcessingTime.Start(); 00258 00259 00260 PostPorcessing(); 00261 PostProcessingTime.Stop(); 00262 fprintf(stdout,"Done \n"); 00263 fprintf(stdout,"Elapsed time: %8.2fs\n",PostProcessingTime.GetElapsedTime()); 00264 00265 00266 WriteOutputDataInfo(); 00267 00268 Parameters->HDF5_OutputFile.Close(); 00269 00270 00271 }// end of Compute() 00272 //------------------------------------------------------------------------------ 00273 00274 00275 00276 00281 void TKSpaceFirstOrder3DSolver::PrintParametersOfSimulation(FILE * file){ 00282 00283 00284 fprintf(file,"Domain dims: [%4ld, %4ld,%4ld]\n", 00285 Parameters->GetFullDimensionSizes().X, 00286 Parameters->GetFullDimensionSizes().Y, 00287 Parameters->GetFullDimensionSizes().Z); 00288 00289 fprintf(file,"Simulation time steps: %8ld\n", Parameters->Get_Nt()); 00290 }// end of PrintParametersOfTask 00291 //------------------------------------------------------------------------------ 00292 00293 00294 00300 size_t TKSpaceFirstOrder3DSolver::ShowMemoryUsageInMB(){ 00301 00302 struct rusage mem_usage; 00303 getrusage(RUSAGE_SELF, &mem_usage); 00304 00305 return mem_usage.ru_maxrss >> 10; 00306 00307 00308 }// end of ShowMemoryUsageInMB 00309 //------------------------------------------------------------------------------ 00310 00311 00318 void TKSpaceFirstOrder3DSolver::PrintFullNameCodeAndLicense(FILE * file){ 00319 fprintf(file,"\n"); 00320 fprintf(file,"+--------------------------------------------------+\n"); 00321 fprintf(file,"| Build Number: kspaceFirstOrder3D v2.13 |\n"); 00322 fprintf(file,"| Operating System: Linux x64 |\n"); 00323 fprintf(file,"| |\n"); 00324 fprintf(file,"| Copyright (C) 2012 Jiri Jaros and Bradley Treeby |\n"); 00325 fprintf(file,"| http://www.k-wave.org |\n"); 00326 fprintf(file,"+--------------------------------------------------+\n"); 00327 fprintf(file,"\n"); 00328 }// end of GetFullCodeAndLincence 00329 //------------------------------------------------------------------------------ 00330 00331 00332 00333 //----------------------------------------------------------------------------// 00334 // Protected methods // 00335 //----------------------------------------------------------------------------// 00336 00341 void TKSpaceFirstOrder3DSolver::InitializeFFTWPlans(){ 00342 00343 // initialization of FFTW library 00344 fftwf_init_threads(); 00345 fftwf_plan_with_nthreads(Parameters->GetNumberOfThreads()); 00346 00347 // create real to complex plans 00348 Get_FFT_X_temp().CreateFFTPlan3D_R2C(Get_p()); 00349 Get_FFT_Y_temp().CreateFFTPlan3D_R2C(Get_p()); 00350 Get_FFT_Z_temp().CreateFFTPlan3D_R2C(Get_p()); 00351 00352 // create real to complex plans 00353 Get_FFT_X_temp().CreateFFTPlan3D_C2R(Get_p()); 00354 Get_FFT_Y_temp().CreateFFTPlan3D_C2R(Get_p()); 00355 Get_FFT_Z_temp().CreateFFTPlan3D_C2R(Get_p()); 00356 }// end of PrepareData 00357 //------------------------------------------------------------------------------ 00358 00359 00360 00366 void TKSpaceFirstOrder3DSolver::PreProcessingPhase(){ 00367 00368 Get_sensor_mask_ind().RecomputeIndices(); 00369 00370 00371 if ((Parameters->Get_transducer_source_flag() != 0) || 00372 (Parameters->Get_ux_source_flag() != 0) || 00373 (Parameters->Get_uy_source_flag() != 0) || 00374 (Parameters->Get_uz_source_flag() != 0) ){ 00375 Get_u_source_index().RecomputeIndices(); 00376 } 00377 00378 if (Parameters->Get_transducer_source_flag() != 0){ 00379 Get_delay_mask().RecomputeIndices(); 00380 } 00381 00382 if (Parameters->Get_p_source_flag() != 0){ 00383 Get_p_source_index().RecomputeIndices(); 00384 } 00385 00386 00387 00388 // compute dt / rho0_sg... 00389 if (Parameters->Get_rho0_scalar_flag()){ // rho is scalar 00390 00391 Parameters->Get_rho0_sgx_scalar() = Parameters->Get_dt() / Parameters->Get_rho0_sgx_scalar(); 00392 Parameters->Get_rho0_sgy_scalar() = Parameters->Get_dt() / Parameters->Get_rho0_sgy_scalar(); 00393 Parameters->Get_rho0_sgz_scalar() = Parameters->Get_dt() / Parameters->Get_rho0_sgz_scalar(); 00394 00395 // non-uniform grid cannot be pre-calculated :-( 00396 00397 }else { // rho is matrix 00398 if (Parameters->Get_nonuniform_grid_flag()) { 00399 Caclucalte_dt_rho0_non_uniform(); 00400 00401 }else{ 00402 Get_dt_rho0_sgx().ScalarDividedBy(Parameters->Get_dt()); 00403 Get_dt_rho0_sgy().ScalarDividedBy(Parameters->Get_dt()); 00404 Get_dt_rho0_sgz().ScalarDividedBy(Parameters->Get_dt()); 00405 } 00406 } 00407 00408 00409 00410 // generate different matrices 00411 if (Parameters->Get_absorbing_flag() != 0) { 00412 Generate_kappa_absorb_nabla1_absorb_nabla2(); 00413 Generate_absorb_tau_absorb_eta_matrix(); 00414 } 00415 else { 00416 Generate_kappa(); 00417 } 00418 00419 // calculate c^2. It has to be after kappa gen... because of c modification 00420 Compute_c2(); 00421 00422 00423 }// end of ComputeInitPhase 00424 //------------------------------------------------------------------------------ 00425 00426 00432 void TKSpaceFirstOrder3DSolver::Generate_kappa(){ 00433 00434 #ifndef __NO_OMP__ 00435 #pragma omp parallel 00436 #endif 00437 { 00438 00439 const float dx_sq_rec = 1.0f / (Parameters->Get_dx()*Parameters->Get_dx()); 00440 const float dy_sq_rec = 1.0f / (Parameters->Get_dy()*Parameters->Get_dy()); 00441 const float dz_sq_rec = 1.0f / (Parameters->Get_dz()*Parameters->Get_dz()); 00442 00443 const float c_ref_dt_pi = Parameters->Get_c_ref() * Parameters->Get_dt() * float(M_PI); 00444 00445 const float Nx_rec = 1.0f / (float) Parameters->GetFullDimensionSizes().X; 00446 const float Ny_rec = 1.0f / (float) Parameters->GetFullDimensionSizes().Y; 00447 const float Nz_rec = 1.0f / (float) Parameters->GetFullDimensionSizes().Z; 00448 00449 00450 const size_t X_Size = Parameters->GetReducedDimensionSizes().X; 00451 const size_t Y_Size = Parameters->GetReducedDimensionSizes().Y; 00452 const size_t Z_Size = Parameters->GetReducedDimensionSizes().Z; 00453 00454 00455 float * kappa = Get_kappa().GetRawData(); 00456 00457 #ifndef __NO_OMP__ 00458 #pragma omp for schedule (static) 00459 #endif 00460 for (size_t z = 0; z < Z_Size; z++){ 00461 00462 const float z_f = (float) z; 00463 float z_part = 0.5f - fabsf(0.5f - z_f * Nz_rec ); 00464 z_part = (z_part * z_part) * dz_sq_rec; 00465 00466 00467 for (size_t y = 0; y < Y_Size; y++){ 00468 00469 const float y_f = (float) y; 00470 float y_part = 0.5f - fabsf(0.5f - y_f * Ny_rec); 00471 y_part = (y_part * y_part) * dy_sq_rec; 00472 00473 const float yz_part = z_part + y_part; 00474 for (size_t x = 0; x < X_Size; x++){ 00475 00476 const float x_f = (float) x; 00477 float x_part = 0.5f - fabsf(0.5f - x_f * Nx_rec); 00478 x_part = (x_part * x_part) * dx_sq_rec; 00479 00480 float k = c_ref_dt_pi * sqrtf(x_part + yz_part); 00481 00482 // kappa element 00483 kappa[(z*Y_Size + y) * X_Size + x ] = (k == 0.0f) ? 1.0f : sinf(k)/k; 00484 00485 }//x 00486 00487 }//y 00488 }// z 00489 00490 00491 00492 }// parallel 00493 00494 00495 }// end of GenerateKappa 00496 //------------------------------------------------------------------------------ 00497 00503 void TKSpaceFirstOrder3DSolver::Generate_kappa_absorb_nabla1_absorb_nabla2(){ 00504 00505 00506 #ifndef __NO_OMP__ 00507 #pragma omp parallel 00508 #endif 00509 { 00510 00511 const float dx_sq_rec = 1.0f / (Parameters->Get_dx()*Parameters->Get_dx()); 00512 const float dy_sq_rec = 1.0f / (Parameters->Get_dy()*Parameters->Get_dy()); 00513 const float dz_sq_rec = 1.0f / (Parameters->Get_dz()*Parameters->Get_dz()); 00514 00515 00516 const float c_ref_dt_2 = Parameters->Get_c_ref() * Parameters->Get_dt() * 0.5f; 00517 const float pi_2 = float(M_PI) * 2.0f; 00518 00519 const size_t Nx = Parameters->GetFullDimensionSizes().X; 00520 const size_t Ny = Parameters->GetFullDimensionSizes().Y; 00521 const size_t Nz = Parameters->GetFullDimensionSizes().Z; 00522 00523 const float Nx_rec = 1.0f / (float) Nx; 00524 const float Ny_rec = 1.0f / (float) Ny; 00525 const float Nz_rec = 1.0f / (float) Nz; 00526 00527 00528 00529 const size_t X_Size = Parameters->GetReducedDimensionSizes().X; 00530 const size_t Y_Size = Parameters->GetReducedDimensionSizes().Y; 00531 const size_t Z_Size = Parameters->GetReducedDimensionSizes().Z; 00532 00533 00534 00535 float * kappa = Get_kappa().GetRawData(); 00536 float * absorb_nabla1 = Get_absorb_nabla1().GetRawData(); 00537 float * absorb_nabla2 = Get_absorb_nabla2().GetRawData(); 00538 const float alpha_power = Parameters->Get_alpha_power(); 00539 00540 00541 #ifndef __NO_OMP__ 00542 #pragma omp for schedule (static) 00543 #endif 00544 for (size_t z = 0; z < Z_Size; z++){ 00545 00546 const float z_f = (float) z; 00547 float z_part = 0.5f - fabsf(0.5f - z_f * Nz_rec ); 00548 z_part = (z_part * z_part) * dz_sq_rec; 00549 00550 for (size_t y = 0; y < Y_Size; y++){ 00551 00552 const float y_f = (float) y; 00553 float y_part = 0.5f - fabsf(0.5f - y_f * Ny_rec); 00554 y_part = (y_part * y_part) * dy_sq_rec; 00555 00556 const float yz_part = z_part + y_part; 00557 00558 00559 size_t i = (z*Y_Size + y) * X_Size; 00560 00561 for (size_t x = 0; x < X_Size; x++){ 00562 const float x_f = (float) x; 00563 00564 float x_part = 0.5f - fabsf(0.5f - x_f * Nx_rec); 00565 x_part = (x_part * x_part) * dx_sq_rec; 00566 00567 00568 float k = pi_2 * sqrtf(x_part + yz_part); 00569 float c_ref_k = c_ref_dt_2 * k; 00570 00571 absorb_nabla1[i] = powf(k, alpha_power - 2); 00572 absorb_nabla2[i] = powf(k, alpha_power - 1); 00573 00574 kappa[i] = (c_ref_k == 0.0f) ? 1.0f : sinf(c_ref_k)/c_ref_k; 00575 00576 00577 00578 if (absorb_nabla1[i] == INFINITY) absorb_nabla1[i] = 0.0f; 00579 if (absorb_nabla2[i] == INFINITY) absorb_nabla2[i] = 0.0f; 00580 00581 i++; 00582 }//x 00583 00584 }//y 00585 }// z 00586 00587 00588 00589 }// parallel 00590 00591 }// end of Generate_kappa_absorb_nabla1_absorb_nabla2 00592 //------------------------------------------------------------------------------ 00593 00597 void TKSpaceFirstOrder3DSolver::Generate_absorb_tau_absorb_eta_matrix(){ 00598 00599 00600 // test for scalars 00601 if ((Parameters->Get_alpha_coeff_scallar_flag()) && (Parameters->Get_c0_scalar_flag())){ 00602 00603 const float alpha_power = Parameters->Get_alpha_power(); 00604 const float tan_pi_y_2 = tanf(float(M_PI_2)* alpha_power); 00605 const float alpha_db_neper_coeff = (100.0f * powf(1.0e-6f / (2.0f * (float) M_PI), alpha_power)) / 00606 (20.0f * (float) M_LOG10E); 00607 00608 const float alpha_coeff_2 = 2.0f * Parameters->Get_alpha_coeff_scallar() * alpha_db_neper_coeff; 00609 00610 Parameters->Get_absorb_tau_scalar() = (-alpha_coeff_2) * powf(Parameters->Get_c0_scalar(),alpha_power-1); 00611 Parameters->Get_absorb_eta_scalar() = alpha_coeff_2 * powf(Parameters->Get_c0_scalar(),alpha_power) * tan_pi_y_2; 00612 00613 } 00614 else { 00615 00616 #ifndef __NO_OMP__ 00617 #pragma omp parallel 00618 #endif 00619 { 00620 00621 00622 const size_t Z_Size = Parameters->GetFullDimensionSizes().Z; 00623 const size_t Y_Size = Parameters->GetFullDimensionSizes().Y; 00624 const size_t X_Size = Parameters->GetFullDimensionSizes().X; 00625 00626 float * absorb_tau = Get_absorb_tau().GetRawData(); 00627 float * absorb_eta = Get_absorb_eta().GetRawData(); 00628 00629 00630 float * alpha_coeff; 00631 size_t alpha_shift; 00632 00633 if (Parameters->Get_alpha_coeff_scallar_flag()) { 00634 alpha_coeff = &(Parameters->Get_alpha_coeff_scallar()); 00635 alpha_shift = 0; 00636 }else { 00637 alpha_coeff = Get_Temp_1_RS3D().GetRawData(); 00638 alpha_shift = 1; 00639 } 00640 00641 00642 float * c0; 00643 size_t c0_shift; 00644 if (Parameters->Get_c0_scalar_flag()) { 00645 c0 = &(Parameters->Get_c0_scalar()); 00646 c0_shift = 0; 00647 }else { 00648 c0 = Get_c2().GetRawData(); 00649 c0_shift = 1; 00650 } 00651 00652 00653 const float alpha_power = Parameters->Get_alpha_power(); 00654 const float tan_pi_y_2 = tanf(float(M_PI_2)* alpha_power); 00655 00656 //alpha = 100*alpha.*(1e-6/(2*pi)).^y./ 00657 // (20*log10(exp(1))); 00658 const float alpha_db_neper_coeff = (100.0f * powf(1.0e-6f / (2.0f * (float) M_PI), alpha_power)) / 00659 (20.0f * (float) M_LOG10E); 00660 00661 #ifndef __NO_OMP__ 00662 #pragma omp for schedule (static) 00663 #endif 00664 for (size_t z = 0; z < Z_Size; z++){ 00665 for (size_t y = 0; y < Y_Size; y++){ 00666 size_t i = (z*Y_Size + y) * X_Size; 00667 for (size_t x = 0; x < X_Size; x++){ 00668 00669 const float alpha_coeff_2 = 2.0f * alpha_coeff[i * alpha_shift] * alpha_db_neper_coeff; 00670 absorb_tau[i] = (-alpha_coeff_2) * powf(c0[i * c0_shift],alpha_power-1); 00671 absorb_eta[i] = alpha_coeff_2 * powf(c0[i * c0_shift],alpha_power) * tan_pi_y_2; 00672 i++; 00673 00674 }//x 00675 00676 }//y 00677 }// z 00678 }// parallel 00679 } // absorb_tau and aborb_eta = matrics 00680 00681 }// end of Generate_absorb_tau_absorb_eta_matrix 00682 //------------------------------------------------------------------------------ 00683 00688 void TKSpaceFirstOrder3DSolver::Caclucalte_dt_rho0_non_uniform(){ 00689 00690 00691 #ifndef __NO_OMP__ 00692 #pragma omp parallel 00693 #endif 00694 { 00695 00696 float * dt_rho0_sgx = Get_dt_rho0_sgx().GetRawData(); 00697 float * dt_rho0_sgy = Get_dt_rho0_sgy().GetRawData(); 00698 float * dt_rho0_sgz = Get_dt_rho0_sgz().GetRawData(); 00699 00700 const float dt = Parameters->Get_dt(); 00701 00702 const float * duxdxn_sgx = Get_dxudxn_sgx().GetRawData(); 00703 const float * duydyn_sgy = Get_dyudyn_sgy().GetRawData(); 00704 const float * duzdzn_sgz = Get_dzudzn_sgz().GetRawData(); 00705 00706 const size_t Z_Size = Get_dt_rho0_sgx().GetDimensionSizes().Z; 00707 const size_t Y_Size = Get_dt_rho0_sgx().GetDimensionSizes().Y; 00708 const size_t X_Size = Get_dt_rho0_sgx().GetDimensionSizes().X; 00709 00710 const size_t SliceSize = (X_Size * Y_Size ); 00711 00712 00713 00714 #ifndef __NO_OMP__ 00715 #pragma omp for schedule (static) 00716 #endif 00717 for (size_t z = 0; z < Z_Size; z++){ 00718 00719 00720 register size_t i = z* SliceSize; 00721 for (size_t y = 0; y< Y_Size; y++){ 00722 for (size_t x = 0; x < X_Size; x++){ 00723 dt_rho0_sgx[i] = (dt * duxdxn_sgx[x]) / dt_rho0_sgx[i]; 00724 i++; 00725 } // x 00726 } // y 00727 } // z 00728 00729 00730 #ifndef __NO_OMP__ 00731 #pragma omp for schedule (static) 00732 #endif 00733 for (size_t z = 0; z < Z_Size; z++){ 00734 00735 register size_t i = z* SliceSize; 00736 for (size_t y = 0; y< Y_Size; y++){ 00737 const float duydyn_el = duydyn_sgy[y]; 00738 for (size_t x = 0; x < X_Size; x++){ 00739 00740 dt_rho0_sgy[i] = (dt * duydyn_el) / dt_rho0_sgy[i]; 00741 i++; 00742 } // x 00743 } // y 00744 } // z 00745 00746 00747 00748 #ifndef __NO_OMP__ 00749 #pragma omp for schedule (static) 00750 #endif 00751 for (size_t z = 0; z < Z_Size; z++){ 00752 00753 register size_t i = z* SliceSize; 00754 const float duzdzn_el = duzdzn_sgz[z]; 00755 for (size_t y = 0; y< Y_Size; y++){ 00756 for (size_t x = 0; x < X_Size; x++){ 00757 dt_rho0_sgz[i] = (dt * duzdzn_el) / dt_rho0_sgz[i]; 00758 i++; 00759 } // x 00760 } // y 00761 } // z 00762 00763 } // parallel 00764 00765 00766 00767 }// end of Caclucalte_dt_rho0_non_uniform 00768 //------------------------------------------------------------------------------ 00769 00774 void TKSpaceFirstOrder3DSolver::Calculate_p0_source(){ 00775 00776 00777 Get_p().CopyData(Get_p0_source_input()); 00778 00779 const float * p0 = Get_p0_source_input().GetRawData(); 00780 00781 float * c2; 00782 size_t c2_shift; 00783 00784 if (Parameters->Get_c0_scalar_flag()) { 00785 c2 = &Parameters->Get_c0_scalar(); 00786 c2_shift = 0; 00787 00788 }else{ 00789 c2 = Get_c2().GetRawData(); 00790 c2_shift = 1; 00791 } 00792 00793 00794 00795 float * rhox = Get_rhox().GetRawData(); 00796 float * rhoy = Get_rhoy().GetRawData(); 00797 float * rhoz = Get_rhoz().GetRawData(); 00798 00799 //-- add the initial pressure to rho as a mass source --// 00800 00801 float tmp; 00802 00803 #ifndef __NO_OMP__ 00804 #pragma omp parallel for schedule (static) private(tmp) 00805 #endif 00806 for (size_t i = 0; i < Get_rhox().GetTotalElementCount(); i++){ 00807 00808 tmp = p0[i] / (3.0f* c2[i * c2_shift]); 00809 rhox[i] = tmp; 00810 rhoy[i] = tmp; 00811 rhoz[i] = tmp; 00812 } 00813 00814 00815 00816 //------------------------------------------------------------------------// 00817 //-- compute u(t = t1 + dt/2) based on the assumption u(dt/2) = -u(-dt/2) --// 00818 //-- which forces u(t = t1) = 0 --// 00819 //------------------------------------------------------------------------// 00820 00821 00822 Compute_ddx_kappa_fft_p(Get_p(), 00823 Get_FFT_X_temp(),Get_FFT_Y_temp(),Get_FFT_Z_temp(), 00824 Get_kappa(), 00825 Get_ddx_k_shift_pos(),Get_ddy_k_shift_pos(),Get_ddz_k_shift_pos() 00826 ); 00827 00828 00829 if (Parameters->Get_rho0_scalar_flag()){ 00830 00831 if (Parameters->Get_nonuniform_grid_flag() ) { 00832 Get_ux_sgx().Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_x( 00833 Parameters->Get_rho0_sgx_scalar(), Get_dxudxn_sgx(), Get_FFT_X_temp()); 00834 Get_uy_sgy(). Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_y 00835 (Parameters->Get_rho0_sgy_scalar(), Get_dyudyn_sgy(), Get_FFT_Y_temp()); 00836 Get_uz_sgz().Compute_dt_rho_sg_mul_ifft_div_2_scalar_nonuniform_z( 00837 Parameters->Get_rho0_sgz_scalar(), Get_dzudzn_sgz(),Get_FFT_Z_temp()); 00838 00839 } else { 00840 Get_ux_sgx().Compute_dt_rho_sg_mul_ifft_div_2(Parameters->Get_rho0_sgx_scalar(), Get_FFT_X_temp()); 00841 Get_uy_sgy().Compute_dt_rho_sg_mul_ifft_div_2(Parameters->Get_rho0_sgy_scalar(), Get_FFT_Y_temp()); 00842 Get_uz_sgz().Compute_dt_rho_sg_mul_ifft_div_2(Parameters->Get_rho0_sgz_scalar(), Get_FFT_Z_temp()); 00843 } 00844 } else { 00845 // divide the matrix by 2 and multiply with st./rho0_sg 00846 Get_ux_sgx().Compute_dt_rho_sg_mul_ifft_div_2(Get_dt_rho0_sgx(), Get_FFT_X_temp()); 00847 Get_uy_sgy().Compute_dt_rho_sg_mul_ifft_div_2(Get_dt_rho0_sgy(), Get_FFT_Y_temp()); 00848 Get_uz_sgz().Compute_dt_rho_sg_mul_ifft_div_2(Get_dt_rho0_sgz(), Get_FFT_Z_temp()); 00849 } 00850 00851 }// end of Calculate_p0_source 00852 //------------------------------------------------------------------------------ 00853 00854 00855 00860 void TKSpaceFirstOrder3DSolver::Compute_c2(){ 00861 00862 if (Parameters->Get_c0_scalar_flag()) { //scalar 00863 float c = Parameters->Get_c0_scalar(); 00864 Parameters->Get_c0_scalar() = c * c; 00865 00866 }else { 00867 00868 float * c2 = Get_c2().GetRawData(); 00869 00870 #ifndef __NO_OMP__ 00871 #pragma omp parallel for schedule (static) 00872 #endif 00873 for (size_t i=0; i< Get_c2().GetTotalElementCount(); i++){ 00874 c2[i] = c2[i] * c2[i]; 00875 } 00876 }// matrix 00877 }// ComputeC2 00878 //------------------------------------------------------------------------------ 00879 00880 00881 00898 void TKSpaceFirstOrder3DSolver::Compute_ddx_kappa_fft_p(TRealMatrix& X_Matrix, 00899 TFFTWComplexMatrix& FFT_X, TFFTWComplexMatrix& FFT_Y, TFFTWComplexMatrix& FFT_Z, 00900 TRealMatrix& kappa, 00901 TComplexMatrix& ddx, TComplexMatrix& ddy, TComplexMatrix& ddz) 00902 { 00903 // Compute FFT of X 00904 FFT_X.Compute_FFT_3D_R2C(X_Matrix); 00905 00906 #ifndef __NO_OMP__ 00907 #pragma omp parallel 00908 #endif 00909 { 00910 float * p_k_x_data = FFT_X.GetRawData(); 00911 float * p_k_y_data = FFT_Y.GetRawData(); 00912 float * p_k_z_data = FFT_Z.GetRawData(); 00913 00914 float * kappa_data = kappa.GetRawData(); 00915 float * ddx_data = ddx.GetRawData(); 00916 float * ddy_data = ddy.GetRawData(); 00917 float * ddz_data = ddz.GetRawData(); 00918 00919 const size_t Z_Size = FFT_X.GetDimensionSizes().Z; 00920 const size_t Y_Size = FFT_X.GetDimensionSizes().Y; 00921 const size_t X_Size = FFT_X.GetDimensionSizes().X; 00922 00923 const size_t SliceSize = (X_Size * Y_Size ) << 1; 00924 00925 #ifndef __NO_OMP__ 00926 #pragma omp for schedule (static) 00927 #endif 00928 for (size_t z = 0; z < Z_Size; z++){ 00929 00930 register size_t i = z * SliceSize; 00931 const size_t z2 = z<<1; 00932 for (size_t y = 0; y< Y_Size; y++){ 00933 const size_t y2 = y<<1; 00934 for (size_t x = 0; x < X_Size; x++){ 00935 00936 // kappa ./ p_k 00937 const float kappa_el = kappa_data[i>>1]; 00938 const float p_k_el_re = p_k_x_data[i] * kappa_el; 00939 const float p_k_el_im = p_k_x_data[i+1] * kappa_el; 00940 const size_t x2 = x<<1; 00941 00942 00943 //bsxfun(ddx...) 00944 p_k_x_data[i] = p_k_el_re * ddx_data[x2] - p_k_el_im * ddx_data[x2+1]; 00945 p_k_x_data[i+1] = p_k_el_re * ddx_data[x2+1] + p_k_el_im * ddx_data[x2]; 00946 00947 //bsxfun(ddy...) 00948 p_k_y_data[i] = p_k_el_re * ddy_data[y2] - p_k_el_im * ddy_data[y2+1]; 00949 p_k_y_data[i+1] = p_k_el_re * ddy_data[y2+1] + p_k_el_im * ddy_data[y2]; 00950 00951 //bsxfun(ddz...) 00952 p_k_z_data[i] = p_k_el_re * ddz_data[z2] - p_k_el_im * ddz_data[z2+1]; 00953 p_k_z_data[i+1] = p_k_el_re * ddz_data[z2+1] + p_k_el_im * ddz_data[z2]; 00954 00955 00956 i +=2; 00957 } // x 00958 } // y 00959 } // z 00960 00961 } 00962 }// end of KSpaceFirstOrder3DSolver 00963 //------------------------------------------------------------------------------ 00964 00965 00970 void TKSpaceFirstOrder3DSolver::Compute_duxyz(){ 00971 00972 Get_FFT_X_temp().Compute_FFT_3D_R2C(Get_ux_sgx()); 00973 Get_FFT_Y_temp().Compute_FFT_3D_R2C(Get_uy_sgy()); 00974 Get_FFT_Z_temp().Compute_FFT_3D_R2C(Get_uz_sgz()); 00975 00976 #ifndef __NO_OMP__ 00977 #pragma omp parallel 00978 #endif 00979 { 00980 00981 float * Temp_FFT_X_Data = Get_FFT_X_temp().GetRawData(); 00982 float * Temp_FFT_Y_Data = Get_FFT_Y_temp().GetRawData(); 00983 float * Temp_FFT_Z_Data = Get_FFT_Z_temp().GetRawData(); 00984 00985 00986 const float * kappa = Get_kappa().GetRawData(); 00987 00988 const size_t FFT_Z_dim = Get_FFT_X_temp().GetDimensionSizes().Z; 00989 const size_t FFT_Y_dim = Get_FFT_X_temp().GetDimensionSizes().Y; 00990 const size_t FFT_X_dim = Get_FFT_X_temp().GetDimensionSizes().X; 00991 00992 const size_t SliceSize = (FFT_X_dim * FFT_Y_dim) << 1; 00993 const float Divider = 1.0f / Get_ux_sgx().GetTotalElementCount(); 00994 00995 const TFloatComplex * ddx = (TFloatComplex *) Get_ddx_k_shift_neg().GetRawData(); 00996 const TFloatComplex * ddy = (TFloatComplex *) Get_ddy_k_shift_neg().GetRawData(); 00997 const TFloatComplex * ddz = (TFloatComplex *) Get_ddz_k_shift_neg().GetRawData(); 00998 00999 01000 #ifndef __NO_OMP__ 01001 #pragma omp for schedule (static) 01002 #endif 01003 for (size_t z = 0; z < FFT_Z_dim; z++){ 01004 01005 register size_t i = z* SliceSize; 01006 01007 const float ddz_neg_re = ddz[z].real; 01008 const float ddz_neg_im = ddz[z].imag; 01009 for (size_t y = 0; y< FFT_Y_dim; y++){ 01010 01011 const float ddy_neg_re = ddy[y].real; 01012 const float ddy_neg_im = ddy[y].imag; 01013 01014 for (size_t x = 0; x < FFT_X_dim; x++){ 01015 01016 float FFT_el_x_re = Temp_FFT_X_Data[i]; 01017 float FFT_el_x_im = Temp_FFT_X_Data[i+1]; 01018 01019 float FFT_el_y_re = Temp_FFT_Y_Data[i]; 01020 float FFT_el_y_im = Temp_FFT_Y_Data[i+1]; 01021 01022 float FFT_el_z_re = Temp_FFT_Z_Data[i]; 01023 float FFT_el_z_im = Temp_FFT_Z_Data[i+1]; 01024 01025 const float kappa_el = kappa[i >> 1]; 01026 01027 FFT_el_x_re *= kappa_el; 01028 FFT_el_x_im *= kappa_el; 01029 01030 FFT_el_y_re *= kappa_el; 01031 FFT_el_y_im *= kappa_el; 01032 01033 FFT_el_z_re *= kappa_el; 01034 FFT_el_z_im *= kappa_el; 01035 01036 01037 Temp_FFT_X_Data[i] = ((FFT_el_x_re * ddx[x].real) - 01038 (FFT_el_x_im * ddx[x].imag) 01039 ) * Divider; 01040 Temp_FFT_X_Data[i + 1] = ((FFT_el_x_im * ddx[x].real) + 01041 (FFT_el_x_re * ddx[x].imag) 01042 )* Divider; 01043 01044 Temp_FFT_Y_Data[i] = ((FFT_el_y_re * ddy_neg_re) - 01045 (FFT_el_y_im * ddy_neg_im) 01046 ) * Divider; 01047 Temp_FFT_Y_Data[i + 1] = ((FFT_el_y_im * ddy_neg_re) + 01048 (FFT_el_y_re * ddy_neg_im) 01049 )* Divider; 01050 01051 Temp_FFT_Z_Data[i] = ((FFT_el_z_re * ddz_neg_re) - 01052 (FFT_el_z_im * ddz_neg_im) 01053 ) * Divider; 01054 Temp_FFT_Z_Data[i + 1] = ((FFT_el_z_im * ddz_neg_re) + 01055 (FFT_el_z_re * ddz_neg_im) 01056 )* Divider; 01057 01058 i+=2; 01059 } // x 01060 } // y 01061 } // z 01062 01063 } // parallel; 01064 01065 Get_FFT_X_temp().Compute_iFFT_3D_C2R(Get_duxdx()); 01066 Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Get_duydy()); 01067 Get_FFT_Z_temp().Compute_iFFT_3D_C2R(Get_duzdz()); 01068 01069 //-------------------------------------------------------------------------// 01070 //--------------------- Non linear grid -----------------------------------// 01071 //-------------------------------------------------------------------------// 01072 if (Parameters->Get_nonuniform_grid_flag() != 0){ 01073 01074 01075 #ifndef __NO_OMP__ 01076 #pragma omp parallel 01077 #endif 01078 { 01079 01080 float * duxdx = Get_duxdx().GetRawData(); 01081 float * duydy = Get_duydy().GetRawData(); 01082 float * duzdz = Get_duzdz().GetRawData(); 01083 01084 const float * duxdxn = Get_dxudxn().GetRawData(); 01085 const float * duydyn = Get_dyudyn().GetRawData(); 01086 const float * duzdzn = Get_dzudzn().GetRawData(); 01087 01088 const size_t Z_Size = Get_duxdx().GetDimensionSizes().Z; 01089 const size_t Y_Size = Get_duxdx().GetDimensionSizes().Y; 01090 const size_t X_Size = Get_duxdx().GetDimensionSizes().X; 01091 01092 const size_t SliceSize = (X_Size * Y_Size ); 01093 01094 01095 #ifndef __NO_OMP__ 01096 #pragma omp for schedule (static) 01097 #endif 01098 for (size_t z = 0; z < Z_Size; z++){ 01099 register size_t i = z* SliceSize; 01100 for (size_t y = 0; y< Y_Size; y++){ 01101 for (size_t x = 0; x < X_Size; x++){ 01102 duxdx[i] *= duxdxn[x]; 01103 i++; 01104 } // x 01105 } // y 01106 } // z 01107 01108 01109 #ifndef __NO_OMP__ 01110 #pragma omp for schedule (static) 01111 #endif 01112 for (size_t z = 0; z < Z_Size; z++){ 01113 01114 register size_t i = z* SliceSize; 01115 for (size_t y = 0; y< Y_Size; y++){ 01116 const float dyudyn_el = duydyn[y]; 01117 for (size_t x = 0; x < X_Size; x++){ 01118 01119 duydy[i] *= dyudyn_el; 01120 i++; 01121 } // x 01122 } // y 01123 } // z 01124 01125 01126 01127 #ifndef __NO_OMP__ 01128 #pragma omp for schedule (static) 01129 #endif 01130 for (size_t z = 0; z < Z_Size; z++){ 01131 const float duzdzn_el = duzdzn[z]; 01132 register size_t i = z* SliceSize; 01133 for (size_t y = 0; y< Y_Size; y++){ 01134 for (size_t x = 0; x < X_Size; x++){ 01135 01136 duzdz[i] *= duzdzn_el; 01137 i++; 01138 } // x 01139 } // y 01140 } // z 01141 01142 } // parallel 01143 01144 }// nonlinear 01145 01146 01147 01148 }// end of Compute_duxyz 01149 //------------------------------------------------------------------------------ 01150 01151 01152 01153 01154 01155 01160 void TKSpaceFirstOrder3DSolver::Compute_rhoxyz_nonlinear(){ 01161 01162 01163 const size_t Z_Size = Get_rhox().GetDimensionSizes().Z; 01164 const size_t Y_Size = Get_rhox().GetDimensionSizes().Y; 01165 const size_t X_Size = Get_rhox().GetDimensionSizes().X; 01166 01167 01168 const float dt2 = 2.0f * Parameters->Get_dt(); 01169 const float dt_el = Parameters->Get_dt(); 01170 const size_t SliceSize = Y_Size * X_Size; 01171 01172 #ifndef __NO_OMP__ 01173 #pragma omp parallel 01174 #endif 01175 { 01176 01177 float * rhox_data = Get_rhox().GetRawData(); 01178 float * rhoy_data = Get_rhoy().GetRawData(); 01179 float * rhoz_data = Get_rhoz().GetRawData(); 01180 01181 const float * pml_x_data = Get_pml_x().GetRawData(); 01182 const float * duxdx_data = Get_duxdx().GetRawData(); 01183 const float * duydy_data = Get_duydy().GetRawData(); 01184 const float * duzdz_data = Get_duzdz().GetRawData(); 01185 01186 01187 01188 // Scalar 01189 if (Parameters->Get_rho0_scalar()) { 01190 01191 01192 const float dt_rho0 = Parameters->Get_rho0_scalar() * dt_el; 01193 01194 #ifndef __NO_OMP__ 01195 #pragma omp for schedule (static) 01196 #endif 01197 for (size_t z = 0; z < Z_Size; z++){ 01198 01199 register size_t i = z * SliceSize; 01200 01201 for (size_t y = 0; y< Y_Size; y++){ 01202 for (size_t x = 0; x < X_Size; x++){ 01203 01204 const float pml_x = pml_x_data[x]; 01205 float du = duxdx_data[i]; 01206 01207 rhox_data[i] = pml_x * ( 01208 ((pml_x * rhox_data[i]) - (dt_rho0 * du))/ 01209 (1.0f + (dt2 * du)) 01210 ); 01211 i++; 01212 01213 } // x 01214 }// y 01215 }// z 01216 01217 #ifndef __NO_OMP__ 01218 #pragma omp for schedule (static) 01219 #endif 01220 for (size_t z = 0; z < Z_Size; z++){ 01221 01222 register size_t i = z * SliceSize; 01223 for (size_t y = 0; y< Y_Size; y++){ 01224 01225 const float pml_y = Get_pml_y()[y]; 01226 for (size_t x = 0; x < X_Size; x++){ 01227 01228 float du = duydy_data[i]; 01229 01230 01231 rhoy_data[i] = pml_y * ( 01232 ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/ 01233 (1.0f + (dt2 * du)) 01234 ); 01235 i++; 01236 01237 } // x 01238 }// y 01239 }// z 01240 01241 01242 #ifndef __NO_OMP__ 01243 #pragma omp for schedule (static) 01244 #endif 01245 for (size_t z = 0; z < Z_Size; z++){ 01246 01247 register size_t i = z * SliceSize; 01248 const float pml_z = Get_pml_z()[z]; 01249 for (size_t y = 0; y< Y_Size; y++){ 01250 01251 for (size_t x = 0; x < X_Size; x++){ 01252 01253 float du = duzdz_data[i]; 01254 01255 01256 01257 rhoz_data[i] = pml_z * ( 01258 ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/ 01259 (1.0f + (dt2 * du)) 01260 ); 01261 i++; 01262 01263 } // x 01264 }// y 01265 }// z 01266 01267 01268 01269 01270 01271 }else { 01272 //----------------------------------------------------------------// 01273 // rho0 is a matrix 01274 01275 01276 const float * rho0_data = Get_rho0().GetRawData(); 01277 01278 #ifndef __NO_OMP__ 01279 #pragma omp for schedule (static) 01280 #endif 01281 for (size_t z = 0; z < Z_Size; z++){ 01282 01283 register size_t i = z * SliceSize; 01284 01285 for (size_t y = 0; y< Y_Size; y++){ 01286 for (size_t x = 0; x < X_Size; x++){ 01287 01288 const float pml_x = pml_x_data[x]; 01289 const float dt_rho0 = dt_el * rho0_data[i]; 01290 float du = duxdx_data[i]; 01291 01292 rhox_data[i] = pml_x * ( 01293 ((pml_x * rhox_data[i]) - (dt_rho0 * du))/ 01294 (1.0f + (dt2 * du)) 01295 ); 01296 01297 i++; 01298 01299 } // x 01300 }// y 01301 }// z 01302 01303 01304 #ifndef __NO_OMP__ 01305 #pragma omp for schedule (static) 01306 #endif 01307 for (size_t z = 0; z < Z_Size; z++){ 01308 01309 register size_t i = z * SliceSize; 01310 for (size_t y = 0; y< Y_Size; y++){ 01311 01312 const float pml_y = Get_pml_y()[y]; 01313 for (size_t x = 0; x < X_Size; x++){ 01314 01315 const float dt_rho0 = dt_el * rho0_data[i]; 01316 float du = duydy_data[i]; 01317 01318 01319 rhoy_data[i] = pml_y * ( 01320 ((pml_y * rhoy_data[i]) - (dt_rho0 * du))/ 01321 (1.0f + (dt2 * du)) 01322 ); 01323 01324 i++; 01325 01326 } // x 01327 }// y 01328 }// z 01329 01330 01331 #ifndef __NO_OMP__ 01332 #pragma omp for schedule (static) 01333 #endif 01334 for (size_t z = 0; z < Z_Size; z++){ 01335 01336 register size_t i = z * SliceSize; 01337 const float pml_z = Get_pml_z()[z]; 01338 for (size_t y = 0; y< Y_Size; y++){ 01339 01340 for (size_t x = 0; x < X_Size; x++){ 01341 01342 01343 const float dt_rho0 = dt_el * rho0_data[i]; 01344 float du = duzdz_data[i]; 01345 01346 01347 01348 rhoz_data[i] = pml_z * ( 01349 ((pml_z * rhoz_data[i]) - (dt_rho0 * du))/ 01350 (1.0f + (dt2 * du)) 01351 ); 01352 i++; 01353 01354 } // x 01355 }// y 01356 }// z 01357 01358 } // end matrix 01359 }// parallel 01360 }// end of Compute_rhoxyz 01361 //------------------------------------------------------------------------------ 01362 01363 01364 01365 01370 void TKSpaceFirstOrder3DSolver::Compute_rhoxyz_linear(){ 01371 01372 01373 const size_t Z_Size = Get_rhox().GetDimensionSizes().Z; 01374 const size_t Y_Size = Get_rhox().GetDimensionSizes().Y; 01375 const size_t X_Size = Get_rhox().GetDimensionSizes().X; 01376 01377 01378 01379 const float dt_el = Parameters->Get_dt(); 01380 const size_t SliceSize = Y_Size * X_Size; 01381 01382 #ifndef __NO_OMP__ 01383 #pragma omp parallel 01384 #endif 01385 { 01386 01387 float * rhox_data = Get_rhox().GetRawData(); 01388 float * rhoy_data = Get_rhoy().GetRawData(); 01389 float * rhoz_data = Get_rhoz().GetRawData(); 01390 01391 const float * pml_x_data = Get_pml_x().GetRawData(); 01392 const float * duxdx_data = Get_duxdx().GetRawData(); 01393 const float * duydy_data = Get_duydy().GetRawData(); 01394 const float * duzdz_data = Get_duzdz().GetRawData(); 01395 01396 01397 01398 // Scalar 01399 if (Parameters->Get_rho0_scalar()) { 01400 01401 01402 const float dt_rho0 = Parameters->Get_rho0_scalar() * dt_el; 01403 01404 #ifndef __NO_OMP__ 01405 #pragma omp for schedule (static) 01406 #endif 01407 for (size_t z = 0; z < Z_Size; z++){ 01408 01409 register size_t i = z * SliceSize; 01410 01411 for (size_t y = 0; y< Y_Size; y++){ 01412 for (size_t x = 0; x < X_Size; x++){ 01413 01414 const float pml_x = pml_x_data[x]; 01415 rhox_data[i] = pml_x * ( 01416 ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i])) 01417 ); 01418 i++; 01419 01420 } // x 01421 }// y 01422 }// z 01423 01424 #ifndef __NO_OMP__ 01425 #pragma omp for schedule (static) 01426 #endif 01427 for (size_t z = 0; z < Z_Size; z++){ 01428 01429 register size_t i = z * SliceSize; 01430 for (size_t y = 0; y< Y_Size; y++){ 01431 01432 const float pml_y = Get_pml_y()[y]; 01433 for (size_t x = 0; x < X_Size; x++){ 01434 01435 rhoy_data[i] = pml_y * ( 01436 ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i])) 01437 ); 01438 i++; 01439 01440 } // x 01441 }// y 01442 }// z 01443 01444 01445 #ifndef __NO_OMP__ 01446 #pragma omp for schedule (static) 01447 #endif 01448 for (size_t z = 0; z < Z_Size; z++){ 01449 01450 register size_t i = z * SliceSize; 01451 const float pml_z = Get_pml_z()[z]; 01452 for (size_t y = 0; y< Y_Size; y++){ 01453 01454 for (size_t x = 0; x < X_Size; x++){ 01455 01456 rhoz_data[i] = pml_z * ( 01457 ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i])) 01458 ); 01459 i++; 01460 01461 } // x 01462 }// y 01463 }// z 01464 01465 01466 01467 01468 01469 }else { 01470 //----------------------------------------------------------------// 01471 // rho0 is a matrix 01472 01473 01474 const float * rho0_data = Get_rho0().GetRawData(); 01475 01476 #ifndef __NO_OMP__ 01477 #pragma omp for schedule (static) 01478 #endif 01479 for (size_t z = 0; z < Z_Size; z++){ 01480 01481 register size_t i = z * SliceSize; 01482 01483 for (size_t y = 0; y< Y_Size; y++){ 01484 for (size_t x = 0; x < X_Size; x++){ 01485 01486 const float pml_x = pml_x_data[x]; 01487 const float dt_rho0 = dt_el * rho0_data[i]; 01488 01489 rhox_data[i] = pml_x * ( 01490 ((pml_x * rhox_data[i]) - (dt_rho0 * duxdx_data[i])) 01491 ); 01492 01493 i++; 01494 01495 } // x 01496 }// y 01497 }// z 01498 01499 01500 #ifndef __NO_OMP__ 01501 #pragma omp for schedule (static) 01502 #endif 01503 for (size_t z = 0; z < Z_Size; z++){ 01504 01505 register size_t i = z * SliceSize; 01506 for (size_t y = 0; y< Y_Size; y++){ 01507 01508 const float pml_y = Get_pml_y()[y]; 01509 for (size_t x = 0; x < X_Size; x++){ 01510 01511 const float dt_rho0 = dt_el * rho0_data[i]; 01512 01513 rhoy_data[i] = pml_y * ( 01514 ((pml_y * rhoy_data[i]) - (dt_rho0 * duydy_data[i])) 01515 ); 01516 01517 i++; 01518 01519 } // x 01520 }// y 01521 }// z 01522 01523 01524 #ifndef __NO_OMP__ 01525 #pragma omp for schedule (static) 01526 #endif 01527 for (size_t z = 0; z < Z_Size; z++){ 01528 01529 register size_t i = z * SliceSize; 01530 const float pml_z = Get_pml_z()[z]; 01531 for (size_t y = 0; y< Y_Size; y++){ 01532 01533 for (size_t x = 0; x < X_Size; x++){ 01534 01535 const float dt_rho0 = dt_el * rho0_data[i]; 01536 01537 rhoz_data[i] = pml_z * ( 01538 ((pml_z * rhoz_data[i]) - (dt_rho0 * duzdz_data[i])) 01539 ); 01540 i++; 01541 01542 } // x 01543 }// y 01544 }// z 01545 01546 } // end matrix 01547 }// parallel 01548 }// end of Compute_rhoxyz 01549 //------------------------------------------------------------------------------ 01550 01551 01559 void TKSpaceFirstOrder3DSolver::Calculate_SumRho_BonA_SumDu_SSE2(TRealMatrix & RHO_Temp, TRealMatrix & BonA_Temp, TRealMatrix & Sum_du){ 01560 01561 const size_t TotalElementCount_4 = (RHO_Temp.GetTotalElementCount() >> 2) << 2; 01562 01563 const float * rhox_data = Get_rhox().GetRawData(); 01564 const float * rhoy_data = Get_rhoy().GetRawData(); 01565 const float * rhoz_data = Get_rhoz().GetRawData(); 01566 01567 const float * dux_data = Get_duxdx().GetRawData(); 01568 const float * duy_data = Get_duydy().GetRawData(); 01569 const float * duz_data = Get_duzdz().GetRawData(); 01570 01571 01572 float * BonA; 01573 size_t BonA_shift; 01574 const bool BonA_flag = Parameters->Get_BonA_scalar_flag(); 01575 01576 if (BonA_flag) { 01577 BonA = &Parameters->Get_BonA_scalar(); 01578 BonA_shift = 0; 01579 }else { 01580 BonA = Get_BonA().GetRawData(); 01581 BonA_shift = 1; 01582 } 01583 01584 01585 float * rho0_data; 01586 size_t rho0_shift; 01587 const bool rho0_flag = Parameters->Get_rho0_scalar_flag(); 01588 01589 if (rho0_flag) { 01590 rho0_data = &Parameters->Get_rho0_scalar(); 01591 rho0_shift = 0; 01592 }else { 01593 rho0_data = Get_rho0().GetRawData(); 01594 rho0_shift = 1; 01595 } 01596 01597 #ifndef __NO_OMP__ 01598 #pragma omp parallel 01599 #endif 01600 { 01601 01602 float * RHO_Temp_Data = RHO_Temp.GetRawData(); 01603 float * BonA_Temp_Data = BonA_Temp.GetRawData(); 01604 float * SumDU_Temp_Data= Sum_du.GetRawData(); 01605 01606 01607 const __m128 Two_SSE = _mm_set1_ps(2.0f); 01608 __m128 BonA_SSE = _mm_set1_ps(Parameters->Get_BonA_scalar()); 01609 __m128 rho0_SSE = _mm_set1_ps(Parameters->Get_rho0_scalar()); 01610 01611 #ifndef __NO_OMP__ 01612 #pragma omp for schedule (static) nowait 01613 #endif 01614 for (size_t i = 0; i < TotalElementCount_4; i+=4){ 01615 01616 01617 if (!BonA_flag) BonA_SSE = _mm_load_ps(&BonA[i]); 01618 01619 __m128 xmm1 = _mm_load_ps(&rhox_data[i]); 01620 __m128 xmm2 = _mm_load_ps(&rhoy_data[i]); 01621 __m128 xmm3 = _mm_load_ps(&rhoz_data[i]); 01622 01623 01624 01625 if (!rho0_flag) rho0_SSE = _mm_load_ps(&rho0_data[i]); 01626 __m128 rho_xyz_sq_SSE; 01627 __m128 rho_xyz_el_SSE; 01628 01629 // register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i]; 01630 rho_xyz_el_SSE = _mm_add_ps(xmm1,xmm2); 01631 rho_xyz_el_SSE = _mm_add_ps(xmm3, rho_xyz_el_SSE); 01632 01633 // RHO_Temp_Data[i] = rho_xyz_el; 01634 _mm_stream_ps(&RHO_Temp_Data[i],rho_xyz_el_SSE); 01635 01636 // BonA_Temp_Data[i] = ((BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])) + rho_xyz_el; 01637 rho_xyz_sq_SSE = _mm_mul_ps(rho_xyz_el_SSE,rho_xyz_el_SSE);// (rho_xyz_el * rho_xyz_el) 01638 01639 xmm1 = _mm_mul_ps(rho_xyz_sq_SSE,BonA_SSE); //((BonA * (rho_xyz_el * rho_xyz_el)) 01640 xmm2 = _mm_mul_ps(Two_SSE,rho0_SSE); // (2.0f * rho0_data[i]) 01641 xmm3 = _mm_div_ps(xmm1,xmm2); // (BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i]) 01642 01643 xmm1 = _mm_add_ps(xmm3,rho_xyz_el_SSE); // + rho_xyz_el 01644 01645 _mm_stream_ps(&BonA_Temp_Data[i],xmm1); 01646 01647 xmm1 = _mm_load_ps(&dux_data[i]); //dudx 01648 xmm2 = _mm_load_ps(&duy_data[i]); //dudu 01649 xmm3 = _mm_load_ps(&duz_data[i]); //dudz 01650 01651 __m128 xmm_acc = _mm_add_ps(xmm1,xmm2); 01652 xmm_acc = _mm_add_ps(xmm_acc,xmm3); 01653 xmm_acc = _mm_mul_ps(xmm_acc,rho0_SSE); 01654 01655 _mm_stream_ps(&SumDU_Temp_Data[i],xmm_acc); 01656 01657 01658 // BonA_Temp_Data[i] = ((BonA * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i])) + rho_xyz_el; 01659 01660 } 01661 01662 01663 //-- non SSE code --// 01664 if (omp_get_thread_num() == omp_get_num_threads() -1 ){ 01665 for (size_t i = TotalElementCount_4; i < RHO_Temp.GetTotalElementCount() ; i++){ 01666 register const float rho_xyz_el = rhox_data[i] + rhoy_data[i] + rhoz_data[i]; 01667 RHO_Temp_Data[i] = rho_xyz_el; 01668 BonA_Temp_Data[i] = ((BonA[i * BonA_shift] * (rho_xyz_el * rho_xyz_el)) / (2.0f * rho0_data[i* rho0_shift])) + rho_xyz_el; 01669 SumDU_Temp_Data[i] = rho0_data[i * rho0_shift] * (dux_data[i] + duy_data[i] + duz_data[i]); 01670 } 01671 } 01672 01673 }// parallel 01674 01675 } // end of Calculate_SumRho_BonA_SumDu 01676 //----------------------------------------------------------------------------- 01677 01678 01679 01685 void TKSpaceFirstOrder3DSolver::Calculate_SumRho_SumRhoDu(TRealMatrix & Sum_rhoxyz, TRealMatrix & Sum_rho0_du){ 01686 01687 01688 const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount(); 01689 01690 #ifndef __NO_OMP__ 01691 #pragma omp parallel 01692 #endif 01693 { 01694 const float * rhox_data = Get_rhox().GetRawData(); 01695 const float * rhoy_data = Get_rhoy().GetRawData(); 01696 const float * rhoz_data = Get_rhoz().GetRawData(); 01697 01698 const float * dux_data = Get_duxdx().GetRawData(); 01699 const float * duy_data = Get_duydy().GetRawData(); 01700 const float * duz_data = Get_duzdz().GetRawData(); 01701 01702 const float * rho0_data = NULL; 01703 01704 const float rho0_data_el = Parameters->Get_rho0_scalar(); 01705 if (!Parameters->Get_rho0_scalar_flag()) { 01706 rho0_data = Get_rho0().GetRawData(); 01707 } 01708 01709 01710 float * Sum_rhoxyz_Data = Sum_rhoxyz.GetRawData(); 01711 float * Sum_rho0_du_Data = Sum_rho0_du.GetRawData(); 01712 01713 01714 01715 #ifndef __NO_OMP__ 01716 #pragma omp for schedule (static) 01717 #endif 01718 for (size_t i = 0; i < TotalElementCount; i++){ 01719 Sum_rhoxyz_Data[i] = rhox_data[i] + rhoy_data[i] + rhoz_data[i]; 01720 } 01721 01722 01723 if (Parameters->Get_rho0_scalar_flag()){ // scalar 01724 #ifndef __NO_OMP__ 01725 #pragma omp for schedule (static) nowait 01726 #endif 01727 for (size_t i = 0; i < TotalElementCount; i++){ 01728 Sum_rho0_du_Data[i] = rho0_data_el * (dux_data[i] + duy_data[i] + duz_data[i]); 01729 01730 } 01731 }else{ // matrix 01732 01733 #ifndef __NO_OMP__ 01734 #pragma omp for schedule (static) nowait 01735 #endif 01736 for (size_t i = 0; i < TotalElementCount; i++){ 01737 Sum_rho0_du_Data[i] = rho0_data[i] * (dux_data[i] + duy_data[i] + duz_data[i]); 01738 } 01739 01740 } 01741 01742 } // parallel 01743 01744 }// end of Calculate_SumRho_SumRhoDu 01745 //----------------------------------------------------------------------------- 01746 01747 01748 01757 void TKSpaceFirstOrder3DSolver::Compute_Absorb_nabla1_2_SSE2(TFFTWComplexMatrix& FFT_1, TFFTWComplexMatrix& FFT_2){ 01758 01759 const float * nabla1 = Get_absorb_nabla1().GetRawData(); 01760 const float * nabla2 = Get_absorb_nabla2().GetRawData(); 01761 01762 const size_t TotalElementCount = FFT_1.GetTotalElementCount(); 01763 const size_t TotalElementCount_SSE = (FFT_1.GetTotalElementCount() >> 1) << 1; 01764 01765 #ifndef __NO_OMP__ 01766 #pragma omp parallel 01767 #endif 01768 { 01769 01770 float * FFT_1_data = FFT_1.GetRawData(); 01771 float * FFT_2_data = FFT_2.GetRawData(); 01772 01773 #ifndef __NO_OMP__ 01774 #pragma omp for schedule (static) nowait 01775 #endif 01776 for (size_t i = 0; i < TotalElementCount_SSE; i+=2){ 01777 01778 __m128 xmm_nabla1 = _mm_set_ps( nabla1[i+1], nabla1[i+1] , nabla1[i] , nabla1[i] ); 01779 __m128 xmm_FFT_1 = _mm_load_ps(&FFT_1_data[2*i]); 01780 01781 xmm_FFT_1 = _mm_mul_ps(xmm_nabla1, xmm_FFT_1); 01782 _mm_store_ps(&FFT_1_data[2*i], xmm_FFT_1); 01783 01784 01785 } 01786 01787 #ifndef __NO_OMP__ 01788 #pragma omp for schedule (static) 01789 #endif 01790 for (size_t i = 0; i < TotalElementCount; i+=2){ 01791 __m128 xmm_nabla2 = _mm_set_ps( nabla2[i+1], nabla2[i+1] , nabla2[i] , nabla2[i] ); 01792 __m128 xmm_FFT_2 = _mm_load_ps(&FFT_2_data[2*i]); 01793 01794 xmm_FFT_2 = _mm_mul_ps(xmm_nabla2, xmm_FFT_2); 01795 _mm_store_ps(&FFT_2_data[2*i], xmm_FFT_2); 01796 } 01797 01798 //-- non SSE code --// 01799 if (omp_get_thread_num() == omp_get_num_threads() -1 ){ 01800 for (size_t i = TotalElementCount_SSE; i < TotalElementCount ; i++){ 01801 FFT_1_data[(i<<1)] *= nabla1[i]; 01802 FFT_1_data[(i<<1)+1] *= nabla1[i]; 01803 01804 FFT_2_data[(i<<1)] *= nabla2[i]; 01805 FFT_2_data[(i<<1)+1] *= nabla2[i]; 01806 } 01807 } 01808 01809 01810 }// parallel 01811 01812 } // end of Compute_Absorb_nabla1_2_Normalize 01813 //----------------------------------------------------------------------------- 01814 01815 01816 01817 01824 void TKSpaceFirstOrder3DSolver::Sum_Subterms_nonlinear(TRealMatrix& Absorb_tau_temp, TRealMatrix& Absorb_eta_temp, TRealMatrix& BonA_temp){ 01825 01826 float * tau_data; 01827 float * eta_data; 01828 float * c2_data; 01829 01830 size_t c2_shift; 01831 size_t tau_eta_shift; 01832 01833 01834 const float * Absorb_tau_data = Absorb_tau_temp.GetRawData(); 01835 const float * Absorb_eta_data = Absorb_eta_temp.GetRawData(); 01836 01837 const size_t TotalElementCount = Get_p().GetTotalElementCount(); 01838 const float Divider = 1.0f / (float) TotalElementCount; 01839 01840 // c2 scalar 01841 if (Parameters->Get_c0_scalar_flag()){ 01842 c2_data = &Parameters->Get_c0_scalar(); 01843 c2_shift = 0; 01844 }else{ 01845 c2_data = Get_c2().GetRawData(); 01846 c2_shift = 1; 01847 01848 } 01849 01850 // eta and tau scalars 01851 if (Parameters->Get_c0_scalar_flag() && Parameters->Get_alpha_coeff_scallar_flag()){ 01852 tau_data = &Parameters->Get_absorb_tau_scalar(); 01853 eta_data = &Parameters->Get_absorb_eta_scalar(); 01854 tau_eta_shift = 0; 01855 01856 }else{ 01857 tau_data = Get_absorb_tau().GetRawData(); 01858 eta_data = Get_absorb_eta().GetRawData(); 01859 tau_eta_shift = 1; 01860 01861 } 01862 01863 01864 01865 #ifndef __NO_OMP__ 01866 #pragma omp parallel 01867 #endif 01868 { 01869 01870 const float * BonA_data = BonA_temp.GetRawData(); 01871 float * p_data = Get_p().GetRawData(); 01872 01873 01874 #ifndef __NO_OMP__ 01875 #pragma omp for schedule (static) 01876 #endif 01877 for (size_t i = 0; i < TotalElementCount; i++){ 01878 p_data[i] = (c2_data[i * c2_shift]) *( 01879 BonA_data[i] + 01880 (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) - 01881 (Absorb_eta_data[i] * eta_data[i * tau_eta_shift]) 01882 )) 01883 ); 01884 01885 } 01886 01887 01888 }// parallel 01889 01890 }// end of Sum_Subterms_nonlinear 01891 //----------------------------------------------------------------------------- 01892 01893 01900 void TKSpaceFirstOrder3DSolver::Sum_Subterms_linear(TRealMatrix& Absorb_tau_temp, TRealMatrix& Absorb_eta_temp, TRealMatrix& Sum_rhoxyz){ 01901 01902 const float * tau_data = NULL; 01903 const float * eta_data = NULL; 01904 const float * c2_data = NULL; 01905 01906 size_t c2_shift = 0; 01907 size_t tau_eta_shift = 0; 01908 01909 01910 const float * Absorb_tau_data = Absorb_tau_temp.GetRawData(); 01911 const float * Absorb_eta_data = Absorb_eta_temp.GetRawData(); 01912 01913 const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount(); 01914 const float Divider = 1.0f / (float) TotalElementCount; 01915 01916 // c2 scalar 01917 if (Parameters->Get_c0_scalar_flag()){ 01918 c2_data = &Parameters->Get_c0_scalar(); 01919 c2_shift = 0; 01920 }else{ 01921 c2_data = Get_c2().GetRawData(); 01922 c2_shift = 1; 01923 01924 } 01925 01926 // eta and tau scalars 01927 if (Parameters->Get_c0_scalar_flag() && Parameters->Get_alpha_coeff_scallar_flag()){ 01928 tau_data = &Parameters->Get_absorb_tau_scalar(); 01929 eta_data = &Parameters->Get_absorb_eta_scalar(); 01930 tau_eta_shift = 0; 01931 01932 }else{ 01933 tau_data = Get_absorb_tau().GetRawData(); 01934 eta_data = Get_absorb_eta().GetRawData(); 01935 tau_eta_shift = 1; 01936 01937 } 01938 01939 01940 01941 #ifndef __NO_OMP__ 01942 #pragma omp parallel 01943 #endif 01944 { 01945 01946 const float * Sum_rhoxyz_Data = Sum_rhoxyz.GetRawData(); 01947 float * p_data = Get_p().GetRawData(); 01948 01949 01950 #ifndef __NO_OMP__ 01951 #pragma omp for schedule (static) 01952 #endif 01953 for (size_t i = 0; i < TotalElementCount; i++){ 01954 p_data[i] = (c2_data[i * c2_shift]) *( 01955 Sum_rhoxyz_Data[i] + 01956 (Divider * ((Absorb_tau_data[i] * tau_data[i * tau_eta_shift]) - 01957 (Absorb_eta_data[i] * eta_data[i * tau_eta_shift]) 01958 )) 01959 ); 01960 01961 } 01962 01963 01964 }// parallel 01965 01966 01967 01968 }// end of Sum_Subterms_linear 01969 //----------------------------------------------------------------------------- 01970 01971 01972 01977 void TKSpaceFirstOrder3DSolver::Sum_new_p_nonlinear_lossless(){ 01978 01979 01980 #ifndef __NO_OMP__ 01981 #pragma omp parallel 01982 #endif 01983 { 01984 01985 const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount(); 01986 float * p_data = Get_p().GetRawData(); 01987 01988 const float * rhox_data = Get_rhox().GetRawData(); 01989 const float * rhoy_data = Get_rhoy().GetRawData(); 01990 const float * rhoz_data = Get_rhoz().GetRawData(); 01991 01992 float * c2_data; 01993 size_t c2_shift; 01994 01995 if (Parameters->Get_c0_scalar_flag()){ 01996 c2_data = &Parameters->Get_c0_scalar(); 01997 c2_shift = 0; 01998 }else{ 01999 c2_data = Get_c2().GetRawData(); 02000 c2_shift = 1; 02001 02002 } 02003 02004 02005 float * BonA_data; 02006 size_t BonA_shift; 02007 02008 if (Parameters->Get_BonA_scalar_flag()){ 02009 BonA_data = &Parameters->Get_BonA_scalar(); 02010 BonA_shift = 0; 02011 }else{ 02012 BonA_data = Get_BonA().GetRawData(); 02013 BonA_shift = 1; 02014 } 02015 02016 float * rho0_data; 02017 size_t rho0_shift; 02018 02019 if (Parameters->Get_rho0_scalar_flag()){ 02020 rho0_data = &Parameters->Get_rho0_scalar(); 02021 rho0_shift = 0; 02022 }else{ 02023 rho0_data = Get_rho0().GetRawData(); 02024 rho0_shift = 1; 02025 } 02026 02027 02028 #ifndef __NO_OMP__ 02029 #pragma omp for schedule (static) 02030 #endif 02031 for (size_t i = 0; i < TotalElementCount; i++){ 02032 const float sum_rho = rhox_data[i] + rhoy_data[i] + rhoz_data[i]; 02033 p_data[i] = c2_data[i * c2_shift] *( 02034 sum_rho + 02035 (BonA_data[i * BonA_shift] * (sum_rho* sum_rho) / 02036 (2.0f* rho0_data[i * rho0_shift])) 02037 02038 ); 02039 02040 } 02041 02042 02043 }// parallel 02044 02045 02046 02047 }// end of Sum_new_p_nonlinear_lossless 02048 //----------------------------------------------------------------------------- 02049 02050 02055 void TKSpaceFirstOrder3DSolver::Sum_new_p_linear_lossless(){ 02056 02057 02058 #ifndef __NO_OMP__ 02059 #pragma omp parallel 02060 #endif 02061 { 02062 02063 const float * rhox = Get_rhox().GetRawData(); 02064 const float * rhoy = Get_rhoy().GetRawData(); 02065 const float * rhoz = Get_rhoz().GetRawData(); 02066 float * p_data = Get_p().GetRawData(); 02067 const size_t TotalElementCount = Parameters->GetFullDimensionSizes().GetElementCount(); 02068 02069 02070 if (Parameters->Get_c0_scalar_flag()){ 02071 02072 const float c2_element = Parameters->Get_c0_scalar(); 02073 #ifndef __NO_OMP__ 02074 #pragma omp for schedule (static) 02075 #endif 02076 for (size_t i = 0; i < TotalElementCount; i++){ 02077 p_data[i] = c2_element * ( rhox[i] + rhoy[i] + rhoz[i]); 02078 } 02079 02080 }else { 02081 02082 const float * c2_data = Get_c2().GetRawData(); 02083 02084 #ifndef __NO_OMP__ 02085 #pragma omp for schedule (static) 02086 #endif 02087 for (size_t i = 0; i < TotalElementCount; i++){ 02088 p_data[i] = (c2_data[i]) *( rhox[i] + rhoy[i] + rhoz[i]); 02089 } 02090 } 02091 02092 02093 }// parallel 02094 02095 }// end of Sum_new_p_linear_lossless() 02096 //----------------------------------------------------------------------------- 02097 02098 02099 02103 void TKSpaceFirstOrder3DSolver::Compute_new_p_nonlinear(){ 02104 02105 02106 02107 //-- rhox + rhoy + rhoz --// 02108 if (Parameters->Get_absorbing_flag()) { // absorbing case 02109 02110 02111 TRealMatrix& Sum_rhoxyz = Get_Temp_1_RS3D(); 02112 TRealMatrix& BonA_rho_rhoxyz = Get_Temp_2_RS3D(); 02113 TRealMatrix& Sum_du = Get_Temp_3_RS3D(); 02114 02115 02116 Calculate_SumRho_BonA_SumDu_SSE2(Sum_rhoxyz,BonA_rho_rhoxyz, Sum_du); 02117 02118 02119 // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz)) 02120 02121 Get_FFT_X_temp().Compute_FFT_3D_R2C(Sum_du); 02122 Get_FFT_Y_temp().Compute_FFT_3D_R2C(Sum_rhoxyz); 02123 02124 02125 Compute_Absorb_nabla1_2_SSE2(Get_FFT_X_temp(),Get_FFT_Y_temp()); 02126 02127 02128 TRealMatrix& Absorb_tau_temp = Sum_du; 02129 TRealMatrix& Absorb_eta_temp = Sum_rhoxyz; 02130 02131 Get_FFT_X_temp().Compute_iFFT_3D_C2R(Absorb_tau_temp); 02132 Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Absorb_eta_temp); 02133 02134 Sum_Subterms_nonlinear(Absorb_tau_temp, Absorb_eta_temp, BonA_rho_rhoxyz); 02135 }else { 02136 02137 Sum_new_p_nonlinear_lossless(); 02138 02139 } 02140 02141 02142 }// end of Compute_new_p_nonlinear_absorbing() 02143 //----------------------------------------------------------------------------- 02144 02145 02146 02150 void TKSpaceFirstOrder3DSolver::Compute_new_p_linear(){ 02151 02152 02153 //-- rhox + rhoy + rhoz --// 02154 if (Parameters->Get_absorbing_flag()) { // absorbing case 02155 02156 TRealMatrix& Sum_rhoxyz = Get_Temp_1_RS3D(); 02157 TRealMatrix& Sum_rho0_du = Get_Temp_2_RS3D(); 02158 02159 02160 Calculate_SumRho_SumRhoDu(Sum_rhoxyz,Sum_rho0_du); 02161 02162 02163 // ifftn ( absorb_nabla1 * fftn (rho0 * (duxdx+duydy+duzdz)) 02164 02165 Get_FFT_X_temp().Compute_FFT_3D_R2C(Sum_rho0_du); 02166 Get_FFT_Y_temp().Compute_FFT_3D_R2C(Sum_rhoxyz); 02167 02168 02169 Compute_Absorb_nabla1_2_SSE2(Get_FFT_X_temp(),Get_FFT_Y_temp()); 02170 02171 02172 TRealMatrix& Absorb_tau_temp = Get_Temp_2_RS3D(); // override Sum_rho0_dx 02173 TRealMatrix& Absorb_eta_temp = Get_Temp_3_RS3D(); 02174 02175 Get_FFT_X_temp().Compute_iFFT_3D_C2R(Absorb_tau_temp); 02176 Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Absorb_eta_temp); 02177 02178 Sum_Subterms_linear(Absorb_tau_temp, Absorb_eta_temp, Sum_rhoxyz); 02179 }else{ 02180 // lossless case 02181 02182 Sum_new_p_linear_lossless(); 02183 02184 02185 } 02186 02187 02188 }// end of Compute_new_p_linear_absorbing() 02189 //----------------------------------------------------------------------------- 02190 02191 02192 02193 02194 02195 02196 /* 02197 * Compute new values of ux_sgx, uy_sgy, uz_sgz. 02198 */ 02199 void TKSpaceFirstOrder3DSolver::Compute_uxyz(){ 02200 02201 Compute_ddx_kappa_fft_p(Get_p(), 02202 Get_FFT_X_temp(),Get_FFT_Y_temp(),Get_FFT_Z_temp(), 02203 Get_kappa(), 02204 Get_ddx_k_shift_pos(),Get_ddy_k_shift_pos(),Get_ddz_k_shift_pos() 02205 ); 02206 02207 Get_FFT_X_temp().Compute_iFFT_3D_C2R(Get_Temp_1_RS3D()); 02208 Get_FFT_Y_temp().Compute_iFFT_3D_C2R(Get_Temp_2_RS3D()); 02209 Get_FFT_Z_temp().Compute_iFFT_3D_C2R(Get_Temp_3_RS3D()); 02210 02211 #ifndef __NO_OMP__ 02212 #pragma omp parallel 02213 #endif 02214 { 02215 02216 if (Parameters->Get_rho0_scalar_flag()) { // scalars 02217 if (Parameters->Get_nonuniform_grid_flag()){ 02218 Get_ux_sgx().Compute_ux_sgx_normalize_scalar_nonuniform(Get_Temp_1_RS3D(),Parameters->Get_rho0_sgx_scalar(), Get_dxudxn_sgx(), Get_pml_x_sgx()); 02219 Get_uy_sgy().Compute_uy_sgy_normalize_scalar_nonuniform(Get_Temp_2_RS3D(),Parameters->Get_rho0_sgy_scalar(), Get_dyudyn_sgy(), Get_pml_y_sgy()); 02220 Get_uz_sgz().Compute_uz_sgz_normalize_scalar_nonuniform(Get_Temp_3_RS3D(),Parameters->Get_rho0_sgz_scalar(), Get_dzudzn_sgz(), Get_pml_z_sgz()); 02221 02222 }else { 02223 Get_ux_sgx().Compute_ux_sgx_normalize_scalar_uniform(Get_Temp_1_RS3D(),Parameters->Get_rho0_sgx_scalar(), Get_pml_x_sgx()); 02224 Get_uy_sgy().Compute_uy_sgy_normalize_scalar_uniform(Get_Temp_2_RS3D(),Parameters->Get_rho0_sgy_scalar(), Get_pml_y_sgy()); 02225 Get_uz_sgz().Compute_uz_sgz_normalize_scalar_uniform(Get_Temp_3_RS3D(),Parameters->Get_rho0_sgz_scalar(), Get_pml_z_sgz()); 02226 } 02227 02228 }else {// matrices 02229 Get_ux_sgx().Compute_ux_sgx_normalize(Get_Temp_1_RS3D(),Get_dt_rho0_sgx(), Get_pml_x_sgx()); 02230 Get_uy_sgy().Compute_uy_sgy_normalize(Get_Temp_2_RS3D(),Get_dt_rho0_sgy(), Get_pml_y_sgy()); 02231 Get_uz_sgz().Compute_uz_sgz_normalize(Get_Temp_3_RS3D(),Get_dt_rho0_sgz(), Get_pml_z_sgz()); 02232 } 02233 02234 } // parallel 02235 02236 }// end of Compute_uxyz() 02237 //----------------------------------------------------------------------------- 02238 02242 void TKSpaceFirstOrder3DSolver::Add_u_source(){ 02243 02244 02245 if (Parameters->Get_ux_source_flag() > t_index) { 02246 Get_ux_sgx().Add_u_source(Get_ux_source_input(), Get_u_source_index(), t_index, 02247 Parameters->Get_u_source_mode(), Parameters->Get_u_source_many()); 02248 02249 } 02250 if (Parameters->Get_uy_source_flag() > t_index) { 02251 Get_uy_sgy().Add_u_source(Get_uy_source_input(), Get_u_source_index(), t_index, 02252 Parameters->Get_u_source_mode(), Parameters->Get_u_source_many()); 02253 02254 02255 02256 } 02257 if (Parameters->Get_uz_source_flag() > t_index) { 02258 Get_uz_sgz().Add_u_source(Get_uz_source_input(), Get_u_source_index(), t_index, 02259 Parameters->Get_u_source_mode(), Parameters->Get_u_source_many()); 02260 02261 } 02262 02263 }// end of Add_u_source 02264 //----------------------------------------------------------------------------- 02265 02266 02267 02272 void TKSpaceFirstOrder3DSolver::Add_p_source(){ 02273 02274 if (Parameters->Get_p_source_flag() > t_index){ 02275 02276 float * rhox = Get_rhox().GetRawData(); 02277 float * rhoy = Get_rhoy().GetRawData(); 02278 float * rhoz = Get_rhoz().GetRawData(); 02279 02280 float * p_source_input = Get_p_source_input().GetRawData(); 02281 long * p_source_index = Get_p_source_index().GetRawData(); 02282 02283 02284 size_t index2D = t_index; 02285 02286 if (Parameters->Get_p_source_many() != 0) { // is 2D 02287 index2D = t_index * Get_p_source_index().GetTotalElementCount(); 02288 } 02289 02290 // replacement 02291 if (Parameters->Get_p_source_mode() == 0){ 02292 02293 for (size_t i = 0; i < Get_p_source_index().GetTotalElementCount(); i++){ 02294 02295 rhox[p_source_index[i]] = p_source_input[index2D]; 02296 rhoy[p_source_index[i]] = p_source_input[index2D]; 02297 rhoz[p_source_index[i]] = p_source_input[index2D]; 02298 02299 if (Parameters->Get_p_source_many() != 0) index2D ++; 02300 02301 } 02302 02303 // Addition 02304 }else{ 02305 02306 for (size_t i = 0; i < Get_p_source_index().GetTotalElementCount(); i++){ 02307 02308 rhox[p_source_index[i]] += p_source_input[index2D]; 02309 rhoy[p_source_index[i]] += p_source_input[index2D]; 02310 rhoz[p_source_index[i]] += p_source_input[index2D]; 02311 02312 if (Parameters->Get_p_source_many() != 0) index2D ++; 02313 02314 } 02315 }// type of replacement 02316 02317 }// if do at all 02318 02319 }// end of Add_p_source 02320 //------------------------------------------------------------------------------ 02321 02322 02327 void TKSpaceFirstOrder3DSolver::Compute_MainLoop(){ 02328 02329 02330 ActPercent = 0; 02331 PrintOtputHeader(); 02332 02333 IterationTime.Start(); 02334 for (t_index = 0; t_index < Parameters->Get_Nt(); t_index ++){ 02335 02336 02337 Compute_uxyz(); 02338 02339 // add in the velocity u source term 02340 Add_u_source(); 02341 02342 02343 // add in the transducer source term (t = t1) to ux 02344 if (Parameters->Get_transducer_source_flag() > t_index) 02345 Get_ux_sgx().AddTransducerSource(Get_u_source_index(), Get_delay_mask(), Get_transducer_source_input()); 02346 02347 Compute_duxyz(); 02348 02349 02350 if (Parameters->Get_nonlinear_flag()) 02351 Compute_rhoxyz_nonlinear(); 02352 else 02353 Compute_rhoxyz_linear(); 02354 02355 02356 // add in the source pressure term 02357 Add_p_source(); 02358 02359 if (Parameters->Get_nonlinear_flag()){ 02360 02361 Compute_new_p_nonlinear(); 02362 02363 }else { 02364 Compute_new_p_linear(); 02365 } 02366 02367 02368 //-- calculate initial pressure 02369 if ((t_index == 0) && (Parameters->Get_p0_source_flag() == 1)) Calculate_p0_source(); 02370 02371 02372 //-- store the initial pressure at the first time step --// 02373 02374 StoreSensorData(); 02375 02376 PrintStatisitcs(); 02377 02378 } 02379 02380 02381 }// end of Compute_Main_Loop() 02382 //------------------------------------------------------------------------------ 02383 02384 02389 void TKSpaceFirstOrder3DSolver::PrintStatisitcs(){ 02390 02391 float Nt = Parameters->Get_Nt(); 02392 if (t_index > (ActPercent * Nt * 0.01f) ) { 02393 02394 ActPercent += Parameters->GetVerboseInterval(); 02395 02396 IterationTime.Stop(); 02397 02398 double ElTime = IterationTime.GetElapsedTime(); 02399 const double ToGo = ((ElTime / (float) (t_index+1)) * Nt) - ElTime; 02400 02401 02402 struct tm *current; 02403 time_t now; 02404 time(&now); 02405 now += ToGo; 02406 current = localtime(&now); 02407 02408 fprintf(stdout, "%5i%c %9.3fs %9.3fs %02i/%02i/%02i %02i:%02i:%02i\n", 02409 (int) ((t_index) / (Nt * 0.01f)),'%', 02410 ElTime, ToGo, 02411 current->tm_mday, current->tm_mon+1, current->tm_year-100, 02412 current->tm_hour, current->tm_min, current->tm_sec 02413 ); 02414 02415 fflush(stdout); 02416 02417 } 02418 02419 }// end of KSpaceFirstOrder3DSolver 02420 //------------------------------------------------------------------------------ 02421 02425 void TKSpaceFirstOrder3DSolver::PrintOtputHeader(){ 02426 fprintf(stdout,"-------------------------------------------------------------\n"); 02427 fprintf(stdout,"....................... Simulation ..........................\n"); 02428 fprintf(stdout,"Progress...ElapsedTime........TimeToGo......TimeOfTermination\n"); 02429 02430 }// end of PrintOtputHeader 02431 //------------------------------------------------------------------------------ 02432 02437 void TKSpaceFirstOrder3DSolver::PostPorcessing(){ 02438 02439 //-- p --// 02440 if (Parameters->IsStore_p_max()){ 02441 Get_p_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02442 MatrixContainer[p_sensor_max].HDF5MatrixName.c_str(), 02443 Parameters->GetCompressionLevel()); 02444 }// p_max 02445 02446 02447 if (Parameters->IsStore_p_rms()){ 02448 02449 float * p_rms = Get_p_sensor_rms().GetRawData(); 02450 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02451 float Divider = 1.0f / (float) (Parameters->Get_Nt() - Parameters->GetStartTimeIndex()); 02452 02453 #ifndef __NO_OMP__ 02454 #pragma omp parallel for schedule (static) if (sensor_size > 1e5) \ 02455 firstprivate(Divider) 02456 #endif 02457 for (size_t i = 0; i < sensor_size; i++){ 02458 p_rms[i] = sqrtf(Divider * p_rms[i]); 02459 } 02460 02461 02462 Get_p_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02463 MatrixContainer[p_sensor_rms].HDF5MatrixName.c_str(), 02464 Parameters->GetCompressionLevel()); 02465 }// p_rms 02466 02467 02468 if (Parameters->IsStore_p_final()){ 02469 Get_p().WriteDataToHDF5File(Parameters->HDF5_OutputFile,p_final_Name,Parameters->GetCompressionLevel()); 02470 }// p_final 02471 02472 if (Parameters->IsStore_p_raw()){ 02473 p_sensor_raw_OutputStream->CloseStream(); 02474 } // p_raw 02475 02476 02477 02478 02479 //-- u --// 02480 02481 02482 if (Parameters->IsStore_u_max()){ 02483 Get_ux_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02484 MatrixContainer[ux_sensor_max].HDF5MatrixName.c_str(), 02485 Parameters->GetCompressionLevel()); 02486 Get_uy_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02487 MatrixContainer[uy_sensor_max].HDF5MatrixName.c_str(), 02488 Parameters->GetCompressionLevel()); 02489 Get_uz_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02490 MatrixContainer[uz_sensor_max].HDF5MatrixName.c_str(), 02491 Parameters->GetCompressionLevel()); 02492 }// u_max 02493 02494 02495 if (Parameters->IsStore_u_rms()){ 02496 float* ux_rms = Get_ux_sensor_rms().GetRawData(); 02497 float* uy_rms = Get_uy_sensor_rms().GetRawData(); 02498 float* uz_rms = Get_uz_sensor_rms().GetRawData(); 02499 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02500 float Divider = 1.0f / (float) (Parameters->Get_Nt() - Parameters->GetStartTimeIndex()); 02501 02502 #ifndef __NO_OMP__ 02503 #pragma omp parallel for schedule (static) if (sensor_size > 1e5) \ 02504 firstprivate(Divider) 02505 #endif 02506 for (size_t i = 0; i < sensor_size; i++){ 02507 ux_rms[i] = sqrtf(Divider * ux_rms[i]); 02508 uy_rms[i] = sqrtf(Divider * uy_rms[i]); 02509 uz_rms[i] = sqrtf(Divider * uz_rms[i]); 02510 } 02511 02512 02513 Get_ux_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02514 MatrixContainer[ux_sensor_rms].HDF5MatrixName.c_str(), 02515 Parameters->GetCompressionLevel()); 02516 Get_uy_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02517 MatrixContainer[uy_sensor_rms].HDF5MatrixName.c_str(), 02518 Parameters->GetCompressionLevel()); 02519 Get_uz_sensor_rms().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02520 MatrixContainer[uz_sensor_rms].HDF5MatrixName.c_str(), 02521 Parameters->GetCompressionLevel()); 02522 }// u_rms 02523 02524 02525 if (Parameters->IsStore_u_final()){ 02526 Get_ux_sgx().WriteDataToHDF5File(Parameters->HDF5_OutputFile,ux_final_Name,Parameters->GetCompressionLevel()); 02527 Get_uy_sgy().WriteDataToHDF5File(Parameters->HDF5_OutputFile,uy_final_Name,Parameters->GetCompressionLevel()); 02528 Get_uz_sgz().WriteDataToHDF5File(Parameters->HDF5_OutputFile,uz_final_Name,Parameters->GetCompressionLevel()); 02529 }// u_final 02530 02531 02532 if (Parameters->IsStore_u_raw()){ 02533 ux_sensor_raw_OutputStream->CloseStream(); 02534 uy_sensor_raw_OutputStream->CloseStream(); 02535 uz_sensor_raw_OutputStream->CloseStream(); 02536 }// u_raw 02537 02538 02539 02540 //-- I --// 02541 02542 // compute half step for intensity! 02543 if ((Parameters->IsStore_I_avg()) || (Parameters->IsStore_I_max())) { 02544 Compute_uxyz(); 02545 StoreIntensityData(); 02546 } 02547 02548 02549 02550 if (Parameters->IsStore_I_max()){ 02551 Get_Ix_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02552 MatrixContainer[Ix_sensor_max].HDF5MatrixName.c_str(), 02553 Parameters->GetCompressionLevel()); 02554 Get_Iy_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02555 MatrixContainer[Iy_sensor_max].HDF5MatrixName.c_str(), 02556 Parameters->GetCompressionLevel()); 02557 Get_Iz_sensor_max().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02558 MatrixContainer[Iz_sensor_max].HDF5MatrixName.c_str(), 02559 Parameters->GetCompressionLevel()); 02560 }// I_max 02561 02562 02563 02564 if (Parameters->IsStore_I_avg()){ 02565 Get_Ix_sensor_avg().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02566 MatrixContainer[Ix_sensor_avg].HDF5MatrixName.c_str(), 02567 Parameters->GetCompressionLevel()); 02568 Get_Iy_sensor_avg().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02569 MatrixContainer[Iy_sensor_avg].HDF5MatrixName.c_str(), 02570 Parameters->GetCompressionLevel()); 02571 Get_Iz_sensor_avg().WriteDataToHDF5File(Parameters->HDF5_OutputFile, 02572 MatrixContainer[Iz_sensor_avg].HDF5MatrixName.c_str(), 02573 Parameters->GetCompressionLevel()); 02574 }// I_avg 02575 02576 02577 }// end of PostPorcessing 02578 //------------------------------------------------------------------------------ 02579 02580 02581 02586 void TKSpaceFirstOrder3DSolver::StoreSensorData(){ 02587 02588 if (t_index < Parameters->GetStartTimeIndex()) return; 02589 02590 02591 if (Parameters->IsStore_p_raw()){ 02592 p_sensor_raw_OutputStream->AddData(Get_p(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData()); 02593 } 02594 02595 if (Parameters->IsStore_u_raw()){ 02596 ux_sensor_raw_OutputStream->AddData(Get_ux_sgx(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData()); 02597 uy_sensor_raw_OutputStream->AddData(Get_uy_sgy(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData()); 02598 uz_sensor_raw_OutputStream->AddData(Get_uz_sgz(),Get_sensor_mask_ind(),Get_Temp_1_RS3D().GetRawData()); 02599 } 02600 02601 02602 if (Parameters->IsStore_p_max()){ 02603 02604 const float * p = Get_p().GetRawData(); 02605 float * p_max = Get_p_sensor_max().GetRawData(); 02606 const long * index = Get_sensor_mask_ind().GetRawData(); 02607 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02608 02609 #ifndef __NO_OMP__ 02610 #pragma omp parallel for schedule (static) if (sensor_size > 1e5) 02611 #endif 02612 for (size_t i = 0; i <sensor_size; i++){ 02613 if (p_max[i] < p[index[i]]) p_max[i] = p[index[i]]; 02614 } 02615 }// p_max 02616 02617 if (Parameters->IsStore_p_rms()){ 02618 // square sum 02619 const float * p = Get_p().GetRawData(); 02620 float * p_rms = Get_p_sensor_rms().GetRawData(); 02621 const long * index = Get_sensor_mask_ind().GetRawData(); 02622 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02623 02624 #ifndef __NO_OMP__ 02625 #pragma omp parallel for schedule (static) if (sensor_size > 1e5) 02626 #endif 02627 for (size_t i = 0; i <sensor_size; i++){ 02628 p_rms[i] += (p[index[i]] * p[index[i]]); 02629 } 02630 }//p_rms 02631 02632 if (Parameters->IsStore_u_max()){ 02633 02634 const float* ux = Get_ux_sgx().GetRawData(); 02635 const float* uy = Get_uy_sgy().GetRawData(); 02636 const float* uz = Get_uz_sgz().GetRawData(); 02637 float* ux_max = Get_ux_sensor_max().GetRawData(); 02638 float* uy_max = Get_uy_sensor_max().GetRawData(); 02639 float* uz_max = Get_uz_sensor_max().GetRawData(); 02640 const long * index = Get_sensor_mask_ind().GetRawData(); 02641 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02642 #ifndef __NO_OMP__ 02643 #pragma omp parallel for schedule (static) if (sensor_size > 1e5) 02644 #endif 02645 for (size_t i = 0; i < sensor_size; i++){ 02646 if (ux_max[i] < ux[index[i]]) ux_max[i] = ux[index[i]]; 02647 if (uy_max[i] < uy[index[i]]) uy_max[i] = uy[index[i]]; 02648 if (uz_max[i] < uz[index[i]]) uz_max[i] = uz[index[i]]; 02649 } 02650 }// u_max 02651 02652 02653 if (Parameters->IsStore_u_rms()){ 02654 02655 const float* ux = Get_ux_sgx().GetRawData(); 02656 const float* uy = Get_uy_sgy().GetRawData(); 02657 const float* uz = Get_uz_sgz().GetRawData(); 02658 float* ux_rms = Get_ux_sensor_rms().GetRawData(); 02659 float* uy_rms = Get_uy_sensor_rms().GetRawData(); 02660 float* uz_rms = Get_uz_sensor_rms().GetRawData(); 02661 const long * index = Get_sensor_mask_ind().GetRawData(); 02662 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02663 02664 #ifndef __NO_OMP__ 02665 #pragma omp parallel for schedule (static) if (sensor_size > 1e5) 02666 #endif 02667 for (size_t i = 0; i <sensor_size; i++){ 02668 ux_rms[i] += (ux[index[i]] * ux[index[i]]); 02669 uy_rms[i] += (uy[index[i]] * uy[index[i]]); 02670 uz_rms[i] += (uz[index[i]] * uz[index[i]]); 02671 } 02672 }// u_rms 02673 02674 02675 02676 if ((Parameters->IsStore_I_max()) || (Parameters->IsStore_I_avg())) StoreIntensityData(); 02677 02678 02679 }// end of StoreData 02680 //------------------------------------------------------------------------------ 02681 02682 02687 void TKSpaceFirstOrder3DSolver::StoreIntensityData(){ 02688 02689 #ifndef __NO_OMP__ 02690 #pragma omp parallel 02691 #endif 02692 { 02693 const size_t sensor_size = Get_sensor_mask_ind().GetTotalElementCount(); 02694 02695 const float* ux = Get_ux_sgx().GetRawData(); 02696 const float* uy = Get_uy_sgy().GetRawData(); 02697 const float* uz = Get_uz_sgz().GetRawData(); 02698 const float* p = Get_p().GetRawData(); 02699 02700 float * ux_i_1 = Get_ux_sensor_i_1_agr_2().GetRawData(); 02701 float * uy_i_1 = Get_uy_sensor_i_1_agr_2().GetRawData(); 02702 float * uz_i_1 = Get_uz_sensor_i_1_agr_2().GetRawData(); 02703 float * p_i_1 = Get_p_sensor_i_1_raw().GetRawData(); 02704 02705 float * Ix_avg = NULL; 02706 float * Iy_avg = NULL; 02707 float * Iz_avg = NULL; 02708 02709 float * Ix_max = NULL; 02710 float * Iy_max = NULL; 02711 float * Iz_max = NULL; 02712 02713 02714 if (Parameters->IsStore_I_avg()) { 02715 Ix_avg = Get_Ix_sensor_avg().GetRawData(); 02716 Iy_avg = Get_Iy_sensor_avg().GetRawData(); 02717 Iz_avg = Get_Iz_sensor_avg().GetRawData(); 02718 } 02719 if (Parameters->IsStore_I_max()) { 02720 Ix_max = Get_Ix_sensor_max().GetRawData(); 02721 Iy_max = Get_Iy_sensor_max().GetRawData(); 02722 Iz_max = Get_Iz_sensor_max().GetRawData(); 02723 } 02724 02725 const long * index = Get_sensor_mask_ind().GetRawData(); 02726 02727 const TDimensionSizes Dims = Parameters->GetFullDimensionSizes(); 02728 02729 // calculate intensity 02730 if (t_index+1 > Parameters->GetStartTimeIndex()){ 02731 #ifndef __NO_OMP__ 02732 #pragma omp for schedule (static) 02733 #endif 02734 for (size_t i = 0; i < sensor_size; i++){ 02735 // calculate positions in the grid 02736 const size_t sensor_point_ind = index[i]; 02737 const size_t sensor_point_ind_1x = ((sensor_point_ind % Dims.X) == 0) ? 02738 sensor_point_ind : sensor_point_ind - 1; 02739 const size_t sensor_point_ind_1y = (((sensor_point_ind / Dims.X) % Dims.Y) == 0) ? 02740 sensor_point_ind : sensor_point_ind - Dims.X; 02741 const size_t sensor_point_ind_1z = (((sensor_point_ind / (Dims.Y * Dims.X))) == 0) ? 02742 sensor_point_ind : sensor_point_ind - (Dims.X * Dims.Y); 02743 02744 //avg of actual values of u in staggered grid 02745 const float ux_act = (ux[sensor_point_ind] + ux[sensor_point_ind_1x]) * 0.5f; 02746 const float uy_act = (uy[sensor_point_ind] + uy[sensor_point_ind_1y]) * 0.5f; 02747 const float uz_act = (uz[sensor_point_ind] + uz[sensor_point_ind_1z]) * 0.5f; 02748 const float p_data = p_i_1[i]; 02749 02750 // calculate actual intensity based on p(n-1) * 1/4[ u(i-1)(n-1) + u(i)(n-1) + u(i-1)(n) + u(i)(n) 02751 const float Ix = p_data * ((ux_act + ux_i_1[i]) * 0.5f); 02752 const float Iy = p_data * ((uy_act + uy_i_1[i]) * 0.5f); 02753 const float Iz = p_data * ((uz_act + uz_i_1[i]) * 0.5f); 02754 02755 ux_i_1[i] = ux_act; 02756 uy_i_1[i] = uy_act; 02757 uz_i_1[i] = uz_act; 02758 p_i_1[i] = p[sensor_point_ind]; 02759 02760 const float Divider = 1.0f / (float) (Parameters->Get_Nt() - Parameters->GetStartTimeIndex()); 02761 02762 // easily predictable... 02763 if (Parameters->IsStore_I_max()) { 02764 if (Ix_max[i] < Ix) Ix_max[i] = Ix; 02765 if (Iy_max[i] < Iy) Iy_max[i] = Iy; 02766 if (Iz_max[i] < Iz) Iz_max[i] = Iz; 02767 } 02768 02769 // easily predictable... 02770 if (Parameters->IsStore_I_avg()) { 02771 Ix_avg[i] += (Ix * Divider); 02772 Iy_avg[i] += (Iy * Divider); 02773 Iz_avg[i] += (Iz * Divider); 02774 } 02775 } 02776 } else { 02777 // first step of the recording. Store only actual data 02778 #ifndef __NO_OMP__ 02779 #pragma omp for schedule (static) 02780 #endif 02781 for (size_t i = 0; i < sensor_size; i++){ 02782 // calculate positions in the grid 02783 const size_t sensor_point_ind = index[i]; 02784 const size_t sensor_point_ind_1x = ((sensor_point_ind % Dims.X) == 0) ? 02785 sensor_point_ind : sensor_point_ind - 1; 02786 const size_t sensor_point_ind_1y = (((sensor_point_ind / Dims.X) % Dims.Y) == 0) ? 02787 sensor_point_ind : sensor_point_ind - Dims.X; 02788 const size_t sensor_point_ind_1z = (((sensor_point_ind / (Dims.Y * Dims.X))) == 0) ? 02789 sensor_point_ind : sensor_point_ind - (Dims.X * Dims.Y); 02790 02791 //avg of actual values of u in staggered grid 02792 ux_i_1[i] = (ux[sensor_point_ind] + ux[sensor_point_ind_1x]) * 0.5f; 02793 uy_i_1[i] = (uy[sensor_point_ind] + uy[sensor_point_ind_1y]) * 0.5f; 02794 uz_i_1[i] = (uz[sensor_point_ind] + uz[sensor_point_ind_1z]) * 0.5f; 02795 p_i_1[i] = p[sensor_point_ind]; 02796 02797 } 02798 }// else 02799 02800 }// parallel 02801 02802 02803 }// end of StoreIntensity 02804 //------------------------------------------------------------------------------ 02805 02809 void TKSpaceFirstOrder3DSolver::WriteOutputDataInfo(){ 02810 02811 // Write scalars 02812 Parameters->SaveScalarsToHDF5File(Parameters->HDF5_OutputFile); 02813 THDF5_FileHeader & HDF5_FileHeader = Parameters->HDF5_FileHeader; 02814 02815 // Write File header 02816 02817 HDF5_FileHeader.SetCodeName(GetCodeName()); 02818 HDF5_FileHeader.SetMajorFileVersion(); 02819 HDF5_FileHeader.SetMinorFileVersion(); 02820 HDF5_FileHeader.SetActualCreationTime(); 02821 HDF5_FileHeader.SetFileType(THDF5_FileHeader::hdf5_ft_output); 02822 HDF5_FileHeader.SetHostName(); 02823 02824 HDF5_FileHeader.SetMemoryConsumption(ShowMemoryUsageInMB()); 02825 02826 // Stop total timer here 02827 TotalTime.Stop(); 02828 HDF5_FileHeader.SetExecutionTimes(GetTotalTime(), 02829 GetDataLoadTime(), 02830 GetPreProcessingTime(), 02831 GetSimulationTime(), 02832 GetPostProcessingTime()); 02833 02834 02835 HDF5_FileHeader.SetNumberOfCores(); 02836 02837 02838 02839 HDF5_FileHeader.WriteHeaderToOutputFile(Parameters->HDF5_OutputFile); 02840 02841 02842 02843 }// end of WriteOutputDataInfo 02844 //------------------------------------------------------------------------------ 02845 02846 02847 //----------------------------------------------------------------------------// 02848 // Private methods // 02849 //----------------------------------------------------------------------------// 02850 02851