-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
967 lines (936 loc) · 81.5 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
/**
* @file main.cpp
*
* @author Jiri Jaros, Petr Kleparnik \n
* Faculty of Information Technology \n
* Brno University of Technology \n
*
* @brief The main file for the kspaceFirstOrder-CUDA.
*
* @version kspaceFirstOrder 3.6
*
* @date 11 July 2012, 10:57 (created) \n
* 08 February 2023, 12:00 (revised)
*
*
* @mainpage kspaceFirstOrder-CUDA
*
* @section Overview 1 Overview
*
* k-Wave is an open source MATLAB toolbox designed for the time-domain simulation of propagating acoustic waves in 1D,
* 2D, or 3D. The toolbox has a wide range of functionality, but at its heart is an advanced numerical model that can
* account for both linear or nonlinear wave propagation, an arbitrary distribution of weakly heterogeneous material
* parameters, and power law acoustic absorption, see the [k-Wave website] (http://www.k-wave.org).
*
* This project is a part of the k-Wave toolbox accelerating 2D/3D simulations using an optimized CUDA/C++
* implementation to run small to moderate grid sizes (64x64x64 to 512x512x512). This code uses a single NVIDIA GPU to
* accelerate the simulations (AMD GPUs are not supported).
*
* Compiled binaries of the CUDA/C++ code are available from (http://www.k-wave.org/download). Both 64-bit Linux
* (Ubuntu / Debian) and 64-bit Windows versions are provided. This Doxygen documentation was created based on the
* Linux version and provides details on the implementation of the CUDA/C++ code.
*
*
*
* @section Compilation 2 Compilation
*
* The source codes of <tt>kpsaceFirstOrder-CUDA</tt> are written using the C++11 standard (optional OpenMP 2.0
* library), NVIDIA CUDA 10.0 library and HDF5 1.8.x.
*
* There are variety of different C++ compilers that can be used to compile the source codes. We recommend using the GNU
* C++ compiler (g++) version 7.3, the Intel C++ compiler version 2018, or Visual Studio 2017. The version of
* the compiler is limited by the CUDA architecture version (CUDA 10.0 supports GCC up to 7.3). The code was tested with
* CUDA 10.0, however, the code was also tested with CUDA 8.0 and 9.x . The codes can be
* compiled on 64-bit Linux and Windows. 32-bit systems are not supported due to the the memory requirements even for
* small simulations.
*
* Before compiling the code, it is necessary to install CUDA, C++ compiler and the HDF5 library. The GNU compiler is
* usually part of Linux distributions and distributed as open source. It can be downloaded from [GNU website]
* (http://gcc.gnu.org/) if necessary. The Intel compiler can be downloaded
* from [Intel website] (https://software.intel.com/en-us/intel-parallel-studio-xe). The Intel compiler is only free for
* non-commercial use.
* The CUDA library can be downloaded from the [Nvidia website] (https://developer.nvidia.com/cuda-toolkit-archive).
* The only supported version are 8.0 - 10.0, however, the code is supposed to work with upcoming CUDA 11.0, but we
* cannot guarantee it.
*
* <b> 2.1 The HDF5 library installation procedure </b>
* 1. Download the 64-bit HDF5 library [package for your platform]
* (https://support.hdfgroup.org/HDF5/release/obtain518.html). Please use version 1.8.x, the version 1.10.x
* is not compatible with MATLAB yet.
*
* 2. Configure the HDF5 distribution. Enable the high-level library and specify an installation folder by typing:
\verbatim ./configure --enable-hl --prefix=folder_to_install \endverbatim
* 3. Make the HDF library by typing:
\verbatim make -j \endverbatim
* 4. Install the HDF library by typing:
\verbatim make install \endverbatim
*
*
* <b> 2.2 The CUDA installation procedure </b>
*
* 1. Download CUDA version [8.0] (https://developer.nvidia.com/cuda-toolkit-archive).
* 2. Follow the NVIDIA official installation guide for [Windows]
* (http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)
* and [Linux] (http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
*
*
* <b> 2.3 Compiling the CUDA/C++ code </b>
*
* When the libraries and the compiler have been installed, you are ready to compile the
* <tt>kspaceFirstOrder-CUDA</tt> code. The Makefile only supports code compilation under CUDA/g++ compiler, however,
* using different compilers would be analogous (<tt>-ccbin</tt> parameter for the <tt>nvcc</tt>compiler)
*
* 1. Download the <tt>kspaceFirstOrder-CUDA</tt> source codes.
*
* 2. Open the <tt> Makefile</tt> file.
* First, set the paths to CUDA and HDF5 libraries, then select how to link the code. Dynamic lining is preferred
* since it does not require reloading the CUDA-GPU driver, however, the code will very likely only run on the
* machine where compiled. The static linking allows to create a self-consistent binary with all libraries
* linked in. Unfortunately, since there is a bug in the static <tt>cufft</tt> library, it is still necessary to link
* dynamically to this library (and copy the lib file with the binary).
*
* 3. Compile the source code by typing:
\verbatim make -j \endverbatim If you want to clean the distribution, type:
\verbatim make clean \endverbatim
*
*
* @section Parameters 3 Command Line Parameters
*
* The CUDA/C++ code requires two mandatory parameters and accepts a few optional parameters and flags. Ill parameters,
* bad simulation files, and runtime errors such as out-of-memory problems, lead to an exception followed by an error
* message shown and execution termination.
*
* The mandatory parameters <tt>-i</tt> and <tt>-o</tt> specify the input and output file. The file names respect the
* path conventions for particular operating system. If any of the files is not specified, cannot be found or created,
* an error message is shown and the code terminates.
*
* The <tt>-t</tt> parameter sets the number of threads used, which defaults the system maximum. For the CUDA version,
* the number of threads is not critical, as the CPU only preprocess data. When running on desktops with multiple GPUs,
* or clusters with per-GPU resource allocations, it may be useful to limit the number of CPU threads.
*
* The <tt>-g</tt> parameter allows to explicitly select a GPU for the execution. The CUDA capable GPUs can be listed
* by the system command <tt>nvidia-smi</tt>. If the parameter is not specified, the code uses the first free GPU. If
* the GPUs are set in the <tt>CUDA DEFAULT</tt> mode, the first CUDA device is selected. In order to get the automatic
* round-robin GPU selection working (to e.g. execute multiple instances of the code on distinct GPUs), please set the
* GPUs into <tt>PROCESS_EXCLUSIVE</tt> mode. On clusters with a PBS scheduler, this is usually done automatically,
* so no need to change it by user.
*
* The <tt>-r</tt> parameter specifies how often information about the simulation progress is printed out to the command
* line. By default, the CUDA/C++ code prints out the progress of the simulation, the elapsed time, and the estimated
* time of completion in intervals corresponding to 5% of the total number of times steps.
*
* The <tt>-c</tt> parameter specifies the compression level used by the ZIP library to reduce the size of the output
* file. The actual compression rate is highly dependent on the shape of the sensor mask and the range of stored
* quantities and may be computationally expensive. In general, the output data is very hard to compress, and using
* higher compression levels can greatly increase the time to save data while not having a large impact on the final
* file size. That's why we decided to disable compression in default settings.
*
* The <tt>\--benchmark</tt> parameter enables the total length of simulation (i.e., the number of time steps) to be
* overridden by setting a new number of time steps to simulate. This is particularly useful for performance evaluation
* and benchmarking. As the code performance is relatively stable, 50-100 time steps is usually enough to predict the
* simulation duration. This parameter can also be used to quickly check the simulation is set up correctly.
*
* The <tt>\--verbose</tt> parameter enables to select between three levels of verbosity. For routine simulations, the
* verbose level of 0 (the default one) is usually sufficient. For more information about the simulation, checking the
* parameters of the simulation, code version, GPU used, file paths, and debugging running scripts, verbose levels
* 1 and 2 may be very useful.
*
* The <tt>-h</tt> and <tt>\--help</tt> parameters print all the parameters of the C++ code. The <tt>\--version </tt>
* parameter reports detail information about the code useful for debugging and bug reports. It prints out the internal
* version, the build date and time, the git hash allowing us to track the version of the source code, the operating
* system, the compiler name and version and the instruction set used.
*
* For jobs that are expected to run for a very long time, it may be useful to checkpoint and restart the execution.
* One motivation is the wall clock limit per task on clusters where jobs must fit within a given time span (e.g. 24
* hours). The second motivation is a level of fault-tolerance, where you can back up the state of the simulation after
* a predefined period. To enable checkpoint-restart, the user is asked to specify a file to store the actual state of
* the simulation by <tt>\--checkpoint_file</tt> and the period in seconds after which the simulation will be
* interrupted by <tt>\--checkpoint_interval</tt>. When running on a cluster, please allocate enough time for the
* checkpoint procedure that can take a non-negligible amount of time (7 matrices have to be stored in the
* checkpoint file and all aggregated quantities are flushed into the output file).
* Alternatively, the user can specify the number of time steps by <tt>\--checkpoint_timesteps</tt> after which the
* simulation is interrupted. The time step interval is calculated from the beginning of current leg, not from the
* beginning of the whole simulation. The user can combine both approaches, seconds and time steps. In this case the
* first condition met triggers the checkpoint.
* Please note, that the checkpoint
* file name and path is not checked at the beginning of the simulation, but at the time the code starts
* checkpointing. Thus make sure the file path was correctly specified (otherwise you will not find out the simulation
* crashed until the first leg of the simulation finishes). The rationale behind this is that to keep as high level of
* fault tolerance as possible, the checkpoint file should be touched even when really necessary.
*
* When controlling a multi-leg simulation by a script loop, the parameters of the code remains the same in all legs.
* The first leg of the simulation creates a checkpoint file while the last one deletes it. If the checkpoint file is
* not found the simulation starts from the beginning. In order to find out how many steps have been finished, please
* open the output file and read the variable <tt>t_index</tt> and compare it with <tt>Nt</tt> (e.g. by the h5dump
* command).
*
*
* The remaining flags specify the output quantities to be recorded during the simulation and stored on disk analogous
* to the sensor.record input. If the <tt>-p</tt> or <tt>\--p\_raw</tt> flags are set (these are equivalent), a time
* series of the acoustic pressure at the grid points specified by the sensor mask is recorded. If the
* <tt>\--p_rms</tt>, <tt>\--p_max</tt>, <tt>\--p_min</tt> flags are set, the root mean square and/or maximum and/or
* minimum values of the pressure at the grid points specified by the sensor mask are recorded. If the
* <tt>\--p_final</tt> flag is set, the values for the entire acoustic pressure field in the final time step of the
* simulation is stored (this will always include the PML, regardless of the setting for <tt> 'PMLInside'</tt>).
* The flags <tt>\--p_max_all</tt> and <tt>\--p_min_all</tt> allow to calculate the maximum and minimum values over the
* entire acoustic pressure field, regardless on the shape of the sensor mask. Flags to record the acoustic particle
* velocity are defined in an analogous fashion. For proper calculation of acoustic intensity, the particle velocity
* has to be shifted onto the same grid as the acoustic pressure. This can be done by setting
* <tt>\--u_non_staggered_raw</tt> flag, that first shifts the particle velocity and then samples the grid points
* specified by the sensor mask. Since the shift operation requires additional FFTs, the impact on the simulation time
* may be significant.
*
* Any combination of <tt>p</tt> and <tt>u</tt> flags is admissible. If no output flag is set, a time-series for the
* acoustic pressure is recorded. If it is not necessary to collect the output quantities over the entire simulation,
* the starting time step when the collection begins can be specified using the -s parameter. Note, the index for the
* first time step is 1 (this follows the MATLAB indexing convention).
*
* The <tt>\--copy_sensor_mask</tt> will copy the sensor from the input file to the output one at the end of the
* simulation. This helps in post-processing and visualisation of the outputs.
*
*
\verbatim
┌───────────────────────────────────────────────────────────────┐
│ kspaceFirstOrder-CUDA v1.3 │
├───────────────────────────────────────────────────────────────┤
│ Usage │
├───────────────────────────────────────────────────────────────┤
│ Mandatory parameters │
├───────────────────────────────────────────────────────────────┤
│ -i <file_name> │ HDF5 input file │
│ -o <file_name> │ HDF5 output file │
├───────────────────────────────┴───────────────────────────────┤
│ Optional parameters │
├───────────────────────────────┬───────────────────────────────┤
│ -t <num_threads> │ Number of CPU threads │
│ │ (default = 1) │
│ -g <device_number> │ GPU device to run on │
│ │ (default = the first free) │
│ -r <interval_in_%> │ Progress print interval │
│ │ (default = 5%) │
│ -c <compression_level> │ Compression level <0,9> │
│ │ (default = 0) │
│ --benchmark <time_steps> │ Run only a specified number │
│ │ of time steps │
│ --verbose <level> │ Level of verbosity <0,2> │
│ │ 0 - basic, 1 - advanced, │
│ │ 2 - full │
│ │ (default = basic) │
│ -h, --help │ Print help │
│ --version │ Print version and build info │
├───────────────────────────────┼───────────────────────────────┤
│ --checkpoint_file <file_name> │ HDF5 Checkpoint file │
│ --checkpoint_interval <sec> │ Checkpoint after a given │
│ │ number of seconds │
│ --checkpoint_timesteps <step> │ Checkpoint after a given │
│ │ number of time steps │
├───────────────────────────────┴───────────────────────────────┤
│ Output flags │
├───────────────────────────────┬───────────────────────────────┤
│ -p │ Store acoustic pressure │
│ │ (default output flag) │
│ │ (the same as --p_raw) │
│ --p_raw │ Store raw time series of p │
│ --p_c │ Store compressed │
│ │ time series of p │
│ --p_rms │ Store rms of p │
│ --p_max │ Store max of p │
│ --p_min │ Store min of p │
│ --p_max_all │ Store max of p (whole domain) │
│ --p_min_all │ Store min of p (whole domain) │
│ --p_final │ Store final pressure field │
├───────────────────────────────┼───────────────────────────────┤
│ -u │ Store ux, uy, uz │
│ │ (the same as --u_raw) │
│ --u_raw │ Store raw time series of │
│ │ ux, uy, uz │
│ --u_c │ Store compressed │
│ │ time series of ux, uy, uz │
│ --u_non_staggered_raw │ Store non-staggered raw time │
│ │ series of ux, uy, uz │
│ --u_non_staggered_c │ Store non-staggered compressed│
│ │ time series of ux, uy, uz │
│ --u_rms │ Store rms of ux, uy, uz │
│ --u_max │ Store max of ux, uy, uz │
│ --u_min │ Store min of ux, uy, uz │
│ --u_max_all │ Store max of ux, uy, uz │
│ │ (whole domain) │
│ --u_min_all │ Store min of ux, uy, uz │
│ │ (whole domain) │
│ --u_final │ Store final acoustic velocity │
├───────────────────────────────┼───────────────────────────────┤
│ --I_avg │ Store average intensity │
│ --I_avg_c │ Store average intensity │
│ │ computed using compression │
│ --Q_term │ Store Q term (volume rate of │
│ │ heat deposition) │
│ --Q_term_c │ Store Q term (volume rate of │
│ │ heat deposition) computed │
│ │ using compression │
│ --post │ Do not simulate, do only │
│ │ post-processing (Compute │
│ │ I_avg(_c) and Q_term(_c)) │
│ --block_size │ Maximum block size for │
│ │ dataset reading (computing │
│ │ average intensity without │
│ │ compression) │
│ --no_overlap │ Compression without basis │
│ │ overlapping flag - less RAM │
│ │ , more errors │
│ --40-bit_complex │ Compression 64-bit complex │
│ │ floats to 40-bit │
├───────────────────────────────┴───────────────────────────────┤
│ Time series compression flags │
├───────────────────────────────┬───────────────────────────────┤
│ --frequency <frequency> │ Frequency for time series │
│ │ compression (needs dt for │
│ │ computing period) │
│ --period <period> │ Period for time series │
│ │ compression │
│ │ (default = computed from │
│ │ p_source_input dataset) │
│ --mos <mos> │ Multiple of overlap size for │
│ │ compression │
│ │ (default = 1) │
│ --harmonics <harmonics> │ Number of harmonics for │
│ │ compression (default = 1) │
├───────────────────────────────┼───────────────────────────────┤
│ -s <time_step> │ When data collection begins │
│ │ (default = 1) │
│ --copy_sensor_mask │ Copy sensor mask to the │
│ │ output file │
└───────────────────────────────┴───────────────────────────────┘
\endverbatim
*
*
*
* @section HDF HDF5 File Structure
*
* The CUDA/C++ code has been designed as a standalone application which is not dependent on MATLAB libraries or a MEX
* interface. This is of particular importance when using servers and supercomputers without MATLAB support. For this
* reason, simulation data must be transferred between the CUDA/C++ code and MATLAB using external input and output
* files. These files are stored using the [Hierarchical Data Format HDF5] (http://www.hdfgroup.org/HDF5/). This is a
* data model, library, and file format for storing and managing data. It supports a variety of datatypes, and is
* designed for flexible and efficient I/O and for high volume and complex data. The HDF5 technology suite includes
* tools and applications for managing, manipulating, viewing, and analysing data in the HDF5 format.
*
*
* Each HDF5 file is a container for storing a variety of scientific data and is composed of two primary types of
* objects: groups and datasets. An HDF5 group is a structure containing zero or more HDF5 objects, together with
* supporting metadata. A HDF5 group can be seen as a disk folder. An HDF5 dataset is a multidimensional array of data
* elements, together with supporting metadata. A HDF5 dataset can be seen as a disk file. Any HDF5 group or dataset
* may also have an associated attribute list. An HDF5 attribute is a user-defined HDF5 structure that provides extra
* information about an HDF5 object. More information can be obtained from the [HDF5 documentation]
* (http://www.hdfgroup.org/HDF5/doc/index.html).
*
*
* kspaceFirstOrder-CUDA v1.3 uses the file format introduced in version 1.1. The code is happy to work with both
* versions (1.0 and 1.1), however when working with an input file of version 1.0, some features are not supported,
* namely the cuboid sensor mask, and <tt>u_non_staggered_raw</tt>. When running from within the actual MATLAB K-Wave
* Toolbox, the files will always be generated in version 1.1.
*
* The HDF5 input file for the CUDA/C++ simulation code contains a file header with brief description of the simulation
* stored in string attributes, and the root group <tt>'/'</tt> which stores all the simulation properties in the form
* of 3D datasets no matter the medium is 2D or 3D (a complete list of input datasets is given bellow).
* In the case of 2D simulation, Nz equals to 1.
*
* The HDF5 checkpoint file contains the same file header as the input file and the root group <tt>'/'</tt> with a few
* datasets keeping the actual simulation state. The HDF5 output file contains a file header with the simulation
* description as well as performance statistics, such as the simulation time and memory consumption, stored in string
* attributes.
* The results of the simulation are stored in the root group <tt>'/'</tt> in the form of 3D datasets. If the linear
* sensor mask is used, all output quantities are stored as datasets in the root group. If the cuboid corners sensor
* mask is used, the sampled quantities form private groups containing datasets on per cuboid basis.
*
*\verbatim
+----------------------------------------------------------------------------------------------------------------------+
| Input File / Checkpoint File Header |
+----------------------------------------------------------------------------------------------------------------------+
| created_by Short description of the tool that created this file |
| creation_date Date when the file was created |
| file_description Short description of the content of the file (e.g. simulation name) |
| file_type Type of the file (input) |
| major_version Major version of the file definition (1) |
| minor_version Minor version of the file definition (1) |
+----------------------------------------------------------------------------------------------------------------------+
\endverbatim
*
* \verbatim
+----------------------------------------------------------------------------------------------------------------------+
| Output File Header |
+----------------------------------------------------------------------------------------------------------------------+
| created_by Short description of the tool that created this file |
| creation_date Date when the file was created |
| file_description Short description of the content of the file (e.g. simulation name) |
| file_type Type of the file (output) |
| major_version Major version of the file definition (1) |
| minor_version Minor version of the file definition (1) |
+----------------------------------------------------------------------------------------------------------------------+
| host_names List of hosts (computer names) the simulation was executed on |
| number_of_cpu_cores Number of CPU cores used for the simulation |
| data_loading_phase_execution_time Time taken to load data from the file |
| pre-processing_phase_execution_time Time taken to pre-process data |
| simulation_phase_execution_time Time taken to run the simulation |
| post-processing_phase_execution_time Time taken to complete the post-processing phase |
| total_execution_time Total execution time |
| peak_core_memory_in_use Peak memory required per core during the simulation |
| total_memory_in_use Total Peak memory in use |
+----------------------------------------------------------------------------------------------------------------------+
| mStoreQTermFlag Store Q term flag |
| mStoreQTermCFlag Store Q term compressed flag |
| mStoreIntensityAvgFlag Store average intensity flag |
| mStoreIntensityAvgCFlag Store average intensity compressed flag |
| mNoCompressionOverlapFlag No compression overlap flag |
| mPeriod Compression period |
| mMOS Multiple of overlap size for compression |
| mHarmonics Number of harmonics for compression |
| mBlockSizeDefault Maximum block size for dataset reading - computing average intensity without |
| compression) |
| mBlockSizeDefaultC Maximum block size for dataset reading - computing average intensity with |
| compression) |
| mSamplingStartTimeStep When data collection begins |
| output_file_size Output file size |
| simulation_peak_host_memory_in_use Simulation peak host memory in use |
| simulation_peak_device_memory_in_use Simulation peak GPU memory in use |
| average_sampling_iteration_time Average sampling iteration time |
| sampling_time Sampling time |
| average_non-sampling_iteration_time Average non-sampling iteration time |
| non-sampling_time Non-sampling time |
+----------------------------------------------------------------------------------------------------------------------+
\endverbatim
*
*
*
* The input and checkpoint file stores all quantities as three dimensional datasets with dimension sizes designed by
* <tt>(Nx, Ny, Nz)</tt>. In order to support scalars and 1D and 2D arrays, the unused dimensions are set to 1. For
* example, scalar variables are stored with a dimension size of <tt>(1,1,1)</tt>, 1D vectors oriented in y-direction
* are stored with a dimension size of <tt>(1, Ny, 1)</tt>, and so on. If the dataset stores a complex variable, the
* real and imaginary parts are stored in an interleaved layout and the lowest used dimension size is doubled (i.e.,
* Nx for a 3D matrix, Ny for a 1D vector oriented in the y-direction). The datasets are physically stored in row-major
* order (in contrast to column-major order used by MATLAB) using either the <tt>'H5T_IEEE_F32LE'</tt> data type for
* floating point datasets or <tt>'H5T_STD_U64LE'</tt> for integer based datasets. All the datasets are store under the
* root group.
*
*
* The output file of version 1.0 could only store recorded quantities as 3D datasets under the root group. However,
* with version 1.1 and the new cuboid corner sensor mask, the sampled quantities may be laid out as 4D quantities
* stored under specific groups. The dimensions are always <tt>(Nx, Ny, Nz, Nt)</tt>, every sampled cuboid is stored as
* a distinct dataset and the datasets are grouped under a group named by the quantity stored. This makes the file
* clearly readable and easy to parse.
*
*
* In order to enable compression and more efficient data processing, big datasets are not stored as monolithic blocks
* but broken into chunks that may be compressed by the ZIP library and stored separately. The chunk size is defined
* as follows:
*
* \li <tt> (1M elements, 1, 1) </tt> in the case of 1D variables - index sensor mask (8MB blocks).
* \li <tt> (Nx, Ny, 1) </tt> in the case of 3D variables (one 2D slab).
* \li <tt> (Nx, Ny, Nz, 1) </tt> in the case of 4D variables (one time step).
* \li <tt> (N_sensor_points, 1, 1) </tt> in the case of the output time series (one time step of
* the simulation).
*
*
* All datasets have two attributes that specify the content of the dataset. The <tt>'data_type'</tt> attribute
* specifies the data type of the dataset. The admissible values are either <tt>'float'</tt> or <tt>'long'</tt>. The
* <tt>'domain_type'</tt> attribute specifies the domain of the dataset. The admissible values are either <tt>'real'
* </tt> for the real domain or \c 'complex' for the complex domain. The C++ code reads these attributes and checks
* their values.
*
*
*
* \verbatim
+----------------------------------------------------------------------------------------------------------------------+
| Input File Datasets |
+----------------------------------------------------------------------------------------------------------------------+
| Name Size Data type Domain Type Condition of Presence |
+----------------------------------------------------------------------------------------------------------------------+
| 1. Simulation Flags |
+----------------------------------------------------------------------------------------------------------------------+
| ux_source_flag (1, 1, 1) long real |
| uy_source_flag (1, 1, 1) long real |
| uz_source_flag (1, 1, 1) long real Nz > 1 |
| p_source_flag (1, 1, 1) long real |
| p0_source_flag (1, 1, 1) long real |
| transducer_source_flag (1, 1, 1) long real |
| nonuniform_grid_flag (1, 1, 1) long real must be set to 0 |
| nonlinear_flag (1, 1, 1) long real |
| absorbing_flag (1, 1, 1) long real |
+----------------------------------------------------------------------------------------------------------------------+
2. Grid Properties |
+----------------------------------------------------------------------------------------------------------------------+
| Nx (1, 1, 1) long real |
| Ny (1, 1, 1) long real |
| Nz (1, 1, 1) long real |
| Nt (1, 1, 1) long real |
| dt (1, 1, 1) float real |
| dx (1, 1, 1) float real |
| dy (1, 1, 1) float real |
| dz (1, 1, 1) float real Nz > 1 |
+----------------------------------------------------------------------------------------------------------------------+
| 3. Medium Properties |
+----------------------------------------------------------------------------------------------------------------------+
| 3.1 Regular Medium Properties |
| rho0 (Nx, Ny, Nz) float real heterogenous |
| (1, 1, 1) float real homogenous |
| rho0_sgx (Nx, Ny, Nz) float real heterogenous |
| (1, 1, 1) float real homogenous |
| rho0_sgy (Nx, Ny, Nz) float real heterogenous |
| (1, 1, 1) float real homogenous |
| rho0_sgz (Nx, Ny, Nz) float real Nz > 1 and heterogenous |
| (1, 1, 1) float real Nz > 1 and homogenous |
| c0 (Nx, Ny, Nz) float real heterogenous |
| (1, 1, 1) float real homogenous |
| c_ref (1, 1, 1) float real |
| |
| 3.2 Nonlinear Medium Properties (defined if (nonlinear_flag == 1)) |
| BonA (Nx, Ny, Nz) float real heterogenous |
| (1, 1, 1) float real homogenous |
| |
| 3.3 Absorbing Medium Properties (defined if (absorbing_flag == 1)) |
| alpha_coef (Nx, Ny, Nz) float real heterogenous |
| (1, 1, 1) float real homogenous |
| alpha_power (1, 1, 1) float real |
+----------------------------------------------------------------------------------------------------------------------+
| 4. Sensor Variables |
+----------------------------------------------------------------------------------------------------------------------+
| sensor_mask_type (1, 1, 1) long real file version 1.1 |
| (0 = index, 1 = corners) |
| sensor_mask_index (Nsens, 1, 1) long real file version 1.0 always, |
| file version 1.1 and sensor_mask_type == 0 |
| sensor_mask_corners (Ncubes, 6, 1) long real file version 1.1 and sensor_mask_type == 1 |
+----------------------------------------------------------------------------------------------------------------------+
| 5 Source Properties |
+----------------------------------------------------------------------------------------------------------------------+
| 5.1 Velocity Source Terms (defined if (ux_source_flag == 1 || uy_source_flag == 1 || uz_source_flag == 1)) |
| u_source_mode (1, 1, 1) long real |
| u_source_many (1, 1, 1) long real |
| u_source_index (Nsrc, 1, 1) long real |
| ux_source_input (1, Nt_src, 1) float real u_source_many == 0 |
| (Nsrc, Nt_src, 1) float real u_source_many == 1 |
| uy_source_input (1, Nt_src, 1) float real u_source_many == 0 |
| (Nsrc, Nt_src, 1) float real u_source_many == 1 |
| uz_source_input (1, Nt_src, 1) float real Nz > 1 and u_source_many == 0 |
| (Nt_src, Nsrc, 1) float real Nz > 1 and u_source_many == 1 |
| |
| 5.2 Pressure Source Terms (defined if (p_source_flag == 1)) |
| p_source_mode (1, 1, 1) long real |
| p_source_many (1, 1, 1) long real |
| p_source_index (Nsrc, 1, 1) long real |
| p_source_input (Nsrc, Nt_src, 1) float real p_source_many == 1 |
| (1, Nt_src, 1) float real p_source_many == 0 |
| |
| 5.3 Transducer Source Terms (defined if (transducer_source_flag == 1)) |
| u_source_index (Nsrc, 1, 1) long real |
| transducer_source_input (Nt_src, 1, 1) float real |
| delay_mask (Nsrc, 1, 1) float real |
| |
| 5.4 IVP Source Terms (defined if ( p0_source_flag == 1)) |
| p0_source_input (Nx, Ny, Nz) float real |
+----------------------------------------------------------------------------------------------------------------------+
| 6. K-space and Shift Variables |
+----------------------------------------------------------------------------------------------------------------------+
| ddx_k_shift_pos_r (Nx/2 + 1, 1, 1) float complex |
| ddx_k_shift_neg_r (Nx/2 + 1, 1, 1) float complex |
| ddy_k_shift_pos (1, Ny, 1) float complex |
| ddy_k_shift_neg (1, Ny, 1) float complex |
| ddz_k_shift_pos (1, 1, Nz) float complex Nz > 1 |
| ddz_k_shift_neg (1, 1, Nz) float complex Nz > 1 |
| x_shift_neg_r (Nx/2 + 1, 1, 1) float complex file version 1.1 |
| y_shift_neg_r (1, Ny/2 + 1, 1) float complex file version 1.1 |
| z_shift_neg_r (1, 1, Nz/2) float complex Nz > 1 and file version 1.1 |
+----------------------------------------------------------------------------------------------------------------------+
| 7. PML Variables |
+----------------------------------------------------------------------------------------------------------------------+
| pml_x_size (1, 1, 1) long real |
| pml_y_size (1, 1, 1) long real |
| pml_z_size (1, 1, 1) long real Nz > 1 |
| pml_x_alpha (1, 1, 1) float real |
| pml_y_alpha (1, 1, 1) float real |
| pml_z_alpha (1, 1, 1) float real Nz > 1 |
| |
| pml_x (Nx, 1, 1) float real |
| pml_x_sgx (Nx, 1, 1) float real |
| pml_y (1, Ny, 1) float real |
| pml_y_sgy (1, Ny, 1) float real |
| pml_z (1, 1, Nz) float real Nz > 1 |
| pml_z_sgz (1, 1, Nz) float real Nz > 1 |
+----------------------------------------------------------------------------------------------------------------------+
\endverbatim
\verbatim
+----------------------------------------------------------------------------------------------------------------------+
| Checkpoint File Datasets |
+----------------------------------------------------------------------------------------------------------------------+
| Name Size Data type Domain Type Condition of Presence |
+----------------------------------------------------------------------------------------------------------------------+
| 1. Grid Properties |
+----------------------------------------------------------------------------------------------------------------------+
| Nx (1, 1, 1) long real |
| Ny (1, 1, 1) long real |
| Nz (1, 1, 1) long real |
| t_index (1, 1, 1) long real |
+----------------------------------------------------------------------------------------------------------------------+
| 2. Simulation State |
+----------------------------------------------------------------------------------------------------------------------+
| p (Nx, Ny, Nz) float real |
| ux_sgx (Nx, Ny, Nz) float real |
| uy_sgy (Nx, Ny, Nz) float real |
| uz_sgz (Nx, Ny, Nz) float real Nz > 1 |
| rhox (Nx, Ny, Nz) float real |
| rhoy (Nx, Ny, Nz) float real |
| rhoz (Nx, Ny, Nz) float real Nz > 1 |
+----------------------------------------------------------------------------------------------------------------------+
\endverbatim
\verbatim
+----------------------------------------------------------------------------------------------------------------------+
| Output File Datasets |
+----------------------------------------------------------------------------------------------------------------------+
| Name Size Data type Domain Type Condition of Presence |
+----------------------------------------------------------------------------------------------------------------------+
| 1. Simulation Flags |
+----------------------------------------------------------------------------------------------------------------------+
| ux_source_flag (1, 1, 1) long real |
| uy_source_flag (1, 1, 1) long real |
| uz_source_flag (1, 1, 1) long real Nz > 1 |
| p_source_flag (1, 1, 1) long real |
| p0_source_flag (1, 1, 1) long real |
| transducer_source_flag (1, 1, 1) long real |
| nonuniform_grid_flag (1, 1, 1) long real |
| nonlinear_flag (1, 1, 1) long real |
| absorbing_flag (1, 1, 1) long real |
| u_source_mode (1, 1, 1) long real if u_source |
| u_source_many (1, 1, 1) long real if u_source |
| p_source_mode (1, 1, 1) long real if p_source |
| p_source_many (1, 1, 1) long real if p_source |
+----------------------------------------------------------------------------------------------------------------------+
| 2. Grid Properties |
+----------------------------------------------------------------------------------------------------------------------+
| Nx (1, 1, 1) long real |
| Ny (1, 1, 1) long real |
| Nz (1, 1, 1) long real |
| Nt (1, 1, 1) long real |
| t_index (1, 1, 1) long real |
| dt (1, 1, 1) float real |
| dx (1, 1, 1) float real |
| dy (1, 1, 1) float real |
| dz (1, 1, 1) float real Nz > 1 |
+----------------------------------------------------------------------------------------------------------------------+
| 3. PML Variables |
+----------------------------------------------------------------------------------------------------------------------+
| pml_x_size (1, 1, 1) long real |
| pml_y_size (1, 1, 1) long real |
| pml_z_size (1, 1, 1) long real Nz > 1 |
| pml_x_alpha (1, 1, 1) float real |
| pml_y_alpha (1, 1, 1) float real |
| pml_z_alpha (1, 1, 1) float real Nz > 1 |
| |
+----------------------------------------------------------------------------------------------------------------------+
| 4. Sensor Variables (present if --copy_sensor_mask) |
+----------------------------------------------------------------------------------------------------------------------+
| sensor_mask_type (1, 1, 1) long real file version 1.1 and --copy_sensor_mask |
| sensor_mask_index (Nsens, 1, 1) long real file version 1.1 and sensor_mask_type == 0 |
| sensor_mask_corners (Ncubes, 6, 1) long real file version 1.1 and sensor_mask_type == 1 |
+----------------------------------------------------------------------------------------------------------------------+
| 5a. Simulation Results: if sensor_mask_type == 0 (index), or File version == 1.0 |
+----------------------------------------------------------------------------------------------------------------------+
| p (Nsens, Nt - s, 1) float real -p or --p_raw |
| p_c (NsensC, Nc, 1) float complex --p_c |
| p_rms (Nsens, 1, 1) float real --p_rms |
| p_max (Nsens, 1, 1) float real --p_max |
| p_min (Nsens, 1, 1) float real --p_min |
| p_max_all (Nx, Ny, Nz) float real --p_max_all |
| p_min_all (Nx, Ny, Nz) float real --p_min_all |
| p_final (Nx, Ny, Nz) float real --p_final |
| |
| ux (Nsens, Nt - s, 1) float real -u or --u_raw |
| uy (Nsens, Nt - s, 1) float real -u or --u_raw |
| uz (Nsens, Nt - s, 1) float real -u or --u_raw and Nz > 1 |
| |
| ux_c (NsensC, Nc, 1) float complex --u_c |
| uy_c (NsensC, Nc, 1) float complex --u_c |
| uz_c (NsensC, Nc, 1) float complex --u_c and Nz > 1 |
| |
| ux_non_staggered (Nsens, Nt - s, 1) float real --u_non_staggered_raw (File version ==1.1) |
| uy_non_staggered (Nsens, Nt - s, 1) float real --u_non_staggered_raw (File version ==1.1) |
| uz_non_staggered (Nsens, Nt - s, 1) float real --u_non_staggered_raw (File version ==1.1) |
| and Nz > 1 |
| |
| ux_non_staggered_c (NsensC, Nc, 1) float complex --u_non_staggered_c (File version ==1.1) |
| uy_non_staggered_c (NsensC, Nc, 1) float complex --u_non_staggered_c (File version ==1.1) |
| uz_non_staggered_c (NsensC, Nc, 1) float complex --u_non_staggered_c (File version ==1.1) |
| and Nz > 1 |
| |
| ux_rms (Nsens, 1, 1) float real --u_rms |
| uy_rms (Nsens, 1, 1) float real --u_rms |
| uz_rms (Nsens, 1, 1) float real --u_rms and Nz > 1 |
| |
| ux_max (Nsens, 1, 1) float real --u_max |
| uy_max (Nsens, 1, 1) float real --u_max |
| uz_max (Nsens, 1, 1) float real --u_max and Nz > 1 |
| |
| ux_min (Nsens, 1, 1) float real --u_min |
| uy_min (Nsens, 1, 1) float real --u_min |
| uz_min (Nsens, 1, 1) float real --u_min and Nz > 1 |
| |
| ux_max_all (Nx, Ny, Nz) float real --u_max_all |
| uy_max_all (Nx, Ny, Nz) float real --u_max_all |
| uz_max_all (Nx, Ny, Nz) float real --u_max_all and Nz > 1 |
| |
| ux_min_all (Nx, Ny, Nz) float real --u_min_all |
| uy_min_all (Nx, Ny, Nz) float real --u_min_all |
| uz_min_all (Nx, Ny, Nz) float real --u_min_all and Nz > 1 |
| |
| ux_final (Nx, Ny, Nz) float real --u_final |
| uy_final (Nx, Ny, Nz) float real --u_final |
| uz_final (Nx, Ny, Nz) float real --u_final and Nz > 1 |
| |
| Ix_avg (Nx, Ny, Nz) float real --I_avg |
| Iy_avg (Nx, Ny, Nz) float real --I_avg |
| Iz_avg (Nx, Ny, Nz) float real --I_avg and Nz > 1 |
| |
| Q_term (Nx, Ny, Nz) float real --Q_term |
| |
| Ix_avg_c (Nx, Ny, Nz) float real --I_avg_c |
| Iy_avg_c (Nx, Ny, Nz) float real --I_avg_c |
| Iz_avg_c (Nx, Ny, Nz) float real --I_avg_c and Nz > 1 |
| |
| Q_term_c (Nx, Ny, Nz) float real --Q_term_c |
+----------------------------------------------------------------------------------------------------------------------+
| 5b. Simulation Results: if sensor_mask_type == 1 (corners) and file version == 1.1 |
+----------------------------------------------------------------------------------------------------------------------+
| /p group of datasets, one per cuboid -p or --p_raw |
| /p/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /p/2 (Cx, Cy, Cz, Nt-s) float real 2nd sampled cuboid, etc. |
| |
| /p_c group of datasets, one per cuboid --p_c |
| /p_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| /p_c/2 (CxC, Cy, Cz, Nc) float complex 2nd sampled cuboid, etc. |
| |
| /p_rms group of datasets, one per cuboid --p_rms |
| /p_rms/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| /p_max group of datasets, one per cuboid --p_max |
| /p_max/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| /p_min group of datasets, one per cuboid --p_min |
| /p_min/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| p_max_all (Nx, Ny, Nz) float real --p_max_all |
| p_min_all (Nx, Ny, Nz) float real --p_min_all |
| p_final (Nx, Ny, Nz) float real --p_final |
| |
| |
| /ux group of datasets, one per cuboid -u or --u_raw |
| /ux/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uy group of datasets, one per cuboid -u or --u_raw |
| /uy/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uz group of datasets, one per cuboid -u or --u_raw and Nz > 1 |
| /uz/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| /ux_c group of datasets, one per cuboid --u_c |
| /ux_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| /uy_c group of datasets, one per cuboid --u_c |
| /uy_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| /uz_c group of datasets, one per cuboid --u_c and Nz > 1 |
| /uz_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| |
| /ux_non_staggered group of datasets, one per cuboid --u_non_staggered_raw |
| /ux_non_staggered/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uy_non_staggered group of datasets, one per cuboid --u_non_staggered_raw |
| /uy_non_staggered/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uz_non_staggered group of datasets, one per cuboid --u_non_staggered_raw and Nz > 1 |
| /uz_non_staggered/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| /ux_non_staggered_c group of datasets, one per cuboid --u_non_staggered_c |
| /ux_non_staggered_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| /uy_non_staggered_c group of datasets, one per cuboid --u_non_staggered_c |
| /uy_non_staggered_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| /uz_non_staggered_c group of datasets, one per cuboid --u_non_staggered_c and Nz > 1 |
| /uz_non_staggered_c/1 (CxC, Cy, Cz, Nc) float complex 1st sampled cuboid |
| |
| /ux_rms group of datasets, one per cuboid --u_rms |
| /ux_rms/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uy_rms group of datasets, one per cuboid --u_rms |
| /uy_rms/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uz_rms group of datasets, one per cuboid --u_rms and Nz > 1 |
| /uy_rms/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| /ux_max group of datasets, one per cuboid --u_max |
| /ux_max/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uy_max group of datasets, one per cuboid --u_max |
| /ux_max/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uz_max group of datasets, one per cuboid --u_max and Nz > 1 |
| /ux_max/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| /ux_min group of datasets, one per cuboid --u_min |
| /ux_min/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uy_min group of datasets, one per cuboid --u_min |
| /ux_min/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| /uz_min group of datasets, one per cuboid --u_min and Nz > 1 |
| /ux_min/1 (Cx, Cy, Cz, Nt-s) float real 1st sampled cuboid |
| |
| ux_max_all (Nx, Ny, Nz) float real --u_max_all |
| uy_max_all (Nx, Ny, Nz) float real --u_max_all |
| uz_max_all (Nx, Ny, Nz) float real --u_max_all and Nz > 1 |
| |
| ux_min_all (Nx, Ny, Nz) float real --u_min_all |
| uy_min_all (Nx, Ny, Nz) float real --u_min_all |
| uz_min_all (Nx, Ny, Nz) float real --u_min_all and Nz > 1 |
| |
| ux_final (Nx, Ny, Nz) float real --u_final |
| uy_final (Nx, Ny, Nz) float real --u_final |
| uz_final (Nx, Ny, Nz) float real --u_final and Nz > 1 |
| |
| Ix_avg (Nx, Ny, Nz) float real --I_avg |
| Iy_avg (Nx, Ny, Nz) float real --I_avg |
| Iz_avg (Nx, Ny, Nz) float real --I_avg and Nz > 1 |
| |
| Q_term (Nx, Ny, Nz) float real --Q_term |
| |
| Ix_avg_c (Nx, Ny, Nz) float real --I_avg_c |
| Iy_avg_c (Nx, Ny, Nz) float real --I_avg_c |
| Iz_avg_c (Nx, Ny, Nz) float real --I_avg_c and Nz > 1 |
| |
| Q_term_c (Nx, Ny, Nz) float real --Q_term_c |
+----------------------------------------------------------------------------------------------------------------------+
\endverbatim
*
*
* @copyright Copyright (C) 2019 Jiri Jaros and Bradley Treeby.
*
* This file is part of the C++ extension of the [k-Wave Toolbox](http://www.k-wave.org).
*
* This file is part of the k-Wave. k-Wave is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* k-Wave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU Lesser General Public License along with k-Wave.
* If not, see [http://www.gnu.org/licenses/](http://www.gnu.org/licenses/).
*/
#include <exception>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <KSpaceSolver/KSpaceFirstOrderSolver.h>
#include <Logger/Logger.h>
using std::string;
/**
* The main function of the kspaceFirstOrder-CUDA.
*
* @param [in] argc - Number of commandline parameters.
* @param [in] argv - Commandline parameters.
* @return EXIT_SUCCESS - If the simulation completed correctly.
*/
int main(int argc, char** argv)
{
// Create k-Space solver
KSpaceFirstOrderSolver kSpaceSolver;
// print header
Logger::log(Logger::LogLevel::kBasic, kOutFmtFirstSeparator);
Logger::log(Logger::LogLevel::kBasic, kOutFmtCodeName, kSpaceSolver.getCodeName().c_str());
Logger::log(Logger::LogLevel::kBasic, kOutFmtSeparator);
// Create parameters and parse command line
Parameters& params = Parameters::getInstance();
//------------------------------------- Init simulation ---------------------------------------//
try
{
// Initialise Parameters by parsing the command line and reading input file scalars
params.init(argc, argv);
// Select GPU
params.selectDevice();
// When we know the GPU, we can print out the code version
if (params.isPrintVersionOnly())
{
kSpaceSolver.printFullCodeNameAndLicense();
return EXIT_SUCCESS;
}
}
catch (const std::exception& e)
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtFailed);
// must be repeated in case the GPU we want to print out the code version for and all GPUs are busy
if (params.isPrintVersionOnly())
{
kSpaceSolver.printFullCodeNameAndLicense();
}
if (!params.isPrintVersionOnly())
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtLastSeparator);
}
Logger::errorAndTerminate(Logger::wordWrapString(e.what(), kErrFmtPathDelimiters, 9));
}
// set number of threads and bind them to cores
#ifdef _OPENMP
omp_set_num_threads(int(params.getNumberOfThreads()));
#endif
// Print simulation setup
params.printSimulatoinSetup();
Logger::log(Logger::LogLevel::kBasic, kOutFmtInitializationHeader);
//-------------------------------------- Allocate memory --------------------------------------//
try
{
kSpaceSolver.allocateMemory();
}
catch (const std::bad_alloc&)
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtFailed);
Logger::log(Logger::LogLevel::kBasic, kOutFmtLastSeparator);
// 9 = Indentation of Error:
Logger::errorAndTerminate(Logger::wordWrapString(kErrFmtOutOfMemory, " ", 9));
}
catch (const std::exception& e)
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtFailed);
Logger::log(Logger::LogLevel::kBasic, kOutFmtLastSeparator);
const string errorMessage = string(kErrFmtUnknownError) + e.what();
Logger::errorAndTerminate(Logger::wordWrapString(errorMessage, kErrFmtPathDelimiters, 9));
}
//-------------------------------------- Load input data --------------------------------------//
try
{
kSpaceSolver.loadInputData();
}
catch (const std::ios::failure& e)
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtFailed);
Logger::log(Logger::LogLevel::kBasic, kOutFmtLastSeparator);
// 9 = Indentation of Error:
Logger::errorAndTerminate(Logger::wordWrapString(e.what(), kErrFmtPathDelimiters, 9));
}
catch (const std::exception& e)
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtFailed);
Logger::log(Logger::LogLevel::kBasic, kOutFmtLastSeparator);
const string errorMessage = string(kErrFmtUnknownError) + e.what();
Logger::errorAndTerminate(Logger::wordWrapString(errorMessage, kErrFmtPathDelimiters, 9));
}
Logger::log(Logger::LogLevel::kBasic, kOutFmtElapsedTime, kSpaceSolver.getDataLoadTime());
// did we recover from checkpoint?
if (params.getTimeIndex() > 0)
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtSeparator);
Logger::log(Logger::LogLevel::kBasic, kOutFmtRecoveredFrom, params.getTimeIndex());
}
//----------------------------------------- Simulation ----------------------------------------//
Logger::log(Logger::LogLevel::kBasic, kOutFmtSeparator);
// exception are caught inside due to different log formats
kSpaceSolver.compute();
//------------------------------------------- Summary -----------------------------------------//
Logger::log(Logger::LogLevel::kBasic, kOutFmtSummaryHeader);
Logger::log(Logger::LogLevel::kBasic, kOutFmtHostMemoryUsage, kSpaceSolver.getHostMemoryUsage());
Logger::log(Logger::LogLevel::kBasic, kOutFmtDeviceMemoryUsage, kSpaceSolver.getDeviceMemoryUsage());
Logger::log(Logger::LogLevel::kBasic, kOutFmtSeparator);
// Elapsed time
if (kSpaceSolver.getCumulatedTotalTime() != kSpaceSolver.getTotalTime())
{
Logger::log(Logger::LogLevel::kBasic, kOutFmtLegExecutionTime, kSpaceSolver.getTotalTime());
}
Logger::log(Logger::LogLevel::kBasic, kOutFmtTotalExecutionTime, kSpaceSolver.getCumulatedTotalTime());
Logger::log(Logger::LogLevel::kBasic, kOutFmtEndOfSimulation);
return EXIT_SUCCESS;
} // end of main
//----------------------------------------------------------------------------------------------------------------------