-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcudajit.ml
1919 lines (1771 loc) · 84.3 KB
/
cudajit.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
open Nvrtc_ffi.Bindings_types
module Nvrtc_funs = Nvrtc_ffi.C.Functions
module Cuda = Cuda_ffi.C.Functions
open Cuda_ffi.Bindings_types
open Sexplib0.Sexp_conv
(** Bindings to the NVRTC runtime-compilation API: compiling CUDA C++ source to PTX. *)
module Nvrtc = struct
type result = nvrtc_result [@@deriving sexp]
(** See {{:https://docs.nvidia.com/cuda/nvrtc/index.html#_CPPv411nvrtcResult} enum nvrtcResult}.
*)

(** Raised when an NVRTC call returns a non-success status; [message] identifies the call. *)
exception Nvrtc_error of { status : result; message : string }

(* Renders [Nvrtc_error] as ["message: STATUS"] for [Printexc]; other exceptions yield [None]. *)
let error_printer = function
| Nvrtc_error { status; message } ->
(* Discard any stale content buffered in the shared [str_formatter] before printing. *)
ignore @@ Format.flush_str_formatter ();
Format.fprintf Format.str_formatter "%s:@ %a" message Sexplib0.Sexp.pp_hum
(sexp_of_result status);
Some (Format.flush_str_formatter ())
| _ -> None

let () = Printexc.register_printer error_printer

(* [is_success status] is [true] iff [status] is [NVRTC_SUCCESS]. *)
let is_success = function NVRTC_SUCCESS -> true | _ -> false

(* Result of [compile_to_ptx]: the raw PTX buffer (opaque in sexps), its length
   (excluding the trailing NUL), and the compilation log when captured. *)
type compile_to_ptx_result = {
log : string option;
ptx : (char Ctypes.ptr[@sexp.opaque]);
ptx_length : int;
}
[@@deriving sexp_of]

(** [compile_to_ptx ~cu_src ~name ~options ~with_debug] compiles the CUDA source
    [cu_src] (program name [name]) to PTX with the given NVRTC [options].
    The CUDA include directory is prepended to [options] automatically.
    When [with_debug] is true the compilation log is retrieved even on success.
    @raise Nvrtc_error if program creation, compilation, or PTX retrieval fails. *)
let compile_to_ptx ~cu_src ~name ~options ~with_debug =
let open Ctypes in
let prog = allocate_n nvrtc_program ~count:1 in
(* We can add the include at the library level, because conf-cuda sets CUDA_PATH if it is
missing but the information is available. *)
let default =
if Sys.win32 then "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA"
else "/usr/local/cuda"
in
let cuda_path = Sys.getenv_opt "CUDA_PATH" |> Option.value ~default in
let options = Array.of_list @@ (("-I" ^ Filename.concat cuda_path "include") :: options) in
let status =
Nvrtc_funs.nvrtc_create_program prog cu_src name 0 (from_voidp string null)
(from_voidp string null)
in
if status <> NVRTC_SUCCESS then
raise @@ Nvrtc_error { status; message = "nvrtc_create_program " ^ name };
let num_options = Array.length options in
(* Builds a C array of [char *], one pointer per option string.
   NOTE(review): the per-option [CArray.of_string] buffers are not retained anywhere,
   so presumably the GC may reclaim them before [nvrtc_compile_program] runs — the
   validity re-check below looks like a guard against that; confirm ownership. *)
let get_c_options options =
let c_options = CArray.make (ptr char) num_options in
Array.iteri (fun i v -> CArray.of_string v |> CArray.start |> CArray.set c_options i) options;
c_options
in
let c_options = get_c_options options in
(* Read each option back through its C pointer and verify it still only contains
   characters plausible in compiler flags; detects garbled/clobbered buffers. *)
let valid_options =
snd
@@ CArray.fold_left
(fun (i, valid) pchar ->
if not valid then (i + 1, false)
else
let old_str = options.(i) in
let str = Ctypes.string_from_ptr pchar ~length:(String.length old_str) in
( i + 1,
String.for_all
(function
| 'a' .. 'z'
| 'A' .. 'Z'
| '0' .. '9'
| '-' | '_' | ':' | '/' | '\\' | ' ' | '"' | '.' | ';' | '&' | '#' | '%' | ','
->
true
| _ -> false)
str ))
(0, true) c_options
in
let default_options = [ "--use_fast_math"; "--device-debug" ] in
(* Fall back to known-good default options when the buffers look corrupted. *)
let c_options =
if valid_options then c_options
else (
Printf.printf
"WARNING: Cudajit.Nvrtc.compile_to_ptx garbled options %s, using %s instead\n%!"
(String.concat ", " @@ Array.to_list options)
(String.concat ", " default_options);
get_c_options @@ Array.of_list default_options)
in
let status = Nvrtc_funs.nvrtc_compile_program !@prog num_options @@ CArray.start c_options in
let log_msg log = Option.value log ~default:"no compilation log" in
(* On error: destroy the NVRTC program before raising so the handle is not leaked. *)
let error prefix status log =
ignore @@ Nvrtc_funs.nvrtc_destroy_program prog;
raise @@ Nvrtc_error { status; message = prefix ^ " " ^ name ^ ": " ^ log_msg log }
in
(* Fetch the log when compilation failed, or unconditionally when [with_debug] is set.
   Log-retrieval failures degrade to [None] rather than masking the compile status. *)
let log =
if status = NVRTC_SUCCESS && not with_debug then None
else
let log_size = allocate size_t Unsigned.Size_t.zero in
let status = Nvrtc_funs.nvrtc_get_program_log_size !@prog log_size in
if status <> NVRTC_SUCCESS then None
else
let count = Unsigned.Size_t.to_int !@log_size in
let log = allocate_n char ~count in
let status = Nvrtc_funs.nvrtc_get_program_log !@prog log in
(* [count - 1] drops the trailing NUL included in the reported size. *)
if status = NVRTC_SUCCESS then Some (string_from_ptr log ~length:(count - 1)) else None
in
if status <> NVRTC_SUCCESS then error "nvrtc_compile_program" status log;
let ptx_size = allocate size_t Unsigned.Size_t.zero in
let status = Nvrtc_funs.nvrtc_get_PTX_size !@prog ptx_size in
if status <> NVRTC_SUCCESS then error "nvrtc_get_PTX_size" status log;
let count = Unsigned.Size_t.to_int !@ptx_size in
let ptx = allocate_n char ~count in
let status = Nvrtc_funs.nvrtc_get_PTX !@prog ptx in
if status <> NVRTC_SUCCESS then error "nvrtc_get_PTX" status log;
ignore @@ Nvrtc_funs.nvrtc_destroy_program prog;
(* [ptx_length] excludes the trailing NUL. *)
{ log; ptx; ptx_length = count - 1 }

(** [string_from_ptx prog] copies the PTX buffer into an OCaml string. *)
let string_from_ptx prog = Ctypes.string_from_ptr prog.ptx ~length:prog.ptx_length

(** [compilation_log prog] is the log captured during compilation, if any. *)
let compilation_log prog = prog.log
end
type result = cu_result [@@deriving sexp]
(** CUDA driver API status codes ([CUresult]). *)

(** Raised when a CUDA driver call returns a non-success status; [message] identifies the call. *)
exception Cuda_error of { status : result; message : string }

(** Raised by the [*_safe] helpers when an operation is attempted on freed device memory. *)
exception Use_after_free of { func : string; arg : string }
(* Human-readable rendering of this library's exceptions for [Printexc];
   returns [None] for anything else so other printers can handle it. *)
let cuda_error_printer exn =
  match exn with
  | Cuda_error { status; message } ->
      let rendered =
        Format.asprintf "%s:@ %a" message Sexplib0.Sexp.pp_hum (sexp_of_result status)
      in
      Some rendered
  | Use_after_free { func; arg } ->
      Some (Printf.sprintf "Use-after-free in %s: argument %s" func arg)
  | _ -> None

let () = Printexc.register_printer cuda_error_printer
let is_success = function CUDA_SUCCESS -> true | _ -> false
(* Optional observer invoked (with the call's message and status) before error
   checking on every driver call routed through [check]; useful for tracing. *)
let cuda_call_hook : (message:string -> status:result -> unit) option ref = ref None

(** [check message status] notifies [!cuda_call_hook], then raises [Cuda_error]
    unless [status] is [CUDA_SUCCESS]. *)
let check message status =
  Option.iter (fun callback -> callback ~message ~status) !cuda_call_hook;
  match status with
  | CUDA_SUCCESS -> ()
  | _ -> raise @@ Cuda_error { status; message }
(** [check_freed ~func args] raises [Use_after_free] for the first [(arg, freed)]
    pair in [args] whose [freed] flag is set; [func] names the calling function. *)
let check_freed ~func args =
  let ensure_live (arg, freed) =
    if Atomic.get freed then raise @@ Use_after_free { func; arg }
  in
  List.iter ensure_live args
let init ?(flags = 0) () = check "cu_init" @@ Cuda.cu_init flags
(* A raw device pointer value ([CUdeviceptr]), kept as an unsigned 64-bit integer. *)
type memptr = Unsigned.uint64

(* Renders a device pointer in hexadecimal, matching how addresses are usually read. *)
let string_of_memptr ptr = Unsigned.UInt64.to_hexstring ptr
let sexp_of_memptr ptr = Sexplib0.Sexp.Atom (string_of_memptr ptr)

(* Thread-safe boolean flag, used below to record whether a pointer was freed. *)
type atomic_bool = bool Atomic.t

let sexp_of_atomic_bool flag = sexp_of_bool @@ Atomic.get flag

(* A device allocation: the raw address plus a flag set when the memory is freed,
   enabling use-after-free detection. *)
type deviceptr = Deviceptr of { ptr : memptr; freed : atomic_bool } [@@deriving sexp_of]
(* TODO: check if cuda detects use-after-free, if not consider adding *_safe function variants that
check the [freed] field. *)
(** Device discovery and attribute queries (wraps the [cuDevice*] driver calls). *)
module Device = struct
(* A CUDA device handle ([CUdevice]). *)
type t = cu_device [@@deriving sexp]
(** Returns the number of CUDA-capable devices visible to the driver
    (wraps [cuDeviceGetCount]). *)
let get_count () =
  let open Ctypes in
  let n_devices = allocate int 0 in
  check "cu_device_get_count" @@ Cuda.cu_device_get_count n_devices;
  !@n_devices
(** [get ~ordinal] returns the device handle at position [ordinal]
    (wraps [cuDeviceGet]). *)
let get ~ordinal =
  let open Ctypes in
  let handle = allocate Cuda_ffi.Types_generated.cu_device (Cu_device 0) in
  check "cu_device_get" @@ Cuda.cu_device_get handle ordinal;
  !@handle
(* Releases the primary context on [device] (wraps [cuDevicePrimaryCtxRelease]). *)
let primary_ctx_release device =
  Cuda.cu_device_primary_ctx_release device |> check "cu_device_primary_ctx_release"

(* Resets the primary context on [device] (wraps [cuDevicePrimaryCtxReset]). *)
let primary_ctx_reset device =
  Cuda.cu_device_primary_ctx_reset device |> check "cu_device_primary_ctx_reset"
(** Returns [(free, total)] memory in bytes for the current context
    (wraps [cuMemGetInfo]). *)
let get_free_and_total_mem () =
  let open Ctypes in
  let free_bytes = allocate size_t Unsigned.Size_t.zero in
  let total_bytes = allocate size_t Unsigned.Size_t.zero in
  check "cu_mem_get_info" @@ Cuda.cu_mem_get_info free_bytes total_bytes;
  (Unsigned.Size_t.to_int !@free_bytes, Unsigned.Size_t.to_int !@total_bytes)
(* Device compute mode ([CUcomputemode]): whether multiple processes may share the device. *)
type computemode = DEFAULT | PROHIBITED | EXCLUSIVE_PROCESS [@@deriving sexp]

(* Targets for flushing GPUDirect RDMA writes. *)
type flush_GPU_direct_RDMA_writes_options = HOST | MEMOPS [@@deriving sexp]

(* Peer-to-peer attributes queried between a device pair; see [get_p2p_attributes]. *)
type p2p_attribute =
| PERFORMANCE_RANK of int
| ACCESS_SUPPORTED of bool
| NATIVE_ATOMIC_SUPPORTED of bool
| CUDA_ARRAY_ACCESS_SUPPORTED of bool
[@@deriving sexp]
(** [get_p2p_attributes ~dst ~src] queries all four peer-to-peer attributes
    between [src] and [dst] (wraps [cuDeviceGetP2PAttribute]). *)
let get_p2p_attributes ~dst ~src =
  let open Ctypes in
  let value = allocate int 0 in
  (* Each query writes its answer into [value]; attributes are fetched in a
     fixed order and the list below mirrors the original accumulation order. *)
  let query attribute =
    check "cu_device_get_p2p_attribute"
    @@ Cuda.cu_device_get_p2p_attribute value attribute dst src;
    !@value
  in
  let rank = query CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK in
  let access = query CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED in
  let native_atomic = query CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED in
  let array_access = query CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED in
  [
    CUDA_ARRAY_ACCESS_SUPPORTED (array_access = 1);
    NATIVE_ATOMIC_SUPPORTED (native_atomic = 1);
    ACCESS_SUPPORTED (access = 1);
    PERFORMANCE_RANK rank;
  ]
(** [can_access_peer ~dst ~src] is [true] when [dst] can directly access the
    memory of [src] (wraps [cuDeviceCanAccessPeer]). *)
let can_access_peer ~dst ~src =
  let open Ctypes in
  let answer = allocate int 0 in
  check "cu_device_can_access_peer" @@ Cuda.cu_device_can_access_peer answer dst src;
  not (Int.equal !@answer 0)
(* Converts the raw FFI compute-mode constructor to the public [computemode].
   @raise Invalid_argument on an unrecognized (uncategorized) mode. *)
let computemode_of_cu cu_mode =
  match cu_mode with
  | CU_COMPUTEMODE_DEFAULT -> DEFAULT
  | CU_COMPUTEMODE_PROHIBITED -> PROHIBITED
  | CU_COMPUTEMODE_EXCLUSIVE_PROCESS -> EXCLUSIVE_PROCESS
  | CU_COMPUTEMODE_UNCATEGORIZED i ->
      invalid_arg ("Unknown computemode: " ^ Int64.to_string i)
(* Maps the public RDMA-flush variant to the integer constant the driver expects. *)
let int_of_flush_GPU_direct_RDMA_writes_options target =
  let open Cuda_ffi.Types_generated in
  let code =
    match target with
    | HOST -> cu_flush_gpu_direct_rdma_writes_option_host
    | MEMOPS -> cu_flush_gpu_direct_rdma_writes_option_memops
  in
  Int64.to_int code
(* TODO: export CUmemAllocationHandleType to use in mempool_supported_handle_types. *)
(* Shareable-handle kinds for memory-pool allocations ([CUmemAllocationHandleType]). *)
type mem_allocation_handle_type = NONE | POSIX_FILE_DESCRIPTOR | WIN32 | WIN32_KMT | FABRIC
[@@deriving sexp]
(* Maps the public handle-type variant to the integer constant the driver expects. *)
let int_of_mem_allocation_handle_type handle_type =
  let open Cuda_ffi.Types_generated in
  let code =
    match handle_type with
    | NONE -> cu_mem_handle_type_none
    | POSIX_FILE_DESCRIPTOR -> cu_mem_handle_type_posix_file_descriptor
    | WIN32 -> cu_mem_handle_type_win32
    | WIN32_KMT -> cu_mem_handle_type_win32_kmt
    | FABRIC -> cu_mem_handle_type_fabric
  in
  Int64.to_int code
(* The full set of device properties reported by [cuDeviceGetAttribute] (plus the
   device [name]); integer-valued attributes stay [int], 0/1 flags become [bool].
   Populated by [get_attributes] below. *)
type attributes = {
name : string;
max_threads_per_block : int;
max_block_dim_x : int;
max_block_dim_y : int;
max_block_dim_z : int;
max_grid_dim_x : int;
max_grid_dim_y : int;
max_grid_dim_z : int;
max_shared_memory_per_block : int;
total_constant_memory : int;
warp_size : int;
max_pitch : int;
max_registers_per_block : int;
clock_rate : int;
texture_alignment : int;
multiprocessor_count : int;
kernel_exec_timeout : bool;
integrated : bool;
can_map_host_memory : bool;
compute_mode : computemode;
maximum_texture1d_width : int;
maximum_texture2d_width : int;
maximum_texture2d_height : int;
maximum_texture3d_width : int;
maximum_texture3d_height : int;
maximum_texture3d_depth : int;
maximum_texture2d_layered_width : int;
maximum_texture2d_layered_height : int;
maximum_texture2d_layered_layers : int;
surface_alignment : int;
concurrent_kernels : bool;
ecc_enabled : bool;
pci_bus_id : int;
pci_device_id : int;
tcc_driver : bool;
memory_clock_rate : int;
global_memory_bus_width : int;
l2_cache_size : int;
max_threads_per_multiprocessor : int;
async_engine_count : int;
unified_addressing : bool;
maximum_texture1d_layered_width : int;
maximum_texture1d_layered_layers : int;
maximum_texture2d_gather_width : int;
maximum_texture2d_gather_height : int;
maximum_texture3d_width_alternate : int;
maximum_texture3d_height_alternate : int;
maximum_texture3d_depth_alternate : int;
pci_domain_id : int;
texture_pitch_alignment : int;
maximum_texturecubemap_width : int;
maximum_texturecubemap_layered_width : int;
maximum_texturecubemap_layered_layers : int;
maximum_surface1d_width : int;
maximum_surface2d_width : int;
maximum_surface2d_height : int;
maximum_surface3d_width : int;
maximum_surface3d_height : int;
maximum_surface3d_depth : int;
maximum_surface1d_layered_width : int;
maximum_surface1d_layered_layers : int;
maximum_surface2d_layered_width : int;
maximum_surface2d_layered_height : int;
maximum_surface2d_layered_layers : int;
maximum_surfacecubemap_width : int;
maximum_surfacecubemap_layered_width : int;
maximum_surfacecubemap_layered_layers : int;
maximum_texture2d_linear_width : int;
maximum_texture2d_linear_height : int;
maximum_texture2d_linear_pitch : int;
maximum_texture2d_mipmapped_width : int;
maximum_texture2d_mipmapped_height : int;
compute_capability_major : int;
compute_capability_minor : int;
maximum_texture1d_mipmapped_width : int;
stream_priorities_supported : bool;
global_l1_cache_supported : bool;
local_l1_cache_supported : bool;
max_shared_memory_per_multiprocessor : int;
max_registers_per_multiprocessor : int;
managed_memory : bool;
multi_gpu_board : bool;
multi_gpu_board_group_id : int;
host_native_atomic_supported : bool;
single_to_double_precision_perf_ratio : int;
pageable_memory_access : bool;
concurrent_managed_access : bool;
compute_preemption_supported : bool;
can_use_host_pointer_for_registered_mem : bool;
cooperative_launch : bool;
max_shared_memory_per_block_optin : int;
can_flush_remote_writes : bool;
host_register_supported : bool;
pageable_memory_access_uses_host_page_tables : bool;
direct_managed_mem_access_from_host : bool;
virtual_memory_management_supported : bool;
handle_type_posix_file_descriptor_supported : bool;
handle_type_win32_handle_supported : bool;
handle_type_win32_kmt_handle_supported : bool;
max_blocks_per_multiprocessor : int;
generic_compression_supported : bool;
max_persisting_l2_cache_size : int;
max_access_policy_window_size : int;
gpu_direct_rdma_with_cuda_vmm_supported : bool;
reserved_shared_memory_per_block : int;
sparse_cuda_array_supported : bool;
read_only_host_register_supported : bool;
timeline_semaphore_interop_supported : bool;
memory_pools_supported : bool;
gpu_direct_rdma_supported : bool;
gpu_direct_rdma_flush_writes_options : flush_GPU_direct_RDMA_writes_options list;
gpu_direct_rdma_writes_ordering : bool;
mempool_supported_handle_types : mem_allocation_handle_type list;
cluster_launch : bool;
deferred_mapping_cuda_array_supported : bool;
can_use_64_bit_stream_mem_ops : bool;
can_use_stream_wait_value_nor : bool;
dma_buf_supported : bool;
ipc_event_supported : bool;
mem_sync_domain_count : int;
tensor_map_access_supported : bool;
unified_function_pointers : bool;
multicast_supported : bool;
}
[@@deriving sexp]
let get_attributes device =
let open Ctypes in
let count = 2048 in
let name = allocate_n char ~count in
check "cu_device_get_name" @@ Cuda.cu_device_get_name name count device;
let name = coerce (ptr char) string name in
let max_threads_per_block = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_threads_per_block CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK
device;
let max_threads_per_block = !@max_threads_per_block in
let max_block_dim_x = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_block_dim_x CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X device;
let max_block_dim_x = !@max_block_dim_x in
let max_block_dim_y = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_block_dim_y CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y device;
let max_block_dim_y = !@max_block_dim_y in
let max_block_dim_z = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_block_dim_z CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z device;
let max_block_dim_z = !@max_block_dim_z in
let max_grid_dim_x = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_grid_dim_x CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X device;
let max_grid_dim_x = !@max_grid_dim_x in
let max_grid_dim_y = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_grid_dim_y CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y device;
let max_grid_dim_y = !@max_grid_dim_y in
let max_grid_dim_z = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_grid_dim_z CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z device;
let max_grid_dim_z = !@max_grid_dim_z in
let max_shared_memory_per_block = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_shared_memory_per_block
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK device;
let max_shared_memory_per_block = !@max_shared_memory_per_block in
let total_constant_memory = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute total_constant_memory CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY
device;
let total_constant_memory = !@total_constant_memory in
let warp_size = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute warp_size CU_DEVICE_ATTRIBUTE_WARP_SIZE device;
let warp_size = !@warp_size in
let max_pitch = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_pitch CU_DEVICE_ATTRIBUTE_MAX_PITCH device;
let max_pitch = !@max_pitch in
let max_registers_per_block = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_registers_per_block
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK device;
let max_registers_per_block = !@max_registers_per_block in
let clock_rate = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute clock_rate CU_DEVICE_ATTRIBUTE_CLOCK_RATE device;
let clock_rate = !@clock_rate in
let texture_alignment = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute texture_alignment CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT device;
let texture_alignment = !@texture_alignment in
let multiprocessor_count = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute multiprocessor_count CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
device;
let multiprocessor_count = !@multiprocessor_count in
let kernel_exec_timeout = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute kernel_exec_timeout CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT
device;
let kernel_exec_timeout = 0 <> !@kernel_exec_timeout in
let integrated = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute integrated CU_DEVICE_ATTRIBUTE_INTEGRATED device;
let integrated = 0 <> !@integrated in
let can_map_host_memory = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute can_map_host_memory CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY
device;
let can_map_host_memory = 0 <> !@can_map_host_memory in
let compute_mode = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute compute_mode CU_DEVICE_ATTRIBUTE_COMPUTE_MODE device;
let compute_mode = computemode_of_cu @@ Cuda.cu_computemode_of_int !@compute_mode in
let maximum_texture1d_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture1d_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH device;
let maximum_texture1d_width = !@maximum_texture1d_width in
let maximum_texture2d_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH device;
let maximum_texture2d_width = !@maximum_texture2d_width in
let maximum_texture2d_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT device;
let maximum_texture2d_height = !@maximum_texture2d_height in
let maximum_texture3d_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture3d_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH device;
let maximum_texture3d_width = !@maximum_texture3d_width in
let maximum_texture3d_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture3d_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT device;
let maximum_texture3d_height = !@maximum_texture3d_height in
let maximum_texture3d_depth = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture3d_depth
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH device;
let maximum_texture3d_depth = !@maximum_texture3d_depth in
let maximum_texture2d_layered_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_layered_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH device;
let maximum_texture2d_layered_width = !@maximum_texture2d_layered_width in
let maximum_texture2d_layered_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_layered_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT device;
let maximum_texture2d_layered_height = !@maximum_texture2d_layered_height in
let maximum_texture2d_layered_layers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_layered_layers
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS device;
let maximum_texture2d_layered_layers = !@maximum_texture2d_layered_layers in
let surface_alignment = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute surface_alignment CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT device;
let surface_alignment = !@surface_alignment in
let concurrent_kernels = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute concurrent_kernels CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS device;
let concurrent_kernels = 0 <> !@concurrent_kernels in
let ecc_enabled = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute ecc_enabled CU_DEVICE_ATTRIBUTE_ECC_ENABLED device;
let ecc_enabled = 0 <> !@ecc_enabled in
let pci_bus_id = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute pci_bus_id CU_DEVICE_ATTRIBUTE_PCI_BUS_ID device;
let pci_bus_id = !@pci_bus_id in
let pci_device_id = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute pci_device_id CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID device;
let pci_device_id = !@pci_device_id in
let tcc_driver = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute tcc_driver CU_DEVICE_ATTRIBUTE_TCC_DRIVER device;
let tcc_driver = 0 <> !@tcc_driver in
let memory_clock_rate = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute memory_clock_rate CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE device;
let memory_clock_rate = !@memory_clock_rate in
let global_memory_bus_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute global_memory_bus_width
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH device;
let global_memory_bus_width = !@global_memory_bus_width in
let l2_cache_size = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute l2_cache_size CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE device;
let l2_cache_size = !@l2_cache_size in
let max_threads_per_multiprocessor = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_threads_per_multiprocessor
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR device;
let max_threads_per_multiprocessor = !@max_threads_per_multiprocessor in
let async_engine_count = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute async_engine_count CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT device;
let async_engine_count = !@async_engine_count in
let unified_addressing = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute unified_addressing CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING device;
let unified_addressing = 0 <> !@unified_addressing in
let maximum_texture1d_layered_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture1d_layered_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH device;
let maximum_texture1d_layered_width = !@maximum_texture1d_layered_width in
let maximum_texture1d_layered_layers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture1d_layered_layers
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS device;
let maximum_texture1d_layered_layers = !@maximum_texture1d_layered_layers in
let maximum_texture2d_gather_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_gather_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH device;
let maximum_texture2d_gather_width = !@maximum_texture2d_gather_width in
let maximum_texture2d_gather_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_gather_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT device;
let maximum_texture2d_gather_height = !@maximum_texture2d_gather_height in
let maximum_texture3d_width_alternate = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture3d_width_alternate
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE device;
let maximum_texture3d_width_alternate = !@maximum_texture3d_width_alternate in
let maximum_texture3d_height_alternate = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture3d_height_alternate
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE device;
let maximum_texture3d_height_alternate = !@maximum_texture3d_height_alternate in
let maximum_texture3d_depth_alternate = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture3d_depth_alternate
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE device;
let maximum_texture3d_depth_alternate = !@maximum_texture3d_depth_alternate in
let pci_domain_id = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute pci_domain_id CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID device;
let pci_domain_id = !@pci_domain_id in
let texture_pitch_alignment = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute texture_pitch_alignment
CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT device;
let texture_pitch_alignment = !@texture_pitch_alignment in
let maximum_texturecubemap_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texturecubemap_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH device;
let maximum_texturecubemap_width = !@maximum_texturecubemap_width in
let maximum_texturecubemap_layered_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texturecubemap_layered_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH device;
let maximum_texturecubemap_layered_width = !@maximum_texturecubemap_layered_width in
let maximum_texturecubemap_layered_layers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texturecubemap_layered_layers
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS device;
let maximum_texturecubemap_layered_layers = !@maximum_texturecubemap_layered_layers in
let maximum_surface1d_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface1d_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH device;
let maximum_surface1d_width = !@maximum_surface1d_width in
let maximum_surface2d_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface2d_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH device;
let maximum_surface2d_width = !@maximum_surface2d_width in
let maximum_surface2d_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface2d_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT device;
let maximum_surface2d_height = !@maximum_surface2d_height in
let maximum_surface3d_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface3d_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH device;
let maximum_surface3d_width = !@maximum_surface3d_width in
let maximum_surface3d_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface3d_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT device;
let maximum_surface3d_height = !@maximum_surface3d_height in
let maximum_surface3d_depth = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface3d_depth
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH device;
let maximum_surface3d_depth = !@maximum_surface3d_depth in
let maximum_surface1d_layered_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface1d_layered_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH device;
let maximum_surface1d_layered_width = !@maximum_surface1d_layered_width in
let maximum_surface1d_layered_layers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface1d_layered_layers
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS device;
let maximum_surface1d_layered_layers = !@maximum_surface1d_layered_layers in
let maximum_surface2d_layered_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface2d_layered_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH device;
let maximum_surface2d_layered_width = !@maximum_surface2d_layered_width in
let maximum_surface2d_layered_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface2d_layered_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT device;
let maximum_surface2d_layered_height = !@maximum_surface2d_layered_height in
let maximum_surface2d_layered_layers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surface2d_layered_layers
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS device;
let maximum_surface2d_layered_layers = !@maximum_surface2d_layered_layers in
let maximum_surfacecubemap_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surfacecubemap_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH device;
let maximum_surfacecubemap_width = !@maximum_surfacecubemap_width in
let maximum_surfacecubemap_layered_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surfacecubemap_layered_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH device;
let maximum_surfacecubemap_layered_width = !@maximum_surfacecubemap_layered_width in
let maximum_surfacecubemap_layered_layers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_surfacecubemap_layered_layers
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS device;
let maximum_surfacecubemap_layered_layers = !@maximum_surfacecubemap_layered_layers in
let maximum_texture2d_linear_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_linear_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH device;
let maximum_texture2d_linear_width = !@maximum_texture2d_linear_width in
let maximum_texture2d_linear_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_linear_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT device;
let maximum_texture2d_linear_height = !@maximum_texture2d_linear_height in
let maximum_texture2d_linear_pitch = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_linear_pitch
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH device;
let maximum_texture2d_linear_pitch = !@maximum_texture2d_linear_pitch in
let maximum_texture2d_mipmapped_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_mipmapped_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH device;
let maximum_texture2d_mipmapped_width = !@maximum_texture2d_mipmapped_width in
let maximum_texture2d_mipmapped_height = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture2d_mipmapped_height
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT device;
let maximum_texture2d_mipmapped_height = !@maximum_texture2d_mipmapped_height in
let compute_capability_major = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute compute_capability_major
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR device;
let compute_capability_major = !@compute_capability_major in
let compute_capability_minor = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute compute_capability_minor
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR device;
let compute_capability_minor = !@compute_capability_minor in
let maximum_texture1d_mipmapped_width = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute maximum_texture1d_mipmapped_width
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH device;
let maximum_texture1d_mipmapped_width = !@maximum_texture1d_mipmapped_width in
let stream_priorities_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute stream_priorities_supported
CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED device;
let stream_priorities_supported = 0 <> !@stream_priorities_supported in
let global_l1_cache_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute global_l1_cache_supported
CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED device;
let global_l1_cache_supported = 0 <> !@global_l1_cache_supported in
let local_l1_cache_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute local_l1_cache_supported
CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED device;
let local_l1_cache_supported = 0 <> !@local_l1_cache_supported in
let max_shared_memory_per_multiprocessor = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_shared_memory_per_multiprocessor
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR device;
let max_shared_memory_per_multiprocessor = !@max_shared_memory_per_multiprocessor in
let max_registers_per_multiprocessor = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_registers_per_multiprocessor
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR device;
let max_registers_per_multiprocessor = !@max_registers_per_multiprocessor in
let managed_memory = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute managed_memory CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY device;
let managed_memory = 0 <> !@managed_memory in
let multi_gpu_board = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute multi_gpu_board CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD device;
let multi_gpu_board = 0 <> !@multi_gpu_board in
let multi_gpu_board_group_id = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute multi_gpu_board_group_id
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID device;
let multi_gpu_board_group_id = !@multi_gpu_board_group_id in
let host_native_atomic_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute host_native_atomic_supported
CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED device;
let host_native_atomic_supported = 0 <> !@host_native_atomic_supported in
let single_to_double_precision_perf_ratio = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute single_to_double_precision_perf_ratio
CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO device;
let single_to_double_precision_perf_ratio = !@single_to_double_precision_perf_ratio in
let pageable_memory_access = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute pageable_memory_access
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS device;
let pageable_memory_access = 0 <> !@pageable_memory_access in
let concurrent_managed_access = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute concurrent_managed_access
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS device;
let concurrent_managed_access = 0 <> !@concurrent_managed_access in
let compute_preemption_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute compute_preemption_supported
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED device;
let compute_preemption_supported = 0 <> !@compute_preemption_supported in
let can_use_host_pointer_for_registered_mem = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute can_use_host_pointer_for_registered_mem
CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM device;
let can_use_host_pointer_for_registered_mem = 0 <> !@can_use_host_pointer_for_registered_mem in
let cooperative_launch = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute cooperative_launch CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH device;
let cooperative_launch = 0 <> !@cooperative_launch in
let max_shared_memory_per_block_optin = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_shared_memory_per_block_optin
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN device;
let max_shared_memory_per_block_optin = !@max_shared_memory_per_block_optin in
let can_flush_remote_writes = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute can_flush_remote_writes
CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES device;
let can_flush_remote_writes = 0 <> !@can_flush_remote_writes in
let host_register_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute host_register_supported
CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED device;
let host_register_supported = 0 <> !@host_register_supported in
let pageable_memory_access_uses_host_page_tables = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute pageable_memory_access_uses_host_page_tables
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES device;
let pageable_memory_access_uses_host_page_tables =
0 <> !@pageable_memory_access_uses_host_page_tables
in
let direct_managed_mem_access_from_host = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute direct_managed_mem_access_from_host
CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST device;
let direct_managed_mem_access_from_host = 0 <> !@direct_managed_mem_access_from_host in
let virtual_memory_management_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute virtual_memory_management_supported
CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED device;
let virtual_memory_management_supported = 0 <> !@virtual_memory_management_supported in
let handle_type_posix_file_descriptor_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute handle_type_posix_file_descriptor_supported
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED device;
let handle_type_posix_file_descriptor_supported =
0 <> !@handle_type_posix_file_descriptor_supported
in
let handle_type_win32_handle_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute handle_type_win32_handle_supported
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED device;
let handle_type_win32_handle_supported = 0 <> !@handle_type_win32_handle_supported in
let handle_type_win32_kmt_handle_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute handle_type_win32_kmt_handle_supported
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED device;
let handle_type_win32_kmt_handle_supported = 0 <> !@handle_type_win32_kmt_handle_supported in
let max_blocks_per_multiprocessor = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_blocks_per_multiprocessor
CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR device;
let max_blocks_per_multiprocessor = !@max_blocks_per_multiprocessor in
let generic_compression_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute generic_compression_supported
CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED device;
let generic_compression_supported = 0 <> !@generic_compression_supported in
let max_persisting_l2_cache_size = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_persisting_l2_cache_size
CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE device;
let max_persisting_l2_cache_size = !@max_persisting_l2_cache_size in
let max_access_policy_window_size = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute max_access_policy_window_size
CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE device;
let max_access_policy_window_size = !@max_access_policy_window_size in
let gpu_direct_rdma_with_cuda_vmm_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute gpu_direct_rdma_with_cuda_vmm_supported
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED device;
let gpu_direct_rdma_with_cuda_vmm_supported = 0 <> !@gpu_direct_rdma_with_cuda_vmm_supported in
let reserved_shared_memory_per_block = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute reserved_shared_memory_per_block
CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK device;
let reserved_shared_memory_per_block = !@reserved_shared_memory_per_block in
let sparse_cuda_array_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute sparse_cuda_array_supported
CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED device;
let sparse_cuda_array_supported = 0 <> !@sparse_cuda_array_supported in
let read_only_host_register_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute read_only_host_register_supported
CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED device;
let read_only_host_register_supported = 0 <> !@read_only_host_register_supported in
let timeline_semaphore_interop_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute timeline_semaphore_interop_supported
CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED device;
let timeline_semaphore_interop_supported = 0 <> !@timeline_semaphore_interop_supported in
let memory_pools_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute memory_pools_supported
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED device;
let memory_pools_supported = 0 <> !@memory_pools_supported in
let gpu_direct_rdma_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute gpu_direct_rdma_supported
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED device;
let gpu_direct_rdma_supported = 0 <> !@gpu_direct_rdma_supported in
(* [unfold f flags remaining] decodes the bitmask [flags] into the subset of
   candidate constructors in [remaining] whose bit pattern (given by [f]) is
   present in [flags].  Each matched bit is cleared with [lxor] before
   recursing, so when the mask reaches zero the remaining candidates are
   skipped; if bits are still set after all candidates were tried, the mask
   contained a value we do not know about and we fail loudly rather than
   silently drop it.  Candidate lists here are tiny (<= 5 elements), so the
   non-tail recursion is fine. *)
let rec unfold f flags remaining =
  let open Int in
  match remaining with
  | [] ->
      if not (equal flags zero) then
        (* Previously reported "ctx_get_flags: ..." — copy-pasted from the
           context-flags decoder; this instance decodes device-attribute
           bitmasks returned by cuDeviceGetAttribute. *)
        failwith @@ "cu_device_get_attribute: unknown flag " ^ to_string flags
      else []
  | flag :: remaining ->
      (* All bits already accounted for: no further candidate can match. *)
      if equal flags zero then []
      else
        let uflag = f flag in
        if equal (flags land uflag) zero then unfold f flags remaining
        else flag :: unfold f (flags lxor uflag) remaining
in
let gpu_direct_rdma_flush_writes_options = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute gpu_direct_rdma_flush_writes_options
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS device;
let gpu_direct_rdma_flush_writes_options =
unfold int_of_flush_GPU_direct_RDMA_writes_options
!@gpu_direct_rdma_flush_writes_options
[ HOST; MEMOPS ]
in
let gpu_direct_rdma_writes_ordering = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute gpu_direct_rdma_writes_ordering
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING device;
let gpu_direct_rdma_writes_ordering = 0 <> !@gpu_direct_rdma_writes_ordering in
let mempool_supported_handle_types = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute mempool_supported_handle_types
CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES device;
let mempool_supported_handle_types =
unfold int_of_mem_allocation_handle_type !@mempool_supported_handle_types
[ NONE; POSIX_FILE_DESCRIPTOR; WIN32; WIN32_KMT; FABRIC ]
in
let cluster_launch = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute cluster_launch CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH device;
let cluster_launch = 0 <> !@cluster_launch in
let deferred_mapping_cuda_array_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute deferred_mapping_cuda_array_supported
CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED device;
let deferred_mapping_cuda_array_supported = 0 <> !@deferred_mapping_cuda_array_supported in
let can_use_64_bit_stream_mem_ops = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute can_use_64_bit_stream_mem_ops
CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS device;
let can_use_64_bit_stream_mem_ops = 0 <> !@can_use_64_bit_stream_mem_ops in
let can_use_stream_wait_value_nor = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute can_use_stream_wait_value_nor
CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR device;
let can_use_stream_wait_value_nor = 0 <> !@can_use_stream_wait_value_nor in
let dma_buf_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute dma_buf_supported CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED device;
let dma_buf_supported = 0 <> !@dma_buf_supported in
let ipc_event_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute ipc_event_supported CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
device;
let ipc_event_supported = 0 <> !@ipc_event_supported in
let mem_sync_domain_count = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute mem_sync_domain_count CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT
device;
let mem_sync_domain_count = !@mem_sync_domain_count in
let tensor_map_access_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute tensor_map_access_supported
CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED device;
let tensor_map_access_supported = 0 <> !@tensor_map_access_supported in
let unified_function_pointers = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute unified_function_pointers
CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS device;
let unified_function_pointers = 0 <> !@unified_function_pointers in
let multicast_supported = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute multicast_supported CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED
device;
let multicast_supported = 0 <> !@multicast_supported in
{
name;
max_threads_per_block;
max_block_dim_x;
max_block_dim_y;
max_block_dim_z;
max_grid_dim_x;
max_grid_dim_y;
max_grid_dim_z;