-
Notifications
You must be signed in to change notification settings - Fork 50
/
Copy pathcreate_lmodsitepackage.py
executable file
·241 lines (212 loc) · 10.8 KB
/
create_lmodsitepackage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#!/usr/bin/env python3
#
# Create SitePackage.lua configuration file for Lmod.
#
import os
import sys
from stat import S_IREAD, S_IWRITE, S_IRGRP, S_IWGRP, S_IROTH
DOT_LMOD = '.lmod'
hook_txt = """require("strict")
-- Lmod's Hook module; hook.register is used further down to install the EESSI load hook
local hook = require("Hook")
-- Local alias for io.open, used by read_file
local open = io.open
local function read_file(path)
    -- Return the complete contents of the file at `path` as a string,
    -- or nil when the file cannot be opened.
    local handle = open(path, "rb") -- "rb": read-only, binary mode
    if handle == nil then
        return nil
    end
    local data = handle:read("*a") -- "*a" reads everything up to end of file
    handle:close()
    return data
end
local function from_eessi_prefix(t)
    -- Tell whether the module file being loaded (t.fn) is located under the
    -- EESSI prefix or under the corresponding host_injections prefix.
    -- Returns true or false; always false when EESSI_PREFIX is not set.

    -- eessi_prefix is the prefix with official EESSI modules
    -- e.g. /cvmfs/software.eessi.io/versions/2023.06
    local eessi_prefix = os.getenv("EESSI_PREFIX")
    -- If EESSI_PREFIX wasn't defined, we cannot check if this module was from the EESSI environment
    -- In that case, we assume it isn't, otherwise EESSI_PREFIX would (probably) have been set
    if eessi_prefix == nil then
        return false
    end

    -- NOTE: exact paths for site so may need to be updated later.
    -- See https://github.com/EESSI/software-layer/pull/371
    -- eessi_prefix_host_injections is the prefix with site-extensions (i.e. additional modules)
    -- to the official EESSI modules, e.g. /cvmfs/software.eessi.io/host_injections/2023.06
    local eessi_prefix_host_injections = string.gsub(eessi_prefix, 'versions', 'host_injections')

    -- Compare the leading characters as plain text. The prefixes are file system
    -- paths, not Lua patterns: characters such as '.', '-' or '+' must not be
    -- interpreted as pattern magic, otherwise valid paths fail to match (or
    -- unrelated paths match by accident).
    local function starts_with(path, prefix)
        return string.sub(path, 1, #prefix) == prefix
    end

    -- Check if the full modulepath starts with the eessi_prefix_*
    return starts_with(t.fn, eessi_prefix) or starts_with(t.fn, eessi_prefix_host_injections)
end
local function load_site_specific_hooks()
    -- This function will be run after the EESSI hooks are registered
    -- It will load a local SitePackage.lua that is architecture independent (if it exists) from e.g.
    -- /cvmfs/software.eessi.io/host_injections/2023.06/.lmod/SitePackage.lua
    -- That can define a new hook
    --
    -- function site_specific_load_hook(t)
    --     <some_action_on_load>
    -- end
    --
    -- And then either append to the existing hook:
    --
    -- local function final_load_hook(t)
    --     eessi_load_hook(t)
    --     site_specific_load_hook(t)
    -- end
    --
    -- hook.register("load", final_load_hook)
    --
    -- Or overwrite the EESSI hook entirely:
    --
    -- hook.register("load", site_specific_load_hook)
    --
    -- Note that the appending procedure can be simplified once we have an lmod >= 8.7.36
    -- See https://github.com/TACC/Lmod/pull/696#issuecomment-1998765722
    --
    -- Subsequently, this function will look for an architecture-specific SitePackage.lua, e.g. from
    -- /cvmfs/software.eessi.io/host_injections/2023.06/software/linux/x86_64/amd/zen2/.lmod/SitePackage.lua
    -- This can then register an additional hook, e.g.
    --
    -- function arch_specific_load_hook(t)
    --     <some_action_on_load>
    -- end
    --
    -- local function final_load_hook(t)
    --     eessi_load_hook(t)
    --     site_specific_load_hook(t)
    --     arch_specific_load_hook(t)
    -- end
    --
    -- hook.register("load", final_load_hook)
    --
    -- Again, the host site could also decide to overwrite by simply doing
    --
    -- hook.register("load", arch_specific_load_hook)

    -- Run <host_injections variant of $env_var>/.lmod/SitePackage.lua if it exists.
    -- When the variable is unset or empty nothing is loaded: without this guard
    -- the path would collapse to "/.lmod/SitePackage.lua" at the file system root.
    local function run_host_sitepackage(env_var)
        local base = os.getenv(env_var)
        if base == nil or base == "" then
            return
        end
        local sitePackage = string.gsub(base, 'versions', 'host_injections') .. "/.lmod/SitePackage.lua"
        -- If the file exists, run it
        if isFile(sitePackage) then
            dofile(sitePackage)
        end
    end

    -- architecture independent SitePackage.lua first ...
    run_host_sitepackage('EESSI_PREFIX')
    -- ... then the architecture specific one, so it can build on (or replace) the former
    run_host_sitepackage('EESSI_SOFTWARE_PATH')
end
local function eessi_cuda_enabled_load_hook(t)
    -- Load-time checks for CUDA itself and for modules flagged as GPU enabled.
    -- t is the table Lmod passes to a load hook; only t.modFullName is read here.
    -- Problems are reported through LmodError.
    local frameStk = require("FrameStk"):singleton()
    local mt = frameStk:mt()
    -- module name without the version part (text before the first "/")
    local simpleName = string.match(t.modFullName, "(.-)/")
    -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
    -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
    -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
    local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/site_specific_config/gpu/.\\n"
    if simpleName == 'CUDA' then
        -- get the full host_injections path
        local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
        -- build final path where the CUDA software should be installed
        local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
        if not isDir(cudaEasyBuildDir) then
            local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
            advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
            advice = advice .. "can find it.\\n"
            advice = advice .. refer_to_docs
            LmodError("\\nYou requested to load ", simpleName, " ", advice)
        end
    end

    -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
    -- otherwise, refuse to load the requested module and print error message
    local checkGpu = mt:haveProperty(simpleName,"arch","gpu")
    local overrideGpuCheck = os.getenv("EESSI_OVERRIDE_GPU_CHECK")
    if not checkGpu or overrideGpuCheck ~= nil then
        return
    end

    local arch = os.getenv("EESSI_CPU_FAMILY") or ""
    local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
    local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
    local cudaDriverExists = isFile(cudaDriverFile)
    local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
    if not (cudaDriverExists or singularityCudaExists) then
        local advice = "which relies on the CUDA runtime environment and driver libraries. "
        advice = advice .. "In order to be able to use the module, you will need "
        advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system. You can "
        advice = advice .. "override this check by setting the environment variable EESSI_OVERRIDE_GPU_CHECK but "
        advice = advice .. "the loaded application will not be able to execute on your system.\\n"
        advice = advice .. refer_to_docs
        LmodError("\\nYou requested to load ", simpleName, " ", advice)
    elseif cudaDriverExists then
        -- CUDA driver exists, now we check its version to see if an update is needed
        local cudaVersion = read_file(cudaVersionFile)
        local cudaVersion_req = os.getenv("EESSICUDAVERSION")
        -- driver CUDA versions don't give a patch version for CUDA, so only
        -- major.minor is compared. A missing version file or an unset
        -- EESSICUDAVERSION yields no captures and the comparison is skipped.
        local major, minor = string.match(cudaVersion or "", "(%d+)%.(%d+)")
        local major_req, minor_req = string.match(cudaVersion_req or "", "(%d+)%.(%d+)")
        if major ~= nil and major_req ~= nil then
            -- string.match returns strings; convert before comparing, since
            -- comparing strings is lexicographic ("9" < "12" would be false)
            major, minor = tonumber(major), tonumber(minor)
            major_req, minor_req = tonumber(major_req), tonumber(minor_req)
            local driver_libs_need_update = major < major_req
                or (major == major_req and minor < minor_req)
            if driver_libs_need_update then
                local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
                advice = advice .. "Please update your CUDA driver libraries and then "
                advice = advice .. "let EESSI know about the update.\\n"
                advice = advice .. refer_to_docs
                LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
            end
        end
    end
end
local function eessi_espresso_deprecated_message(t)
    -- Emit a deprecation notice when ESPResSo 4.2.1 is loaded.
    -- t is the table Lmod passes to a load hook; only t.modFullName is read here.
    local simpleName = string.match(t.modFullName, "(.-)/")
    -- Version is the dotted number directly after the "/"; the dots are escaped and
    -- the digit runs are greedy so that e.g. 4.2.10 is not mistaken for 4.2.1
    local version = string.match(t.modFullName, "/(%d+%.%d+%.%d+)")
    if simpleName == 'ESPResSo' and version == '4.2.1' then
        -- Print a message on loading ESPResSo v4.2.1 recommending using v 4.2.2 and above.
        -- A message and not a warning as the exit code would break CI runs otherwise.
        local advice = 'Prefer versions >= 4.2.2 which include important bugfixes.\\n'
        advice = advice .. 'For details see https://github.com/espressomd/espresso/releases/tag/4.2.2\\n'
        advice = advice .. 'Use version 4.2.1 at your own risk!\\n'
        LmodMessage("\\nESPResSo v4.2.1 has known issues and has been deprecated. ", advice)
    end
end
-- Single entry point bundling all EESSI load-time checks, because Lmod accepts only one
-- function per hook name.
-- Deliberately global (not local) so that other lmodrc / SitePackage files can call or wrap it
function eessi_load_hook(t)
    eessi_espresso_deprecated_message(t)
    -- Modules from outside the EESSI prefix (e.g. a local software stack) skip the CUDA
    -- checks, so loading a non-EESSI CUDA module does not end in an Lmod error
    if not from_eessi_prefix(t) then
        return
    end
    eessi_cuda_enabled_load_hook(t)
end
-- Install the combined EESSI function as the "load" hook
hook.register("load", eessi_load_hook)
-- Note that this needs to happen at the end, so that any EESSI specific hooks can be overwritten by the site
load_site_specific_hooks()
"""
def error(msg):
    """Write an error message to stderr and terminate the script.

    :param msg: text printed after the "ERROR: " prefix
    :raises SystemExit: always, with exit code 1
    """
    sys.stderr.write("ERROR: %s\n" % msg)
    sys.exit(1)
# The script takes exactly one argument: the software prefix under which
# <prefix>/.lmod/SitePackage.lua is created.
if len(sys.argv) != 2:
    error("Usage: %s <software prefix>" % sys.argv[0])

prefix = sys.argv[1]
if not os.path.exists(prefix):
    error("Prefix directory %s does not exist!" % prefix)

sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua')

# Lmod itself doesn't care about compute capability so remove this duplication from
# the install path (if it exists)
accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET")
if accel_subdir:
    sitepackage_path = sitepackage_path.replace("/accel/%s" % accel_subdir, '')

try:
    os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True)
    with open(sitepackage_path, 'w') as fp:
        fp.write(hook_txt)
    # Make sure that the created Lmod file has "read/write" for the user/group and "read" permissions for others
    os.chmod(sitepackage_path, S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP | S_IROTH)
except (IOError, OSError) as err:
    error("Failed to create %s: %s" % (sitepackage_path, err))

# Print the location of the generated file on stdout
print(sitepackage_path)