From 66c5a2d724f14b7062d97c87cbf782fca9f9fabb Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 14 Nov 2024 17:26:29 -0600 Subject: [PATCH] prefer wheel-provided libcudf.so in load_library(), use RTLD_LOCAL (#17316) Contributes to https://github.com/rapidsai/build-planning/issues/118 Modifies `libcudf.load_library()` in the following ways: * prefer wheel-provided `libcudf.so` to system installation * expose environment variable `RAPIDS_LIBCUDF_PREFER_SYSTEM_LIBRARY` for switching that preference * load `libcudf.so` with `RTLD_LOCAL`, to prevent adding symbols to the global namespace ([dlopen docs](https://linux.die.net/man/3/dlopen)) ## Notes for Reviewers ### How I tested this Locally (x86_64, CUDA 12, Python 3.12), built `libcudf`, `pylibcudf`, and `cudf` wheels from this branch, then `libcuspatial` and `cuspatial` from the corresponding cuspatial branch. Ran `cuspatial`'s unit tests, and tried setting the environment variable and inspecting `ld`'s logs to confirm that the environment variable changed the loading and search behavior. e.g. ```shell # clear ld cache to avoid cheating rm -f /etc/ld.so.cache ldconfig # try using an env variable to say "prefer the system-installed version" LD_DEBUG=libs \ LD_DEBUG_OUTPUT=/tmp/out.txt \ RAPIDS_LIBCUDF_PREFER_SYSTEM_LIBRARY=true \ python -c "import cuspatial; print(cuspatial.__version__)" cat /tmp/out.txt.* > prefer-system.txt # (then manually looked through those logs to confirm it searched places like /usr/lib64 and /lib64) ``` # Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17316 --- python/libcudf/libcudf/load.py | 70 ++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/python/libcudf/libcudf/load.py b/python/libcudf/libcudf/load.py index bf27ecfa7f5..a91fbb7aecf 100644 --- a/python/libcudf/libcudf/load.py +++ b/python/libcudf/libcudf/load.py @@ -16,8 +16,36 @@ import ctypes import os +# Loading with RTLD_LOCAL adds the library itself to the loader's +# loaded library cache without loading any symbols into the global +# namespace. This allows libraries that express a dependency on +# this library to be loaded later and successfully satisfy this dependency +# without polluting the global symbol table with symbols from +# libcudf that could conflict with symbols from other DSOs. +PREFERRED_LOAD_FLAG = ctypes.RTLD_LOCAL + + +def _load_system_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + Raises ``OSError`` if library cannot be loaded. + """ + return ctypes.CDLL(soname, PREFERRED_LOAD_FLAG) + + +def _load_wheel_installation(soname: str): + """Try to dlopen() the library indicated by ``soname`` + + Returns ``None`` if the library cannot be loaded. + """ + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), "lib64", soname) + ): + return ctypes.CDLL(lib, PREFERRED_LOAD_FLAG) + return None + def load_library(): + """Dynamically load libcudf.so and its dependencies""" try: # libkvikio must be loaded before libcudf because libcudf references its symbols import libkvikio @@ -29,28 +57,28 @@ def load_library(): # we assume the library is discoverable on system paths. pass - # Dynamically load libcudf.so. Prefer a system library if one is present to - # avoid clobbering symbols that other packages might expect, but if no - # other library is present use the one in the wheel. + prefer_system_installation = ( + os.getenv("RAPIDS_LIBCUDF_PREFER_SYSTEM_LIBRARY", "false").lower() + != "false" + ) + + soname = "libcudf.so" libcudf_lib = None - try: - libcudf_lib = ctypes.CDLL("libcudf.so", ctypes.RTLD_GLOBAL) - except OSError: - # If neither of these directories contain the library, we assume we are in an - # environment where the C++ library is already installed somewhere else and the - # CMake build of the libcudf Python package was a no-op. - # - # Note that this approach won't work for real editable installs of the libcudf package. - # scikit-build-core has limited support for importlib.resources so there isn't a clean - # way to support that case yet. - for lib_dir in ("lib", "lib64"): - if os.path.isfile( - lib := os.path.join( - os.path.dirname(__file__), lib_dir, "libcudf.so" - ) - ): - libcudf_lib = ctypes.CDLL(lib, ctypes.RTLD_GLOBAL) - break + if prefer_system_installation: + # Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + try: + libcudf_lib = _load_system_installation(soname) + except OSError: + libcudf_lib = _load_wheel_installation(soname) + else: + # Prefer the libraries bundled in this package. If they aren't found + # (which might be the case in builds where the library was prebuilt before + # packaging the wheel), look for a system installation. + libcudf_lib = _load_wheel_installation(soname) + if libcudf_lib is None: + libcudf_lib = _load_system_installation(soname) # The caller almost never needs to do anything with this library, but no # harm in offering the option since this object at least provides a handle