From 147b242a00fa2da4844872050c12958170534b7f Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 9 Apr 2025 11:33:32 -0700 Subject: [PATCH 01/28] First version of `cuda.bindings.path_finder` (#447) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Unmodified copies of: * https://github.com/NVIDIA/numba-cuda/blob/bf487d78a40eea87f009d636882a5000a7524c95/numba_cuda/numba/cuda/cuda_paths.py * https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py * Add Forked from URLs. * Strip down cuda_paths.py to minimum required for `_get_nvvm_path()` Tested interactively with: ``` import cuda_paths nvvm_path = cuda_paths._get_nvvm_path() print(f"{nvvm_path=}") ``` * ruff auto-fixes (NO manual changes) * Make `get_nvvm_path()` a public API (i.e. remove leading underscore). * Fetch numba-cuda/numba_cuda/numba/cuda/cuda_paths.py from https://github.com/NVIDIA/numba-cuda/pull/155 AS-IS * ruff format NO MANUAL CHANGES * Minimal changes to adapt numba-cuda/numba_cuda/numba/cuda/cuda_paths.py from https://github.com/NVIDIA/numba-cuda/pull/155 * Rename ecosystem/cuda_paths.py -> path_finder.py * Plug cuda.bindings.path_finder into cuda/bindings/_internal/nvvm_linux.pyx * Plug cuda.bindings.path_finder into cuda/bindings/_internal/nvjitlink_linux.pyx * Fix `os.path.exists(None)` issue: ``` ______________________ ERROR collecting test_nvjitlink.py ______________________ tests/test_nvjitlink.py:62: in not check_nvjitlink_usable(), reason="nvJitLink not usable, maybe not installed or too old (<12.3)" tests/test_nvjitlink.py:58: in check_nvjitlink_usable return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 cuda/bindings/_internal/nvjitlink.pyx:257: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:260: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? 
cuda/bindings/_internal/nvjitlink.pyx:208: in cuda.bindings._internal.nvjitlink._inspect_function_pointers ??? cuda/bindings/_internal/nvjitlink.pyx:102: in cuda.bindings._internal.nvjitlink._check_or_init_nvjitlink ??? cuda/bindings/_internal/nvjitlink.pyx:59: in cuda.bindings._internal.nvjitlink.load_library ??? /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:312: in get_cuda_paths "nvvm": _get_nvvm_path(), /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:285: in _get_nvvm_path by, path = _get_nvvm_path_decision() /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:96: in _get_nvvm_path_decision if os.path.exists(nvvm_ctk_dir): :19: in exists ??? E TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType ``` * Fix another `os.path.exists(None)` issue: ``` ______________________ ERROR collecting test_nvjitlink.py ______________________ tests/test_nvjitlink.py:62: in not check_nvjitlink_usable(), reason="nvJitLink not usable, maybe not installed or too old (<12.3)" tests/test_nvjitlink.py:58: in check_nvjitlink_usable return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 cuda/bindings/_internal/nvjitlink.pyx:257: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:260: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda/bindings/_internal/nvjitlink.pyx:208: in cuda.bindings._internal.nvjitlink._inspect_function_pointers ??? cuda/bindings/_internal/nvjitlink.pyx:102: in cuda.bindings._internal.nvjitlink._check_or_init_nvjitlink ??? cuda/bindings/_internal/nvjitlink.pyx:59: in cuda.bindings._internal.nvjitlink.load_library ??? 
/opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:313: in get_cuda_paths "libdevice": _get_libdevice_paths(), /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:126: in _get_libdevice_paths by, libdir = _get_libdevice_path_decision() /opt/hostedtoolcache/Python/3.13.2/x64/lib/python3.13/site-packages/cuda/bindings/path_finder.py:73: in _get_libdevice_path_decision if os.path.exists(libdevice_ctk_dir): :19: in exists ??? E TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType ``` * Change "/lib64/" → "/lib/" in nvjitlink_linux.pyx * nvjitlink_linux.pyx load_library() enhancements, mainly to avoid os.path.join(None, "libnvJitLink.so") * Add missing f-string f * Add back get_nvjitlink_dso_version_suffix() call. * pytest -ra -s -v * Rewrite nvjitlink_linux.pyx load_library() to produce detailed error messages. * Attach listdir output to "Unable to load" exception message. * Guard os.listdir() call with os.path.isdir() * Fix logic error in nvjitlink_linux.pyx load_library() * Move path_finder.py to _path_finder_utils/cuda_paths.py, import only public functions from new path_finder.py * Add find_nvidia_dynamic_library() and use from nvjitlink_linux.pyx, nvvm_linux.pyx * Fix oversight in _find_using_lib_dir() * Also look for versioned library in _find_using_nvidia_lib_dirs() * glob.glob() Python 3.9 compatibility * Reduce build-and-test.yml to Windows-only, Python 3.12 only. * Comment out `if: ${{ github.repository_owner == nvidia }}` * Revert "Comment out `if: ${{ github.repository_owner == nvidia }}`" This reverts commit b0db24f9cfa3847e6a3e11c00f0225c7c7ef431e. 
* Add back `linux-64` `host-platform` * Rewrite load_library() in nvjitlink_windows.pyx to use path_finder.find_nvidia_dynamic_library() * Revert "Rewrite load_library() in nvjitlink_windows.pyx to use path_finder.find_nvidia_dynamic_library()" This reverts commit 1bb71513fea05054779312caac054a09b212b8a7. * Add _inspect_environment() in find_nvidia_dynamic_library.py, call from nvjitlink_windows.pyx, nvvm_windows.pyx * Add & use _find_dll_using_nvidia_bin_dirs(), _find_dll_using_cudalib_dir() * Fix silly oversight: forgot to undo experimental change. * Also reduce test test-linux matrix. * Reimplement load_library() functions in nvjitlink_windows.pyx, nvvm_windows.pyx to actively use path_finder.find_nvidia_dynamic_library() * Factor out load_nvidia_dynamic_library() from _internal/nvjitlink_linux.pyx, nvvm_linux.pyx * Generalize load_nvidia_dynamic_library.py to also work under Windows. * Add `void*` return type to load_library() implementations in _internal/nvjitlink_windows.pyx, nvvm_windows.pyx * Resolve cython error: object handle vs `void*` handle ``` Error compiling Cython file: ------------------------------------------------------------ ... err = (__cuDriverGetVersion)(&driver_ver) if err != 0: raise RuntimeError('something went wrong') # Load library handle = load_library(driver_ver) ^ ------------------------------------------------------------ cuda\bindings\_internal\nvjitlink.pyx:72:29: Cannot convert 'void *' to Python object ``` * Resolve another cython error: `void*` handle vs `intptr_t` handle ``` Error compiling Cython file: ------------------------------------------------------------ ... handle = load_library(driver_ver) # Load function global __nvJitLinkCreate try: __nvJitLinkCreate = win32api.GetProcAddress(handle, 'nvJitLinkCreate') ^ ------------------------------------------------------------ cuda\bindings\_internal\nvjitlink.pyx:78:73: Cannot convert 'void *' to Python object ``` * Resolve signed/unsigned runtime error. 
Use uintptr_t consistently. https://github.com/NVIDIA/cuda-python/actions/runs/14224673173/job/39861750852?pr=447#logs ``` =================================== ERRORS ==================================== _____________________ ERROR collecting test_nvjitlink.py ______________________ tests\test_nvjitlink.py:62: in not check_nvjitlink_usable(), reason="nvJitLink not usable, maybe not installed or too old (<12.3)" tests\test_nvjitlink.py:58: in check_nvjitlink_usable return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 cuda\\bindings\\_internal\\nvjitlink.pyx:221: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda\\bindings\\_internal\\nvjitlink.pyx:224: in cuda.bindings._internal.nvjitlink._inspect_function_pointer ??? cuda\\bindings\\_internal\\nvjitlink.pyx:172: in cuda.bindings._internal.nvjitlink._inspect_function_pointers ??? cuda\\bindings\\_internal\\nvjitlink.pyx:73: in cuda.bindings._internal.nvjitlink._check_or_init_nvjitlink ??? cuda\\bindings\\_internal\\nvjitlink.pyx:46: in cuda.bindings._internal.nvjitlink.load_library ??? E OverflowError: can't convert negative value to size_t ``` * Change `win32api.GetProcAddress` back to `intptr_t`. Changing load_nvidia_dynamic_library() to also use to-`intptr_t` conversion, for compatibility with win32api.GetProcAddress. Document that CDLL behaves differently (it uses to-`uintptr_t`). * Use win32api.LoadLibrary() instead of ctypes.windll.kernel32.LoadLibraryW(), to be more similar to original (and working) cython code. 
Hoping to resolve this kind of error: ``` _ ERROR at setup of test_c_or_v_program_fail_bad_option[txt-compile_program] __ request = > @pytest.fixture(params=MINIMAL_NVVMIR_FIXTURE_PARAMS) def minimal_nvvmir(request): for pass_counter in range(2): nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param, -1) if nvvmir != -1: if nvvmir is None: pytest.skip(f"UNAVAILABLE: {request.param}") return nvvmir if pass_counter: raise AssertionError("This code path is meant to be unreachable.") # Build cache entries, then try again (above). > major, minor, debug_major, debug_minor = nvvm.ir_version() tests\test_nvvm.py:148: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ cuda\bindings\nvvm.pyx:95: in cuda.bindings.nvvm.ir_version cpdef tuple ir_version(): cuda\bindings\nvvm.pyx:113: in cuda.bindings.nvvm.ir_version status = nvvmIRVersion(&major_ir, &minor_ir, &major_dbg, &minor_dbg) cuda\bindings\cynvvm.pyx:19: in cuda.bindings.cynvvm.nvvmIRVersion return _nvvm._nvvmIRVersion(majorIR, minorIR, majorDbg, minorDbg) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > ??? E cuda.bindings._internal.utils.FunctionNotFoundError: function nvvmIRVersion is not found ``` * Remove debug print statements. * Remove some cruft. * Trivial renaming of variables. No functional changes. * Revert debug changes under .github/workflows * Rename _path_finder_utils → _path_finder * Remove LD_LIBRARY_PATH in fetch_ctk/action.yml * Linux: First try using the platform-specific dynamic loader search mechanisms * Add _windows_load_with_dll_basename() * Revert "Revert debug changes under .github/workflows" This reverts commit cc6113cce20c5c6124d0676daeccb7db2fffd798. * Add debug prints in load_nvidia_dynamic_library() * Report dlopen error for libnvrtc.so.12 * print("\nLOOOK dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)", flush=True) * Revert "Remove LD_LIBRARY_PATH in fetch_ctk/action.yml" This reverts commit 1b1139cda8b56f2fa37c5c0102ee7fe6b5963cab. 
* Only remove ${CUDA_PATH}/nvvm/lib64 from LD_LIBRARY_PATH * Use path_finder.load_nvidia_dynamic_library("nvrtc") from cuda/bindings/_bindings/cynvrtc.pyx.in * Somewhat ad hoc heuristics for nvidia_cuda_nvrtc wheels. * Remove LD_LIBRARY_PATH entirely from .github/actions/fetch_ctk/action.yml * Remove CUDA_PATH\nvvm\bin in .github/workflows/test-wheel-windows.yml * Revert "Remove LD_LIBRARY_PATH entirely from .github/actions/fetch_ctk/action.yml" This reverts commit bff8cf023c82c7456af79ef004ba1c30d16b974a. * Revert "Somewhat ad hoc heuristics for nvidia_cuda_nvrtc wheels." This reverts commit 43abec8666a920e56ddc90cdb880ead248d0e45b. * Restore cuda/bindings/_bindings/cynvrtc.pyx.in as-is on main * Remove debug print from load_nvidia_dynamic_library.py * Reapply "Revert debug changes under .github/workflows" This reverts commit aaa6aff637f6bd076d0b124a39d56eeab5875351. --- .github/actions/fetch_ctk/action.yml | 2 +- .github/workflows/test-wheel-windows.yml | 7 - .../bindings/_internal/nvjitlink_linux.pyx | 20 +- .../bindings/_internal/nvjitlink_windows.pyx | 63 +-- .../cuda/bindings/_internal/nvvm_linux.pyx | 18 +- .../cuda/bindings/_internal/nvvm_windows.pyx | 63 +-- .../cuda/bindings/_internal/utils.pxd | 3 - .../cuda/bindings/_internal/utils.pyx | 14 - .../cuda/bindings/_path_finder/cuda_paths.py | 403 ++++++++++++++++++ .../find_nvidia_dynamic_library.py | 139 ++++++ .../cuda/bindings/_path_finder/findlib.py | 69 +++ .../load_nvidia_dynamic_library.py | 92 ++++ .../_path_finder/sys_path_find_sub_dirs.py | 40 ++ cuda_bindings/cuda/bindings/path_finder.py | 37 ++ cuda_bindings/tests/path_finder.py | 9 + .../tests/test_sys_path_find_sub_dirs.py | 72 ++++ 16 files changed, 889 insertions(+), 162 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/findlib.py create mode 100644 
cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py create mode 100644 cuda_bindings/cuda/bindings/path_finder.py create mode 100644 cuda_bindings/tests/path_finder.py create mode 100644 cuda_bindings/tests/test_sys_path_find_sub_dirs.py diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 669943296..5850b4c78 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -123,4 +123,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 4e48590a3..948d2fae6 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -164,13 +164,6 @@ jobs: method: 'network' sub-packages: ${{ env.MINI_CTK_DEPS }} - - name: Update PATH - if: ${{ inputs.local-ctk == '1' }} - run: | - # mimics actual CTK installation - echo $PATH - echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH - - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 9961a2105..9d21a3e10 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,12 +4,12 @@ # # This code was automatically generated across versions from 12.0.1 to 12.8.0. Do not modify it directly. 
-from libc.stdint cimport intptr_t - -from .utils cimport get_nvjitlink_dso_version_suffix +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -52,17 +52,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvjitlink_dso_version_suffix(driver_ver): - so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index c8c7e6b29..f86972216 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvjitlink_dso_version_suffix - from .utils import FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -42,54 +39,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in 
get_nvjitlink_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = f"nvJitLink_{suffix}0_0.dll" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvJitLink') - - assert handle != 0 - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: @@ -98,15 +50,16 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver + cdef intptr_t handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -114,7 +67,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: 
raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 64e78e75a..33ba8e610 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -4,12 +4,12 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. -from libc.stdint cimport intptr_t - -from .utils cimport get_nvvm_dso_version_suffix +from libc.stdint cimport intptr_t, uintptr_t from .utils import FunctionNotFoundError, NotSupportedError +from cuda.bindings import path_finder + ############################################################################### # Extern ############################################################################### @@ -51,16 +51,8 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef void* handle - for suffix in get_nvvm_dso_version_suffix(driver_ver): - so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) - handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) - if handle != NULL: - break - else: - err_msg = dlerror() - raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') - return handle + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 76ce23254..6349fa5a1 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -6,12 +6,9 @@ from libc.stdint cimport intptr_t -from .utils cimport get_nvvm_dso_version_suffix - from .utils import 
FunctionNotFoundError, NotSupportedError -import os -import site +from cuda.bindings import path_finder import win32api @@ -40,54 +37,9 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef inline list get_site_packages(): - return [site.getusersitepackages()] + site.getsitepackages() - - -cdef load_library(const int driver_ver): - handle = 0 - - for suffix in get_nvvm_dso_version_suffix(driver_ver): - if len(suffix) == 0: - continue - dll_name = "nvvm64_40_0" - - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle(dll_name) - except: - pass - else: - break - - # Next, check if DLLs are installed via pip - for sp in get_site_packages(): - mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, dll_name), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - except: - pass - else: - break - - # Finally, try default search - try: - handle = win32api.LoadLibrary(dll_name) - except: - pass - else: - break - else: - raise RuntimeError('Failed to load nvvm') - - assert handle != 0 - return handle +cdef void* load_library(int driver_ver) except* with gil: + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + return handle cdef int _check_or_init_nvvm() except -1 nogil: @@ -96,15 +48,16 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver + cdef intptr_t handle with gil: # Load driver to check version try: - handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion 
== NULL: - __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -112,7 +65,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index cac7846ff..a4b71c531 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -165,6 +165,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) -cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 0a693c052..7fc77b22c 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -127,17 +127,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, class FunctionNotFoundError(RuntimeError): pass class NotSupportedError(RuntimeError): pass - - -cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): - if 12000 <= driver_ver < 13000: - return ('12', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') - - -cdef tuple get_nvvm_dso_version_suffix(int driver_ver): - if 11000 <= driver_ver < 11020: - return ('3', '') - if 11020 <= driver_ver < 13000: - return ('4', '') - raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') 
diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py new file mode 100644 index 000000000..e27e6f54b --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py @@ -0,0 +1,403 @@ +import os +import platform +import re +import site +import sys +import traceback +import warnings +from collections import namedtuple +from pathlib import Path + +from .findlib import find_file, find_lib + +IS_WIN32 = sys.platform.startswith("win32") + +_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) + + +def _get_numba_CUDA_INCLUDE_PATH(): + # From numba/numba/core/config.py + + def _readenv(name, ctor, default): + value = os.environ.get(name) + if value is None: + return default() if callable(default) else default + try: + return ctor(value) + except Exception: + warnings.warn( # noqa: B028 + f"Environment variable '{name}' is defined but " + f"its associated value '{value}' could not be " + "parsed.\nThe parse failed with exception:\n" + f"{traceback.format_exc()}", + RuntimeWarning, + ) + return default + + if IS_WIN32: + cuda_path = os.environ.get("CUDA_PATH") + if cuda_path: # noqa: SIM108 + default_cuda_include_path = os.path.join(cuda_path, "include") + else: + default_cuda_include_path = "cuda_include_not_found" + else: + default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") + CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) + return CUDA_INCLUDE_PATH + + +config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() + + +def _find_valid_path(options): + """Find valid path from *options*, which is a list of 2-tuple of + (name, path). Return first pair where *path* is not None. 
+ If no valid path is found, return ('', None) + """ + for by, data in options: + if data is not None: + return by, data + else: + return "", None + + +def _get_libdevice_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), + ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice()), + ("NVIDIA NVCC Wheel", get_libdevice_wheel()), + ] + libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") + if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): + options.append(("System", libdevice_ctk_dir)) + + by, libdir = _find_valid_path(options) + return by, libdir + + +def _nvvm_lib_dir(): + if IS_WIN32: + return "nvvm", "bin" + else: + return "nvvm", "lib64" + + +def _get_nvvm_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), + ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), + ] + # need to ensure nvvm dir actually exists + nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) + if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): + options.append(("System", nvvm_ctk_dir)) + + by, path = _find_valid_path(options) + return by, path + + +def _get_nvvm_wheel(): + site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] + for sp in site_paths: + # The SONAME is taken based on public CTK 12.x releases + if sys.platform.startswith("linux"): + dso_dir = "lib64" + # Hack: libnvvm from Linux wheel + # does not have any soname (CUDAINST-3183) + dso_path = "libnvvm.so" + elif sys.platform.startswith("win32"): + dso_dir = "bin" + dso_path = "nvvm64_40_0.dll" + else: + raise AssertionError() + + if sp is not None: + dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) + dso_path = os.path.join(dso_dir, dso_path) + if os.path.exists(dso_path): + return str(Path(dso_path).parent) + + +def 
_get_libdevice_paths(): + by, libdir = _get_libdevice_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + out = os.path.join(libdir, "libdevice.10.bc") + else: + # Search for pattern + pat = r"libdevice(\.\d+)*\.bc$" + candidates = find_file(re.compile(pat), libdir) + # Keep only the max (most recent version) of the bitcode files. + out = max(candidates, default=None) + return _env_path_tuple(by, out) + + +def _cudalib_path(): + if IS_WIN32: + return "bin" + else: + return "lib64" + + +def _cuda_home_static_cudalib_path(): + if IS_WIN32: + return ("lib", "x64") + else: + return ("lib64",) + + +def _get_cudalib_dir_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_static_cudalib_dir_path_decision(): + options = [ + ("Conda environment", get_conda_ctk()), + ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), + ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), + ("System", get_system_ctk(_cudalib_path())), + ] + by, libdir = _find_valid_path(options) + return by, libdir + + +def _get_cudalib_dir(): + by, libdir = _get_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def _get_static_cudalib_dir(): + by, libdir = _get_static_cudalib_dir_path_decision() + return _env_path_tuple(by, libdir) + + +def get_system_ctk(*subdirs): + """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" + # Linux? + if sys.platform.startswith("linux"): + # Is cuda alias to /usr/local/cuda? + # We are intentionally not getting versioned cuda installation. 
+ base = "/usr/local/cuda" + if os.path.exists(base): + return os.path.join(base, *subdirs) + + +def get_conda_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + # Assume the existence of NVVM to imply cudatoolkit installed + paths = find_lib("nvvm") + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_nvvm_ctk(): + """Return path to directory containing the NVVM shared library.""" + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + + # Assume the existence of NVVM in the conda env implies that a CUDA toolkit + # conda package is installed. + + # First, try the location used on Linux and the Windows 11.x packages + libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that fails, try the location used for Windows 12.x packages + libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) + if not os.path.exists(libdir) or not os.path.isdir(libdir): + # If that doesn't exist either, assume we don't have the NVIDIA + # conda package + return + + paths = find_lib("nvvm", libdir=libdir) + if not paths: + return + # Use the directory name of the max path + return os.path.dirname(max(paths)) + + +def get_nvidia_libdevice_ctk(): + """Return path to directory containing the libdevice library.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + nvvm_dir = os.path.dirname(nvvm_ctk) + return os.path.join(nvvm_dir, "libdevice") + + +def get_nvidia_cudalib_ctk(): + """Return path to directory containing the shared libraries of cudatoolkit.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + subdir = "bin" if IS_WIN32 else "lib" + return 
os.path.join(env_dir, subdir) + + +def get_nvidia_static_cudalib_ctk(): + """Return path to directory containing the static libraries of cudatoolkit.""" + nvvm_ctk = get_nvidia_nvvm_ctk() + if not nvvm_ctk: + return + + if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 + # Location specific to CUDA 11.x packages on Windows + dirs = ("Lib", "x64") + else: + # Linux, or Windows with CUDA 12.x packages + dirs = ("lib",) + + env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) + return os.path.join(env_dir, *dirs) + + +def get_cuda_home(*subdirs): + """Get paths of CUDA_HOME. + If *subdirs* are the subdirectory name to be appended in the resulting + path. + """ + cuda_home = os.environ.get("CUDA_HOME") + if cuda_home is None: + # Try Windows CUDA installation without Anaconda + cuda_home = os.environ.get("CUDA_PATH") + if cuda_home is not None: + return os.path.join(cuda_home, *subdirs) + + +def _get_nvvm_path(): + by, path = _get_nvvm_path_decision() + if by == "NVIDIA NVCC Wheel": + # The NVVM path is a directory, not a file + path = os.path.join(path, "libnvvm.so") + else: + candidates = find_lib("nvvm", path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + +def get_cuda_paths(): + """Returns a dictionary mapping component names to a 2-tuple + of (source_variable, info). + + The returned dictionary will have the following keys and infos: + - "nvvm": file_path + - "libdevice": List[Tuple[arch, file_path]] + - "cudalib_dir": directory_path + + Note: The result of the function is cached. 
+ """ + # Check cache + if hasattr(get_cuda_paths, "_cached_result"): + return get_cuda_paths._cached_result + else: + # Not in cache + d = { + "nvvm": _get_nvvm_path(), + "libdevice": _get_libdevice_paths(), + "cudalib_dir": _get_cudalib_dir(), + "static_cudalib_dir": _get_static_cudalib_dir(), + "include_dir": _get_include_dir(), + } + # Cache result + get_cuda_paths._cached_result = d + return d + + +def get_debian_pkg_libdevice(): + """ + Return the Debian NVIDIA Maintainers-packaged libdevice location, if it + exists. + """ + pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" + if not os.path.exists(pkg_libdevice_location): + return None + return pkg_libdevice_location + + +def get_libdevice_wheel(): + nvvm_path = _get_nvvm_wheel() + if nvvm_path is None: + return None + nvvm_path = Path(nvvm_path) + libdevice_path = nvvm_path.parent / "libdevice" + + return str(libdevice_path) + + +def get_current_cuda_target_name(): + """Determine conda's CTK target folder based on system and machine arch. + + CTK's conda package delivers headers based on its architecture type. For example, + `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and + `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the + nuances at cudart's conda feedstock: + https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 + """ + system = platform.system() + machine = platform.machine() + + if system == "Linux": + arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} + elif system == "Windows": + arch_to_targets = { + "AMD64": "x64", + } + else: + arch_to_targets = {} + + return arch_to_targets.get(machine, None) + + +def get_conda_include_dir(): + """ + Return the include directory in the current conda environment, if one + is active and it exists. 
+ """ + is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) + if not is_conda_env: + return + + if platform.system() == "Windows": + include_dir = os.path.join(sys.prefix, "Library", "include") + elif target_name := get_current_cuda_target_name(): + include_dir = os.path.join(sys.prefix, "targets", target_name, "include") + else: + # A fallback when target cannot determined + # though usually it shouldn't. + include_dir = os.path.join(sys.prefix, "include") + + if ( + os.path.exists(include_dir) + and os.path.isdir(include_dir) + and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) + ): + return include_dir + return + + +def _get_include_dir(): + """Find the root include directory.""" + options = [ + ("Conda environment (NVIDIA package)", get_conda_include_dir()), + ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), + # TODO: add others + ] + by, include_dir = _find_valid_path(options) + return _env_path_tuple(by, include_dir) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py new file mode 100644 index 000000000..30a9b68f4 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -0,0 +1,139 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import glob +import os + +from .cuda_paths import IS_WIN32, get_cuda_paths +from .sys_path_find_sub_dirs import sys_path_find_sub_dirs + + +def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): + error_messages.append(f"No such file: {file_wild}") + for sub_dir in sys_path_find_sub_dirs(sub_dirs): + attachments.append(f' listdir("{sub_dir}"):') + for node in sorted(os.listdir(sub_dir)): + attachments.append(f" {node}") + + +def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachments): + if libname == "nvvm": # noqa: SIM108 + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") + else: + nvidia_sub_dirs = ("nvidia", "*", "lib") + file_wild = so_basename + "*" + for lib_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + # First look for an exact match + so_name = os.path.join(lib_dir, so_basename) + if os.path.isfile(so_name): + return so_name + # Look for a versioned library + # Using sort here mainly to make the result deterministic. 
+ for node in sorted(glob.glob(os.path.join(lib_dir, file_wild))): + so_name = os.path.join(lib_dir, node) + if os.path.isfile(so_name): + return so_name + _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + return None + + +def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): + if libname == "nvvm": # noqa: SIM108 + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") + else: + nvidia_sub_dirs = ("nvidia", "*", "bin") + file_wild = libname + "*.dll" + for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): + dll_name = os.path.join(bin_dir, node) + if os.path.isfile(dll_name): + return dll_name + _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + return None + + +def _get_cuda_paths_info(key, error_messages): + env_path_tuple = get_cuda_paths()[key] + if not env_path_tuple: + error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"]') + return None + if not env_path_tuple.info: + error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"].info') + return None + return env_path_tuple.info + + +def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): + cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if cudalib_dir is None: + return None + primary_so_dir = cudalib_dir + "/" + candidate_so_dirs = [primary_so_dir] + libs = ["/lib/", "/lib64/"] + for _ in range(2): + alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) + if alt_dir not in candidate_so_dirs: + candidate_so_dirs.append(alt_dir) + libs.reverse() + candidate_so_names = [so_dirname + so_basename for so_dirname in candidate_so_dirs] + error_messages = [] + for so_name in candidate_so_names: + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_name}") + for so_dirname in candidate_so_dirs: + attachments.append(f' listdir("{so_dirname}"):') + if not 
os.path.isdir(so_dirname): + attachments.append(" DIRECTORY DOES NOT EXIST") + else: + for node in sorted(os.listdir(so_dirname)): + attachments.append(f" {node}") + return None + + +def _find_dll_using_cudalib_dir(libname, error_messages, attachments): + cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) + if cudalib_dir is None: + return None + file_wild = libname + "*.dll" + for node in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): + dll_name = os.path.join(cudalib_dir, node) + if os.path.isfile(dll_name): + return dll_name + error_messages.append(f"No such file: {file_wild}") + attachments.append(f' listdir("{cudalib_dir}"):') + for node in sorted(os.listdir(cudalib_dir)): + attachments.append(f" {node}") + return None + + +@functools.cache +def find_nvidia_dynamic_library(name: str) -> str: + error_messages = [] + attachments = [] + + if IS_WIN32: + dll_name = _find_dll_using_nvidia_bin_dirs(name, error_messages, attachments) + if dll_name is None: + if name == "nvvm": + dll_name = _get_cuda_paths_info("nvvm", error_messages) + else: + dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments) + if dll_name is None: + attachments = "\n".join(attachments) + raise RuntimeError(f"Failure finding {name}*.dll: {', '.join(error_messages)}\n{attachments}") + return dll_name + + so_basename = f"lib{name}.so" + so_name = _find_so_using_nvidia_lib_dirs(name, so_basename, error_messages, attachments) + if so_name is None: + if name == "nvvm": + so_name = _get_cuda_paths_info("nvvm", error_messages) + else: + so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) + if so_name is None: + attachments = "\n".join(attachments) + raise RuntimeError(f"Failure finding {so_basename}: {', '.join(error_messages)}\n{attachments}") + return so_name diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py new file mode 100644 index 000000000..4de57c905 --- 
/dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/findlib.py @@ -0,0 +1,69 @@ +# Forked from: +# https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py + +import os +import re +import sys + + +def get_lib_dirs(): + """ + Anaconda specific + """ + if sys.platform == "win32": + # on windows, historically `DLLs` has been used for CUDA libraries, + # since approximately CUDA 9.2, `Library\bin` has been used. + dirnames = ["DLLs", os.path.join("Library", "bin")] + else: + dirnames = [ + "lib", + ] + libdirs = [os.path.join(sys.prefix, x) for x in dirnames] + return libdirs + + +DLLNAMEMAP = { + "linux": r"lib%(name)s\.so\.%(ver)s$", + "linux2": r"lib%(name)s\.so\.%(ver)s$", + "linux-static": r"lib%(name)s\.a$", + "darwin": r"lib%(name)s\.%(ver)s\.dylib$", + "win32": r"%(name)s%(ver)s\.dll$", + "win32-static": r"%(name)s\.lib$", + "bsd": r"lib%(name)s\.so\.%(ver)s$", +} + +RE_VER = r"[0-9]*([_\.][0-9]+)*" + + +def find_lib(libname, libdir=None, platform=None, static=False): + platform = platform or sys.platform + platform = "bsd" if "bsd" in platform else platform + if static: + platform = f"{platform}-static" + if platform not in DLLNAMEMAP: + # Return empty list if platform name is undefined. + # Not all platforms define their static library paths. 
+ return [] + pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER} + regex = re.compile(pat) + return find_file(regex, libdir) + + +def find_file(pat, libdir=None): + if libdir is None: + libdirs = get_lib_dirs() + elif isinstance(libdir, str): + libdirs = [ + libdir, + ] + else: + libdirs = list(libdir) + files = [] + for ldir in libdirs: + try: + entries = os.listdir(ldir) + except FileNotFoundError: + continue + candidates = [os.path.join(ldir, ent) for ent in entries if pat.match(ent)] + files.extend([c for c in candidates if os.path.isfile(c)]) + return files diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py new file mode 100644 index 000000000..692e8e0bc --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -0,0 +1,92 @@ +import functools +import sys + +if sys.platform == "win32": + import ctypes.wintypes + + import pywintypes + import win32api + + # Mirrors WinBase.h (unfortunately not defined already elsewhere) + _WINBASE_LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + +else: + import ctypes + import os + + _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + +from .find_nvidia_dynamic_library import find_nvidia_dynamic_library + + +@functools.cache +def _windows_cuDriverGetVersion() -> int: + handle = win32api.LoadLibrary("nvcuda.dll") + + kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) + GetProcAddress = kernel32.GetProcAddress + GetProcAddress.argtypes = [ctypes.wintypes.HMODULE, ctypes.wintypes.LPCSTR] + GetProcAddress.restype = ctypes.c_void_p + cuDriverGetVersion = GetProcAddress(handle, b"cuDriverGetVersion") + assert cuDriverGetVersion + + FUNC_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int)) + cuDriverGetVersion_fn = FUNC_TYPE(cuDriverGetVersion) + driver_ver = ctypes.c_int() + err = cuDriverGetVersion_fn(ctypes.byref(driver_ver)) + assert err == 0 + return 
driver_ver.value + + +@functools.cache +def _windows_load_with_dll_basename(name: str) -> int: + driver_ver = _windows_cuDriverGetVersion() + del driver_ver # Keeping this here because it will probably be needed in the future. + + if name == "nvJitLink": + dll_name = "nvJitLink_120_0.dll" + elif name == "nvrtc": + dll_name = "nvrtc64_120_0.dll" + elif name == "nvvm": + dll_name = "nvvm64_40_0.dll" + + try: + return win32api.LoadLibrary(dll_name) + except pywintypes.error: + pass + + return None + + +@functools.cache +def load_nvidia_dynamic_library(name: str) -> int: + # First try using the platform-specific dynamic loader search mechanisms + if sys.platform == "win32": + handle = _windows_load_with_dll_basename(name) + if handle: + return handle + else: + dl_path = f"lib{name}.so" # Version intentionally no specified. + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError: + pass + else: + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int + + dl_path = find_nvidia_dynamic_library(name) + if sys.platform == "win32": + try: + handle = win32api.LoadLibrary(dl_path) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle # C signed int, matches win32api.GetProcAddress + else: + try: + handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + except OSError as e: + raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e + # Use `cdef void* ptr = ` in cython to convert back to void* + return handle._handle # C unsigned int diff --git a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py new file mode 100644 index 000000000..d2da726c9 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py @@ -0,0 +1,40 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import os +import sys + + +@functools.cache +def _impl(sys_path, sub_dirs): + results = [] + for base in sys_path: + stack = [(base, 0)] # (current_path, index into sub_dirs) + while stack: + current_path, idx = stack.pop() + if idx == len(sub_dirs): + if os.path.isdir(current_path): + results.append(current_path) + continue + + sub = sub_dirs[idx] + if sub == "*": + try: + entries = sorted(os.listdir(current_path)) + except OSError: + continue + for entry in entries: + entry_path = os.path.join(current_path, entry) + if os.path.isdir(entry_path): + stack.append((entry_path, idx + 1)) + else: + next_path = os.path.join(current_path, sub) + if os.path.isdir(next_path): + stack.append((next_path, idx + 1)) + return results + + +def sys_path_find_sub_dirs(sub_dirs): + return _impl(tuple(sys.path), tuple(sub_dirs)) diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py new file mode 100644 index 000000000..21aeb4b36 --- /dev/null +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -0,0 +1,37 @@ +# Copyright 2024-2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from cuda.bindings._path_finder.cuda_paths import ( + get_conda_ctk, + get_conda_include_dir, + get_cuda_home, + get_cuda_paths, + get_current_cuda_target_name, + get_debian_pkg_libdevice, + get_libdevice_wheel, + get_nvidia_cudalib_ctk, + get_nvidia_libdevice_ctk, + get_nvidia_nvvm_ctk, + get_nvidia_static_cudalib_ctk, + get_system_ctk, +) +from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library +from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library + +__all__ = [ + "find_nvidia_dynamic_library", + "load_nvidia_dynamic_library", + "get_conda_ctk", + "get_conda_include_dir", + "get_cuda_home", + "get_cuda_paths", + "get_current_cuda_target_name", + "get_debian_pkg_libdevice", + "get_libdevice_wheel", + "get_nvidia_cudalib_ctk", + "get_nvidia_libdevice_ctk", + "get_nvidia_nvvm_ctk", + "get_nvidia_static_cudalib_ctk", + "get_system_ctk", +] diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py new file mode 100644 index 000000000..e9245a5be --- /dev/null +++ b/cuda_bindings/tests/path_finder.py @@ -0,0 +1,9 @@ +from cuda.bindings import path_finder + +paths = path_finder.get_cuda_paths() + +for k, v in paths.items(): + print(f"{k}: {v}", flush=True) + +print(path_finder.find_nvidia_dynamic_library("nvvm")) +print(path_finder.find_nvidia_dynamic_library("nvJitLink")) diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py new file mode 100644 index 000000000..3297ce39e --- /dev/null +++ b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py @@ -0,0 +1,72 @@ +import os + +import pytest + +from cuda.bindings._path_finder.sys_path_find_sub_dirs import _impl + + +@pytest.fixture +def test_tree(tmp_path): + # Build: + # tmp_path/ + # sys1/nvidia/foo/lib + # sys1/nvidia/bar/lib + # sys2/nvidia/baz/nvvm/lib64 + base = tmp_path + (base / 
"sys1" / "nvidia" / "foo" / "lib").mkdir(parents=True) + (base / "sys1" / "nvidia" / "bar" / "lib").mkdir(parents=True) + (base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64").mkdir(parents=True) + + return { + "sys_path": ( + str(base / "sys1"), + str(base / "sys2"), + str(base / "nonexistent"), # should be ignored + ), + "base": base, + } + + +def test_exact_match(test_tree): + sys_path = test_tree["sys_path"] + base = test_tree["base"] + result = _impl(sys_path, ("nvidia", "foo", "lib")) + expected = [str(base / "sys1" / "nvidia" / "foo" / "lib")] + assert result == expected + + +def test_single_wildcard(test_tree): + sys_path = test_tree["sys_path"] + base = test_tree["base"] + result = _impl(sys_path, ("nvidia", "*", "lib")) + expected = [ + str(base / "sys1" / "nvidia" / "bar" / "lib"), + str(base / "sys1" / "nvidia" / "foo" / "lib"), + ] + assert sorted(result) == sorted(expected) + + +def test_double_wildcard(test_tree): + sys_path = test_tree["sys_path"] + base = test_tree["base"] + result = _impl(sys_path, ("nvidia", "*", "nvvm", "lib64")) + expected = [str(base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64")] + assert result == expected + + +def test_no_match(test_tree): + sys_path = test_tree["sys_path"] + result = _impl(sys_path, ("nvidia", "nonexistent", "lib")) + assert result == [] + + +def test_empty_sys_path(): + result = _impl((), ("nvidia", "*", "lib")) + assert result == [] + + +def test_empty_sub_dirs(test_tree): + sys_path = test_tree["sys_path"] + result = _impl(sys_path, ()) + expected = [p for p in sys_path if os.path.isdir(p)] + assert sorted(result) == sorted(expected) From 7a0c06870b6260af92f90691f28279cbd40e43eb Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 10 Apr 2025 16:15:58 -0700 Subject: [PATCH 02/28] Make `path_finder` work for `"nvrtc"` (#553) * Revert "Restore cuda/bindings/_bindings/cynvrtc.pyx.in as-is on main" This reverts commit ba093f5700a99153b5c26b224a21aaceb69ae72b. 
* Revert "Reapply "Revert debug changes under .github/workflows"" This reverts commit 8f69f832af51c393601b09c2fe29d874e9abb057. * Also load nvrtc from cuda_bindings/tests/path_finder.py * Add heuristics for nvidia_cuda_nvrtc Windows wheels. Also fix a couple bugs discovered by ChatGPT: * `glob.glob()` in this code return absolute paths. * stray `error_messages = []` * Add debug prints, mostly for `os.add_dll_directory(bin_dir)` * Fix unfortunate silly oversight (import os missing under Windows) * Use `win32api.LoadLibraryEx()` with suitable `flags`; also update `os.environ["PATH"]` * Hard-wire WinBase.h constants (they are not exposed by win32con) * Remove debug prints * Reapply "Reapply "Revert debug changes under .github/workflows"" This reverts commit b002ff676c681c18f82fb9ebda875ddfec668fc9. --- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 63 +++---------------- .../find_nvidia_dynamic_library.py | 45 +++++++++---- .../load_nvidia_dynamic_library.py | 6 +- cuda_bindings/tests/path_finder.py | 13 +++- 4 files changed, 58 insertions(+), 69 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index a0f8a27a0..2b0f3dc23 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -9,13 +9,12 @@ # This code was automatically generated with version 12.8.0. Do not modify it directly. 
{{if 'Windows' == platform.system()}} import os -import site -import struct import win32api -from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn +from libc.stdint cimport uintptr_t {{endif}} +from cuda.bindings import path_finder cdef bint __cuPythonInit = False {{if 'nvrtcGetErrorString' in found_functions}}cdef void *__nvrtcGetErrorString = NULL{{endif}} @@ -46,64 +45,18 @@ cdef bint __cuPythonInit = False {{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}} cdef int cuPythonInit() except -1 nogil: + {{if 'Windows' != platform.system()}} + cdef void* handle = NULL + {{endif}} + global __cuPythonInit if __cuPythonInit: return 0 __cuPythonInit = True - # Load library - {{if 'Windows' == platform.system()}} - with gil: - # First check if the DLL has been loaded by 3rd parties - try: - handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") - except: - handle = None - - # Else try default search - if not handle: - LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 - try: - handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) - except: - pass - - # Final check if DLLs can be found within pip installations - if not handle: - site_packages = [site.getusersitepackages()] + site.getsitepackages() - for sp in site_packages: - mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") - if not os.path.isdir(mod_path): - continue - os.add_dll_directory(mod_path) - LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 - LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - try: - handle = win32api.LoadLibraryEx( - # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... - os.path.join(mod_path, "nvrtc64_120_0.dll"), - 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) - - # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is - # located in the same mod_path. 
- # Update PATH environ so that the two dlls can find each other - os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) - except: - pass - - if not handle: - raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') - {{else}} - handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) - if handle == NULL: - with gil: - raise RuntimeError('Failed to dlopen libnvrtc.so.12') - {{endif}} - - - # Load function {{if 'Windows' == platform.system()}} with gil: + handle = path_finder.load_nvidia_dynamic_library("nvrtc") {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -288,6 +241,8 @@ cdef int cuPythonInit() except -1 nogil: {{endif}} {{else}} + with gil: + handle = path_finder.load_nvidia_dynamic_library("nvrtc") {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 30a9b68f4..3d6604f08 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -31,14 +31,18 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm return so_name # Look for a versioned library # Using sort here mainly to make the result deterministic. 
- for node in sorted(glob.glob(os.path.join(lib_dir, file_wild))): - so_name = os.path.join(lib_dir, node) + for so_name in sorted(glob.glob(os.path.join(lib_dir, file_wild))): if os.path.isfile(so_name): return so_name _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) return None +def _append_to_os_environ_path(dirpath): + curr_path = os.environ.get("PATH") + os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) + + def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): if libname == "nvvm": # noqa: SIM108 nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") @@ -46,10 +50,31 @@ def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): nvidia_sub_dirs = ("nvidia", "*", "bin") file_wild = libname + "*.dll" for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): - for node in sorted(glob.glob(os.path.join(bin_dir, file_wild))): - dll_name = os.path.join(bin_dir, node) - if os.path.isfile(dll_name): - return dll_name + dll_name = None + have_builtins = False + for path in sorted(glob.glob(os.path.join(bin_dir, file_wild))): + # nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl: + # nvidia\cuda_nvrtc\bin\ + # nvrtc-builtins64_128.dll + # nvrtc64_120_0.alt.dll + # nvrtc64_120_0.dll + node = os.path.basename(path) + if node.endswith(".alt.dll"): + continue + if "-builtins" in node: + have_builtins = True + continue + if dll_name is not None: + continue + if os.path.isfile(path): + dll_name = path + if dll_name is not None: + if have_builtins: + # Add the DLL directory to the search path + os.add_dll_directory(bin_dir) + # Update PATH as a fallback for dependent DLL resolution + _append_to_os_environ_path(bin_dir) + return dll_name _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) return None @@ -78,7 +103,6 @@ def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): candidate_so_dirs.append(alt_dir) libs.reverse() 
candidate_so_names = [so_dirname + so_basename for so_dirname in candidate_so_dirs] - error_messages = [] for so_name in candidate_so_names: if os.path.isfile(so_name): return so_name @@ -98,8 +122,7 @@ def _find_dll_using_cudalib_dir(libname, error_messages, attachments): if cudalib_dir is None: return None file_wild = libname + "*.dll" - for node in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): - dll_name = os.path.join(cudalib_dir, node) + for dll_name in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): if os.path.isfile(dll_name): return dll_name error_messages.append(f"No such file: {file_wild}") @@ -123,7 +146,7 @@ def find_nvidia_dynamic_library(name: str) -> str: dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments) if dll_name is None: attachments = "\n".join(attachments) - raise RuntimeError(f"Failure finding {name}*.dll: {', '.join(error_messages)}\n{attachments}") + raise RuntimeError(f'Failure finding "{name}*.dll": {", ".join(error_messages)}\n{attachments}') return dll_name so_basename = f"lib{name}.so" @@ -135,5 +158,5 @@ def find_nvidia_dynamic_library(name: str) -> str: so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) if so_name is None: attachments = "\n".join(attachments) - raise RuntimeError(f"Failure finding {so_basename}: {', '.join(error_messages)}\n{attachments}") + raise RuntimeError(f'Failure finding "{so_basename}": {", ".join(error_messages)}\n{attachments}') return so_name diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 692e8e0bc..1a52bf0dd 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -8,7 +8,8 @@ import win32api # Mirrors WinBase.h (unfortunately not defined already elsewhere) - _WINBASE_LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + 
_WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 else: import ctypes @@ -77,8 +78,9 @@ def load_nvidia_dynamic_library(name: str) -> int: dl_path = find_nvidia_dynamic_library(name) if sys.platform == "win32": + flags = _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR try: - handle = win32api.LoadLibrary(dl_path) + handle = win32api.LoadLibraryEx(dl_path, 0, flags) except pywintypes.error as e: raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e # Use `cdef void* ptr = ` in cython to convert back to void* diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py index e9245a5be..9b7dd23a3 100644 --- a/cuda_bindings/tests/path_finder.py +++ b/cuda_bindings/tests/path_finder.py @@ -4,6 +4,15 @@ for k, v in paths.items(): print(f"{k}: {v}", flush=True) +print() -print(path_finder.find_nvidia_dynamic_library("nvvm")) -print(path_finder.find_nvidia_dynamic_library("nvJitLink")) +libnames = ("nvJitLink", "nvrtc", "nvvm") + +for libname in libnames: + print(path_finder.find_nvidia_dynamic_library(libname)) + print() + +for libname in libnames: + print(libname) + print(path_finder.load_nvidia_dynamic_library(libname)) + print() From 74c975009c0ed8d11bd9ab6bc900164d60a4f0a4 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 25 Apr 2025 08:10:02 -0700 Subject: [PATCH 03/28] Add `path_finder.SUPPORTED_LIBNAMES` (#558) * Revert "Reapply "Revert debug changes under .github/workflows"" This reverts commit 8f69f832af51c393601b09c2fe29d874e9abb057. 
* Add names of all CTK 12.8.1 x86_64-linux libraries (.so) as `path_finder.SUPPORTED_LIBNAMES` https://chatgpt.com/share/67f98d0b-148c-8008-9951-9995cf5d860c * Add `SUPPORTED_WINDOWS_DLLS` * Add copyright notice * Move SUPPORTED_LIBNAMES, SUPPORTED_WINDOWS_DLLS to _path_finder/supported_libs.py * Use SUPPORTED_WINDOWS_DLLS in _windows_load_with_dll_basename() * Change "Set up mini CTK" to use `method: local`, remove `sub-packages` line. * Use Jimver/cuda-toolkit@v0.2.21 also under Linux, `method: local`, no `sub-packages`. * Add more `nvidia-*-cu12` wheels to get as many of the supported shared libraries as possible. * Revert "Use Jimver/cuda-toolkit@v0.2.21 also under Linux, `method: local`, no `sub-packages`." This reverts commit d49980665ac484626cd0ad9e7f727d5761f34da5. Problem observed: ``` /usr/bin/docker exec 1b42cd4ea3149ac3f2448eae830190ee62289b7304a73f8001e90cead5005102 sh -c "cat /etc/*release | grep ^ID" Warning: Failed to restore: Cache service responded with 422 /usr/bin/tar --posix -cf cache.tgz --exclude cache.tgz -P -C /__w/cuda-python/cuda-python --files-from manifest.txt -z Failed to save: Unable to reserve cache with key cuda_installer-linux-5.15.0-135-generic-x64-12.8.0, another job may be creating this cache. More details: This legacy service is shutting down, effective April 15, 2025. Migrate to the new service ASAP. For more information: https://gh.io/gha-cache-sunset Warning: Error during installation: Error: Unable to locate executable file: sudo. Please verify either the file path exists or the file can be found within a directory specified by the PATH environment variable. Also check the file mode to verify the file is executable. Error: Error: Unable to locate executable file: sudo. Please verify either the file path exists or the file can be found within a directory specified by the PATH environment variable. Also check the file mode to verify the file is executable. 
``` * Change test_path_finder::test_find_and_load() to skip cufile on Windows, and report exceptions as failures, except for cudart * Add nvidia-cuda-runtime-cu12 to pyproject.toml (for libname cudart) * test_path_finder.py: before loading cusolver, load nvJitLink, cusparse, cublas (experiment to see if that resolves the only Windows failure) Test (win-64, Python 3.12, CUDA 12.8.0, Runner default, CTK wheels) / test ``` ================================== FAILURES =================================== ________________________ test_find_and_load[cusolver] _________________________ libname = 'cusolver' @pytest.mark.parametrize("libname", path_finder.SUPPORTED_LIBNAMES) def test_find_and_load(libname): if sys.platform == "win32" and libname == "cufile": pytest.skip(f'test_find_and_load("{libname}") not supported on this platform') print(f'\ntest_find_and_load("{libname}")') failures = [] for algo, func in ( ("find", path_finder.find_nvidia_dynamic_library), ("load", path_finder.load_nvidia_dynamic_library), ): try: out = func(libname) except Exception as e: out = f"EXCEPTION: {type(e)} {str(e)}" failures.append(algo) print(out) print() > assert not failures E AssertionError: assert not ['load'] tests\test_path_finder.py:29: AssertionError ``` * test_path_finder.py: load *only* nvJitLink before loading cusolver * Run each test_find_or_load_nvidia_dynamic_library() subtest in a subprocess * Add cublasLt to supported_libs.py and load deps for cusolver, cusolverMg, cusparse in test_path_finder.py. Also restrict test_path_finder.py to test load only for now. * Add supported_libs.DIRECT_DEPENDENCIES * Remove cufile_rdma from supported libs (comment out). https://chatgpt.com/share/68033a33-385c-8008-a293-4c8cc3ea23ae * Split out `PARTIALLY_SUPPORTED_LIBNAMES`. Fix up test code. * Reduce public API to only load_nvidia_dynamic_library, SUPPORTED_LIBNAMES * Set CUDA_BINDINGS_PATH_FINDER_TEST_ALL_LIBNAMES=1 to match expected availability of nvidia shared libraries. 
* Refactor as `class _find_nvidia_dynamic_library` * Strict wheel, conda, system rule: try using the platform-specific dynamic loader search mechanisms only last * Introduce _load_and_report_path_linux(), add supported_libs.EXPECTED_LIB_SYMBOLS * Plug in ctypes.windll.kernel32.GetModuleFileNameW() * Keep track of nvrtc-related GitHub comment * Factor out `_find_dll_under_dir(dirpath, file_wild)` and reuse from `_find_dll_using_nvidia_bin_dirs()`, `_find_dll_using_cudalib_dir()` (to fix loading nvrtc64_120_0.dll from local CTK) * Minimal "is already loaded" code. * Add THIS FILE NEEDS TO BE REVIEWED/UPDATED FOR EACH CTK RELEASE comment in _path_finder/supported_libs.py * Add SUPPORTED_LINUX_SONAMES in _path_finder/supported_libs.py * Update SUPPORTED_WINDOWS_DLLS in _path_finder/supported_libs.py based on DLLs found in cuda_*win*.exe files. * Remove `os.add_dll_directory()` and `os.environ["PATH"]` manipulations from find_nvidia_dynamic_library.py. Add `supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY` and use from `load_nvidia_dynamic_library()`. * Move nvrtc-specific code from find_nvidia_dynamic_library.py to `supported_libs.is_suppressed_dll_file()` * Introduce dataclass LoadedDL as return type for load_nvidia_dynamic_library() * Factor out _abs_path_for_dynamic_library_* and use on handle obtained through "is already loaded" checks * Factor out _load_nvidia_dynamic_library_no_cache() and use for exercising LoadedDL.was_already_loaded_from_elsewhere * _check_nvjitlink_usable() in test_path_finder.py * Undo changes in .github/workflows/ and cuda_bindings/pyproject.toml * Move cuda_bindings/tests/path_finder.py -> toolshed/run_cuda_bindings_path_finder.py * Add bandit suppressions in test_path_finder.py * Add pytest info_summary_append fixture and use from test_path_finder.py to report the absolute paths of the loaded libraries. 
--- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 4 +- .../bindings/_internal/nvjitlink_linux.pyx | 2 +- .../bindings/_internal/nvjitlink_windows.pyx | 2 +- .../cuda/bindings/_internal/nvvm_linux.pyx | 2 +- .../cuda/bindings/_internal/nvvm_windows.pyx | 2 +- .../find_nvidia_dynamic_library.py | 106 +++-- .../load_nvidia_dynamic_library.py | 173 +++++++-- .../bindings/_path_finder/supported_libs.py | 364 ++++++++++++++++++ cuda_bindings/cuda/bindings/path_finder.py | 30 +- cuda_bindings/pyproject.toml | 1 - cuda_bindings/tests/conftest.py | 20 + cuda_bindings/tests/path_finder.py | 18 - cuda_bindings/tests/test_path_finder.py | 92 +++++ toolshed/build_path_finder_dlls.py | 84 ++++ toolshed/build_path_finder_sonames.py | 74 ++++ toolshed/find_sonames.sh | 6 + toolshed/run_cuda_bindings_path_finder.py | 34 ++ 17 files changed, 868 insertions(+), 146 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_path_finder/supported_libs.py create mode 100644 cuda_bindings/tests/conftest.py delete mode 100644 cuda_bindings/tests/path_finder.py create mode 100644 cuda_bindings/tests/test_path_finder.py create mode 100755 toolshed/build_path_finder_dlls.py create mode 100755 toolshed/build_path_finder_sonames.py create mode 100755 toolshed/find_sonames.sh create mode 100644 toolshed/run_cuda_bindings_path_finder.py diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index 2b0f3dc23..d2bb0b63b 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -56,7 +56,7 @@ cdef int cuPythonInit() except -1 nogil: {{if 'Windows' == platform.system()}} with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc") + handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -242,7 +242,7 @@ cdef int cuPythonInit() except -1 nogil: {{else}} with gil: - 
handle = path_finder.load_nvidia_dynamic_library("nvrtc") + handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 9d21a3e10..78b4d802b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -53,7 +53,7 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index f86972216..b306a3001 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -40,7 +40,7 @@ cdef void* __nvJitLinkVersion = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink") + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle return handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 33ba8e610..82335508b 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -51,7 +51,7 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle return 
handle diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 6349fa5a1..21b4d9418 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -38,7 +38,7 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm") + cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle return handle diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 3d6604f08..e60154aa5 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -7,6 +7,7 @@ import os from .cuda_paths import IS_WIN32, get_cuda_paths +from .supported_libs import is_suppressed_dll_file from .sys_path_find_sub_dirs import sys_path_find_sub_dirs @@ -38,9 +39,13 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm return None -def _append_to_os_environ_path(dirpath): - curr_path = os.environ.get("PATH") - os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) +def _find_dll_under_dir(dirpath, file_wild): + for path in sorted(glob.glob(os.path.join(dirpath, file_wild))): + if not os.path.isfile(path): + continue + if not is_suppressed_dll_file(os.path.basename(path)): + return path + return None def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): @@ -50,30 +55,8 @@ def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): nvidia_sub_dirs = ("nvidia", "*", "bin") file_wild = libname + "*.dll" for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): - dll_name = None - have_builtins = False - for path in 
sorted(glob.glob(os.path.join(bin_dir, file_wild))): - # nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl: - # nvidia\cuda_nvrtc\bin\ - # nvrtc-builtins64_128.dll - # nvrtc64_120_0.alt.dll - # nvrtc64_120_0.dll - node = os.path.basename(path) - if node.endswith(".alt.dll"): - continue - if "-builtins" in node: - have_builtins = True - continue - if dll_name is not None: - continue - if os.path.isfile(path): - dll_name = path + dll_name = _find_dll_under_dir(bin_dir, file_wild) if dll_name is not None: - if have_builtins: - # Add the DLL directory to the search path - os.add_dll_directory(bin_dir) - # Update PATH as a fallback for dependent DLL resolution - _append_to_os_environ_path(bin_dir) return dll_name _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) return None @@ -122,9 +105,9 @@ def _find_dll_using_cudalib_dir(libname, error_messages, attachments): if cudalib_dir is None: return None file_wild = libname + "*.dll" - for dll_name in sorted(glob.glob(os.path.join(cudalib_dir, file_wild))): - if os.path.isfile(dll_name): - return dll_name + dll_name = _find_dll_under_dir(cudalib_dir, file_wild) + if dll_name is not None: + return dll_name error_messages.append(f"No such file: {file_wild}") attachments.append(f' listdir("{cudalib_dir}"):') for node in sorted(os.listdir(cudalib_dir)): @@ -132,31 +115,42 @@ def _find_dll_using_cudalib_dir(libname, error_messages, attachments): return None -@functools.cache -def find_nvidia_dynamic_library(name: str) -> str: - error_messages = [] - attachments = [] - - if IS_WIN32: - dll_name = _find_dll_using_nvidia_bin_dirs(name, error_messages, attachments) - if dll_name is None: - if name == "nvvm": - dll_name = _get_cuda_paths_info("nvvm", error_messages) - else: - dll_name = _find_dll_using_cudalib_dir(name, error_messages, attachments) - if dll_name is None: - attachments = "\n".join(attachments) - raise RuntimeError(f'Failure finding "{name}*.dll": {", 
".join(error_messages)}\n{attachments}') - return dll_name - - so_basename = f"lib{name}.so" - so_name = _find_so_using_nvidia_lib_dirs(name, so_basename, error_messages, attachments) - if so_name is None: - if name == "nvvm": - so_name = _get_cuda_paths_info("nvvm", error_messages) +class _find_nvidia_dynamic_library: + def __init__(self, libname: str): + self.libname = libname + self.error_messages = [] + self.attachments = [] + self.abs_path = None + + if IS_WIN32: + self.abs_path = _find_dll_using_nvidia_bin_dirs(libname, self.error_messages, self.attachments) + if self.abs_path is None: + if libname == "nvvm": + self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) + else: + self.abs_path = _find_dll_using_cudalib_dir(libname, self.error_messages, self.attachments) + self.lib_searched_for = f"{libname}*.dll" else: - so_name = _find_so_using_cudalib_dir(so_basename, error_messages, attachments) - if so_name is None: - attachments = "\n".join(attachments) - raise RuntimeError(f'Failure finding "{so_basename}": {", ".join(error_messages)}\n{attachments}') - return so_name + self.lib_searched_for = f"lib{libname}.so" + self.abs_path = _find_so_using_nvidia_lib_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) + if self.abs_path is None: + if libname == "nvvm": + self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) + else: + self.abs_path = _find_so_using_cudalib_dir( + self.lib_searched_for, self.error_messages, self.attachments + ) + + def raise_if_abs_path_is_None(self): + if self.abs_path: + return self.abs_path + err = ", ".join(self.error_messages) + att = "\n".join(self.attachments) + raise RuntimeError(f'Failure finding "{self.lib_searched_for}": {err}\n{att}') + + +@functools.cache +def find_nvidia_dynamic_library(libname: str) -> str: + return _find_nvidia_dynamic_library(libname).raise_if_abs_path_is_None() diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py 
b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 1a52bf0dd..c770de67d 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -1,5 +1,13 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import ctypes import functools +import os import sys +from dataclasses import dataclass +from typing import Optional, Tuple if sys.platform == "win32": import ctypes.wintypes @@ -12,12 +20,42 @@ _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 else: - import ctypes - import os + import ctypes.util _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL -from .find_nvidia_dynamic_library import find_nvidia_dynamic_library + _LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2" + _LIBDL = ctypes.CDLL(_LIBDL_PATH) + _LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p] + _LIBDL.dladdr.restype = ctypes.c_int + + class Dl_info(ctypes.Structure): + _fields_ = [ + ("dli_fname", ctypes.c_char_p), # path to .so + ("dli_fbase", ctypes.c_void_p), + ("dli_sname", ctypes.c_char_p), + ("dli_saddr", ctypes.c_void_p), + ] + + +from .find_nvidia_dynamic_library import _find_nvidia_dynamic_library +from .supported_libs import ( + DIRECT_DEPENDENCIES, + EXPECTED_LIB_SYMBOLS, + LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY, + SUPPORTED_LINUX_SONAMES, + SUPPORTED_WINDOWS_DLLS, +) + + +def _add_dll_directory(dll_abs_path): + dirpath = os.path.dirname(dll_abs_path) + assert os.path.isdir(dirpath), dll_abs_path + # Add the DLL directory to the search path + os.add_dll_directory(dirpath) + # Update PATH as a fallback for dependent DLL resolution + curr_path = os.environ.get("PATH") + os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) @functools.cache @@ -39,56 +77,117 @@ def _windows_cuDriverGetVersion() -> int: return driver_ver.value +def 
_abs_path_for_dynamic_library_windows(handle: int) -> str: + buf = ctypes.create_unicode_buffer(260) + n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, len(buf)) + if n_chars == 0: + raise OSError("GetModuleFileNameW failed") + return buf.value + + @functools.cache -def _windows_load_with_dll_basename(name: str) -> int: +def _windows_load_with_dll_basename(name: str) -> Tuple[Optional[int], Optional[str]]: driver_ver = _windows_cuDriverGetVersion() del driver_ver # Keeping this here because it will probably be needed in the future. - if name == "nvJitLink": - dll_name = "nvJitLink_120_0.dll" - elif name == "nvrtc": - dll_name = "nvrtc64_120_0.dll" - elif name == "nvvm": - dll_name = "nvvm64_40_0.dll" + dll_names = SUPPORTED_WINDOWS_DLLS.get(name) + if dll_names is None: + return None - try: - return win32api.LoadLibrary(dll_name) - except pywintypes.error: - pass + for dll_name in dll_names: + handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) + if handle: + return handle, _abs_path_for_dynamic_library_windows(handle) - return None + return None, None -@functools.cache -def load_nvidia_dynamic_library(name: str) -> int: - # First try using the platform-specific dynamic loader search mechanisms +def _abs_path_for_dynamic_library_linux(libname: str, handle: int) -> str: + for symbol_name in EXPECTED_LIB_SYMBOLS[libname]: + symbol = getattr(handle, symbol_name, None) + if symbol is not None: + break + else: + return None + addr = ctypes.cast(symbol, ctypes.c_void_p) + info = Dl_info() + if _LIBDL.dladdr(addr, ctypes.byref(info)) == 0: + raise OSError(f"dladdr failed for {libname=!r}") + return info.dli_fname.decode() + + +def _load_and_report_path_linux(libname: str, soname: str) -> Tuple[int, str]: + handle = ctypes.CDLL(soname, _LINUX_CDLL_MODE) + abs_path = _abs_path_for_dynamic_library_linux(libname, handle) + if abs_path is None: + raise RuntimeError(f"No expected symbol for {libname=!r}") + return 
handle, abs_path + + +@dataclass +class LoadedDL: + # ATTENTION: To convert `handle` back to `void*` in cython: + # Linux: `cdef void* ptr = ` + # Windows: `cdef void* ptr = ` + handle: int + abs_path: Optional[str] + was_already_loaded_from_elsewhere: bool + + +def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: + # Detect if the library was loaded already in some other way (i.e. not via this function). if sys.platform == "win32": - handle = _windows_load_with_dll_basename(name) - if handle: - return handle + for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): + try: + handle = win32api.GetModuleHandle(dll_name) + except pywintypes.error: + pass + else: + return LoadedDL(handle, _abs_path_for_dynamic_library_windows(handle), True) else: - dl_path = f"lib{name}.so" # Version intentionally no specified. - try: - handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) - except OSError: - pass + for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()): + try: + handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) + except OSError: + pass + else: + return LoadedDL(handle, _abs_path_for_dynamic_library_linux(libname, handle), True) + + for dep in DIRECT_DEPENDENCIES.get(libname, ()): + load_nvidia_dynamic_library(dep) + + found = _find_nvidia_dynamic_library(libname) + if found.abs_path is None: + if sys.platform == "win32": + handle, abs_path = _windows_load_with_dll_basename(libname) + if handle: + return LoadedDL(handle, abs_path, False) else: - # Use `cdef void* ptr = ` in cython to convert back to void* - return handle._handle # C unsigned int + try: + handle, abs_path = _load_and_report_path_linux(libname, found.lib_searched_for) + except OSError: + pass + else: + return LoadedDL(handle._handle, abs_path, False) + found.raise_if_abs_path_is_None() - dl_path = find_nvidia_dynamic_library(name) if sys.platform == "win32": + if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY: + _add_dll_directory(found.abs_path) flags = 
_WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR try: - handle = win32api.LoadLibraryEx(dl_path, 0, flags) + handle = win32api.LoadLibraryEx(found.abs_path, 0, flags) except pywintypes.error as e: - raise RuntimeError(f"Failed to load DLL at {dl_path}: {e}") from e - # Use `cdef void* ptr = ` in cython to convert back to void* - return handle # C signed int, matches win32api.GetProcAddress + raise RuntimeError(f"Failed to load DLL at {found.abs_path}: {e}") from e + return LoadedDL(handle, found.abs_path, False) else: try: - handle = ctypes.CDLL(dl_path, _LINUX_CDLL_MODE) + handle = ctypes.CDLL(found.abs_path, _LINUX_CDLL_MODE) except OSError as e: - raise RuntimeError(f"Failed to dlopen {dl_path}: {e}") from e - # Use `cdef void* ptr = ` in cython to convert back to void* - return handle._handle # C unsigned int + raise RuntimeError(f"Failed to dlopen {found.abs_path}: {e}") from e + return LoadedDL(handle._handle, found.abs_path, False) + + +@functools.cache +def load_nvidia_dynamic_library(libname: str) -> LoadedDL: + return _load_nvidia_dynamic_library_no_cache(libname) diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py new file mode 100644 index 000000000..ee62b92b8 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -0,0 +1,364 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# THIS FILE NEEDS TO BE REVIEWED/UPDATED FOR EACH CTK RELEASE + +SUPPORTED_LIBNAMES = ( + # Core CUDA Runtime and Compiler + "nvJitLink", + "nvrtc", + "nvvm", +) + +PARTIALLY_SUPPORTED_LIBNAMES = ( + # Core CUDA Runtime and Compiler + "cudart", + "nvfatbin", + # Math Libraries + "cublas", + "cublasLt", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolverMg", + "cusparse", + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + "nvblas", + # Other + "cufile", + # "cufile_rdma", # Requires libmlx5.so + "nvjpeg", +) + +# Based on ldd output for Linux x86_64 nvidia-*-cu12 wheels (12.8.1) +DIRECT_DEPENDENCIES = { + "cublas": ("cublasLt",), + "cufftw": ("cufft",), + # "cufile_rdma": ("cufile",), + "cusolver": ("nvJitLink", "cusparse", "cublasLt", "cublas"), + "cusolverMg": ("nvJitLink", "cublasLt", "cublas"), + "cusparse": ("nvJitLink",), + "nppial": ("nppc",), + "nppicc": ("nppc",), + "nppidei": ("nppc",), + "nppif": ("nppc",), + "nppig": ("nppc",), + "nppim": ("nppc",), + "nppist": ("nppc",), + "nppisu": ("nppc",), + "nppitc": ("nppc",), + "npps": ("nppc",), + "nvblas": ("cublas", "cublasLt"), +} + +# Based on these released files: +# cuda_11.0.3_450.51.06_linux.run +# cuda_11.1.1_455.32.00_linux.run +# cuda_11.2.2_460.32.03_linux.run +# cuda_11.3.1_465.19.01_linux.run +# cuda_11.4.4_470.82.01_linux.run +# cuda_11.5.1_495.29.05_linux.run +# cuda_11.6.2_510.47.03_linux.run +# cuda_11.7.1_515.65.01_linux.run +# cuda_11.8.0_520.61.05_linux.run +# cuda_12.0.1_525.85.12_linux.run +# cuda_12.1.1_530.30.02_linux.run +# cuda_12.2.2_535.104.05_linux.run +# cuda_12.3.2_545.23.08_linux.run +# cuda_12.4.1_550.54.15_linux.run +# cuda_12.5.1_555.42.06_linux.run +# cuda_12.6.2_560.35.03_linux.run +# cuda_12.8.0_570.86.10_linux.run +# Generated with toolshed/build_path_finder_sonames.py +SUPPORTED_LINUX_SONAMES = { + "cublas": ( + 
"libcublas.so.11", + "libcublas.so.12", + ), + "cublasLt": ( + "libcublasLt.so.11", + "libcublasLt.so.12", + ), + "cudart": ( + "libcudart.so.11.0", + "libcudart.so.12", + ), + "cufft": ( + "libcufft.so.10", + "libcufft.so.11", + ), + "cufftw": ( + "libcufftw.so.10", + "libcufftw.so.11", + ), + "cufile": ("libcufile.so.0",), + # "cufile_rdma": ("libcufile_rdma.so.1",), + "curand": ("libcurand.so.10",), + "cusolver": ( + "libcusolver.so.10", + "libcusolver.so.11", + ), + "cusolverMg": ( + "libcusolverMg.so.10", + "libcusolverMg.so.11", + ), + "cusparse": ( + "libcusparse.so.11", + "libcusparse.so.12", + ), + "nppc": ( + "libnppc.so.11", + "libnppc.so.12", + ), + "nppial": ( + "libnppial.so.11", + "libnppial.so.12", + ), + "nppicc": ( + "libnppicc.so.11", + "libnppicc.so.12", + ), + "nppidei": ( + "libnppidei.so.11", + "libnppidei.so.12", + ), + "nppif": ( + "libnppif.so.11", + "libnppif.so.12", + ), + "nppig": ( + "libnppig.so.11", + "libnppig.so.12", + ), + "nppim": ( + "libnppim.so.11", + "libnppim.so.12", + ), + "nppist": ( + "libnppist.so.11", + "libnppist.so.12", + ), + "nppisu": ( + "libnppisu.so.11", + "libnppisu.so.12", + ), + "nppitc": ( + "libnppitc.so.11", + "libnppitc.so.12", + ), + "npps": ( + "libnpps.so.11", + "libnpps.so.12", + ), + "nvJitLink": ("libnvJitLink.so.12",), + "nvblas": ( + "libnvblas.so.11", + "libnvblas.so.12", + ), + "nvfatbin": ("libnvfatbin.so.12",), + "nvjpeg": ( + "libnvjpeg.so.11", + "libnvjpeg.so.12", + ), + "nvrtc": ( + "libnvrtc.so.11.0", + "libnvrtc.so.11.1", + "libnvrtc.so.11.2", + "libnvrtc.so.12", + ), + "nvvm": ( + "libnvvm.so.3", + "libnvvm.so.4", + ), +} + +# Based on these released files: +# cuda_11.0.3_451.82_win10.exe +# cuda_11.1.1_456.81_win10.exe +# cuda_11.2.2_461.33_win10.exe +# cuda_11.3.1_465.89_win10.exe +# cuda_11.4.4_472.50_windows.exe +# cuda_11.5.1_496.13_windows.exe +# cuda_11.6.2_511.65_windows.exe +# cuda_11.7.1_516.94_windows.exe +# cuda_11.8.0_522.06_windows.exe +# cuda_12.0.1_528.33_windows.exe +# 
cuda_12.1.1_531.14_windows.exe +# cuda_12.2.2_537.13_windows.exe +# cuda_12.3.2_546.12_windows.exe +# cuda_12.4.1_551.78_windows.exe +# cuda_12.5.1_555.85_windows.exe +# cuda_12.6.2_560.94_windows.exe +# cuda_12.8.1_572.61_windows.exe +# Generated with toolshed/build_path_finder_dlls.py (WITH MANUAL EDITS) +SUPPORTED_WINDOWS_DLLS = { + "cublas": ( + "cublas64_11.dll", + "cublas64_12.dll", + ), + "cublasLt": ( + "cublasLt64_11.dll", + "cublasLt64_12.dll", + ), + "cudart": ( + "cudart32_110.dll", + "cudart32_65.dll", + "cudart32_90.dll", + "cudart64_101.dll", + "cudart64_110.dll", + "cudart64_12.dll", + "cudart64_65.dll", + ), + "cufft": ( + "cufft64_10.dll", + "cufft64_11.dll", + "cufftw64_10.dll", + "cufftw64_11.dll", + ), + "cufftw": ( + "cufftw64_10.dll", + "cufftw64_11.dll", + ), + "cufile": (), + # "cufile_rdma": (), + "curand": ("curand64_10.dll",), + "cusolver": ( + "cusolver64_10.dll", + "cusolver64_11.dll", + ), + "cusolverMg": ( + "cusolverMg64_10.dll", + "cusolverMg64_11.dll", + ), + "cusparse": ( + "cusparse64_11.dll", + "cusparse64_12.dll", + ), + "nppc": ( + "nppc64_11.dll", + "nppc64_12.dll", + ), + "nppial": ( + "nppial64_11.dll", + "nppial64_12.dll", + ), + "nppicc": ( + "nppicc64_11.dll", + "nppicc64_12.dll", + ), + "nppidei": ( + "nppidei64_11.dll", + "nppidei64_12.dll", + ), + "nppif": ( + "nppif64_11.dll", + "nppif64_12.dll", + ), + "nppig": ( + "nppig64_11.dll", + "nppig64_12.dll", + ), + "nppim": ( + "nppim64_11.dll", + "nppim64_12.dll", + ), + "nppist": ( + "nppist64_11.dll", + "nppist64_12.dll", + ), + "nppisu": ( + "nppisu64_11.dll", + "nppisu64_12.dll", + ), + "nppitc": ( + "nppitc64_11.dll", + "nppitc64_12.dll", + ), + "npps": ( + "npps64_11.dll", + "npps64_12.dll", + ), + "nvJitLink": ("nvJitLink_120_0.dll",), + "nvblas": ( + "nvblas64_11.dll", + "nvblas64_12.dll", + ), + "nvfatbin": ("nvfatbin_120_0.dll",), + "nvjpeg": ( + "nvjpeg64_11.dll", + "nvjpeg64_12.dll", + ), + "nvrtc": ( + "nvrtc64_110_0.dll", + "nvrtc64_111_0.dll", + 
"nvrtc64_112_0.dll", + "nvrtc64_120_0.dll", + ), + "nvvm": ( + "nvvm32.dll", + "nvvm64.dll", + "nvvm64_33_0.dll", + "nvvm64_40_0.dll", + ), +} + +LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY = ( + "cufft", + "nvrtc", +) + + +def is_suppressed_dll_file(path_basename: str) -> bool: + if path_basename.startswith("nvrtc"): + # nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl: + # nvidia\cuda_nvrtc\bin\ + # nvrtc-builtins64_128.dll + # nvrtc64_120_0.alt.dll + # nvrtc64_120_0.dll + return path_basename.endswith(".alt.dll") or "-builtins" in path_basename + return False + + +# Based on nm output for Linux x86_64 /usr/local/cuda (12.8.1) +EXPECTED_LIB_SYMBOLS = { + "nvJitLink": ("nvJitLinkVersion",), + "nvrtc": ("nvrtcVersion",), + "nvvm": ("nvvmVersion",), + "cudart": ("cudaRuntimeGetVersion",), + "nvfatbin": ("nvFatbinVersion",), + "cublas": ("cublasGetVersion",), + "cublasLt": ("cublasLtGetVersion",), + "cufft": ("cufftGetVersion",), + "cufftw": ("fftwf_malloc",), + "curand": ("curandGetVersion",), + "cusolver": ("cusolverGetVersion",), + "cusolverMg": ("cusolverMgCreate",), + "cusparse": ("cusparseGetVersion",), + "nppc": ("nppGetLibVersion",), + "nppial": ("nppiAdd_32f_C1R",), + "nppicc": ("nppiColorToGray_8u_C3C1R",), + "nppidei": ("nppiCopy_8u_C1R",), + "nppif": ("nppiFilterSobelHorizBorder_8u_C1R",), + "nppig": ("nppiResize_8u_C1R",), + "nppim": ("nppiErode_8u_C1R",), + "nppist": ("nppiMean_8u_C1R",), + "nppisu": ("nppiFree",), + "nppitc": ("nppiThreshold_8u_C1R",), + "npps": ("nppsAdd_32f",), + "nvblas": ("dgemm",), + "cufile": ("cuFileGetVersion",), + # "cufile_rdma": ("rdma_buffer_reg",), + "nvjpeg": ("nvjpegCreate",), +} diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 21aeb4b36..9c08bdc25 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -2,36 +2,10 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from 
cuda.bindings._path_finder.cuda_paths import ( - get_conda_ctk, - get_conda_include_dir, - get_cuda_home, - get_cuda_paths, - get_current_cuda_target_name, - get_debian_pkg_libdevice, - get_libdevice_wheel, - get_nvidia_cudalib_ctk, - get_nvidia_libdevice_ctk, - get_nvidia_nvvm_ctk, - get_nvidia_static_cudalib_ctk, - get_system_ctk, -) -from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library +from cuda.bindings._path_finder.supported_libs import SUPPORTED_LIBNAMES __all__ = [ - "find_nvidia_dynamic_library", "load_nvidia_dynamic_library", - "get_conda_ctk", - "get_conda_include_dir", - "get_cuda_home", - "get_cuda_paths", - "get_current_cuda_target_name", - "get_debian_pkg_libdevice", - "get_libdevice_wheel", - "get_nvidia_cudalib_ctk", - "get_nvidia_libdevice_ctk", - "get_nvidia_nvvm_ctk", - "get_nvidia_static_cudalib_ctk", - "get_system_ctk", + "SUPPORTED_LIBNAMES", ] diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index e6a9492f5..8921cc5a2 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -25,7 +25,6 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", "Environment :: GPU :: NVIDIA CUDA", ] dynamic = [ diff --git a/cuda_bindings/tests/conftest.py b/cuda_bindings/tests/conftest.py new file mode 100644 index 000000000..bcdc37db4 --- /dev/null +++ b/cuda_bindings/tests/conftest.py @@ -0,0 +1,20 @@ +import pytest + + +def pytest_configure(config): + config.custom_info = [] + + +def pytest_terminal_summary(terminalreporter, exitstatus, config): + if config.custom_info: + terminalreporter.write_sep("=", "INFO summary") + for msg in config.custom_info: + terminalreporter.line(f"INFO {msg}") + + +@pytest.fixture +def info_summary_append(request): + 
def _append(message): + request.config.custom_info.append(f"{request.node.name}: {message}") + + return _append diff --git a/cuda_bindings/tests/path_finder.py b/cuda_bindings/tests/path_finder.py deleted file mode 100644 index 9b7dd23a3..000000000 --- a/cuda_bindings/tests/path_finder.py +++ /dev/null @@ -1,18 +0,0 @@ -from cuda.bindings import path_finder - -paths = path_finder.get_cuda_paths() - -for k, v in paths.items(): - print(f"{k}: {v}", flush=True) -print() - -libnames = ("nvJitLink", "nvrtc", "nvvm") - -for libname in libnames: - print(path_finder.find_nvidia_dynamic_library(libname)) - print() - -for libname in libnames: - print(libname) - print(path_finder.load_nvidia_dynamic_library(libname)) - print() diff --git a/cuda_bindings/tests/test_path_finder.py b/cuda_bindings/tests/test_path_finder.py new file mode 100644 index 000000000..cb659026f --- /dev/null +++ b/cuda_bindings/tests/test_path_finder.py @@ -0,0 +1,92 @@ +import os +import subprocess # nosec B404 +import sys + +import pytest + +from cuda.bindings import path_finder +from cuda.bindings._path_finder import supported_libs + +ALL_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES +if os.environ.get("CUDA_BINDINGS_PATH_FINDER_TEST_ALL_LIBNAMES", False): + TEST_LIBNAMES = ALL_LIBNAMES +else: + TEST_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + + +def test_all_libnames_linux_sonames_consistency(): + assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_LINUX_SONAMES.keys())) + + +def test_all_libnames_windows_dlls_consistency(): + assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_WINDOWS_DLLS.keys())) + + +def test_all_libnames_libnames_requiring_os_add_dll_directory_consistency(): + assert not (set(supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY) - set(ALL_LIBNAMES)) + + +def test_all_libnames_expected_lib_symbols_consistency(): + assert tuple(sorted(ALL_LIBNAMES)) == 
tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) + + +def _check_nvjitlink_usable(): + from cuda.bindings._internal import nvjitlink as inner_nvjitlink + + return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 + + +def _build_subprocess_failed_for_libname_message(libname, result): + return ( + f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" + f"--- stdout-from-subprocess ---\n{result.stdout}\n" + f"--- stderr-from-subprocess ---\n{result.stderr}\n" + ) + + +@pytest.mark.parametrize("api", ("find", "load")) +@pytest.mark.parametrize("libname", TEST_LIBNAMES) +def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): + if sys.platform == "win32" and not supported_libs.SUPPORTED_WINDOWS_DLLS[libname]: + pytest.skip(f"{libname=!r} not supported on {sys.platform=}") + + if libname == "nvJitLink" and not _check_nvjitlink_usable(): + pytest.skip(f"{libname=!r} not usable") + + if api == "find": + code = f"""\ +from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library +abs_path = find_nvidia_dynamic_library({libname!r}) +print(f"{{abs_path!r}}") +""" + else: + code = f"""\ +from cuda.bindings.path_finder import load_nvidia_dynamic_library +from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache + +loaded_dl_fresh = load_nvidia_dynamic_library({libname!r}) +if loaded_dl_fresh.was_already_loaded_from_elsewhere: + raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere") + +loaded_dl_from_cache = load_nvidia_dynamic_library({libname!r}) +if loaded_dl_from_cache is not loaded_dl_fresh: + raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh") + +loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r}) +if not loaded_dl_no_cache.was_already_loaded_from_elsewhere: + raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere") +if loaded_dl_no_cache.abs_path != 
loaded_dl_fresh.abs_path: + raise RuntimeError(f"{{loaded_dl_no_cache.abs_path=!r}} != {{loaded_dl_fresh.abs_path=!r}}") + +print(f"{{loaded_dl_fresh.abs_path!r}}") +""" + result = subprocess.run( # nosec B603 + [sys.executable, "-c", code], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + ) + if result.returncode == 0: + info_summary_append(f"abs_path={result.stdout.rstrip()}") + else: + raise RuntimeError(_build_subprocess_failed_for_libname_message(libname, result)) diff --git a/toolshed/build_path_finder_dlls.py b/toolshed/build_path_finder_dlls.py new file mode 100755 index 000000000..c82dcd866 --- /dev/null +++ b/toolshed/build_path_finder_dlls.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +# Input for this script: .txt files generated with: +# for exe in *.exe; do 7z l $exe > "${exe%.exe}.txt"; done + +# The output of this script +# requires obvious manual edits to remove duplicates and unwanted dlls. + +import sys + +LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER = ( + "nvJitLink", + "nvrtc", + "nvvm", + "cudart", + "nvfatbin", + "cublas", + "cublasLt", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolverMg", + "cusparse", + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + "nvblas", + "cufile", + "cufile_rdma", + "nvjpeg", +) + + +def run(args): + dlls_from_files = set() + for filename in args: + lines_iter = iter(open(filename).read().splitlines()) + for line in lines_iter: + if line.startswith("-------------------"): + break + else: + raise RuntimeError("------------------- NOT FOUND") + for line in lines_iter: + if line.startswith("-------------------"): + break + assert line[52] == " ", line + assert line[53] != " ", line + path = line[53:] + if path.endswith(".dll"): + dll = path.rsplit("/", 1)[1] + dlls_from_files.add(dll) + else: + raise RuntimeError("------------------- NOT FOUND") + + print("DLLs in scope of cuda.bindings.path_finder") + 
print("==========================================") + dlls_in_scope = set() + for libname in sorted(LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER): + print(f'"{libname}": (') + for dll in sorted(dlls_from_files): + if dll.startswith(libname): + dlls_in_scope.add(dll) + print(f' "{dll}",') + print("),") + print() + + print("DLLs out of scope") + print("=================") + for dll in sorted(dlls_from_files - dlls_in_scope): + print(dll) + print() + + +if __name__ == "__main__": + run(args=sys.argv[1:]) diff --git a/toolshed/build_path_finder_sonames.py b/toolshed/build_path_finder_sonames.py new file mode 100755 index 000000000..20e8ec6c7 --- /dev/null +++ b/toolshed/build_path_finder_sonames.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# Input for this script: +# output of toolshed/find_sonames.sh + +# The output of this script +# is expected to be usable as-is. + +import sys + +LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER = ( + "nvJitLink", + "nvrtc", + "nvvm", + "cudart", + "nvfatbin", + "cublas", + "cublasLt", + "cufft", + "cufftw", + "curand", + "cusolver", + "cusolverMg", + "cusparse", + "nppc", + "nppial", + "nppicc", + "nppidei", + "nppif", + "nppig", + "nppim", + "nppist", + "nppisu", + "nppitc", + "npps", + "nvblas", + "cufile", + "cufile_rdma", + "nvjpeg", +) + + +def run(args): + assert len(args) == 1, "output-of-find_sonames.sh" + + sonames_from_file = set() + for line in open(args[0]).read().splitlines(): + flds = line.split() + assert len(flds) == 3, flds + if flds[-1] != "SONAME_NOT_SET": + sonames_from_file.add(flds[-1]) + + print("SONAMEs in scope of cuda.bindings.path_finder") + print("=============================================") + sonames_in_scope = set() + for libname in sorted(LIBNAMES_IN_SCOPE_OF_CUDA_BINDINGS_PATH_FINDER): + print(f'"{libname}": (') + lib_so = "lib" + libname + ".so" + for soname in sorted(sonames_from_file): + if soname.startswith(lib_so): + sonames_in_scope.add(soname) + print(f' "{soname}",') + print("),") + 
print() + + print("SONAMEs out of scope") + print("====================") + for soname in sorted(sonames_from_file - sonames_in_scope): + print(soname) + print() + + +if __name__ == "__main__": + run(args=sys.argv[1:]) diff --git a/toolshed/find_sonames.sh b/toolshed/find_sonames.sh new file mode 100755 index 000000000..79c2e89d5 --- /dev/null +++ b/toolshed/find_sonames.sh @@ -0,0 +1,6 @@ +#!/bin/bash +find "$@" -type f -name '*.so*' -print0 | while IFS= read -r -d '' f; do + type=$(test -L "$f" && echo SYMLINK || echo FILE) + soname=$(readelf -d "$f" 2>/dev/null | awk '/SONAME/ {gsub(/[][]/, "", $5); print $5; exit}') + echo "$f $type ${soname:-SONAME_NOT_SET}" +done diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py new file mode 100644 index 000000000..5f47b3990 --- /dev/null +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -0,0 +1,34 @@ +import sys +import traceback + +from cuda.bindings import path_finder +from cuda.bindings._path_finder import cuda_paths, supported_libs + +ALL_LIBNAMES = ( + path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES +) + + +def run(args): + assert len(args) == 0 + + paths = cuda_paths.get_cuda_paths() + for k, v in paths.items(): + print(f"{k}: {v}", flush=True) + print() + + for libname in ALL_LIBNAMES: + print(f"{libname=}") + try: + loaded_dl = path_finder.load_nvidia_dynamic_library(libname) + except Exception: + print(f"EXCEPTION for {libname=}:") + traceback.print_exc(file=sys.stdout) + else: + print(f" {loaded_dl.abs_path=!r}") + print(f" {loaded_dl.was_already_loaded_from_elsewhere=!r}") + print() + + +if __name__ == "__main__": + run(args=sys.argv[1:]) From 00f8e4d9ae16d6ad5afdaac0550f8a687a5f2f42 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 25 Apr 2025 08:20:40 -0700 Subject: [PATCH 04/28] Fix tiny accident: a line in pyproject.toml got lost somehow. 
--- cuda_bindings/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index 8921cc5a2..e6a9492f5 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -25,6 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Environment :: GPU :: NVIDIA CUDA", ] dynamic = [ From 1e15f5ec27ec02b8bc64c3747b0a2e8536c601f7 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 3 May 2025 22:36:11 -0700 Subject: [PATCH 05/28] Undo changes under .github (LD_LIBRARY_PATH, PATH manipulations for nvvm). --- .github/actions/fetch_ctk/action.yml | 2 +- .github/workflows/test-wheel-windows.yml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 3e8e48c4d..9fdb0a1f8 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -128,4 +128,4 @@ runs: echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "CUDA_HOME=${CUDA_PATH}" >> $GITHUB_ENV echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${CUDA_PATH}/lib:${CUDA_PATH}/nvvm/lib64" >> $GITHUB_ENV diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 948d2fae6..4e48590a3 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -164,6 +164,13 @@ jobs: method: 'network' sub-packages: ${{ env.MINI_CTK_DEPS }} + - name: Update PATH + if: ${{ inputs.local-ctk == '1' }} + run: | + # mimics actual CTK installation + echo $PATH + echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH + - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | 
From 2a6452d38b6556c0a8357d7163e5ad31600ef6de Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 3 May 2025 22:55:13 -0700 Subject: [PATCH 06/28] 2025-05-01 version of `cuda.bindings.path_finder` (#578) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Undo changes to the nvJitLink, nvrtc, nvvm bindings * Undo changes under .github, specific to nvvm, manipulating LD_LIBRARY_PATH or PATH * PARTIALLY_SUPPORTED_LIBNAMES_LINUX, PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS * Update EXPECTED_LIB_SYMBOLS for nvJitLink to cleanly support CTK versions 12.0, 12.1, 12.2 * Save result of factoring out load_dl_common.py, load_dl_linux.py, load_dl_windows.py with the help of Cursor. * Fix an auto-generated docstring * first round of Cursor refactoring (about 4 iterations until all tests passed), followed by ruff auto-fixes * Revert "first round of Cursor refactoring (about 4 iterations until all tests passed), followed by ruff auto-fixes" This reverts commit 001a6a23bcb6e2e458acdf8d85d134068965d352. There were many GitHub Actions jobs that failed (all tests with 12.x): https://github.com/NVIDIA/cuda-python/actions/runs/14677553387 This is not worth spending time debugging. Especially because * Cursor has been unresponsive for at least half an hour: We're having trouble connecting to the model provider. This might be temporary - please try again in a moment. * The refactored code does not seem easier to read. * A couple trivial tweaks * Prefix the public API (just two items) with underscores for now. * Add SPDX-License-Identifier to all files under toolshed/ that don't have it already * Add SPDX-License-Identifier under cuda_bindings/tests/ * Respond to "Do these need to be run as subprocesses?" review question (https://github.com/NVIDIA/cuda-python/pull/578#discussion_r2064470913) * Respond to "dead code?" review questions (e.g. 
https://github.com/NVIDIA/cuda-python/pull/578#discussion_r2064501694) * Respond to "Do we need to implement a cache separately ..." review question (https://github.com/NVIDIA/cuda-python/pull/578#discussion_r2064567215) * Remove cuDriverGetVersion() function for now. * Move add_dll_directory() from load_dl_common.py to load_dl_windows.py (response to review question https://github.com/NVIDIA/cuda-python/pull/578#discussion_r2064624395) * Add SPDX-License-Identifier and # Forked from: URL in cuda_paths.py * Add SPDX-License-Identifier and Original LICENSE in findlib.py * Very first draft of README.md * Update README.md, mostly as revised by perplexity, with various manual edits. * Refork cuda_paths.py AS-IS: https://github.com/NVIDIA/numba-cuda/blob/8c9c9d0cb901c06774a9abea6d12b6a4b0287e5e/numba_cuda/numba/cuda/cuda_paths.py * ruff format cuda_paths.py (NO manual changes) * Add back _get_numba_CUDA_INCLUDE_PATH from 2279bda65640b73a9a5632df878f52aedcbbd642 (i.e. cuda_paths.py as it was right before re-forking) * Remove cuda_paths.py dependency on numba.cuda.cudadrv.runtime * Add Forked from URLs, two SPDX-License-Identifier, Original Numba LICENSE * Temporarily restore debug changes under .github/workflows, for expanded path_finder test coverage * Restore cuda_path.py AS-IT-WAS at commit 2279bda65640b73a9a5632df878f52aedcbbd642 * Revert "Restore cuda_path.py AS-IT-WAS at commit 2279bda65640b73a9a5632df878f52aedcbbd642" This reverts commit 1b88ec27fa0f714643a139670aefabcaf89ff1b6. * Force compute-sanitizer off unconditionally * Revert "Force compute-sanitizer off unconditionally" This reverts commit 2bc7ef61632b0d88b6875f7371da99c3a49c4525. * Add timeout=10 seconds to test_path_finder.py subprocess.run() invocations.
* Increase test_path_finder.py subprocess.run() timeout to 30 seconds: Under Windows, loading cublas or cusolver may exceed the 10 second timeout: https://github.com/NVIDIA/cuda-python/pull/578#issuecomment-2842638872 * Revert "Temporarily restore debug changes under .github/workflows, for expanded path_finder test coverage" This reverts commit 47ad79f317c30423a023bfef28f68163360728b6. * Force compute-sanitizer off unconditionally * Add: Note that the search is done on a per-library basis. * Add Note for CUDA_HOME / CUDA_PATH * Add 0. **Check if a library was loaded into the process already by some other means.** * _find_dll_using_nvidia_bin_dirs(): reuse lib_searched_for in place of file_wild * Systematically replace all relative imports with absolute imports. * handle: int → ctypes.CDLL fix * Make load_dl_windows.py abs_path_for_dynamic_library() implementation maximally robust. * Change argument name → libname for self-consistency * Systematically replace previously overlooked relative imports with absolute imports. * Simplify code (also for self-consistency) * Expand the 3. **System Installations** section with information produced by perplexity * Pull out `**Environment variables**` into an added section, after manual inspection of cuda_paths.py. Minor additional edits. * Revert "Force compute-sanitizer off unconditionally" This reverts commit aeaf4f02278b62befb0e380e9f6f97a50b848fb3. * Move _path_finder/sys_path_find_sub_dirs.py → find_sub_dirs.py, use find_sub_dirs_all_sitepackages() from find_nvidia_dynamic_library.py * WIP (search priority updated in README.md but not in code) * Revert "WIP (search priority updated in README.md but not in code)" This reverts commit bf9734c916a2808032b8c6f805b73fc3d3f12180. 
--- .../cuda/bindings/_bindings/cynvrtc.pyx.in | 64 +++- .../bindings/_internal/nvjitlink_linux.pyx | 20 +- .../bindings/_internal/nvjitlink_windows.pyx | 53 ++- .../cuda/bindings/_internal/nvvm_linux.pyx | 18 +- .../cuda/bindings/_internal/nvvm_windows.pyx | 61 +++- .../cuda/bindings/_internal/utils.pxd | 3 + .../cuda/bindings/_internal/utils.pyx | 14 + .../cuda/bindings/_path_finder/README.md | 80 +++++ .../cuda/bindings/_path_finder/cuda_paths.py | 314 ++++++++++++++---- .../find_nvidia_dynamic_library.py | 29 +- ...path_find_sub_dirs.py => find_sub_dirs.py} | 24 +- .../cuda/bindings/_path_finder/findlib.py | 28 ++ .../bindings/_path_finder/load_dl_common.py | 40 +++ .../bindings/_path_finder/load_dl_linux.py | 125 +++++++ .../bindings/_path_finder/load_dl_windows.py | 149 +++++++++ .../load_nvidia_dynamic_library.py | 209 +++--------- .../bindings/_path_finder/supported_libs.py | 41 ++- cuda_bindings/cuda/bindings/path_finder.py | 10 +- ...inder.py => test_path_finder_find_load.py} | 48 +-- .../tests/test_path_finder_find_sub_dirs.py | 91 +++++ .../tests/test_sys_path_find_sub_dirs.py | 72 ---- toolshed/build_path_finder_dlls.py | 4 + toolshed/build_path_finder_sonames.py | 4 + toolshed/find_sonames.sh | 5 + toolshed/run_cuda_bindings_path_finder.py | 8 +- 25 files changed, 1109 insertions(+), 405 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_path_finder/README.md rename cuda_bindings/cuda/bindings/_path_finder/{sys_path_find_sub_dirs.py => find_sub_dirs.py} (67%) create mode 100644 cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py create mode 100644 cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py rename cuda_bindings/tests/{test_path_finder.py => test_path_finder_find_load.py} (58%) create mode 100644 cuda_bindings/tests/test_path_finder_find_sub_dirs.py delete mode 100644 cuda_bindings/tests/test_sys_path_find_sub_dirs.py diff --git 
a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in index d2bb0b63b..caf36d40e 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in @@ -9,12 +9,13 @@ # This code was automatically generated with version 12.8.0. Do not modify it directly. {{if 'Windows' == platform.system()}} import os +import site +import struct import win32api +from pywintypes import error {{else}} cimport cuda.bindings._lib.dlfcn as dlfcn -from libc.stdint cimport uintptr_t {{endif}} -from cuda.bindings import path_finder cdef bint __cuPythonInit = False {{if 'nvrtcGetErrorString' in found_functions}}cdef void *__nvrtcGetErrorString = NULL{{endif}} @@ -45,18 +46,65 @@ cdef bint __cuPythonInit = False {{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}} cdef int cuPythonInit() except -1 nogil: - {{if 'Windows' != platform.system()}} - cdef void* handle = NULL - {{endif}} - global __cuPythonInit if __cuPythonInit: return 0 __cuPythonInit = True + # Load library + {{if 'Windows' == platform.system()}} + with gil: + # First check if the DLL has been loaded by 3rd parties + try: + handle = win32api.GetModuleHandle("nvrtc64_120_0.dll") + except: + handle = None + + # Check if DLLs can be found within pip installations + if not handle: + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + site_packages = [site.getusersitepackages()] + site.getsitepackages() + for sp in site_packages: + mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin") + if os.path.isdir(mod_path): + os.add_dll_directory(mod_path) + try: + handle = win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
+ os.path.join(mod_path, "nvrtc64_120_0.dll"), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + + # Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is + # located in the same mod_path. + # Update PATH environ so that the two dlls can find each other + os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path)) + except: + pass + else: + break + else: + # Else try default search + # Only reached if DLL wasn't found in any site-package path + LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000 + try: + handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS) + except: + pass + + if not handle: + raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll') + {{else}} + handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW) + if handle == NULL: + with gil: + raise RuntimeError('Failed to dlopen libnvrtc.so.12') + {{endif}} + + + # Load function {{if 'Windows' == platform.system()}} with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} try: global __nvrtcGetErrorString @@ -241,8 +289,6 @@ cdef int cuPythonInit() except -1 nogil: {{endif}} {{else}} - with gil: - handle = path_finder.load_nvidia_dynamic_library("nvrtc").handle {{if 'nvrtcGetErrorString' in found_functions}} global __nvrtcGetErrorString __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString') diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx index 78b4d802b..9961a2105 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx @@ -4,11 +4,11 @@ # # This code was automatically generated across versions from 12.0.1 to 12.8.0. Do not modify it directly. 
-from libc.stdint cimport intptr_t, uintptr_t +from libc.stdint cimport intptr_t -from .utils import FunctionNotFoundError, NotSupportedError +from .utils cimport get_nvjitlink_dso_version_suffix -from cuda.bindings import path_finder +from .utils import FunctionNotFoundError, NotSupportedError ############################################################################### # Extern @@ -52,9 +52,17 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle - return handle +cdef void* load_library(const int driver_ver) except* with gil: + cdef void* handle + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})') + return handle cdef int _check_or_init_nvjitlink() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx index b306a3001..979820442 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx @@ -6,9 +6,12 @@ from libc.stdint cimport intptr_t +from .utils cimport get_nvjitlink_dso_version_suffix + from .utils import FunctionNotFoundError, NotSupportedError -from cuda.bindings import path_finder +import os +import site import win32api @@ -39,9 +42,44 @@ cdef void* __nvJitLinkGetInfoLog = NULL cdef void* __nvJitLinkVersion = NULL -cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvJitLink").handle - return handle +cdef inline list get_site_packages(): + return 
[site.getusersitepackages()] + site.getsitepackages() + + +cdef load_library(const int driver_ver): + handle = 0 + + for suffix in get_nvjitlink_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = f"nvJitLink_{suffix}0_0.dll" + + # First check if the DLL has been loaded by 3rd parties + try: + return win32api.GetModuleHandle(dll_name) + except: + pass + + # Next, check if DLLs are installed via pip + for sp in get_site_packages(): + mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin") + if os.path.isdir(mod_path): + os.add_dll_directory(mod_path) + try: + return win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... + os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + # Finally, try default search + # Only reached if DLL wasn't found in any site-package path + try: + return win32api.LoadLibrary(dll_name) + except: + pass + + raise RuntimeError('Failed to load nvJitLink') cdef int _check_or_init_nvjitlink() except -1 nogil: @@ -50,16 +88,15 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: return 0 cdef int err, driver_ver - cdef intptr_t handle with gil: # Load driver to check version try: - nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -67,7 +104,7 @@ cdef int _check_or_init_nvjitlink() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = 
load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvJitLinkCreate diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx index 82335508b..64e78e75a 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx @@ -4,11 +4,11 @@ # # This code was automatically generated across versions from 11.0.3 to 12.8.0. Do not modify it directly. -from libc.stdint cimport intptr_t, uintptr_t +from libc.stdint cimport intptr_t -from .utils import FunctionNotFoundError, NotSupportedError +from .utils cimport get_nvvm_dso_version_suffix -from cuda.bindings import path_finder +from .utils import FunctionNotFoundError, NotSupportedError ############################################################################### # Extern @@ -51,8 +51,16 @@ cdef void* __nvvmGetProgramLog = NULL cdef void* load_library(const int driver_ver) except* with gil: - cdef uintptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle - return handle + cdef void* handle + for suffix in get_nvvm_dso_version_suffix(driver_ver): + so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix) + handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL) + if handle != NULL: + break + else: + err_msg = dlerror() + raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})') + return handle cdef int _check_or_init_nvvm() except -1 nogil: diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx index 21b4d9418..9f507e8e1 100644 --- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx @@ -6,9 +6,12 @@ from libc.stdint cimport intptr_t +from .utils cimport get_nvvm_dso_version_suffix + from .utils import FunctionNotFoundError, NotSupportedError -from cuda.bindings import path_finder +import os +import site 
import win32api @@ -37,9 +40,52 @@ cdef void* __nvvmGetProgramLogSize = NULL cdef void* __nvvmGetProgramLog = NULL -cdef void* load_library(int driver_ver) except* with gil: - cdef intptr_t handle = path_finder.load_nvidia_dynamic_library("nvvm").handle - return handle +cdef inline list get_site_packages(): + return [site.getusersitepackages()] + site.getsitepackages() + ["conda"] + + +cdef load_library(const int driver_ver): + handle = 0 + + for suffix in get_nvvm_dso_version_suffix(driver_ver): + if len(suffix) == 0: + continue + dll_name = "nvvm64_40_0.dll" + + # First check if the DLL has been loaded by 3rd parties + try: + return win32api.GetModuleHandle(dll_name) + except: + pass + + # Next, check if DLLs are installed via pip or conda + for sp in get_site_packages(): + if sp == "conda": + # nvvm is not under $CONDA_PREFIX/lib, so it's not in the default search path + conda_prefix = os.environ.get("CONDA_PREFIX") + if conda_prefix is None: + continue + mod_path = os.path.join(conda_prefix, "Library", "nvvm", "bin") + else: + mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin") + if os.path.isdir(mod_path): + os.add_dll_directory(mod_path) + try: + return win32api.LoadLibraryEx( + # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path... 
+ os.path.join(mod_path, dll_name), + 0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR) + except: + pass + + # Finally, try default search + # Only reached if DLL wasn't found in any site-package path + try: + return win32api.LoadLibrary(dll_name) + except: + pass + + raise RuntimeError('Failed to load nvvm') cdef int _check_or_init_nvvm() except -1 nogil: @@ -48,16 +94,15 @@ cdef int _check_or_init_nvvm() except -1 nogil: return 0 cdef int err, driver_ver - cdef intptr_t handle with gil: # Load driver to check version try: - nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) + handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32) except Exception as e: raise NotSupportedError(f'CUDA driver is not found ({e})') global __cuDriverGetVersion if __cuDriverGetVersion == NULL: - __cuDriverGetVersion = win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion') + __cuDriverGetVersion = win32api.GetProcAddress(handle, 'cuDriverGetVersion') if __cuDriverGetVersion == NULL: raise RuntimeError('something went wrong') err = (__cuDriverGetVersion)(&driver_ver) @@ -65,7 +110,7 @@ cdef int _check_or_init_nvvm() except -1 nogil: raise RuntimeError('something went wrong') # Load library - handle = load_library(driver_ver) + handle = load_library(driver_ver) # Load function global __nvvmVersion diff --git a/cuda_bindings/cuda/bindings/_internal/utils.pxd b/cuda_bindings/cuda/bindings/_internal/utils.pxd index a4b71c531..cac7846ff 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pxd +++ b/cuda_bindings/cuda/bindings/_internal/utils.pxd @@ -165,3 +165,6 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, cdef bint is_nested_sequence(data) cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except* + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver) +cdef tuple get_nvvm_dso_version_suffix(int driver_ver) diff --git 
a/cuda_bindings/cuda/bindings/_internal/utils.pyx b/cuda_bindings/cuda/bindings/_internal/utils.pyx index 7fc77b22c..0a693c052 100644 --- a/cuda_bindings/cuda/bindings/_internal/utils.pyx +++ b/cuda_bindings/cuda/bindings/_internal/utils.pyx @@ -127,3 +127,17 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj, class FunctionNotFoundError(RuntimeError): pass class NotSupportedError(RuntimeError): pass + + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): + if 12000 <= driver_ver < 13000: + return ('12', '') + raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') + + +cdef tuple get_nvvm_dso_version_suffix(int driver_ver): + if 11000 <= driver_ver < 11020: + return ('3', '') + if 11020 <= driver_ver < 13000: + return ('4', '') + raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported') diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md new file mode 100644 index 000000000..94b80499f --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/README.md @@ -0,0 +1,80 @@ +# `cuda.bindings.path_finder` Module + +## Public API (Work in Progress) + +Currently exposes two primary interfaces: + +``` +cuda.bindings.path_finder._SUPPORTED_LIBNAMES # ('nvJitLink', 'nvrtc', 'nvvm') +cuda.bindings.path_finder._load_nvidia_dynamic_library(libname: str) -> LoadedDL +``` + +**Note:** +These APIs are prefixed with an underscore because they are considered +experimental while undergoing active development, although already +reasonably well-tested through CI pipelines. + +## Library Loading Search Priority + +The `load_nvidia_dynamic_library()` function implements a hierarchical search +strategy for locating NVIDIA shared libraries: + +0. **Check if a library was loaded into the process already by some other means.** + - If yes, there is no alternative to skipping the rest of the search logic. 
+ The absolute path of the already loaded library will be returned, along + with the handle to the library. + +1. **Python Package Ecosystem** + - Scans the user and global `site-packages` directories to find libraries installed via NVIDIA Python wheels. + +2. **Conda Environments** + - Leverages Conda-specific paths through our fork of `get_cuda_paths()` + from numba-cuda. + +3. **Environment variables** + - Relies on `CUDA_HOME`/`CUDA_PATH` environment variables if set. + +4. **System Installations** + - Checks traditional system locations through these paths: + - Linux: `/usr/local/cuda/lib64` + - Windows: `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` + (where X.Y is the CTK version) + - **Notably does NOT search**: + - Versioned CUDA directories like `/usr/local/cuda-12.3` + - Distribution-specific packages (RPM/DEB) + EXCEPT Debian's `nvidia-cuda-toolkit` + +5. **OS Default Mechanisms** + - Falls back to native loader: + - `dlopen()` on Linux + - `LoadLibraryW()` on Windows + +Note that the search is done on a per-library basis. There is no centralized +mechanism that ensures all libraries are found in the same way. + +## Implementation Philosophy + +The current implementation balances stability and evolution: + +- **Baseline Foundation:** Uses a fork of numba-cuda's `cuda_paths.py` that has been + battle-tested in production environments. 
+ +- **Validation Infrastructure:** Comprehensive CI testing matrix being developed to cover: + - Various Linux/Windows environments + - Python packaging formats (wheels, conda) + - CUDA Toolkit versions + +- **Roadmap:** Planned refactoring to: + - Unify library discovery logic + - Improve maintainability + - Better enforce search priority + - Expand platform support + +## Maintenance Requirements + +These key components must be updated for new CUDA Toolkit releases: + +- `supported_libs.SUPPORTED_LIBNAMES` +- `supported_libs.SUPPORTED_WINDOWS_DLLS` +- `supported_libs.SUPPORTED_LINUX_SONAMES` +- `supported_libs.EXPECTED_LIB_SYMBOLS` diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py index e27e6f54b..80f4e0149 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py +++ b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py @@ -1,3 +1,40 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# Forked from: +# https://github.com/NVIDIA/numba-cuda/blob/8c9c9d0cb901c06774a9abea6d12b6a4b0287e5e/numba_cuda/numba/cuda/cuda_paths.py + +# The numba-cuda version in turn was forked from: +# https://github.com/numba/numba/blob/6c8a71ffc3eaa1c68e1bac927b80ee7469002b3f/numba/cuda/cuda_paths.py +# SPDX-License-Identifier: BSD-2-Clause +# +# Original Numba LICENSE: +# Copyright (c) 2012, Anaconda, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import ctypes import os import platform import re @@ -8,7 +45,7 @@ from collections import namedtuple from pathlib import Path -from .findlib import find_file, find_lib +from cuda.bindings._path_finder.findlib import find_lib IS_WIN32 = sys.platform.startswith("win32") @@ -48,6 +85,37 @@ def _readenv(name, ctor, default): config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() +SEARCH_PRIORITY = [ + "Conda environment", + "Conda environment (NVIDIA package)", + "NVIDIA NVCC Wheel", + "CUDA_HOME", + "System", + "Debian package", +] + + +def _priority_index(label): + if label in SEARCH_PRIORITY: + return SEARCH_PRIORITY.index(label) + else: + raise ValueError(f"Can't determine search priority for {label}") + + +def _find_first_valid_lazy(options): + sorted_options = sorted(options, key=lambda x: _priority_index(x[0])) + for label, fn in sorted_options: + value = fn() + if value: + return label, value + return "", None + + +def _build_options(pairs): + """Sorts and returns a list of (label, value) tuples according to SEARCH_PRIORITY.""" + priority_index = {label: i for i, label in enumerate(SEARCH_PRIORITY)} + return sorted(pairs, key=lambda pair: priority_index.get(pair[0], float("inf"))) 
+ def _find_valid_path(options): """Find valid path from *options*, which is a list of 2-tuple of @@ -62,19 +130,17 @@ def _find_valid_path(options): def _get_libdevice_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk()), - ("CUDA_HOME", get_cuda_home("nvvm", "libdevice")), - ("Debian package", get_debian_pkg_libdevice()), - ("NVIDIA NVCC Wheel", get_libdevice_wheel()), - ] - libdevice_ctk_dir = get_system_ctk("nvvm", "libdevice") - if libdevice_ctk_dir and os.path.exists(libdevice_ctk_dir): - options.append(("System", libdevice_ctk_dir)) - - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk), + ("CUDA_HOME", lambda: get_cuda_home("nvvm", "libdevice")), + ("NVIDIA NVCC Wheel", get_libdevice_wheel), + ("System", lambda: get_system_ctk("nvvm", "libdevice")), + ("Debian package", get_debian_pkg_libdevice), + ] + ) + return _find_first_valid_lazy(options) def _nvvm_lib_dir(): @@ -86,53 +152,113 @@ def _nvvm_lib_dir(): def _get_nvvm_path_decision(): options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk()), - ("CUDA_HOME", get_cuda_home(*_nvvm_lib_dir())), - ("NVIDIA NVCC Wheel", _get_nvvm_wheel()), + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk), + ("NVIDIA NVCC Wheel", _get_nvvm_wheel), + ("CUDA_HOME", lambda: get_cuda_home(*_nvvm_lib_dir())), + ("System", lambda: get_system_ctk(*_nvvm_lib_dir())), ] - # need to ensure nvvm dir actually exists - nvvm_ctk_dir = get_system_ctk(*_nvvm_lib_dir()) - if nvvm_ctk_dir and os.path.exists(nvvm_ctk_dir): - options.append(("System", nvvm_ctk_dir)) + return _find_first_valid_lazy(options) - by, path = _find_valid_path(options) - return by, path + +def _get_nvrtc_system_ctk(): + sys_path 
= get_system_ctk("bin" if IS_WIN32 else "lib64") + candidates = find_lib("nvrtc", sys_path) + if candidates: + return max(candidates) + + +def _get_nvrtc_path_decision(): + options = _build_options( + [ + ("CUDA_HOME", lambda: get_cuda_home("nvrtc")), + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), + ("NVIDIA NVCC Wheel", _get_nvrtc_wheel), + ("System", _get_nvrtc_system_ctk), + ] + ) + return _find_first_valid_lazy(options) def _get_nvvm_wheel(): - site_paths = [site.getusersitepackages()] + site.getsitepackages() + ["conda", None] + platform_map = { + "linux": ("lib64", "libnvvm.so"), + "win32": ("bin", "nvvm64_40_0.dll"), + } + + for plat, (dso_dir, dso_path) in platform_map.items(): + if sys.platform.startswith(plat): + break + else: + raise NotImplementedError("Unsupported platform") + + site_paths = [site.getusersitepackages()] + site.getsitepackages() + + for sp in filter(None, site_paths): + nvvm_path = Path(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir, dso_path) + if nvvm_path.exists(): + return str(nvvm_path.parent) + + return None + + +def get_nvrtc_dso_path(): + site_paths = [site.getusersitepackages()] + site.getsitepackages() for sp in site_paths: - # The SONAME is taken based on public CTK 12.x releases - if sys.platform.startswith("linux"): - dso_dir = "lib64" - # Hack: libnvvm from Linux wheel - # does not have any soname (CUDAINST-3183) - dso_path = "libnvvm.so" - elif sys.platform.startswith("win32"): - dso_dir = "bin" - dso_path = "nvvm64_40_0.dll" + lib_dir = os.path.join( + sp, + "nvidia", + "cuda_nvrtc", + ("bin" if IS_WIN32 else "lib") if sp else None, + ) + if lib_dir and os.path.exists(lib_dir): + for major in (12, 11): + if major == 11: + cu_ver = "112" if IS_WIN32 else "11.2" + elif major == 12: + cu_ver = "120" if IS_WIN32 else "12" + else: + raise NotImplementedError(f"CUDA {major} is not supported") + + dso_path = os.path.join( + lib_dir, + f"nvrtc64_{cu_ver}_0.dll" if IS_WIN32 
else f"libnvrtc.so.{cu_ver}", + ) + if os.path.isfile(dso_path): + return dso_path + return None + + +def _get_nvrtc_wheel(): + dso_path = get_nvrtc_dso_path() + if dso_path: + try: + result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL) + except OSError: + pass else: - raise AssertionError() - - if sp is not None: - dso_dir = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir) - dso_path = os.path.join(dso_dir, dso_path) - if os.path.exists(dso_path): - return str(Path(dso_path).parent) + if IS_WIN32: + import win32api + + # This absolute path will + # always be correct regardless of the package source + nvrtc_path = win32api.GetModuleFileNameW(result._handle) + dso_dir = os.path.dirname(nvrtc_path) + builtins_path = os.path.join( + dso_dir, + [f for f in os.listdir(dso_dir) if re.match("^nvrtc-builtins.*.dll$", f)][0], + ) + if not os.path.exists(builtins_path): + raise RuntimeError(f'Path does not exist: "{builtins_path}"') + return Path(dso_path) def _get_libdevice_paths(): by, libdir = _get_libdevice_path_decision() - if by == "NVIDIA NVCC Wheel": - # The NVVM path is a directory, not a file - out = os.path.join(libdir, "libdevice.10.bc") - else: - # Search for pattern - pat = r"libdevice(\.\d+)*\.bc$" - candidates = find_file(re.compile(pat), libdir) - # Keep only the max (most recent version) of the bitcode files. 
- out = max(candidates, default=None) + if not libdir: + return _env_path_tuple(by, None) + out = os.path.join(libdir, "libdevice.10.bc") return _env_path_tuple(by, out) @@ -150,26 +276,46 @@ def _cuda_home_static_cudalib_path(): return ("lib64",) +def _get_cudalib_wheel(): + """Get the cudalib path from the NVCC wheel.""" + site_paths = [site.getusersitepackages()] + site.getsitepackages() + libdir = "bin" if IS_WIN32 else "lib" + for sp in filter(None, site_paths): + cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir) + if cudalib_path.exists(): + return str(cudalib_path) + return None + + def _get_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), + ("NVIDIA NVCC Wheel", _get_cudalib_wheel), + ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())), + ("System", lambda: get_system_ctk(_cudalib_path())), + ] + ) + return _find_first_valid_lazy(options) def _get_static_cudalib_dir_path_decision(): - options = [ - ("Conda environment", get_conda_ctk()), - ("Conda environment (NVIDIA package)", get_nvidia_static_cudalib_ctk()), - ("CUDA_HOME", get_cuda_home(*_cuda_home_static_cudalib_path())), - ("System", get_system_ctk(_cudalib_path())), - ] - by, libdir = _find_valid_path(options) - return by, libdir + options = _build_options( + [ + ("Conda environment", get_conda_ctk), + ( + "Conda environment (NVIDIA package)", + get_nvidia_static_cudalib_ctk, + ), + ( + "CUDA_HOME", + lambda: get_cuda_home(*_cuda_home_static_cudalib_path()), + ), + ("System", lambda: get_system_ctk(_cudalib_path())), + ] + ) + return _find_first_valid_lazy(options) def _get_cudalib_dir(): @@ 
-185,12 +331,12 @@ def _get_static_cudalib_dir(): def get_system_ctk(*subdirs): """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" # Linux? - if sys.platform.startswith("linux"): + if not IS_WIN32: # Is cuda alias to /usr/local/cuda? # We are intentionally not getting versioned cuda installation. - base = "/usr/local/cuda" - if os.path.exists(base): - return os.path.join(base, *subdirs) + result = os.path.join("/usr/local/cuda", *subdirs) + if os.path.exists(result): + return result def get_conda_ctk(): @@ -283,15 +429,38 @@ def get_cuda_home(*subdirs): def _get_nvvm_path(): by, path = _get_nvvm_path_decision() + if by == "NVIDIA NVCC Wheel": - # The NVVM path is a directory, not a file - path = os.path.join(path, "libnvvm.so") + platform_map = { + "linux": "libnvvm.so", + "win32": "nvvm64_40_0.dll", + } + + for plat, dso_name in platform_map.items(): + if sys.platform.startswith(plat): + break + else: + raise NotImplementedError("Unsupported platform") + + path = os.path.join(path, dso_name) else: candidates = find_lib("nvvm", path) path = max(candidates) if candidates else None return _env_path_tuple(by, path) +def _get_nvrtc_path(): + by, path = _get_nvrtc_path_decision() + if by == "NVIDIA NVCC Wheel": + path = str(path) + elif by == "System": + return _env_path_tuple(by, path) + else: + candidates = find_lib("nvrtc", path) + path = max(candidates) if candidates else None + return _env_path_tuple(by, path) + + def get_cuda_paths(): """Returns a dictionary mapping component names to a 2-tuple of (source_variable, info). 
@@ -310,6 +479,7 @@ def get_cuda_paths(): # Not in cache d = { "nvvm": _get_nvvm_path(), + "nvrtc": _get_nvrtc_path(), "libdevice": _get_libdevice_paths(), "cudalib_dir": _get_cudalib_dir(), "static_cudalib_dir": _get_static_cudalib_dir(), diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index e60154aa5..af9f42fbf 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -1,19 +1,19 @@ # Copyright 2024-2025 NVIDIA Corporation. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools import glob import os +import sys -from .cuda_paths import IS_WIN32, get_cuda_paths -from .supported_libs import is_suppressed_dll_file -from .sys_path_find_sub_dirs import sys_path_find_sub_dirs +from cuda.bindings._path_finder.cuda_paths import get_cuda_paths +from cuda.bindings._path_finder.find_sub_dirs import find_sub_dirs_all_sitepackages +from cuda.bindings._path_finder.supported_libs import is_suppressed_dll_file def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): error_messages.append(f"No such file: {file_wild}") - for sub_dir in sys_path_find_sub_dirs(sub_dirs): + for sub_dir in find_sub_dirs_all_sitepackages(sub_dirs): attachments.append(f' listdir("{sub_dir}"):') for node in sorted(os.listdir(sub_dir)): attachments.append(f" {node}") @@ -25,7 +25,7 @@ def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachm else: nvidia_sub_dirs = ("nvidia", "*", "lib") file_wild = so_basename + "*" - for lib_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): + for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): # First look for an exact match so_name = os.path.join(lib_dir, so_basename) if os.path.isfile(so_name): @@ -48,17 +48,16 @@ def _find_dll_under_dir(dirpath, 
file_wild): return None -def _find_dll_using_nvidia_bin_dirs(libname, error_messages, attachments): +def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, attachments): if libname == "nvvm": # noqa: SIM108 nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") else: nvidia_sub_dirs = ("nvidia", "*", "bin") - file_wild = libname + "*.dll" - for bin_dir in sys_path_find_sub_dirs(nvidia_sub_dirs): - dll_name = _find_dll_under_dir(bin_dir, file_wild) + for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): + dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) if dll_name is not None: return dll_name - _no_such_file_in_sub_dirs(nvidia_sub_dirs, file_wild, error_messages, attachments) + _no_such_file_in_sub_dirs(nvidia_sub_dirs, lib_searched_for, error_messages, attachments) return None @@ -122,14 +121,16 @@ def __init__(self, libname: str): self.attachments = [] self.abs_path = None - if IS_WIN32: - self.abs_path = _find_dll_using_nvidia_bin_dirs(libname, self.error_messages, self.attachments) + if sys.platform == "win32": + self.lib_searched_for = f"{libname}*.dll" + self.abs_path = _find_dll_using_nvidia_bin_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) if self.abs_path is None: if libname == "nvvm": self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) else: self.abs_path = _find_dll_using_cudalib_dir(libname, self.error_messages, self.attachments) - self.lib_searched_for = f"{libname}*.dll" else: self.lib_searched_for = f"lib{libname}.so" self.abs_path = _find_so_using_nvidia_lib_dirs( diff --git a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py b/cuda_bindings/cuda/bindings/_path_finder/find_sub_dirs.py similarity index 67% rename from cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py rename to cuda_bindings/cuda/bindings/_path_finder/find_sub_dirs.py index d2da726c9..810132625 100644 --- 
a/cuda_bindings/cuda/bindings/_path_finder/sys_path_find_sub_dirs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_sub_dirs.py @@ -1,16 +1,15 @@ # Copyright 2024-2025 NVIDIA Corporation. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools import os +import site import sys -@functools.cache -def _impl(sys_path, sub_dirs): +def find_sub_dirs_no_cache(parent_dirs, sub_dirs): results = [] - for base in sys_path: + for base in parent_dirs: stack = [(base, 0)] # (current_path, index into sub_dirs) while stack: current_path, idx = stack.pop() @@ -36,5 +35,18 @@ def _impl(sys_path, sub_dirs): return results -def sys_path_find_sub_dirs(sub_dirs): - return _impl(tuple(sys.path), tuple(sub_dirs)) +@functools.cache +def find_sub_dirs_cached(parent_dirs, sub_dirs): + return find_sub_dirs_no_cache(parent_dirs, sub_dirs) + + +def find_sub_dirs(parent_dirs, sub_dirs): + return find_sub_dirs_cached(tuple(parent_dirs), tuple(sub_dirs)) + + +def find_sub_dirs_sys_path(sub_dirs): + return find_sub_dirs(sys.path, sub_dirs) + + +def find_sub_dirs_all_sitepackages(sub_dirs): + return find_sub_dirs((site.getusersitepackages(),) + tuple(site.getsitepackages()), sub_dirs) diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py index 4de57c905..992a3940e 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/findlib.py +++ b/cuda_bindings/cuda/bindings/_path_finder/findlib.py @@ -1,5 +1,33 @@ +# SPDX-License-Identifier: BSD-2-Clause +# # Forked from: # https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py +# +# Original LICENSE: +# Copyright (c) 2012, Anaconda, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os import re diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py new file mode 100644 index 000000000..4592f6c33 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -0,0 +1,40 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +from dataclasses import dataclass +from typing import Callable, Optional + +from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES + + +@dataclass +class LoadedDL: + """Represents a loaded dynamic library. 
+ + Attributes: + handle: The library handle (can be converted to void* in Cython) + abs_path: The absolute path to the library file + was_already_loaded_from_elsewhere: Whether the library was already loaded + """ + + # ATTENTION: To convert `handle` back to `void*` in cython: + # Linux: `cdef void* ptr = ` + # Windows: `cdef void* ptr = ` + handle: int + abs_path: Optional[str] + was_already_loaded_from_elsewhere: bool + + +def load_dependencies(libname: str, load_func: Callable[[str], LoadedDL]) -> None: + """Load all dependencies for a given library. + + Args: + libname: The name of the library whose dependencies should be loaded + load_func: The function to use for loading libraries (e.g. load_nvidia_dynamic_library) + + Example: + >>> load_dependencies("cudart", load_nvidia_dynamic_library) + # This will load all dependencies of cudart using the provided loading function + """ + for dep in DIRECT_DEPENDENCIES.get(libname, ()): + load_func(dep) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py new file mode 100644 index 000000000..b9f3839e1 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_linux.py @@ -0,0 +1,125 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import ctypes +import ctypes.util +import os +from typing import Optional + +from cuda.bindings._path_finder.load_dl_common import LoadedDL + +CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL + +LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2" +LIBDL = ctypes.CDLL(LIBDL_PATH) +LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p] +LIBDL.dladdr.restype = ctypes.c_int + + +class Dl_info(ctypes.Structure): + """Structure used by dladdr to return information about a loaded symbol.""" + + _fields_ = [ + ("dli_fname", ctypes.c_char_p), # path to .so + ("dli_fbase", ctypes.c_void_p), + ("dli_sname", ctypes.c_char_p), + ("dli_saddr", ctypes.c_void_p), + ] + + +def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> Optional[str]: + """Get the absolute path of a loaded dynamic library on Linux. + + Args: + libname: The name of the library + handle: The library handle + + Returns: + The absolute path to the library file, or None if no expected symbol is found + + Raises: + OSError: If dladdr fails to get information about the symbol + """ + from cuda.bindings._path_finder.supported_libs import EXPECTED_LIB_SYMBOLS + + for symbol_name in EXPECTED_LIB_SYMBOLS[libname]: + symbol = getattr(handle, symbol_name, None) + if symbol is not None: + break + else: + return None + + addr = ctypes.cast(symbol, ctypes.c_void_p) + info = Dl_info() + if LIBDL.dladdr(addr, ctypes.byref(info)) == 0: + raise OSError(f"dladdr failed for {libname=!r}") + return info.dli_fname.decode() + + +def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]: + """Check if the library is already loaded in the process. + + Args: + libname: The name of the library to check + + Returns: + A LoadedDL object if the library is already loaded, None otherwise + + Example: + >>> loaded = check_if_already_loaded_from_elsewhere("cudart") + >>> if loaded is not None: + ... 
print(f"Library already loaded from {loaded.abs_path}") + """ + from cuda.bindings._path_finder.supported_libs import SUPPORTED_LINUX_SONAMES + + for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()): + try: + handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) + except OSError: + continue + else: + return LoadedDL(handle._handle, abs_path_for_dynamic_library(libname, handle), True) + return None + + +def load_with_system_search(libname: str, soname: str) -> Optional[LoadedDL]: + """Try to load a library using system search paths. + + Args: + libname: The name of the library to load + soname: The soname to search for + + Returns: + A LoadedDL object if successful, None if the library cannot be loaded + + Raises: + RuntimeError: If the library is loaded but no expected symbol is found + """ + try: + handle = ctypes.CDLL(soname, CDLL_MODE) + abs_path = abs_path_for_dynamic_library(libname, handle) + if abs_path is None: + raise RuntimeError(f"No expected symbol for {libname=!r}") + return LoadedDL(handle._handle, abs_path, False) + except OSError: + return None + + +def load_with_abs_path(libname: str, found_path: str) -> LoadedDL: + """Load a dynamic library from the given path. + + Args: + libname: The name of the library to load + found_path: The absolute path to the library file + + Returns: + A LoadedDL object representing the loaded library + + Raises: + RuntimeError: If the library cannot be loaded + """ + try: + handle = ctypes.CDLL(found_path, CDLL_MODE) + except OSError as e: + raise RuntimeError(f"Failed to dlopen {found_path}: {e}") from e + return LoadedDL(handle._handle, found_path, False) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py new file mode 100644 index 000000000..1f0c9c7e2 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py @@ -0,0 +1,149 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import ctypes +import ctypes.wintypes +from typing import Optional + +import pywintypes +import win32api + +from cuda.bindings._path_finder.load_dl_common import LoadedDL + +# Mirrors WinBase.h (unfortunately not defined already elsewhere) +WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 +WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + + +def add_dll_directory(dll_abs_path: str) -> None: + """Add a DLL directory to the search path and update PATH environment variable. + + Args: + dll_abs_path: Absolute path to the DLL file + + Raises: + AssertionError: If the directory containing the DLL does not exist + """ + import os + + dirpath = os.path.dirname(dll_abs_path) + assert os.path.isdir(dirpath), dll_abs_path + # Add the DLL directory to the search path + os.add_dll_directory(dirpath) + # Update PATH as a fallback for dependent DLL resolution + curr_path = os.environ.get("PATH") + os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) + + +def abs_path_for_dynamic_library(handle: int) -> str: + """Get the absolute path of a loaded dynamic library on Windows. + + Args: + handle: The library handle + + Returns: + The absolute path to the DLL file + + Raises: + OSError: If GetModuleFileNameW fails + RuntimeError: If the required path length is unreasonably long + """ + MAX_ITERATIONS = 10 # Allows for extremely long paths (up to ~266,000 chars) + buf_size = 260 # Start with traditional MAX_PATH + + for _ in range(MAX_ITERATIONS): + buf = ctypes.create_unicode_buffer(buf_size) + n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, buf_size) + + if n_chars == 0: + raise OSError( + "GetModuleFileNameW failed. Long paths may require enabling the " + "Windows 10+ long path registry setting. 
See: " + "https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation" + ) + if n_chars < buf_size - 1: + return buf.value + + buf_size *= 2 # Double the buffer size and try again + + raise RuntimeError( + f"Failed to retrieve the full path after {MAX_ITERATIONS} attempts " + f"(final buffer size: {buf_size} characters). " + "This may indicate:\n" + " 1. An extremely long path requiring Windows long path support, or\n" + " 2. An invalid or corrupt library handle, or\n" + " 3. An unexpected system error.\n" + "See: https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation" + ) + + +def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]: + """Check if the library is already loaded in the process. + + Args: + libname: The name of the library to check + + Returns: + A LoadedDL object if the library is already loaded, None otherwise + + Example: + >>> loaded = check_if_already_loaded_from_elsewhere("cudart") + >>> if loaded is not None: + ... print(f"Library already loaded from {loaded.abs_path}") + """ + from cuda.bindings._path_finder.supported_libs import SUPPORTED_WINDOWS_DLLS + + for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): + try: + handle = win32api.GetModuleHandle(dll_name) + except pywintypes.error: + continue + else: + return LoadedDL(handle, abs_path_for_dynamic_library(handle), True) + return None + + +def load_with_system_search(libname: str, _unused: str) -> Optional[LoadedDL]: + """Try to load a DLL using system search paths. 
+ + Args: + libname: The name of the library to load + _unused: Unused parameter (kept for interface consistency) + + Returns: + A LoadedDL object if successful, None if the library cannot be loaded + """ + from cuda.bindings._path_finder.supported_libs import SUPPORTED_WINDOWS_DLLS + + for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): + handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) + if handle: + return LoadedDL(handle, abs_path_for_dynamic_library(handle), False) + + return None + + +def load_with_abs_path(libname: str, found_path: str) -> LoadedDL: + """Load a dynamic library from the given path. + + Args: + libname: The name of the library to load + found_path: The absolute path to the DLL file + + Returns: + A LoadedDL object representing the loaded library + + Raises: + RuntimeError: If the DLL cannot be loaded + """ + from cuda.bindings._path_finder.supported_libs import LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY + + if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY: + add_dll_directory(found_path) + + flags = WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR + try: + handle = win32api.LoadLibraryEx(found_path, 0, flags) + except pywintypes.error as e: + raise RuntimeError(f"Failed to load DLL at {found_path}: {e}") from e + return LoadedDL(handle, found_path, False) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index c770de67d..015c4cdf8 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -1,193 +1,60 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. 
-# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import ctypes import functools -import os import sys -from dataclasses import dataclass -from typing import Optional, Tuple -if sys.platform == "win32": - import ctypes.wintypes - - import pywintypes - import win32api - - # Mirrors WinBase.h (unfortunately not defined already elsewhere) - _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 - _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 +from cuda.bindings._path_finder.find_nvidia_dynamic_library import _find_nvidia_dynamic_library +from cuda.bindings._path_finder.load_dl_common import LoadedDL, load_dependencies +if sys.platform == "win32": + from cuda.bindings._path_finder.load_dl_windows import ( + check_if_already_loaded_from_elsewhere, + load_with_abs_path, + load_with_system_search, + ) else: - import ctypes.util - - _LINUX_CDLL_MODE = os.RTLD_NOW | os.RTLD_GLOBAL - - _LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2" - _LIBDL = ctypes.CDLL(_LIBDL_PATH) - _LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p] - _LIBDL.dladdr.restype = ctypes.c_int - - class Dl_info(ctypes.Structure): - _fields_ = [ - ("dli_fname", ctypes.c_char_p), # path to .so - ("dli_fbase", ctypes.c_void_p), - ("dli_sname", ctypes.c_char_p), - ("dli_saddr", ctypes.c_void_p), - ] - - -from .find_nvidia_dynamic_library import _find_nvidia_dynamic_library -from .supported_libs import ( - DIRECT_DEPENDENCIES, - EXPECTED_LIB_SYMBOLS, - LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY, - SUPPORTED_LINUX_SONAMES, - SUPPORTED_WINDOWS_DLLS, -) - - -def _add_dll_directory(dll_abs_path): - dirpath = os.path.dirname(dll_abs_path) - assert os.path.isdir(dirpath), dll_abs_path - # Add the DLL directory to the search path - os.add_dll_directory(dirpath) - # Update PATH as a fallback for dependent DLL resolution - curr_path = os.environ.get("PATH") - os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) - - -@functools.cache -def 
_windows_cuDriverGetVersion() -> int: - handle = win32api.LoadLibrary("nvcuda.dll") - - kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) - GetProcAddress = kernel32.GetProcAddress - GetProcAddress.argtypes = [ctypes.wintypes.HMODULE, ctypes.wintypes.LPCSTR] - GetProcAddress.restype = ctypes.c_void_p - cuDriverGetVersion = GetProcAddress(handle, b"cuDriverGetVersion") - assert cuDriverGetVersion - - FUNC_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int)) - cuDriverGetVersion_fn = FUNC_TYPE(cuDriverGetVersion) - driver_ver = ctypes.c_int() - err = cuDriverGetVersion_fn(ctypes.byref(driver_ver)) - assert err == 0 - return driver_ver.value - - -def _abs_path_for_dynamic_library_windows(handle: int) -> str: - buf = ctypes.create_unicode_buffer(260) - n_chars = ctypes.windll.kernel32.GetModuleFileNameW(ctypes.wintypes.HMODULE(handle), buf, len(buf)) - if n_chars == 0: - raise OSError("GetModuleFileNameW failed") - return buf.value - - -@functools.cache -def _windows_load_with_dll_basename(name: str) -> Tuple[Optional[int], Optional[str]]: - driver_ver = _windows_cuDriverGetVersion() - del driver_ver # Keeping this here because it will probably be needed in the future. 
- - dll_names = SUPPORTED_WINDOWS_DLLS.get(name) - if dll_names is None: - return None - - for dll_name in dll_names: - handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) - if handle: - return handle, _abs_path_for_dynamic_library_windows(handle) - - return None, None - - -def _abs_path_for_dynamic_library_linux(libname: str, handle: int) -> str: - for symbol_name in EXPECTED_LIB_SYMBOLS[libname]: - symbol = getattr(handle, symbol_name, None) - if symbol is not None: - break - else: - return None - addr = ctypes.cast(symbol, ctypes.c_void_p) - info = Dl_info() - if _LIBDL.dladdr(addr, ctypes.byref(info)) == 0: - raise OSError(f"dladdr failed for {libname=!r}") - return info.dli_fname.decode() - - -def _load_and_report_path_linux(libname: str, soname: str) -> Tuple[int, str]: - handle = ctypes.CDLL(soname, _LINUX_CDLL_MODE) - abs_path = _abs_path_for_dynamic_library_linux(libname, handle) - if abs_path is None: - raise RuntimeError(f"No expected symbol for {libname=!r}") - return handle, abs_path - - -@dataclass -class LoadedDL: - # ATTENTION: To convert `handle` back to `void*` in cython: - # Linux: `cdef void* ptr = ` - # Windows: `cdef void* ptr = ` - handle: int - abs_path: Optional[str] - was_already_loaded_from_elsewhere: bool + from cuda.bindings._path_finder.load_dl_linux import ( + check_if_already_loaded_from_elsewhere, + load_with_abs_path, + load_with_system_search, + ) def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: - # Detect if the library was loaded already in some other way (i.e. not via this function). 
- if sys.platform == "win32": - for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): - try: - handle = win32api.GetModuleHandle(dll_name) - except pywintypes.error: - pass - else: - return LoadedDL(handle, _abs_path_for_dynamic_library_windows(handle), True) - else: - for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()): - try: - handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD) - except OSError: - pass - else: - return LoadedDL(handle, _abs_path_for_dynamic_library_linux(libname, handle), True) + # Check whether the library is already loaded into the current process by + # some other component. This check uses OS-level mechanisms (e.g., + # dlopen on Linux, GetModuleHandle on Windows). + loaded = check_if_already_loaded_from_elsewhere(libname) + if loaded is not None: + return loaded - for dep in DIRECT_DEPENDENCIES.get(libname, ()): - load_nvidia_dynamic_library(dep) + # Load dependencies first + load_dependencies(libname, load_nvidia_dynamic_library) + # Find the library path found = _find_nvidia_dynamic_library(libname) if found.abs_path is None: - if sys.platform == "win32": - handle, abs_path = _windows_load_with_dll_basename(libname) - if handle: - return LoadedDL(handle, abs_path, False) - else: - try: - handle, abs_path = _load_and_report_path_linux(libname, found.lib_searched_for) - except OSError: - pass - else: - return LoadedDL(handle._handle, abs_path, False) + loaded = load_with_system_search(libname, found.lib_searched_for) + if loaded is not None: + return loaded found.raise_if_abs_path_is_None() - if sys.platform == "win32": - if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY: - _add_dll_directory(found.abs_path) - flags = _WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | _WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR - try: - handle = win32api.LoadLibraryEx(found.abs_path, 0, flags) - except pywintypes.error as e: - raise RuntimeError(f"Failed to load DLL at {found.abs_path}: {e}") from e - return LoadedDL(handle, found.abs_path, False) - else: - 
try: - handle = ctypes.CDLL(found.abs_path, _LINUX_CDLL_MODE) - except OSError as e: - raise RuntimeError(f"Failed to dlopen {found.abs_path}: {e}") from e - return LoadedDL(handle._handle, found.abs_path, False) + # Load the library from the found path + return load_with_abs_path(libname, found.abs_path) @functools.cache def load_nvidia_dynamic_library(libname: str) -> LoadedDL: + """Load a NVIDIA dynamic library by name. + + Args: + libname: The name of the library to load (e.g. "cudart", "nvvm", etc.) + + Returns: + A LoadedDL object containing the library handle and path + + Raises: + RuntimeError: If the library cannot be found or loaded + """ return _load_nvidia_dynamic_library_no_cache(libname) diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py index ee62b92b8..6852c7fce 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -1,9 +1,10 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. -# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE # THIS FILE NEEDS TO BE REVIEWED/UPDATED FOR EACH CTK RELEASE +import sys + SUPPORTED_LIBNAMES = ( # Core CUDA Runtime and Compiler "nvJitLink", @@ -11,7 +12,7 @@ "nvvm", ) -PARTIALLY_SUPPORTED_LIBNAMES = ( +PARTIALLY_SUPPORTED_LIBNAMES_COMMON = ( # Core CUDA Runtime and Compiler "cudart", "nvfatbin", @@ -37,11 +38,38 @@ "npps", "nvblas", # Other + "nvjpeg", +) + +# Note: The `cufile_rdma` information is intentionally retained (commented out) +# despite not being actively used in the current build. It took a nontrivial +# amount of effort to determine the SONAME, dependencies, and expected symbols +# for this special-case library, especially given its RDMA/MLX5 dependencies +# and limited availability. Keeping this as a reference avoids having to +# reconstruct the information from scratch in the future. 
+ +PARTIALLY_SUPPORTED_LIBNAMES_LINUX_ONLY = ( "cufile", # "cufile_rdma", # Requires libmlx5.so - "nvjpeg", ) +PARTIALLY_SUPPORTED_LIBNAMES_LINUX = PARTIALLY_SUPPORTED_LIBNAMES_COMMON + PARTIALLY_SUPPORTED_LIBNAMES_LINUX_ONLY + +PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY = () + +PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS = PARTIALLY_SUPPORTED_LIBNAMES_COMMON + PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY + +PARTIALLY_SUPPORTED_LIBNAMES_ALL = ( + PARTIALLY_SUPPORTED_LIBNAMES_COMMON + + PARTIALLY_SUPPORTED_LIBNAMES_LINUX_ONLY + + PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY +) + +if sys.platform == "win32": + PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS +else: + PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_LINUX + # Based on ldd output for Linux x86_64 nvidia-*-cu12 wheels (12.8.1) DIRECT_DEPENDENCIES = { "cublas": ("cublasLt",), @@ -231,8 +259,6 @@ "cufftw64_10.dll", "cufftw64_11.dll", ), - "cufile": (), - # "cufile_rdma": (), "curand": ("curand64_10.dll",), "cusolver": ( "cusolver64_10.dll", @@ -333,7 +359,10 @@ def is_suppressed_dll_file(path_basename: str) -> bool: # Based on nm output for Linux x86_64 /usr/local/cuda (12.8.1) EXPECTED_LIB_SYMBOLS = { - "nvJitLink": ("nvJitLinkVersion",), + "nvJitLink": ( + "__nvJitLinkCreate_12_0", # 12.0 through 12.8 (at least) + "nvJitLinkVersion", # 12.3 and up + ), "nvrtc": ("nvrtcVersion",), "nvvm": ("nvvmVersion",), "cudart": ("cudaRuntimeGetVersion",), diff --git a/cuda_bindings/cuda/bindings/path_finder.py b/cuda_bindings/cuda/bindings/path_finder.py index 9c08bdc25..28badd025 100644 --- a/cuda_bindings/cuda/bindings/path_finder.py +++ b/cuda_bindings/cuda/bindings/path_finder.py @@ -2,10 +2,12 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library -from cuda.bindings._path_finder.supported_libs import SUPPORTED_LIBNAMES +from cuda.bindings._path_finder.load_nvidia_dynamic_library import ( 
+ load_nvidia_dynamic_library as _load_nvidia_dynamic_library, +) +from cuda.bindings._path_finder.supported_libs import SUPPORTED_LIBNAMES as _SUPPORTED_LIBNAMES __all__ = [ - "load_nvidia_dynamic_library", - "SUPPORTED_LIBNAMES", + "_load_nvidia_dynamic_library", + "_SUPPORTED_LIBNAMES", ] diff --git a/cuda_bindings/tests/test_path_finder.py b/cuda_bindings/tests/test_path_finder_find_load.py similarity index 58% rename from cuda_bindings/tests/test_path_finder.py rename to cuda_bindings/tests/test_path_finder_find_load.py index cb659026f..2a5f887fd 100644 --- a/cuda_bindings/tests/test_path_finder.py +++ b/cuda_bindings/tests/test_path_finder_find_load.py @@ -1,3 +1,6 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + import os import subprocess # nosec B404 import sys @@ -7,35 +10,34 @@ from cuda.bindings import path_finder from cuda.bindings._path_finder import supported_libs -ALL_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES +ALL_LIBNAMES = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_ALL +ALL_LIBNAMES_LINUX = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_LINUX +ALL_LIBNAMES_WINDOWS = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS if os.environ.get("CUDA_BINDINGS_PATH_FINDER_TEST_ALL_LIBNAMES", False): - TEST_LIBNAMES = ALL_LIBNAMES + if sys.platform == "win32": + TEST_FIND_OR_LOAD_LIBNAMES = ALL_LIBNAMES_WINDOWS + else: + TEST_FIND_OR_LOAD_LIBNAMES = ALL_LIBNAMES_LINUX else: - TEST_LIBNAMES = path_finder.SUPPORTED_LIBNAMES + TEST_FIND_OR_LOAD_LIBNAMES = path_finder._SUPPORTED_LIBNAMES def test_all_libnames_linux_sonames_consistency(): - assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_LINUX_SONAMES.keys())) + assert tuple(sorted(ALL_LIBNAMES_LINUX)) == tuple(sorted(supported_libs.SUPPORTED_LINUX_SONAMES.keys())) 
def test_all_libnames_windows_dlls_consistency(): - assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.SUPPORTED_WINDOWS_DLLS.keys())) + assert tuple(sorted(ALL_LIBNAMES_WINDOWS)) == tuple(sorted(supported_libs.SUPPORTED_WINDOWS_DLLS.keys())) def test_all_libnames_libnames_requiring_os_add_dll_directory_consistency(): - assert not (set(supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY) - set(ALL_LIBNAMES)) + assert not (set(supported_libs.LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY) - set(ALL_LIBNAMES_WINDOWS)) def test_all_libnames_expected_lib_symbols_consistency(): assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) -def _check_nvjitlink_usable(): - from cuda.bindings._internal import nvjitlink as inner_nvjitlink - - return inner_nvjitlink._inspect_function_pointer("__nvJitLinkVersion") != 0 - - def _build_subprocess_failed_for_libname_message(libname, result): return ( f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" @@ -45,14 +47,15 @@ def _build_subprocess_failed_for_libname_message(libname, result): @pytest.mark.parametrize("api", ("find", "load")) -@pytest.mark.parametrize("libname", TEST_LIBNAMES) +@pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES) def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): - if sys.platform == "win32" and not supported_libs.SUPPORTED_WINDOWS_DLLS[libname]: - pytest.skip(f"{libname=!r} not supported on {sys.platform=}") - - if libname == "nvJitLink" and not _check_nvjitlink_usable(): - pytest.skip(f"{libname=!r} not usable") - + # We intentionally run each dynamic library operation in a subprocess + # to ensure isolation of global dynamic linking state (e.g., dlopen handles). + # Without subprocesses, loading/unloading libraries during testing could + # interfere across test cases and lead to nondeterministic or platform-specific failures. 
+ # + # Defining the subprocess code snippets as strings ensures each subprocess + # runs a minimal, independent script tailored to the specific libname and API being tested. if api == "find": code = f"""\ from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library @@ -61,14 +64,14 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): """ else: code = f"""\ -from cuda.bindings.path_finder import load_nvidia_dynamic_library +from cuda.bindings.path_finder import _load_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache -loaded_dl_fresh = load_nvidia_dynamic_library({libname!r}) +loaded_dl_fresh = _load_nvidia_dynamic_library({libname!r}) if loaded_dl_fresh.was_already_loaded_from_elsewhere: raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere") -loaded_dl_from_cache = load_nvidia_dynamic_library({libname!r}) +loaded_dl_from_cache = _load_nvidia_dynamic_library({libname!r}) if loaded_dl_from_cache is not loaded_dl_fresh: raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh") @@ -85,6 +88,7 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8", + timeout=30, # Ensure CI testing does not hang for an excessive amount of time. ) if result.returncode == 0: info_summary_append(f"abs_path={result.stdout.rstrip()}") diff --git a/cuda_bindings/tests/test_path_finder_find_sub_dirs.py b/cuda_bindings/tests/test_path_finder_find_sub_dirs.py new file mode 100644 index 000000000..6b2644bff --- /dev/null +++ b/cuda_bindings/tests/test_path_finder_find_sub_dirs.py @@ -0,0 +1,91 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import os + +import pytest + +from cuda.bindings._path_finder.find_sub_dirs import ( + find_sub_dirs, + find_sub_dirs_all_sitepackages, + find_sub_dirs_sys_path, +) + +NONEXISTENT = "NonExistentE12DBF1Fbe948337576B5F1E88f60bb2" + + +@pytest.fixture +def test_tree(tmp_path): + # Build: + # tmp_path/ + # sys1/nvidia/foo/lib + # sys1/nvidia/bar/lib + # sys2/nvidia/baz/nvvm/lib64 + base = tmp_path + (base / "sys1" / "nvidia" / "foo" / "lib").mkdir(parents=True) + (base / "sys1" / "nvidia" / "bar" / "lib").mkdir(parents=True) + (base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64").mkdir(parents=True) + + return { + "parent_paths": ( + str(base / "sys1"), + str(base / "sys2"), + str(base / NONEXISTENT), + ), + "base": base, + } + + +def test_exact_match(test_tree): + parent_paths = test_tree["parent_paths"] + base = test_tree["base"] + result = find_sub_dirs(parent_paths, ("nvidia", "foo", "lib")) + expected = [str(base / "sys1" / "nvidia" / "foo" / "lib")] + assert result == expected + + +def test_single_wildcard(test_tree): + parent_paths = test_tree["parent_paths"] + base = test_tree["base"] + result = find_sub_dirs(parent_paths, ("nvidia", "*", "lib")) + expected = [ + str(base / "sys1" / "nvidia" / "bar" / "lib"), + str(base / "sys1" / "nvidia" / "foo" / "lib"), + ] + assert sorted(result) == sorted(expected) + + +def test_double_wildcard(test_tree): + parent_paths = test_tree["parent_paths"] + base = test_tree["base"] + result = find_sub_dirs(parent_paths, ("nvidia", "*", "nvvm", "lib64")) + expected = [str(base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64")] + assert result == expected + + +def test_no_match(test_tree): + parent_paths = test_tree["parent_paths"] + result = find_sub_dirs(parent_paths, (NONEXISTENT,)) + assert result == [] + + +def test_empty_parent_paths(): + result = find_sub_dirs((), ("nvidia", "*", "lib")) + assert result == [] + + +def test_empty_sub_dirs(test_tree): + 
parent_paths = test_tree["parent_paths"] + result = find_sub_dirs(parent_paths, ()) + expected = [p for p in parent_paths if os.path.isdir(p)] + assert sorted(result) == sorted(expected) + + +def test_find_sub_dirs_sys_path_no_math(): + result = find_sub_dirs_sys_path((NONEXISTENT,)) + assert result == [] + + +def test_find_sub_dirs_all_sitepackages_no_match(): + result = find_sub_dirs_all_sitepackages((NONEXISTENT,)) + assert result == [] diff --git a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py b/cuda_bindings/tests/test_sys_path_find_sub_dirs.py deleted file mode 100644 index 3297ce39e..000000000 --- a/cuda_bindings/tests/test_sys_path_find_sub_dirs.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - -import pytest - -from cuda.bindings._path_finder.sys_path_find_sub_dirs import _impl - - -@pytest.fixture -def test_tree(tmp_path): - # Build: - # tmp_path/ - # sys1/nvidia/foo/lib - # sys1/nvidia/bar/lib - # sys2/nvidia/baz/nvvm/lib64 - base = tmp_path - (base / "sys1" / "nvidia" / "foo" / "lib").mkdir(parents=True) - (base / "sys1" / "nvidia" / "bar" / "lib").mkdir(parents=True) - (base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64").mkdir(parents=True) - - return { - "sys_path": ( - str(base / "sys1"), - str(base / "sys2"), - str(base / "nonexistent"), # should be ignored - ), - "base": base, - } - - -def test_exact_match(test_tree): - sys_path = test_tree["sys_path"] - base = test_tree["base"] - result = _impl(sys_path, ("nvidia", "foo", "lib")) - expected = [str(base / "sys1" / "nvidia" / "foo" / "lib")] - assert result == expected - - -def test_single_wildcard(test_tree): - sys_path = test_tree["sys_path"] - base = test_tree["base"] - result = _impl(sys_path, ("nvidia", "*", "lib")) - expected = [ - str(base / "sys1" / "nvidia" / "bar" / "lib"), - str(base / "sys1" / "nvidia" / "foo" / "lib"), - ] - assert sorted(result) == sorted(expected) - - -def test_double_wildcard(test_tree): - sys_path = test_tree["sys_path"] - base = test_tree["base"] - result = 
_impl(sys_path, ("nvidia", "*", "nvvm", "lib64")) - expected = [str(base / "sys2" / "nvidia" / "baz" / "nvvm" / "lib64")] - assert result == expected - - -def test_no_match(test_tree): - sys_path = test_tree["sys_path"] - result = _impl(sys_path, ("nvidia", "nonexistent", "lib")) - assert result == [] - - -def test_empty_sys_path(): - result = _impl((), ("nvidia", "*", "lib")) - assert result == [] - - -def test_empty_sub_dirs(test_tree): - sys_path = test_tree["sys_path"] - result = _impl(sys_path, ()) - expected = [p for p in sys_path if os.path.isdir(p)] - assert sorted(result) == sorted(expected) diff --git a/toolshed/build_path_finder_dlls.py b/toolshed/build_path_finder_dlls.py index c82dcd866..be2db0d1f 100755 --- a/toolshed/build_path_finder_dlls.py +++ b/toolshed/build_path_finder_dlls.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 + # Input for this script: .txt files generated with: # for exe in *.exe; do 7z l $exe > "${exe%.exe}.txt"; done diff --git a/toolshed/build_path_finder_sonames.py b/toolshed/build_path_finder_sonames.py index 20e8ec6c7..17b7dd7b3 100755 --- a/toolshed/build_path_finder_sonames.py +++ b/toolshed/build_path_finder_sonames.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 + # Input for this script: # output of toolshed/find_sonames.sh diff --git a/toolshed/find_sonames.sh b/toolshed/find_sonames.sh index 79c2e89d5..b742becf6 100755 --- a/toolshed/find_sonames.sh +++ b/toolshed/find_sonames.sh @@ -1,4 +1,9 @@ #!/bin/bash + +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+# +# SPDX-License-Identifier: Apache-2.0 + find "$@" -type f -name '*.so*' -print0 | while IFS= read -r -d '' f; do type=$(test -L "$f" && echo SYMLINK || echo FILE) soname=$(readelf -d "$f" 2>/dev/null | awk '/SONAME/ {gsub(/[][]/, "", $5); print $5; exit}') diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py index 5f47b3990..19f43c288 100644 --- a/toolshed/run_cuda_bindings_path_finder.py +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -1,3 +1,7 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 + import sys import traceback @@ -5,7 +9,7 @@ from cuda.bindings._path_finder import cuda_paths, supported_libs ALL_LIBNAMES = ( - path_finder.SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES + path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES ) @@ -20,7 +24,7 @@ def run(args): for libname in ALL_LIBNAMES: print(f"{libname=}") try: - loaded_dl = path_finder.load_nvidia_dynamic_library(libname) + loaded_dl = path_finder._load_nvidia_dynamic_library(libname) except Exception: print(f"EXCEPTION for {libname=}:") traceback.print_exc(file=sys.stdout) From 8c9a2de7044e6daaf3d1a75a16b03f71a53cf12d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 3 May 2025 20:40:13 -0700 Subject: [PATCH 07/28] WIP (search priority updated in README.md but not in code) --- .../cuda/bindings/_path_finder/README.md | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md index 94b80499f..ae18e86a4 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/README.md +++ b/cuda_bindings/cuda/bindings/_path_finder/README.md @@ -24,17 +24,21 @@ strategy for locating NVIDIA shared libraries: The absolute path of the already loaded library will be returned, along with the handle to the library. 
-1. **Python Package Ecosystem** - - Scans `sys.path` to find libraries installed via NVIDIA Python wheels. +1. **Environment variables** + - Relies on `CUDA_HOME`/`CUDA_PATH` environment variables if set. -2. **Conda Environments** - - Leverages Conda-specific paths through our fork of `get_cuda_paths()` - from numba-cuda. +2. **NVIDIA Python wheels** + - Scans all site-packages to find libraries installed via NVIDIA Python wheels. -3. **Environment variables** - - Relies on `CUDA_HOME`/`CUDA_PATH` environment variables if set. +3. **OS default mechanisms / Conda environments** + - Falls back to native loader: + - `dlopen()` on Linux + - `LoadLibraryW()` on Windows + - Conda environments are expected to be covered by OS default mechanisms: + - Based on `$ORIGIN/../lib` `RPATH` on Linux + - Based on `%CONDA_PREFIX%\Library\bin` on the system `PATH` on Windows -4. **System Installations** +5. **System Installations** - Checks traditional system locations through these paths: - Linux: `/usr/local/cuda/lib64` - Windows: `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` @@ -44,11 +48,6 @@ strategy for locating NVIDIA shared libraries: - Distribution-specific packages (RPM/DEB) EXCEPT Debian's `nvidia-cuda-toolkit` -5. **OS Default Mechanisms** - - Falls back to native loader: - - `dlopen()` on Linux - - `LoadLibraryW()` on Windows - Note that the search is done on a per-library basis. There is no centralized mechanism that ensures all libraries are found in the same way. From 2cf3fa2e52f02711459d7fb25189635b989e740e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 4 May 2025 15:25:54 -0700 Subject: [PATCH 08/28] Completely replace cuda_paths.py to achieve the desired Search Priority (see updated README.md). 
--- .../cuda/bindings/_path_finder/README.md | 40 +- .../cuda/bindings/_path_finder/cuda_paths.py | 573 ------------------ .../find_nvidia_dynamic_library.py | 138 +++-- .../cuda/bindings/_path_finder/findlib.py | 97 --- .../bindings/_path_finder/load_dl_common.py | 20 + .../load_nvidia_dynamic_library.py | 32 +- ..._find_load.py => test_path_finder_load.py} | 32 +- toolshed/run_cuda_bindings_path_finder.py | 7 +- 8 files changed, 148 insertions(+), 791 deletions(-) delete mode 100644 cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py delete mode 100644 cuda_bindings/cuda/bindings/_path_finder/findlib.py rename cuda_bindings/tests/{test_path_finder_find_load.py => test_path_finder_load.py} (75%) diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md index ae18e86a4..5096246b8 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/README.md +++ b/cuda_bindings/cuda/bindings/_path_finder/README.md @@ -25,7 +25,8 @@ strategy for locating NVIDIA shared libraries: with the handle to the library. 1. **Environment variables** - - Relies on `CUDA_HOME`/`CUDA_PATH` environment variables if set. + - Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set + (in that order). 2. **NVIDIA Python wheels** - Scans all site-packages to find libraries installed via NVIDIA Python wheels. @@ -34,41 +35,16 @@ strategy for locating NVIDIA shared libraries: - Falls back to native loader: - `dlopen()` on Linux - `LoadLibraryW()` on Windows - - Conda environments are expected to be covered by OS default mechanisms: - - Based on `$ORIGIN/../lib` `RPATH` on Linux - - Based on `%CONDA_PREFIX%\Library\bin` on the system `PATH` on Windows - -5. 
**System Installations** - - Checks traditional system locations through these paths: - - Linux: `/usr/local/cuda/lib64` - - Windows: `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` - (where X.Y is the CTK version) - - **Notably does NOT search**: - - Versioned CUDA directories like `/usr/local/cuda-12.3` - - Distribution-specific packages (RPM/DEB) - EXCEPT Debian's `nvidia-cuda-toolkit` + - CTK installations with system config updates are expected to be discovered: + - Linux: Via `/etc/ld.so.conf.d/*cuda*.conf` + - Windows: Via `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\bin` on system `PATH` + - Conda installations are expected to be discovered: + - Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary) + - Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH` Note that the search is done on a per-library basis. There is no centralized mechanism that ensures all libraries are found in the same way. -## Implementation Philosophy - -The current implementation balances stability and evolution: - -- **Baseline Foundation:** Uses a fork of numba-cuda's `cuda_paths.py` that has been - battle-tested in production environments. - -- **Validation Infrastructure:** Comprehensive CI testing matrix being developed to cover: - - Various Linux/Windows environments - - Python packaging formats (wheels, conda) - - CUDA Toolkit versions - -- **Roadmap:** Planned refactoring to: - - Unify library discovery logic - - Improve maintainability - - Better enforce search priority - - Expand platform support - ## Maintenance Requirements These key components must be updated for new CUDA Toolkit releases: diff --git a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py b/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py deleted file mode 100644 index 80f4e0149..000000000 --- a/cuda_bindings/cuda/bindings/_path_finder/cuda_paths.py +++ /dev/null @@ -1,573 +0,0 @@ -# Copyright 2025 NVIDIA Corporation. All rights reserved. 
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE - -# Forked from: -# https://github.com/NVIDIA/numba-cuda/blob/8c9c9d0cb901c06774a9abea6d12b6a4b0287e5e/numba_cuda/numba/cuda/cuda_paths.py - -# The numba-cuda version in turn was forked from: -# https://github.com/numba/numba/blob/6c8a71ffc3eaa1c68e1bac927b80ee7469002b3f/numba/cuda/cuda_paths.py -# SPDX-License-Identifier: BSD-2-Clause -# -# Original Numba LICENSE: -# Copyright (c) 2012, Anaconda, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import ctypes -import os -import platform -import re -import site -import sys -import traceback -import warnings -from collections import namedtuple -from pathlib import Path - -from cuda.bindings._path_finder.findlib import find_lib - -IS_WIN32 = sys.platform.startswith("win32") - -_env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"]) - - -def _get_numba_CUDA_INCLUDE_PATH(): - # From numba/numba/core/config.py - - def _readenv(name, ctor, default): - value = os.environ.get(name) - if value is None: - return default() if callable(default) else default - try: - return ctor(value) - except Exception: - warnings.warn( # noqa: B028 - f"Environment variable '{name}' is defined but " - f"its associated value '{value}' could not be " - "parsed.\nThe parse failed with exception:\n" - f"{traceback.format_exc()}", - RuntimeWarning, - ) - return default - - if IS_WIN32: - cuda_path = os.environ.get("CUDA_PATH") - if cuda_path: # noqa: SIM108 - default_cuda_include_path = os.path.join(cuda_path, "include") - else: - default_cuda_include_path = "cuda_include_not_found" - else: - default_cuda_include_path = os.path.join(os.sep, "usr", "local", "cuda", "include") - CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, default_cuda_include_path) - return CUDA_INCLUDE_PATH - - -config_CUDA_INCLUDE_PATH = _get_numba_CUDA_INCLUDE_PATH() - -SEARCH_PRIORITY = [ - "Conda environment", - "Conda environment (NVIDIA package)", - "NVIDIA NVCC Wheel", - "CUDA_HOME", - "System", - "Debian package", -] - - -def _priority_index(label): - if label in SEARCH_PRIORITY: - return SEARCH_PRIORITY.index(label) - else: - raise ValueError(f"Can't determine search priority for {label}") - - -def _find_first_valid_lazy(options): - sorted_options = sorted(options, key=lambda x: _priority_index(x[0])) - for label, fn in sorted_options: - value = fn() - if value: - return label, value - return "", None - - -def _build_options(pairs): - """Sorts and returns a list of (label, value) tuples 
according to SEARCH_PRIORITY.""" - priority_index = {label: i for i, label in enumerate(SEARCH_PRIORITY)} - return sorted(pairs, key=lambda pair: priority_index.get(pair[0], float("inf"))) - - -def _find_valid_path(options): - """Find valid path from *options*, which is a list of 2-tuple of - (name, path). Return first pair where *path* is not None. - If no valid path is found, return ('', None) - """ - for by, data in options: - if data is not None: - return by, data - else: - return "", None - - -def _get_libdevice_path_decision(): - options = _build_options( - [ - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk), - ("CUDA_HOME", lambda: get_cuda_home("nvvm", "libdevice")), - ("NVIDIA NVCC Wheel", get_libdevice_wheel), - ("System", lambda: get_system_ctk("nvvm", "libdevice")), - ("Debian package", get_debian_pkg_libdevice), - ] - ) - return _find_first_valid_lazy(options) - - -def _nvvm_lib_dir(): - if IS_WIN32: - return "nvvm", "bin" - else: - return "nvvm", "lib64" - - -def _get_nvvm_path_decision(): - options = [ - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk), - ("NVIDIA NVCC Wheel", _get_nvvm_wheel), - ("CUDA_HOME", lambda: get_cuda_home(*_nvvm_lib_dir())), - ("System", lambda: get_system_ctk(*_nvvm_lib_dir())), - ] - return _find_first_valid_lazy(options) - - -def _get_nvrtc_system_ctk(): - sys_path = get_system_ctk("bin" if IS_WIN32 else "lib64") - candidates = find_lib("nvrtc", sys_path) - if candidates: - return max(candidates) - - -def _get_nvrtc_path_decision(): - options = _build_options( - [ - ("CUDA_HOME", lambda: get_cuda_home("nvrtc")), - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), - ("NVIDIA NVCC Wheel", _get_nvrtc_wheel), - ("System", _get_nvrtc_system_ctk), - ] - ) - return _find_first_valid_lazy(options) - - -def _get_nvvm_wheel(): - platform_map = { - "linux": ("lib64", 
"libnvvm.so"), - "win32": ("bin", "nvvm64_40_0.dll"), - } - - for plat, (dso_dir, dso_path) in platform_map.items(): - if sys.platform.startswith(plat): - break - else: - raise NotImplementedError("Unsupported platform") - - site_paths = [site.getusersitepackages()] + site.getsitepackages() - - for sp in filter(None, site_paths): - nvvm_path = Path(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir, dso_path) - if nvvm_path.exists(): - return str(nvvm_path.parent) - - return None - - -def get_nvrtc_dso_path(): - site_paths = [site.getusersitepackages()] + site.getsitepackages() - for sp in site_paths: - lib_dir = os.path.join( - sp, - "nvidia", - "cuda_nvrtc", - ("bin" if IS_WIN32 else "lib") if sp else None, - ) - if lib_dir and os.path.exists(lib_dir): - for major in (12, 11): - if major == 11: - cu_ver = "112" if IS_WIN32 else "11.2" - elif major == 12: - cu_ver = "120" if IS_WIN32 else "12" - else: - raise NotImplementedError(f"CUDA {major} is not supported") - - dso_path = os.path.join( - lib_dir, - f"nvrtc64_{cu_ver}_0.dll" if IS_WIN32 else f"libnvrtc.so.{cu_ver}", - ) - if os.path.isfile(dso_path): - return dso_path - return None - - -def _get_nvrtc_wheel(): - dso_path = get_nvrtc_dso_path() - if dso_path: - try: - result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL) - except OSError: - pass - else: - if IS_WIN32: - import win32api - - # This absolute path will - # always be correct regardless of the package source - nvrtc_path = win32api.GetModuleFileNameW(result._handle) - dso_dir = os.path.dirname(nvrtc_path) - builtins_path = os.path.join( - dso_dir, - [f for f in os.listdir(dso_dir) if re.match("^nvrtc-builtins.*.dll$", f)][0], - ) - if not os.path.exists(builtins_path): - raise RuntimeError(f'Path does not exist: "{builtins_path}"') - return Path(dso_path) - - -def _get_libdevice_paths(): - by, libdir = _get_libdevice_path_decision() - if not libdir: - return _env_path_tuple(by, None) - out = os.path.join(libdir, "libdevice.10.bc") - return 
_env_path_tuple(by, out) - - -def _cudalib_path(): - if IS_WIN32: - return "bin" - else: - return "lib64" - - -def _cuda_home_static_cudalib_path(): - if IS_WIN32: - return ("lib", "x64") - else: - return ("lib64",) - - -def _get_cudalib_wheel(): - """Get the cudalib path from the NVCC wheel.""" - site_paths = [site.getusersitepackages()] + site.getsitepackages() - libdir = "bin" if IS_WIN32 else "lib" - for sp in filter(None, site_paths): - cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir) - if cudalib_path.exists(): - return str(cudalib_path) - return None - - -def _get_cudalib_dir_path_decision(): - options = _build_options( - [ - ("Conda environment", get_conda_ctk), - ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk), - ("NVIDIA NVCC Wheel", _get_cudalib_wheel), - ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())), - ("System", lambda: get_system_ctk(_cudalib_path())), - ] - ) - return _find_first_valid_lazy(options) - - -def _get_static_cudalib_dir_path_decision(): - options = _build_options( - [ - ("Conda environment", get_conda_ctk), - ( - "Conda environment (NVIDIA package)", - get_nvidia_static_cudalib_ctk, - ), - ( - "CUDA_HOME", - lambda: get_cuda_home(*_cuda_home_static_cudalib_path()), - ), - ("System", lambda: get_system_ctk(_cudalib_path())), - ] - ) - return _find_first_valid_lazy(options) - - -def _get_cudalib_dir(): - by, libdir = _get_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def _get_static_cudalib_dir(): - by, libdir = _get_static_cudalib_dir_path_decision() - return _env_path_tuple(by, libdir) - - -def get_system_ctk(*subdirs): - """Return path to system-wide cudatoolkit; or, None if it doesn't exist.""" - # Linux? - if not IS_WIN32: - # Is cuda alias to /usr/local/cuda? - # We are intentionally not getting versioned cuda installation. 
- result = os.path.join("/usr/local/cuda", *subdirs) - if os.path.exists(result): - return result - - -def get_conda_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - # Assume the existence of NVVM to imply cudatoolkit installed - paths = find_lib("nvvm") - if not paths: - return - # Use the directory name of the max path - return os.path.dirname(max(paths)) - - -def get_nvidia_nvvm_ctk(): - """Return path to directory containing the NVVM shared library.""" - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - - # Assume the existence of NVVM in the conda env implies that a CUDA toolkit - # conda package is installed. - - # First, try the location used on Linux and the Windows 11.x packages - libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path()) - if not os.path.exists(libdir) or not os.path.isdir(libdir): - # If that fails, try the location used for Windows 12.x packages - libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path()) - if not os.path.exists(libdir) or not os.path.isdir(libdir): - # If that doesn't exist either, assume we don't have the NVIDIA - # conda package - return - - paths = find_lib("nvvm", libdir=libdir) - if not paths: - return - # Use the directory name of the max path - return os.path.dirname(max(paths)) - - -def get_nvidia_libdevice_ctk(): - """Return path to directory containing the libdevice library.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - nvvm_dir = os.path.dirname(nvvm_ctk) - return os.path.join(nvvm_dir, "libdevice") - - -def get_nvidia_cudalib_ctk(): - """Return path to directory containing the shared libraries of cudatoolkit.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - subdir = "bin" if IS_WIN32 else "lib" - return 
os.path.join(env_dir, subdir) - - -def get_nvidia_static_cudalib_ctk(): - """Return path to directory containing the static libraries of cudatoolkit.""" - nvvm_ctk = get_nvidia_nvvm_ctk() - if not nvvm_ctk: - return - - if IS_WIN32 and ("Library" not in nvvm_ctk): # noqa: SIM108 - # Location specific to CUDA 11.x packages on Windows - dirs = ("Lib", "x64") - else: - # Linux, or Windows with CUDA 12.x packages - dirs = ("lib",) - - env_dir = os.path.dirname(os.path.dirname(nvvm_ctk)) - return os.path.join(env_dir, *dirs) - - -def get_cuda_home(*subdirs): - """Get paths of CUDA_HOME. - If *subdirs* are the subdirectory name to be appended in the resulting - path. - """ - cuda_home = os.environ.get("CUDA_HOME") - if cuda_home is None: - # Try Windows CUDA installation without Anaconda - cuda_home = os.environ.get("CUDA_PATH") - if cuda_home is not None: - return os.path.join(cuda_home, *subdirs) - - -def _get_nvvm_path(): - by, path = _get_nvvm_path_decision() - - if by == "NVIDIA NVCC Wheel": - platform_map = { - "linux": "libnvvm.so", - "win32": "nvvm64_40_0.dll", - } - - for plat, dso_name in platform_map.items(): - if sys.platform.startswith(plat): - break - else: - raise NotImplementedError("Unsupported platform") - - path = os.path.join(path, dso_name) - else: - candidates = find_lib("nvvm", path) - path = max(candidates) if candidates else None - return _env_path_tuple(by, path) - - -def _get_nvrtc_path(): - by, path = _get_nvrtc_path_decision() - if by == "NVIDIA NVCC Wheel": - path = str(path) - elif by == "System": - return _env_path_tuple(by, path) - else: - candidates = find_lib("nvrtc", path) - path = max(candidates) if candidates else None - return _env_path_tuple(by, path) - - -def get_cuda_paths(): - """Returns a dictionary mapping component names to a 2-tuple - of (source_variable, info). 
- - The returned dictionary will have the following keys and infos: - - "nvvm": file_path - - "libdevice": List[Tuple[arch, file_path]] - - "cudalib_dir": directory_path - - Note: The result of the function is cached. - """ - # Check cache - if hasattr(get_cuda_paths, "_cached_result"): - return get_cuda_paths._cached_result - else: - # Not in cache - d = { - "nvvm": _get_nvvm_path(), - "nvrtc": _get_nvrtc_path(), - "libdevice": _get_libdevice_paths(), - "cudalib_dir": _get_cudalib_dir(), - "static_cudalib_dir": _get_static_cudalib_dir(), - "include_dir": _get_include_dir(), - } - # Cache result - get_cuda_paths._cached_result = d - return d - - -def get_debian_pkg_libdevice(): - """ - Return the Debian NVIDIA Maintainers-packaged libdevice location, if it - exists. - """ - pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice" - if not os.path.exists(pkg_libdevice_location): - return None - return pkg_libdevice_location - - -def get_libdevice_wheel(): - nvvm_path = _get_nvvm_wheel() - if nvvm_path is None: - return None - nvvm_path = Path(nvvm_path) - libdevice_path = nvvm_path.parent / "libdevice" - - return str(libdevice_path) - - -def get_current_cuda_target_name(): - """Determine conda's CTK target folder based on system and machine arch. - - CTK's conda package delivers headers based on its architecture type. For example, - `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and - `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. 
Read more about the - nuances at cudart's conda feedstock: - https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11 # noqa: E501 - """ - system = platform.system() - machine = platform.machine() - - if system == "Linux": - arch_to_targets = {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"} - elif system == "Windows": - arch_to_targets = { - "AMD64": "x64", - } - else: - arch_to_targets = {} - - return arch_to_targets.get(machine, None) - - -def get_conda_include_dir(): - """ - Return the include directory in the current conda environment, if one - is active and it exists. - """ - is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta")) - if not is_conda_env: - return - - if platform.system() == "Windows": - include_dir = os.path.join(sys.prefix, "Library", "include") - elif target_name := get_current_cuda_target_name(): - include_dir = os.path.join(sys.prefix, "targets", target_name, "include") - else: - # A fallback when target cannot determined - # though usually it shouldn't. 
- include_dir = os.path.join(sys.prefix, "include") - - if ( - os.path.exists(include_dir) - and os.path.isdir(include_dir) - and os.path.exists(os.path.join(include_dir, "cuda_device_runtime_api.h")) - ): - return include_dir - return - - -def _get_include_dir(): - """Find the root include directory.""" - options = [ - ("Conda environment (NVIDIA package)", get_conda_include_dir()), - ("CUDA_INCLUDE_PATH Config Entry", config_CUDA_INCLUDE_PATH), - # TODO: add others - ] - by, include_dir = _find_valid_path(options) - return _env_path_tuple(by, include_dir) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index af9f42fbf..20908d971 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -6,7 +6,6 @@ import os import sys -from cuda.bindings._path_finder.cuda_paths import get_cuda_paths from cuda.bindings._path_finder.find_sub_dirs import find_sub_dirs_all_sitepackages from cuda.bindings._path_finder.supported_libs import is_suppressed_dll_file @@ -61,59 +60,77 @@ def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, a return None -def _get_cuda_paths_info(key, error_messages): - env_path_tuple = get_cuda_paths()[key] - if not env_path_tuple: - error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"]') - return None - if not env_path_tuple.info: - error_messages.append(f'Failure obtaining get_cuda_paths()["{key}"].info') - return None - return env_path_tuple.info +def _get_cuda_home(): + cuda_home = os.environ.get("CUDA_HOME") + if cuda_home is None: + cuda_home = os.environ.get("CUDA_PATH") + return cuda_home -def _find_so_using_cudalib_dir(so_basename, error_messages, attachments): - cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) - if cudalib_dir is None: +def 
_find_lib_dir_using_cuda_home(libname): + cuda_home = _get_cuda_home() + if cuda_home is None: return None - primary_so_dir = cudalib_dir + "/" - candidate_so_dirs = [primary_so_dir] - libs = ["/lib/", "/lib64/"] - for _ in range(2): - alt_dir = libs[0].join(primary_so_dir.rsplit(libs[1], 1)) - if alt_dir not in candidate_so_dirs: - candidate_so_dirs.append(alt_dir) - libs.reverse() - candidate_so_names = [so_dirname + so_basename for so_dirname in candidate_so_dirs] - for so_name in candidate_so_names: - if os.path.isfile(so_name): - return so_name - error_messages.append(f"No such file: {so_name}") - for so_dirname in candidate_so_dirs: - attachments.append(f' listdir("{so_dirname}"):') - if not os.path.isdir(so_dirname): - attachments.append(" DIRECTORY DOES NOT EXIST") + if sys.platform == "win32": + if libname == "nvvm": # noqa: SIM108 + subdirs = (os.path.join("nvvm", "bin"),) + else: + subdirs = ("bin",) + else: + if libname == "nvvm": # noqa: SIM108 + subdirs = (os.path.join("nvvm", "lib64"),) else: - for node in sorted(os.listdir(so_dirname)): - attachments.append(f" {node}") + subdirs = ( + "lib64", # CTK + "lib", # Conda + ) + for subdir in subdirs: + dirname = os.path.join(cuda_home, subdir) + if os.path.isdir(dirname): + return dirname return None -def _find_dll_using_cudalib_dir(libname, error_messages, attachments): - cudalib_dir = _get_cuda_paths_info("cudalib_dir", error_messages) - if cudalib_dir is None: - return None +def _find_so_using_lib_dir(lib_dir, so_basename, error_messages, attachments): + so_name = os.path.join(lib_dir, so_basename) + if os.path.isfile(so_name): + return so_name + error_messages.append(f"No such file: {so_name}") + attachments.append(f' listdir("{lib_dir}"):') + if not os.path.isdir(lib_dir): + attachments.append(" DIRECTORY DOES NOT EXIST") + else: + for node in sorted(os.listdir(lib_dir)): + attachments.append(f" {node}") + return None + + +def _find_dll_using_lib_dir(lib_dir, libname, error_messages, attachments): 
file_wild = libname + "*.dll" - dll_name = _find_dll_under_dir(cudalib_dir, file_wild) + dll_name = _find_dll_under_dir(lib_dir, file_wild) if dll_name is not None: return dll_name error_messages.append(f"No such file: {file_wild}") - attachments.append(f' listdir("{cudalib_dir}"):') - for node in sorted(os.listdir(cudalib_dir)): + attachments.append(f' listdir("{lib_dir}"):') + for node in sorted(os.listdir(lib_dir)): attachments.append(f" {node}") return None +def _find_nvvm_lib_dir_from_other_abs_path(other_abs_path): + if sys.platform == "win32": + nvvm_subdir = "bin" + else: + nvvm_subdir = "lib64" + while other_abs_path: + if os.path.isdir(other_abs_path): + nvvm_lib_dir = os.path.join(other_abs_path, "nvvm", nvvm_subdir) + if os.path.isdir(nvvm_lib_dir): + return nvvm_lib_dir + other_abs_path = os.path.dirname(other_abs_path) + return None + + class _find_nvidia_dynamic_library: def __init__(self, libname: str): self.libname = libname @@ -121,28 +138,39 @@ def __init__(self, libname: str): self.attachments = [] self.abs_path = None + cuda_home_lib_dir = _find_lib_dir_using_cuda_home(libname) if sys.platform == "win32": self.lib_searched_for = f"{libname}*.dll" - self.abs_path = _find_dll_using_nvidia_bin_dirs( - libname, self.lib_searched_for, self.error_messages, self.attachments - ) + if cuda_home_lib_dir is not None: + self.abs_path = _find_dll_using_lib_dir( + cuda_home_lib_dir, libname, self.error_messages, self.attachments + ) if self.abs_path is None: - if libname == "nvvm": - self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) - else: - self.abs_path = _find_dll_using_cudalib_dir(libname, self.error_messages, self.attachments) + self.abs_path = _find_dll_using_nvidia_bin_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) else: self.lib_searched_for = f"lib{libname}.so" - self.abs_path = _find_so_using_nvidia_lib_dirs( - libname, self.lib_searched_for, self.error_messages, self.attachments - ) + if 
cuda_home_lib_dir is not None: + self.abs_path = _find_so_using_lib_dir( + cuda_home_lib_dir, self.lib_searched_for, self.error_messages, self.attachments + ) if self.abs_path is None: - if libname == "nvvm": - self.abs_path = _get_cuda_paths_info("nvvm", self.error_messages) - else: - self.abs_path = _find_so_using_cudalib_dir( - self.lib_searched_for, self.error_messages, self.attachments - ) + self.abs_path = _find_so_using_nvidia_lib_dirs( + libname, self.lib_searched_for, self.error_messages, self.attachments + ) + + def retry_with_other_abs_path(self, other_abs_path): + assert self.libname == "nvvm" + nvvm_lib_dir = _find_nvvm_lib_dir_from_other_abs_path(other_abs_path) + if nvvm_lib_dir is None: + return + if sys.platform == "win32": + self.abs_path = _find_dll_using_lib_dir(nvvm_lib_dir, self.libname, self.error_messages, self.attachments) + else: + self.abs_path = _find_so_using_lib_dir( + nvvm_lib_dir, self.lib_searched_for, self.error_messages, self.attachments + ) def raise_if_abs_path_is_None(self): if self.abs_path: diff --git a/cuda_bindings/cuda/bindings/_path_finder/findlib.py b/cuda_bindings/cuda/bindings/_path_finder/findlib.py deleted file mode 100644 index 992a3940e..000000000 --- a/cuda_bindings/cuda/bindings/_path_finder/findlib.py +++ /dev/null @@ -1,97 +0,0 @@ -# SPDX-License-Identifier: BSD-2-Clause -# -# Forked from: -# https://github.com/numba/numba/blob/f0d24824fcd6a454827e3c108882395d00befc04/numba/misc/findlib.py -# -# Original LICENSE: -# Copyright (c) 2012, Anaconda, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. 
-# -# Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import re -import sys - - -def get_lib_dirs(): - """ - Anaconda specific - """ - if sys.platform == "win32": - # on windows, historically `DLLs` has been used for CUDA libraries, - # since approximately CUDA 9.2, `Library\bin` has been used. 
- dirnames = ["DLLs", os.path.join("Library", "bin")] - else: - dirnames = [ - "lib", - ] - libdirs = [os.path.join(sys.prefix, x) for x in dirnames] - return libdirs - - -DLLNAMEMAP = { - "linux": r"lib%(name)s\.so\.%(ver)s$", - "linux2": r"lib%(name)s\.so\.%(ver)s$", - "linux-static": r"lib%(name)s\.a$", - "darwin": r"lib%(name)s\.%(ver)s\.dylib$", - "win32": r"%(name)s%(ver)s\.dll$", - "win32-static": r"%(name)s\.lib$", - "bsd": r"lib%(name)s\.so\.%(ver)s$", -} - -RE_VER = r"[0-9]*([_\.][0-9]+)*" - - -def find_lib(libname, libdir=None, platform=None, static=False): - platform = platform or sys.platform - platform = "bsd" if "bsd" in platform else platform - if static: - platform = f"{platform}-static" - if platform not in DLLNAMEMAP: - # Return empty list if platform name is undefined. - # Not all platforms define their static library paths. - return [] - pat = DLLNAMEMAP[platform] % {"name": libname, "ver": RE_VER} - regex = re.compile(pat) - return find_file(regex, libdir) - - -def find_file(pat, libdir=None): - if libdir is None: - libdirs = get_lib_dirs() - elif isinstance(libdir, str): - libdirs = [ - libdir, - ] - else: - libdirs = list(libdir) - files = [] - for ldir in libdirs: - try: - entries = os.listdir(ldir) - except FileNotFoundError: - continue - candidates = [os.path.join(ldir, ent) for ent in entries if pat.match(ent)] - files.extend([c for c in candidates if os.path.isfile(c)]) - return files diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py index 4592f6c33..717bdc00c 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -1,6 +1,8 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. 
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import subprocess # nosec B404 +import sys from dataclasses import dataclass from typing import Callable, Optional @@ -38,3 +40,21 @@ def load_dependencies(libname: str, load_func: Callable[[str], LoadedDL]) -> Non """ for dep in DIRECT_DEPENDENCIES.get(libname, ()): load_func(dep) + + +def load_in_subprocess(python_code, timeout=30): + # This is to avoid loading libraries into the parent process. + return subprocess.run( # nosec B603 + [sys.executable, "-c", python_code], + capture_output=True, + encoding="utf-8", + timeout=timeout, # Ensure this does not hang for an excessive amount of time. + ) + + +def build_subprocess_failed_for_libname_message(libname, result): + return ( + f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" + f"--- stdout-from-subprocess ---\n{result.stdout}\n" + f"--- stderr-from-subprocess ---\n{result.stderr}\n" + ) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 015c4cdf8..8e9df6f12 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -2,10 +2,16 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools +import json import sys from cuda.bindings._path_finder.find_nvidia_dynamic_library import _find_nvidia_dynamic_library -from cuda.bindings._path_finder.load_dl_common import LoadedDL, load_dependencies +from cuda.bindings._path_finder.load_dl_common import ( + LoadedDL, + build_subprocess_failed_for_libname_message, + load_dependencies, + load_in_subprocess, +) if sys.platform == "win32": from cuda.bindings._path_finder.load_dl_windows import ( @@ -21,6 +27,21 @@ ) +def _load_other_in_subprocess(libname, error_messages): + code = f"""\ +from cuda.bindings._path_finder.load_nvidia_dynamic_library import 
load_nvidia_dynamic_library +import json +import sys +loaded = load_nvidia_dynamic_library({libname!r}) +sys.stdout.write(json.dumps(loaded.abs_path, ensure_ascii=True)) +""" + result = load_in_subprocess(code) + if result.returncode == 0: + return json.loads(result.stdout) + error_messages.extend(build_subprocess_failed_for_libname_message(libname, result).splitlines()) + return None + + def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: # Check whether the library is already loaded into the current process by # some other component. This check uses OS-level mechanisms (e.g., @@ -38,6 +59,15 @@ def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: loaded = load_with_system_search(libname, found.lib_searched_for) if loaded is not None: return loaded + if libname == "nvvm": + # Use cudart as anchor point (libcudart.so.12 is only ~720K, cudart64_12.dll ~560K). + loaded_cudart = check_if_already_loaded_from_elsewhere("cudart") + if loaded_cudart is not None: + found.retry_with_other_abs_path(loaded_cudart.abs_path) + else: + cudart_abs_path = _load_other_in_subprocess("cudart", found.error_messages) + if cudart_abs_path is not None: + found.retry_with_other_abs_path(cudart_abs_path) found.raise_if_abs_path_is_None() # Load the library from the found path diff --git a/cuda_bindings/tests/test_path_finder_find_load.py b/cuda_bindings/tests/test_path_finder_load.py similarity index 75% rename from cuda_bindings/tests/test_path_finder_find_load.py rename to cuda_bindings/tests/test_path_finder_load.py index 2a5f887fd..f665ca022 100644 --- a/cuda_bindings/tests/test_path_finder_find_load.py +++ b/cuda_bindings/tests/test_path_finder_load.py @@ -2,13 +2,13 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import os -import subprocess # nosec B404 import sys import pytest from cuda.bindings import path_finder from cuda.bindings._path_finder import supported_libs +from cuda.bindings._path_finder.load_dl_common import 
build_subprocess_failed_for_libname_message, load_in_subprocess ALL_LIBNAMES = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_ALL ALL_LIBNAMES_LINUX = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_LINUX @@ -38,17 +38,8 @@ def test_all_libnames_expected_lib_symbols_consistency(): assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) -def _build_subprocess_failed_for_libname_message(libname, result): - return ( - f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" - f"--- stdout-from-subprocess ---\n{result.stdout}\n" - f"--- stderr-from-subprocess ---\n{result.stderr}\n" - ) - - -@pytest.mark.parametrize("api", ("find", "load")) @pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES) -def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): +def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname): # We intentionally run each dynamic library operation in a subprocess # to ensure isolation of global dynamic linking state (e.g., dlopen handles). # Without subprocesses, loading/unloading libraries during testing could @@ -56,14 +47,7 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): # # Defining the subprocess code snippets as strings ensures each subprocess # runs a minimal, independent script tailored to the specific libname and API being tested. 
- if api == "find": - code = f"""\ -from cuda.bindings._path_finder.find_nvidia_dynamic_library import find_nvidia_dynamic_library -abs_path = find_nvidia_dynamic_library({libname!r}) -print(f"{{abs_path!r}}") -""" - else: - code = f"""\ + code = f"""\ from cuda.bindings.path_finder import _load_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache @@ -83,14 +67,8 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, api, libname): print(f"{{loaded_dl_fresh.abs_path!r}}") """ - result = subprocess.run( # nosec B603 - [sys.executable, "-c", code], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf-8", - timeout=30, # Ensure CI testing does not hang for an excessive amount of time. - ) + result = load_in_subprocess(code) if result.returncode == 0: info_summary_append(f"abs_path={result.stdout.rstrip()}") else: - raise RuntimeError(_build_subprocess_failed_for_libname_message(libname, result)) + raise RuntimeError(build_subprocess_failed_for_libname_message(libname, result)) diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py index 19f43c288..cf173aa41 100644 --- a/toolshed/run_cuda_bindings_path_finder.py +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -6,7 +6,7 @@ import traceback from cuda.bindings import path_finder -from cuda.bindings._path_finder import cuda_paths, supported_libs +from cuda.bindings._path_finder import supported_libs ALL_LIBNAMES = ( path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES @@ -16,11 +16,6 @@ def run(args): assert len(args) == 0 - paths = cuda_paths.get_cuda_paths() - for k, v in paths.items(): - print(f"{k}: {v}", flush=True) - print() - for libname in ALL_LIBNAMES: print(f"{libname=}") try: From 2b740226335b788e699e244007c241f082a46c3b Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sun, 4 May 2025 22:01:10 -0700 Subject: [PATCH 09/28] Define `IS_WINDOWS = sys.platform == "win32"` in supported_libs.py --- .../_path_finder/find_nvidia_dynamic_library.py | 14 +++++--------- .../_path_finder/load_nvidia_dynamic_library.py | 4 ++-- .../cuda/bindings/_path_finder/supported_libs.py | 4 +++- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 20908d971..7df8276c6 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -4,10 +4,9 @@ import functools import glob import os -import sys from cuda.bindings._path_finder.find_sub_dirs import find_sub_dirs_all_sitepackages -from cuda.bindings._path_finder.supported_libs import is_suppressed_dll_file +from cuda.bindings._path_finder.supported_libs import IS_WINDOWS, is_suppressed_dll_file def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): @@ -71,7 +70,7 @@ def _find_lib_dir_using_cuda_home(libname): cuda_home = _get_cuda_home() if cuda_home is None: return None - if sys.platform == "win32": + if IS_WINDOWS: if libname == "nvvm": # noqa: SIM108 subdirs = (os.path.join("nvvm", "bin"),) else: @@ -118,10 +117,7 @@ def _find_dll_using_lib_dir(lib_dir, libname, error_messages, attachments): def _find_nvvm_lib_dir_from_other_abs_path(other_abs_path): - if sys.platform == "win32": - nvvm_subdir = "bin" - else: - nvvm_subdir = "lib64" + nvvm_subdir = "bin" if IS_WINDOWS else "lib64" while other_abs_path: if os.path.isdir(other_abs_path): nvvm_lib_dir = os.path.join(other_abs_path, "nvvm", nvvm_subdir) @@ -139,7 +135,7 @@ def __init__(self, libname: str): self.abs_path = None cuda_home_lib_dir = _find_lib_dir_using_cuda_home(libname) - if sys.platform == "win32": + if IS_WINDOWS: 
self.lib_searched_for = f"{libname}*.dll" if cuda_home_lib_dir is not None: self.abs_path = _find_dll_using_lib_dir( @@ -165,7 +161,7 @@ def retry_with_other_abs_path(self, other_abs_path): nvvm_lib_dir = _find_nvvm_lib_dir_from_other_abs_path(other_abs_path) if nvvm_lib_dir is None: return - if sys.platform == "win32": + if IS_WINDOWS: self.abs_path = _find_dll_using_lib_dir(nvvm_lib_dir, self.libname, self.error_messages, self.attachments) else: self.abs_path = _find_so_using_lib_dir( diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 8e9df6f12..b368f5d9a 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -3,7 +3,6 @@ import functools import json -import sys from cuda.bindings._path_finder.find_nvidia_dynamic_library import _find_nvidia_dynamic_library from cuda.bindings._path_finder.load_dl_common import ( @@ -12,8 +11,9 @@ load_dependencies, load_in_subprocess, ) +from cuda.bindings._path_finder.supported_libs import IS_WINDOWS -if sys.platform == "win32": +if IS_WINDOWS: from cuda.bindings._path_finder.load_dl_windows import ( check_if_already_loaded_from_elsewhere, load_with_abs_path, diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py index 6852c7fce..e5d4c28c1 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -5,6 +5,8 @@ import sys +IS_WINDOWS = sys.platform == "win32" + SUPPORTED_LIBNAMES = ( # Core CUDA Runtime and Compiler "nvJitLink", @@ -65,7 +67,7 @@ + PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS_ONLY ) -if sys.platform == "win32": +if IS_WINDOWS: PARTIALLY_SUPPORTED_LIBNAMES = PARTIALLY_SUPPORTED_LIBNAMES_WINDOWS else: PARTIALLY_SUPPORTED_LIBNAMES = 
PARTIALLY_SUPPORTED_LIBNAMES_LINUX From 27db0a78f839c9a9ad9e3a4456a84b2c862838c1 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 00:19:30 -0700 Subject: [PATCH 10/28] Use os.path.samefile() to resolve issues with doubled backslashes. --- cuda_bindings/tests/test_path_finder_load.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cuda_bindings/tests/test_path_finder_load.py b/cuda_bindings/tests/test_path_finder_load.py index f665ca022..8fc117df5 100644 --- a/cuda_bindings/tests/test_path_finder_load.py +++ b/cuda_bindings/tests/test_path_finder_load.py @@ -48,6 +48,7 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname): # Defining the subprocess code snippets as strings ensures each subprocess # runs a minimal, independent script tailored to the specific libname and API being tested. code = f"""\ +import os from cuda.bindings.path_finder import _load_nvidia_dynamic_library from cuda.bindings._path_finder.load_nvidia_dynamic_library import _load_nvidia_dynamic_library_no_cache @@ -62,8 +63,8 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname): loaded_dl_no_cache = _load_nvidia_dynamic_library_no_cache({libname!r}) if not loaded_dl_no_cache.was_already_loaded_from_elsewhere: raise RuntimeError("loaded_dl_no_cache.was_already_loaded_from_elsewhere") -if loaded_dl_no_cache.abs_path != loaded_dl_fresh.abs_path: - raise RuntimeError(f"{{loaded_dl_no_cache.abs_path=!r}} != {{loaded_dl_fresh.abs_path=!r}}") +if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path): + raise RuntimeError(f"not os.path.samefile({{loaded_dl_no_cache.abs_path=!r}}, {{loaded_dl_fresh.abs_path=!r}})") print(f"{{loaded_dl_fresh.abs_path!r}}") """ From 1f728c0cb4a5a99788e230edb739cc29a12ded7a Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 11:05:52 -0700 Subject: [PATCH 11/28] `load_in_subprocess()`: Pass current environment --- cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py index 717bdc00c..268d04295 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -1,6 +1,7 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import os import subprocess # nosec B404 import sys from dataclasses import dataclass @@ -49,6 +50,7 @@ def load_in_subprocess(python_code, timeout=30): capture_output=True, encoding="utf-8", timeout=timeout, # Ensure this does not hang for an excessive amount of time. + env=os.environ, # Pass current environment ) From 0d23bb697ec353a0dd991ad8a3a66e3612fe1da7 Mon Sep 17 00:00:00 2001 From: "Ralf W.
Grosse-Kunstleve" Date: Mon, 5 May 2025 11:53:06 -0700 Subject: [PATCH 12/28] Add run_python_code_safely.py as generated by perplexity, plus ruff format, bandit nosec --- .../_path_finder/run_python_code_safely.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py diff --git a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py new file mode 100644 index 000000000..6b4dbd191 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py @@ -0,0 +1,64 @@ +import multiprocessing +import subprocess # nosec B404 +import sys +from io import StringIO + + +def run_python_code_safely(python_code, timeout=None): + """Replacement for subprocess.run that forces 'spawn' context""" + ctx = multiprocessing.get_context("spawn") + result_queue = ctx.Queue() + + def worker(): + # Capture stdout/stderr + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = StringIO() + sys.stderr = StringIO() + + returncode = 0 + try: + exec(python_code, {"__name__": "__main__"}) # nosec B102 + except SystemExit as e: # Handle sys.exit() + returncode = e.code if isinstance(e.code, int) else 0 + except Exception: # Capture other exceptions + import traceback + + traceback.print_exc() + returncode = 1 + finally: + # Collect outputs and restore streams + stdout = sys.stdout.getvalue() + stderr = sys.stderr.getvalue() + sys.stdout = old_stdout + sys.stderr = old_stderr + result_queue.put((returncode, stdout, stderr)) + + process = ctx.Process(target=worker) + process.start() + + try: + # Wait with timeout support + process.join(timeout) + if process.is_alive(): + process.terminate() + process.join() + raise subprocess.TimeoutExpired([sys.executable, "-c", python_code], timeout) + + # Get results from queue + if result_queue.empty(): + return subprocess.CompletedProcess( + [sys.executable, "-c", 
python_code], + returncode=-999, + stdout="", + stderr="Process failed to return results", + ) + + returncode, stdout, stderr = result_queue.get() + return subprocess.CompletedProcess( + [sys.executable, "-c", python_code], returncode=returncode, stdout=stdout, stderr=stderr + ) + finally: + # Cleanup if needed + if process.is_alive(): + process.kill() From b1a5e9d8adc20ffabe5d35d7f3d562a7b1f50b71 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 11:57:07 -0700 Subject: [PATCH 13/28] Replace subprocess.run with run_python_code_safely --- .../cuda/bindings/_path_finder/load_dl_common.py | 12 ++---------- .../bindings/_path_finder/run_python_code_safely.py | 2 +- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py index 268d04295..c5595f1de 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -1,12 +1,10 @@ # Copyright 2025 NVIDIA Corporation. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import os -import subprocess # nosec B404 -import sys from dataclasses import dataclass from typing import Callable, Optional +from cuda.bindings._path_finder.run_python_code_safely import run_python_code_safely from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES @@ -45,13 +43,7 @@ def load_dependencies(libname: str, load_func: Callable[[str], LoadedDL]) -> Non def load_in_subprocess(python_code, timeout=30): # This is to avoid loading libraries into the parent process. - return subprocess.run( # nosec B603 - [sys.executable, "-c", python_code], - capture_output=True, - encoding="utf-8", - timeout=timeout, # Ensure this does not hang for an excessive amount of time. 
- env=os.environ, # Pass current environment - ) + return run_python_code_safely(python_code, timeout=timeout) def build_subprocess_failed_for_libname_message(libname, result): diff --git a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py index 6b4dbd191..a1a93a34f 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py +++ b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py @@ -4,7 +4,7 @@ from io import StringIO -def run_python_code_safely(python_code, timeout=None): +def run_python_code_safely(python_code, *, timeout=None): """Replacement for subprocess.run that forces 'spawn' context""" ctx = multiprocessing.get_context("spawn") result_queue = ctx.Queue() From 8e9c7b14e4352517c90dfcf013255f73dfcb0dba Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 12:31:59 -0700 Subject: [PATCH 14/28] Factor out `class Worker` to fix pickle issue. 
--- .../_path_finder/run_python_code_safely.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py index a1a93a34f..9b8488598 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py +++ b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py @@ -4,12 +4,12 @@ from io import StringIO -def run_python_code_safely(python_code, *, timeout=None): - """Replacement for subprocess.run that forces 'spawn' context""" - ctx = multiprocessing.get_context("spawn") - result_queue = ctx.Queue() +class Worker: + def __init__(self, python_code, result_queue): + self.python_code = python_code + self.result_queue = result_queue - def worker(): + def __call__(self): # Capture stdout/stderr old_stdout = sys.stdout old_stderr = sys.stderr @@ -18,7 +18,7 @@ def worker(): returncode = 0 try: - exec(python_code, {"__name__": "__main__"}) # nosec B102 + exec(self.python_code, {"__name__": "__main__"}) # nosec B102 except SystemExit as e: # Handle sys.exit() returncode = e.code if isinstance(e.code, int) else 0 except Exception: # Capture other exceptions @@ -32,9 +32,14 @@ def worker(): stderr = sys.stderr.getvalue() sys.stdout = old_stdout sys.stderr = old_stderr - result_queue.put((returncode, stdout, stderr)) + self.result_queue.put((returncode, stdout, stderr)) + - process = ctx.Process(target=worker) +def run_python_code_safely(python_code, *, timeout=None): + """Replacement for subprocess.run that forces 'spawn' context""" + ctx = multiprocessing.get_context("spawn") + result_queue = ctx.Queue() + process = ctx.Process(target=Worker(python_code, result_queue)) process.start() try: From 5977b9d59493edcdbea080247a6ae8e7158f8d54 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 12:42:57 -0700 Subject: [PATCH 15/28] ChatGPT revisions based on Deep research: https://chatgpt.com/share/681914ce-f274-8008-9e9f-4538716b4ed7 --- .../_path_finder/run_python_code_safely.py | 43 +++++++++++++------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py index 9b8488598..e311e14bb 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py +++ b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py @@ -1,6 +1,7 @@ import multiprocessing import subprocess # nosec B404 import sys +import traceback from io import StringIO @@ -16,14 +17,12 @@ def __call__(self): sys.stdout = StringIO() sys.stderr = StringIO() - returncode = 0 try: exec(self.python_code, {"__name__": "__main__"}) # nosec B102 + returncode = 0 except SystemExit as e: # Handle sys.exit() returncode = e.code if isinstance(e.code, int) else 0 - except Exception: # Capture other exceptions - import traceback - + except BaseException: traceback.print_exc() returncode = 1 finally: @@ -32,38 +31,54 @@ def __call__(self): stderr = sys.stderr.getvalue() sys.stdout = old_stdout sys.stderr = old_stderr - self.result_queue.put((returncode, stdout, stderr)) + try: # noqa: SIM105 + self.result_queue.put((returncode, stdout, stderr)) + except Exception: # nosec B110 + # If the queue is broken (e.g., parent gone), best effort logging + pass def run_python_code_safely(python_code, *, timeout=None): - """Replacement for subprocess.run that forces 'spawn' context""" + """Run Python code in a spawned subprocess, capturing stdout/stderr/output.""" ctx = multiprocessing.get_context("spawn") - result_queue = ctx.Queue() + result_queue = ctx.SimpleQueue() process = ctx.Process(target=Worker(python_code, result_queue)) process.start() try: - # Wait with timeout support process.join(timeout) if 
process.is_alive(): process.terminate() process.join() - raise subprocess.TimeoutExpired([sys.executable, "-c", python_code], timeout) + return subprocess.CompletedProcess( + args=[sys.executable, "-c", python_code], + returncode=-9, + stdout="", + stderr=f"Process timed out after {timeout} seconds and was terminated.", + ) - # Get results from queue if result_queue.empty(): return subprocess.CompletedProcess( - [sys.executable, "-c", python_code], + args=[sys.executable, "-c", python_code], returncode=-999, stdout="", - stderr="Process failed to return results", + stderr="Process exited without returning results.", ) returncode, stdout, stderr = result_queue.get() return subprocess.CompletedProcess( - [sys.executable, "-c", python_code], returncode=returncode, stdout=stdout, stderr=stderr + args=[sys.executable, "-c", python_code], + returncode=returncode, + stdout=stdout, + stderr=stderr, ) + finally: - # Cleanup if needed + try: + result_queue.close() + result_queue.join_thread() + except Exception: # nosec B110 + pass if process.is_alive(): process.kill() + process.join() From 9b474bc80b079f5977712970d63439310a9f3319 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 13:42:39 -0700 Subject: [PATCH 16/28] Fix race condition in result queue handling by using timeout-based get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation checked result_queue.empty() before calling get(), which introduces a classic race condition: the queue may become non-empty immediately after the check, resulting in missed results or misleading errors. This patch replaces the empty() check with result_queue.get(timeout=1.0), allowing the parent process to robustly wait for results with a bounded delay. Also switches from ctx.SimpleQueue() to ctx.Queue() for compatibility with timeout-based get(), which SimpleQueue does not support on Python ≤3.12. 
Note: The race condition was discovered by Gemini 2.5 --- .../bindings/_path_finder/run_python_code_safely.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py index e311e14bb..316cb3885 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py +++ b/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py @@ -1,4 +1,5 @@ import multiprocessing +import queue # for Empty import subprocess # nosec B404 import sys import traceback @@ -41,7 +42,7 @@ def __call__(self): def run_python_code_safely(python_code, *, timeout=None): """Run Python code in a spawned subprocess, capturing stdout/stderr/output.""" ctx = multiprocessing.get_context("spawn") - result_queue = ctx.SimpleQueue() + result_queue = ctx.Queue() process = ctx.Process(target=Worker(python_code, result_queue)) process.start() @@ -57,15 +58,16 @@ def run_python_code_safely(python_code, *, timeout=None): stderr=f"Process timed out after {timeout} seconds and was terminated.", ) - if result_queue.empty(): + try: + returncode, stdout, stderr = result_queue.get(timeout=1.0) + except (queue.Empty, EOFError): return subprocess.CompletedProcess( args=[sys.executable, "-c", python_code], returncode=-999, stdout="", - stderr="Process exited without returning results.", + stderr="Process exited or crashed before returning results.", ) - returncode, stdout, stderr = result_queue.get() return subprocess.CompletedProcess( args=[sys.executable, "-c", python_code], returncode=returncode, From ab00a872181d0a84cd20bb0459e2fe20b3dbcc37 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 14:41:05 -0700 Subject: [PATCH 17/28] Resolve SIM108 --- .../find_nvidia_dynamic_library.py | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 7df8276c6..d266559a6 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -18,10 +18,7 @@ def _no_such_file_in_sub_dirs(sub_dirs, file_wild, error_messages, attachments): def _find_so_using_nvidia_lib_dirs(libname, so_basename, error_messages, attachments): - if libname == "nvvm": # noqa: SIM108 - nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") - else: - nvidia_sub_dirs = ("nvidia", "*", "lib") + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "lib64") if libname == "nvvm" else ("nvidia", "*", "lib") file_wild = so_basename + "*" for lib_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): # First look for an exact match @@ -47,10 +44,7 @@ def _find_dll_under_dir(dirpath, file_wild): def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, attachments): - if libname == "nvvm": # noqa: SIM108 - nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") - else: - nvidia_sub_dirs = ("nvidia", "*", "bin") + nvidia_sub_dirs = ("nvidia", "*", "nvvm", "bin") if libname == "nvvm" else ("nvidia", "*", "bin") for bin_dir in find_sub_dirs_all_sitepackages(nvidia_sub_dirs): dll_name = _find_dll_under_dir(bin_dir, lib_searched_for) if dll_name is not None: @@ -71,18 +65,16 @@ def _find_lib_dir_using_cuda_home(libname): if cuda_home is None: return None if IS_WINDOWS: - if libname == "nvvm": # noqa: SIM108 - subdirs = (os.path.join("nvvm", "bin"),) - else: - subdirs = ("bin",) + subdirs = (os.path.join("nvvm", "bin"),) if libname == "nvvm" else ("bin",) else: - if libname == "nvvm": # noqa: 
SIM108 - subdirs = (os.path.join("nvvm", "lib64"),) - else: - subdirs = ( + subdirs = ( + (os.path.join("nvvm", "lib64"),) + if libname == "nvvm" + else ( "lib64", # CTK "lib", # Conda ) + ) for subdir in subdirs: dirname = os.path.join(cuda_home, subdir) if os.path.isdir(dirname): From 2a039d2a7ec44e1d77b68b7fcc610897fcb4bd4f Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 15:21:56 -0700 Subject: [PATCH 18/28] Change to "nppc" as ANCHOR_LIBNAME --- .../find_nvidia_dynamic_library.py | 14 +++++++------- .../load_nvidia_dynamic_library.py | 18 ++++++++++-------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index d266559a6..12bed47fb 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -108,14 +108,14 @@ def _find_dll_using_lib_dir(lib_dir, libname, error_messages, attachments): return None -def _find_nvvm_lib_dir_from_other_abs_path(other_abs_path): +def _find_nvvm_lib_dir_from_anchor_abs_path(anchor_abs_path): nvvm_subdir = "bin" if IS_WINDOWS else "lib64" - while other_abs_path: - if os.path.isdir(other_abs_path): - nvvm_lib_dir = os.path.join(other_abs_path, "nvvm", nvvm_subdir) + while anchor_abs_path: + if os.path.isdir(anchor_abs_path): + nvvm_lib_dir = os.path.join(anchor_abs_path, "nvvm", nvvm_subdir) if os.path.isdir(nvvm_lib_dir): return nvvm_lib_dir - other_abs_path = os.path.dirname(other_abs_path) + anchor_abs_path = os.path.dirname(anchor_abs_path) return None @@ -148,9 +148,9 @@ def __init__(self, libname: str): libname, self.lib_searched_for, self.error_messages, self.attachments ) - def retry_with_other_abs_path(self, other_abs_path): + def retry_with_anchor_abs_path(self, anchor_abs_path): assert self.libname == "nvvm" - nvvm_lib_dir = 
_find_nvvm_lib_dir_from_other_abs_path(other_abs_path) + nvvm_lib_dir = _find_nvvm_lib_dir_from_anchor_abs_path(anchor_abs_path) if nvvm_lib_dir is None: return if IS_WINDOWS: diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index b368f5d9a..e9195e7b5 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -26,8 +26,11 @@ load_with_system_search, ) +# "nvvm" is found from this anchor +ANCHOR_LIBNAME = "nppc" # libnppc.so.12 1.6M, nppc64_12.dll 288K -def _load_other_in_subprocess(libname, error_messages): + +def _load_anchor_in_subprocess(libname, error_messages): code = f"""\ from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library import json @@ -60,14 +63,13 @@ def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: if loaded is not None: return loaded if libname == "nvvm": - # Use cudart as anchor point (libcudart.so.12 is only ~720K, cudart64_12.dll ~560K). - loaded_cudart = check_if_already_loaded_from_elsewhere("cudart") - if loaded_cudart is not None: - found.retry_with_other_abs_path(loaded_cudart.abs_path) + loaded_anchor = check_if_already_loaded_from_elsewhere(ANCHOR_LIBNAME) + if loaded_anchor is not None: + found.retry_with_anchor_abs_path(loaded_anchor.abs_path) else: - cudart_abs_path = _load_other_in_subprocess("cudart", found.error_messages) - if cudart_abs_path is not None: - found.retry_with_other_abs_path(cudart_abs_path) + anchor_abs_path = _load_anchor_in_subprocess(ANCHOR_LIBNAME, found.error_messages) + if anchor_abs_path is not None: + found.retry_with_anchor_abs_path(anchor_abs_path) found.raise_if_abs_path_is_None() # Load the library from the found path From f978e67ece5503b814f0b4c5984fc87310343a6f Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 20:25:52 -0700 Subject: [PATCH 19/28] Implement CUDA_PYTHON_CUDA_HOME_PRIORITY first, last, with default first --- .../find_nvidia_dynamic_library.py | 28 ++++++++++++++++--- .../load_nvidia_dynamic_library.py | 4 ++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 12bed47fb..8f7bd9e0c 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -53,15 +53,23 @@ def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, a return None -def _get_cuda_home(): +def _get_cuda_home(priority): + supported_priorities = ("first", "last") + assert priority in supported_priorities + env_priority = os.environ.get("CUDA_PYTHON_CUDA_HOME_PRIORITY") + if env_priority: + if env_priority not in supported_priorities: + raise RuntimeError(f"Invalid CUDA_PYTHON_CUDA_HOME_PRIORITY {env_priority!r} ({supported_priorities=})") + if priority != env_priority: + return None cuda_home = os.environ.get("CUDA_HOME") if cuda_home is None: cuda_home = os.environ.get("CUDA_PATH") return cuda_home -def _find_lib_dir_using_cuda_home(libname): - cuda_home = _get_cuda_home() +def _find_lib_dir_using_cuda_home(libname, priority): + cuda_home = _get_cuda_home(priority) if cuda_home is None: return None if IS_WINDOWS: @@ -126,7 +134,7 @@ def __init__(self, libname: str): self.attachments = [] self.abs_path = None - cuda_home_lib_dir = _find_lib_dir_using_cuda_home(libname) + cuda_home_lib_dir = _find_lib_dir_using_cuda_home(libname, "first") if IS_WINDOWS: self.lib_searched_for = f"{libname}*.dll" if cuda_home_lib_dir is not None: @@ -148,6 +156,18 @@ def __init__(self, libname: str): libname, self.lib_searched_for, self.error_messages, self.attachments ) + def 
retry_with_cuda_home_priority_last(self): + cuda_home_lib_dir = _find_lib_dir_using_cuda_home(self.libname, "last") + if cuda_home_lib_dir is not None: + if IS_WINDOWS: + self.abs_path = _find_dll_using_lib_dir( + cuda_home_lib_dir, self.libname, self.error_messages, self.attachments + ) + else: + self.abs_path = _find_so_using_lib_dir( + cuda_home_lib_dir, self.lib_searched_for, self.error_messages, self.attachments + ) + def retry_with_anchor_abs_path(self, anchor_abs_path): assert self.libname == "nvvm" nvvm_lib_dir = _find_nvvm_lib_dir_from_anchor_abs_path(anchor_abs_path) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index e9195e7b5..b9514e068 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -62,7 +62,7 @@ def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: loaded = load_with_system_search(libname, found.lib_searched_for) if loaded is not None: return loaded - if libname == "nvvm": + if libname == "nvvm" and ANCHOR_LIBNAME is not None: loaded_anchor = check_if_already_loaded_from_elsewhere(ANCHOR_LIBNAME) if loaded_anchor is not None: found.retry_with_anchor_abs_path(loaded_anchor.abs_path) @@ -70,6 +70,8 @@ def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: anchor_abs_path = _load_anchor_in_subprocess(ANCHOR_LIBNAME, found.error_messages) if anchor_abs_path is not None: found.retry_with_anchor_abs_path(anchor_abs_path) + if found.abs_path is None: + found.retry_with_cuda_home_priority_last() found.raise_if_abs_path_is_None() # Load the library from the found path From 782fcf6f0ba07ad48f1649907f8e28de7c18c7d6 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 20:30:46 -0700 Subject: [PATCH 20/28] Remove retry_with_anchor_abs_path() and make retry_with_cuda_home_priority_last() the default. --- .../find_nvidia_dynamic_library.py | 25 +++---------------- .../load_nvidia_dynamic_library.py | 14 +---------- 2 files changed, 5 insertions(+), 34 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index 8f7bd9e0c..f58c31420 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -53,23 +53,15 @@ def _find_dll_using_nvidia_bin_dirs(libname, lib_searched_for, error_messages, a return None -def _get_cuda_home(priority): - supported_priorities = ("first", "last") - assert priority in supported_priorities - env_priority = os.environ.get("CUDA_PYTHON_CUDA_HOME_PRIORITY") - if env_priority: - if env_priority not in supported_priorities: - raise RuntimeError(f"Invalid CUDA_PYTHON_CUDA_HOME_PRIORITY {env_priority!r} ({supported_priorities=})") - if priority != env_priority: - return None +def _get_cuda_home(): cuda_home = os.environ.get("CUDA_HOME") if cuda_home is None: cuda_home = os.environ.get("CUDA_PATH") return cuda_home -def _find_lib_dir_using_cuda_home(libname, priority): - cuda_home = _get_cuda_home(priority) +def _find_lib_dir_using_cuda_home(libname): + cuda_home = _get_cuda_home() if cuda_home is None: return None if IS_WINDOWS: @@ -134,30 +126,21 @@ def __init__(self, libname: str): self.attachments = [] self.abs_path = None - cuda_home_lib_dir = _find_lib_dir_using_cuda_home(libname, "first") if IS_WINDOWS: self.lib_searched_for = f"{libname}*.dll" - if cuda_home_lib_dir is not None: - self.abs_path = _find_dll_using_lib_dir( - cuda_home_lib_dir, libname, self.error_messages, self.attachments - ) if self.abs_path is None: self.abs_path = 
_find_dll_using_nvidia_bin_dirs( libname, self.lib_searched_for, self.error_messages, self.attachments ) else: self.lib_searched_for = f"lib{libname}.so" - if cuda_home_lib_dir is not None: - self.abs_path = _find_so_using_lib_dir( - cuda_home_lib_dir, self.lib_searched_for, self.error_messages, self.attachments - ) if self.abs_path is None: self.abs_path = _find_so_using_nvidia_lib_dirs( libname, self.lib_searched_for, self.error_messages, self.attachments ) def retry_with_cuda_home_priority_last(self): - cuda_home_lib_dir = _find_lib_dir_using_cuda_home(self.libname, "last") + cuda_home_lib_dir = _find_lib_dir_using_cuda_home(self.libname) if cuda_home_lib_dir is not None: if IS_WINDOWS: self.abs_path = _find_dll_using_lib_dir( diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index b9514e068..358785852 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -26,9 +26,6 @@ load_with_system_search, ) -# "nvvm" is found from this anchor -ANCHOR_LIBNAME = "nppc" # libnppc.so.12 1.6M, nppc64_12.dll 288K - def _load_anchor_in_subprocess(libname, error_messages): code = f"""\ @@ -62,16 +59,7 @@ def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: loaded = load_with_system_search(libname, found.lib_searched_for) if loaded is not None: return loaded - if libname == "nvvm" and ANCHOR_LIBNAME is not None: - loaded_anchor = check_if_already_loaded_from_elsewhere(ANCHOR_LIBNAME) - if loaded_anchor is not None: - found.retry_with_anchor_abs_path(loaded_anchor.abs_path) - else: - anchor_abs_path = _load_anchor_in_subprocess(ANCHOR_LIBNAME, found.error_messages) - if anchor_abs_path is not None: - found.retry_with_anchor_abs_path(anchor_abs_path) - if found.abs_path is None: - found.retry_with_cuda_home_priority_last() + 
found.retry_with_cuda_home_priority_last() found.raise_if_abs_path_is_None() # Load the library from the found path From 676ecb2d8fe88b78c6002ed4e4f5d8888b97f2ec Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 20:40:58 -0700 Subject: [PATCH 21/28] Update README.md to reflect new search priority --- cuda_bindings/cuda/bindings/_path_finder/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/README.md b/cuda_bindings/cuda/bindings/_path_finder/README.md index 5096246b8..fa51b56fa 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/README.md +++ b/cuda_bindings/cuda/bindings/_path_finder/README.md @@ -24,14 +24,10 @@ strategy for locating NVIDIA shared libraries: The absolute path of the already loaded library will be returned, along with the handle to the library. -1. **Environment variables** - - Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set - (in that order). - -2. **NVIDIA Python wheels** +1. **NVIDIA Python wheels** - Scans all site-packages to find libraries installed via NVIDIA Python wheels. -3. **OS default mechanisms / Conda environments** +2. **OS default mechanisms / Conda environments** - Falls back to native loader: - `dlopen()` on Linux - `LoadLibraryW()` on Windows @@ -42,6 +38,10 @@ strategy for locating NVIDIA shared libraries: - Linux: Via `$ORIGIN/../lib` on `RPATH` (of the `python` binary) - Windows: Via `%CONDA_PREFIX%\Library\bin` on system `PATH` +3. **Environment variables** + - Relies on `CUDA_HOME` or `CUDA_PATH` environment variables if set + (in that order). + Note that the search is done on a per-library basis. There is no centralized mechanism that ensures all libraries are found in the same way. From 73498c06e2176d3cfac0e198fe80e2d85f7ef2a2 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 21:24:27 -0700 Subject: [PATCH 22/28] SUPPORTED_LINUX_SONAMES does not need updates for CTK 12.9.0 --- cuda_bindings/cuda/bindings/_path_finder/supported_libs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py index e5d4c28c1..06023b21b 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -111,6 +111,7 @@ # cuda_12.5.1_555.42.06_linux.run # cuda_12.6.2_560.35.03_linux.run # cuda_12.8.0_570.86.10_linux.run +# cuda_12.9.0_575.51.03_linux.run # Generated with toolshed/build_path_finder_sonames.py SUPPORTED_LINUX_SONAMES = { "cublas": ( From 7661c13b4feff9157a08de4b7b04d3bc8748950b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 May 2025 21:30:31 -0700 Subject: [PATCH 23/28] The only addition to SUPPORTED_WINDOWS_DLLS for CTK 12.9.0 is nvvm70.dll --- cuda_bindings/cuda/bindings/_path_finder/supported_libs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py index 06023b21b..14dc98a96 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py +++ b/cuda_bindings/cuda/bindings/_path_finder/supported_libs.py @@ -233,6 +233,7 @@ # cuda_12.5.1_555.85_windows.exe # cuda_12.6.2_560.94_windows.exe # cuda_12.8.1_572.61_windows.exe +# cuda_12.9.0_576.02_windows.txt # Generated with toolshed/build_path_finder_dlls.py (WITH MANUAL EDITS) SUPPORTED_WINDOWS_DLLS = { "cublas": ( @@ -340,6 +341,7 @@ "nvvm64.dll", "nvvm64_33_0.dll", "nvvm64_40_0.dll", + "nvvm70.dll", ), } From ddea021832bb36249abbacf8b0f6a070c6cd7bb1 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 5 May 2025 21:39:11 -0700 Subject: [PATCH 24/28] Make OSError in load_dl_windows.py abs_path_for_dynamic_library() more informative. --- .../cuda/bindings/_path_finder/load_dl_windows.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py index 1f0c9c7e2..7004500dc 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py @@ -35,7 +35,7 @@ def add_dll_directory(dll_abs_path: str) -> None: os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) -def abs_path_for_dynamic_library(handle: int) -> str: +def abs_path_for_dynamic_library(libname: str, handle: int) -> str: """Get the absolute path of a loaded dynamic library on Windows. Args: @@ -57,7 +57,8 @@ def abs_path_for_dynamic_library(handle: int) -> str: if n_chars == 0: raise OSError( - "GetModuleFileNameW failed. Long paths may require enabling the " + f"GetModuleFileNameW failed ({libname=!r}, {buf_size=}). " + "Long paths may require enabling the " "Windows 10+ long path registry setting. 
See: " "https://docs.python.org/3/using/windows.html#removing-the-max-path-limitation" ) @@ -99,7 +100,7 @@ def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]: except pywintypes.error: continue else: - return LoadedDL(handle, abs_path_for_dynamic_library(handle), True) + return LoadedDL(handle, abs_path_for_dynamic_library(libname, handle), True) return None @@ -118,7 +119,7 @@ def load_with_system_search(libname: str, _unused: str) -> Optional[LoadedDL]: for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) if handle: - return LoadedDL(handle, abs_path_for_dynamic_library(handle), False) + return LoadedDL(handle, abs_path_for_dynamic_library(libname, handle), False) return None From 55583d93a8543637cdc04fe238c3f1ceae946c23 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 6 May 2025 01:40:42 -0700 Subject: [PATCH 25/28] run_cuda_bindings_path_finder.py: optionally use args as libnames (to aid debugging) --- toolshed/run_cuda_bindings_path_finder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/toolshed/run_cuda_bindings_path_finder.py b/toolshed/run_cuda_bindings_path_finder.py index cf173aa41..ca2193a81 100644 --- a/toolshed/run_cuda_bindings_path_finder.py +++ b/toolshed/run_cuda_bindings_path_finder.py @@ -14,9 +14,12 @@ def run(args): - assert len(args) == 0 + if args: + libnames = args + else: + libnames = ALL_LIBNAMES - for libname in ALL_LIBNAMES: + for libname in libnames: print(f"{libname=}") try: loaded_dl = path_finder._load_nvidia_dynamic_library(libname) From a576327216b91ead833a64fcdf278fc6c5cccd6b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 6 May 2025 02:06:18 -0700 Subject: [PATCH 26/28] Bug fix in load_dl_windows.py: ctypes.windll.kernel32.LoadLibraryW() returns an incompatible `handle`. Use win32api.LoadLibraryEx() instead to ensure self-consistency. 
--- .../bindings/_path_finder/load_dl_common.py | 22 ++++++++----------- .../bindings/_path_finder/load_dl_windows.py | 9 +++++--- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py index c5595f1de..7d896e10b 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -5,23 +5,19 @@ from typing import Callable, Optional from cuda.bindings._path_finder.run_python_code_safely import run_python_code_safely -from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES +from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES, IS_WINDOWS +if IS_WINDOWS: + import pywintypes -@dataclass -class LoadedDL: - """Represents a loaded dynamic library. + HandleType = pywintypes.HANDLE +else: + HandleType = int - Attributes: - handle: The library handle (can be converted to void* in Cython) - abs_path: The absolute path to the library file - was_already_loaded_from_elsewhere: Whether the library was already loaded - """ - # ATTENTION: To convert `handle` back to `void*` in cython: - # Linux: `cdef void* ptr = <void*><uintptr_t>` - # Windows: `cdef void* ptr = <void*><intptr_t>` - handle: int +@dataclass +class LoadedDL: + handle: HandleType abs_path: Optional[str] was_already_loaded_from_elsewhere: bool diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py index 7004500dc..ec305be92 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_windows.py @@ -35,7 +35,7 @@ def add_dll_directory(dll_abs_path: str) -> None: os.environ["PATH"] = dirpath if curr_path is None else os.pathsep.join((curr_path, dirpath)) -def abs_path_for_dynamic_library(libname: str, handle: int) -> str: +def abs_path_for_dynamic_library(libname: str,
handle: pywintypes.HANDLE) -> str: """Get the absolute path of a loaded dynamic library on Windows. Args: @@ -117,8 +117,11 @@ def load_with_system_search(libname: str, _unused: str) -> Optional[LoadedDL]: from cuda.bindings._path_finder.supported_libs import SUPPORTED_WINDOWS_DLLS for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()): - handle = ctypes.windll.kernel32.LoadLibraryW(ctypes.c_wchar_p(dll_name)) - if handle: + try: + handle = win32api.LoadLibraryEx(dll_name, 0, 0) + except pywintypes.error: + continue + else: return LoadedDL(handle, abs_path_for_dynamic_library(libname, handle), False) return None From 5fb2d1fec4932b7b86b4111d87ba73ce8a3c1f71 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 6 May 2025 08:37:11 -0700 Subject: [PATCH 27/28] Remove _find_nvidia_dynamic_library.retry_with_anchor_abs_path() method. Move run_python_code_safely.py to test/ directory. --- .../find_nvidia_dynamic_library.py | 23 ------------------- .../bindings/_path_finder/load_dl_common.py | 14 ----------- .../load_nvidia_dynamic_library.py | 23 +------------------ .../run_python_code_safely.py | 0 cuda_bindings/tests/test_path_finder_load.py | 12 ++++++++-- 5 files changed, 11 insertions(+), 61 deletions(-) rename cuda_bindings/{cuda/bindings/_path_finder => tests}/run_python_code_safely.py (100%) diff --git a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py index f58c31420..9835b72d0 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/find_nvidia_dynamic_library.py @@ -108,17 +108,6 @@ def _find_dll_using_lib_dir(lib_dir, libname, error_messages, attachments): return None -def _find_nvvm_lib_dir_from_anchor_abs_path(anchor_abs_path): - nvvm_subdir = "bin" if IS_WINDOWS else "lib64" - while anchor_abs_path: - if os.path.isdir(anchor_abs_path): - nvvm_lib_dir = 
os.path.join(anchor_abs_path, "nvvm", nvvm_subdir) - if os.path.isdir(nvvm_lib_dir): - return nvvm_lib_dir - anchor_abs_path = os.path.dirname(anchor_abs_path) - return None - - class _find_nvidia_dynamic_library: def __init__(self, libname: str): self.libname = libname @@ -151,18 +140,6 @@ def retry_with_cuda_home_priority_last(self): cuda_home_lib_dir, self.lib_searched_for, self.error_messages, self.attachments ) - def retry_with_anchor_abs_path(self, anchor_abs_path): - assert self.libname == "nvvm" - nvvm_lib_dir = _find_nvvm_lib_dir_from_anchor_abs_path(anchor_abs_path) - if nvvm_lib_dir is None: - return - if IS_WINDOWS: - self.abs_path = _find_dll_using_lib_dir(nvvm_lib_dir, self.libname, self.error_messages, self.attachments) - else: - self.abs_path = _find_so_using_lib_dir( - nvvm_lib_dir, self.lib_searched_for, self.error_messages, self.attachments - ) - def raise_if_abs_path_is_None(self): if self.abs_path: return self.abs_path diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py index 7d896e10b..034b9d433 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_dl_common.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from typing import Callable, Optional -from cuda.bindings._path_finder.run_python_code_safely import run_python_code_safely from cuda.bindings._path_finder.supported_libs import DIRECT_DEPENDENCIES, IS_WINDOWS if IS_WINDOWS: @@ -35,16 +34,3 @@ def load_dependencies(libname: str, load_func: Callable[[str], LoadedDL]) -> Non """ for dep in DIRECT_DEPENDENCIES.get(libname, ()): load_func(dep) - - -def load_in_subprocess(python_code, timeout=30): - # This is to avoid loading libraries into the parent process. 
- return run_python_code_safely(python_code, timeout=timeout) - - -def build_subprocess_failed_for_libname_message(libname, result): - return ( - f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" - f"--- stdout-from-subprocess ---\n{result.stdout}\n" - f"--- stderr-from-subprocess ---\n{result.stderr}\n" - ) diff --git a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py index 358785852..f8fe5ce4a 100644 --- a/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py +++ b/cuda_bindings/cuda/bindings/_path_finder/load_nvidia_dynamic_library.py @@ -2,15 +2,9 @@ # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import functools -import json from cuda.bindings._path_finder.find_nvidia_dynamic_library import _find_nvidia_dynamic_library -from cuda.bindings._path_finder.load_dl_common import ( - LoadedDL, - build_subprocess_failed_for_libname_message, - load_dependencies, - load_in_subprocess, -) +from cuda.bindings._path_finder.load_dl_common import LoadedDL, load_dependencies from cuda.bindings._path_finder.supported_libs import IS_WINDOWS if IS_WINDOWS: @@ -27,21 +21,6 @@ ) -def _load_anchor_in_subprocess(libname, error_messages): - code = f"""\ -from cuda.bindings._path_finder.load_nvidia_dynamic_library import load_nvidia_dynamic_library -import json -import sys -loaded = load_nvidia_dynamic_library({libname!r}) -sys.stdout.write(json.dumps(loaded.abs_path, ensure_ascii=True)) -""" - result = load_in_subprocess(code) - if result.returncode == 0: - return json.loads(result.stdout) - error_messages.extend(build_subprocess_failed_for_libname_message(libname, result).splitlines()) - return None - - def _load_nvidia_dynamic_library_no_cache(libname: str) -> LoadedDL: # Check whether the library is already loaded into the current process by # some other component. 
This check uses OS-level mechanisms (e.g., diff --git a/cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py b/cuda_bindings/tests/run_python_code_safely.py similarity index 100% rename from cuda_bindings/cuda/bindings/_path_finder/run_python_code_safely.py rename to cuda_bindings/tests/run_python_code_safely.py diff --git a/cuda_bindings/tests/test_path_finder_load.py b/cuda_bindings/tests/test_path_finder_load.py index 8fc117df5..5c21e8a05 100644 --- a/cuda_bindings/tests/test_path_finder_load.py +++ b/cuda_bindings/tests/test_path_finder_load.py @@ -5,10 +5,10 @@ import sys import pytest +from run_python_code_safely import run_python_code_safely from cuda.bindings import path_finder from cuda.bindings._path_finder import supported_libs -from cuda.bindings._path_finder.load_dl_common import build_subprocess_failed_for_libname_message, load_in_subprocess ALL_LIBNAMES = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_ALL ALL_LIBNAMES_LINUX = path_finder._SUPPORTED_LIBNAMES + supported_libs.PARTIALLY_SUPPORTED_LIBNAMES_LINUX @@ -38,6 +38,14 @@ def test_all_libnames_expected_lib_symbols_consistency(): assert tuple(sorted(ALL_LIBNAMES)) == tuple(sorted(supported_libs.EXPECTED_LIB_SYMBOLS.keys())) +def build_subprocess_failed_for_libname_message(libname, result): + return ( + f"Subprocess failed for {libname=!r} with exit code {result.returncode}\n" + f"--- stdout-from-subprocess ---\n{result.stdout}\n" + f"--- stderr-from-subprocess ---\n{result.stderr}\n" + ) + + @pytest.mark.parametrize("libname", TEST_FIND_OR_LOAD_LIBNAMES) def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname): # We intentionally run each dynamic library operation in a subprocess @@ -68,7 +76,7 @@ def test_find_or_load_nvidia_dynamic_library(info_summary_append, libname): print(f"{{loaded_dl_fresh.abs_path!r}}") """ - result = load_in_subprocess(code) + result = run_python_code_safely(code, timeout=30) if result.returncode == 0: 
info_summary_append(f"abs_path={result.stdout.rstrip()}") else: From c6f220554bdbf761851f69f4a9b4372284911ca3 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 6 May 2025 10:02:33 -0700 Subject: [PATCH 28/28] Add missing SPDX-License-Identifier --- cuda_bindings/tests/run_python_code_safely.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuda_bindings/tests/run_python_code_safely.py b/cuda_bindings/tests/run_python_code_safely.py index 316cb3885..349ed9682 100644 --- a/cuda_bindings/tests/run_python_code_safely.py +++ b/cuda_bindings/tests/run_python_code_safely.py @@ -1,3 +1,6 @@ +# Copyright 2025 NVIDIA Corporation. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + import multiprocessing import queue # for Empty import subprocess # nosec B404