-
Notifications
You must be signed in to change notification settings - Fork 698
fix library detection #873
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,34 +11,18 @@ | |
|
||
HEADER_WIDTH = 60 | ||
|
||
def execute_and_return(command_string: str) -> Tuple[str, str]: | ||
def _decode(subprocess_err_out_tuple): | ||
return tuple( | ||
to_decode.decode("UTF-8").strip() | ||
for to_decode in subprocess_err_out_tuple | ||
) | ||
|
||
def execute_and_return_decoded_std_streams(command_string): | ||
return _decode( | ||
subprocess.Popen( | ||
shlex.split(command_string), | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.PIPE, | ||
).communicate() | ||
) | ||
|
||
std_out, std_err = execute_and_return_decoded_std_streams(command_string) | ||
return std_out, std_err | ||
|
||
def find_file_recursive(folder, filename): | ||
folder = shlex.quote(folder) | ||
filename = shlex.quote(filename) | ||
cmd = f'find {folder} -name {filename}' | ||
out, err = execute_and_return(cmd) | ||
if len(err) > 0: | ||
raise RuntimeError('Something when wrong when trying to find file. Maybe you do not have a linux system?') | ||
import glob | ||
outs = [] | ||
try: | ||
for ext in ["so", "dll", "dylib"]: | ||
out = glob.glob(os.path.join(folder, "**", filename + ext)) | ||
outs.extend(out) | ||
except Exception as e: | ||
raise RuntimeError('Error: Something when wrong when trying to find file. {e}') | ||
|
||
return out | ||
return outs | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does returning a list instead of a single path cause errors in some edge cases? As I understand it, we either have |
||
|
||
|
||
def generate_bug_report_information(): | ||
|
@@ -48,30 +32,35 @@ def generate_bug_report_information(): | |
print('') | ||
|
||
if 'CONDA_PREFIX' in os.environ: | ||
paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*so') | ||
paths = find_file_recursive(os.environ['CONDA_PREFIX'], '*cuda*') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to make sure that this does not match non-binary files. How can we make sure that this is correct in most environments? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
print_header("ANACONDA CUDA PATHS") | ||
print(paths) | ||
print('') | ||
if isdir('/usr/local/'): | ||
paths = find_file_recursive('/usr/local', '*cuda*so') | ||
paths = find_file_recursive('/usr/local', '*cuda*') | ||
print_header("/usr/local CUDA PATHS") | ||
print(paths) | ||
print('') | ||
if 'CUDA_PATH' in os.environ and isdir(os.environ['CUDA_PATH']): | ||
paths = find_file_recursive(os.environ['CUDA_PATH'], '*cuda*') | ||
Titus-von-Koeller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
print_header("CUDA PATHS") | ||
print(paths) | ||
print('') | ||
|
||
if isdir(os.getcwd()): | ||
paths = find_file_recursive(os.getcwd(), '*cuda*so') | ||
paths = find_file_recursive(os.getcwd(), '*cuda*') | ||
print_header("WORKING DIRECTORY CUDA PATHS") | ||
print(paths) | ||
print('') | ||
|
||
print_header("LD_LIBRARY CUDA PATHS") | ||
if 'LD_LIBRARY_PATH' in os.environ: | ||
lib_path = os.environ['LD_LIBRARY_PATH'].strip() | ||
for path in set(lib_path.split(':')): | ||
for path in set(lib_path.split(os.pathsep)): | ||
try: | ||
if isdir(path): | ||
print_header(f"{path} CUDA PATHS") | ||
paths = find_file_recursive(path, '*cuda*so') | ||
paths = find_file_recursive(path, '*cuda*') | ||
print(paths) | ||
except: | ||
print(f'Could not read LD_LIBRARY_PATH: {path}') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
import ctypes as ct | ||
import os | ||
import errno | ||
import platform | ||
import torch | ||
from warnings import warn | ||
from itertools import product | ||
|
@@ -31,7 +32,11 @@ | |
# libcudart.so is missing by default for a conda install with PyTorch 2.0 and instead | ||
# we have libcudart.so.11.0 which causes a lot of errors before | ||
# not sure if libcudart.so.12.0 exists in pytorch installs, but it does not hurt | ||
CUDA_RUNTIME_LIBS: list = ["libcudart.so", 'libcudart.so.11.0', 'libcudart.so.12.0', 'libcudart.so.12.1', 'libcudart.so.12.2'] | ||
system = platform.system() | ||
if system == 'Windows': | ||
CUDA_RUNTIME_LIBS: list = ["nvcuda.dll"] | ||
else: # Linux or other | ||
CUDA_RUNTIME_LIBS: list = ["libcudart.so", 'libcudart.so.11.0', 'libcudart.so.12.0', 'libcudart.so.12.1', 'libcudart.so.12.2'] | ||
|
||
# this is a order list of backup paths to search CUDA in, if it cannot be found in the main environmental paths | ||
backup_paths = [] | ||
|
@@ -114,7 +119,9 @@ def manual_override(self): | |
'For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n' | ||
f'Loading CUDA version: BNB_CUDA_VERSION={os.environ["BNB_CUDA_VERSION"]}' | ||
f'\n{"="*80}\n\n')) | ||
self.binary_name = self.binary_name[:-6] + f'{os.environ["BNB_CUDA_VERSION"]}.so' | ||
binary_name = self.binary_name.rsplit(".", 1)[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a bit of a mess (based on my messy previous code). I have not looked at the details. Is there a way we can clean this up slightly? |
||
suffix = ".so" if os.name != "nt" else ".dll" | ||
self.binary_name = binary_name[:-3] + f'{os.environ["BNB_CUDA_VERSION"]}.{suffix}' | ||
|
||
def run_cuda_setup(self): | ||
self.initialized = True | ||
|
@@ -131,10 +138,11 @@ def run_cuda_setup(self): | |
package_dir = Path(__file__).parent.parent | ||
binary_path = package_dir / self.binary_name | ||
|
||
suffix = ".so" if os.name != "nt" else ".dll" | ||
try: | ||
if not binary_path.exists(): | ||
self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?") | ||
legacy_binary_name = "libbitsandbytes_cpu.so" | ||
legacy_binary_name = f"libbitsandbytes_cpu{suffix}" | ||
self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...") | ||
binary_path = package_dir / legacy_binary_name | ||
if not binary_path.exists() or torch.cuda.is_available(): | ||
|
@@ -153,10 +161,10 @@ def run_cuda_setup(self): | |
self.add_log_entry('') | ||
self.generate_instructions() | ||
raise Exception('CUDA SETUP: Setup Failed!') | ||
self.lib = ct.cdll.LoadLibrary(binary_path) | ||
self.lib = ct.cdll.LoadLibrary(str(binary_path)) | ||
else: | ||
self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path}...") | ||
self.lib = ct.cdll.LoadLibrary(binary_path) | ||
self.add_log_entry(f"CUDA SETUP: Loading binary {binary_path!s}...") | ||
self.lib = ct.cdll.LoadLibrary(str(binary_path)) | ||
except Exception as ex: | ||
self.add_log_entry(str(ex)) | ||
|
||
|
@@ -190,7 +198,7 @@ def is_cublasLt_compatible(cc): | |
return has_cublaslt | ||
|
||
def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]: | ||
return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path} | ||
return {Path(ld_path) for ld_path in paths_list_candidate.split(os.pathsep) if ld_path} | ||
|
||
|
||
def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]: | ||
|
@@ -336,13 +344,14 @@ def get_compute_capabilities(): | |
|
||
def evaluate_cuda_setup(): | ||
cuda_setup = CUDASetup.get_instance() | ||
suffix = ".so" if os.name != "nt" else ".dll" | ||
if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0': | ||
cuda_setup.add_log_entry('') | ||
cuda_setup.add_log_entry('='*35 + 'BUG REPORT' + '='*35) | ||
cuda_setup.add_log_entry(('Welcome to bitsandbytes. For bug reports, please run\n\npython -m bitsandbytes\n\n'), | ||
('and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')) | ||
cuda_setup.add_log_entry('='*80) | ||
if not torch.cuda.is_available(): return 'libbitsandbytes_cpu.so', None, None, None | ||
if not torch.cuda.is_available(): return f'libbitsandbytes_cpu{suffix}', None, None, None | ||
|
||
cudart_path = determine_cuda_runtime_lib_path() | ||
ccs = get_compute_capabilities() | ||
|
@@ -366,9 +375,11 @@ def evaluate_cuda_setup(): | |
# since most installations will have the libcudart.so installed, but not the compiler | ||
|
||
if has_cublaslt: | ||
binary_name = f"libbitsandbytes_cuda{cuda_version_string}.so" | ||
binary_name = f"libbitsandbytes_cuda{cuda_version_string}" | ||
else: | ||
"if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so" | ||
binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt.so" | ||
"if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt" | ||
binary_name = f"libbitsandbytes_cuda{cuda_version_string}_nocublaslt" | ||
|
||
binary_name = f"{binary_name}{suffix}" | ||
|
||
return binary_name, cudart_path, cc, cuda_version_string |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
This is missing an `f` prefix, so the exception is never printed. (That's fixed in #984.)