Source code for besta.utils

"""General utility helpers used across BESTA."""

import os
import functools
import numpy as np
import psutil

[docs] def mkdir(path: str) -> None: """Create a directory path if it does not already exist.""" os.makedirs(path, exist_ok=True)
[docs] def available_memory_bytes() -> int: """Return the currently available system memory in bytes.""" return int(psutil.virtual_memory().available)
[docs] def convert_bytes(size_bytes, to_unit): """Convert a byte count into ``B``, ``KB``, ``MB``, or ``GB``.""" to_unit = to_unit.upper() if to_unit == 'B': return size_bytes elif to_unit == 'KB': return size_bytes / 1024 elif to_unit == 'MB': return size_bytes / (1024 ** 2) elif to_unit == 'GB': return size_bytes / (1024 ** 3) else: raise ValueError("Unit must be 'B', 'KB', 'MB', or 'GB'")
[docs] def predict_array_memory(array_shape, dtype=np.float64, unit='MB'): """ Predict the memory required for a NumPy array with given shape and data type. Parameters ---------- array_shape : tuple Shape of the array (e.g., (1000, 1000) for a 1000x1000 array). dtype : numpy.dtype, optional NumPy data type (default is np.float64). unit : {'B', 'KB', 'MB', 'GB'}, optional Unit for the returned memory size (default is 'MB'). Returns ------- float Estimated memory requirement in the specified unit. Raises ------ ValueError If invalid dtype or unit is provided. Examples -------- >>> predict_array_memory((1000, 1000)) 7.63 # MB for float64 array >>> predict_array_memory((500, 500, 500), np.float32, 'GB') 0.47 # GB for float32 array """ try: dtype_obj = np.dtype(dtype) element_size = dtype_obj.itemsize except Exception as e: raise ValueError(f"Invalid dtype provided: {e}") total_elements = np.prod(array_shape) total_bytes = total_elements * element_size return convert_bytes(total_bytes, unit)
[docs] def check_array_memory(array_shape, dtype=np.float64, unit='MB', safety_margin=0.2): """ Check if the current machine has enough RAM for creating a given array. Parameters ---------- array_shape : tuple Shape of the array (e.g., (1000, 1000) for a 1000x1000 array). dtype : numpy.dtype, optional NumPy data type (default is np.float64). unit : {'B', 'KB', 'MB', 'GB'}, optional Unit for error message reporting (default is 'MB'). safety_margin : float, optional Additional margin (fraction of the array size) for ensuring good performance. Default is 0.2 (20% more than the predicted memory requirement). Returns ------- bool True if sufficient memory is available. Raises ------ MemoryError If insufficient memory is available for the array. ValueError If invalid dtype, shape, or unit is provided. Examples -------- >>> check_array_memory((10000, 10000)) True # If 800MB+ available >>> check_array_memory((50000, 50000)) MemoryError: Insufficient memory available... """ # Calculate required memory try: required_mem = predict_array_memory(array_shape, dtype, 'B') except ValueError as e: raise ValueError(f"Invalid input parameters: {str(e)}") # Get available memory available_mem = available_memory_bytes() # Add safety margin total_needed = required_mem * (1 + safety_margin) # Convert for error message if total_needed > available_mem: req_mem_display = convert_bytes(required_mem, unit) avail_mem_display = convert_bytes(available_mem, unit) needed_mem_display = convert_bytes(total_needed, unit) raise MemoryError( f"Insufficient memory available. " f"Required: {req_mem_display:.2f} {unit} (plus safety margin), " f"Available: {avail_mem_display:.2f} {unit}, " f"Needed: {needed_mem_display:.2f} {unit}" ) return True
[docs] def expand_env_vars(arg_spec=0): """ Decorator that expands environment variables in a specified argument. The target argument can be specified either by position (int) or name (str). If no argument is specified, expands the first positional argument (default). Parameters ---------- arg_spec : int or str, optional Either the position (0-based index) or name of the argument to expand. Default is 0 (first positional argument). Returns ------- callable A decorator function that wraps the original function. Examples -------- # Expand first positional argument (default) >>> @expand_env_vars() ... def load_file(path): ... print(path) >>> load_file("$HOME/test.txt") # Expand named argument >>> @expand_env_vars('filename') ... def process_file(filename, mode='r'): ... print(filename, mode) >>> process_file("${TMPDIR}/data.txt") # Expand argument at position 1 >>> @expand_env_vars(1) ... def save_data(header, filepath): ... print(header, filepath) >>> save_data("results", "$APPDIR/output.dat") """ def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): # Handle different argument specifications if isinstance(arg_spec, int): # Positional argument expansion if arg_spec < len(args): expanded = os.path.expandvars(args[arg_spec]) args = args[:arg_spec] + (expanded,) + args[arg_spec+1:] elif isinstance(arg_spec, str): # Keyword argument expansion if arg_spec in kwargs: kwargs[arg_spec] = os.path.expandvars(kwargs[arg_spec]) else: raise TypeError("arg_spec must be int (position) or str (argument name)") return func(*args, **kwargs) return wrapper return decorator