InvokeAI/invokeai/backend/util/devices.py

from typing import Dict, Literal, Optional, Union

import torch
from deprecated import deprecated

from invokeai.app.services.config.config_default import get_config

# legacy APIs
# Closed set of precision names accepted by this module; each name is a key
# of NAME_TO_PRECISION.
TorchPrecisionNames = Literal["float32", "float16", "bfloat16"]
# Module-level device constants, one per backend type handled in this file.
# They carry no device index.
CPU_DEVICE = torch.device("cpu")
CUDA_DEVICE = torch.device("cuda")
MPS_DEVICE = torch.device("mps")


@deprecated("Use TorchDevice.choose_torch_dtype() instead.")  # type: ignore
def choose_precision(device: torch.device) -> TorchPrecisionNames:
    """Return the name of the precision recommended for `device`.

    Deprecated legacy wrapper: the dtype is chosen by
    TorchDevice.choose_torch_dtype() and then translated to its string name
    ("float32", "float16" or "bfloat16") through PRECISION_TO_NAME.
    """
    return PRECISION_TO_NAME[TorchDevice.choose_torch_dtype(device)]


@deprecated("Use TorchDevice.choose_torch_device() instead.")  # type: ignore
def choose_torch_device() -> torch.device:
    """Return the torch.device to use for accelerated inference.

    Deprecated legacy wrapper that simply delegates to
    TorchDevice.choose_torch_device().
    """
    selected_device = TorchDevice.choose_torch_device()
    return selected_device


@deprecated("Use TorchDevice.choose_torch_dtype() instead.")  # type: ignore
def torch_dtype(device: torch.device) -> torch.dtype:
    """Return the torch dtype recommended for `device`.

    Deprecated legacy wrapper that simply delegates to
    TorchDevice.choose_torch_dtype().
    """
    recommended_dtype = TorchDevice.choose_torch_dtype(device)
    return recommended_dtype


# Lookup table from each supported precision name to the torch dtype it denotes.
NAME_TO_PRECISION: Dict[TorchPrecisionNames, torch.dtype] = {
    "float32": torch.float32,
    "float16": torch.float16,
    "bfloat16": torch.bfloat16,
}
# Inverse of NAME_TO_PRECISION: torch dtype -> precision name.
PRECISION_TO_NAME: Dict[torch.dtype, TorchPrecisionNames] = {v: k for k, v in NAME_TO_PRECISION.items()}


class TorchDevice:
    """Abstraction layer for torch devices.

    The class holds no state; every method is a classmethod that consults the
    application config (via get_config()) and/or the torch runtime each time it
    is called.
    """

    CPU_DEVICE = torch.device("cpu")
    CUDA_DEVICE = torch.device("cuda")
    MPS_DEVICE = torch.device("mps")

    @classmethod
    def choose_torch_device(cls) -> torch.device:
        """Return the torch.device to use for accelerated inference.

        A device set explicitly in the app config (anything other than "auto")
        is used as-is. Otherwise CUDA is preferred, then MPS, then CPU. The
        result is passed through normalize().
        """
        app_config = get_config()
        if app_config.device != "auto":
            device = torch.device(app_config.device)
        elif torch.cuda.is_available():
            device = cls.CUDA_DEVICE
        elif torch.backends.mps.is_available():
            device = cls.MPS_DEVICE
        else:
            device = cls.CPU_DEVICE
        return cls.normalize(device)

    @classmethod
    def choose_torch_dtype(cls, device: Optional[torch.device] = None) -> torch.dtype:
        """Return the precision to use for accelerated inference.

        Args:
            device: Device to choose a dtype for. When None, the device
                returned by choose_torch_device() is used.

        Returns:
            float32 for CPU devices and for any accelerator whose backend is
            not available; otherwise the configured precision, with "auto"
            resolving to float16 on CUDA and MPS.
        """
        if device is None:
            device = cls.choose_torch_device()
        config = get_config()

        if device.type == "cuda" and torch.cuda.is_available():
            device_name = torch.cuda.get_device_name(device)
            if "GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name:
                # These GPUs have limited support for float16
                return cls._to_dtype("float32")
            if config.precision == "auto":
                # Default to float16 for CUDA devices
                return cls._to_dtype("float16")
            # Use the user-defined precision
            return cls._to_dtype(config.precision)

        if device.type == "mps" and torch.backends.mps.is_available():
            if config.precision == "auto":
                # Default to float16 for MPS devices
                return cls._to_dtype("float16")
            # Use the user-defined precision
            return cls._to_dtype(config.precision)

        # CPU / safe fallback
        return cls._to_dtype("float32")

    @classmethod
    def get_torch_device_name(cls) -> str:
        """Return the device name for the current torch device.

        For a usable CUDA device this is the name reported by
        torch.cuda.get_device_name(); for every other case it is the
        upper-cased device type (e.g. "CPU", "MPS").
        """
        device = cls.choose_torch_device()
        # Only query the CUDA runtime when it is actually available: the config
        # can force a "cuda" device on a host without CUDA, and
        # torch.cuda.get_device_name() raises there. This mirrors the guard in
        # choose_torch_dtype().
        if device.type == "cuda" and torch.cuda.is_available():
            return torch.cuda.get_device_name(device)
        return device.type.upper()

    @classmethod
    def normalize(cls, device: Union[str, torch.device]) -> torch.device:
        """Add the device index to CUDA devices.

        Non-CUDA devices, CUDA devices that already carry an index, and CUDA
        devices on a host without CUDA are returned unchanged (as a
        torch.device).
        """
        device = torch.device(device)
        if device.index is None and device.type == "cuda" and torch.cuda.is_available():
            device = torch.device(device.type, torch.cuda.current_device())
        return device

    @classmethod
    def empty_cache(cls) -> None:
        """Clear the GPU device cache."""
        # The two checks are independent, so both backends are handled if both
        # report as available.
        if torch.backends.mps.is_available():
            torch.mps.empty_cache()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    @classmethod
    def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
        """Translate a precision name into its torch dtype via NAME_TO_PRECISION.

        Raises:
            KeyError: if `precision_name` is not a key of NAME_TO_PRECISION.
        """
        return NAME_TO_PRECISION[precision_name]
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`from typing import Dict, Literal, Optional, Union`
fix(diffusers_pipeline): ensure `cuda.get_mem_info` always gets a specific device index. Also tighten up the typing of `device` attributes in general. 2023-02-18 00:29:03 +00:00
add support for Apple hardware using MPS acceleration 2022-08-31 04:33:23 +00:00			`import torch`
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`from deprecated import deprecated`
isort wip 2 2023-08-18 15:13:28 +00:00
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`from invokeai.app.services.config.config_default import get_config`
add support for Apple hardware using MPS acceleration 2022-08-31 04:33:23 +00:00
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`# legacy APIs`
			`TorchPrecisionNames = Literal["float32", "float16", "bfloat16"]`
fix(diffusers_pipeline): ensure `cuda.get_mem_info` always gets a specific device index. Also tighten up the typing of `device` attributes in general. 2023-02-18 00:29:03 +00:00			`CPU_DEVICE = torch.device("cpu")`
all files migrated; tweaks needed 2023-03-03 05:02:15 +00:00			`CUDA_DEVICE = torch.device("cuda")`
			`MPS_DEVICE = torch.device("mps")`
all vestiges of ldm.invoke removed 2023-03-03 06:02:00 +00:00
Apply black 2023-07-27 14:54:01 +00:00
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`@deprecated("Use TorchDevice.choose_torch_dtype() instead.") # type: ignore`
			`def choose_precision(device: torch.device) -> TorchPrecisionNames:`
			`"""Return the string representation of the recommended torch device."""`
			`torch_dtype = TorchDevice.choose_torch_dtype(device)`
			`return PRECISION_TO_NAME[torch_dtype]`


			`@deprecated("Use TorchDevice.choose_torch_device() instead.") # type: ignore`
fix(diffusers_pipeline): ensure `cuda.get_mem_info` always gets a specific device index. Also tighten up the typing of `device` attributes in general. 2023-02-18 00:29:03 +00:00			`def choose_torch_device() -> torch.device:`
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`"""Return the torch.device to use for accelerated inference."""`
			`return TorchDevice.choose_torch_device()`


			`@deprecated("Use TorchDevice.choose_torch_dtype() instead.") # type: ignore`
			`def torch_dtype(device: torch.device) -> torch.dtype:`
			`"""Return the torch precision for the recommended torch device."""`
			`return TorchDevice.choose_torch_dtype(device)`


			`NAME_TO_PRECISION: Dict[TorchPrecisionNames, torch.dtype] = {`
			`"float32": torch.float32,`
			`"float16": torch.float16,`
			`"bfloat16": torch.bfloat16,`
			`}`
			`PRECISION_TO_NAME: Dict[torch.dtype, TorchPrecisionNames] = {v: k for k, v in NAME_TO_PRECISION.items()}`


			`class TorchDevice:`
			`"""Abstraction layer for torch devices."""`

fix(backend): mps should not use `non_blocking` We can get black outputs when moving tensors from CPU to MPS. It appears MPS to CPU is fine. See: - https://github.com/pytorch/pytorch/issues/107455 - https://discuss.pytorch.org/t/should-we-set-non-blocking-to-true/38234/28 Changes: - Add properties for each device on `TorchDevice` as a convenience. - Add `get_non_blocking` static method on `TorchDevice`. This utility takes a torch device and returns the flag to be used for non_blocking when moving a tensor to the device provided. - Update model patching and caching APIs to use this new utility. Fixes: #6545 2024-06-27 09:15:23 +00:00			`CPU_DEVICE = torch.device("cpu")`
			`CUDA_DEVICE = torch.device("cuda")`
			`MPS_DEVICE = torch.device("mps")`

[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`@classmethod`
			`def choose_torch_device(cls) -> torch.device:`
			`"""Return the torch.device to use for accelerated inference."""`
			`app_config = get_config()`
			`if app_config.device != "auto":`
			`device = torch.device(app_config.device)`
			`elif torch.cuda.is_available():`
			`device = CUDA_DEVICE`
			`elif torch.backends.mps.is_available():`
			`device = MPS_DEVICE`
feat(backend): clean up choose_precision - Allow user-defined precision on MPS. - Use more explicit logic to handle all possible cases. - Add comments. - Remove the app_config args (they were effectively unused, just get the config using the singleton getter util) 2024-04-07 04:28:29 +00:00			`else:`
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`device = CPU_DEVICE`
			`return cls.normalize(device)`

			`@classmethod`
			`def choose_torch_dtype(cls, device: Optional[torch.device] = None) -> torch.dtype:`
			`"""Return the precision to use for accelerated inference."""`
			`device = device or cls.choose_torch_device()`
			`config = get_config()`
			`if device.type == "cuda" and torch.cuda.is_available():`
			`device_name = torch.cuda.get_device_name(device)`
			`if "GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name:`
			`# These GPUs have limited support for float16`
			`return cls._to_dtype("float32")`
			`elif config.precision == "auto":`
			`# Default to float16 for CUDA devices`
			`return cls._to_dtype("float16")`
			`else:`
			`# Use the user-defined precision`
			`return cls._to_dtype(config.precision)`

			`elif device.type == "mps" and torch.backends.mps.is_available():`
			`if config.precision == "auto":`
			`# Default to float16 for MPS devices`
			`return cls._to_dtype("float16")`
			`else:`
			`# Use the user-defined precision`
			`return cls._to_dtype(config.precision)`
			`# CPU / safe fallback`
			`return cls._to_dtype("float32")`

			`@classmethod`
			`def get_torch_device_name(cls) -> str:`
			`"""Return the device name for the current torch device."""`
			`device = cls.choose_torch_device()`
			`return torch.cuda.get_device_name(device) if device.type == "cuda" else device.type.upper()`

			`@classmethod`
			`def normalize(cls, device: Union[str, torch.device]) -> torch.device:`
			`"""Add the device index to CUDA devices."""`
			`device = torch.device(device)`
			`if device.index is None and device.type == "cuda" and torch.cuda.is_available():`
fix(diffusers_pipeline): ensure `cuda.get_mem_info` always gets a specific device index. Also tighten up the typing of `device` attributes in general. 2023-02-18 00:29:03 +00:00			`device = torch.device(device.type, torch.cuda.current_device())`
[util] Add generic torch device class (#6174) * introduce new abstraction layer for GPU devices * add unit test for device abstraction * fix ruff * convert TorchDeviceSelect into a stateless class * move logic to select context-specific execution device into context API * add mock hardware environments to pytest * remove dangling mocker fixture * fix unit test for running on non-CUDA systems * remove unimplemented get_execution_device() call * remove autocast precision * Multiple changes: 1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to context.models.get_execution_device(). 2. Rename TorchDeviceSelect to TorchDevice 3. Added back the legacy public API defined in `invocation_api`, including choose_precision(). 4. Added a config file migration script to accommodate removal of precision=autocast. * add deprecation warnings to choose_torch_device() and choose_precision() * fix test crash * remove app_config argument from choose_torch_device() and choose_torch_dtype() --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-04-15 13:12:49 +00:00			`return device`

			`@classmethod`
			`def empty_cache(cls) -> None:`
			`"""Clear the GPU device cache."""`
			`if torch.backends.mps.is_available():`
			`torch.mps.empty_cache()`
			`if torch.cuda.is_available():`
			`torch.cuda.empty_cache()`

			`@classmethod`
			`def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:`
			`return NAME_TO_PRECISION[precision_name]`