Merge branch 'main' into ryan/spandrel-upscale-tiling

Ryan Dick
2024-07-16 15:40:14 -04:00
committed by GitHub
52 changed files with 4915 additions and 2066 deletions

View File

@@ -98,7 +98,7 @@ class UnetSkipConnectionBlock(nn.Module):
         """
         super(UnetSkipConnectionBlock, self).__init__()
         self.outermost = outermost
-        if type(norm_layer) == functools.partial:
+        if isinstance(norm_layer, functools.partial):
             use_bias = norm_layer.func == nn.InstanceNorm2d
         else:
             use_bias = norm_layer == nn.InstanceNorm2d
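
Note on the hunk above: `type(x) == functools.partial` only matches the exact `functools.partial` type, while `isinstance` is the idiomatic check and also accepts subclasses. A minimal sketch of the check in isolation (the `norm_layer` value below is an illustrative stand-in, not the real call site):

```python
import functools

import torch.nn as nn

# Hypothetical norm_layer factory, mirroring how the constructor receives it.
norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)

# New-style check: idiomatic, and also true for subclasses of functools.partial.
if isinstance(norm_layer, functools.partial):
    use_bias = norm_layer.func == nn.InstanceNorm2d
else:
    use_bias = norm_layer == nn.InstanceNorm2d

print(use_bias)  # True: InstanceNorm2d has no affine params, so the conv needs a bias
```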

View File

@@ -124,16 +124,14 @@ class IPAdapter(RawModel):
             self.device, dtype=self.dtype
         )

-    def to(
-        self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, non_blocking: bool = False
-    ):
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
         if device is not None:
             self.device = device
         if dtype is not None:
             self.dtype = dtype

-        self._image_proj_model.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
-        self.attn_weights.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
+        self._image_proj_model.to(device=self.device, dtype=self.dtype)
+        self.attn_weights.to(device=self.device, dtype=self.dtype)

     def calc_size(self) -> int:
         # HACK(ryand): Fix this issue with circular imports.

View File

@@ -11,7 +11,6 @@ from typing_extensions import Self
 from invokeai.backend.model_manager import BaseModelType
 from invokeai.backend.raw_model import RawModel
-from invokeai.backend.util.devices import TorchDevice


 class LoRALayerBase:
@@ -57,14 +56,9 @@ class LoRALayerBase:
                 model_size += val.nelement() * val.element_size()
         return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         if self.bias is not None:
-            self.bias = self.bias.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.bias = self.bias.to(device=device, dtype=dtype)

     # TODO: find and debug lora/locon with bias
@@ -106,19 +100,14 @@ class LoRALayer(LoRALayerBase):
                 model_size += val.nelement() * val.element_size()
         return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
-        super().to(device=device, dtype=dtype, non_blocking=non_blocking)
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)

-        self.up = self.up.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.down = self.down.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.up = self.up.to(device=device, dtype=dtype)
+        self.down = self.down.to(device=device, dtype=dtype)
         if self.mid is not None:
-            self.mid = self.mid.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.mid = self.mid.to(device=device, dtype=dtype)


 class LoHALayer(LoRALayerBase):
@@ -167,23 +156,18 @@ class LoHALayer(LoRALayerBase):
                 model_size += val.nelement() * val.element_size()
         return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         super().to(device=device, dtype=dtype)

-        self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
         if self.t1 is not None:
-            self.t1 = self.t1.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t1 = self.t1.to(device=device, dtype=dtype)

-        self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
         if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t2 = self.t2.to(device=device, dtype=dtype)


 class LoKRLayer(LoRALayerBase):
@@ -264,12 +248,7 @@ class LoKRLayer(LoRALayerBase):
                 model_size += val.nelement() * val.element_size()
         return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         super().to(device=device, dtype=dtype)

         if self.w1 is not None:
@@ -277,19 +256,19 @@ class LoKRLayer(LoRALayerBase):
         else:
             assert self.w1_a is not None
             assert self.w1_b is not None
-            self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-            self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype)

         if self.w2 is not None:
-            self.w2 = self.w2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2 = self.w2.to(device=device, dtype=dtype)
         else:
             assert self.w2_a is not None
             assert self.w2_b is not None
-            self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-            self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype)

         if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t2 = self.t2.to(device=device, dtype=dtype)


 class FullLayer(LoRALayerBase):
@@ -319,15 +298,10 @@ class FullLayer(LoRALayerBase):
         model_size += self.weight.nelement() * self.weight.element_size()
         return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         super().to(device=device, dtype=dtype)

-        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.weight = self.weight.to(device=device, dtype=dtype)


 class IA3Layer(LoRALayerBase):
@@ -359,16 +333,11 @@ class IA3Layer(LoRALayerBase):
         model_size += self.on_input.nelement() * self.on_input.element_size()
         return model_size

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ):
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
         super().to(device=device, dtype=dtype)

-        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.on_input = self.on_input.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.weight = self.weight.to(device=device, dtype=dtype)
+        self.on_input = self.on_input.to(device=device, dtype=dtype)


 AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
@@ -390,15 +359,10 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
     def name(self) -> str:
         return self._name

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         # TODO: try revert if exception?
         for _key, layer in self.layers.items():
-            layer.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            layer.to(device=device, dtype=dtype)

     def calc_size(self) -> int:
         model_size = 0
@@ -521,7 +485,7 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
             # lower memory consumption by removing already parsed layer values
             state_dict[layer_key].clear()

-            layer.to(device=device, dtype=dtype, non_blocking=TorchDevice.get_non_blocking(device))
+            layer.to(device=device, dtype=dtype)
             model.layers[layer_key] = layer

         return model

View File

@@ -289,11 +289,9 @@ class ModelCache(ModelCacheBase[AnyModel]):
                 else:
                     new_dict: Dict[str, torch.Tensor] = {}
                     for k, v in cache_entry.state_dict.items():
-                        new_dict[k] = v.to(
-                            target_device, copy=True, non_blocking=TorchDevice.get_non_blocking(target_device)
-                        )
+                        new_dict[k] = v.to(target_device, copy=True)
                     cache_entry.model.load_state_dict(new_dict, assign=True)
-            cache_entry.model.to(target_device, non_blocking=TorchDevice.get_non_blocking(target_device))
+            cache_entry.model.to(target_device)
             cache_entry.device = target_device
         except Exception as e:  # blow away cache entry
             self._delete_cache_entry(cache_entry)
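
For context on the ModelCache hunk above: the cache keeps a CPU copy of the state dict, copies each tensor to the target device, and reattaches the copies with `load_state_dict(..., assign=True)` so the module's parameters point at the on-device tensors. A minimal, self-contained sketch of that pattern with a toy module (the function name and arguments are illustrative, not the actual cache API):

```python
from typing import Dict

import torch

def move_via_state_dict(
    model: torch.nn.Module,
    cpu_state_dict: Dict[str, torch.Tensor],
    target_device: torch.device,
) -> None:
    """Copy a cached CPU state dict to target_device and attach it to the model."""
    new_dict: Dict[str, torch.Tensor] = {}
    for k, v in cpu_state_dict.items():
        # copy=True always produces a new tensor, even if the device already matches.
        new_dict[k] = v.to(target_device, copy=True)
    # assign=True (PyTorch >= 2.1) replaces the module's tensors with the new ones
    # rather than copying values into the existing parameters in place.
    model.load_state_dict(new_dict, assign=True)

model = torch.nn.Linear(4, 4)
cached = {k: v.clone() for k, v in model.state_dict().items()}
move_via_state_dict(model, cached, torch.device("cpu"))
```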

View File

@@ -139,15 +139,12 @@ class ModelPatcher:
                 # We intentionally move to the target device first, then cast. Experimentally, this was found to
                 # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                 # same thing in a single call to '.to(...)'.
-                layer.to(device=device, non_blocking=TorchDevice.get_non_blocking(device))
-                layer.to(dtype=torch.float32, non_blocking=TorchDevice.get_non_blocking(device))
+                layer.to(device=device)
+                layer.to(dtype=torch.float32)
                 # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                 # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                 layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
-                layer.to(
-                    device=TorchDevice.CPU_DEVICE,
-                    non_blocking=TorchDevice.get_non_blocking(TorchDevice.CPU_DEVICE),
-                )
+                layer.to(device=TorchDevice.CPU_DEVICE)

                 assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                 if module.weight.shape != layer_weight.shape:
@@ -156,7 +153,7 @@ class ModelPatcher:
                     layer_weight = layer_weight.reshape(module.weight.shape)

                 assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
-                module.weight += layer_weight.to(dtype=dtype, non_blocking=TorchDevice.get_non_blocking(device))
+                module.weight += layer_weight.to(dtype=dtype)

         yield  # wait for context manager exit
@@ -164,9 +161,7 @@ class ModelPatcher:
             assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
             with torch.no_grad():
                 for module_key, weight in original_weights.items():
-                    model.get_submodule(module_key).weight.copy_(
-                        weight, non_blocking=TorchDevice.get_non_blocking(weight.device)
-                    )
+                    model.get_submodule(module_key).weight.copy_(weight)

     @classmethod
     @contextmanager
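
The comment preserved in the ModelPatcher hunk above explains the ordering of the two `.to(...)` calls: the layer is moved to the target device first and cast to float32 in a separate call, which was measured to be faster than one combined call for 16-bit CPU tensors going to CUDA. A rough illustration of the two patterns under comparison (function names and tensor sizes here are made up for the sketch, not taken from the codebase):

```python
import torch

def cast_two_step(w: torch.Tensor, device: torch.device) -> torch.Tensor:
    # Pattern kept by the patcher: move to the device first, then cast.
    w = w.to(device=device)
    return w.to(dtype=torch.float32)

def cast_one_step(w: torch.Tensor, device: torch.device) -> torch.Tensor:
    # Single combined call, reportedly slower for fp16 CPU -> CUDA transfers.
    return w.to(device=device, dtype=torch.float32)

if torch.cuda.is_available():
    w = torch.randn(4096, 4096, dtype=torch.float16)
    a = cast_two_step(w, torch.device("cuda"))
    b = cast_one_step(w, torch.device("cuda"))
    assert torch.equal(a, b)  # both orderings produce the same values
```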

View File

@@ -190,12 +190,7 @@ class IAIOnnxRuntimeModel(RawModel):
         return self.session.run(None, inputs)

     # compatability with RawModel ABC
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         pass

     # compatability with diffusers load code

View File

@@ -18,10 +18,5 @@ class RawModel(ABC):
     """

     @abstractmethod
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         pass
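
The RawModel hunk above narrows the abstract `to()` signature that the raw-model wrappers (LoRA, IP-Adapter, textual inversion, ONNX) implement. A minimal sketch of a conforming implementation against the new two-argument signature (the `ToyRawModel` class and its tensor attribute are invented for illustration):

```python
from abc import ABC, abstractmethod
from typing import Optional

import torch

class RawModel(ABC):
    """Abstract base for models that are not torch.nn.Modules, per the diff above."""

    @abstractmethod
    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None: ...

class ToyRawModel(RawModel):
    def __init__(self) -> None:
        self.weight = torch.zeros(8)

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        # Tensor.to is not in-place, so the attribute must be reassigned.
        self.weight = self.weight.to(device=device, dtype=dtype)

m = ToyRawModel()
m.to(dtype=torch.float16)
```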

View File

@@ -65,17 +65,12 @@ class TextualInversionModelRaw(RawModel):

         return result

-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         if not torch.cuda.is_available():
             return
         for emb in [self.embedding, self.embedding_2]:
             if emb is not None:
-                emb.to(device=device, dtype=dtype, non_blocking=non_blocking)
+                emb.to(device=device, dtype=dtype)

     def calc_size(self) -> int:
         """Get the size of this model in bytes."""

View File

@@ -112,15 +112,3 @@ class TorchDevice:
     @classmethod
     def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
         return NAME_TO_PRECISION[precision_name]
-
-    @staticmethod
-    def get_non_blocking(to_device: torch.device) -> bool:
-        """Return the non_blocking flag to be used when moving a tensor to a given device.
-
-        MPS may have unexpected errors with non-blocking operations - we should not use non-blocking when moving _to_ MPS.
-        When moving _from_ MPS, we can use non-blocking operations.
-
-        See:
-        - https://github.com/pytorch/pytorch/issues/107455
-        - https://discuss.pytorch.org/t/should-we-set-non-blocking-to-true/38234/28
-        """
-        return False if to_device.type == "mps" else True
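
The TorchDevice hunk above deletes `get_non_blocking` entirely, matching the rest of the diff, where every `non_blocking=...` argument is dropped in favor of plain, blocking `.to(...)` calls. Beyond the MPS issues cited in the removed docstring, non-blocking copies are easy to misuse: a device-to-host copy issued with `non_blocking=True` may return before the data has actually landed unless the stream is synchronized, and host-to-device copies are only truly asynchronous when the source is in pinned memory. A small sketch of the hazard and of the safe default the diff standardizes on (the function names are illustrative, not from the codebase):

```python
import torch

def gpu_to_cpu_non_blocking(t: torch.Tensor) -> torch.Tensor:
    # With non_blocking=True the copy is only queued on the CUDA stream; reading the
    # returned CPU tensor before synchronizing can observe stale or garbage data.
    out = t.to("cpu", non_blocking=True)
    torch.cuda.synchronize()  # required before the values can be trusted
    return out

def gpu_to_cpu_blocking(t: torch.Tensor) -> torch.Tensor:
    # The blocking form used throughout this diff: no extra synchronization needed.
    return t.to("cpu")

if torch.cuda.is_available():
    x = torch.randn(1024, device="cuda")
    assert torch.equal(gpu_to_cpu_non_blocking(x), gpu_to_cpu_blocking(x))
```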