From 006d782cc8bcca9be8410dd4e68ec0d61efaef77 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 5 Aug 2023 20:35:38 -0700 Subject: [PATCH 1/5] =?UTF-8?q?refactor(diffusers=5Fpipeline):=20tidy=20im?= =?UTF-8?q?ports=20=F0=9F=9A=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../backend/stable_diffusion/diffusers_pipeline.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 624d47ff64..2c35245a6a 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -7,22 +7,19 @@ import secrets from collections.abc import Sequence from dataclasses import dataclass, field from typing import Any, Callable, Generic, List, Optional, Type, TypeVar, Union -from pydantic import Field -import einops import PIL.Image -import numpy as np -from accelerate.utils import set_seed +import einops import psutil import torch import torchvision.transforms as T +from accelerate.utils import set_seed from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.controlnet import ControlNetModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import ( StableDiffusionPipeline, ) - from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import ( StableDiffusionImg2ImgPipeline, ) @@ -31,21 +28,21 @@ from diffusers.pipelines.stable_diffusion.safety_checker import ( ) from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput -from diffusers.utils import PIL_INTERPOLATION from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.outputs import BaseOutput +from pydantic import Field from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from typing_extensions import ParamSpec from invokeai.app.services.config import InvokeAIAppConfig -from ..util import CPU_DEVICE, normalize_device from .diffusion import ( AttentionMapSaver, InvokeAIDiffuserComponent, PostprocessingSettings, ) from .offloading import FullyLoadedModelGroup, ModelGroup +from ..util import normalize_device @dataclass From b80abdd10161000155b1aa088fbd550fc50f61a5 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 5 Aug 2023 20:37:17 -0700 Subject: [PATCH 2/5] =?UTF-8?q?refactor(diffusers=5Fpipeline):=20remove=20?= =?UTF-8?q?unused=20image=5Ffrom=5Fembeddings=20=F0=9F=9A=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stable_diffusion/diffusers_pipeline.py | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 2c35245a6a..eaafc834ed 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -389,48 +389,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): submodels.append(value) return submodels - def image_from_embeddings( - self, - latents: torch.Tensor, - num_inference_steps: int, - conditioning_data: ConditioningData, - *, - noise: torch.Tensor, - callback: Callable[[PipelineIntermediateState], None] = None, - run_id=None, - ) -> InvokeAIStableDiffusionPipelineOutput: - r""" - Function invoked when calling the pipeline for generation. - - :param conditioning_data: - :param latents: Pre-generated un-noised latents, to be used as inputs for - image generation. Can be used to tweak the same generation with different prompts. - :param num_inference_steps: The number of denoising steps. More denoising steps usually lead to a higher quality - image at the expense of slower inference. - :param noise: Noise to add to the latents, sampled from a Gaussian distribution. - :param callback: - :param run_id: - """ - result_latents, result_attention_map_saver = self.latents_from_embeddings( - latents, - num_inference_steps, - conditioning_data, - noise=noise, - run_id=run_id, - callback=callback, - ) - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - torch.cuda.empty_cache() - - with torch.inference_mode(): - image = self.decode_latents(result_latents) - output = InvokeAIStableDiffusionPipelineOutput( - images=image, - nsfw_content_detected=[], - attention_map_saver=result_attention_map_saver, - ) - return self.check_for_safety(output, dtype=conditioning_data.dtype) - def latents_from_embeddings( self, latents: torch.Tensor, From 77033eabd314d78a6f6064e4d938c94787431d14 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 5 Aug 2023 20:41:47 -0700 Subject: [PATCH 3/5] =?UTF-8?q?refactor(diffusers=5Fpipeline):=20remove=20?= =?UTF-8?q?unused=20`precision`=20=F0=9F=9A=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- invokeai/app/invocations/generate.py | 20 +++++------- invokeai/app/invocations/latent.py | 32 ++++++++----------- .../stable_diffusion/diffusers_pipeline.py | 1 - 3 files changed, 21 insertions(+), 32 deletions(-) diff --git a/invokeai/app/invocations/generate.py b/invokeai/app/invocations/generate.py index d48c9f922e..f8d240e45c 100644 --- a/invokeai/app/invocations/generate.py +++ b/invokeai/app/invocations/generate.py @@ -1,26 +1,23 @@ # Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654) +from contextlib import contextmanager, ContextDecorator from functools import partial from typing import Literal, Optional, get_args -import torch from pydantic import Field from invokeai.app.models.image import ColorField, ImageCategory, ImageField, ResourceOrigin from invokeai.app.util.misc import SEED_MAX, get_random_seed from invokeai.backend.generator.inpaint import infill_methods - -from ...backend.generator import Inpaint, InvokeAIGenerator -from ...backend.stable_diffusion import PipelineIntermediateState -from ..util.step_callback import stable_diffusion_step_callback from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext -from .image import ImageOutput - -from ...backend.model_management.lora import ModelPatcher -from ...backend.stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline -from .model import UNetField, VaeField from .compel import ConditioningField -from contextlib import contextmanager, ExitStack, ContextDecorator +from .image import ImageOutput +from .model import UNetField, VaeField +from ..util.step_callback import stable_diffusion_step_callback +from ...backend.generator import Inpaint, InvokeAIGenerator +from ...backend.model_management.lora import ModelPatcher +from ...backend.stable_diffusion import PipelineIntermediateState +from ...backend.stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline SAMPLER_NAME_VALUES = Literal[tuple(InvokeAIGenerator.schedulers())] INFILL_METHODS = Literal[tuple(infill_methods())] @@ -193,7 +190,6 @@ class InpaintInvocation(BaseInvocation): safety_checker=None, feature_extractor=None, requires_safety_checker=False, - precision="float16" if dtype == torch.float16 else "float32", execution_device=device, ) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 6e2e0838bc..c15f84ddd0 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -5,15 +5,26 @@ from typing import List, Literal, Optional, Union import einops import torch -from diffusers import ControlNetModel from diffusers.image_processor import VaeImageProcessor +from diffusers.models.attention_processor import ( + AttnProcessor2_0, + LoRAAttnProcessor2_0, + LoRAXFormersAttnProcessor, + XFormersAttnProcessor, +) from diffusers.schedulers import SchedulerMixin as Scheduler from pydantic import BaseModel, Field, validator from invokeai.app.invocations.metadata import CoreMetadata +from invokeai.app.util.controlnet_utils import prepare_control_image from invokeai.app.util.step_callback import stable_diffusion_step_callback from invokeai.backend.model_management.models import ModelType, SilenceWarnings - +from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext +from .compel import ConditioningField +from .controlnet_image_processors import ControlField +from .image import ImageOutput +from .model import ModelInfo, UNetField, VaeField +from ..models.image import ImageCategory, ImageField, ResourceOrigin from ...backend.model_management import ModelPatcher from ...backend.stable_diffusion import PipelineIntermediateState from ...backend.stable_diffusion.diffusers_pipeline import ( @@ -24,23 +35,7 @@ from ...backend.stable_diffusion.diffusers_pipeline import ( ) from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import PostprocessingSettings from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP -from ...backend.model_management import ModelPatcher from ...backend.util.devices import choose_torch_device, torch_dtype, choose_precision -from ..models.image import ImageCategory, ImageField, ResourceOrigin -from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext -from .compel import ConditioningField -from .controlnet_image_processors import ControlField -from .image import ImageOutput -from .model import ModelInfo, UNetField, VaeField -from invokeai.app.util.controlnet_utils import prepare_control_image - -from diffusers.models.attention_processor import ( - AttnProcessor2_0, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, - XFormersAttnProcessor, -) - DEFAULT_PRECISION = choose_precision(choose_torch_device()) @@ -231,7 +226,6 @@ class TextToLatentsInvocation(BaseInvocation): safety_checker=None, feature_extractor=None, requires_safety_checker=False, - precision="float16" if unet.dtype == torch.float16 else "float32", ) def prep_control_data( diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index eaafc834ed..48182a6be2 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -300,7 +300,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): safety_checker: Optional[StableDiffusionSafetyChecker], feature_extractor: Optional[CLIPFeatureExtractor], requires_safety_checker: bool = False, - precision: str = "float32", control_model: ControlNetModel = None, execution_device: Optional[torch.device] = None, ): From 6487e7d9064217dcc735db208f246d14477beff3 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 5 Aug 2023 21:19:29 -0700 Subject: [PATCH 4/5] =?UTF-8?q?refactor(diffusers=5Fpipeline):=20remove=20?= =?UTF-8?q?unused=20ModelGroup=20=F0=9F=9A=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit orphaned since #3550 removed the LazilyLoadedModelGroup code, probably unused since ModelCache took over responsibility for sequential offload somewhere around #3335. --- invokeai/app/invocations/generate.py | 1 - .../stable_diffusion/diffusers_pipeline.py | 71 +---- .../backend/stable_diffusion/offloading.py | 253 ------------------ 3 files changed, 7 insertions(+), 318 deletions(-) delete mode 100644 invokeai/backend/stable_diffusion/offloading.py diff --git a/invokeai/app/invocations/generate.py b/invokeai/app/invocations/generate.py index f8d240e45c..88a76e930c 100644 --- a/invokeai/app/invocations/generate.py +++ b/invokeai/app/invocations/generate.py @@ -190,7 +190,6 @@ class InpaintInvocation(BaseInvocation): safety_checker=None, feature_extractor=None, requires_safety_checker=False, - execution_device=device, ) yield OldModelInfo( diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 48182a6be2..6891c726dc 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -4,7 +4,6 @@ import dataclasses import inspect import math import secrets -from collections.abc import Sequence from dataclasses import dataclass, field from typing import Any, Callable, Generic, List, Optional, Type, TypeVar, Union @@ -41,7 +40,6 @@ from .diffusion import ( InvokeAIDiffuserComponent, PostprocessingSettings, ) -from .offloading import FullyLoadedModelGroup, ModelGroup from ..util import normalize_device @@ -286,8 +284,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): feature_extractor ([`CLIPFeatureExtractor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ - _model_group: ModelGroup - ID_LENGTH = 8 def __init__( @@ -301,7 +297,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): feature_extractor: Optional[CLIPFeatureExtractor], requires_safety_checker: bool = False, control_model: ControlNetModel = None, - execution_device: Optional[torch.device] = None, ): super().__init__( vae, @@ -326,9 +321,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): # control_model=control_model, ) self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) - - self._model_group = FullyLoadedModelGroup(execution_device or self.unet.device) - self._model_group.install(*self._submodels) self.control_model = control_model def _adjust_memory_efficient_attention(self, latents: torch.Tensor): @@ -364,30 +356,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): else: self.disable_attention_slicing() - def to(self, torch_device: Optional[Union[str, torch.device]] = None, silence_dtype_warnings=False): - # overridden method; types match the superclass. - if torch_device is None: - return self - self._model_group.set_device(torch.device(torch_device)) - self._model_group.ready() - - @property - def device(self) -> torch.device: - return self._model_group.execution_device - - @property - def _submodels(self) -> Sequence[torch.nn.Module]: - module_names, _, _ = self.extract_init_dict(dict(self.config)) - submodels = [] - for name in module_names.keys(): - if hasattr(self, name): - value = getattr(self, name) - else: - value = getattr(self.config, name) - if isinstance(value, torch.nn.Module): - submodels.append(value) - return submodels - def latents_from_embeddings( self, latents: torch.Tensor, @@ -404,7 +372,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): if self.scheduler.config.get("cpu_only", False): scheduler_device = torch.device("cpu") else: - scheduler_device = self._model_group.device_for(self.unet) + scheduler_device = self.unet.device if timesteps is None: self.scheduler.set_timesteps(num_inference_steps, device=scheduler_device) @@ -458,7 +426,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): (batch_size,), timesteps[0], dtype=timesteps.dtype, - device=self._model_group.device_for(self.unet), + device=self.unet.device, ) latents = self.scheduler.add_noise(latents, noise, batched_t) @@ -675,7 +643,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): # 6. Prepare latent variables initial_latents = self.non_noised_latents_from_image( init_image, - device=self._model_group.device_for(self.unet), + device=self.unet.device, dtype=self.unet.dtype, ) if seed is not None: @@ -725,7 +693,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): nsfw_content_detected=[], attention_map_saver=result_attention_maps, ) - return self.check_for_safety(output, dtype=conditioning_data.dtype) + return output def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int): img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) @@ -734,7 +702,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): if self.scheduler.config.get("cpu_only", False): scheduler_device = torch.device("cpu") else: - scheduler_device = self._model_group.device_for(self.unet) + scheduler_device = self.unet.device img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=scheduler_device) timesteps, adjusted_steps = img2img_pipeline.get_timesteps( @@ -760,7 +728,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): noise_func=None, seed=None, ) -> InvokeAIStableDiffusionPipelineOutput: - device = self._model_group.device_for(self.unet) + device = self.unet.device latents_dtype = self.unet.dtype if isinstance(init_image, PIL.Image.Image): @@ -831,42 +799,17 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): nsfw_content_detected=[], attention_map_saver=result_attention_maps, ) - return self.check_for_safety(output, dtype=conditioning_data.dtype) + return output def non_noised_latents_from_image(self, init_image, *, device: torch.device, dtype): init_image = init_image.to(device=device, dtype=dtype) with torch.inference_mode(): - self._model_group.load(self.vae) init_latent_dist = self.vae.encode(init_image).latent_dist init_latents = init_latent_dist.sample().to(dtype=dtype) # FIXME: uses torch.randn. make reproducible! init_latents = 0.18215 * init_latents return init_latents - def check_for_safety(self, output, dtype): - with torch.inference_mode(): - screened_images, has_nsfw_concept = self.run_safety_checker(output.images, dtype=dtype) - screened_attention_map_saver = None - if has_nsfw_concept is None or not has_nsfw_concept: - screened_attention_map_saver = output.attention_map_saver - return InvokeAIStableDiffusionPipelineOutput( - screened_images, - has_nsfw_concept, - # block the attention maps if NSFW content is detected - attention_map_saver=screened_attention_map_saver, - ) - - def run_safety_checker(self, image, device=None, dtype=None): - # overriding to use the model group for device info instead of requiring the caller to know. - if self.safety_checker is not None: - device = self._model_group.device_for(self.safety_checker) - return super().run_safety_checker(image, device, dtype) - - def decode_latents(self, latents): - # Explicit call to get the vae loaded, since `decode` isn't the forward method. - self._model_group.load(self.vae) - return super().decode_latents(latents) - def debug_latents(self, latents, msg): from invokeai.backend.image_util import debug_image diff --git a/invokeai/backend/stable_diffusion/offloading.py b/invokeai/backend/stable_diffusion/offloading.py deleted file mode 100644 index aa2426d514..0000000000 --- a/invokeai/backend/stable_diffusion/offloading.py +++ /dev/null @@ -1,253 +0,0 @@ -from __future__ import annotations - -import warnings -import weakref -from abc import ABCMeta, abstractmethod -from collections.abc import MutableMapping -from typing import Callable, Union - -import torch -from accelerate.utils import send_to_device -from torch.utils.hooks import RemovableHandle - -OFFLOAD_DEVICE = torch.device("cpu") - - -class _NoModel: - """Symbol that indicates no model is loaded. - - (We can't weakref.ref(None), so this was my best idea at the time to come up with something - type-checkable.) - """ - - def __bool__(self): - return False - - def to(self, device: torch.device): - pass - - def __repr__(self): - return "" - - -NO_MODEL = _NoModel() - - -class ModelGroup(metaclass=ABCMeta): - """ - A group of models. - - The use case I had in mind when writing this is the sub-models used by a DiffusionPipeline, - e.g. its text encoder, U-net, VAE, etc. - - Those models are :py:class:`diffusers.ModelMixin`, but "model" is interchangeable with - :py:class:`torch.nn.Module` here. - """ - - def __init__(self, execution_device: torch.device): - self.execution_device = execution_device - - @abstractmethod - def install(self, *models: torch.nn.Module): - """Add models to this group.""" - pass - - @abstractmethod - def uninstall(self, models: torch.nn.Module): - """Remove models from this group.""" - pass - - @abstractmethod - def uninstall_all(self): - """Remove all models from this group.""" - - @abstractmethod - def load(self, model: torch.nn.Module): - """Load this model to the execution device.""" - pass - - @abstractmethod - def offload_current(self): - """Offload the current model(s) from the execution device.""" - pass - - @abstractmethod - def ready(self): - """Ready this group for use.""" - pass - - @abstractmethod - def set_device(self, device: torch.device): - """Change which device models from this group will execute on.""" - pass - - @abstractmethod - def device_for(self, model) -> torch.device: - """Get the device the given model will execute on. - - The model should already be a member of this group. - """ - pass - - @abstractmethod - def __contains__(self, model): - """Check if the model is a member of this group.""" - pass - - def __repr__(self) -> str: - return f"<{self.__class__.__name__} object at {id(self):x}: " f"device={self.execution_device} >" - - -class LazilyLoadedModelGroup(ModelGroup): - """ - Only one model from this group is loaded on the GPU at a time. - - Running the forward method of a model will displace the previously-loaded model, - offloading it to CPU. - - If you call other methods on the model, e.g. ``model.encode(x)`` instead of ``model(x)``, - you will need to explicitly load it with :py:method:`.load(model)`. - - This implementation relies on pytorch forward-pre-hooks, and it will copy forward arguments - to the appropriate execution device, as long as they are positional arguments and not keyword - arguments. (I didn't make the rules; that's the way the pytorch 1.13 API works for hooks.) - """ - - _hooks: MutableMapping[torch.nn.Module, RemovableHandle] - _current_model_ref: Callable[[], Union[torch.nn.Module, _NoModel]] - - def __init__(self, execution_device: torch.device): - super().__init__(execution_device) - self._hooks = weakref.WeakKeyDictionary() - self._current_model_ref = weakref.ref(NO_MODEL) - - def install(self, *models: torch.nn.Module): - for model in models: - self._hooks[model] = model.register_forward_pre_hook(self._pre_hook) - - def uninstall(self, *models: torch.nn.Module): - for model in models: - hook = self._hooks.pop(model) - hook.remove() - if self.is_current_model(model): - # no longer hooked by this object, so don't claim to manage it - self.clear_current_model() - - def uninstall_all(self): - self.uninstall(*self._hooks.keys()) - - def _pre_hook(self, module: torch.nn.Module, forward_input): - self.load(module) - if len(forward_input) == 0: - warnings.warn( - f"Hook for {module.__class__.__name__} got no input. " f"Inputs must be positional, not keywords.", - stacklevel=3, - ) - return send_to_device(forward_input, self.execution_device) - - def load(self, module): - if not self.is_current_model(module): - self.offload_current() - self._load(module) - - def offload_current(self): - module = self._current_model_ref() - if module is not NO_MODEL: - module.to(OFFLOAD_DEVICE) - self.clear_current_model() - - def _load(self, module: torch.nn.Module) -> torch.nn.Module: - assert self.is_empty(), f"A model is already loaded: {self._current_model_ref()}" - module = module.to(self.execution_device) - self.set_current_model(module) - return module - - def is_current_model(self, model: torch.nn.Module) -> bool: - """Is the given model the one currently loaded on the execution device?""" - return self._current_model_ref() is model - - def is_empty(self): - """Are none of this group's models loaded on the execution device?""" - return self._current_model_ref() is NO_MODEL - - def set_current_model(self, value): - self._current_model_ref = weakref.ref(value) - - def clear_current_model(self): - self._current_model_ref = weakref.ref(NO_MODEL) - - def set_device(self, device: torch.device): - if device == self.execution_device: - return - self.execution_device = device - current = self._current_model_ref() - if current is not NO_MODEL: - current.to(device) - - def device_for(self, model): - if model not in self: - raise KeyError(f"This does not manage this model {type(model).__name__}", model) - return self.execution_device # this implementation only dispatches to one device - - def ready(self): - pass # always ready to load on-demand - - def __contains__(self, model): - return model in self._hooks - - def __repr__(self) -> str: - return ( - f"<{self.__class__.__name__} object at {id(self):x}: " - f"current_model={type(self._current_model_ref()).__name__} >" - ) - - -class FullyLoadedModelGroup(ModelGroup): - """ - A group of models without any implicit loading or unloading. - - :py:meth:`.ready` loads _all_ the models to the execution device at once. - """ - - _models: weakref.WeakSet - - def __init__(self, execution_device: torch.device): - super().__init__(execution_device) - self._models = weakref.WeakSet() - - def install(self, *models: torch.nn.Module): - for model in models: - self._models.add(model) - model.to(self.execution_device) - - def uninstall(self, *models: torch.nn.Module): - for model in models: - self._models.remove(model) - - def uninstall_all(self): - self.uninstall(*self._models) - - def load(self, model): - model.to(self.execution_device) - - def offload_current(self): - for model in self._models: - model.to(OFFLOAD_DEVICE) - - def ready(self): - for model in self._models: - self.load(model) - - def set_device(self, device: torch.device): - self.execution_device = device - for model in self._models: - if model.device != OFFLOAD_DEVICE: - model.to(device) - - def device_for(self, model): - if model not in self: - raise KeyError("This does not manage this model f{type(model).__name__}", model) - return self.execution_device # this implementation only dispatches to one device - - def __contains__(self, model): - return model in self._models From 59caff7ff09d1d78e8fe7786d1ca9f4f0c10ab71 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 5 Aug 2023 21:43:55 -0700 Subject: [PATCH 5/5] =?UTF-8?q?refactor(diffusers=5Fpipeline):=20remove=20?= =?UTF-8?q?unused=20img2img=20wrappers=20=F0=9F=9A=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit invokeai.app no longer needs this as a single method, as it builds on latents2latents instead. --- invokeai/backend/generator/img2img.py | 63 +--------------- invokeai/backend/generator/inpaint.py | 8 ++ .../stable_diffusion/diffusers_pipeline.py | 73 ------------------- 3 files changed, 9 insertions(+), 135 deletions(-) diff --git a/invokeai/backend/generator/img2img.py b/invokeai/backend/generator/img2img.py index 5490b2325c..8aaaff5deb 100644 --- a/invokeai/backend/generator/img2img.py +++ b/invokeai/backend/generator/img2img.py @@ -1,25 +1,11 @@ """ invokeai.backend.generator.img2img descends from .generator """ -from typing import Optional -import torch -from accelerate.utils import set_seed -from diffusers import logging - -from ..stable_diffusion import ( - ConditioningData, - PostprocessingSettings, - StableDiffusionGeneratorPipeline, -) from .base import Generator class Img2Img(Generator): - def __init__(self, model, precision): - super().__init__(model, precision) - self.init_latent = None # by get_noise() - def get_make_image( self, sampler, @@ -42,51 +28,4 @@ class Img2Img(Generator): Returns a function returning an image derived from the prompt and the initial image Return value depends on the seed at the time you call it. """ - self.perlin = perlin - - # noinspection PyTypeChecker - pipeline: StableDiffusionGeneratorPipeline = self.model - pipeline.scheduler = sampler - - uc, c, extra_conditioning_info = conditioning - conditioning_data = ConditioningData( - uc, - c, - cfg_scale, - extra_conditioning_info, - postprocessing_settings=PostprocessingSettings( - threshold=threshold, - warmup=warmup, - h_symmetry_time_pct=h_symmetry_time_pct, - v_symmetry_time_pct=v_symmetry_time_pct, - ), - ).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta) - - def make_image(x_T: torch.Tensor, seed: int): - # FIXME: use x_T for initial seeded noise - # We're not at the moment because the pipeline automatically resizes init_image if - # necessary, which the x_T input might not match. - # In the meantime, reset the seed prior to generating pipeline output so we at least get the same result. - logging.set_verbosity_error() # quench safety check warnings - pipeline_output = pipeline.img2img_from_embeddings( - init_image, - strength, - steps, - conditioning_data, - noise_func=self.get_noise_like, - callback=step_callback, - seed=seed, - ) - if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: - attention_maps_callback(pipeline_output.attention_map_saver) - return pipeline.numpy_to_pil(pipeline_output.images)[0] - - return make_image - - def get_noise_like(self, like: torch.Tensor): - device = like.device - x = torch.randn_like(like, device=device) - if self.perlin > 0.0: - shape = like.shape - x = (1 - self.perlin) * x + self.perlin * self.get_perlin_noise(shape[3], shape[2]) - return x + raise NotImplementedError("replaced by invokeai.app.invocations.latent.LatentsToLatentsInvocation") diff --git a/invokeai/backend/generator/inpaint.py b/invokeai/backend/generator/inpaint.py index 7aeb3d4809..494f213d11 100644 --- a/invokeai/backend/generator/inpaint.py +++ b/invokeai/backend/generator/inpaint.py @@ -377,3 +377,11 @@ class Inpaint(Img2Img): ) return corrected_result + + def get_noise_like(self, like: torch.Tensor): + device = like.device + x = torch.randn_like(like, device=device) + if self.perlin > 0.0: + shape = like.shape + x = (1 - self.perlin) * x + self.perlin * self.get_perlin_noise(shape[3], shape[2]) + return x diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 6891c726dc..c2c8165d02 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -622,79 +622,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): **kwargs, ).sample - def img2img_from_embeddings( - self, - init_image: Union[torch.FloatTensor, PIL.Image.Image], - strength: float, - num_inference_steps: int, - conditioning_data: ConditioningData, - *, - callback: Callable[[PipelineIntermediateState], None] = None, - run_id=None, - noise_func=None, - seed=None, - ) -> InvokeAIStableDiffusionPipelineOutput: - if isinstance(init_image, PIL.Image.Image): - init_image = image_resized_to_grid_as_tensor(init_image.convert("RGB")) - - if init_image.dim() == 3: - init_image = einops.rearrange(init_image, "c h w -> 1 c h w") - - # 6. Prepare latent variables - initial_latents = self.non_noised_latents_from_image( - init_image, - device=self.unet.device, - dtype=self.unet.dtype, - ) - if seed is not None: - set_seed(seed) - noise = noise_func(initial_latents) - - return self.img2img_from_latents_and_embeddings( - initial_latents, - num_inference_steps, - conditioning_data, - strength, - noise, - run_id, - callback, - ) - - def img2img_from_latents_and_embeddings( - self, - initial_latents, - num_inference_steps, - conditioning_data: ConditioningData, - strength, - noise: torch.Tensor, - run_id=None, - callback=None, - ) -> InvokeAIStableDiffusionPipelineOutput: - timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength) - result_latents, result_attention_maps = self.latents_from_embeddings( - latents=initial_latents - if strength < 1.0 - else torch.zeros_like(initial_latents, device=initial_latents.device, dtype=initial_latents.dtype), - num_inference_steps=num_inference_steps, - conditioning_data=conditioning_data, - timesteps=timesteps, - noise=noise, - run_id=run_id, - callback=callback, - ) - - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - torch.cuda.empty_cache() - - with torch.inference_mode(): - image = self.decode_latents(result_latents) - output = InvokeAIStableDiffusionPipelineOutput( - images=image, - nsfw_content_detected=[], - attention_map_saver=result_attention_maps, - ) - return output - def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int): img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) assert img2img_pipeline.scheduler is self.scheduler