Merge branch 'main' into refactor/rename-get-logger

Authored by Lincoln Stein; committed by GitHub on 2023-09-23 14:49:07 -07:00
610 changed files with 25078 additions and 7503 deletions

View File

@ -1,5 +1,5 @@
"""
Initialization file for invokeai.backend
"""
from .model_management import ModelManager, ModelCache, BaseModelType, ModelType, SubModelType, ModelInfo # noqa: F401
from .model_management import BaseModelType, ModelCache, ModelInfo, ModelManager, ModelType, SubModelType # noqa: F401
from .model_management.models import SilenceWarnings # noqa: F401

View File

@ -3,12 +3,13 @@ This module defines a singleton object, "invisible_watermark" that
wraps the invisible watermark model. It respects the global "invisible_watermark"
configuration variable, which allows the watermarking to be suppressed.
"""
import numpy as np
import cv2
from PIL import Image
import numpy as np
from imwatermark import WatermarkEncoder
from invokeai.app.services.config import InvokeAIAppConfig
from PIL import Image
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
config = InvokeAIAppConfig.get_config()

View File

@ -0,0 +1,46 @@
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Development Team
"""Very simple functions to fetch and print metadata from InvokeAI-generated images."""
import json
import sys
from pathlib import Path
from typing import Any, Dict
from PIL import Image
def get_invokeai_metadata(image_path: Path) -> Dict[str, Any]:
"""
Retrieve "invokeai_metadata" field from png image.
:param image_path: Path to the image to read metadata from.
May raise:
OSError -- image path not found
KeyError -- image doesn't contain the metadata field
"""
image: Image.Image = Image.open(image_path)
return json.loads(image.text["invokeai_metadata"])
def print_invokeai_metadata(image_path: Path):
"""Pretty-print the metadata."""
try:
metadata = get_invokeai_metadata(image_path)
print(f"{image_path}:\n{json.dumps(metadata, sort_keys=True, indent=4)}")
except OSError:
print(f"{image_path}:\nNo file found.")
except KeyError:
print(f"{image_path}:\nNo metadata found.")
print()
def main():
"""Run the command-line utility."""
image_paths = sys.argv[1:]
if not image_paths:
print(f"Usage: {Path(sys.argv[0]).name} image1 image2 image3 ...")
print("\nPretty-print InvokeAI image metadata from the listed png files.")
sys.exit(-1)
for img in image_paths:
print_invokeai_metadata(img)
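For context, the "invokeai_metadata" field that this script reads back is an ordinary PNG text chunk. A minimal, hypothetical sketch of writing such a chunk with Pillow (only the key name is taken from the script above; the metadata values are made up):

```python
import json

from PIL import Image
from PIL.PngImagePlugin import PngInfo

# Hypothetical metadata values, serialized into the text chunk that
# get_invokeai_metadata() later reads via Image.open(...).text["invokeai_metadata"].
metadata = {"model": "example-model", "seed": 123}

info = PngInfo()
info.add_text("invokeai_metadata", json.dumps(metadata))
Image.new("RGB", (64, 64)).save("example.png", pnginfo=info)
```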

View File

@ -5,6 +5,7 @@ wraps the actual patchmatch object. It respects the global
be suppressed or deferred
"""
import numpy as np
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig

View File

@ -5,10 +5,11 @@ configuration variable, which allows the checker to be suppressed.
"""
import numpy as np
from PIL import Image
from invokeai.backend import SilenceWarnings
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.util.devices import choose_torch_device
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend import SilenceWarnings
from invokeai.backend.util.devices import choose_torch_device
config = InvokeAIAppConfig.get_config()

View File

@ -2,9 +2,8 @@
Check that the invokeai_root is correctly configured and exit if not.
"""
import sys
from invokeai.app.services.config import (
InvokeAIAppConfig,
)
from invokeai.app.services.config import InvokeAIAppConfig
def check_invokeai_root(config: InvokeAIAppConfig):

View File

@ -6,68 +6,56 @@
#
# Coauthor: Kevin Turner http://github.com/keturn
#
import sys
import argparse
import io
import os
import psutil
import shutil
import sys
import textwrap
import torch
import traceback
import yaml
import warnings
from argparse import Namespace
from enum import Enum
from pathlib import Path
from shutil import get_terminal_size
from typing import get_type_hints, get_args, Any
from typing import Any, get_args, get_type_hints
from urllib import request
import npyscreen
import transformers
import omegaconf
import psutil
import torch
import transformers
import yaml
from diffusers import AutoencoderKL
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from huggingface_hub import HfFolder
from huggingface_hub import login as hf_hub_login
from omegaconf import OmegaConf
from pydantic.error_wrappers import ValidationError
from tqdm import tqdm
from transformers import (
CLIPTextModel,
CLIPTextConfig,
CLIPTokenizer,
AutoFeatureExtractor,
BertTokenizerFast,
)
import invokeai.configs as configs
from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextConfig, CLIPTextModel, CLIPTokenizer
from invokeai.app.services.config import (
InvokeAIAppConfig,
)
import invokeai.configs as configs
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.install.legacy_arg_parsing import legacy_parser
from invokeai.backend.install.model_install_backend import InstallSelections, ModelInstall, hf_download_from_pretrained
from invokeai.backend.model_management.model_probe import BaseModelType, ModelType
from invokeai.backend.util.logging import InvokeAILogger
from invokeai.frontend.install.model_install import addModelsForm, process_and_execute
# TO DO - Move all the frontend code into invokeai.frontend.install
from invokeai.frontend.install.widgets import (
SingleSelectColumnsSimple,
MultiSelectColumns,
CenteredButtonPress,
FileBox,
set_min_terminal_size,
CyclingForm,
MIN_COLS,
MIN_LINES,
CenteredButtonPress,
CyclingForm,
FileBox,
MultiSelectColumns,
SingleSelectColumnsSimple,
WindowTooSmallException,
set_min_terminal_size,
)
from invokeai.backend.install.legacy_arg_parsing import legacy_parser
from invokeai.backend.install.model_install_backend import (
hf_download_from_pretrained,
InstallSelections,
ModelInstall,
)
from invokeai.backend.model_management.model_probe import ModelType, BaseModelType
from pydantic.error_wrappers import ValidationError
warnings.filterwarnings("ignore")
transformers.logging.set_verbosity_error()

View File

@ -3,33 +3,26 @@ Migrate the models directory and models.yaml file from an existing
InvokeAI 2.3 installation to 3.0.0.
"""
import os
import argparse
import os
import shutil
import yaml
import transformers
import diffusers
import warnings
from dataclasses import dataclass
from pathlib import Path
from omegaconf import OmegaConf, DictConfig
from typing import Union
from diffusers import StableDiffusionPipeline, AutoencoderKL
import diffusers
import transformers
import yaml
from diffusers import AutoencoderKL, StableDiffusionPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from transformers import (
CLIPTextModel,
CLIPTokenizer,
AutoFeatureExtractor,
BertTokenizerFast,
)
from omegaconf import DictConfig, OmegaConf
from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextModel, CLIPTokenizer
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.model_management import ModelManager
from invokeai.backend.model_management.model_probe import ModelProbe, ModelType, BaseModelType, ModelProbeInfo
from invokeai.backend.model_management.model_probe import BaseModelType, ModelProbe, ModelProbeInfo, ModelType
warnings.filterwarnings("ignore")
transformers.logging.set_verbosity_error()

View File

@ -7,23 +7,23 @@ import warnings
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional, List, Dict, Callable, Union, Set
from typing import Callable, Dict, List, Optional, Set, Union
import requests
import torch
from diffusers import DiffusionPipeline
from diffusers import logging as dlogging
import torch
from huggingface_hub import hf_hub_url, HfFolder, HfApi
from huggingface_hub import HfApi, HfFolder, hf_hub_url
from omegaconf import OmegaConf
from tqdm import tqdm
import invokeai.configs as configs
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.model_management import ModelManager, ModelType, BaseModelType, ModelVariantType, AddModelResult
from invokeai.backend.model_management.model_probe import ModelProbe, SchedulerPredictionType, ModelProbeInfo
from invokeai.backend.model_management import AddModelResult, BaseModelType, ModelManager, ModelType, ModelVariantType
from invokeai.backend.model_management.model_probe import ModelProbe, ModelProbeInfo, SchedulerPredictionType
from invokeai.backend.util import download_with_resume
from invokeai.backend.util.devices import torch_dtype, choose_torch_device
from invokeai.backend.util.devices import choose_torch_device, torch_dtype
from ..util.logging import InvokeAILogger
warnings.filterwarnings("ignore")
@ -326,6 +326,16 @@ class ModelInstall(object):
elif f"learned_embeds.{suffix}" in files:
location = self._download_hf_model(repo_id, [f"learned_embeds.{suffix}"], staging)
break
elif "image_encoder.txt" in files and f"ip_adapter.{suffix}" in files: # IP-Adapter
files = ["image_encoder.txt", f"ip_adapter.{suffix}"]
location = self._download_hf_model(repo_id, files, staging)
break
elif f"model.{suffix}" in files and "config.json" in files:
# This elif-condition is pretty fragile, but it is intended to handle CLIP Vision models hosted
# by InvokeAI for use with IP-Adapters.
files = ["config.json", f"model.{suffix}"]
location = self._download_hf_model(repo_id, files, staging)
break
if not location:
logger.warning(f"Could not determine type of repo {repo_id}. Skipping install.")
return {}
@ -534,14 +544,17 @@ def hf_download_with_resume(
logger.info(f"{model_name}: Downloading...")
try:
with open(model_dest, open_mode) as file, tqdm(
desc=model_name,
initial=exist_size,
total=total + exist_size,
unit="iB",
unit_scale=True,
unit_divisor=1000,
) as bar:
with (
open(model_dest, open_mode) as file,
tqdm(
desc=model_name,
initial=exist_size,
total=total + exist_size,
unit="iB",
unit_scale=True,
unit_divisor=1000,
) as bar,
):
for data in resp.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)
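The rewritten block above relies on parenthesized context managers, which became an official language feature in Python 3.10. A minimal sketch of the syntax with placeholder file names:

```python
# Parentheses let one `with` statement group several context managers across
# multiple lines, with a trailing comma, instead of one long backslash-free line.
with (
    open("download.part", "ab") as file,
    open("download.log", "a") as log,
):
    log.write("resuming\n")
```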

View File

@ -0,0 +1,45 @@
# IP-Adapter Model Formats
The official IP-Adapter models are released here: [h94/IP-Adapter](https://huggingface.co/h94/IP-Adapter)
This official model repo does not integrate well with InvokeAI's current approach to model management, so we have defined a new file structure for IP-Adapter models. The InvokeAI format is described below.
## CLIP Vision Models
CLIP Vision models are organized in `diffusers` format. The expected directory structure is:
```bash
ip_adapter_sd_image_encoder/
├── config.json
└── model.safetensors
```
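As an illustrative sketch, a directory in this layout can be loaded with the `transformers` class used elsewhere in this PR (the local path below is hypothetical):

```python
from transformers import CLIPVisionModelWithProjection

# Any directory containing config.json and model.safetensors in this layout should work.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "models/any/clip_vision/ip_adapter_sd_image_encoder"
)
```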
## IP-Adapter Models
IP-Adapter models are stored in a directory containing two files:
- `image_encoder.txt`: A text file containing the model identifier for the CLIP Vision encoder that is intended to be used with this IP-Adapter model.
- `ip_adapter.bin`: The IP-Adapter weights.
Sample directory structure:
```bash
ip_adapter_sd15/
├── image_encoder.txt
└── ip_adapter.bin
```
### Why aren't the weights saved in a `.safetensors` file?
The weights in `ip_adapter.bin` are stored in a nested dict, which is not supported by `safetensors`. This could be solved by splitting `ip_adapter.bin` into multiple files, but for now we have decided to maintain consistency with the checkpoint structure used in the official [h94/IP-Adapter](https://huggingface.co/h94/IP-Adapter) repo.
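Putting the two files together, a rough sketch of how this layout is consumed (the directory path is hypothetical; the key names match the loader code added in this PR):

```python
from pathlib import Path

import torch

model_dir = Path("models/sd-1/ip_adapter/ip_adapter_sd15")  # hypothetical install location

# image_encoder.txt holds the model id of the matching CLIP Vision encoder.
image_encoder_id = (model_dir / "image_encoder.txt").read_text().strip()

# ip_adapter.bin is a torch checkpoint whose top level is a nested dict with
# "image_proj" and "ip_adapter" sub-dicts of tensors.
state_dict = torch.load(model_dir / "ip_adapter.bin", map_location="cpu")
assert {"image_proj", "ip_adapter"} <= set(state_dict.keys())
```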
## InvokeAI Hosted IP-Adapters
Image Encoders:
- [InvokeAI/ip_adapter_sd_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sd_image_encoder)
- [InvokeAI/ip_adapter_sdxl_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sdxl_image_encoder)
IP-Adapters:
- [InvokeAI/ip_adapter_sd15](https://huggingface.co/InvokeAI/ip_adapter_sd15)
- [InvokeAI/ip_adapter_plus_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_sd15)
- [InvokeAI/ip_adapter_plus_face_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_face_sd15)
- [InvokeAI/ip_adapter_sdxl](https://huggingface.co/InvokeAI/ip_adapter_sdxl)
- [InvokeAI/ip_adapter_sdxl_vit_h](https://huggingface.co/InvokeAI/ip_adapter_sdxl_vit_h)

View File

View File

@ -0,0 +1,162 @@
# copied from https://github.com/tencent-ailab/IP-Adapter (Apache License 2.0)
# and modified as needed
# tencent-ailab comment:
# modified from https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from diffusers.models.attention_processor import AttnProcessor2_0 as DiffusersAttnProcessor2_0
# Create a version of AttnProcessor2_0 that is a sub-class of nn.Module. This is required for IP-Adapter state_dict
# loading.
class AttnProcessor2_0(DiffusersAttnProcessor2_0, nn.Module):
def __init__(self):
DiffusersAttnProcessor2_0.__init__(self)
nn.Module.__init__(self)
def __call__(
self,
attn,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
temb=None,
ip_adapter_image_prompt_embeds=None,
):
"""Re-definition of DiffusersAttnProcessor2_0.__call__(...) that accepts and ignores the
ip_adapter_image_prompt_embeds parameter.
"""
return DiffusersAttnProcessor2_0.__call__(
self, attn, hidden_states, encoder_hidden_states, attention_mask, temb
)
class IPAttnProcessor2_0(torch.nn.Module):
r"""
Attention processor for IP-Adapter for PyTorch 2.0.
Args:
hidden_size (`int`):
The hidden size of the attention layer.
cross_attention_dim (`int`):
The number of channels in the `encoder_hidden_states`.
scale (`float`, defaults to 1.0):
The weight scale of the image prompt.
"""
def __init__(self, hidden_size, cross_attention_dim=None, scale=1.0):
super().__init__()
if not hasattr(F, "scaled_dot_product_attention"):
raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.")
self.hidden_size = hidden_size
self.cross_attention_dim = cross_attention_dim
self.scale = scale
self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False)
self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False)
def __call__(
self,
attn,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
temb=None,
ip_adapter_image_prompt_embeds=None,
):
if encoder_hidden_states is not None:
# If encoder_hidden_states is not None, then we are doing cross-attention, not self-attention. In this case,
# we will apply IP-Adapter conditioning. We validate the inputs for IP-Adapter conditioning here.
assert ip_adapter_image_prompt_embeds is not None
# The batch dimensions should match.
assert ip_adapter_image_prompt_embeds.shape[0] == encoder_hidden_states.shape[0]
# The channel dimensions should match.
assert ip_adapter_image_prompt_embeds.shape[2] == encoder_hidden_states.shape[2]
ip_hidden_states = ip_adapter_image_prompt_embeds
residual = hidden_states
if attn.spatial_norm is not None:
hidden_states = attn.spatial_norm(hidden_states, temb)
input_ndim = hidden_states.ndim
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
batch_size, sequence_length, _ = (
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
)
if attention_mask is not None:
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
# scaled_dot_product_attention expects attention_mask shape to be
# (batch, heads, source_length, target_length)
attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
query = attn.to_q(hidden_states)
if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
key = attn.to_k(encoder_hidden_states)
value = attn.to_v(encoder_hidden_states)
inner_dim = key.shape[-1]
head_dim = inner_dim // attn.heads
query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
# the output of sdp = (batch, num_heads, seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
hidden_states = F.scaled_dot_product_attention(
query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
)
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
hidden_states = hidden_states.to(query.dtype)
if ip_hidden_states is not None:
ip_key = self.to_k_ip(ip_hidden_states)
ip_value = self.to_v_ip(ip_hidden_states)
ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
# the output of sdp = (batch, num_heads, seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
ip_hidden_states = F.scaled_dot_product_attention(
query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False
)
ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
ip_hidden_states = ip_hidden_states.to(query.dtype)
hidden_states = hidden_states + self.scale * ip_hidden_states
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
if input_ndim == 4:
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
if attn.residual_connection:
hidden_states = hidden_states + residual
hidden_states = hidden_states / attn.rescale_output_factor
return hidden_states

View File

@ -0,0 +1,217 @@
# copied from https://github.com/tencent-ailab/IP-Adapter (Apache License 2.0)
# and modified as needed
from contextlib import contextmanager
from typing import Optional, Union
import torch
from diffusers.models import UNet2DConditionModel
from PIL import Image
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
from .attention_processor import AttnProcessor2_0, IPAttnProcessor2_0
from .resampler import Resampler
class ImageProjModel(torch.nn.Module):
"""Image Projection Model"""
def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024, clip_extra_context_tokens=4):
super().__init__()
self.cross_attention_dim = cross_attention_dim
self.clip_extra_context_tokens = clip_extra_context_tokens
self.proj = torch.nn.Linear(clip_embeddings_dim, self.clip_extra_context_tokens * cross_attention_dim)
self.norm = torch.nn.LayerNorm(cross_attention_dim)
@classmethod
def from_state_dict(cls, state_dict: dict[torch.Tensor], clip_extra_context_tokens=4):
"""Initialize an ImageProjModel from a state_dict.
The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict.
Args:
state_dict (dict[torch.Tensor]): The state_dict of model weights.
clip_extra_context_tokens (int, optional): Defaults to 4.
Returns:
ImageProjModel
"""
cross_attention_dim = state_dict["norm.weight"].shape[0]
clip_embeddings_dim = state_dict["proj.weight"].shape[-1]
model = cls(cross_attention_dim, clip_embeddings_dim, clip_extra_context_tokens)
model.load_state_dict(state_dict)
return model
def forward(self, image_embeds):
embeds = image_embeds
clip_extra_context_tokens = self.proj(embeds).reshape(
-1, self.clip_extra_context_tokens, self.cross_attention_dim
)
clip_extra_context_tokens = self.norm(clip_extra_context_tokens)
return clip_extra_context_tokens
class IPAdapter:
"""IP-Adapter: https://arxiv.org/pdf/2308.06721.pdf"""
def __init__(
self,
state_dict: dict[torch.Tensor],
device: torch.device,
dtype: torch.dtype = torch.float16,
num_tokens: int = 4,
):
self.device = device
self.dtype = dtype
self._num_tokens = num_tokens
self._clip_image_processor = CLIPImageProcessor()
self._state_dict = state_dict
self._image_proj_model = self._init_image_proj_model(self._state_dict["image_proj"])
# The _attn_processors will be initialized later when we have access to the UNet.
self._attn_processors = None
def to(self, device: torch.device, dtype: Optional[torch.dtype] = None):
self.device = device
if dtype is not None:
self.dtype = dtype
self._image_proj_model.to(device=self.device, dtype=self.dtype)
if self._attn_processors is not None:
torch.nn.ModuleList(self._attn_processors.values()).to(device=self.device, dtype=self.dtype)
def _init_image_proj_model(self, state_dict):
return ImageProjModel.from_state_dict(state_dict, self._num_tokens).to(self.device, dtype=self.dtype)
def _prepare_attention_processors(self, unet: UNet2DConditionModel):
"""Prepare a dict of attention processors that can later be injected into a unet, and load the IP-Adapter
attention weights into them.
Note that the `unet` param is only used to determine attention block dimensions and naming.
TODO(ryand): As a future improvement, this could all be inferred from the state_dict when the IPAdapter is
initialized.
"""
attn_procs = {}
for name in unet.attn_processors.keys():
cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim
if name.startswith("mid_block"):
hidden_size = unet.config.block_out_channels[-1]
elif name.startswith("up_blocks"):
block_id = int(name[len("up_blocks.")])
hidden_size = list(reversed(unet.config.block_out_channels))[block_id]
elif name.startswith("down_blocks"):
block_id = int(name[len("down_blocks.")])
hidden_size = unet.config.block_out_channels[block_id]
if cross_attention_dim is None:
attn_procs[name] = AttnProcessor2_0()
else:
attn_procs[name] = IPAttnProcessor2_0(
hidden_size=hidden_size,
cross_attention_dim=cross_attention_dim,
scale=1.0,
).to(self.device, dtype=self.dtype)
ip_layers = torch.nn.ModuleList(attn_procs.values())
ip_layers.load_state_dict(self._state_dict["ip_adapter"])
self._attn_processors = attn_procs
self._state_dict = None
# @genomancer: pushed scaling back out into its own method (like original Tencent implementation)
# which makes implementing begin_step_percent and end_step_percent easier
# but based on self._attn_processors (à la @Ryan) instead of the original Tencent unet.attn_processors,
# which should make it easier to implement multiple IPAdapters
def set_scale(self, scale):
if self._attn_processors is not None:
for attn_processor in self._attn_processors.values():
if isinstance(attn_processor, IPAttnProcessor2_0):
attn_processor.scale = scale
@contextmanager
def apply_ip_adapter_attention(self, unet: UNet2DConditionModel, scale: float):
"""A context manager that patches `unet` with this IP-Adapter's attention processors while it is active.
Yields:
None
"""
if self._attn_processors is None:
# We only have to call _prepare_attention_processors(...) once, and then the result is cached and can be
# used on any UNet model (with the same dimensions).
self._prepare_attention_processors(unet)
# Set scale
self.set_scale(scale)
# for attn_processor in self._attn_processors.values():
# if isinstance(attn_processor, IPAttnProcessor2_0):
# attn_processor.scale = scale
orig_attn_processors = unet.attn_processors
# Make a (moderately-) shallow copy of the self._attn_processors dict, because unet.set_attn_processor(...)
# actually pops elements from the passed dict.
ip_adapter_attn_processors = {k: v for k, v in self._attn_processors.items()}
try:
unet.set_attn_processor(ip_adapter_attn_processors)
yield None
finally:
unet.set_attn_processor(orig_attn_processors)
@torch.inference_mode()
def get_image_embeds(self, pil_image, image_encoder: CLIPVisionModelWithProjection):
if isinstance(pil_image, Image.Image):
pil_image = [pil_image]
clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
clip_image_embeds = image_encoder(clip_image.to(self.device, dtype=self.dtype)).image_embeds
image_prompt_embeds = self._image_proj_model(clip_image_embeds)
uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds))
return image_prompt_embeds, uncond_image_prompt_embeds
class IPAdapterPlus(IPAdapter):
"""IP-Adapter with fine-grained features"""
def _init_image_proj_model(self, state_dict):
return Resampler.from_state_dict(
state_dict=state_dict,
depth=4,
dim_head=64,
heads=12,
num_queries=self._num_tokens,
ff_mult=4,
).to(self.device, dtype=self.dtype)
@torch.inference_mode()
def get_image_embeds(self, pil_image, image_encoder: CLIPVisionModelWithProjection):
if isinstance(pil_image, Image.Image):
pil_image = [pil_image]
clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
clip_image = clip_image.to(self.device, dtype=self.dtype)
clip_image_embeds = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
image_prompt_embeds = self._image_proj_model(clip_image_embeds)
uncond_clip_image_embeds = image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[
-2
]
uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds)
return image_prompt_embeds, uncond_image_prompt_embeds
def build_ip_adapter(
ip_adapter_ckpt_path: str, device: torch.device, dtype: torch.dtype = torch.float16
) -> Union[IPAdapter, IPAdapterPlus]:
state_dict = torch.load(ip_adapter_ckpt_path, map_location="cpu")
# Determine if the state_dict is from an IPAdapter or IPAdapterPlus based on the image_proj weights that it
# contains.
is_plus = "proj.weight" not in state_dict["image_proj"]
if is_plus:
return IPAdapterPlus(state_dict, device=device, dtype=dtype)
else:
return IPAdapter(state_dict, device=device, dtype=dtype)
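A hedged usage sketch of the API defined in this file, combining `build_ip_adapter` with `apply_ip_adapter_attention`. The file paths are placeholders, and `runwayml/stable-diffusion-v1-5` stands in for whichever UNet the caller already has loaded:

```python
import torch
from diffusers import UNet2DConditionModel
from PIL import Image
from transformers import CLIPVisionModelWithProjection

device, dtype = torch.device("cuda"), torch.float16

ip_adapter = build_ip_adapter(
    "models/sd-1/ip_adapter/ip_adapter_sd15/ip_adapter.bin", device=device, dtype=dtype
)
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "InvokeAI/ip_adapter_sd_image_encoder"
).to(device, dtype=dtype)
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", torch_dtype=dtype
).to(device)

pil_image = Image.open("reference.png")  # hypothetical conditioning image
image_prompt_embeds, uncond_embeds = ip_adapter.get_image_embeds(pil_image, image_encoder)

with ip_adapter.apply_ip_adapter_attention(unet, scale=0.75):
    # Run the denoising loop here, forwarding the embeds to the patched processors
    # (e.g. via the UNet's cross_attention_kwargs).
    ...
```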

View File

@ -0,0 +1,158 @@
# copied from https://github.com/tencent-ailab/IP-Adapter (Apache License 2.0)
# tencent ailab comment: modified from
# https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py
import math
import torch
import torch.nn as nn
# FFN
def FeedForward(dim, mult=4):
inner_dim = int(dim * mult)
return nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, inner_dim, bias=False),
nn.GELU(),
nn.Linear(inner_dim, dim, bias=False),
)
def reshape_tensor(x, heads):
bs, length, width = x.shape
# (bs, length, width) --> (bs, length, n_heads, dim_per_head)
x = x.view(bs, length, heads, -1)
# (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head)
x = x.transpose(1, 2)
# (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head)
x = x.reshape(bs, heads, length, -1)
return x
class PerceiverAttention(nn.Module):
def __init__(self, *, dim, dim_head=64, heads=8):
super().__init__()
self.scale = dim_head**-0.5
self.dim_head = dim_head
self.heads = heads
inner_dim = dim_head * heads
self.norm1 = nn.LayerNorm(dim)
self.norm2 = nn.LayerNorm(dim)
self.to_q = nn.Linear(dim, inner_dim, bias=False)
self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False)
self.to_out = nn.Linear(inner_dim, dim, bias=False)
def forward(self, x, latents):
"""
Args:
x (torch.Tensor): image features
shape (b, n1, D)
latent (torch.Tensor): latent features
shape (b, n2, D)
"""
x = self.norm1(x)
latents = self.norm2(latents)
b, l, _ = latents.shape
q = self.to_q(latents)
kv_input = torch.cat((x, latents), dim=-2)
k, v = self.to_kv(kv_input).chunk(2, dim=-1)
q = reshape_tensor(q, self.heads)
k = reshape_tensor(k, self.heads)
v = reshape_tensor(v, self.heads)
# attention
scale = 1 / math.sqrt(math.sqrt(self.dim_head))
weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards
weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
out = weight @ v
out = out.permute(0, 2, 1, 3).reshape(b, l, -1)
return self.to_out(out)
class Resampler(nn.Module):
def __init__(
self,
dim=1024,
depth=8,
dim_head=64,
heads=16,
num_queries=8,
embedding_dim=768,
output_dim=1024,
ff_mult=4,
):
super().__init__()
self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim**0.5)
self.proj_in = nn.Linear(embedding_dim, dim)
self.proj_out = nn.Linear(dim, output_dim)
self.norm_out = nn.LayerNorm(output_dim)
self.layers = nn.ModuleList([])
for _ in range(depth):
self.layers.append(
nn.ModuleList(
[
PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads),
FeedForward(dim=dim, mult=ff_mult),
]
)
)
@classmethod
def from_state_dict(cls, state_dict: dict[torch.Tensor], depth=8, dim_head=64, heads=16, num_queries=8, ff_mult=4):
"""A convenience function that initializes a Resampler from a state_dict.
Some of the shape parameters are inferred from the state_dict (e.g. dim, embedding_dim, etc.). At the time of
writing, we did not have a need for inferring ALL of the shape parameters from the state_dict, but this would be
possible if needed in the future.
Args:
state_dict (dict[torch.Tensor]): The state_dict to load.
depth (int, optional):
dim_head (int, optional):
heads (int, optional):
ff_mult (int, optional):
Returns:
Resampler
"""
dim = state_dict["latents"].shape[2]
num_queries = state_dict["latents"].shape[1]
embedding_dim = state_dict["proj_in.weight"].shape[-1]
output_dim = state_dict["norm_out.weight"].shape[0]
model = cls(
dim=dim,
depth=depth,
dim_head=dim_head,
heads=heads,
num_queries=num_queries,
embedding_dim=embedding_dim,
output_dim=output_dim,
ff_mult=ff_mult,
)
model.load_state_dict(state_dict)
return model
def forward(self, x):
latents = self.latents.repeat(x.size(0), 1, 1)
x = self.proj_in(x)
for attn, ff in self.layers:
latents = attn(x, latents) + latents
latents = ff(latents) + latents
latents = self.proj_out(latents)
return self.norm_out(latents)

View File

@ -1,15 +1,19 @@
"""
Initialization file for invokeai.backend.model_management
"""
from .model_manager import ModelManager, ModelInfo, AddModelResult, SchedulerPredictionType # noqa: F401
from .model_cache import ModelCache # noqa: F401
# This import must be first
from .model_manager import ModelManager, ModelInfo, AddModelResult, SchedulerPredictionType # noqa: F401 isort: split
from .lora import ModelPatcher, ONNXModelPatcher # noqa: F401
from .model_cache import ModelCache # noqa: F401
from .models import ( # noqa: F401
BaseModelType,
ModelType,
SubModelType,
ModelVariantType,
ModelNotFoundException,
DuplicateModelException,
ModelNotFoundException,
ModelType,
ModelVariantType,
SubModelType,
)
from .model_merge import ModelMerger, MergeInterpolationMethod # noqa: F401
# This import must be last
from .model_merge import ModelMerger, MergeInterpolationMethod # noqa: F401 isort: split

View File

@ -25,12 +25,7 @@ from typing import Optional, Union
import requests
import torch
from diffusers.models import (
AutoencoderKL,
ControlNetModel,
PriorTransformer,
UNet2DConditionModel,
)
from diffusers.models import AutoencoderKL, ControlNetModel, PriorTransformer, UNet2DConditionModel
from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
@ -64,6 +59,7 @@ from transformers import (
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.util.logging import InvokeAILogger
from .models import BaseModelType, ModelVariantType
try:
@ -1203,8 +1199,8 @@ def download_from_original_stable_diffusion_ckpt(
StableDiffusionControlNetPipeline,
StableDiffusionInpaintPipeline,
StableDiffusionPipeline,
StableDiffusionXLPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLPipeline,
StableUnCLIPImg2ImgPipeline,
StableUnCLIPPipeline,
)

View File

@ -2,8 +2,8 @@ from __future__ import annotations
import copy
from contextlib import contextmanager
from typing import Optional, Dict, Tuple, Any, Union, List
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
import torch
@ -14,7 +14,6 @@ from transformers import CLIPTextModel, CLIPTokenizer
from .models.lora import LoRAModel
"""
loras = [
(lora_model1, 0.7),
@ -307,9 +306,10 @@ class TextualInversionManager(BaseTextualInversionManager):
class ONNXModelPatcher:
from .models.base import IAIOnnxRuntimeModel
from diffusers import OnnxRuntimeModel
from .models.base import IAIOnnxRuntimeModel
@classmethod
@contextmanager
def apply_lora_unet(

View File

@ -17,18 +17,23 @@ context. Use like this:
"""
import gc
import hashlib
import os
import sys
import hashlib
from contextlib import suppress
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Union, types, Optional, Type, Any
from typing import Any, Dict, Optional, Type, Union, types
import torch
import invokeai.backend.util.logging as logger
from .models import BaseModelType, ModelType, SubModelType, ModelBase
from ..util.devices import choose_torch_device
from .models import BaseModelType, ModelBase, ModelType, SubModelType
if choose_torch_device() == torch.device("mps"):
from torch import mps
# Maximum size of the cache, in gigs
# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
@ -405,6 +410,8 @@ class ModelCache(object):
gc.collect()
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
self.logger.debug(f"After unloading: cached_models={len(self._cached_models)}")
@ -425,6 +432,8 @@ class ModelCache(object):
gc.collect()
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
def _local_model_hash(self, model_path: Union[str, Path]) -> str:
sha = hashlib.sha256()

View File

@ -25,6 +25,7 @@ Models are described using four attributes:
ModelType.Lora -- a LoRA or LyCORIS fine-tune
ModelType.TextualInversion -- a textual inversion embedding
ModelType.ControlNet -- a ControlNet model
ModelType.IPAdapter -- an IPAdapter model
3) BaseModelType -- an enum indicating the stable diffusion base model, one of:
BaseModelType.StableDiffusion1
@ -234,8 +235,8 @@ import textwrap
import types
from dataclasses import dataclass
from pathlib import Path
from shutil import rmtree, move
from typing import Optional, List, Literal, Tuple, Union, Dict, Set, Callable
from shutil import move, rmtree
from typing import Callable, Dict, List, Literal, Optional, Set, Tuple, Union
import torch
import yaml
@ -246,20 +247,21 @@ from pydantic import BaseModel, Field
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.util import CUDA_DEVICE, Chdir
from .model_cache import ModelCache, ModelLocker
from .model_search import ModelSearch
from .models import (
BaseModelType,
ModelType,
SubModelType,
ModelError,
SchedulerPredictionType,
MODEL_CLASSES,
ModelConfigBase,
ModelNotFoundException,
InvalidModelException,
BaseModelType,
DuplicateModelException,
InvalidModelException,
ModelBase,
ModelConfigBase,
ModelError,
ModelNotFoundException,
ModelType,
SchedulerPredictionType,
SubModelType,
)
# We are only starting to number the config file with release 3.
@ -999,8 +1001,8 @@ class ModelManager(object):
new_models_found = True
except DuplicateModelException as e:
self.logger.warning(e)
except InvalidModelException:
self.logger.warning(f"Not a valid model: {model_path}")
except InvalidModelException as e:
self.logger.warning(f"Not a valid model: {model_path}. {e}")
except NotImplementedError as e:
self.logger.warning(e)

View File

@ -9,13 +9,14 @@ Copyright (c) 2023 Lincoln Stein and the InvokeAI Development Team
import warnings
from enum import Enum
from pathlib import Path
from typing import List, Optional, Union
from diffusers import DiffusionPipeline
from diffusers import logging as dlogging
from typing import List, Union, Optional
import invokeai.backend.util.logging as logger
from ...backend.model_management import ModelManager, ModelType, BaseModelType, ModelVariantType, AddModelResult
from ...backend.model_management import AddModelResult, BaseModelType, ModelManager, ModelType, ModelVariantType
class MergeInterpolationMethod(str, Enum):

View File

@ -1,24 +1,26 @@
import json
import torch
import safetensors.torch
import re
from dataclasses import dataclass
from diffusers import ModelMixin, ConfigMixin
from pathlib import Path
from typing import Callable, Literal, Union, Dict, Optional
from typing import Callable, Dict, Literal, Optional, Union
import safetensors.torch
import torch
from diffusers import ConfigMixin, ModelMixin
from picklescan.scanner import scan_file_path
from invokeai.backend.model_management.models.ip_adapter import IPAdapterModelFormat
from .models import (
BaseModelType,
InvalidModelException,
ModelType,
ModelVariantType,
SchedulerPredictionType,
SilenceWarnings,
InvalidModelException,
)
from .util import lora_token_vector_length
from .models.base import read_checkpoint_meta
from .util import lora_token_vector_length
@dataclass
@ -52,7 +54,9 @@ class ModelProbe(object):
"StableDiffusionXLImg2ImgPipeline": ModelType.Main,
"StableDiffusionXLInpaintPipeline": ModelType.Main,
"AutoencoderKL": ModelType.Vae,
"AutoencoderTiny": ModelType.Vae,
"ControlNetModel": ModelType.ControlNet,
"CLIPVisionModelWithProjection": ModelType.CLIPVision,
}
@classmethod
@ -119,14 +123,18 @@ class ModelProbe(object):
and prediction_type == SchedulerPredictionType.VPrediction
),
format=format,
image_size=1024
if (base_type in {BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner})
else 768
if (
base_type == BaseModelType.StableDiffusion2
and prediction_type == SchedulerPredictionType.VPrediction
)
else 512,
image_size=(
1024
if (base_type in {BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner})
else (
768
if (
base_type == BaseModelType.StableDiffusion2
and prediction_type == SchedulerPredictionType.VPrediction
)
else 512
)
),
)
except Exception:
raise
@ -171,6 +179,7 @@ class ModelProbe(object):
Get the model type of a hugging-face style folder.
"""
class_name = None
error_hint = None
if model:
class_name = model.__class__.__name__
else:
@ -178,9 +187,10 @@ class ModelProbe(object):
return ModelType.ONNX
if (folder_path / "learned_embeds.bin").exists():
return ModelType.TextualInversion
if (folder_path / "pytorch_lora_weights.bin").exists():
return ModelType.Lora
if (folder_path / "image_encoder.txt").exists():
return ModelType.IPAdapter
i = folder_path / "model_index.json"
c = folder_path / "config.json"
@ -189,13 +199,24 @@ class ModelProbe(object):
if config_path:
with open(config_path, "r") as file:
conf = json.load(file)
class_name = conf["_class_name"]
if "_class_name" in conf:
class_name = conf["_class_name"]
elif "architectures" in conf:
class_name = conf["architectures"][0]
else:
class_name = None
else:
error_hint = f"No model_index.json or config.json found in {folder_path}."
if class_name and (type := cls.CLASS2TYPE.get(class_name)):
return type
else:
error_hint = f"class {class_name} is not one of the supported classes [{', '.join(cls.CLASS2TYPE.keys())}]"
# give up
raise InvalidModelException(f"Unable to determine model type for {folder_path}")
raise InvalidModelException(
f"Unable to determine model type for {folder_path}" + (f"; {error_hint}" if error_hint else "")
)
@classmethod
def _scan_and_load_checkpoint(cls, model_path: Path) -> dict:
@ -367,6 +388,16 @@ class ControlNetCheckpointProbe(CheckpointProbeBase):
raise InvalidModelException("Unable to determine base type for {self.checkpoint_path}")
class IPAdapterCheckpointProbe(CheckpointProbeBase):
def get_base_type(self) -> BaseModelType:
raise NotImplementedError()
class CLIPVisionCheckpointProbe(CheckpointProbeBase):
def get_base_type(self) -> BaseModelType:
raise NotImplementedError()
########################################################
# classes for probing folders
#######################################################
@ -439,16 +470,32 @@ class PipelineFolderProbe(FolderProbeBase):
class VaeFolderProbe(FolderProbeBase):
def get_base_type(self) -> BaseModelType:
if self._config_looks_like_sdxl():
return BaseModelType.StableDiffusionXL
elif self._name_looks_like_sdxl():
# SD and SDXL VAEs have the same shape (3-channel RGB to 4-channel float scaled down by a
# factor of 8), so we can't necessarily tell them apart by config hyperparameters; fall back to the name.
return BaseModelType.StableDiffusionXL
else:
return BaseModelType.StableDiffusion1
def _config_looks_like_sdxl(self) -> bool:
# config values that distinguish Stability's SD 1.x VAE from their SDXL VAE.
config_file = self.folder_path / "config.json"
if not config_file.exists():
raise InvalidModelException(f"Cannot determine base type for {self.folder_path}")
with open(config_file, "r") as file:
config = json.load(file)
return (
BaseModelType.StableDiffusionXL
if config.get("scaling_factor", 0) == 0.13025 and config.get("sample_size") in [512, 1024]
else BaseModelType.StableDiffusion1
)
return config.get("scaling_factor", 0) == 0.13025 and config.get("sample_size") in [512, 1024]
def _name_looks_like_sdxl(self) -> bool:
return bool(re.search(r"xl\b", self._guess_name(), re.IGNORECASE))
def _guess_name(self) -> str:
name = self.folder_path.name
if name == "vae":
name = self.folder_path.parent.name
return name
class TextualInversionFolderProbe(FolderProbeBase):
@ -486,11 +533,13 @@ class ControlNetFolderProbe(FolderProbeBase):
base_model = (
BaseModelType.StableDiffusion1
if dimension == 768
else BaseModelType.StableDiffusion2
if dimension == 1024
else BaseModelType.StableDiffusionXL
if dimension == 2048
else None
else (
BaseModelType.StableDiffusion2
if dimension == 1024
else BaseModelType.StableDiffusionXL
if dimension == 2048
else None
)
)
if not base_model:
raise InvalidModelException(f"Unable to determine model base for {self.folder_path}")
@ -510,15 +559,47 @@ class LoRAFolderProbe(FolderProbeBase):
return LoRACheckpointProbe(model_file, None).get_base_type()
class IPAdapterFolderProbe(FolderProbeBase):
def get_format(self) -> str:
return IPAdapterModelFormat.InvokeAI.value
def get_base_type(self) -> BaseModelType:
model_file = self.folder_path / "ip_adapter.bin"
if not model_file.exists():
raise InvalidModelException("Unknown IP-Adapter model format.")
state_dict = torch.load(model_file, map_location="cpu")
cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
if cross_attention_dim == 768:
return BaseModelType.StableDiffusion1
elif cross_attention_dim == 1024:
return BaseModelType.StableDiffusion2
elif cross_attention_dim == 2048:
return BaseModelType.StableDiffusionXL
else:
raise InvalidModelException(f"IP-Adapter had unexpected cross-attention dimension: {cross_attention_dim}.")
class CLIPVisionFolderProbe(FolderProbeBase):
def get_base_type(self) -> BaseModelType:
return BaseModelType.Any
############## register probe classes ######
ModelProbe.register_probe("diffusers", ModelType.Main, PipelineFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.Vae, VaeFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.Lora, LoRAFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.TextualInversion, TextualInversionFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.ControlNet, ControlNetFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.IPAdapter, IPAdapterFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.CLIPVision, CLIPVisionFolderProbe)
ModelProbe.register_probe("checkpoint", ModelType.Main, PipelineCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.Vae, VaeCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.Lora, LoRACheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.TextualInversion, TextualInversionCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.ControlNet, ControlNetCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.IPAdapter, IPAdapterCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.CLIPVision, CLIPVisionCheckpointProbe)
ModelProbe.register_probe("onnx", ModelType.ONNX, ONNXFolderProbe)

View File

@ -5,8 +5,8 @@ Abstract base class for recursive directory search for models.
import os
from abc import ABC, abstractmethod
from typing import List, Set, types
from pathlib import Path
from typing import List, Set, types
import invokeai.backend.util.logging as logger
@ -79,7 +79,7 @@ class ModelSearch(ABC):
self._models_found += 1
self._scanned_dirs.add(path)
except Exception as e:
self.logger.warning(str(e))
self.logger.warning(f"Failed to process '{path}': {e}")
for f in files:
path = Path(root) / f
@ -90,7 +90,7 @@ class ModelSearch(ABC):
self.on_model_found(path)
self._models_found += 1
except Exception as e:
self.logger.warning(str(e))
self.logger.warning(f"Failed to process '{path}': {e}")
class FindModels(ModelSearch):

View File

@ -1,29 +1,32 @@
import inspect
from enum import Enum
from pydantic import BaseModel
from typing import Literal, get_origin
from pydantic import BaseModel
from .base import ( # noqa: F401
BaseModelType,
ModelType,
SubModelType,
DuplicateModelException,
InvalidModelException,
ModelBase,
ModelConfigBase,
ModelError,
ModelNotFoundException,
ModelType,
ModelVariantType,
SchedulerPredictionType,
ModelError,
SilenceWarnings,
ModelNotFoundException,
InvalidModelException,
DuplicateModelException,
SubModelType,
)
from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model
from .sdxl import StableDiffusionXLModel
from .vae import VaeModel
from .lora import LoRAModel
from .clip_vision import CLIPVisionModel
from .controlnet import ControlNetModel # TODO:
from .textual_inversion import TextualInversionModel
from .ip_adapter import IPAdapterModel
from .lora import LoRAModel
from .sdxl import StableDiffusionXLModel
from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model
from .stable_diffusion_onnx import ONNXStableDiffusion1Model, ONNXStableDiffusion2Model
from .textual_inversion import TextualInversionModel
from .vae import VaeModel
MODEL_CLASSES = {
BaseModelType.StableDiffusion1: {
@ -33,6 +36,8 @@ MODEL_CLASSES = {
ModelType.Lora: LoRAModel,
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.IPAdapter: IPAdapterModel,
ModelType.CLIPVision: CLIPVisionModel,
},
BaseModelType.StableDiffusion2: {
ModelType.ONNX: ONNXStableDiffusion2Model,
@ -41,6 +46,8 @@ MODEL_CLASSES = {
ModelType.Lora: LoRAModel,
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.IPAdapter: IPAdapterModel,
ModelType.CLIPVision: CLIPVisionModel,
},
BaseModelType.StableDiffusionXL: {
ModelType.Main: StableDiffusionXLModel,
@ -50,6 +57,8 @@ MODEL_CLASSES = {
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.ONNX: ONNXStableDiffusion2Model,
ModelType.IPAdapter: IPAdapterModel,
ModelType.CLIPVision: CLIPVisionModel,
},
BaseModelType.StableDiffusionXLRefiner: {
ModelType.Main: StableDiffusionXLModel,
@ -59,6 +68,19 @@ MODEL_CLASSES = {
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.ONNX: ONNXStableDiffusion2Model,
ModelType.IPAdapter: IPAdapterModel,
ModelType.CLIPVision: CLIPVisionModel,
},
BaseModelType.Any: {
ModelType.CLIPVision: CLIPVisionModel,
# The following model types are not expected to be used with BaseModelType.Any.
ModelType.ONNX: ONNXStableDiffusion2Model,
ModelType.Main: StableDiffusion2Model,
ModelType.Vae: VaeModel,
ModelType.Lora: LoRAModel,
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.IPAdapter: IPAdapterModel,
},
# BaseModelType.Kandinsky2_1: {
# ModelType.Main: Kandinsky2_1Model,

View File

@ -1,29 +1,25 @@
import inspect
import json
import os
import sys
import typing
import inspect
import warnings
from abc import ABCMeta, abstractmethod
from contextlib import suppress
from enum import Enum
from pathlib import Path
from picklescan.scanner import scan_file_path
from typing import Any, Callable, Dict, Generic, List, Literal, Optional, Type, TypeVar, Union
import torch
import numpy as np
import onnx
import safetensors.torch
from diffusers import DiffusionPipeline, ConfigMixin
from onnx import numpy_helper
from onnxruntime import (
InferenceSession,
SessionOptions,
get_available_providers,
)
from pydantic import BaseModel, Field
from typing import List, Dict, Optional, Type, Literal, TypeVar, Generic, Callable, Any, Union
import torch
from diffusers import ConfigMixin, DiffusionPipeline
from diffusers import logging as diffusers_logging
from onnx import numpy_helper
from onnxruntime import InferenceSession, SessionOptions, get_available_providers
from picklescan.scanner import scan_file_path
from pydantic import BaseModel, Field
from transformers import logging as transformers_logging
@ -40,6 +36,7 @@ class ModelNotFoundException(Exception):
class BaseModelType(str, Enum):
Any = "any" # For models that are not associated with any particular base model.
StableDiffusion1 = "sd-1"
StableDiffusion2 = "sd-2"
StableDiffusionXL = "sdxl"
@ -54,6 +51,8 @@ class ModelType(str, Enum):
Lora = "lora"
ControlNet = "controlnet" # used by model_probe
TextualInversion = "embedding"
IPAdapter = "ip_adapter"
CLIPVision = "clip_vision"
class SubModelType(str, Enum):

View File

@ -0,0 +1,82 @@
import os
from enum import Enum
from typing import Literal, Optional
import torch
from transformers import CLIPVisionModelWithProjection
from invokeai.backend.model_management.models.base import (
BaseModelType,
InvalidModelException,
ModelBase,
ModelConfigBase,
ModelType,
SubModelType,
calc_model_size_by_data,
calc_model_size_by_fs,
classproperty,
)
class CLIPVisionModelFormat(str, Enum):
Diffusers = "diffusers"
class CLIPVisionModel(ModelBase):
class DiffusersConfig(ModelConfigBase):
model_format: Literal[CLIPVisionModelFormat.Diffusers]
def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
assert model_type == ModelType.CLIPVision
super().__init__(model_path, base_model, model_type)
self.model_size = calc_model_size_by_fs(self.model_path)
@classmethod
def detect_format(cls, path: str) -> str:
if not os.path.exists(path):
raise ModuleNotFoundError(f"No CLIP Vision model at path '{path}'.")
if os.path.isdir(path) and os.path.exists(os.path.join(path, "config.json")):
return CLIPVisionModelFormat.Diffusers
raise InvalidModelException(f"Unexpected CLIP Vision model format: {path}")
@classproperty
def save_to_config(cls) -> bool:
return True
def get_size(self, child_type: Optional[SubModelType] = None) -> int:
if child_type is not None:
raise ValueError("There are no child models in a CLIP Vision model.")
return self.model_size
def get_model(
self,
torch_dtype: Optional[torch.dtype],
child_type: Optional[SubModelType] = None,
) -> CLIPVisionModelWithProjection:
if child_type is not None:
raise ValueError("There are no child models in a CLIP Vision model.")
model = CLIPVisionModelWithProjection.from_pretrained(self.model_path, torch_dtype=torch_dtype)
# Calculate a more accurate model size.
self.model_size = calc_model_size_by_data(model)
return model
@classmethod
def convert_if_required(
cls,
model_path: str,
output_path: str,
config: ModelConfigBase,
base_model: BaseModelType,
) -> str:
format = cls.detect_format(model_path)
if format == CLIPVisionModelFormat.Diffusers:
return model_path
else:
raise ValueError(f"Unsupported format: '{format}'.")

View File

@ -1,23 +1,26 @@
import os
import torch
from enum import Enum
from pathlib import Path
from typing import Optional, Literal
from typing import Literal, Optional
import torch
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from .base import (
BaseModelType,
EmptyConfigLoader,
InvalidModelException,
ModelBase,
ModelConfigBase,
BaseModelType,
ModelNotFoundException,
ModelType,
SubModelType,
EmptyConfigLoader,
calc_model_size_by_fs,
calc_model_size_by_data,
calc_model_size_by_fs,
classproperty,
InvalidModelException,
ModelNotFoundException,
)
from invokeai.app.services.config import InvokeAIAppConfig
import invokeai.backend.util.logging as logger
class ControlNetModelFormat(str, Enum):

View File

@ -0,0 +1,92 @@
import os
import typing
from enum import Enum
from typing import Literal, Optional
import torch
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus, build_ip_adapter
from invokeai.backend.model_management.models.base import (
BaseModelType,
InvalidModelException,
ModelBase,
ModelConfigBase,
ModelType,
SubModelType,
classproperty,
)
class IPAdapterModelFormat(str, Enum):
# The custom IP-Adapter model format defined by InvokeAI.
InvokeAI = "invokeai"
class IPAdapterModel(ModelBase):
class InvokeAIConfig(ModelConfigBase):
model_format: Literal[IPAdapterModelFormat.InvokeAI]
def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
assert model_type == ModelType.IPAdapter
super().__init__(model_path, base_model, model_type)
self.model_size = os.path.getsize(self.model_path)
@classmethod
def detect_format(cls, path: str) -> str:
if not os.path.exists(path):
raise ModuleNotFoundError(f"No IP-Adapter model at path '{path}'.")
if os.path.isdir(path):
model_file = os.path.join(path, "ip_adapter.bin")
image_encoder_config_file = os.path.join(path, "image_encoder.txt")
if os.path.exists(model_file) and os.path.exists(image_encoder_config_file):
return IPAdapterModelFormat.InvokeAI
raise InvalidModelException(f"Unexpected IP-Adapter model format: {path}")
@classproperty
def save_to_config(cls) -> bool:
return True
def get_size(self, child_type: Optional[SubModelType] = None) -> int:
if child_type is not None:
raise ValueError("There are no child models in an IP-Adapter model.")
return self.model_size
def get_model(
self,
torch_dtype: Optional[torch.dtype],
child_type: Optional[SubModelType] = None,
) -> typing.Union[IPAdapter, IPAdapterPlus]:
if child_type is not None:
raise ValueError("There are no child models in an IP-Adapter model.")
return build_ip_adapter(
ip_adapter_ckpt_path=os.path.join(self.model_path, "ip_adapter.bin"), device="cpu", dtype=torch_dtype
)
@classmethod
def convert_if_required(
cls,
model_path: str,
output_path: str,
config: ModelConfigBase,
base_model: BaseModelType,
) -> str:
format = cls.detect_format(model_path)
if format == IPAdapterModelFormat.InvokeAI:
return model_path
else:
raise ValueError(f"Unsupported format: '{format}'.")
def get_ip_adapter_image_encoder_model_id(model_path: str):
"""Read the ID of the image encoder associated with the IP-Adapter at `model_path`."""
image_encoder_config_file = os.path.join(model_path, "image_encoder.txt")
with open(image_encoder_config_file, "r") as f:
image_encoder_model = f.readline().strip()
return image_encoder_model

View File

@ -1,19 +1,21 @@
import os
import json
import os
from enum import Enum
from pydantic import Field
from typing import Literal, Optional
from omegaconf import OmegaConf
from pydantic import Field
from .base import (
ModelConfigBase,
BaseModelType,
DiffusersModel,
InvalidModelException,
ModelConfigBase,
ModelType,
ModelVariantType,
DiffusersModel,
read_checkpoint_meta,
classproperty,
InvalidModelException,
read_checkpoint_meta,
)
from omegaconf import OmegaConf
class StableDiffusionXLModelFormat(str, Enum):

View File

@ -1,26 +1,29 @@
import os
import json
import os
from enum import Enum
from pydantic import Field
from pathlib import Path
from typing import Literal, Optional, Union
from diffusers import StableDiffusionInpaintPipeline, StableDiffusionPipeline
from .base import (
ModelConfigBase,
BaseModelType,
ModelType,
ModelVariantType,
DiffusersModel,
SilenceWarnings,
read_checkpoint_meta,
classproperty,
InvalidModelException,
ModelNotFoundException,
)
from .sdxl import StableDiffusionXLModel
from omegaconf import OmegaConf
from pydantic import Field
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from omegaconf import OmegaConf
from .base import (
BaseModelType,
DiffusersModel,
InvalidModelException,
ModelConfigBase,
ModelNotFoundException,
ModelType,
ModelVariantType,
SilenceWarnings,
classproperty,
read_checkpoint_meta,
)
from .sdxl import StableDiffusionXLModel
class StableDiffusion1ModelFormat(str, Enum):
@ -272,8 +275,8 @@ def _convert_ckpt_and_cache(
return output_path
# to avoid circular import errors
from ..convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
from ...util.devices import choose_torch_device, torch_dtype
from ..convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
model_base_to_model_type = {
BaseModelType.StableDiffusion1: "FrozenCLIPEmbedder",

View File

@ -2,15 +2,16 @@ from enum import Enum
from typing import Literal
from diffusers import OnnxRuntimeModel
from .base import (
ModelConfigBase,
BaseModelType,
DiffusersModel,
IAIOnnxRuntimeModel,
ModelConfigBase,
ModelType,
ModelVariantType,
DiffusersModel,
SchedulerPredictionType,
classproperty,
IAIOnnxRuntimeModel,
)

View File

@ -1,19 +1,20 @@
import os
import torch
from typing import Optional
from .base import (
ModelBase,
ModelConfigBase,
BaseModelType,
ModelType,
SubModelType,
classproperty,
ModelNotFoundException,
InvalidModelException,
)
import torch
# TODO: naming
from ..lora import TextualInversionModel as TextualInversionModelRaw
from .base import (
BaseModelType,
InvalidModelException,
ModelBase,
ModelConfigBase,
ModelNotFoundException,
ModelType,
SubModelType,
classproperty,
)
class TextualInversionModel(ModelBase):

View File

@ -8,19 +8,20 @@ import torch
from omegaconf import OmegaConf
from invokeai.app.services.config import InvokeAIAppConfig
from .base import (
BaseModelType,
EmptyConfigLoader,
InvalidModelException,
ModelBase,
ModelConfigBase,
BaseModelType,
ModelType,
SubModelType,
ModelVariantType,
EmptyConfigLoader,
calc_model_size_by_fs,
calc_model_size_by_data,
classproperty,
InvalidModelException,
ModelNotFoundException,
ModelType,
ModelVariantType,
SubModelType,
calc_model_size_by_data,
calc_model_size_by_fs,
classproperty,
)

View File

@ -1,15 +1,6 @@
"""
Initialization file for the invokeai.backend.stable_diffusion package
"""
from .diffusers_pipeline import ( # noqa: F401
ConditioningData,
PipelineIntermediateState,
StableDiffusionGeneratorPipeline,
)
from .diffusers_pipeline import PipelineIntermediateState, StableDiffusionGeneratorPipeline # noqa: F401
from .diffusion import InvokeAIDiffuserComponent # noqa: F401
from .diffusion.cross_attention_map_saving import AttentionMapSaver # noqa: F401
from .diffusion.shared_invokeai_diffusion import ( # noqa: F401
PostprocessingSettings,
BasicConditioningInfo,
SDXLConditioningInfo,
)

View File

@ -1,24 +1,20 @@
from __future__ import annotations
import dataclasses
import inspect
from dataclasses import dataclass, field
import math
from contextlib import nullcontext
from dataclasses import dataclass
from typing import Any, Callable, List, Optional, Union
import PIL.Image
import einops
import PIL.Image
import psutil
import torch
import torchvision.transforms as T
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.controlnet import ControlNetModel
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import (
StableDiffusionPipeline,
)
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
)
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput
from diffusers.utils.import_utils import is_xformers_available
@ -27,13 +23,11 @@ from pydantic import Field
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from invokeai.app.services.config import InvokeAIAppConfig
from .diffusion import (
AttentionMapSaver,
InvokeAIDiffuserComponent,
PostprocessingSettings,
BasicConditioningInfo,
)
from ..util import normalize_device, auto_detect_slice_size
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData
from ..util import auto_detect_slice_size, normalize_device
from .diffusion import AttentionMapSaver, InvokeAIDiffuserComponent
@dataclass
@ -103,7 +97,7 @@ class AddsMaskGuidance:
# Mask anything that has the same shape as prev_sample, return others as-is.
return output_class(
{
k: (self.apply_mask(v, self._t_for_field(k, t)) if are_like_tensors(prev_sample, v) else v)
k: self.apply_mask(v, self._t_for_field(k, t)) if are_like_tensors(prev_sample, v) else v
for k, v in step_output.items()
}
)
@ -170,39 +164,13 @@ class ControlNetData:
@dataclass
class ConditioningData:
unconditioned_embeddings: BasicConditioningInfo
text_embeddings: BasicConditioningInfo
guidance_scale: Union[float, List[float]]
"""
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
`guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf).
Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
"""
extra: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo] = None
scheduler_args: dict[str, Any] = field(default_factory=dict)
"""
Additional arguments to pass to invokeai_diffuser.do_latent_postprocessing().
"""
postprocessing_settings: Optional[PostprocessingSettings] = None
@property
def dtype(self):
return self.text_embeddings.dtype
def add_scheduler_args_if_applicable(self, scheduler, **kwargs):
scheduler_args = dict(self.scheduler_args)
step_method = inspect.signature(scheduler.step)
for name, value in kwargs.items():
try:
step_method.bind_partial(**{name: value})
except TypeError:
# FIXME: don't silently discard arguments
pass # debug("%s does not accept argument named %r", scheduler, name)
else:
scheduler_args[name] = value
return dataclasses.replace(self, scheduler_args=scheduler_args)
class IPAdapterData:
ip_adapter_model: IPAdapter = Field(default=None)
# TODO: change to polymorphic so can do different weights per step (once implemented...)
weight: Union[float, List[float]] = Field(default=1.0)
# weight: float = Field(default=1.0)
begin_step_percent: float = Field(default=0.0)
end_step_percent: float = Field(default=1.0)
@dataclass
@ -285,6 +253,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
)
self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
self.control_model = control_model
self.use_ip_adapter = False
def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
"""
@ -357,6 +326,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
additional_guidance: List[Callable] = None,
callback: Callable[[PipelineIntermediateState], None] = None,
control_data: List[ControlNetData] = None,
ip_adapter_data: Optional[IPAdapterData] = None,
mask: Optional[torch.Tensor] = None,
masked_latents: Optional[torch.Tensor] = None,
seed: Optional[int] = None,
@ -408,6 +378,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
conditioning_data,
additional_guidance=additional_guidance,
control_data=control_data,
ip_adapter_data=ip_adapter_data,
callback=callback,
)
finally:
@ -427,6 +398,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
*,
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
ip_adapter_data: Optional[IPAdapterData] = None,
callback: Callable[[PipelineIntermediateState], None] = None,
):
self._adjust_memory_efficient_attention(latents)
@ -439,12 +411,26 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
if timesteps.shape[0] == 0:
return latents, attention_map_saver
extra_conditioning_info = conditioning_data.extra
with self.invokeai_diffuser.custom_attention_context(
self.invokeai_diffuser.model,
extra_conditioning_info=extra_conditioning_info,
step_count=len(self.scheduler.timesteps),
):
if conditioning_data.extra is not None and conditioning_data.extra.wants_cross_attention_control:
attn_ctx = self.invokeai_diffuser.custom_attention_context(
self.invokeai_diffuser.model,
extra_conditioning_info=conditioning_data.extra,
step_count=len(self.scheduler.timesteps),
)
self.use_ip_adapter = False
elif ip_adapter_data is not None:
# TODO(ryand): Should we raise an exception if both custom attention and IP-Adapter attention are active?
# As it is now, the IP-Adapter will silently be skipped.
weight = ip_adapter_data.weight[0] if isinstance(ip_adapter_data.weight, List) else ip_adapter_data.weight
attn_ctx = ip_adapter_data.ip_adapter_model.apply_ip_adapter_attention(
unet=self.invokeai_diffuser.model,
scale=weight,
)
self.use_ip_adapter = True
else:
attn_ctx = nullcontext()
with attn_ctx:
if callback is not None:
callback(
PipelineIntermediateState(
@ -467,6 +453,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
total_step_count=len(timesteps),
additional_guidance=additional_guidance,
control_data=control_data,
ip_adapter_data=ip_adapter_data,
)
latents = step_output.prev_sample
@ -512,6 +499,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
total_step_count: int,
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
ip_adapter_data: Optional[IPAdapterData] = None,
):
# invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
timestep = t[0]
@ -522,6 +510,24 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# i.e. before or after passing it to InvokeAIDiffuserComponent
latent_model_input = self.scheduler.scale_model_input(latents, timestep)
# handle IP-Adapter
if self.use_ip_adapter and ip_adapter_data is not None: # somewhat redundant but logic is clearer
first_adapter_step = math.floor(ip_adapter_data.begin_step_percent * total_step_count)
last_adapter_step = math.ceil(ip_adapter_data.end_step_percent * total_step_count)
weight = (
ip_adapter_data.weight[step_index]
if isinstance(ip_adapter_data.weight, List)
else ip_adapter_data.weight
)
if step_index >= first_adapter_step and step_index <= last_adapter_step:
# only apply IP-Adapter if current step is within the IP-Adapter's begin/end step range
# ip_adapter_data.ip_adapter_model.set_scale(ip_adapter_data.weight)
ip_adapter_data.ip_adapter_model.set_scale(weight)
else:
# otherwise, set IP-Adapter scale to 0, so it has no effect
ip_adapter_data.ip_adapter_model.set_scale(0.0)
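                # Worked example (illustrative values only) of the gating above: with
                # total_step_count=30, begin_step_percent=0.2 and end_step_percent=0.8,
                # first_adapter_step = math.floor(0.2 * 30) = 6 and
                # last_adapter_step = math.ceil(0.8 * 30) = 24, so the IP-Adapter scale
                # is applied for step indices 6 through 24 inclusive and set to 0.0 elsewhere.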
# handle ControlNet(s)
# default is no controlnet, so set controlnet processing output to None
controlnet_down_block_samples, controlnet_mid_block_sample = None, None
if control_data is not None:

View File

@ -3,9 +3,4 @@ Initialization file for invokeai.models.diffusion
"""
from .cross_attention_control import InvokeAICrossAttentionMixin # noqa: F401
from .cross_attention_map_saving import AttentionMapSaver # noqa: F401
from .shared_invokeai_diffusion import ( # noqa: F401
InvokeAIDiffuserComponent,
PostprocessingSettings,
BasicConditioningInfo,
SDXLConditioningInfo,
)
from .shared_invokeai_diffusion import InvokeAIDiffuserComponent # noqa: F401

View File

@ -0,0 +1,101 @@
import dataclasses
import inspect
from dataclasses import dataclass, field
from typing import Any, List, Optional, Union
import torch
from .cross_attention_control import Arguments
@dataclass
class ExtraConditioningInfo:
tokens_count_including_eos_bos: int
cross_attention_control_args: Optional[Arguments] = None
@property
def wants_cross_attention_control(self):
return self.cross_attention_control_args is not None
@dataclass
class BasicConditioningInfo:
embeds: torch.Tensor
# TODO(ryand): Right now we awkwardly copy the extra conditioning info from here up to `ConditioningData`. This
# should only be stored in one place.
extra_conditioning: Optional[ExtraConditioningInfo]
# weight: float
# mode: ConditioningAlgo
def to(self, device, dtype=None):
self.embeds = self.embeds.to(device=device, dtype=dtype)
return self
@dataclass
class SDXLConditioningInfo(BasicConditioningInfo):
pooled_embeds: torch.Tensor
add_time_ids: torch.Tensor
def to(self, device, dtype=None):
self.pooled_embeds = self.pooled_embeds.to(device=device, dtype=dtype)
self.add_time_ids = self.add_time_ids.to(device=device, dtype=dtype)
return super().to(device=device, dtype=dtype)
@dataclass(frozen=True)
class PostprocessingSettings:
threshold: float
warmup: float
h_symmetry_time_pct: Optional[float]
v_symmetry_time_pct: Optional[float]
@dataclass
class IPAdapterConditioningInfo:
cond_image_prompt_embeds: torch.Tensor
"""IP-Adapter image encoder conditioning embeddings.
Shape: (batch_size, num_tokens, encoding_dim).
"""
uncond_image_prompt_embeds: torch.Tensor
"""IP-Adapter image encoding embeddings to use for unconditional generation.
Shape: (batch_size, num_tokens, encoding_dim).
"""
@dataclass
class ConditioningData:
unconditioned_embeddings: BasicConditioningInfo
text_embeddings: BasicConditioningInfo
guidance_scale: Union[float, List[float]]
"""
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
`guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf).
Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
"""
extra: Optional[ExtraConditioningInfo] = None
scheduler_args: dict[str, Any] = field(default_factory=dict)
"""
Additional arguments to pass to invokeai_diffuser.do_latent_postprocessing().
"""
postprocessing_settings: Optional[PostprocessingSettings] = None
ip_adapter_conditioning: Optional[IPAdapterConditioningInfo] = None
@property
def dtype(self):
return self.text_embeddings.dtype
def add_scheduler_args_if_applicable(self, scheduler, **kwargs):
scheduler_args = dict(self.scheduler_args)
step_method = inspect.signature(scheduler.step)
for name, value in kwargs.items():
try:
step_method.bind_partial(**{name: value})
except TypeError:
# FIXME: don't silently discard arguments
pass # debug("%s does not accept argument named %r", scheduler, name)
else:
scheduler_args[name] = value
return dataclasses.replace(self, scheduler_args=scheduler_args)
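A minimal, illustrative sketch of wiring these dataclasses together (tensor shapes are placeholders, not values InvokeAI produces; guidance_scale=7.5 is simply a common choice):

import torch

text_cond = BasicConditioningInfo(embeds=torch.zeros(1, 77, 768), extra_conditioning=None)
text_uncond = BasicConditioningInfo(embeds=torch.zeros(1, 77, 768), extra_conditioning=None)
ip_cond = IPAdapterConditioningInfo(
    cond_image_prompt_embeds=torch.zeros(1, 4, 768),
    uncond_image_prompt_embeds=torch.zeros(1, 4, 768),
)
conditioning = ConditioningData(
    unconditioned_embeddings=text_uncond,
    text_embeddings=text_cond,
    guidance_scale=7.5,
    ip_adapter_conditioning=ip_cond,
)
# With guidance scale w, the usual classifier-free guidance combination is
# uncond_pred + w * (cond_pred - uncond_pred); w > 1 biases output toward the prompt.
assert conditioning.dtype == torch.float32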

View File

@ -11,16 +11,12 @@ import diffusers
import psutil
import torch
from compel.cross_attention_control import Arguments
from diffusers.models.attention_processor import Attention, AttentionProcessor, AttnProcessor, SlicedAttnProcessor
from diffusers.models.unet_2d_condition import UNet2DConditionModel
from diffusers.models.attention_processor import AttentionProcessor
from diffusers.models.attention_processor import (
Attention,
AttnProcessor,
SlicedAttnProcessor,
)
from torch import nn
import invokeai.backend.util.logging as logger
from ...util import torch_dtype
@ -380,11 +376,11 @@ def get_cross_attention_modules(model, which: CrossAttentionType) -> list[tuple[
# non-fatal error but .swap() won't work.
logger.error(
f"Error! CrossAttentionControl found an unexpected number of {cross_attention_class} modules in the model "
+ f"(expected {expected_count}, found {cross_attention_modules_in_model_count}). Either monkey-patching failed "
+ "or some assumption has changed about the structure of the model itself. Please fix the monkey-patching, "
+ f"and/or update the {expected_count} above to an appropriate number, and/or find and inform someone who knows "
+ "what it means. This error is non-fatal, but it is likely that .swap() and attention map display will not "
+ "work properly until it is fixed."
f"(expected {expected_count}, found {cross_attention_modules_in_model_count}). Either monkey-patching "
"failed or some assumption has changed about the structure of the model itself. Please fix the "
f"monkey-patching, and/or update the {expected_count} above to an appropriate number, and/or find and "
"inform someone who knows what it means. This error is non-fatal, but it is likely that .swap() and "
"attention map display will not work properly until it is fixed."
)
return attention_module_tuples
@ -581,6 +577,7 @@ class SlicedSwapCrossAttnProcesser(SlicedAttnProcessor):
attention_mask=None,
# kwargs
swap_cross_attn_context: SwapCrossAttnContext = None,
**kwargs,
):
attention_type = CrossAttentionType.SELF if encoder_hidden_states is None else CrossAttentionType.TOKENS

View File

@ -1,8 +1,7 @@
from __future__ import annotations
from contextlib import contextmanager
from dataclasses import dataclass
import math
from contextlib import contextmanager
from typing import Any, Callable, Optional, Union
import torch
@ -10,9 +9,14 @@ from diffusers import UNet2DConditionModel
from typing_extensions import TypeAlias
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
ConditioningData,
ExtraConditioningInfo,
PostprocessingSettings,
SDXLConditioningInfo,
)
from .cross_attention_control import (
Arguments,
Context,
CrossAttentionType,
SwapCrossAttnContext,
@ -31,37 +35,6 @@ ModelForwardCallback: TypeAlias = Union[
]
@dataclass
class BasicConditioningInfo:
embeds: torch.Tensor
extra_conditioning: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo]
# weight: float
# mode: ConditioningAlgo
def to(self, device, dtype=None):
self.embeds = self.embeds.to(device=device, dtype=dtype)
return self
@dataclass
class SDXLConditioningInfo(BasicConditioningInfo):
pooled_embeds: torch.Tensor
add_time_ids: torch.Tensor
def to(self, device, dtype=None):
self.pooled_embeds = self.pooled_embeds.to(device=device, dtype=dtype)
self.add_time_ids = self.add_time_ids.to(device=device, dtype=dtype)
return super().to(device=device, dtype=dtype)
@dataclass(frozen=True)
class PostprocessingSettings:
threshold: float
warmup: float
h_symmetry_time_pct: Optional[float]
v_symmetry_time_pct: Optional[float]
class InvokeAIDiffuserComponent:
"""
The aim of this component is to provide a single place for code that can be applied identically to
@ -75,15 +48,6 @@ class InvokeAIDiffuserComponent:
debug_thresholding = False
sequential_guidance = False
@dataclass
class ExtraConditioningInfo:
tokens_count_including_eos_bos: int
cross_attention_control_args: Optional[Arguments] = None
@property
def wants_cross_attention_control(self):
return self.cross_attention_control_args is not None
def __init__(
self,
model,
@ -103,30 +67,26 @@ class InvokeAIDiffuserComponent:
@contextmanager
def custom_attention_context(
self,
unet: UNet2DConditionModel, # note: also may futz with the text encoder depending on requested LoRAs
unet: UNet2DConditionModel,
extra_conditioning_info: Optional[ExtraConditioningInfo],
step_count: int,
):
old_attn_processors = None
if extra_conditioning_info and (extra_conditioning_info.wants_cross_attention_control):
old_attn_processors = unet.attn_processors
# Load lora conditions into the model
if extra_conditioning_info.wants_cross_attention_control:
self.cross_attention_control_context = Context(
arguments=extra_conditioning_info.cross_attention_control_args,
step_count=step_count,
)
setup_cross_attention_control_attention_processors(
unet,
self.cross_attention_control_context,
)
old_attn_processors = unet.attn_processors
try:
self.cross_attention_control_context = Context(
arguments=extra_conditioning_info.cross_attention_control_args,
step_count=step_count,
)
setup_cross_attention_control_attention_processors(
unet,
self.cross_attention_control_context,
)
yield None
finally:
self.cross_attention_control_context = None
if old_attn_processors is not None:
unet.set_attn_processor(old_attn_processors)
unet.set_attn_processor(old_attn_processors)
# TODO resuscitate attention map saving
# self.remove_attention_map_saving()
@ -376,11 +336,24 @@ class InvokeAIDiffuserComponent:
# methods below are called from do_diffusion_step and should be considered private to this class.
def _apply_standard_conditioning(self, x, sigma, conditioning_data, **kwargs):
# fast batched path
def _apply_standard_conditioning(self, x, sigma, conditioning_data: ConditioningData, **kwargs):
"""Runs the conditioned and unconditioned UNet forward passes in a single batch for faster inference speed at
the cost of higher memory usage.
"""
x_twice = torch.cat([x] * 2)
sigma_twice = torch.cat([sigma] * 2)
cross_attention_kwargs = None
if conditioning_data.ip_adapter_conditioning is not None:
cross_attention_kwargs = {
"ip_adapter_image_prompt_embeds": torch.cat(
[
conditioning_data.ip_adapter_conditioning.uncond_image_prompt_embeds,
conditioning_data.ip_adapter_conditioning.cond_image_prompt_embeds,
]
)
}
added_cond_kwargs = None
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
added_cond_kwargs = {
@ -408,6 +381,7 @@ class InvokeAIDiffuserComponent:
x_twice,
sigma_twice,
both_conditionings,
cross_attention_kwargs=cross_attention_kwargs,
encoder_attention_mask=encoder_attention_mask,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
@ -419,9 +393,12 @@ class InvokeAIDiffuserComponent:
self,
x: torch.Tensor,
sigma,
conditioning_data,
conditioning_data: ConditioningData,
**kwargs,
):
"""Runs the conditioned and unconditioned UNet forward passes sequentially for lower memory usage at the cost of
slower execution speed.
"""
# low-memory sequential path
uncond_down_block, cond_down_block = None, None
down_block_additional_residuals = kwargs.pop("down_block_additional_residuals", None)
@ -437,6 +414,13 @@ class InvokeAIDiffuserComponent:
if mid_block_additional_residual is not None:
uncond_mid_block, cond_mid_block = mid_block_additional_residual.chunk(2)
# Run unconditional UNet denoising.
cross_attention_kwargs = None
if conditioning_data.ip_adapter_conditioning is not None:
cross_attention_kwargs = {
"ip_adapter_image_prompt_embeds": conditioning_data.ip_adapter_conditioning.uncond_image_prompt_embeds
}
added_cond_kwargs = None
is_sdxl = type(conditioning_data.text_embeddings) is SDXLConditioningInfo
if is_sdxl:
@ -449,12 +433,21 @@ class InvokeAIDiffuserComponent:
x,
sigma,
conditioning_data.unconditioned_embeddings.embeds,
cross_attention_kwargs=cross_attention_kwargs,
down_block_additional_residuals=uncond_down_block,
mid_block_additional_residual=uncond_mid_block,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
)
# Run conditional UNet denoising.
cross_attention_kwargs = None
if conditioning_data.ip_adapter_conditioning is not None:
cross_attention_kwargs = {
"ip_adapter_image_prompt_embeds": conditioning_data.ip_adapter_conditioning.cond_image_prompt_embeds
}
added_cond_kwargs = None
if is_sdxl:
added_cond_kwargs = {
"text_embeds": conditioning_data.text_embeddings.pooled_embeds,
@ -465,6 +458,7 @@ class InvokeAIDiffuserComponent:
x,
sigma,
conditioning_data.text_embeddings.embeds,
cross_attention_kwargs=cross_attention_kwargs,
down_block_additional_residuals=cond_down_block,
mid_block_additional_residual=cond_mid_block,
added_cond_kwargs=added_cond_kwargs,

View File

@ -1,18 +1,18 @@
from diffusers import (
DDIMScheduler,
DDPMScheduler,
DEISMultistepScheduler,
DPMSolverMultistepScheduler,
KDPM2DiscreteScheduler,
KDPM2AncestralDiscreteScheduler,
EulerDiscreteScheduler,
DPMSolverSDEScheduler,
DPMSolverSinglestepScheduler,
EulerAncestralDiscreteScheduler,
EulerDiscreteScheduler,
HeunDiscreteScheduler,
KDPM2AncestralDiscreteScheduler,
KDPM2DiscreteScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
UniPCMultistepScheduler,
DPMSolverSinglestepScheduler,
DEISMultistepScheduler,
DDPMScheduler,
DPMSolverSDEScheduler,
)
SCHEDULER_MAP = dict(

View File

@ -24,13 +24,8 @@ import torch.utils.checkpoint
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed, ProjectConfiguration
from diffusers import (
AutoencoderKL,
DDPMScheduler,
StableDiffusionPipeline,
UNet2DConditionModel,
)
from accelerate.utils import ProjectConfiguration, set_seed
from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel
from diffusers.optimization import get_scheduler
from diffusers.utils import check_min_version
from diffusers.utils.import_utils import is_xformers_available

View File

@ -1,6 +1,7 @@
"""
Initialization file for invokeai.backend.util
"""
from .attention import auto_detect_slice_size # noqa: F401
from .devices import ( # noqa: F401
CPU_DEVICE,
CUDA_DEVICE,
@ -10,11 +11,4 @@ from .devices import ( # noqa: F401
normalize_device,
torch_dtype,
)
from .util import ( # noqa: F401
ask_user,
download_with_resume,
instantiate_from_config,
url_attachment_name,
Chdir,
)
from .attention import auto_detect_slice_size # noqa: F401
from .util import Chdir, ask_user, download_with_resume, instantiate_from_config, url_attachment_name # noqa: F401

View File

@ -3,8 +3,8 @@
Utility routine used for autodetection of optimal slice size
for attention mechanism.
"""
import torch
import psutil
import torch
def auto_detect_slice_size(latents: torch.Tensor) -> str:

View File

@ -0,0 +1,568 @@
# pylint: disable=line-too-long
# pylint: disable=broad-exception-caught
# pylint: disable=missing-function-docstring
"""Script to peform db maintenance and outputs directory management."""
import argparse
import datetime
import enum
import glob
import locale
import os
import shutil
import sqlite3
from pathlib import Path
import PIL
import PIL.ImageOps
import PIL.PngImagePlugin
import yaml
class ConfigMapper:
"""Configuration loader."""
def __init__(self): # noqa D107
pass
TIMESTAMP_STRING = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
INVOKE_DIRNAME = "invokeai"
YAML_FILENAME = "invokeai.yaml"
DATABASE_FILENAME = "invokeai.db"
database_path = None
database_backup_dir = None
outputs_path = None
archive_path = None
thumbnails_path = None
thumbnails_archive_path = None
def load(self):
"""Read paths from yaml config and validate."""
root = "."
if not self.__load_from_root_config(os.path.abspath(root)):
return False
return True
def __load_from_root_config(self, invoke_root):
"""Validate a yaml path exists, confirm the user wants to use it and load config."""
yaml_path = os.path.join(invoke_root, self.YAML_FILENAME)
if os.path.exists(yaml_path):
db_dir, outdir = self.__load_paths_from_yaml_file(yaml_path)
if db_dir is None or outdir is None:
print("The invokeai.yaml file was found but is missing the db_dir and/or outdir setting!")
return False
if os.path.isabs(db_dir):
self.database_path = os.path.join(db_dir, self.DATABASE_FILENAME)
else:
self.database_path = os.path.join(invoke_root, db_dir, self.DATABASE_FILENAME)
self.database_backup_dir = os.path.join(os.path.dirname(self.database_path), "backup")
if os.path.isabs(outdir):
self.outputs_path = os.path.join(outdir, "images")
self.archive_path = os.path.join(outdir, "images-archive")
else:
self.outputs_path = os.path.join(invoke_root, outdir, "images")
self.archive_path = os.path.join(invoke_root, outdir, "images-archive")
self.thumbnails_path = os.path.join(self.outputs_path, "thumbnails")
self.thumbnails_archive_path = os.path.join(self.archive_path, "thumbnails")
db_exists = os.path.exists(self.database_path)
outdir_exists = os.path.exists(self.outputs_path)
text = f"Found {self.YAML_FILENAME} file at {yaml_path}:"
text += f"\n Database : {self.database_path} - {'Exists!' if db_exists else 'Not Found!'}"
text += f"\n Outputs : {self.outputs_path} - {'Exists!' if outdir_exists else 'Not Found!'}"
print(text)
if db_exists and outdir_exists:
return True
else:
print(
"\nOne or more paths specified in invoke.yaml do not exist. Please inspect/correct the configuration and ensure the script is run in the developer console mode (option 8) from an Invoke AI root directory."
)
return False
else:
print(
f"Auto-discovery of configuration failed! Could not find ({yaml_path})!\n\nPlease ensure the script is run in the developer console mode (option 8) from an Invoke AI root directory."
)
return False
def __load_paths_from_yaml_file(self, yaml_path):
"""Load an Invoke AI yaml file and get the database and outputs paths."""
try:
with open(yaml_path, "rt", encoding=locale.getpreferredencoding()) as file:
yamlinfo = yaml.safe_load(file)
db_dir = yamlinfo.get("InvokeAI", {}).get("Paths", {}).get("db_dir", None)
outdir = yamlinfo.get("InvokeAI", {}).get("Paths", {}).get("outdir", None)
return db_dir, outdir
except Exception:
print(f"Failed to load paths from yaml file! {yaml_path}!")
return None, None
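    # For reference, a hedged sketch of the invokeai.yaml fragment this loader
    # expects (layout assumed from the keys read above; values are placeholders):
    #
    #   InvokeAI:
    #     Paths:
    #       db_dir: databases
    #       outdir: outputs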
class MaintenanceStats:
"""DTO for tracking work progress."""
def __init__(self): # noqa D107
pass
time_start = datetime.datetime.utcnow()
count_orphaned_db_entries_cleaned = 0
count_orphaned_disk_files_cleaned = 0
count_orphaned_thumbnails_cleaned = 0
count_thumbnails_regenerated = 0
count_errors = 0
@staticmethod
def get_elapsed_time_string():
"""Get a friendly time string for the time elapsed since processing start."""
time_now = datetime.datetime.utcnow()
total_seconds = (time_now - MaintenanceStats.time_start).total_seconds()
hours = int((total_seconds) / 3600)
minutes = int(((total_seconds) % 3600) / 60)
seconds = total_seconds % 60
out_str = f"{hours} hour(s) -" if hours > 0 else ""
out_str += f"{minutes} minute(s) -" if minutes > 0 else ""
out_str += f"{seconds:.2f} second(s)"
return out_str
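        # Worked example (illustrative): total_seconds = 3725.5 gives hours=1,
        # minutes=2, seconds=5.5, so the returned string is
        # "1 hour(s) -2 minute(s) -5.50 second(s)".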
class DatabaseMapper:
"""Class to abstract database functionality."""
def __init__(self, database_path, database_backup_dir): # noqa D107
self.database_path = database_path
self.database_backup_dir = database_backup_dir
self.connection = None
self.cursor = None
def backup(self, timestamp_string):
"""Take a backup of the database."""
if not os.path.exists(self.database_backup_dir):
print(f"Database backup directory {self.database_backup_dir} does not exist -> creating...", end="")
os.makedirs(self.database_backup_dir)
print("Done!")
database_backup_path = os.path.join(self.database_backup_dir, f"backup-{timestamp_string}-invokeai.db")
print(f"Making DB Backup at {database_backup_path}...", end="")
shutil.copy2(self.database_path, database_backup_path)
print("Done!")
def connect(self):
"""Open connection to the database."""
self.connection = sqlite3.connect(self.database_path)
self.cursor = self.connection.cursor()
def get_all_image_files(self):
"""Get the full list of image file names from the database."""
sql_get_image_by_name = "SELECT image_name FROM images"
self.cursor.execute(sql_get_image_by_name)
rows = self.cursor.fetchall()
db_files = []
for row in rows:
db_files.append(row[0])
return db_files
def remove_image_file_record(self, filename: str):
"""Remove an image file reference from the database by filename."""
sanitized_filename = str.replace(filename, "'", "''") # prevent injection
sql_command = f"DELETE FROM images WHERE image_name='{sanitized_filename}'"
self.cursor.execute(sql_command)
self.connection.commit()
def does_image_exist(self, image_filename):
"""Check database if a image name already exists and return a boolean."""
sanitized_filename = str.replace(image_filename, "'", "''") # prevent injection
sql_get_image_by_name = f"SELECT image_name FROM images WHERE image_name='{sanitized_filename}'"
self.cursor.execute(sql_get_image_by_name)
rows = self.cursor.fetchall()
return True if len(rows) > 0 else False
def disconnect(self):
"""Disconnect from the db, cleaning up connections and cursors."""
if self.cursor is not None:
self.cursor.close()
if self.connection is not None:
self.connection.close()
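    # Design note (hedged alternative, not what this script does): the query
    # helpers above escape single quotes by hand before interpolating filenames
    # into SQL strings. sqlite3 parameter binding avoids manual escaping, e.g.:
    #   self.cursor.execute("SELECT 1 FROM images WHERE image_name = ?", (image_filename,))
    #   return self.cursor.fetchone() is not None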
class PhysicalFileMapper:
"""Containing class for script functionality."""
def __init__(self, outputs_path, thumbnails_path, archive_path, thumbnails_archive_path): # noqa D107
self.outputs_path = outputs_path
self.archive_path = archive_path
self.thumbnails_path = thumbnails_path
self.thumbnails_archive_path = thumbnails_archive_path
def create_archive_directories(self):
"""Create the directory for archiving orphaned image files."""
if not os.path.exists(self.archive_path):
print(f"Image archive directory ({self.archive_path}) does not exist -> creating...", end="")
os.makedirs(self.archive_path)
print("Created!")
if not os.path.exists(self.thumbnails_archive_path):
print(
f"Image thumbnails archive directory ({self.thumbnails_archive_path}) does not exist -> creating...",
end="",
)
os.makedirs(self.thumbnails_archive_path)
print("Created!")
def get_image_path_for_image_name(self, image_filename): # noqa D102
return os.path.join(self.outputs_path, image_filename)
def image_file_exists(self, image_filename): # noqa D102
return os.path.exists(self.get_image_path_for_image_name(image_filename))
def get_thumbnail_path_for_image(self, image_filename): # noqa D102
return os.path.join(self.thumbnails_path, os.path.splitext(image_filename)[0]) + ".webp"
def get_image_name_from_thumbnail_path(self, thumbnail_path): # noqa D102
return os.path.splitext(os.path.basename(thumbnail_path))[0] + ".png"
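    # Example mapping (illustrative name): "abc123.png" -> "<thumbnails_path>/abc123.webp"
    # via get_thumbnail_path_for_image, and ".../abc123.webp" -> "abc123.png"
    # via get_image_name_from_thumbnail_path.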
def thumbnail_exists_for_filename(self, image_filename): # noqa D102
return os.path.exists(self.get_thumbnail_path_for_image(image_filename))
def archive_image(self, image_filename): # noqa D102
if self.image_file_exists(image_filename):
image_path = self.get_image_path_for_image_name(image_filename)
shutil.move(image_path, self.archive_path)
def archive_thumbnail_by_image_filename(self, image_filename): # noqa D102
if self.thumbnail_exists_for_filename(image_filename):
thumbnail_path = self.get_thumbnail_path_for_image(image_filename)
shutil.move(thumbnail_path, self.thumbnails_archive_path)
def get_all_png_filenames_in_directory(self, directory_path): # noqa D102
filepaths = glob.glob(directory_path + "/*.png", recursive=False)
filenames = []
for filepath in filepaths:
filenames.append(os.path.basename(filepath))
return filenames
def get_all_thumbnails_with_full_path(self, thumbnails_directory): # noqa D102
return glob.glob(thumbnails_directory + "/*.webp", recursive=False)
def generate_thumbnail_for_image_name(self, image_filename): # noqa D102
# create thumbnail
file_path = self.get_image_path_for_image_name(image_filename)
thumb_path = self.get_thumbnail_path_for_image(image_filename)
thumb_size = 256, 256
with PIL.Image.open(file_path) as source_image:
source_image.thumbnail(thumb_size)
source_image.save(thumb_path, "webp")
class MaintenanceOperation(str, enum.Enum):
"""Enum class for operations."""
Ask = "ask"
CleanOrphanedDbEntries = "clean"
CleanOrphanedDiskFiles = "archive"
ReGenerateThumbnails = "thumbnails"
All = "all"
class InvokeAIDatabaseMaintenanceApp:
"""Main processor class for the application."""
_operation: MaintenanceOperation
_headless: bool = False
__stats: MaintenanceStats = MaintenanceStats()
def __init__(self, operation: MaintenanceOperation = MaintenanceOperation.Ask):
"""Initialize maintenance app."""
self._operation = MaintenanceOperation(operation)
self._headless = operation != MaintenanceOperation.Ask
def ask_for_operation(self) -> MaintenanceOperation:
"""Ask user to choose the operation to perform."""
while True:
print()
print("It is recommennded to run these operations as ordered below to avoid additional")
print("work being performed that will be discarded in a subsequent step.")
print()
print("Select maintenance operation:")
print()
print("1) Clean Orphaned Database Image Entries")
print(" Cleans entries in the database where the matching file was removed from")
print(" the outputs directory.")
print("2) Archive Orphaned Image Files")
print(" Files found in the outputs directory without an entry in the database are")
print(" moved to an archive directory.")
print("3) Re-Generate Missing Thumbnail Files")
print(" For files found in the outputs directory, re-generate a thumbnail if it")
print(" not found in the thumbnails directory.")
print()
print("(CTRL-C to quit)")
try:
input_option = int(input("Specify desired operation number (1-3): "))
operations = [
MaintenanceOperation.CleanOrphanedDbEntries,
MaintenanceOperation.CleanOrphanedDiskFiles,
MaintenanceOperation.ReGenerateThumbnails,
]
return operations[input_option - 1]
except (IndexError, ValueError):
print("\nInvalid selection!")
def ask_to_continue(self) -> bool:
"""Ask user whether they want to continue with the operation."""
while True:
input_choice = input("Do you wish to continue? (Y or N)? ")
if str.lower(input_choice) == "y":
return True
if str.lower(input_choice) == "n":
return False
def clean_orphaned_db_entries(
self, config: ConfigMapper, file_mapper: PhysicalFileMapper, db_mapper: DatabaseMapper
):
"""Clean dangling database entries that no longer point to a file in outputs."""
if self._headless:
print(f"Removing database references to images that no longer exist in {config.outputs_path}...")
else:
print()
print("===============================================================================")
print("= Clean Orphaned Database Entries")
print()
print("Perform this operation if you have removed files from the outputs/images")
print("directory but the database was never updated. You may see this as empty imaages")
print("in the app gallery, or images that only show an enlarged version of the")
print("thumbnail.")
print()
print(f"Database File Path : {config.database_path}")
print(f"Database backup will be taken at : {config.database_backup_dir}")
print(f"Outputs/Images Directory : {config.outputs_path}")
print(f"Outputs/Images Archive Directory : {config.archive_path}")
print("\nNotes about this operation:")
print("- This operation will find database image file entries that do not exist in the")
print(" outputs/images dir and remove those entries from the database.")
print("- This operation will target all image types including intermediate files.")
print("- If a thumbnail still exists in outputs/images/thumbnails matching the")
print(" orphaned entry, it will be moved to the archive directory.")
print()
if not self.ask_to_continue():
raise KeyboardInterrupt
file_mapper.create_archive_directories()
db_mapper.backup(config.TIMESTAMP_STRING)
db_mapper.connect()
db_files = db_mapper.get_all_image_files()
for db_file in db_files:
try:
if not file_mapper.image_file_exists(db_file):
print(f"Found orphaned image db entry {db_file}. Cleaning ...", end="")
db_mapper.remove_image_file_record(db_file)
print("Cleaned!")
if file_mapper.thumbnail_exists_for_filename(db_file):
print("A thumbnail was found, archiving ...", end="")
file_mapper.archive_thumbnail_by_image_filename(db_file)
print("Archived!")
self.__stats.count_orphaned_db_entries_cleaned += 1
except Exception as ex:
print("An error occurred cleaning db entry, error was:")
print(ex)
self.__stats.count_errors += 1
def clean_orphaned_disk_files(
self, config: ConfigMapper, file_mapper: PhysicalFileMapper, db_mapper: DatabaseMapper
):
"""Archive image files that no longer have entries in the database."""
if self._headless:
print(f"Archiving orphaned image files to {config.archive_path}...")
else:
print()
print("===============================================================================")
print("= Clean Orphaned Disk Files")
print()
print("Perform this operation if you have files that were copied into the outputs")
print("directory which are not referenced by the database. This can happen if you")
print("upgraded to a version with a fresh database, but re-used the outputs directory")
print("and now new images are mixed with the files not in the db. The script will")
print("archive these files so you can choose to delete them or re-import using the")
print("official import script.")
print()
print(f"Database File Path : {config.database_path}")
print(f"Database backup will be taken at : {config.database_backup_dir}")
print(f"Outputs/Images Directory : {config.outputs_path}")
print(f"Outputs/Images Archive Directory : {config.archive_path}")
print("\nNotes about this operation:")
print("- This operation will find image files not referenced by the database and move to an")
print(" archive directory.")
print("- This operation will target all image types including intermediate references.")
print("- The matching thumbnail will also be archived.")
print("- Any remaining orphaned thumbnails will also be archived.")
if not self.ask_to_continue():
raise KeyboardInterrupt
print()
file_mapper.create_archive_directories()
db_mapper.backup(config.TIMESTAMP_STRING)
db_mapper.connect()
phys_files = file_mapper.get_all_png_filenames_in_directory(config.outputs_path)
for phys_file in phys_files:
try:
if not db_mapper.does_image_exist(phys_file):
print(f"Found orphaned file {phys_file}, archiving...", end="")
file_mapper.archive_image(phys_file)
print("Archived!")
if file_mapper.thumbnail_exists_for_filename(phys_file):
print("Related thumbnail exists, archiving...", end="")
file_mapper.archive_thumbnail_by_image_filename(phys_file)
print("Archived!")
else:
print("No matching thumbnail existed to be cleaned.")
self.__stats.count_orphaned_disk_files_cleaned += 1
except Exception as ex:
print("Error found trying to archive file or thumbnail, error was:")
print(ex)
self.__stats.count_errors += 1
thumb_filepaths = file_mapper.get_all_thumbnails_with_full_path(config.thumbnails_path)
# archive any remaining orphaned thumbnails
for thumb_filepath in thumb_filepaths:
try:
thumb_src_image_name = file_mapper.get_image_name_from_thumbnail_path(thumb_filepath)
if not file_mapper.image_file_exists(thumb_src_image_name):
print(f"Found orphaned thumbnail {thumb_filepath}, archiving...", end="")
file_mapper.archive_thumbnail_by_image_filename(thumb_src_image_name)
print("Archived!")
self.__stats.count_orphaned_thumbnails_cleaned += 1
except Exception as ex:
print("Error found trying to archive thumbnail, error was:")
print(ex)
self.__stats.count_errors += 1
def regenerate_thumbnails(self, config: ConfigMapper, file_mapper: PhysicalFileMapper, *args):
"""Create missing thumbnails for any valid general images both in the db and on disk."""
if self._headless:
print("Regenerating missing image thumbnails...")
else:
print()
print("===============================================================================")
print("= Regenerate Thumbnails")
print()
print("This operation will find files that have no matching thumbnail on disk")
print("and regenerate those thumbnail files.")
print("NOTE: It is STRONGLY recommended that the user first clean/archive orphaned")
print(" disk files from the previous menu to avoid wasting time regenerating")
print(" thumbnails for orphaned files.")
print()
print(f"Outputs/Images Directory : {config.outputs_path}")
print(f"Outputs/Images Directory : {config.thumbnails_path}")
print("\nNotes about this operation:")
print("- This operation will find image files both referenced in the db and on disk")
print(" that do not have a matching thumbnail on disk and re-generate the thumbnail")
print(" file.")
if not self.ask_to_continue():
raise KeyboardInterrupt
print()
phys_files = file_mapper.get_all_png_filenames_in_directory(config.outputs_path)
for phys_file in phys_files:
try:
if not file_mapper.thumbnail_exists_for_filename(phys_file):
print(f"Found file without thumbnail {phys_file}...Regenerating Thumbnail...", end="")
file_mapper.generate_thumbnail_for_image_name(phys_file)
print("Done!")
self.__stats.count_thumbnails_regenerated += 1
except Exception as ex:
print("Error found trying to regenerate thumbnail, error was:")
print(ex)
self.__stats.count_errors += 1
def main(self): # noqa D107
print("\n===============================================================================")
print("Database and outputs Maintenance for Invoke AI 3.0.0 +")
print("===============================================================================\n")
config_mapper = ConfigMapper()
if not config_mapper.load():
print("\nInvalid configuration...exiting.\n")
return
file_mapper = PhysicalFileMapper(
config_mapper.outputs_path,
config_mapper.thumbnails_path,
config_mapper.archive_path,
config_mapper.thumbnails_archive_path,
)
db_mapper = DatabaseMapper(config_mapper.database_path, config_mapper.database_backup_dir)
op = self._operation
operations_to_perform = []
if op == MaintenanceOperation.Ask:
op = self.ask_for_operation()
if op in [MaintenanceOperation.CleanOrphanedDbEntries, MaintenanceOperation.All]:
operations_to_perform.append(self.clean_orphaned_db_entries)
if op in [MaintenanceOperation.CleanOrphanedDiskFiles, MaintenanceOperation.All]:
operations_to_perform.append(self.clean_orphaned_disk_files)
if op in [MaintenanceOperation.ReGenerateThumbnails, MaintenanceOperation.All]:
operations_to_perform.append(self.regenerate_thumbnails)
for operation in operations_to_perform:
operation(config_mapper, file_mapper, db_mapper)
print("\n===============================================================================")
print(f"= Maintenance Complete - Elapsed Time: {MaintenanceStats.get_elapsed_time_string()}")
print()
print(f"Orphaned db entries cleaned : {self.__stats.count_orphaned_db_entries_cleaned}")
print(f"Orphaned disk files archived : {self.__stats.count_orphaned_disk_files_cleaned}")
print(f"Orphaned thumbnail files archived : {self.__stats.count_orphaned_thumbnails_cleaned}")
print(f"Thumbnails regenerated : {self.__stats.count_thumbnails_regenerated}")
print(f"Errors during operation : {self.__stats.count_errors}")
print()
def main(): # noqa D107
parser = argparse.ArgumentParser(
description="InvokeAI image database maintenance utility",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""Operations:
ask Choose operation from a menu [default]
all Run all maintenance operations
clean Clean database of dangling entries
archive Archive orphaned image files
thumbnails Regenerate missing image thumbnails
""",
)
parser.add_argument("--root", default=".", type=Path, help="InvokeAI root directory")
parser.add_argument(
"--operation", default="ask", choices=[x.value for x in MaintenanceOperation], help="Operation to perform."
)
args = parser.parse_args()
try:
os.chdir(args.root)
app = InvokeAIDatabaseMaintenanceApp(args.operation)
app.main()
except KeyboardInterrupt:
print("\n\nUser cancelled execution.")
except FileNotFoundError:
print(f"Invalid root directory '{args.root}'.")
if __name__ == "__main__":
main()
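A hedged usage note: the maintenance utility expects to be run from an InvokeAI root directory (or pointed at one with --root) and accepts --operation ask|all|clean|archive|thumbnails. Programmatically, the equivalent looks roughly like this (the root path below is a placeholder):

import os

os.chdir("/path/to/invokeai_root")  # assumption: a root containing invokeai.yaml
app = InvokeAIDatabaseMaintenanceApp(MaintenanceOperation.CleanOrphanedDbEntries)
app.main()  # runs headless because the operation is not MaintenanceOperation.Ask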

View File

@ -1,12 +1,13 @@
from __future__ import annotations
from contextlib import nullcontext
from packaging import version
import platform
from contextlib import nullcontext
from typing import Union
import torch
from packaging import version
from torch import autocast
from typing import Union
from invokeai.app.services.config import InvokeAIAppConfig
CPU_DEVICE = torch.device("cpu")

View File

@ -772,11 +772,13 @@ diffusers.models.controlnet.ControlNetModel = ControlNetModel
# NOTE: with this patch, torch.compile crashes on 2.0 torch(already fixed in nightly)
# https://github.com/huggingface/diffusers/pull/4315
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/lora.py#L96C18-L96C18
def new_LoRACompatibleConv_forward(self, x):
def new_LoRACompatibleConv_forward(self, hidden_states, scale: float = 1.0):
if self.lora_layer is None:
return super(diffusers.models.lora.LoRACompatibleConv, self).forward(x)
return super(diffusers.models.lora.LoRACompatibleConv, self).forward(hidden_states)
else:
return super(diffusers.models.lora.LoRACompatibleConv, self).forward(x) + self.lora_layer(x)
return super(diffusers.models.lora.LoRACompatibleConv, self).forward(hidden_states) + (
scale * self.lora_layer(hidden_states)
)
diffusers.models.lora.LoRACompatibleConv.forward = new_LoRACompatibleConv_forward

View File

@ -178,7 +178,6 @@ InvokeAI:
import logging.handlers
import socket
import urllib.parse
from abc import abstractmethod
from pathlib import Path

View File

@ -1,11 +1,10 @@
import base64
import importlib
import io
import math
import multiprocessing as mp
import os
import re
import io
import base64
from collections import abc
from inspect import isfunction
from pathlib import Path
@ -19,6 +18,7 @@ from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import invokeai.backend.util.logging as logger
from .devices import torch_dtype