mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
Merge branch 'main' into feat/onnx
This commit is contained in:
@ -12,4 +12,4 @@ from .model_management import (
|
||||
ModelManager, ModelCache, BaseModelType,
|
||||
ModelType, SubModelType, ModelInfo
|
||||
)
|
||||
from .safety_checker import SafetyChecker
|
||||
from .model_management.models import SilenceWarnings
|
||||
|
@ -28,7 +28,6 @@ from diffusers.schedulers import SchedulerMixin as Scheduler
|
||||
import invokeai.backend.util.logging as logger
|
||||
from ..image_util import configure_model_padding
|
||||
from ..util.util import rand_perlin_2d
|
||||
from ..safety_checker import SafetyChecker
|
||||
from ..stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline
|
||||
from ..stable_diffusion.schedulers import SCHEDULER_MAP
|
||||
|
||||
@ -52,7 +51,6 @@ class InvokeAIGeneratorBasicParams:
|
||||
v_symmetry_time_pct: Optional[float]=None
|
||||
variation_amount: float = 0.0
|
||||
with_variations: list=field(default_factory=list)
|
||||
safety_checker: Optional[SafetyChecker]=None
|
||||
|
||||
@dataclass
|
||||
class InvokeAIGeneratorOutput:
|
||||
@ -240,7 +238,6 @@ class Generator:
|
||||
self.seed = None
|
||||
self.latent_channels = model.unet.config.in_channels
|
||||
self.downsampling_factor = downsampling # BUG: should come from model or config
|
||||
self.safety_checker = None
|
||||
self.perlin = 0.0
|
||||
self.threshold = 0
|
||||
self.variation_amount = 0
|
||||
@ -277,12 +274,10 @@ class Generator:
|
||||
perlin=0.0,
|
||||
h_symmetry_time_pct=None,
|
||||
v_symmetry_time_pct=None,
|
||||
safety_checker: SafetyChecker=None,
|
||||
free_gpu_mem: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
scope = nullcontext
|
||||
self.safety_checker = safety_checker
|
||||
self.free_gpu_mem = free_gpu_mem
|
||||
attention_maps_images = []
|
||||
attention_maps_callback = lambda saver: attention_maps_images.append(
|
||||
@ -329,9 +324,6 @@ class Generator:
|
||||
# Pass on the seed in case a layer beneath us needs to generate noise on its own.
|
||||
image = make_image(x_T, seed)
|
||||
|
||||
if self.safety_checker is not None:
|
||||
image = self.safety_checker.check(image)
|
||||
|
||||
results.append([image, seed, attention_maps_images])
|
||||
|
||||
if image_callback is not None:
|
||||
|
34
invokeai/backend/image_util/invisible_watermark.py
Normal file
34
invokeai/backend/image_util/invisible_watermark.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""
|
||||
This module defines a singleton object, "invisible_watermark" that
|
||||
wraps the invisible watermark model. It respects the global "invisible_watermark"
|
||||
configuration variable, that allows the watermarking to be suppressed.
|
||||
"""
|
||||
import numpy as np
|
||||
import cv2
|
||||
from PIL import Image
|
||||
from imwatermark import WatermarkEncoder
|
||||
from invokeai.app.services.config import InvokeAIAppConfig
|
||||
import invokeai.backend.util.logging as logger
|
||||
config = InvokeAIAppConfig.get_config()
|
||||
|
||||
class InvisibleWatermark:
    """
    Wrapper around the invisible-watermark encoder (``imwatermark``).

    Every method is a classmethod, so the class is used as a namespace and
    never needs to be instantiated.
    """

    @classmethod
    def invisible_watermark_available(cls) -> bool:
        """Return the global ``invisible_watermark`` configuration setting."""
        return config.invisible_watermark

    @classmethod
    def add_watermark(cls, image: "Image.Image", watermark_text: str) -> "Image.Image":
        """
        Embed ``watermark_text`` invisibly into ``image``.

        :param image: PIL image to watermark.
        :param watermark_text: Text to embed; it is UTF-8 encoded before embedding.
        :return: The unmodified input image when watermarking is disabled in the
                 configuration; otherwise a new RGBA image carrying the watermark.
        """
        if not cls.invisible_watermark_available():
            return image

        logger.debug(f'Applying invisible watermark "{watermark_text}"')

        # The encoder is fed a BGR array, so convert from PIL's RGB on the way
        # in and back to RGB on the way out.
        bgr = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)

        encoder = WatermarkEncoder()
        encoder.set_watermark('bytes', watermark_text.encode('utf-8'))
        bgr_encoded = encoder.encode(bgr, 'dwtDct')

        rgb_encoded = cv2.cvtColor(bgr_encoded, cv2.COLOR_BGR2RGB)
        return Image.fromarray(rgb_encoded).convert("RGBA")
|
63
invokeai/backend/image_util/safety_checker.py
Normal file
63
invokeai/backend/image_util/safety_checker.py
Normal file
@ -0,0 +1,63 @@
|
||||
"""
|
||||
This module defines a singleton object, "safety_checker" that
|
||||
wraps the safety_checker model. It respects the global "nsfw_checker"
|
||||
configuration variable, that allows the checker to be suppressed.
|
||||
"""
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from invokeai.backend import SilenceWarnings
|
||||
from invokeai.app.services.config import InvokeAIAppConfig
|
||||
from invokeai.backend.util.devices import choose_torch_device
|
||||
import invokeai.backend.util.logging as logger
|
||||
config = InvokeAIAppConfig.get_config()
|
||||
|
||||
CHECKER_PATH = 'core/convert/stable-diffusion-safety-checker'
|
||||
|
||||
class SafetyChecker:
    """
    Wrapper around the SafetyChecker model.

    The model and its feature extractor are loaded lazily, at most once, and
    cached on the class. All methods are classmethods, so the class is used as
    a namespace and never needs to be instantiated.
    """
    # Cached model objects; both stay None until a load fully succeeds.
    safety_checker = None
    feature_extractor = None
    # Set once a load has been attempted (successfully or not) so that a
    # failing load is not retried on every call.
    tried_load: bool = False

    @classmethod
    def _load_safety_checker(cls):
        """
        Load the checker and feature extractor on first use.

        Does nothing if a load was already attempted. Loading is skipped when
        the ``nsfw_checker`` configuration setting is false. Load failures are
        logged as warnings rather than raised.
        """
        if cls.tried_load:
            return

        if config.nsfw_checker:
            try:
                from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
                from transformers import AutoFeatureExtractor

                # Load into locals first and publish both together: if the
                # second load fails, the class must not be left with a checker
                # but no feature extractor (which would report "available" and
                # then crash in has_nsfw_concept()).
                checker = StableDiffusionSafetyChecker.from_pretrained(
                    config.models_path / CHECKER_PATH
                )
                extractor = AutoFeatureExtractor.from_pretrained(
                    config.models_path / CHECKER_PATH
                )
                cls.safety_checker = checker
                cls.feature_extractor = extractor
                logger.info('NSFW checker initialized')
            except Exception as e:
                logger.warning(f'Could not load NSFW checker: {str(e)}')
        else:
            logger.info('NSFW checker loading disabled')
        cls.tried_load = True

    @classmethod
    def safety_checker_available(cls) -> bool:
        """Return True if the checker model is loaded, attempting the load if needed."""
        cls._load_safety_checker()
        return cls.safety_checker is not None

    @classmethod
    def has_nsfw_concept(cls, image: "Image.Image") -> bool:
        """
        Run the safety checker on ``image``.

        :param image: PIL image to check.
        :return: False when the checker is unavailable; otherwise the checker's
                 verdict for this single image.
        """
        if not cls.safety_checker_available():
            return False

        device = choose_torch_device()
        features = cls.feature_extractor([image], return_tensors="pt")
        # NOTE(review): the return value is discarded, so this relies on
        # `.to()` moving the features in place -- confirm against transformers.
        features.to(device)
        cls.safety_checker.to(device)

        # Scale pixel values to [0, 1], add a leading batch axis, and move the
        # last axis to position 1.
        x_image = np.array(image).astype(np.float32) / 255.0
        x_image = x_image[None].transpose(0, 3, 1, 2)

        with SilenceWarnings():
            checked_image, has_nsfw_concept = cls.safety_checker(images=x_image, clip_input=features.pixel_values)
        return has_nsfw_concept[0]
|
31
invokeai/backend/install/check_root.py
Normal file
31
invokeai/backend/install/check_root.py
Normal file
@ -0,0 +1,31 @@
|
||||
"""
|
||||
Check that the invokeai_root is correctly configured and exit if not.
|
||||
"""
|
||||
import sys
|
||||
from invokeai.app.services.config import (
|
||||
InvokeAIAppConfig,
|
||||
)
|
||||
|
||||
def check_invokeai_root(config: "InvokeAIAppConfig"):
    """
    Check that the InvokeAI root directory is populated, and exit if not.

    The model configuration file, the database, the models directory and each
    of the core conversion models under ``core/convert`` must exist.

    :param config: Application configuration providing ``model_conf_path``,
                   ``db_path`` and ``models_path``.
    :return: None when everything required is present.
    :raises SystemExit: After printing instructions, when anything is missing.
    """
    core_models = (
        'CLIP-ViT-bigG-14-laion2B-39B-b160k',
        'bert-base-uncased',
        'clip-vit-large-patch14',
        'sd-vae-ft-mse',
        'stable-diffusion-2-clip',
        'stable-diffusion-safety-checker',
    )

    # Explicit checks rather than `assert`: assertions are stripped under
    # `python -O`, which would silently disable this validation.
    try:
        required = [config.model_conf_path, config.db_path, config.models_path]
        required.extend(config.models_path / f'core/convert/{model}' for model in core_models)
        root_ok = all(path.exists() for path in required)
    except Exception:
        # A config that cannot be inspected is reported like a missing file.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        root_ok = False

    if root_ok:
        return

    print()
    print('== STARTUP ABORTED ==')
    print('** One or more necessary files is missing from your InvokeAI root directory **')
    print('** Please rerun the configuration script to fix this problem. **')
    print('** From the launcher, selection option [7]. **')
    print('** From the command line, activate the virtual environment and run "invokeai-configure --yes --skip-sd-weights" **')
    try:
        input('Press any key to continue...')
    except (EOFError, OSError):
        # No usable stdin (service, container, piped launch): skip the pause
        # instead of dying with a traceback.
        pass
    # Exit status kept at 0 for backward compatibility with existing launchers.
    sys.exit(0)
|
||||
|
@ -23,6 +23,7 @@ from urllib import request
|
||||
|
||||
import npyscreen
|
||||
import transformers
|
||||
import omegaconf
|
||||
from diffusers import AutoencoderKL
|
||||
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
||||
from huggingface_hub import HfFolder
|
||||
@ -31,6 +32,7 @@ from omegaconf import OmegaConf
|
||||
from tqdm import tqdm
|
||||
from transformers import (
|
||||
CLIPTextModel,
|
||||
CLIPTextConfig,
|
||||
CLIPTokenizer,
|
||||
AutoFeatureExtractor,
|
||||
BertTokenizerFast,
|
||||
@ -44,6 +46,7 @@ from invokeai.backend.util.logging import InvokeAILogger
|
||||
from invokeai.frontend.install.model_install import addModelsForm, process_and_execute
|
||||
from invokeai.frontend.install.widgets import (
|
||||
CenteredButtonPress,
|
||||
FileBox,
|
||||
IntTitleSlider,
|
||||
set_min_terminal_size,
|
||||
CyclingForm,
|
||||
@ -53,6 +56,7 @@ from invokeai.frontend.install.widgets import (
|
||||
from invokeai.backend.install.legacy_arg_parsing import legacy_parser
|
||||
from invokeai.backend.install.model_install_backend import (
|
||||
hf_download_from_pretrained,
|
||||
hf_download_with_resume,
|
||||
InstallSelections,
|
||||
ModelInstall,
|
||||
)
|
||||
@ -202,6 +206,15 @@ def download_conversion_models():
|
||||
pipeline = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder", **kwargs)
|
||||
pipeline.save_pretrained(target_dir / 'stable-diffusion-2-clip' / 'text_encoder', safe_serialization=True)
|
||||
|
||||
# sd-xl - tokenizer_2
|
||||
repo_id = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
|
||||
_, model_name = repo_id.split('/')
|
||||
pipeline = CLIPTokenizer.from_pretrained(repo_id, **kwargs)
|
||||
pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
|
||||
|
||||
pipeline = CLIPTextConfig.from_pretrained(repo_id, **kwargs)
|
||||
pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
|
||||
|
||||
# VAE
|
||||
logger.info('Downloading stable diffusion VAE')
|
||||
vae = AutoencoderKL.from_pretrained('stabilityai/sd-vae-ft-mse', **kwargs)
|
||||
@ -285,47 +298,6 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
|
||||
color="CONTROL",
|
||||
)
|
||||
|
||||
self.nextrely += 1
|
||||
self.add_widget_intelligent(
|
||||
npyscreen.TitleFixedText,
|
||||
name="== BASIC OPTIONS ==",
|
||||
begin_entry_at=0,
|
||||
editable=False,
|
||||
color="CONTROL",
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.nextrely -= 1
|
||||
self.add_widget_intelligent(
|
||||
npyscreen.FixedText,
|
||||
value="Select an output directory for images:",
|
||||
editable=False,
|
||||
color="CONTROL",
|
||||
)
|
||||
self.outdir = self.add_widget_intelligent(
|
||||
npyscreen.TitleFilename,
|
||||
name="(<tab> autocompletes, ctrl-N advances):",
|
||||
value=str(default_output_dir()),
|
||||
select_dir=True,
|
||||
must_exist=False,
|
||||
use_two_lines=False,
|
||||
labelColor="GOOD",
|
||||
begin_entry_at=40,
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.nextrely += 1
|
||||
self.add_widget_intelligent(
|
||||
npyscreen.FixedText,
|
||||
value="Activate the NSFW checker to blur images showing potential sexual imagery:",
|
||||
editable=False,
|
||||
color="CONTROL",
|
||||
)
|
||||
self.nsfw_checker = self.add_widget_intelligent(
|
||||
npyscreen.Checkbox,
|
||||
name="NSFW checker",
|
||||
value=old_opts.nsfw_checker,
|
||||
relx=5,
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.nextrely += 1
|
||||
label = """HuggingFace access token (OPTIONAL) for automatic model downloads. See https://huggingface.co/settings/tokens."""
|
||||
for line in textwrap.wrap(label,width=window_width-6):
|
||||
@ -345,15 +317,6 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.nextrely += 1
|
||||
self.add_widget_intelligent(
|
||||
npyscreen.TitleFixedText,
|
||||
name="== ADVANCED OPTIONS ==",
|
||||
begin_entry_at=0,
|
||||
editable=False,
|
||||
color="CONTROL",
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.nextrely -= 1
|
||||
self.add_widget_intelligent(
|
||||
npyscreen.TitleFixedText,
|
||||
name="GPU Management",
|
||||
@ -409,21 +372,33 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
|
||||
self.nextrely += 1
|
||||
self.add_widget_intelligent(
|
||||
npyscreen.FixedText,
|
||||
value="Directories containing textual inversion, controlnet and LoRA models (<tab> autocompletes, ctrl-N advances):",
|
||||
value="Folder to recursively scan for new checkpoints, ControlNets, LoRAs and TI models (<tab> autocompletes, ctrl-N advances):",
|
||||
editable=False,
|
||||
color="CONTROL",
|
||||
)
|
||||
self.outdir = self.add_widget_intelligent(
|
||||
FileBox,
|
||||
name="Output directory for images (<tab> autocompletes, ctrl-N advances):",
|
||||
value=str(default_output_dir()),
|
||||
select_dir=True,
|
||||
must_exist=False,
|
||||
use_two_lines=False,
|
||||
labelColor="GOOD",
|
||||
begin_entry_at=40,
|
||||
max_height=3,
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.autoimport_dirs = {}
|
||||
for description, config_name, path in autoimport_paths(old_opts):
|
||||
self.autoimport_dirs[config_name] = self.add_widget_intelligent(
|
||||
npyscreen.TitleFilename,
|
||||
name=description+':',
|
||||
value=str(path),
|
||||
self.autoimport_dirs['autoimport_dir'] = self.add_widget_intelligent(
|
||||
FileBox,
|
||||
name=f'Autoimport Folder',
|
||||
value=str(config.root_path / config.autoimport_dir),
|
||||
select_dir=True,
|
||||
must_exist=False,
|
||||
use_two_lines=False,
|
||||
labelColor="GOOD",
|
||||
begin_entry_at=32,
|
||||
max_height = 3,
|
||||
scroll_exit=True
|
||||
)
|
||||
self.nextrely += 1
|
||||
@ -504,7 +479,6 @@ https://huggingface.co/spaces/CompVis/stable-diffusion-license
|
||||
|
||||
for attr in [
|
||||
"outdir",
|
||||
"nsfw_checker",
|
||||
"free_gpu_mem",
|
||||
"max_cache_size",
|
||||
"xformers_enabled",
|
||||
@ -540,7 +514,7 @@ class EditOptApplication(npyscreen.NPSAppManaged):
|
||||
"MAIN",
|
||||
editOptsForm,
|
||||
name="InvokeAI Startup Options",
|
||||
cycle_widgets=True,
|
||||
cycle_widgets=False,
|
||||
)
|
||||
if not (self.program_opts.skip_sd_weights or self.program_opts.default_only):
|
||||
self.model_select = self.addForm(
|
||||
@ -548,7 +522,7 @@ class EditOptApplication(npyscreen.NPSAppManaged):
|
||||
addModelsForm,
|
||||
name="Install Stable Diffusion Models",
|
||||
multipage=True,
|
||||
cycle_widgets=True,
|
||||
cycle_widgets=False,
|
||||
)
|
||||
|
||||
def new_opts(self):
|
||||
@ -560,15 +534,19 @@ def edit_opts(program_opts: Namespace, invokeai_opts: Namespace) -> argparse.Nam
|
||||
editApp.run()
|
||||
return editApp.new_opts()
|
||||
|
||||
|
||||
def default_startup_options(init_file: Path) -> Namespace:
|
||||
opts = InvokeAIAppConfig.get_config()
|
||||
if not init_file.exists():
|
||||
opts.nsfw_checker = True
|
||||
return opts
|
||||
|
||||
def default_user_selections(program_opts: Namespace) -> InstallSelections:
|
||||
installer = ModelInstall(config)
|
||||
|
||||
try:
|
||||
installer = ModelInstall(config)
|
||||
except omegaconf.errors.ConfigKeyError:
|
||||
logger.warning('Your models.yaml file is corrupt or out of date. Reinitializing')
|
||||
initialize_rootdir(config.root_path, True)
|
||||
installer = ModelInstall(config)
|
||||
|
||||
models = installer.all_models()
|
||||
return InstallSelections(
|
||||
install_models=[models[installer.default_model()].path or models[installer.default_model()].repo_id]
|
||||
@ -576,19 +554,8 @@ def default_user_selections(program_opts: Namespace) -> InstallSelections:
|
||||
else [models[x].path or models[x].repo_id for x in installer.recommended_models()]
|
||||
if program_opts.yes_to_all
|
||||
else list(),
|
||||
# scan_directory=None,
|
||||
# autoscan_on_startup=None,
|
||||
)
|
||||
|
||||
# -------------------------------------
|
||||
def autoimport_paths(config: InvokeAIAppConfig):
|
||||
return [
|
||||
('Checkpoints & diffusers models', 'autoimport_dir', config.root_path / config.autoimport_dir),
|
||||
('LoRA/LyCORIS models', 'lora_dir', config.root_path / config.lora_dir),
|
||||
('Controlnet models', 'controlnet_dir', config.root_path / config.controlnet_dir),
|
||||
('Textual Inversion Embeddings', 'embedding_dir', config.root_path / config.embedding_dir),
|
||||
]
|
||||
|
||||
# -------------------------------------
|
||||
def initialize_rootdir(root: Path, yes_to_all: bool = False):
|
||||
logger.info("** INITIALIZING INVOKEAI RUNTIME DIRECTORY **")
|
||||
@ -664,6 +631,9 @@ def write_opts(opts: Namespace, init_file: Path):
|
||||
with open(init_file,'w', encoding='utf-8') as file:
|
||||
file.write(new_config.to_yaml())
|
||||
|
||||
if hasattr(opts,'hf_token') and opts.hf_token:
|
||||
HfLogin(opts.hf_token)
|
||||
|
||||
# -------------------------------------
|
||||
def default_output_dir() -> Path:
|
||||
return config.root_path / "outputs"
|
||||
@ -689,7 +659,6 @@ def migrate_init_file(legacy_format:Path):
|
||||
|
||||
# a few places where the field names have changed and we have to
|
||||
# manually add in the new names/values
|
||||
new.nsfw_checker = old.safety_checker
|
||||
new.xformers_enabled = old.xformers
|
||||
new.conf_path = old.conf
|
||||
new.root = legacy_format.parent.resolve()
|
||||
|
@ -58,7 +58,15 @@ LEGACY_CONFIGS = {
|
||||
SchedulerPredictionType.Epsilon: 'v2-inpainting-inference.yaml',
|
||||
SchedulerPredictionType.VPrediction: 'v2-inpainting-inference-v.yaml',
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
BaseModelType.StableDiffusionXL: {
|
||||
ModelVariantType.Normal: 'sd_xl_base.yaml',
|
||||
},
|
||||
|
||||
BaseModelType.StableDiffusionXLRefiner: {
|
||||
ModelVariantType.Normal: 'sd_xl_refiner.yaml',
|
||||
},
|
||||
}
|
||||
|
||||
@dataclass
|
||||
@ -329,6 +337,7 @@ class ModelInstall(object):
|
||||
description = str(description),
|
||||
model_format = info.format,
|
||||
)
|
||||
legacy_conf = None
|
||||
if info.model_type == ModelType.Main:
|
||||
attributes.update(dict(variant = info.variant_type,))
|
||||
if info.format=="checkpoint":
|
||||
@ -343,11 +352,17 @@ class ModelInstall(object):
|
||||
except KeyError:
|
||||
legacy_conf = Path(self.config.legacy_conf_dir, 'v1-inference.yaml') # best guess
|
||||
|
||||
attributes.update(
|
||||
dict(
|
||||
config = str(legacy_conf)
|
||||
)
|
||||
if info.model_type == ModelType.ControlNet and info.format=="checkpoint":
|
||||
possible_conf = path.with_suffix('.yaml')
|
||||
if possible_conf.exists():
|
||||
legacy_conf = str(self.relative_to_root(possible_conf))
|
||||
|
||||
if legacy_conf:
|
||||
attributes.update(
|
||||
dict(
|
||||
config = str(legacy_conf)
|
||||
)
|
||||
)
|
||||
return attributes
|
||||
|
||||
def relative_to_root(self, path: Path)->Path:
|
||||
|
@ -4,6 +4,6 @@ Initialization file for invokeai.backend.model_management
|
||||
from .model_manager import ModelManager, ModelInfo, AddModelResult, SchedulerPredictionType
|
||||
from .model_cache import ModelCache
|
||||
from .lora import ModelPatcher, ONNXModelPatcher
|
||||
from .models import BaseModelType, ModelType, SubModelType, ModelVariantType, ModelNotFoundException
|
||||
from .models import BaseModelType, ModelType, SubModelType, ModelVariantType, ModelNotFoundException, DuplicateModelException
|
||||
from .model_merge import ModelMerger, MergeInterpolationMethod
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -485,7 +485,7 @@ class ModelPatcher:
|
||||
|
||||
@staticmethod
|
||||
def _lora_forward_hook(
|
||||
applied_loras: List[Tuple[LoraModel, float]],
|
||||
applied_loras: List[Tuple[LoRAModel, float]],
|
||||
layer_name: str,
|
||||
):
|
||||
|
||||
@ -530,7 +530,7 @@ class ModelPatcher:
|
||||
def apply_lora(
|
||||
cls,
|
||||
model: torch.nn.Module,
|
||||
loras: List[Tuple[LoraModel, float]],
|
||||
loras: List[Tuple[LoRAModel, float]],
|
||||
prefix: str,
|
||||
):
|
||||
original_weights = dict()
|
||||
|
@ -251,7 +251,9 @@ from .model_search import ModelSearch
|
||||
from .models import (
|
||||
BaseModelType, ModelType, SubModelType,
|
||||
ModelError, SchedulerPredictionType, MODEL_CLASSES,
|
||||
ModelConfigBase, ModelNotFoundException, InvalidModelException,
|
||||
ModelConfigBase,
|
||||
ModelNotFoundException, InvalidModelException,
|
||||
DuplicateModelException,
|
||||
)
|
||||
|
||||
# We are only starting to number the config file with release 3.
|
||||
@ -671,6 +673,7 @@ class ModelManager(object):
|
||||
|
||||
self.models[model_key] = model_config
|
||||
self.commit()
|
||||
|
||||
return AddModelResult(
|
||||
name = model_name,
|
||||
model_type = model_type,
|
||||
@ -838,7 +841,7 @@ class ModelManager(object):
|
||||
Returns the preamble for the config file.
|
||||
"""
|
||||
return textwrap.dedent(
|
||||
"""\
|
||||
"""
|
||||
# This file describes the alternative machine learning models
|
||||
# available to InvokeAI script.
|
||||
#
|
||||
@ -858,7 +861,7 @@ class ModelManager(object):
|
||||
loaded_files = set()
|
||||
new_models_found = False
|
||||
|
||||
self.logger.info(f'scanning {self.app_config.models_path} for new models')
|
||||
self.logger.info(f'Scanning {self.app_config.models_path} for new models')
|
||||
with Chdir(self.app_config.root_path):
|
||||
for model_key, model_config in list(self.models.items()):
|
||||
model_name, cur_base_model, cur_model_type = self.parse_key(model_key)
|
||||
@ -891,15 +894,18 @@ class ModelManager(object):
|
||||
model_name = model_path.name if model_path.is_dir() else model_path.stem
|
||||
model_key = self.create_key(model_name, cur_base_model, cur_model_type)
|
||||
|
||||
if model_key in self.models:
|
||||
raise Exception(f"Model with key {model_key} added twice")
|
||||
|
||||
if model_path.is_relative_to(self.app_config.root_path):
|
||||
model_path = model_path.relative_to(self.app_config.root_path)
|
||||
try:
|
||||
if model_key in self.models:
|
||||
raise DuplicateModelException(f"Model with key {model_key} added twice")
|
||||
|
||||
if model_path.is_relative_to(self.app_config.root_path):
|
||||
model_path = model_path.relative_to(self.app_config.root_path)
|
||||
|
||||
model_config: ModelConfigBase = model_class.probe_config(str(model_path))
|
||||
self.models[model_key] = model_config
|
||||
new_models_found = True
|
||||
except DuplicateModelException as e:
|
||||
self.logger.warning(e)
|
||||
except InvalidModelException:
|
||||
self.logger.warning(f"Not a valid model: {model_path}")
|
||||
except NotImplementedError as e:
|
||||
@ -938,20 +944,29 @@ class ModelManager(object):
|
||||
def models_found(self):
|
||||
return self.new_models_found
|
||||
|
||||
config = self.app_config
|
||||
|
||||
# LS: hacky
|
||||
# Patch in the SD VAE from core so that it is available for use by the UI
|
||||
try:
|
||||
self.heuristic_import({config.root_path / 'models/core/convert/sd-vae-ft-mse'})
|
||||
except:
|
||||
pass
|
||||
|
||||
installer = ModelInstall(config = self.app_config,
|
||||
model_manager = self,
|
||||
prediction_type_helper = ask_user_for_prediction_type,
|
||||
)
|
||||
config = self.app_config
|
||||
known_paths = {config.root_path / x['path'] for x in self.list_models()}
|
||||
directories = {config.root_path / x for x in [config.autoimport_dir,
|
||||
config.lora_dir,
|
||||
config.embedding_dir,
|
||||
config.controlnet_dir]
|
||||
config.controlnet_dir,
|
||||
] if x
|
||||
}
|
||||
scanner = ScanAndImport(directories, self.logger, ignore=known_paths, installer=installer)
|
||||
scanner.search()
|
||||
|
||||
return scanner.models_found()
|
||||
|
||||
def heuristic_import(self,
|
||||
|
@ -39,6 +39,7 @@ class ModelProbe(object):
|
||||
|
||||
CLASS2TYPE = {
|
||||
'StableDiffusionPipeline' : ModelType.Main,
|
||||
'StableDiffusionInpaintPipeline' : ModelType.Main,
|
||||
'StableDiffusionXLPipeline' : ModelType.Main,
|
||||
'StableDiffusionXLImg2ImgPipeline' : ModelType.Main,
|
||||
'AutoencoderKL' : ModelType.Vae,
|
||||
@ -252,10 +253,13 @@ class PipelineCheckpointProbe(CheckpointProbeBase):
|
||||
return BaseModelType.StableDiffusion1
|
||||
if key_name in state_dict and state_dict[key_name].shape[-1] == 1024:
|
||||
return BaseModelType.StableDiffusion2
|
||||
# TODO: Verify that this is correct! Need an XL checkpoint file for this.
|
||||
key_name = 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'
|
||||
if key_name in state_dict and state_dict[key_name].shape[-1] == 2048:
|
||||
return BaseModelType.StableDiffusionXL
|
||||
raise InvalidModelException("Cannot determine base type")
|
||||
elif key_name in state_dict and state_dict[key_name].shape[-1] == 1280:
|
||||
return BaseModelType.StableDiffusionXLRefiner
|
||||
else:
|
||||
raise InvalidModelException("Cannot determine base type")
|
||||
|
||||
def get_scheduler_prediction_type(self)->SchedulerPredictionType:
|
||||
type = self.get_base_type()
|
||||
@ -401,7 +405,7 @@ class PipelineFolderProbe(FolderProbeBase):
|
||||
|
||||
in_channels = conf['in_channels']
|
||||
if in_channels == 9:
|
||||
return ModelVariantType.Inpainting
|
||||
return ModelVariantType.Inpaint
|
||||
elif in_channels == 5:
|
||||
return ModelVariantType.Depth
|
||||
elif in_channels == 4:
|
||||
|
@ -98,6 +98,6 @@ class FindModels(ModelSearch):
|
||||
|
||||
def list_models(self) -> List[Path]:
|
||||
self.search()
|
||||
return self.models_found
|
||||
return list(self.models_found)
|
||||
|
||||
|
||||
|
@ -2,7 +2,11 @@ import inspect
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel
|
||||
from typing import Literal, get_origin
|
||||
from .base import BaseModelType, ModelType, SubModelType, ModelBase, ModelConfigBase, ModelVariantType, SchedulerPredictionType, ModelError, SilenceWarnings, ModelNotFoundException, InvalidModelException
|
||||
from .base import (
|
||||
BaseModelType, ModelType, SubModelType, ModelBase, ModelConfigBase,
|
||||
ModelVariantType, SchedulerPredictionType, ModelError, SilenceWarnings,
|
||||
ModelNotFoundException, InvalidModelException, DuplicateModelException
|
||||
)
|
||||
from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model
|
||||
from .sdxl import StableDiffusionXLModel
|
||||
from .vae import VaeModel
|
||||
|
@ -21,6 +21,10 @@ import onnx
|
||||
from onnx import numpy_helper
|
||||
from onnx.external_data_helper import set_external_data
|
||||
from onnxruntime import InferenceSession, OrtValue, SessionOptions, ExecutionMode, GraphOptimizationLevel, get_available_providers
|
||||
|
||||
class DuplicateModelException(Exception):
|
||||
pass
|
||||
|
||||
class InvalidModelException(Exception):
|
||||
pass
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
import os
|
||||
import torch
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from typing import Optional, Literal
|
||||
from .base import (
|
||||
ModelBase,
|
||||
ModelConfigBase,
|
||||
@ -15,6 +16,7 @@ from .base import (
|
||||
InvalidModelException,
|
||||
ModelNotFoundException,
|
||||
)
|
||||
from invokeai.app.services.config import InvokeAIAppConfig
|
||||
|
||||
class ControlNetModelFormat(str, Enum):
|
||||
Checkpoint = "checkpoint"
|
||||
@ -24,8 +26,12 @@ class ControlNetModel(ModelBase):
|
||||
#model_class: Type
|
||||
#model_size: int
|
||||
|
||||
class Config(ModelConfigBase):
|
||||
model_format: ControlNetModelFormat
|
||||
class DiffusersConfig(ModelConfigBase):
|
||||
model_format: Literal[ControlNetModelFormat.Diffusers]
|
||||
|
||||
class CheckpointConfig(ModelConfigBase):
|
||||
model_format: Literal[ControlNetModelFormat.Checkpoint]
|
||||
config: str
|
||||
|
||||
def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
|
||||
assert model_type == ModelType.ControlNet
|
||||
@ -99,13 +105,51 @@ class ControlNetModel(ModelBase):
|
||||
|
||||
@classmethod
|
||||
def convert_if_required(
|
||||
cls,
|
||||
model_path: str,
|
||||
output_path: str,
|
||||
config: ModelConfigBase,
|
||||
base_model: BaseModelType,
|
||||
) -> str:
|
||||
if cls.detect_format(model_path) == ControlNetModelFormat.Checkpoint:
|
||||
return _convert_controlnet_ckpt_and_cache(
|
||||
model_path = model_path,
|
||||
model_config = config.config,
|
||||
output_path = output_path,
|
||||
base_model = base_model,
|
||||
)
|
||||
else:
|
||||
return model_path
|
||||
|
||||
@classmethod
|
||||
def _convert_controlnet_ckpt_and_cache(
|
||||
cls,
|
||||
model_path: str,
|
||||
output_path: str,
|
||||
config: ModelConfigBase, # empty config or config of parent model
|
||||
base_model: BaseModelType,
|
||||
) -> str:
|
||||
if cls.detect_format(model_path) != ControlNetModelFormat.Diffusers:
|
||||
raise NotImplementedError("Checkpoint controlnet models currently unsupported")
|
||||
else:
|
||||
return model_path
|
||||
model_config: ControlNetModel.CheckpointConfig,
|
||||
) -> str:
|
||||
"""
|
||||
Convert the controlnet from checkpoint format to diffusers format,
|
||||
cache it to disk, and return Path to converted
|
||||
file. If already on disk then just returns Path.
|
||||
"""
|
||||
app_config = InvokeAIAppConfig.get_config()
|
||||
weights = app_config.root_path / model_path
|
||||
output_path = Path(output_path)
|
||||
|
||||
# return cached version if it exists
|
||||
if output_path.exists():
|
||||
return output_path
|
||||
|
||||
# to avoid circular import errors
|
||||
from ..convert_ckpt_to_diffusers import convert_controlnet_to_diffusers
|
||||
convert_controlnet_to_diffusers(
|
||||
weights,
|
||||
output_path,
|
||||
original_config_file = app_config.root_path / model_config,
|
||||
image_size = 512,
|
||||
scan_needed = True,
|
||||
from_safetensors = weights.suffix == ".safetensors"
|
||||
)
|
||||
return output_path
|
||||
|
@ -10,6 +10,7 @@ from .base import (
|
||||
SubModelType,
|
||||
classproperty,
|
||||
InvalidModelException,
|
||||
ModelNotFoundException,
|
||||
)
|
||||
# TODO: naming
|
||||
from ..lora import LoRAModel as LoRAModelRaw
|
||||
|
@ -1,5 +1,6 @@
|
||||
import os
|
||||
import json
|
||||
import invokeai.backend.util.logging as logger
|
||||
from enum import Enum
|
||||
from pydantic import Field
|
||||
from typing import Literal, Optional
|
||||
@ -48,7 +49,7 @@ class StableDiffusionXLModel(DiffusersModel):
|
||||
if model_format == StableDiffusionXLModelFormat.Checkpoint:
|
||||
if ckpt_config_path:
|
||||
ckpt_config = OmegaConf.load(ckpt_config_path)
|
||||
ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
|
||||
in_channels = ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
|
||||
|
||||
else:
|
||||
checkpoint = read_checkpoint_meta(path)
|
||||
@ -108,7 +109,20 @@ class StableDiffusionXLModel(DiffusersModel):
|
||||
config: ModelConfigBase,
|
||||
base_model: BaseModelType,
|
||||
) -> str:
|
||||
# The convert script adapted from the diffusers package uses
|
||||
# strings for the base model type. To avoid making too many
|
||||
# source code changes, we simply translate here
|
||||
model_base_to_model_type = {BaseModelType.StableDiffusionXL: 'SDXL',
|
||||
BaseModelType.StableDiffusionXLRefiner: 'SDXL-Refiner',
|
||||
}
|
||||
if isinstance(config, cls.CheckpointConfig):
|
||||
raise NotImplementedError('conversion of SDXL checkpoint models to diffusers format is not yet supported')
|
||||
from invokeai.backend.model_management.models.stable_diffusion import _convert_ckpt_and_cache
|
||||
return _convert_ckpt_and_cache(
|
||||
version=base_model,
|
||||
model_config=config,
|
||||
output_path=output_path,
|
||||
model_type=model_base_to_model_type[base_model],
|
||||
use_safetensors=False, # corrupts sdxl models for some reason
|
||||
)
|
||||
else:
|
||||
return model_path
|
||||
|
@ -15,9 +15,12 @@ from .base import (
|
||||
classproperty,
|
||||
InvalidModelException,
|
||||
)
|
||||
from .sdxl import StableDiffusionXLModel
|
||||
import invokeai.backend.util.logging as logger
|
||||
from invokeai.app.services.config import InvokeAIAppConfig
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
|
||||
class StableDiffusion1ModelFormat(str, Enum):
|
||||
Checkpoint = "checkpoint"
|
||||
Diffusers = "diffusers"
|
||||
@ -235,42 +238,17 @@ class StableDiffusion2Model(DiffusersModel):
|
||||
else:
|
||||
return model_path
|
||||
|
||||
def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
    """
    Pick the legacy checkpoint config (.yaml) file for a base model and variant.

    The file name is looked up in a fixed table keyed by base model type and
    then by model variant, and joined onto the application's
    `legacy_conf_path`. When the resulting path lies under the application
    root, it is returned relative to that root.

    :param version: base model type used as the first-level table key
    :param variant: model variant used as the second-level table key
    :return: the config path as a string, or None when there is no table
        entry for the combination or the path cannot be built
    """
    ckpt_configs = {
        BaseModelType.StableDiffusion1: {
            ModelVariantType.Normal: "v1-inference.yaml",
            ModelVariantType.Inpaint: "v1-inpainting-inference.yaml",
        },
        BaseModelType.StableDiffusion2: {
            ModelVariantType.Normal: "v2-inference-v.yaml",  # best guess, as we can't differentiate with base(512)
            ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
            ModelVariantType.Depth: "v2-midas-inference.yaml",
        },
        # note that these .yaml files don't yet exist!
        BaseModelType.StableDiffusionXL: {
            ModelVariantType.Normal: "xl-inference-v.yaml",
            ModelVariantType.Inpaint: "xl-inpainting-inference.yaml",
            ModelVariantType.Depth: "xl-midas-inference.yaml",
        },
    }

    app_config = InvokeAIAppConfig.get_config()
    try:
        config_path = app_config.legacy_conf_path / ckpt_configs[version][variant]
        if config_path.is_relative_to(app_config.root_path):
            config_path = config_path.relative_to(app_config.root_path)
        return str(config_path)

    except Exception:
        # Deliberately best-effort: a missing table entry (KeyError) or any
        # other lookup/path failure means "no config available". Catching
        # Exception rather than using a bare except keeps KeyboardInterrupt
        # and SystemExit propagating.
        return None
|
||||
|
||||
|
||||
# TODO: rework
|
||||
# Note that convert_ckpt_to_diffusers does not currently support conversion of SDXL models
|
||||
# pass precision - currently defaulting to fp16
|
||||
def _convert_ckpt_and_cache(
|
||||
version: BaseModelType,
|
||||
model_config: Union[StableDiffusion1Model.CheckpointConfig, StableDiffusion2Model.CheckpointConfig],
|
||||
output_path: str,
|
||||
version: BaseModelType,
|
||||
model_config: Union[StableDiffusion1Model.CheckpointConfig,
|
||||
StableDiffusion2Model.CheckpointConfig,
|
||||
StableDiffusionXLModel.CheckpointConfig,
|
||||
],
|
||||
output_path: str,
|
||||
use_save_model: bool=False,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
"""
|
||||
Convert the checkpoint model indicated in mconfig into a
|
||||
@ -289,6 +267,9 @@ def _convert_ckpt_and_cache(
|
||||
|
||||
# to avoid circular import errors
|
||||
from ..convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
|
||||
from ...util.devices import choose_torch_device, torch_dtype
|
||||
|
||||
logger.info(f'Converting {weights} to diffusers format')
|
||||
with SilenceWarnings():
|
||||
convert_ckpt_to_diffusers(
|
||||
weights,
|
||||
@ -298,5 +279,43 @@ def _convert_ckpt_and_cache(
|
||||
original_config_file=config_file,
|
||||
extract_ema=True,
|
||||
scan_needed=True,
|
||||
from_safetensors = weights.suffix == ".safetensors",
|
||||
precision = torch_dtype(choose_torch_device()),
|
||||
**kwargs,
|
||||
)
|
||||
return output_path
|
||||
|
||||
def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
    """
    Pick the legacy checkpoint config (.yaml) file for a base model and variant.

    The file name is looked up in a fixed table keyed by base model type and
    then by model variant, and joined onto the application's
    `legacy_conf_path`. When the resulting path lies under the application
    root, it is returned relative to that root.

    :param version: base model type used as the first-level table key
    :param variant: model variant used as the second-level table key
    :return: the config path as a string, or None when the table has no
        entry (or a None entry) for the combination, or the path cannot be
        built
    """
    ckpt_configs = {
        BaseModelType.StableDiffusion1: {
            ModelVariantType.Normal: "v1-inference.yaml",
            ModelVariantType.Inpaint: "v1-inpainting-inference.yaml",
        },
        BaseModelType.StableDiffusion2: {
            ModelVariantType.Normal: "v2-inference-v.yaml",  # best guess, as we can't differentiate with base(512)
            ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
            ModelVariantType.Depth: "v2-midas-inference.yaml",
        },
        BaseModelType.StableDiffusionXL: {
            ModelVariantType.Normal: "sd_xl_base.yaml",
            ModelVariantType.Inpaint: None,
            ModelVariantType.Depth: None,
        },
        BaseModelType.StableDiffusionXLRefiner: {
            ModelVariantType.Normal: "sd_xl_refiner.yaml",
            ModelVariantType.Inpaint: None,
            ModelVariantType.Depth: None,
        },
    }

    app_config = InvokeAIAppConfig.get_config()
    try:
        config_name = ckpt_configs[version][variant]
        if config_name is None:
            # The table explicitly records "no config" for this variant.
            # Return None directly instead of relying on `Path / None`
            # raising a TypeError into the handler below.
            return None

        config_path = app_config.legacy_conf_path / config_name
        if config_path.is_relative_to(app_config.root_path):
            config_path = config_path.relative_to(app_config.root_path)
        return str(config_path)

    except Exception:
        # Deliberately best-effort: a missing table entry (KeyError) or any
        # other lookup/path failure means "no config available". Catching
        # Exception rather than using a bare except keeps KeyboardInterrupt
        # and SystemExit propagating.
        return None
|
||||
|
||||
|
||||
|
@ -1,77 +0,0 @@
|
||||
'''
|
||||
SafetyChecker class - checks images against the StabilityAI NSFW filter
|
||||
and blurs images that contain potential NSFW content.
|
||||
'''
|
||||
import diffusers
|
||||
import numpy as np
|
||||
import torch
|
||||
import traceback
|
||||
from diffusers.pipelines.stable_diffusion.safety_checker import (
|
||||
StableDiffusionSafetyChecker,
|
||||
)
|
||||
from pathlib import Path
|
||||
from PIL import Image, ImageFilter
|
||||
from transformers import AutoFeatureExtractor
|
||||
|
||||
import invokeai.assets.web as web_assets
|
||||
import invokeai.backend.util.logging as logger
|
||||
from invokeai.app.services.config import InvokeAIAppConfig
|
||||
from .util import CPU_DEVICE
|
||||
|
||||
config = InvokeAIAppConfig.get_config()
|
||||
|
||||
class SafetyChecker(object):
    """
    Runs PIL images through the StableDiffusionSafetyChecker model and
    substitutes a blurred, caution-stamped copy when the model flags them.
    """
    CAUTION_IMG = "caution.png"

    def __init__(self, device: torch.device):
        """
        Load the caution overlay and the safety checker models.

        :param device: torch device the checker is moved to while an image
            is being checked

        If loading either pretrained model raises, the error is logged and
        the traceback printed; the exception is not re-raised.
        """
        caution_path = Path(web_assets.__path__[0]) / self.CAUTION_IMG
        caution = Image.open(caution_path)
        # the overlay is kept at half of the asset's width and height
        half_size = (caution.width // 2, caution.height // 2)
        self.caution_img = caution.resize(half_size)
        self.device = device

        try:
            checker_path = config.models_path / 'core/convert/stable-diffusion-safety-checker'
            extractor_path = config.models_path / 'core/convert/stable-diffusion-safety-checker-extractor'
            self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(checker_path)
            self.safety_feature_extractor = AutoFeatureExtractor.from_pretrained(extractor_path)
        except Exception:
            logger.error(
                "An error was encountered while installing the safety checker:"
            )
            print(traceback.format_exc())

    def check(self, image: Image.Image):
        """
        Check the provided image against the safety checker.

        :param image: PIL image to screen
        :return: the image itself when nothing is flagged, otherwise the
            result of `blur(image)`
        """
        self.safety_checker.to(self.device)
        clip_features = self.safety_feature_extractor([image], return_tensors="pt")
        clip_features.to(self.device)

        # unfortunately checker requires the numpy version, so we have to convert back
        pixels = np.array(image).astype(np.float32) / 255.0
        pixels = pixels[None].transpose(0, 3, 1, 2)

        diffusers.logging.set_verbosity_error()
        _, flagged = self.safety_checker(
            images=pixels, clip_input=clip_features.pixel_values
        )
        self.safety_checker.to(CPU_DEVICE)  # offload

        if not flagged[0]:
            return image

        logger.warning(
            "An image with potential non-safe content has been detected. A blurred image will be returned."
        )
        return self.blur(image)

    def blur(self, input):
        """
        Return a Gaussian-blurred (radius 32) copy of `input` with the
        caution overlay pasted at the top-left corner.
        """
        blurred = input.filter(filter=ImageFilter.GaussianBlur(radius=32))
        try:
            overlay = self.caution_img
            if overlay:
                # the overlay doubles as its own paste mask
                blurred.paste(overlay, (0, 0), overlay)
        except FileNotFoundError:
            pass
        return blurred
|
@ -219,6 +219,7 @@ class ControlNetData:
|
||||
begin_step_percent: float = Field(default=0.0)
|
||||
end_step_percent: float = Field(default=1.0)
|
||||
control_mode: str = Field(default="balanced")
|
||||
resize_mode: str = Field(default="just_resize")
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -653,7 +654,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
if cfg_injection:
|
||||
# Inferred ControlNet only for the conditional batch.
|
||||
# To apply the output of ControlNet to both the unconditional and conditional batches,
|
||||
# add 0 to the unconditional batch to keep it unchanged.
|
||||
# prepend zeros for unconditional batch
|
||||
down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples]
|
||||
mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample])
|
||||
|
||||
@ -954,53 +955,3 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
debug_image(
|
||||
img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True
|
||||
)
|
||||
|
||||
# Copied from diffusers pipeline_stable_diffusion_controlnet.py
|
||||
# Returns torch.Tensor of shape (batch_size, 3, height, width)
|
||||
@staticmethod
|
||||
def prepare_control_image(
|
||||
image,
|
||||
# FIXME: need to fix hardwiring of width and height, change to basing on latents dimensions?
|
||||
# latents,
|
||||
width=512, # should be 8 * latent.shape[3]
|
||||
height=512, # should be 8 * latent height[2]
|
||||
batch_size=1,
|
||||
num_images_per_prompt=1,
|
||||
device="cuda",
|
||||
dtype=torch.float16,
|
||||
do_classifier_free_guidance=True,
|
||||
control_mode="balanced"
|
||||
):
|
||||
|
||||
if not isinstance(image, torch.Tensor):
|
||||
if isinstance(image, PIL.Image.Image):
|
||||
image = [image]
|
||||
|
||||
if isinstance(image[0], PIL.Image.Image):
|
||||
images = []
|
||||
for image_ in image:
|
||||
image_ = image_.convert("RGB")
|
||||
image_ = image_.resize((width, height), resample=PIL_INTERPOLATION["lanczos"])
|
||||
image_ = np.array(image_)
|
||||
image_ = image_[None, :]
|
||||
images.append(image_)
|
||||
image = images
|
||||
image = np.concatenate(image, axis=0)
|
||||
image = np.array(image).astype(np.float32) / 255.0
|
||||
image = image.transpose(0, 3, 1, 2)
|
||||
image = torch.from_numpy(image)
|
||||
elif isinstance(image[0], torch.Tensor):
|
||||
image = torch.cat(image, dim=0)
|
||||
|
||||
image_batch_size = image.shape[0]
|
||||
if image_batch_size == 1:
|
||||
repeat_by = batch_size
|
||||
else:
|
||||
# image batch size is the same as prompt batch size
|
||||
repeat_by = num_images_per_prompt
|
||||
image = image.repeat_interleave(repeat_by, dim=0)
|
||||
image = image.to(device=device, dtype=dtype)
|
||||
cfg_injection = (control_mode == "more_control" or control_mode == "unbalanced")
|
||||
if do_classifier_free_guidance and not cfg_injection:
|
||||
image = torch.cat([image] * 2)
|
||||
return image
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Copyright (c) 2023 Lincoln D. Stein and The InvokeAI Development Team
|
||||
|
||||
"""
|
||||
invokeai.util.logging
|
||||
invokeai.backend.util.logging
|
||||
|
||||
Logging class for InvokeAI that produces console messages
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
import math
|
||||
import torch
|
||||
import diffusers
|
||||
|
||||
|
||||
if torch.backends.mps.is_available():
|
||||
@ -61,3 +63,150 @@ def new_torch_interpolate(input, size=None, scale_factor=None, mode='nearest', a
|
||||
return _torch_interpolate(input, size, scale_factor, mode, align_corners, recompute_scale_factor, antialias)
|
||||
|
||||
torch.nn.functional.interpolate = new_torch_interpolate
|
||||
|
||||
# TODO: refactor it
|
||||
_SlicedAttnProcessor = diffusers.models.attention_processor.SlicedAttnProcessor
|
||||
class ChunkedSlicedAttnProcessor:
    r"""
    Attention processor that handles the attention batch one slice at a time
    and computes each slice's attention scores in chunks of query rows.

    Args:
        slice_size (`int`, *optional*):
            Must be an `int`. The value is currently ignored: the constructor
            always overrides it with 1.
    """

    def __init__(self, slice_size):
        assert isinstance(slice_size, int)
        # TODO: maybe implement chunking in batches too when enough memory
        self.slice_size = 1
        self._sliced_attn_processor = _SlicedAttnProcessor(self.slice_size)

    def __call__(self, attn, hidden_states, encoder_hidden_states=None, attention_mask=None):
        """
        Compute attention for `hidden_states` using the modules on `attn`.

        Delegates to the wrapped sliced processor when the slice size is not
        1 or when `attn.upcast_attention` is set; otherwise runs the chunked
        path below.
        """
        if not (self.slice_size == 1 and not attn.upcast_attention):
            return self._sliced_attn_processor(attn, hidden_states, encoder_hidden_states, attention_mask)

        skip_connection = hidden_states
        is_spatial = hidden_states.ndim == 4

        if is_spatial:
            # flatten the two trailing dims into one token axis
            batch_size, channel, height, width = hidden_states.shape
            hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)

        shape_source = hidden_states if encoder_hidden_states is None else encoder_hidden_states
        batch_size, sequence_length, _ = shape_source.shape
        attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)

        if attn.group_norm is not None:
            hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

        query = attn.to_q(hidden_states)
        inner_dim = query.shape[-1]
        query = attn.head_to_batch_dim(query)

        if encoder_hidden_states is None:
            encoder_hidden_states = hidden_states
        elif attn.norm_cross:
            encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

        key = attn.to_k(encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)
        key = attn.head_to_batch_dim(key)
        value = attn.head_to_batch_dim(value)

        batch_size_attention, query_tokens, _ = query.shape
        hidden_states = torch.zeros(
            (batch_size_attention, query_tokens, inner_dim // attn.heads), device=query.device, dtype=query.dtype
        )

        # scratch buffer handed to every slice for its attention scores
        scratch = torch.empty(self.slice_size, query.shape[1], key.shape[1], dtype=query.dtype, device=query.device)

        step = self.slice_size
        for lo in range(0, (batch_size_attention // step) * step, step):
            hi = lo + step
            mask_slice = None if attention_mask is None else attention_mask[lo:hi]
            self.get_attention_scores_chunked(
                attn, query[lo:hi], key[lo:hi], mask_slice, hidden_states[lo:hi], value[lo:hi], scratch
            )

        hidden_states = attn.batch_to_head_dim(hidden_states)

        # linear proj
        hidden_states = attn.to_out[0](hidden_states)
        # dropout
        hidden_states = attn.to_out[1](hidden_states)

        if is_spatial:
            hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)

        if attn.residual_connection:
            hidden_states = hidden_states + skip_connection

        return hidden_states / attn.rescale_output_factor

    def get_attention_scores_chunked(self, attn, query, key, attention_mask, hidden_states, value, chunk):
        """
        Fill `hidden_states` with softmax(scale * query @ key^T [+ mask]) @ value.

        All tensor arguments must have a leading dimension of 1. Query rows
        are processed in windows, with the window length chosen so that a
        full score matrix of query rows x key rows is split into pieces of
        roughly 2**29 bytes. Results are written into `hidden_states`
        through slices of it; nothing is returned.
        """
        # batch size = 1
        for tensor in (query, key, value, hidden_states):
            assert tensor.shape[0] == 1

        if attn.upcast_attention:
            query = query.float()
            key = key.float()

        # bytes per score element (float32 when upcasting)
        bytes_per_score = 4 if attn.upcast_attention else query.element_size()

        byte_budget = 2 ** 29

        query_rows = query.shape[1]
        scores_bytes = query_rows * key.shape[1] * bytes_per_score
        window_count = min(query_rows, math.ceil((scores_bytes - 1) / byte_budget))
        window = max(1, int(query_rows / window_count))

        key = key.transpose(-1, -2)

        def _rows(tensor, start, length):
            # slice along dim 1, clamped to the tensor's extent
            stop = min(start + length, tensor.shape[1])
            return tensor[:, start:stop]

        for row in range(0, query_rows, window):
            if attention_mask is not None:
                bias = _rows(attention_mask, row, window)
                beta = 1
            else:
                # beta=0 makes baddbmm ignore this placeholder
                bias = torch.zeros((1, 1, 1), device=query.device, dtype=query.dtype)
                beta = 0

            torch.baddbmm(
                bias,
                _rows(query, row, window),
                key,
                beta=beta,
                alpha=attn.scale,
                out=chunk,
            )
            chunk = chunk.softmax(dim=-1)
            torch.bmm(chunk, value, out=_rows(hidden_states, row, window))
|
||||
|
||||
#del chunk
|
||||
|
||||
|
||||
diffusers.models.attention_processor.SlicedAttnProcessor = ChunkedSlicedAttnProcessor
|
||||
|
Reference in New Issue
Block a user