Merge branch 'main' into feat/onnx

Brandon Rising
2023-07-26 10:42:31 -04:00
465 changed files with 14417 additions and 8732 deletions

View File

@ -12,4 +12,4 @@ from .model_management import (
ModelManager, ModelCache, BaseModelType,
ModelType, SubModelType, ModelInfo
)
from .safety_checker import SafetyChecker
from .model_management.models import SilenceWarnings

View File

@ -28,7 +28,6 @@ from diffusers.schedulers import SchedulerMixin as Scheduler
import invokeai.backend.util.logging as logger
from ..image_util import configure_model_padding
from ..util.util import rand_perlin_2d
from ..safety_checker import SafetyChecker
from ..stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline
from ..stable_diffusion.schedulers import SCHEDULER_MAP
@ -52,7 +51,6 @@ class InvokeAIGeneratorBasicParams:
v_symmetry_time_pct: Optional[float]=None
variation_amount: float = 0.0
with_variations: list=field(default_factory=list)
safety_checker: Optional[SafetyChecker]=None
@dataclass
class InvokeAIGeneratorOutput:
@ -240,7 +238,6 @@ class Generator:
self.seed = None
self.latent_channels = model.unet.config.in_channels
self.downsampling_factor = downsampling # BUG: should come from model or config
self.safety_checker = None
self.perlin = 0.0
self.threshold = 0
self.variation_amount = 0
@ -277,12 +274,10 @@ class Generator:
perlin=0.0,
h_symmetry_time_pct=None,
v_symmetry_time_pct=None,
safety_checker: SafetyChecker=None,
free_gpu_mem: bool = False,
**kwargs,
):
scope = nullcontext
self.safety_checker = safety_checker
self.free_gpu_mem = free_gpu_mem
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(
@ -329,9 +324,6 @@ class Generator:
# Pass on the seed in case a layer beneath us needs to generate noise on its own.
image = make_image(x_T, seed)
if self.safety_checker is not None:
image = self.safety_checker.check(image)
results.append([image, seed, attention_maps_images])
if image_callback is not None:

View File

@ -0,0 +1,34 @@
"""
This module defines a singleton object, "invisible_watermark", that
wraps the invisible watermark model. It respects the global "invisible_watermark"
configuration variable, which allows the watermarking to be suppressed.
"""
import numpy as np
import cv2
from PIL import Image
from imwatermark import WatermarkEncoder
from invokeai.app.services.config import InvokeAIAppConfig
import invokeai.backend.util.logging as logger
config = InvokeAIAppConfig.get_config()
class InvisibleWatermark:
"""
Wrapper around InvisibleWatermark module.
"""
@classmethod
def invisible_watermark_available(self) -> bool:
return config.invisible_watermark
@classmethod
def add_watermark(self, image: Image, watermark_text:str) -> Image:
if not self.invisible_watermark_available():
return image
logger.debug(f'Applying invisible watermark "{watermark_text}"')
bgr = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
encoder = WatermarkEncoder()
encoder.set_watermark('bytes', watermark_text.encode('utf-8'))
bgr_encoded = encoder.encode(bgr, 'dwtDct')
return Image.fromarray(
cv2.cvtColor(bgr_encoded, cv2.COLOR_BGR2RGB)
).convert("RGBA")
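A minimal usage sketch for the wrapper above, assuming the module lands at invokeai.backend.image_util.invisible_watermark (the diff does not show file paths):
from PIL import Image
from invokeai.backend.image_util.invisible_watermark import InvisibleWatermark  # assumed path

img = Image.open('output.png')
if InvisibleWatermark.invisible_watermark_available():
    # embeds the text as UTF-8 bytes using the dwtDct encoder shown above
    img = InvisibleWatermark.add_watermark(img, 'InvokeAI')
img.save('output.watermarked.png')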

View File

@ -0,0 +1,63 @@
"""
This module defines a singleton object, "safety_checker", that
wraps the safety_checker model. It respects the global "nsfw_checker"
configuration variable, which allows the checker to be suppressed.
"""
import numpy as np
from PIL import Image
from invokeai.backend import SilenceWarnings
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.util.devices import choose_torch_device
import invokeai.backend.util.logging as logger
config = InvokeAIAppConfig.get_config()
CHECKER_PATH = 'core/convert/stable-diffusion-safety-checker'
class SafetyChecker:
"""
Wrapper around SafetyChecker model.
"""
safety_checker = None
feature_extractor = None
tried_load: bool = False
@classmethod
def _load_safety_checker(self):
if self.tried_load:
return
if config.nsfw_checker:
try:
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from transformers import AutoFeatureExtractor
self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
config.models_path / CHECKER_PATH
)
self.feature_extractor = AutoFeatureExtractor.from_pretrained(
config.models_path / CHECKER_PATH)
logger.info('NSFW checker initialized')
except Exception as e:
logger.warning(f'Could not load NSFW checker: {str(e)}')
else:
logger.info('NSFW checker loading disabled')
self.tried_load = True
@classmethod
def safety_checker_available(self) -> bool:
self._load_safety_checker()
return self.safety_checker is not None
@classmethod
def has_nsfw_concept(self, image: Image) -> bool:
if not self.safety_checker_available():
return False
device = choose_torch_device()
features = self.feature_extractor([image], return_tensors="pt")
features.to(device)
self.safety_checker.to(device)
x_image = np.array(image).astype(np.float32) / 255.0
x_image = x_image[None].transpose(0, 3, 1, 2)
with SilenceWarnings():
checked_image, has_nsfw_concept = self.safety_checker(images=x_image, clip_input=features.pixel_values)
return has_nsfw_concept[0]
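A hedged usage sketch, assuming the module lands at invokeai.backend.image_util.safety_checker. Unlike the backend SafetyChecker removed later in this commit, this singleton only reports a boolean and leaves blurring or suppression to the caller:
from PIL import Image
from invokeai.backend.image_util.safety_checker import SafetyChecker  # assumed path

image = Image.open('candidate.png')
if SafetyChecker.has_nsfw_concept(image):
    # the caller decides how to respond; nothing is blurred here
    print('Potential NSFW content detected')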

View File

@ -0,0 +1,31 @@
"""
Check that the invokeai_root is correctly configured and exit if not.
"""
import sys
from invokeai.app.services.config import (
InvokeAIAppConfig,
)
def check_invokeai_root(config: InvokeAIAppConfig):
try:
assert config.model_conf_path.exists()
assert config.db_path.exists()
assert config.models_path.exists()
for model in [
'CLIP-ViT-bigG-14-laion2B-39B-b160k',
'bert-base-uncased',
'clip-vit-large-patch14',
'sd-vae-ft-mse',
'stable-diffusion-2-clip',
'stable-diffusion-safety-checker']:
assert (config.models_path / f'core/convert/{model}').exists()
except:
print()
print('== STARTUP ABORTED ==')
print('** One or more necessary files are missing from your InvokeAI root directory **')
print('** Please rerun the configuration script to fix this problem. **')
print('** From the launcher, select option [7]. **')
print('** From the command line, activate the virtual environment and run "invokeai-configure --yes --skip-sd-weights" **')
input('Press any key to continue...')
sys.exit(0)
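A short sketch of the intended call site; the startup module that invokes this check is not part of the hunk and the import path below is an assumption:
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.install.check_root import check_invokeai_root  # assumed path

config = InvokeAIAppConfig.get_config()
check_invokeai_root(config)  # prints the abort banner and exits if core files are missing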

View File

@ -23,6 +23,7 @@ from urllib import request
import npyscreen
import transformers
import omegaconf
from diffusers import AutoencoderKL
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from huggingface_hub import HfFolder
@ -31,6 +32,7 @@ from omegaconf import OmegaConf
from tqdm import tqdm
from transformers import (
CLIPTextModel,
CLIPTextConfig,
CLIPTokenizer,
AutoFeatureExtractor,
BertTokenizerFast,
@ -44,6 +46,7 @@ from invokeai.backend.util.logging import InvokeAILogger
from invokeai.frontend.install.model_install import addModelsForm, process_and_execute
from invokeai.frontend.install.widgets import (
CenteredButtonPress,
FileBox,
IntTitleSlider,
set_min_terminal_size,
CyclingForm,
@ -53,6 +56,7 @@ from invokeai.frontend.install.widgets import (
from invokeai.backend.install.legacy_arg_parsing import legacy_parser
from invokeai.backend.install.model_install_backend import (
hf_download_from_pretrained,
hf_download_with_resume,
InstallSelections,
ModelInstall,
)
@ -202,6 +206,15 @@ def download_conversion_models():
pipeline = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder", **kwargs)
pipeline.save_pretrained(target_dir / 'stable-diffusion-2-clip' / 'text_encoder', safe_serialization=True)
# sd-xl - tokenizer_2
repo_id = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
_, model_name = repo_id.split('/')
pipeline = CLIPTokenizer.from_pretrained(repo_id, **kwargs)
pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
pipeline = CLIPTextConfig.from_pretrained(repo_id, **kwargs)
pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
# VAE
logger.info('Downloading stable diffusion VAE')
vae = AutoencoderKL.from_pretrained('stabilityai/sd-vae-ft-mse', **kwargs)
@ -285,47 +298,6 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
color="CONTROL",
)
self.nextrely += 1
self.add_widget_intelligent(
npyscreen.TitleFixedText,
name="== BASIC OPTIONS ==",
begin_entry_at=0,
editable=False,
color="CONTROL",
scroll_exit=True,
)
self.nextrely -= 1
self.add_widget_intelligent(
npyscreen.FixedText,
value="Select an output directory for images:",
editable=False,
color="CONTROL",
)
self.outdir = self.add_widget_intelligent(
npyscreen.TitleFilename,
name="(<tab> autocompletes, ctrl-N advances):",
value=str(default_output_dir()),
select_dir=True,
must_exist=False,
use_two_lines=False,
labelColor="GOOD",
begin_entry_at=40,
scroll_exit=True,
)
self.nextrely += 1
self.add_widget_intelligent(
npyscreen.FixedText,
value="Activate the NSFW checker to blur images showing potential sexual imagery:",
editable=False,
color="CONTROL",
)
self.nsfw_checker = self.add_widget_intelligent(
npyscreen.Checkbox,
name="NSFW checker",
value=old_opts.nsfw_checker,
relx=5,
scroll_exit=True,
)
self.nextrely += 1
label = """HuggingFace access token (OPTIONAL) for automatic model downloads. See https://huggingface.co/settings/tokens."""
for line in textwrap.wrap(label,width=window_width-6):
@ -345,15 +317,6 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
scroll_exit=True,
)
self.nextrely += 1
self.add_widget_intelligent(
npyscreen.TitleFixedText,
name="== ADVANCED OPTIONS ==",
begin_entry_at=0,
editable=False,
color="CONTROL",
scroll_exit=True,
)
self.nextrely -= 1
self.add_widget_intelligent(
npyscreen.TitleFixedText,
name="GPU Management",
@ -409,21 +372,33 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
self.nextrely += 1
self.add_widget_intelligent(
npyscreen.FixedText,
value="Directories containing textual inversion, controlnet and LoRA models (<tab> autocompletes, ctrl-N advances):",
value="Folder to recursively scan for new checkpoints, ControlNets, LoRAs and TI models (<tab> autocompletes, ctrl-N advances):",
editable=False,
color="CONTROL",
)
self.outdir = self.add_widget_intelligent(
FileBox,
name="Output directory for images (<tab> autocompletes, ctrl-N advances):",
value=str(default_output_dir()),
select_dir=True,
must_exist=False,
use_two_lines=False,
labelColor="GOOD",
begin_entry_at=40,
max_height=3,
scroll_exit=True,
)
self.autoimport_dirs = {}
for description, config_name, path in autoimport_paths(old_opts):
self.autoimport_dirs[config_name] = self.add_widget_intelligent(
npyscreen.TitleFilename,
name=description+':',
value=str(path),
self.autoimport_dirs['autoimport_dir'] = self.add_widget_intelligent(
FileBox,
name=f'Autoimport Folder',
value=str(config.root_path / config.autoimport_dir),
select_dir=True,
must_exist=False,
use_two_lines=False,
labelColor="GOOD",
begin_entry_at=32,
max_height = 3,
scroll_exit=True
)
self.nextrely += 1
@ -504,7 +479,6 @@ https://huggingface.co/spaces/CompVis/stable-diffusion-license
for attr in [
"outdir",
"nsfw_checker",
"free_gpu_mem",
"max_cache_size",
"xformers_enabled",
@ -540,7 +514,7 @@ class EditOptApplication(npyscreen.NPSAppManaged):
"MAIN",
editOptsForm,
name="InvokeAI Startup Options",
cycle_widgets=True,
cycle_widgets=False,
)
if not (self.program_opts.skip_sd_weights or self.program_opts.default_only):
self.model_select = self.addForm(
@ -548,7 +522,7 @@ class EditOptApplication(npyscreen.NPSAppManaged):
addModelsForm,
name="Install Stable Diffusion Models",
multipage=True,
cycle_widgets=True,
cycle_widgets=False,
)
def new_opts(self):
@ -560,15 +534,19 @@ def edit_opts(program_opts: Namespace, invokeai_opts: Namespace) -> argparse.Nam
editApp.run()
return editApp.new_opts()
def default_startup_options(init_file: Path) -> Namespace:
opts = InvokeAIAppConfig.get_config()
if not init_file.exists():
opts.nsfw_checker = True
return opts
def default_user_selections(program_opts: Namespace) -> InstallSelections:
installer = ModelInstall(config)
try:
installer = ModelInstall(config)
except omegaconf.errors.ConfigKeyError:
logger.warning('Your models.yaml file is corrupt or out of date. Reinitializing')
initialize_rootdir(config.root_path, True)
installer = ModelInstall(config)
models = installer.all_models()
return InstallSelections(
install_models=[models[installer.default_model()].path or models[installer.default_model()].repo_id]
@ -576,19 +554,8 @@ def default_user_selections(program_opts: Namespace) -> InstallSelections:
else [models[x].path or models[x].repo_id for x in installer.recommended_models()]
if program_opts.yes_to_all
else list(),
# scan_directory=None,
# autoscan_on_startup=None,
)
# -------------------------------------
def autoimport_paths(config: InvokeAIAppConfig):
return [
('Checkpoints & diffusers models', 'autoimport_dir', config.root_path / config.autoimport_dir),
('LoRA/LyCORIS models', 'lora_dir', config.root_path / config.lora_dir),
('Controlnet models', 'controlnet_dir', config.root_path / config.controlnet_dir),
('Textual Inversion Embeddings', 'embedding_dir', config.root_path / config.embedding_dir),
]
# -------------------------------------
def initialize_rootdir(root: Path, yes_to_all: bool = False):
logger.info("** INITIALIZING INVOKEAI RUNTIME DIRECTORY **")
@ -664,6 +631,9 @@ def write_opts(opts: Namespace, init_file: Path):
with open(init_file,'w', encoding='utf-8') as file:
file.write(new_config.to_yaml())
if hasattr(opts,'hf_token') and opts.hf_token:
HfLogin(opts.hf_token)
# -------------------------------------
def default_output_dir() -> Path:
return config.root_path / "outputs"
@ -689,7 +659,6 @@ def migrate_init_file(legacy_format:Path):
# a few places where the field names have changed and we have to
# manually add in the new names/values
new.nsfw_checker = old.safety_checker
new.xformers_enabled = old.xformers
new.conf_path = old.conf
new.root = legacy_format.parent.resolve()

View File

@ -58,7 +58,15 @@ LEGACY_CONFIGS = {
SchedulerPredictionType.Epsilon: 'v2-inpainting-inference.yaml',
SchedulerPredictionType.VPrediction: 'v2-inpainting-inference-v.yaml',
}
}
},
BaseModelType.StableDiffusionXL: {
ModelVariantType.Normal: 'sd_xl_base.yaml',
},
BaseModelType.StableDiffusionXLRefiner: {
ModelVariantType.Normal: 'sd_xl_refiner.yaml',
},
}
@dataclass
@ -329,6 +337,7 @@ class ModelInstall(object):
description = str(description),
model_format = info.format,
)
legacy_conf = None
if info.model_type == ModelType.Main:
attributes.update(dict(variant = info.variant_type,))
if info.format=="checkpoint":
@ -343,11 +352,17 @@ class ModelInstall(object):
except KeyError:
legacy_conf = Path(self.config.legacy_conf_dir, 'v1-inference.yaml') # best guess
attributes.update(
dict(
config = str(legacy_conf)
)
if info.model_type == ModelType.ControlNet and info.format=="checkpoint":
possible_conf = path.with_suffix('.yaml')
if possible_conf.exists():
legacy_conf = str(self.relative_to_root(possible_conf))
if legacy_conf:
attributes.update(
dict(
config = str(legacy_conf)
)
)
return attributes
def relative_to_root(self, path: Path)->Path:

View File

@ -4,6 +4,6 @@ Initialization file for invokeai.backend.model_management
from .model_manager import ModelManager, ModelInfo, AddModelResult, SchedulerPredictionType
from .model_cache import ModelCache
from .lora import ModelPatcher, ONNXModelPatcher
from .models import BaseModelType, ModelType, SubModelType, ModelVariantType, ModelNotFoundException
from .models import BaseModelType, ModelType, SubModelType, ModelVariantType, ModelNotFoundException, DuplicateModelException
from .model_merge import ModelMerger, MergeInterpolationMethod

File diff suppressed because it is too large

View File

@ -485,7 +485,7 @@ class ModelPatcher:
@staticmethod
def _lora_forward_hook(
applied_loras: List[Tuple[LoraModel, float]],
applied_loras: List[Tuple[LoRAModel, float]],
layer_name: str,
):
@ -530,7 +530,7 @@ class ModelPatcher:
def apply_lora(
cls,
model: torch.nn.Module,
loras: List[Tuple[LoraModel, float]],
loras: List[Tuple[LoRAModel, float]],
prefix: str,
):
original_weights = dict()

View File

@ -251,7 +251,9 @@ from .model_search import ModelSearch
from .models import (
BaseModelType, ModelType, SubModelType,
ModelError, SchedulerPredictionType, MODEL_CLASSES,
ModelConfigBase, ModelNotFoundException, InvalidModelException,
ModelConfigBase,
ModelNotFoundException, InvalidModelException,
DuplicateModelException,
)
# We are only starting to number the config file with release 3.
@ -671,6 +673,7 @@ class ModelManager(object):
self.models[model_key] = model_config
self.commit()
return AddModelResult(
name = model_name,
model_type = model_type,
@ -838,7 +841,7 @@ class ModelManager(object):
Returns the preamble for the config file.
"""
return textwrap.dedent(
"""\
"""
# This file describes the alternative machine learning models
# available to InvokeAI script.
#
@ -858,7 +861,7 @@ class ModelManager(object):
loaded_files = set()
new_models_found = False
self.logger.info(f'scanning {self.app_config.models_path} for new models')
self.logger.info(f'Scanning {self.app_config.models_path} for new models')
with Chdir(self.app_config.root_path):
for model_key, model_config in list(self.models.items()):
model_name, cur_base_model, cur_model_type = self.parse_key(model_key)
@ -891,15 +894,18 @@ class ModelManager(object):
model_name = model_path.name if model_path.is_dir() else model_path.stem
model_key = self.create_key(model_name, cur_base_model, cur_model_type)
if model_key in self.models:
raise Exception(f"Model with key {model_key} added twice")
if model_path.is_relative_to(self.app_config.root_path):
model_path = model_path.relative_to(self.app_config.root_path)
try:
if model_key in self.models:
raise DuplicateModelException(f"Model with key {model_key} added twice")
if model_path.is_relative_to(self.app_config.root_path):
model_path = model_path.relative_to(self.app_config.root_path)
model_config: ModelConfigBase = model_class.probe_config(str(model_path))
self.models[model_key] = model_config
new_models_found = True
except DuplicateModelException as e:
self.logger.warning(e)
except InvalidModelException:
self.logger.warning(f"Not a valid model: {model_path}")
except NotImplementedError as e:
@ -938,20 +944,29 @@ class ModelManager(object):
def models_found(self):
return self.new_models_found
config = self.app_config
# LS: hacky
# Patch in the SD VAE from core so that it is available for use by the UI
try:
self.heuristic_import({config.root_path / 'models/core/convert/sd-vae-ft-mse'})
except:
pass
installer = ModelInstall(config = self.app_config,
model_manager = self,
prediction_type_helper = ask_user_for_prediction_type,
)
config = self.app_config
known_paths = {config.root_path / x['path'] for x in self.list_models()}
directories = {config.root_path / x for x in [config.autoimport_dir,
config.lora_dir,
config.embedding_dir,
config.controlnet_dir]
config.controlnet_dir,
] if x
}
scanner = ScanAndImport(directories, self.logger, ignore=known_paths, installer=installer)
scanner.search()
return scanner.models_found()
def heuristic_import(self,

View File

@ -39,6 +39,7 @@ class ModelProbe(object):
CLASS2TYPE = {
'StableDiffusionPipeline' : ModelType.Main,
'StableDiffusionInpaintPipeline' : ModelType.Main,
'StableDiffusionXLPipeline' : ModelType.Main,
'StableDiffusionXLImg2ImgPipeline' : ModelType.Main,
'AutoencoderKL' : ModelType.Vae,
@ -252,10 +253,13 @@ class PipelineCheckpointProbe(CheckpointProbeBase):
return BaseModelType.StableDiffusion1
if key_name in state_dict and state_dict[key_name].shape[-1] == 1024:
return BaseModelType.StableDiffusion2
# TODO: Verify that this is correct! Need an XL checkpoint file for this.
key_name = 'model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'
if key_name in state_dict and state_dict[key_name].shape[-1] == 2048:
return BaseModelType.StableDiffusionXL
raise InvalidModelException("Cannot determine base type")
elif key_name in state_dict and state_dict[key_name].shape[-1] == 1280:
return BaseModelType.StableDiffusionXLRefiner
else:
raise InvalidModelException("Cannot determine base type")
def get_scheduler_prediction_type(self)->SchedulerPredictionType:
type = self.get_base_type()
@ -401,7 +405,7 @@ class PipelineFolderProbe(FolderProbeBase):
in_channels = conf['in_channels']
if in_channels == 9:
return ModelVariantType.Inpainting
return ModelVariantType.Inpaint
elif in_channels == 5:
return ModelVariantType.Depth
elif in_channels == 4:

View File

@ -98,6 +98,6 @@ class FindModels(ModelSearch):
def list_models(self) -> List[Path]:
self.search()
return self.models_found
return list(self.models_found)

View File

@ -2,7 +2,11 @@ import inspect
from enum import Enum
from pydantic import BaseModel
from typing import Literal, get_origin
from .base import BaseModelType, ModelType, SubModelType, ModelBase, ModelConfigBase, ModelVariantType, SchedulerPredictionType, ModelError, SilenceWarnings, ModelNotFoundException, InvalidModelException
from .base import (
BaseModelType, ModelType, SubModelType, ModelBase, ModelConfigBase,
ModelVariantType, SchedulerPredictionType, ModelError, SilenceWarnings,
ModelNotFoundException, InvalidModelException, DuplicateModelException
)
from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model
from .sdxl import StableDiffusionXLModel
from .vae import VaeModel

View File

@ -21,6 +21,10 @@ import onnx
from onnx import numpy_helper
from onnx.external_data_helper import set_external_data
from onnxruntime import InferenceSession, OrtValue, SessionOptions, ExecutionMode, GraphOptimizationLevel, get_available_providers
class DuplicateModelException(Exception):
pass
class InvalidModelException(Exception):
pass

View File

@ -1,7 +1,8 @@
import os
import torch
from enum import Enum
from typing import Optional
from pathlib import Path
from typing import Optional, Literal
from .base import (
ModelBase,
ModelConfigBase,
@ -15,6 +16,7 @@ from .base import (
InvalidModelException,
ModelNotFoundException,
)
from invokeai.app.services.config import InvokeAIAppConfig
class ControlNetModelFormat(str, Enum):
Checkpoint = "checkpoint"
@ -24,8 +26,12 @@ class ControlNetModel(ModelBase):
#model_class: Type
#model_size: int
class Config(ModelConfigBase):
model_format: ControlNetModelFormat
class DiffusersConfig(ModelConfigBase):
model_format: Literal[ControlNetModelFormat.Diffusers]
class CheckpointConfig(ModelConfigBase):
model_format: Literal[ControlNetModelFormat.Checkpoint]
config: str
def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
assert model_type == ModelType.ControlNet
@ -99,13 +105,51 @@ class ControlNetModel(ModelBase):
@classmethod
def convert_if_required(
cls,
model_path: str,
output_path: str,
config: ModelConfigBase,
base_model: BaseModelType,
) -> str:
if cls.detect_format(model_path) == ControlNetModelFormat.Checkpoint:
return _convert_controlnet_ckpt_and_cache(
model_path = model_path,
model_config = config.config,
output_path = output_path,
base_model = base_model,
)
else:
return model_path
@classmethod
def _convert_controlnet_ckpt_and_cache(
cls,
model_path: str,
output_path: str,
config: ModelConfigBase, # empty config or config of parent model
base_model: BaseModelType,
) -> str:
if cls.detect_format(model_path) != ControlNetModelFormat.Diffusers:
raise NotImplementedError("Checkpoint controlnet models currently unsupported")
else:
return model_path
model_config: ControlNetModel.CheckpointConfig,
) -> str:
"""
Convert the controlnet from checkpoint format to diffusers format,
cache it to disk, and return Path to converted
file. If already on disk then just returns Path.
"""
app_config = InvokeAIAppConfig.get_config()
weights = app_config.root_path / model_path
output_path = Path(output_path)
# return cached version if it exists
if output_path.exists():
return output_path
# to avoid circular import errors
from ..convert_ckpt_to_diffusers import convert_controlnet_to_diffusers
convert_controlnet_to_diffusers(
weights,
output_path,
original_config_file = app_config.root_path / model_config,
image_size = 512,
scan_needed = True,
from_safetensors = weights.suffix == ".safetensors"
)
return output_path
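A hedged sketch of the new checkpoint conversion path; the file names and the config object below are illustrative, not taken from the diff:
# The first call converts the .safetensors ControlNet into a diffusers folder at
# output_path; later calls find output_path already on disk and return it unchanged.
converted = ControlNetModel.convert_if_required(
    model_path='controlnet/canny_sd15.safetensors',     # hypothetical checkpoint, relative to the root
    output_path='models/.cache/controlnet/canny_sd15',  # hypothetical cache location
    config=checkpoint_config,  # a ControlNetModel.CheckpointConfig whose .config names the .yaml
    base_model=BaseModelType.StableDiffusion1,
)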

View File

@ -10,6 +10,7 @@ from .base import (
SubModelType,
classproperty,
InvalidModelException,
ModelNotFoundException,
)
# TODO: naming
from ..lora import LoRAModel as LoRAModelRaw

View File

@ -1,5 +1,6 @@
import os
import json
import invokeai.backend.util.logging as logger
from enum import Enum
from pydantic import Field
from typing import Literal, Optional
@ -48,7 +49,7 @@ class StableDiffusionXLModel(DiffusersModel):
if model_format == StableDiffusionXLModelFormat.Checkpoint:
if ckpt_config_path:
ckpt_config = OmegaConf.load(ckpt_config_path)
ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
in_channels = ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
else:
checkpoint = read_checkpoint_meta(path)
@ -108,7 +109,20 @@ class StableDiffusionXLModel(DiffusersModel):
config: ModelConfigBase,
base_model: BaseModelType,
) -> str:
# The convert script adapted from the diffusers package uses
# strings for the base model type. To avoid making too many
# source code changes, we simply translate here
model_base_to_model_type = {BaseModelType.StableDiffusionXL: 'SDXL',
BaseModelType.StableDiffusionXLRefiner: 'SDXL-Refiner',
}
if isinstance(config, cls.CheckpointConfig):
raise NotImplementedError('conversion of SDXL checkpoint models to diffusers format is not yet supported')
from invokeai.backend.model_management.models.stable_diffusion import _convert_ckpt_and_cache
return _convert_ckpt_and_cache(
version=base_model,
model_config=config,
output_path=output_path,
model_type=model_base_to_model_type[base_model],
use_safetensors=False, # corrupts sdxl models for some reason
)
else:
return model_path

View File

@ -15,9 +15,12 @@ from .base import (
classproperty,
InvalidModelException,
)
from .sdxl import StableDiffusionXLModel
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from omegaconf import OmegaConf
class StableDiffusion1ModelFormat(str, Enum):
Checkpoint = "checkpoint"
Diffusers = "diffusers"
@ -235,42 +238,17 @@ class StableDiffusion2Model(DiffusersModel):
else:
return model_path
def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
ckpt_configs = {
BaseModelType.StableDiffusion1: {
ModelVariantType.Normal: "v1-inference.yaml",
ModelVariantType.Inpaint: "v1-inpainting-inference.yaml",
},
BaseModelType.StableDiffusion2: {
ModelVariantType.Normal: "v2-inference-v.yaml", # best guess, as we can't differentiate with base(512)
ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
ModelVariantType.Depth: "v2-midas-inference.yaml",
},
# note that these .yaml files don't yet exist!
BaseModelType.StableDiffusionXL: {
ModelVariantType.Normal: "xl-inference-v.yaml",
ModelVariantType.Inpaint: "xl-inpainting-inference.yaml",
ModelVariantType.Depth: "xl-midas-inference.yaml",
}
}
app_config = InvokeAIAppConfig.get_config()
try:
config_path = app_config.legacy_conf_path / ckpt_configs[version][variant]
if config_path.is_relative_to(app_config.root_path):
config_path = config_path.relative_to(app_config.root_path)
return str(config_path)
except:
return None
# TODO: rework
# Note that convert_ckpt_to_diffusers does not currently support conversion of SDXL models
# pass precision - currently defaulting to fp16
def _convert_ckpt_and_cache(
version: BaseModelType,
model_config: Union[StableDiffusion1Model.CheckpointConfig, StableDiffusion2Model.CheckpointConfig],
output_path: str,
version: BaseModelType,
model_config: Union[StableDiffusion1Model.CheckpointConfig,
StableDiffusion2Model.CheckpointConfig,
StableDiffusionXLModel.CheckpointConfig,
],
output_path: str,
use_save_model: bool=False,
**kwargs,
) -> str:
"""
Convert the checkpoint model indicated in mconfig into a
@ -289,6 +267,9 @@ def _convert_ckpt_and_cache(
# to avoid circular import errors
from ..convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
from ...util.devices import choose_torch_device, torch_dtype
logger.info(f'Converting {weights} to diffusers format')
with SilenceWarnings():
convert_ckpt_to_diffusers(
weights,
@ -298,5 +279,43 @@ def _convert_ckpt_and_cache(
original_config_file=config_file,
extract_ema=True,
scan_needed=True,
from_safetensors = weights.suffix == ".safetensors",
precision = torch_dtype(choose_torch_device()),
**kwargs,
)
return output_path
def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
ckpt_configs = {
BaseModelType.StableDiffusion1: {
ModelVariantType.Normal: "v1-inference.yaml",
ModelVariantType.Inpaint: "v1-inpainting-inference.yaml",
},
BaseModelType.StableDiffusion2: {
ModelVariantType.Normal: "v2-inference-v.yaml", # best guess, as we can't differentiate with base(512)
ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
ModelVariantType.Depth: "v2-midas-inference.yaml",
},
BaseModelType.StableDiffusionXL: {
ModelVariantType.Normal: "sd_xl_base.yaml",
ModelVariantType.Inpaint: None,
ModelVariantType.Depth: None,
},
BaseModelType.StableDiffusionXLRefiner: {
ModelVariantType.Normal: "sd_xl_refiner.yaml",
ModelVariantType.Inpaint: None,
ModelVariantType.Depth: None,
},
}
app_config = InvokeAIAppConfig.get_config()
try:
config_path = app_config.legacy_conf_path / ckpt_configs[version][variant]
if config_path.is_relative_to(app_config.root_path):
config_path = config_path.relative_to(app_config.root_path)
return str(config_path)
except:
return None
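An illustrative check of the new SDXL entries; the resolved path depends on the local legacy_conf_path, so the values below are a sketch rather than output captured from the diff:
cfg = _select_ckpt_config(BaseModelType.StableDiffusionXL, ModelVariantType.Normal)
# -> a path ending in 'sd_xl_base.yaml', made relative to the InvokeAI root when possible
cfg = _select_ckpt_config(BaseModelType.StableDiffusionXLRefiner, ModelVariantType.Inpaint)
# -> None: unsupported variants map to None and the bare except turns the failed join into None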

View File

@ -1,77 +0,0 @@
'''
SafetyChecker class - checks images against the StabilityAI NSFW filter
and blurs images that contain potential NSFW content.
'''
import diffusers
import numpy as np
import torch
import traceback
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
)
from pathlib import Path
from PIL import Image, ImageFilter
from transformers import AutoFeatureExtractor
import invokeai.assets.web as web_assets
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import InvokeAIAppConfig
from .util import CPU_DEVICE
config = InvokeAIAppConfig.get_config()
class SafetyChecker(object):
CAUTION_IMG = "caution.png"
def __init__(self, device: torch.device):
path = Path(web_assets.__path__[0]) / self.CAUTION_IMG
caution = Image.open(path)
self.caution_img = caution.resize((caution.width // 2, caution.height // 2))
self.device = device
try:
safety_model_id = config.models_path / 'core/convert/stable-diffusion-safety-checker'
feature_extractor_id = config.models_path / 'core/convert/stable-diffusion-safety-checker-extractor'
self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
self.safety_feature_extractor = AutoFeatureExtractor.from_pretrained(feature_extractor_id)
except Exception:
logger.error(
"An error was encountered while installing the safety checker:"
)
print(traceback.format_exc())
def check(self, image: Image.Image):
"""
Check provided image against the StabilityAI safety checker and return the image, blurred if it contains potential NSFW content.
"""
self.safety_checker.to(self.device)
features = self.safety_feature_extractor([image], return_tensors="pt")
features.to(self.device)
# unfortunately checker requires the numpy version, so we have to convert back
x_image = np.array(image).astype(np.float32) / 255.0
x_image = x_image[None].transpose(0, 3, 1, 2)
diffusers.logging.set_verbosity_error()
checked_image, has_nsfw_concept = self.safety_checker(
images=x_image, clip_input=features.pixel_values
)
self.safety_checker.to(CPU_DEVICE) # offload
if has_nsfw_concept[0]:
logger.warning(
"An image with potential non-safe content has been detected. A blurred image will be returned."
)
return self.blur(image)
else:
return image
def blur(self, input):
blurry = input.filter(filter=ImageFilter.GaussianBlur(radius=32))
try:
if caution := self.caution_img:
blurry.paste(caution, (0, 0), caution)
except FileNotFoundError:
pass
return blurry

View File

@ -219,6 +219,7 @@ class ControlNetData:
begin_step_percent: float = Field(default=0.0)
end_step_percent: float = Field(default=1.0)
control_mode: str = Field(default="balanced")
resize_mode: str = Field(default="just_resize")
@dataclass
@ -653,7 +654,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
if cfg_injection:
# Inferred ControlNet only for the conditional batch.
# To apply the output of ControlNet to both the unconditional and conditional batches,
# add 0 to the unconditional batch to keep it unchanged.
# prepend zeros for unconditional batch
down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples]
mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample])
@ -954,53 +955,3 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
debug_image(
img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True
)
# Copied from diffusers pipeline_stable_diffusion_controlnet.py
# Returns torch.Tensor of shape (batch_size, 3, height, width)
@staticmethod
def prepare_control_image(
image,
# FIXME: need to fix hardwiring of width and height, change to basing on latents dimensions?
# latents,
width=512, # should be 8 * latent.shape[3]
height=512, # should be 8 * latent height[2]
batch_size=1,
num_images_per_prompt=1,
device="cuda",
dtype=torch.float16,
do_classifier_free_guidance=True,
control_mode="balanced"
):
if not isinstance(image, torch.Tensor):
if isinstance(image, PIL.Image.Image):
image = [image]
if isinstance(image[0], PIL.Image.Image):
images = []
for image_ in image:
image_ = image_.convert("RGB")
image_ = image_.resize((width, height), resample=PIL_INTERPOLATION["lanczos"])
image_ = np.array(image_)
image_ = image_[None, :]
images.append(image_)
image = images
image = np.concatenate(image, axis=0)
image = np.array(image).astype(np.float32) / 255.0
image = image.transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
elif isinstance(image[0], torch.Tensor):
image = torch.cat(image, dim=0)
image_batch_size = image.shape[0]
if image_batch_size == 1:
repeat_by = batch_size
else:
# image batch size is the same as prompt batch size
repeat_by = num_images_per_prompt
image = image.repeat_interleave(repeat_by, dim=0)
image = image.to(device=device, dtype=dtype)
cfg_injection = (control_mode == "more_control" or control_mode == "unbalanced")
if do_classifier_free_guidance and not cfg_injection:
image = torch.cat([image] * 2)
return image

View File

@ -1,7 +1,7 @@
# Copyright (c) 2023 Lincoln D. Stein and The InvokeAI Development Team
"""
invokeai.util.logging
invokeai.backend.util.logging
Logging class for InvokeAI that produces console messages

View File

@ -1,4 +1,6 @@
import math
import torch
import diffusers
if torch.backends.mps.is_available():
@ -61,3 +63,150 @@ def new_torch_interpolate(input, size=None, scale_factor=None, mode='nearest', a
return _torch_interpolate(input, size, scale_factor, mode, align_corners, recompute_scale_factor, antialias)
torch.nn.functional.interpolate = new_torch_interpolate
# TODO: refactor it
_SlicedAttnProcessor = diffusers.models.attention_processor.SlicedAttnProcessor
class ChunkedSlicedAttnProcessor:
r"""
Processor for implementing sliced attention.
Args:
slice_size (`int`, *optional*):
The number of steps to compute attention. Uses as many slices as `attention_head_dim // slice_size`, and
`attention_head_dim` must be a multiple of the `slice_size`.
"""
def __init__(self, slice_size):
assert isinstance(slice_size, int)
slice_size = 1 # TODO: maybe implement chunking in batches too when enough memory
self.slice_size = slice_size
self._sliced_attn_processor = _SlicedAttnProcessor(slice_size)
def __call__(self, attn, hidden_states, encoder_hidden_states=None, attention_mask=None):
if self.slice_size != 1 or attn.upcast_attention:
return self._sliced_attn_processor(attn, hidden_states, encoder_hidden_states, attention_mask)
residual = hidden_states
input_ndim = hidden_states.ndim
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
batch_size, sequence_length, _ = (
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
)
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
query = attn.to_q(hidden_states)
dim = query.shape[-1]
query = attn.head_to_batch_dim(query)
if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
key = attn.to_k(encoder_hidden_states)
value = attn.to_v(encoder_hidden_states)
key = attn.head_to_batch_dim(key)
value = attn.head_to_batch_dim(value)
batch_size_attention, query_tokens, _ = query.shape
hidden_states = torch.zeros(
(batch_size_attention, query_tokens, dim // attn.heads), device=query.device, dtype=query.dtype
)
chunk_tmp_tensor = torch.empty(self.slice_size, query.shape[1], key.shape[1], dtype=query.dtype, device=query.device)
for i in range(batch_size_attention // self.slice_size):
start_idx = i * self.slice_size
end_idx = (i + 1) * self.slice_size
query_slice = query[start_idx:end_idx]
key_slice = key[start_idx:end_idx]
attn_mask_slice = attention_mask[start_idx:end_idx] if attention_mask is not None else None
self.get_attention_scores_chunked(attn, query_slice, key_slice, attn_mask_slice, hidden_states[start_idx:end_idx], value[start_idx:end_idx], chunk_tmp_tensor)
hidden_states = attn.batch_to_head_dim(hidden_states)
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
if input_ndim == 4:
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
if attn.residual_connection:
hidden_states = hidden_states + residual
hidden_states = hidden_states / attn.rescale_output_factor
return hidden_states
def get_attention_scores_chunked(self, attn, query, key, attention_mask, hidden_states, value, chunk):
# batch size = 1
assert query.shape[0] == 1
assert key.shape[0] == 1
assert value.shape[0] == 1
assert hidden_states.shape[0] == 1
dtype = query.dtype
if attn.upcast_attention:
query = query.float()
key = key.float()
#out_item_size = query.dtype.itemsize
#if attn.upcast_attention:
# out_item_size = torch.float32.itemsize
out_item_size = query.element_size()
if attn.upcast_attention:
out_item_size = 4
chunk_size = 2 ** 29
out_size = query.shape[1] * key.shape[1] * out_item_size
chunks_count = min(query.shape[1], math.ceil((out_size - 1) / chunk_size))
chunk_step = max(1, int(query.shape[1] / chunks_count))
key = key.transpose(-1, -2)
def _get_chunk_view(tensor, start, length):
if start + length > tensor.shape[1]:
length = tensor.shape[1] - start
#print(f"view: [{tensor.shape[0]},{tensor.shape[1]},{tensor.shape[2]}] - start: {start}, length: {length}")
return tensor[:,start:start+length]
for chunk_pos in range(0, query.shape[1], chunk_step):
if attention_mask is not None:
torch.baddbmm(
_get_chunk_view(attention_mask, chunk_pos, chunk_step),
_get_chunk_view(query, chunk_pos, chunk_step),
key,
beta=1,
alpha=attn.scale,
out=chunk,
)
else:
torch.baddbmm(
torch.zeros((1,1,1), device=query.device, dtype=query.dtype),
_get_chunk_view(query, chunk_pos, chunk_step),
key,
beta=0,
alpha=attn.scale,
out=chunk,
)
chunk = chunk.softmax(dim=-1)
torch.bmm(chunk, value, out=_get_chunk_view(hidden_states, chunk_pos, chunk_step))
#del chunk
diffusers.models.attention_processor.SlicedAttnProcessor = ChunkedSlicedAttnProcessor
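A hedged end-to-end sketch of the patch taking effect, assuming diffusers resolves SlicedAttnProcessor through the module attribute rebound above (the model id is illustrative):
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5')
pipe.enable_attention_slicing()  # slicing now instantiates ChunkedSlicedAttnProcessor
image = pipe('a lighthouse at dusk').images[0]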