mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
Compare commits
68 Commits
invokeai-b
...
v2.3.5-rc2
Author | SHA1 | Date | |
---|---|---|---|
73be58a0b5 | |||
5a7d11bca8 | |||
5bbf7fe34a | |||
bfb968bbe8 | |||
6db72f83a2 | |||
432e526999 | |||
830740b93b | |||
ff3f289342 | |||
34abbb3589 | |||
c0eb1a9921 | |||
2ddd0301f4 | |||
ce6629b6f5 | |||
994a76aeaa | |||
144dfe4a5b | |||
5dbc63e2ae | |||
c6ae1edc82 | |||
0f3c456d59 | |||
2cd0e036ac | |||
a45b3387c0 | |||
c088cf0344 | |||
264af3c054 | |||
b332432a88 | |||
7f7d5894fa | |||
96c39b61cf | |||
40744ed996 | |||
2a2c86896a | |||
f36452d650 | |||
e5188309ec | |||
aabe79686e | |||
a9e8005a92 | |||
c2e6d98e66 | |||
40d9b5dc27 | |||
216b1c3a4a | |||
1a704efff1 | |||
f49d2619be | |||
da96ec9dd5 | |||
298ccda365 | |||
967d853020 | |||
e91117bc74 | |||
4d58444153 | |||
3667eb4d0d | |||
203a7157e1 | |||
47883860a6 | |||
6365a7c790 | |||
5fcb3d90e4 | |||
8f17d17208 | |||
c6ecf3afc5 | |||
2c449bfb34 | |||
8fb4b05556 | |||
4d7289b20f | |||
d81584c8fd | |||
0bc5dcc663 | |||
1183bf96ed | |||
d81394cda8 | |||
0eda1a03e1 | |||
be7e067c95 | |||
afa3cdce27 | |||
6dfbd1c677 | |||
a775c7730e | |||
16c97ca0cb | |||
e24dd97b80 | |||
5a54039dd7 | |||
9385edb453 | |||
2251d3abfe | |||
0b22a3f34d | |||
2528e14fe9 | |||
16ccc807cc | |||
66364501d5 |
34
.github/CODEOWNERS
vendored
34
.github/CODEOWNERS
vendored
@ -1,13 +1,13 @@
|
||||
# continuous integration
|
||||
/.github/workflows/ @mauwii @lstein @blessedcoolant
|
||||
/.github/workflows/ @lstein @blessedcoolant
|
||||
|
||||
# documentation
|
||||
/docs/ @lstein @mauwii @blessedcoolant
|
||||
mkdocs.yml @mauwii @lstein
|
||||
/docs/ @lstein @blessedcoolant
|
||||
mkdocs.yml @lstein @ebr
|
||||
|
||||
# installation and configuration
|
||||
/pyproject.toml @mauwii @lstein @ebr
|
||||
/docker/ @mauwii
|
||||
/pyproject.toml @lstein @ebr
|
||||
/docker/ @lstein
|
||||
/scripts/ @ebr @lstein @blessedcoolant
|
||||
/installer/ @ebr @lstein
|
||||
ldm/invoke/config @lstein @ebr
|
||||
@ -21,13 +21,13 @@ invokeai/configs @lstein @ebr @blessedcoolant
|
||||
|
||||
# generation and model management
|
||||
/ldm/*.py @lstein @blessedcoolant
|
||||
/ldm/generate.py @lstein @keturn
|
||||
/ldm/generate.py @lstein @gregghelt2
|
||||
/ldm/invoke/args.py @lstein @blessedcoolant
|
||||
/ldm/invoke/ckpt* @lstein @blessedcoolant
|
||||
/ldm/invoke/ckpt_generator @lstein @blessedcoolant
|
||||
/ldm/invoke/CLI.py @lstein @blessedcoolant
|
||||
/ldm/invoke/config @lstein @ebr @mauwii @blessedcoolant
|
||||
/ldm/invoke/generator @keturn @damian0815
|
||||
/ldm/invoke/config @lstein @ebr @blessedcoolant
|
||||
/ldm/invoke/generator @gregghelt2 @damian0815
|
||||
/ldm/invoke/globals.py @lstein @blessedcoolant
|
||||
/ldm/invoke/merge_diffusers.py @lstein @blessedcoolant
|
||||
/ldm/invoke/model_manager.py @lstein @blessedcoolant
|
||||
@ -36,17 +36,17 @@ invokeai/configs @lstein @ebr @blessedcoolant
|
||||
/ldm/invoke/restoration @lstein @blessedcoolant
|
||||
|
||||
# attention, textual inversion, model configuration
|
||||
/ldm/models @damian0815 @keturn @blessedcoolant
|
||||
/ldm/models @damian0815 @gregghelt2 @blessedcoolant
|
||||
/ldm/modules/textual_inversion_manager.py @lstein @blessedcoolant
|
||||
/ldm/modules/attention.py @damian0815 @keturn
|
||||
/ldm/modules/diffusionmodules @damian0815 @keturn
|
||||
/ldm/modules/distributions @damian0815 @keturn
|
||||
/ldm/modules/ema.py @damian0815 @keturn
|
||||
/ldm/modules/attention.py @damian0815 @gregghelt2
|
||||
/ldm/modules/diffusionmodules @damian0815 @gregghelt2
|
||||
/ldm/modules/distributions @damian0815 @gregghelt2
|
||||
/ldm/modules/ema.py @damian0815 @gregghelt2
|
||||
/ldm/modules/embedding_manager.py @lstein
|
||||
/ldm/modules/encoders @damian0815 @keturn
|
||||
/ldm/modules/image_degradation @damian0815 @keturn
|
||||
/ldm/modules/losses @damian0815 @keturn
|
||||
/ldm/modules/x_transformer.py @damian0815 @keturn
|
||||
/ldm/modules/encoders @damian0815 @gregghelt2
|
||||
/ldm/modules/image_degradation @damian0815 @gregghelt2
|
||||
/ldm/modules/losses @damian0815 @gregghelt2
|
||||
/ldm/modules/x_transformer.py @damian0815 @gregghelt2
|
||||
|
||||
# Nodes
|
||||
apps/ @Kyle0654 @jpphoto
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -233,5 +233,3 @@ installer/install.sh
|
||||
installer/update.bat
|
||||
installer/update.sh
|
||||
|
||||
# no longer stored in source directory
|
||||
models
|
||||
|
@ -30,7 +30,6 @@ from ldm.invoke.conditioning import (
|
||||
get_tokens_for_prompt_object,
|
||||
get_prompt_structure,
|
||||
split_weighted_subprompts,
|
||||
get_tokenizer,
|
||||
)
|
||||
from ldm.invoke.generator.diffusers_pipeline import PipelineIntermediateState
|
||||
from ldm.invoke.generator.inpaint import infill_methods
|
||||
@ -38,11 +37,11 @@ from ldm.invoke.globals import (
|
||||
Globals,
|
||||
global_converted_ckpts_dir,
|
||||
global_models_dir,
|
||||
global_lora_models_dir,
|
||||
)
|
||||
from ldm.invoke.pngwriter import PngWriter, retrieve_metadata
|
||||
from compel.prompt_parser import Blend
|
||||
from ldm.invoke.merge_diffusers import merge_diffusion_models
|
||||
from ldm.modules.lora_manager import LoraManager
|
||||
|
||||
# Loading Arguments
|
||||
opt = Args()
|
||||
@ -524,20 +523,12 @@ class InvokeAIWebServer:
|
||||
@socketio.on("getLoraModels")
|
||||
def get_lora_models():
|
||||
try:
|
||||
lora_path = global_lora_models_dir()
|
||||
loras = []
|
||||
for root, _, files in os.walk(lora_path):
|
||||
models = [
|
||||
Path(root, x)
|
||||
for x in files
|
||||
if Path(x).suffix in [".ckpt", ".pt", ".safetensors"]
|
||||
]
|
||||
loras = loras + models
|
||||
|
||||
model = self.generate.model
|
||||
lora_mgr = LoraManager(model)
|
||||
loras = lora_mgr.list_compatible_loras()
|
||||
found_loras = []
|
||||
for lora in sorted(loras, key=lambda s: s.stem.lower()):
|
||||
location = str(lora.resolve()).replace("\\", "/")
|
||||
found_loras.append({"name": lora.stem, "location": location})
|
||||
for lora in sorted(loras, key=str.casefold):
|
||||
found_loras.append({"name":lora,"location":str(loras[lora])})
|
||||
socketio.emit("foundLoras", found_loras)
|
||||
except Exception as e:
|
||||
self.handle_exceptions(e)
|
||||
@ -1314,7 +1305,7 @@ class InvokeAIWebServer:
|
||||
None
|
||||
if type(parsed_prompt) is Blend
|
||||
else get_tokens_for_prompt_object(
|
||||
get_tokenizer(self.generate.model), parsed_prompt
|
||||
self.generate.model.tokenizer, parsed_prompt
|
||||
)
|
||||
)
|
||||
attention_maps_image_base64_url = (
|
||||
|
@ -80,7 +80,8 @@ trinart-2.0:
|
||||
repo_id: stabilityai/sd-vae-ft-mse
|
||||
recommended: False
|
||||
waifu-diffusion-1.4:
|
||||
description: An SD-1.5 model trained on 680k anime/manga-style images (2.13 GB)
|
||||
description: An SD-2.1 model trained on 5.4M anime/manga-style images (4.27 GB)
|
||||
revision: main
|
||||
repo_id: hakurei/waifu-diffusion
|
||||
format: diffusers
|
||||
vae:
|
||||
|
File diff suppressed because one or more lines are too long
2
invokeai/frontend/dist/index.html
vendored
2
invokeai/frontend/dist/index.html
vendored
@ -5,7 +5,7 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>InvokeAI - A Stable Diffusion Toolkit</title>
|
||||
<link rel="shortcut icon" type="icon" href="./assets/favicon-0d253ced.ico" />
|
||||
<script type="module" crossorigin src="./assets/index-f56b39bc.js"></script>
|
||||
<script type="module" crossorigin src="./assets/index-b12e648e.js"></script>
|
||||
<link rel="stylesheet" href="./assets/index-2ab0eb58.css">
|
||||
</head>
|
||||
|
||||
|
@ -33,6 +33,10 @@ import {
|
||||
setIntermediateImage,
|
||||
} from 'features/gallery/store/gallerySlice';
|
||||
|
||||
import {
|
||||
getLoraModels,
|
||||
getTextualInversionTriggers,
|
||||
} from 'app/socketio/actions';
|
||||
import type { RootState } from 'app/store';
|
||||
import { addImageToStagingArea } from 'features/canvas/store/canvasSlice';
|
||||
import {
|
||||
@ -463,6 +467,8 @@ const makeSocketIOListeners = (
|
||||
const { model_name, model_list } = data;
|
||||
dispatch(setModelList(model_list));
|
||||
dispatch(setCurrentStatus(i18n.t('common.statusModelChanged')));
|
||||
dispatch(getLoraModels());
|
||||
dispatch(getTextualInversionTriggers());
|
||||
dispatch(setIsProcessing(false));
|
||||
dispatch(setIsCancelable(true));
|
||||
dispatch(
|
||||
|
File diff suppressed because one or more lines are too long
@ -633,9 +633,8 @@ class Generate:
|
||||
except RuntimeError:
|
||||
# Clear the CUDA cache on an exception
|
||||
self.clear_cuda_cache()
|
||||
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
print(">> Could not generate image.")
|
||||
print("** Could not generate image.")
|
||||
raise
|
||||
|
||||
toc = time.time()
|
||||
print("\n>> Usage stats:")
|
||||
|
@ -1 +1 @@
|
||||
__version__='2.3.4'
|
||||
__version__='2.3.5-rc2'
|
||||
|
@ -15,19 +15,10 @@ from compel import Compel
|
||||
from compel.prompt_parser import FlattenedPrompt, Blend, Fragment, CrossAttentionControlSubstitute, PromptParser, \
|
||||
Conjunction
|
||||
from .devices import torch_dtype
|
||||
from .generator.diffusers_pipeline import StableDiffusionGeneratorPipeline
|
||||
from ..models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent
|
||||
from ldm.invoke.globals import Globals
|
||||
|
||||
def get_tokenizer(model) -> CLIPTokenizer:
|
||||
# TODO remove legacy ckpt fallback handling
|
||||
return (getattr(model, 'tokenizer', None) # diffusers
|
||||
or model.cond_stage_model.tokenizer) # ldm
|
||||
|
||||
def get_text_encoder(model) -> Any:
|
||||
# TODO remove legacy ckpt fallback handling
|
||||
return (getattr(model, 'text_encoder', None) # diffusers
|
||||
or UnsqueezingLDMTransformer(model.cond_stage_model.transformer)) # ldm
|
||||
|
||||
class UnsqueezingLDMTransformer:
|
||||
def __init__(self, ldm_transformer):
|
||||
self.ldm_transformer = ldm_transformer
|
||||
@ -41,15 +32,15 @@ class UnsqueezingLDMTransformer:
|
||||
return insufficiently_unsqueezed_tensor.unsqueeze(0)
|
||||
|
||||
|
||||
def get_uc_and_c_and_ec(prompt_string, model, log_tokens=False, skip_normalize_legacy_blend=False):
|
||||
def get_uc_and_c_and_ec(prompt_string,
|
||||
model: StableDiffusionGeneratorPipeline,
|
||||
log_tokens=False, skip_normalize_legacy_blend=False):
|
||||
# lazy-load any deferred textual inversions.
|
||||
# this might take a couple of seconds the first time a textual inversion is used.
|
||||
model.textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms(prompt_string)
|
||||
|
||||
tokenizer = get_tokenizer(model)
|
||||
text_encoder = get_text_encoder(model)
|
||||
compel = Compel(tokenizer=tokenizer,
|
||||
text_encoder=text_encoder,
|
||||
compel = Compel(tokenizer=model.tokenizer,
|
||||
text_encoder=model.text_encoder,
|
||||
textual_inversion_manager=model.textual_inversion_manager,
|
||||
dtype_for_device_getter=torch_dtype)
|
||||
|
||||
@ -78,14 +69,20 @@ def get_uc_and_c_and_ec(prompt_string, model, log_tokens=False, skip_normalize_l
|
||||
negative_conjunction = Compel.parse_prompt_string(negative_prompt_string)
|
||||
negative_prompt: FlattenedPrompt | Blend = negative_conjunction.prompts[0]
|
||||
|
||||
tokens_count = get_max_token_count(model.tokenizer, positive_prompt)
|
||||
if log_tokens or getattr(Globals, "log_tokenization", False):
|
||||
log_tokenization(positive_prompt, negative_prompt, tokenizer=tokenizer)
|
||||
log_tokenization(positive_prompt, negative_prompt, tokenizer=model.tokenizer)
|
||||
|
||||
c, options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt)
|
||||
uc, _ = compel.build_conditioning_tensor_for_prompt_object(negative_prompt)
|
||||
|
||||
tokens_count = get_max_token_count(tokenizer, positive_prompt)
|
||||
# some LoRA models also mess with the text encoder, so they must be active while compel builds conditioning tensors
|
||||
lora_conditioning_ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(tokens_count_including_eos_bos=tokens_count,
|
||||
lora_conditions=lora_conditions)
|
||||
with InvokeAIDiffuserComponent.custom_attention_context(model.unet,
|
||||
extra_conditioning_info=lora_conditioning_ec,
|
||||
step_count=-1):
|
||||
c, options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt)
|
||||
uc, _ = compel.build_conditioning_tensor_for_prompt_object(negative_prompt)
|
||||
|
||||
# now build the "real" ec
|
||||
ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(tokens_count_including_eos_bos=tokens_count,
|
||||
cross_attention_control_args=options.get(
|
||||
'cross_attention_control', None),
|
||||
|
@ -196,16 +196,6 @@ class addModelsForm(npyscreen.FormMultiPage):
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.nextrely += 1
|
||||
self.convert_models = self.add_widget_intelligent(
|
||||
npyscreen.TitleSelectOne,
|
||||
name="== CONVERT IMPORTED MODELS INTO DIFFUSERS==",
|
||||
values=["Keep original format", "Convert to diffusers"],
|
||||
value=0,
|
||||
begin_entry_at=4,
|
||||
max_height=4,
|
||||
hidden=True, # will appear when imported models box is edited
|
||||
scroll_exit=True,
|
||||
)
|
||||
self.cancel = self.add_widget_intelligent(
|
||||
npyscreen.ButtonPress,
|
||||
name="CANCEL",
|
||||
@ -240,8 +230,6 @@ class addModelsForm(npyscreen.FormMultiPage):
|
||||
self.show_directory_fields.addVisibleWhenSelected(i)
|
||||
|
||||
self.show_directory_fields.when_value_edited = self._clear_scan_directory
|
||||
self.import_model_paths.when_value_edited = self._show_hide_convert
|
||||
self.autoload_directory.when_value_edited = self._show_hide_convert
|
||||
|
||||
def resize(self):
|
||||
super().resize()
|
||||
@ -252,13 +240,6 @@ class addModelsForm(npyscreen.FormMultiPage):
|
||||
if not self.show_directory_fields.value:
|
||||
self.autoload_directory.value = ""
|
||||
|
||||
def _show_hide_convert(self):
|
||||
model_paths = self.import_model_paths.value or ""
|
||||
autoload_directory = self.autoload_directory.value or ""
|
||||
self.convert_models.hidden = (
|
||||
len(model_paths) == 0 and len(autoload_directory) == 0
|
||||
)
|
||||
|
||||
def _get_starter_model_labels(self) -> List[str]:
|
||||
window_width, window_height = get_terminal_size()
|
||||
label_width = 25
|
||||
@ -318,7 +299,6 @@ class addModelsForm(npyscreen.FormMultiPage):
|
||||
.scan_directory: Path to a directory of models to scan and import
|
||||
.autoscan_on_startup: True if invokeai should scan and import at startup time
|
||||
.import_model_paths: list of URLs, repo_ids and file paths to import
|
||||
.convert_to_diffusers: if True, convert legacy checkpoints into diffusers
|
||||
"""
|
||||
# we're using a global here rather than storing the result in the parentapp
|
||||
# due to some bug in npyscreen that is causing attributes to be lost
|
||||
@ -354,7 +334,6 @@ class addModelsForm(npyscreen.FormMultiPage):
|
||||
|
||||
# URLs and the like
|
||||
selections.import_model_paths = self.import_model_paths.value.split()
|
||||
selections.convert_to_diffusers = self.convert_models.value[0] == 1
|
||||
|
||||
|
||||
class AddModelApplication(npyscreen.NPSAppManaged):
|
||||
@ -367,7 +346,6 @@ class AddModelApplication(npyscreen.NPSAppManaged):
|
||||
scan_directory=None,
|
||||
autoscan_on_startup=None,
|
||||
import_model_paths=None,
|
||||
convert_to_diffusers=None,
|
||||
)
|
||||
|
||||
def onStart(self):
|
||||
@ -387,7 +365,6 @@ def process_and_execute(opt: Namespace, selections: Namespace):
|
||||
directory_to_scan = selections.scan_directory
|
||||
scan_at_startup = selections.autoscan_on_startup
|
||||
potential_models_to_install = selections.import_model_paths
|
||||
convert_to_diffusers = selections.convert_to_diffusers
|
||||
|
||||
install_requested_models(
|
||||
install_initial_models=models_to_install,
|
||||
@ -395,7 +372,6 @@ def process_and_execute(opt: Namespace, selections: Namespace):
|
||||
scan_directory=Path(directory_to_scan) if directory_to_scan else None,
|
||||
external_models=potential_models_to_install,
|
||||
scan_at_startup=scan_at_startup,
|
||||
convert_to_diffusers=convert_to_diffusers,
|
||||
precision="float32"
|
||||
if opt.full_precision
|
||||
else choose_precision(torch.device(choose_torch_device())),
|
||||
|
@ -11,6 +11,7 @@ from tempfile import TemporaryFile
|
||||
|
||||
import requests
|
||||
from diffusers import AutoencoderKL
|
||||
from diffusers import logging as dlogging
|
||||
from huggingface_hub import hf_hub_url
|
||||
from omegaconf import OmegaConf
|
||||
from omegaconf.dictconfig import DictConfig
|
||||
@ -68,7 +69,6 @@ def install_requested_models(
|
||||
scan_directory: Path = None,
|
||||
external_models: List[str] = None,
|
||||
scan_at_startup: bool = False,
|
||||
convert_to_diffusers: bool = False,
|
||||
precision: str = "float16",
|
||||
purge_deleted: bool = False,
|
||||
config_file_path: Path = None,
|
||||
@ -114,17 +114,16 @@ def install_requested_models(
|
||||
try:
|
||||
model_manager.heuristic_import(
|
||||
path_url_or_repo,
|
||||
convert=convert_to_diffusers,
|
||||
config_file_callback=_pick_configuration_file,
|
||||
commit_to_conf=config_file_path
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
sys.exit(-1)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
print(f'An exception has occurred: {str(e)}')
|
||||
|
||||
if scan_at_startup and scan_directory.is_dir():
|
||||
argument = '--autoconvert' if convert_to_diffusers else '--autoimport'
|
||||
argument = '--autoconvert'
|
||||
initfile = Path(Globals.root, Globals.initfile)
|
||||
replacement = Path(Globals.root, f'{Globals.initfile}.new')
|
||||
directory = str(scan_directory).replace('\\','/')
|
||||
@ -296,13 +295,21 @@ def _download_diffusion_weights(
|
||||
mconfig: DictConfig, access_token: str, precision: str = "float32"
|
||||
):
|
||||
repo_id = mconfig["repo_id"]
|
||||
revision = mconfig.get('revision',None)
|
||||
model_class = (
|
||||
StableDiffusionGeneratorPipeline
|
||||
if mconfig.get("format", None) == "diffusers"
|
||||
else AutoencoderKL
|
||||
)
|
||||
extra_arg_list = [{"revision": "fp16"}, {}] if precision == "float16" else [{}]
|
||||
extra_arg_list = [{"revision": revision}] if revision \
|
||||
else [{"revision": "fp16"}, {}] if precision == "float16" \
|
||||
else [{}]
|
||||
path = None
|
||||
|
||||
# quench safety checker warnings
|
||||
verbosity = dlogging.get_verbosity()
|
||||
dlogging.set_verbosity_error()
|
||||
|
||||
for extra_args in extra_arg_list:
|
||||
try:
|
||||
path = download_from_hf(
|
||||
@ -318,6 +325,7 @@ def _download_diffusion_weights(
|
||||
print(f"An unexpected error occurred while downloading the model: {e})")
|
||||
if path:
|
||||
break
|
||||
dlogging.set_verbosity(verbosity)
|
||||
return path
|
||||
|
||||
|
||||
@ -448,6 +456,8 @@ def new_config_file_contents(
|
||||
stanza["description"] = mod["description"]
|
||||
stanza["repo_id"] = mod["repo_id"]
|
||||
stanza["format"] = mod["format"]
|
||||
if "revision" in mod:
|
||||
stanza["revision"] = mod["revision"]
|
||||
# diffusers don't need width and height (probably .ckpt doesn't either)
|
||||
# so we no longer require these in INITIAL_MODELS.yaml
|
||||
if "width" in mod:
|
||||
@ -472,10 +482,9 @@ def new_config_file_contents(
|
||||
|
||||
conf[model] = stanza
|
||||
|
||||
# if no default model was chosen, then we select the first
|
||||
# one in the list
|
||||
# if no default model was chosen, then we select the first one in the list
|
||||
if not default_selected:
|
||||
conf[list(successfully_downloaded.keys())[0]]["default"] = True
|
||||
conf[list(conf.keys())[0]]["default"] = True
|
||||
|
||||
return OmegaConf.to_yaml(conf)
|
||||
|
||||
|
@ -400,8 +400,15 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
@property
|
||||
def _submodels(self) -> Sequence[torch.nn.Module]:
|
||||
module_names, _, _ = self.extract_init_dict(dict(self.config))
|
||||
values = [getattr(self, name) for name in module_names.keys()]
|
||||
return [m for m in values if isinstance(m, torch.nn.Module)]
|
||||
submodels = []
|
||||
for name in module_names.keys():
|
||||
if hasattr(self, name):
|
||||
value = getattr(self, name)
|
||||
else:
|
||||
value = getattr(self.config, name)
|
||||
if isinstance(value, torch.nn.Module):
|
||||
submodels.append(value)
|
||||
return submodels
|
||||
|
||||
def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int,
|
||||
conditioning_data: ConditioningData,
|
||||
@ -467,11 +474,12 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
if additional_guidance is None:
|
||||
additional_guidance = []
|
||||
extra_conditioning_info = conditioning_data.extra
|
||||
with self.invokeai_diffuser.custom_attention_context(extra_conditioning_info=extra_conditioning_info,
|
||||
step_count=len(self.scheduler.timesteps)
|
||||
with InvokeAIDiffuserComponent.custom_attention_context(self.invokeai_diffuser.model,
|
||||
extra_conditioning_info=extra_conditioning_info,
|
||||
step_count=len(self.scheduler.timesteps)
|
||||
):
|
||||
|
||||
yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps,
|
||||
yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.config.num_train_timesteps,
|
||||
latents=latents)
|
||||
|
||||
batch_size = latents.shape[0]
|
||||
@ -755,7 +763,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
|
||||
@property
|
||||
def channels(self) -> int:
|
||||
"""Compatible with DiffusionWrapper"""
|
||||
return self.unet.in_channels
|
||||
return self.unet.config.in_channels
|
||||
|
||||
def decode_latents(self, latents):
|
||||
# Explicit call to get the vae loaded, since `decode` isn't the forward method.
|
||||
|
@ -372,12 +372,6 @@ class ModelManager(object):
|
||||
)
|
||||
from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt
|
||||
|
||||
# try:
|
||||
# if self.list_models()[self.current_model]['status'] == 'active':
|
||||
# self.offload_model(self.current_model)
|
||||
# except Exception:
|
||||
# pass
|
||||
|
||||
if self._has_cuda():
|
||||
torch.cuda.empty_cache()
|
||||
pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
|
||||
@ -423,9 +417,9 @@ class ModelManager(object):
|
||||
pipeline_args.update(cache_dir=global_cache_dir("hub"))
|
||||
if using_fp16:
|
||||
pipeline_args.update(torch_dtype=torch.float16)
|
||||
fp_args_list = [{"revision": "fp16"}, {}]
|
||||
else:
|
||||
fp_args_list = [{}]
|
||||
revision = mconfig.get('revision') or ('fp16' if using_fp16 else None)
|
||||
fp_args_list = [{"revision": revision}] if revision else []
|
||||
fp_args_list.append({})
|
||||
|
||||
verbosity = dlogging.get_verbosity()
|
||||
dlogging.set_verbosity_error()
|
||||
@ -1162,7 +1156,7 @@ class ModelManager(object):
|
||||
return self.device.type == "cuda"
|
||||
|
||||
def _diffuser_sha256(
|
||||
self, name_or_path: Union[str, Path], chunksize=4096
|
||||
self, name_or_path: Union[str, Path], chunksize=16777216
|
||||
) -> Union[str, bytes]:
|
||||
path = None
|
||||
if isinstance(name_or_path, Path):
|
||||
|
@ -14,7 +14,6 @@ from torch import nn
|
||||
|
||||
from compel.cross_attention_control import Arguments
|
||||
from diffusers.models.unet_2d_condition import UNet2DConditionModel
|
||||
from diffusers.models.cross_attention import AttnProcessor
|
||||
from ldm.invoke.devices import torch_dtype
|
||||
|
||||
|
||||
@ -163,7 +162,7 @@ class Context:
|
||||
|
||||
class InvokeAICrossAttentionMixin:
|
||||
"""
|
||||
Enable InvokeAI-flavoured CrossAttention calculation, which does aggressive low-memory slicing and calls
|
||||
Enable InvokeAI-flavoured Attention calculation, which does aggressive low-memory slicing and calls
|
||||
through both to an attention_slice_wrangler and a slicing_strategy_getter for custom attention map wrangling
|
||||
and dymamic slicing strategy selection.
|
||||
"""
|
||||
@ -178,7 +177,7 @@ class InvokeAICrossAttentionMixin:
|
||||
Set custom attention calculator to be called when attention is calculated
|
||||
:param wrangler: Callback, with args (module, suggested_attention_slice, dim, offset, slice_size),
|
||||
which returns either the suggested_attention_slice or an adjusted equivalent.
|
||||
`module` is the current CrossAttention module for which the callback is being invoked.
|
||||
`module` is the current Attention module for which the callback is being invoked.
|
||||
`suggested_attention_slice` is the default-calculated attention slice
|
||||
`dim` is -1 if the attenion map has not been sliced, or 0 or 1 for dimension-0 or dimension-1 slicing.
|
||||
If `dim` is >= 0, `offset` and `slice_size` specify the slice start and length.
|
||||
@ -288,16 +287,7 @@ class InvokeAICrossAttentionMixin:
|
||||
return self.einsum_op_tensor_mem(q, k, v, 32)
|
||||
|
||||
|
||||
|
||||
def restore_default_cross_attention(model, is_running_diffusers: bool, processors_to_restore: Optional[AttnProcessor]=None):
|
||||
if is_running_diffusers:
|
||||
unet = model
|
||||
unet.set_attn_processor(processors_to_restore or CrossAttnProcessor())
|
||||
else:
|
||||
remove_attention_function(model)
|
||||
|
||||
|
||||
def override_cross_attention(model, context: Context, is_running_diffusers = False):
|
||||
def setup_cross_attention_control_attention_processors(unet: UNet2DConditionModel, context: Context):
|
||||
"""
|
||||
Inject attention parameters and functions into the passed in model to enable cross attention editing.
|
||||
|
||||
@ -323,26 +313,19 @@ def override_cross_attention(model, context: Context, is_running_diffusers = Fal
|
||||
|
||||
context.cross_attention_mask = mask.to(device)
|
||||
context.cross_attention_index_map = indices.to(device)
|
||||
if is_running_diffusers:
|
||||
unet = model
|
||||
old_attn_processors = unet.attn_processors
|
||||
if torch.backends.mps.is_available():
|
||||
# see note in StableDiffusionGeneratorPipeline.__init__ about borked slicing on MPS
|
||||
unet.set_attn_processor(SwapCrossAttnProcessor())
|
||||
else:
|
||||
# try to re-use an existing slice size
|
||||
default_slice_size = 4
|
||||
slice_size = next((p.slice_size for p in old_attn_processors.values() if type(p) is SlicedAttnProcessor), default_slice_size)
|
||||
unet.set_attn_processor(SlicedSwapCrossAttnProcesser(slice_size=slice_size))
|
||||
old_attn_processors = unet.attn_processors
|
||||
if torch.backends.mps.is_available():
|
||||
# see note in StableDiffusionGeneratorPipeline.__init__ about borked slicing on MPS
|
||||
unet.set_attn_processor(SwapCrossAttnProcessor())
|
||||
else:
|
||||
context.register_cross_attention_modules(model)
|
||||
inject_attention_function(model, context)
|
||||
|
||||
|
||||
# try to re-use an existing slice size
|
||||
default_slice_size = 4
|
||||
slice_size = next((p.slice_size for p in old_attn_processors.values() if type(p) is SlicedAttnProcessor), default_slice_size)
|
||||
unet.set_attn_processor(SlicedSwapCrossAttnProcesser(slice_size=slice_size))
|
||||
|
||||
|
||||
def get_cross_attention_modules(model, which: CrossAttentionType) -> list[tuple[str, InvokeAICrossAttentionMixin]]:
|
||||
from ldm.modules.attention import CrossAttention # avoid circular import
|
||||
from ldm.modules.attention import CrossAttention # avoid circular import # TODO: rename as in diffusers?
|
||||
cross_attention_class: type = InvokeAIDiffusersCrossAttention if isinstance(model,UNet2DConditionModel) else CrossAttention
|
||||
which_attn = "attn1" if which is CrossAttentionType.SELF else "attn2"
|
||||
attention_module_tuples = [(name,module) for name, module in model.named_modules() if
|
||||
@ -448,7 +431,7 @@ def get_mem_free_total(device):
|
||||
|
||||
|
||||
|
||||
class InvokeAIDiffusersCrossAttention(diffusers.models.attention.CrossAttention, InvokeAICrossAttentionMixin):
|
||||
class InvokeAIDiffusersCrossAttention(diffusers.models.attention.Attention, InvokeAICrossAttentionMixin):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
@ -473,8 +456,8 @@ class InvokeAIDiffusersCrossAttention(diffusers.models.attention.CrossAttention,
|
||||
"""
|
||||
# base implementation
|
||||
|
||||
class CrossAttnProcessor:
|
||||
def __call__(self, attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None):
|
||||
class AttnProcessor:
|
||||
def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, attention_mask=None):
|
||||
batch_size, sequence_length, _ = hidden_states.shape
|
||||
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length)
|
||||
|
||||
@ -503,7 +486,7 @@ from dataclasses import field, dataclass
|
||||
|
||||
import torch
|
||||
|
||||
from diffusers.models.cross_attention import CrossAttention, CrossAttnProcessor, SlicedAttnProcessor
|
||||
from diffusers.models.attention_processor import Attention, AttnProcessor, SlicedAttnProcessor
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -548,7 +531,7 @@ class SlicedSwapCrossAttnProcesser(SlicedAttnProcessor):
|
||||
|
||||
# TODO: dynamically pick slice size based on memory conditions
|
||||
|
||||
def __call__(self, attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None,
|
||||
def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, attention_mask=None,
|
||||
# kwargs
|
||||
swap_cross_attn_context: SwapCrossAttnContext=None):
|
||||
|
||||
|
@ -12,17 +12,6 @@ class DDIMSampler(Sampler):
|
||||
self.invokeai_diffuser = InvokeAIDiffuserComponent(self.model,
|
||||
model_forward_callback = lambda x, sigma, cond: self.model.apply_model(x, sigma, cond))
|
||||
|
||||
def prepare_to_sample(self, t_enc, **kwargs):
|
||||
super().prepare_to_sample(t_enc, **kwargs)
|
||||
|
||||
extra_conditioning_info = kwargs.get('extra_conditioning_info', None)
|
||||
all_timesteps_count = kwargs.get('all_timesteps_count', t_enc)
|
||||
|
||||
if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
|
||||
self.invokeai_diffuser.override_attention_processors(extra_conditioning_info, step_count = all_timesteps_count)
|
||||
else:
|
||||
self.invokeai_diffuser.restore_default_cross_attention()
|
||||
|
||||
|
||||
# This is the central routine
|
||||
@torch.no_grad()
|
||||
|
@ -38,15 +38,6 @@ class CFGDenoiser(nn.Module):
|
||||
model_forward_callback=lambda x, sigma, cond: self.inner_model(x, sigma, cond=cond))
|
||||
|
||||
|
||||
def prepare_to_sample(self, t_enc, **kwargs):
|
||||
|
||||
extra_conditioning_info = kwargs.get('extra_conditioning_info', None)
|
||||
|
||||
if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
|
||||
self.invokeai_diffuser.override_attention_processors(extra_conditioning_info, step_count = t_enc)
|
||||
else:
|
||||
self.invokeai_diffuser.restore_default_cross_attention()
|
||||
|
||||
|
||||
def forward(self, x, sigma, uncond, cond, cond_scale):
|
||||
next_x = self.invokeai_diffuser.do_diffusion_step(x, sigma, uncond, cond, cond_scale)
|
||||
|
@ -14,17 +14,6 @@ class PLMSSampler(Sampler):
|
||||
def __init__(self, model, schedule='linear', device=None, **kwargs):
|
||||
super().__init__(model,schedule,model.num_timesteps, device)
|
||||
|
||||
def prepare_to_sample(self, t_enc, **kwargs):
|
||||
super().prepare_to_sample(t_enc, **kwargs)
|
||||
|
||||
extra_conditioning_info = kwargs.get('extra_conditioning_info', None)
|
||||
all_timesteps_count = kwargs.get('all_timesteps_count', t_enc)
|
||||
|
||||
if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
|
||||
self.invokeai_diffuser.override_attention_processors(extra_conditioning_info, step_count = all_timesteps_count)
|
||||
else:
|
||||
self.invokeai_diffuser.restore_default_cross_attention()
|
||||
|
||||
|
||||
# this is the essential routine
|
||||
@torch.no_grad()
|
||||
|
@ -1,18 +1,17 @@
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from math import ceil
|
||||
from typing import Callable, Optional, Union, Any, Dict
|
||||
from typing import Callable, Optional, Union, Any
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from diffusers.models.cross_attention import AttnProcessor
|
||||
from diffusers import UNet2DConditionModel
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from ldm.invoke.globals import Globals
|
||||
from ldm.models.diffusion.cross_attention_control import (
|
||||
Arguments,
|
||||
restore_default_cross_attention,
|
||||
override_cross_attention,
|
||||
setup_cross_attention_control_attention_processors,
|
||||
Context,
|
||||
get_cross_attention_modules,
|
||||
CrossAttentionType,
|
||||
@ -84,66 +83,45 @@ class InvokeAIDiffuserComponent:
|
||||
self.cross_attention_control_context = None
|
||||
self.sequential_guidance = Globals.sequential_guidance
|
||||
|
||||
@classmethod
|
||||
@contextmanager
|
||||
def custom_attention_context(
|
||||
self, extra_conditioning_info: Optional[ExtraConditioningInfo], step_count: int
|
||||
clss,
|
||||
unet: UNet2DConditionModel, # note: also may futz with the text encoder depending on requested LoRAs
|
||||
extra_conditioning_info: Optional[ExtraConditioningInfo],
|
||||
step_count: int
|
||||
):
|
||||
old_attn_processor = None
|
||||
old_attn_processors = None
|
||||
if extra_conditioning_info and (
|
||||
extra_conditioning_info.wants_cross_attention_control
|
||||
| extra_conditioning_info.has_lora_conditions
|
||||
):
|
||||
old_attn_processor = self.override_attention_processors(
|
||||
extra_conditioning_info, step_count=step_count
|
||||
)
|
||||
old_attn_processors = unet.attn_processors
|
||||
# Load lora conditions into the model
|
||||
if extra_conditioning_info.has_lora_conditions:
|
||||
for condition in extra_conditioning_info.lora_conditions:
|
||||
condition() # target model is stored in condition state for some reason
|
||||
if extra_conditioning_info.wants_cross_attention_control:
|
||||
cross_attention_control_context = Context(
|
||||
arguments=extra_conditioning_info.cross_attention_control_args,
|
||||
step_count=step_count,
|
||||
)
|
||||
setup_cross_attention_control_attention_processors(
|
||||
unet,
|
||||
cross_attention_control_context,
|
||||
)
|
||||
|
||||
try:
|
||||
yield None
|
||||
finally:
|
||||
if old_attn_processor is not None:
|
||||
self.restore_default_cross_attention(old_attn_processor)
|
||||
if old_attn_processors is not None:
|
||||
unet.set_attn_processor(old_attn_processors)
|
||||
if extra_conditioning_info and extra_conditioning_info.has_lora_conditions:
|
||||
for lora_condition in extra_conditioning_info.lora_conditions:
|
||||
lora_condition.unload()
|
||||
# TODO resuscitate attention map saving
|
||||
# self.remove_attention_map_saving()
|
||||
|
||||
def override_attention_processors(
    self, conditioning: ExtraConditioningInfo, step_count: int
) -> Dict[str, AttnProcessor]:
    """
    Apply LoRA conditions and/or cross-attention control to ``self.model``.

    The model's attention processors are captured before anything is
    changed and handed back, so the caller can reinstate them afterwards.
    """
    previous_processors = self.model.attn_processors

    # Each LoRA condition is a callable that loads itself into the model.
    if conditioning.has_lora_conditions:
        for apply_condition in conditioning.lora_conditions:
            apply_condition(self.model)

    if not conditioning.wants_cross_attention_control:
        return previous_processors

    self.cross_attention_control_context = Context(
        arguments=conditioning.cross_attention_control_args,
        step_count=step_count,
    )
    override_cross_attention(
        self.model,
        self.cross_attention_control_context,
        is_running_diffusers=self.is_running_diffusers,
    )
    return previous_processors
|
||||
|
||||
def restore_default_cross_attention(
    self, processors_to_restore: Optional[dict[str, "AttnProcessor"]] = None
):
    """Drop the cross-attention control context and reset the model's attention.

    Delegates to the module-level ``restore_default_cross_attention`` helper,
    passing along ``processors_to_restore`` unchanged.
    """
    self.cross_attention_control_context = None
    restore_kwargs = dict(
        is_running_diffusers=self.is_running_diffusers,
        processors_to_restore=processors_to_restore,
    )
    restore_default_cross_attention(self.model, **restore_kwargs)
|
||||
|
||||
def setup_attention_map_saving(self, saver: AttentionMapSaver):
|
||||
def callback(slice, dim, offset, slice_size, key):
|
||||
if dim is not None:
|
||||
|
@ -1,15 +1,16 @@
|
||||
import re
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from compel import Compel
|
||||
from diffusers.models import UNet2DConditionModel
|
||||
from filelock import FileLock, Timeout
|
||||
from safetensors.torch import load_file
|
||||
from torch.utils.hooks import RemovableHandle
|
||||
from transformers import CLIPTextModel
|
||||
|
||||
from ldm.invoke.devices import choose_torch_device
|
||||
from ..invoke.globals import global_lora_models_dir, Globals
|
||||
from ..invoke.devices import choose_torch_device
|
||||
|
||||
"""
|
||||
This module supports loading LoRA weights trained with https://github.com/kohya-ss/sd-scripts
|
||||
@ -17,6 +18,11 @@ To be removed once support for diffusers LoRA weights is well supported
|
||||
"""
|
||||
|
||||
|
||||
class IncompatibleModelException(Exception):
    """Raised when there is an attempt to load a LoRA into a model that is incompatible with it"""
|
||||
|
||||
|
||||
class LoRALayer:
|
||||
lora_name: str
|
||||
name: str
|
||||
@ -31,18 +37,14 @@ class LoRALayer:
|
||||
self.name = name
|
||||
self.scale = alpha / rank if (alpha and rank) else 1.0
|
||||
|
||||
def forward(self, lora, input_h, output):
|
||||
def forward(self, lora, input_h):
|
||||
if self.mid is None:
|
||||
output = (
|
||||
output
|
||||
+ self.up(self.down(*input_h)) * lora.multiplier * self.scale
|
||||
)
|
||||
weight = self.up(self.down(*input_h))
|
||||
else:
|
||||
output = (
|
||||
output
|
||||
+ self.up(self.mid(self.down(*input_h))) * lora.multiplier * self.scale
|
||||
)
|
||||
return output
|
||||
weight = self.up(self.mid(self.down(*input_h)))
|
||||
|
||||
return weight * lora.multiplier * self.scale
|
||||
|
||||
|
||||
class LoHALayer:
|
||||
lora_name: str
|
||||
@ -64,8 +66,7 @@ class LoHALayer:
|
||||
self.name = name
|
||||
self.scale = alpha / rank if (alpha and rank) else 1.0
|
||||
|
||||
def forward(self, lora, input_h, output):
|
||||
|
||||
def forward(self, lora, input_h):
|
||||
if type(self.org_module) == torch.nn.Conv2d:
|
||||
op = torch.nn.functional.conv2d
|
||||
extra_args = dict(
|
||||
@ -80,21 +81,87 @@ class LoHALayer:
|
||||
extra_args = {}
|
||||
|
||||
if self.t1 is None:
|
||||
weight = ((self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b))
|
||||
weight = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
|
||||
|
||||
else:
|
||||
rebuild1 = torch.einsum('i j k l, j r, i p -> p r k l', self.t1, self.w1_b, self.w1_a)
|
||||
rebuild2 = torch.einsum('i j k l, j r, i p -> p r k l', self.t2, self.w2_b, self.w2_a)
|
||||
rebuild1 = torch.einsum(
|
||||
"i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a
|
||||
)
|
||||
rebuild2 = torch.einsum(
|
||||
"i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a
|
||||
)
|
||||
weight = rebuild1 * rebuild2
|
||||
|
||||
|
||||
bias = self.bias if self.bias is not None else 0
|
||||
return output + op(
|
||||
return op(
|
||||
*input_h,
|
||||
(weight + bias).view(self.org_module.weight.shape),
|
||||
None,
|
||||
**extra_args,
|
||||
) * lora.multiplier * self.scale
|
||||
|
||||
class LoKRLayer:
    """LoKR layer: a weight delta rebuilt as the Kronecker product of two factors.

    Each factor is either stored directly (``w1`` / ``w2``) or as a pair of
    matrices that are multiplied on the fly (``w1_a @ w1_b`` /
    ``w2_a @ w2_b``). When ``t2`` is set, the second factor is instead
    rebuilt from ``t2``, ``w2_a`` and ``w2_b`` with an einsum.
    """

    lora_name: str
    name: str
    scale: float

    w1: Optional[torch.Tensor] = None
    w1_a: Optional[torch.Tensor] = None
    w1_b: Optional[torch.Tensor] = None
    w2: Optional[torch.Tensor] = None
    w2_a: Optional[torch.Tensor] = None
    w2_b: Optional[torch.Tensor] = None
    t2: Optional[torch.Tensor] = None
    bias: Optional[torch.Tensor] = None

    # The wrapped module; its type selects the op and its weight shape is
    # the target shape of the rebuilt delta.
    org_module: torch.nn.Module

    def __init__(self, lora_name: str, name: str, rank=4, alpha=1.0):
        """Store the identifiers and derive ``scale``.

        ``scale`` is ``alpha / rank`` when both are truthy, otherwise 1.0
        (i.e. the layer is left unscaled).
        """
        self.lora_name = lora_name
        self.name = name
        self.scale = alpha / rank if (alpha and rank) else 1.0

    def forward(self, lora, input_h):
        """Return this layer's contribution for the given module inputs.

        Args:
            lora: object providing a ``multiplier`` attribute.
            input_h: sequence of positional inputs; it is unpacked into the
                functional op, so a single tensor is passed as ``(x,)``.

        Returns:
            ``op(*input_h, delta_weight) * lora.multiplier * self.scale``
            where ``op`` is ``conv2d`` for Conv2d modules and ``linear``
            otherwise.
        """
        # isinstance (not an exact type match) so that Conv2d subclasses are
        # also routed to conv2d instead of falling through to linear.
        if isinstance(self.org_module, torch.nn.Conv2d):
            op = torch.nn.functional.conv2d
            extra_args = dict(
                stride=self.org_module.stride,
                padding=self.org_module.padding,
                dilation=self.org_module.dilation,
                groups=self.org_module.groups,
            )
        else:
            op = torch.nn.functional.linear
            extra_args = {}

        w1 = self.w1
        if w1 is None:
            w1 = self.w1_a @ self.w1_b

        w2 = self.w2
        if w2 is None:
            if self.t2 is None:
                w2 = self.w2_a @ self.w2_b
            else:
                w2 = torch.einsum(
                    'i j k l, i p, j r -> p r k l', self.t2, self.w2_a, self.w2_b
                )

        # A 4-D second factor needs the first factor padded to 4-D as well
        # before the Kronecker product is taken.
        if len(w2.shape) == 4:
            w1 = w1.unsqueeze(2).unsqueeze(2)
        w2 = w2.contiguous()
        weight = torch.kron(w1, w2).reshape(self.org_module.weight.shape)

        bias = self.bias if self.bias is not None else 0
        return op(
            *input_h,
            (weight + bias).view(self.org_module.weight.shape),
            None,
            **extra_args
        ) * lora.multiplier * self.scale
|
||||
|
||||
|
||||
class LoRAModuleWrapper:
|
||||
unet: UNet2DConditionModel
|
||||
@ -111,12 +178,22 @@ class LoRAModuleWrapper:
|
||||
self.applied_loras = {}
|
||||
self.loaded_loras = {}
|
||||
|
||||
self.UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel", "Attention", "ResnetBlock2D", "Downsample2D", "Upsample2D", "SpatialTransformer"]
|
||||
self.TEXT_ENCODER_TARGET_REPLACE_MODULE = ["ResidualAttentionBlock", "CLIPAttention", "CLIPMLP"]
|
||||
self.UNET_TARGET_REPLACE_MODULE = [
|
||||
"Transformer2DModel",
|
||||
"Attention",
|
||||
"ResnetBlock2D",
|
||||
"Downsample2D",
|
||||
"Upsample2D",
|
||||
"SpatialTransformer",
|
||||
]
|
||||
self.TEXT_ENCODER_TARGET_REPLACE_MODULE = [
|
||||
"ResidualAttentionBlock",
|
||||
"CLIPAttention",
|
||||
"CLIPMLP",
|
||||
]
|
||||
self.LORA_PREFIX_UNET = "lora_unet"
|
||||
self.LORA_PREFIX_TEXT_ENCODER = "lora_te"
|
||||
|
||||
|
||||
def find_modules(
|
||||
prefix, root_module: torch.nn.Module, target_replace_modules
|
||||
) -> dict[str, torch.nn.Module]:
|
||||
@ -147,7 +224,6 @@ class LoRAModuleWrapper:
|
||||
self.LORA_PREFIX_UNET, unet, self.UNET_TARGET_REPLACE_MODULE
|
||||
)
|
||||
|
||||
|
||||
def lora_forward_hook(self, name):
|
||||
wrapper = self
|
||||
|
||||
@ -159,7 +235,7 @@ class LoRAModuleWrapper:
|
||||
layer = lora.layers.get(name, None)
|
||||
if layer is None:
|
||||
continue
|
||||
output = layer.forward(lora, input_h, output)
|
||||
output += layer.forward(lora, input_h)
|
||||
return output
|
||||
|
||||
return lora_forward
|
||||
@ -180,6 +256,7 @@ class LoRAModuleWrapper:
|
||||
def clear_loaded_loras(self):
|
||||
self.loaded_loras.clear()
|
||||
|
||||
|
||||
class LoRA:
|
||||
name: str
|
||||
layers: dict[str, LoRALayer]
|
||||
@ -205,7 +282,6 @@ class LoRA:
|
||||
state_dict_groupped[stem] = dict()
|
||||
state_dict_groupped[stem][leaf] = value
|
||||
|
||||
|
||||
for stem, values in state_dict_groupped.items():
|
||||
if stem.startswith(self.wrapper.LORA_PREFIX_TEXT_ENCODER):
|
||||
wrapped = self.wrapper.text_modules.get(stem, None)
|
||||
@ -226,34 +302,59 @@ class LoRA:
|
||||
if "alpha" in values:
|
||||
alpha = values["alpha"].item()
|
||||
|
||||
if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
|
||||
if (
|
||||
"bias_indices" in values
|
||||
and "bias_values" in values
|
||||
and "bias_size" in values
|
||||
):
|
||||
bias = torch.sparse_coo_tensor(
|
||||
values["bias_indices"],
|
||||
values["bias_values"],
|
||||
tuple(values["bias_size"]),
|
||||
).to(device=self.device, dtype=self.dtype)
|
||||
|
||||
|
||||
# lora and locon
|
||||
if "lora_down.weight" in values:
|
||||
value_down = values["lora_down.weight"]
|
||||
value_mid = values.get("lora_mid.weight", None)
|
||||
value_up = values["lora_up.weight"]
|
||||
value_mid = values.get("lora_mid.weight", None)
|
||||
value_up = values["lora_up.weight"]
|
||||
|
||||
if type(wrapped) == torch.nn.Conv2d:
|
||||
if value_mid is not None:
|
||||
layer_down = torch.nn.Conv2d(value_down.shape[1], value_down.shape[0], (1, 1), bias=False)
|
||||
layer_mid = torch.nn.Conv2d(value_mid.shape[1], value_mid.shape[0], wrapped.kernel_size, wrapped.stride, wrapped.padding, bias=False)
|
||||
layer_down = torch.nn.Conv2d(
|
||||
value_down.shape[1], value_down.shape[0], (1, 1), bias=False
|
||||
)
|
||||
layer_mid = torch.nn.Conv2d(
|
||||
value_mid.shape[1],
|
||||
value_mid.shape[0],
|
||||
wrapped.kernel_size,
|
||||
wrapped.stride,
|
||||
wrapped.padding,
|
||||
bias=False,
|
||||
)
|
||||
else:
|
||||
layer_down = torch.nn.Conv2d(value_down.shape[1], value_down.shape[0], wrapped.kernel_size, wrapped.stride, wrapped.padding, bias=False)
|
||||
layer_mid = None
|
||||
layer_down = torch.nn.Conv2d(
|
||||
value_down.shape[1],
|
||||
value_down.shape[0],
|
||||
wrapped.kernel_size,
|
||||
wrapped.stride,
|
||||
wrapped.padding,
|
||||
bias=False,
|
||||
)
|
||||
layer_mid = None
|
||||
|
||||
layer_up = torch.nn.Conv2d(value_up.shape[1], value_up.shape[0], (1, 1), bias=False)
|
||||
layer_up = torch.nn.Conv2d(
|
||||
value_up.shape[1], value_up.shape[0], (1, 1), bias=False
|
||||
)
|
||||
|
||||
elif type(wrapped) == torch.nn.Linear:
|
||||
layer_down = torch.nn.Linear(value_down.shape[1], value_down.shape[0], bias=False)
|
||||
layer_mid = None
|
||||
layer_up = torch.nn.Linear(value_up.shape[1], value_up.shape[0], bias=False)
|
||||
layer_down = torch.nn.Linear(
|
||||
value_down.shape[1], value_down.shape[0], bias=False
|
||||
)
|
||||
layer_mid = None
|
||||
layer_up = torch.nn.Linear(
|
||||
value_up.shape[1], value_up.shape[0], bias=False
|
||||
)
|
||||
|
||||
else:
|
||||
print(
|
||||
@ -261,52 +362,90 @@ class LoRA:
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
with torch.no_grad():
|
||||
layer_down.weight.copy_(value_down)
|
||||
if layer_mid is not None:
|
||||
layer_mid.weight.copy_(value_mid)
|
||||
layer_up.weight.copy_(value_up)
|
||||
|
||||
|
||||
layer_down.to(device=self.device, dtype=self.dtype)
|
||||
if layer_mid is not None:
|
||||
layer_mid.to(device=self.device, dtype=self.dtype)
|
||||
layer_up.to(device=self.device, dtype=self.dtype)
|
||||
|
||||
|
||||
rank = value_down.shape[0]
|
||||
|
||||
layer = LoRALayer(self.name, stem, rank, alpha)
|
||||
#layer.bias = bias # TODO: find and debug lora/locon with bias
|
||||
# layer.bias = bias # TODO: find and debug lora/locon with bias
|
||||
layer.down = layer_down
|
||||
layer.mid = layer_mid
|
||||
layer.up = layer_up
|
||||
|
||||
# loha
|
||||
elif "hada_w1_b" in values:
|
||||
|
||||
rank = values["hada_w1_b"].shape[0]
|
||||
|
||||
layer = LoHALayer(self.name, stem, rank, alpha)
|
||||
layer.org_module = wrapped
|
||||
layer.bias = bias
|
||||
|
||||
layer.w1_a = values["hada_w1_a"].to(device=self.device, dtype=self.dtype)
|
||||
layer.w1_b = values["hada_w1_b"].to(device=self.device, dtype=self.dtype)
|
||||
layer.w2_a = values["hada_w2_a"].to(device=self.device, dtype=self.dtype)
|
||||
layer.w2_b = values["hada_w2_b"].to(device=self.device, dtype=self.dtype)
|
||||
layer.w1_a = values["hada_w1_a"].to(
|
||||
device=self.device, dtype=self.dtype
|
||||
)
|
||||
layer.w1_b = values["hada_w1_b"].to(
|
||||
device=self.device, dtype=self.dtype
|
||||
)
|
||||
layer.w2_a = values["hada_w2_a"].to(
|
||||
device=self.device, dtype=self.dtype
|
||||
)
|
||||
layer.w2_b = values["hada_w2_b"].to(
|
||||
device=self.device, dtype=self.dtype
|
||||
)
|
||||
|
||||
if "hada_t1" in values:
|
||||
layer.t1 = values["hada_t1"].to(device=self.device, dtype=self.dtype)
|
||||
layer.t1 = values["hada_t1"].to(
|
||||
device=self.device, dtype=self.dtype
|
||||
)
|
||||
else:
|
||||
layer.t1 = None
|
||||
|
||||
if "hada_t2" in values:
|
||||
layer.t2 = values["hada_t2"].to(device=self.device, dtype=self.dtype)
|
||||
layer.t2 = values["hada_t2"].to(
|
||||
device=self.device, dtype=self.dtype
|
||||
)
|
||||
else:
|
||||
layer.t2 = None
|
||||
|
||||
# lokr
|
||||
elif "lokr_w1_b" in values or "lokr_w1" in values:
|
||||
|
||||
if "lokr_w1_b" in values:
|
||||
rank = values["lokr_w1_b"].shape[0]
|
||||
elif "lokr_w2_b" in values:
|
||||
rank = values["lokr_w2_b"].shape[0]
|
||||
else:
|
||||
rank = None # unscaled
|
||||
|
||||
layer = LoKRLayer(self.name, stem, rank, alpha)
|
||||
layer.org_module = wrapped
|
||||
layer.bias = bias
|
||||
|
||||
if "lokr_w1" in values:
|
||||
layer.w1 = values["lokr_w1"].to(device=self.device, dtype=self.dtype)
|
||||
else:
|
||||
layer.w1_a = values["lokr_w1_a"].to(device=self.device, dtype=self.dtype)
|
||||
layer.w1_b = values["lokr_w1_b"].to(device=self.device, dtype=self.dtype)
|
||||
|
||||
if "lokr_w2" in values:
|
||||
layer.w2 = values["lokr_w2"].to(device=self.device, dtype=self.dtype)
|
||||
else:
|
||||
layer.w2_a = values["lokr_w2_a"].to(device=self.device, dtype=self.dtype)
|
||||
layer.w2_b = values["lokr_w2_b"].to(device=self.device, dtype=self.dtype)
|
||||
|
||||
if "lokr_t2" in values:
|
||||
layer.t2 = values["lokr_t2"].to(device=self.device, dtype=self.dtype)
|
||||
|
||||
|
||||
else:
|
||||
print(
|
||||
f">> Encountered unknown lora layer module in {self.name}: {stem} - {type(wrapped).__name__}"
|
||||
@ -317,9 +456,13 @@ class LoRA:
|
||||
|
||||
|
||||
class KohyaLoraManager:
|
||||
def __init__(self, pipe, lora_path):
|
||||
lora_path = None
|
||||
vector_length_cache_path = None
|
||||
|
||||
def __init__(self, pipe):
|
||||
self.lora_path = Path(global_lora_models_dir())
|
||||
self.vector_length_cache_path = self.lora_path / '.vectorlength.cache'
|
||||
self.unet = pipe.unet
|
||||
self.lora_path = lora_path
|
||||
self.wrapper = LoRAModuleWrapper(pipe.unet, pipe.text_encoder)
|
||||
self.text_encoder = pipe.text_encoder
|
||||
self.device = torch.device(choose_torch_device())
|
||||
@ -332,6 +475,9 @@ class KohyaLoraManager:
|
||||
else:
|
||||
checkpoint = torch.load(path_file, map_location="cpu")
|
||||
|
||||
if not self.check_model_compatibility(checkpoint):
|
||||
raise IncompatibleModelException
|
||||
|
||||
lora = LoRA(name, self.device, self.dtype, self.wrapper, multiplier)
|
||||
lora.load_from_dict(checkpoint)
|
||||
self.wrapper.loaded_loras[name] = lora
|
||||
@ -339,12 +485,14 @@ class KohyaLoraManager:
|
||||
return lora
|
||||
|
||||
def apply_lora_model(self, name, mult: float = 1.0):
|
||||
path_file = None
|
||||
for suffix in ["ckpt", "safetensors", "pt"]:
|
||||
path_file = Path(self.lora_path, f"{name}.{suffix}")
|
||||
if path_file.exists():
|
||||
path_files = [x for x in Path(self.lora_path).glob(f"**/{name}.{suffix}")]
|
||||
if len(path_files):
|
||||
path_file = path_files[0]
|
||||
print(f" | Loading lora {path_file.name} with weight {mult}")
|
||||
break
|
||||
if not path_file.exists():
|
||||
if not path_file:
|
||||
print(f" ** Unable to find lora: {name}")
|
||||
return
|
||||
|
||||
@ -355,13 +503,90 @@ class KohyaLoraManager:
|
||||
lora.multiplier = mult
|
||||
self.wrapper.applied_loras[name] = lora
|
||||
|
||||
def unload_applied_lora(self, lora_name: str):
|
||||
def unload_applied_lora(self, lora_name: str) -> bool:
|
||||
"""If the indicated LoRA has previously been applied then
|
||||
unload it and return True. Return False if the LoRA was
|
||||
not previously applied (for status reporting)
|
||||
"""
|
||||
if lora_name in self.wrapper.applied_loras:
|
||||
del self.wrapper.applied_loras[lora_name]
|
||||
return True
|
||||
return False
|
||||
|
||||
def unload_lora(self, lora_name: str):
|
||||
def unload_lora(self, lora_name: str) -> bool:
|
||||
if lora_name in self.wrapper.loaded_loras:
|
||||
del self.wrapper.loaded_loras[lora_name]
|
||||
return True
|
||||
return False
|
||||
|
||||
def clear_loras(self):
    """Clear every currently applied LoRA by delegating to the module wrapper."""
    module_wrapper = self.wrapper
    module_wrapper.clear_applied_loras()
|
||||
|
||||
def check_model_compatibility(self, checkpoint) -> bool:
    """Tell whether a LoRA checkpoint matches this manager's text encoder.

    Compares the token vector length of the text encoder's input embeddings
    with the length derived from ``checkpoint``; returns True when equal.
    """
    embedding_rows = self.text_encoder.get_input_embeddings().weight.data
    model_length = embedding_rows[0].shape[0]
    return model_length == self.vector_length_from_checkpoint(checkpoint)
|
||||
|
||||
@staticmethod
|
||||
def vector_length_from_checkpoint(checkpoint: dict) -> int:
|
||||
"""Return the vector token length for the passed LoRA checkpoint object.
|
||||
This is used to determine which SD model version the LoRA was based on.
|
||||
768 -> SDv1
|
||||
1024-> SDv2
|
||||
"""
|
||||
key1 = "lora_te_text_model_encoder_layers_0_mlp_fc1.lora_down.weight"
|
||||
key2 = "lora_te_text_model_encoder_layers_0_self_attn_k_proj.hada_w1_a"
|
||||
lora_token_vector_length = (
|
||||
checkpoint[key1].shape[1]
|
||||
if key1 in checkpoint
|
||||
else checkpoint[key2].shape[0]
|
||||
if key2 in checkpoint
|
||||
else 768
|
||||
)
|
||||
return lora_token_vector_length
|
||||
|
||||
@classmethod
def vector_length_from_checkpoint_file(cls, checkpoint_path: Path) -> int:
    """Return the token vector length of the LoRA stored at ``checkpoint_path``.

    Results are memoised in the on-disk vector length cache, keyed by the
    string form of the path, so each checkpoint is only loaded once.

    Args:
        checkpoint_path: path of a ``.safetensors`` file or a file readable
            by ``torch.load``.

    Returns:
        The value computed by ``vector_length_from_checkpoint``.
    """
    # The class-level ``vector_length_cache_path`` is None; only __init__
    # sets it, and only on instances. Because this is a classmethod it can
    # be called with no instance, so derive the same location here instead
    # of handing None to the cache.
    cache_path = cls.vector_length_cache_path
    if cache_path is None:
        cache_path = Path(global_lora_models_dir()) / '.vectorlength.cache'

    cache_key = str(checkpoint_path)
    with LoraVectorLengthCache(cache_path) as cache:
        if cache_key not in cache:
            if checkpoint_path.suffix == ".safetensors":
                checkpoint = load_file(
                    checkpoint_path.absolute().as_posix(), device="cpu"
                )
            else:
                # NOTE(review): torch.load unpickles the file; only use on
                # checkpoints from trusted sources.
                checkpoint = torch.load(checkpoint_path, map_location="cpu")
            cache[cache_key] = KohyaLoraManager.vector_length_from_checkpoint(
                checkpoint
            )
        return cache[cache_key]
|
||||
|
||||
class LoraVectorLengthCache(object):
    """Context manager around a JSON file that caches LoRA vector lengths.

    Entering yields a dict loaded from ``cache_path`` (empty when the file
    is missing or unreadable); exiting writes the dict back. Access to the
    file is guarded by a ``.cachelock`` file lock in the same directory.
    """

    def __init__(self, cache_path: Path):
        self.cache_path = cache_path
        self.lock = FileLock(Path(cache_path.parent, ".cachelock"))
        self.cache = {}
        # Tracks whether __enter__ obtained the lock, so that __exit__ only
        # writes and releases when this object actually holds it.
        self._lock_held = False

    def __enter__(self):
        """Acquire the lock, load the cache file if present and return the dict."""
        # The acquire call must sit inside the try, otherwise the Timeout
        # handler can never run.
        try:
            self.lock.acquire(timeout=10)
        except Timeout:
            print(
                "** Can't acquire lock on lora vector length cache. Operations will be slower"
            )
            return self.cache
        self._lock_held = True

        try:
            if self.cache_path.exists():
                with open(self.cache_path, "r") as json_file:
                    self.cache = json.load(json_file)
        except (json.JSONDecodeError, OSError):
            # Corrupt or unreadable cache: discard it and start empty.
            self.cache_path.unlink(missing_ok=True)
        return self.cache

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Persist the cache and release the lock, if the lock was obtained."""
        if not self._lock_held:
            # Never write the shared file without holding the lock.
            return
        try:
            with open(self.cache_path, "w") as json_file:
                json.dump(self.cache, json_file)
        finally:
            # Release even when the write fails so the lock is not leaked.
            self._lock_held = False
            self.lock.release()
|
||||
|
@ -1,66 +1,101 @@
|
||||
import os
|
||||
from diffusers import StableDiffusionPipeline
|
||||
from pathlib import Path
|
||||
|
||||
from diffusers import UNet2DConditionModel, StableDiffusionPipeline
|
||||
from ldm.invoke.globals import global_lora_models_dir
|
||||
from .kohya_lora_manager import KohyaLoraManager
|
||||
from .kohya_lora_manager import KohyaLoraManager, IncompatibleModelException
|
||||
from typing import Optional, Dict
|
||||
|
||||
class LoraCondition:
|
||||
name: str
|
||||
weight: float
|
||||
|
||||
def __init__(self, name, weight: float = 1.0, kohya_manager: Optional[KohyaLoraManager]=None):
|
||||
def __init__(self,
|
||||
name,
|
||||
weight: float = 1.0,
|
||||
unet: UNet2DConditionModel=None, # for diffusers format LoRAs
|
||||
kohya_manager: Optional[KohyaLoraManager]=None, # for KohyaLoraManager-compatible LoRAs
|
||||
):
|
||||
self.name = name
|
||||
self.weight = weight
|
||||
self.kohya_manager = kohya_manager
|
||||
self.unet = unet
|
||||
|
||||
def __call__(self, model):
|
||||
def __call__(self):
|
||||
# TODO: make model able to load from huggingface, rather then just local files
|
||||
path = Path(global_lora_models_dir(), self.name)
|
||||
if path.is_dir():
|
||||
if model.load_attn_procs:
|
||||
if not self.unet:
|
||||
print(f" ** Unable to load diffusers-format LoRA {self.name}: unet is None")
|
||||
return
|
||||
if self.unet.load_attn_procs:
|
||||
file = Path(path, "pytorch_lora_weights.bin")
|
||||
if file.is_file():
|
||||
print(f">> Loading LoRA: {path}")
|
||||
model.load_attn_procs(path.absolute().as_posix())
|
||||
self.unet.load_attn_procs(path.absolute().as_posix())
|
||||
else:
|
||||
print(f" ** Unable to find valid LoRA at: {path}")
|
||||
else:
|
||||
print(" ** Invalid Model to load LoRA")
|
||||
elif self.kohya_manager:
|
||||
self.kohya_manager.apply_lora_model(self.name,self.weight)
|
||||
try:
|
||||
self.kohya_manager.apply_lora_model(self.name,self.weight)
|
||||
except IncompatibleModelException:
|
||||
print(f" ** LoRA {self.name} is incompatible with this model; will generate without the LoRA applied.")
|
||||
else:
|
||||
print(" ** Unable to load LoRA")
|
||||
|
||||
def unload(self):
|
||||
if self.kohya_manager:
|
||||
if self.kohya_manager and self.kohya_manager.unload_applied_lora(self.name):
|
||||
print(f'>> unloading LoRA {self.name}')
|
||||
self.kohya_manager.unload_applied_lora(self.name)
|
||||
|
||||
|
||||
class LoraManager:
|
||||
def __init__(self, pipe):
|
||||
def __init__(self, pipe: StableDiffusionPipeline):
|
||||
# Kohya class handles lora not generated through diffusers
|
||||
self.kohya = KohyaLoraManager(pipe, global_lora_models_dir())
|
||||
self.kohya = KohyaLoraManager(pipe)
|
||||
self.unet = pipe.unet
|
||||
|
||||
def set_loras_conditions(self, lora_weights: list):
|
||||
conditions = []
|
||||
if len(lora_weights) > 0:
|
||||
for lora in lora_weights:
|
||||
conditions.append(LoraCondition(lora.model, lora.weight, self.kohya))
|
||||
conditions.append(LoraCondition(lora.model, lora.weight, self.unet, self.kohya))
|
||||
|
||||
if len(conditions) > 0:
|
||||
return conditions
|
||||
|
||||
return None
|
||||
|
||||
def list_compatible_loras(self)->Dict[str, Path]:
|
||||
'''
|
||||
List all the LoRAs in the global lora directory that
|
||||
are compatible with the current model. Return a dictionary
|
||||
of the lora basename and its path.
|
||||
'''
|
||||
model_length = self.kohya.text_encoder.get_input_embeddings().weight.data[0].shape[0]
|
||||
return self.list_loras(model_length)
|
||||
|
||||
@classmethod
|
||||
def list_loras(self)->Dict[str, Path]:
|
||||
@staticmethod
|
||||
def list_loras(token_vector_length:int=None)->Dict[str, Path]:
|
||||
'''List the LoRAS in the global lora directory.
|
||||
If token_vector_length is provided, then only return
|
||||
LoRAS that have the indicated length:
|
||||
768: v1 models
|
||||
1024: v2 models
|
||||
'''
|
||||
path = Path(global_lora_models_dir())
|
||||
models_found = dict()
|
||||
for root,_,files in os.walk(path):
|
||||
for x in files:
|
||||
name = Path(x).stem
|
||||
suffix = Path(x).suffix
|
||||
if suffix in [".ckpt", ".pt", ".safetensors"]:
|
||||
models_found[name]=Path(root,x)
|
||||
if suffix not in [".ckpt", ".pt", ".safetensors"]:
|
||||
continue
|
||||
path = Path(root,x)
|
||||
if token_vector_length is None:
|
||||
models_found[name]=Path(root,x) # unconditional addition
|
||||
elif token_vector_length == KohyaLoraManager.vector_length_from_checkpoint_file(path):
|
||||
models_found[name]=Path(root,x) # conditional on the base model matching
|
||||
return models_found
|
||||
|
||||
|
@ -34,7 +34,7 @@ dependencies = [
|
||||
"clip_anytorch",
|
||||
"compel~=1.1.0",
|
||||
"datasets",
|
||||
"diffusers[torch]~=0.14",
|
||||
"diffusers[torch]~=0.15.0",
|
||||
"dnspython==2.2.1",
|
||||
"einops",
|
||||
"eventlet",
|
||||
|
Reference in New Issue
Block a user