Merge branch 'main' into refactor/nodes-on-generator

Authored by Jonathan on 2023-03-10 12:34:00 -06:00; committed by GitHub.
commit 370e8281b3
9 changed files with 30 additions and 136 deletions

View File

@@ -148,7 +148,7 @@ manager, please follow these steps:
     === "CUDA (NVidia)"
         ```bash
-        pip install InvokeAI[xformers] --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
+        pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
         ```
     === "ROCm (AMD)"

View File

@@ -495,18 +495,6 @@ class Generate:
         torch.cuda.reset_peak_memory_stats()
         results = list()
-        init_image = None
-        mask_image = None
-
-        try:
-            if (
-                self.free_gpu_mem
-                and self.model.cond_stage_model.device != self.model.device
-            ):
-                self.model.cond_stage_model.device = self.model.device
-                self.model.cond_stage_model.to(self.model.device)
-        except AttributeError:
-            pass
         try:
             uc, c, extra_conditioning_info = get_uc_and_c_and_ec(

View File

@@ -1274,7 +1274,7 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
             tokenizer=tokenizer,
             unet=unet.to(precision),
             scheduler=scheduler,
-            safety_checker=safety_checker.to(precision),
+            safety_checker=None if return_generator_pipeline else safety_checker.to(precision),
             feature_extractor=feature_extractor,
         )
     else:
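
Note: diffusers pipelines accept `safety_checker=None`, which is what the generator-pipeline branch now passes so the checker is neither cast to the target precision nor run during generation. A minimal illustration with the stock diffusers class (the model id is illustrative, not taken from this commit):

```python
from diffusers import StableDiffusionPipeline

# Passing safety_checker=None skips NSFW filtering; requires_safety_checker=False
# silences the warning diffusers would otherwise emit about the missing checker.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    safety_checker=None,
    requires_safety_checker=False,
)
```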

View File

@@ -108,7 +108,7 @@ class ModelManager(object):
         if model_name in self.models:
             requested_model = self.models[model_name]["model"]
             print(f">> Retrieving model {model_name} from system RAM cache")
-            self.models[model_name]["model"] = self._model_from_cpu(requested_model)
+            requested_model.ready()
             width = self.models[model_name]["width"]
             height = self.models[model_name]["height"]
             hash = self.models[model_name]["hash"]
@@ -503,7 +503,7 @@ class ModelManager(object):
             print(f">> Offloading {model_name} to CPU")
             model = self.models[model_name]["model"]
-            self.models[model_name]["model"] = self._model_to_cpu(model)
+            model.offload_all()
             gc.collect()
             if self._has_cuda():
@@ -1048,43 +1048,6 @@ class ModelManager(object):
         self.stack.remove(model_name)
         self.models.pop(model_name, None)

-    def _model_to_cpu(self, model):
-        if self.device == CPU_DEVICE:
-            return model
-
-        if isinstance(model, StableDiffusionGeneratorPipeline):
-            model.offload_all()
-            return model
-
-        model.cond_stage_model.device = CPU_DEVICE
-        model.to(CPU_DEVICE)
-
-        for submodel in ("first_stage_model", "cond_stage_model", "model"):
-            try:
-                getattr(model, submodel).to(CPU_DEVICE)
-            except AttributeError:
-                pass
-        return model
-
-    def _model_from_cpu(self, model):
-        if self.device == CPU_DEVICE:
-            return model
-
-        if isinstance(model, StableDiffusionGeneratorPipeline):
-            model.ready()
-            return model
-
-        model.to(self.device)
-        model.cond_stage_model.device = self.device
-
-        for submodel in ("first_stage_model", "cond_stage_model", "model"):
-            try:
-                getattr(model, submodel).to(self.device)
-            except AttributeError:
-                pass
-        return model
-
     def _pop_oldest_model(self):
         """
         Remove the first element of the FIFO, which ought
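
Note: the two removed helpers are replaced by calls the pipeline itself exposes: `ready()` (shown later delegating to `self._model_group.ready()`) and `offload_all()`. A minimal sketch of the resulting cache pattern, using a hypothetical `TinyModelCache` rather than the real `ModelManager`:

```python
# Minimal sketch, not the actual ModelManager: the cache no longer shuffles
# submodules between devices itself; it asks the cached pipeline to do it.
class TinyModelCache:
    def __init__(self):
        self.models: dict[str, dict] = {}  # hypothetical cache structure

    def get_model(self, model_name: str):
        requested_model = self.models[model_name]["model"]
        # formerly: self.models[...]["model"] = self._model_from_cpu(requested_model)
        requested_model.ready()  # pipeline moves its own components to the execution device
        return requested_model

    def offload_model(self, model_name: str):
        model = self.models[model_name]["model"]
        # formerly: self.models[...]["model"] = self._model_to_cpu(model)
        model.offload_all()  # pipeline moves its own components back to CPU
```

Dropping the `cond_stage_model`/`first_stage_model` fallbacks suggests every cached model is now expected to be a `StableDiffusionGeneratorPipeline` rather than a legacy ldm checkpoint object.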

View File

@@ -3,7 +3,6 @@ Initialization file for invokeai.backend.prompting
 """
 from .conditioning import (
     get_prompt_structure,
-    get_tokenizer,
     get_tokens_for_prompt_object,
     get_uc_and_c_and_ec,
     split_weighted_subprompts,

View File

@@ -7,7 +7,7 @@ get_uc_and_c_and_ec() get the conditioned and unconditioned latent, an
 """
 import re
-from typing import Any, Optional, Union
+from typing import Optional, Union

 from compel import Compel
 from compel.prompt_parser import (
@@ -17,7 +17,6 @@ from compel.prompt_parser import (
     Fragment,
     PromptParser,
 )
-from transformers import CLIPTextModel, CLIPTokenizer

 from invokeai.backend.globals import Globals
@@ -25,36 +24,6 @@ from ..stable_diffusion import InvokeAIDiffuserComponent
 from ..util import torch_dtype

-def get_tokenizer(model) -> CLIPTokenizer:
-    # TODO remove legacy ckpt fallback handling
-    return (
-        getattr(model, "tokenizer", None)  # diffusers
-        or model.cond_stage_model.tokenizer
-    )  # ldm
-
-
-def get_text_encoder(model) -> Any:
-    # TODO remove legacy ckpt fallback handling
-    return getattr(
-        model, "text_encoder", None
-    ) or UnsqueezingLDMTransformer(  # diffusers
-        model.cond_stage_model.transformer
-    )  # ldm
-
-
-class UnsqueezingLDMTransformer:
-    def __init__(self, ldm_transformer):
-        self.ldm_transformer = ldm_transformer
-
-    @property
-    def device(self):
-        return self.ldm_transformer.device
-
-    def __call__(self, *args, **kwargs):
-        insufficiently_unsqueezed_tensor = self.ldm_transformer(*args, **kwargs)
-        return insufficiently_unsqueezed_tensor.unsqueeze(0)
-
-
 def get_uc_and_c_and_ec(
     prompt_string, model, log_tokens=False, skip_normalize_legacy_blend=False
 ):
@@ -64,13 +33,13 @@ def get_uc_and_c_and_ec(
         prompt_string
     )

-    tokenizer = get_tokenizer(model)
-    text_encoder = get_text_encoder(model)
+    tokenizer = model.tokenizer
     compel = Compel(
         tokenizer=tokenizer,
-        text_encoder=text_encoder,
+        text_encoder=model.text_encoder,
         textual_inversion_manager=model.textual_inversion_manager,
         dtype_for_device_getter=torch_dtype,
+        truncate_long_prompts=False
     )

     # get rid of any newline characters
@@ -82,12 +51,12 @@ def get_uc_and_c_and_ec(
     legacy_blend = try_parse_legacy_blend(
         positive_prompt_string, skip_normalize_legacy_blend
     )
-    positive_prompt: FlattenedPrompt | Blend
+    positive_prompt: Union[FlattenedPrompt, Blend]
     if legacy_blend is not None:
         positive_prompt = legacy_blend
     else:
         positive_prompt = Compel.parse_prompt_string(positive_prompt_string)
-    negative_prompt: FlattenedPrompt | Blend = Compel.parse_prompt_string(
+    negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(
         negative_prompt_string
     )
@@ -96,6 +65,7 @@ def get_uc_and_c_and_ec(
     c, options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt)
     uc, _ = compel.build_conditioning_tensor_for_prompt_object(negative_prompt)
+    [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])

     tokens_count = get_max_token_count(tokenizer, positive_prompt)
@@ -116,12 +86,12 @@ def get_prompt_structure(
     legacy_blend = try_parse_legacy_blend(
         positive_prompt_string, skip_normalize_legacy_blend
     )
-    positive_prompt: FlattenedPrompt | Blend
+    positive_prompt: Union[FlattenedPrompt, Blend]
     if legacy_blend is not None:
         positive_prompt = legacy_blend
     else:
         positive_prompt = Compel.parse_prompt_string(positive_prompt_string)
-    negative_prompt: FlattenedPrompt | Blend = Compel.parse_prompt_string(
+    negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(
         negative_prompt_string
     )
@@ -129,7 +99,7 @@ def get_prompt_structure(
 def get_max_token_count(
-    tokenizer, prompt: Union[FlattenedPrompt, Blend], truncate_if_too_long=True
+    tokenizer, prompt: Union[FlattenedPrompt, Blend], truncate_if_too_long=False
 ) -> int:
     if type(prompt) is Blend:
         blend: Blend = prompt
@@ -245,7 +215,7 @@ def log_tokenization_for_prompt_object(
     )

-def log_tokenization_for_text(text, tokenizer, display_label=None):
+def log_tokenization_for_text(text, tokenizer, display_label=None, truncate_if_too_long=False):
     """shows how the prompt is tokenized
     # usually tokens have '</w>' to indicate end-of-word,
     # but for readability it has been replaced with ' '
@@ -260,11 +230,11 @@ def log_tokenization_for_text(text, tokenizer, display_label=None):
         token = tokens[i].replace("</w>", " ")
         # alternate color
         s = (usedTokens % 6) + 1
-        if i < tokenizer.model_max_length:
+        if truncate_if_too_long and i >= tokenizer.model_max_length:
+            discarded = discarded + f"\x1b[0;3{s};40m{token}"
+        else:
             tokenized = tokenized + f"\x1b[0;3{s};40m{token}"
             usedTokens += 1
-        else:  # over max token length
-            discarded = discarded + f"\x1b[0;3{s};40m{token}"

     if usedTokens > 0:
         print(f'\n>> [TOKENLOG] Tokens {display_label or ""} ({usedTokens}):')
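
Note: the conditioning changes in this file combine three things: Compel is now constructed with `truncate_long_prompts=False`, the positive and negative conditioning tensors are padded to the same length, and the compel requirement is bumped to 0.1.10 (see the pyproject hunk at the end). A rough sketch of the same flow outside InvokeAI, assuming a stock diffusers pipeline (model id and prompts are illustrative):

```python
from compel import Compel
from diffusers import StableDiffusionPipeline

# Illustrative model id; any SD 1.x pipeline with a CLIP tokenizer/text encoder works.
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

compel = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=False,  # keep tokens beyond the 77-token CLIP window
)

c = compel.build_conditioning_tensor("a richly detailed oil painting of " * 20)
uc = compel.build_conditioning_tensor("")

# Un-truncated prompts can produce tensors of different sequence lengths,
# so pad them to match before classifier-free guidance.
[c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
```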

View File

@@ -54,16 +54,6 @@ class PipelineIntermediateState:
     attention_map_saver: Optional[AttentionMapSaver] = None

-# copied from configs/stable-diffusion/v1-inference.yaml
-_default_personalization_config_params = dict(
-    placeholder_strings=["*"],
-    initializer_wods=["sculpture"],
-    per_image_tokens=False,
-    num_vectors_per_token=1,
-    progressive_words=False,
-)
-
-
 @dataclass
 class AddsMaskLatents:
     """Add the channels required for inpainting model input.
@@ -175,7 +165,7 @@ def image_resized_to_grid_as_tensor(
     :param normalize: scale the range to [-1, 1] instead of [0, 1]
     :param multiple_of: resize the input so both dimensions are a multiple of this
     """
-    w, h = trim_to_multiple_of(*image.size)
+    w, h = trim_to_multiple_of(*image.size, multiple_of=multiple_of)
     transformation = T.Compose(
         [
             T.Resize((h, w), T.InterpolationMode.LANCZOS),
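
Note: this one-line change fixes a dropped argument: `multiple_of` was accepted by `image_resized_to_grid_as_tensor()` but never forwarded, so images were always trimmed to the default grid. A hedged sketch of what a helper like `trim_to_multiple_of` is expected to do (the real InvokeAI implementation may differ in details):

```python
def trim_to_multiple_of(*dimensions: int, multiple_of: int = 8) -> tuple[int, ...]:
    """Round each dimension down to the nearest multiple of `multiple_of`."""
    return tuple(dim - dim % multiple_of for dim in dimensions)

# With the fix, a caller passing multiple_of=64 actually gets a 64-aligned grid:
assert trim_to_multiple_of(520, 776) == (520, 776)
assert trim_to_multiple_of(520, 776, multiple_of=64) == (512, 768)
```
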
@@ -290,10 +280,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
         unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
         scheduler ([`SchedulerMixin`]):
-            A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of
+            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
-            Classification module that estimates whether generated images could be considered offsensive or harmful.
+            Classification module that estimates whether generated images could be considered offensive or harmful.
             Please, refer to the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4) for details.
         feature_extractor ([`CLIPFeatureExtractor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
@@ -436,11 +426,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         """
         Ready this pipeline's models.

-        i.e. pre-load them to the GPU if appropriate.
+        i.e. preload them to the GPU if appropriate.
         """
         self._model_group.ready()

-    def to(self, torch_device: Optional[Union[str, torch.device]] = None):
+    def to(self, torch_device: Optional[Union[str, torch.device]] = None, silence_dtype_warnings=False):
         # overridden method; types match the superclass.
         if torch_device is None:
             return self
@@ -917,20 +907,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             device=self._model_group.device_for(self.unet),
         )

-    @property
-    def cond_stage_model(self):
-        return self.embeddings_provider
-
-    @torch.inference_mode()
-    def _tokenize(self, prompt: Union[str, List[str]]):
-        return self.tokenizer(
-            prompt,
-            padding="max_length",
-            max_length=self.tokenizer.model_max_length,
-            truncation=True,
-            return_tensors="pt",
-        )
-
     @property
     def channels(self) -> int:
         """Compatible with DiffusionWrapper"""
@@ -942,11 +918,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         return super().decode_latents(latents)

     def debug_latents(self, latents, msg):
+        from invokeai.backend.image_util import debug_image
         with torch.inference_mode():
-            from ldm.util import debug_image
-
             decoded = self.numpy_to_pil(self.decode_latents(latents))
             for i, img in enumerate(decoded):
                 debug_image(
                     img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True
                 )

View File

@@ -29,7 +29,6 @@ from ..image_util import PngWriter, retrieve_metadata
 from ...frontend.merge.merge_diffusers import merge_diffusion_models
 from ..prompting import (
     get_prompt_structure,
-    get_tokenizer,
     get_tokens_for_prompt_object,
 )
 from ..stable_diffusion import PipelineIntermediateState
@@ -1274,7 +1273,7 @@ class InvokeAIWebServer:
                     None
                     if type(parsed_prompt) is Blend
                     else get_tokens_for_prompt_object(
-                        get_tokenizer(self.generate.model), parsed_prompt
+                        self.generate.model.tokenizer, parsed_prompt
                     )
                 )
                 attention_maps_image_base64_url = (

View File

@@ -38,7 +38,7 @@ dependencies = [
   "albumentations",
   "click",
   "clip_anytorch",  # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
-  "compel==0.1.7",
+  "compel==0.1.10",
   "datasets",
   "diffusers[torch]~=0.14",
   "dnspython==2.2.1",