From 178f0c78d888cb762bf6eb2a87f350621c1528e2 Mon Sep 17 00:00:00 2001 From: damian0815 Date: Tue, 8 Nov 2022 12:59:34 +0100 Subject: [PATCH 01/30] Fix #1362 by improving VRAM usage patterns when doing .swap() commit ef3f7a26e242b73c2beb0195c7fd8f654ef47f55 Author: damian0815 Date: Tue Nov 8 12:18:37 2022 +0100 remove log spam commit 7189d649622d4668b120b0dd278388ad672142c4 Author: damian0815 Date: Tue Nov 8 12:10:28 2022 +0100 change the way saved slicing strategy is applied commit 01c40f751ab72955140165c16f95ae411732265b Author: damian0815 Date: Tue Nov 8 12:04:43 2022 +0100 fix slicing_strategy_getter callsite commit f8cfe25150a346958903316bc710737d99839923 Author: damian0815 Date: Tue Nov 8 11:56:22 2022 +0100 cleanup, consistent dim=0 also tested commit 5bf9b1e890d48e962afd4a668a219b68271e5dc1 Author: damian0815 Date: Tue Nov 8 11:34:09 2022 +0100 refactored context, tested with non-sliced cross attention control commit d58a46e39bf562e7459290d2444256e8c08ad0b6 Author: damian0815 Date: Sun Nov 6 00:41:52 2022 +0100 cleanup commit 7e2c658b4c06fe239311b65b9bb16fa3adec7fd7 Author: damian0815 Date: Sat Nov 5 22:57:31 2022 +0100 disable logs commit 20ee89d93841b070738b3d8a4385c93b097d92eb Author: damian0815 Date: Sat Nov 5 22:36:58 2022 +0100 slice saved attention if necessary commit 0a7684a22c880ec0f48cc22bfed4526358f71546 Author: damian0815 Date: Sat Nov 5 22:32:38 2022 +0100 raise instead of asserting commit 7083104c7f3a0d8fd96e94a2f391de50a3c942e4 Author: damian0815 Date: Sat Nov 5 22:31:00 2022 +0100 store dim when saving slices commit f7c0808ed383ec1dc70645288a798ed2aa4fa85c Author: damian0815 Date: Sat Nov 5 22:27:16 2022 +0100 don't retry on exception commit 749a721e939b3fe7c1741e7998dab6bd2c85a0cb Author: damian0815 Date: Sat Nov 5 22:24:50 2022 +0100 stuff commit 032ab90e9533be8726301ec91b97137e2aadef9a Author: damian0815 Date: Sat Nov 5 22:20:17 2022 +0100 more logging commit 3dc34b387f033482305360e605809d95a40bf6f8 Author: damian0815 Date: Sat Nov 5 22:16:47 2022 +0100 logs commit 901c4c1aa4b9bcef695a6551867ec8149e6e6a93 Author: damian0815 Date: Sat Nov 5 22:12:39 2022 +0100 actually set save_slicing_strategy to True commit f780e0a0a7c6b6a3db320891064da82589358c8a Author: damian0815 Date: Sat Nov 5 22:10:35 2022 +0100 store slicing strategy commit 93bb6d566fd18c5c69ef7dacc8f74ba2cf671cb7 Author: damian Date: Sat Nov 5 20:43:48 2022 +0100 still not it commit 5e3a9541f8ae00bde524046963910323e20c40b7 Author: damian Date: Sat Nov 5 17:20:02 2022 +0100 wip offloading attention slices on-demand commit 4c2966aa856b6f3b446216da3619ae931552ef08 Author: damian0815 Date: Sat Nov 5 15:47:40 2022 +0100 pre-emptive offloading, idk if it works commit 572576755e9f0a878d38e8173e485126c0efbefb Author: root Date: Sat Nov 5 11:25:32 2022 +0000 push attention slices to cpu. slow but saves memory. 
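The squashed commit log here converges on a single idea: while doing cross-attention control for `.swap()`, attention slices saved on the first pass are pushed to CPU ("slow but saves memory") and only moved back to the active device when they are applied. A minimal, self-contained sketch of that offload/restore pattern is below; the class and identifiers are illustrative, not the patch's actual `Context` API.

```python
from typing import Dict, Optional

import torch


class SavedSliceStore:
    """Illustrative store: keep saved attention slices on CPU, restore them on demand."""

    def __init__(self) -> None:
        self._slices: Dict[str, Dict[int, torch.Tensor]] = {}

    def save(self, identifier: str, offset: int, attn_slice: torch.Tensor) -> None:
        # Move the slice off the GPU immediately -- the "slow but saves memory" trade-off.
        self._slices.setdefault(identifier, {})[offset] = attn_slice.to('cpu')

    def load(self, identifier: str, offset: int, device: torch.device) -> Optional[torch.Tensor]:
        # Bring the slice back to whatever device the current forward pass runs on.
        saved = self._slices.get(identifier, {}).get(offset)
        return None if saved is None else saved.to(device)


# Usage sketch:
store = SavedSliceStore()
store.save("attn2_block_0", 0, torch.rand(8, 64, 77))
restored = store.load("attn2_block_0", 0, torch.device("cpu"))
```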
commit b57c83a68f2ac03976ebc89ce2ff03812d6d185f Author: damian0815 Date: Sat Nov 5 12:04:22 2022 +0100 verbose logging commit 3a5dae116f110a96585d9eb71d713b5ed2bc3d2b Author: damian0815 Date: Sat Nov 5 11:50:48 2022 +0100 wip fixing mem strategy crash (4 test on runpod) commit 3cf237db5fae0c7b0b4cc3c47c81830bdb2ae7de Author: damian0815 Date: Fri Nov 4 09:02:40 2022 +0100 wip, only works on cuda --- .../diffusion/cross_attention_control.py | 269 +++++++++++------- .../diffusion/shared_invokeai_diffusion.py | 41 ++- ldm/modules/attention.py | 67 +++-- 3 files changed, 234 insertions(+), 143 deletions(-) diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index 9c8c597869..ff90a24856 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -1,10 +1,13 @@ -from enum import Enum +import enum +from typing import Optional import torch # adapted from bloc97's CrossAttentionControl colab # https://github.com/bloc97/CrossAttentionControl + + class CrossAttentionControl: class Arguments: @@ -27,7 +30,14 @@ class CrossAttentionControl: print('warning: cross-attention control options are not working properly for >1 edit') self.edit_options = non_none_edit_options[0] + class Context: + + class Action(enum.Enum): + NONE = 0 + SAVE = 1, + APPLY = 2 + def __init__(self, arguments: 'CrossAttentionControl.Arguments', step_count: int): """ :param arguments: Arguments for the cross-attention control process @@ -36,14 +46,124 @@ class CrossAttentionControl: self.arguments = arguments self.step_count = step_count + self.self_cross_attention_module_identifiers = [] + self.tokens_cross_attention_module_identifiers = [] + + self.saved_cross_attention_maps = {} + + self.clear_requests(cleanup=True) + + def register_cross_attention_modules(self, model): + for name,module in CrossAttentionControl.get_attention_modules(model, + CrossAttentionControl.CrossAttentionType.SELF): + self.self_cross_attention_module_identifiers.append(name) + for name,module in CrossAttentionControl.get_attention_modules(model, + CrossAttentionControl.CrossAttentionType.TOKENS): + self.tokens_cross_attention_module_identifiers.append(name) + + def request_save_attention_maps(self, cross_attention_type: 'CrossAttentionControl.CrossAttentionType'): + if cross_attention_type == CrossAttentionControl.CrossAttentionType.SELF: + self.self_cross_attention_action = CrossAttentionControl.Context.Action.SAVE + else: + self.tokens_cross_attention_action = CrossAttentionControl.Context.Action.SAVE + + def request_apply_saved_attention_maps(self, cross_attention_type: 'CrossAttentionControl.CrossAttentionType'): + if cross_attention_type == CrossAttentionControl.CrossAttentionType.SELF: + self.self_cross_attention_action = CrossAttentionControl.Context.Action.APPLY + else: + self.tokens_cross_attention_action = CrossAttentionControl.Context.Action.APPLY + + def is_tokens_cross_attention(self, module_identifier) -> bool: + return module_identifier in self.tokens_cross_attention_module_identifiers + + def get_should_save_maps(self, module_identifier: str) -> bool: + if module_identifier in self.self_cross_attention_module_identifiers: + return self.self_cross_attention_action == CrossAttentionControl.Context.Action.SAVE + elif module_identifier in self.tokens_cross_attention_module_identifiers: + return self.tokens_cross_attention_action == CrossAttentionControl.Context.Action.SAVE + return False + + def get_should_apply_saved_maps(self, 
module_identifier: str) -> bool: + if module_identifier in self.self_cross_attention_module_identifiers: + return self.self_cross_attention_action == CrossAttentionControl.Context.Action.APPLY + elif module_identifier in self.tokens_cross_attention_module_identifiers: + return self.tokens_cross_attention_action == CrossAttentionControl.Context.Action.APPLY + return False + + def get_active_cross_attention_control_types_for_step(self, percent_through:float=None)\ + -> list['CrossAttentionControl.CrossAttentionType']: + """ + Should cross-attention control be applied on the given step? + :param percent_through: How far through the step sequence are we (0.0=pure noise, 1.0=completely denoised image). Expected range 0.0..<1.0. + :return: A list of attention types that cross-attention control should be performed for on the given step. May be []. + """ + if percent_through is None: + return [CrossAttentionControl.CrossAttentionType.SELF, CrossAttentionControl.CrossAttentionType.TOKENS] + + opts = self.arguments.edit_options + to_control = [] + if opts['s_start'] <= percent_through and percent_through < opts['s_end']: + to_control.append(CrossAttentionControl.CrossAttentionType.SELF) + if opts['t_start'] <= percent_through and percent_through < opts['t_end']: + to_control.append(CrossAttentionControl.CrossAttentionType.TOKENS) + return to_control + + def save_slice(self, identifier: str, slice: torch.Tensor, dim: Optional[int], offset: int, + slice_size: Optional[int]): + if identifier not in self.saved_cross_attention_maps: + self.saved_cross_attention_maps[identifier] = { + 'dim': dim, + 'slice_size': slice_size, + 'slices': {offset or 0: slice} + } + else: + self.saved_cross_attention_maps[identifier]['slices'][offset or 0] = slice + + def get_slice(self, identifier: str, requested_dim: Optional[int], requested_offset: int, slice_size: int): + saved_attention_dict = self.saved_cross_attention_maps[identifier] + if requested_dim is None: + if saved_attention_dict['dim'] is not None: + raise RuntimeError(f"dim mismatch: expected dim=None, have {saved_attention_dict['dim']}") + return saved_attention_dict['slices'][0] + + if saved_attention_dict['dim'] == requested_dim: + if slice_size != saved_attention_dict['slice_size']: + raise RuntimeError( + f"slice_size mismatch: expected slice_size={slice_size}, have {saved_attention_dict['slice_size']}") + return saved_attention_dict['slices'][requested_offset] + + if saved_attention_dict['dim'] == None: + whole_saved_attention = saved_attention_dict['slices'][0] + if requested_dim == 0: + return whole_saved_attention[requested_offset:requested_offset + slice_size] + elif requested_dim == 1: + return whole_saved_attention[:, requested_offset:requested_offset + slice_size] + + raise RuntimeError(f"Cannot convert dim {saved_attention_dict['dim']} to requested dim {requested_dim}") + + def get_slicing_strategy(self, identifier: str) -> Optional[tuple[int, int]]: + saved_attention = self.saved_cross_attention_maps.get(identifier, None) + if saved_attention is None: + return None, None + return saved_attention['dim'], saved_attention['slice_size'] + + def clear_requests(self, cleanup=True): + self.tokens_cross_attention_action = CrossAttentionControl.Context.Action.NONE + self.self_cross_attention_action = CrossAttentionControl.Context.Action.NONE + if cleanup: + self.saved_cross_attention_maps = {} + + def offload_saved_attention_slices_to_cpu(self): + for key, map_dict in self.saved_cross_attention_maps.items(): + for offset, slice in 
map_dict['slices'].items(): + map_dict[offset] = slice.to('cpu') + @classmethod def remove_cross_attention_control(cls, model): cls.remove_attention_function(model) @classmethod - def setup_cross_attention_control(cls, model, - cross_attention_control_args: Arguments - ): + def setup_cross_attention_control(cls, model, context: Context): """ Inject attention parameters and functions into the passed in model to enable cross attention editing. @@ -53,7 +173,7 @@ class CrossAttentionControl: """ # adapted from init_attention_edit - device = cross_attention_control_args.edited_conditioning.device + device = context.arguments.edited_conditioning.device # urgh. should this be hardcoded? max_length = 77 @@ -61,141 +181,82 @@ class CrossAttentionControl: mask = torch.zeros(max_length) indices_target = torch.arange(max_length, dtype=torch.long) indices = torch.zeros(max_length, dtype=torch.long) - for name, a0, a1, b0, b1 in cross_attention_control_args.edit_opcodes: + for name, a0, a1, b0, b1 in context.arguments.edit_opcodes: if b0 < max_length: if name == "equal":# or (name == "replace" and a1 - a0 == b1 - b0): # these tokens have not been edited indices[b0:b1] = indices_target[a0:a1] mask[b0:b1] = 1 - cls.inject_attention_function(model) - - for m in cls.get_attention_modules(model, cls.CrossAttentionType.SELF): - m.last_attn_slice_mask = None - m.last_attn_slice_indices = None - - for m in cls.get_attention_modules(model, cls.CrossAttentionType.TOKENS): - m.last_attn_slice_mask = mask.to(device) - m.last_attn_slice_indices = indices.to(device) + context.register_cross_attention_modules(model) + context.cross_attention_mask = mask.to(device) + context.cross_attention_index_map = indices.to(device) + cls.inject_attention_function(model, context) - class CrossAttentionType(Enum): + class CrossAttentionType(enum.Enum): SELF = 1 TOKENS = 2 - @classmethod - def get_active_cross_attention_control_types_for_step(cls, context: 'CrossAttentionControl.Context', percent_through:float=None)\ - -> list['CrossAttentionControl.CrossAttentionType']: - """ - Should cross-attention control be applied on the given step? - :param percent_through: How far through the step sequence are we (0.0=pure noise, 1.0=completely denoised image). Expected range 0.0..<1.0. - :return: A list of attention types that cross-attention control should be performed for on the given step. May be []. 
- """ - if percent_through is None: - return [cls.CrossAttentionType.SELF, cls.CrossAttentionType.TOKENS] - - opts = context.arguments.edit_options - to_control = [] - if opts['s_start'] <= percent_through and percent_through < opts['s_end']: - to_control.append(cls.CrossAttentionType.SELF) - if opts['t_start'] <= percent_through and percent_through < opts['t_end']: - to_control.append(cls.CrossAttentionType.TOKENS) - return to_control - - @classmethod def get_attention_modules(cls, model, which: CrossAttentionType): which_attn = "attn1" if which is cls.CrossAttentionType.SELF else "attn2" - return [module for name, module in model.named_modules() if + return [(name,module) for name, module in model.named_modules() if type(module).__name__ == "CrossAttention" and which_attn in name] - @classmethod - def clear_requests(cls, model, clear_attn_slice=True): - self_attention_modules = cls.get_attention_modules(model, cls.CrossAttentionType.SELF) - tokens_attention_modules = cls.get_attention_modules(model, cls.CrossAttentionType.TOKENS) - for m in self_attention_modules+tokens_attention_modules: - m.save_last_attn_slice = False - m.use_last_attn_slice = False - if clear_attn_slice: - m.last_attn_slice = None @classmethod - def request_save_attention_maps(cls, model, cross_attention_type: CrossAttentionType): - modules = cls.get_attention_modules(model, cross_attention_type) - for m in modules: - # clear out the saved slice in case the outermost dim changes - m.last_attn_slice = None - m.save_last_attn_slice = True - - @classmethod - def request_apply_saved_attention_maps(cls, model, cross_attention_type: CrossAttentionType): - modules = cls.get_attention_modules(model, cross_attention_type) - for m in modules: - m.use_last_attn_slice = True - - - - @classmethod - def inject_attention_function(cls, unet): + def inject_attention_function(cls, unet, context: 'CrossAttentionControl.Context'): # ORIGINAL SOURCE CODE: https://github.com/huggingface/diffusers/blob/91ddd2a25b848df0fa1262d4f1cd98c7ccb87750/src/diffusers/models/attention.py#L276 - def attention_slice_wrangler(self, attention_scores, suggested_attention_slice, dim, offset, slice_size): + def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim, offset, slice_size): - #print("in wrangler with suggested_attention_slice shape", suggested_attention_slice.shape, "dim", dim) + #memory_usage = suggested_attention_slice.element_size() * suggested_attention_slice.nelement() - attn_slice = suggested_attention_slice - if dim is not None: - start = offset - end = start+slice_size - #print(f"in wrangler, sliced dim {dim} {start}-{end}, use_last_attn_slice is {self.use_last_attn_slice}, save_last_attn_slice is {self.save_last_attn_slice}") - #else: - # print(f"in wrangler, whole, use_last_attn_slice is {self.use_last_attn_slice}, save_last_attn_slice is {self.save_last_attn_slice}") + attention_slice = suggested_attention_slice - if self.use_last_attn_slice: - if dim is None: - last_attn_slice = self.last_attn_slice - # print("took whole slice of shape", attn_slice.shape, "from complete shape", self.last_attn_slice.shape) + if context.get_should_save_maps(module.identifier): + #print(module.identifier, "saving suggested_attention_slice of shape", + # suggested_attention_slice.shape, "dim", dim, "offset", offset) + slice_to_save = attention_slice.to('cpu') if dim is not None else attention_slice + context.save_slice(module.identifier, slice_to_save, dim=dim, offset=offset, slice_size=slice_size) + elif 
context.get_should_apply_saved_maps(module.identifier): + #print(module.identifier, "applying saved attention slice for dim", dim, "offset", offset) + saved_attention_slice = context.get_slice(module.identifier, dim, offset, slice_size) + + # slice may have been offloaded to CPU + saved_attention_slice = saved_attention_slice.to(suggested_attention_slice.device) + + if context.is_tokens_cross_attention(module.identifier): + index_map = context.cross_attention_index_map + remapped_saved_attention_slice = torch.index_select(saved_attention_slice, -1, index_map) + this_attention_slice = suggested_attention_slice + + mask = context.cross_attention_mask + saved_mask = mask + this_mask = 1 - mask + attention_slice = remapped_saved_attention_slice * saved_mask + \ + this_attention_slice * this_mask else: - last_attn_slice = self.last_attn_slice[offset] - - if self.last_attn_slice_mask is None: # just use everything - attn_slice = last_attn_slice - else: - last_attn_slice_mask = self.last_attn_slice_mask - remapped_last_attn_slice = torch.index_select(last_attn_slice, -1, self.last_attn_slice_indices) + attention_slice = saved_attention_slice - this_attn_slice = attn_slice - this_attn_slice_mask = 1 - last_attn_slice_mask - attn_slice = this_attn_slice * this_attn_slice_mask + \ - remapped_last_attn_slice * last_attn_slice_mask - - if self.save_last_attn_slice: - if dim is None: - self.last_attn_slice = attn_slice - else: - if self.last_attn_slice is None: - self.last_attn_slice = { offset: attn_slice } - else: - self.last_attn_slice[offset] = attn_slice - - return attn_slice + return attention_slice for name, module in unet.named_modules(): module_name = type(module).__name__ if module_name == "CrossAttention": - module.last_attn_slice = None - module.last_attn_slice_indices = None - module.last_attn_slice_mask = None - module.use_last_attn_weights = False - module.use_last_attn_slice = False - module.save_last_attn_slice = False + module.identifier = name module.set_attention_slice_wrangler(attention_slice_wrangler) + module.set_slicing_strategy_getter(lambda module, module_identifier=name: \ + context.get_slicing_strategy(module_identifier)) @classmethod def remove_attention_function(cls, unet): + # clear wrangler callback for name, module in unet.named_modules(): module_name = type(module).__name__ if module_name == "CrossAttention": module.set_attention_slice_wrangler(None) + module.set_slicing_strategy_getter(None) diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index 5a9cc3eb74..1b181ba388 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -1,9 +1,11 @@ +import traceback from math import ceil from typing import Callable, Optional, Union import torch from ldm.models.diffusion.cross_attention_control import CrossAttentionControl +from ldm.modules.attention import get_mem_free_total class InvokeAIDiffuserComponent: @@ -34,7 +36,7 @@ class InvokeAIDiffuserComponent: """ self.model = model self.model_forward_callback = model_forward_callback - + self.cross_attention_control_context = None def setup_cross_attention_control(self, conditioning: ExtraConditioningInfo, step_count: int): self.conditioning = conditioning @@ -42,11 +44,7 @@ class InvokeAIDiffuserComponent: arguments=self.conditioning.cross_attention_control_args, step_count=step_count ) - CrossAttentionControl.setup_cross_attention_control(self.model, - 
cross_attention_control_args=self.conditioning.cross_attention_control_args - ) - #todo: refactor edited_conditioning, edit_opcodes, edit_options into a struct - #todo: apply edit_options using step_count + CrossAttentionControl.setup_cross_attention_control(self.model, self.cross_attention_control_context) def remove_cross_attention_control(self): self.conditioning = None @@ -54,6 +52,7 @@ class InvokeAIDiffuserComponent: CrossAttentionControl.remove_cross_attention_control(self.model) + def do_diffusion_step(self, x: torch.Tensor, sigma: torch.Tensor, unconditioning: Union[torch.Tensor,dict], conditioning: Union[torch.Tensor,dict], @@ -70,12 +69,12 @@ class InvokeAIDiffuserComponent: :return: the new latents after applying the model to x using unscaled unconditioning and CFG-scaled conditioning. """ - CrossAttentionControl.clear_requests(self.model) cross_attention_control_types_to_do = [] + context: CrossAttentionControl.Context = self.cross_attention_control_context if self.cross_attention_control_context is not None: percent_through = self.estimate_percent_through(step_index, sigma) - cross_attention_control_types_to_do = CrossAttentionControl.get_active_cross_attention_control_types_for_step(self.cross_attention_control_context, percent_through) + cross_attention_control_types_to_do = context.get_active_cross_attention_control_types_for_step(percent_through) wants_cross_attention_control = (len(cross_attention_control_types_to_do) > 0) wants_hybrid_conditioning = isinstance(conditioning, dict) @@ -124,7 +123,7 @@ class InvokeAIDiffuserComponent: return unconditioned_next_x, conditioned_next_x - def apply_cross_attention_controlled_conditioning(self, x, sigma, unconditioning, conditioning, cross_attention_control_types_to_do): + def apply_cross_attention_controlled_conditioning(self, x:torch.Tensor, sigma, unconditioning, conditioning, cross_attention_control_types_to_do): # print('pct', percent_through, ': doing cross attention control on', cross_attention_control_types_to_do) # slower non-batched path (20% slower on mac MPS) # We are only interested in using attention maps for conditioned_next_x, but batching them with generation of @@ -134,31 +133,29 @@ class InvokeAIDiffuserComponent: # representing batched uncond + cond, but then when it comes to applying the saved attention, the # wrangler gets an attention tensor which only has shape[0]=8, representing just self.edited_conditionings.) # todo: give CrossAttentionControl's `wrangler` function more info so it can work with a batched call as well. 
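The hunk that follows restructures this per-step logic around the new `Context` request API: save attention maps while denoising with the original conditioning, then apply them while denoising with the edited conditioning. As a reading aid, here is a hedged, non-diff restatement of that control flow with simplified names (`model_forward` stands in for `self.model_forward_callback`):

```python
def controlled_step(context, model_forward, x, sigma,
                    unconditioning, conditioning, edited_conditioning, active_types):
    """Sketch of the two-pass flow in apply_cross_attention_controlled_conditioning."""
    try:
        unconditioned_next_x = model_forward(x, sigma, unconditioning)

        # Pass 1: run the original prompt and record its attention maps.
        for ca_type in active_types:
            context.request_save_attention_maps(ca_type)
        _ = model_forward(x, sigma, conditioning)
        context.clear_requests(cleanup=False)   # keep the saved maps, drop the request flags

        # Pass 2: rerun with the edited prompt, steered by the saved maps.
        for ca_type in active_types:
            context.request_apply_saved_attention_maps(ca_type)
        conditioned_next_x = model_forward(x, sigma, edited_conditioning)
    finally:
        context.clear_requests(cleanup=True)    # always release the saved maps
    return unconditioned_next_x, conditioned_next_x
```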
+ context:CrossAttentionControl.Context = self.cross_attention_control_context try: unconditioned_next_x = self.model_forward_callback(x, sigma, unconditioning) # process x using the original prompt, saving the attention maps - for type in cross_attention_control_types_to_do: - CrossAttentionControl.request_save_attention_maps(self.model, type) + #print("saving attention maps for", cross_attention_control_types_to_do) + for ca_type in cross_attention_control_types_to_do: + context.request_save_attention_maps(ca_type) _ = self.model_forward_callback(x, sigma, conditioning) - CrossAttentionControl.clear_requests(self.model, clear_attn_slice=False) + context.clear_requests(cleanup=False) # process x again, using the saved attention maps to control where self.edited_conditioning will be applied - for type in cross_attention_control_types_to_do: - CrossAttentionControl.request_apply_saved_attention_maps(self.model, type) + #print("applying saved attention maps for", cross_attention_control_types_to_do) + for ca_type in cross_attention_control_types_to_do: + context.request_apply_saved_attention_maps(ca_type) edited_conditioning = self.conditioning.cross_attention_control_args.edited_conditioning conditioned_next_x = self.model_forward_callback(x, sigma, edited_conditioning) - CrossAttentionControl.clear_requests(self.model) + finally: + context.clear_requests(cleanup=True) - return unconditioned_next_x, conditioned_next_x - - except RuntimeError: - # make sure we clean out the attention slices we're storing on the model - # TODO don't store things on the model - CrossAttentionControl.clear_requests(self.model) - raise + return unconditioned_next_x, conditioned_next_x def estimate_percent_through(self, step_index, sigma): if step_index is not None and self.cross_attention_control_context is not None: diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py index 4c36fa8a6c..05f6183029 100644 --- a/ldm/modules/attention.py +++ b/ldm/modules/attention.py @@ -1,6 +1,6 @@ from inspect import isfunction import math -from typing import Callable +from typing import Callable, Optional import torch import torch.nn.functional as F @@ -151,6 +151,17 @@ class SpatialSelfAttention(nn.Module): return x+h_ +def get_mem_free_total(device): + #only on cuda + if not torch.cuda.is_available(): + return None + stats = torch.cuda.memory_stats(device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(device) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + return mem_free_total class CrossAttention(nn.Module): @@ -173,31 +184,43 @@ class CrossAttention(nn.Module): self.mem_total_gb = psutil.virtual_memory().total // (1 << 30) + self.cached_mem_free_total = None self.attention_slice_wrangler = None + self.slicing_strategy_getter = None - def set_attention_slice_wrangler(self, wrangler:Callable[[nn.Module, torch.Tensor, torch.Tensor, int, int, int], torch.Tensor]): + def set_attention_slice_wrangler(self, wrangler: Optional[Callable[[nn.Module, torch.Tensor, int, int, int], torch.Tensor]]): ''' Set custom attention calculator to be called when attention is calculated - :param wrangler: Callback, with args (self, attention_scores, suggested_attention_slice, dim, offset, slice_size), + :param wrangler: Callback, with args (module, suggested_attention_slice, dim, offset, slice_size), which returns either the suggested_attention_slice or an adjusted equivalent. 
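The `ldm/modules/attention.py` part of this hunk pulls the free-VRAM estimate out into a standalone `get_mem_free_total()` helper so it can be reused outside the attention module. The estimate adds the device's unreserved memory to the portion PyTorch has reserved but is not actively using; reproduced here outside the diff for readability (CUDA only, `None` elsewhere, as in the patch):

```python
import torch


def get_mem_free_total(device: torch.device):
    """Estimate free CUDA memory: unreserved bytes plus reserved-but-inactive allocator bytes."""
    if not torch.cuda.is_available():
        return None
    stats = torch.cuda.memory_stats(device)
    mem_active = stats['active_bytes.all.current']
    mem_reserved = stats['reserved_bytes.all.current']
    mem_free_cuda, _ = torch.cuda.mem_get_info(device)
    mem_free_torch = mem_reserved - mem_active  # reserved by the caching allocator but not in use
    return mem_free_cuda + mem_free_torch
```

In `einsum_op_cuda` this value (optionally cached via `cache_free_memory_count`) is divided by a safety factor of 3.3 and converted to MiB before `einsum_op_tensor_mem` picks a slice size, and it is bypassed entirely whenever the new `slicing_strategy_getter` returns a saved slicing strategy.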
- self is the current CrossAttention module for which the callback is being invoked. - attention_scores are the scores for attention - suggested_attention_slice is a softmax(dim=-1) over attention_scores - dim is -1 if the call is non-sliced, or 0 or 1 for dimension-0 or dimension-1 slicing. - If dim is >= 0, offset and slice_size specify the slice start and length. + `module` is the current CrossAttention module for which the callback is being invoked. + `suggested_attention_slice` is the default-calculated attention slice + `dim` is -1 if the attenion map has not been sliced, or 0 or 1 for dimension-0 or dimension-1 slicing. + If `dim` is >= 0, `offset` and `slice_size` specify the slice start and length. Pass None to use the default attention calculation. :return: ''' self.attention_slice_wrangler = wrangler + def set_slicing_strategy_getter(self, getter: Optional[Callable[[nn.Module], tuple[int,int]]]): + self.slicing_strategy_getter = getter + + def cache_free_memory_count(self, device): + self.cached_mem_free_total = get_mem_free_total(device) + print("free cuda memory: ", self.cached_mem_free_total) + + def clear_cached_free_memory_count(self): + self.cached_mem_free_total = None + def einsum_lowest_level(self, q, k, v, dim, offset, slice_size): # calculate attention scores attention_scores = einsum('b i d, b j d -> b i j', q, k) - # calculate attenion slice by taking the best scores for each latent pixel + # calculate attention slice by taking the best scores for each latent pixel default_attention_slice = attention_scores.softmax(dim=-1, dtype=attention_scores.dtype) - if self.attention_slice_wrangler is not None: - attention_slice = self.attention_slice_wrangler(self, attention_scores, default_attention_slice, dim, offset, slice_size) + attention_slice_wrangler = self.attention_slice_wrangler + if attention_slice_wrangler is not None: + attention_slice = attention_slice_wrangler(self, default_attention_slice, dim, offset, slice_size) else: attention_slice = default_attention_slice @@ -240,17 +263,27 @@ class CrossAttention(nn.Module): return self.einsum_op_slice_dim1(q, k, v, max(q.shape[1] // div, 1)) def einsum_op_cuda(self, q, k, v): - stats = torch.cuda.memory_stats(q.device) - mem_active = stats['active_bytes.all.current'] - mem_reserved = stats['reserved_bytes.all.current'] - mem_free_cuda, _ = torch.cuda.mem_get_info(q.device) - mem_free_torch = mem_reserved - mem_active - mem_free_total = mem_free_cuda + mem_free_torch + # check if we already have a slicing strategy (this should only happen during cross-attention controlled generation) + slicing_strategy_getter = self.slicing_strategy_getter + if slicing_strategy_getter is not None: + (dim, slice_size) = slicing_strategy_getter(self) + if dim is not None: + # print("using saved slicing strategy with dim", dim, "slice size", slice_size) + if dim == 0: + return self.einsum_op_slice_dim0(q, k, v, slice_size) + elif dim == 1: + return self.einsum_op_slice_dim1(q, k, v, slice_size) + + # fallback for when there is no saved strategy, or saved strategy does not slice + mem_free_total = self.cached_mem_free_total or get_mem_free_total(q.device) # Divide factor of safety as there's copying and fragmentation return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) + def get_attention_mem_efficient(self, q, k, v): if q.device.type == 'cuda': + torch.cuda.empty_cache() + #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device)) return 
self.einsum_op_cuda(q, k, v) if q.device.type == 'mps': From 5214742d02e0d19a39852504e198cbd7b2d4f54e Mon Sep 17 00:00:00 2001 From: damian0815 Date: Tue, 8 Nov 2022 14:28:38 +0100 Subject: [PATCH 02/30] don't suppress exceptions when doing cross-attention control --- ldm/models/diffusion/shared_invokeai_diffusion.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index 1b181ba388..0a18eb25c8 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -151,10 +151,12 @@ class InvokeAIDiffuserComponent: context.request_apply_saved_attention_maps(ca_type) edited_conditioning = self.conditioning.cross_attention_control_args.edited_conditioning conditioned_next_x = self.model_forward_callback(x, sigma, edited_conditioning) - - finally: context.clear_requests(cleanup=True) + except: + context.clear_requests(cleanup=True) + raise + return unconditioned_next_x, conditioned_next_x def estimate_percent_through(self, step_index, sigma): From 9342ad8d97d635d31038c1bd978110c54a20326b Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 9 Nov 2022 02:07:13 +0000 Subject: [PATCH 03/30] prevent crash when switching to an invalid model --- ldm/generate.py | 4 ++++ ldm/invoke/model_cache.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index a437d3baf4..e2d4a40de7 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -802,6 +802,10 @@ class Generate: # the model cache does the loading and offloading cache = self.model_cache + if not cache.valid_model(model_name): + print(f'** "{model_name}" is not a known model name. Please check your models.yaml file') + return self.model + cache.print_vram_usage() # have to get rid of all references to model in order diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 7b434941df..1999973ea8 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -41,15 +41,22 @@ class ModelCache(object): self.stack = [] # this is an LRU FIFO self.current_model = None + def valid_model(self, model_name:str)->bool: + ''' + Given a model name, returns True if it is a valid + identifier. + ''' + return model_name in self.config + def get_model(self, model_name:str): ''' Given a model named identified in models.yaml, return the model object. If in RAM will load into GPU VRAM. If on disk, will load from there. ''' - if model_name not in self.config: + if not self.valid_model(model_name): print(f'** "{model_name}" is not a known model name. 
Please check your models.yaml file') - return None + return self.current_model if self.current_model != model_name: if model_name not in self.models: # make room for a new one From af62958323d861bd13681d474a6ef1a71eaf371b Mon Sep 17 00:00:00 2001 From: mauwii Date: Mon, 7 Nov 2022 03:22:20 +0100 Subject: [PATCH 04/30] update environment-mac.yml --- environment-mac.yml | 76 ++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/environment-mac.yml b/environment-mac.yml index e0db02c3b9..fa043584e6 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -2,12 +2,15 @@ name: invokeai channels: - pytorch - conda-forge + - defaults dependencies: - - python=3.9.13 - - pip=22.2.2 - - - pytorch=1.12.1 - - torchvision=0.13.1 + - python=3.10 + - pip>=22.2 + - pytorch=1.12 + - pytorch-lightning=1.7 + - torchvision + - torchmetrics=0.10 + - torch-fidelity=0.3 # I suggest to keep the other deps sorted for convenience. # To determine what the latest versions should be, run: @@ -17,42 +20,39 @@ dependencies: # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n invokeai-updated | awk ' {print " - " $1 "==" $2;} ' # ``` - - albumentations=1.2.1 - - coloredlogs=15.0.1 - - diffusers=0.6.0 - - einops=0.4.1 - - grpcio=1.46.4 + - albumentations=1.2 + - coloredlogs=15.0 + - diffusers=0.6 + - einops=0.3 + - eventlet + - grpcio=1.46 + - flask=2.1 + - flask-socketio=5.3 + - flask-cors=3.0 - humanfriendly=10.0 - - imageio=2.21.2 - - imageio-ffmpeg=0.4.7 - - imgaug=0.4.0 - - kornia=0.6.7 - - mpmath=1.2.1 - - nomkl # arm64 has only 1.0 while x64 needs 3.0 - - numpy=1.23.4 - - omegaconf=2.1.1 - - openh264=2.3.0 - - onnx=1.12.0 - - onnxruntime=1.12.1 - - pudb=2022.1 - - pytorch-lightning=1.7.7 - - scipy=1.9.3 - - streamlit=1.12.2 - - sympy=1.10.1 - - tensorboard=2.10.0 - - torchmetrics=0.10.1 - - py-opencv=4.6.0 - - flask=2.1.3 - - flask-socketio=5.3.0 - - flask-cors=3.0.10 - - eventlet=0.33.1 - - protobuf=3.20.1 - - send2trash=1.8.0 - - transformers=4.23.1 - - torch-fidelity=0.3.0 + - imageio=2.21 + - imageio-ffmpeg=0.4 + - imgaug=0.4 + - kornia=0.6 + - mpmath=1.2 + - nomkl=3 + - numpy=1.23 + - omegaconf + - openh264=2.3 + - onnx=1.12 + - onnxruntime=1.12 + - pudb=2019.2 + - protobuf=3.20 + - py-opencv=4.6 + - scipy=1.9 + - streamlit=1.12 + - sympy=1.10 + - send2trash=1.8 + - tensorboard=2.10 + - transformers=4.23 - pip: - - getpass_asterisk - dependency_injector==4.40.0 + - getpass_asterisk - realesrgan==0.2.5.0 - test-tube==0.7.5 - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers From f1982cb6d855f330d29127bddfe14517c065964c Mon Sep 17 00:00:00 2001 From: mauwii Date: Mon, 7 Nov 2022 03:22:48 +0100 Subject: [PATCH 05/30] update push triggers in test-invoke-conda.yml --- .github/workflows/test-invoke-conda.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index a144303cc3..ceb5552a61 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -4,7 +4,8 @@ on: branches: - 'main' - 'development' - - 'fix-gh-actions-fork' + - 'update*' + - 'release*' pull_request: branches: - 'main' From ae0ce826090221802c242f0ed6ff47f486d4adef Mon Sep 17 00:00:00 2001 From: mauwii Date: Mon, 7 Nov 2022 04:32:32 +0100 Subject: [PATCH 06/30] add 2 missed versions unpinned them for testing purpose with linux container, 4got to re-pin --- environment-mac.yml | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment-mac.yml b/environment-mac.yml index fa043584e6..ef9f0c3584 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -8,7 +8,7 @@ dependencies: - pip>=22.2 - pytorch=1.12 - pytorch-lightning=1.7 - - torchvision + - torchvision=0.13 - torchmetrics=0.10 - torch-fidelity=0.3 @@ -37,7 +37,7 @@ dependencies: - mpmath=1.2 - nomkl=3 - numpy=1.23 - - omegaconf + - omegaconf=2.1 - openh264=2.3 - onnx=1.12 - onnxruntime=1.12 From 443c9110f1d8043f9da590f59cf20824d94a0791 Mon Sep 17 00:00:00 2001 From: mauwii Date: Mon, 7 Nov 2022 04:34:23 +0100 Subject: [PATCH 07/30] remove push triggers, since pr trigger is enough --- .github/workflows/test-invoke-conda.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index ceb5552a61..e9a0719040 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -4,8 +4,6 @@ on: branches: - 'main' - 'development' - - 'update*' - - 'release*' pull_request: branches: - 'main' From 7aa7be6b240aa669c7cd99ac4a9f8515508f6b5e Mon Sep 17 00:00:00 2001 From: mauwii Date: Tue, 8 Nov 2022 17:25:47 +0100 Subject: [PATCH 08/30] use taming-transformers-rom1504, remove -e ... ... to address required changes --- environment-mac.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/environment-mac.yml b/environment-mac.yml index ef9f0c3584..141e835908 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -52,14 +52,14 @@ dependencies: - transformers=4.23 - pip: - dependency_injector==4.40.0 - - getpass_asterisk + - taming-transformers-rom1504 - realesrgan==0.2.5.0 - test-tube==0.7.5 - - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - - -e git+https://github.com/openai/CLIP.git@main#egg=clip - - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion - - -e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan - - -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg + - git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers + - git+https://github.com/openai/CLIP.git@main#egg=clip + - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion + - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . variables: PYTORCH_ENABLE_MPS_FALLBACK: 1 From 9686bf0ea8b7307768031c7435f79d2006ebde95 Mon Sep 17 00:00:00 2001 From: mauwii Date: Tue, 8 Nov 2022 18:00:33 +0100 Subject: [PATCH 09/30] switch back to `getpass_asterisk`... ... 
until preload_models.py is ready --- environment-mac.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-mac.yml b/environment-mac.yml index 141e835908..f4a909d273 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -52,7 +52,7 @@ dependencies: - transformers=4.23 - pip: - dependency_injector==4.40.0 - - taming-transformers-rom1504 + - getpass_asterisk - realesrgan==0.2.5.0 - test-tube==0.7.5 - git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers From 9d39d6ecb34987192f5ac34e83cbf320a29bc16e Mon Sep 17 00:00:00 2001 From: mauwii Date: Tue, 8 Nov 2022 18:34:16 +0100 Subject: [PATCH 10/30] add PIP_EXISTS_ACTION=w to test-invoke-conda`s env --- .github/workflows/test-invoke-conda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index e9a0719040..3363941408 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -37,6 +37,7 @@ jobs: runs-on: ${{ matrix.os }} env: CONDA_ENV_NAME: invokeai + PIP_EXISTS_ACTION: w defaults: run: shell: ${{ matrix.default-shell }} From 143b18af8a00bfaaec1c9786677d1a93737b4d9b Mon Sep 17 00:00:00 2001 From: mauwii Date: Tue, 8 Nov 2022 19:28:31 +0100 Subject: [PATCH 11/30] update pip dependencies - remove realesrgan - add git+https://github.com/invoke-ai/Real-ESRGAN.git - remove git+https://github.com/CompVis/taming-transformers.git - add taming-transformers-rom1504 - change TencentARC/GFPGAN to invoke-ai/GFPGAN --- environment-mac.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environment-mac.yml b/environment-mac.yml index f4a909d273..f0870e0de2 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -53,12 +53,12 @@ dependencies: - pip: - dependency_injector==4.40.0 - getpass_asterisk - - realesrgan==0.2.5.0 + - taming-transformers-rom1504 - test-tube==0.7.5 - - git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion - - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan + - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . 
variables: From 78c1d07c4be04b0fe4b980926c99ece89d39c1eb Mon Sep 17 00:00:00 2001 From: mauwii Date: Tue, 8 Nov 2022 22:06:48 +0100 Subject: [PATCH 12/30] update environment-linux-aarch64.yml --- environment-linux-aarch64.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/environment-linux-aarch64.yml b/environment-linux-aarch64.yml index de762f8b85..8accc711b0 100644 --- a/environment-linux-aarch64.yml +++ b/environment-linux-aarch64.yml @@ -29,17 +29,15 @@ dependencies: - torch-fidelity=0.3.0 - tokenizers>=0.11.1,!=0.11.3,<0.13 - pip: + - dependency_injector==4.40.0 - getpass_asterisk - omegaconf==2.1.1 - - realesrgan==0.2.5.0 - - test-tube>=0.7.5 - pyreadline3 - - dependency_injector==4.40.0 - - -e git+https://github.com/openai/CLIP.git@main#egg=clip - - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion - - -e git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan - - -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg + - taming-transformers-rom1504 + - test-tube>=0.7.5 + - git+https://github.com/openai/CLIP.git@main#egg=clip + - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion + - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan + - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . -variables: - PYTORCH_ENABLE_MPS_FALLBACK: 1 From 72338506ed1d1f8e00e83e3e4dc86c4bf9d1d8f2 Mon Sep 17 00:00:00 2001 From: mauwii Date: Tue, 8 Nov 2022 22:07:32 +0100 Subject: [PATCH 13/30] update environment.yml --- environment.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/environment.yml b/environment.yml index ae07e11c3a..8cf8779ee3 100644 --- a/environment.yml +++ b/environment.yml @@ -36,10 +36,10 @@ dependencies: - eventlet - getpass_asterisk - kornia==0.6.0 + - taming-transformers-rom1504 - git+https://github.com/openai/CLIP.git@main#egg=clip - - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion - - -e git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - -e git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan - - -e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg + - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion + - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan + - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . 
From c54eb000553d4632247f5ea58162029fc3a7127a Mon Sep 17 00:00:00 2001 From: mauwii Date: Wed, 9 Nov 2022 01:26:04 +0100 Subject: [PATCH 14/30] update python version --- environment-linux-aarch64.yml | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment-linux-aarch64.yml b/environment-linux-aarch64.yml index 8accc711b0..ac52b50621 100644 --- a/environment-linux-aarch64.yml +++ b/environment-linux-aarch64.yml @@ -3,7 +3,7 @@ channels: - pytorch - conda-forge dependencies: - - python=3.9.* + - python=3.10 - pip>=22.2.2 - cudatoolkit - pytorch diff --git a/environment.yml b/environment.yml index 8cf8779ee3..4c97a42dd5 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge - defaults dependencies: - - python>=3.9 + - python=3.10 - pip=22.2.2 - numpy=1.23.3 - torchvision=0.13.1 From 6d9638ba3138ab09fba8c37b301984fe985d83f2 Mon Sep 17 00:00:00 2001 From: mauwii Date: Wed, 9 Nov 2022 01:27:37 +0100 Subject: [PATCH 15/30] remove PIP_EXISTS_ACTION from env --- .github/workflows/test-invoke-conda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 3363941408..e9a0719040 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -37,7 +37,6 @@ jobs: runs-on: ${{ matrix.os }} env: CONDA_ENV_NAME: invokeai - PIP_EXISTS_ACTION: w defaults: run: shell: ${{ matrix.default-shell }} From 12a33f6e2d5ac0dc36bb18694d4e599ccbb45c58 Mon Sep 17 00:00:00 2001 From: mauwii Date: Wed, 9 Nov 2022 02:01:42 +0100 Subject: [PATCH 16/30] fix conflict in environment-linux-aarch64.yml --- environment-linux-aarch64.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/environment-linux-aarch64.yml b/environment-linux-aarch64.yml index ac52b50621..b00ab6e78c 100644 --- a/environment-linux-aarch64.yml +++ b/environment-linux-aarch64.yml @@ -2,15 +2,16 @@ name: invokeai channels: - pytorch - conda-forge + - defaults dependencies: - python=3.10 - pip>=22.2.2 - cudatoolkit - pytorch - torchvision - - numpy=1.19 - - imageio=2.9.0 - - opencv=4.6.0 + - numpy=1.23 + - imageio=2.21 + - opencv=4.6 - pillow=8.* - flask=2.1.* - flask_cors=3.0.10 @@ -25,7 +26,7 @@ dependencies: - einops=0.3.0 - kornia=0.6 - torchmetrics=0.7.0 - - transformers=4.21.3 + - transformers=4.23 - torch-fidelity=0.3.0 - tokenizers>=0.11.1,!=0.11.3,<0.13 - pip: From e06a6ed4c8e12248b6fe06c7c0dc5da59b4d0ad8 Mon Sep 17 00:00:00 2001 From: mauwii Date: Wed, 9 Nov 2022 02:53:45 +0100 Subject: [PATCH 17/30] add changes required by @tildebyte --- environment-linux-aarch64.yml | 5 ++--- environment-mac.yml | 5 ++--- environment.yml | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/environment-linux-aarch64.yml b/environment-linux-aarch64.yml index b00ab6e78c..63b95a412d 100644 --- a/environment-linux-aarch64.yml +++ b/environment-linux-aarch64.yml @@ -30,15 +30,14 @@ dependencies: - torch-fidelity=0.3.0 - tokenizers>=0.11.1,!=0.11.3,<0.13 - pip: - - dependency_injector==4.40.0 - getpass_asterisk - omegaconf==2.1.1 - pyreadline3 - taming-transformers-rom1504 - test-tube>=0.7.5 - git+https://github.com/openai/CLIP.git@main#egg=clip - - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion + - git+https://github.com/invoke-ai/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan + - 
git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . diff --git a/environment-mac.yml b/environment-mac.yml index f0870e0de2..2d48544a58 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -51,14 +51,13 @@ dependencies: - tensorboard=2.10 - transformers=4.23 - pip: - - dependency_injector==4.40.0 - getpass_asterisk - taming-transformers-rom1504 - test-tube==0.7.5 - git+https://github.com/openai/CLIP.git@main#egg=clip - - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion + - git+https://github.com/invoke-ai/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan + - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . variables: diff --git a/environment.yml b/environment.yml index 4c97a42dd5..7d38a1e3de 100644 --- a/environment.yml +++ b/environment.yml @@ -32,14 +32,13 @@ dependencies: - flask==2.1.3 - flask_socketio==5.3.0 - flask_cors==3.0.10 - - dependency_injector==4.40.0 - eventlet - getpass_asterisk - kornia==0.6.0 - taming-transformers-rom1504 - git+https://github.com/openai/CLIP.git@main#egg=clip - - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion + - git+https://github.com/invoke-ai/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan + - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . From 8cf3883adc174679444e150faf015e0c81430b84 Mon Sep 17 00:00:00 2001 From: mauwii Date: Wed, 9 Nov 2022 03:11:49 +0100 Subject: [PATCH 18/30] re-change TencentARC/GFPGAN to invoke-ai/GFPGAN --- environment-linux-aarch64.yml | 2 +- environment-mac.yml | 2 +- environment.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environment-linux-aarch64.yml b/environment-linux-aarch64.yml index 63b95a412d..c7a76b821b 100644 --- a/environment-linux-aarch64.yml +++ b/environment-linux-aarch64.yml @@ -38,6 +38,6 @@ dependencies: - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/invoke-ai/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . diff --git a/environment-mac.yml b/environment-mac.yml index 2d48544a58..29f5197be9 100644 --- a/environment-mac.yml +++ b/environment-mac.yml @@ -57,7 +57,7 @@ dependencies: - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/invoke-ai/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . 
variables: diff --git a/environment.yml b/environment.yml index 7d38a1e3de..3d5c44d391 100644 --- a/environment.yml +++ b/environment.yml @@ -39,6 +39,6 @@ dependencies: - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/invoke-ai/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan - - git+https://github.com/TencentARC/GFPGAN.git#egg=gfpgan + - git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan - git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg - -e . From 9e4545b2fc187771ecb7c06c197638d16453a6a7 Mon Sep 17 00:00:00 2001 From: Mike DiGiovanni Date: Wed, 9 Nov 2022 16:44:59 -0500 Subject: [PATCH 19/30] Fixes typos in README.md --- frontend/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/README.md b/frontend/README.md index f597cc6f23..4becbb221f 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -5,7 +5,7 @@ - `python scripts/dream.py --web` serves both frontend and backend at http://localhost:9090 -## Evironment +## Environment Install [node](https://nodejs.org/en/download/) (includes npm) and optionally [yarn](https://yarnpkg.com/getting-started/install). @@ -15,7 +15,7 @@ packages. ## Dev -1. From `frontend/`, run `npm dev` / `yarn dev` to start the dev server. +1. From `frontend/`, run `npm run dev` / `yarn dev` to start the dev server. 2. Run `python scripts/dream.py --web`. 3. Navigate to the dev server address e.g. `http://localhost:5173/`. From c248ae44d43ff5f4873b56b6f9b20b04f731eaa3 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 10 Nov 2022 16:49:25 -0500 Subject: [PATCH 20/30] Revert "Resize hires as an image" This reverts commit d05b1b3544dfa7b3c071ac05b4f014a52e08fa50. --- ldm/invoke/generator/txt2img2img.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index d95ad78196..759ba2dba4 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -10,8 +10,6 @@ from ldm.models.diffusion.ddim import DDIMSampler from ldm.invoke.generator.omnibus import Omnibus from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent from PIL import Image -from ldm.invoke.devices import choose_autocast -from ldm.invoke.image_util import InitImageResizer class Txt2Img2Img(Generator): def __init__(self, model, precision): @@ -46,13 +44,16 @@ class Txt2Img2Img(Generator): ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False ) + #x = self.get_noise(init_width, init_height) + x = x_T + if self.free_gpu_mem and self.model.model.device != self.model.device: self.model.model.to(self.model.device) samples, _ = sampler.sample( batch_size = 1, S = steps, - x_T = x_T, + x_T = x, conditioning = c, shape = shape, verbose = False, @@ -68,21 +69,11 @@ class Txt2Img2Img(Generator): ) # resizing - - image = self.sample_to_image(samples) - image = InitImageResizer(image).resize(width, height) - - image = np.array(image).astype(np.float32) / 255.0 - image = image[None].transpose(0, 3, 1, 2) - image = torch.from_numpy(image) - image = 2.0 * image - 1.0 - image = image.to(self.model.device) - - scope = choose_autocast(self.precision) - with scope(self.model.device.type): - samples = self.model.get_first_stage_encoding( - self.model.encode_first_stage(image) - ) # move back to latent space + samples = torch.nn.functional.interpolate( + samples, + size=(height // 
self.downsampling_factor, width // self.downsampling_factor), + mode="bilinear" + ) t_enc = int(strength * steps) ddim_sampler = DDIMSampler(self.model, device=self.model.device) From 47ddda1f643452b2a5e9f81820d93f2020df04ce Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 10 Nov 2022 16:49:25 -0500 Subject: [PATCH 21/30] Revert "Log strength with hires" This reverts commit 82d4904c07670d514e9a541de7c9ecd8489eeae2. --- ldm/invoke/args.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 3c3b2059d5..8e0d641870 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -247,8 +247,6 @@ class Args(object): switches.append('--seamless') if a['hires_fix']: switches.append('--hires_fix') - if a['strength'] and a['strength']>0: - switches.append(f'-f {a["strength"]}') # img2img generations have parameters relevant only to them and have special handling if a['init_img'] and len(a['init_img'])>0: From 48aa6416dc194d0caf4ddff988d953a42dc585b1 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 8 Nov 2022 15:22:32 +0000 Subject: [PATCH 22/30] enable outcropping of random JPG/PNG images - Works best with runwayML inpainting model - Numerous code changes required to propagate seed to final metadata. Original code predicated on the image being generated within InvokeAI. --- ldm/generate.py | 13 +++++++------ ldm/invoke/generator/base.py | 2 +- ldm/invoke/prompt_parser.py | 2 +- ldm/invoke/restoration/outcrop.py | 5 +++-- scripts/invoke.py | 7 +++++-- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index e2d4a40de7..746e56dd11 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -566,12 +566,13 @@ class Generate: args = metadata_from_png(image_path) seed = args.seed - prompt = args.prompt - print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') - - if not seed: - print('* Could not recover seed for image. Replacing with 42. This will not affect image quality') - seed = 42 + prompt = args.prompt or '' + if seed == 0: + seed = random.randrange(0, np.iinfo(np.uint32).max) + opt.seed = seed + print(f'>> generated new seed {seed} and prompt "{prompt}" for {image_path}') + else: + print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') # try to reuse the same filename prefix as the original file. # we take everything up to the first period diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index 3c6eca08a2..719d08c7c0 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -63,7 +63,7 @@ class Generator(): **kwargs ) results = [] - seed = seed if seed is not None else self.new_seed() + seed = seed if seed is not None and seed > 0 else self.new_seed() first_seed = seed seed, initial_noise = self.generate_initial_noise(seed, width, height) diff --git a/ldm/invoke/prompt_parser.py b/ldm/invoke/prompt_parser.py index 3dbcc1bb4b..42c83188aa 100644 --- a/ldm/invoke/prompt_parser.py +++ b/ldm/invoke/prompt_parser.py @@ -636,7 +636,7 @@ def split_weighted_subprompts(text, skip_normalize=False)->list: weight_sum = sum(map(lambda x: x[1], parsed_prompts)) if weight_sum == 0: print( - "Warning: Subprompt weights add up to zero. Discarding and using even weights instead.") + "* Warning: Subprompt weights add up to zero. 
Discarding and using even weights instead.") equal_weight = 1 / max(len(parsed_prompts), 1) return [(x[0], equal_weight) for x in parsed_prompts] return [(x[0], x[1] / weight_sum) for x in parsed_prompts] diff --git a/ldm/invoke/restoration/outcrop.py b/ldm/invoke/restoration/outcrop.py index b5d42250c5..1a0aaf2c8f 100644 --- a/ldm/invoke/restoration/outcrop.py +++ b/ldm/invoke/restoration/outcrop.py @@ -28,11 +28,12 @@ class Outcrop(object): self.generate._set_sampler() def wrapped_callback(img,seed,**kwargs): - image_callback(img,orig_opt.seed,use_prefix=prefix,**kwargs) + preferred_seed = orig_opt.seed if orig_opt.seed> 0 else seed + image_callback(img,preferred_seed,use_prefix=prefix,**kwargs) result= self.generate.prompt2image( orig_opt.prompt, - seed = orig_opt.seed, # uncomment to make it deterministic + seed = orig_opt.seed if orig_opt.seed>0 else opt.seed, sampler = self.generate.sampler, steps = opt.steps, cfg_scale = opt.cfg_scale, diff --git a/scripts/invoke.py b/scripts/invoke.py index 1e9a84295e..325a3ca6c4 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -29,6 +29,7 @@ infile = None def main(): """Initialize command-line parsers and the diffusion model""" global infile + print('* Initializing, be patient...') opt = Args() args = opt.parse_args() @@ -46,7 +47,6 @@ def main(): print('--max_loaded_models must be >= 1; using 1') args.max_loaded_models = 1 - print('* Initializing, be patient...') from ldm.generate import Generate # these two lines prevent a horrible warning message from appearing @@ -656,7 +656,10 @@ def do_postprocess (gen, opt, callback): def add_postprocessing_to_metadata(opt,original_file,new_file,tool,command): original_file = original_file if os.path.exists(original_file) else os.path.join(opt.outdir,original_file) new_file = new_file if os.path.exists(new_file) else os.path.join(opt.outdir,new_file) - meta = retrieve_metadata(original_file)['sd-metadata'] + try: + meta = retrieve_metadata(original_file)['sd-metadata'] + except AttributeError: + meta = retrieve_metadata(new_file)['sd-metadata'] if 'image' not in meta: meta = metadata_dumps(opt,seeds=[opt.seed])['image'] meta['image'] = {} From 8aa94d5774af2c392ba70ba3f15baaa4e0afe481 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 8 Nov 2022 17:27:42 +0000 Subject: [PATCH 23/30] enhance outcropping with ability to direct contents of new regions - When outcropping an image you can now add a `--new_prompt` option, to specify a new prompt to be used instead of the original one used to generate the image. - Similarly you can provide a new seed using `--seed` (or `-S`). A seed of zero will pick one randomly. - This PR also fixes the crash that happened when trying to outcrop an image that does not contain InvokeAI metadata. --- docs/features/OUTPAINTING.md | 15 +++++++++++++++ ldm/generate.py | 24 ++++++++++++++++-------- ldm/invoke/args.py | 5 +++++ ldm/invoke/restoration/outcrop.py | 4 ++-- scripts/invoke.py | 14 ++++++++++---- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/docs/features/OUTPAINTING.md b/docs/features/OUTPAINTING.md index 122c732605..380467d571 100644 --- a/docs/features/OUTPAINTING.md +++ b/docs/features/OUTPAINTING.md @@ -92,6 +92,21 @@ The new image is larger than the original (576x704) because 64 pixels were added to the top and right sides. You will need enough VRAM to process an image of this size. +#### Outcropping non-InvokeAI images + +You can outcrop an arbitrary image that was not generated by InvokeAI, +but your results will vary. 
The `inpainting-1.5` model is highly +recommended, but if not feasible, then you may be able to improve the +output by conditioning the outcropping with a text prompt that +describes the scene using the `--new_prompt` argument: + +```bash +invoke> !fix images/vacation.png --outcrop top 128 --new_prompt "family vacation" +``` + +You may also provide a different seed for outcropping to use by passing +`-S`. A seed of "0" will generate a new random seed. + A number of caveats: 1. Although you can specify any pixel values, they will be rounded up to the diff --git a/ldm/generate.py b/ldm/generate.py index 746e56dd11..472f8f071d 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -561,18 +561,22 @@ class Generate: ): # retrieve the seed from the image; seed = None - image_metadata = None prompt = None args = metadata_from_png(image_path) - seed = args.seed - prompt = args.prompt or '' - if seed == 0: - seed = random.randrange(0, np.iinfo(np.uint32).max) - opt.seed = seed - print(f'>> generated new seed {seed} and prompt "{prompt}" for {image_path}') + if opt.seed is not None: + seed = opt.seed + elif args.seed >= 0: + seed = args.seed else: - print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') + seed = random.randrange(0, np.iinfo(np.uint32).max) + + if opt.prompt is not None: + prompt = opt.prompt + else: + prompt = args.prompt + + print(f'>> using seed {seed} and prompt "{prompt}" for {image_path}') # try to reuse the same filename prefix as the original file. # we take everything up to the first period @@ -619,6 +623,10 @@ class Generate: extend_instructions[direction]=int(pixels) except ValueError: print(f'** invalid extension instruction. Use ..., as in "top 64 left 128 right 64 bottom 64"') + + opt.seed = seed + opt.prompt = prompt + if len(extend_instructions)>0: restorer = Outcrop(image,self,) return restorer.process ( diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 8e0d641870..5a2d7ae97c 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -864,6 +864,11 @@ class Args(object): default=32, help='When outpainting, the tile size to use for filling outpaint areas', ) + postprocessing_group.add_argument( + '--new_prompt', + type=str, + help='Change the text prompt applied during postprocessing (default, use original generation prompt)', + ) postprocessing_group.add_argument( '-ft', '--facetool', diff --git a/ldm/invoke/restoration/outcrop.py b/ldm/invoke/restoration/outcrop.py index 1a0aaf2c8f..ac0bf41b9e 100644 --- a/ldm/invoke/restoration/outcrop.py +++ b/ldm/invoke/restoration/outcrop.py @@ -32,8 +32,8 @@ class Outcrop(object): image_callback(img,preferred_seed,use_prefix=prefix,**kwargs) result= self.generate.prompt2image( - orig_opt.prompt, - seed = orig_opt.seed if orig_opt.seed>0 else opt.seed, + opt.prompt, + seed = opt.seed if opt.seed else orig_opt.seed, sampler = self.generate.sampler, steps = opt.steps, cfg_scale = opt.cfg_scale, diff --git a/scripts/invoke.py b/scripts/invoke.py index 325a3ca6c4..ad8c5ca062 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -277,7 +277,7 @@ def main_loop(gen, opt): filename = f'{prefix}.{use_prefix}.{seed}.png' tm = opt.text_mask[0] th = opt.text_mask[1] if len(opt.text_mask)>1 else 0.5 - formatted_dream_prompt = f'!mask {opt.prompt} -tm {tm} {th}' + formatted_dream_prompt = f'!mask {opt.input_file_path} -tm {tm} {th}' path = file_writer.save_image_and_prompt_to_png( image = image, dream_prompt = formatted_dream_prompt, @@ -317,7 +317,7 @@ def main_loop(gen, opt): tool = 
re.match('postprocess:(\w+)',opt.last_operation).groups()[0] add_postprocessing_to_metadata( opt, - opt.prompt, + opt.input_file_path, filename, tool, formatted_dream_prompt, @@ -614,10 +614,16 @@ def do_textmask(gen, opt, callback): ) def do_postprocess (gen, opt, callback): - file_path = opt.prompt # treat the prompt as the file pathname + file_path = opt.prompt # treat the prompt as the file pathname + if opt.new_prompt is not None: + opt.prompt = opt.new_prompt + else: + opt.prompt = None + if os.path.dirname(file_path) == '': #basename given file_path = os.path.join(opt.outdir,file_path) + opt.input_file_path = file_path tool=None if opt.facetool_strength > 0: tool = opt.facetool @@ -707,7 +713,7 @@ def prepare_image_metadata( elif len(prior_variations) > 0: formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed) elif operation == 'postprocess': - formatted_dream_prompt = '!fix '+opt.dream_prompt_str(seed=seed) + formatted_dream_prompt = '!fix '+opt.dream_prompt_str(seed=seed,prompt=opt.input_file_path) else: formatted_dream_prompt = opt.dream_prompt_str(seed=seed) return filename,formatted_dream_prompt From 832f18332038a2a64b4d3a7c8558ab2ade6eba46 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 10 Nov 2022 20:16:47 +0000 Subject: [PATCH 24/30] fix #1402 --- ldm/generate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldm/generate.py b/ldm/generate.py index 472f8f071d..4cabeb6810 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -1042,7 +1042,9 @@ class Generate: return True return False - def _check_for_erasure(self, image): + def _check_for_erasure(self, image:Image.Image)->bool: + if image.mode not in ('RGBA','RGB'): + return False width, height = image.size pixdata = image.load() colored = 0 From 4b4111a802511d0916a1962a0dcbc456f5ebf2dd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 10 Nov 2022 21:27:25 +0000 Subject: [PATCH 25/30] fix invoke.py crash if no models.yaml file present - Script will now offer the user the ability to create a minimal models.yaml and then gracefully exit. - Closes #1420 --- ldm/invoke/readline.py | 2 ++ scripts/invoke.py | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/readline.py b/ldm/invoke/readline.py index 7d87ede755..4e95e9b063 100644 --- a/ldm/invoke/readline.py +++ b/ldm/invoke/readline.py @@ -284,6 +284,7 @@ class Completer(object): switch,partial_path = match.groups() partial_path = partial_path.lstrip() + matches = list() path = os.path.expanduser(partial_path) @@ -321,6 +322,7 @@ class Completer(object): matches.append( switch+os.path.join(os.path.dirname(full_path), node) ) + return matches class DummyCompleter(Completer): diff --git a/scripts/invoke.py b/scripts/invoke.py index ad8c5ca062..50f995c26d 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -90,7 +90,12 @@ def main(): safety_checker=opt.safety_checker, max_loaded_models=opt.max_loaded_models, ) - except (FileNotFoundError, IOError, KeyError) as e: + except FileNotFoundError: + print('** You appear to be missing configs/models.yaml') + print('** You can either exit this script and run scripts/preload_models.py, or fix the problem now.') + emergency_model_create(opt) + sys.exit(-1) + except (IOError, KeyError) as e: print(f'{e}. 
Aborting.') sys.exit(-1) @@ -482,6 +487,7 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: command = '-h' return command, operation + def add_weights_to_config(model_path:str, gen, opt, completer): print(f'>> Model import in process. Please enter the values needed to configure this model:') print() @@ -887,6 +893,36 @@ def write_commands(opt, file_path:str, outfilepath:str): f.write('\n'.join(commands)) print(f'>> File {outfilepath} with commands created') +def emergency_model_create(opt:Args): + completer = get_completer(opt) + completer.complete_extensions(('.yaml','.yml','.ckpt','.vae.pt')) + completer.set_default_dir('.') + valid_path = False + while not valid_path: + weights_file = input('Enter the path to a downloaded models file, or ^C to exit: ') + valid_path = os.path.exists(weights_file) + dir,basename = os.path.split(weights_file) + + valid_name = False + while not valid_name: + name = input('Enter a short name for this model (no spaces): ') + name = 'unnamed model' if len(name)==0 else name + valid_name = ' ' not in name + + description = input('Enter a description for this model: ') + description = 'no description' if len(description)==0 else description + + with open(opt.conf, 'w', encoding='utf-8') as f: + f.write(f'{name}:\n') + f.write(f' description: {description}\n') + f.write(f' weights: {weights_file}\n') + f.write(f' config: ./configs/stable-diffusion/v1-inference.yaml\n') + f.write(f' width: 512\n') + f.write(f' height: 512\n') + f.write(f' default: true\n') + print(f'Config file {opt.conf} is created. This script will now exit.') + print(f'After restarting you may examine the entry with !models and edit it with !edit.') + ###################################### if __name__ == '__main__': From 8dc7f119e50e9d97307d0b83dfc86f97cb43d9e8 Mon Sep 17 00:00:00 2001 From: Kyle Schouviller Date: Thu, 10 Nov 2022 21:43:56 -0800 Subject: [PATCH 26/30] Fix performance issue introduced by torch cuda cache clear during generation --- ldm/modules/attention.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py index 05f6183029..94bb8a2916 100644 --- a/ldm/modules/attention.py +++ b/ldm/modules/attention.py @@ -282,7 +282,6 @@ class CrossAttention(nn.Module): def get_attention_mem_efficient(self, q, k, v): if q.device.type == 'cuda': - torch.cuda.empty_cache() #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device)) return self.einsum_op_cuda(q, k, v) From af040e97af6d69f158c61846841d2a3dd93f4887 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 11 Nov 2022 04:41:02 +0000 Subject: [PATCH 27/30] prevent two models from being marked default in models.yaml --- ldm/invoke/model_cache.py | 9 ++++++--- scripts/invoke.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 1999973ea8..d4007c46de 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -109,10 +109,13 @@ class ModelCache(object): Set the default model. 
The change will not take effect until you call model_cache.commit() ''' + print(f'DEBUG: before set_default_model()\n{OmegaConf.to_yaml(self.config)}') assert model_name in self.models,f"unknown model '{model_name}'" - for model in self.models: - self.models[model].pop('default',None) - self.models[model_name]['default'] = True + config = self.config + for model in config: + config[model].pop('default',None) + config[model_name]['default'] = True + print(f'DEBUG: after set_default_model():\n{OmegaConf.to_yaml(self.config)}') def list_models(self) -> dict: ''' diff --git a/scripts/invoke.py b/scripts/invoke.py index 50f995c26d..1c1d44dd16 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -584,7 +584,7 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak try: print('>> Verifying that new model loads...') - yaml_str = gen.model_cache.add_model(model_name, new_config, clobber) + gen.model_cache.add_model(model_name, new_config, clobber) assert gen.set_model(model_name) is not None, 'model failed to load' except AssertionError as e: print(f'** aborting **') From b57c81ab389cff0bebe4ce37636ea1db0b17c849 Mon Sep 17 00:00:00 2001 From: Kyle Schouviller Date: Thu, 10 Nov 2022 14:53:49 -0800 Subject: [PATCH 28/30] Remove editable flag from clipseg in requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 939463e36e..fce5c87abf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,4 +38,4 @@ git+https://github.com/openai/CLIP.git@main#egg=clip git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion git+https://github.com/invoke-ai/Real-ESRGAN.git#egg=realesrgan git+https://github.com/invoke-ai/GFPGAN.git#egg=gfpgan --e git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg +git+https://github.com/invoke-ai/clipseg.git@models-rename#egg=clipseg From 1fb7b50be7505c077cfc81d69d8d8d2a656698fd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 11 Nov 2022 10:30:44 -0500 Subject: [PATCH 29/30] Revert "enhance outcropping with ability to direct contents of new regions" This reverts commit 8aa94d5774af2c392ba70ba3f15baaa4e0afe481. --- docs/features/OUTPAINTING.md | 15 --------------- ldm/generate.py | 24 ++++++++---------------- ldm/invoke/args.py | 5 ----- ldm/invoke/restoration/outcrop.py | 4 ++-- scripts/invoke.py | 14 ++++---------- 5 files changed, 14 insertions(+), 48 deletions(-) diff --git a/docs/features/OUTPAINTING.md b/docs/features/OUTPAINTING.md index 380467d571..122c732605 100644 --- a/docs/features/OUTPAINTING.md +++ b/docs/features/OUTPAINTING.md @@ -92,21 +92,6 @@ The new image is larger than the original (576x704) because 64 pixels were added to the top and right sides. You will need enough VRAM to process an image of this size. -#### Outcropping non-InvokeAI images - -You can outcrop an arbitrary image that was not generated by InvokeAI, -but your results will vary. The `inpainting-1.5` model is highly -recommended, but if not feasible, then you may be able to improve the -output by conditioning the outcropping with a text prompt that -describes the scene using the `--new_prompt` argument: - -```bash -invoke> !fix images/vacation.png --outcrop top 128 --new_prompt "family vacation" -``` - -You may also provide a different seed for outcropping to use by passing -`-S`. A seed of "0" will generate a new random seed. - A number of caveats: 1. 
Although you can specify any pixel values, they will be rounded up to the diff --git a/ldm/generate.py b/ldm/generate.py index 4cabeb6810..aa4bd7fc8e 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -561,22 +561,18 @@ class Generate: ): # retrieve the seed from the image; seed = None + image_metadata = None prompt = None args = metadata_from_png(image_path) - if opt.seed is not None: - seed = opt.seed - elif args.seed >= 0: - seed = args.seed + seed = args.seed + prompt = args.prompt or '' + if seed == 0: + seed = random.randrange(0, np.iinfo(np.uint32).max) + opt.seed = seed + print(f'>> generated new seed {seed} and prompt "{prompt}" for {image_path}') else: - seed = random.randrange(0, np.iinfo(np.uint32).max) - - if opt.prompt is not None: - prompt = opt.prompt - else: - prompt = args.prompt - - print(f'>> using seed {seed} and prompt "{prompt}" for {image_path}') + print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') # try to reuse the same filename prefix as the original file. # we take everything up to the first period @@ -623,10 +619,6 @@ class Generate: extend_instructions[direction]=int(pixels) except ValueError: print(f'** invalid extension instruction. Use ..., as in "top 64 left 128 right 64 bottom 64"') - - opt.seed = seed - opt.prompt = prompt - if len(extend_instructions)>0: restorer = Outcrop(image,self,) return restorer.process ( diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 5a2d7ae97c..8e0d641870 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -864,11 +864,6 @@ class Args(object): default=32, help='When outpainting, the tile size to use for filling outpaint areas', ) - postprocessing_group.add_argument( - '--new_prompt', - type=str, - help='Change the text prompt applied during postprocessing (default, use original generation prompt)', - ) postprocessing_group.add_argument( '-ft', '--facetool', diff --git a/ldm/invoke/restoration/outcrop.py b/ldm/invoke/restoration/outcrop.py index ac0bf41b9e..1a0aaf2c8f 100644 --- a/ldm/invoke/restoration/outcrop.py +++ b/ldm/invoke/restoration/outcrop.py @@ -32,8 +32,8 @@ class Outcrop(object): image_callback(img,preferred_seed,use_prefix=prefix,**kwargs) result= self.generate.prompt2image( - opt.prompt, - seed = opt.seed if opt.seed else orig_opt.seed, + orig_opt.prompt, + seed = orig_opt.seed if orig_opt.seed>0 else opt.seed, sampler = self.generate.sampler, steps = opt.steps, cfg_scale = opt.cfg_scale, diff --git a/scripts/invoke.py b/scripts/invoke.py index 1c1d44dd16..25917f7030 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -282,7 +282,7 @@ def main_loop(gen, opt): filename = f'{prefix}.{use_prefix}.{seed}.png' tm = opt.text_mask[0] th = opt.text_mask[1] if len(opt.text_mask)>1 else 0.5 - formatted_dream_prompt = f'!mask {opt.input_file_path} -tm {tm} {th}' + formatted_dream_prompt = f'!mask {opt.prompt} -tm {tm} {th}' path = file_writer.save_image_and_prompt_to_png( image = image, dream_prompt = formatted_dream_prompt, @@ -322,7 +322,7 @@ def main_loop(gen, opt): tool = re.match('postprocess:(\w+)',opt.last_operation).groups()[0] add_postprocessing_to_metadata( opt, - opt.input_file_path, + opt.prompt, filename, tool, formatted_dream_prompt, @@ -620,16 +620,10 @@ def do_textmask(gen, opt, callback): ) def do_postprocess (gen, opt, callback): - file_path = opt.prompt # treat the prompt as the file pathname - if opt.new_prompt is not None: - opt.prompt = opt.new_prompt - else: - opt.prompt = None - + file_path = opt.prompt # treat the prompt as the file pathname if 
os.path.dirname(file_path) == '': #basename given file_path = os.path.join(opt.outdir,file_path) - opt.input_file_path = file_path tool=None if opt.facetool_strength > 0: tool = opt.facetool @@ -719,7 +713,7 @@ def prepare_image_metadata( elif len(prior_variations) > 0: formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed) elif operation == 'postprocess': - formatted_dream_prompt = '!fix '+opt.dream_prompt_str(seed=seed,prompt=opt.input_file_path) + formatted_dream_prompt = '!fix '+opt.dream_prompt_str(seed=seed) else: formatted_dream_prompt = opt.dream_prompt_str(seed=seed) return filename,formatted_dream_prompt From 78f7bef1a3f42bb6055725c11785eb4e57b988eb Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 11 Nov 2022 10:30:44 -0500 Subject: [PATCH 30/30] Revert "enable outcropping of random JPG/PNG images" This reverts commit 48aa6416dc194d0caf4ddff988d953a42dc585b1. --- ldm/generate.py | 13 ++++++------- ldm/invoke/generator/base.py | 2 +- ldm/invoke/prompt_parser.py | 2 +- ldm/invoke/restoration/outcrop.py | 5 ++--- scripts/invoke.py | 7 ++----- 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index aa4bd7fc8e..55cd5c5435 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -566,13 +566,12 @@ class Generate: args = metadata_from_png(image_path) seed = args.seed - prompt = args.prompt or '' - if seed == 0: - seed = random.randrange(0, np.iinfo(np.uint32).max) - opt.seed = seed - print(f'>> generated new seed {seed} and prompt "{prompt}" for {image_path}') - else: - print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') + prompt = args.prompt + print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') + + if not seed: + print('* Could not recover seed for image. Replacing with 42. This will not affect image quality') + seed = 42 # try to reuse the same filename prefix as the original file. # we take everything up to the first period diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index 719d08c7c0..3c6eca08a2 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -63,7 +63,7 @@ class Generator(): **kwargs ) results = [] - seed = seed if seed is not None and seed > 0 else self.new_seed() + seed = seed if seed is not None else self.new_seed() first_seed = seed seed, initial_noise = self.generate_initial_noise(seed, width, height) diff --git a/ldm/invoke/prompt_parser.py b/ldm/invoke/prompt_parser.py index 42c83188aa..3dbcc1bb4b 100644 --- a/ldm/invoke/prompt_parser.py +++ b/ldm/invoke/prompt_parser.py @@ -636,7 +636,7 @@ def split_weighted_subprompts(text, skip_normalize=False)->list: weight_sum = sum(map(lambda x: x[1], parsed_prompts)) if weight_sum == 0: print( - "* Warning: Subprompt weights add up to zero. Discarding and using even weights instead.") + "Warning: Subprompt weights add up to zero. 
Discarding and using even weights instead.") equal_weight = 1 / max(len(parsed_prompts), 1) return [(x[0], equal_weight) for x in parsed_prompts] return [(x[0], x[1] / weight_sum) for x in parsed_prompts] diff --git a/ldm/invoke/restoration/outcrop.py b/ldm/invoke/restoration/outcrop.py index 1a0aaf2c8f..b5d42250c5 100644 --- a/ldm/invoke/restoration/outcrop.py +++ b/ldm/invoke/restoration/outcrop.py @@ -28,12 +28,11 @@ class Outcrop(object): self.generate._set_sampler() def wrapped_callback(img,seed,**kwargs): - preferred_seed = orig_opt.seed if orig_opt.seed> 0 else seed - image_callback(img,preferred_seed,use_prefix=prefix,**kwargs) + image_callback(img,orig_opt.seed,use_prefix=prefix,**kwargs) result= self.generate.prompt2image( orig_opt.prompt, - seed = orig_opt.seed if orig_opt.seed>0 else opt.seed, + seed = orig_opt.seed, # uncomment to make it deterministic sampler = self.generate.sampler, steps = opt.steps, cfg_scale = opt.cfg_scale, diff --git a/scripts/invoke.py b/scripts/invoke.py index 25917f7030..cca49bcdc9 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -29,7 +29,6 @@ infile = None def main(): """Initialize command-line parsers and the diffusion model""" global infile - print('* Initializing, be patient...') opt = Args() args = opt.parse_args() @@ -47,6 +46,7 @@ def main(): print('--max_loaded_models must be >= 1; using 1') args.max_loaded_models = 1 + print('* Initializing, be patient...') from ldm.generate import Generate # these two lines prevent a horrible warning message from appearing @@ -662,10 +662,7 @@ def do_postprocess (gen, opt, callback): def add_postprocessing_to_metadata(opt,original_file,new_file,tool,command): original_file = original_file if os.path.exists(original_file) else os.path.join(opt.outdir,original_file) new_file = new_file if os.path.exists(new_file) else os.path.join(opt.outdir,new_file) - try: - meta = retrieve_metadata(original_file)['sd-metadata'] - except AttributeError: - meta = retrieve_metadata(new_file)['sd-metadata'] + meta = retrieve_metadata(original_file)['sd-metadata'] if 'image' not in meta: meta = metadata_dumps(opt,seeds=[opt.seed])['image'] meta['image'] = {}
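
For reference, the "only one default model" rule introduced in PATCH 27 can be sketched outside of `ModelCache`. The snippet below is an illustrative approximation only, not the shipped implementation: it uses a plain dict where the real code operates on the OmegaConf config held by `ldm/invoke/model_cache.py`, and the two model entries are hypothetical examples.

```python
# Minimal sketch of the single-default invariant from PATCH 27.
# A plain dict stands in for the OmegaConf config that ModelCache keeps;
# the entries below are made-up examples, not real configuration.
models = {
    'stable-diffusion-1.5': {'description': 'SD 1.5', 'default': True},
    'inpainting-1.5':       {'description': 'RunwayML inpainting'},
}

def set_default_model(config: dict, model_name: str) -> None:
    """Mark model_name as default and clear the flag everywhere else."""
    assert model_name in config, f"unknown model '{model_name}'"
    for name in config:
        config[name].pop('default', None)   # drop any stale default flag
    config[model_name]['default'] = True    # exactly one default remains

set_default_model(models, 'inpainting-1.5')
print([name for name, cfg in models.items() if cfg.get('default')])
# -> ['inpainting-1.5']
```

The point, mirroring the diff above, is that every entry's `default` flag is cleared before the chosen model's flag is set, so two models can never be marked default at the same time.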