mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

Merge branch 'main' into 2.3-documentation-fixes

@@ -712,10 +712,12 @@ def _get_model_name_and_desc(model_manager,completer,model_name:str='',model_des
 def optimize_model(model_name_or_path:str, gen, opt, completer):
     manager = gen.model_manager
     ckpt_path = None
+    original_config_file = None

     if (model_info := manager.model_info(model_name_or_path)):
         if 'weights' in model_info:
             ckpt_path = Path(model_info['weights'])
+            original_config_file = Path(model_info['config'])
             model_name = model_name_or_path
             model_description = model_info['description']
         else:
@@ -723,12 +725,18 @@ def optimize_model(model_name_or_path:str, gen, opt, completer):
             return
     elif os.path.exists(model_name_or_path):
         ckpt_path = Path(model_name_or_path)
-        model_name,model_description = _get_model_name_and_desc(
+        model_name, model_description = _get_model_name_and_desc(
             manager,
             completer,
             ckpt_path.stem,
             f'Converted model {ckpt_path.stem}'
         )
+        is_inpainting = input('Is this an inpainting model? [n] ').startswith(('y','Y'))
+        original_config_file = Path(
+            'configs',
+            'stable-diffusion',
+            'v1-inpainting-inference.yaml' if is_inpainting else 'v1-inference.yaml'
+        )
     else:
         print(f'** {model_name_or_path} is neither an existing model nor the path to a .ckpt file')
         return
@@ -736,6 +744,9 @@ def optimize_model(model_name_or_path:str, gen, opt, completer):
     if not ckpt_path.is_absolute():
         ckpt_path = Path(Globals.root,ckpt_path)

+    if original_config_file and not original_config_file.is_absolute():
+        original_config_file = Path(Globals.root,original_config_file)
+
     diffuser_path = Path(Globals.root, 'models',Globals.converted_ckpts_dir,model_name)
     if diffuser_path.exists():
         print(f'** {model_name_or_path} is already optimized. Will not overwrite. If this is an error, please remove the directory {diffuser_path} and try again.')
@@ -751,6 +762,7 @@ def optimize_model(model_name_or_path:str, gen, opt, completer):
         model_name=model_name,
         model_description=model_description,
         vae = vae,
+        original_config_file = original_config_file,
         commit_to_conf=opt.conf,
     )
     if not new_config:
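
The new interactive branch above maps a yes/no answer onto one of the two legacy v1 config files. A standalone sketch of that selection, assuming paths are resolved against the InvokeAI root as in the diff (the pick_v1_config helper name is illustrative, not part of the commit):

    from pathlib import Path

    def pick_v1_config(is_inpainting: bool) -> Path:
        # Inpainting checkpoints are trained with extra mask channels and
        # need their own inference config.
        return Path('configs', 'stable-diffusion',
                    'v1-inpainting-inference.yaml' if is_inpainting else 'v1-inference.yaml')

    answer = input('Is this an inpainting model? [n] ')
    print(pick_v1_config(answer.startswith(('y', 'Y'))))
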
@@ -1 +1 @@
-__version__='2.3.0-rc4'
+__version__='2.3.0-rc5'

@@ -22,7 +22,11 @@ import re
 import torch
 import warnings
 from pathlib import Path
-from ldm.invoke.globals import Globals, global_cache_dir
+from ldm.invoke.globals import (
+    Globals,
+    global_cache_dir,
+    global_config_dir,
+)
 from safetensors.torch import load_file
 from typing import Union

@@ -826,7 +830,7 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     :param upcast_attention: Whether the attention computation should always be upcasted. This is necessary when
         running stable diffusion 2.1.
     '''

     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         verbosity = dlogging.get_verbosity()
@@ -852,13 +856,16 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"

         if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
-            original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v2-inference-v.yaml')
+            original_config_file = global_config_dir() / 'stable-diffusion' / 'v2-inference-v.yaml'

             if global_step == 110000:
                 # v2.1 needs to upcast attention
                 upcast_attention = True
+        elif str(checkpoint_path).lower().find('inpaint') >= 0: # brittle - please pass original_config_file parameter!
+            print(f' | checkpoint has "inpaint" in name, assuming an inpainting model')
+            original_config_file = global_config_dir() / 'stable-diffusion' / 'v1-inpainting-inference.yaml'
         else:
-            original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v1-inference.yaml')
+            original_config_file = global_config_dir() / 'stable-diffusion' / 'v1-inference.yaml'

     original_config = OmegaConf.load(original_config_file)
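
For context on the branch above: the heuristic distinguishes SD-2.x from SD-1.x checkpoints by the width of a cross-attention key projection, since SD-2.x conditions on 1024-dimensional OpenCLIP embeddings while SD-1.x uses 768-dimensional CLIP embeddings. A minimal standalone sketch of the same check (detect_sd_version and the plain torch.load call are illustrative, not part of the commit):

    import torch

    def detect_sd_version(checkpoint_path: str) -> str:
        # Load the raw state dict; .ckpt files usually nest it under 'state_dict'.
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        state_dict = checkpoint.get('state_dict', checkpoint)
        key = 'model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight'
        # 1024-wide context => OpenCLIP => SD-2.x; otherwise assume SD-1.x.
        if key in state_dict and state_dict[key].shape[-1] == 1024:
            return 'v2'
        return 'v1'
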
@@ -122,6 +122,11 @@ class Generator:

             seed = self.new_seed()

+            # Free up memory from the last generation.
+            clear_cuda_cache = kwargs['clear_cuda_cache'] or None
+            if clear_cuda_cache is not None:
+                clear_cuda_cache()
+
         return results

     def sample_to_image(self,samples)->Image.Image:
@@ -240,7 +245,12 @@ class Generator:

     def get_perlin_noise(self,width,height):
         fixdevice = 'cpu' if (self.model.device.type == 'mps') else self.model.device
-        noise = torch.stack([rand_perlin_2d((height, width), (8, 8), device = self.model.device).to(fixdevice) for _ in range(self.latent_channels)], dim=0).to(self.model.device)
+        # limit noise to only the diffusion image channels, not the mask channels
+        input_channels = min(self.latent_channels, 4)
+        noise = torch.stack([
+            rand_perlin_2d((height, width),
+                           (8, 8),
+                           device = self.model.device).to(fixdevice) for _ in range(input_channels)], dim=0).to(self.model.device)
         return noise

     def new_seed(self):
@@ -341,3 +351,27 @@ class Generator:

     def torch_dtype(self)->torch.dtype:
         return torch.float16 if self.precision == 'float16' else torch.float32
+
+    # returns a tensor filled with random numbers from a normal distribution
+    def get_noise(self,width,height):
+        device = self.model.device
+        # limit noise to only the diffusion image channels, not the mask channels
+        input_channels = min(self.latent_channels, 4)
+        if self.use_mps_noise or device.type == 'mps':
+            x = torch.randn([1,
+                             input_channels,
+                             height // self.downsampling_factor,
+                             width // self.downsampling_factor],
+                            dtype=self.torch_dtype(),
+                            device='cpu').to(device)
+        else:
+            x = torch.randn([1,
+                             input_channels,
+                             height // self.downsampling_factor,
+                             width // self.downsampling_factor],
+                            dtype=self.torch_dtype(),
+                            device=device)
+        if self.perlin > 0.0:
+            perlin_noise = self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)
+            x = (1-self.perlin)*x + self.perlin*perlin_noise
+        return x
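
Two things are going on in the Generator changes above: noise is now restricted to the first four latent channels (inpainting UNets carry extra mask and masked-image channels that should not receive initial noise), and the new shared get_noise blends Gaussian and Perlin noise linearly. On MPS the Gaussian draw happens on CPU first and is then moved, so a given seed reproduces the same tensor as on other backends. A small sketch of the blend with stand-in tensors (the torch.rand line stands in for rand_perlin_2d, which is not reproduced here):

    import torch

    latent_channels = 9                         # e.g. an inpainting model
    input_channels = min(latent_channels, 4)    # noise only the diffusion image channels
    perlin = 0.5                                # blend weight from the --perlin option

    gaussian = torch.randn(1, input_channels, 64, 64)
    perlin_noise = torch.rand(1, input_channels, 64, 64) * 2 - 1  # stand-in for rand_perlin_2d

    # Linear interpolation between the two noise sources, as in the diff above.
    x = (1 - perlin) * gaussian + perlin * perlin_noise
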
@@ -317,7 +317,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             # fix is in https://github.com/kulinseth/pytorch/pull/222 but no idea when it will get merged to pytorch mainline.
             pass
         else:
-            self.enable_attention_slicing(slice_size='auto')
+            self.enable_attention_slicing(slice_size='max')

     def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int,
                               conditioning_data: ConditioningData,
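
The one-line change above trades speed for memory: in diffusers, slice_size='auto' splits attention across half the heads per step, while 'max' computes one slice at a time, minimizing peak memory at some speed cost. Assuming a stock diffusers pipeline, the call looks like:

    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5')
    # 'max' = one attention slice at a time: lowest peak memory, slowest option.
    pipe.enable_attention_slicing(slice_size='max')
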
@@ -63,22 +63,3 @@ class Img2Img(Generator):
         shape = like.shape
         x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2])
         return x
-
-    def get_noise(self,width,height):
-        # copy of the Txt2Img.get_noise
-        device = self.model.device
-        if self.use_mps_noise or device.type == 'mps':
-            x = torch.randn([1,
-                             self.latent_channels,
-                             height // self.downsampling_factor,
-                             width // self.downsampling_factor],
-                            device='cpu').to(device)
-        else:
-            x = torch.randn([1,
-                             self.latent_channels,
-                             height // self.downsampling_factor,
-                             width // self.downsampling_factor],
-                            device=device)
-        if self.perlin > 0.0:
-            x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)
-        return x

@@ -51,26 +51,4 @@ class Txt2Img(Generator):
         return make_image


-    # returns a tensor filled with random numbers from a normal distribution
-    def get_noise(self,width,height):
-        device = self.model.device
-        # limit noise to only the diffusion image channels, not the mask channels
-        input_channels = min(self.latent_channels, 4)
-        if self.use_mps_noise or device.type == 'mps':
-            x = torch.randn([1,
-                             input_channels,
-                             height // self.downsampling_factor,
-                             width // self.downsampling_factor],
-                            dtype=self.torch_dtype(),
-                            device='cpu').to(device)
-        else:
-            x = torch.randn([1,
-                             input_channels,
-                             height // self.downsampling_factor,
-                             width // self.downsampling_factor],
-                            dtype=self.torch_dtype(),
-                            device=device)
-        if self.perlin > 0.0:
-            x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)
-        return x

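
The two deletions above are the flip side of the base-class addition earlier in this commit: the near-duplicate get_noise bodies in Img2Img and Txt2Img are dropped so both classes inherit the single Generator implementation. A minimal sketch of that pull-up pattern (class bodies abbreviated for illustration):

    import torch

    class Generator:
        downsampling_factor = 8
        latent_channels = 4

        def get_noise(self, width: int, height: int) -> torch.Tensor:
            # One shared implementation; subclasses no longer override it.
            return torch.randn(1, min(self.latent_channels, 4),
                               height // self.downsampling_factor,
                               width // self.downsampling_factor)

    class Txt2Img(Generator):
        pass  # inherits get_noise

    class Img2Img(Generator):
        pass  # inherits get_noise
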
@@ -65,6 +65,11 @@ class Txt2Img2Img(Generator):
             mode="bilinear"
         )

+        # Free up memory from the last generation.
+        clear_cuda_cache = kwargs['clear_cuda_cache'] or None
+        if clear_cuda_cache is not None:
+            clear_cuda_cache()
+
         second_pass_noise = self.get_noise_like(resized_latents)

         verbosity = get_verbosity()
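
The repeated clear_cuda_cache block threads an optional callback through **kwargs so generators can release GPU memory between passes without importing backend-specific code. A sketch of the pattern as a caller might use it (this sketch uses kwargs.get for the optional lookup, which also tolerates a missing key; torch.cuda.empty_cache is one callback a CUDA caller could pass):

    import torch

    def generate(**kwargs):
        # Free memory from the previous generation, if the caller supplied a callback.
        clear_cuda_cache = kwargs.get('clear_cuda_cache')
        if clear_cuda_cache is not None:
            clear_cuda_cache()
        # ... rest of the generation pass ...

    if torch.cuda.is_available():
        generate(clear_cuda_cache=torch.cuda.empty_cache)
    else:
        generate()
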
File diff suppressed because it is too large