Load legacy ckpt files as diffusers models (#2468)

* refactor ckpt_to_diffuser to allow converted pipeline to remain in memory

- This idea was introduced by Damian
- Note that although I attempted to use the updated HuggingFace module
  pipelines/stable_diffusion/convert_from_ckpt.py, it was unable to
  convert safetensors files for reasons I didn't dig into.
- Default is to extract EMA weights (see the usage sketch below).
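
A minimal sketch of the resulting in-memory API, based on the call visible in
the diff below; the file paths are hypothetical placeholders:

    from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt

    # Convert a legacy checkpoint straight into a diffusers pipeline without
    # writing anything to disk. EMA weights are extracted by default.
    pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
        checkpoint_path='models/ldm/stable-diffusion-v1/model.ckpt',        # hypothetical path
        original_config_file='configs/stable-diffusion/v1-inference.yaml',  # hypothetical path
        vae=None,  # optionally pass a pre-loaded AutoencoderKL (see below)
    )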

* add --ckpt_convert option to load legacy ckpt files as diffusers models

- not quite working yet - I'm getting artifacts and glitches in the
  converted diffusers models
- leave as draft for the time being (usage sketch below)
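
The flag can also be exercised from Python; a hedged sketch, assuming the
Globals object that the diff below reads from (the import path is the
module's usual location in this codebase):

    from ldm.invoke.globals import Globals

    # Equivalent to launching with --ckpt_convert: every legacy .ckpt model
    # is converted to a diffusers pipeline on load, so is_legacy() is False.
    Globals.ckpt_convert = True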

* do not include safety checker in converted files

* add ability to control which vae is used

The API now allows the caller to pass an external VAE model to the
checkpoint conversion process. In this way, if an external VAE is
specified in the checkpoint's config stanza, that VAE will be used
when constructing the diffusers model (see the sketch below).

Tested with both regular and inpainting 1.X models.

Not tested with SD 2.X models!
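
A hedged sketch of the extended conversion call, assuming a previously
constructed ModelManager instance named `manager`; `convert_and_import()` and
its new `vae` parameter appear in the diff below, while the argument values
are illustrative:

    # Bake an external VAE into the converted diffusers model instead of
    # tacking it on afterward via the config file.
    manager.convert_and_import(
        ckpt_path='models/ldm/stable-diffusion-v1/custom.ckpt',  # hypothetical
        diffusers_path='models/converted/custom',                # hypothetical
        model_name='custom',
        vae={'repo_id': 'stabilityai/sd-vae-ft-mse'},
        commit_to_conf='configs/models.yaml',                    # hypothetical
    )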

---------

Co-authored-by: Jonathan <34005131+JPPhoto@users.noreply.github.com>
Co-authored-by: Damian Stewart <null@damianstewart.com>
Author: Lincoln Stein
Date: 2023-02-02 15:15:44 -05:00 (committed by GitHub)
parent 1fe5ec32f5
commit fc3378bb74
5 changed files with 158 additions and 60 deletions


@@ -150,6 +150,10 @@ class ModelManager(object):
         '''
         Return true if this is a legacy (.ckpt) model
         '''
+        # if we are converting legacy files automatically, then
+        # there are no legacy ckpts!
+        if Globals.ckpt_convert:
+            return False
         info = self.model_info(model_name)
         if 'weights' in info and info['weights'].endswith(('.ckpt','.safetensors')):
             return True
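With the flag set, callers that branch on is_legacy() transparently fall
through to the diffusers loading path; a hedged illustration (`manager` is a
hypothetical ModelManager instance, construction omitted):

    from ldm.invoke.globals import Globals

    Globals.ckpt_convert = True
    # a .ckpt model now reports as non-legacy; the model name is hypothetical
    assert not manager.is_legacy('my-1.5-checkpoint')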
@@ -340,6 +344,26 @@ class ModelManager(object):
             config = os.path.join(Globals.root,config)
         if not os.path.isabs(weights):
             weights = os.path.normpath(os.path.join(Globals.root,weights))
+
+        # if converting automatically to diffusers, then we do the conversion and return
+        # a diffusers pipeline
+        if Globals.ckpt_convert:
+            print(f'>> Converting legacy checkpoint {model_name} into a diffusers model...')
+            from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt
+            vae = None  # ensure vae is defined even when no custom VAE is configured
+            if vae_config := self._choose_diffusers_vae(model_name):
+                vae = self._load_vae(vae_config)
+            pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
+                checkpoint_path = weights,
+                original_config_file = config,
+                vae = vae,
+            )
+            return (
+                pipeline.to(self.device).to(torch.float16 if self.precision == 'float16' else torch.float32),
+                width,
+                height,
+                'NOHASH'
+            )

         # scan model
         self.scan_model(model_name, weights)
@@ -484,7 +508,7 @@ class ModelManager(object):
         return pipeline, width, height, model_hash

     def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path:
-        if isinstance(model_name,DictConfig):
+        if isinstance(model_name,DictConfig) or isinstance(model_name,dict):
             mconfig = model_name
         elif model_name in self.config:
             mconfig = self.config[model_name]
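Accepting a plain dict here matters because _choose_diffusers_vae() (further
down) returns entries like {'repo_id': ...}; a hedged sketch of the
resolution, assuming the usual repo_id handling inside model_name_or_path():

    vae_config = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
    name_or_path = manager.model_name_or_path(vae_config)
    # -> 'stabilityai/sd-vae-ft-mse' (assumed; resolved from the repo_id key)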
@@ -664,6 +688,7 @@ class ModelManager(object):
                           diffusers_path:Path,
                           model_name=None,
                           model_description=None,
+                          vae = None,
                           commit_to_conf:Path=None,
                           )->dict:
         '''
@@ -681,39 +706,24 @@ class ModelManager(object):
         model_description = model_description or f'Optimized version of {model_name}'
         print(f'>> Optimizing {model_name} (30-60s)')
         try:
-            verbosity = transformers.logging.get_verbosity()
-            transformers.logging.set_verbosity_error()
-            convert_ckpt_to_diffuser(ckpt_path, diffusers_path, extract_ema=True)
-            transformers.logging.set_verbosity(verbosity)
-            print(f'>> Success. Optimized model is now located at {str(diffusers_path)}')
-            print(f'>> Writing new config file entry for {model_name}')
+            # By passing the specified VAE to the conversion function, the autoencoder
+            # will be built into the model rather than tacked on afterward via the config file
+            vae_model = self._load_vae(vae) if vae else None
+            convert_ckpt_to_diffuser(
+                ckpt_path,
+                diffusers_path,
+                extract_ema = True,
+                vae = vae_model,
+            )
+            print(f' | Success. Optimized model is now located at {str(diffusers_path)}')
+            print(f' | Writing new config file entry for {model_name}')
             new_config = dict(
                 path=str(diffusers_path),
                 description=model_description,
                 format='diffusers',
             )
-            # HACK (LS): in the event that the original entry is using a custom ckpt VAE, we try to
-            # map that VAE onto a diffuser VAE using a hard-coded dictionary.
-            # I would prefer to do this differently: We load the ckpt model into memory, swap the
-            # VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
-            # VAE is built into the model. However, when I tried this I got obscure key errors.
-            if model_name in self.config and (vae_ckpt_path := self.model_info(model_name)['vae']):
-                vae_basename = Path(vae_ckpt_path).stem
-                diffusers_vae = None
-                if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
-                    print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
-                    new_config.update(
-                        vae = {'repo_id': diffusers_vae}
-                    )
-                else:
-                    print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
-                    print(f'** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
-                    new_config.update(
-                        vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
-                    )
-            self.del_model(model_name)
+            if model_name in self.config:
+                self.del_model(model_name)
             self.add_model(model_name, new_config, True)
             if commit_to_conf:
                 self.commit(commit_to_conf)
@@ -742,6 +752,27 @@ class ModelManager(object):
         return search_folder, found_models

+    def _choose_diffusers_vae(self, model_name:str, vae:str=None)->Union[dict,str]:
+        # In the event that the original entry is using a custom ckpt VAE, we try to
+        # map that VAE onto a diffuser VAE using a hard-coded dictionary.
+        # I would prefer to do this differently: We load the ckpt model into memory, swap the
+        # VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
+        # VAE is built into the model. However, when I tried this I got obscure key errors.
+        if vae:
+            return vae
+        if model_name in self.config and (vae_ckpt_path := self.model_info(model_name).get('vae',None)):
+            vae_basename = Path(vae_ckpt_path).stem
+            diffusers_vae = None
+            if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
+                print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
+                vae = {'repo_id': diffusers_vae}
+            else:
+                print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
+                print('** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
+                vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
+        return vae
+
     def _make_cache_room(self) -> None:
         num_loaded_models = len(self.models)
         if num_loaded_models >= self.max_loaded_models:
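VAE_TO_REPO_ID is referenced above but defined elsewhere in the module; a
hedged sketch of the kind of entry it must contain (the mapping below is an
illustrative assumption, not the committed table):

    VAE_TO_REPO_ID = {
        # maps the stem of a known custom VAE checkpoint to a diffusers repo id
        'vae-ft-mse-840000-ema-pruned': 'stabilityai/sd-vae-ft-mse',
    }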
@@ -976,7 +1007,7 @@ class ModelManager(object):
             f.write(hash)
         return hash

-    def _load_vae(self, vae_config):
+    def _load_vae(self, vae_config)->AutoencoderKL:
         vae_args = {}
         name_or_path = self.model_name_or_path(vae_config)
         using_fp16 = self.precision == 'float16'
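
The using_fp16 flag above feeds the usual diffusers loading pattern; a hedged
sketch of how such a loader might assemble its arguments (the committed body
of _load_vae is not part of this diff):

    import torch
    from diffusers import AutoencoderKL

    vae_args = {}
    if using_fp16:
        # request half-precision weights when the manager runs in float16
        vae_args.update(torch_dtype=torch.float16)
    vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args)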