diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py
index 3a23f890e8..b433e063d1 100644
--- a/ldm/invoke/CLI.py
+++ b/ldm/invoke/CLI.py
@@ -44,11 +44,13 @@ def main():
             print('--max_loaded_models must be >= 1; using 1')
             args.max_loaded_models = 1
 
-    # alert - setting a global here
+    # alert - setting a few globals here
     Globals.try_patchmatch = args.patchmatch
     Globals.always_use_cpu = args.always_use_cpu
     Globals.internet_available = args.internet_available and check_internet()
     Globals.disable_xformers = not args.xformers
+    Globals.ckpt_convert = args.ckpt_convert
+
     print(f'>> Internet connectivity is {Globals.internet_available}')
 
     if not args.conf:
@@ -717,11 +719,16 @@ def optimize_model(model_name_or_path:str, gen, opt, completer):
         print(f'** {model_name_or_path} is already optimized. Will not overwrite. If this is an error, please remove the directory {diffuser_path} and try again.')
         return
 
+    vae = None
+    if input('Replace this model\'s VAE with "stabilityai/sd-vae-ft-mse"? [n] ').strip() in ('y','Y'):
+        vae = dict(repo_id='stabilityai/sd-vae-ft-mse')
+
     new_config = gen.model_manager.convert_and_import(
         ckpt_path,
         diffuser_path,
         model_name=model_name,
         model_description=model_description,
+        vae = vae,
         commit_to_conf=opt.conf,
     )
     if not new_config:
diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py
index c918e4fba7..3904d2f573 100644
--- a/ldm/invoke/args.py
+++ b/ldm/invoke/args.py
@@ -503,6 +503,13 @@ class Args(object):
             help=f'Set model precision. Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}',
             default='auto',
         )
+        model_group.add_argument(
+            '--ckpt_convert',
+            action=argparse.BooleanOptionalAction,
+            dest='ckpt_convert',
+            default=False,
+            help='Load legacy ckpt files as diffusers. Pass --no-ckpt_convert to inhibit this behavior',
+        )
         model_group.add_argument(
             '--internet',
            action=argparse.BooleanOptionalAction,
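
Note on the new flag: argparse's BooleanOptionalAction (Python 3.9+) derives the negative form directly from the option string, so the opt-out spelling is --no-ckpt_convert. A minimal, self-contained sketch of the flag's behavior, separate from InvokeAI's Args class and for illustration only:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--ckpt_convert',
    action=argparse.BooleanOptionalAction,   # registers both --ckpt_convert and --no-ckpt_convert
    default=False,
)

print(parser.parse_args([]).ckpt_convert)                     # False (the default)
print(parser.parse_args(['--ckpt_convert']).ckpt_convert)     # True
print(parser.parse_args(['--no-ckpt_convert']).ckpt_convert)  # False
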
diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py
index 9b1735f831..fe56051aa3 100644
--- a/ldm/invoke/ckpt_to_diffuser.py
+++ b/ldm/invoke/ckpt_to_diffuser.py
@@ -23,6 +23,7 @@ import torch
 from pathlib import Path
 from ldm.invoke.globals import Globals, global_cache_dir
 from safetensors.torch import load_file
+from typing import Union
 
 try:
     from omegaconf import OmegaConf
@@ -46,9 +47,11 @@ from diffusers import (
 )
 from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline
-from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
+from diffusers.utils import is_safetensors_available
 from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextModel, CLIPTokenizer, CLIPVisionConfig
 
+from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline
+
 def shave_segments(path, n_shave_prefix_segments=1):
     """
     Removes segments. Positive values shave the first segments, negative shave the last segments.
@@ -318,11 +321,10 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False
     unet_key = "model.diffusion_model."
     # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
     if sum(k.startswith("model_ema") for k in keys) > 100:
-        print(f"Checkpoint {path} has both EMA and non-EMA weights.")
+        print(f" | Checkpoint {path} has both EMA and non-EMA weights.")
         if extract_ema:
             print(
-                "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
-                " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
+                ' | Extracting EMA weights (usually better for inference)'
             )
             for key in keys:
                 if key.startswith("model.diffusion_model"):
@@ -330,8 +332,7 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False
                     unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
         else:
             print(
-                "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
-                " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
+                ' | Extracting only the non-EMA weights (usually better for fine-tuning)'
             )
 
     for key in keys:
@@ -784,17 +785,44 @@ def convert_open_clip_checkpoint(checkpoint):
 
     return text_model
 
-def convert_ckpt_to_diffuser(checkpoint_path:str,
-                             dump_path:str,
-                             original_config_file:str=None,
-                             num_in_channels:int=None,
-                             scheduler_type:str='pndm',
-                             pipeline_type:str=None,
-                             image_size:int=None,
-                             prediction_type:str=None,
-                             extract_ema:bool=False,
-                             upcast_attn:bool=False,
-                             ):
+def load_pipeline_from_original_stable_diffusion_ckpt(
+        checkpoint_path:str,
+        original_config_file:str=None,
+        num_in_channels:int=None,
+        scheduler_type:str='pndm',
+        pipeline_type:str=None,
+        image_size:int=None,
+        prediction_type:str=None,
+        extract_ema:bool=True,
+        upcast_attn:bool=False,
+        vae:AutoencoderKL=None
+)->StableDiffusionGeneratorPipeline:
+    '''
+    Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
+    config file.
+
+    Although many of the arguments can be automatically inferred, some of these rely on brittle checks against the
+    global step count, which will likely fail for models that have undergone further fine-tuning. Therefore, it is
+    recommended that you override the default values and/or supply an `original_config_file` wherever possible.
+
+    :param checkpoint_path: Path to the `.ckpt` or `.safetensors` file.
+    :param original_config_file: Path to the `.yaml` config file corresponding to the original architecture.
+        If `None`, will be automatically inferred by looking for a key that only exists in SD2.0 models.
+    :param image_size: The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and
+        Stable Diffusion v2 Base. Use 768 for Stable Diffusion v2.
+    :param prediction_type: The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion
+        v1.X and Stable Diffusion v2 Base. Use `'v-prediction'` for Stable Diffusion v2.
+    :param num_in_channels: The number of input channels. If `None`, the number of input channels will be
+        automatically inferred.
+    :param scheduler_type: Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler",
+        "euler-ancestral", "dpm", "ddim"]`.
+    :param pipeline_type: The pipeline type. `None` to automatically infer, or one of
+        `["FrozenOpenCLIPEmbedder", "FrozenCLIPEmbedder", "PaintByExample"]`.
+    :param extract_ema: Only relevant for checkpoints that have both EMA and non-EMA weights. Whether to extract
+        the EMA weights or not. Defaults to `True`. EMA weights usually yield higher quality images for inference;
+        non-EMA weights are usually better for continued fine-tuning.
+    :param upcast_attn: Whether the attention computation should always be upcasted. This is necessary when
+        running Stable Diffusion 2.1.
+    :param vae: An `AutoencoderKL` to build into the pipeline in place of the VAE contained in the checkpoint.
+        If `None`, the checkpoint's own VAE is converted and used.
+    '''
     checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
     cache_dir = global_cache_dir('hub')
 
@@ -803,7 +831,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
     if "global_step" in checkpoint:
         global_step = checkpoint["global_step"]
     else:
-        print("global_step key not found in model")
+        print(" | global_step key not found in model")
         global_step = None
 
     # sometimes there is a state_dict key and sometimes not
@@ -893,12 +921,16 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
 
     unet.load_state_dict(converted_unet_checkpoint)
 
-    # Convert the VAE model.
-    vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
-    converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
+    # Convert the VAE model, or use the one passed
+    if not vae:
+        print(f' | Using checkpoint model\'s original VAE')
+        vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
+        converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
 
-    vae = AutoencoderKL(**vae_config)
-    vae.load_state_dict(converted_vae_checkpoint)
+        vae = AutoencoderKL(**vae_config)
+        vae.load_state_dict(converted_vae_checkpoint)
+    else:
+        print(f' | Using external VAE specified in config')
 
     # Convert the text model.
     model_type = pipeline_type
@@ -907,8 +939,11 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
 
     if model_type == "FrozenOpenCLIPEmbedder":
         text_model = convert_open_clip_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer",cache_dir=global_cache_dir('diffusers'))
-        pipe = StableDiffusionPipeline(
+        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
+                                                  subfolder="tokenizer",
+                                                  cache_dir=global_cache_dir('diffusers')
+                                                  )
+        pipe = StableDiffusionGeneratorPipeline(
             vae=vae,
             text_encoder=text_model,
             tokenizer=tokenizer,
@@ -933,15 +968,14 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
     elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
         text_model = convert_ldm_clip_checkpoint(checkpoint)
         tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
-        safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
         feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
-        pipe = StableDiffusionPipeline(
+        pipe = StableDiffusionGeneratorPipeline(
             vae=vae,
             text_encoder=text_model,
             tokenizer=tokenizer,
             unet=unet,
             scheduler=scheduler,
-            safety_checker=safety_checker,
+            safety_checker=None,
             feature_extractor=feature_extractor,
         )
     else:
@@ -950,7 +984,23 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
         tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
         pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
 
+    return pipe
+
+def convert_ckpt_to_diffuser(
+        checkpoint_path:Union[str,Path],
+        dump_path:Union[str,Path],
+        **kwargs,
+):
+    '''
+    Takes all the arguments of load_pipeline_from_original_stable_diffusion_ckpt(),
+    and in addition a path-like object indicating the location of the desired
+    diffusers model to be written.
+    '''
+    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
+        checkpoint_path,
+        **kwargs
+    )
     pipe.save_pretrained(
         dump_path,
-        safe_serialization=1,
+        safe_serialization=is_safetensors_available(),
     )
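
As a rough usage sketch of the two entry points above (file paths here are illustrative, not part of the change): load_pipeline_from_original_stable_diffusion_ckpt() returns an in-memory StableDiffusionGeneratorPipeline, while convert_ckpt_to_diffuser() additionally writes the converted model to dump_path:

from ldm.invoke.ckpt_to_diffuser import (
    convert_ckpt_to_diffuser,
    load_pipeline_from_original_stable_diffusion_ckpt,
)

# In-memory conversion only; nothing is written to disk.
pipe = load_pipeline_from_original_stable_diffusion_ckpt(
    checkpoint_path='models/ldm/stable-diffusion-v1/my-model.ckpt',     # illustrative path
    original_config_file='configs/stable-diffusion/v1-inference.yaml',  # illustrative path
    extract_ema=True,
)

# Convert and write a diffusers-format copy in one step.
convert_ckpt_to_diffuser(
    'models/ldm/stable-diffusion-v1/my-model.ckpt',
    'models/converted/my-model',
    original_config_file='configs/stable-diffusion/v1-inference.yaml',
)
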
diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py
index 4bc1d1ccf0..6bfa0ecd9d 100644
--- a/ldm/invoke/globals.py
+++ b/ldm/invoke/globals.py
@@ -51,6 +51,9 @@ Globals.disable_xformers = False
 # whether we are forcing full precision
 Globals.full_precision = False
 
+# whether we should convert ckpt files into diffusers models on the fly
+Globals.ckpt_convert = False
+
 def global_config_file()->Path:
     return Path(Globals.root, Globals.config_dir, Globals.models_file)
diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py
index dbc690ec54..ea8bc8d83c 100644
--- a/ldm/invoke/model_manager.py
+++ b/ldm/invoke/model_manager.py
@@ -150,6 +150,10 @@ class ModelManager(object):
         '''
         Return true if this is a legacy (.ckpt) model
         '''
+        # if we are converting legacy files automatically, then
+        # there are no legacy ckpts!
+        if Globals.ckpt_convert:
+            return False
         info = self.model_info(model_name)
         if 'weights' in info and info['weights'].endswith(('.ckpt','.safetensors')):
             return True
@@ -340,6 +344,26 @@ class ModelManager(object):
             config = os.path.join(Globals.root,config)
         if not os.path.isabs(weights):
             weights = os.path.normpath(os.path.join(Globals.root,weights))
+
+        # if converting automatically to diffusers, then we do the conversion and return
+        # a diffusers pipeline
+        if Globals.ckpt_convert:
+            print(f'>> Converting legacy checkpoint {model_name} into a diffusers model...')
+            from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt
+            vae = None
+            if vae_config := self._choose_diffusers_vae(model_name):
+                vae = self._load_vae(vae_config)
+            pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
+                checkpoint_path = weights,
+                original_config_file = config,
+                vae = vae,
+            )
+            return (
+                pipeline.to(self.device).to(torch.float16 if self.precision == 'float16' else torch.float32),
+                width,
+                height,
+                'NOHASH'
+            )
+
         # scan model
         self.scan_model(model_name, weights)
@@ -484,7 +508,7 @@ class ModelManager(object):
         return pipeline, width, height, model_hash
 
     def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path:
-        if isinstance(model_name,DictConfig):
+        if isinstance(model_name,DictConfig) or isinstance(model_name,dict):
             mconfig = model_name
         elif model_name in self.config:
             mconfig = self.config[model_name]
@@ -664,6 +688,7 @@ class ModelManager(object):
                            diffusers_path:Path,
                            model_name=None,
                            model_description=None,
+                           vae= None,
                            commit_to_conf:Path=None,
     )->dict:
         '''
@@ -681,39 +706,24 @@ class ModelManager(object):
         model_description = model_description or 'Optimized version of {model_name}'
         print(f'>> Optimizing {model_name} (30-60s)')
         try:
-            verbosity =transformers.logging.get_verbosity()
-            transformers.logging.set_verbosity_error()
-            convert_ckpt_to_diffuser(ckpt_path, diffusers_path,extract_ema=True)
-            transformers.logging.set_verbosity(verbosity)
-            print(f'>> Success. Optimized model is now located at {str(diffusers_path)}')
-            print(f'>> Writing new config file entry for {model_name}')
+            # By passing the specified VAE to the conversion function, the autoencoder
+            # will be built into the model rather than tacked on afterward via the config file
+            vae_model = self._load_vae(vae) if vae else None
+            convert_ckpt_to_diffuser(
+                ckpt_path,
+                diffusers_path,
+                extract_ema = True,
+                vae = vae_model,
+            )
+            print(f' | Success. Optimized model is now located at {str(diffusers_path)}')
+            print(f' | Writing new config file entry for {model_name}')
             new_config = dict(
                 path=str(diffusers_path),
                 description=model_description,
                 format='diffusers',
             )
-
-            # HACK (LS): in the event that the original entry is using a custom ckpt VAE, we try to
-            # map that VAE onto a diffuser VAE using a hard-coded dictionary.
-            # I would prefer to do this differently: We load the ckpt model into memory, swap the
-            # VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
-            # VAE is built into the model. However, when I tried this I got obscure key errors.
-            if model_name in self.config and (vae_ckpt_path := self.model_info(model_name)['vae']):
-                vae_basename = Path(vae_ckpt_path).stem
-                diffusers_vae = None
-                if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
-                    print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
-                    new_config.update(
-                        vae = {'repo_id': diffusers_vae}
-                    )
-                else:
-                    print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
-                    print(f'** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
-                    new_config.update(
-                        vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
-                    )
-
-            self.del_model(model_name)
+            if model_name in self.config:
+                self.del_model(model_name)
             self.add_model(model_name, new_config, True)
             if commit_to_conf:
                 self.commit(commit_to_conf)
@@ -742,6 +752,27 @@ class ModelManager(object):
 
         return search_folder, found_models
 
+    def _choose_diffusers_vae(self, model_name:str, vae:str=None)->Union[dict,str]:
+
+        # In the event that the original entry is using a custom ckpt VAE, we try to
+        # map that VAE onto a diffuser VAE using a hard-coded dictionary.
+        # I would prefer to do this differently: We load the ckpt model into memory, swap the
+        # VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
+        # VAE is built into the model. However, when I tried this I got obscure key errors.
+        if vae:
+            return vae
+        if model_name in self.config and (vae_ckpt_path := self.model_info(model_name).get('vae',None)):
+            vae_basename = Path(vae_ckpt_path).stem
+            diffusers_vae = None
+            if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
+                print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
+                vae = {'repo_id': diffusers_vae}
+            else:
+                print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
+                print('** Using "stabilityai/sd-vae-ft-mse"; if this isn\'t right, please edit the model config')
+                vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
+        return vae
+
     def _make_cache_room(self) -> None:
         num_loaded_models = len(self.models)
         if num_loaded_models >= self.max_loaded_models:
@@ -976,7 +1007,7 @@ class ModelManager(object):
             f.write(hash)
         return hash
 
-    def _load_vae(self, vae_config):
+    def _load_vae(self, vae_config)->AutoencoderKL:
         vae_args = {}
         name_or_path = self.model_name_or_path(vae_config)
         using_fp16 = self.precision == 'float16'
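
The optimize_model() flow in CLI.py above reduces to a single ModelManager call; a sketch, assuming an already-constructed ModelManager instance named mgr (file paths, the model name, and the config path are all illustrative):

from pathlib import Path

new_config = mgr.convert_and_import(
    Path('models/ldm/stable-diffusion-v1/my-model.ckpt'),  # ckpt to convert
    Path('models/converted/my-model'),                     # destination diffusers folder
    model_name='my-model',
    model_description='Diffusers version of my-model',
    vae=dict(repo_id='stabilityai/sd-vae-ft-mse'),         # built into the converted pipeline
    commit_to_conf='configs/models.yaml',
)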