all files migrated; tweaks needed

2024-08-30 20:32:17 +00:00 · 2023-03-03 00:02:15 -05:00
parent 3f0b0f3250
commit 6a990565ff
496 changed files with 276 additions and 934 deletions
--- a/invokeai/backend/init.py
+++ b/invokeai/backend/init.py
@ -1,8 +1,8 @@
 '''
 Initialization file for invokeai.backend
 '''
-# this is causing circular import issues
-# from .invoke_ai_web_server import InvokeAIWebServer
-from .model_manager import ModelManager
+from .model_management import ModelManager
+from .generate import Generate
+


--- a/invokeai/backend/args.py
+++ b/invokeai/backend/args.py
--- a/invokeai/backend/modules/init.py
+++ b/invokeai/backend/modules/init.py
--- a/invokeai/backend/config/invokeai_configure.py
+++ b/invokeai/backend/config/invokeai_configure.py
@ -0,0 +1,860 @@
+#!/usr/bin/env python
+# Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
+# Before running stable-diffusion on an internet-isolated machine,
+# run this script from one with internet connectivity. The
+# two machines must share a common .cache directory.
+#
+# Coauthor: Kevin Turner http://github.com/keturn
+#
+print("Loading Python libraries...\n")
+import argparse
+import io
+import os
+import re
+import shutil
+import sys
+import traceback
+import warnings
+from argparse import Namespace
+from pathlib import Path
+from urllib import request
+from shutil import get_terminal_size
+
+import npyscreen
+import torch
+import transformers
+from diffusers import AutoencoderKL
+from huggingface_hub import HfFolder
+from huggingface_hub import login as hf_hub_login
+from omegaconf import OmegaConf
+from tqdm import tqdm
+from transformers import (
+    AutoProcessor,
+    CLIPSegForImageSegmentation,
+    CLIPTextModel,
+    CLIPTokenizer,
+)
+
+import invokeai.configs as configs
+
+from ..args import PRECISION_CHOICES, Args
+from ..globals import Globals, global_config_dir, global_config_file, global_cache_dir
+from ...frontend.config.model_install import addModelsForm, process_and_execute
+from .model_install_backend import (
+    default_dataset,
+    download_from_hf,
+    recommended_datasets,
+    hf_download_with_resume,
+)
+from ...frontend.config.widgets import IntTitleSlider, CenteredButtonPress, set_min_terminal_size
+
+
+# Silence all Python warnings for the whole process from this point on.
+warnings.filterwarnings("ignore")
+
+# Restrict the transformers library's own logging to errors.
+transformers.logging.set_verbosity_error()
+
+# --------------------------globals-----------------------
+# Path fragments; download_vaes() joins them onto Globals.root.
+Model_dir = "models"
+Weights_dir = "ldm/stable-diffusion-v1/"
+
+# the initial "configs" dir is now bundled in the `invokeai.configs` package
+Dataset_path = Path(configs.__path__[0]) / "INITIAL_MODELS.yaml"
+
+# Both paths are computed once, at import time, from global_config_dir().
+Default_config_file = Path(global_config_dir()) / "models.yaml"
+SD_Configs = Path(global_config_dir()) / "stable-diffusion"
+
+# NOTE: the bundled INITIAL_MODELS.yaml is parsed at import time.
+Datasets = OmegaConf.load(Dataset_path)
+
+# minimum size for the UI
+MIN_COLS = 135
+MIN_LINES = 45
+
+# Header written to a freshly created init file (see write_opts()).
+# This is a raw string because the Windows example path below contains
+# backslashes ("\d", "\i") that would otherwise be invalid escape sequences;
+# the resulting text is unchanged.
+INIT_FILE_PREAMBLE = r"""# InvokeAI initialization file
+# This is the InvokeAI initialization file, which contains command-line default values.
+# Feel free to edit. If anything goes wrong, you can re-initialize this file by deleting
+# or renaming it and then running invokeai-configure again.
+# Place  frequently-used startup commands here, one or more per line.
+# Examples:
+# --outdir=D:\data\images
+# --no-nsfw_checker
+# --web --host=0.0.0.0
+# --steps=20
+# -Ak_euler_a -C10.0
+"""
+
+# --------------------------------------------
+def postscript(errors: "set[str] | None" = None):
+    """
+    Print the closing message of the configuration run.
+
+    :param errors: iterable of error descriptions collected during the run;
+        ``None`` or an iterable with no truthy entries means success.
+
+    On success the launch instructions are printed; otherwise every entry
+    of ``errors`` is listed beneath a warning.
+    """
+    # Materialize once so that None and one-shot iterables are both handled.
+    errors = list(errors or ())
+    if not any(errors):
+        message = f"""
+** INVOKEAI INSTALLATION SUCCESSFUL **
+If you installed manually from source or with 'pip install': activate the virtual environment
+then run one of the following commands to start InvokeAI.
+
+Web UI:
+   invokeai --web # (connect to http://localhost:9090)
+   invokeai --web --host 0.0.0.0 # (connect to http://your-lan-ip:9090 from another computer on the local network)
+
+Command-line interface:
+   invokeai
+
+If you installed using an installation script, run:
+  {Globals.root}/invoke.{"bat" if sys.platform == "win32" else "sh"}
+
+Add the '--help' argument to see all of the command-line switches available for use.
+"""
+
+    else:
+        message = "\n** There were errors during installation. It is possible some of the models were not fully downloaded.\n"
+        for err in errors:
+            message += f"\t - {err}\n"
+        message += "Please check the logs above and correct any issues."
+
+    print(message)
+
+
+# ---------------------------------------------
+def yes_or_no(prompt: str, default_yes=True):
+    """
+    Ask a yes/no question on the console and return the answer as a bool.
+
+    An empty reply selects the default. Only the first character of the
+    reply is examined: when the default is "yes", anything not starting
+    with n/N counts as yes; when the default is "no", only a reply starting
+    with y/Y counts as yes.
+    """
+    fallback = "n"
+    if default_yes:
+        fallback = "y"
+    answer = input(f"{prompt} [{fallback}] ") or fallback
+    initial = answer[0]
+    if not default_yes:
+        return initial in ("y", "Y")
+    return initial not in ("n", "N")
+
+
+# ---------------------------------------------
+def HfLogin(access_token) -> None:
+    """
+    Helper for logging in to Huggingface
+    The stdout capture is needed to hide the irrelevant "git credential helper" warning
+
+    :param access_token: token passed to ``huggingface_hub.login``.
+    :raises Exception: whatever the login call raises; the exception is
+        printed to the (restored) stdout and then re-raised.
+    """
+    # Imported locally so this block does not depend on a file-level import.
+    from contextlib import redirect_stdout
+
+    try:
+        # redirect_stdout restores whatever stream was installed before,
+        # on success and on error alike, instead of forcing sys.__stdout__.
+        with redirect_stdout(io.StringIO()):
+            hf_hub_login(token=access_token, add_to_git_credential=False)
+    except Exception as exc:
+        print(exc)
+        raise
+
+
+# -------------------------------------
+class ProgressBar:
+    """
+    Progress callback with the ``(block_num, block_size, total_size)``
+    signature; download_with_progress_bar() passes an instance to
+    ``urllib.request.urlretrieve`` as its report hook.
+
+    The tqdm bar is created lazily on the first call, because the total
+    size is only known once the hook is invoked.
+    """
+
+    def __init__(self, model_name="file"):
+        self.pbar = None
+        self.name = model_name
+
+    def __call__(self, block_num, block_size, total_size):
+        # Compare with None explicitly: tqdm defines its own truthiness based
+        # on its total, so "not self.pbar" could be true for an existing bar
+        # and a new bar would be created on every call.
+        if self.pbar is None:
+            self.pbar = tqdm(
+                desc=self.name,
+                initial=0,
+                unit="iB",
+                unit_scale=True,
+                unit_divisor=1000,
+                # A non-positive size means the length is unknown.
+                total=total_size if total_size > 0 else None,
+            )
+        self.pbar.update(block_size)
+
+
+# ---------------------------------------------
+def download_with_progress_bar(model_url: str, model_dest: str, label: str = "the"):
+    """
+    Download ``model_url`` to ``model_dest`` unless the destination exists.
+
+    :param model_url: URL of the file to fetch.
+    :param model_dest: path of the file to create.
+    :param label: name used in the progress messages.
+
+    The data is first written to ``<model_dest>.part`` and renamed only after
+    the transfer completes, so an interrupted download cannot leave a
+    truncated file that later runs would report as "exists".
+    Failures are reported on stderr and never raised to the caller.
+    """
+    try:
+        print(f"Installing {label} model file {model_url}...", end="", file=sys.stderr)
+        if os.path.exists(model_dest):
+            print("...exists", file=sys.stderr)
+            return
+
+        dest_dir = os.path.dirname(model_dest)
+        if dest_dir:  # os.makedirs("") would raise for a bare file name
+            os.makedirs(dest_dir, exist_ok=True)
+
+        partial_dest = f"{model_dest}.part"
+        try:
+            request.urlretrieve(
+                model_url, partial_dest, ProgressBar(os.path.basename(model_dest))
+            )
+            os.replace(partial_dest, model_dest)
+        finally:
+            # Only present here if the download or the rename failed.
+            if os.path.exists(partial_dest):
+                os.remove(partial_dest)
+        print("...downloaded successfully", file=sys.stderr)
+    except Exception:
+        print("...download failed", file=sys.stderr)
+        print(f"Error downloading {label} model", file=sys.stderr)
+        print(traceback.format_exc(), file=sys.stderr)
+
+
+# ---------------------------------------------
+# this will preload the Bert tokenizer files
+def download_bert():
+    """Fetch the "bert-base-uncased" fast tokenizer via download_from_hf()."""
+    print("Installing bert tokenizer...", file=sys.stderr)
+    bert_repo = "bert-base-uncased"
+    # DeprecationWarnings are suppressed only while importing/downloading.
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=DeprecationWarning)
+        from transformers import BertTokenizerFast
+
+        download_from_hf(BertTokenizerFast, bert_repo)
+
+
+# ---------------------------------------------
+def download_sd1_clip():
+    """Fetch the tokenizer and text encoder of "openai/clip-vit-large-patch14"."""
+    print("Installing SD1 clip model...", file=sys.stderr)
+    clip_repo = "openai/clip-vit-large-patch14"
+    for model_class in (CLIPTokenizer, CLIPTextModel):
+        download_from_hf(model_class, clip_repo)
+
+# ---------------------------------------------
+def download_sd2_clip():
+    """
+    Fetch the tokenizer and text encoder stored in the 'tokenizer' and
+    'text_encoder' subfolders of 'stabilityai/stable-diffusion-2'.
+    """
+    sd2_repo = 'stabilityai/stable-diffusion-2'
+    print("Installing SD2 clip model...", file=sys.stderr)
+    components = (
+        (CLIPTokenizer, 'tokenizer'),
+        (CLIPTextModel, 'text_encoder'),
+    )
+    for model_class, folder in components:
+        download_from_hf(model_class, sd2_repo, subfolder=folder)
+
+# ---------------------------------------------
+def download_realesrgan():
+    """Download the two RealESRGAN weight files into <root>/models/realesrgan."""
+    print("Installing models from RealESRGAN...", file=sys.stderr)
+    # (url, path relative to Globals.root, progress label), in download order
+    targets = (
+        (
+            "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
+            "models/realesrgan/realesr-general-x4v3.pth",
+            "RealESRGAN",
+        ),
+        (
+            "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth",
+            "models/realesrgan/realesr-general-wdn-x4v3.pth",
+            "RealESRGANwdn",
+        ),
+    )
+    # Resolve both destinations before starting the first download.
+    resolved = [
+        (url, os.path.join(Globals.root, rel_path), tag)
+        for url, rel_path, tag in targets
+    ]
+    for url, dest, tag in resolved:
+        download_with_progress_bar(url, dest, tag)
+
+
+def download_gfpgan():
+    """Download the GFPGAN weights and the two facexlib helper models."""
+    print("Installing GFPGAN models...", file=sys.stderr)
+    # (url, destination relative to Globals.root)
+    weight_files = (
+        (
+            "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth",
+            "./models/gfpgan/GFPGANv1.4.pth",
+        ),
+        (
+            "https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth",
+            "./models/gfpgan/weights/detection_Resnet50_Final.pth",
+        ),
+        (
+            "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth",
+            "./models/gfpgan/weights/parsing_parsenet.pth",
+        ),
+    )
+    for url, rel_path in weight_files:
+        dest = os.path.join(Globals.root, rel_path)
+        download_with_progress_bar(url, dest, "GFPGAN weights")
+
+
+# ---------------------------------------------
+def download_codeformer():
+    """Download codeformer.pth into <root>/models/codeformer."""
+    print("Installing CodeFormer model file...", file=sys.stderr)
+    download_with_progress_bar(
+        "https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth",
+        os.path.join(Globals.root, "models/codeformer/codeformer.pth"),
+        "CodeFormer",
+    )
+
+
+# ---------------------------------------------
+def download_clipseg():
+    """
+    Fetch the processor and segmentation model of "CIDAS/clipseg-rd64-refined".
+    Any exception is printed with its traceback and not propagated.
+    """
+    print("Installing clipseg model for text-based masking...", file=sys.stderr)
+    clipseg_repo = "CIDAS/clipseg-rd64-refined"
+    try:
+        for model_class in (AutoProcessor, CLIPSegForImageSegmentation):
+            download_from_hf(model_class, clipseg_repo)
+    except Exception:
+        print("Error installing clipseg model:")
+        print(traceback.format_exc())
+
+
+# -------------------------------------
+def download_safety_checker():
+    """
+    Fetch the feature extractor and safety checker of
+    "CompVis/stable-diffusion-safety-checker".
+
+    If the required classes cannot be imported, the traceback is printed
+    and nothing is downloaded.
+    """
+    print("Installing model for NSFW content detection...", file=sys.stderr)
+    try:
+        from diffusers.pipelines.stable_diffusion.safety_checker import (
+            StableDiffusionSafetyChecker,
+        )
+        from transformers import AutoFeatureExtractor
+    except ModuleNotFoundError:
+        print("Error installing NSFW checker model:")
+        print(traceback.format_exc())
+        return
+
+    checker_repo = "CompVis/stable-diffusion-safety-checker"
+    steps = (
+        ("AutoFeatureExtractor...", AutoFeatureExtractor),
+        ("StableDiffusionSafetyChecker...", StableDiffusionSafetyChecker),
+    )
+    for banner, model_class in steps:
+        print(banner, file=sys.stderr)
+        download_from_hf(model_class, checker_repo)
+
+
+# -------------------------------------
+def download_vaes():
+    """
+    Install the stabilityai VAE in both of its forms: the diffusers repo
+    "stabilityai/sd-vae-ft-mse" and the legacy checkpoint from
+    "stabilityai/sd-vae-ft-mse-original".
+
+    Any failure is reported on stderr with a traceback and not propagated.
+    """
+    print("Installing stabilityai VAE...", file=sys.stderr)
+    try:
+        # first the diffusers version
+        diffusers_repo = "stabilityai/sd-vae-ft-mse"
+        vae = AutoencoderKL.from_pretrained(
+            diffusers_repo, cache_dir=global_cache_dir("diffusers")
+        )
+        if not vae:
+            raise Exception(f"download of {diffusers_repo} failed")
+
+        # next the legacy checkpoint version
+        ckpt_name = "vae-ft-mse-840000-ema-pruned.ckpt"
+        fetched = hf_download_with_resume(
+            repo_id="stabilityai/sd-vae-ft-mse-original",
+            model_name=ckpt_name,
+            model_dir=str(Globals.root / Model_dir / Weights_dir),
+        )
+        if not fetched:
+            raise Exception(f"download of {ckpt_name} failed")
+    except Exception as e:
+        print(f"Error downloading StabilityAI standard VAE: {str(e)}", file=sys.stderr)
+        print(traceback.format_exc(), file=sys.stderr)
+
+
+# -------------------------------------
+def get_root(root: str = None) -> str:
+    """
+    Pick the runtime root directory.
+
+    The first non-empty candidate wins, in this order: the ``root``
+    argument, the INVOKEAI_ROOT environment variable, ``Globals.root``.
+    """
+    return root or os.environ.get("INVOKEAI_ROOT") or Globals.root
+
+
+# -------------------------------------
+class editOptsForm(npyscreen.FormMultiPage):
+    """
+    Console form for editing the InvokeAI startup options: output directory,
+    NSFW checker, HuggingFace token, GPU/memory switches, precision, model
+    cache size, embedding directory and license acceptance.
+
+    Initial values are read from ``parentApp.invokeai_opts``; the collected
+    values are returned by marshall_arguments() as a Namespace.
+    """
+    # for responsive resizing - disabled
+    # FIX_MINIMUM_SIZE_WHEN_CREATED = False
+    
+    def create(self):
+        """
+        Build all widgets of the form.
+
+        Widgets are added top to bottom; ``self.nextrely`` is nudged between
+        them to adjust spacing (presumably +1 leaves a blank row and -1
+        removes one -- confirm against npyscreen).
+        """
+        program_opts = self.parentApp.program_opts
+        old_opts = self.parentApp.invokeai_opts
+        # First run == the init file does not exist yet under Globals.root.
+        first_time = not (Globals.root / Globals.initfile).exists()
+        access_token = HfFolder.get_token()
+        # Only window_width is used below (to position the OK button).
+        window_width,window_height = get_terminal_size()
+        for i in [
+            "Configure startup settings. You can come back and change these later.",
+            "Use ctrl-N and ctrl-P to move to the <N>ext and <P>revious fields.",
+            "Use cursor arrows to make a checkbox selection, and space to toggle.",
+        ]:
+            self.add_widget_intelligent(
+                npyscreen.FixedText,
+                value=i,
+                editable=False,
+                color="CONTROL",
+            )
+
+        self.nextrely += 1
+        self.add_widget_intelligent(
+            npyscreen.TitleFixedText,
+            name="== BASIC OPTIONS ==",
+            begin_entry_at=0,
+            editable=False,
+            color="CONTROL",
+            scroll_exit=True,
+        )
+        self.nextrely -= 1
+        self.add_widget_intelligent(
+            npyscreen.FixedText,
+            value="Select an output directory for images:",
+            editable=False,
+            color="CONTROL",
+        )
+        # Falls back to <root>/outputs when the old options carry no outdir.
+        self.outdir = self.add_widget_intelligent(
+            npyscreen.TitleFilename,
+            name="(<tab> autocompletes, ctrl-N advances):",
+            value=old_opts.outdir or str(default_output_dir()),
+            select_dir=True,
+            must_exist=False,
+            use_two_lines=False,
+            labelColor="GOOD",
+            begin_entry_at=40,
+            scroll_exit=True,
+        )
+        self.nextrely += 1
+        self.add_widget_intelligent(
+            npyscreen.FixedText,
+            value="Activate the NSFW checker to blur images showing potential sexual imagery:",
+            editable=False,
+            color="CONTROL",
+        )
+        self.safety_checker = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="NSFW checker",
+            value=old_opts.safety_checker,
+            relx=5,
+            scroll_exit=True,
+        )
+        self.nextrely += 1
+        for i in [
+            "If you have an account at HuggingFace you may paste your access token here",
+            'to allow InvokeAI to download styles & subjects from the "Concept Library".',
+            "See https://huggingface.co/settings/tokens",
+        ]:
+            self.add_widget_intelligent(
+                npyscreen.FixedText,
+                value=i,
+                editable=False,
+                color="CONTROL",
+            )
+
+        # Pre-filled with the token returned by HfFolder.get_token() above.
+        self.hf_token = self.add_widget_intelligent(
+            npyscreen.TitlePassword,
+            name="Access Token (ctrl-shift-V pastes):",
+            value=access_token,
+            begin_entry_at=42,
+            use_two_lines=False,
+            scroll_exit=True,
+        )
+        self.nextrely += 1
+        self.add_widget_intelligent(
+            npyscreen.TitleFixedText,
+            name="== ADVANCED OPTIONS ==",
+            begin_entry_at=0,
+            editable=False,
+            color="CONTROL",
+            scroll_exit=True,
+        )
+        self.nextrely -= 1
+        self.add_widget_intelligent(
+            npyscreen.TitleFixedText,
+            name="GPU Management",
+            begin_entry_at=0,
+            editable=False,
+            color="CONTROL",
+            scroll_exit=True,
+        )
+        self.nextrely -= 1
+        self.free_gpu_mem = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Free GPU memory after each generation",
+            value=old_opts.free_gpu_mem,
+            relx=5,
+            scroll_exit=True,
+        )
+        self.xformers = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Enable xformers support if available",
+            value=old_opts.xformers,
+            relx=5,
+            scroll_exit=True,
+        )
+        self.ckpt_convert = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Load legacy checkpoint models into memory as diffusers models",
+            value=old_opts.ckpt_convert,
+            relx=5,
+            scroll_exit=True,
+        )
+        self.always_use_cpu = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="Force CPU to be used on GPU systems",
+            value=old_opts.always_use_cpu,
+            relx=5,
+            scroll_exit=True,
+        )
+        # Old precision wins; otherwise --full-precision selects "float32"
+        # and anything else "auto".
+        precision = old_opts.precision or (
+            "float32" if program_opts.full_precision else "auto"
+        )
+        # NOTE(review): .index() raises ValueError if `precision` is not one
+        # of PRECISION_CHOICES -- confirm that cannot happen.
+        self.precision = self.add_widget_intelligent(
+            npyscreen.TitleSelectOne,
+            name="Precision",
+            values=PRECISION_CHOICES,
+            value=PRECISION_CHOICES.index(precision),
+            begin_entry_at=3,
+            max_height=len(PRECISION_CHOICES) + 1,
+            scroll_exit=True,
+        )
+        self.max_loaded_models = self.add_widget_intelligent(
+            IntTitleSlider,
+            name="Number of models to cache in CPU memory (each will use 2-4 GB!)",
+            value=old_opts.max_loaded_models,
+            out_of=10,
+            lowest=1,
+            begin_entry_at=4,
+            scroll_exit=True,
+        )
+        self.nextrely += 1
+        self.add_widget_intelligent(
+            npyscreen.FixedText,
+            value="Directory containing embedding/textual inversion files:",
+            editable=False,
+            color="CONTROL",
+        )
+        # Always starts from the default <root>/embeddings; unlike outdir,
+        # no previous value is consulted here.
+        self.embedding_path = self.add_widget_intelligent(
+            npyscreen.TitleFilename,
+            name="(<tab> autocompletes, ctrl-N advances):",
+            value=str(default_embedding_dir()),
+            select_dir=True,
+            must_exist=False,
+            use_two_lines=False,
+            labelColor="GOOD",
+            begin_entry_at=40,
+            scroll_exit=True,
+        )
+        self.nextrely += 1
+        self.add_widget_intelligent(
+            npyscreen.TitleFixedText,
+            name="== LICENSE ==",
+            begin_entry_at=0,
+            editable=False,
+            color="CONTROL",
+            scroll_exit=True,
+        )
+        self.nextrely -= 1
+        for i in [
+            "BY DOWNLOADING THE STABLE DIFFUSION WEIGHT FILES, YOU AGREE TO HAVE READ",
+            "AND ACCEPTED THE CREATIVEML RESPONSIBLE AI LICENSE LOCATED AT",
+            "https://huggingface.co/spaces/CompVis/stable-diffusion-license",
+        ]:
+            self.add_widget_intelligent(
+                npyscreen.FixedText,
+                value=i,
+                editable=False,
+                color="CONTROL",
+            )
+        # Pre-checked unless this is the first run (no init file yet).
+        self.license_acceptance = self.add_widget_intelligent(
+            npyscreen.Checkbox,
+            name="I accept the CreativeML Responsible AI License",
+            value=not first_time,
+            relx=2,
+            scroll_exit=True,
+        )
+        self.nextrely += 1
+        # "DONE" when no model-selection form follows, "NEXT" otherwise
+        # (same condition EditOptApplication.onStart uses to add that form).
+        label = (
+            "DONE"
+            if program_opts.skip_sd_weights or program_opts.default_only
+            else "NEXT"
+        )
+        self.ok_button = self.add_widget_intelligent(
+            CenteredButtonPress,
+            name=label,
+            relx=(window_width - len(label)) // 2,
+            rely=-3,
+            when_pressed_function=self.on_ok,
+        )
+
+    def on_ok(self):
+        """
+        Handler of the OK button: collect and validate the field values.
+
+        On success the options are stored on ``parentApp.new_opts`` and the
+        next form is set to "MODELS" if the application has a
+        ``model_select`` form, else to None. On failure the form stays in
+        editing mode.
+        """
+        options = self.marshall_arguments()
+        if self.validate_field_values(options):
+            # NOTE(review): this instance attribute shadows the method
+            # EditOptApplication.new_opts() of the same name.
+            self.parentApp.new_opts = options
+            if hasattr(self.parentApp, "model_select"):
+                self.parentApp.setNextForm("MODELS")
+            else:
+                self.parentApp.setNextForm(None)
+            self.editing = False
+        else:
+            self.editing = True
+
+    def validate_field_values(self, opt: Namespace) -> bool:
+        """
+        Check the collected options and report all problems at once.
+
+        Requires the license to be accepted and the *parent* directories of
+        ``opt.outdir`` and ``opt.embedding_path`` to exist. Problems are
+        shown in a confirmation dialog.
+
+        :return: True when no problem was found, False otherwise.
+        """
+        bad_fields = []
+        if not opt.license_acceptance:
+            bad_fields.append(
+                "Please accept the license terms before proceeding to model downloads"
+            )
+        if not Path(opt.outdir).parent.exists():
+            bad_fields.append(
+                f"The output directory does not seem to be valid. Please check that {str(Path(opt.outdir).parent)} is an existing directory."
+            )
+        if not Path(opt.embedding_path).parent.exists():
+            bad_fields.append(
+                f"The embedding directory does not seem to be valid. Please check that {str(Path(opt.embedding_path).parent)} is an existing directory."
+            )
+        if len(bad_fields) > 0:
+            message = "The following problems were detected and must be corrected:\n"
+            for problem in bad_fields:
+                message += f"* {problem}\n"
+            npyscreen.notify_confirm(message)
+            return False
+        else:
+            return True
+
+    def marshall_arguments(self):
+        """
+        Copy the current widget values into a fresh Namespace.
+
+        :return: Namespace with one attribute per option widget plus
+            ``hf_token``, ``license_acceptance`` and ``precision``.
+        """
+        new_opts = Namespace()
+
+        # These widgets share their name with the option they edit.
+        for attr in [
+            "outdir",
+            "safety_checker",
+            "free_gpu_mem",
+            "max_loaded_models",
+            "xformers",
+            "always_use_cpu",
+            "embedding_path",
+            "ckpt_convert",
+        ]:
+            setattr(new_opts, attr, getattr(self, attr).value)
+
+        new_opts.hf_token = self.hf_token.value
+        new_opts.license_acceptance = self.license_acceptance.value
+        # The select widget's value is indexed with [0], i.e. it is treated
+        # as a sequence of selected indices into PRECISION_CHOICES.
+        new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
+
+        return new_opts
+
+
+class EditOptApplication(npyscreen.NPSAppManaged):
+    """
+    npyscreen application hosting the startup-options form ("MAIN") and,
+    unless SD weights are skipped or only the default model is wanted, the
+    model-selection form ("MODELS").
+    """
+
+    def __init__(self, program_opts: Namespace, invokeai_opts: Namespace):
+        super().__init__()
+        self.invokeai_opts = invokeai_opts
+        self.program_opts = program_opts
+        self.user_selections = default_user_selections(program_opts)
+        self.user_cancelled = False
+
+    def onStart(self):
+        """Register the forms; "MODELS" is added only when it is needed."""
+        npyscreen.setTheme(npyscreen.Themes.DefaultTheme)
+        self.options = self.addForm(
+            "MAIN", editOptsForm, name="InvokeAI Startup Options"
+        )
+        cli = self.program_opts
+        if cli.skip_sd_weights or cli.default_only:
+            return
+        self.model_select = self.addForm(
+            "MODELS",
+            addModelsForm,
+            name="Install Stable Diffusion Models",
+            multipage=True,
+        )
+
+    def new_opts(self):
+        """Return the option values currently held by the "MAIN" form."""
+        main_form = self.options
+        return main_form.marshall_arguments()
+
+
+def edit_opts(program_opts: Namespace, invokeai_opts: Namespace) -> argparse.Namespace:
+    """
+    Run the options editor and return the options the user chose.
+
+    :param program_opts: command-line options of the configure script.
+    :param invokeai_opts: current startup options used to pre-fill the form.
+    :return: Namespace with the edited startup options.
+    """
+    editApp = EditOptApplication(program_opts, invokeai_opts)
+    editApp.run()
+    # editOptsForm.on_ok() stores the validated Namespace on the application
+    # as the attribute `new_opts`, hiding the method of the same name.
+    # Accept both forms instead of calling a Namespace.
+    chosen = editApp.new_opts
+    return chosen() if callable(chosen) else chosen
+
+
+def default_startup_options(init_file: Path) -> Namespace:
+    """
+    Return the startup options obtained by parsing an empty argument list.
+
+    A relative ``outdir`` is anchored at ``Globals.root``, and the safety
+    checker is switched on when ``init_file`` does not exist yet.
+    """
+    startup = Args().parse_args([])
+    if not Path(startup.outdir).is_absolute():
+        startup.outdir = str(Globals.root / startup.outdir)
+    missing_init_file = not init_file.exists()
+    if missing_init_file:
+        startup.safety_checker = True
+    return startup
+
+
+def default_user_selections(program_opts: Namespace) -> Namespace:
+    """
+    Build the initial model-selection state.
+
+    ``starter_models`` is the default dataset with --default_only, the
+    recommended datasets with --yes, and an empty dict otherwise; all other
+    fields start out unset.
+    """
+    if program_opts.default_only:
+        starters = default_dataset()
+    elif program_opts.yes_to_all:
+        starters = recommended_datasets()
+    else:
+        starters = dict()
+
+    selections = Namespace(starter_models=starters)
+    selections.purge_deleted_models = False
+    selections.scan_directory = None
+    selections.autoscan_on_startup = None
+    selections.import_model_paths = None
+    selections.convert_to_diffusers = None
+    return selections
+
+
+# -------------------------------------
+def initialize_rootdir(root: str, yes_to_all: bool = False):
+    """
+    Create the runtime directory layout under ``root`` and copy the bundled
+    ``invokeai.configs`` package contents into ``<root>/configs``.
+
+    ``yes_to_all`` is accepted but not used by this function.
+    """
+    print("** INITIALIZING INVOKEAI RUNTIME DIRECTORY **")
+
+    subdirs = (
+        "models",
+        "configs",
+        "embeddings",
+        "text-inversion-output",
+        "text-inversion-training-data",
+    )
+    for subdir in subdirs:
+        os.makedirs(os.path.join(root, subdir), exist_ok=True)
+
+    bundled = Path(configs.__path__[0])
+    installed = Path(root) / "configs"
+    # Nothing to copy when the package directory already is the target.
+    if os.path.samefile(bundled, installed):
+        return
+    shutil.copytree(bundled, installed, dirs_exist_ok=True)
+
+
+# -------------------------------------
+def run_console_ui(
+    program_opts: Namespace, initfile: Path = None
+) -> (Namespace, Namespace):
+    """
+    Show the console UI and return ``(new_opts, user_selections)`` from the
+    application, or ``(None, None)`` when the user cancelled.
+    """
+    # parse_args() will read from init file if present
+    current_opts = default_startup_options(initfile)
+
+    set_min_terminal_size(MIN_COLS, MIN_LINES)
+    app = EditOptApplication(program_opts, current_opts)
+    app.run()
+    if not app.user_cancelled:
+        return (app.new_opts, app.user_selections)
+    return (None, None)
+
+# -------------------------------------
+def write_opts(opts: Namespace, init_file: Path):
+    """
+    Update the invokeai.init file with values from opts Namespace
+
+    :param opts: options to store; ``outdir`` and ``embedding_path`` are
+        normalized in place to forward slashes. A truthy ``hf_token``
+        triggers a HuggingFace login at the end.
+    :param init_file: path of the init file; created with the standard
+        preamble when missing.
+
+    The new content is written to ``<init_file>.new`` and moved over the
+    original only if writing succeeded, so a failed write can neither
+    install a truncated file nor raise on a missing temporary file.
+    """
+    # touch file if it doesn't exist
+    if not init_file.exists():
+        with open(init_file, "w") as f:
+            f.write(INIT_FILE_PREAMBLE)
+
+    # We want to write in the changed arguments without clobbering
+    # any other initialization values the user has entered. There is
+    # no good way to do this because of the one-way nature of
+    # argparse: i.e. --outdir could be --outdir, --out, or -o
+    # initfile needs to be replaced with a fully structured format
+    # such as yaml; this is a hack that will work much of the time
+    args_to_skip = re.compile(
+        "^--?(o|out|no-xformer|xformer|no-ckpt|ckpt|free|no-nsfw|nsfw|prec|max_load|embed|always|ckpt|free_gpu)"
+    )
+    # fix windows paths
+    opts.outdir = opts.outdir.replace('\\','/')
+    opts.embedding_path = opts.embedding_path.replace('\\','/')
+    new_file = f"{init_file}.new"
+    try:
+        # Read through a context manager so the handle is closed promptly.
+        with open(init_file, "r") as in_file:
+            lines = [x.strip() for x in in_file]
+        with open(new_file, "w") as out_file:
+            # Keep every non-empty line that is not one of the options
+            # rewritten below.
+            for line in lines:
+                if len(line) > 0 and not args_to_skip.match(line):
+                    out_file.write(line + "\n")
+            out_file.write(
+                f"""
+--outdir={opts.outdir}
+--embedding_path={opts.embedding_path}
+--precision={opts.precision}
+--max_loaded_models={int(opts.max_loaded_models)}
+--{'no-' if not opts.safety_checker else ''}nsfw_checker
+--{'no-' if not opts.xformers else ''}xformers
+--{'no-' if not opts.ckpt_convert else ''}ckpt_convert
+{'--free_gpu_mem' if opts.free_gpu_mem else ''}
+{'--always_use_cpu' if opts.always_use_cpu else ''}
+"""
+            )
+    except OSError as e:
+        print(f"** An error occurred while writing the init file: {str(e)}")
+    else:
+        # Only install the new file when it was written completely.
+        os.replace(new_file, init_file)
+
+    if opts.hf_token:
+        HfLogin(opts.hf_token)
+
+
+# -------------------------------------
+def default_output_dir() -> Path:
+    """Return the "outputs" directory located directly under Globals.root."""
+    root_dir = Globals.root
+    return root_dir / "outputs"
+
+
+# -------------------------------------
+def default_embedding_dir() -> Path:
+    """Return the "embeddings" directory located directly under Globals.root."""
+    root_dir = Globals.root
+    return root_dir / "embeddings"
+
+
+# -------------------------------------
+def write_default_options(program_opts: Namespace, initfile: Path):
+    """
+    Write the default startup options, plus the token returned by
+    ``HfFolder.get_token()``, to ``initfile``.
+
+    ``program_opts`` is accepted but not used by this function.
+    """
+    defaults = default_startup_options(initfile)
+    defaults.hf_token = HfFolder.get_token()
+    write_opts(defaults, initfile)
+
+
+# -------------------------------------
+def main():
+    """
+    Entry point of the configuration script.
+
+    Parses the command line, sets ``Globals.root``, initializes the runtime
+    directory when needed, obtains the startup options (defaults with
+    ``--yes``, the console UI otherwise), then downloads the support models
+    and the selected diffusion weights unless skipped. Ctrl-C at any point
+    ends the run with a goodbye message.
+    """
+    parser = argparse.ArgumentParser(description="InvokeAI model downloader")
+    parser.add_argument(
+        "--skip-sd-weights",
+        dest="skip_sd_weights",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="skip downloading the large Stable Diffusion weight files",
+    )
+    parser.add_argument(
+        "--skip-support-models",
+        dest="skip_support_models",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="skip downloading the support models",
+    )
+    # No `type=` here: BooleanOptionalAction takes no value to convert, and
+    # passing `type` to it is deprecated in newer Python versions.
+    parser.add_argument(
+        "--full-precision",
+        dest="full_precision",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="use 32-bit weights instead of faster 16-bit weights",
+    )
+    parser.add_argument(
+        "--yes",
+        "-y",
+        dest="yes_to_all",
+        action="store_true",
+        help='answer "yes" to all prompts',
+    )
+    parser.add_argument(
+        "--default_only",
+        action="store_true",
+        help="when --yes specified, only install the default model",
+    )
+    parser.add_argument(
+        "--config_file",
+        "-c",
+        dest="config_file",
+        type=str,
+        default=None,
+        help="path to configuration file to create",
+    )
+    parser.add_argument(
+        "--root_dir",
+        dest="root",
+        type=str,
+        default=None,
+        help="path to root of install directory",
+    )
+    opt = parser.parse_args()
+
+    # setting a global here
+    Globals.root = Path(os.path.expanduser(get_root(opt.root) or ""))
+
+    # NOTE(review): nothing in this function adds to `errors`, so
+    # postscript() always reports success -- confirm this is intended.
+    errors = set()
+
+    try:
+        models_to_download = default_user_selections(opt)
+
+        # Check whether the runtime directory is correctly initialized.
+        init_file = Path(Globals.root, Globals.initfile)
+        if not init_file.exists() or not global_config_file().exists():
+            initialize_rootdir(Globals.root, opt.yes_to_all)
+
+        if opt.yes_to_all:
+            write_default_options(opt, init_file)
+            init_options = Namespace(
+                precision="float32" if opt.full_precision else "float16"
+            )
+        else:
+            init_options, models_to_download = run_console_ui(opt, init_file)
+            if init_options:
+                write_opts(init_options, init_file)
+            else:
+                print(
+                    '\n** CANCELLED AT USER\'S REQUEST. USE THE "invoke.sh" LAUNCHER TO RUN LATER **\n'
+                )
+                sys.exit(0)
+
+        if opt.skip_support_models:
+            print("\n** SKIPPING SUPPORT MODEL DOWNLOADS PER USER REQUEST **")
+        else:
+            print("\n** DOWNLOADING SUPPORT MODELS **")
+            download_bert()
+            download_sd1_clip()
+            download_sd2_clip()
+            download_realesrgan()
+            download_gfpgan()
+            download_codeformer()
+            download_clipseg()
+            download_safety_checker()
+            download_vaes()
+
+        if opt.skip_sd_weights:
+            print("\n** SKIPPING DIFFUSION WEIGHTS DOWNLOAD PER USER REQUEST **")
+        elif models_to_download:
+            print("\n** DOWNLOADING DIFFUSION WEIGHTS **")
+            process_and_execute(opt, models_to_download)
+
+        postscript(errors=errors)
+    except KeyboardInterrupt:
+        print("\nGoodbye! Come back soon.")
+
+# -------------------------------------
+# Run the configuration tool only when this file is executed as a script,
+# not when it is imported.
+if __name__ == "__main__":
+    main()
--- a/invokeai/backend/config/model_install_backend.py
+++ b/invokeai/backend/config/model_install_backend.py
@ -0,0 +1,455 @@
+"""
+Utility (backend) functions used by model_install.py
+"""
+import os
+import re
+import shutil
+import sys
+import warnings
+from pathlib import Path
+from tempfile import TemporaryFile
+
+import requests
+from diffusers import AutoencoderKL
+from huggingface_hub import hf_hub_url
+from omegaconf import OmegaConf
+from omegaconf.dictconfig import DictConfig
+from tqdm import tqdm
+from typing import List
+
+import invokeai.configs as configs
+from ..stable_diffusion import StableDiffusionGeneratorPipeline
+from ..globals import Globals, global_cache_dir, global_config_dir
+from ..model_management import ModelManager
+
+# Silence every warning category for the remainder of the process.
+warnings.filterwarnings("ignore")
+
+# --------------------------globals-----------------------
+# Path fragments joined onto Globals.root to locate legacy checkpoint weights.
+Model_dir = "models"
+Weights_dir = "ldm/stable-diffusion-v1/"
+
+# the initial "configs" dir is now bundled in the `invokeai.configs` package
+Dataset_path = Path(configs.__path__[0]) / "INITIAL_MODELS.yaml"
+
+# Cache for the parsed INITIAL_MODELS.yaml; populated on the first call to
+# initial_models().
+Datasets = None
+
+# Comment header written at the top of the models.yaml file produced by
+# update_config_file().
+Config_preamble = """
+# This file describes the alternative machine learning models
+# available to InvokeAI script.
+#
+# To add a new model, follow the examples below. Each
+# model requires a model config file, a weights file,
+# and the width and height of the images it
+# was trained on.
+"""
+
+def default_config_file():
+    """Return the path of ``models.yaml`` inside the global config directory."""
+    config_root = Path(global_config_dir())
+    return config_root / "models.yaml"
+
+def sd_configs():
+    """Return the ``stable-diffusion`` subdirectory of the global config directory."""
+    config_root = Path(global_config_dir())
+    return config_root / "stable-diffusion"
+
+def initial_models():
+    """
+    Return the parsed INITIAL_MODELS.yaml, loading it from Dataset_path on
+    first use and caching it in the module-level ``Datasets`` variable.
+    """
+    global Datasets
+    if not Datasets:
+        Datasets = OmegaConf.load(Dataset_path)
+    return Datasets
+
+def install_requested_models(
+        install_initial_models: List[str] = None,
+        remove_models: List[str] = None,
+        scan_directory: Path = None,
+        external_models: List[str] = None,
+        scan_at_startup: bool = False,
+        convert_to_diffusers: bool = False,
+        precision: str = "float16",
+        purge_deleted: bool = False,
+        config_file_path: Path = None,
+):
+    '''
+    Entry point for installing/deleting starter models, or installing external models.
+
+    :param install_initial_models: names of starter models (keys of INITIAL_MODELS.yaml) to download
+    :param remove_models: names of models to delete from the model manager
+    :param scan_directory: directory handed to the model manager for import alongside external_models
+    :param external_models: paths, URLs or repo ids handed to ModelManager.heuristic_import()
+    :param scan_at_startup: if true and scan_directory is a directory, record it in the init file
+    :param convert_to_diffusers: passed as `convert` to heuristic_import(); also selects
+        `--autoconvert` rather than `--autoimport` for the init file entry
+    :param precision: precision string passed to the model manager and the downloader
+    :param purge_deleted: passed as `delete_files` when removing models
+    :param config_file_path: models.yaml to operate on; defaults to default_config_file()
+    '''
+    config_file_path = config_file_path or default_config_file()
+    if not config_file_path.exists():
+        # touch() creates the empty file without leaving an open handle behind
+        config_file_path.touch()
+
+    model_manager = ModelManager(OmegaConf.load(config_file_path), precision=precision)
+
+    if remove_models:
+        print("== DELETING UNCHECKED STARTER MODELS ==")
+        for model in remove_models:
+            print(f'{model}...')
+            model_manager.del_model(model, delete_files=purge_deleted)
+        model_manager.commit(config_file_path)
+
+    if install_initial_models:
+        print("== INSTALLING SELECTED STARTER MODELS ==")
+        downloaded = download_weight_datasets(
+            models=install_initial_models,
+            access_token=None,
+            precision=precision,
+        )  # FIX: for historical reasons, we don't use model manager here
+        # Keep only the entries that actually produced a path, so that failed
+        # downloads are neither written to the config file nor counted as successes.
+        successfully_downloaded = {
+            name: path for name, path in downloaded.items() if path
+        }
+        if successfully_downloaded:
+            update_config_file(successfully_downloaded, config_file_path)
+        if len(successfully_downloaded) < len(install_initial_models):
+            print("** Some of the model downloads were not successful")
+
+    # due to above, we have to reload the model manager because conf file
+    # was changed behind its back
+    model_manager = ModelManager(OmegaConf.load(config_file_path), precision=precision)
+
+    # work on a copy so that the caller's list is never mutated
+    external_models = list(external_models or [])
+    if scan_directory:
+        external_models.append(str(scan_directory))
+
+    if external_models:
+        print("== INSTALLING EXTERNAL MODELS ==")
+        for path_url_or_repo in external_models:
+            try:
+                model_manager.heuristic_import(
+                    path_url_or_repo,
+                    convert=convert_to_diffusers,
+                    commit_to_conf=config_file_path
+                )
+            except KeyboardInterrupt:
+                sys.exit(-1)
+            except Exception as e:
+                # best effort: report the failure and carry on with the next model
+                print(f"** Unable to import {path_url_or_repo}: {str(e)}")
+
+    if scan_at_startup and scan_directory is not None and scan_directory.is_dir():
+        argument = '--autoconvert' if convert_to_diffusers else '--autoimport'
+        initfile = Path(Globals.root, Globals.initfile)
+        replacement = Path(Globals.root, f'{Globals.initfile}.new')
+        directory = str(scan_directory).replace('\\', '/')
+        with open(initfile, 'r') as infile, open(replacement, 'w') as outfile:
+            for line in infile:
+                # drop any previous entry for this argument
+                if line.startswith(argument):
+                    continue
+                # make sure an unterminated last line does not get glued to
+                # the argument appended below
+                outfile.write(line if line.endswith('\n') else line + '\n')
+            outfile.write(f'{argument} {directory}\n')
+        os.replace(replacement, initfile)
+
+# -------------------------------------
+def yes_or_no(prompt: str, default_yes=True):
+    """
+    Ask a yes/no question on the console and return the answer as a bool.
+
+    An empty reply selects the default. When the default is "yes", anything
+    not starting with n/N counts as yes; when the default is "no", only a
+    reply starting with y/Y counts as yes.
+    """
+    fallback = "y" if default_yes else "n"
+    answer = input(f"{prompt} [{fallback}] ")
+    if not answer:
+        answer = fallback
+    initial = answer[0]
+    return initial not in ("n", "N") if default_yes else initial in ("y", "Y")
+
+
+# -------------------------------------
+def get_root(root: str = None) -> str:
+    """
+    Resolve the root directory: an explicit non-empty `root` wins, then a
+    non-empty INVOKEAI_ROOT environment variable, then Globals.root.
+    """
+    if root:
+        return root
+    env_root = os.environ.get("INVOKEAI_ROOT")
+    return env_root if env_root else Globals.root
+
+
+# ---------------------------------------------
+def recommended_datasets() -> dict:
+    """Return ``{name: True}`` for every initial model flagged as "recommended"."""
+    catalog = initial_models()
+    return {
+        name: True
+        for name in catalog.keys()
+        if catalog[name].get("recommended", False)
+    }
+
+
+# ---------------------------------------------
+def default_dataset() -> dict:
+    """Return ``{name: True}`` for every initial model flagged as "default"."""
+    catalog = initial_models()
+    return {
+        name: True
+        for name in catalog.keys()
+        if catalog[name].get("default", False)
+    }
+
+
+# ---------------------------------------------
+def all_datasets() -> dict:
+    """Return ``{name: True}`` for every model listed in the initial models file."""
+    return {name: True for name in initial_models().keys()}
+
+
+# ---------------------------------------------
+# look for legacy model.ckpt in models directory and offer to
+# normalize its name
+def migrate_models_ckpt():
+    """
+    Rename a legacy "model.ckpt" found in the v1 weights directory to the file
+    name that INITIAL_MODELS.yaml lists for "stable-diffusion-1.4".
+
+    Does nothing when no "model.ckpt" is present.
+    """
+    model_path = os.path.join(Globals.root, Model_dir, Weights_dir)
+    legacy_file = os.path.join(model_path, "model.ckpt")
+    if not os.path.exists(legacy_file):
+        return
+    new_name = initial_models()["stable-diffusion-1.4"]["file"]
+    # f-string is required here, otherwise "{new_name}" is printed literally
+    print(
+        f'The Stable Diffusion v1.4 "model.ckpt" is already installed. '
+        f'The name will be changed to {new_name} to avoid confusion.'
+    )
+    print(f"model.ckpt => {new_name}")
+    os.replace(legacy_file, os.path.join(model_path, new_name))
+
+
+# ---------------------------------------------
+def download_weight_datasets(
+    models: List[str], access_token: str, precision: str = "float32"
+):
+    """
+    Download each named starter model and return a dict mapping the names of
+    the models that downloaded successfully to their downloaded paths.
+
+    Models whose download returned no path are left out of the result, so
+    callers can compare its length with len(models) to detect failures.
+    """
+    migrate_models_ckpt()
+    successful = dict()
+    for mod in models:
+        print(f"Downloading {mod}:")
+        path = _download_repo_or_file(
+            initial_models()[mod], access_token, precision=precision
+        )
+        # a falsy path signals a failed download; do not report it as a success
+        if path:
+            successful[mod] = path
+    return successful
+
+
+def _download_repo_or_file(
+    mconfig: DictConfig, access_token: str, precision: str = "float32"
+) -> Path:
+    """
+    Fetch the weights described by `mconfig`: a single checkpoint file when
+    its format is "ckpt", otherwise a diffusers repository (plus the VAE
+    repository when the stanza names one). Returns the main model's path.
+    """
+    if mconfig["format"] == "ckpt":
+        return _download_ckpt_weights(mconfig, access_token)
+
+    model_path = _download_diffusion_weights(
+        mconfig, access_token, precision=precision
+    )
+    has_vae_repo = "vae" in mconfig and "repo_id" in mconfig["vae"]
+    if has_vae_repo:
+        _download_diffusion_weights(mconfig["vae"], access_token, precision=precision)
+    return model_path
+
+
+def _download_ckpt_weights(mconfig: DictConfig, access_token: str) -> Path:
+    """Download the checkpoint file named in `mconfig` into the v1 weights directory."""
+    return hf_download_with_resume(
+        repo_id=mconfig["repo_id"],
+        model_dir=os.path.join(Globals.root, Model_dir, Weights_dir),
+        model_name=mconfig["file"],
+        access_token=access_token,
+    )
+
+
+# ---------------------------------------------
+def download_from_hf(
+    model_class: object, model_name: str, cache_subdir: Path = Path("hub"), **kwargs
+):
+    """
+    Call `model_class.from_pretrained()` for `model_name`, caching under
+    global_cache_dir(cache_subdir). Returns the "models--<org>--<name>" path
+    inside that cache when a model object came back, otherwise None.
+    """
+    cache_path = global_cache_dir(cache_subdir)
+    loaded = model_class.from_pretrained(
+        model_name,
+        cache_dir=cache_path,
+        resume_download=True,
+        **kwargs,
+    )
+    folder = "--".join(["models"] + model_name.split("/"))
+    if not loaded:
+        return None
+    return cache_path / folder
+
+
+def _download_diffusion_weights(
+    mconfig: DictConfig, access_token: str, precision: str = "float32"
+):
+    """
+    Download the repository named by mconfig["repo_id"] through download_from_hf().
+
+    A stanza whose format is "diffusers" is loaded as a
+    StableDiffusionGeneratorPipeline, anything else as an AutoencoderKL.
+    For float16 the "fp16" revision is tried first, then the default revision.
+    Returns the cached path, or None when every attempt failed.
+    """
+    repo_id = mconfig["repo_id"]
+    model_class = (
+        StableDiffusionGeneratorPipeline
+        if mconfig.get("format", None) == "diffusers"
+        else AutoencoderKL
+    )
+    extra_arg_list = [{"revision": "fp16"}, {}] if precision == "float16" else [{}]
+    path = None
+    for extra_args in extra_arg_list:
+        try:
+            path = download_from_hf(
+                model_class,
+                repo_id,
+                cache_subdir="diffusers",
+                safety_checker=None,
+                **extra_args,
+            )
+        except OSError as e:
+            # a missing fp16 revision is expected; fall through to the next attempt
+            if not str(e).startswith("fp16 is not a valid"):
+                print(f"An unexpected error occurred while downloading the model: {e}")
+        if path:
+            break
+    return path
+
+
+# ---------------------------------------------
+def hf_download_with_resume(
+    repo_id: str, model_dir: str, model_name: str, access_token: str = None
+) -> Path:
+    """
+    Download `model_name` from the HuggingFace repo `repo_id` into `model_dir`,
+    resuming a partially downloaded file when one is already present.
+
+    Returns the destination path on success (including the case where the
+    file was already complete) and None on any failure.
+    """
+    model_dest = Path(os.path.join(model_dir, model_name))
+    os.makedirs(model_dir, exist_ok=True)
+
+    url = hf_hub_url(repo_id, model_name)
+
+    header = {"Authorization": f"Bearer {access_token}"} if access_token else {}
+    open_mode = "wb"
+    exist_size = 0
+
+    if os.path.exists(model_dest):
+        # ask the server for the missing tail only and append it to the file
+        exist_size = os.path.getsize(model_dest)
+        header["Range"] = f"bytes={exist_size}-"
+        open_mode = "ab"
+
+    resp = requests.get(url, headers=header, stream=True)
+    try:
+        total = int(resp.headers.get("content-length", 0))
+
+        if (
+            resp.status_code == 416
+        ):  # "range not satisfiable", which means nothing to return
+            print(f"* {model_name}: complete file found. Skipping.")
+            return model_dest
+        if resp.status_code not in (200, 206):
+            # 206 "partial content" is the normal answer to a Range request
+            print(f"** An error occurred during downloading {model_name}: {resp.reason}")
+            return None
+        if resp.status_code == 200 and exist_size > 0:
+            # the server ignored the Range header and is sending the whole
+            # file; start over instead of appending it to the partial file
+            open_mode = "wb"
+            exist_size = 0
+
+        if exist_size > 0:
+            print(f"* {model_name}: partial file found. Resuming...")
+        else:
+            print(f"* {model_name}: Downloading...")
+
+        # very small bodies are treated as error responses rather than model data
+        if total < 2000:
+            print(f"*** ERROR DOWNLOADING {model_name}: {resp.text}")
+            return None
+
+        with open(model_dest, open_mode) as file, tqdm(
+            desc=model_name,
+            initial=exist_size,
+            total=total + exist_size,
+            unit="iB",
+            unit_scale=True,
+            unit_divisor=1000,
+        ) as bar:
+            for data in resp.iter_content(chunk_size=1024):
+                size = file.write(data)
+                bar.update(size)
+    except Exception as e:
+        print(f"An error occurred while downloading {model_name}: {str(e)}")
+        return None
+    finally:
+        # release the streamed connection on every exit path
+        resp.close()
+    return model_dest
+
+
+# ---------------------------------------------
+def update_config_file(successfully_downloaded: dict, config_file: Path):
+    """
+    Regenerate the models config file so that it includes the newly
+    downloaded models.
+
+    An existing file is first moved to "<stem>.yaml.orig"; if writing the new
+    file fails, that backup is moved back into place.
+
+    :param successfully_downloaded: dict of model name => downloaded path
+    :param config_file: config file to rewrite; None selects default_config_file()
+    """
+    config_file = (
+        Path(config_file) if config_file is not None else default_config_file()
+    )
+
+    # In some cases (incomplete setup, etc), the default configs directory might be missing.
+    # Create it if it doesn't exist.
+    # this check is ignored if opt.config_file is specified - user is assumed to know what they
+    # are doing if they are passing a custom config file from elsewhere.
+    # NOTE: compare by value; two separately built Path objects are never identical.
+    if config_file == default_config_file() and not config_file.parent.exists():
+        configs_src = Dataset_path.parent
+        configs_dest = default_config_file().parent
+        shutil.copytree(configs_src, configs_dest, dirs_exist_ok=True)
+
+    yaml = new_config_file_contents(successfully_downloaded, config_file)
+
+    backup = None
+    try:
+        if config_file.exists():
+            print(
+                f"** {config_file.name} exists. Renaming to {config_file.stem}.yaml.orig"
+            )
+            backup_target = config_file.with_suffix(".yaml.orig")
+            # Path.replace() overwrites an existing backup on every platform,
+            # so no Windows-specific unlink is needed
+            config_file.replace(backup_target)
+            # only remember the backup once it really exists
+            backup = backup_target
+
+        with open(str(config_file.expanduser().resolve()), "wb") as new_config:
+            new_config.write(Config_preamble.encode())
+            new_config.write(yaml.encode())
+
+    except Exception as e:
+        print(f"**Error creating config file {config_file}: {str(e)} **")
+        if backup is not None:
+            print("restoring previous config file")
+            backup.replace(config_file)
+        return
+
+    print(f"Successfully created new configuration file {config_file}")
+
+
+# ---------------------------------------------
+def new_config_file_contents(
+        successfully_downloaded: dict, config_file: Path,
+) -> str:
+    """
+    Build the YAML text of the models config file: the contents of
+    `config_file` (when it exists) plus one stanza per downloaded model.
+
+    Entries of `successfully_downloaded` without a path are ignored. If none
+    of the written models is flagged as default in INITIAL_MODELS.yaml, the
+    first written model is marked as the default.
+    """
+    if config_file.exists():
+        conf = OmegaConf.load(str(config_file.expanduser().resolve()))
+    else:
+        conf = OmegaConf.create()
+
+    default_selected = None
+    written = []
+    for model, downloaded_path in successfully_downloaded.items():
+        # a missing path means the download failed; never write such a model
+        if not downloaded_path:
+            continue
+
+        # a bit hacky - what we are doing here is seeing whether a checkpoint
+        # version of the model was previously defined, and whether the current
+        # model is a diffusers (indicated with a path)
+        if conf.get(model) and Path(downloaded_path).is_dir():
+            delete_weights(model, conf[model])
+
+        stanza = {}
+        mod = initial_models()[model]
+        stanza["description"] = mod["description"]
+        stanza["repo_id"] = mod["repo_id"]
+        stanza["format"] = mod["format"]
+        # diffusers don't need width and height (probably .ckpt doesn't either)
+        # so we no longer require these in INITIAL_MODELS.yaml
+        if "width" in mod:
+            stanza["width"] = mod["width"]
+        if "height" in mod:
+            stanza["height"] = mod["height"]
+        if "file" in mod:
+            stanza["weights"] = os.path.relpath(downloaded_path, start=Globals.root)
+            stanza["config"] = os.path.normpath(os.path.join(sd_configs(), mod["config"]))
+        if "vae" in mod:
+            if "file" in mod["vae"]:
+                stanza["vae"] = os.path.normpath(
+                    os.path.join(Model_dir, Weights_dir, mod["vae"]["file"])
+                )
+            else:
+                stanza["vae"] = mod["vae"]
+        if mod.get("default", False):
+            stanza["default"] = True
+            default_selected = True
+
+        conf[model] = stanza
+        written.append(model)
+
+    # if no default model was chosen, then we select the first
+    # one in the list (nothing to select when no model was written)
+    if not default_selected and written:
+        conf[written[0]]["default"] = True
+
+    return OmegaConf.to_yaml(conf)
+
+
+# ---------------------------------------------
+def delete_weights(model_name: str, conf_stanza: dict):
+    """
+    Delete the checkpoint file referenced by a config stanza that has been
+    superseded by a diffusers version of the same model.
+
+    Nothing is deleted when the stanza has no "weights" entry or when its
+    "config" path contains "/VAE/". Relative weight paths are resolved
+    against Globals.root. Errors from the deletion are printed, not raised.
+    """
+    if not (weights := conf_stanza.get("weights")):
+        return
+    # search anywhere in the path (re.match would only test the very start),
+    # and tolerate a stanza without a "config" entry
+    if re.search("/VAE/", conf_stanza.get("config") or ""):
+        return
+
+    print(
+        f"\n** The checkpoint version of {model_name} is superseded by the diffusers version. Deleting the original file {weights}?"
+    )
+
+    weights = Path(weights)
+    if not weights.is_absolute():
+        weights = Path(Globals.root) / weights
+    # delete for absolute and relative paths alike
+    try:
+        weights.unlink()
+    except OSError as e:
+        print(str(e))
--- a/invokeai/backend/generate.py
+++ b/invokeai/backend/generate.py
--- a/invokeai/backend/generator/base.py
+++ b/invokeai/backend/generator/base.py
@ -23,7 +23,7 @@ from tqdm import trange

 import invokeai.assets.web as web_assets
 from ..stable_diffusion.diffusion.ddpm import DiffusionWrapper
-from ..util import rand_perlin_2d
+from ..util.util import rand_perlin_2d

 downsampling = 8
 CAUTION_IMG = 'caution.png'
--- a/invokeai/backend/globals.py
+++ b/invokeai/backend/globals.py
@ -0,0 +1,115 @@
+'''
+invokeai.backend.globals defines a small number of global variables that would
+otherwise have to be passed through long and complex call chains.
+
+It defines a Namespace object named "Globals" that contains
+the attributes:
+
+  - root           - the root directory under which "models" and "outputs" can be found
+  - initfile       - path to the initialization file
+  - try_patchmatch - option to globally disable loading of 'patchmatch' module
+  - always_use_cpu - force use of CPU even if GPU is available
+'''
+
+import os
+import os.path as osp
+from argparse import Namespace
+from pathlib import Path
+from typing import Union
+
+# Shared, mutable namespace holding the process-wide settings defined below.
+Globals = Namespace()
+
+# Where to look for the initialization file and other key components
+Globals.initfile = 'invokeai.init'
+Globals.models_file = 'models.yaml'
+Globals.models_dir = 'models'
+Globals.config_dir = 'configs'
+Globals.autoscan_dir = 'weights'
+Globals.converted_ckpts_dir = 'converted_ckpts'
+
+# Set the default root directory. This can be overridden by explicitly
+# passing the `--root <directory>` argument on the command line.
+# The logic is:
+# 1) use the INVOKEAI_ROOT environment variable (no check for this being a valid directory)
+# 2) use the parent of the VIRTUAL_ENV directory, provided the initfile exists there
+# 3) use ~/invokeai
+
+if os.environ.get('INVOKEAI_ROOT'):
+    Globals.root = osp.abspath(os.environ.get('INVOKEAI_ROOT'))
+elif os.environ.get('VIRTUAL_ENV') and Path(os.environ.get('VIRTUAL_ENV'),'..',Globals.initfile).exists():
+    Globals.root = osp.abspath(osp.join(os.environ.get('VIRTUAL_ENV'), '..'))
+else:
+    Globals.root = osp.abspath(osp.expanduser('~/invokeai'))
+
+# Try loading patchmatch
+Globals.try_patchmatch = True
+
+# Use CPU even if GPU is available (main use case is for debugging MPS issues)
+Globals.always_use_cpu = False
+
+# Whether the internet is reachable for dynamic downloads
+# The CLI will test connectivity at startup time.
+Globals.internet_available = True
+
+# Whether to disable xformers
+Globals.disable_xformers = False
+
+# Low-memory tradeoff for guidance calculations.
+Globals.sequential_guidance = False
+
+# Whether we are forcing full precision
+Globals.full_precision = False
+
+# Whether we should convert ckpt files into diffusers models on the fly
+Globals.ckpt_convert = True
+
+# Whether to log tokenization everywhere
+Globals.log_tokenization = False
+
+def global_config_file()->Path:
+    '''
+    Returns Path to the models file inside the config directory under the root.
+    '''
+    return Path(Globals.root) / Globals.config_dir / Globals.models_file
+
+def global_config_dir()->Path:
+    '''
+    Returns Path to the config directory under the root.
+    '''
+    return Path(Globals.root) / Globals.config_dir
+
+def global_models_dir()->Path:
+    '''
+    Returns Path to the models directory under the root.
+    '''
+    return Path(Globals.root) / Globals.models_dir
+
+def global_autoscan_dir()->Path:
+    '''
+    Returns Path to the autoscan directory under the root.
+    '''
+    return Path(Globals.root) / Globals.autoscan_dir
+
+def global_converted_ckpts_dir()->Path:
+    '''
+    Returns Path to the converted-checkpoints directory inside the models directory.
+    '''
+    models_root = global_models_dir()
+    return Path(models_root) / Globals.converted_ckpts_dir
+
+def global_set_root(root_dir:Union[str,Path]):
+    '''
+    Replaces Globals.root with the given directory; the value is stored as passed.
+    '''
+    Globals.root = root_dir
+
+def global_cache_dir(subdir:Union[str,Path]='')->Path:
+    '''
+    Returns Path to the model cache directory, with `subdir` (if given)
+    appended, which allows for huggingface-style conventions:
+         global_cache_dir('diffusers')
+         global_cache_dir('hub')
+
+    The base directory is chosen in this order:
+      1) $HF_HOME
+      2) $XDG_CACHE_HOME/huggingface, the default location mentioned in the
+         HuggingFace Hub Client Library. See:
+         https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#xdgcachehome
+      3) the "models" directory under Globals.root
+
+    HuggingFace documentation (mid-Jan 2023) indicates that transformers
+    models will be cached into a "transformers" subdirectory, but in practice
+    they seem to go into "hub". Some diffusers models are moving into "hub"
+    as well, so this will need to be revisited from time to time.
+    '''
+    hf_home = os.getenv('HF_HOME')
+    if hf_home is not None:
+        return Path(hf_home, subdir)
+
+    xdg_cache = os.getenv('XDG_CACHE_HOME')
+    if xdg_cache is not None:
+        return Path(xdg_cache + os.sep + 'huggingface', subdir)
+
+    return Path(Globals.root, 'models', subdir)
--- a/invokeai/backend/image_util/init.py
+++ b/invokeai/backend/image_util/init.py
@ -9,6 +9,7 @@ from .pngwriter import (PngWriter,
                        retrieve_metadata,
                        write_metadata,
                        )
+from .seamless import configure_model_padding

 def debug_image(
    debug_image, debug_text, debug_show=True, debug_result=False, debug_status=False
--- a/invokeai/backend/image_util/patchmatch.py
+++ b/invokeai/backend/image_util/patchmatch.py
@ -4,7 +4,7 @@ wraps the actual patchmatch object. It respects the global
 "try_patchmatch" attribute, so that patchmatch loading can
 be suppressed or deferred
 '''
-from ldm.invoke.globals import Globals
+from invokeai.backend.globals import Globals
 import numpy as  np

 class PatchMatch:
--- a/invokeai/backend/image_util/seamless.py
+++ b/invokeai/backend/image_util/seamless.py
@ -0,0 +1,31 @@
+import torch.nn as nn
+
+def _conv_forward_asymmetric(self, input, weight, bias):
+    """
+    Patch for Conv2d._conv_forward that supports asymmetric padding: the input
+    is padded with the 'x' settings, then with the 'y' settings, and finally
+    convolved with zero built-in padding.
+    """
+    pad_x = self.asymmetric_padding['x']
+    mode_x = self.asymmetric_padding_mode['x']
+    padded = nn.functional.pad(input, pad_x, mode=mode_x)
+
+    pad_y = self.asymmetric_padding['y']
+    mode_y = self.asymmetric_padding_mode['y']
+    padded = nn.functional.pad(padded, pad_y, mode=mode_y)
+
+    no_padding = nn.modules.utils._pair(0)
+    return nn.functional.conv2d(
+        padded, weight, bias, self.stride, no_padding, self.dilation, self.groups
+    )
+
+def configure_model_padding(model, seamless, seamless_axes):
+    """
+    Switch the 2D convolution layers of `model` between circular (seamless)
+    and regular padding.
+
+    With `seamless` set, each layer gets per-axis padding settings ('circular'
+    for axes named in `seamless_axes`, 'constant' otherwise) and the
+    asymmetric forward patch; otherwise the stock Conv2d forward is restored
+    and the per-axis settings are removed.
+    """
+    # TODO: get an explicit interface for this in diffusers: https://github.com/huggingface/diffusers/issues/556
+    conv_types = (nn.Conv2d, nn.ConvTranspose2d)
+    for layer in model.modules():
+        if not isinstance(layer, conv_types):
+            continue
+
+        if not seamless:
+            layer._conv_forward = nn.Conv2d._conv_forward.__get__(layer, nn.Conv2d)
+            for attr in ('asymmetric_padding_mode', 'asymmetric_padding'):
+                if hasattr(layer, attr):
+                    delattr(layer, attr)
+            continue
+
+        pad = layer._reversed_padding_repeated_twice
+        layer.asymmetric_padding_mode = {}
+        layer.asymmetric_padding = {}
+        layer.asymmetric_padding_mode['x'] = 'circular' if ('x' in seamless_axes) else 'constant'
+        layer.asymmetric_padding['x'] = (pad[0], pad[1], 0, 0)
+        layer.asymmetric_padding_mode['y'] = 'circular' if ('y' in seamless_axes) else 'constant'
+        layer.asymmetric_padding['y'] = (0, 0, pad[2], pad[3])
+        layer._conv_forward = _conv_forward_asymmetric.__get__(layer, nn.Conv2d)
--- a/invokeai/backend/image_util/txt2mask.py
+++ b/invokeai/backend/image_util/txt2mask.py
@ -32,7 +32,7 @@ import numpy as  np
 from transformers import AutoProcessor, CLIPSegForImageSegmentation
 from PIL import Image, ImageOps
 from torchvision import transforms
-from ldm.invoke.globals import global_cache_dir
+from invokeai.backend.globals import global_cache_dir

 CLIPSEG_MODEL = 'CIDAS/clipseg-rd64-refined'
 CLIPSEG_SIZE = 352
--- a/invokeai/backend/model_management/init.py
+++ b/invokeai/backend/model_management/init.py
@ -0,0 +1,8 @@
+'''
+Initialization file for invokeai.backend.model_management
+'''
+from .model_manager import ModelManager
+from .convert_ckpt_to_diffusers import (load_pipeline_from_original_stable_diffusion_ckpt,
+                                        convert_ckpt_to_diffusers)
+from ...frontend.merge.merge_diffusers import (merge_diffusion_models,
+                                               merge_diffusion_models_and_commit)
--- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py
+++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@ -31,14 +31,13 @@ from omegaconf import OmegaConf
 from omegaconf.dictconfig import DictConfig
 from picklescan.scanner import scan_file_path

-from .devices import CPU_DEVICE
-from ldm.invoke.globals import Globals, global_cache_dir
-from .util import (
+from ..util import CPU_DEVICE
+from invokeai.backend.globals import Globals, global_cache_dir
+from ..util import (
    ask_user,
    download_with_resume,
-    url_attachment_name,
 )
-from .stable_diffusion import StableDiffusionGeneratorPipeline
+from ..stable_diffusion import StableDiffusionGeneratorPipeline

 class SDLegacyType(Enum):
    V1 = 1
@ -416,6 +415,51 @@ class ModelManager(object):

        return pipeline, width, height, model_hash

+    def _load_ckpt_model(self, model_name, mconfig):
+        """
+        Load a legacy checkpoint model by converting it into a diffusers
+        pipeline in memory.
+
+        Reads `config`, `weights`, `width`, `height` and the optional `vae`
+        entry from `mconfig`. Returns the tuple
+        (pipeline, width, height, "NOHASH").
+        """
+        config = mconfig.config
+        weights = mconfig.weights
+        vae = mconfig.get("vae")
+        width = mconfig.width
+        height = mconfig.height
+
+        # relative paths are resolved against the root directory
+        if not os.path.isabs(config):
+            config = os.path.join(Globals.root, config)
+        if not os.path.isabs(weights):
+            weights = os.path.normpath(os.path.join(Globals.root, weights))
+
+        # Convert to diffusers and return a diffusers pipeline
+        print(
+            f">> Converting legacy checkpoint {model_name} into a diffusers model..."
+        )
+        
+        # imported here rather than at module level
+        # NOTE(review): presumably to avoid a circular import - confirm
+        from . import  load_pipeline_from_original_stable_diffusion_ckpt
+
+        # offload the current model before the new pipeline is built
+        self.offload_model(self.current_model)
+        # a VAE chosen by _choose_diffusers_vae() replaces the config's "vae" entry
+        if vae_config := self._choose_diffusers_vae(model_name):
+            vae = self._load_vae(vae_config)
+        if self._has_cuda():
+            torch.cuda.empty_cache()
+        pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
+            checkpoint_path=weights,
+            original_config_file=config,
+            vae=vae,
+            return_generator_pipeline=True,
+            precision=torch.float16
+            if self.precision == "float16"
+            else torch.float32,
+        )
+        # either enable submodel offloading or move the whole pipeline to the device
+        if self.sequential_offload:
+            pipeline.enable_offload_submodels(self.device)
+        else:
+            pipeline.to(self.device)
+
+        # the last element is the placeholder string returned in place of a model hash
+        return (
+            pipeline,
+            width,
+            height,
+            "NOHASH",
+        )
+
    def model_name_or_path(self, model_name: Union[str, DictConfig]) -> str | Path:
        if isinstance(model_name, DictConfig) or isinstance(model_name, dict):
            mconfig = model_name
@ -519,66 +563,6 @@ class ModelManager(object):
            self.commit(commit_to_conf)
        return model_name

-    def import_ckpt_model(
-        self,
-        weights: Union[str, Path],
-        config: Union[str, Path] = "configs/stable-diffusion/v1-inference.yaml",
-        vae: Union[str, Path] = None,
-        model_name: str = None,
-        model_description: str = None,
-        commit_to_conf: Path = None,
-    ) -> str:
-        """
-        Attempts to install the indicated ckpt file and returns True if successful.
-
-        "weights" can be either a path-like object corresponding to a local .ckpt file
-        or a http/https URL pointing to a remote model.
-
-        "vae" is a Path or str object pointing to a ckpt or safetensors file to be used
-        as the VAE for this model.
-
-        "config" is the model config file to use with this ckpt file. It defaults to
-        v1-inference.yaml. If a URL is provided, the config will be downloaded.
-
-        You can optionally provide a model name and/or description. If not provided,
-        then these will be derived from the weight file name. If you provide a commit_to_conf
-        path to the configuration file, then the new entry will be committed to the
-        models.yaml file.
-
-        Return value is the name of the imported file, or None if an error occurred.
-        """
-        if str(weights).startswith(("http:", "https:")):
-            model_name = model_name or url_attachment_name(weights)
-
-        weights_path = self._resolve_path(weights, "models/ldm/stable-diffusion-v1")
-        config_path = self._resolve_path(config, "configs/stable-diffusion")
-
-        if weights_path is None or not weights_path.exists():
-            return
-        if config_path is None or not config_path.exists():
-            return
-
-        model_name = (
-            model_name or Path(weights).stem
-        )  # note this gives ugly pathnames if used on a URL without a Content-Disposition header
-        model_description = (
-            model_description or f"Imported stable diffusion weights file {model_name}"
-        )
-        new_config = dict(
-            weights=str(weights_path),
-            config=str(config_path),
-            description=model_description,
-            format="ckpt",
-            width=512,
-            height=512,
-        )
-        if vae:
-            new_config["vae"] = vae
-        self.add_model(model_name, new_config, True)
-        if commit_to_conf:
-            self.commit(commit_to_conf)
-        return model_name
-
    @classmethod
    def probe_model_type(self, checkpoint: dict) -> SDLegacyType:
        """
@ -746,36 +730,18 @@ class ModelManager(object):
            )
            return
        
-        if convert:
-            diffuser_path = Path(
-                Globals.root, "models", Globals.converted_ckpts_dir, model_path.stem
-            )
-            model_name = self.convert_and_import(
-                model_path,
-                diffusers_path=diffuser_path,
-                vae=dict(repo_id="stabilityai/sd-vae-ft-mse"),
-                model_name=model_name,
-                model_description=description,
-                original_config_file=model_config_file,
-                commit_to_conf=commit_to_conf,
-            )
-        else:
-            model_name = self.import_ckpt_model(
-                model_path,
-                config=model_config_file,
-                model_name=model_name,
-                model_description=description,
-                vae=str(
-                    Path(
-                        Globals.root,
-                        "models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt",
-                    )
-                ),
-                commit_to_conf=commit_to_conf,
-            )
-
-        if commit_to_conf:
-            self.commit(commit_to_conf)
+        diffuser_path = Path(
+            Globals.root, "models", Globals.converted_ckpts_dir, model_path.stem
+        )
+        model_name = self.convert_and_import(
+            model_path,
+            diffusers_path=diffuser_path,
+            vae=dict(repo_id="stabilityai/sd-vae-ft-mse"),
+            model_name=model_name,
+            model_description=description,
+            original_config_file=model_config_file,
+            commit_to_conf=commit_to_conf,
+        )
        return model_name

    def convert_and_import(
@ -800,7 +766,7 @@ class ModelManager(object):

        new_config = None

-        from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser
+        from . import convert_ckpt_to_diffusers

        if diffusers_path.exists():
            print(
@ -815,7 +781,7 @@ class ModelManager(object):
            # By passing the specified VAE to the conversion function, the autoencoder
            # will be built into the model rather than tacked on afterward via the config file
            vae_model = self._load_vae(vae) if vae else None
-            convert_ckpt_to_diffuser(
+            convert_ckpt_to_diffusers (
                ckpt_path,
                diffusers_path,
                extract_ema=True,
--- a/invokeai/backend/prompting/conditioning.py
+++ b/invokeai/backend/prompting/conditioning.py
@ -13,9 +13,9 @@ from transformers import CLIPTokenizer, CLIPTextModel

 from compel import Compel
 from compel.prompt_parser import FlattenedPrompt, Blend, Fragment, CrossAttentionControlSubstitute, PromptParser
-from ..devices import torch_dtype
+from ..util import torch_dtype
 from ..stable_diffusion import InvokeAIDiffuserComponent
-from ldm.invoke.globals import Globals
+from invokeai.backend.globals import Globals

 def get_tokenizer(model) -> CLIPTokenizer:
    # TODO remove legacy ckpt fallback handling
--- a/invokeai/backend/restoration/init.py
+++ b/invokeai/backend/restoration/init.py
@ -0,0 +1,4 @@
+'''
+Initialization file for the invokeai.backend.restoration package
+'''
+from .base import Restoration
--- a/invokeai/backend/restoration/base.py
+++ b/invokeai/backend/restoration/base.py
@ -0,0 +1,38 @@
+class Restoration():
+    """Loader for the optional post-processing helpers (face restoration and upscaling)."""
+
+    def __init__(self) -> None:
+        pass
+
+    def load_face_restore_models(self, gfpgan_model_path='./models/gfpgan/GFPGANv1.4.pth'):
+        """
+        Instantiate both face-restoration helpers.
+
+        Returns a ``(gfpgan, codeformer)`` tuple. An element is replaced by
+        ``None`` when the helper reports that its model file does not exist.
+        """
+        # GFPGAN
+        gfpgan = self.load_gfpgan(gfpgan_model_path)
+        if not gfpgan.gfpgan_model_exists:
+            print('>> GFPGAN Disabled')
+            gfpgan = None
+        else:
+            print('>> GFPGAN Initialized')
+
+        # CodeFormer
+        codeformer = self.load_codeformer()
+        if not codeformer.codeformer_model_exists:
+            print('>> CodeFormer Disabled')
+            codeformer = None
+        else:
+            print('>> CodeFormer Initialized')
+
+        return gfpgan, codeformer
+
+    # Face Restore Models
+    def load_gfpgan(self, gfpgan_model_path):
+        """Create a GFPGAN helper for ``gfpgan_model_path`` (imported lazily)."""
+        from .gfpgan import GFPGAN
+        restorer = GFPGAN(gfpgan_model_path)
+        return restorer
+
+    def load_codeformer(self):
+        """Create a CodeFormer helper with its default settings (imported lazily)."""
+        from .codeformer import CodeFormerRestoration
+        restorer = CodeFormerRestoration()
+        return restorer
+
+    # Upscale Models
+    def load_esrgan(self, esrgan_bg_tile=400):
+        """Create a Real-ESRGAN helper using ``esrgan_bg_tile`` as its tile size."""
+        from .realesrgan import ESRGAN
+        upscaler = ESRGAN(esrgan_bg_tile)
+        print('>> ESRGAN Initialized')
+        return upscaler
--- a/invokeai/backend/restoration/codeformer.py
+++ b/invokeai/backend/restoration/codeformer.py
@ -0,0 +1,108 @@
+import os
+import torch
+import numpy as np
+import warnings
+import sys
+from invokeai.backend.globals import Globals
+
+pretrained_model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth'
+
+class CodeFormerRestoration():
+    """Face restoration using the CodeFormer network.
+
+    The network is built and its weights are loaded on every ``process``
+    call, then released again before returning.
+    """
+
+    def __init__(self,
+            codeformer_dir='models/codeformer',
+            codeformer_model_path='codeformer.pth') -> None:
+        """
+        Args:
+            codeformer_dir: directory holding the model; a relative path is
+                resolved against ``Globals.root``.
+            codeformer_model_path: file name of the weights inside that directory.
+        """
+        if not os.path.isabs(codeformer_dir):
+            codeformer_dir = os.path.join(Globals.root, codeformer_dir)
+
+        self.model_path = os.path.join(codeformer_dir, codeformer_model_path)
+        self.codeformer_model_exists = os.path.isfile(self.model_path)
+
+        if not self.codeformer_model_exists:
+            print('## NOT FOUND: CodeFormer model not found at ' + self.model_path)
+        sys.path.append(os.path.abspath(codeformer_dir))
+
+    def process(self, image, strength, device, seed=None, fidelity=0.75):
+        """
+        Restore the faces found in ``image`` and return a new PIL image.
+
+        Args:
+            image: PIL image to restore (converted to RGB internally).
+            strength: blend factor; values below 1.0 blend the restored
+                result with the (RGB) input via ``Image.blend``.
+            device: torch device the network and face crops are moved to.
+            seed: only used for the progress message.
+            fidelity: passed to the network as its ``w`` argument.
+        """
+        if seed is not None:
+            print(f'>> CodeFormer - Restoring Faces for image seed:{seed}')
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', category=DeprecationWarning)
+            warnings.filterwarnings('ignore', category=UserWarning)
+
+            from basicsr.utils.download_util import load_file_from_url
+            from basicsr.utils import img2tensor, tensor2img
+            from facexlib.utils.face_restoration_helper import FaceRestoreHelper
+            # The architecture module lives next to this file in the
+            # restoration package; the old ``ldm.invoke`` path is gone.
+            from .codeformer_arch import CodeFormer
+            from torchvision.transforms.functional import normalize
+            from PIL import Image
+
+            cf_class = CodeFormer
+
+            cf = cf_class(
+                dim_embd=512,
+                codebook_size=1024,
+                n_head=8,
+                n_layers=9,
+                connect_list=['32', '64', '128', '256']
+            ).to(device)
+
+            # note that this file should already be downloaded and cached at
+            # this point
+            checkpoint_path = load_file_from_url(url=pretrained_model_url,
+                                                 model_dir=os.path.abspath(os.path.dirname(self.model_path)),
+                                                 progress=True
+            )
+            checkpoint = torch.load(checkpoint_path)['params_ema']
+            cf.load_state_dict(checkpoint)
+            cf.eval()
+
+            image = image.convert('RGB')
+            # Codeformer expects a BGR np array; make array and flip channels
+            bgr_image_array = np.array(image, dtype=np.uint8)[...,::-1]
+
+            face_helper = FaceRestoreHelper(
+                upscale_factor=1,
+                use_parse=True,
+                device=device,
+                model_rootpath=os.path.join(Globals.root,'models','gfpgan','weights'),
+            )
+            face_helper.clean_all()
+            face_helper.read_image(bgr_image_array)
+            face_helper.get_face_landmarks_5(resize=640, eye_dist_threshold=5)
+            face_helper.align_warp_face()
+
+            for idx, cropped_face in enumerate(face_helper.cropped_faces):
+                cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
+                normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
+                cropped_face_t = cropped_face_t.unsqueeze(0).to(device)
+
+                try:
+                    with torch.no_grad():
+                        output = cf(cropped_face_t, w=fidelity, adain=True)[0]
+                        restored_face = tensor2img(output.squeeze(0), rgb2bgr=True, min_max=(-1, 1))
+                    del output
+                    torch.cuda.empty_cache()
+                except RuntimeError as error:
+                    # Best effort: fall back to the unrestored crop for this face.
+                    print(f'\tFailed inference for CodeFormer: {error}.')
+                    restored_face = cropped_face
+
+                restored_face = restored_face.astype('uint8')
+                face_helper.add_restored_face(restored_face)
+
+
+            face_helper.get_inverse_affine(None)
+
+            restored_img = face_helper.paste_faces_to_input_image()
+
+            # Flip the channels back to RGB
+            res = Image.fromarray(restored_img[...,::-1])
+
+            if strength < 1.0:
+                # Resize the input to the restored image if the sizes differ.
+                # Compare the two PIL sizes: ``restored_img`` is a NumPy array
+                # whose ``.size`` is an element count, not a (width, height) pair.
+                if res.size != image.size:
+                    image = image.resize(res.size)
+                res = Image.blend(image, res, strength)
+
+            cf = None
+
+            return res
--- a/invokeai/backend/restoration/codeformer_arch.py
+++ b/invokeai/backend/restoration/codeformer_arch.py
@ -0,0 +1,275 @@
+import math
+import numpy as np
+import torch
+from torch import nn, Tensor
+import torch.nn.functional as F
+from typing import Optional, List
+
+from .vqgan_arch import *
+from basicsr.utils import get_root_logger
+from basicsr.utils.registry import ARCH_REGISTRY
+
+def calc_mean_std(feat, eps=1e-5):
+    """Return the per-sample, per-channel mean and std of a 4D feature map.
+
+    Args:
+        feat (Tensor): 4D tensor.
+        eps (float): added to the variance before the square root to
+            avoid divide-by-zero. Default: 1e-5.
+
+    Returns:
+        ``(mean, std)``, each reshaped to ``(b, c, 1, 1)``.
+    """
+    shape = feat.size()
+    assert len(shape) == 4, 'The input feature should be 4D tensor.'
+    batch, channels = shape[:2]
+    flat = feat.view(batch, channels, -1)
+    feat_std = (flat.var(dim=2) + eps).sqrt().view(batch, channels, 1, 1)
+    feat_mean = flat.mean(dim=2).view(batch, channels, 1, 1)
+    return feat_mean, feat_std
+
+def adaptive_instance_normalization(content_feat, style_feat):
+    """Adaptive instance normalization.
+
+    Re-normalizes ``content_feat`` so that its per-channel mean and std
+    match those of ``style_feat``.
+
+    Args:
+        content_feat (Tensor): The reference feature.
+        style_feat (Tensor): The degraded features whose statistics are applied.
+    """
+    shape = content_feat.size()
+    style_mean, style_std = calc_mean_std(style_feat)
+    content_mean, content_std = calc_mean_std(content_feat)
+    centered = content_feat - content_mean.expand(shape)
+    normalized = centered / content_std.expand(shape)
+    return normalized * style_std.expand(shape) + style_mean.expand(shape)
+
+
+class PositionEmbeddingSine(nn.Module):
+    """
+    Sinusoidal position embedding for image-shaped inputs, in the style of
+    the "Attention is all you need" paper but applied along both the row and
+    the column axis.
+    """
+
+    def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
+        super().__init__()
+        if normalize is False and scale is not None:
+            raise ValueError("normalize should be True if scale is passed")
+        self.num_pos_feats = num_pos_feats
+        self.temperature = temperature
+        self.normalize = normalize
+        self.scale = 2 * math.pi if scale is None else scale
+
+    @staticmethod
+    def _interleave_sin_cos(pos):
+        """Sine of the even feature slots and cosine of the odd ones, interleaved on the last axis."""
+        sines = pos[:, :, :, 0::2].sin()
+        cosines = pos[:, :, :, 1::2].cos()
+        return torch.stack((sines, cosines), dim=4).flatten(3)
+
+    def forward(self, x, mask=None):
+        """Return the embedding for ``x``; output has ``2 * num_pos_feats`` channels on dim 1."""
+        if mask is None:
+            batch, height, width = x.size(0), x.size(2), x.size(3)
+            mask = torch.zeros((batch, height, width), device=x.device, dtype=torch.bool)
+        valid = ~mask
+        # running counts of unmasked positions along rows / columns
+        rows = valid.cumsum(1, dtype=torch.float32)
+        cols = valid.cumsum(2, dtype=torch.float32)
+        if self.normalize:
+            eps = 1e-6
+            rows = rows / (rows[:, -1:, :] + eps) * self.scale
+            cols = cols / (cols[:, :, -1:] + eps) * self.scale
+
+        freq = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+        freq = self.temperature ** (2 * (freq // 2) / self.num_pos_feats)
+
+        pos_x = self._interleave_sin_cos(cols[:, :, :, None] / freq)
+        pos_y = self._interleave_sin_cos(rows[:, :, :, None] / freq)
+        return torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+
+def _get_activation_fn(activation):
+    """Return the activation function named by ``activation``.
+
+    Supported names are "relu", "gelu" and "glu".
+
+    Raises:
+        RuntimeError: if ``activation`` is not one of the supported names.
+    """
+    supported = (("relu", F.relu), ("gelu", F.gelu), ("glu", F.glu))
+    for name, fn in supported:
+        if activation == name:
+            return fn
+    # List every accepted name so the message matches what is actually supported.
+    raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")
+
+
+class TransformerSALayer(nn.Module):
+    """Transformer block that normalizes before each sub-layer: self-attention, then an MLP, each added back residually."""
+
+    def __init__(self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"):
+        super().__init__()
+        # attention sub-layer
+        self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout)
+
+        # feed-forward (MLP) sub-layer
+        self.linear1 = nn.Linear(embed_dim, dim_mlp)
+        self.dropout = nn.Dropout(dropout)
+        self.linear2 = nn.Linear(dim_mlp, embed_dim)
+
+        # one LayerNorm / Dropout pair per sub-layer
+        self.norm1 = nn.LayerNorm(embed_dim)
+        self.norm2 = nn.LayerNorm(embed_dim)
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(dropout)
+
+        self.activation = _get_activation_fn(activation)
+
+    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+        """Add ``pos`` to ``tensor`` unless ``pos`` is None."""
+        if pos is None:
+            return tensor
+        return tensor + pos
+
+    def forward(self, tgt,
+                tgt_mask: Optional[Tensor] = None,
+                tgt_key_padding_mask: Optional[Tensor] = None,
+                query_pos: Optional[Tensor] = None):
+        """Apply the attention and MLP sub-layers to ``tgt`` and return the result."""
+        # self attention: queries and keys carry the position embedding, values do not
+        normed = self.norm1(tgt)
+        query_key = self.with_pos_embed(normed, query_pos)
+        attended = self.self_attn(
+            query_key,
+            query_key,
+            value=normed,
+            attn_mask=tgt_mask,
+            key_padding_mask=tgt_key_padding_mask,
+        )[0]
+        tgt = tgt + self.dropout1(attended)
+
+        # ffn
+        normed = self.norm2(tgt)
+        hidden = self.activation(self.linear1(normed))
+        projected = self.linear2(self.dropout(hidden))
+        return tgt + self.dropout2(projected)
+
+class Fuse_sft_block(nn.Module):
+    """Fuses encoder features into decoder features through a learned scale and shift."""
+
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+
+        def conv_branch():
+            # conv -> LeakyReLU -> conv, shared layout of the scale and shift heads
+            return nn.Sequential(
+                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
+                nn.LeakyReLU(0.2, True),
+                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
+            )
+
+        self.encode_enc = ResBlock(2*in_ch, out_ch)
+        self.scale = conv_branch()
+        self.shift = conv_branch()
+
+    def forward(self, enc_feat, dec_feat, w=1):
+        """Return ``dec_feat`` plus ``w`` times the scaled-and-shifted ``dec_feat``."""
+        fused = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1))
+        scale = self.scale(fused)
+        shift = self.shift(fused)
+        return dec_feat + w * (dec_feat * scale + shift)
+
+
+@ARCH_REGISTRY.register()
+class CodeFormer(VQAutoEncoder):
+    """
+    VQAutoEncoder extended with a stack of transformer layers that predicts
+    codebook indices from the encoder output, plus fusion blocks that mix
+    encoder features back into the generator.
+
+    Args:
+        dim_embd: embedding width of the transformer layers and of the
+            learned position embedding.
+        n_head: number of attention heads per transformer layer.
+        n_layers: number of transformer layers.
+        codebook_size: forwarded to the parent constructor and used as the
+            output width of the index-prediction head.
+        latent_size: number of rows of the learned position embedding.
+        connect_list: feature sizes (string keys of ``self.channels``) for
+            which a fusion block is created and applied.
+        fix_modules: names of attributes whose parameters get
+            ``requires_grad = False``; ``None`` freezes nothing.
+    """
+    def __init__(self, dim_embd=512, n_head=8, n_layers=9,
+                codebook_size=1024, latent_size=256,
+                connect_list=['32', '64', '128', '256'],
+                fix_modules=['quantize','generator']):
+        super(CodeFormer, self).__init__(512, 64, [1, 2, 2, 4, 4, 8], 'nearest',2, [16], codebook_size)
+
+        # freeze the requested sub-modules
+        if fix_modules is not None:
+            for module in fix_modules:
+                for param in getattr(self, module).parameters():
+                    param.requires_grad = False
+
+        self.connect_list = connect_list
+        self.n_layers = n_layers
+        self.dim_embd = dim_embd
+        self.dim_mlp = dim_embd*2
+
+        # learned position embedding (one row per latent position) and the
+        # projection of the 256-wide encoder features to the embedding width
+        self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd))
+        self.feat_emb = nn.Linear(256, self.dim_embd)
+
+        # transformer
+        self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
+                                    for _ in range(self.n_layers)])
+
+        # logits_predict head
+        self.idx_pred_layer = nn.Sequential(
+            nn.LayerNorm(dim_embd),
+            nn.Linear(dim_embd, codebook_size, bias=False))
+
+        # channel count used for the fusion block at each feature size
+        self.channels = {
+            '16': 512,
+            '32': 256,
+            '64': 256,
+            '128': 128,
+            '256': 128,
+            '512': 64,
+        }
+
+        # after second residual block for > 16, before attn layer for ==16
+        self.fuse_encoder_block = {'512':2, '256':5, '128':8, '64':11, '32':14, '16':18}
+        # after first residual block for > 16, before attn layer for ==16
+        self.fuse_generator_block = {'16':6, '32': 9, '64':12, '128':15, '256':18, '512':21}
+
+        # fuse_convs_dict
+        self.fuse_convs_dict = nn.ModuleDict()
+        for f_size in self.connect_list:
+            in_ch = self.channels[f_size]
+            self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch)
+
+    def _init_weights(self, module):
+        """Initialize ``module`` in place: normal(0, 0.02) weights for Linear/Embedding,
+        zero Linear bias, and zero bias / unit weight for LayerNorm."""
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=0.02)
+            if isinstance(module, nn.Linear) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+
+    def forward(self, x, w=0, detach_16=True, code_only=False, adain=False):
+        """
+        Run encoder, transformer, codebook lookup and generator.
+
+        Args:
+            x: input batch fed to the encoder blocks.
+            w: fusion weight; encoder features are fused into the generator
+                only when ``w > 0``.
+            detach_16: detach the looked-up codebook features from the graph.
+            code_only: return right after the logits are computed.
+            adain: apply adaptive instance normalization to the codebook
+                features using the encoder output statistics.
+
+        Returns:
+            ``(logits, lq_feat)`` when ``code_only`` is true, otherwise
+            ``(out, logits, lq_feat)``.
+        """
+        # ################### Encoder #####################
+        enc_feat_dict = {}
+        out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
+        for i, block in enumerate(self.encoder.blocks):
+            x = block(x)
+            if i in out_list:
+                # keep a copy keyed by the feature map's last-dimension size
+                enc_feat_dict[str(x.shape[-1])] = x.clone()
+
+        lq_feat = x
+        # ################# Transformer ###################
+        # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat)
+        pos_emb = self.position_emb.unsqueeze(1).repeat(1,x.shape[0],1)
+        # BCHW -> BC(HW) -> (HW)BC
+        feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2,0,1))
+        query_emb = feat_emb
+        # Transformer encoder
+        for layer in self.ft_layers:
+            query_emb = layer(query_emb, query_pos=pos_emb)
+
+        # output logits
+        logits = self.idx_pred_layer(query_emb) # (hw)bn
+        logits = logits.permute(1,0,2) # (hw)bn -> b(hw)n
+
+        if code_only: # for training stage II
+            # logits doesn't need softmax before cross_entropy loss
+            return logits, lq_feat
+
+        # ################# Quantization ###################
+        # if self.training:
+        #     quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight])
+        #     # b(hw)c -> bc(hw) -> bchw
+        #     quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape)
+        # ------------
+        # pick the most likely codebook entry per position
+        soft_one_hot = F.softmax(logits, dim=2)
+        _, top_idx = torch.topk(soft_one_hot, 1, dim=2)
+        # NOTE(review): the lookup shape is hard-coded to 16x16x256 — confirm this matches the encoder output
+        quant_feat = self.quantize.get_codebook_feat(top_idx, shape=[x.shape[0],16,16,256])
+        # preserve gradients
+        # quant_feat = lq_feat + (quant_feat - lq_feat).detach()
+
+        if detach_16:
+            quant_feat = quant_feat.detach() # for training stage III
+        if adain:
+            quant_feat = adaptive_instance_normalization(quant_feat, lq_feat)
+
+        # ################## Generator ####################
+        x = quant_feat
+        fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]
+
+        for i, block in enumerate(self.generator.blocks):
+            x = block(x)
+            if i in fuse_list: # fuse after i-th block
+                f_size = str(x.shape[-1])
+                if w>0:
+                    x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w)
+        out = x
+        # logits doesn't need softmax before cross_entropy loss
+        return out, logits, lq_feat
--- a/invokeai/backend/restoration/gfpgan.py
+++ b/invokeai/backend/restoration/gfpgan.py
@ -0,0 +1,87 @@
+import torch
+import warnings
+import os
+import sys
+import numpy as np
+from invokeai.backend.globals import Globals
+
+from PIL import Image
+
+
+class GFPGAN():
+    """Face restoration using GFPGAN.
+
+    The ``GFPGANer`` object is created on every ``process`` call and
+    released again before returning.
+    """
+
+    def __init__(
+            self,
+            gfpgan_model_path='models/gfpgan/GFPGANv1.4.pth'
+    ) -> None:
+        """
+        Args:
+            gfpgan_model_path: path to the model file; a relative path is
+                resolved against ``Globals.root``.
+        """
+        if not os.path.isabs(gfpgan_model_path):
+            gfpgan_model_path=os.path.abspath(os.path.join(Globals.root,gfpgan_model_path))
+        self.model_path = gfpgan_model_path
+        self.gfpgan_model_exists = os.path.isfile(self.model_path)
+        # Always define the attribute so process() can test it safely even
+        # when creating the GFPGANer fails.
+        self.gfpgan = None
+
+        if not self.gfpgan_model_exists:
+            print('## NOT FOUND: GFPGAN model not found at ' + self.model_path)
+
+    def model_exists(self):
+        """Return True if the model file is currently present on disk."""
+        return os.path.isfile(self.model_path)
+
+    def process(self, image, strength: float, seed: str = None):
+        """
+        Restore the faces in ``image`` and return a new PIL image.
+
+        Args:
+            image: PIL image to restore (converted to RGB internally).
+            strength: blend factor; values below 1.0 blend the restored
+                result with the (RGB) input via ``Image.blend``.
+            seed: only used for the progress message.
+
+        Returns:
+            The restored image, or ``image`` unchanged when GFPGAN could
+            not be initialized.
+        """
+        if seed is not None:
+            print(f'>> GFPGAN - Restoring Faces for image seed:{seed}')
+
+        self.gfpgan = None
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', category=DeprecationWarning)
+            warnings.filterwarnings('ignore', category=UserWarning)
+            cwd = os.getcwd()
+            # NOTE(review): presumably GFPGANer resolves auxiliary files
+            # relative to the working directory — confirm.
+            os.chdir(os.path.join(Globals.root,'models'))
+            try:
+                from gfpgan import GFPGANer
+                self.gfpgan = GFPGANer(
+                    model_path=self.model_path,
+                    upscale=1,
+                    arch='clean',
+                    channel_multiplier=2,
+                    bg_upsampler=None,
+                )
+            except Exception:
+                import traceback
+                print('>> Error loading GFPGAN:', file=sys.stderr)
+                print(traceback.format_exc(), file=sys.stderr)
+            finally:
+                # restore the working directory no matter how loading ended
+                os.chdir(cwd)
+
+        if self.gfpgan is None:
+            print(
+                f'>> WARNING: GFPGAN not initialized.'
+            )
+            print(
+                f'>> Download https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth to {self.model_path}'
+            )
+            # Nothing to restore with; hand the input back instead of crashing.
+            return image
+
+        image = image.convert('RGB')
+
+        # GFPGAN expects a BGR np array; make array and flip channels
+        bgr_image_array = np.array(image, dtype=np.uint8)[...,::-1]
+
+        _, _, restored_img = self.gfpgan.enhance(
+            bgr_image_array,
+            has_aligned=False,
+            only_center_face=False,
+            paste_back=True,
+        )
+
+        # Flip the channels back to RGB
+        res = Image.fromarray(restored_img[...,::-1])
+
+        if strength < 1.0:
+            # Resize the input to the restored image if the sizes differ.
+            # Compare the two PIL sizes: ``restored_img`` is a NumPy array
+            # whose ``.size`` is an element count, not a (width, height) pair.
+            if res.size != image.size:
+                image = image.resize(res.size)
+            res = Image.blend(image, res, strength)
+
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        self.gfpgan = None
+
+        return res
--- a/invokeai/backend/restoration/outcrop.py
+++ b/invokeai/backend/restoration/outcrop.py
@ -0,0 +1,108 @@
+import warnings
+import math
+from PIL import Image, ImageFilter
+
+class Outcrop(object):
+    """Extend an image on one or more sides and let the generator fill in the new area."""
+
+    def __init__(
+            self,
+            image,
+            generate,  # current generate object
+    ):
+        self.image     = image
+        self.generate  = generate
+
+    def process (
+            self,
+            extents:dict,
+            opt,                   # current options
+            orig_opt,              # ones originally used to generate the image
+            image_callback = None,
+            prefix = None
+    ):
+        """
+        Extend the image by ``extents`` and run ``generate.prompt2image`` on it.
+
+        Args:
+            extents: maps a direction ('top', 'left', 'bottom', 'right') to
+                the number of pixels to add on that side.
+            opt: current options (prompt, seed, sampler_name, steps, ...).
+            orig_opt: options originally used to generate the image; its
+                seed is used as a fallback and for the callback.
+            image_callback: optional callable invoked with the image, the
+                preferred seed and ``use_prefix=prefix``.
+            prefix: forwarded to ``image_callback`` as ``use_prefix``.
+
+        Returns:
+            Whatever ``generate.prompt2image`` returns.
+        """
+        # grow and mask the image
+        extended_image = self._extend_all(extents)
+
+        # switch samplers temporarily
+        curr_sampler = self.generate.sampler
+        self.generate.sampler_name = opt.sampler_name
+        self.generate._set_sampler()
+
+        def wrapped_callback(img,seed,**kwargs):
+            preferred_seed = orig_opt.seed if orig_opt.seed is not None and orig_opt.seed >= 0 else seed
+            image_callback(img,preferred_seed,use_prefix=prefix,**kwargs)
+
+        try:
+            result= self.generate.prompt2image(
+                opt.prompt,
+                seed        = opt.seed or orig_opt.seed,
+                sampler     = self.generate.sampler,
+                steps       = opt.steps,
+                cfg_scale   = opt.cfg_scale,
+                ddim_eta    = self.generate.ddim_eta,
+                width       = extended_image.width,
+                height      = extended_image.height,
+                init_img    = extended_image,
+                strength    = 0.90,
+                image_callback = wrapped_callback if image_callback else None,
+                seam_size = opt.seam_size or 96,
+                seam_blur = opt.seam_blur or 16,
+                seam_strength = opt.seam_strength or 0.7,
+                seam_steps = 20,
+                tile_size = 32,
+                color_match = True,
+                force_outpaint = True,  # this just stops the warning about erased regions
+            )
+        finally:
+            # swap sampler back, even when generation fails, so the shared
+            # generate object is not left with the temporary sampler
+            self.generate.sampler = curr_sampler
+        return result
+
+    def _extend_all(
+            self,
+            extents:dict,
+    ) -> "Image.Image":
+        '''
+        Extend the image in direction ('top','bottom','left','right') by
+        the indicated value, rounded up to a multiple of 64. The image
+        canvas is extended and the new rectangular section is filled with
+        black and made fully transparent so it can serve as a mask.
+        '''
+        image = self.image
+        for direction in extents:
+            assert direction in ['top', 'left', 'bottom', 'right'],'Direction must be one of "top", "left", "bottom", "right"'
+            pixels = extents[direction]
+            # round pixels up to the nearest 64
+            pixels = math.ceil(pixels/64) * 64
+            print(f'>> extending image {direction}ward by {pixels} pixels')
+            image = self._rotate(image,direction)
+            image = self._extend(image,pixels)
+            image = self._rotate(image,direction,reverse=True)
+        return image
+
+    def _rotate(self,image:"Image.Image",direction:str,reverse=False) -> "Image.Image":
+        '''
+        Rotates image so that the area to extend is always at the top.
+        Simplifies logic later. The reverse argument, if true, will undo the
+        previous transpose. 'top' (or any unknown direction) is returned
+        unchanged.
+        '''
+        transposes = {
+            'right':  ['ROTATE_90','ROTATE_270'],
+            'bottom': ['ROTATE_180','ROTATE_180'],
+            'left':   ['ROTATE_270','ROTATE_90']
+        }
+        if direction not in transposes:
+            return image
+        transpose = transposes[direction][1 if reverse else 0]
+        return image.transpose(getattr(Image.Transpose, transpose))
+
+    def _extend(self,image:"Image.Image",pixels:int)-> "Image.Image":
+        '''
+        Return a new RGBA image that is ``pixels`` taller than ``image``,
+        with the original pasted at the bottom and the new top strip
+        fully transparent.
+        '''
+        extended_img = Image.new('RGBA',(image.width,image.height+pixels))
+
+        extended_img.paste((0,0,0),[0,0,image.width,image.height+pixels])
+        extended_img.paste(image,box=(0,pixels))
+
+        # now make the top part transparent to use as a mask
+        alpha = extended_img.getchannel('A')
+        alpha.paste(0,(0,0,extended_img.width,pixels))
+        extended_img.putalpha(alpha)
+
+        return extended_img
--- a/invokeai/backend/restoration/outpaint.py
+++ b/invokeai/backend/restoration/outpaint.py
@ -0,0 +1,92 @@
+import warnings
+import math
+from PIL import Image, ImageFilter
+
+class Outpaint(object):
+    """Outpaint one side of an image by mirroring part of it into a transparent strip and regenerating."""
+
+    def __init__(self, image, generate):
+        self.image     = image
+        self.generate  = generate
+
+    def process(self, opt, old_opt, image_callback = None, prefix = None):
+        """
+        Build the outpaint source image and run ``generate.prompt2image`` on it.
+
+        Args:
+            opt: current options (out_direction, steps, cfg_scale, width, height).
+            old_opt: options of the original generation; supplies seed and prompt.
+            image_callback: optional callable invoked with the image, the
+                seed and ``use_prefix=prefix``.
+            prefix: forwarded to the callback and to ``prompt2image``.
+
+        Returns:
+            Whatever ``generate.prompt2image`` returns.
+        """
+        image = self._create_outpaint_image(self.image, opt.out_direction)
+
+        seed   = old_opt.seed
+        prompt = old_opt.prompt
+
+        def wrapped_callback(img,seed,**kwargs):
+            image_callback(img,seed,use_prefix=prefix,**kwargs)
+
+
+        return self.generate.prompt2image(
+            prompt,
+            seed           = seed,
+            sampler        = self.generate.sampler,
+            steps          = opt.steps,
+            cfg_scale      = opt.cfg_scale,
+            ddim_eta       = self.generate.ddim_eta,
+            width          = opt.width,
+            height         = opt.height,
+            init_img       = image,
+            strength       = 0.83,
+            # only install the wrapper when there is a callback to wrap;
+            # otherwise the wrapper would try to call None
+            image_callback = wrapped_callback if image_callback else None,
+            prefix         = prefix,
+        )
+
+    def _create_outpaint_image(self, image, direction_args):
+        """
+        Return an RGBA copy of ``image`` shifted away from the requested
+        side, with the freed strip filled by a mirrored, fully transparent
+        copy of the adjacent region.
+
+        ``direction_args`` is ``[direction]`` or ``[direction, pixels]``;
+        ``pixels`` defaults to half of the (rotated) image height.
+        """
+        assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'
+
+        direction = direction_args[0]
+        pixels = int(direction_args[1]) if len(direction_args) == 2 else None
+
+        assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'
+
+        image = image.convert("RGBA")
+        # we always extend top, but rotate to extend along the requested side
+        if direction == 'left':
+            image = image.transpose(Image.Transpose.ROTATE_270)
+        elif direction == 'bottom':
+            image = image.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            image = image.transpose(Image.Transpose.ROTATE_90)
+
+        pixels = image.height//2 if pixels is None else int(pixels)
+        assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'
+
+        # the top part of the image is taken from the source image mirrored
+        # coordinates (0,0) are the upper left corner of an image
+        top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
+        top = top.crop((0, top.height - pixels, top.width, top.height))
+
+        # setting all alpha of the top part to 0
+        alpha = top.getchannel("A")
+        alpha.paste(0, (0, 0, top.width, top.height))
+        top.putalpha(alpha)
+
+        # taking the bottom from the original image
+        bottom = image.crop((0, 0, image.width, image.height - pixels))
+
+        new_img = image.copy()
+        new_img.paste(top, (0, 0))
+        new_img.paste(bottom, (0, pixels))
+
+        # create a 10% dither in the middle; clamp the band to the image so a
+        # large ``pixels`` value cannot index past the last row
+        dither = min(image.height//10, pixels)
+        dither_end = min(pixels + dither, new_img.height)
+        for x in range(0, image.width, 2):
+            for y in range(pixels - dither, dither_end):
+                (r, g, b, a) = new_img.getpixel((x, y))
+                new_img.putpixel((x, y), (r, g, b, 0))
+
+        # let's rotate back again
+        if direction == 'left':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_90)
+        elif direction == 'bottom':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_270)
+
+        return new_img
+
--- a/invokeai/backend/restoration/realesrgan.py
+++ b/invokeai/backend/restoration/realesrgan.py
@ -0,0 +1,92 @@
+import torch
+import warnings
+import numpy as np
+import os
+
+from invokeai.backend.globals import Globals
+from PIL import Image
+from PIL.Image import Image as ImageType
+
+class ESRGAN():
+    """Image upscaling using Real-ESRGAN; the upsampler is created on every ``process`` call."""
+
+    def __init__(self, bg_tile_size=400) -> None:
+        """
+        Args:
+            bg_tile_size: passed to ``RealESRGANer`` as its ``tile`` argument.
+        """
+        self.bg_tile_size = bg_tile_size
+
+    def load_esrgan_bg_upsampler(self, denoise_str):
+        """
+        Build a ``RealESRGANer`` that interpolates between the general and
+        the "wdn" x4v3 models with weights ``[denoise_str, 1 - denoise_str]``.
+        """
+        # half precision only when CUDA is available (not on CPU or MPS on M1)
+        use_half_precision = torch.cuda.is_available()
+
+        from realesrgan.archs.srvgg_arch import SRVGGNetCompact
+        from realesrgan import RealESRGANer
+
+        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
+        model_path = os.path.join(Globals.root, 'models/realesrgan/realesr-general-x4v3.pth')
+        wdn_model_path = os.path.join(Globals.root, 'models/realesrgan/realesr-general-wdn-x4v3.pth')
+        scale = 4
+
+        bg_upsampler = RealESRGANer(
+            scale=scale,
+            model_path=[model_path, wdn_model_path],
+            model=model,
+            tile=self.bg_tile_size,
+            dni_weight=[denoise_str, 1 - denoise_str],
+            tile_pad=10,
+            pre_pad=0,
+            half=use_half_precision,
+        )
+
+        return bg_upsampler
+
+    def process(self, image: ImageType, strength: float, seed: str = None, upsampler_scale: int = 2, denoise_str: float = 0.75):
+        """
+        Upscale ``image`` and return a new PIL image.
+
+        Args:
+            image: PIL image to upscale (converted to RGB internally).
+            strength: blend factor; values below 1.0 blend the upscaled
+                result with the resized input via ``Image.blend``.
+            seed: only used for the progress message.
+            upsampler_scale: output scale; 0 is rejected and returns the input.
+            denoise_str: denoise weight forwarded to the upsampler loader.
+
+        Returns:
+            The upscaled image, or ``image`` unchanged when the scale is 0
+            or the upsampler could not be loaded.
+        """
+        # Reject the invalid scale before paying for the model load.
+        if upsampler_scale == 0:
+            print('>> Real-ESRGAN: Invalid scaling option. Image not upscaled.')
+            return image
+
+        upsampler = None
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', category=DeprecationWarning)
+            warnings.filterwarnings('ignore', category=UserWarning)
+
+            try:
+                upsampler = self.load_esrgan_bg_upsampler(denoise_str)
+            except Exception:
+                import traceback
+                import sys
+                print('>> Error loading Real-ESRGAN:', file=sys.stderr)
+                print(traceback.format_exc(), file=sys.stderr)
+
+        if upsampler is None:
+            # Loading failed (details were printed above); hand the input
+            # back instead of failing on an unbound upsampler.
+            print('>> WARNING: Real-ESRGAN not initialized. Image not upscaled.')
+            return image
+
+        if seed is not None:
+            print(
+                f'>> Real-ESRGAN Upscaling seed:{seed}, scale:{upsampler_scale}x, tile:{self.bg_tile_size}, denoise:{denoise_str}'
+            )
+        # ESRGAN outputs images with partial transparency if given RGBA images; convert to RGB
+        image = image.convert("RGB")
+
+        # REALSRGAN expects a BGR np array; make array and flip channels
+        bgr_image_array = np.array(image, dtype=np.uint8)[...,::-1]
+
+        output, _ = upsampler.enhance(
+            bgr_image_array,
+            outscale=upsampler_scale,
+            alpha_upsampler='realesrgan',
+        )
+
+        # Flip the channels back to RGB
+        res = Image.fromarray(output[...,::-1])
+
+        if strength < 1.0:
+            # Resize the input to the upscaled image if the sizes differ.
+            # Compare the two PIL sizes: ``output`` is a NumPy array whose
+            # ``.size`` is an element count, not a (width, height) pair.
+            if res.size != image.size:
+                image = image.resize(res.size)
+            res = Image.blend(image, res, strength)
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        upsampler = None
+
+        return res
--- a/invokeai/backend/restoration/vqgan_arch.py
+++ b/invokeai/backend/restoration/vqgan_arch.py
@ -0,0 +1,435 @@
+'''
+VQGAN code, adapted from the original created by the Unleashing Transformers authors:
+https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py
+
+'''
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import copy
+from basicsr.utils import get_root_logger
+from basicsr.utils.registry import ARCH_REGISTRY
+
+def normalize(in_channels):
+    '''Return the GroupNorm layer (32 groups, eps=1e-6, affine) used by the blocks in this module.'''
+    group_norm = nn.GroupNorm(32, in_channels, eps=1e-6, affine=True)
+    return group_norm
+
+
+# Swish activation, x * sigmoid(x), compiled with TorchScript.
+@torch.jit.script
+def swish(x):
+    return x*torch.sigmoid(x)
+
+
+#  Define VQVAE classes
+class VectorQuantizer(nn.Module):
+    '''
+    Nearest-neighbour quantizer: every latent vector is replaced by the closest
+    entry of a learned codebook (an nn.Embedding of codebook_size x emb_dim).
+    '''
+
+    def __init__(self, codebook_size, emb_dim, beta):
+        super().__init__()
+        # number of embeddings
+        self.codebook_size = codebook_size
+        # dimension of embedding
+        self.emb_dim = emb_dim
+        # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
+        self.beta = beta
+        self.embedding = nn.Embedding(self.codebook_size, self.emb_dim)
+        bound = 1.0 / self.codebook_size
+        self.embedding.weight.data.uniform_(-bound, bound)
+
+    def forward(self, z):
+        '''
+        Quantize z of shape (batch, channel, height, width).
+
+        Returns (z_q, loss, stats): the quantized tensor in the input layout, the
+        codebook/commitment loss, and a dict of per-call statistics.
+        '''
+        # move channels last and flatten to one row per latent vector
+        z = z.permute(0, 2, 3, 1).contiguous()
+        flat = z.view(-1, self.emb_dim)
+        codebook = self.embedding.weight
+
+        # squared distances via (z - e)^2 = z^2 + e^2 - 2 e * z
+        z_sq = (flat ** 2).sum(dim=1, keepdim=True)
+        e_sq = (codebook ** 2).sum(1)
+        d = z_sq + e_sq - 2 * torch.matmul(flat, codebook.t())
+
+        mean_distance = torch.mean(d)
+        # closest codebook entry per row, together with its distance
+        min_encoding_scores, min_encoding_indices = torch.topk(d, 1, dim=1, largest=False)
+        # [0-1], higher score, higher confidence
+        min_encoding_scores = torch.exp(-min_encoding_scores/10)
+
+        # one-hot selection matrix, same dtype/device as z
+        min_encodings = torch.zeros(min_encoding_indices.shape[0], self.codebook_size).to(z)
+        min_encodings.scatter_(1, min_encoding_indices, 1)
+
+        # look the selected entries up and restore the channel-last shape
+        z_q = torch.matmul(min_encodings, codebook).view(z.shape)
+        # codebook term + beta-weighted commitment term
+        loss = torch.mean((z_q.detach()-z)**2) + self.beta * torch.mean((z_q - z.detach()) ** 2)
+        # straight-through: forward value is z_q, gradient flows to z
+        z_q = z + (z_q - z).detach()
+
+        # perplexity of the code usage in this batch
+        e_mean = torch.mean(min_encodings, dim=0)
+        perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
+        # back to (batch, channel, height, width)
+        z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        stats = {
+            "perplexity": perplexity,
+            "min_encodings": min_encodings,
+            "min_encoding_indices": min_encoding_indices,
+            "min_encoding_scores": min_encoding_scores,
+            "mean_distance": mean_distance
+        }
+        return z_q, loss, stats
+
+    def get_codebook_feat(self, indices, shape):
+        '''
+        Look up codebook vectors for `indices`.
+
+        indices: batch*token_num -> reshaped to (batch*token_num)*1
+        shape: (batch, height, width, channel), or None to keep the flat result
+        '''
+        indices = indices.view(-1,1)
+        one_hot = torch.zeros(indices.shape[0], self.codebook_size).to(indices)
+        one_hot.scatter_(1, indices, 1)
+        # get quantized latent vectors
+        z_q = torch.matmul(one_hot.float(), self.embedding.weight)
+
+        if shape is None:
+            return z_q
+        # reshape back to match original input shape
+        return z_q.view(shape).permute(0, 3, 1, 2).contiguous()
+
+
+class GumbelQuantizer(nn.Module):
+    '''
+    Quantizer that selects codebook entries through a Gumbel-softmax over
+    logits produced by a 1x1 convolution.
+    '''
+
+    def __init__(self, codebook_size, emb_dim, num_hiddens, straight_through=False, kl_weight=5e-4, temp_init=1.0):
+        super().__init__()
+        # number of embeddings
+        self.codebook_size = codebook_size
+        # dimension of embedding
+        self.emb_dim = emb_dim
+        self.straight_through = straight_through
+        self.temperature = temp_init
+        self.kl_weight = kl_weight
+        # projects last encoder layer to quantized logits
+        self.proj = nn.Conv2d(num_hiddens, codebook_size, 1)
+        self.embed = nn.Embedding(codebook_size, emb_dim)
+
+    def forward(self, z):
+        '''Return (z_q, kl_term, {"min_encoding_indices": ...}) for the input z.'''
+        # outside of training the selection is always hard (one-hot)
+        if self.training:
+            hard = self.straight_through
+        else:
+            hard = True
+
+        logits = self.proj(z)
+        one_hot = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=hard)
+        z_q = torch.einsum("b n h w, n d -> b d h w", one_hot, self.embed.weight)
+
+        # + kl divergence to the prior loss
+        qy = F.softmax(logits, dim=1)
+        kl_per_position = torch.sum(qy * torch.log(qy * self.codebook_size + 1e-10), dim=1)
+        diff = self.kl_weight * kl_per_position.mean()
+
+        stats = {
+            "min_encoding_indices": one_hot.argmax(dim=1)
+        }
+        return z_q, diff, stats
+
+
+class Downsample(nn.Module):
+    '''Stride-2 3x3 convolution applied after padding the right and bottom edges by one pixel.'''
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
+
+    def forward(self, x):
+        # (left, right, top, bottom): zero-pad one pixel on the right and bottom only
+        padded = F.pad(x, (0, 1, 0, 1), mode="constant", value=0)
+        return self.conv(padded)
+
+
+class Upsample(nn.Module):
+    '''Nearest-neighbour 2x upsampling followed by a 3x3 convolution.'''
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x):
+        enlarged = F.interpolate(x, scale_factor=2.0, mode="nearest")
+        return self.conv(enlarged)
+
+
+class ResBlock(nn.Module):
+    '''
+    Residual block: two (normalize -> swish -> 3x3 conv) stages plus a skip connection.
+
+    in_channels: channels of the input tensor.
+    out_channels: channels of the output tensor; defaults to in_channels when None.
+    When the two differ, the skip path goes through a 1x1 convolution so the
+    shapes match for the final addition.
+    '''
+
+    def __init__(self, in_channels, out_channels=None):
+        super(ResBlock, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = in_channels if out_channels is None else out_channels
+        # Build every layer from the resolved self.out_channels; using the raw
+        # argument made the documented `out_channels=None` default crash.
+        self.norm1 = normalize(self.in_channels)
+        self.conv1 = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
+        self.norm2 = normalize(self.out_channels)
+        self.conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
+        if self.in_channels != self.out_channels:
+            self.conv_out = nn.Conv2d(self.in_channels, self.out_channels, kernel_size=1, stride=1, padding=0)
+
+    def forward(self, x_in):
+        x = x_in
+        x = self.norm1(x)
+        x = swish(x)
+        x = self.conv1(x)
+        x = self.norm2(x)
+        x = swish(x)
+        x = self.conv2(x)
+        # project the skip path only when the channel count changes
+        if self.in_channels != self.out_channels:
+            x_in = self.conv_out(x_in)
+
+        return x + x_in
+
+
+class AttnBlock(nn.Module):
+    '''
+    Self-attention over the spatial positions of a feature map, added back
+    to the input as a residual. q, k, v and the output projection are 1x1 convolutions.
+    '''
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.in_channels = in_channels
+
+        def pointwise_conv():
+            # channel-preserving 1x1 convolution
+            return torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+
+        # creation order is kept as norm, q, k, v, proj_out
+        self.norm = normalize(in_channels)
+        self.q = pointwise_conv()
+        self.k = pointwise_conv()
+        self.v = pointwise_conv()
+        self.proj_out = pointwise_conv()
+
+    def forward(self, x):
+        normed = self.norm(x)
+        q = self.q(normed)
+        k = self.k(normed)
+        v = self.v(normed)
+
+        # compute attention
+        b, c, h, w = q.shape
+        q = q.reshape(b, c, h*w).permute(0, 2, 1)
+        k = k.reshape(b, c, h*w)
+        attn = torch.bmm(q, k) * (int(c)**(-0.5))
+        attn = F.softmax(attn, dim=2)
+
+        # attend to values
+        v = v.reshape(b, c, h*w)
+        attended = torch.bmm(v, attn.permute(0, 2, 1)).reshape(b, c, h, w)
+
+        return x+self.proj_out(attended)
+
+
+class Encoder(nn.Module):
+    '''
+    Convolutional encoder built as a flat list of blocks that forward() applies in order.
+
+    The list holds, in this order: an input convolution; for each entry of ch_mult,
+    num_res_blocks ResBlocks (each followed by an AttnBlock while the tracked
+    resolution is in attn_resolutions) and, except after the last entry, a Downsample;
+    a ResBlock/AttnBlock/ResBlock group; and a final normalize + convolution to emb_dim channels.
+    '''
+    def __init__(self, in_channels, nf, emb_dim, ch_mult, num_res_blocks, resolution, attn_resolutions):
+        super().__init__()
+        self.nf = nf
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.attn_resolutions = attn_resolutions
+
+        # curr_res tracks the nominal resolution; it is halved after every Downsample
+        curr_res = self.resolution
+        # prepend 1 so level i takes its input width from the previous level's multiplier
+        in_ch_mult = (1,)+tuple(ch_mult)
+
+        blocks = []
+        # initial convolution
+        blocks.append(nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1))
+
+        # residual and downsampling blocks, with attention on smaller res (16x16)
+        for i in range(self.num_resolutions):
+            block_in_ch = nf * in_ch_mult[i]
+            block_out_ch = nf * ch_mult[i]
+            for _ in range(self.num_res_blocks):
+                blocks.append(ResBlock(block_in_ch, block_out_ch))
+                block_in_ch = block_out_ch
+                if curr_res in attn_resolutions:
+                    blocks.append(AttnBlock(block_in_ch))
+
+            # no downsampling after the last resolution level
+            if i != self.num_resolutions - 1:
+                blocks.append(Downsample(block_in_ch))
+                curr_res = curr_res // 2
+
+        # non-local attention block
+        blocks.append(ResBlock(block_in_ch, block_in_ch))
+        blocks.append(AttnBlock(block_in_ch))
+        blocks.append(ResBlock(block_in_ch, block_in_ch))
+
+        # normalise and convert to latent size
+        blocks.append(normalize(block_in_ch))
+        blocks.append(nn.Conv2d(block_in_ch, emb_dim, kernel_size=3, stride=1, padding=1))
+        self.blocks = nn.ModuleList(blocks)
+
+    def forward(self, x):
+        # apply every block in construction order
+        for block in self.blocks:
+            x = block(x)
+
+        return x
+
+
+class Generator(nn.Module):
+    '''
+    Convolutional decoder built as a flat list of blocks that forward() applies in order.
+
+    The list holds, in this order: an input convolution from emb_dim channels;
+    a ResBlock/AttnBlock/ResBlock group; for each entry of ch_mult in reverse,
+    res_blocks ResBlocks (each followed by an AttnBlock while the tracked resolution
+    is in attn_resolutions) and, except after the last one, an Upsample;
+    and a final normalize + convolution to 3 output channels.
+    '''
+    def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions):
+        super().__init__()
+        self.nf = nf
+        self.ch_mult = ch_mult
+        self.num_resolutions = len(self.ch_mult)
+        self.num_res_blocks = res_blocks
+        self.resolution = img_size
+        self.attn_resolutions = attn_resolutions
+        self.in_channels = emb_dim
+        self.out_channels = 3
+        # start at the width of the last ch_mult entry
+        block_in_ch = self.nf * self.ch_mult[-1]
+        # starting resolution: img_size halved once per level after the first; doubled after every Upsample
+        curr_res = self.resolution // 2 ** (self.num_resolutions-1)
+
+        blocks = []
+        # initial conv
+        blocks.append(nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1))
+
+        # non-local attention block
+        blocks.append(ResBlock(block_in_ch, block_in_ch))
+        blocks.append(AttnBlock(block_in_ch))
+        blocks.append(ResBlock(block_in_ch, block_in_ch))
+
+        # walk the resolution levels from last to first
+        for i in reversed(range(self.num_resolutions)):
+            block_out_ch = self.nf * self.ch_mult[i]
+
+            for _ in range(self.num_res_blocks):
+                blocks.append(ResBlock(block_in_ch, block_out_ch))
+                block_in_ch = block_out_ch
+
+                if curr_res in self.attn_resolutions:
+                    blocks.append(AttnBlock(block_in_ch))
+
+            # no upsampling after the final (index 0) level
+            if i != 0:
+                blocks.append(Upsample(block_in_ch))
+                curr_res = curr_res * 2
+
+        blocks.append(normalize(block_in_ch))
+        blocks.append(nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1))
+
+        self.blocks = nn.ModuleList(blocks)
+
+
+    def forward(self, x):
+        # apply every block in construction order
+        for block in self.blocks:
+            x = block(x)
+
+        return x
+
+
+@ARCH_REGISTRY.register()
+class VQAutoEncoder(nn.Module):
+    '''
+    VQGAN autoencoder: Encoder -> quantizer -> Generator.
+
+    quantizer selects the quantizer class: "nearest" builds a VectorQuantizer,
+    "gumbel" builds a GumbelQuantizer. Any other value leaves self.quantize
+    undefined, so forward() will fail for it.
+    model_path, when given, is a checkpoint whose 'params_ema' (preferred) or
+    'params' entry is loaded as this module's state dict; ValueError is raised
+    when neither key is present.
+    '''
+    def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256,
+                beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None):
+        super().__init__()
+        logger = get_root_logger()
+        self.in_channels = 3
+        self.nf = nf
+        self.n_blocks = res_blocks
+        self.codebook_size = codebook_size
+        self.embed_dim = emb_dim
+        self.ch_mult = ch_mult
+        self.resolution = img_size
+        self.attn_resolutions = attn_resolutions
+        self.quantizer_type = quantizer
+        self.encoder = Encoder(
+            self.in_channels,
+            self.nf,
+            self.embed_dim,
+            self.ch_mult,
+            self.n_blocks,
+            self.resolution,
+            self.attn_resolutions
+        )
+        if self.quantizer_type == "nearest":
+            self.beta = beta #0.25
+            self.quantize = VectorQuantizer(self.codebook_size, self.embed_dim, self.beta)
+        elif self.quantizer_type == "gumbel":
+            self.gumbel_num_hiddens = emb_dim
+            self.straight_through = gumbel_straight_through
+            self.kl_weight = gumbel_kl_weight
+            self.quantize = GumbelQuantizer(
+                self.codebook_size,
+                self.embed_dim,
+                self.gumbel_num_hiddens,
+                self.straight_through,
+                self.kl_weight
+            )
+        self.generator = Generator(
+            self.nf,
+            self.embed_dim,
+            self.ch_mult,
+            self.n_blocks,
+            self.resolution,
+            self.attn_resolutions
+        )
+
+        if model_path is not None:
+            # Deserialise the checkpoint once and reuse it; it used to be read from
+            # disk a second time just to index into it.
+            # NOTE: torch.load unpickles the file - only load checkpoints from trusted sources.
+            chkpt = torch.load(model_path, map_location='cpu')
+            if 'params_ema' in chkpt:
+                self.load_state_dict(chkpt['params_ema'])
+                logger.info(f'vqgan is loaded from: {model_path} [params_ema]')
+            elif 'params' in chkpt:
+                self.load_state_dict(chkpt['params'])
+                logger.info(f'vqgan is loaded from: {model_path} [params]')
+            else:
+                raise ValueError('Wrong params!')
+
+
+    def forward(self, x):
+        '''Encode, quantize and decode x; returns (reconstruction, codebook_loss, quant_stats).'''
+        x = self.encoder(x)
+        quant, codebook_loss, quant_stats = self.quantize(x)
+        x = self.generator(quant)
+        return x, codebook_loss, quant_stats
+
+
+
+# patch based discriminator
+@ARCH_REGISTRY.register()
+class VQGANDiscriminator(nn.Module):
+    '''
+    Stack of 4x4 convolutions that ends in a single-channel prediction map.
+
+    nc: input channels; ndf: base filter count; n_layers: number of stride-2
+    convolutions, including the first one.
+    model_path, when given, is a checkpoint whose 'params_d' (preferred) or
+    'params' entry is loaded as this module's state dict; ValueError is raised
+    when neither key is present.
+    '''
+    def __init__(self, nc=3, ndf=64, n_layers=4, model_path=None):
+        super().__init__()
+
+        layers = [nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)]
+        ndf_mult = 1
+        ndf_mult_prev = 1
+        for n in range(1, n_layers):  # gradually increase the number of filters
+            ndf_mult_prev = ndf_mult
+            # filter multiplier doubles per layer and is capped at 8
+            ndf_mult = min(2 ** n, 8)
+            layers += [
+                nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=2, padding=1, bias=False),
+                nn.BatchNorm2d(ndf * ndf_mult),
+                nn.LeakyReLU(0.2, True)
+            ]
+
+        ndf_mult_prev = ndf_mult
+        ndf_mult = min(2 ** n_layers, 8)
+
+        # one more conv/norm/activation stage, this time with stride 1
+        layers += [
+            nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(ndf * ndf_mult),
+            nn.LeakyReLU(0.2, True)
+        ]
+
+        layers += [
+            nn.Conv2d(ndf * ndf_mult, 1, kernel_size=4, stride=1, padding=1)]  # output 1 channel prediction map
+        self.main = nn.Sequential(*layers)
+
+        if model_path is not None:
+            # Deserialise the checkpoint once and reuse it; it used to be read from
+            # disk a second time just to index into it.
+            # NOTE: torch.load unpickles the file - only load checkpoints from trusted sources.
+            chkpt = torch.load(model_path, map_location='cpu')
+            if 'params_d' in chkpt:
+                self.load_state_dict(chkpt['params_d'])
+            elif 'params' in chkpt:
+                self.load_state_dict(chkpt['params'])
+            else:
+                raise ValueError('Wrong params!')
+
+    def forward(self, x):
+        return self.main(x)
--- a/invokeai/backend/stable_diffusion/concepts_lib.py
+++ b/invokeai/backend/stable_diffusion/concepts_lib.py
@ -10,7 +10,7 @@ import traceback
 from typing import Callable
 from urllib import request, error as ul_error
 from huggingface_hub import HfFolder, hf_hub_url, ModelSearchArguments, ModelFilter, HfApi
-from ldm.invoke.globals import Globals
+from invokeai.backend.globals import Globals

 class HuggingFaceConceptsLibrary(object):
    def __init__(self, root=None):
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@ -26,11 +26,11 @@ from torchvision.transforms.functional import resize as tv_resize
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 from typing_extensions import ParamSpec

-from ldm.invoke.globals import Globals
-from ..stable_diffusion.diffusion import InvokeAIDiffuserComponent, PostprocessingSettings, AttentionMapSaver
-from ..stable_diffusion.textual_inversion_manager import TextualInversionManager
-from ..stable_diffusion.offloading import LazilyLoadedModelGroup, FullyLoadedModelGroup, ModelGroup
-from ..devices import normalize_device, CPU_DEVICE
+from invokeai.backend.globals import Globals
+from .diffusion import InvokeAIDiffuserComponent, PostprocessingSettings, AttentionMapSaver
+from .textual_inversion_manager import TextualInversionManager
+from .offloading import LazilyLoadedModelGroup, FullyLoadedModelGroup, ModelGroup
+from ..util import normalize_device, CPU_DEVICE
 from compel import EmbeddingsProvider

@dataclass
--- a/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py
+++ b/invokeai/backend/stable_diffusion/diffusion/cross_attention_control.py
@ -15,7 +15,7 @@ from torch import nn
 from compel.cross_attention_control import Arguments
 from diffusers.models.unet_2d_condition import UNet2DConditionModel
 from diffusers.models.cross_attention import AttnProcessor
-from ...devices import torch_dtype
+from ...util import torch_dtype


 class CrossAttentionType(enum.Enum):
--- a/invokeai/backend/stable_diffusion/diffusion/ddpm.py
+++ b/invokeai/backend/stable_diffusion/diffusion/ddpm.py
@ -23,7 +23,7 @@ from omegaconf import ListConfig
 import urllib

 from ..textual_inversion_manager import TextualInversionManager
-from ...util import (
+from ...util.util import (
    log_txt_as_img,
    exists,
    default,
--- a/invokeai/backend/stable_diffusion/diffusion/plms.py
+++ b/invokeai/backend/stable_diffusion/diffusion/plms.py
@ -4,7 +4,7 @@ import torch
 import numpy as np
 from tqdm import tqdm
 from functools import partial
-from ...devices import choose_torch_device
+from ...util import choose_torch_device
 from .shared_invokeai_diffusion import InvokeAIDiffuserComponent
 from .sampler import Sampler
 from ..diffusionmodules.util import noise_like
--- a/invokeai/backend/stable_diffusion/diffusion/sampler.py
+++ b/invokeai/backend/stable_diffusion/diffusion/sampler.py
@ -7,7 +7,7 @@ import torch
 import numpy as np
 from tqdm import tqdm
 from functools import partial
-from ...devices import choose_torch_device
+from ...util import choose_torch_device
 from .shared_invokeai_diffusion import InvokeAIDiffuserComponent

 from ..diffusionmodules.util import (
--- a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py
+++ b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py
@ -8,7 +8,7 @@ import torch
 from diffusers.models.cross_attention import AttnProcessor
 from typing_extensions import TypeAlias

-from ldm.invoke.globals import Globals
+from invokeai.backend.globals import Globals
 from .cross_attention_control import Arguments, \
    restore_default_cross_attention, override_cross_attention, Context, get_cross_attention_modules, \
    CrossAttentionType, SwapCrossAttnContext
--- a/invokeai/backend/stable_diffusion/diffusionmodules/util.py
+++ b/invokeai/backend/stable_diffusion/diffusionmodules/util.py
@ -15,7 +15,7 @@ import torch.nn as nn
 import numpy as np
 from einops import repeat

-from ...util import instantiate_from_config
+from ...util.util import instantiate_from_config


 def make_beta_schedule(
--- a/invokeai/backend/stable_diffusion/encoders/modules.py
+++ b/invokeai/backend/stable_diffusion/encoders/modules.py
@ -10,7 +10,7 @@ from einops import repeat
 from transformers import CLIPTokenizer, CLIPTextModel

 from ldm.invoke.devices import choose_torch_device
-from ldm.invoke.globals import global_cache_dir
+from invokeai.backend.globals import global_cache_dir
 from ldm.modules.x_transformer import (
    Encoder,
    TransformerWrapper,
--- a/invokeai/backend/training/init.py
+++ b/invokeai/backend/training/init.py
@ -0,0 +1,4 @@
+'''
+Initialization file for invokeai.backend.training
+'''
+from .textual_inversion_training import do_textual_inversion_training, parse_args
--- a/invokeai/backend/training/textual_inversion_training.py
+++ b/invokeai/backend/training/textual_inversion_training.py
--- a/invokeai/backend/util/init.py
+++ b/invokeai/backend/util/init.py
@ -0,0 +1,18 @@
+'''
+Initialization file for invokeai.backend.util
+'''
+from .devices import (choose_torch_device,
+                      choose_precision,
+                      normalize_device,
+                      torch_dtype,
+                      CPU_DEVICE,
+                      CUDA_DEVICE,
+                      MPS_DEVICE,
+                      )
+from .util import (ask_user,
+                   download_with_resume,
+                   instantiate_from_config,
+                   url_attachment_name,
+                   )
+from .log import write_log
+                  
--- a/invokeai/backend/util/devices.py
+++ b/invokeai/backend/util/devices.py
@ -5,9 +5,11 @@ from contextlib import nullcontext
 import torch
 from torch import autocast

-from ldm.invoke.globals import Globals
+from invokeai.backend.globals import Globals

 CPU_DEVICE = torch.device("cpu")
+CUDA_DEVICE = torch.device("cuda")
+MPS_DEVICE = torch.device("mps")

 def choose_torch_device() -> torch.device:
    '''Convenience routine for guessing which GPU device to run model on'''
--- a/invokeai/backend/util/log.py
+++ b/invokeai/backend/util/log.py
@ -0,0 +1,66 @@
+"""
+Functions for better format logging
+    write_log -- logs the name of the output image, prompt, and prompt args to the terminal and different types of file
+        1 write_log_message -- Writes a message to the console
+        2 write_log_files -- Writes a message to files
+        2.1 write_log_default -- File in plain text
+        2.2 write_log_txt -- File in txt format
+        2.3 write_log_markdown -- File in markdown format
+"""
+
+import os
+
+
+def write_log(results, log_path, file_types, output_cntr):
+    """
+    Report `results` on the terminal, append them to the log file(s) selected
+    by `file_types`, and return the counter to use for the next call.
+    """
+    next_cntr = write_log_message(results, output_cntr)
+    write_log_files(results, log_path, file_types)
+    return next_cntr
+
+
+def write_log_message(results, output_cntr):
+    """
+    Print one "path: prompt" line per result to the terminal.
+
+    A single result is tagged "[cntr]"; several results are tagged
+    "[cntr.1]", "[cntr.2]", ... Returns output_cntr unchanged when there is
+    nothing to print, otherwise output_cntr + 1.
+    """
+    if len(results) == 0:
+        return output_cntr
+
+    entries = [f"{path}: {prompt}\n" for path, prompt in results]
+    if len(entries) <= 1:
+        print(f"[{output_cntr}] {entries[0]}", end="")
+    else:
+        for subcntr, entry in enumerate(entries, start=1):
+            print(f"[{output_cntr}.{subcntr}] {entry}", end="")
+    return output_cntr+1
+
+def write_log_files(results, log_path, file_types):
+    """
+    Append `results` to one log file per entry of `file_types`.
+
+    "txt" and "md"/"markdown" have dedicated writers; any other value is
+    reported on the terminal and written as plain text with that value as the
+    file extension.
+    """
+    for file_type in file_types:
+        if file_type == "txt":
+            write_log_txt(log_path, results)
+            continue
+        if file_type in ("md", "markdown"):
+            write_log_markdown(log_path, results)
+            continue
+        print(f"'{file_type}' format is not supported, so write in plain text")
+        write_log_default(log_path, results, file_type)
+
+
+def write_log_default(log_path, results, file_type):
+    """Append "path: prompt" lines to the file `<log_path>.<file_type>`."""
+    entries = []
+    for path, prompt in results:
+        entries.append(f"{path}: {prompt}\n")
+    target = ".".join([log_path, file_type])
+    with open(target, "a", encoding="utf-8") as handle:
+        handle.writelines(entries)
+
+
+def write_log_txt(log_path, results):
+    """Append "path: prompt" lines to the file `<log_path>.txt`."""
+    entries = []
+    for path, prompt in results:
+        entries.append(f"{path}: {prompt}\n")
+    target = log_path + ".txt"
+    with open(target, "a", encoding="utf-8") as handle:
+        handle.writelines(entries)
+
+
+def write_log_markdown(log_path, results):
+    """
+    Append one markdown section per result to the file `<log_path>.md`.
+
+    Each section has the image's base file name as heading, an image link to
+    that same name, and the prompt.
+    """
+    names_and_prompts = [(os.path.basename(path), prompt) for path, prompt in results]
+    sections = [
+        f"## {file_name}\n![]({file_name})\n\n{prompt}\n"
+        for file_name, prompt in names_and_prompts
+    ]
+    with open(log_path + ".md", "a", encoding="utf-8") as handle:
+        handle.writelines(sections)
--- a/invokeai/backend/util/util.py
+++ b/invokeai/backend/util/util.py
--- a/invokeai/backend/web/init.py
+++ b/invokeai/backend/web/init.py
@ -0,0 +1,4 @@
+'''
+Initialization file for the web backend.
+'''
+from .invoke_ai_web_server import InvokeAIWebServer
--- a/invokeai/backend/web/invoke_ai_web_server.py
+++ b/invokeai/backend/web/invoke_ai_web_server.py
@ -12,7 +12,7 @@ from threading import Event
 from uuid import uuid4

 import eventlet
-import invokeai.frontend.dist as frontend
+import invokeai.frontend.web.dist as frontend
 from PIL import Image
 from PIL.Image import Image as ImageType
 from compel.prompt_parser import Blend
@ -20,24 +20,24 @@ from flask import Flask, redirect, send_from_directory, request, make_response
 from flask_socketio import SocketIO
 from werkzeug.utils import secure_filename

-from invokeai.backend.modules.get_canvas_generation_mode import (
+from .modules.get_canvas_generation_mode import (
    get_canvas_generation_mode,
 )
 from .modules.parameters import parameters_to_command
-from .prompting import (get_tokens_for_prompt_object,
-                        get_prompt_structure,
-                        get_tokenizer
-                        )
-from .image_util import PngWriter, retrieve_metadata
-from .generator import infill_methods 
-from .stable_diffusion import PipelineIntermediateState
+from ..prompting import (get_tokens_for_prompt_object,
+                         get_prompt_structure,
+                         get_tokenizer
+                         )
+from ..image_util import PngWriter, retrieve_metadata
+from ..generator import infill_methods 
+from ..stable_diffusion import PipelineIntermediateState

-from ldm.generate import Generate
-from ldm.invoke.args import Args, APP_ID, APP_VERSION, calculate_init_img_hash
-from ldm.invoke.globals import ( Globals, global_converted_ckpts_dir,
-                                 global_models_dir
-                                )
-from ldm.invoke.merge_diffusers import merge_diffusion_models
+from .. import Generate
+from ..args import Args, APP_ID, APP_VERSION, calculate_init_img_hash
+from ..globals import ( Globals, global_converted_ckpts_dir,
+                        global_models_dir
+                       )
+from ..model_management import merge_diffusion_models

 # Loading Arguments
 opt = Args()
@ -236,7 +236,7 @@ class InvokeAIWebServer:
                sys.exit(0)
        else:
            useSSL = args.certfile or args.keyfile
-            print(">> Started Invoke AI Web Server!")
+            print(">> Started Invoke AI Web Server")
            if self.host == "0.0.0.0":
                print(
                    f"Point your browser at http{'s' if useSSL else ''}://localhost:{self.port} or use the host's DNS name or IP address."
--- a/invokeai/backend/web/modules/init.py
+++ b/invokeai/backend/web/modules/init.py
--- a/invokeai/backend/web/modules/create_cmd_parser.py
+++ b/invokeai/backend/web/modules/create_cmd_parser.py
--- a/invokeai/backend/web/modules/get_canvas_generation_mode.py
+++ b/invokeai/backend/web/modules/get_canvas_generation_mode.py
--- a/invokeai/backend/web/modules/parameters.py
+++ b/invokeai/backend/web/modules/parameters.py
@ -1,4 +1,4 @@
-from invokeai.backend.modules.parse_seed_weights import parse_seed_weights
+from .parse_seed_weights import parse_seed_weights
 import argparse

 SAMPLER_CHOICES = [
--- a/invokeai/backend/web/modules/parse_seed_weights.py
+++ b/invokeai/backend/web/modules/parse_seed_weights.py
--- a/invokeai/backend/web/modules/test_images/init-img_full_transparency.png
+++ b/invokeai/backend/web/modules/test_images/init-img_full_transparency.png
--- a/invokeai/backend/web/modules/test_images/init-img_opaque.png
+++ b/invokeai/backend/web/modules/test_images/init-img_opaque.png
--- a/invokeai/backend/web/modules/test_images/init-img_partial_transparency.png
+++ b/invokeai/backend/web/modules/test_images/init-img_partial_transparency.png
--- a/invokeai/backend/web/modules/test_images/init-mask_has_mask.png
+++ b/invokeai/backend/web/modules/test_images/init-mask_has_mask.png
--- a/invokeai/backend/web/modules/test_images/init-mask_no_mask.png
+++ b/invokeai/backend/web/modules/test_images/init-mask_no_mask.png