Merge branch 'main' into dev/installer

2024-08-30 20:32:17 +00:00 · 2023-02-01 17:50:22 -05:00 · 2023-02-01 17:50:22 -05:00 · 8ce9f07223
commit 8ce9f07223
parent 11ac50a6ea 31146eb797
27 changed files with 593 additions and 214 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,19 +1,18 @@
 *
+!assets/caution.png
 !backend
-!environments-and-requirements
-!frontend
+!frontend/dist
 !ldm
-!main.py
+!pyproject.toml
+!README.md
 !scripts
-!server
-!static
-!setup.py

 # Guard against pulling in any models that might exist in the directory tree
-**/*.pt*
+**.pt*

 # unignore configs, but only ignore the custom models.yaml, in case it exists
 !configs
 configs/models.yaml
+configs/models.yaml.orig

 **/__pycache__
--- a/.github/workflows/build-cloud-img.yml
+++ b/.github/workflows/build-cloud-img.yml
@ -21,6 +21,7 @@ env:

 jobs:
  docker:
+    if: github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/build-container.yml
+++ b/.github/workflows/build-container.yml
@ -3,63 +3,60 @@ on:
  push:
    branches:
      - 'main'
+    tags:
+      - 'v*.*.*'

 jobs:
  docker:
+    if: github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
-        registry:
-          - ghcr.io
        flavor:
          - amd
          - cuda
-          # - cloud
        include:
          - flavor: amd
-            pip-requirements: requirements-lin-amd.txt
+            pip-extra-index-url: 'https://download.pytorch.org/whl/rocm5.2'
            dockerfile: docker-build/Dockerfile
            platforms: linux/amd64,linux/arm64
          - flavor: cuda
-            pip-requirements: requirements-lin-cuda.txt
+            pip-extra-index-url: ''
            dockerfile: docker-build/Dockerfile
            platforms: linux/amd64,linux/arm64
-          # - flavor: cloud
-          #   pip-requirements: requirements-lin-cuda.txt
-          #   dockerfile: docker-build/Dockerfile.cloud
-          #   platforms: linux/amd64
    runs-on: ubuntu-latest
    name: ${{ matrix.flavor }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3

-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
-          images: ${{ matrix.registry }}/${{ github.repository }}-${{ matrix.flavor }}
+          images: ghcr.io/${{ github.repository }}-${{ matrix.flavor }}
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}}
            type=sha
          flavor: |
            latest=true

+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

-      - if: github.event_name != 'pull_request'
-        name: Docker login
+      - name: Login to GitHub Container Registry
+        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
-          registry: ${{ matrix.registry }}
-          username: ${{ github.actor }}
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build container
@ -71,4 +68,6 @@ jobs:
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
-          build-args: pip_requirements=${{ matrix.pip-requirements }}
+          build-args: PIP_EXTRA_INDEX_URL=${{ matrix.pip-extra-index-url }}
+          # cache-from: type=gha
+          # cache-to: type=gha,mode=max
--- a/.github/workflows/clean-caches.yml
+++ b/.github/workflows/clean-caches.yml
@ -0,0 +1,34 @@
+name: cleanup caches by a branch
+on:
+  pull_request:
+    types:
+      - closed
+  workflow_dispatch:
+
+jobs:
+  cleanup:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+
+      - name: Cleanup
+        run: |
+          gh extension install actions/gh-actions-cache
+
+          REPO=${{ github.repository }}
+          BRANCH=${{ github.ref }}
+
+          echo "Fetching list of cache key"
+          cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 )
+
+          ## Setting this to not fail the workflow while deleting cache keys.
+          set +e
+          echo "Deleting caches..."
+          for cacheKey in $cacheKeysForPR
+          do
+              gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
+          done
+          echo "Done"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/lint-frontend.yml
+++ b/.github/workflows/lint-frontend.yml
@ -14,6 +14,7 @@ defaults:

 jobs:
  lint-frontend:
+    if: github.event.pull_request.draft == false
    runs-on: ubuntu-22.04
    steps:
      - name: Setup Node 18
--- a/.github/workflows/mkdocs-material.yml
+++ b/.github/workflows/mkdocs-material.yml
@ -7,6 +7,7 @@ on:

 jobs:
  mkdocs-material:
+    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    steps:
      - name: checkout sources
--- a/.github/workflows/pyflakes.yml
+++ b/.github/workflows/pyflakes.yml
@ -9,6 +9,7 @@ on:
 jobs:
  pyflakes:
    name: runner / pyflakes
+    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
--- a/docker-build/Dockerfile
+++ b/docker-build/Dockerfile
@ -1,59 +1,71 @@
-FROM python:3.10-slim AS builder
+# syntax=docker/dockerfile:1
+FROM python:3.9-slim AS python-base

 # use bash
 SHELL [ "/bin/bash", "-c" ]

 # Install necesarry packages
-RUN apt-get update \
+RUN \
+  --mount=type=cache,target=/var/cache/apt,sharing=locked \
+  --mount=type=cache,target=/var/lib/apt,sharing=locked \
+  apt-get update \
+  && apt-get install -y \
+    --no-install-recommends \
+    libgl1-mesa-glx=20.3.* \
+    libglib2.0-0=2.66.* \
+    libopencv-dev=4.5.* \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/*
+
+ARG APPDIR=/usr/src/app
+ENV APPDIR ${APPDIR}
+WORKDIR ${APPDIR}
+
+FROM python-base AS builder
+
+RUN \
+  --mount=type=cache,target=/var/cache/apt,sharing=locked \
+  --mount=type=cache,target=/var/lib/apt,sharing=locked \
+  apt-get update \
  && apt-get install -y \
    --no-install-recommends \
    gcc=4:10.2.* \
-    libgl1-mesa-glx=20.3.* \
-    libglib2.0-0=2.66.* \
    python3-dev=3.9.* \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*

-# set WORKDIR, PATH and copy sources
-ARG APPDIR=/usr/src/app
-WORKDIR ${APPDIR}
-ENV PATH ${APPDIR}/.venv/bin:$PATH
-ARG PIP_REQUIREMENTS=requirements-lin-cuda.txt
-COPY . ./environments-and-requirements/${PIP_REQUIREMENTS} ./
+# copy sources
+COPY --link . .
+ARG PIP_EXTRA_INDEX_URL
+ENV PIP_EXTRA_INDEX_URL ${PIP_EXTRA_INDEX_URL}

 # install requirements
-RUN python3 -m venv .venv \
-  && pip install \
-    --upgrade \
+RUN python3 -m venv invokeai \
+  && ${APPDIR}/invokeai/bin/pip \
+    install \
    --no-cache-dir \
-    'wheel>=0.38.4' \
-  && pip install \
-    --no-cache-dir \
-    -r ${PIP_REQUIREMENTS}
+    --use-pep517 \
+    .

-FROM python:3.10-slim AS runtime
+FROM python-base AS runtime

 # setup environment
-ARG APPDIR=/usr/src/app
-WORKDIR ${APPDIR}
-COPY --from=builder ${APPDIR} .
-ENV \
-  PATH=${APPDIR}/.venv/bin:$PATH \
-  INVOKEAI_ROOT=/data \
-  INVOKE_MODEL_RECONFIGURE=--yes
+COPY --link . .
+COPY --from=builder ${APPDIR}/invokeai ${APPDIR}/invokeai
+ENV PATH=${APPDIR}/invokeai/bin:$PATH
+ENV INVOKEAI_ROOT=/data
+ENV INVOKE_MODEL_RECONFIGURE="--yes --default_only"

-# Install necesarry packages
-RUN apt-get update \
+# build patchmatch
+RUN \
+  --mount=type=cache,target=/var/cache/apt,sharing=locked \
+  --mount=type=cache,target=/var/lib/apt,sharing=locked \
+  apt-get update \
  && apt-get install -y \
    --no-install-recommends \
    build-essential=12.9 \
-    libgl1-mesa-glx=20.3.* \
-    libglib2.0-0=2.66.* \
-    libopencv-dev=4.5.* \
-  && ln -sf \
-    /usr/lib/"$(arch)"-linux-gnu/pkgconfig/opencv4.pc \
-    /usr/lib/"$(arch)"-linux-gnu/pkgconfig/opencv.pc \
-  && python3 -c "from patchmatch import patch_match" \
+  && PYTHONDONTWRITEBYTECODE=1 \
+    python3 -c "from patchmatch import patch_match" \
  && apt-get remove -y \
    --autoremove \
    build-essential \
@ -61,5 +73,6 @@ RUN apt-get update \
  && rm -rf /var/lib/apt/lists/*

 # set Entrypoint and default CMD
-ENTRYPOINT [ "python3", "scripts/invoke.py" ]
+ENTRYPOINT [ "invoke" ]
 CMD [ "--web", "--host=0.0.0.0" ]
+VOLUME [ "/data" ]
--- a/docker-build/build.sh
+++ b/docker-build/build.sh
@ -2,34 +2,41 @@
 set -e

 # How to use: https://invoke-ai.github.io/InvokeAI/installation/INSTALL_DOCKER/#setup
+#
+# Some possible pip extra-index urls (cuda 11.7 is available without extra url):
+#
+#   CUDA 11.6:  https://download.pytorch.org/whl/cu116
+#   ROCm 5.2:   https://download.pytorch.org/whl/rocm5.2
+#   CPU:        https://download.pytorch.org/whl/cpu
+#
+#   as found on https://pytorch.org/get-started/locally/

-source ./docker-build/env.sh \
-  || echo "please execute docker-build/build.sh from repository root" \
-  || exit 1
+cd "$(dirname "$0")" || exit 1

-PIP_REQUIREMENTS=${PIP_REQUIREMENTS:-requirements-lin-cuda.txt}
-DOCKERFILE=${INVOKE_DOCKERFILE:-docker-build/Dockerfile}
+source ./env.sh
+
+DOCKERFILE=${INVOKE_DOCKERFILE:-"./Dockerfile"}

 # print the settings
 echo -e "You are using these values:\n"
 echo -e "Dockerfile:\t ${DOCKERFILE}"
-echo -e "Requirements:\t ${PIP_REQUIREMENTS}"
+echo -e "extra-index-url: ${PIP_EXTRA_INDEX_URL:-none}"
 echo -e "Volumename:\t ${VOLUMENAME}"
 echo -e "arch:\t\t ${ARCH}"
 echo -e "Platform:\t ${PLATFORM}"
 echo -e "Invokeai_tag:\t ${INVOKEAI_TAG}\n"

 if [[ -n "$(docker volume ls -f name="${VOLUMENAME}" -q)" ]]; then
-  echo -e "Volume already exists\n"
+    echo -e "Volume already exists\n"
 else
-  echo -n "createing docker volume "
-  docker volume create "${VOLUMENAME}"
+    echo -n "createing docker volume "
+    docker volume create "${VOLUMENAME}"
 fi

 # Build Container
 docker build \
-  --platform="${PLATFORM}" \
-  --tag="${INVOKEAI_TAG}" \
-  --build-arg="PIP_REQUIREMENTS=${PIP_REQUIREMENTS}" \
-  --file="${DOCKERFILE}" \
-  .
+    --platform="${PLATFORM}" \
+    --tag="${INVOKEAI_TAG}" \
+    ${PIP_EXTRA_INDEX_URL:+--build-arg=PIP_EXTRA_INDEX_URL="${PIP_EXTRA_INDEX_URL}"} \
+    --file="${DOCKERFILE}" \
+    ..
--- a/docker-build/env.sh
+++ b/docker-build/env.sh
@ -7,4 +7,4 @@ ARCH=${ARCH:-$(uname -m)}
 PLATFORM=${PLATFORM:-Linux/${ARCH}}
 CONTAINER_FLAVOR=${CONTAINER_FLAVOR:-cuda}
 INVOKEAI_BRANCH=$(git branch --show)
-INVOKEAI_TAG=${REPOSITORY_NAME,,}-${CONTAINER_FLAVOR}:${INVOKEAI_TAG:-${INVOKEAI_BRANCH/\//-}}
+INVOKEAI_TAG=${REPOSITORY_NAME,,}-${CONTAINER_FLAVOR}:${INVOKEAI_TAG:-${INVOKEAI_BRANCH##*/}}
--- a/docker-build/run.sh
+++ b/docker-build/run.sh
@ -4,17 +4,14 @@ set -e
 # How to use: https://invoke-ai.github.io/InvokeAI/installation/INSTALL_DOCKER/#run-the-container
 # IMPORTANT: You need to have a token on huggingface.co to be able to download the checkpoints!!!

-source ./docker-build/env.sh \
-  || echo "please run from repository root" \
-  || exit 1
+cd "$(dirname "$0")" || exit 1

-# check if HUGGINGFACE_TOKEN is available
-# You must have accepted the terms of use for required models
-HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN:?Please set your token for Huggingface as HUGGINGFACE_TOKEN}
+source ./env.sh

 echo -e "You are using these values:\n"
-echo -e "Volumename:\t ${VOLUMENAME}"
-echo -e "Invokeai_tag:\t ${INVOKEAI_TAG}\n"
+echo -e "Volumename:\t${VOLUMENAME}"
+echo -e "Invokeai_tag:\t${INVOKEAI_TAG}"
+echo -e "local Models:\t${MODELSPATH:-unset}\n"

 docker run \
  --interactive \
@ -23,8 +20,10 @@ docker run \
  --platform="$PLATFORM" \
  --name="${REPOSITORY_NAME,,}" \
  --hostname="${REPOSITORY_NAME,,}" \
-  --mount="source=$VOLUMENAME,target=/data" \
-  --env="HUGGINGFACE_TOKEN=${HUGGINGFACE_TOKEN}" \
+  --mount=source="$VOLUMENAME",target=/data \
+  ${MODELSPATH:+-u "$(id -u):$(id -g)"} \
+  ${MODELSPATH:+--mount=type=bind,source=${MODELSPATH},target=/data/models} \
+  ${HUGGING_FACE_HUB_TOKEN:+--env=HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}} \
  --publish=9090:9090 \
  --cap-add=sys_nice \
  ${GPU_FLAGS:+--gpus=${GPU_FLAGS}} \
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -52,12 +52,17 @@ introduces several changes you should know about.
  path: models/diffusers/hakurei-haifu-diffusion-1.4
  ```

-2. The format of the models directory has changed to mimic the
-   HuggingFace cache directory. By default, diffusers models are
-   now automatically downloaded and retrieved from the directory
-   `ROOTDIR/models/diffusers`, while other models are stored in
-   the directory `ROOTDIR/models/hub`. This organization is the
-   same as that used by HuggingFace for its cache management.
+2. In order of precedence, InvokeAI will now use HF_HOME, then
+   XDG_CACHE_HOME, then finally default to `ROOTDIR/models` to
+   store HuggingFace diffusers models.
+
+   Consequently, the format of the models directory has changed to
+   mimic the HuggingFace cache directory. When HF_HOME and XDG_CACHE_HOME
+   are not set, diffusers models are now automatically downloaded
+   and retrieved from the directory `ROOTDIR/models/diffusers`,
+   while other models are stored in the directory
+   `ROOTDIR/models/hub`. This organization is the same as that used
+   by HuggingFace for its cache management.

   This allows you to share diffusers and ckpt model files easily with
   other machine learning applications that use the HuggingFace
@ -66,7 +71,13 @@ introduces several changes you should know about.
   cache models in. To tell InvokeAI to use the standard HuggingFace
   cache directory, you would set HF_HOME like this (Linux/Mac):

-   `export HF_HOME=~/.cache/hugging_face`
+   `export HF_HOME=~/.cache/huggingface`
+
+   Both HuggingFace and InvokeAI will fall back to the XDG_CACHE_HOME
+   environment variable if HF_HOME is not set; this path
+   takes precedence over `ROOTDIR/models` to allow for the same sharing
+   with other machine learning applications that use HuggingFace
+   libraries.

 3. If you upgrade to InvokeAI 2.3.* from an earlier version, there
   will be a one-time migration from the old models directory format
--- a/docs/features/PROMPTS.md
+++ b/docs/features/PROMPTS.md
@ -239,28 +239,24 @@ Generate an image with a given prompt, record the seed of the image, and then
 use the `prompt2prompt` syntax to substitute words in the original prompt for
 words in a new prompt. This works for `img2img` as well.

- `a ("fluffy cat").swap("smiling dog") eating a hotdog`.
-    - quotes optional: `a (fluffy cat).swap(smiling dog) eating a hotdog`.
-    - for single word substitutions parentheses are also optional:
-      `a cat.swap(dog) eating a hotdog`.
- Supports options `s_start`, `s_end`, `t_start`, `t_end` (each 0-1) loosely
-  corresponding to bloc97's `prompt_edit_spatial_start/_end` and
-  `prompt_edit_tokens_start/_end` but with the math swapped to make it easier to
-  intuitively understand.
-    - Example usage:`a (cat).swap(dog, s_end=0.3) eating a hotdog` - the `s_end`
-      argument means that the "spatial" (self-attention) edit will stop having any
-      effect after 30% (=0.3) of the steps have been done, leaving Stable
-      Diffusion with 70% of the steps where it is free to decide for itself how to
-      reshape the cat-form into a dog form.
-    - The numbers represent a percentage through the step sequence where the edits
-      should happen. 0 means the start (noisy starting image), 1 is the end (final
-      image).
-        - For img2img, the step sequence does not start at 0 but instead at
-          (1-strength) - so if strength is 0.7, s_start and s_end must both be
-          greater than 0.3 (1-0.7) to have any effect.
- Convenience option `shape_freedom` (0-1) to specify how much "freedom" Stable
-  Diffusion should have to change the shape of the subject being swapped.
-    - `a (cat).swap(dog, shape_freedom=0.5) eating a hotdog`.
+For example, consider the prompt `a cat.swap(dog) playing with a ball in the forest`. Normally, because of the way words interact with each other when doing a stable diffusion image generation, these two prompts would generate different compositions:
+  - `a cat playing with a ball in the forest`
+  - `a dog playing with a ball in the forest`
+
+| `a cat playing with a ball in the forest` | `a dog playing with a ball in the forest` |
+| --- | --- |
+| img | img |
+
+
+      - For multiple word swaps, use parentheses: `a (fluffy cat).swap(barking dog) playing with a ball in the forest`.
+      - To swap a comma, use quotes: `a ("fluffy, grey cat").swap("big, barking dog") playing with a ball in the forest`.
+- Supports options `t_start` and `t_end` (each 0-1) loosely corresponding to bloc97's `prompt_edit_tokens_start/_end` but with the math swapped to make it easier to
+  intuitively understand. `t_start` and `t_end` are used to control on which steps cross-attention control should run. With the default values `t_start=0` and `t_end=1`, cross-attention control is active on every step of image generation. Other values can be used to turn cross-attention control off for part of the image generation process.
+    - For example, if doing a diffusion with 10 steps for the prompt `a cat.swap(dog, t_start=0.3, t_end=1.0) playing with a ball in the forest`, the first 3 steps will be run as `a cat playing with a ball in the forest`, while the last 7 steps will run as `a dog playing with a ball in the forest`, but the pixels that represent `dog` will be locked to the pixels that would have represented `cat` if the `cat` prompt had been used instead.
+    - Conversely, for `a cat.swap(dog, t_start=0, t_end=0.7) playing with a ball in the forest`, the first 7 steps will run as `a dog playing with a ball in the forest` with the pixels that represent `dog` locked to the same pixels that would have represented `cat` if the `cat` prompt was being used instead. The final 3 steps will just run `a cat playing with a ball in the forest`.
+    > For img2img, the step sequence does not start at 0 but instead at `(1.0-strength)` - so if the img2img `strength` is `0.7`, `t_start` and `t_end` must both be greater than `0.3` (`1.0-0.7`) to have any effect.
+
+Prompt2prompt `.swap()` is not compatible with xformers, which will be temporarily disabled when doing a `.swap()` - so you should expect to use more VRAM and run slower than with xformers enabled.

 The `prompt2prompt` code is based off
 [bloc97's colab](https://github.com/bloc97/CrossAttentionControl).
--- a/environments-and-requirements/requirements-lin-cuda.txt
+++ b/environments-and-requirements/requirements-lin-cuda.txt
@ -2,4 +2,5 @@
 -r environments-and-requirements/requirements-base.txt
 torch>=1.13.1
 torchvision>=0.14.1
+xformers~=0.16
 -e .
--- a/environments-and-requirements/requirements-win-colab-cuda.txt
+++ b/environments-and-requirements/requirements-win-colab-cuda.txt
@ -3,4 +3,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu117 --trusted-host https://download.pytorch.org
 torch==1.13.1
 torchvision==0.14.1
+xformers~=0.16
 -e .
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -20,6 +20,7 @@ import torch
 import transformers
 from PIL import Image, ImageOps
 from diffusers.pipeline_utils import DiffusionPipeline
+from diffusers.utils.import_utils import is_xformers_available
 from omegaconf import OmegaConf
 from pytorch_lightning import seed_everything, logging

@ -203,6 +204,14 @@ class Generate:
            self.precision = choose_precision(self.device)
        Globals.full_precision = self.precision=='float32'

+        if is_xformers_available():
+            if not Globals.disable_xformers:
+                print('>> xformers memory-efficient attention is available and enabled')
+            else:
+                print('>> xformers memory-efficient attention is available but disabled')
+        else:
+            print('>> xformers not installed')
+            
        # model caching system for fast switching
        self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models)
        # don't accept invalid models
--- a/ldm/invoke/CLI.py
+++ b/ldm/invoke/CLI.py
@ -53,10 +53,11 @@ def main():

    if not args.conf:
        if not os.path.exists(os.path.join(Globals.root,'configs','models.yaml')):
-            print(f"\n** Error. The file {os.path.join(Globals.root,'configs','models.yaml')} could not be found.")
-            print('** Please check the location of your invokeai directory and use the --root_dir option to point to the correct path.')
-            print('** This script will now exit.')
-            sys.exit(-1)
+            report_model_error(opt, e)
+            # print(f"\n** Error. The file {os.path.join(Globals.root,'configs','models.yaml')} could not be found.")
+            # print('** Please check the location of your invokeai directory and use the --root_dir option to point to the correct path.')
+            # print('** This script will now exit.')
+            # sys.exit(-1)

    print(f'>> {ldm.invoke.__app_name__}, version {ldm.invoke.__version__}')
    print(f'>> InvokeAI runtime directory is "{Globals.root}"')
@ -789,8 +790,8 @@ def _get_model_name(existing_names,completer,default_name:str='')->str:
        model_name = input(f'Short name for this model [{default_name}]: ').strip()
        if len(model_name)==0:
            model_name = default_name
-        if not re.match('^[\w._+-]+$',model_name):
-            print('** model name must contain only words, digits and the characters "._+-" **')
+        if not re.match('^[\w._+:/-]+$',model_name):
+            print('** model name must contain only words, digits and the characters "._+:/-" **')
        elif model_name != default_name and model_name in existing_names:
            print(f'** the name {model_name} is already in use. Pick another.')
        else:
--- a/ldm/invoke/generator/diffusers_pipeline.py
+++ b/ldm/invoke/generator/diffusers_pipeline.py
@ -24,9 +24,6 @@ from ...models.diffusion import cross_attention_control
 from ...models.diffusion.cross_attention_map_saving import AttentionMapSaver
 from ...modules.prompt_to_embeddings_converter import WeightedPromptFragmentsToEmbeddingsConverter

-# monkeypatch diffusers CrossAttention 🙈
-# this is to make prompt2prompt and (future) attention maps work
-attention.CrossAttention = cross_attention_control.InvokeAIDiffusersCrossAttention

 from diffusers.models import AutoencoderKL, UNet2DConditionModel
 from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
@ -295,7 +292,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
            safety_checker=safety_checker,
            feature_extractor=feature_extractor,
        )
-        self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
+        self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward, is_running_diffusers=True)
        use_full_precision = (precision == 'float32' or precision == 'autocast')
        self.textual_inversion_manager = TextualInversionManager(tokenizer=self.tokenizer,
                                                                 text_encoder=self.text_encoder,
@ -307,8 +304,23 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
            textual_inversion_manager=self.textual_inversion_manager
        )

+        self._enable_memory_efficient_attention()
+
+
+    def _enable_memory_efficient_attention(self):
+        """
+        if xformers is available, use it, otherwise use sliced attention.
+        """
        if is_xformers_available() and not Globals.disable_xformers:
            self.enable_xformers_memory_efficient_attention()
+        else:
+            if torch.backends.mps.is_available():
+                # until pytorch #91617 is fixed, slicing is borked on MPS
+                # https://github.com/pytorch/pytorch/issues/91617
+                # fix is in https://github.com/kulinseth/pytorch/pull/222 but no idea when it will get merged to pytorch mainline.
+                pass
+            else:
+                self.enable_attention_slicing(slice_size='auto')

    def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int,
                              conditioning_data: ConditioningData,
@ -373,42 +385,40 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        if additional_guidance is None:
            additional_guidance = []
        extra_conditioning_info = conditioning_data.extra
-        if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
-            self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info,
-                                                                 step_count=len(self.scheduler.timesteps))
-        else:
-            self.invokeai_diffuser.remove_cross_attention_control()
+        with self.invokeai_diffuser.custom_attention_context(extra_conditioning_info=extra_conditioning_info,
+                                                             step_count=len(self.scheduler.timesteps)
+                                                             ):

-        yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps,
-                                        latents=latents)
+            yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps,
+                                            latents=latents)

-        batch_size = latents.shape[0]
-        batched_t = torch.full((batch_size,), timesteps[0],
-                               dtype=timesteps.dtype, device=self.unet.device)
-        latents = self.scheduler.add_noise(latents, noise, batched_t)
+            batch_size = latents.shape[0]
+            batched_t = torch.full((batch_size,), timesteps[0],
+                                   dtype=timesteps.dtype, device=self.unet.device)
+            latents = self.scheduler.add_noise(latents, noise, batched_t)

-        attention_map_saver: Optional[AttentionMapSaver] = None
-        self.invokeai_diffuser.remove_attention_map_saving()
-        for i, t in enumerate(self.progress_bar(timesteps)):
-            batched_t.fill_(t)
-            step_output = self.step(batched_t, latents, conditioning_data,
-                                    step_index=i,
-                                    total_step_count=len(timesteps),
-                                    additional_guidance=additional_guidance)
-            latents = step_output.prev_sample
-            predicted_original = getattr(step_output, 'pred_original_sample', None)
+            attention_map_saver: Optional[AttentionMapSaver] = None

-            if i == len(timesteps)-1 and extra_conditioning_info is not None:
-                eos_token_index = extra_conditioning_info.tokens_count_including_eos_bos - 1
-                attention_map_token_ids = range(1, eos_token_index)
-                attention_map_saver = AttentionMapSaver(token_ids=attention_map_token_ids, latents_shape=latents.shape[-2:])
-                self.invokeai_diffuser.setup_attention_map_saving(attention_map_saver)
+            for i, t in enumerate(self.progress_bar(timesteps)):
+                batched_t.fill_(t)
+                step_output = self.step(batched_t, latents, conditioning_data,
+                                        step_index=i,
+                                        total_step_count=len(timesteps),
+                                        additional_guidance=additional_guidance)
+                latents = step_output.prev_sample
+                predicted_original = getattr(step_output, 'pred_original_sample', None)

-            yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents,
-                                            predicted_original=predicted_original, attention_map_saver=attention_map_saver)
+                # TODO resuscitate attention map saving
+                #if i == len(timesteps)-1 and extra_conditioning_info is not None:
+                #    eos_token_index = extra_conditioning_info.tokens_count_including_eos_bos - 1
+                #    attention_map_token_ids = range(1, eos_token_index)
+                #    attention_map_saver = AttentionMapSaver(token_ids=attention_map_token_ids, latents_shape=latents.shape[-2:])
+                #    self.invokeai_diffuser.setup_attention_map_saving(attention_map_saver)

-        self.invokeai_diffuser.remove_attention_map_saving()
-        return latents, attention_map_saver
+                yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents,
+                                                predicted_original=predicted_original, attention_map_saver=attention_map_saver)
+
+            return latents, attention_map_saver

    @torch.inference_mode()
    def step(self, t: torch.Tensor, latents: torch.Tensor,
@ -447,7 +457,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):

        return step_output

-    def _unet_forward(self, latents, t, text_embeddings):
+    def _unet_forward(self, latents, t, text_embeddings, cross_attention_kwargs: Optional[dict[str,Any]] = None):
        """predict the noise residual"""
        if is_inpainting_model(self.unet) and latents.size(1) == 4:
            # Pad out normal non-inpainting inputs for an inpainting model.
@ -460,7 +470,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                initial_image_latents=torch.zeros_like(latents[:1], device=latents.device, dtype=latents.dtype)
            ).add_mask_channels(latents)

-        return self.unet(latents, t, encoder_hidden_states=text_embeddings).sample
+        return self.unet(sample=latents,
+                         timestep=t,
+                         encoder_hidden_states=text_embeddings,
+                         cross_attention_kwargs=cross_attention_kwargs).sample

    def img2img_from_embeddings(self,
                                init_image: Union[torch.FloatTensor, PIL.Image.Image],
@ -531,6 +544,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
            init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB'))

        init_image = init_image.to(device=device, dtype=latents_dtype)
+        mask = mask.to(device=device, dtype=latents_dtype)

        if init_image.dim() == 3:
            init_image = init_image.unsqueeze(0)
@ -549,17 +563,22 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):

        if mask.dim() == 3:
            mask = mask.unsqueeze(0)
-        mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR) \
+        latent_mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR) \
            .to(device=device, dtype=latents_dtype)

        guidance: List[Callable] = []

        if is_inpainting_model(self.unet):
+            # You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
+            # (that's why there's a mask!) but it seems to really want that blanked out.
+            masked_init_image = init_image * torch.where(mask < 0.5, 1, 0)
+            masked_latents = self.non_noised_latents_from_image(masked_init_image, device=device, dtype=latents_dtype)
+
            # TODO: we should probably pass this in so we don't have to try/finally around setting it.
            self.invokeai_diffuser.model_forward_callback = \
-                AddsMaskLatents(self._unet_forward, mask, init_image_latents)
+                AddsMaskLatents(self._unet_forward, latent_mask, masked_latents)
        else:
-            guidance.append(AddsMaskGuidance(mask, init_image_latents, self.scheduler, noise))
+            guidance.append(AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise))

        try:
            result_latents, result_attention_maps = self.latents_from_embeddings(
@ -578,11 +597,20 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
            output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_maps)
            return self.check_for_safety(output, dtype=conditioning_data.dtype)

-    def non_noised_latents_from_image(self, init_image, *, device, dtype):
+    def non_noised_latents_from_image(self, init_image, *, device: torch.device, dtype):
        init_image = init_image.to(device=device, dtype=dtype)
        with torch.inference_mode():
+            if device.type == 'mps':
+                # workaround for torch MPS bug that has been fixed in https://github.com/kulinseth/pytorch/pull/222
+                # TODO remove this workaround once kulinseth#222 is merged to pytorch mainline
+                self.vae.to('cpu')
+                init_image = init_image.to('cpu')
            init_latent_dist = self.vae.encode(init_image).latent_dist
            init_latents = init_latent_dist.sample().to(dtype=dtype)  # FIXME: uses torch.randn. make reproducible!
+            if device.type == 'mps':
+                self.vae.to(device)
+                init_latents = init_latents.to(device)
+
        init_latents = 0.18215 * init_latents
        return init_latents

--- a/ldm/invoke/generator/inpaint.py
+++ b/ldm/invoke/generator/inpaint.py
@ -19,10 +19,12 @@ from ldm.util import debug_image


 def infill_methods()->list[str]:
-    methods = list()
+    methods = [
+        "tile",
+        "solid",
+    ]
    if PatchMatch.patchmatch_available():
-        methods.append('patchmatch')
-    methods.append('tile')
+        methods.insert(0, 'patchmatch')
    return methods

 class Inpaint(Img2Img):
@ -182,6 +184,7 @@ class Inpaint(Img2Img):
                       infill_method = None,
                       inpaint_width=None,
                       inpaint_height=None,
+                       inpaint_fill:tuple(int)=(0x7F, 0x7F, 0x7F, 0xFF),
                       attention_maps_callback=None,
                       **kwargs):
        """
@ -202,12 +205,17 @@ class Inpaint(Img2Img):
            # Do infill
            if infill_method == 'patchmatch' and PatchMatch.patchmatch_available():
                init_filled = self.infill_patchmatch(self.pil_image.copy())
-            else: # if infill_method == 'tile': # Only two methods right now, so always use 'tile' if not patchmatch
+            elif infill_method == 'tile':
                init_filled = self.tile_fill_missing(
                    self.pil_image.copy(),
                    seed = self.seed,
                    tile_size = tile_size
                )
+            elif infill_method == 'solid':
+                solid_bg = PIL.Image.new("RGBA", init_image.size, inpaint_fill)
+                init_filled = PIL.Image.alpha_composite(solid_bg, init_image)
+            else:
+                raise ValueError(f"Non-supported infill type {infill_method}", infill_method)
            init_filled.paste(init_image, (0,0), init_image.split()[-1])

            # Resize if requested for inpainting
--- a/ldm/invoke/generator/txt2img2img.py
+++ b/ldm/invoke/generator/txt2img2img.py
@ -3,10 +3,10 @@ ldm.invoke.generator.txt2img inherits from ldm.invoke.generator
 '''

 import math
-from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error
 from typing import Callable, Optional

 import torch
+from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error

 from ldm.invoke.generator.base import Generator
 from ldm.invoke.generator.diffusers_pipeline import trim_to_multiple_of, StableDiffusionGeneratorPipeline, \
@ -38,10 +38,6 @@ class Txt2Img2Img(Generator):
                uc, c, cfg_scale, extra_conditioning_info,
                threshold = ThresholdSettings(threshold, warmup=0.2) if threshold else None)
            .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
-        scale_dim = min(width, height)
-        scale = 512 / scale_dim
-
-        init_width, init_height = trim_to_multiple_of(scale * width, scale * height)

        def make_image(x_T):

@ -54,6 +50,10 @@ class Txt2Img2Img(Generator):
                # TODO: threshold = threshold,
            )

+            # Get our initial generation width and height directly from the latent output so
+            # the message below is accurate.
+            init_width = first_pass_latent_output.size()[3] * self.downsampling_factor
+            init_height = first_pass_latent_output.size()[2] * self.downsampling_factor
            print(
                  f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height} using DDIM sampling"
                 )
@ -106,27 +106,35 @@ class Txt2Img2Img(Generator):
    def get_noise(self,width,height,scale = True):
        # print(f"Get noise: {width}x{height}")
        if scale:
-            trained_square = 512 * 512
-            actual_square = width * height
-            scale = math.sqrt(trained_square / actual_square)
-            scaled_width = math.ceil(scale * width / 64) * 64
-            scaled_height = math.ceil(scale * height / 64) * 64
+            # Scale the input width and height for the initial generation
+            # Make their area equivalent to the model's resolution area (e.g. 512*512 = 262144),
+            # while keeping the minimum dimension at least 0.5 * resolution (e.g. 512*0.5 = 256)
+
+            aspect = width / height
+            dimension = self.model.unet.config.sample_size * self.model.vae_scale_factor
+            min_dimension = math.floor(dimension * 0.5)
+            model_area = dimension * dimension # hardcoded for now since all models are trained on square images
+
+            if aspect > 1.0:
+                init_height = max(min_dimension, math.sqrt(model_area / aspect))
+                init_width = init_height * aspect
+            else:
+                init_width = max(min_dimension, math.sqrt(model_area * aspect))
+                init_height = init_width / aspect
+
+            scaled_width, scaled_height = trim_to_multiple_of(math.floor(init_width), math.floor(init_height))
+
        else:
            scaled_width = width
            scaled_height = height

-        device      = self.model.device
+        device = self.model.device
+        channels = self.latent_channels
+        if channels == 9:
+            channels = 4  # we don't really want noise for all the mask channels
+        shape = (1, channels,
+                 scaled_height // self.downsampling_factor, scaled_width // self.downsampling_factor)
        if self.use_mps_noise or device.type == 'mps':
-            return torch.randn([1,
-                                self.latent_channels,
-                                scaled_height // self.downsampling_factor,
-                                scaled_width  // self.downsampling_factor],
-                               dtype=self.torch_dtype(),
-                               device='cpu').to(device)
+            return torch.randn(shape, dtype=self.torch_dtype(), device='cpu').to(device)
        else:
-            return torch.randn([1,
-                                self.latent_channels,
-                                scaled_height // self.downsampling_factor,
-                                scaled_width  // self.downsampling_factor],
-                               dtype=self.torch_dtype(),
-                               device=device)
+            return torch.randn(shape, dtype=self.torch_dtype(), device=device)
--- a/ldm/invoke/model_manager.py
+++ b/ldm/invoke/model_manager.py
@ -125,7 +125,7 @@ class ModelManager(object):
        Set the default model. The change will not take
        effect until you call model_manager.commit()
        '''
-        assert model_name in self.models,f"unknown model '{model_name}'"
+        assert model_name in self.model_names(), f"unknown model '{model_name}'"

        config = self.config
        for model in config:
--- a/ldm/invoke/prompt_parser.py
+++ b/ldm/invoke/prompt_parser.py
@ -155,7 +155,7 @@ class CrossAttentionControlSubstitute(CrossAttentionControlledFragment):
        default_options = {
            's_start': 0.0,
            's_end': 0.2062994740159002, # ~= shape_freedom=0.5
-            't_start': 0.0,
+            't_start': 0.1,
            't_end': 1.0
        }
        merged_options = default_options
--- a/ldm/models/diffusion/cross_attention_control.py
+++ b/ldm/models/diffusion/cross_attention_control.py
@ -7,8 +7,10 @@ import torch
 import diffusers
 from torch import nn
 from diffusers.models.unet_2d_condition import UNet2DConditionModel
+from diffusers.models.cross_attention import AttnProcessor
 from ldm.invoke.devices import torch_dtype

+
 # adapted from bloc97's CrossAttentionControl colab
 # https://github.com/bloc97/CrossAttentionControl

@ -304,11 +306,15 @@ class InvokeAICrossAttentionMixin:



-def remove_cross_attention_control(model):
-    remove_attention_function(model)
+def restore_default_cross_attention(model, is_running_diffusers: bool, restore_attention_processor: Optional[AttnProcessor]=None):
+    """
+    Undo cross attention control on `model`.
+
+    :param model: the model to restore. On the diffusers path it is treated as the unet.
+    :param is_running_diffusers: when True, reinstall an attention processor on the unet; otherwise remove the injected attention function.
+    :param restore_attention_processor: what to hand to `set_attn_processor` on the diffusers path. A falsy value falls back to a fresh `CrossAttnProcessor`.
+    """
+    if not is_running_diffusers:
+        # non-diffusers path: strip the attention function that was injected into the model
+        remove_attention_function(model)
+        return
+
+    processor_to_install = restore_attention_processor or CrossAttnProcessor()
+    model.set_attn_processor(processor_to_install)


-def setup_cross_attention_control(model, context: Context):
+def override_cross_attention(model, context: Context, is_running_diffusers = False):
    """
    Inject attention parameters and functions into the passed in model to enable cross attention editing.

@ -323,7 +329,7 @@ def setup_cross_attention_control(model, context: Context):
    # urgh. should this be hardcoded?
    max_length = 77
    # mask=1 means use base prompt attention, mask=0 means use edited prompt attention
-    mask = torch.zeros(max_length)
+    mask = torch.zeros(max_length, dtype=torch_dtype(device))
    indices_target = torch.arange(max_length, dtype=torch.long)
    indices = torch.arange(max_length, dtype=torch.long)
    for name, a0, a1, b0, b1 in context.arguments.edit_opcodes:
@ -333,10 +339,26 @@ def setup_cross_attention_control(model, context: Context):
                indices[b0:b1] = indices_target[a0:a1]
                mask[b0:b1] = 1

-    context.register_cross_attention_modules(model)
    context.cross_attention_mask = mask.to(device)
    context.cross_attention_index_map = indices.to(device)
-    inject_attention_function(model, context)
+    if is_running_diffusers:
+        unet = model
+        old_attn_processors = unet.attn_processors
+        if torch.backends.mps.is_available():
+            # see note in StableDiffusionGeneratorPipeline.__init__ about borked slicing on MPS
+            unet.set_attn_processor(SwapCrossAttnProcessor())
+        else:
+            # try to re-use an existing slice size
+            default_slice_size = 4
+            slice_size = next((p.slice_size for p in old_attn_processors.values() if type(p) is SlicedAttnProcessor), default_slice_size)
+            unet.set_attn_processor(SlicedSwapCrossAttnProcesser(slice_size=slice_size))
+        return old_attn_processors
+    else:
+        context.register_cross_attention_modules(model)
+        inject_attention_function(model, context)
+        return None
+
+


 def get_cross_attention_modules(model, which: CrossAttentionType) -> list[tuple[str, InvokeAICrossAttentionMixin]]:
@ -445,6 +467,7 @@ def get_mem_free_total(device):
    return mem_free_total


+
 class InvokeAIDiffusersCrossAttention(diffusers.models.attention.CrossAttention, InvokeAICrossAttentionMixin):

    def __init__(self, **kwargs):
@ -460,3 +483,176 @@ class InvokeAIDiffusersCrossAttention(diffusers.models.attention.CrossAttention,
        hidden_states = self.reshape_batch_dim_to_heads(attention_result)
        return hidden_states

+
+
+
+
+## 🧨diffusers implementation follows
+
+
+"""
+# base implementation
+
+class CrossAttnProcessor:
+    def __call__(self, attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None):
+        batch_size, sequence_length, _ = hidden_states.shape
+        attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length)
+
+        query = attn.to_q(hidden_states)
+        query = attn.head_to_batch_dim(query)
+
+        encoder_hidden_states = encoder_hidden_states if encoder_hidden_states is not None else hidden_states
+        key = attn.to_k(encoder_hidden_states)
+        value = attn.to_v(encoder_hidden_states)
+        key = attn.head_to_batch_dim(key)
+        value = attn.head_to_batch_dim(value)
+
+        attention_probs = attn.get_attention_scores(query, key, attention_mask)
+        hidden_states = torch.bmm(attention_probs, value)
+        hidden_states = attn.batch_to_head_dim(hidden_states)
+
+        # linear proj
+        hidden_states = attn.to_out[0](hidden_states)
+        # dropout
+        hidden_states = attn.to_out[1](hidden_states)
+
+        return hidden_states
+
+"""
+from dataclasses import field, dataclass
+
+import torch
+
+from diffusers.models.cross_attention import CrossAttention, CrossAttnProcessor, SlicedAttnProcessor, AttnProcessor
+
+
+@dataclass
+class SwapCrossAttnContext:
+    """
+    State read by the swap cross attention processors (they receive it as `swap_cross_attn_context`).
+
+    Instances are created through the dataclass-generated constructor, using the field names
+    below as keyword arguments.
+    """
+    modified_text_embeddings: torch.Tensor
+    index_map: torch.Tensor # maps from original prompt token indices to the equivalent tokens in the modified prompt
+    mask: torch.Tensor # in the target space of the index_map
+    cross_attention_types_to_do: list[CrossAttentionType] = field(default_factory=list)
+
+    def wants_cross_attention_control(self, attn_type: CrossAttentionType) -> bool:
+        """Return True when `attn_type` is listed in `cross_attention_types_to_do`."""
+        return attn_type in self.cross_attention_types_to_do
+
+    @classmethod
+    def make_mask_and_index_map(cls, edit_opcodes: list[tuple[str, int, int, int, int]], max_length: int) \
+            -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Build the mask and index map for a prompt edit.
+
+        :param edit_opcodes: tuples of (name, a0, a1, b0, b1); only entries named "equal" whose b0 is
+            below max_length have any effect. NOTE(review): these look like difflib-style opcodes
+            with a* indexing the original prompt and b* the modified prompt - confirm with the caller.
+        :param max_length: length of both returned tensors.
+        :return: (mask, indices). mask is 1 over every [b0:b1] span of an "equal" opcode and 0 elsewhere;
+            indices starts as arange(max_length) and has [b0:b1] overwritten with the values a0..a1-1.
+        """
+
+        # mask=1 means use original prompt attention, mask=0 means use modified prompt attention
+        mask = torch.zeros(max_length)
+        indices_target = torch.arange(max_length, dtype=torch.long)
+        indices = torch.arange(max_length, dtype=torch.long)
+        for name, a0, a1, b0, b1 in edit_opcodes:
+            if b0 < max_length and name == "equal":
+                # these tokens remain the same as in the original prompt
+                indices[b0:b1] = indices_target[a0:a1]
+                mask[b0:b1] = 1
+
+        return mask, indices
+
+
+class SlicedSwapCrossAttnProcesser(SlicedAttnProcessor):
+    """
+    Sliced attention processor that blends attention computed against the original prompt with
+    attention computed against a modified prompt, as directed by a SwapCrossAttnContext.
+
+    When no context is supplied, when the call is self-attention (no encoder_hidden_states), or
+    when the context does not request this attention type, the call is forwarded unchanged to
+    SlicedAttnProcessor.
+    """
+
+    # TODO: dynamically pick slice size based on memory conditions
+
+    def __call__(self, attn: CrossAttention, hidden_states, encoder_hidden_states=None, attention_mask=None,
+                 # kwargs
+                 swap_cross_attn_context: SwapCrossAttnContext=None):
+        """
+        :param attn: the attention module whose projections (to_q/to_k/to_v/to_out) are used.
+        :param hidden_states: the query-side input; unpacked as (batch_size, sequence_length, _).
+        :param encoder_hidden_states: original prompt embeddings; None means self-attention.
+        :param attention_mask: optional mask, passed through attn.prepare_attention_mask.
+        :param swap_cross_attn_context: supplies the modified embeddings, the index map and the mask.
+        :return: the attention output after attn.to_out[0] and attn.to_out[1].
+        """
+
+        attention_type = CrossAttentionType.SELF if encoder_hidden_states is None else CrossAttentionType.TOKENS
+
+        # if cross-attention control is not in play, just call through to the base implementation.
+        if attention_type is CrossAttentionType.SELF or \
+            swap_cross_attn_context is None or \
+            not swap_cross_attn_context.wants_cross_attention_control(attention_type):
+            #print(f"SwapCrossAttnContext for {attention_type} not active - passing request to superclass")
+            return super().__call__(attn, hidden_states, encoder_hidden_states, attention_mask)
+        #else:
+        #    print(f"SwapCrossAttnContext for {attention_type} active")
+
+        batch_size, sequence_length, _ = hidden_states.shape
+        attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length)
+
+        query = attn.to_q(hidden_states)
+        dim = query.shape[-1]
+        query = attn.head_to_batch_dim(query)
+
+        original_text_embeddings = encoder_hidden_states
+        modified_text_embeddings = swap_cross_attn_context.modified_text_embeddings
+        original_text_key = attn.to_k(original_text_embeddings)
+        modified_text_key = attn.to_k(modified_text_embeddings)
+        original_value = attn.to_v(original_text_embeddings)
+        modified_value = attn.to_v(modified_text_embeddings)
+
+        original_text_key = attn.head_to_batch_dim(original_text_key)
+        modified_text_key = attn.head_to_batch_dim(modified_text_key)
+        original_value = attn.head_to_batch_dim(original_value)
+        modified_value = attn.head_to_batch_dim(modified_value)
+
+        # compute slices and prepare output tensor
+        batch_size_attention = query.shape[0]
+        hidden_states = torch.zeros(
+            (batch_size_attention, sequence_length, dim // attn.heads), device=query.device, dtype=query.dtype
+        )
+
+        # the mask and its inverse do not change between slices, so compute them once
+        mask = swap_cross_attn_context.mask
+        inverse_mask = 1 - mask
+
+        # do slices. Stepping by slice_size (rather than iterating n // slice_size times) also covers a
+        # trailing partial slice, which would otherwise be left as zeros in the output.
+        for start_idx in range(0, batch_size_attention, self.slice_size):
+            end_idx = start_idx + self.slice_size
+
+            query_slice = query[start_idx:end_idx]
+            original_key_slice = original_text_key[start_idx:end_idx]
+            modified_key_slice = modified_text_key[start_idx:end_idx]
+            attn_mask_slice = attention_mask[start_idx:end_idx] if attention_mask is not None else None
+
+            original_attn_slice = attn.get_attention_scores(query_slice, original_key_slice, attn_mask_slice)
+            modified_attn_slice = attn.get_attention_scores(query_slice, modified_key_slice, attn_mask_slice)
+
+            # because the prompt modifications may result in token sequences shifted forwards or backwards,
+            # the original attention probabilities must be remapped to account for token index changes in the
+            # modified prompt
+            remapped_original_attn_slice = torch.index_select(original_attn_slice, -1,
+                                                              swap_cross_attn_context.index_map)
+
+            # only some tokens taken from the original attention probabilities. this is controlled by the mask.
+            attn_slice = \
+                remapped_original_attn_slice * mask + \
+                modified_attn_slice * inverse_mask
+
+            del remapped_original_attn_slice, modified_attn_slice
+
+            attn_slice = torch.bmm(attn_slice, modified_value[start_idx:end_idx])
+            hidden_states[start_idx:end_idx] = attn_slice
+
+
+        # done
+        hidden_states = attn.batch_to_head_dim(hidden_states)
+
+        # linear proj
+        hidden_states = attn.to_out[0](hidden_states)
+        # dropout
+        hidden_states = attn.to_out[1](hidden_states)
+
+        return hidden_states
+
+
+class SwapCrossAttnProcessor(SlicedSwapCrossAttnProcesser):
+    """SlicedSwapCrossAttnProcesser configured with a slice size so large that it never slices."""
+
+    def __init__(self):
+        # massive slice size = don't slice
+        super().__init__(slice_size=int(1e9))
+
--- a/ldm/models/diffusion/ddim.py
+++ b/ldm/models/diffusion/ddim.py
@ -19,9 +19,9 @@ class DDIMSampler(Sampler):
        all_timesteps_count = kwargs.get('all_timesteps_count', t_enc)

        if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
-            self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, step_count = all_timesteps_count)
+            self.invokeai_diffuser.override_cross_attention(extra_conditioning_info, step_count = all_timesteps_count)
        else:
-            self.invokeai_diffuser.remove_cross_attention_control()
+            self.invokeai_diffuser.restore_default_cross_attention()


    # This is the central routine
--- a/ldm/models/diffusion/ksampler.py
+++ b/ldm/models/diffusion/ksampler.py
@ -43,9 +43,9 @@ class CFGDenoiser(nn.Module):
        extra_conditioning_info = kwargs.get('extra_conditioning_info', None)

        if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
-            self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, step_count = t_enc)
+            self.invokeai_diffuser.override_cross_attention(extra_conditioning_info, step_count = t_enc)
        else:
-            self.invokeai_diffuser.remove_cross_attention_control()
+            self.invokeai_diffuser.restore_default_cross_attention()


    def forward(self, x, sigma, uncond, cond, cond_scale):
--- a/ldm/models/diffusion/plms.py
+++ b/ldm/models/diffusion/plms.py
@ -21,9 +21,9 @@ class PLMSSampler(Sampler):
        all_timesteps_count = kwargs.get('all_timesteps_count', t_enc)

        if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
-            self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, step_count = all_timesteps_count)
+            self.invokeai_diffuser.override_cross_attention(extra_conditioning_info, step_count = all_timesteps_count)
        else:
-            self.invokeai_diffuser.remove_cross_attention_control()
+            self.invokeai_diffuser.restore_default_cross_attention()


    # this is the essential routine
--- a/ldm/models/diffusion/shared_invokeai_diffusion.py
+++ b/ldm/models/diffusion/shared_invokeai_diffusion.py
@ -1,14 +1,16 @@
 import math
+from contextlib import contextmanager
 from dataclasses import dataclass
 from math import ceil
-from typing import Callable, Optional, Union
+from typing import Callable, Optional, Union, Any, Dict

 import numpy as np
 import torch

+from diffusers.models.cross_attention import AttnProcessor
 from ldm.models.diffusion.cross_attention_control import Arguments, \
-    remove_cross_attention_control, setup_cross_attention_control, Context, get_cross_attention_modules, \
-    CrossAttentionType
+    restore_default_cross_attention, override_cross_attention, Context, get_cross_attention_modules, \
+    CrossAttentionType, SwapCrossAttnContext
 from ldm.models.diffusion.cross_attention_map_saving import AttentionMapSaver


@ -30,39 +32,68 @@ class InvokeAIDiffuserComponent:
    debug_thresholding = False


+    @dataclass
    class ExtraConditioningInfo:
-        def __init__(self, tokens_count_including_eos_bos:int, cross_attention_control_args: Optional[Arguments]):
-            self.tokens_count_including_eos_bos = tokens_count_including_eos_bos
-            self.cross_attention_control_args = cross_attention_control_args
+
+        tokens_count_including_eos_bos: int
+        cross_attention_control_args: Optional[Arguments] = None

        @property
        def wants_cross_attention_control(self):
            return self.cross_attention_control_args is not None

+
    def __init__(self, model, model_forward_callback:
-                    Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]
-                ):
+                    Callable[[torch.Tensor, torch.Tensor, torch.Tensor, Optional[dict[str,Any]]], torch.Tensor],
+                 is_running_diffusers: bool=False,
+                 ):
        """
        :param model: the unet model to pass through to cross attention control
        :param model_forward_callback: a lambda with arguments (x, sigma, conditioning_to_apply). will be called repeatedly. most likely, this should simply call model.forward(x, sigma, conditioning)
        """
        self.conditioning = None
        self.model = model
+        self.is_running_diffusers = is_running_diffusers
        self.model_forward_callback = model_forward_callback
        self.cross_attention_control_context = None

-    def setup_cross_attention_control(self, conditioning: ExtraConditioningInfo, step_count: int):
+    @contextmanager
+    def custom_attention_context(self,
+                                 extra_conditioning_info: Optional[ExtraConditioningInfo],
+                                 step_count: int):
+        """
+        Context manager that enables cross attention control for the duration of the `with` body.
+
+        :param extra_conditioning_info: if present and it wants cross attention control, the override is installed on entry
+        :param step_count: forwarded to override_cross_attention
+
+        On exit (including on error) the value returned by override_cross_attention is passed back to
+        restore_default_cross_attention, but only if that value is not None.
+        """
+        previous_processors = None
+        if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
+            previous_processors = self.override_cross_attention(extra_conditioning_info,
+                                                                step_count=step_count)
+        try:
+            yield None
+        finally:
+            if previous_processors is not None:
+                self.restore_default_cross_attention(previous_processors)
+            # TODO resuscitate attention map saving
+            #self.remove_attention_map_saving()
+
+    def override_cross_attention(self, conditioning: ExtraConditioningInfo, step_count: int) -> Dict[str, AttnProcessor]:
+        """
+        setup cross attention .swap control. for diffusers this replaces the attention processor, so
+        the previous attention processor is returned so that the caller can restore it later.
+        """
        self.conditioning = conditioning
        self.cross_attention_control_context = Context(
            arguments=self.conditioning.cross_attention_control_args,
            step_count=step_count
        )
-        setup_cross_attention_control(self.model, self.cross_attention_control_context)
+        return override_cross_attention(self.model,
+                                        self.cross_attention_control_context,
+                                        is_running_diffusers=self.is_running_diffusers)

-    def remove_cross_attention_control(self):
+    def restore_default_cross_attention(self, restore_attention_processor: Optional['AttnProcessor']=None):
        self.conditioning = None
        self.cross_attention_control_context = None
-        remove_cross_attention_control(self.model)
+        restore_default_cross_attention(self.model,
+                                        is_running_diffusers=self.is_running_diffusers,
+                                        restore_attention_processor=restore_attention_processor)

    def setup_attention_map_saving(self, saver: AttentionMapSaver):
        def callback(slice, dim, offset, slice_size, key):
@ -168,7 +199,41 @@ class InvokeAIDiffuserComponent:
        return unconditioned_next_x, conditioned_next_x


-    def apply_cross_attention_controlled_conditioning(self, x:torch.Tensor, sigma, unconditioning, conditioning, cross_attention_control_types_to_do):
+    def apply_cross_attention_controlled_conditioning(self,
+                                                     x: torch.Tensor,
+                                                     sigma,
+                                                     unconditioning,
+                                                     conditioning,
+                                                     cross_attention_control_types_to_do):
+        """
+        Dispatch to the diffusers or compvis implementation, depending on `self.is_running_diffusers`.
+        All arguments are forwarded unchanged and the chosen implementation's result is returned.
+        """
+        if self.is_running_diffusers:
+            backend_impl = self.apply_cross_attention_controlled_conditioning__diffusers
+        else:
+            backend_impl = self.apply_cross_attention_controlled_conditioning__compvis
+        return backend_impl(x, sigma, unconditioning, conditioning, cross_attention_control_types_to_do)
+
+    def apply_cross_attention_controlled_conditioning__diffusers(self,
+                                                                 x: torch.Tensor,
+                                                                 sigma,
+                                                                 unconditioning,
+                                                                 conditioning,
+                                                                 cross_attention_control_types_to_do):
+        """
+        Run the model forward callback twice - once for the unconditioning and once for the
+        conditioning - passing a SwapCrossAttnContext along with each call.
+
+        :return: tuple of (unconditioned_next_x, conditioned_next_x)
+        """
+        cac_context: Context = self.cross_attention_control_context
+
+        # starts with no attention types enabled; they are switched on below for the positive prompt only
+        swap_context = SwapCrossAttnContext(modified_text_embeddings=cac_context.arguments.edited_conditioning,
+                                            index_map=cac_context.cross_attention_index_map,
+                                            mask=cac_context.cross_attention_mask,
+                                            cross_attention_types_to_do=[])
+
+        def run_model(text_conditioning):
+            # a fresh kwargs dict per call, always carrying the same (mutable) swap context
+            return self.model_forward_callback(x, sigma, text_conditioning,
+                                               {"swap_cross_attn_context": swap_context})
+
+        # no cross attention for unconditioning (negative prompt)
+        unconditioned_next_x = run_model(unconditioning)
+
+        # do requested cross attention types for conditioning (positive prompt)
+        swap_context.cross_attention_types_to_do = cross_attention_control_types_to_do
+        conditioned_next_x = run_model(conditioning)
+        return unconditioned_next_x, conditioned_next_x
+
+
+    def apply_cross_attention_controlled_conditioning__compvis(self, x:torch.Tensor, sigma, unconditioning, conditioning, cross_attention_control_types_to_do):
        # print('pct', percent_through, ': doing cross attention control on', cross_attention_control_types_to_do)
        # slower non-batched path (20% slower on mac MPS)
        # We are only interested in using attention maps for conditioned_next_x, but batching them with generation of