mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00

Merge branch 'development' into development

This commit is contained in: commit 8b0d1e59fe
@@ -3,7 +3,6 @@
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Stable_Diffusion_AI_Notebook.ipynb",
"provenance": [],
"collapsed_sections": [],
"private_outputs": true
@@ -22,18 +21,18 @@
{
"cell_type": "markdown",
"source": [
"# Stable Diffusion AI Notebook\n",
"# Stable Diffusion AI Notebook (Release 1.13)\n",
"\n",
"<img src=\"https://user-images.githubusercontent.com/60411196/186547976-d9de378a-9de8-4201-9c25-c057a9c59bad.jpeg\" alt=\"stable-diffusion-ai\" width=\"170px\"/> <br>\n",
"#### Instructions:\n",
"1. Execute each cell in order to mount a Dream bot and create images from text. <br>\n",
"2. Once cells 1-8 were run correctly you'll be executing a terminal in cell #9, you'll to enter `pipenv run scripts/dream.py` command to run Dream bot.<br> \n",
"2. Once cells 1-8 were run correctly you'll be executing a terminal in cell #9, you'll need to enter `python scripts/dream.py` command to run Dream bot.<br> \n",
"3. After launching dream bot, you'll see: <br> `Dream > ` in terminal. <br> Insert a command, eg. `Dream > Astronaut floating in a distant galaxy`, or type `-h` for help.\n",
"3. After completion you'll see your generated images in path `stable-diffusion/outputs/img-samples/`, you can also display images in cell #10.\n",
"3. After completion you'll see your generated images in path `stable-diffusion/outputs/img-samples/`, you can also show last generated images in cell #10.\n",
"4. To quit Dream bot use `q` command. <br> \n",
"---\n",
"<font color=\"red\">Note:</font> It takes some time to load, but after installing all dependencies you can use the bot all time you want while colab instance is up. <br>\n",
"<font color=\"red\">Requirements:</font> For this notebook to work you need to have [Stable-Diffusion-v-1-4](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original) stored in your Google Drive, it will be needed in cell #6\n",
"<font color=\"red\">Requirements:</font> For this notebook to work you need to have [Stable-Diffusion-v-1-4](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original) stored in your Google Drive, it will be needed in cell #7\n",
"##### For more details visit Github repository: [lstein/stable-diffusion](https://github.com/lstein/stable-diffusion)\n",
"---\n"
],
@@ -41,6 +40,15 @@
"id": "ycYWcsEKc6w7"
}
},
{
"cell_type": "markdown",
"source": [
"## ◢ Installation"
],
"metadata": {
"id": "dr32VLxlnouf"
}
},
{
"cell_type": "code",
"source": [
@@ -68,43 +76,28 @@
"from os.path import exists\n",
"\n",
"if exists(\"/content/stable-diffusion/\")==True:\n",
" %cd /content/stable-diffusion/\n",
" print(\"Already downloaded repo\")\n",
"else:\n",
" !git clone --quiet https://github.com/lstein/stable-diffusion.git # Original repo\n",
" %cd stable-diffusion/\n",
" !git checkout --quiet tags/release-1.09\n",
" "
" %cd /content/stable-diffusion/\n",
" !git checkout --quiet tags/release-1.13"
]
},
{
"cell_type": "code",
"source": [
"#@title 3. Install Python 3.8 \n",
"%%capture --no-stderr\n",
"#@title 3. Install dependencies\n",
"import gc\n",
"!apt-get -qq install python3.8\n",
"gc.collect()"
],
"metadata": {
"id": "daHlozvwKesj",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title 4. Install dependencies from file in a VirtualEnv\n",
"#@markdown Be patient, it takes ~ 5 - 7min <br>\n",
"%%capture --no-stderr\n",
"#Virtual environment\n",
"!pip install pipenv -q\n",
"\n",
"if exists(\"/content/stable-diffusion/requirements-colab.txt\")==True:\n",
" %cd /content/stable-diffusion/\n",
" print(\"Already downloaded requirements file\")\n",
"else:\n",
" !wget https://raw.githubusercontent.com/lstein/stable-diffusion/development/requirements-colab.txt\n",
"!pip install colab-xterm\n",
"%load_ext colabxterm\n",
"!pipenv --python 3.8\n",
"!pipenv install -r requirements.txt --skip-lock\n",
"gc.collect()\n"
"!pip install -r requirements-colab.txt\n",
"gc.collect()"
],
"metadata": {
"cellView": "form",
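Stripped of the JSON escaping, the reworked install step above boils down to roughly this single Colab cell (a sketch assuming the IPython/Colab runtime, not a literal cell dump; the pipenv/Python 3.8 virtualenv steps are gone and dependencies go straight into the runtime):

#@title 3. Install dependencies
import gc
from os.path import exists

if exists("/content/stable-diffusion/requirements-colab.txt"):
    %cd /content/stable-diffusion/
    print("Already downloaded requirements file")
else:
    !wget https://raw.githubusercontent.com/lstein/stable-diffusion/development/requirements-colab.txt
!pip install colab-xterm        # terminal support used later in cell #9
%load_ext colabxterm
!pip install -r requirements-colab.txt
gc.collect()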
@@ -116,7 +109,44 @@
{
"cell_type": "code",
"source": [
"#@title 5. Mount google Drive\n",
"#@title 4. Load small ML models required\n",
"%cd /content/stable-diffusion/\n",
"!python scripts/preload_models.py\n",
"gc.collect()"
],
"metadata": {
"cellView": "form",
"id": "ChIDWxLVHGGJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title 5. Restart Runtime\n",
"exit()"
],
"metadata": {
"cellView": "form",
"id": "8rSMhgnAttQa"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## ◢ Configuration"
],
"metadata": {
"id": "795x1tMoo8b1"
}
},
{
"cell_type": "code",
"source": [
"#@title 6. Mount google Drive\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
@@ -130,7 +160,7 @@
{
"cell_type": "code",
"source": [
"#@title 6. Drive Path to model\n",
"#@title 7. Drive Path to model\n",
"#@markdown Path should start with /content/drive/path-to-your-file <br>\n",
"#@markdown <font color=\"red\">Note:</font> Model should be downloaded from https://huggingface.co <br>\n",
"#@markdown Lastest release: [Stable-Diffusion-v-1-4](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)\n",
@@ -152,7 +182,7 @@
{
"cell_type": "code",
"source": [
"#@title 7. Symlink to model\n",
"#@title 8. Symlink to model\n",
"\n",
"from os.path import exists\n",
"import os \n",
@@ -181,32 +211,27 @@
"outputs": []
},
{
"cell_type": "code",
"cell_type": "markdown",
"source": [
"#@title 8. Load small ML models required\n",
"%%capture --no-stderr\n",
"!pipenv run scripts/preload_models.py\n",
"gc.collect()"
"## ◢ Execution"
],
"metadata": {
"cellView": "form",
"id": "ChIDWxLVHGGJ"
},
"execution_count": null,
"outputs": []
"id": "Mc28N0_NrCQH"
}
},
{
"cell_type": "code",
"source": [
"#@title 9. Run Terminal and Execute Dream bot\n",
"#@markdown <font color=\"blue\">Steps:</font> <br>\n",
"#@markdown 1. Execute command `pipenv run scripts/dream.py` to run dream bot.<br>\n",
"#@markdown 1. Execute command `python scripts/dream.py` to run dream bot.<br>\n",
"#@markdown 2. After initialized you'll see `Dream>` line.<br>\n",
"#@markdown 3. Example text: `Astronaut floating in a distant galaxy` <br>\n",
"#@markdown 4. To quit Dream bot use: `q` command.<br>\n",
"\n",
"#Run from virtual env\n",
"\n",
"import gc\n",
"%cd /content/stable-diffusion/\n",
"%load_ext colabxterm\n",
"%xterm\n",
"gc.collect()"
],
@@ -220,18 +245,18 @@
{
"cell_type": "code",
"source": [
"#@title 10. Show generated images\n",
"\n",
"#@title 10. Show the last 15 generated images\n",
"import gc\n",
"import glob\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.image as mpimg\n",
"%matplotlib inline\n",
"\n",
"images = []\n",
"for img_path in glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'):\n",
"for img_path in sorted(glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'), reverse=True):\n",
" images.append(mpimg.imread(img_path))\n",
"\n",
"# Remove ticks and labels on x-axis and y-axis both\n",
"images = images[:15] \n",
"\n",
"plt.figure(figsize=(20,10))\n",
"\n",
@@ -253,4 +278,4 @@
"outputs": []
}
]
}
}
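The cell #10 change above leans on filename order (sorted, reverse=True) standing in for creation order. A minimal standalone sketch of the same "last 15 images" gallery, sorting by modification time instead — the os.path.getmtime key and the 3x5 subplot grid are illustrative assumptions, not what the notebook ships:

import glob
import os

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Newest first; mtime keeps working even if filenames don't sort chronologically.
paths = sorted(glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'),
               key=os.path.getmtime, reverse=True)[:15]

plt.figure(figsize=(20, 10))
for i, img_path in enumerate(paths):
    plt.subplot(3, 5, i + 1)            # up to 15 images in a 3x5 grid
    plt.imshow(mpimg.imread(img_path))
    plt.axis('off')                     # no ticks or labels on either axis
plt.show()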
@@ -505,12 +505,22 @@ class Generate:

    def _load_model_from_config(self, config, ckpt):
        print(f'>> Loading model from {ckpt}')

        # for usage statistics
        device_type = choose_torch_device()
        if device_type == 'cuda':
            torch.cuda.reset_peak_memory_stats()
        tic = time.time()

        # this does the work
        pl_sd = torch.load(ckpt, map_location='cpu')
        sd = pl_sd['state_dict']
        model = instantiate_from_config(config.model)
        m, u = model.load_state_dict(sd, strict=False)
        model.to(self.device)
        model.eval()


        if self.full_precision:
            print(
                '>> Using slower but more accurate full-precision math (--full_precision)'
@@ -520,6 +530,20 @@
                '>> Using half precision math. Call with --full_precision to use more accurate but VRAM-intensive full precision.'
            )
            model.half()

        # usage statistics
        toc = time.time()
        print(
            f'>> Model loaded in', '%4.2fs' % (toc - tic)
        )
        if device_type == 'cuda':
            print(
                '>> Max VRAM used to load the model:',
                '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
                '\n>> Current VRAM usage:'
                '%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
            )

        return model

    def _load_img(self, path, width, height, fit=False):
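The lines added to _load_model_from_config follow a reusable profiling pattern: reset CUDA's peak-memory counter, time the work, then read back peak and current allocation. A minimal sketch of that pattern in isolation, assuming only PyTorch (timed_load and its argument are illustrative names, not repo API):

import time

import torch

def timed_load(load_fn):
    # Reset the high-water mark so max_memory_allocated() reflects only load_fn.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.reset_peak_memory_stats()
    tic = time.time()
    result = load_fn()                  # the actual work being measured
    print('>> loaded in %4.2fs' % (time.time() - tic))
    if use_cuda:
        print('>> max VRAM used: %4.2fG' % (torch.cuda.max_memory_allocated() / 1e9))
        print('>> current VRAM: %4.2fG' % (torch.cuda.memory_allocated() / 1e9))
    return result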
@@ -1,9 +1,10 @@
from inspect import isfunction
import math
from inspect import isfunction

import torch
import torch.nn.functional as F
from torch import nn, einsum
from einops import rearrange, repeat
from torch import nn, einsum

from ldm.modules.diffusionmodules.util import checkpoint


@@ -13,7 +14,7 @@ def exists(val):


def uniq(arr):
    return{el: True for el in arr}.keys()
    return {el: True for el in arr}.keys()


def default(val, d):
@@ -82,14 +83,14 @@ class LinearAttention(nn.Module):
        super().__init__()
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False)
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

    def forward(self, x):
        b, c, h, w = x.shape
        qkv = self.to_qkv(x)
        q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads = self.heads, qkv=3)
        k = k.softmax(dim=-1)
        q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3)
        k = k.softmax(dim=-1)
        context = torch.einsum('bhdn,bhen->bhde', k, v)
        out = torch.einsum('bhde,bhdn->bhen', context, q)
        out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w)
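The LinearAttention edits here are whitespace-only, but the rearrange pattern they sit next to is the interesting part: it splits the fused qkv convolution output into three per-head tensors in one call. A quick shape check (sizes are arbitrary examples, not from the repo):

import torch
from einops import rearrange

b, heads, dim_head, h, w = 2, 4, 8, 16, 16
qkv = torch.randn(b, 3 * heads * dim_head, h, w)    # what to_qkv produces
q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)',
                    heads=heads, qkv=3)             # unpacks along the qkv axis
assert q.shape == (b, heads, dim_head, h * w)       # same for k and v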
@@ -131,12 +132,12 @@ class SpatialSelfAttention(nn.Module):
        v = self.v(h_)

        # compute attention
        b,c,h,w = q.shape
        b, c, h, w = q.shape
        q = rearrange(q, 'b c h w -> b (h w) c')
        k = rearrange(k, 'b c h w -> b c (h w)')
        w_ = torch.einsum('bij,bjk->bik', q, k)

        w_ = w_ * (int(c)**(-0.5))
        w_ = w_ * (int(c) ** (-0.5))
        w_ = torch.nn.functional.softmax(w_, dim=2)

        # attend to values
@@ -146,7 +147,7 @@ class SpatialSelfAttention(nn.Module):
        h_ = rearrange(h_, 'b c (h w) -> b c h w', h=h)
        h_ = self.proj_out(h_)

        return x+h_
        return x + h_


class CrossAttention(nn.Module):
@@ -174,6 +175,7 @@ class CrossAttention(nn.Module):
        context = default(context, x)
        k = self.to_k(context)
        v = self.to_v(context)
        device_type = x.device.type
        del context, x

        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))
@@ -188,9 +190,11 @@ class CrossAttention(nn.Module):
            sim.masked_fill_(~mask, max_neg_value)
            del mask

        # attention, what we cannot get enough of, by halves
        sim[4:] = sim[4:].softmax(dim=-1)
        sim[:4] = sim[:4].softmax(dim=-1)
        if device_type == 'mps': #special case for M1 - disable neonsecret optimization
            sim = sim.softmax(dim=-1)
        else:
            sim[4:] = sim[4:].softmax(dim=-1)
            sim[:4] = sim[:4].softmax(dim=-1)

        sim = einsum('b i j, b j d -> b i d', sim, v)
        sim = rearrange(sim, '(b h) n d -> b n (h d)', h=h)
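The CrossAttention change gates the halved softmax behind the device type: the in-place, two-slice softmax (the "neonsecret optimization" per the comment, intended to reduce peak memory) is disabled on Apple's MPS backend, which instead takes one full softmax. The control flow in isolation — a sketch; the slice point 4 is copied from the code above, and the function name is illustrative:

import torch

def attention_softmax(sim: torch.Tensor, device_type: str) -> torch.Tensor:
    if device_type == 'mps':
        # M1/M2: plain softmax; the split optimization is disabled here.
        return sim.softmax(dim=-1)
    # Elsewhere: softmax the batch in two halves, in place, to lower peak memory.
    sim[4:] = sim[4:].softmax(dim=-1)
    sim[:4] = sim[:4].softmax(dim=-1)
    return sim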
@@ -200,7 +204,8 @@ class CrossAttention(nn.Module):
class BasicTransformerBlock(nn.Module):
    def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True):
        super().__init__()
        self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout) # is a self-attention
        self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head,
                                    dropout=dropout) # is a self-attention
        self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
        self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim,
                                    heads=n_heads, dim_head=d_head, dropout=dropout) # is self-attn if context is none
@@ -228,6 +233,7 @@ class SpatialTransformer(nn.Module):
    Then apply standard transformer action.
    Finally, reshape to image
    """

    def __init__(self, in_channels, n_heads, d_head,
                 depth=1, dropout=0., context_dim=None):
        super().__init__()
@@ -243,7 +249,7 @@ class SpatialTransformer(nn.Module):

        self.transformer_blocks = nn.ModuleList(
            [BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim)
                for d in range(depth)]
             for d in range(depth)]
        )

        self.proj_out = zero_module(nn.Conv2d(inner_dim,
requirements-colab.txt (new file, +26 lines)
@@ -0,0 +1,26 @@
albumentations==0.4.3
clean-fid==0.1.29
einops==0.3.0
huggingface-hub==0.8.1
imageio-ffmpeg==0.4.2
imageio==2.9.0
kornia==0.6.0
numpy==1.21.6
omegaconf==2.1.1
opencv-python==4.6.0.66
pillow==9.2.0
pip>=22
pudb==2019.2
pytorch-lightning==1.4.2
streamlit==1.12.0
taming-transformers-rom1504==0.0.6
test-tube>=0.7.5
torch-fidelity==0.3.0
torchmetrics==0.6.0
torchtext==0.6.0
transformers==4.19.2
torch==1.12.1+cu113
torchvision==0.13.1+cu113
git+https://github.com/openai/CLIP.git@main#egg=clip
git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
-e .
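One packaging note on the pins above: the +cu113 builds of torch and torchvision are not published on PyPI, so outside an environment that already satisfies them (as Colab images of the era did), pip needs the PyTorch wheel index. A hedged sketch for non-Colab environments — this invocation is an assumption, not part of the commit:

# Notebook-style cell; --extra-index-url points pip at the CUDA 11.3 wheels.
!pip install -r requirements-colab.txt --extra-index-url https://download.pytorch.org/whl/cu113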
@@ -91,11 +91,7 @@ def main():
        print(">> changed to seamless tiling mode")

    # preload the model
    tic = time.time()
    t2i.load_model()
    print(
        f'>> model loaded in', '%4.2fs' % (time.time() - tic)
    )

    if not infile:
        print(
@@ -154,6 +154,12 @@ async function generateSubmit(form) {
}

window.onload = () => {
  document.querySelector("#prompt").addEventListener("keydown", (e) => {
    if (e.key === "Enter" && !e.shiftKey) {
      const form = e.target.form;
      generateSubmit(form);
    }
  });
  document.querySelector("#generate-form").addEventListener('submit', (e) => {
    e.preventDefault();
    const form = e.target;
@@ -180,6 +186,12 @@ window.onload = () => {
      console.error(e);
    });
  });
  document.documentElement.addEventListener('keydown', (e) => {
    if (e.key === "Escape")
      fetch('/cancel').catch(err => {
        console.error(err);
      });
  });

  if (!config.gfpgan_model_exists) {
    document.querySelector("#gfpgan").style.display = 'none';