diff --git a/Stable_Diffusion_AI_Notebook.ipynb b/Stable_Diffusion_AI_Notebook.ipynb
index defc158346..3508a62efa 100644
--- a/Stable_Diffusion_AI_Notebook.ipynb
+++ b/Stable_Diffusion_AI_Notebook.ipynb
@@ -3,7 +3,6 @@
"nbformat_minor": 0,
"metadata": {
"colab": {
- "name": "Stable_Diffusion_AI_Notebook.ipynb",
"provenance": [],
"collapsed_sections": [],
"private_outputs": true
@@ -22,18 +21,18 @@
{
"cell_type": "markdown",
"source": [
- "# Stable Diffusion AI Notebook\n",
+ "# Stable Diffusion AI Notebook (Release 1.13)\n",
"\n",
"
\n",
"#### Instructions:\n",
"1. Execute each cell in order to mount a Dream bot and create images from text.
\n",
- "2. Once cells 1-8 were run correctly you'll be executing a terminal in cell #9, you'll to enter `pipenv run scripts/dream.py` command to run Dream bot.
\n",
+ "2. Once cells 1-8 were run correctly you'll be executing a terminal in cell #9, you'll need to enter `python scripts/dream.py` command to run Dream bot.
\n",
"3. After launching dream bot, you'll see:
`Dream > ` in terminal.
Insert a command, eg. `Dream > Astronaut floating in a distant galaxy`, or type `-h` for help.\n",
- "3. After completion you'll see your generated images in path `stable-diffusion/outputs/img-samples/`, you can also display images in cell #10.\n",
+ "3. After completion you'll see your generated images in path `stable-diffusion/outputs/img-samples/`, you can also show last generated images in cell #10.\n",
"4. To quit Dream bot use `q` command.
\n",
"---\n",
"Note: It takes some time to load, but after installing all dependencies you can use the bot all time you want while colab instance is up.
\n",
- "Requirements: For this notebook to work you need to have [Stable-Diffusion-v-1-4](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original) stored in your Google Drive, it will be needed in cell #6\n",
+ "Requirements: For this notebook to work you need to have [Stable-Diffusion-v-1-4](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original) stored in your Google Drive, it will be needed in cell #7\n",
"##### For more details visit Github repository: [lstein/stable-diffusion](https://github.com/lstein/stable-diffusion)\n",
"---\n"
],
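The instructions above center on the interactive `Dream > ` prompt in cell #9. As a hedged illustration of such a session (the `-n`, `-s` and `-S` flags for image count, sampler steps and seed follow the lstein/stable-diffusion README; exact options may differ between releases):

    python scripts/dream.py
    Dream > Astronaut floating in a distant galaxy -n 3 -s 50 -S 42
    Dream > q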
@@ -41,6 +40,15 @@
"id": "ycYWcsEKc6w7"
}
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## ◢ Installation"
+ ],
+ "metadata": {
+ "id": "dr32VLxlnouf"
+ }
+ },
{
"cell_type": "code",
"source": [
@@ -68,43 +76,28 @@
"from os.path import exists\n",
"\n",
"if exists(\"/content/stable-diffusion/\")==True:\n",
+ " %cd /content/stable-diffusion/\n",
" print(\"Already downloaded repo\")\n",
"else:\n",
" !git clone --quiet https://github.com/lstein/stable-diffusion.git # Original repo\n",
- " %cd stable-diffusion/\n",
- " !git checkout --quiet tags/release-1.09\n",
- " "
+ " %cd /content/stable-diffusion/\n",
+ " !git checkout --quiet tags/release-1.13"
]
},
{
"cell_type": "code",
"source": [
- "#@title 3. Install Python 3.8 \n",
- "%%capture --no-stderr\n",
+ "#@title 3. Install dependencies\n",
"import gc\n",
- "!apt-get -qq install python3.8\n",
- "gc.collect()"
- ],
- "metadata": {
- "id": "daHlozvwKesj",
- "cellView": "form"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "#@title 4. Install dependencies from file in a VirtualEnv\n",
- "#@markdown Be patient, it takes ~ 5 - 7min
\n",
- "%%capture --no-stderr\n",
- "#Virtual environment\n",
- "!pip install pipenv -q\n",
+ "\n",
+ "if exists(\"/content/stable-diffusion/requirements-colab.txt\")==True:\n",
+ " %cd /content/stable-diffusion/\n",
+ " print(\"Already downloaded requirements file\")\n",
+ "else:\n",
+ " !wget https://raw.githubusercontent.com/lstein/stable-diffusion/development/requirements-colab.txt\n",
"!pip install colab-xterm\n",
- "%load_ext colabxterm\n",
- "!pipenv --python 3.8\n",
- "!pipenv install -r requirements.txt --skip-lock\n",
- "gc.collect()\n"
+ "!pip install -r requirements-colab.txt\n",
+ "gc.collect()"
],
"metadata": {
"cellView": "form",
@@ -116,7 +109,44 @@
{
"cell_type": "code",
"source": [
- "#@title 5. Mount google Drive\n",
+ "#@title 4. Load small ML models required\n",
+ "%cd /content/stable-diffusion/\n",
+ "!python scripts/preload_models.py\n",
+ "gc.collect()"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "ChIDWxLVHGGJ"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title 5. Restart Runtime\n",
+ "exit()"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "8rSMhgnAttQa"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## ◢ Configuration"
+ ],
+ "metadata": {
+ "id": "795x1tMoo8b1"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title 6. Mount google Drive\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
@@ -130,7 +160,7 @@
{
"cell_type": "code",
"source": [
- "#@title 6. Drive Path to model\n",
+ "#@title 7. Drive Path to model\n",
"#@markdown Path should start with /content/drive/path-to-your-file
\n",
"#@markdown Note: Model should be downloaded from https://huggingface.co
\n",
"#@markdown Lastest release: [Stable-Diffusion-v-1-4](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)\n",
@@ -152,7 +182,7 @@
{
"cell_type": "code",
"source": [
- "#@title 7. Symlink to model\n",
+ "#@title 8. Symlink to model\n",
"\n",
"from os.path import exists\n",
"import os \n",
@@ -181,32 +211,27 @@
"outputs": []
},
{
- "cell_type": "code",
+ "cell_type": "markdown",
"source": [
- "#@title 8. Load small ML models required\n",
- "%%capture --no-stderr\n",
- "!pipenv run scripts/preload_models.py\n",
- "gc.collect()"
+ "## ◢ Execution"
],
"metadata": {
- "cellView": "form",
- "id": "ChIDWxLVHGGJ"
- },
- "execution_count": null,
- "outputs": []
+ "id": "Mc28N0_NrCQH"
+ }
},
{
"cell_type": "code",
"source": [
"#@title 9. Run Terminal and Execute Dream bot\n",
"#@markdown Steps:
\n",
- "#@markdown 1. Execute command `pipenv run scripts/dream.py` to run dream bot.
\n",
+ "#@markdown 1. Execute command `python scripts/dream.py` to run dream bot.
\n",
"#@markdown 2. After initialized you'll see `Dream>` line.
\n",
"#@markdown 3. Example text: `Astronaut floating in a distant galaxy`
\n",
"#@markdown 4. To quit Dream bot use: `q` command.
\n",
"\n",
- "#Run from virtual env\n",
- "\n",
+ "import gc\n",
+ "%cd /content/stable-diffusion/\n",
+ "%load_ext colabxterm\n",
"%xterm\n",
"gc.collect()"
],
@@ -220,18 +245,18 @@
{
"cell_type": "code",
"source": [
- "#@title 10. Show generated images\n",
- "\n",
+ "#@title 10. Show the last 15 generated images\n",
+ "import gc\n",
"import glob\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.image as mpimg\n",
"%matplotlib inline\n",
"\n",
"images = []\n",
- "for img_path in glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'):\n",
+ "for img_path in sorted(glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'), reverse=True):\n",
" images.append(mpimg.imread(img_path))\n",
"\n",
- "# Remove ticks and labels on x-axis and y-axis both\n",
+ "images = images[:15] \n",
"\n",
"plt.figure(figsize=(20,10))\n",
"\n",
@@ -253,4 +278,4 @@
"outputs": []
}
]
-}
\ No newline at end of file
+}
diff --git a/ldm/generate.py b/ldm/generate.py
index 3cae680724..6e9465d975 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -505,12 +505,22 @@ class Generate:
def _load_model_from_config(self, config, ckpt):
print(f'>> Loading model from {ckpt}')
+
+ # for usage statistics
+ device_type = choose_torch_device()
+ if device_type == 'cuda':
+ torch.cuda.reset_peak_memory_stats()
+ tic = time.time()
+
+ # this does the work
pl_sd = torch.load(ckpt, map_location='cpu')
sd = pl_sd['state_dict']
model = instantiate_from_config(config.model)
m, u = model.load_state_dict(sd, strict=False)
model.to(self.device)
model.eval()
+
+
if self.full_precision:
print(
'>> Using slower but more accurate full-precision math (--full_precision)'
@@ -520,6 +530,20 @@ class Generate:
'>> Using half precision math. Call with --full_precision to use more accurate but VRAM-intensive full precision.'
)
model.half()
+
+ # usage statistics
+ toc = time.time()
+ print(
+ '>> Model loaded in', '%4.2fs' % (toc - tic)
+ )
+ if device_type == 'cuda':
+ print(
+ '>> Max VRAM used to load the model:',
+ '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
+ '\n>> Current VRAM usage: '
+ '%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
+ )
+
return model
def _load_img(self, path, width, height, fit=False):
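The block added to `_load_model_from_config` follows a simple measurement pattern: reset CUDA's peak-memory counter, time the expensive load, then report elapsed seconds plus peak and current allocation. A minimal standalone sketch of that pattern, with a hypothetical `load_model` callable standing in for the actual checkpoint loading:

    import time
    import torch

    def load_with_stats(load_model):
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            torch.cuda.reset_peak_memory_stats()  # start peak tracking from zero
        tic = time.time()

        model = load_model()  # the expensive step being measured

        toc = time.time()
        print('>> Model loaded in %4.2fs' % (toc - tic))
        if use_cuda:
            print('>> Max VRAM used to load the model: %4.2fG'
                  % (torch.cuda.max_memory_allocated() / 1e9))
            print('>> Current VRAM usage: %4.2fG'
                  % (torch.cuda.memory_allocated() / 1e9))
        return model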
diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py
index 7d14ad0938..c2f905688c 100644
--- a/ldm/modules/attention.py
+++ b/ldm/modules/attention.py
@@ -1,9 +1,10 @@
-from inspect import isfunction
import math
+from inspect import isfunction
+
import torch
import torch.nn.functional as F
-from torch import nn, einsum
from einops import rearrange, repeat
+from torch import nn, einsum
from ldm.modules.diffusionmodules.util import checkpoint
@@ -13,7 +14,7 @@ def exists(val):
def uniq(arr):
- return{el: True for el in arr}.keys()
+ return {el: True for el in arr}.keys()
def default(val, d):
@@ -82,14 +83,14 @@ class LinearAttention(nn.Module):
super().__init__()
self.heads = heads
hidden_dim = dim_head * heads
- self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias = False)
+ self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
self.to_out = nn.Conv2d(hidden_dim, dim, 1)
def forward(self, x):
b, c, h, w = x.shape
qkv = self.to_qkv(x)
- q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads = self.heads, qkv=3)
- k = k.softmax(dim=-1)
+ q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3)
+ k = k.softmax(dim=-1)
context = torch.einsum('bhdn,bhen->bhde', k, v)
out = torch.einsum('bhde,bhdn->bhen', context, q)
out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w)
@@ -131,12 +132,12 @@ class SpatialSelfAttention(nn.Module):
v = self.v(h_)
# compute attention
- b,c,h,w = q.shape
+ b, c, h, w = q.shape
q = rearrange(q, 'b c h w -> b (h w) c')
k = rearrange(k, 'b c h w -> b c (h w)')
w_ = torch.einsum('bij,bjk->bik', q, k)
- w_ = w_ * (int(c)**(-0.5))
+ w_ = w_ * (int(c) ** (-0.5))
w_ = torch.nn.functional.softmax(w_, dim=2)
# attend to values
@@ -146,7 +147,7 @@ class SpatialSelfAttention(nn.Module):
h_ = rearrange(h_, 'b c (h w) -> b c h w', h=h)
h_ = self.proj_out(h_)
- return x+h_
+ return x + h_
class CrossAttention(nn.Module):
@@ -174,6 +175,7 @@ class CrossAttention(nn.Module):
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
+ device_type = x.device.type
del context, x
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))
@@ -188,9 +190,11 @@ class CrossAttention(nn.Module):
sim.masked_fill_(~mask, max_neg_value)
del mask
- # attention, what we cannot get enough of, by halves
- sim[4:] = sim[4:].softmax(dim=-1)
- sim[:4] = sim[:4].softmax(dim=-1)
+ if device_type == 'mps': # special case for M1 - disable neonsecret optimization
+ sim = sim.softmax(dim=-1)
+ else:
+ sim[4:] = sim[4:].softmax(dim=-1)
+ sim[:4] = sim[:4].softmax(dim=-1)
sim = einsum('b i j, b j d -> b i d', sim, v)
sim = rearrange(sim, '(b h) n d -> b n (h d)', h=h)
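The change above keeps the two-slice softmax on CUDA and CPU but falls back to a single full softmax on Apple's MPS backend (the inline comment describes this as disabling the "neonsecret optimization"). A hedged restatement of that control flow as a standalone helper, with the split index of 4 taken from the source:

    import torch

    def attention_softmax(sim: torch.Tensor, split: int = 4) -> torch.Tensor:
        # On MPS (Apple silicon) the sliced in-place path is skipped entirely.
        if sim.device.type == 'mps' or sim.shape[0] <= split:
            return sim.softmax(dim=-1)
        # Otherwise softmax is applied to two slices and written back in place,
        # so a full-size temporary never has to exist all at once.
        sim[split:] = sim[split:].softmax(dim=-1)
        sim[:split] = sim[:split].softmax(dim=-1)
        return sim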
@@ -200,7 +204,8 @@ class CrossAttention(nn.Module):
class BasicTransformerBlock(nn.Module):
def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True):
super().__init__()
- self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout) # is a self-attention
+ self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head,
+ dropout=dropout) # is a self-attention
self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
self.attn2 = CrossAttention(query_dim=dim, context_dim=context_dim,
heads=n_heads, dim_head=d_head, dropout=dropout) # is self-attn if context is none
@@ -228,6 +233,7 @@ class SpatialTransformer(nn.Module):
Then apply standard transformer action.
Finally, reshape to image
"""
+
def __init__(self, in_channels, n_heads, d_head,
depth=1, dropout=0., context_dim=None):
super().__init__()
@@ -243,7 +249,7 @@ class SpatialTransformer(nn.Module):
self.transformer_blocks = nn.ModuleList(
[BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim)
- for d in range(depth)]
+ for d in range(depth)]
)
self.proj_out = zero_module(nn.Conv2d(inner_dim,
diff --git a/requirements-colab.txt b/requirements-colab.txt
new file mode 100644
index 0000000000..f9cc5600ea
--- /dev/null
+++ b/requirements-colab.txt
@@ -0,0 +1,26 @@
+albumentations==0.4.3
+clean-fid==0.1.29
+einops==0.3.0
+huggingface-hub==0.8.1
+imageio-ffmpeg==0.4.2
+imageio==2.9.0
+kornia==0.6.0
+numpy==1.21.6
+omegaconf==2.1.1
+opencv-python==4.6.0.66
+pillow==9.2.0
+pip>=22
+pudb==2019.2
+pytorch-lightning==1.4.2
+streamlit==1.12.0
+taming-transformers-rom1504==0.0.6
+test-tube>=0.7.5
+torch-fidelity==0.3.0
+torchmetrics==0.6.0
+torchtext==0.6.0
+transformers==4.19.2
+torch==1.12.1+cu113
+torchvision==0.13.1+cu113
+git+https://github.com/openai/CLIP.git@main#egg=clip
+git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
+-e .
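A note on the pinned `+cu113` wheels: pip normally resolves these CUDA builds from the PyTorch index (`--extra-index-url https://download.pytorch.org/whl/cu113`) rather than PyPI, though the Colab runtimes this notebook targets usually ship compatible builds already. A quick sanity check to run after cell 3, as a sketch:

    import torch
    import torchvision

    print(torch.__version__)          # expected to match the pin: 1.12.1+cu113
    print(torchvision.__version__)    # expected to match the pin: 0.13.1+cu113
    print(torch.cuda.is_available())  # should be True on a GPU Colab runtime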
diff --git a/scripts/dream.py b/scripts/dream.py
index 05e19ec8ba..1d8e204181 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -91,11 +91,7 @@ def main():
print(">> changed to seamless tiling mode")
# preload the model
- tic = time.time()
t2i.load_model()
- print(
- f'>> model loaded in', '%4.2fs' % (time.time() - tic)
- )
if not infile:
print(
diff --git a/static/dream_web/index.js b/static/dream_web/index.js
index 4d1fbe3778..2d2dce1452 100644
--- a/static/dream_web/index.js
+++ b/static/dream_web/index.js
@@ -154,6 +154,12 @@ async function generateSubmit(form) {
}
window.onload = () => {
+ document.querySelector("#prompt").addEventListener("keydown", (e) => {
+ if (e.key === "Enter" && !e.shiftKey) {
+ const form = e.target.form;
+ generateSubmit(form);
+ }
+ });
document.querySelector("#generate-form").addEventListener('submit', (e) => {
e.preventDefault();
const form = e.target;
@@ -180,6 +186,12 @@ window.onload = () => {
console.error(e);
});
});
+ document.documentElement.addEventListener('keydown', (e) => {
+ if (e.key === "Escape")
+ fetch('/cancel').catch(err => {
+ console.error(err);
+ });
+ });
if (!config.gfpgan_model_exists) {
document.querySelector("#gfpgan").style.display = 'none';