Merge branch 'development' of https://github.com/pbaylies/stable-diffusion into development

2025-07-25 21:05:37 +00:00 · 2022-09-12 18:35:25 -04:00
parent 5941ee620c 0a6c98e47d
commit e1f6ea2be7
4 changed files with 93 additions and 65 deletions
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@ -1,17 +1,30 @@
 # **Interactive Command-Line Interface**

-The `dream.py` script, located in `scripts/dream.py`, provides an interactive interface to image generation similar to the "dream mothership" bot that Stable AI provided on its Discord server.
+The `dream.py` script, located in `scripts/dream.py`, provides an
+interactive interface to image generation similar to the "dream
+mothership" bot that Stability AI provided on its Discord server.

-Unlike the txt2img.py and img2img.py scripts provided in the original CompViz/stable-diffusion source code repository, the time-consuming initialization of the AI model initialization only happens once. After that image generation
-from the command-line interface is very fast.
+Unlike the txt2img.py and img2img.py scripts provided in the original
+CompVis/stable-diffusion source code repository, the time-consuming
+initialization of the AI model only happens once. After that, image
+generation from the command-line interface is very fast.

-The script uses the readline library to allow for in-line editing, command history (up and down arrows), autocompletion, and more. To help keep track of which prompts generated which images, the script writes a log file of image names and prompts to the selected output directory.
+The script uses the readline library to allow for in-line editing,
+command history (up and down arrows), autocompletion, and more. To
+help keep track of which prompts generated which images, the script
+writes a log file of image names and prompts to the selected output
+directory.

-In addition, as of version 1.02, it also writes the prompt into the PNG file's metadata where it can be retrieved using scripts/images2prompt.py
+In addition, as of version 1.02, it also writes the prompt into the
+PNG file's metadata, where it can be retrieved using
+`scripts/images2prompt.py`.

 The script is confirmed to work on Linux, Windows and Mac systems.

-_Note:_ This script runs from the command-line or can be used as a Web application. The Web GUI is currently rudimentary, but a much better replacement is on its way.
+_Note:_ This script runs from the command-line or can be used as a Web
+application. The Web GUI is currently rudimentary, but a much better
+replacement is on its way.
+

 ```
 (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
@ -183,6 +196,56 @@ well as the --mask (-M) argument:
 | --init_mask <path> | -M<path>   | None                |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|


+# Shortcuts
+
+Since one so frequently refers back to a previously-generated seed or
+image, dream.py provides an easy shortcut that avoids having to cut
+and paste these values.
+
+Here's how it works. Say you generated 6 images of a man-eating snail:
+
+~~~~
+dream> man-eating snail -n6
+...
+>> Usage stats:
+>>   6 image(s) generated in 79.85s
+>>   Max VRAM used for this generation: 3.36G. Current VRAM utilization:2.21G
+>>   Max VRAM used since script start:  3.36G
+Outputs:
+[1] outputs/img-samples/000210.1414805682.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1414805682
+[2] outputs/img-samples/000210.3312885013.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S3312885013
+[3] outputs/img-samples/000210.1398528919.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1398528919
+[4] outputs/img-samples/000210.92626031.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S92626031
+[5] outputs/img-samples/000210.1733666373.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1733666373
+[6] outputs/img-samples/000210.2453524229.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S2453524229
+~~~~
+
+The last image generated (with seed 2453524229) looks really good. So let's
+pick that one for variation generation. Instead of cutting and pasting
+the argument -S2453524229, we can simply refer to the most recent seed as
+-1, and write:
+
+~~~~
+dream> man-eating snail -v0.1 -n10 -S-1
+>> Reusing previous seed 2453524229
+...etc...
+~~~~
+
+You can use -2 to refer to the second to last seed, -3 to the third to
+last, etc. It works with both individual images and grids. However,
+the numbering system only extends across the last group of images
+generated and doesn't reach back to earlier commands.
+
+The initial image (-I or --init_img) argument works in a similar
+way. To use the second-to-most-recent snail image as the initial
+image for an img2img render, you could refer to it as -I-2:
+
+~~~~
+dream> glowing science-fiction snail -I -2 -n4
+>> Reusing previous image outputs/img-samples/000213.2150458613.png
+...etc...
+~~~~
+
 # Command-line editing and completion

 If you are on a Macintosh or Linux machine, the command-line offers
--- a/ldm/dream/devices.py
+++ b/ldm/dream/devices.py
@ -13,8 +13,9 @@ def choose_torch_device() -> str:
 def choose_autocast_device(device):
    '''Returns an autocast compatible device from a torch device'''
    device_type = device.type # this returns 'mps' on M1
-    # autocast only supports cuda or cpu
-    if device_type in ('cuda','cpu'):
+    if device_type == 'cuda':
        return device_type,autocast
+    elif device_type == 'cpu':
+        return device_type,nullcontext
    else:
        return 'cpu',nullcontext
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -111,7 +111,6 @@ class Generate:
            height                = 512,
            sampler_name          = 'k_lms',
            ddim_eta              = 0.0,  # deterministic
-            precision             = 'autocast',
            full_precision        = False,
            strength              = 0.75,  # default in scripts/img2img.py
            seamless              = False,
@ -129,7 +128,6 @@ class Generate:
        self.sampler_name             = sampler_name
        self.grid                     = grid
        self.ddim_eta                 = ddim_eta
-        self.precision                = precision
        self.full_precision           = True if choose_torch_device() == 'mps' else full_precision
        self.strength                 = strength
        self.seamless                 = seamless
--- a/ldm/modules/diffusionmodules/model.py
+++ b/ldm/modules/diffusionmodules/model.py
@ -121,30 +121,17 @@ class ResnetBlock(nn.Module):
                                                    padding=0)

    def forward(self, x, temb):
-        h1 = x
-        h2 = self.norm1(h1)
-        del h1
-
-        h3 = nonlinearity(h2)
-        del h2
-
-        h4 = self.conv1(h3)
-        del h3
+        h = self.norm1(x)
+        h = nonlinearity(h)
+        h = self.conv1(h)

        if temb is not None:
-            h4 = h4 + self.temb_proj(nonlinearity(temb))[:,:,None,None]
+            h = h + self.temb_proj(nonlinearity(temb))[:,:,None,None]

-        h5 = self.norm2(h4)
-        del h4
-
-        h6 = nonlinearity(h5)
-        del h5
-
-        h7 = self.dropout(h6)
-        del h6
-
-        h8 = self.conv2(h7)
-        del h7
+        h = self.norm2(h)
+        h = nonlinearity(h)
+        h = self.dropout(h)
+        h = self.conv2(h)

        if self.in_channels != self.out_channels:
            if self.use_conv_shortcut:
@ -152,7 +139,7 @@ class ResnetBlock(nn.Module):
            else:
                x = self.nin_shortcut(x)

-        return x + h8
+        return x + h

 class LinAttnBlock(LinearAttention):
    """to match AttnBlock usage"""
@ -209,8 +196,7 @@ class AttnBlock(nn.Module):

        h_ = torch.zeros_like(k, device=q.device)

-        device_type = 'mps' if q.device.type == 'mps' else 'cuda'
-        if device_type == 'cuda':
+        if q.device.type == 'cuda':
            stats = torch.cuda.memory_stats(q.device)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
@ -599,22 +585,16 @@ class Decoder(nn.Module):
        temb = None

        # z to block_in
-        h1 = self.conv_in(z)
+        h = self.conv_in(z)

        # middle
-        h2 = self.mid.block_1(h1, temb)
-        del h1
-
-        h3 = self.mid.attn_1(h2)
-        del h2
-
-        h = self.mid.block_2(h3, temb)
-        del h3
+        h = self.mid.block_1(h, temb)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h, temb)

        # prepare for up sampling
-        device_type = 'mps' if h.device.type == 'mps' else 'cuda'
        gc.collect()
-        if device_type == 'cuda':
+        if h.device.type == 'cuda':
            torch.cuda.empty_cache()

        # upsampling
@ -622,33 +602,19 @@ class Decoder(nn.Module):
            for i_block in range(self.num_res_blocks+1):
                h = self.up[i_level].block[i_block](h, temb)
                if len(self.up[i_level].attn) > 0:
-                    t = h
-                    h = self.up[i_level].attn[i_block](t)
-                    del t
-
+                    h = self.up[i_level].attn[i_block](h)
            if i_level != 0:
-                t = h
-                h = self.up[i_level].upsample(t)
-                del t
+                h = self.up[i_level].upsample(h)

        # end
        if self.give_pre_end:
            return h

-        h1 = self.norm_out(h)
-        del h
-
-        h2 = nonlinearity(h1)
-        del h1
-
-        h = self.conv_out(h2)
-        del h2
-
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
        if self.tanh_out:
-            t = h
-            h = torch.tanh(t)
-            del t
-
+            h = torch.tanh(h)
        return h