Mirror of https://github.com/invoke-ai/InvokeAI
Merge branch 'development' of https://github.com/pbaylies/stable-diffusion into development

commit e1f6ea2be7
@@ -1,17 +1,30 @@

# **Interactive Command-Line Interface**

The `dream.py` script, located in `scripts/dream.py`, provides an interactive interface to image generation similar to the "dream mothership" bot that Stability AI provided on its Discord server.

Unlike the txt2img.py and img2img.py scripts provided in the original CompViz/stable-diffusion source code repository, the time-consuming initialization of the AI model happens only once. After that, image generation from the command-line interface is very fast.

The script uses the readline library to allow for in-line editing, command history (up and down arrows), autocompletion, and more. To help keep track of which prompts generated which images, the script writes a log file of image names and prompts to the selected output directory.
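The readline behavior described above is standard-library Python. The sketch below is not part of the commit; it shows how a dream>-style prompt typically wires up tab completion, with an invented option list standing in for whatever dream.py actually registers:

```
import readline

# Invented vocabulary for illustration; dream.py's real completion list differs.
OPTIONS = ['--steps', '--width', '--height', '--cfg_scale', '--seed']

def complete(text, state):
    """Return the state-th option that starts with the text typed so far."""
    matches = [opt for opt in OPTIONS if opt.startswith(text)]
    return matches[state] if state < len(matches) else None

readline.set_completer(complete)
readline.parse_and_bind('tab: complete')  # bind Tab to the completer

try:
    while True:
        # importing readline gives input() in-line editing and arrow-key history
        line = input('dream> ')
        print(f'received: {line!r}')
except EOFError:
    pass
```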
In addition, as of version 1.02, it also writes the prompt into the PNG file's metadata, where it can be retrieved using scripts/images2prompt.py.
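For reference, and again not part of the commit: PNG text chunks of the kind described can be written and read back with Pillow. A minimal sketch, assuming the prompt is stored as a text chunk; the key name 'Dream' is a guess, so check scripts/images2prompt.py for the key actually used:

```
from PIL import Image
from PIL.PngImagePlugin import PngInfo

# Write: attach the prompt as a PNG text chunk (key name assumed, not verified).
info = PngInfo()
info.add_text('Dream', '"man-eating snail" -s50 -W512 -H512 -S1414805682')
Image.new('RGB', (64, 64)).save('sample.png', pnginfo=info)

# Read it back, roughly what scripts/images2prompt.py must do.
with Image.open('sample.png') as im:
    print(im.text.get('Dream'))  # Pillow exposes PNG text chunks via .text
```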
The script is confirmed to work on Linux, Windows, and Mac systems.

_Note:_ This script runs from the command line and can also be used as a Web application. The Web GUI is currently rudimentary, but a much better replacement is on its way.

```
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
```
@@ -183,6 +196,56 @@ well as the --mask (-M) argument:

| --init_mask <path> | -M<path> | None | Path to an image the same size as the initial_image, with areas for inpainting made transparent. |

# Shortcuts

Since one so frequently refers back to a previously-generated seed or image, dream.py provides an easy shortcut that avoids having to cut and paste these values.

Here's how it works. Say you generated 6 images of a man-eating snail:

~~~~
dream> man-eating snail -n6
...
>> Usage stats:
>> 6 image(s) generated in 79.85s
>> Max VRAM used for this generation: 3.36G. Current VRAM utilization: 2.21G
>> Max VRAM used since script start: 3.36G
Outputs:
[1] outputs/img-samples/000210.1414805682.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1414805682
[2] outputs/img-samples/000210.3312885013.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S3312885013
[3] outputs/img-samples/000210.1398528919.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1398528919
[4] outputs/img-samples/000210.92626031.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S92626031
[5] outputs/img-samples/000210.1733666373.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1733666373
[6] outputs/img-samples/000210.2453524229.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S2453524229
~~~~

The last image generated (with seed 2453524229) looks really good, so let's pick that one for variation generation. Instead of cutting and pasting the argument -S2453524229, we can simply refer to the most recent seed as -1 and write:

~~~~
dream> man-eating snail -v0.1 -n10 -S-1
>> Reusing previous seed 2453524229
...etc...
~~~~

You can use -2 to refer to the second-to-last seed, -3 to the third-to-last, and so on. It works with both individual images and grids. However, the numbering only extends across the last group of images generated and doesn't reach back to earlier commands.

The initial image (-I or --init_img) argument works in a similar way. To use the second-to-most-recent snail image as the initial image for an img2img render, you could refer to it as -I-2:

~~~~
dream> glowing science-fiction snail -I -2 -n4
>> Reusing previous image outputs/img-samples/000213.2150458613.png
...etc...
~~~~
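Mechanically, shortcuts like -S-1 and -I-2 only require remembering the most recent batch and treating negative values as offsets from its end. A small sketch of the idea; the batch list and helper names are illustrative, not dream.py's actual internals:

```
# (seed, path) records for the most recent batch only.
last_batch = [
    (1414805682, 'outputs/img-samples/000210.1414805682.png'),
    (3312885013, 'outputs/img-samples/000210.3312885013.png'),
    (2453524229, 'outputs/img-samples/000210.2453524229.png'),
]

def resolve_seed(value: int) -> int:
    """-1 is the most recent seed, -2 the one before, and so on;
    non-negative values are taken literally."""
    return last_batch[value][0] if value < 0 else value

def resolve_image(index: int) -> str:
    """Map -1, -2, ... to the path of a recently generated image."""
    return last_batch[index][1]

print(resolve_seed(-1))    # 2453524229
print(resolve_image(-2))   # outputs/img-samples/000210.3312885013.png
```

Python's negative list indexing does all the work here, which also matches the documented limit: once a new batch replaces the list, earlier seeds are out of reach.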
# Command-line editing and completion

If you are on a Macintosh or Linux machine, the command-line offers
@@ -13,8 +13,9 @@ def choose_torch_device() -> str:
 def choose_autocast_device(device):
     '''Returns an autocast compatible device from a torch device'''
     device_type = device.type # this returns 'mps' on M1
-    # autocast only supports cuda or cpu
-    if device_type in ('cuda','cpu'):
+    if device_type == 'cuda':
         return device_type,autocast
+    elif device_type == 'cpu':
+        return device_type,nullcontext
     else:
         return 'cpu',nullcontext
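For context on the hunk above: the function pairs a device string with a context-manager factory, and because torch.autocast and contextlib.nullcontext both accept a single positional argument, a caller can unpack and apply the pair without branching. A self-contained usage sketch; the call site is my illustration, not code from the commit (the function body reproduces the new version so the snippet runs on its own):

```
from contextlib import nullcontext

import torch
from torch import autocast

def choose_autocast_device(device):
    '''Returns an autocast compatible device from a torch device'''
    device_type = device.type  # this returns 'mps' on M1
    if device_type == 'cuda':
        return device_type, autocast
    elif device_type == 'cpu':
        return device_type, nullcontext
    else:
        return 'cpu', nullcontext

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device_type, scope = choose_autocast_device(device)
with scope(device_type):  # autocast('cuda') on GPU, nullcontext('cpu') otherwise
    x = torch.randn(8, 8, device=device)
    y = x @ x
print(y.dtype)  # float16 under CUDA autocast, float32 on the CPU path
```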
@@ -111,7 +111,6 @@ class Generate:
         height = 512,
         sampler_name = 'k_lms',
         ddim_eta = 0.0, # deterministic
-        precision = 'autocast',
         full_precision = False,
         strength = 0.75, # default in scripts/img2img.py
         seamless = False,
@@ -129,7 +128,6 @@ class Generate:
         self.sampler_name = sampler_name
         self.grid = grid
         self.ddim_eta = ddim_eta
-        self.precision = precision
         self.full_precision = True if choose_torch_device() == 'mps' else full_precision
         self.strength = strength
         self.seamless = seamless
@@ -121,30 +121,17 @@ class ResnetBlock(nn.Module):
                                     padding=0)
 
     def forward(self, x, temb):
-        h1 = x
-        h2 = self.norm1(h1)
-        del h1
-
-        h3 = nonlinearity(h2)
-        del h2
-
-        h4 = self.conv1(h3)
-        del h3
+        h = self.norm1(x)
+        h = nonlinearity(h)
+        h = self.conv1(h)
 
         if temb is not None:
-            h4 = h4 + self.temb_proj(nonlinearity(temb))[:,:,None,None]
+            h = h + self.temb_proj(nonlinearity(temb))[:,:,None,None]
 
-        h5 = self.norm2(h4)
-        del h4
-
-        h6 = nonlinearity(h5)
-        del h5
-
-        h7 = self.dropout(h6)
-        del h6
-
-        h8 = self.conv2(h7)
-        del h7
+        h = self.norm2(h)
+        h = nonlinearity(h)
+        h = self.dropout(h)
+        h = self.conv2(h)
 
         if self.in_channels != self.out_channels:
             if self.use_conv_shortcut:
@@ -152,7 +139,7 @@ class ResnetBlock(nn.Module):
         else:
             x = self.nin_shortcut(x)
 
-        return x + h8
+        return x + h
 
 class LinAttnBlock(LinearAttention):
     """to match AttnBlock usage"""
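Both ResnetBlock hunks make the same transformation: the numbered temporaries h1 through h8, each followed by an explicit del, collapse into rebinding a single h. In CPython, rebinding drops the last reference to the previous tensor at the same point the del did, so the memory behavior should be unchanged. A small self-contained sketch of that equivalence, with arbitrary sizes:

```
import torch

x = torch.randn(512, 512)

# Style removed by this commit: numbered temporaries plus explicit del.
h1 = x * 2
h2 = h1.relu()
del h1                 # frees h1's storage once its refcount hits zero

# Style introduced by this commit: rebind one name.
h = x * 2
h = h.relu()           # the intermediate "x * 2" tensor loses its last
                       # reference on assignment and is freed the same way

print(torch.equal(h2, h))  # True: identical results either way
```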
@@ -209,8 +196,7 @@ class AttnBlock(nn.Module):
 
         h_ = torch.zeros_like(k, device=q.device)
 
-        device_type = 'mps' if q.device.type == 'mps' else 'cuda'
-        if device_type == 'cuda':
+        if q.device.type == 'cuda':
             stats = torch.cuda.memory_stats(q.device)
             mem_active = stats['active_bytes.all.current']
             mem_reserved = stats['reserved_bytes.all.current']
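The counters in this hunk are real torch.cuda.memory_stats keys; in this file they appear to feed the decision about how finely to slice the attention computation, though the hunk does not show that part. A sketch of the usual free-VRAM estimate built from these counters, assuming the common pattern of combining them with torch.cuda.mem_get_info:

```
import torch

def estimate_free_vram(device: torch.device) -> int:
    """Bytes plausibly available: memory the driver reports free, plus
    memory PyTorch has reserved in its caching allocator but isn't using."""
    stats = torch.cuda.memory_stats(device)
    mem_active = stats['active_bytes.all.current']
    mem_reserved = stats['reserved_bytes.all.current']
    mem_free_cuda, _ = torch.cuda.mem_get_info(device)
    return mem_free_cuda + (mem_reserved - mem_active)

if torch.cuda.is_available():
    free = estimate_free_vram(torch.device('cuda'))
    print(f'~{free / 2**30:.2f} GiB available')
```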
@@ -599,22 +585,16 @@ class Decoder(nn.Module):
         temb = None
 
         # z to block_in
-        h1 = self.conv_in(z)
+        h = self.conv_in(z)
 
         # middle
-        h2 = self.mid.block_1(h1, temb)
-        del h1
-
-        h3 = self.mid.attn_1(h2)
-        del h2
-
-        h = self.mid.block_2(h3, temb)
-        del h3
+        h = self.mid.block_1(h, temb)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h, temb)
 
         # prepare for up sampling
-        device_type = 'mps' if h.device.type == 'mps' else 'cuda'
         gc.collect()
-        if device_type == 'cuda':
+        if h.device.type == 'cuda':
             torch.cuda.empty_cache()
 
         # upsampling
@@ -622,33 +602,19 @@ class Decoder(nn.Module):
             for i_block in range(self.num_res_blocks+1):
                 h = self.up[i_level].block[i_block](h, temb)
                 if len(self.up[i_level].attn) > 0:
-                    t = h
-                    h = self.up[i_level].attn[i_block](t)
-                    del t
-
+                    h = self.up[i_level].attn[i_block](h)
             if i_level != 0:
-                t = h
-                h = self.up[i_level].upsample(t)
-                del t
+                h = self.up[i_level].upsample(h)
 
         # end
         if self.give_pre_end:
             return h
 
-        h1 = self.norm_out(h)
-        del h
-
-        h2 = nonlinearity(h1)
-        del h1
-
-        h = self.conv_out(h2)
-        del h2
-
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
         if self.tanh_out:
-            t = h
-            h = torch.tanh(t)
-            del t
+            h = torch.tanh(h)
         return h
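One detail both Decoder hunks preserve: gc.collect() runs before torch.cuda.empty_cache(). The order matters, because empty_cache() can only return allocator blocks whose tensors Python has already released. A minimal sketch of the pairing as a reusable helper; the helper name is mine, not the repo's:

```
import gc

import torch

def release_cached_vram() -> None:
    """Collect Python garbage first so dead tensors release their blocks,
    then hand the now-unused cached blocks back to the CUDA driver."""
    gc.collect()                   # break reference cycles holding tensors
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # CUDA-only API, hence the guard

release_cached_vram()
```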