add option to show intermediate latent space

2024-08-30 20:32:17 +00:00 · 2022-11-01 11:17:43 +01:00
parent be1393a41c
commit cdb107dcda
9 changed files with 751 additions and 7 deletions
--- a/ldm/invoke/generator/base.py
+++ b/ldm/invoke/generator/base.py
@@ -116,6 +116,29 @@ class Generator():
        )
        return Image.fromarray(x_sample.astype(np.uint8))

+    # Return an approximate RGB `Image` estimated from the latent samples of a
+    # single step; the image is returned to the caller, not written to disk here.
+    def sample_to_lowres_estimated_image(self,samples):
+        # adapted from code by @erucipe and @keturn here:
+        # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+
+        # these numbers were determined empirically by @keturn; one row per latent channel
+        v1_4_latent_rgb_factors = torch.tensor([
+                    # R        G        B
+                    [ 0.298, 0.207, 0.208],  # L1
+                    [ 0.187, 0.286, 0.173],  # L2
+                    [-0.158, 0.189, 0.264],  # L3
+                    [-0.184, -0.271, -0.473],  # L4
+        ], dtype=samples.dtype, device=samples.device)
+        # only samples[0] is used: move its leading (4-channel) axis last, then project to RGB
+        latent_image = samples[0].permute(1, 2, 0) @ v1_4_latent_rgb_factors
+        latents_ubyte = (((latent_image + 1) / 2)  # change scale from -1..1 to 0..1
+                         .clamp(0, 1)  # clip estimates that fall outside 0..1
+                         .mul(0xFF)  # to 0..255
+                         .byte()).cpu()  # uint8, on the CPU so .numpy() below works
+
+        return Image.fromarray(latents_ubyte.numpy())
+
    def generate_initial_noise(self, seed, width, height):
        initial_noise = None
        if self.variation_amount > 0 or len(self.with_variations) > 0:
--- a/ldm/invoke/server.py
+++ b/ldm/invoke/server.py
@@ -34,6 +34,7 @@ def build_opt(post_data, seed, gfpgan_model_exists):
    setattr(opt, 'facetool_strength', float(post_data['facetool_strength']) if gfpgan_model_exists else 0)
    setattr(opt, 'upscale', [int(post_data['upscale_level']), float(post_data['upscale_strength'])] if post_data['upscale_level'] != '' else None)
    setattr(opt, 'progress_images', 'progress_images' in post_data)
+    setattr(opt, 'progress_latents', 'progress_latents' in post_data)
    setattr(opt, 'seed', None if int(post_data['seed']) == -1 else int(post_data['seed']))
    setattr(opt, 'threshold', float(post_data['threshold']))
    setattr(opt, 'perlin', float(post_data['perlin']))
@@ -227,8 +228,13 @@ class DreamServer(BaseHTTPRequestHandler):
            # since rendering images is moderately expensive, only render every 5th image
            # and don't bother with the last one, since it'll render anyway
            nonlocal step_index
-            if opt.progress_images and step % 5 == 0 and step < opt.steps - 1:
-                image = self.model.sample_to_image(sample)
+
+            wants_progress_latents = opt.progress_latents
+            # 'progress_images' is the name set in build_opt; full renders only every 5th step
+            wants_progress_image = opt.progress_images and step % 5 == 0
+            if (wants_progress_image or wants_progress_latents) and step < opt.steps - 1:
+                image = self.model.sample_to_image(sample) if wants_progress_image \
+                        else self.model.sample_to_lowres_estimated_image(sample)
                step_index_padded = str(step_index).rjust(len(str(opt.steps)), '0')
                name = f'{prefix}.{opt.seed}.{step_index_padded}.png'
                metadata = f'{opt.prompt} -S{opt.seed} [intermediate]'