Mirror of https://github.com/invoke-ai/InvokeAI
Merge branch 'development' into patch-1
Commit b512d198f0
@@ -27,10 +27,25 @@ rm ${PIP_LOG}

**SOLUTION**

Enter the stable-diffusion directory and completely remove the `src` directory and all its contents.
The safest way to do this is to enter the stable-diffusion directory and give the command
`git clean -f`. If this still doesn't fix the problem, try "conda clean -all" and then restart at
the `conda env create` step.
Conda sometimes gets stuck at the last PIP step, in which several git repositories are
cloned and built.

Enter the stable-diffusion directory and completely remove the `src`
directory and all its contents. The safest way to do this is to enter
the stable-diffusion directory and give the command `git clean -f`. If
this still doesn't fix the problem, try "conda clean -all" and then
restart at the `conda env create` step.

To further understand the problem, check the install log using this method:

```bash
export PIP_LOG="/tmp/pip_log.txt"
touch ${PIP_LOG}
tail -f ${PIP_LOG} &
conda env create -f environment-mac.yaml --debug --verbose
killall tail
rm ${PIP_LOG}
```

---
@@ -125,7 +125,7 @@ ln -s "$PATH_TO_CKPT/sd-v1-4.ckpt" \
=== "Intel x86_64"

    ```bash
    PIP_EXISTS_ACTION=w CONDA_SUBDIR=osx-x86_64 \
    PIP_EXISTS_ACTION=w CONDA_SUBDIR=osx-64 \
    conda env create \
    -f environment-mac.yaml \
    && conda activate ldm
@@ -594,7 +594,7 @@ class Args(object):
            '--upscale',
            nargs='+',
            type=float,
            help='Scale factor (2, 4) for upscaling final output followed by upscaling strength (0-1.0). If strength not specified, defaults to 0.75',
            help='Scale factor (1, 2, 3, 4, etc..) for upscaling final output followed by upscaling strength (0-1.0). If strength not specified, defaults to 0.75',
            default=None,
        )
        postprocessing_group.add_argument(
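For context on how this option behaves, here is a minimal standalone argparse sketch (not InvokeAI's actual CLI wiring; the parser and test values are illustrative) showing that a `nargs='+'`, `type=float` option comes back as a list of floats, with the strength defaulting to 0.75 when only the scale factor is supplied, as `generate.py` does later in this diff:

```python
import argparse

# Illustrative parser only; InvokeAI defines this option on its own Args class.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--upscale',
    nargs='+',
    type=float,
    help='Scale factor followed by optional upscaling strength (0-1.0)',
    default=None,
)

opts = parser.parse_args(['--upscale', '4'])
upscale = opts.upscale            # [4.0]
if upscale is not None and len(upscale) < 2:
    upscale.append(0.75)          # default strength, mirroring generate.py below
print(upscale)                    # [4.0, 0.75]
```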
@@ -5,17 +5,41 @@ and generates with ldm.dream.generator.img2img

import torch
import numpy as np
from tqdm import trange
from PIL import Image
from ldm.dream.generator.base import Generator
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.dream.generator.img2img import Img2Img

from ldm.dream.devices import choose_autocast

class Embiggen(Generator):
    def __init__(self, model, precision):
        super().__init__(model, precision)
        self.init_latent = None

    # Replace generate because Embiggen doesn't need/use most of what it does normally
    def generate(self,prompt,iterations=1,seed=None,
                 image_callback=None, step_callback=None,
                 **kwargs):
        scope = choose_autocast(self.precision)
        make_image = self.get_make_image(
            prompt,
            step_callback = step_callback,
            **kwargs
        )
        results = []
        seed = seed if seed else self.new_seed()

        # Noise will be generated by the Img2Img generator when called
        with scope(self.model.device.type), self.model.ema_scope():
            for n in trange(iterations, desc='Generating'):
                # make_image will call Img2Img which will do the equivalent of get_noise itself
                image = make_image()
                results.append([image, seed])
                if image_callback is not None:
                    image_callback(image, seed)
                seed = self.new_seed()
        return results

    @torch.no_grad()
    def get_make_image(
        self,
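As a rough illustration of the control flow the new `generate()` override follows, here is a self-contained sketch of the same loop pattern with a placeholder in place of the real Img2Img call; the function, seeds, and placeholder image are illustrative only, not InvokeAI's API:

```python
import numpy as np
from tqdm import trange

# Pattern illustration only: stands in for Embiggen.generate()'s loop,
# with a trivial stand-in for the real make_image()/Img2Img call.
def generate(iterations=3, seed=None, image_callback=None):
    rng = np.random.default_rng()
    seed = seed if seed else int(rng.integers(0, 2**32 - 1))
    results = []
    for _ in trange(iterations, desc='Generating'):
        image = f'<image for seed {seed}>'        # placeholder for make_image()
        results.append([image, seed])
        if image_callback is not None:
            image_callback(image, seed)           # called once per finished image
        seed = int(rng.integers(0, 2**32 - 1))    # fresh seed per iteration
    return results

print(generate(iterations=2, image_callback=lambda im, s: print('done:', s)))
```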
@@ -151,8 +175,19 @@ class Embiggen(Generator):
                # Clamp values to max 255
                if distanceToLR > 255:
                    distanceToLR = 255
                # Place the pixel as invert of distance
                agradientC.putpixel((x, y), int(255 - distanceToLR))
                #Place the pixel as invert of distance
                agradientC.putpixel((x, y), round(255 - distanceToLR))

        # Create alternative asymmetric diagonal corner to use on "tailing" intersections to prevent hard edges
        # Fits for a left-fading gradient on the bottom side and full opacity on the right side.
        agradientAsymC = Image.new('L', (256, 256))
        for y in range(256):
            for x in range(256):
                value = round(max(0, x-(255-y)) * (255 / max(1,y)))
                #Clamp values
                value = max(0, value)
                value = min(255, value)
                agradientAsymC.putpixel((x, y), value)

        # Create alpha layers default fully white
        alphaLayerL = Image.new("L", (width, height), 255)
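To make the new mask easier to inspect, here is a small self-contained PIL sketch that rebuilds the asymmetric corner gradient added above, plus the `rotate(270)` copy that a later hunk pastes into a tile's upper-right corner; the overlap size and output file names are illustrative:

```python
from PIL import Image

# Rebuild the asymmetric corner gradient exactly as in the added code:
# a left-fading gradient along the bottom edge, full opacity on the right side.
agradientAsymC = Image.new('L', (256, 256))
for y in range(256):
    for x in range(256):
        value = round(max(0, x - (255 - y)) * (255 / max(1, y)))
        agradientAsymC.putpixel((x, y), max(0, min(255, value)))

# A later hunk pastes a rotated, resized copy into a tile's upper-right corner;
# the overlap size and output path here are illustrative.
overlap = (64, 64)
corner_for_upper_right = agradientAsymC.rotate(270).resize(overlap)
agradientAsymC.save('agradientAsymC_preview.png')
corner_for_upper_right.save('agradientAsymC_rot270_preview.png')
```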
@@ -163,8 +198,13 @@ class Embiggen(Generator):
        alphaLayerT.paste(agradientT, (0, 0))
        alphaLayerLTC.paste(agradientL, (0, 0))
        alphaLayerLTC.paste(agradientT, (0, 0))
        alphaLayerLTC.paste(agradientC.resize(
            (overlap_size_x, overlap_size_y)), (0, 0))
        alphaLayerLTC.paste(agradientC.resize((overlap_size_x, overlap_size_y)), (0, 0))
        # make masks with an asymmetric upper-right corner so when the curved transparent corner of the next tile
        # to its right is placed it doesn't reveal a hard trailing semi-transparent edge in the overlapping space
        alphaLayerTaC = alphaLayerT.copy()
        alphaLayerTaC.paste(agradientAsymC.rotate(270).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0))
        alphaLayerLTaC = alphaLayerLTC.copy()
        alphaLayerLTaC.paste(agradientAsymC.rotate(270).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0))

        if embiggen_tiles:
            # Individual unconnected sides
@@ -242,7 +282,7 @@ class Embiggen(Generator):
        del agradientT
        del agradientC

        def make_image(x_T):
        def make_image():
            # Make main tiles -------------------------------------------------
            if embiggen_tiles:
                print(f'>> Making {len(embiggen_tiles)} Embiggen tiles...')
@@ -251,7 +291,20 @@ class Embiggen(Generator):
                    f'>> Making {(emb_tiles_x * emb_tiles_y)} Embiggen tiles ({emb_tiles_x}x{emb_tiles_y})...')

            emb_tile_store = []
            # Although we could use the same seed for every tile for determinism, at higher strengths this may
            # produce duplicated structures for each tile and make the tiling effect more obvious
            # instead, track and iterate a local seed that we pass to Img2Img
            seed = self.seed
            seedintlimit = np.iinfo(np.uint32).max - 1  # only retrieve this one from numpy

            for tile in range(emb_tiles_x * emb_tiles_y):
                # Don't iterate on first tile
                if tile != 0:
                    if seed < seedintlimit:
                        seed += 1
                    else:
                        seed = 0

                # Determine if this is a re-run and replace
                if embiggen_tiles and not tile in embiggen_tiles:
                    continue
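A tiny self-contained sketch of the per-tile seed schedule shown above: advance the seed for each tile and wrap to 0 at the uint32 limit, so each tile gets distinct noise rather than duplicated structures; the starting seed and tile count are illustrative:

```python
import numpy as np

# Per-tile seed schedule mirroring the loop above (start value is illustrative).
start_seed = 4294967290
seed_limit = np.iinfo(np.uint32).max - 1

seed = start_seed
for tile in range(8):
    if tile != 0:                               # the first tile keeps the starting seed
        seed = seed + 1 if seed < seed_limit else 0
    print(f'tile {tile}: seed {seed}')
```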
@@ -294,21 +347,20 @@ class Embiggen(Generator):

                tile_results = gen_img2img.generate(
                    prompt,
                    iterations=1,
                    seed=self.seed,
                    sampler=sampler,
                    steps=steps,
                    cfg_scale=cfg_scale,
                    conditioning=conditioning,
                    ddim_eta=ddim_eta,
                    image_callback=None, # called only after the final image is generated
                    step_callback=step_callback, # called after each intermediate image is generated
                    width=width,
                    height=height,
                    init_img=init_img, # img2img doesn't need this, but it might in the future
                    init_image=newinitimage, # notice that init_image is different from init_img
                    mask_image=None,
                    strength=strength,
                    iterations = 1,
                    seed = seed,
                    sampler = sampler,
                    steps = steps,
                    cfg_scale = cfg_scale,
                    conditioning = conditioning,
                    ddim_eta = ddim_eta,
                    image_callback = None, # called only after the final image is generated
                    step_callback = step_callback, # called after each intermediate image is generated
                    width = width,
                    height = height,
                    init_image = newinitimage, # notice that init_image is different from init_img
                    mask_image = None,
                    strength = strength,
                )

                emb_tile_store.append(tile_results[0][0])
@@ -382,7 +434,7 @@ class Embiggen(Generator):
                elif emb_row_i == emb_tiles_y - 1:
                    if emb_column_i == 0:
                        if (tile+1) in embiggen_tiles: # Look-ahead right
                            intileimage.putalpha(alphaLayerT)
                            intileimage.putalpha(alphaLayerTaC)
                        else:
                            intileimage.putalpha(alphaLayerRTC)
                    elif emb_column_i == emb_tiles_x - 1:
@@ -390,7 +442,7 @@ class Embiggen(Generator):
                            intileimage.putalpha(alphaLayerLTC)
                        else:
                            if (tile+1) in embiggen_tiles: # Look-ahead right
                                intileimage.putalpha(alphaLayerLTC)
                                intileimage.putalpha(alphaLayerLTaC)
                            else:
                                intileimage.putalpha(alphaLayerABB)
                # vertical middle of image
@@ -398,7 +450,7 @@ class Embiggen(Generator):
                    if emb_column_i == 0:
                        if (tile+1) in embiggen_tiles: # Look-ahead right
                            if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down
                                intileimage.putalpha(alphaLayerT)
                                intileimage.putalpha(alphaLayerTaC)
                            else:
                                intileimage.putalpha(alphaLayerTB)
                        elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only
@@ -413,7 +465,7 @@ class Embiggen(Generator):
                    else:
                        if (tile+1) in embiggen_tiles: # Look-ahead right
                            if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down
                                intileimage.putalpha(alphaLayerLTC)
                                intileimage.putalpha(alphaLayerLTaC)
                            else:
                                intileimage.putalpha(alphaLayerABR)
                        elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only
@@ -425,9 +477,15 @@ class Embiggen(Generator):
                    if emb_row_i == 0 and emb_column_i >= 1:
                        intileimage.putalpha(alphaLayerL)
                    elif emb_row_i >= 1 and emb_column_i == 0:
                        if emb_column_i + 1 == emb_tiles_x: # If we don't have anything that can be placed to the right
                            intileimage.putalpha(alphaLayerT)
                        else:
                            intileimage.putalpha(alphaLayerTaC)
                    else:
                        if emb_column_i + 1 == emb_tiles_x: # If we don't have anything that can be placed to the right
                            intileimage.putalpha(alphaLayerLTC)
                        else:
                            intileimage.putalpha(alphaLayerLTaC)
                    # Layer tile onto final image
                    outputsuperimage.alpha_composite(intileimage, (left, top))
                else:
@@ -14,45 +14,22 @@ class ESRGAN():
        else:
            use_half_precision = True

    def load_esrgan_bg_upsampler(self, upsampler_scale):
    def load_esrgan_bg_upsampler(self):
        if not torch.cuda.is_available(): # CPU or MPS on M1
            use_half_precision = False
        else:
            use_half_precision = True

        model_path = {
            2: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
            4: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth',
        }

        if upsampler_scale not in model_path:
            return None
        else:
            from basicsr.archs.rrdbnet_arch import RRDBNet
            from realesrgan.archs.srvgg_arch import SRVGGNetCompact
            from realesrgan import RealESRGANer

            if upsampler_scale == 4:
                model = RRDBNet(
                    num_in_ch=3,
                    num_out_ch=3,
                    num_feat=64,
                    num_block=23,
                    num_grow_ch=32,
                    scale=4,
                )
            if upsampler_scale == 2:
                model = RRDBNet(
                    num_in_ch=3,
                    num_out_ch=3,
                    num_feat=64,
                    num_block=23,
                    num_grow_ch=32,
                    scale=2,
                )
            model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
            model_path = 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
            scale = 4

            bg_upsampler = RealESRGANer(
                scale=upsampler_scale,
                model_path=model_path[upsampler_scale],
                scale=scale,
                model_path=model_path,
                model=model,
                tile=self.bg_tile_size,
                tile_pad=10,
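The change above replaces the per-scale RRDBNet checkpoints with the single general-purpose `realesr-general-x4v3` model. Assuming the `realesrgan` and `basicsr` packages are installed, here is a hedged sketch of how that one model can serve arbitrary output scales by passing `outscale` to `enhance()`; the tile size and file paths are illustrative, and InvokeAI itself passes `self.bg_tile_size` and its own half-precision flag:

```python
import numpy as np
from PIL import Image
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact

# One general-purpose x4 model serves any requested output scale via `outscale`.
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64,
                        num_conv=32, upscale=4, act_type='prelu')
upsampler = RealESRGANer(
    scale=4,  # native scale of the weights
    model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth',
    model=model,
    tile=400,      # illustrative tile size; InvokeAI passes self.bg_tile_size
    tile_pad=10,
    half=False,    # InvokeAI chooses this based on CUDA availability
)

image = Image.open('input.png')            # illustrative input path
output, _ = upsampler.enhance(
    np.array(image, dtype=np.uint8),
    outscale=3,                            # requested scale need not be 4
)
Image.fromarray(output).save('upscaled.png')
```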
@@ -63,24 +40,27 @@ class ESRGAN():
        return bg_upsampler

    def process(self, image, strength: float, seed: str = None, upsampler_scale: int = 2):
        if seed is not None:
            print(
                f'>> Real-ESRGAN Upscaling seed:{seed} : scale:{upsampler_scale}x'
            )

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            warnings.filterwarnings('ignore', category=UserWarning)

            try:
                upsampler = self.load_esrgan_bg_upsampler(upsampler_scale)
                upsampler = self.load_esrgan_bg_upsampler()
            except Exception:
                import traceback
                import sys

                print('>> Error loading Real-ESRGAN:', file=sys.stderr)
                print(traceback.format_exc(), file=sys.stderr)

        if upsampler_scale == 0:
            print('>> Real-ESRGAN: Invalid scaling option. Image not upscaled.')
            return image

        if seed is not None:
            print(
                f'>> Real-ESRGAN Upscaling seed:{seed} : scale:{upsampler_scale}x'
            )

        output, _ = upsampler.enhance(
            np.array(image, dtype=np.uint8),
            outscale=upsampler_scale,
@@ -497,11 +497,8 @@ class Generate:
        prompt = None
        try:
            args = metadata_from_png(image_path)
            if len(args) > 1:
                print("* Can't postprocess a grid")
                return
            seed = args[0].seed
            prompt = args[0].prompt
            seed = args.seed
            prompt = args.prompt
            print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
        except:
            m = re.search('(\d+)\.png$',image_path)
@@ -724,14 +721,6 @@ class Generate:
        for r in image_list:
            image, seed = r
            try:
                if upscale is not None:
                    if self.esrgan is not None:
                        if len(upscale) < 2:
                            upscale.append(0.75)
                        image = self.esrgan.process(
                            image, upscale[1], seed, int(upscale[0]))
                    else:
                        print(">> ESRGAN is disabled. Image not upscaled.")
                if strength > 0:
                    if self.gfpgan is not None or self.codeformer is not None:
                        if facetool == 'gfpgan':
@@ -747,6 +736,14 @@ class Generate:
                            image = self.codeformer.process(image=image, strength=strength, device=cf_device, seed=seed, fidelity=codeformer_fidelity)
                    else:
                        print(">> Face Restoration is disabled.")
                if upscale is not None:
                    if self.esrgan is not None:
                        if len(upscale) < 2:
                            upscale.append(0.75)
                        image = self.esrgan.process(
                            image, upscale[1], seed, int(upscale[0]))
                    else:
                        print(">> ESRGAN is disabled. Image not upscaled.")
            except Exception as e:
                print(
                    f'>> Error running RealESRGAN or GFPGAN. Your image was not upscaled.\n{e}'
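Taken together with the hunk above, this moves ESRGAN upscaling so it runs after face restoration. Below is a simplified, self-contained restatement of the resulting order, using toy callables rather than InvokeAI's actual restorer and upsampler classes:

```python
# Simplified restatement of the new postprocessing order; the restorer and
# upsampler here are toy callables, not InvokeAI's actual classes.
def postprocess(image, seed, strength=0.0, upscale=None,
                face_restorer=None, esrgan=None):
    # 1) Face restoration first, at the image's native resolution.
    if strength > 0 and face_restorer is not None:
        image = face_restorer(image, strength=strength, seed=seed)
    # 2) Then ESRGAN upscaling: upscale is [scale] or [scale, strength].
    if upscale is not None and esrgan is not None:
        if len(upscale) < 2:
            upscale.append(0.75)   # default upscaling strength
        image = esrgan(image, upscale[1], seed, int(upscale[0]))
    return image

# Toy usage with pass-through stand-ins.
result = postprocess(
    image='<image>', seed=42, strength=0.5, upscale=[2],
    face_restorer=lambda im, **kw: im,
    esrgan=lambda im, s, seed, scale: im,
)
```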
@@ -49,33 +49,13 @@ except ModuleNotFoundError:
if gfpgan:
    print('Loading models from RealESRGAN and facexlib')
    try:
        from basicsr.archs.rrdbnet_arch import RRDBNet
        from realesrgan.archs.srvgg_arch import SRVGGNetCompact
        from facexlib.utils.face_restoration_helper import FaceRestoreHelper

        RealESRGANer(
            scale=2,
            model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
            model=RRDBNet(
                num_in_ch=3,
                num_out_ch=3,
                num_feat=64,
                num_block=23,
                num_grow_ch=32,
                scale=2,
            ),
        )

        RealESRGANer(
            scale=4,
            model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth',
            model=RRDBNet(
                num_in_ch=3,
                num_out_ch=3,
                num_feat=64,
                num_block=23,
                num_grow_ch=32,
                scale=4,
            ),
            model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth',
            model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
        )

        FaceRestoreHelper(1, det_model='retinaface_resnet50')