all vestiges of ldm.invoke removed

Author: Lincoln Stein
Date: 2023-03-03 01:02:00 -05:00
parent 6a990565ff
commit 60a98cacef
126 changed files with 8514 additions and 6520 deletions

View File

@@ -1,15 +1,12 @@
-'''
+"""
 Initialization file for invokeai.backend.image_util methods.
-'''
+"""
 from .patchmatch import PatchMatch
+from .pngwriter import PngWriter, PromptFormatter, retrieve_metadata, write_metadata
+from .seamless import configure_model_padding
 from .txt2mask import Txt2Mask
 from .util import InitImageResizer, make_grid
-from .pngwriter import (PngWriter,
-                        PromptFormatter,
-                        retrieve_metadata,
-                        write_metadata,
-)
-from .seamless import configure_model_padding
 
 def debug_image(
     debug_image, debug_text, debug_show=True, debug_result=False, debug_status=False
@@ -25,5 +22,3 @@ def debug_image(
     if debug_result:
         return image_copy
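The net effect of this hunk: every public helper of the package is now re-exported from one flat namespace, with the imports alphabetized by isort. Here is a minimal consumer sketch, assuming only the re-exports listed above:

    # Sketch of downstream usage; assumes an InvokeAI checkout is importable.
    from invokeai.backend.image_util import (
        PatchMatch,
        PngWriter,
        Txt2Mask,
        configure_model_padding,
        make_grid,
    )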

View File

@@ -1,20 +1,22 @@
-'''
+"""
 This module defines a singleton object, "patchmatch" that
 wraps the actual patchmatch object. It respects the global
 "try_patchmatch" attribute, so that patchmatch loading can
 be suppressed or deferred
-'''
+"""
+import numpy as np
 from invokeai.backend.globals import Globals
-import numpy as np
 
 class PatchMatch:
-    '''
+    """
     Thin class wrapper around the patchmatch function.
-    '''
+    """
     patch_match = None
-    tried_load:bool = False
+    tried_load: bool = False
 
     def __init__(self):
         super().__init__()
@@ -24,21 +26,22 @@ class PatchMatch:
             return
         if Globals.try_patchmatch:
             from patchmatch import patch_match as pm
             if pm.patchmatch_available:
-                print('>> Patchmatch initialized')
+                print(">> Patchmatch initialized")
             else:
-                print('>> Patchmatch not loaded (nonfatal)')
+                print(">> Patchmatch not loaded (nonfatal)")
             self.patch_match = pm
         else:
-            print('>> Patchmatch loading disabled')
+            print(">> Patchmatch loading disabled")
         self.tried_load = True
 
     @classmethod
-    def patchmatch_available(self)->bool:
+    def patchmatch_available(self) -> bool:
         self._load_patch_match()
         return self.patch_match and self.patch_match.patchmatch_available
 
     @classmethod
-    def inpaint(self,*args,**kwargs)->np.ndarray:
+    def inpaint(self, *args, **kwargs) -> np.ndarray:
         if self.patchmatch_available():
-            return self.patch_match.inpaint(*args,**kwargs)
+            return self.patch_match.inpaint(*args, **kwargs)
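PatchMatch here is a class-level singleton: both public entry points are classmethods, the compiled patchmatch library is loaded lazily on first call, and Globals.try_patchmatch can suppress loading entirely. A minimal sketch, assuming the PyPatchMatch convention that nonzero mask pixels mark the region to fill:

    import numpy as np
    from invokeai.backend.image_util import PatchMatch

    image = np.zeros((64, 64, 3), dtype=np.uint8)  # toy RGB input
    mask = np.zeros((64, 64), dtype=np.uint8)
    mask[16:32, 16:32] = 255  # assumed convention: nonzero = repaint here

    if PatchMatch.patchmatch_available():  # triggers the lazy load
        result = PatchMatch.inpaint(image, mask)  # -> np.ndarray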

View File

@@ -6,10 +6,11 @@ PngWriter -- Converts Images generated by T2I into PNGs, finds
 Exports function retrieve_metadata(path)
 """
+import json
 import os
 import re
-import json
-from PIL import PngImagePlugin, Image
+from PIL import Image, PngImagePlugin
 
 # -------------------image generation utils-----
@@ -25,52 +26,57 @@ class PngWriter:
         dirlist = sorted(os.listdir(self.outdir), reverse=True)
         # find the first filename that matches our pattern or return 000000.0.png
         existing_name = next(
-            (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
-            '0000000.0.png',
+            (f for f in dirlist if re.match("^(\d+)\..*\.png", f)),
+            "0000000.0.png",
         )
-        basecount = int(existing_name.split('.', 1)[0]) + 1
-        return f'{basecount:06}'
+        basecount = int(existing_name.split(".", 1)[0]) + 1
+        return f"{basecount:06}"
 
     # saves image named _image_ to outdir/name, writing metadata from prompt
    # returns full path of output
-    def save_image_and_prompt_to_png(self, image, dream_prompt, name, metadata=None, compress_level=6):
+    def save_image_and_prompt_to_png(
+        self, image, dream_prompt, name, metadata=None, compress_level=6
+    ):
         path = os.path.join(self.outdir, name)
         info = PngImagePlugin.PngInfo()
-        info.add_text('Dream', dream_prompt)
+        info.add_text("Dream", dream_prompt)
         if metadata:
-            info.add_text('sd-metadata', json.dumps(metadata))
-        image.save(path, 'PNG', pnginfo=info, compress_level=compress_level)
+            info.add_text("sd-metadata", json.dumps(metadata))
+        image.save(path, "PNG", pnginfo=info, compress_level=compress_level)
         return path
 
-    def retrieve_metadata(self,img_basename):
-        '''
+    def retrieve_metadata(self, img_basename):
+        """
         Given a PNG filename stored in outdir, returns the "sd-metadata"
         metadata stored there, as a dict
-        '''
-        path = os.path.join(self.outdir,img_basename)
+        """
+        path = os.path.join(self.outdir, img_basename)
         all_metadata = retrieve_metadata(path)
-        return all_metadata['sd-metadata']
+        return all_metadata["sd-metadata"]
 
 def retrieve_metadata(img_path):
-    '''
+    """
     Given a path to a PNG image, returns the "sd-metadata"
     metadata stored there, as a dict
-    '''
+    """
     im = Image.open(img_path)
-    if hasattr(im, 'text'):
-        md = im.text.get('sd-metadata', '{}')
-        dream_prompt = im.text.get('Dream', '')
+    if hasattr(im, "text"):
+        md = im.text.get("sd-metadata", "{}")
+        dream_prompt = im.text.get("Dream", "")
     else:
         # When trying to retrieve metadata from images without a 'text' payload, such as JPG images.
-        md = '{}'
-        dream_prompt = ''
-    return {'sd-metadata': json.loads(md), 'Dream': dream_prompt}
+        md = "{}"
+        dream_prompt = ""
+    return {"sd-metadata": json.loads(md), "Dream": dream_prompt}
 
-def write_metadata(img_path:str, meta:dict):
+def write_metadata(img_path: str, meta: dict):
     im = Image.open(img_path)
     info = PngImagePlugin.PngInfo()
-    info.add_text('sd-metadata', json.dumps(meta))
-    im.save(img_path,'PNG',pnginfo=info)
+    info.add_text("sd-metadata", json.dumps(meta))
+    im.save(img_path, "PNG", pnginfo=info)
 
 class PromptFormatter:
     def __init__(self, t2i, opt):
@@ -86,28 +92,30 @@ class PromptFormatter:
         switches = list()
         switches.append(f'"{opt.prompt}"')
-        switches.append(f'-s{opt.steps or t2i.steps}')
-        switches.append(f'-W{opt.width or t2i.width}')
-        switches.append(f'-H{opt.height or t2i.height}')
-        switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
-        switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
-# to do: put model name into the t2i object
-#        switches.append(f'--model{t2i.model_name}')
+        switches.append(f"-s{opt.steps or t2i.steps}")
+        switches.append(f"-W{opt.width or t2i.width}")
+        switches.append(f"-H{opt.height or t2i.height}")
+        switches.append(f"-C{opt.cfg_scale or t2i.cfg_scale}")
+        switches.append(f"-A{opt.sampler_name or t2i.sampler_name}")
+        # to do: put model name into the t2i object
+        # switches.append(f'--model{t2i.model_name}')
         if opt.seamless or t2i.seamless:
-            switches.append(f'--seamless')
+            switches.append(f"--seamless")
         if opt.init_img:
-            switches.append(f'-I{opt.init_img}')
+            switches.append(f"-I{opt.init_img}")
         if opt.fit:
-            switches.append(f'--fit')
+            switches.append(f"--fit")
         if opt.strength and opt.init_img is not None:
-            switches.append(f'-f{opt.strength or t2i.strength}')
+            switches.append(f"-f{opt.strength or t2i.strength}")
         if opt.gfpgan_strength:
-            switches.append(f'-G{opt.gfpgan_strength}')
+            switches.append(f"-G{opt.gfpgan_strength}")
         if opt.upscale:
             switches.append(f'-U {" ".join([str(u) for u in opt.upscale])}')
         if opt.variation_amount > 0:
-            switches.append(f'-v{opt.variation_amount}')
+            switches.append(f"-v{opt.variation_amount}")
         if opt.with_variations:
-            formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in opt.with_variations)
-            switches.append(f'-V{formatted_variations}')
-        return ' '.join(switches)
+            formatted_variations = ",".join(
+                f"{seed}:{weight}" for seed, weight in opt.with_variations
+            )
+            switches.append(f"-V{formatted_variations}")
+        return " ".join(switches)

View File

@@ -1,12 +1,26 @@
 import torch.nn as nn
 
 def _conv_forward_asymmetric(self, input, weight, bias):
     """
     Patch for Conv2d._conv_forward that supports asymmetric padding
     """
-    working = nn.functional.pad(input, self.asymmetric_padding['x'], mode=self.asymmetric_padding_mode['x'])
-    working = nn.functional.pad(working, self.asymmetric_padding['y'], mode=self.asymmetric_padding_mode['y'])
-    return nn.functional.conv2d(working, weight, bias, self.stride, nn.modules.utils._pair(0), self.dilation, self.groups)
+    working = nn.functional.pad(
+        input, self.asymmetric_padding["x"], mode=self.asymmetric_padding_mode["x"]
+    )
+    working = nn.functional.pad(
+        working, self.asymmetric_padding["y"], mode=self.asymmetric_padding_mode["y"]
+    )
+    return nn.functional.conv2d(
+        working,
+        weight,
+        bias,
+        self.stride,
+        nn.modules.utils._pair(0),
+        self.dilation,
+        self.groups,
+    )
 
 def configure_model_padding(model, seamless, seamless_axes):
     """
@@ -18,14 +32,28 @@ def configure_model_padding(model, seamless, seamless_axes):
         if seamless:
             m.asymmetric_padding_mode = {}
             m.asymmetric_padding = {}
-            m.asymmetric_padding_mode['x'] = 'circular' if ('x' in seamless_axes) else 'constant'
-            m.asymmetric_padding['x'] = (m._reversed_padding_repeated_twice[0], m._reversed_padding_repeated_twice[1], 0, 0)
-            m.asymmetric_padding_mode['y'] = 'circular' if ('y' in seamless_axes) else 'constant'
-            m.asymmetric_padding['y'] = (0, 0, m._reversed_padding_repeated_twice[2], m._reversed_padding_repeated_twice[3])
+            m.asymmetric_padding_mode["x"] = (
+                "circular" if ("x" in seamless_axes) else "constant"
+            )
+            m.asymmetric_padding["x"] = (
+                m._reversed_padding_repeated_twice[0],
+                m._reversed_padding_repeated_twice[1],
+                0,
+                0,
+            )
+            m.asymmetric_padding_mode["y"] = (
+                "circular" if ("y" in seamless_axes) else "constant"
+            )
+            m.asymmetric_padding["y"] = (
+                0,
+                0,
+                m._reversed_padding_repeated_twice[2],
+                m._reversed_padding_repeated_twice[3],
+            )
             m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
         else:
             m._conv_forward = nn.Conv2d._conv_forward.__get__(m, nn.Conv2d)
-            if hasattr(m, 'asymmetric_padding_mode'):
+            if hasattr(m, "asymmetric_padding_mode"):
                 del m.asymmetric_padding_mode
-            if hasattr(m, 'asymmetric_padding'):
+            if hasattr(m, "asymmetric_padding"):
                 del m.asymmetric_padding
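configure_model_padding() monkey-patches each Conv2d's _conv_forward with the asymmetric version above: the selected axes are padded in "circular" mode (opposite edges wrap, which is what makes the output tile seamlessly) and the others in "constant" mode, after which the convolution itself runs with zero padding. A toy sketch on a bare Conv2d stack, assuming seamless_axes is any iterable of 'x'/'y':

    import torch
    import torch.nn as nn
    from invokeai.backend.image_util import configure_model_padding

    # Stand-in for a UNet/VAE; any module containing nn.Conv2d layers works.
    model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3, padding=1))
    configure_model_padding(model, seamless=True, seamless_axes=["x"])
    out = model(torch.randn(1, 3, 16, 16))  # left/right edges now wrap circularly

    configure_model_padding(model, seamless=False, seamless_axes=[])  # undo the patch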

View File

@@ -1,9 +1,9 @@
-'''Makes available the Txt2Mask class, which assists in the automatic
+"""Makes available the Txt2Mask class, which assists in the automatic
 assignment of masks via text prompt using clipseg.
 
 Here is typical usage:
 
-    from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale
+    from invokeai.backend.image_util.txt2mask import Txt2Mask, SegmentedGrayscale
     from PIL import Image
 
     txt2mask = Txt2Mask(self.device)
@@ -25,31 +25,39 @@ the mask that exceed the indicated confidence threshold. Values range
 from 0.0 to 1.0. The higher the threshold, the more confident the
 algorithm is. In limited testing, I have found that values around 0.5
 work fine.
-'''
+"""
+import numpy as np
 import torch
-import numpy as np
-from transformers import AutoProcessor, CLIPSegForImageSegmentation
 from PIL import Image, ImageOps
 from torchvision import transforms
+from transformers import AutoProcessor, CLIPSegForImageSegmentation
 
 from invokeai.backend.globals import global_cache_dir
 
-CLIPSEG_MODEL = 'CIDAS/clipseg-rd64-refined'
+CLIPSEG_MODEL = "CIDAS/clipseg-rd64-refined"
 CLIPSEG_SIZE = 352
 
 class SegmentedGrayscale(object):
-    def __init__(self, image:Image, heatmap:torch.Tensor):
+    def __init__(self, image: Image, heatmap: torch.Tensor):
         self.heatmap = heatmap
         self.image = image
 
-    def to_grayscale(self,invert:bool=False)->Image:
-        return self._rescale(Image.fromarray(np.uint8(255 - self.heatmap * 255 if invert else self.heatmap * 255)))
+    def to_grayscale(self, invert: bool = False) -> Image:
+        return self._rescale(
+            Image.fromarray(
+                np.uint8(255 - self.heatmap * 255 if invert else self.heatmap * 255)
+            )
+        )
 
-    def to_mask(self,threshold:float=0.5)->Image:
+    def to_mask(self, threshold: float = 0.5) -> Image:
         discrete_heatmap = self.heatmap.lt(threshold).int()
-        return self._rescale(Image.fromarray(np.uint8(discrete_heatmap*255),mode='L'))
+        return self._rescale(
+            Image.fromarray(np.uint8(discrete_heatmap * 255), mode="L")
+        )
 
-    def to_transparent(self,invert:bool=False)->Image:
+    def to_transparent(self, invert: bool = False) -> Image:
         transparent_image = self.image.copy()
         # For img2img, we want the selected regions to be transparent,
         # but to_grayscale() returns the opposite. Thus invert.
@@ -58,70 +66,77 @@ class SegmentedGrayscale(object):
         return transparent_image
 
     # unscales and uncrops the 352x352 heatmap so that it matches the image again
-    def _rescale(self, heatmap:Image)->Image:
-        size = self.image.width if (self.image.width > self.image.height) else self.image.height
-        resized_image = heatmap.resize(
-            (size,size),
-            resample=Image.Resampling.LANCZOS
+    def _rescale(self, heatmap: Image) -> Image:
+        size = (
+            self.image.width
+            if (self.image.width > self.image.height)
+            else self.image.height
         )
-        return resized_image.crop((0,0,self.image.width,self.image.height))
+        resized_image = heatmap.resize((size, size), resample=Image.Resampling.LANCZOS)
+        return resized_image.crop((0, 0, self.image.width, self.image.height))
 
 class Txt2Mask(object):
-    '''
+    """
     Create new Txt2Mask object. The optional device argument can be one of
     'cuda', 'mps' or 'cpu'.
-    '''
-    def __init__(self,device='cpu',refined=False):
-        print('>> Initializing clipseg model for text to mask inference')
+    """
+
+    def __init__(self, device="cpu", refined=False):
+        print(">> Initializing clipseg model for text to mask inference")
 
         # BUG: we are not doing anything with the device option at this time
         self.device = device
-        self.processor = AutoProcessor.from_pretrained(CLIPSEG_MODEL,
-                                                       cache_dir=global_cache_dir('hub')
-                                                       )
-        self.model = CLIPSegForImageSegmentation.from_pretrained(CLIPSEG_MODEL,
-                                                                 cache_dir=global_cache_dir('hub')
-                                                                 )
+        self.processor = AutoProcessor.from_pretrained(
+            CLIPSEG_MODEL, cache_dir=global_cache_dir("hub")
+        )
+        self.model = CLIPSegForImageSegmentation.from_pretrained(
+            CLIPSEG_MODEL, cache_dir=global_cache_dir("hub")
+        )
 
     @torch.no_grad()
-    def segment(self, image, prompt:str) -> SegmentedGrayscale:
-        '''
+    def segment(self, image, prompt: str) -> SegmentedGrayscale:
+        """
         Given a prompt string such as "a bagel", tries to identify the object in the
         provided image and returns a SegmentedGrayscale object in which the brighter
         pixels indicate where the object is inferred to be.
-        '''
-        transform = transforms.Compose([
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-            transforms.Resize((CLIPSEG_SIZE, CLIPSEG_SIZE)), # must be multiple of 64...
-        ])
+        """
+        transform = transforms.Compose(
+            [
+                transforms.ToTensor(),
+                transforms.Normalize(
+                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                ),
+                transforms.Resize(
+                    (CLIPSEG_SIZE, CLIPSEG_SIZE)
+                ),  # must be multiple of 64...
+            ]
        )
 
         if type(image) is str:
-            image = Image.open(image).convert('RGB')
+            image = Image.open(image).convert("RGB")
 
         image = ImageOps.exif_transpose(image)
         img = self._scale_and_crop(image)
 
-        inputs = self.processor(text=[prompt],
-                                images=[img],
-                                padding=True,
-                                return_tensors='pt')
+        inputs = self.processor(
+            text=[prompt], images=[img], padding=True, return_tensors="pt"
+        )
         outputs = self.model(**inputs)
         heatmap = torch.sigmoid(outputs.logits)
         return SegmentedGrayscale(image, heatmap)
 
-    def _scale_and_crop(self, image:Image)->Image:
-        scaled_image = Image.new('RGB',(CLIPSEG_SIZE,CLIPSEG_SIZE))
-        if image.width > image.height: # width is constraint
+    def _scale_and_crop(self, image: Image) -> Image:
+        scaled_image = Image.new("RGB", (CLIPSEG_SIZE, CLIPSEG_SIZE))
+        if image.width > image.height:  # width is constraint
             scale = CLIPSEG_SIZE / image.width
         else:
             scale = CLIPSEG_SIZE / image.height
         scaled_image.paste(
             image.resize(
-                (int(scale * image.width),
-                 int(scale * image.height)
-                 ),
-                resample=Image.Resampling.LANCZOS
-            ),box=(0,0)
+                (int(scale * image.width), int(scale * image.height)),
+                resample=Image.Resampling.LANCZOS,
+            ),
+            box=(0, 0),
         )
         return scaled_image
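Txt2Mask runs CLIPSeg on a 352x352 letterboxed copy of the input and returns the sigmoid heatmap wrapped in a SegmentedGrayscale, which rescales it back to the original geometry on demand. A sketch mirroring the module docstring (the first call downloads CIDAS/clipseg-rd64-refined into the hub cache; the image path is illustrative):

    from invokeai.backend.image_util import Txt2Mask

    txt2mask = Txt2Mask(device="cpu")  # device is currently ignored (see BUG note)
    segmented = txt2mask.segment("fruit_basket.png", "a bagel")  # path or PIL.Image

    soft = segmented.to_grayscale()          # confidence map, same size as input
    hard = segmented.to_mask(threshold=0.5)  # thresholded "L"-mode mask
    hard.save("bagel_mask.png")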

View File

@@ -1,12 +1,15 @@
-from math import sqrt, floor, ceil
+from math import ceil, floor, sqrt
 
 from PIL import Image
 
-class InitImageResizer():
+class InitImageResizer:
     """Simple class to create resized copies of an Image while preserving the aspect ratio."""
-    def __init__(self,Image):
+
+    def __init__(self, Image):
         self.image = Image
 
-    def resize(self,width=None,height=None) -> Image:
+    def resize(self, width=None, height=None) -> Image:
         """
         Return a copy of the image resized to fit within
         a box width x height. The aspect ratio is
@@ -18,37 +21,36 @@ class InitImageResizer():
         Everything is floored to the nearest multiple of 64 so
         that it can be passed to img2img()
         """
-        im = self.image 
+        im = self.image
 
-        ar = im.width/float(im.height)
+        ar = im.width / float(im.height)
 
         # Infer missing values from aspect ratio
-        if not(width or height): # both missing
-            width = im.width 
+        if not (width or height):  # both missing
+            width = im.width
             height = im.height
-        elif not height: # height missing
-            height = int(width/ar)
-        elif not width: # width missing
-            width = int(height*ar)
+        elif not height:  # height missing
+            height = int(width / ar)
+        elif not width:  # width missing
+            width = int(height * ar)
 
-        w_scale = width/im.width
-        h_scale = height/im.height
-        scale = min(w_scale,h_scale)
-        (rw,rh) = (int(scale*im.width),int(scale*im.height))
+        w_scale = width / im.width
+        h_scale = height / im.height
+        scale = min(w_scale, h_scale)
+        (rw, rh) = (int(scale * im.width), int(scale * im.height))
 
-        #round everything to multiples of 64
-        width,height,rw,rh = map(
-            lambda x: x-x%64, (width,height,rw,rh)
-        )
+        # round everything to multiples of 64
+        width, height, rw, rh = map(lambda x: x - x % 64, (width, height, rw, rh))
 
         # no resize necessary, but return a copy
         if im.width == width and im.height == height:
             return im.copy()
 
         # otherwise resize the original image so that it fits inside the bounding box
-        resized_image = self.image.resize((rw,rh),resample=Image.Resampling.LANCZOS)
+        resized_image = self.image.resize((rw, rh), resample=Image.Resampling.LANCZOS)
         return resized_image
 
 def make_grid(image_list, rows=None, cols=None):
     image_cnt = len(image_list)
     if None in (rows, cols):
@@ -57,7 +59,7 @@ def make_grid(image_list, rows=None, cols=None):
     width = image_list[0].width
     height = image_list[0].height
 
-    grid_img = Image.new('RGB', (width * cols, height * rows))
+    grid_img = Image.new("RGB", (width * cols, height * rows))
     i = 0
     for r in range(0, rows):
         for c in range(0, cols):
@@ -67,4 +69,3 @@ def make_grid(image_list, rows=None, cols=None):
             i = i + 1
 
     return grid_img
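InitImageResizer fits the image inside the requested box, infers a missing width or height from the aspect ratio, and floors every dimension to a multiple of 64 so the result is valid img2img input; make_grid pastes same-sized images into a rows x cols contact sheet. A small sketch:

    from PIL import Image
    from invokeai.backend.image_util import InitImageResizer, make_grid

    im = Image.new("RGB", (1000, 700))
    resized = InitImageResizer(im).resize(width=512)  # height inferred, then floored
    print(resized.size)  # both sides come back as multiples of 64

    grid = make_grid([resized] * 4, rows=2, cols=2)  # 2x2 sheet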