all vestiges of ldm.invoke removed

Author: Lincoln Stein
Date: 2023-03-03 01:02:00 -05:00
parent 6a990565ff
commit 60a98cacef
126 changed files with 8514 additions and 6520 deletions

View File

@@ -1,15 +1,12 @@
-'''
+"""
 Initialization file for invokeai.backend.image_util methods.
-'''
+"""
 from .patchmatch import PatchMatch
+from .pngwriter import PngWriter, PromptFormatter, retrieve_metadata, write_metadata
+from .seamless import configure_model_padding
 from .txt2mask import Txt2Mask
 from .util import InitImageResizer, make_grid
-from .pngwriter import (PngWriter,
-                        PromptFormatter,
-                        retrieve_metadata,
-                        write_metadata,
-)
-from .seamless import configure_model_padding
 
 def debug_image(
     debug_image, debug_text, debug_show=True, debug_result=False, debug_status=False
@@ -25,5 +22,3 @@ def debug_image(
     if debug_result:
         return image_copy
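The net effect of this hunk: every public helper of the package is now re-exported from one flat namespace, with the imports alphabetized by isort. Here is a minimal consumer sketch, assuming only the re-exports listed above:

    # Sketch of downstream usage; assumes an InvokeAI checkout is importable.
    from invokeai.backend.image_util import (
        PatchMatch,
        PngWriter,
        Txt2Mask,
        configure_model_padding,
        make_grid,
    )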

View File

@@ -1,20 +1,22 @@
-'''
+"""
 This module defines a singleton object, "patchmatch" that
 wraps the actual patchmatch object. It respects the global
 "try_patchmatch" attribute, so that patchmatch loading can
 be suppressed or deferred
-'''
+"""
+import numpy as np
 from invokeai.backend.globals import Globals
-import numpy as np
 
 class PatchMatch:
-    '''
+    """
     Thin class wrapper around the patchmatch function.
-    '''
+    """
     patch_match = None
-    tried_load:bool = False
+    tried_load: bool = False
 
     def __init__(self):
         super().__init__()
@@ -24,21 +26,22 @@ class PatchMatch:
             return
         if Globals.try_patchmatch:
             from patchmatch import patch_match as pm
             if pm.patchmatch_available:
-                print('>> Patchmatch initialized')
+                print(">> Patchmatch initialized")
             else:
-                print('>> Patchmatch not loaded (nonfatal)')
+                print(">> Patchmatch not loaded (nonfatal)")
             self.patch_match = pm
         else:
-            print('>> Patchmatch loading disabled')
+            print(">> Patchmatch loading disabled")
         self.tried_load = True
 
     @classmethod
-    def patchmatch_available(self)->bool:
+    def patchmatch_available(self) -> bool:
         self._load_patch_match()
         return self.patch_match and self.patch_match.patchmatch_available
 
     @classmethod
-    def inpaint(self,*args,**kwargs)->np.ndarray:
+    def inpaint(self, *args, **kwargs) -> np.ndarray:
         if self.patchmatch_available():
-            return self.patch_match.inpaint(*args,**kwargs)
+            return self.patch_match.inpaint(*args, **kwargs)
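PatchMatch here is a class-level singleton: both public entry points are classmethods, the compiled patchmatch library is loaded lazily on first call, and Globals.try_patchmatch can suppress loading entirely. A minimal sketch, assuming the PyPatchMatch convention that nonzero mask pixels mark the region to fill:

    import numpy as np
    from invokeai.backend.image_util import PatchMatch

    image = np.zeros((64, 64, 3), dtype=np.uint8)  # toy RGB input
    mask = np.zeros((64, 64), dtype=np.uint8)
    mask[16:32, 16:32] = 255  # assumed convention: nonzero = repaint here

    if PatchMatch.patchmatch_available():  # triggers the lazy load
        result = PatchMatch.inpaint(image, mask)  # -> np.ndarray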

View File

@@ -6,10 +6,11 @@ PngWriter -- Converts Images generated by T2I into PNGs, finds
 Exports function retrieve_metadata(path)
 """
+import json
 import os
 import re
-import json
-from PIL import PngImagePlugin, Image
+from PIL import Image, PngImagePlugin
 
 # -------------------image generation utils-----
@@ -25,52 +26,57 @@ class PngWriter:
         dirlist = sorted(os.listdir(self.outdir), reverse=True)
         # find the first filename that matches our pattern or return 000000.0.png
         existing_name = next(
-            (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
-            '0000000.0.png',
+            (f for f in dirlist if re.match("^(\d+)\..*\.png", f)),
+            "0000000.0.png",
         )
-        basecount = int(existing_name.split('.', 1)[0]) + 1
-        return f'{basecount:06}'
+        basecount = int(existing_name.split(".", 1)[0]) + 1
+        return f"{basecount:06}"
 
     # saves image named _image_ to outdir/name, writing metadata from prompt
    # returns full path of output
-    def save_image_and_prompt_to_png(self, image, dream_prompt, name, metadata=None, compress_level=6):
+    def save_image_and_prompt_to_png(
+        self, image, dream_prompt, name, metadata=None, compress_level=6
+    ):
         path = os.path.join(self.outdir, name)
         info = PngImagePlugin.PngInfo()
-        info.add_text('Dream', dream_prompt)
+        info.add_text("Dream", dream_prompt)
         if metadata:
-            info.add_text('sd-metadata', json.dumps(metadata))
-        image.save(path, 'PNG', pnginfo=info, compress_level=compress_level)
+            info.add_text("sd-metadata", json.dumps(metadata))
+        image.save(path, "PNG", pnginfo=info, compress_level=compress_level)
         return path
 
-    def retrieve_metadata(self,img_basename):
-        '''
+    def retrieve_metadata(self, img_basename):
+        """
         Given a PNG filename stored in outdir, returns the "sd-metadata"
         metadata stored there, as a dict
-        '''
-        path = os.path.join(self.outdir,img_basename)
+        """
+        path = os.path.join(self.outdir, img_basename)
         all_metadata = retrieve_metadata(path)
-        return all_metadata['sd-metadata']
+        return all_metadata["sd-metadata"]
 
 def retrieve_metadata(img_path):
-    '''
+    """
     Given a path to a PNG image, returns the "sd-metadata"
     metadata stored there, as a dict
-    '''
+    """
     im = Image.open(img_path)
-    if hasattr(im, 'text'):
-        md = im.text.get('sd-metadata', '{}')
-        dream_prompt = im.text.get('Dream', '')
+    if hasattr(im, "text"):
+        md = im.text.get("sd-metadata", "{}")
+        dream_prompt = im.text.get("Dream", "")
     else:
         # When trying to retrieve metadata from images without a 'text' payload, such as JPG images.
-        md = '{}'
-        dream_prompt = ''
-    return {'sd-metadata': json.loads(md), 'Dream': dream_prompt}
+        md = "{}"
+        dream_prompt = ""
+    return {"sd-metadata": json.loads(md), "Dream": dream_prompt}
 
-def write_metadata(img_path:str, meta:dict):
+def write_metadata(img_path: str, meta: dict):
     im = Image.open(img_path)
     info = PngImagePlugin.PngInfo()
-    info.add_text('sd-metadata', json.dumps(meta))
-    im.save(img_path,'PNG',pnginfo=info)
+    info.add_text("sd-metadata", json.dumps(meta))
+    im.save(img_path, "PNG", pnginfo=info)
 
 class PromptFormatter:
     def __init__(self, t2i, opt):
@@ -86,28 +92,30 @@ class PromptFormatter:
         switches = list()
         switches.append(f'"{opt.prompt}"')
-        switches.append(f'-s{opt.steps or t2i.steps}')
-        switches.append(f'-W{opt.width or t2i.width}')
-        switches.append(f'-H{opt.height or t2i.height}')
-        switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
-        switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
-# to do: put model name into the t2i object
-#        switches.append(f'--model{t2i.model_name}')
+        switches.append(f"-s{opt.steps or t2i.steps}")
+        switches.append(f"-W{opt.width or t2i.width}")
+        switches.append(f"-H{opt.height or t2i.height}")
+        switches.append(f"-C{opt.cfg_scale or t2i.cfg_scale}")
+        switches.append(f"-A{opt.sampler_name or t2i.sampler_name}")
+        # to do: put model name into the t2i object
+        # switches.append(f'--model{t2i.model_name}')
         if opt.seamless or t2i.seamless:
-            switches.append(f'--seamless')
+            switches.append(f"--seamless")
         if opt.init_img:
-            switches.append(f'-I{opt.init_img}')
+            switches.append(f"-I{opt.init_img}")
         if opt.fit:
-            switches.append(f'--fit')
+            switches.append(f"--fit")
         if opt.strength and opt.init_img is not None:
-            switches.append(f'-f{opt.strength or t2i.strength}')
+            switches.append(f"-f{opt.strength or t2i.strength}")
         if opt.gfpgan_strength:
-            switches.append(f'-G{opt.gfpgan_strength}')
+            switches.append(f"-G{opt.gfpgan_strength}")
         if opt.upscale:
             switches.append(f'-U {" ".join([str(u) for u in opt.upscale])}')
         if opt.variation_amount > 0:
-            switches.append(f'-v{opt.variation_amount}')
+            switches.append(f"-v{opt.variation_amount}")
         if opt.with_variations:
-            formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in opt.with_variations)
-            switches.append(f'-V{formatted_variations}')
-        return ' '.join(switches)
+            formatted_variations = ",".join(
+                f"{seed}:{weight}" for seed, weight in opt.with_variations
+            )
+            switches.append(f"-V{formatted_variations}")
+        return " ".join(switches)

View File

@@ -1,12 +1,26 @@
 import torch.nn as nn
 
 def _conv_forward_asymmetric(self, input, weight, bias):
     """
     Patch for Conv2d._conv_forward that supports asymmetric padding
     """
-    working = nn.functional.pad(input, self.asymmetric_padding['x'], mode=self.asymmetric_padding_mode['x'])
-    working = nn.functional.pad(working, self.asymmetric_padding['y'], mode=self.asymmetric_padding_mode['y'])
-    return nn.functional.conv2d(working, weight, bias, self.stride, nn.modules.utils._pair(0), self.dilation, self.groups)
+    working = nn.functional.pad(
+        input, self.asymmetric_padding["x"], mode=self.asymmetric_padding_mode["x"]
+    )
+    working = nn.functional.pad(
+        working, self.asymmetric_padding["y"], mode=self.asymmetric_padding_mode["y"]
+    )
+    return nn.functional.conv2d(
+        working,
+        weight,
+        bias,
+        self.stride,
+        nn.modules.utils._pair(0),
+        self.dilation,
+        self.groups,
+    )
 
 def configure_model_padding(model, seamless, seamless_axes):
     """
@@ -18,14 +32,28 @@ def configure_model_padding(model, seamless, seamless_axes):
         if seamless:
             m.asymmetric_padding_mode = {}
             m.asymmetric_padding = {}
-            m.asymmetric_padding_mode['x'] = 'circular' if ('x' in seamless_axes) else 'constant'
-            m.asymmetric_padding['x'] = (m._reversed_padding_repeated_twice[0], m._reversed_padding_repeated_twice[1], 0, 0)
-            m.asymmetric_padding_mode['y'] = 'circular' if ('y' in seamless_axes) else 'constant'
-            m.asymmetric_padding['y'] = (0, 0, m._reversed_padding_repeated_twice[2], m._reversed_padding_repeated_twice[3])
+            m.asymmetric_padding_mode["x"] = (
+                "circular" if ("x" in seamless_axes) else "constant"
+            )
+            m.asymmetric_padding["x"] = (
+                m._reversed_padding_repeated_twice[0],
+                m._reversed_padding_repeated_twice[1],
+                0,
+                0,
+            )
+            m.asymmetric_padding_mode["y"] = (
+                "circular" if ("y" in seamless_axes) else "constant"
+            )
+            m.asymmetric_padding["y"] = (
+                0,
+                0,
+                m._reversed_padding_repeated_twice[2],
+                m._reversed_padding_repeated_twice[3],
+            )
             m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
         else:
             m._conv_forward = nn.Conv2d._conv_forward.__get__(m, nn.Conv2d)
-            if hasattr(m, 'asymmetric_padding_mode'):
+            if hasattr(m, "asymmetric_padding_mode"):
                 del m.asymmetric_padding_mode
-            if hasattr(m, 'asymmetric_padding'):
+            if hasattr(m, "asymmetric_padding"):
                 del m.asymmetric_padding
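configure_model_padding() monkey-patches each Conv2d's _conv_forward with the asymmetric version above: the selected axes are padded in "circular" mode (opposite edges wrap, which is what makes the output tile seamlessly) and the others in "constant" mode, after which the convolution itself runs with zero padding. A toy sketch on a bare Conv2d stack, assuming seamless_axes is any iterable of 'x'/'y':

    import torch
    import torch.nn as nn
    from invokeai.backend.image_util import configure_model_padding

    # Stand-in for a UNet/VAE; any module containing nn.Conv2d layers works.
    model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3, padding=1))
    configure_model_padding(model, seamless=True, seamless_axes=["x"])
    out = model(torch.randn(1, 3, 16, 16))  # left/right edges now wrap circularly

    configure_model_padding(model, seamless=False, seamless_axes=[])  # undo the patch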

View File

@@ -1,9 +1,9 @@
-'''Makes available the Txt2Mask class, which assists in the automatic
+"""Makes available the Txt2Mask class, which assists in the automatic
 assignment of masks via text prompt using clipseg.
 
 Here is typical usage:
 
-    from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale
+    from invokeai.backend.image_util.txt2mask import Txt2Mask, SegmentedGrayscale
     from PIL import Image
 
     txt2mask = Txt2Mask(self.device)
@@ -25,31 +25,39 @@ the mask that exceed the indicated confidence threshold. Values range
 from 0.0 to 1.0. The higher the threshold, the more confident the
 algorithm is. In limited testing, I have found that values around 0.5
 work fine.
-'''
+"""
+import numpy as np
 import torch
-import numpy as np
-from transformers import AutoProcessor, CLIPSegForImageSegmentation
 from PIL import Image, ImageOps
 from torchvision import transforms
+from transformers import AutoProcessor, CLIPSegForImageSegmentation
 
 from invokeai.backend.globals import global_cache_dir
 
-CLIPSEG_MODEL = 'CIDAS/clipseg-rd64-refined'
+CLIPSEG_MODEL = "CIDAS/clipseg-rd64-refined"
 CLIPSEG_SIZE = 352
 
 class SegmentedGrayscale(object):
-    def __init__(self, image:Image, heatmap:torch.Tensor):
+    def __init__(self, image: Image, heatmap: torch.Tensor):
         self.heatmap = heatmap
         self.image = image
 
-    def to_grayscale(self,invert:bool=False)->Image:
-        return self._rescale(Image.fromarray(np.uint8(255 - self.heatmap * 255 if invert else self.heatmap * 255)))
+    def to_grayscale(self, invert: bool = False) -> Image:
+        return self._rescale(
+            Image.fromarray(
+                np.uint8(255 - self.heatmap * 255 if invert else self.heatmap * 255)
+            )
+        )
 
-    def to_mask(self,threshold:float=0.5)->Image:
+    def to_mask(self, threshold: float = 0.5) -> Image:
         discrete_heatmap = self.heatmap.lt(threshold).int()
-        return self._rescale(Image.fromarray(np.uint8(discrete_heatmap*255),mode='L'))
+        return self._rescale(
+            Image.fromarray(np.uint8(discrete_heatmap * 255), mode="L")
+        )
 
-    def to_transparent(self,invert:bool=False)->Image:
+    def to_transparent(self, invert: bool = False) -> Image:
         transparent_image = self.image.copy()
         # For img2img, we want the selected regions to be transparent,
         # but to_grayscale() returns the opposite. Thus invert.
@@ -58,70 +66,77 @@ class SegmentedGrayscale(object):
         return transparent_image
 
     # unscales and uncrops the 352x352 heatmap so that it matches the image again
-    def _rescale(self, heatmap:Image)->Image:
-        size = self.image.width if (self.image.width > self.image.height) else self.image.height
-        resized_image = heatmap.resize(
-            (size,size),
-            resample=Image.Resampling.LANCZOS
+    def _rescale(self, heatmap: Image) -> Image:
+        size = (
+            self.image.width
+            if (self.image.width > self.image.height)
+            else self.image.height
         )
-        return resized_image.crop((0,0,self.image.width,self.image.height))
+        resized_image = heatmap.resize((size, size), resample=Image.Resampling.LANCZOS)
+        return resized_image.crop((0, 0, self.image.width, self.image.height))
 
 class Txt2Mask(object):
-    '''
+    """
     Create new Txt2Mask object. The optional device argument can be one of
     'cuda', 'mps' or 'cpu'.
-    '''
-    def __init__(self,device='cpu',refined=False):
-        print('>> Initializing clipseg model for text to mask inference')
+    """
+
+    def __init__(self, device="cpu", refined=False):
+        print(">> Initializing clipseg model for text to mask inference")
 
         # BUG: we are not doing anything with the device option at this time
         self.device = device
-        self.processor = AutoProcessor.from_pretrained(CLIPSEG_MODEL,
-                                                       cache_dir=global_cache_dir('hub')
-                                                       )
-        self.model = CLIPSegForImageSegmentation.from_pretrained(CLIPSEG_MODEL,
-                                                                 cache_dir=global_cache_dir('hub')
-                                                                 )
+        self.processor = AutoProcessor.from_pretrained(
+            CLIPSEG_MODEL, cache_dir=global_cache_dir("hub")
+        )
+        self.model = CLIPSegForImageSegmentation.from_pretrained(
+            CLIPSEG_MODEL, cache_dir=global_cache_dir("hub")
+        )
 
     @torch.no_grad()
-    def segment(self, image, prompt:str) -> SegmentedGrayscale:
-        '''
+    def segment(self, image, prompt: str) -> SegmentedGrayscale:
+        """
         Given a prompt string such as "a bagel", tries to identify the object in the
         provided image and returns a SegmentedGrayscale object in which the brighter
         pixels indicate where the object is inferred to be.
-        '''
-        transform = transforms.Compose([
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-            transforms.Resize((CLIPSEG_SIZE, CLIPSEG_SIZE)), # must be multiple of 64...
-        ])
+        """
+        transform = transforms.Compose(
+            [
+                transforms.ToTensor(),
+                transforms.Normalize(
+                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                ),
+                transforms.Resize(
+                    (CLIPSEG_SIZE, CLIPSEG_SIZE)
+                ),  # must be multiple of 64...
+            ]
        )
 
         if type(image) is str:
-            image = Image.open(image).convert('RGB')
+            image = Image.open(image).convert("RGB")
 
         image = ImageOps.exif_transpose(image)
         img = self._scale_and_crop(image)
 
-        inputs = self.processor(text=[prompt],
-                                images=[img],
-                                padding=True,
-                                return_tensors='pt')
+        inputs = self.processor(
+            text=[prompt], images=[img], padding=True, return_tensors="pt"
+        )
         outputs = self.model(**inputs)
         heatmap = torch.sigmoid(outputs.logits)
         return SegmentedGrayscale(image, heatmap)
 
-    def _scale_and_crop(self, image:Image)->Image:
-        scaled_image = Image.new('RGB',(CLIPSEG_SIZE,CLIPSEG_SIZE))
-        if image.width > image.height: # width is constraint
+    def _scale_and_crop(self, image: Image) -> Image:
+        scaled_image = Image.new("RGB", (CLIPSEG_SIZE, CLIPSEG_SIZE))
+        if image.width > image.height:  # width is constraint
             scale = CLIPSEG_SIZE / image.width
         else:
             scale = CLIPSEG_SIZE / image.height
         scaled_image.paste(
             image.resize(
-                (int(scale * image.width),
-                 int(scale * image.height)
-                 ),
-                resample=Image.Resampling.LANCZOS
-            ),box=(0,0)
+                (int(scale * image.width), int(scale * image.height)),
+                resample=Image.Resampling.LANCZOS,
+            ),
+            box=(0, 0),
         )
         return scaled_image
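Txt2Mask runs CLIPSeg on a 352x352 letterboxed copy of the input and returns the sigmoid heatmap wrapped in a SegmentedGrayscale, which rescales it back to the original geometry on demand. A sketch mirroring the module docstring (the first call downloads CIDAS/clipseg-rd64-refined into the hub cache; the image path is illustrative):

    from invokeai.backend.image_util import Txt2Mask

    txt2mask = Txt2Mask(device="cpu")  # device is currently ignored (see BUG note)
    segmented = txt2mask.segment("fruit_basket.png", "a bagel")  # path or PIL.Image

    soft = segmented.to_grayscale()          # confidence map, same size as input
    hard = segmented.to_mask(threshold=0.5)  # thresholded "L"-mode mask
    hard.save("bagel_mask.png")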

View File

@@ -1,12 +1,15 @@
-from math import sqrt, floor, ceil
+from math import ceil, floor, sqrt
 
 from PIL import Image
 
-class InitImageResizer():
+class InitImageResizer:
     """Simple class to create resized copies of an Image while preserving the aspect ratio."""
-    def __init__(self,Image):
+
+    def __init__(self, Image):
         self.image = Image
 
-    def resize(self,width=None,height=None) -> Image:
+    def resize(self, width=None, height=None) -> Image:
         """
         Return a copy of the image resized to fit within
         a box width x height. The aspect ratio is
@@ -18,37 +21,36 @@ class InitImageResizer():
         Everything is floored to the nearest multiple of 64 so
         that it can be passed to img2img()
         """
-        im = self.image 
+        im = self.image
 
-        ar = im.width/float(im.height)
+        ar = im.width / float(im.height)
 
         # Infer missing values from aspect ratio
-        if not(width or height): # both missing
-            width = im.width 
+        if not (width or height):  # both missing
+            width = im.width
             height = im.height
-        elif not height: # height missing
-            height = int(width/ar)
-        elif not width: # width missing
-            width = int(height*ar)
+        elif not height:  # height missing
+            height = int(width / ar)
+        elif not width:  # width missing
+            width = int(height * ar)
 
-        w_scale = width/im.width
-        h_scale = height/im.height
-        scale = min(w_scale,h_scale)
-        (rw,rh) = (int(scale*im.width),int(scale*im.height))
+        w_scale = width / im.width
+        h_scale = height / im.height
+        scale = min(w_scale, h_scale)
+        (rw, rh) = (int(scale * im.width), int(scale * im.height))
 
-        #round everything to multiples of 64
-        width,height,rw,rh = map(
-            lambda x: x-x%64, (width,height,rw,rh)
-        )
+        # round everything to multiples of 64
+        width, height, rw, rh = map(lambda x: x - x % 64, (width, height, rw, rh))
 
         # no resize necessary, but return a copy
         if im.width == width and im.height == height:
             return im.copy()
 
         # otherwise resize the original image so that it fits inside the bounding box
-        resized_image = self.image.resize((rw,rh),resample=Image.Resampling.LANCZOS)
+        resized_image = self.image.resize((rw, rh), resample=Image.Resampling.LANCZOS)
         return resized_image
 
 def make_grid(image_list, rows=None, cols=None):
     image_cnt = len(image_list)
     if None in (rows, cols):
@@ -57,7 +59,7 @@ def make_grid(image_list, rows=None, cols=None):
     width = image_list[0].width
     height = image_list[0].height
 
-    grid_img = Image.new('RGB', (width * cols, height * rows))
+    grid_img = Image.new("RGB", (width * cols, height * rows))
     i = 0
     for r in range(0, rows):
         for c in range(0, cols):
@@ -67,4 +69,3 @@ def make_grid(image_list, rows=None, cols=None):
             i = i + 1
 
     return grid_img
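InitImageResizer fits the image inside the requested box, infers a missing width or height from the aspect ratio, and floors every dimension to a multiple of 64 so the result is valid img2img input; make_grid pastes same-sized images into a rows x cols contact sheet. A small sketch:

    from PIL import Image
    from invokeai.backend.image_util import InitImageResizer, make_grid

    im = Image.new("RGB", (1000, 700))
    resized = InitImageResizer(im).resize(width=512)  # height inferred, then floored
    print(resized.size)  # both sides come back as multiples of 64

    grid = make_grid([resized] * 4, rows=2, cols=2)  # 2x2 sheet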