mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
add auto-creation of mask for inpainting (#438)
* now use a single init image for both image and mask * turn on debugging for now to write out mask and image * add back -M option as a fallback
This commit is contained in:
parent
a69ca31f34
commit
7996a30e3a
127
README.md
127
README.md
@ -22,22 +22,24 @@ text-to-image generator. This fork supports:
|
|||||||
generating images in your browser.
|
generating images in your browser.
|
||||||
|
|
||||||
3. Support for img2img in which you provide a seed image to guide the
|
3. Support for img2img in which you provide a seed image to guide the
|
||||||
image creation. (inpainting & masking coming soon)
|
image creation.
|
||||||
|
|
||||||
4. A notebook for running the code on Google Colab.
|
4. Preliminary inpainting support.
|
||||||
|
|
||||||
5. Upscaling and face fixing using the optional ESRGAN and GFPGAN
|
5. A notebook for running the code on Google Colab.
|
||||||
|
|
||||||
|
6. Upscaling and face fixing using the optional ESRGAN and GFPGAN
|
||||||
packages.
|
packages.
|
||||||
|
|
||||||
6. Weighted subprompts for prompt tuning.
|
7. Weighted subprompts for prompt tuning.
|
||||||
|
|
||||||
7. [Image variations](VARIATIONS.md) which allow you to systematically
|
8. [Image variations](VARIATIONS.md) which allow you to systematically
|
||||||
generate variations of an image you like and combine two or more
|
generate variations of an image you like and combine two or more
|
||||||
images together to combine the best features of both.
|
images together to combine the best features of both.
|
||||||
|
|
||||||
8. Textual inversion for customization of the prompt language and images.
|
9. Textual inversion for customization of the prompt language and images.
|
||||||
|
|
||||||
8. ...and more!
|
10. ...and more!
|
||||||
|
|
||||||
This fork is rapidly evolving, so use the Issues panel to report bugs
|
This fork is rapidly evolving, so use the Issues panel to report bugs
|
||||||
and make feature requests, and check back periodically for
|
and make feature requests, and check back periodically for
|
||||||
@ -75,9 +77,10 @@ log file of image names and prompts to the selected output directory.
|
|||||||
In addition, as of version 1.02, it also writes the prompt into the PNG
|
In addition, as of version 1.02, it also writes the prompt into the PNG
|
||||||
file's metadata where it can be retrieved using scripts/images2prompt.py
|
file's metadata where it can be retrieved using scripts/images2prompt.py
|
||||||
|
|
||||||
The script is confirmed to work on Linux and Windows systems. It should
|
The script is confirmed to work on Linux, Windows and Mac
|
||||||
work on MacOSX as well, but this is not confirmed. Note that this script
|
systems. Note that this script runs from the command-line or can be used
|
||||||
runs from the command-line (CMD or Terminal window), and does not have a GUI.
|
as a Web application. The Web GUI is currently rudimentary, but a much
|
||||||
|
better replacement is on its way.
|
||||||
|
|
||||||
```
|
```
|
||||||
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
|
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
|
||||||
@ -97,7 +100,7 @@ dream> "there's a fly in my soup" -n6 -g
|
|||||||
dream> q
|
dream> q
|
||||||
|
|
||||||
# this shows how to retrieve the prompt stored in the saved image's metadata
|
# this shows how to retrieve the prompt stored in the saved image's metadata
|
||||||
(ldm) ~/stable-diffusion$ python3 ./scripts/images2prompt.py outputs/img_samples/*.png
|
(ldm) ~/stable-diffusion$ python ./scripts/images2prompt.py outputs/img_samples/*.png
|
||||||
00009.png: "ashley judd riding a camel" -s150 -S 416354203
|
00009.png: "ashley judd riding a camel" -s150 -S 416354203
|
||||||
00010.png: "ashley judd riding a camel" -s150 -S 1362479620
|
00010.png: "ashley judd riding a camel" -s150 -S 1362479620
|
||||||
00011.png: "there's a fly in my soup" -n6 -g -S 2685670268
|
00011.png: "there's a fly in my soup" -n6 -g -S 2685670268
|
||||||
@ -118,29 +121,68 @@ The script itself also recognizes a series of command-line switches
|
|||||||
that will change important global defaults, such as the directory for
|
that will change important global defaults, such as the directory for
|
||||||
image outputs and the location of the model weight files.
|
image outputs and the location of the model weight files.
|
||||||
|
|
||||||
|
## Hardware Requirements
|
||||||
|
|
||||||
|
You will need one of:
|
||||||
|
|
||||||
|
1. An NVIDIA-based graphics card with 8 GB or more of VRAM memory*.
|
||||||
|
|
||||||
|
2. An Apple computer with an M1 chip.**
|
||||||
|
|
||||||
|
3. At least 12 GB of main memory RAM.
|
||||||
|
|
||||||
|
4. At least 6 GB of free disk space for the machine learning model,
|
||||||
|
python, and all its dependencies.
|
||||||
|
|
||||||
|
* If you have an Nvidia 10xx series card (e.g. the 1080ti), please
|
||||||
|
run the dream script in full-precision mode as shown below.
|
||||||
|
|
||||||
|
** Similarly, specify full-precision mode on Apple M1 hardware.
|
||||||
|
|
||||||
|
To run in full-precision mode, start dream.py with the
|
||||||
|
--full_precision flag:
|
||||||
|
|
||||||
|
~~~~
|
||||||
|
(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision
|
||||||
|
~~~~
|
||||||
|
|
||||||
## Image-to-Image
|
## Image-to-Image
|
||||||
|
|
||||||
This script also provides an img2img feature that lets you seed your
|
This script also provides an img2img feature that lets you seed your
|
||||||
creations with a drawing or photo. This is a really cool feature that tells
|
creations with an initial drawing or photo. This is a really cool
|
||||||
stable diffusion to build the prompt on top of the image you provide, preserving
|
feature that tells stable diffusion to build the prompt on top of the
|
||||||
the original's basic shape and layout. To use it, provide the --init_img
|
image you provide, preserving the original's basic shape and
|
||||||
option as shown here:
|
layout. To use it, provide the --init_img option as shown here:
|
||||||
|
|
||||||
```
|
```
|
||||||
dream> "waterfall and rainbow" --init_img=./init-images/crude_drawing.png --strength=0.5 -s100 -n4
|
dream> "waterfall and rainbow" --init_img=./init-images/crude_drawing.png --strength=0.5 -s100 -n4
|
||||||
```
|
```
|
||||||
|
|
||||||
The --init_img (-I) option gives the path to the seed picture. --strength (-f) controls how much
|
The --init_img (-I) option gives the path to the seed
|
||||||
the original will be modified, ranging from 0.0 (keep the original intact), to 1.0 (ignore the original
|
picture. --strength (-f) controls how much the original will be
|
||||||
completely). The default is 0.75, and ranges from 0.25-0.75 give interesting results.
|
modified, ranging from 0.0 (keep the original intact), to 1.0 (ignore
|
||||||
|
the original completely). The default is 0.75, and ranges from
|
||||||
|
0.25-0.75 give interesting results.
|
||||||
|
|
||||||
You may also pass a -v<count> option to generate count variants on the original image. This is done by
|
You may also pass a -v<count> option to generate count variants on the
|
||||||
passing the first generated image back into img2img the requested number of times. It generates interesting
|
original image. This is done by passing the first generated image back
|
||||||
|
into img2img the requested number of times. It generates interesting
|
||||||
variants.
|
variants.
|
||||||
|
|
||||||
|
If the initial image contains transparent regions, then Stable
|
||||||
|
Diffusion will only draw within the transparent regions, a process
|
||||||
|
called "inpainting". However, for this to work correctly, the color
|
||||||
|
information underneath the transparent regions needs to be preserved, not
|
||||||
|
erased. See [Creating Transparent Images for
|
||||||
|
Inpainting](#creating-transparent-regions-for-inpainting) for details.
|
||||||
|
|
||||||
## Seamless Tiling
|
## Seamless Tiling
|
||||||
|
|
||||||
The seamless tiling mode causes generated images to seamlessly tile with itself. To use it, add the --seamless option when starting the script which will result in all generated images to tile, or for each dream> prompt as shown here:
|
The seamless tiling mode causes generated images to seamlessly tile
|
||||||
|
with themselves. To use it, add the --seamless option when starting the
|
||||||
|
script which will result in all generated images to tile, or for each
|
||||||
|
dream> prompt as shown here:
|
||||||
|
|
||||||
```
|
```
|
||||||
dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
|
dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
|
||||||
```
|
```
|
||||||
@ -774,6 +816,49 @@ of branch>
|
|||||||
You will need to go through the install procedure again, but it should
|
You will need to go through the install procedure again, but it should
|
||||||
be fast because all the dependencies are already loaded.
|
be fast because all the dependencies are already loaded.
|
||||||
|
|
||||||
|
# Creating Transparent Regions for Inpainting
|
||||||
|
|
||||||
|
Inpainting is really cool. To do it, you start with an initial image
|
||||||
|
and use a photo editor to make one or more regions transparent
|
||||||
|
(i.e. they have a "hole" in them). You then provide the path to this
|
||||||
|
image at the dream> command line using the -I switch. Stable Diffusion
|
||||||
|
will only paint within the transparent region.
|
||||||
|
|
||||||
|
There's a catch. In the current implementation, you have to prepare
|
||||||
|
the initial image correctly so that the underlying colors are
|
||||||
|
preserved under the transparent area. Many image editing
|
||||||
|
applications will by default erase the color information under the
|
||||||
|
transparent pixels and replace them with white or black, which will
|
||||||
|
lead to suboptimal inpainting. You also must take care to export the
|
||||||
|
PNG file in such a way that the color information is preserved.
|
||||||
|
|
||||||
|
If your photo editor is erasing the underlying color information,
|
||||||
|
dream.py will give you a big fat warning. If you can't find a way to
|
||||||
|
coax your photo editor to retain color values under transparent areas,
|
||||||
|
then you can combine the -I and -M switches to provide both the
|
||||||
|
original unedited image and the masked (partially transparent) image:
|
||||||
|
|
||||||
|
~~~~
|
||||||
|
dream> man with cat on shoulder -I./images/man.png -M./images/man-transparent.png
|
||||||
|
~~~~
|
||||||
|
|
||||||
|
We are hoping to get rid of the need for this workaround in an
|
||||||
|
upcoming release.
|
||||||
|
|
||||||
|
## Recipe for GIMP
|
||||||
|
|
||||||
|
GIMP is a popular Linux photo-editing tool.
|
||||||
|
|
||||||
|
1. Open image in GIMP.
|
||||||
|
2. Layer->Transparency->Add Alpha Channel
|
||||||
|
3. Use lasso tool to select region to mask
|
||||||
|
4. Choose Select -> Float to create a floating selection
|
||||||
|
5. Open the Layers toolbar (^L) and select "Floating Selection"
|
||||||
|
6. Set opacity to 0%
|
||||||
|
7. Export as PNG
|
||||||
|
8. In the export dialogue, make sure the "Save colour values from
|
||||||
|
transparent pixels" checkbox is selected.
|
||||||
|
|
||||||
# Contributing
|
# Contributing
|
||||||
|
|
||||||
Anyone who wishes to contribute to this project, whether
|
Anyone who wishes to contribute to this project, whether
|
||||||
|
@ -16,7 +16,7 @@ class Inpaint(Img2Img):
|
|||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
|
def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
|
||||||
conditioning,init_image,init_mask,strength,
|
conditioning,init_image,mask_image,strength,
|
||||||
step_callback=None,**kwargs):
|
step_callback=None,**kwargs):
|
||||||
"""
|
"""
|
||||||
Returns a function returning an image derived from the prompt and
|
Returns a function returning an image derived from the prompt and
|
||||||
@ -24,8 +24,8 @@ class Inpaint(Img2Img):
|
|||||||
the time you call it. kwargs are 'init_latent' and 'strength'
|
the time you call it. kwargs are 'init_latent' and 'strength'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
init_mask = init_mask[0][0].unsqueeze(0).repeat(4,1,1).unsqueeze(0)
|
mask_image = mask_image[0][0].unsqueeze(0).repeat(4,1,1).unsqueeze(0)
|
||||||
init_mask = repeat(init_mask, '1 ... -> b ...', b=1)
|
mask_image = repeat(mask_image, '1 ... -> b ...', b=1)
|
||||||
|
|
||||||
# PLMS sampler not supported yet, so ignore previous sampler
|
# PLMS sampler not supported yet, so ignore previous sampler
|
||||||
if not isinstance(sampler,DDIMSampler):
|
if not isinstance(sampler,DDIMSampler):
|
||||||
@ -66,7 +66,7 @@ class Inpaint(Img2Img):
|
|||||||
img_callback = step_callback,
|
img_callback = step_callback,
|
||||||
unconditional_guidance_scale = cfg_scale,
|
unconditional_guidance_scale = cfg_scale,
|
||||||
unconditional_conditioning = uc,
|
unconditional_conditioning = uc,
|
||||||
mask = init_mask,
|
mask = mask_image,
|
||||||
init_latent = self.init_latent
|
init_latent = self.init_latent
|
||||||
)
|
)
|
||||||
return self.sample_to_image(samples)
|
return self.sample_to_image(samples)
|
||||||
|
@ -61,14 +61,10 @@ class PromptFormatter:
|
|||||||
switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
|
switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
|
||||||
# to do: put model name into the t2i object
|
# to do: put model name into the t2i object
|
||||||
# switches.append(f'--model{t2i.model_name}')
|
# switches.append(f'--model{t2i.model_name}')
|
||||||
if opt.invert_mask:
|
|
||||||
switches.append(f'--invert_mask')
|
|
||||||
if opt.seamless or t2i.seamless:
|
if opt.seamless or t2i.seamless:
|
||||||
switches.append(f'--seamless')
|
switches.append(f'--seamless')
|
||||||
if opt.init_img:
|
if opt.init_img:
|
||||||
switches.append(f'-I{opt.init_img}')
|
switches.append(f'-I{opt.init_img}')
|
||||||
if opt.mask:
|
|
||||||
switches.append(f'-M{opt.mask}')
|
|
||||||
if opt.fit:
|
if opt.fit:
|
||||||
switches.append(f'--fit')
|
switches.append(f'--fit')
|
||||||
if opt.strength and opt.init_img is not None:
|
if opt.strength and opt.init_img is not None:
|
||||||
|
121
ldm/generate.py
121
ldm/generate.py
@ -193,10 +193,9 @@ class Generate:
|
|||||||
log_tokenization= False,
|
log_tokenization= False,
|
||||||
with_variations = None,
|
with_variations = None,
|
||||||
variation_amount = 0.0,
|
variation_amount = 0.0,
|
||||||
# these are specific to img2img
|
# these are specific to img2img and inpaint
|
||||||
init_img = None,
|
init_img = None,
|
||||||
mask = None,
|
init_mask = None,
|
||||||
invert_mask = False,
|
|
||||||
fit = False,
|
fit = False,
|
||||||
strength = None,
|
strength = None,
|
||||||
# these are specific to GFPGAN/ESRGAN
|
# these are specific to GFPGAN/ESRGAN
|
||||||
@ -217,8 +216,6 @@ class Generate:
|
|||||||
cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
|
cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
|
||||||
seamless // whether the generated image should tile
|
seamless // whether the generated image should tile
|
||||||
init_img // path to an initial image
|
init_img // path to an initial image
|
||||||
mask // path to an initial image mask for inpainting
|
|
||||||
invert_mask // paint over opaque areas, retain transparent areas
|
|
||||||
strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
|
strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
|
||||||
gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
|
gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
|
||||||
ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image)
|
ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image)
|
||||||
@ -293,7 +290,7 @@ class Generate:
|
|||||||
|
|
||||||
results = list()
|
results = list()
|
||||||
init_image = None
|
init_image = None
|
||||||
init_mask_image = None
|
mask_image = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
uc, c = get_uc_and_c(
|
uc, c = get_uc_and_c(
|
||||||
@ -302,19 +299,11 @@ class Generate:
|
|||||||
log_tokens=self.log_tokenization
|
log_tokens=self.log_tokenization
|
||||||
)
|
)
|
||||||
|
|
||||||
if mask and not init_img:
|
(init_image,mask_image) = self._make_images(init_img,init_mask, width, height, fit)
|
||||||
raise AssertionError('If mask path is provided, initial image path should be provided as well')
|
|
||||||
|
|
||||||
if mask and init_img:
|
if (init_image is not None) and (mask_image is not None):
|
||||||
init_image,size1 = self._load_img(init_img, width, height,fit=fit)
|
|
||||||
init_image.to(self.device)
|
|
||||||
init_mask_image,size2 = self._load_img_mask(mask, width, height,fit=fit, invert=invert_mask)
|
|
||||||
init_mask_image.to(self.device)
|
|
||||||
assert size1==size2,f"for inpainting, the initial image and its mask must be identical sizes, instead got {size1} vs {size2}"
|
|
||||||
generator = self._make_inpaint()
|
generator = self._make_inpaint()
|
||||||
elif init_img: # little bit of repeated code here, but makes logic clearer
|
elif init_image is not None:
|
||||||
init_image,_ = self._load_img(init_img, width, height, fit=fit)
|
|
||||||
init_image.to(self.device)
|
|
||||||
generator = self._make_img2img()
|
generator = self._make_img2img()
|
||||||
else:
|
else:
|
||||||
generator = self._make_txt2img()
|
generator = self._make_txt2img()
|
||||||
@ -334,8 +323,8 @@ class Generate:
|
|||||||
width = width,
|
width = width,
|
||||||
height = height,
|
height = height,
|
||||||
init_image = init_image, # notice that init_image is different from init_img
|
init_image = init_image, # notice that init_image is different from init_img
|
||||||
init_mask = init_mask_image,
|
mask_image = mask_image,
|
||||||
strength = strength
|
strength = strength,
|
||||||
)
|
)
|
||||||
|
|
||||||
if upscale is not None or gfpgan_strength > 0:
|
if upscale is not None or gfpgan_strength > 0:
|
||||||
@ -352,7 +341,7 @@ class Generate:
|
|||||||
)
|
)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
print(traceback.format_exc(), file=sys.stderr)
|
print(traceback.format_exc(), file=sys.stderr)
|
||||||
print('>> Are you sure your system has an adequate GPU?')
|
print('>> Could not generate image.')
|
||||||
|
|
||||||
toc = time.time()
|
toc = time.time()
|
||||||
print('>> Usage stats:')
|
print('>> Usage stats:')
|
||||||
@ -374,6 +363,31 @@ class Generate:
|
|||||||
)
|
)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def _make_images(self, img_path, mask_path, width, height, fit=False):
|
||||||
|
init_image = None
|
||||||
|
init_mask = None
|
||||||
|
if not img_path:
|
||||||
|
return None,None
|
||||||
|
|
||||||
|
image = self._load_img(img_path, width, height, fit=fit) # this returns an Image
|
||||||
|
init_image = self._create_init_image(image) # this returns a torch tensor
|
||||||
|
|
||||||
|
if self._has_transparency(image) and not mask_path: # if image has a transparent area and no mask was provided, then try to generate mask
|
||||||
|
print('>> Initial image has transparent areas. Will inpaint in these regions.')
|
||||||
|
if self._check_for_erasure(image):
|
||||||
|
print(
|
||||||
|
'>> WARNING: Colors underneath the transparent region seem to have been erased.\n',
|
||||||
|
'>> Inpainting will be suboptimal. Please preserve the colors when making\n',
|
||||||
|
'>> a transparency mask, or provide mask explicitly using --init_mask (-M).'
|
||||||
|
)
|
||||||
|
init_mask = self._create_init_mask(image) # this returns a torch tensor
|
||||||
|
|
||||||
|
if mask_path:
|
||||||
|
mask_image = self._load_img(mask_path, width, height, fit=fit) # this returns an Image
|
||||||
|
init_mask = self._create_init_mask(mask_image)
|
||||||
|
|
||||||
|
return init_image,init_mask
|
||||||
|
|
||||||
def _make_img2img(self):
|
def _make_img2img(self):
|
||||||
if not self.generators.get('img2img'):
|
if not self.generators.get('img2img'):
|
||||||
from ldm.dream.generator.img2img import Img2Img
|
from ldm.dream.generator.img2img import Img2Img
|
||||||
@ -545,8 +559,9 @@ class Generate:
|
|||||||
def _load_img(self, path, width, height, fit=False):
|
def _load_img(self, path, width, height, fit=False):
|
||||||
assert os.path.exists(path), f'>> {path}: File not found'
|
assert os.path.exists(path), f'>> {path}: File not found'
|
||||||
|
|
||||||
with Image.open(path) as img:
|
# with Image.open(path) as img:
|
||||||
image = img.convert('RGB')
|
# image = img.convert('RGBA')
|
||||||
|
image = Image.open(path)
|
||||||
print(
|
print(
|
||||||
f'>> loaded input image of size {image.width}x{image.height} from {path}'
|
f'>> loaded input image of size {image.width}x{image.height} from {path}'
|
||||||
)
|
)
|
||||||
@ -554,57 +569,47 @@ class Generate:
|
|||||||
image = self._fit_image(image,(width,height))
|
image = self._fit_image(image,(width,height))
|
||||||
else:
|
else:
|
||||||
image = self._squeeze_image(image)
|
image = self._squeeze_image(image)
|
||||||
|
return image
|
||||||
|
|
||||||
size = image.size
|
def _create_init_image(self,image):
|
||||||
|
image = image.convert('RGB')
|
||||||
|
# print(
|
||||||
|
# f'>> DEBUG: writing the image to img.png'
|
||||||
|
# )
|
||||||
|
# image.save('img.png')
|
||||||
image = np.array(image).astype(np.float32) / 255.0
|
image = np.array(image).astype(np.float32) / 255.0
|
||||||
image = image[None].transpose(0, 3, 1, 2)
|
image = image[None].transpose(0, 3, 1, 2)
|
||||||
image = torch.from_numpy(image)
|
image = torch.from_numpy(image)
|
||||||
image = 2.0 * image - 1.0
|
image = 2.0 * image - 1.0
|
||||||
return image.to(self.device),size
|
return image.to(self.device)
|
||||||
|
|
||||||
def _load_img_mask(self, path, width, height, fit=False, invert=False):
|
|
||||||
assert os.path.exists(path), f'>> {path}: File not found'
|
|
||||||
|
|
||||||
image = Image.open(path)
|
|
||||||
print(
|
|
||||||
f'>> loaded input mask of size {image.width}x{image.height} from {path}'
|
|
||||||
)
|
|
||||||
|
|
||||||
if fit:
|
|
||||||
image = self._fit_image(image,(width,height))
|
|
||||||
else:
|
|
||||||
image = self._squeeze_image(image)
|
|
||||||
|
|
||||||
|
def _create_init_mask(self, image):
|
||||||
# convert into a black/white mask
|
# convert into a black/white mask
|
||||||
image = self._mask_to_image(image,invert)
|
image = self._image_to_mask(image)
|
||||||
image = image.convert('RGB')
|
image = image.convert('RGB')
|
||||||
size = image.size
|
|
||||||
|
|
||||||
# not quite sure what's going on here. It is copied from basunjindal's implementation
|
|
||||||
# image = image.resize((64, 64), resample=Image.Resampling.LANCZOS)
|
|
||||||
# BUG: We need to use the model's downsample factor rather than hardcoding "8"
|
# BUG: We need to use the model's downsample factor rather than hardcoding "8"
|
||||||
from ldm.dream.generator.base import downsampling
|
from ldm.dream.generator.base import downsampling
|
||||||
image = image.resize((size[0]//downsampling, size[1]//downsampling), resample=Image.Resampling.LANCZOS)
|
image = image.resize((image.width//downsampling, image.height//downsampling), resample=Image.Resampling.LANCZOS)
|
||||||
|
# print(
|
||||||
|
# f'>> DEBUG: writing the mask to mask.png'
|
||||||
|
# )
|
||||||
|
# image.save('mask.png')
|
||||||
image = np.array(image)
|
image = np.array(image)
|
||||||
image = image.astype(np.float32) / 255.0
|
image = image.astype(np.float32) / 255.0
|
||||||
image = image[None].transpose(0, 3, 1, 2)
|
image = image[None].transpose(0, 3, 1, 2)
|
||||||
image = torch.from_numpy(image)
|
image = torch.from_numpy(image)
|
||||||
return image.to(self.device),size
|
return image.to(self.device)
|
||||||
|
|
||||||
# The mask is expected to have the region to be inpainted
|
# The mask is expected to have the region to be inpainted
|
||||||
# with alpha transparency. It converts it into a black/white
|
# with alpha transparency. It converts it into a black/white
|
||||||
# image with the transparent part black.
|
# image with the transparent part black.
|
||||||
def _mask_to_image(self, init_mask, invert=False) -> Image:
|
def _image_to_mask(self, mask_image, invert=False) -> Image:
|
||||||
if self._has_transparency(init_mask):
|
|
||||||
# Obtain the mask from the transparency channel
|
# Obtain the mask from the transparency channel
|
||||||
mask = Image.new(mode="L", size=init_mask.size, color=255)
|
mask = Image.new(mode="L", size=mask_image.size, color=255)
|
||||||
mask.putdata(init_mask.getdata(band=3))
|
mask.putdata(mask_image.getdata(band=3))
|
||||||
if invert:
|
if invert:
|
||||||
mask = ImageOps.invert(mask)
|
mask = ImageOps.invert(mask)
|
||||||
return mask
|
return mask
|
||||||
else:
|
|
||||||
print(f'>> No transparent pixels in this image. Will paint across entire image.')
|
|
||||||
return Image.new(mode="L", size=mask.size, color=0)
|
|
||||||
|
|
||||||
def _has_transparency(self,image):
|
def _has_transparency(self,image):
|
||||||
if image.info.get("transparency", None) is not None:
|
if image.info.get("transparency", None) is not None:
|
||||||
@ -620,6 +625,20 @@ class Generate:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _check_for_erasure(self,image):
|
||||||
|
width, height = image.size
|
||||||
|
pixdata = image.load()
|
||||||
|
colored = 0
|
||||||
|
for y in range(height):
|
||||||
|
for x in range(width):
|
||||||
|
if pixdata[x, y][3] == 0:
|
||||||
|
r, g, b, _ = pixdata[x, y]
|
||||||
|
if (r, g, b) != (0, 0, 0) and \
|
||||||
|
(r, g, b) != (255, 255, 255):
|
||||||
|
colored += 1
|
||||||
|
return colored == 0
|
||||||
|
|
||||||
def _squeeze_image(self,image):
|
def _squeeze_image(self,image):
|
||||||
x,y,resize_needed = self._resolution_check(image.width,image.height)
|
x,y,resize_needed = self._resolution_check(image.width,image.height)
|
||||||
if resize_needed:
|
if resize_needed:
|
||||||
|
@ -564,14 +564,9 @@ def create_cmd_parser():
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-M',
|
'-M',
|
||||||
'--mask',
|
'--init_mask',
|
||||||
type=str,
|
type=str,
|
||||||
help='Path to inpainting mask; transparent areas will be painted over',
|
help='Path to input mask for inpainting mode (supersedes width and height)',
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--invert_mask',
|
|
||||||
action='store_true',
|
|
||||||
help='Invert the inpainting mask; opaque areas will be painted over',
|
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-T',
|
'-T',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user