Merge branch 'main' into perf/lowmem_sequential_guidance

Lincoln Stein 2023-02-20 07:42:35 -05:00 committed by GitHub
commit cfd897874b
10 changed files with 140 additions and 22 deletions

View File

@ -214,6 +214,8 @@ Here are the invoke> commands that apply to txt2img:
| `--variation <float>` | `-v<float>` | `0.0` | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with `-S<seed>` and `-n<int>` to generate a series of riffs on a starting image. See [Variations](./VARIATIONS.md). |
| `--with_variations <pattern>` | | `None` | Combine two or more variations. See [Variations](./VARIATIONS.md) for how to use this. |
| `--save_intermediates <n>` | | `None` | Save the image from every nth step into an "intermediates" folder inside the output directory |
| `--h_symmetry_time_pct <float>` | | `None` | Create symmetry along the X axis at the desired percent complete of the generation process. (Must be between 0.0 and 1.0; set to a very small number like 0.0001 for just after the first step of generation.) |
| `--v_symmetry_time_pct <float>` | | `None` | Create symmetry along the Y axis at the desired percent complete of the generation process. (Must be between 0.0 and 1.0; set to a very small number like 0.0001 for just after the first step of generation.) |
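For example, a hypothetical session using the new flags (prompt and other values are illustrative):

```bash
# mirror left-to-right a quarter of the way through sampling
invoke> "a gothic stained glass window" -s50 -C7.5 --h_symmetry_time_pct 0.25
# mirror top-to-bottom just after the first step
invoke> "a butterfly on a black background" -s50 --v_symmetry_time_pct 0.0001
```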
!!! note

View File

@ -40,7 +40,7 @@ for adj in adjectives:
print(f'a {adj} day -A{samp} -C{cg}')
```
- It's output looks like this (abbreviated):
+ Its output looks like this (abbreviated):
```bash
a sunny day -Aklms -C7.5

View File

@ -320,6 +320,8 @@ class Generate:
variation_amount=0.0,
threshold=0.0,
perlin=0.0,
h_symmetry_time_pct=None,
v_symmetry_time_pct=None,
karras_max=None,
outdir=None,
# these are specific to img2img and inpaint
@ -390,6 +392,8 @@ class Generate:
variation_amount // optional 0-1 value to slerp from -S noise to random noise (allows variations on an image)
threshold // optional value >=0 to add thresholding to latent values for k-diffusion samplers (0 disables)
perlin // optional 0-1 value to add a percentage of perlin noise to the initial noise
h_symmetry_time_pct // optional 0-1 value that indicates the time at which horizontal symmetry is applied
v_symmetry_time_pct // optional 0-1 value that indicates the time at which vertical symmetry is applied
embiggen // scale factor relative to the size of the --init_img (-I), followed by ESRGAN upscaling strength (0-1.0), followed by minimum amount of overlap between tiles as a decimal ratio (0 - 1.0) or number of pixels
embiggen_tiles // list of tiles by number in order to process and replace onto the image e.g. `0 2 4`
embiggen_strength // strength for embiggen. 0.0 preserves image exactly, 1.0 replaces it completely
@ -561,6 +565,8 @@ class Generate:
strength=strength,
threshold=threshold,
perlin=perlin,
h_symmetry_time_pct=h_symmetry_time_pct,
v_symmetry_time_pct=v_symmetry_time_pct,
embiggen=embiggen,
embiggen_tiles=embiggen_tiles,
embiggen_strength=embiggen_strength,
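A minimal sketch of how these options reach the generator through `Generate.prompt2image` (module path and defaults assumed from the InvokeAI layout; prompt and values are illustrative):

```python
from ldm.generate import Generate  # import path assumed

gr = Generate()  # assumes a default model is configured

results = gr.prompt2image(
    prompt='a symmetrical stained glass window',
    steps=50,
    cfg_scale=7.5,
    h_symmetry_time_pct=0.25,   # mirror left-to-right 25% of the way through
    v_symmetry_time_pct=None,   # None leaves vertical symmetry disabled
)
```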

View File

@ -272,6 +272,10 @@ class Args(object):
switches.append('--seamless')
if a['hires_fix']:
switches.append('--hires_fix')
if a['h_symmetry_time_pct']:
switches.append(f'--h_symmetry_time_pct {a["h_symmetry_time_pct"]}')
if a['v_symmetry_time_pct']:
switches.append(f'--v_symmetry_time_pct {a["v_symmetry_time_pct"]}')
# img2img generations have parameters relevant only to them and have special handling
if a['init_img'] and len(a['init_img'])>0:
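The switch reconstruction above follows one pattern: every truthy recorded argument is turned back into its CLI flag. The same idea as a stripped-down standalone sketch (hypothetical helper, not the project's code):

```python
def switches_from_args(a: dict) -> list:
    """Rebuild CLI switches from a dict of recorded arguments (illustrative)."""
    switches = []
    if a.get('seamless'):
        switches.append('--seamless')
    if a.get('hires_fix'):
        switches.append('--hires_fix')
    # Valued options carry their value; None (or 0.0) is treated as "not set".
    for key in ('h_symmetry_time_pct', 'v_symmetry_time_pct'):
        if a.get(key):
            switches.append(f'--{key} {a[key]}')
    return switches

print(switches_from_args({'seamless': True, 'h_symmetry_time_pct': 0.25}))
# ['--seamless', '--h_symmetry_time_pct 0.25']
```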
@ -852,6 +856,18 @@ class Args(object):
type=float,
help='Perlin noise scale (0.0 - 1.0) - add perlin noise to the initialization instead of the usual gaussian noise.',
)
render_group.add_argument(
'--h_symmetry_time_pct',
default=None,
type=float,
help='Horizontal symmetry point (0.0 - 1.0) - apply horizontal symmetry at this point in image generation.',
)
render_group.add_argument(
'--v_symmetry_time_pct',
default=None,
type=float,
help='Vertical symmetry point (0.0 - 1.0) - apply vertical symmetry at this point in image generation.',
)
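A quick standalone check of the two new options (a minimal parser, not the project's full `Args` machinery):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--h_symmetry_time_pct', default=None, type=float,
                    help='Horizontal symmetry point (0.0 - 1.0)')
parser.add_argument('--v_symmetry_time_pct', default=None, type=float,
                    help='Vertical symmetry point (0.0 - 1.0)')

opt = parser.parse_args(['--h_symmetry_time_pct', '0.25'])
print(opt.h_symmetry_time_pct, opt.v_symmetry_time_pct)  # 0.25 None
```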
render_group.add_argument(
'--fnformat',
default='{prefix}.{seed}.png',
@ -1158,7 +1174,8 @@ def metadata_dumps(opt,
# remove any image keys not mentioned in RFC #266
rfc266_img_fields = ['type','postprocessing','sampler','prompt','seed','variations','steps',
'cfg_scale','threshold','perlin','step_number','width','height','extra','strength','seamless',
- 'init_img','init_mask','facetool','facetool_strength','upscale']
+ 'init_img','init_mask','facetool','facetool_strength','upscale','h_symmetry_time_pct',
+ 'v_symmetry_time_pct']
rfc_dict ={}
for item in image_dict.items():

View File

@ -64,6 +64,7 @@ class Generator:
def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None,
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
h_symmetry_time_pct=None, v_symmetry_time_pct=None,
safety_checker:dict=None,
free_gpu_mem: bool=False,
**kwargs):
@ -81,6 +82,8 @@ class Generator:
step_callback = step_callback,
threshold = threshold,
perlin = perlin,
h_symmetry_time_pct = h_symmetry_time_pct,
v_symmetry_time_pct = v_symmetry_time_pct,
attention_maps_callback = attention_maps_callback,
**kwargs
)

View File

@ -16,8 +16,8 @@ class Img2Img(Generator):
self.init_latent = None # by get_noise()
def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
- conditioning,init_image,strength,step_callback=None,threshold=0.0,perlin=0.0,
- attention_maps_callback=None,
+ conditioning,init_image,strength,step_callback=None,threshold=0.0,warmup=0.2,perlin=0.0,
+ h_symmetry_time_pct=None,v_symmetry_time_pct=None,attention_maps_callback=None,
**kwargs):
"""
Returns a function returning an image derived from the prompt and the initial image
@ -33,8 +33,13 @@ class Img2Img(Generator):
conditioning_data = (
ConditioningData(
uc, c, cfg_scale, extra_conditioning_info,
- postprocessing_settings = PostprocessingSettings(threshold, warmup=0.2) if threshold else None)
- .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
+ postprocessing_settings=PostprocessingSettings(
+ threshold=threshold,
+ warmup=warmup,
+ h_symmetry_time_pct=h_symmetry_time_pct,
+ v_symmetry_time_pct=v_symmetry_time_pct
+ )
+ ).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
def make_image(x_T):
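Note that the settings object is now built unconditionally rather than only when `threshold` is set. A minimal sketch of the construction on its own (import path assumed; values illustrative):

```python
from ldm.models.diffusion.shared_invokeai_diffusion import PostprocessingSettings  # path assumed

# One settings object now carries thresholding and both symmetry points;
# a None percentage leaves that symmetry pass disabled.
pps = PostprocessingSettings(
    threshold=0.0,
    warmup=0.2,
    h_symmetry_time_pct=0.25,
    v_symmetry_time_pct=None,
)
```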

View File

@ -15,8 +15,8 @@ class Txt2Img(Generator):
@torch.no_grad()
def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
- conditioning,width,height,step_callback=None,threshold=0.0,perlin=0.0,
- attention_maps_callback=None,
+ conditioning,width,height,step_callback=None,threshold=0.0,warmup=0.2,perlin=0.0,
+ h_symmetry_time_pct=None,v_symmetry_time_pct=None,attention_maps_callback=None,
**kwargs):
"""
Returns a function returning an image derived from the prompt and the initial image
@ -33,8 +33,13 @@ class Txt2Img(Generator):
conditioning_data = (
ConditioningData(
uc, c, cfg_scale, extra_conditioning_info,
- postprocessing_settings = PostprocessingSettings(threshold, warmup=0.2) if threshold else None)
- .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
+ postprocessing_settings=PostprocessingSettings(
+ threshold=threshold,
+ warmup=warmup,
+ h_symmetry_time_pct=h_symmetry_time_pct,
+ v_symmetry_time_pct=v_symmetry_time_pct
+ )
+ ).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
def make_image(x_T) -> PIL.Image.Image:
pipeline_output = pipeline.image_from_embeddings(
@ -44,8 +49,10 @@ class Txt2Img(Generator):
conditioning_data=conditioning_data,
callback=step_callback,
)
if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None:
attention_maps_callback(pipeline_output.attention_map_saver)
return pipeline.numpy_to_pil(pipeline_output.images)[0]
return make_image

View File

@ -21,12 +21,14 @@ class Txt2Img2Img(Generator):
def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_eta,
conditioning, width:int, height:int, strength:float,
- step_callback:Optional[Callable]=None, threshold=0.0, **kwargs):
+ step_callback:Optional[Callable]=None, threshold=0.0, warmup=0.2, perlin=0.0,
+ h_symmetry_time_pct=None, v_symmetry_time_pct=None, attention_maps_callback=None, **kwargs):
"""
Returns a function returning an image derived from the prompt and the initial image
Return value depends on the seed at the time you call it
kwargs are 'width' and 'height'
"""
self.perlin = perlin
# noinspection PyTypeChecker
pipeline: StableDiffusionGeneratorPipeline = self.model
@ -36,8 +38,13 @@ class Txt2Img2Img(Generator):
conditioning_data = (
ConditioningData(
uc, c, cfg_scale, extra_conditioning_info,
- postprocessing_settings = PostprocessingSettings(threshold=threshold, warmup=0.2) if threshold else None)
- .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
+ postprocessing_settings=PostprocessingSettings(
+ threshold=threshold,
+ warmup=warmup,
+ h_symmetry_time_pct=h_symmetry_time_pct,
+ v_symmetry_time_pct=v_symmetry_time_pct
+ )
+ ).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
def make_image(x_T):
@ -69,19 +76,28 @@ class Txt2Img2Img(Generator):
if clear_cuda_cache is not None:
clear_cuda_cache()
- second_pass_noise = self.get_noise_like(resized_latents)
+ second_pass_noise = self.get_noise_like(resized_latents, override_perlin=True)
# Clear symmetry for the second pass
from dataclasses import replace
new_postprocessing_settings = replace(conditioning_data.postprocessing_settings, h_symmetry_time_pct=None, v_symmetry_time_pct=None)
new_conditioning_data = replace(conditioning_data, postprocessing_settings=new_postprocessing_settings)
verbosity = get_verbosity()
set_verbosity_error()
pipeline_output = pipeline.img2img_from_latents_and_embeddings(
resized_latents,
num_inference_steps=steps,
- conditioning_data=conditioning_data,
+ conditioning_data=new_conditioning_data,
strength=strength,
noise=second_pass_noise,
callback=step_callback)
set_verbosity(verbosity)
if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None:
attention_maps_callback(pipeline_output.attention_map_saver)
return pipeline.numpy_to_pil(pipeline_output.images)[0]
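Since `ConditioningData` and `PostprocessingSettings` are dataclasses, `dataclasses.replace` yields an updated copy without mutating the first-pass settings. The same pattern in isolation (toy dataclass, illustrative values):

```python
from dataclasses import dataclass, replace
from typing import Optional

@dataclass
class Settings:
    threshold: float
    h_symmetry_time_pct: Optional[float]
    v_symmetry_time_pct: Optional[float]

first_pass = Settings(threshold=0.0, h_symmetry_time_pct=0.25, v_symmetry_time_pct=None)
# Second pass: identical thresholding, symmetry switched off.
second_pass = replace(first_pass, h_symmetry_time_pct=None, v_symmetry_time_pct=None)
print(second_pass)  # Settings(threshold=0.0, h_symmetry_time_pct=None, v_symmetry_time_pct=None)
```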
@ -95,13 +111,13 @@ class Txt2Img2Img(Generator):
return make_image
- def get_noise_like(self, like: torch.Tensor):
+ def get_noise_like(self, like: torch.Tensor, override_perlin: bool=False):
device = like.device
if device.type == 'mps':
x = torch.randn_like(like, device='cpu', dtype=self.torch_dtype()).to(device)
else:
x = torch.randn_like(like, device=device, dtype=self.torch_dtype())
- if self.perlin > 0.0:
+ if self.perlin > 0.0 and not override_perlin:
shape = like.shape
x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2])
return x
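The perlin mix above is a plain linear interpolation between gaussian and perlin noise, and `override_perlin` simply skips it for the second pass. The blend in isolation (toy tensors; the second tensor stands in for `get_perlin_noise`):

```python
import torch

def blend_noise(x: torch.Tensor, perlin_noise: torch.Tensor, perlin: float) -> torch.Tensor:
    # perlin=0.0 returns x unchanged; perlin=1.0 returns pure perlin noise.
    return (1 - perlin) * x + perlin * perlin_noise

x = torch.randn(1, 4, 64, 64)
p = torch.randn(1, 4, 64, 64)  # stand-in for get_perlin_noise(width, height)
mixed = blend_noise(x, p, perlin=0.3)
```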
@ -139,6 +155,9 @@ class Txt2Img2Img(Generator):
shape = (1, channels,
scaled_height // self.downsampling_factor, scaled_width // self.downsampling_factor)
if self.use_mps_noise or device.type == 'mps':
- return torch.randn(shape, dtype=self.torch_dtype(), device='cpu').to(device)
+ tensor = torch.empty(size=shape, device='cpu')
+ tensor = self.get_noise_like(like=tensor).to(device)
else:
- return torch.randn(shape, dtype=self.torch_dtype(), device=device)
+ tensor = torch.empty(size=shape, device=device)
+ tensor = self.get_noise_like(like=tensor)
return tensor
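The shape arithmetic behind that tensor: Stable Diffusion's VAE downsamples by a factor of 8, so a 512×512 image draws noise at 64×64 over (typically) 4 latent channels:

```python
# Latent noise shape for a 512x512 image (standard Stable Diffusion values).
channels, downsampling_factor = 4, 8
width, height = 512, 512
shape = (1, channels, height // downsampling_factor, width // downsampling_factor)
print(shape)  # (1, 4, 64, 64)
```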

View File

@ -58,6 +58,8 @@ COMMANDS = (
'--inpaint_replace','-r',
'--png_compression','-z',
'--text_mask','-tm',
'--h_symmetry_time_pct',
'--v_symmetry_time_pct',
'!fix','!fetch','!replay','!history','!search','!clear',
'!models','!switch','!import_model','!optimize_model','!convert_model','!edit_model','!del_model',
'!mask','!triggers',
@ -138,7 +140,7 @@ class Completer(object):
elif re.match('^'+'|'.join(MODEL_COMMANDS),buffer):
self.matches= self._model_completions(text, state)
# looking for a ckpt model
elif re.match('^'+'|'.join(CKPT_MODEL_COMMANDS),buffer):
self.matches= self._model_completions(text, state, ckpt_only=True)
@ -255,7 +257,7 @@ class Completer(object):
update our list of models
'''
self.models = models
def _seed_completions(self, text, state):
m = re.search(r'(-S\s?|--seed[=\s]?)(\d*)', text)
if m:
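The seed pattern matches `-S` (optional space) or `--seed` (`=` or space), plus whatever digits have been typed so far, so completions can extend the partial number. A quick demonstration:

```python
import re

seed_re = re.compile(r'(-S\s?|--seed[=\s]?)(\d*)')

for buffer in ('-S42', '--seed=31337', '--seed '):
    m = seed_re.search(buffer)
    if m:
        print((m.group(1), m.group(2)))
# ('-S', '42')
# ('--seed=', '31337')
# ('--seed ', '')
```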

View File

@ -24,6 +24,8 @@ ModelForwardCallback: TypeAlias = Union[
class PostprocessingSettings:
threshold: float
warmup: float
h_symmetry_time_pct: Optional[float]
v_symmetry_time_pct: Optional[float]
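Reconstructed as a whole, the extended dataclass reads as below (a sketch limited to the fields visible in the diff):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class PostprocessingSettings:
    threshold: float
    warmup: float
    h_symmetry_time_pct: Optional[float]  # None = horizontal symmetry disabled
    v_symmetry_time_pct: Optional[float]  # None = vertical symmetry disabled
```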
class InvokeAIDiffuserComponent:
@ -179,6 +181,7 @@ class InvokeAIDiffuserComponent:
if postprocessing_settings is not None:
percent_through = self.calculate_percent_through(sigma, step_index, total_step_count)
latents = self.apply_threshold(postprocessing_settings, latents, percent_through)
latents = self.apply_symmetry(postprocessing_settings, latents, percent_through)
return latents
def calculate_percent_through(self, sigma, step_index, total_step_count):
@ -320,8 +323,12 @@ class InvokeAIDiffuserComponent:
self,
postprocessing_settings: PostprocessingSettings,
latents: torch.Tensor,
- percent_through
+ percent_through: float
) -> torch.Tensor:
if postprocessing_settings.threshold is None or postprocessing_settings.threshold == 0.0:
return latents
threshold = postprocessing_settings.threshold
warmup = postprocessing_settings.warmup
@ -370,6 +377,56 @@ class InvokeAIDiffuserComponent:
return latents
def apply_symmetry(
self,
postprocessing_settings: PostprocessingSettings,
latents: torch.Tensor,
percent_through: float
) -> torch.Tensor:
# Reset our last percent through if this is our first step.
if percent_through == 0.0:
self.last_percent_through = 0.0
if postprocessing_settings is None:
return latents
# Check for out of bounds
h_symmetry_time_pct = postprocessing_settings.h_symmetry_time_pct
if (h_symmetry_time_pct is not None and (h_symmetry_time_pct <= 0.0 or h_symmetry_time_pct > 1.0)):
h_symmetry_time_pct = None
v_symmetry_time_pct = postprocessing_settings.v_symmetry_time_pct
if (v_symmetry_time_pct is not None and (v_symmetry_time_pct <= 0.0 or v_symmetry_time_pct > 1.0)):
v_symmetry_time_pct = None
dev = latents.device.type
latents = latents.to(device='cpu')  # Tensor.to returns a new tensor; the result must be reassigned
if (
h_symmetry_time_pct is not None and
self.last_percent_through < h_symmetry_time_pct and
percent_through >= h_symmetry_time_pct
):
# Horizontal symmetry occurs on the 3rd dimension of the latent
width = latents.shape[3]
x_flipped = torch.flip(latents, dims=[3])
latents = torch.cat([latents[:, :, :, :width // 2], x_flipped[:, :, :, width // 2:]], dim=3)
if (
v_symmetry_time_pct is not None and
self.last_percent_through < v_symmetry_time_pct and
percent_through >= v_symmetry_time_pct
):
# Vertical symmetry occurs on the 2nd dimension of the latent
height = latents.shape[2]
y_flipped = torch.flip(latents, dims=[2])
latents = torch.cat([latents[:, :, :height // 2], y_flipped[:, :, height // 2:]], dim=2)
self.last_percent_through = percent_through
return latents.to(device=dev)
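The mirroring itself is a flip-and-concatenate on the latent: keep one half, replace the other with its mirror image. The horizontal case on a toy NCHW tensor:

```python
import torch

def mirror_horizontal(latents: torch.Tensor) -> torch.Tensor:
    # dim 3 is width in NCHW: keep the left half, mirror it onto the right.
    width = latents.shape[3]
    flipped = torch.flip(latents, dims=[3])
    return torch.cat([latents[:, :, :, :width // 2],
                      flipped[:, :, :, width // 2:]], dim=3)

x = torch.arange(16.0).reshape(1, 1, 2, 8)
y = mirror_horizontal(x)
print(y[0, 0])  # each row reads the same forwards and backwards, e.g. [0,1,2,3,3,2,1,0]
```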
def estimate_percent_through(self, step_index, sigma):
if step_index is not None and self.cross_attention_control_context is not None:
# percent_through will never reach 1.0 (but this is intended)