From 2736d7e15ebf9906f20588d619504b474ee221a7 Mon Sep 17 00:00:00 2001 From: xra Date: Mon, 22 Aug 2022 22:59:06 +0900 Subject: [PATCH 1/3] optional weighting for creative blending of prompts example: "an apple: a banana:0 a watermelon:0.5" the above example turns into 3 sub-prompts: "an apple" 1.0 (default if no value) "a banana" 0.0 "a watermelon" 0.5 The weights are added and normalized The resulting image will be: apple 66%, banana 0%, watermelon 33% --- ldm/simplet2i.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 62f4bea8d4..4c25939621 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -200,7 +200,21 @@ The vast majority of these arguments default to reasonable values. uc = model.get_learned_conditioning(batch_size * [""]) if isinstance(prompts, tuple): prompts = list(prompts) - c = model.get_learned_conditioning(prompts) + + # weighted sub-prompts + subprompts,weights = T2I.split_weighted_subprompts(prompts[0]) + if len(subprompts) > 1: + # i dont know if this is correct.. but it works + c = torch.zeros_like(uc) + # get total weight for normalizing + totalWeight = sum(weights) + # normalize each "sub prompt" and add it + for i in range(0,len(subprompts)): + weight = weights[i] / totalWeight + c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight) + else: # just standard 1 prompt + c = model.get_learned_conditioning(prompts) + shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor] samples_ddim, _ = sampler.sample(S=steps, conditioning=c, @@ -319,7 +333,20 @@ The vast majority of these arguments default to reasonable values. uc = model.get_learned_conditioning(batch_size * [""]) if isinstance(prompts, tuple): prompts = list(prompts) - c = model.get_learned_conditioning(prompts) + + # weighted sub-prompts + subprompts,weights = T2I.split_weighted_subprompts(prompts[0]) + if len(subprompts) > 1: + # i dont know if this is correct.. but it works + c = torch.zeros_like(uc) + # get total weight for normalizing + totalWeight = sum(weights) + # normalize each "sub prompt" and add it + for i in range(0,len(subprompts)): + weight = weights[i] / totalWeight + c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight) + else: # just standard 1 prompt + c = model.get_learned_conditioning(prompts) # encode (scaled latent) z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device)) @@ -430,3 +457,53 @@ The vast majority of these arguments default to reasonable values. image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) return 2.*image - 1. + """ + example: "an apple: a banana:0 a watermelon:0.5" + grabs all text up to the first occurance of ':' + then removes the text, repeating until no characters left. + if ':' has no weight defined, defaults to 1.0 + + the above example turns into 3 sub-prompts: + "an apple" 1.0 + "a banana" 0.0 + "a watermelon" 0.5 + The weights are added and normalized + The resulting image will be: apple 66% (1.0 / 1.5), banana 0%, watermelon 33% (0.5 / 1.5) + """ + def split_weighted_subprompts(text): + # very simple, uses : to separate sub-prompts + # assumes number following : and space after number + # if no number found, defaults to 1.0 + remaining = len(text) + prompts = [] + weights = [] + while remaining > 0: + # find : + if ":" in text: + idx = text.index(":") # first occurrance from start + # snip sub prompt + prompt = text[:idx] + remaining -= idx + # remove from main text + text = text[idx+1:] + # get number + if " " in text: + idx = text.index(" ") # first occurance + else: # no space, read to end + idx = len(text) + if idx != 0: + weight = float(text[:idx]) + else: # no number to grab + weight = 1.0 + # remove + remaining -= idx + text = text[idx+1:] + prompts.append(prompt) + weights.append(weight) + else: + if len(text) > 0: + # take what remains as weight 1 + prompts.append(text) + weights.append(1.0) + remaining = 0 + return prompts, weights \ No newline at end of file From a3632f5b4f0044a153f4c0f3c7fdb878ad029a90 Mon Sep 17 00:00:00 2001 From: xra Date: Mon, 22 Aug 2022 23:32:01 +0900 Subject: [PATCH 2/3] improved comments & added warning if value couldn't be parsed correctly --- ldm/simplet2i.py | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 4c25939621..78b70000df 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -457,52 +457,47 @@ The vast majority of these arguments default to reasonable values. image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) return 2.*image - 1. - """ - example: "an apple: a banana:0 a watermelon:0.5" - grabs all text up to the first occurance of ':' - then removes the text, repeating until no characters left. - if ':' has no weight defined, defaults to 1.0 - the above example turns into 3 sub-prompts: - "an apple" 1.0 - "a banana" 0.0 - "a watermelon" 0.5 - The weights are added and normalized - The resulting image will be: apple 66% (1.0 / 1.5), banana 0%, watermelon 33% (0.5 / 1.5) + """ + grabs all text up to the first occurrence of ':' + uses the grabbed text as a sub-prompt, and takes the value following ':' as weight + if ':' has no value defined, defaults to 1.0 + repeats until no text remaining """ def split_weighted_subprompts(text): - # very simple, uses : to separate sub-prompts - # assumes number following : and space after number - # if no number found, defaults to 1.0 remaining = len(text) prompts = [] weights = [] while remaining > 0: - # find : if ":" in text: - idx = text.index(":") # first occurrance from start - # snip sub prompt + idx = text.index(":") # first occurrence from start + # grab up to index as sub-prompt prompt = text[:idx] remaining -= idx # remove from main text text = text[idx+1:] - # get number + # find value for weight if " " in text: - idx = text.index(" ") # first occurance + idx = text.index(" ") # first occurence else: # no space, read to end idx = len(text) if idx != 0: - weight = float(text[:idx]) - else: # no number to grab + try: + weight = float(text[:idx]) + except: # couldn't treat as float + print(f"Warning: '{text[:idx]}' is not a value, are you missing a space?") + weight = 1.0 + else: # no value found weight = 1.0 - # remove + # remove from main text remaining -= idx text = text[idx+1:] + # append the sub-prompt and its weight prompts.append(prompt) weights.append(weight) - else: - if len(text) > 0: - # take what remains as weight 1 + else: # no : found + if len(text) > 0: # there is still text though + # take remainder as weight 1 prompts.append(text) weights.append(1.0) remaining = 0 From e4eb775b6388c284ab307fa6a74f17aac93c0853 Mon Sep 17 00:00:00 2001 From: xra Date: Tue, 23 Aug 2022 00:03:32 +0900 Subject: [PATCH 3/3] added optional parameter to skip subprompt weight normalization allows more control when fine-tuning --- ldm/simplet2i.py | 12 ++++++++---- scripts/dream.py | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 78b70000df..ab3195284a 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -142,7 +142,7 @@ The vast majority of these arguments default to reasonable values. def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None, - cfg_scale=None,ddim_eta=None,strength=None,init_img=None): + cfg_scale=None,ddim_eta=None,strength=None,init_img=None,skip_normalize=False): """ Generate an image from the prompt, writing iteration images into the outdir The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...] @@ -210,7 +210,9 @@ The vast majority of these arguments default to reasonable values. totalWeight = sum(weights) # normalize each "sub prompt" and add it for i in range(0,len(subprompts)): - weight = weights[i] / totalWeight + weight = weights[i] + if not skip_normalize: + weight = weight / totalWeight c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight) else: # just standard 1 prompt c = model.get_learned_conditioning(prompts) @@ -257,7 +259,7 @@ The vast majority of these arguments default to reasonable values. # There is lots of shared code between this and txt2img and should be refactored. def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None, - cfg_scale=None,ddim_eta=None,strength=None): + cfg_scale=None,ddim_eta=None,strength=None,skip_normalize=False): """ Generate an image from the prompt and the initial image, writing iteration images into the outdir The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...] @@ -343,7 +345,9 @@ The vast majority of these arguments default to reasonable values. totalWeight = sum(weights) # normalize each "sub prompt" and add it for i in range(0,len(subprompts)): - weight = weights[i] / totalWeight + weight = weights[i] + if not skip_normalize: + weight = weight / totalWeight c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight) else: # just standard 1 prompt c = model.get_learned_conditioning(prompts) diff --git a/scripts/dream.py b/scripts/dream.py index 4e18492b3d..2625b6027f 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -233,6 +233,7 @@ def create_cmd_parser(): parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)") parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)") parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely") + parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization") return parser if readline_available: