From d99a08a4414e3e5edd2f835ca995dc3ae1308e84 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Tue, 25 Apr 2023 03:48:44 +0300 Subject: [PATCH 01/15] Add compel node and conditioning field type --- invokeai/app/invocations/compel.py | 272 ++++++++++++++++++ .../web/src/common/util/parseMetadata.ts | 44 ++- .../nodes/components/InputFieldComponent.tsx | 11 + .../ConditioningInputFieldComponent.tsx | 19 ++ .../web/src/features/nodes/types/constants.ts | 7 + .../web/src/features/nodes/types/types.ts | 13 + .../nodes/util/fieldTemplateBuilders.ts | 19 ++ .../features/nodes/util/fieldValueBuilders.ts | 4 + 8 files changed, 386 insertions(+), 3 deletions(-) create mode 100644 invokeai/app/invocations/compel.py create mode 100644 invokeai/frontend/web/src/features/nodes/components/fields/ConditioningInputFieldComponent.tsx diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py new file mode 100644 index 0000000000..87f6f0fcca --- /dev/null +++ b/invokeai/app/invocations/compel.py @@ -0,0 +1,272 @@ +from typing import Literal, Optional, Union +from pydantic import BaseModel, Field + +from invokeai.app.invocations.util.choose_model import choose_model +from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig + +from ...backend.util.devices import choose_torch_device, torch_dtype +from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent +from ...backend.stable_diffusion.textual_inversion_manager import TextualInversionManager + +from compel import Compel +from compel.prompt_parser import ( + Blend, + CrossAttentionControlSubstitute, + FlattenedPrompt, + Fragment, +) + +from invokeai.backend.globals import Globals + + +class ConditioningField(BaseModel): + conditioning_name: Optional[str] = Field(default=None, description="The name of conditioning data") + class Config: + schema_extra = {"required": ["conditioning_name"]} + + +class CompelOutput(BaseInvocationOutput): + """Compel parser output""" + + #fmt: off + type: Literal["compel_output"] = "compel_output" + # name + loras -> pipeline + loras + # model: ModelField = Field(default=None, description="Model") + # src? + loras -> tokenizer + text_encoder + loras + # clip: ClipField = Field(default=None, description="Text encoder(clip)") + positive: ConditioningField = Field(default=None, description="Positive conditioning") + negative: ConditioningField = Field(default=None, description="Negative conditioning") + #fmt: on + + +class CompelInvocation(BaseInvocation): + + type: Literal["compel"] = "compel" + + positive_prompt: str = Field(default="", description="Positive prompt") + negative_prompt: str = Field(default="", description="Negative prompt") + + model: str = Field(default="", description="Model to use") + truncate_long_prompts: bool = Field(default=False, description="Whether or not to truncate long prompt to 77 tokens") + + # name + loras -> pipeline + loras + # model: ModelField = Field(default=None, description="Model to use") + # src? 
+ loras -> tokenizer + text_encoder + loras + # clip: ClipField = Field(default=None, description="Text encoder(clip) to use") + + # Schema customisation + class Config(InvocationConfig): + schema_extra = { + "ui": { + "tags": ["latents", "noise"], + "type_hints": { + "model": "model" + } + }, + } + + def invoke(self, context: InvocationContext) -> CompelOutput: + + # TODO: load without model + model = choose_model(context.services.model_manager, self.model) + pipeline = model["model"] + tokenizer = pipeline.tokenizer + text_encoder = pipeline.text_encoder + + # TODO: global? input? + #use_full_precision = precision == "float32" or precision == "autocast" + use_full_precision = False + + textual_inversion_manager = TextualInversionManager( + tokenizer=tokenizer, + text_encoder=text_encoder, + full_precision=use_full_precision, + ) + + # lazy-load any deferred textual inversions. + # this might take a couple of seconds the first time a textual inversion is used. + textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms( + self.positive_prompt + "[" + self.negative_prompt + "]" + ) + + compel = Compel( + tokenizer=tokenizer, + text_encoder=text_encoder, + textual_inversion_manager=textual_inversion_manager, + dtype_for_device_getter=torch_dtype, + truncate_long_prompts=self.truncate_long_prompts, + ) + + + # TODO: support legacy blend? + + positive_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(self.positive_prompt) + negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(self.negative_prompt) + + if True: #getattr(Globals, "log_tokenization", False): + log_tokenization(positive_prompt, negative_prompt, tokenizer=tokenizer) + + # TODO: add lora(with model and clip field types) + c, c_options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt) + uc, uc_options = compel.build_conditioning_tensor_for_prompt_object(negative_prompt) + + if not self.truncate_long_prompts: + [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc]) + + c_ec = InvokeAIDiffuserComponent.ExtraConditioningInfo( + tokens_count_including_eos_bos=get_max_token_count(tokenizer, positive_prompt), + cross_attention_control_args=c_options.get("cross_attention_control", None), + ) + + uc_ec = InvokeAIDiffuserComponent.ExtraConditioningInfo( + tokens_count_including_eos_bos=get_max_token_count(tokenizer, negative_prompt), + cross_attention_control_args=uc_options.get("cross_attention_control", None), + ) + + name_prefix = f'{context.graph_execution_state_id}__{self.id}' + name_positive = f"{name_prefix}_positive" + name_negative = f"{name_prefix}_negative" + + # TODO: hacky but works ;D maybe rename latents somehow? 
+ context.services.latents.set(name_positive, (c, c_ec)) + context.services.latents.set(name_negative, (uc, uc_ec)) + + return CompelOutput( + positive=ConditioningField( + conditioning_name=name_positive, + ), + negative=ConditioningField( + conditioning_name=name_negative, + ), + ) + + +def get_max_token_count( + tokenizer, prompt: Union[FlattenedPrompt, Blend], truncate_if_too_long=False +) -> int: + if type(prompt) is Blend: + blend: Blend = prompt + return max( + [ + get_max_token_count(tokenizer, c, truncate_if_too_long) + for c in blend.prompts + ] + ) + else: + return len( + get_tokens_for_prompt_object(tokenizer, prompt, truncate_if_too_long) + ) + + +def get_tokens_for_prompt_object( + tokenizer, parsed_prompt: FlattenedPrompt, truncate_if_too_long=True +) -> [str]: + if type(parsed_prompt) is Blend: + raise ValueError( + "Blend is not supported here - you need to get tokens for each of its .children" + ) + + text_fragments = [ + x.text + if type(x) is Fragment + else ( + " ".join([f.text for f in x.original]) + if type(x) is CrossAttentionControlSubstitute + else str(x) + ) + for x in parsed_prompt.children + ] + text = " ".join(text_fragments) + tokens = tokenizer.tokenize(text) + if truncate_if_too_long: + max_tokens_length = tokenizer.model_max_length - 2 # typically 75 + tokens = tokens[0:max_tokens_length] + return tokens + + +def log_tokenization( + positive_prompt: Union[Blend, FlattenedPrompt], + negative_prompt: Union[Blend, FlattenedPrompt], + tokenizer, +): + print(f"\n>> [TOKENLOG] Parsed Prompt: {positive_prompt}") + print(f"\n>> [TOKENLOG] Parsed Negative Prompt: {negative_prompt}") + + log_tokenization_for_prompt_object(positive_prompt, tokenizer) + log_tokenization_for_prompt_object( + negative_prompt, tokenizer, display_label_prefix="(negative prompt)" + ) + + +def log_tokenization_for_prompt_object( + p: Union[Blend, FlattenedPrompt], tokenizer, display_label_prefix=None +): + display_label_prefix = display_label_prefix or "" + if type(p) is Blend: + blend: Blend = p + for i, c in enumerate(blend.prompts): + log_tokenization_for_prompt_object( + c, + tokenizer, + display_label_prefix=f"{display_label_prefix}(blend part {i + 1}, weight={blend.weights[i]})", + ) + elif type(p) is FlattenedPrompt: + flattened_prompt: FlattenedPrompt = p + if flattened_prompt.wants_cross_attention_control: + original_fragments = [] + edited_fragments = [] + for f in flattened_prompt.children: + if type(f) is CrossAttentionControlSubstitute: + original_fragments += f.original + edited_fragments += f.edited + else: + original_fragments.append(f) + edited_fragments.append(f) + + original_text = " ".join([x.text for x in original_fragments]) + log_tokenization_for_text( + original_text, + tokenizer, + display_label=f"{display_label_prefix}(.swap originals)", + ) + edited_text = " ".join([x.text for x in edited_fragments]) + log_tokenization_for_text( + edited_text, + tokenizer, + display_label=f"{display_label_prefix}(.swap replacements)", + ) + else: + text = " ".join([x.text for x in flattened_prompt.children]) + log_tokenization_for_text( + text, tokenizer, display_label=display_label_prefix + ) + + +def log_tokenization_for_text(text, tokenizer, display_label=None, truncate_if_too_long=False): + """shows how the prompt is tokenized + # usually tokens have '' to indicate end-of-word, + # but for readability it has been replaced with ' ' + """ + tokens = tokenizer.tokenize(text) + tokenized = "" + discarded = "" + usedTokens = 0 + totalTokens = len(tokens) + + for i in range(0, 
totalTokens): + token = tokens[i].replace("", " ") + # alternate color + s = (usedTokens % 6) + 1 + if truncate_if_too_long and i >= tokenizer.model_max_length: + discarded = discarded + f"\x1b[0;3{s};40m{token}" + else: + tokenized = tokenized + f"\x1b[0;3{s};40m{token}" + usedTokens += 1 + + if usedTokens > 0: + print(f'\n>> [TOKENLOG] Tokens {display_label or ""} ({usedTokens}):') + print(f"{tokenized}\x1b[0m") + + if discarded != "": + print(f"\n>> [TOKENLOG] Tokens Discarded ({totalTokens - usedTokens}):") + print(f"{discarded}\x1b[0m") diff --git a/invokeai/frontend/web/src/common/util/parseMetadata.ts b/invokeai/frontend/web/src/common/util/parseMetadata.ts index 433aa9b2a1..a017019125 100644 --- a/invokeai/frontend/web/src/common/util/parseMetadata.ts +++ b/invokeai/frontend/web/src/common/util/parseMetadata.ts @@ -1,5 +1,5 @@ import { forEach, size } from 'lodash'; -import { ImageField, LatentsField } from 'services/api'; +import { ImageField, LatentsField, ConditioningField } from 'services/api'; const OBJECT_TYPESTRING = '[object Object]'; const STRING_TYPESTRING = '[object String]'; @@ -74,8 +74,38 @@ const parseLatentsField = (latentsField: unknown): LatentsField | undefined => { }; }; +const parseConditioningField = ( + conditioningField: unknown +): ConditioningField | undefined => { + // Must be an object + if (!isObject(conditioningField)) { + return; + } + + // A ConditioningField must have a `conditioning_name` + if (!('conditioning_name' in conditioningField)) { + return; + } + + // A ConditioningField's `conditioning_name` must be a string + if (typeof conditioningField.conditioning_name !== 'string') { + return; + } + + // Build a valid ConditioningField + return { + conditioning_name: conditioningField.conditioning_name, + }; +}; + type NodeMetadata = { - [key: string]: string | number | boolean | ImageField | LatentsField; + [key: string]: + | string + | number + | boolean + | ImageField + | LatentsField + | ConditioningField; }; type InvokeAIMetadata = { @@ -101,7 +131,7 @@ export const parseNodeMetadata = ( return; } - // the only valid object types are ImageField and LatentsField + // the only valid object types are ImageField, LatentsField and ConditioningField if (isObject(nodeItem)) { if ('image_name' in nodeItem || 'image_type' in nodeItem) { const imageField = parseImageField(nodeItem); @@ -118,6 +148,14 @@ export const parseNodeMetadata = ( } return; } + + if ('conditioning_name' in nodeItem) { + const conditioningField = parseConditioningField(nodeItem); + if (conditioningField) { + parsed[nodeKey] = conditioningField; + } + return; + } } // otherwise we accept any string, number or boolean diff --git a/invokeai/frontend/web/src/features/nodes/components/InputFieldComponent.tsx b/invokeai/frontend/web/src/features/nodes/components/InputFieldComponent.tsx index 21e4b9fcfb..01d6d01b48 100644 --- a/invokeai/frontend/web/src/features/nodes/components/InputFieldComponent.tsx +++ b/invokeai/frontend/web/src/features/nodes/components/InputFieldComponent.tsx @@ -6,6 +6,7 @@ import BooleanInputFieldComponent from './fields/BooleanInputFieldComponent'; import EnumInputFieldComponent from './fields/EnumInputFieldComponent'; import ImageInputFieldComponent from './fields/ImageInputFieldComponent'; import LatentsInputFieldComponent from './fields/LatentsInputFieldComponent'; +import ConditioningInputFieldComponent from './fields/ConditioningInputFieldComponent'; import ModelInputFieldComponent from './fields/ModelInputFieldComponent'; import NumberInputFieldComponent 
from './fields/NumberInputFieldComponent'; import StringInputFieldComponent from './fields/StringInputFieldComponent'; @@ -84,6 +85,16 @@ const InputFieldComponent = (props: InputFieldComponentProps) => { ); } + if (type === 'conditioning' && template.type === 'conditioning') { + return ( + + ); + } + if (type === 'model' && template.type === 'model') { return ( +) => { + const { nodeId, field } = props; + + return null; +}; + +export default memo(ConditioningInputFieldComponent); diff --git a/invokeai/frontend/web/src/features/nodes/types/constants.ts b/invokeai/frontend/web/src/features/nodes/types/constants.ts index 01497651e3..73bd7bb0a1 100644 --- a/invokeai/frontend/web/src/features/nodes/types/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts @@ -11,6 +11,7 @@ export const FIELD_TYPE_MAP: Record = { enum: 'enum', ImageField: 'image', LatentsField: 'latents', + ConditioningField: 'conditioning', model: 'model', array: 'array', }; @@ -63,6 +64,12 @@ export const FIELDS: Record = { title: 'Latents', description: 'Latents may be passed between nodes.', }, + conditioning: { + color: 'cyan', + colorCssVar: getColorTokenCssVariable('cyan'), + title: 'Conditioning', + description: 'Conditioning may be passed between nodes.', + }, model: { color: 'teal', colorCssVar: getColorTokenCssVariable('teal'), diff --git a/invokeai/frontend/web/src/features/nodes/types/types.ts b/invokeai/frontend/web/src/features/nodes/types/types.ts index 4b5548e351..568c5fa831 100644 --- a/invokeai/frontend/web/src/features/nodes/types/types.ts +++ b/invokeai/frontend/web/src/features/nodes/types/types.ts @@ -56,6 +56,7 @@ export type FieldType = | 'enum' | 'image' | 'latents' + | 'conditioning' | 'model' | 'array'; @@ -74,6 +75,7 @@ export type InputFieldValue = | BooleanInputFieldValue | ImageInputFieldValue | LatentsInputFieldValue + | ConditioningInputFieldValue | EnumInputFieldValue | ModelInputFieldValue | ArrayInputFieldValue; @@ -91,6 +93,7 @@ export type InputFieldTemplate = | BooleanInputFieldTemplate | ImageInputFieldTemplate | LatentsInputFieldTemplate + | ConditioningInputFieldTemplate | EnumInputFieldTemplate | ModelInputFieldTemplate | ArrayInputFieldTemplate; @@ -162,6 +165,11 @@ export type LatentsInputFieldValue = FieldValueBase & { value?: undefined; }; +export type ConditioningInputFieldValue = FieldValueBase & { + type: 'conditioning'; + value?: undefined; +}; + export type ImageInputFieldValue = FieldValueBase & { type: 'image'; value?: Pick; @@ -229,6 +237,11 @@ export type LatentsInputFieldTemplate = InputFieldTemplateBase & { type: 'latents'; }; +export type ConditioningInputFieldTemplate = InputFieldTemplateBase & { + default: undefined; + type: 'conditioning'; +}; + export type EnumInputFieldTemplate = InputFieldTemplateBase & { default: string | number; type: 'enum'; diff --git a/invokeai/frontend/web/src/features/nodes/util/fieldTemplateBuilders.ts b/invokeai/frontend/web/src/features/nodes/util/fieldTemplateBuilders.ts index e37f446e00..6d057cc764 100644 --- a/invokeai/frontend/web/src/features/nodes/util/fieldTemplateBuilders.ts +++ b/invokeai/frontend/web/src/features/nodes/util/fieldTemplateBuilders.ts @@ -9,6 +9,7 @@ import { ImageInputFieldTemplate, IntegerInputFieldTemplate, LatentsInputFieldTemplate, + ConditioningInputFieldTemplate, StringInputFieldTemplate, ModelInputFieldTemplate, InputFieldTemplateBase, @@ -196,6 +197,21 @@ const buildLatentsInputFieldTemplate = ({ return template; }; +const buildConditioningInputFieldTemplate = ({ + 
schemaObject, + baseField, +}: BuildInputFieldArg): ConditioningInputFieldTemplate => { + const template: ConditioningInputFieldTemplate = { + ...baseField, + type: 'conditioning', + inputRequirement: 'always', + inputKind: 'connection', + default: schemaObject.default ?? undefined, + }; + + return template; +}; + const buildEnumInputFieldTemplate = ({ schemaObject, baseField, @@ -266,6 +282,9 @@ export const buildInputFieldTemplate = ( if (['latents'].includes(fieldType)) { return buildLatentsInputFieldTemplate({ schemaObject, baseField }); } + if (['conditioning'].includes(fieldType)) { + return buildConditioningInputFieldTemplate({ schemaObject, baseField }); + } if (['model'].includes(fieldType)) { return buildModelInputFieldTemplate({ schemaObject, baseField }); } diff --git a/invokeai/frontend/web/src/features/nodes/util/fieldValueBuilders.ts b/invokeai/frontend/web/src/features/nodes/util/fieldValueBuilders.ts index f2db2b5dc4..9221e5f7ac 100644 --- a/invokeai/frontend/web/src/features/nodes/util/fieldValueBuilders.ts +++ b/invokeai/frontend/web/src/features/nodes/util/fieldValueBuilders.ts @@ -48,6 +48,10 @@ export const buildInputFieldValue = ( fieldValue.value = undefined; } + if (template.type === 'conditioning') { + fieldValue.value = undefined; + } + if (template.type === 'model') { fieldValue.value = undefined; } From 8f460b92f13890179924a59678383f7b7c31dea9 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Tue, 25 Apr 2023 04:21:03 +0300 Subject: [PATCH 02/15] Make latent generation nodes use conditions instead of prompt --- invokeai/app/invocations/latent.py | 14 ++++++++------ invokeai/app/services/default_graphs.py | 20 ++++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 3d1c925570..e62bb4d6a4 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -13,13 +13,13 @@ from ...backend.model_management.model_manager import ModelManager from ...backend.util.devices import choose_torch_device, torch_dtype from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import PostprocessingSettings from ...backend.image_util.seamless import configure_model_padding -from ...backend.prompting.conditioning import get_uc_and_c_and_ec from ...backend.stable_diffusion.diffusers_pipeline import ConditioningData, StableDiffusionGeneratorPipeline from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig import numpy as np from ..services.image_storage import ImageType from .baseinvocation import BaseInvocation, InvocationContext from .image import ImageField, ImageOutput, build_image_output +from .compel import ConditioningField from ...backend.stable_diffusion import PipelineIntermediateState from diffusers.schedulers import SchedulerMixin as Scheduler import diffusers @@ -143,9 +143,9 @@ class TextToLatentsInvocation(BaseInvocation): type: Literal["t2l"] = "t2l" # Inputs - # TODO: consider making prompt optional to enable providing prompt through a link # fmt: off - prompt: Optional[str] = Field(description="The prompt to generate an image from") + positive: Optional[ConditioningField] = Field(description="Positive conditioning for generation") + negative: Optional[ConditioningField] = Field(description="Negative conditioning for generation") seed: int = Field(default=-1,ge=-1, le=np.iinfo(np.uint32).max, description="The seed to use (-1 for a random seed)", ) noise: Optional[LatentsField] = 
Field(description="The noise to use") steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") @@ -206,8 +206,10 @@ class TextToLatentsInvocation(BaseInvocation): return model - def get_conditioning_data(self, model: StableDiffusionGeneratorPipeline) -> ConditioningData: - uc, c, extra_conditioning_info = get_uc_and_c_and_ec(self.prompt, model=model) + def get_conditioning_data(self, context: InvocationContext, model: StableDiffusionGeneratorPipeline) -> ConditioningData: + c, extra_conditioning_info = context.services.latents.get(self.positive.conditioning_name) + uc, _ = context.services.latents.get(self.negative.conditioning_name) + conditioning_data = ConditioningData( uc, c, @@ -234,7 +236,7 @@ class TextToLatentsInvocation(BaseInvocation): self.dispatch_progress(context, source_node_id, state) model = self.get_model(context.services.model_manager) - conditioning_data = self.get_conditioning_data(model) + conditioning_data = self.get_conditioning_data(context, model) # TODO: Verify the noise is the right size diff --git a/invokeai/app/services/default_graphs.py b/invokeai/app/services/default_graphs.py index 637d906e75..47e951b44a 100644 --- a/invokeai/app/services/default_graphs.py +++ b/invokeai/app/services/default_graphs.py @@ -1,4 +1,5 @@ from ..invocations.latent import LatentsToImageInvocation, NoiseInvocation, TextToLatentsInvocation +from ..invocations.compel import CompelInvocation from ..invocations.params import ParamIntInvocation from .graph import Edge, EdgeConnection, ExposedNodeInput, ExposedNodeOutput, Graph, LibraryGraph from .item_storage import ItemStorageABC @@ -17,25 +18,28 @@ def create_text_to_image() -> LibraryGraph: 'width': ParamIntInvocation(id='width', a=512), 'height': ParamIntInvocation(id='height', a=512), '3': NoiseInvocation(id='3'), - '4': TextToLatentsInvocation(id='4'), - '5': LatentsToImageInvocation(id='5') + '4': CompelInvocation(id='4'), + '5': TextToLatentsInvocation(id='5'), + '6': LatentsToImageInvocation(id='6'), }, edges=[ Edge(source=EdgeConnection(node_id='width', field='a'), destination=EdgeConnection(node_id='3', field='width')), Edge(source=EdgeConnection(node_id='height', field='a'), destination=EdgeConnection(node_id='3', field='height')), - Edge(source=EdgeConnection(node_id='width', field='a'), destination=EdgeConnection(node_id='4', field='width')), - Edge(source=EdgeConnection(node_id='height', field='a'), destination=EdgeConnection(node_id='4', field='height')), - Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='4', field='noise')), - Edge(source=EdgeConnection(node_id='4', field='latents'), destination=EdgeConnection(node_id='5', field='latents')), + Edge(source=EdgeConnection(node_id='width', field='a'), destination=EdgeConnection(node_id='5', field='width')), + Edge(source=EdgeConnection(node_id='height', field='a'), destination=EdgeConnection(node_id='5', field='height')), + Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='5', field='noise')), + Edge(source=EdgeConnection(node_id='5', field='latents'), destination=EdgeConnection(node_id='6', field='latents')), + Edge(source=EdgeConnection(node_id='4', field='positive'), destination=EdgeConnection(node_id='5', field='positive')), + Edge(source=EdgeConnection(node_id='4', field='negative'), destination=EdgeConnection(node_id='5', field='negative')), ] ), exposed_inputs=[ - ExposedNodeInput(node_path='4', field='prompt', alias='prompt'), + 
ExposedNodeInput(node_path='4', field='positive_prompt', alias='prompt'), # TODO: cli uses concatenated prompt ExposedNodeInput(node_path='width', field='a', alias='width'), ExposedNodeInput(node_path='height', field='a', alias='height') ], exposed_outputs=[ - ExposedNodeOutput(node_path='5', field='image', alias='image') + ExposedNodeOutput(node_path='6', field='image', alias='image') ]) From 8cb2fa86008d16232b22bb4e653c9869522dbc8a Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Tue, 25 Apr 2023 04:29:17 +0300 Subject: [PATCH 03/15] Restore log_tokenization check --- invokeai/app/invocations/compel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 87f6f0fcca..653c60b686 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -103,7 +103,7 @@ class CompelInvocation(BaseInvocation): positive_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(self.positive_prompt) negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(self.negative_prompt) - if True: #getattr(Globals, "log_tokenization", False): + if getattr(Globals, "log_tokenization", False): log_tokenization(positive_prompt, negative_prompt, tokenizer=tokenizer) # TODO: add lora(with model and clip field types) From 37916a22adc945973a21fd1d6ca32c53cb1a303e Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Tue, 25 Apr 2023 12:53:13 +0300 Subject: [PATCH 04/15] Use textual inversion manager from pipeline, remove extra conditioning info for uc --- invokeai/app/invocations/compel.py | 55 +++++++++++-------- .../backend/model_management/model_manager.py | 2 + 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 653c60b686..948d7d130c 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -75,52 +75,63 @@ class CompelInvocation(BaseInvocation): # TODO: global? input? #use_full_precision = precision == "float32" or precision == "autocast" - use_full_precision = False + #use_full_precision = False - textual_inversion_manager = TextualInversionManager( - tokenizer=tokenizer, - text_encoder=text_encoder, - full_precision=use_full_precision, + # TODO: redo TI when separate model loding implemented + #textual_inversion_manager = TextualInversionManager( + # tokenizer=tokenizer, + # text_encoder=text_encoder, + # full_precision=use_full_precision, + #) + + def load_huggingface_concepts(concepts: list[str]): + pipeline.textual_inversion_manager.load_huggingface_concepts(concepts) + + # apply the concepts library to the prompt + positive_prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers( + self.positive_prompt, + lambda concepts: load_huggingface_concepts(concepts), + pipeline.textual_inversion_manager.get_all_trigger_strings(), + ) + + negative_prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers( + self.negative_prompt, + lambda concepts: load_huggingface_concepts(concepts), + pipeline.textual_inversion_manager.get_all_trigger_strings(), ) # lazy-load any deferred textual inversions. # this might take a couple of seconds the first time a textual inversion is used. 
- textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms( - self.positive_prompt + "[" + self.negative_prompt + "]" + pipeline.textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms( + positive_prompt_str + "[" + negative_prompt_str + "]" ) compel = Compel( tokenizer=tokenizer, text_encoder=text_encoder, - textual_inversion_manager=textual_inversion_manager, + textual_inversion_manager=pipeline.textual_inversion_manager, dtype_for_device_getter=torch_dtype, truncate_long_prompts=self.truncate_long_prompts, ) - # TODO: support legacy blend? - positive_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(self.positive_prompt) - negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(self.negative_prompt) + positive_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(positive_prompt_str) + negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(negative_prompt_str) if getattr(Globals, "log_tokenization", False): log_tokenization(positive_prompt, negative_prompt, tokenizer=tokenizer) # TODO: add lora(with model and clip field types) - c, c_options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt) - uc, uc_options = compel.build_conditioning_tensor_for_prompt_object(negative_prompt) + c, options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt) + uc, _ = compel.build_conditioning_tensor_for_prompt_object(negative_prompt) if not self.truncate_long_prompts: [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc]) - c_ec = InvokeAIDiffuserComponent.ExtraConditioningInfo( + ec = InvokeAIDiffuserComponent.ExtraConditioningInfo( tokens_count_including_eos_bos=get_max_token_count(tokenizer, positive_prompt), - cross_attention_control_args=c_options.get("cross_attention_control", None), - ) - - uc_ec = InvokeAIDiffuserComponent.ExtraConditioningInfo( - tokens_count_including_eos_bos=get_max_token_count(tokenizer, negative_prompt), - cross_attention_control_args=uc_options.get("cross_attention_control", None), + cross_attention_control_args=options.get("cross_attention_control", None), ) name_prefix = f'{context.graph_execution_state_id}__{self.id}' @@ -128,8 +139,8 @@ class CompelInvocation(BaseInvocation): name_negative = f"{name_prefix}_negative" # TODO: hacky but works ;D maybe rename latents somehow? 
- context.services.latents.set(name_positive, (c, c_ec)) - context.services.latents.set(name_negative, (uc, uc_ec)) + context.services.latents.set(name_positive, (c, ec)) + context.services.latents.set(name_negative, (uc, None)) return CompelOutput( positive=ConditioningField( diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py index 534b526081..baa48adf5e 100644 --- a/invokeai/backend/model_management/model_manager.py +++ b/invokeai/backend/model_management/model_manager.py @@ -1191,6 +1191,8 @@ class ModelManager(object): if self.embedding_path is not None: print(f">> Loading embeddings from {self.embedding_path}") for root, _, files in os.walk(self.embedding_path): + print(root) + print(files) for name in files: ti_path = os.path.join(root, name) model.textual_inversion_manager.load_textual_inversion( From 89f1909e4b17726d2999ebd907225d1e031e6a8c Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Tue, 25 Apr 2023 13:11:50 +0300 Subject: [PATCH 05/15] Update default graph --- invokeai/app/services/default_graphs.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/invokeai/app/services/default_graphs.py b/invokeai/app/services/default_graphs.py index 47e951b44a..3521f41594 100644 --- a/invokeai/app/services/default_graphs.py +++ b/invokeai/app/services/default_graphs.py @@ -17,6 +17,7 @@ def create_text_to_image() -> LibraryGraph: nodes={ 'width': ParamIntInvocation(id='width', a=512), 'height': ParamIntInvocation(id='height', a=512), + 'seed': ParamIntInvocation(id='seed', a=-1), '3': NoiseInvocation(id='3'), '4': CompelInvocation(id='4'), '5': TextToLatentsInvocation(id='5'), @@ -25,8 +26,11 @@ def create_text_to_image() -> LibraryGraph: edges=[ Edge(source=EdgeConnection(node_id='width', field='a'), destination=EdgeConnection(node_id='3', field='width')), Edge(source=EdgeConnection(node_id='height', field='a'), destination=EdgeConnection(node_id='3', field='height')), + Edge(source=EdgeConnection(node_id='seed', field='a'), destination=EdgeConnection(node_id='3', field='seed')), + # TODO: remove, when updated TextToLatents merged Edge(source=EdgeConnection(node_id='width', field='a'), destination=EdgeConnection(node_id='5', field='width')), Edge(source=EdgeConnection(node_id='height', field='a'), destination=EdgeConnection(node_id='5', field='height')), + Edge(source=EdgeConnection(node_id='seed', field='a'), destination=EdgeConnection(node_id='5', field='seed')), Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='5', field='noise')), Edge(source=EdgeConnection(node_id='5', field='latents'), destination=EdgeConnection(node_id='6', field='latents')), Edge(source=EdgeConnection(node_id='4', field='positive'), destination=EdgeConnection(node_id='5', field='positive')), @@ -34,9 +38,11 @@ def create_text_to_image() -> LibraryGraph: ] ), exposed_inputs=[ - ExposedNodeInput(node_path='4', field='positive_prompt', alias='prompt'), # TODO: cli uses concatenated prompt + ExposedNodeInput(node_path='4', field='positive_prompt', alias='positive_prompt'), + ExposedNodeInput(node_path='4', field='negative_prompt', alias='negative_prompt'), ExposedNodeInput(node_path='width', field='a', alias='width'), - ExposedNodeInput(node_path='height', field='a', alias='height') + ExposedNodeInput(node_path='height', field='a', alias='height'), + ExposedNodeInput(node_path='seed', field='a', alias='seed'), ], exposed_outputs=[ ExposedNodeOutput(node_path='6', field='image', 
alias='image') From d753cff91ae8baa60bfe04bcf8ee7d4bc9660d30 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Tue, 25 Apr 2023 13:18:50 +0300 Subject: [PATCH 06/15] Undo debug message --- invokeai/backend/model_management/model_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py index baa48adf5e..534b526081 100644 --- a/invokeai/backend/model_management/model_manager.py +++ b/invokeai/backend/model_management/model_manager.py @@ -1191,8 +1191,6 @@ class ModelManager(object): if self.embedding_path is not None: print(f">> Loading embeddings from {self.embedding_path}") for root, _, files in os.walk(self.embedding_path): - print(root) - print(files) for name in files: ti_path = os.path.join(root, name) model.textual_inversion_manager.load_textual_inversion( From 0f95f7cea321ce949b987cf3a0f9e41ce3740a15 Mon Sep 17 00:00:00 2001 From: Andy Luhrs Date: Thu, 27 Apr 2023 11:03:07 -0700 Subject: [PATCH 07/15] Fix inpaint node Seems like this is the only change needed for the existing inpaint node to work. --- invokeai/app/invocations/generate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/generate.py b/invokeai/app/invocations/generate.py index df79baa0f3..0bf48eefcf 100644 --- a/invokeai/app/invocations/generate.py +++ b/invokeai/app/invocations/generate.py @@ -247,8 +247,8 @@ class InpaintInvocation(ImageToImageInvocation): outputs = Inpaint(model).generate( prompt=self.prompt, - init_img=image, - init_mask=mask, + init_image=image, + mask_image=mask, step_callback=partial(self.dispatch_progress, context, source_node_id), **self.dict( exclude={"prompt", "image", "mask"} From 7d221e2518f7113450a5d11d6ab1dcc9bf5d4755 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Thu, 4 May 2023 20:14:22 +0300 Subject: [PATCH 08/15] Combine conditioning to one field(better fits for multiple type conditioning like perp-neg) --- invokeai/app/invocations/compel.py | 17 +++++------------ invokeai/app/invocations/latent.py | 6 ++---- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 948d7d130c..a3935bc9f4 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -34,8 +34,7 @@ class CompelOutput(BaseInvocationOutput): # model: ModelField = Field(default=None, description="Model") # src? + loras -> tokenizer + text_encoder + loras # clip: ClipField = Field(default=None, description="Text encoder(clip)") - positive: ConditioningField = Field(default=None, description="Positive conditioning") - negative: ConditioningField = Field(default=None, description="Negative conditioning") + conditioning: ConditioningField = Field(default=None, description="Conditioning") #fmt: on @@ -134,20 +133,14 @@ class CompelInvocation(BaseInvocation): cross_attention_control_args=options.get("cross_attention_control", None), ) - name_prefix = f'{context.graph_execution_state_id}__{self.id}' - name_positive = f"{name_prefix}_positive" - name_negative = f"{name_prefix}_negative" + name_cond = f"{context.graph_execution_state_id}_{self.id}_conditioning" # TODO: hacky but works ;D maybe rename latents somehow? 
- context.services.latents.set(name_positive, (c, ec)) - context.services.latents.set(name_negative, (uc, None)) + context.services.latents.set(name_cond, (c, uc, ec)) return CompelOutput( - positive=ConditioningField( - conditioning_name=name_positive, - ), - negative=ConditioningField( - conditioning_name=name_negative, + conditioning=ConditioningField( + conditioning_name=name_cond, ), ) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 7e3df2f716..4c3de0a7e1 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -144,8 +144,7 @@ class TextToLatentsInvocation(BaseInvocation): # Inputs # fmt: off - positive: Optional[ConditioningField] = Field(description="Positive conditioning for generation") - negative: Optional[ConditioningField] = Field(description="Negative conditioning for generation") + conditioning: Optional[ConditioningField] = Field(description="Conditioning for generation") seed: int = Field(default=-1,ge=-1, le=np.iinfo(np.uint32).max, description="The seed to use (-1 for a random seed)", ) noise: Optional[LatentsField] = Field(description="The noise to use") steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") @@ -205,8 +204,7 @@ class TextToLatentsInvocation(BaseInvocation): def get_conditioning_data(self, context: InvocationContext, model: StableDiffusionGeneratorPipeline) -> ConditioningData: - c, extra_conditioning_info = context.services.latents.get(self.positive.conditioning_name) - uc, _ = context.services.latents.get(self.negative.conditioning_name) + c, uc, extra_conditioning_info = context.services.latents.get(self.conditioning.conditioning_name) conditioning_data = ConditioningData( uc, From 1e6adf0a06785068a7257b5d6b1da1956e9e2673 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Thu, 4 May 2023 21:14:31 +0300 Subject: [PATCH 09/15] Fix default graph and test --- invokeai/app/services/default_graphs.py | 3 +-- tests/nodes/test_node_graph.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/invokeai/app/services/default_graphs.py b/invokeai/app/services/default_graphs.py index 87275f0e1f..513e84d66d 100644 --- a/invokeai/app/services/default_graphs.py +++ b/invokeai/app/services/default_graphs.py @@ -29,8 +29,7 @@ def create_text_to_image() -> LibraryGraph: Edge(source=EdgeConnection(node_id='seed', field='a'), destination=EdgeConnection(node_id='3', field='seed')), Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='5', field='noise')), Edge(source=EdgeConnection(node_id='5', field='latents'), destination=EdgeConnection(node_id='6', field='latents')), - Edge(source=EdgeConnection(node_id='4', field='positive'), destination=EdgeConnection(node_id='5', field='positive')), - Edge(source=EdgeConnection(node_id='4', field='negative'), destination=EdgeConnection(node_id='5', field='negative')), + Edge(source=EdgeConnection(node_id='4', field='conditioning'), destination=EdgeConnection(node_id='5', field='conditioning')), ] ), exposed_inputs=[ diff --git a/tests/nodes/test_node_graph.py b/tests/nodes/test_node_graph.py index c7693b59c9..293de421c2 100644 --- a/tests/nodes/test_node_graph.py +++ b/tests/nodes/test_node_graph.py @@ -463,16 +463,16 @@ def test_graph_subgraph_t2i(): n4 = ShowImageInvocation(id = "4") g.add_node(n4) - g.add_edge(create_edge("1.5","image","4","image")) + g.add_edge(create_edge("1.6","image","4","image")) # Validate dg = g.nx_graph_flat() - assert 
set(dg.nodes) == set(['1.width', '1.height', '1.3', '1.4', '1.5', '2', '3', '4']) + assert set(dg.nodes) == set(['1.width', '1.height', '1.seed', '1.3', '1.4', '1.5', '1.6', '2', '3', '4']) expected_edges = [(f'1.{e.source.node_id}',f'1.{e.destination.node_id}') for e in lg.graph.edges] expected_edges.extend([ ('2','1.width'), ('3','1.height'), - ('1.5','4') + ('1.6','4') ]) print(expected_edges) print(list(dg.edges)) From 81ec476f3a5d7c3e3059e11844227879225b720f Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Thu, 4 May 2023 21:50:40 +0300 Subject: [PATCH 10/15] Revert seed field addition --- invokeai/app/invocations/latent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 4c3de0a7e1..28f7dac224 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -145,7 +145,6 @@ class TextToLatentsInvocation(BaseInvocation): # Inputs # fmt: off conditioning: Optional[ConditioningField] = Field(description="Conditioning for generation") - seed: int = Field(default=-1,ge=-1, le=np.iinfo(np.uint32).max, description="The seed to use (-1 for a random seed)", ) noise: Optional[LatentsField] = Field(description="The noise to use") steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") cfg_scale: float = Field(default=7.5, gt=0, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", ) From d866dcb3d212c435e5ef79b7dea425a8abfb0efc Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 4 May 2023 20:30:59 -0400 Subject: [PATCH 11/15] close #3343 --- invokeai/backend/web/invoke_ai_web_server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/invokeai/backend/web/invoke_ai_web_server.py b/invokeai/backend/web/invoke_ai_web_server.py index 84478d5cb6..97687bd2bf 100644 --- a/invokeai/backend/web/invoke_ai_web_server.py +++ b/invokeai/backend/web/invoke_ai_web_server.py @@ -78,7 +78,6 @@ class InvokeAIWebServer: mimetypes.add_type("application/javascript", ".js") mimetypes.add_type("text/css", ".css") # Socket IO - logger = True if args.web_verbose else False engineio_logger = True if args.web_verbose else False max_http_buffer_size = 10000000 From 5012f6159933ef3bcff4b6ae78751407aeb9f03a Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Fri, 5 May 2023 15:47:51 +0300 Subject: [PATCH 12/15] Separate conditionings back to positive and negative --- invokeai/app/invocations/compel.py | 17 ++++++++++++----- invokeai/app/invocations/latent.py | 6 ++++-- invokeai/app/services/default_graphs.py | 3 ++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index a3935bc9f4..948d7d130c 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -34,7 +34,8 @@ class CompelOutput(BaseInvocationOutput): # model: ModelField = Field(default=None, description="Model") # src? 
+ loras -> tokenizer + text_encoder + loras # clip: ClipField = Field(default=None, description="Text encoder(clip)") - conditioning: ConditioningField = Field(default=None, description="Conditioning") + positive: ConditioningField = Field(default=None, description="Positive conditioning") + negative: ConditioningField = Field(default=None, description="Negative conditioning") #fmt: on @@ -133,14 +134,20 @@ class CompelInvocation(BaseInvocation): cross_attention_control_args=options.get("cross_attention_control", None), ) - name_cond = f"{context.graph_execution_state_id}_{self.id}_conditioning" + name_prefix = f'{context.graph_execution_state_id}__{self.id}' + name_positive = f"{name_prefix}_positive" + name_negative = f"{name_prefix}_negative" # TODO: hacky but works ;D maybe rename latents somehow? - context.services.latents.set(name_cond, (c, uc, ec)) + context.services.latents.set(name_positive, (c, ec)) + context.services.latents.set(name_negative, (uc, None)) return CompelOutput( - conditioning=ConditioningField( - conditioning_name=name_cond, + positive=ConditioningField( + conditioning_name=name_positive, + ), + negative=ConditioningField( + conditioning_name=name_negative, ), ) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 28f7dac224..1db0a0ccee 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -144,7 +144,8 @@ class TextToLatentsInvocation(BaseInvocation): # Inputs # fmt: off - conditioning: Optional[ConditioningField] = Field(description="Conditioning for generation") + positive: Optional[ConditioningField] = Field(description="Positive conditioning for generation") + negative: Optional[ConditioningField] = Field(description="Negative conditioning for generation") noise: Optional[LatentsField] = Field(description="The noise to use") steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") cfg_scale: float = Field(default=7.5, gt=0, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", ) @@ -203,7 +204,8 @@ class TextToLatentsInvocation(BaseInvocation): def get_conditioning_data(self, context: InvocationContext, model: StableDiffusionGeneratorPipeline) -> ConditioningData: - c, uc, extra_conditioning_info = context.services.latents.get(self.conditioning.conditioning_name) + c, extra_conditioning_info = context.services.latents.get(self.positive.conditioning_name) + uc, _ = context.services.latents.get(self.negative.conditioning_name) conditioning_data = ConditioningData( uc, diff --git a/invokeai/app/services/default_graphs.py b/invokeai/app/services/default_graphs.py index 513e84d66d..87275f0e1f 100644 --- a/invokeai/app/services/default_graphs.py +++ b/invokeai/app/services/default_graphs.py @@ -29,7 +29,8 @@ def create_text_to_image() -> LibraryGraph: Edge(source=EdgeConnection(node_id='seed', field='a'), destination=EdgeConnection(node_id='3', field='seed')), Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='5', field='noise')), Edge(source=EdgeConnection(node_id='5', field='latents'), destination=EdgeConnection(node_id='6', field='latents')), - Edge(source=EdgeConnection(node_id='4', field='conditioning'), destination=EdgeConnection(node_id='5', field='conditioning')), + Edge(source=EdgeConnection(node_id='4', field='positive'), destination=EdgeConnection(node_id='5', field='positive')), + Edge(source=EdgeConnection(node_id='4', field='negative'), 
destination=EdgeConnection(node_id='5', field='negative')), ] ), exposed_inputs=[ From 58d7833c5ca56ecef3313ab7f015d6bfa6627456 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Fri, 5 May 2023 21:09:29 +0300 Subject: [PATCH 13/15] Review changes --- invokeai/app/invocations/compel.py | 79 +++++++------------------ invokeai/app/invocations/latent.py | 10 ++-- invokeai/app/services/default_graphs.py | 19 +++--- tests/nodes/test_node_graph.py | 6 +- 4 files changed, 38 insertions(+), 76 deletions(-) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 948d7d130c..8414a60fd9 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -30,35 +30,24 @@ class CompelOutput(BaseInvocationOutput): #fmt: off type: Literal["compel_output"] = "compel_output" - # name + loras -> pipeline + loras - # model: ModelField = Field(default=None, description="Model") - # src? + loras -> tokenizer + text_encoder + loras - # clip: ClipField = Field(default=None, description="Text encoder(clip)") - positive: ConditioningField = Field(default=None, description="Positive conditioning") - negative: ConditioningField = Field(default=None, description="Negative conditioning") + + conditioning: ConditioningField = Field(default=None, description="Conditioning") #fmt: on class CompelInvocation(BaseInvocation): + """Parse prompt using compel package to conditioning.""" type: Literal["compel"] = "compel" - positive_prompt: str = Field(default="", description="Positive prompt") - negative_prompt: str = Field(default="", description="Negative prompt") - + prompt: str = Field(default="", description="Prompt") model: str = Field(default="", description="Model to use") - truncate_long_prompts: bool = Field(default=False, description="Whether or not to truncate long prompt to 77 tokens") - - # name + loras -> pipeline + loras - # model: ModelField = Field(default=None, description="Model to use") - # src? + loras -> tokenizer + text_encoder + loras - # clip: ClipField = Field(default=None, description="Text encoder(clip) to use") # Schema customisation class Config(InvocationConfig): schema_extra = { "ui": { - "tags": ["latents", "noise"], + "tags": ["prompt", "compel"], "type_hints": { "model": "model" } @@ -88,14 +77,8 @@ class CompelInvocation(BaseInvocation): pipeline.textual_inversion_manager.load_huggingface_concepts(concepts) # apply the concepts library to the prompt - positive_prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers( - self.positive_prompt, - lambda concepts: load_huggingface_concepts(concepts), - pipeline.textual_inversion_manager.get_all_trigger_strings(), - ) - - negative_prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers( - self.negative_prompt, + prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers( + self.prompt, lambda concepts: load_huggingface_concepts(concepts), pipeline.textual_inversion_manager.get_all_trigger_strings(), ) @@ -103,7 +86,7 @@ class CompelInvocation(BaseInvocation): # lazy-load any deferred textual inversions. # this might take a couple of seconds the first time a textual inversion is used. 
pipeline.textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms( - positive_prompt_str + "[" + negative_prompt_str + "]" + prompt_str ) compel = Compel( @@ -111,43 +94,35 @@ class CompelInvocation(BaseInvocation): text_encoder=text_encoder, textual_inversion_manager=pipeline.textual_inversion_manager, dtype_for_device_getter=torch_dtype, - truncate_long_prompts=self.truncate_long_prompts, + truncate_long_prompts=True, # TODO: ) # TODO: support legacy blend? - positive_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(positive_prompt_str) - negative_prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(negative_prompt_str) + prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(prompt_str) if getattr(Globals, "log_tokenization", False): - log_tokenization(positive_prompt, negative_prompt, tokenizer=tokenizer) + log_tokenization_for_prompt_object(prompt, tokenizer) - # TODO: add lora(with model and clip field types) - c, options = compel.build_conditioning_tensor_for_prompt_object(positive_prompt) - uc, _ = compel.build_conditioning_tensor_for_prompt_object(negative_prompt) + c, options = compel.build_conditioning_tensor_for_prompt_object(prompt) - if not self.truncate_long_prompts: - [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc]) + # TODO: long prompt support + #if not self.truncate_long_prompts: + # [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc]) ec = InvokeAIDiffuserComponent.ExtraConditioningInfo( - tokens_count_including_eos_bos=get_max_token_count(tokenizer, positive_prompt), + tokens_count_including_eos_bos=get_max_token_count(tokenizer, prompt), cross_attention_control_args=options.get("cross_attention_control", None), ) - name_prefix = f'{context.graph_execution_state_id}__{self.id}' - name_positive = f"{name_prefix}_positive" - name_negative = f"{name_prefix}_negative" + conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning" # TODO: hacky but works ;D maybe rename latents somehow? 
- context.services.latents.set(name_positive, (c, ec)) - context.services.latents.set(name_negative, (uc, None)) + context.services.latents.set(conditioning_name, (c, ec)) return CompelOutput( - positive=ConditioningField( - conditioning_name=name_positive, - ), - negative=ConditioningField( - conditioning_name=name_negative, + conditioning=ConditioningField( + conditioning_name=conditioning_name, ), ) @@ -195,20 +170,6 @@ def get_tokens_for_prompt_object( return tokens -def log_tokenization( - positive_prompt: Union[Blend, FlattenedPrompt], - negative_prompt: Union[Blend, FlattenedPrompt], - tokenizer, -): - print(f"\n>> [TOKENLOG] Parsed Prompt: {positive_prompt}") - print(f"\n>> [TOKENLOG] Parsed Negative Prompt: {negative_prompt}") - - log_tokenization_for_prompt_object(positive_prompt, tokenizer) - log_tokenization_for_prompt_object( - negative_prompt, tokenizer, display_label_prefix="(negative prompt)" - ) - - def log_tokenization_for_prompt_object( p: Union[Blend, FlattenedPrompt], tokenizer, display_label_prefix=None ): diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 1db0a0ccee..0d3ef4a8cd 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -138,14 +138,14 @@ class NoiseInvocation(BaseInvocation): # Text to image class TextToLatentsInvocation(BaseInvocation): - """Generates latents from a prompt.""" + """Generates latents from conditionings.""" type: Literal["t2l"] = "t2l" # Inputs # fmt: off - positive: Optional[ConditioningField] = Field(description="Positive conditioning for generation") - negative: Optional[ConditioningField] = Field(description="Negative conditioning for generation") + positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation") + negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation") noise: Optional[LatentsField] = Field(description="The noise to use") steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") cfg_scale: float = Field(default=7.5, gt=0, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", ) @@ -204,8 +204,8 @@ class TextToLatentsInvocation(BaseInvocation): def get_conditioning_data(self, context: InvocationContext, model: StableDiffusionGeneratorPipeline) -> ConditioningData: - c, extra_conditioning_info = context.services.latents.get(self.positive.conditioning_name) - uc, _ = context.services.latents.get(self.negative.conditioning_name) + c, extra_conditioning_info = context.services.latents.get(self.positive_conditioning.conditioning_name) + uc, _ = context.services.latents.get(self.negative_conditioning.conditioning_name) conditioning_data = ConditioningData( uc, diff --git a/invokeai/app/services/default_graphs.py b/invokeai/app/services/default_graphs.py index 87275f0e1f..c8347c043f 100644 --- a/invokeai/app/services/default_graphs.py +++ b/invokeai/app/services/default_graphs.py @@ -20,28 +20,29 @@ def create_text_to_image() -> LibraryGraph: 'seed': ParamIntInvocation(id='seed', a=-1), '3': NoiseInvocation(id='3'), '4': CompelInvocation(id='4'), - '5': TextToLatentsInvocation(id='5'), - '6': LatentsToImageInvocation(id='6'), + '5': CompelInvocation(id='5'), + '6': TextToLatentsInvocation(id='6'), + '7': LatentsToImageInvocation(id='7'), }, edges=[ Edge(source=EdgeConnection(node_id='width', field='a'), destination=EdgeConnection(node_id='3', field='width')), 
Edge(source=EdgeConnection(node_id='height', field='a'), destination=EdgeConnection(node_id='3', field='height')), Edge(source=EdgeConnection(node_id='seed', field='a'), destination=EdgeConnection(node_id='3', field='seed')), - Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='5', field='noise')), - Edge(source=EdgeConnection(node_id='5', field='latents'), destination=EdgeConnection(node_id='6', field='latents')), - Edge(source=EdgeConnection(node_id='4', field='positive'), destination=EdgeConnection(node_id='5', field='positive')), - Edge(source=EdgeConnection(node_id='4', field='negative'), destination=EdgeConnection(node_id='5', field='negative')), + Edge(source=EdgeConnection(node_id='3', field='noise'), destination=EdgeConnection(node_id='6', field='noise')), + Edge(source=EdgeConnection(node_id='6', field='latents'), destination=EdgeConnection(node_id='7', field='latents')), + Edge(source=EdgeConnection(node_id='4', field='conditioning'), destination=EdgeConnection(node_id='6', field='positive_conditioning')), + Edge(source=EdgeConnection(node_id='5', field='conditioning'), destination=EdgeConnection(node_id='6', field='negative_conditioning')), ] ), exposed_inputs=[ - ExposedNodeInput(node_path='4', field='positive_prompt', alias='positive_prompt'), - ExposedNodeInput(node_path='4', field='negative_prompt', alias='negative_prompt'), + ExposedNodeInput(node_path='4', field='prompt', alias='positive_prompt'), + ExposedNodeInput(node_path='5', field='prompt', alias='negative_prompt'), ExposedNodeInput(node_path='width', field='a', alias='width'), ExposedNodeInput(node_path='height', field='a', alias='height'), ExposedNodeInput(node_path='seed', field='a', alias='seed'), ], exposed_outputs=[ - ExposedNodeOutput(node_path='6', field='image', alias='image') + ExposedNodeOutput(node_path='7', field='image', alias='image') ]) diff --git a/tests/nodes/test_node_graph.py b/tests/nodes/test_node_graph.py index 293de421c2..82818414b2 100644 --- a/tests/nodes/test_node_graph.py +++ b/tests/nodes/test_node_graph.py @@ -463,16 +463,16 @@ def test_graph_subgraph_t2i(): n4 = ShowImageInvocation(id = "4") g.add_node(n4) - g.add_edge(create_edge("1.6","image","4","image")) + g.add_edge(create_edge("1.7","image","4","image")) # Validate dg = g.nx_graph_flat() - assert set(dg.nodes) == set(['1.width', '1.height', '1.seed', '1.3', '1.4', '1.5', '1.6', '2', '3', '4']) + assert set(dg.nodes) == set(['1.width', '1.height', '1.seed', '1.3', '1.4', '1.5', '1.6', '1.7', '2', '3', '4']) expected_edges = [(f'1.{e.source.node_id}',f'1.{e.destination.node_id}') for e in lg.graph.edges] expected_edges.extend([ ('2','1.width'), ('3','1.height'), - ('1.6','4') + ('1.7','4') ]) print(expected_edges) print(list(dg.edges)) From a80fe05e23c180c90de10c73dbdda4df234f7097 Mon Sep 17 00:00:00 2001 From: StAlKeR7779 Date: Fri, 5 May 2023 21:30:16 +0300 Subject: [PATCH 14/15] Rename compel node --- invokeai/app/invocations/compel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 8414a60fd9..1fb7832031 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -47,6 +47,7 @@ class CompelInvocation(BaseInvocation): class Config(InvocationConfig): schema_extra = { "ui": { + "title": "Prompt (Compel)", "tags": ["prompt", "compel"], "type_hints": { "model": "model" From f01c79a94f0896245e4d7d357c5827d031f0dd2d Mon Sep 17 00:00:00 2001 From: Steve Martinelli 
<4118756+stevemar@users.noreply.github.com> Date: Fri, 5 May 2023 21:28:00 -0400 Subject: [PATCH 15/15] add -y to the automated install instructions when copying the automated install instructions from the docs into a terminal, the line to install the python packages failed as it was missing the `-y` flag. --- docs/installation/010_INSTALL_AUTOMATED.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation/010_INSTALL_AUTOMATED.md b/docs/installation/010_INSTALL_AUTOMATED.md index 83b4415394..c710ed17b1 100644 --- a/docs/installation/010_INSTALL_AUTOMATED.md +++ b/docs/installation/010_INSTALL_AUTOMATED.md @@ -89,7 +89,7 @@ experimental versions later. sudo apt update sudo apt install -y software-properties-common sudo add-apt-repository -y ppa:deadsnakes/ppa - sudo apt install python3.10 python3-pip python3.10-venv + sudo apt install -y python3.10 python3-pip python3.10-venv sudo update-alternatives --install /usr/local/bin/python python /usr/bin/python3.10 3 ```
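
A minimal sketch of how the pieces introduced above fit together once PATCH 13 lands: two `CompelInvocation` nodes feed `positive_conditioning` and `negative_conditioning` on the `t2l` node, mirroring the patched `create_text_to_image()` in `invokeai/app/services/default_graphs.py`. This is illustrative only, not part of the patch series; the absolute import paths are assumptions inferred from the file layout in the diffs (the patches themselves use package-relative imports), and any field left unset is assumed to have a default.

```python
# Sketch only: a custom text-to-image graph assembled the same way the
# patched create_text_to_image() does after PATCH 13. Import paths are
# assumed from the file layout shown in the diffs above.
from invokeai.app.invocations.compel import CompelInvocation
from invokeai.app.invocations.latent import (
    LatentsToImageInvocation,
    NoiseInvocation,
    TextToLatentsInvocation,
)
from invokeai.app.services.graph import Edge, EdgeConnection, Graph


def build_t2i_graph(positive: str, negative: str, model: str = "") -> Graph:
    return Graph(
        nodes={
            "noise": NoiseInvocation(id="noise", width=512, height=512),
            "pos": CompelInvocation(id="pos", prompt=positive, model=model),
            "neg": CompelInvocation(id="neg", prompt=negative, model=model),
            "t2l": TextToLatentsInvocation(id="t2l", steps=30),
            "l2i": LatentsToImageInvocation(id="l2i"),
        },
        edges=[
            # noise -> t2l, as in the default graph
            Edge(source=EdgeConnection(node_id="noise", field="noise"),
                 destination=EdgeConnection(node_id="t2l", field="noise")),
            # each compel node exposes a single `conditioning` output
            Edge(source=EdgeConnection(node_id="pos", field="conditioning"),
                 destination=EdgeConnection(node_id="t2l", field="positive_conditioning")),
            Edge(source=EdgeConnection(node_id="neg", field="conditioning"),
                 destination=EdgeConnection(node_id="t2l", field="negative_conditioning")),
            # decode the resulting latents to an image
            Edge(source=EdgeConnection(node_id="t2l", field="latents"),
                 destination=EdgeConnection(node_id="l2i", field="latents")),
        ],
    )
```

Compared with the pre-patch graph, the prompt string no longer travels into `t2l` at all; only conditioning names cross node boundaries.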
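
The hand-off behind those edges is name-based: the compel node stores a `(conditioning_tensor, extra_conditioning_info)` tuple in `context.services.latents` under `{graph_execution_state_id}_{node_id}_conditioning` and passes only that name downstream inside a `ConditioningField`. The hypothetical consumer below, shown purely to illustrate the convention and not something added by these patches, resolves the field the same way the patched `TextToLatentsInvocation.get_conditioning_data()` does.

```python
# Hypothetical invocation, for illustration only; it is not part of this
# patch series. It resolves a ConditioningField exactly the way the patched
# TextToLatentsInvocation does, via context.services.latents.
from typing import Literal, Optional

from pydantic import Field

from invokeai.app.invocations.baseinvocation import (
    BaseInvocation,
    BaseInvocationOutput,
    InvocationContext,
)
from invokeai.app.invocations.compel import ConditioningField


class ConditioningStatsOutput(BaseInvocationOutput):
    type: Literal["conditioning_stats_output"] = "conditioning_stats_output"
    token_count: int = Field(default=0, description="Token count recorded by the compel node")


class ConditioningStatsInvocation(BaseInvocation):
    """Report the token count stored alongside a conditioning tensor (illustrative)."""

    type: Literal["conditioning_stats"] = "conditioning_stats"
    conditioning: Optional[ConditioningField] = Field(description="Conditioning to inspect")

    def invoke(self, context: InvocationContext) -> ConditioningStatsOutput:
        # The compel node stores a (tensor, ExtraConditioningInfo) tuple under this name.
        _tensor, extra_info = context.services.latents.get(self.conditioning.conditioning_name)
        count = extra_info.tokens_count_including_eos_bos if extra_info is not None else 0
        return ConditioningStatsOutput(token_count=count)
```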