model manager now running as a service

Lincoln Stein
2023-05-11 21:24:29 -04:00
parent 8ad8c5c67a
commit df5b968954
10 changed files with 118 additions and 428 deletions
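The heart of the change, as it appears in the hunk below: invocations no longer resolve models through the choose_model() helper, but ask the model-manager service on the InvocationContext for a model and enter its context while the pipeline is in use. A minimal before/after sketch of just that call pattern, taken from the diff with the rest of the invocation body elided:

    # Before this commit: helper function, pipeline unwrapped directly
    model = choose_model(context.services.model_manager, self.model)
    pipeline = model.context.model
    tokenizer = pipeline.tokenizer

    # After this commit: the model manager is a service on the invocation
    # context; the returned model's context is held open while it is used
    model = context.services.model_manager.get_model(self.model)
    with model.context as pipeline:
        tokenizer = pipeline.tokenizer
        text_encoder = pipeline.text_encoder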


@@ -1,7 +1,6 @@
 from typing import Literal, Optional, Union
 from pydantic import BaseModel, Field
-from invokeai.app.invocations.util.choose_model import choose_model
 from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
 from ...backend.util.devices import choose_torch_device, torch_dtype
@@ -58,74 +57,74 @@ class CompelInvocation(BaseInvocation):
     def invoke(self, context: InvocationContext) -> CompelOutput:
         # TODO: load without model
-        model = choose_model(context.services.model_manager, self.model)
-        pipeline = model.context.model
-        tokenizer = pipeline.tokenizer
-        text_encoder = pipeline.text_encoder
+        model = context.services.model_manager.get_model(self.model)
+        with model.context as pipeline:
+            tokenizer = pipeline.tokenizer
+            text_encoder = pipeline.text_encoder

-        # TODO: global? input?
-        #use_full_precision = precision == "float32" or precision == "autocast"
-        #use_full_precision = False
+            # TODO: global? input?
+            #use_full_precision = precision == "float32" or precision == "autocast"
+            #use_full_precision = False

-        # TODO: redo TI when separate model loding implemented
-        #textual_inversion_manager = TextualInversionManager(
-        #    tokenizer=tokenizer,
-        #    text_encoder=text_encoder,
-        #    full_precision=use_full_precision,
-        #)
+            # TODO: redo TI when separate model loding implemented
+            #textual_inversion_manager = TextualInversionManager(
+            #    tokenizer=tokenizer,
+            #    text_encoder=text_encoder,
+            #    full_precision=use_full_precision,
+            #)

-        def load_huggingface_concepts(concepts: list[str]):
-            pipeline.textual_inversion_manager.load_huggingface_concepts(concepts)
+            def load_huggingface_concepts(concepts: list[str]):
+                pipeline.textual_inversion_manager.load_huggingface_concepts(concepts)

-        # apply the concepts library to the prompt
-        prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers(
-            self.prompt,
-            lambda concepts: load_huggingface_concepts(concepts),
-            pipeline.textual_inversion_manager.get_all_trigger_strings(),
-        )
+            # apply the concepts library to the prompt
+            prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers(
+                self.prompt,
+                lambda concepts: load_huggingface_concepts(concepts),
+                pipeline.textual_inversion_manager.get_all_trigger_strings(),
+            )

-        # lazy-load any deferred textual inversions.
-        # this might take a couple of seconds the first time a textual inversion is used.
-        pipeline.textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms(
-            prompt_str
-        )
+            # lazy-load any deferred textual inversions.
+            # this might take a couple of seconds the first time a textual inversion is used.
+            pipeline.textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms(
+                prompt_str
+            )

-        compel = Compel(
-            tokenizer=tokenizer,
-            text_encoder=text_encoder,
-            textual_inversion_manager=pipeline.textual_inversion_manager,
-            dtype_for_device_getter=torch_dtype,
-            truncate_long_prompts=True, # TODO:
-        )
+            compel = Compel(
+                tokenizer=tokenizer,
+                text_encoder=text_encoder,
+                textual_inversion_manager=pipeline.textual_inversion_manager,
+                dtype_for_device_getter=torch_dtype,
+                truncate_long_prompts=True, # TODO:
+            )

-        # TODO: support legacy blend?
+            # TODO: support legacy blend?

-        prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(prompt_str)
+            prompt: Union[FlattenedPrompt, Blend] = Compel.parse_prompt_string(prompt_str)

-        if getattr(Globals, "log_tokenization", False):
-            log_tokenization_for_prompt_object(prompt, tokenizer)
+            if getattr(Globals, "log_tokenization", False):
+                log_tokenization_for_prompt_object(prompt, tokenizer)

-        c, options = compel.build_conditioning_tensor_for_prompt_object(prompt)
+            c, options = compel.build_conditioning_tensor_for_prompt_object(prompt)

-        # TODO: long prompt support
-        #if not self.truncate_long_prompts:
-        #    [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
+            # TODO: long prompt support
+            #if not self.truncate_long_prompts:
+            #    [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])

-        ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
-            tokens_count_including_eos_bos=get_max_token_count(tokenizer, prompt),
-            cross_attention_control_args=options.get("cross_attention_control", None),
-        )
+            ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
+                tokens_count_including_eos_bos=get_max_token_count(tokenizer, prompt),
+                cross_attention_control_args=options.get("cross_attention_control", None),
+            )

-        conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"
+            conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"

-        # TODO: hacky but works ;D maybe rename latents somehow?
-        context.services.latents.set(conditioning_name, (c, ec))
+            # TODO: hacky but works ;D maybe rename latents somehow?
+            context.services.latents.set(conditioning_name, (c, ec))

-        return CompelOutput(
-            conditioning=ConditioningField(
-                conditioning_name=conditioning_name,
-            ),
-        )
+            return CompelOutput(
+                conditioning=ConditioningField(
+                    conditioning_name=conditioning_name,
+                ),
+            )
def get_max_token_count(