Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

Commit: 9adb15f86c
Parent: 3d69372785
Message: working but filled with debug statements
@@ -4,6 +4,8 @@ from logging import Logger
 
 import torch
 
+import invokeai.backend.util.devices # horrible hack
+
 from invokeai.app.services.object_serializer.object_serializer_disk import ObjectSerializerDisk
 from invokeai.app.services.object_serializer.object_serializer_forward_cache import ObjectSerializerForwardCache
 from invokeai.app.services.shared.sqlite.sqlite_util import init_db
@@ -100,6 +102,9 @@ class ApiDependencies:
            download_queue=download_queue_service,
            events=events,
        )
+
+        # horrible hack - remove
+        invokeai.backend.util.devices.RAM_CACHE = model_manager.load.ram_cache
        names = SimpleNameService()
        session_processor = DefaultSessionProcessor()
        session_queue = SqliteSessionQueue(db=db)
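The two added lines above install a module-level hook: once the services are constructed, the model manager's RAM cache is stashed in invokeai.backend.util.devices.RAM_CACHE so that choose_torch_device() (rewritten in the last hunk of this commit) can ask the cache which execution device the current worker thread has reserved. A minimal sketch of that late-binding pattern follows; FakeRamCache, reserve() and choose_device() are illustrative stand-ins, not InvokeAI APIs.

# Sketch only: a late-bound module global consulted by a device chooser.
import threading

RAM_CACHE = None  # populated at application startup, as in the diff above


class FakeRamCache:
    """Stand-in for the model manager's RAM cache (hypothetical)."""

    def __init__(self):
        self._reservations: dict[int, str] = {}

    def reserve(self, device: str) -> None:
        # Record which device the calling thread owns.
        self._reservations[threading.current_thread().ident] = device

    def get_execution_device(self) -> str:
        return self._reservations[threading.current_thread().ident]


def choose_device(default: str = "cpu") -> str:
    # Mirror the fallback in the devices.py hunk: if no cache has been installed
    # (AttributeError) or this thread has reserved nothing (KeyError), fall back.
    try:
        return RAM_CACHE.get_execution_device()
    except (AttributeError, KeyError):
        return default


if __name__ == "__main__":
    RAM_CACHE = FakeRamCache()   # roughly what ApiDependencies.initialize() does
    RAM_CACHE.reserve("cuda:0")  # roughly what the session processor does per worker thread
    print(choose_device())       # -> "cuda:0" on this thread; "cpu" on an unreserved one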
@@ -4,7 +4,7 @@ import math
 from contextlib import ExitStack
 from functools import singledispatchmethod
 from typing import Any, Iterator, List, Literal, Optional, Tuple, Union
-
+import threading
 import einops
 import numpy as np
 import numpy.typing as npt
@@ -393,6 +393,11 @@ class DenoiseLatentsInvocation(BaseInvocation):
                # flip all bits to have noise different from initial
                generator=torch.Generator(device=unet.device).manual_seed(seed ^ 0xFFFFFFFF),
            )
 
+        if conditioning_data.unconditioned_embeddings.embeds.device != conditioning_data.text_embeddings.embeds.device:
+            print(f'DEBUG; ERROR uc={conditioning_data.unconditioned_embeddings.embeds.device} c={conditioning_data.text_embeddings.embeds.device} unet={unet.device}, tid={threading.current_thread().ident}')
+
        return conditioning_data
 
    def create_pipeline(
@@ -1,5 +1,6 @@
 from queue import Queue
 from typing import TYPE_CHECKING, Optional, TypeVar
+import threading
 
 from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase
 
@@ -18,8 +19,8 @@ class ObjectSerializerForwardCache(ObjectSerializerBase[T]):
    def __init__(self, underlying_storage: ObjectSerializerBase[T], max_cache_size: int = 20):
        super().__init__()
        self._underlying_storage = underlying_storage
-        self._cache: dict[str, T] = {}
-        self._cache_ids = Queue[str]()
+        self._cache: dict[int, dict[str, T]] = {}
+        self._cache_ids: dict[int, Queue[str]] = {}
        self._max_cache_size = max_cache_size
 
    def start(self, invoker: "Invoker") -> None:
@@ -54,12 +55,27 @@ class ObjectSerializerForwardCache(ObjectSerializerBase[T]):
            del self._cache[name]
        self._on_deleted(name)
 
+    def _get_tid_cache(self) -> dict[str, T]:
+        tid = threading.current_thread().ident
+        if tid not in self._cache:
+            self._cache[tid] = {}
+        return self._cache[tid]
+
+    def _get_tid_cache_ids(self) -> Queue[str]:
+        tid = threading.current_thread().ident
+        if tid not in self._cache_ids:
+            self._cache_ids[tid] = Queue[str]()
+        return self._cache_ids[tid]
+
    def _get_cache(self, name: str) -> Optional[T]:
-        return None if name not in self._cache else self._cache[name]
+        cache = self._get_tid_cache()
+        return None if name not in cache else cache[name]
 
    def _set_cache(self, name: str, data: T):
-        if name not in self._cache:
-            self._cache[name] = data
-            self._cache_ids.put(name)
-            if self._cache_ids.qsize() > self._max_cache_size:
-                self._cache.pop(self._cache_ids.get())
+        cache = self._get_tid_cache()
+        if name not in cache:
+            cache[name] = data
+            cache_ids = self._get_tid_cache_ids()
+            cache_ids.put(name)
+            if cache_ids.qsize() > self._max_cache_size:
+                cache.pop(cache_ids.get())
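The rewritten cache methods above give every thread its own dictionary and its own FIFO of cache ids, keyed by threading.current_thread().ident, so concurrent sessions running on different GPUs never evict or return each other's cached objects. A self-contained sketch of that per-thread bounded cache, under hypothetical names (this is not the InvokeAI class itself):

import threading
from queue import Queue
from typing import Generic, Optional, TypeVar

T = TypeVar("T")


class PerThreadForwardCache(Generic[T]):
    """Bounded FIFO cache whose storage is partitioned per thread (sketch)."""

    def __init__(self, max_cache_size: int = 20):
        self._max_cache_size = max_cache_size
        self._cache: dict[int, dict[str, T]] = {}
        self._cache_ids: dict[int, Queue] = {}

    def _tid_cache(self) -> dict[str, T]:
        tid = threading.current_thread().ident
        if tid not in self._cache:
            self._cache[tid] = {}
        return self._cache[tid]

    def _tid_cache_ids(self) -> Queue:
        tid = threading.current_thread().ident
        if tid not in self._cache_ids:
            self._cache_ids[tid] = Queue()
        return self._cache_ids[tid]

    def get(self, name: str) -> Optional[T]:
        return self._tid_cache().get(name)

    def put(self, name: str, data: T) -> None:
        cache = self._tid_cache()
        if name not in cache:
            cache[name] = data
            ids = self._tid_cache_ids()
            ids.put(name)
            if ids.qsize() > self._max_cache_size:
                # Evict the oldest entry, but only from this thread's partition.
                cache.pop(ids.get())

A threading.local() attribute would give similar isolation; the dict-of-dicts form keeps every partition reachable from the single cache object.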
@@ -175,7 +175,6 @@ class DefaultSessionProcessor(SessionProcessorBase):
                session = self._session_worker_queue.get()
                if self._cancel_event.is_set():
                    if session.item_id in self._sessions_to_cancel:
-                        print("DEBUG: CANCEL")
                        continue
 
                if profiler is not None:
@@ -183,7 +182,6 @@ class DefaultSessionProcessor(SessionProcessorBase):
 
                # reserve a GPU for this session - may block
                with self._invoker.services.model_manager.load.ram_cache.reserve_execution_device() as gpu:
-                    print(f"DEBUG: session {session.item_id} has reserved gpu {gpu}")
 
                    # Prepare invocations and take the first
                    with self._process_lock:
@@ -30,15 +30,11 @@ import torch
 
 from invokeai.backend.model_manager import AnyModel, SubModelType
 from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot
-from invokeai.backend.util.devices import choose_torch_device
 from invokeai.backend.util.logging import InvokeAILogger
 
 from .model_cache_base import CacheRecord, CacheStats, ModelCacheBase, ModelLockerBase
 from .model_locker import ModelLocker
 
-if choose_torch_device() == torch.device("mps"):
-    from torch import mps
-
 # Maximum size of the cache, in gigs
 # Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
 DEFAULT_MAX_CACHE_SIZE = 6.0
@@ -130,6 +126,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        assigned = [x for x, tid in self._execution_devices.items() if current_thread == tid]
        if not assigned:
            raise ValueError("No GPU has been reserved for the use of thread {current_thread}")
+        print(f'DEBUG: TID={current_thread}; owns {assigned[0]}')
        return assigned[0]
 
    @contextmanager
@@ -155,15 +152,16 @@ class ModelCache(ModelCacheBase[AnyModel]):
        self._free_execution_device.acquire(timeout=timeout)
        with self._device_lock:
            free_device = [x for x, tid in self._execution_devices.items() if tid == 0]
-            print(f"DEBUG: execution devices = {self._execution_devices}")
            self._execution_devices[free_device[0]] = current_thread
            device = free_device[0]
 
        # we are outside the lock region now
+        print(f'DEBUG: RESERVED {device} for TID {current_thread}')
        try:
            yield device
        finally:
            with self._device_lock:
+                print(f'DEBUG: RELEASED {device} for TID {current_thread}')
                self._execution_devices[device] = 0
                self._free_execution_device.release()
                torch.cuda.empty_cache()
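In the hunk above, reserve_execution_device() combines what appears to be a counting semaphore (_free_execution_device) with a lock-protected ownership map (_execution_devices): a worker blocks until some GPU is free, records its thread id as the owner, and clears the entry and releases the semaphore when the with block exits. A minimal standalone sketch of that reservation pattern, using a hypothetical DevicePool rather than the InvokeAI ModelCache:

import threading
from contextlib import contextmanager
from typing import Iterator, Optional, Set


class DevicePool:
    """Hand out devices to threads one at a time, blocking while none are free (sketch)."""

    def __init__(self, devices: Set[str]):
        self._owners: dict[str, int] = {d: 0 for d in devices}  # 0 means "unowned"
        self._lock = threading.Lock()
        self._free = threading.BoundedSemaphore(len(devices))

    @contextmanager
    def reserve(self, timeout: Optional[float] = None) -> Iterator[str]:
        current = threading.current_thread().ident
        if not self._free.acquire(timeout=timeout):
            raise TimeoutError("no execution device became free in time")
        with self._lock:
            device = next(d for d, owner in self._owners.items() if owner == 0)
            self._owners[device] = current
        try:
            yield device
        finally:
            with self._lock:
                self._owners[device] = 0
            self._free.release()


# Usage: each session thread holds exactly one device for the duration of its work.
pool = DevicePool({"cuda:0", "cuda:1"})


def run_session() -> None:
    with pool.reserve() as device:
        print(f"{threading.current_thread().name} is running on {device}")


threads = [threading.Thread(target=run_session) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()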
@@ -386,11 +384,6 @@ class ModelCache(ModelCacheBase[AnyModel]):
        if self.stats:
            self.stats.cleared = models_cleared
        gc.collect()
 
-        torch.cuda.empty_cache()
-        if choose_torch_device() == torch.device("mps"):
-            mps.empty_cache()
-
        self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
 
    def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
@@ -406,12 +399,12 @@ class ModelCache(ModelCacheBase[AnyModel]):
    @staticmethod
    def _get_execution_devices(devices: Optional[Set[torch.device]] = None) -> Set[torch.device]:
        if not devices:
-            default_device = choose_torch_device()
-            if default_device != torch.device("cuda"):
-                devices = {default_device}
-            else:
-                # we get here if the default device is cuda, and return each of the cuda devices.
+            if torch.cuda.is_available():
                devices = {torch.device(f"cuda:{x}") for x in range(0, torch.cuda.device_count())}
+            elif torch.backends.mps.is_available():
+                devices = {torch.device('mps')}
+            else:
+                devices = {torch.device('cpu')}
        return devices
 
    @staticmethod
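The replacement _get_execution_devices() above no longer funnels everything through choose_torch_device(): when CUDA is present it returns every CUDA card, otherwise it falls back to MPS or CPU. The same enumeration pulled out as a small free function for quick experimentation (a hypothetical helper, not part of the commit):

from typing import Optional, Set

import torch


def enumerate_execution_devices(devices: Optional[Set[torch.device]] = None) -> Set[torch.device]:
    """Return the explicit set of devices, or discover them: all CUDA cards, else MPS, else CPU."""
    if not devices:
        if torch.cuda.is_available():
            devices = {torch.device(f"cuda:{i}") for i in range(torch.cuda.device_count())}
        elif torch.backends.mps.is_available():
            devices = {torch.device("mps")}
        else:
            devices = {torch.device("cpu")}
    return devices


print(enumerate_execution_devices())
# e.g. {device(type='cuda', index=0), device(type='cuda', index=1)} on a two-GPU machine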
@@ -414,6 +414,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        else:
            attn_ctx = nullcontext()
 
+        # NOTE error is not here!
+        if conditioning_data.unconditioned_embeddings.embeds.device != \
+           conditioning_data.text_embeddings.embeds.device:
+            print('DEBUG; HERE IS THE ERROR 1')
+
        with attn_ctx:
            if callback is not None:
                callback(
@@ -428,6 +433,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
 
        # print("timesteps:", timesteps)
        for i, t in enumerate(self.progress_bar(timesteps)):
+            if conditioning_data.unconditioned_embeddings.embeds.device != \
+               conditioning_data.text_embeddings.embeds.device:
+                print('DEBUG; HERE IS THE ERROR 2')
+
            batched_t = t.expand(batch_size)
            step_output = self.step(
                batched_t,
@@ -472,6 +481,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
        t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
        ip_adapter_unet_patcher: Optional[UNetPatcher] = None,
    ):
 
        # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
        timestep = t[0]
        if additional_guidance is None:
@@ -5,6 +5,7 @@ from contextlib import contextmanager
 from typing import Any, Callable, Optional, Union
 
 import torch
+import threading
 from diffusers import UNet2DConditionModel
 from typing_extensions import TypeAlias
 
@@ -288,6 +289,8 @@ class InvokeAIDiffuserComponent:
            unconditioning, encoder_attention_mask = _pad_conditioning(unconditioning, max_len, encoder_attention_mask)
            conditioning, encoder_attention_mask = _pad_conditioning(conditioning, max_len, encoder_attention_mask)
 
+        if unconditioning.device != conditioning.device:
+            print(f'DEBUG: TID={threading.current_thread().ident}: Unconditioning device = {unconditioning.device}, conditioning device={conditioning.device}')
        return torch.cat([unconditioning, conditioning]), encoder_attention_mask
 
    # methods below are called from do_diffusion_step and should be considered private to this class.
@@ -12,28 +12,26 @@ from invokeai.app.services.config.config_default import get_config
 CPU_DEVICE = torch.device("cpu")
 CUDA_DEVICE = torch.device("cuda")
 MPS_DEVICE = torch.device("mps")
+RAM_CACHE = None # horrible hack
 
 
 def choose_torch_device() -> torch.device:
     """Convenience routine for guessing which GPU device to run model on."""
-    # """Temporarily modified to use the model manager's get_execution_device()"""
-    # try:
-    #     from invokeai.app.api.dependencies import ApiDependencies
-    #     model_manager = ApiDependencies.invoker.services.model_manager
-    #     device = model_manager.load.ram_cache.acquire_execution_device()
-    #     print(f'DEBUG choose_torch_device returning {device}')
-    #     return device
-    # except Exception:
-    config = get_config()
-    if config.device == "auto":
-        if torch.cuda.is_available():
-            return torch.device("cuda")
-        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            return torch.device("mps")
-        else:
-            return CPU_DEVICE
-    else:
-        return torch.device(config.device)
+    """Temporarily modified to use the model manager's get_execution_device()"""
+    global RAM_CACHE
+    try:
+        device = RAM_CACHE.get_execution_device()
+        return device
+    except (ValueError, AttributeError):
+        config = get_config()
+        if config.device == "auto":
+            if torch.cuda.is_available():
+                return torch.device("cuda")
+            if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+                return torch.device("mps")
+            else:
+                return CPU_DEVICE
+        else:
+            return torch.device(config.device)
 
 
 def get_torch_device_name() -> str: