diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index 7c3d43e3e2..5ca17f00fc 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -328,6 +328,25 @@ class ModelCache(object):
             refs = sys.getrefcount(cache_entry.model)
 
+            # manually clear local variable references from just-finished function calls;
+            # for some reason Python won't collect them immediately, even with an explicit gc.collect()
+            if refs > 2:
+                while True:
+                    cleared = False
+                    for referrer in gc.get_referrers(cache_entry.model):
+                        if type(referrer).__name__ == "frame":
+                            # RuntimeError: cannot clear an executing frame
+                            with suppress(RuntimeError):
+                                referrer.clear()
+                                cleared = True
+                            #break
+
+                    # repeat if the set of referrers changed (due to frame clear), else exit the loop
+                    if cleared:
+                        gc.collect()
+                    else:
+                        break
+
             device = cache_entry.model.device if hasattr(cache_entry.model, "device") else None
             self.logger.debug(f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}, refs: {refs}")
@@ -363,6 +382,9 @@ class ModelCache(object):
                 self.logger.debug(f'GPU VRAM freed: {(mem.vram_used/GIG):.2f} GB')
                 vram_in_use += mem.vram_used # note vram_used is negative
         self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
+
+        gc.collect()
+        torch.cuda.empty_cache()
 
     def _local_model_hash(self, model_path: Union[str, Path]) -> str:
         sha = hashlib.sha256()
diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py
index 30eecbd26f..c6dad3295f 100644
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -106,16 +106,16 @@
 providing information about a model defined in models.yaml. For example:
 
 >>> models = mgr.list_models()
 >>> json.dumps(models[0])
-{"path": "/home/lstein/invokeai-main/models/sd-1/controlnet/canny", 
- "model_format": "diffusers", 
- "name": "canny", 
- "base_model": "sd-1", 
+{"path": "/home/lstein/invokeai-main/models/sd-1/controlnet/canny",
+ "model_format": "diffusers",
+ "name": "canny",
+ "base_model": "sd-1",
  "type": "controlnet"
  }
 
 You can filter by model type and base model as shown here:
-    
+
 controlnets = mgr.list_models(model_type=ModelType.ControlNet,
                               base_model=BaseModelType.StableDiffusion1)
 for c in controlnets:
@@ -140,14 +140,14 @@ Layout of the `models` directory:
 
 models
 ├── sd-1
-│   ├── controlnet
-│   ├── lora
-│   ├── main
-│   └── embedding
+│ ├── controlnet
+│ ├── lora
+│ ├── main
+│ └── embedding
 ├── sd-2
-│   ├── controlnet
-│   ├── lora
-│   ├── main
+│ ├── controlnet
+│ ├── lora
+│ ├── main
 │ └── embedding
 └── core
     ├── face_reconstruction
@@ -195,7 +195,7 @@ name, base model, type and a dict of model attributes. See
 `invokeai/backend/model_management/models` for the attributes required
 by each model type.
 
-A model can be deleted using `del_model()`, providing the same 
+A model can be deleted using `del_model()`, providing the same
 identifying information as `get_model()`
 
 The `heuristic_import()` method will take a set of strings
@@ -304,7 +304,7 @@ class ModelManager(object):
         logger: types.ModuleType = logger,
     ):
         """
-        Initialize with the path to the models.yaml config file. 
+        Initialize with the path to the models.yaml config file.
         Optional parameters are the torch device type, precision, max_models,
         and sequential_offload boolean. Note that the default device
         type and precision are set up for a CUDA system running at half precision.
@@ -323,7 +323,7 @@ class ModelManager(object):
         self.config_meta = ConfigMeta(**config.pop("__metadata__"))
         # TODO: metadata not found
         # TODO: version check
-        
+
         self.app_config = InvokeAIAppConfig.get_config()
         self.logger = logger
         self.cache = ModelCache(
@@ -431,7 +431,7 @@ class ModelManager(object):
         :param model_name: symbolic name of the model in models.yaml
         :param model_type: ModelType enum indicating the type of model to return
        :param base_model: BaseModelType enum indicating the base model used by this model
-        :param submode_typel: an ModelType enum indicating the portion of 
+        :param submode_typel: an ModelType enum indicating the portion of
                               the model to retrieve (e.g. ModelType.Vae)
         """
         model_class = MODEL_CLASSES[base_model][model_type]
@@ -456,7 +456,7 @@ class ModelManager(object):
             raise ModelNotFoundException(f"Model not found - {model_key}")
 
         # vae/movq override
-        # TODO: 
+        # TODO:
         if submodel_type is not None and hasattr(model_config, submodel_type):
             override_path = getattr(model_config, submodel_type)
             if override_path:
@@ -489,7 +489,7 @@ class ModelManager(object):
             self.cache_keys[model_key].add(model_context.key)
 
         model_hash = "" # TODO:
-        
+
         return ModelInfo(
             context = model_context,
             name = model_name,
@@ -518,7 +518,7 @@ class ModelManager(object):
     def model_names(self) -> List[Tuple[str, BaseModelType, ModelType]]:
         """
-        Return a list of (str, BaseModelType, ModelType) corresponding to all models 
+        Return a list of (str, BaseModelType, ModelType) corresponding to all models
         known to the configuration.
         """
         return [(self.parse_key(x)) for x in self.models.keys()]
@@ -692,12 +692,12 @@ class ModelManager(object):
         if new_name is None and new_base is None:
             self.logger.error("rename_model() called with neither a new_name nor a new_base. {model_name} unchanged.")
             return
-        
+
         model_key = self.create_key(model_name, base_model, model_type)
         model_cfg = self.models.get(model_key, None)
         if not model_cfg:
             raise ModelNotFoundException(f"Unknown model: {model_key}")
-        
+
         old_path = self.app_config.root_path / model_cfg.path
         new_name = new_name or model_name
         new_base = new_base or base_model
@@ -726,7 +726,7 @@ class ModelManager(object):
         self.models.pop(model_key, None) # delete
         self.models[new_key] = model_cfg
         self.commit()
-        
+
     def convert_model (
         self,
         model_name: str,
@@ -776,12 +776,12 @@ class ModelManager(object):
             # something went wrong, so don't leave dangling diffusers model in directory or it will cause a duplicate model error!
             rmtree(new_diffusers_path)
             raise
-        
+
         if checkpoint_path.exists() and checkpoint_path.is_relative_to(self.app_config.models_path):
             checkpoint_path.unlink()
-        
+
         return result
-        
+
     def search_models(self, search_folder):
         self.logger.info(f"Finding Models In: {search_folder}")
         models_folder_ckpt = Path(search_folder).glob("**/*.ckpt")
@@ -824,10 +824,14 @@ class ModelManager(object):
         assert config_file_path is not None,'no config file path to write to'
         config_file_path = self.app_config.root_path / config_file_path
         tmpfile = os.path.join(os.path.dirname(config_file_path), "new_config.tmp")
-        with open(tmpfile, "w", encoding="utf-8") as outfile:
-            outfile.write(self.preamble())
-            outfile.write(yaml_str)
-        os.replace(tmpfile, config_file_path)
+        try:
+            with open(tmpfile, "w", encoding="utf-8") as outfile:
+                outfile.write(self.preamble())
+                outfile.write(yaml_str)
+            os.replace(tmpfile, config_file_path)
+        except OSError as err:
+            self.logger.warning(f"Could not modify the config file at {config_file_path}")
+            self.logger.warning(err)
 
     def preamble(self) -> str:
         """
@@ -977,13 +981,12 @@ class ModelManager(object):
         # avoid circular import here
         from invokeai.backend.install.model_install_backend import ModelInstall
         successfully_installed = dict()
-        
+
         installer = ModelInstall(config = self.app_config,
                                  prediction_type_helper = prediction_type_helper,
                                  model_manager = self)
         for thing in items_to_import:
             installed = installer.heuristic_import(thing)
             successfully_installed.update(installed)
-        self.commit() 
+        self.commit()
         return successfully_installed
-
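
Note on the frame-clearing hunk in `model_cache.py`: a model that was passed into a function which raised (or whose frame is otherwise still reachable, for example through a stored traceback) stays referenced by that frame's local variables, so `gc.collect()` alone cannot free it. The sketch below reproduces the technique in isolation; `BigModel`, `failing_call` and `clear_frame_referrers` are illustrative names, not part of the InvokeAI codebase — only `gc.get_referrers()`, `frame.clear()` and `contextlib.suppress` come from the standard library.

```python
import gc
import sys
from contextlib import suppress


class BigModel:
    """Stand-in for a large model object that we want fully released."""


def failing_call(model: BigModel) -> None:
    local_ref = model  # this local lives on in the traceback's frame
    raise ValueError("simulated failure")


def clear_frame_referrers(obj) -> None:
    """Drop the locals of any non-executing frame that still refers to obj."""
    for referrer in gc.get_referrers(obj):
        if type(referrer).__name__ == "frame":
            # frame.clear() raises RuntimeError for a frame that is still
            # executing (including this function's own frame), so skip those.
            with suppress(RuntimeError):
                referrer.clear()


if __name__ == "__main__":
    model = BigModel()

    saved_exc = None
    try:
        failing_call(model)
    except ValueError as err:
        saved_exc = err  # keeping the exception keeps its traceback frame alive

    print("refs before:", sys.getrefcount(model))  # inflated by the frame's locals
    clear_frame_referrers(model)
    gc.collect()
    print("refs after:", sys.getrefcount(model))   # the frame's references are gone
```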
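The second `model_cache.py` hunk ends the offload pass with `gc.collect()` followed by `torch.cuda.empty_cache()`, and the order matters: collection drops the last Python references so the tensors' allocations return to PyTorch's caching allocator, and only then can `empty_cache()` hand those cached blocks back to the driver. A minimal sketch of that pairing, guarded for CPU-only machines (the helper name is illustrative):

```python
import gc

import torch


def release_cached_vram() -> None:
    # Drop unreachable Python objects first so their CUDA tensors are actually
    # returned to PyTorch's caching allocator ...
    gc.collect()
    # ... then release the allocator's unused cached blocks back to the driver,
    # so other processes (and tools like nvidia-smi) see the VRAM as free.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```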
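The `commit()` change in `model_manager.py` keeps the existing write-to-temp-file-then-`os.replace()` pattern but wraps it in `try/except OSError`, so an unwritable config location degrades to a logged warning instead of a crash. A standalone sketch of that pattern, assuming a plain string payload and a module-level logger (the function name is hypothetical):

```python
import logging
import os

logger = logging.getLogger(__name__)


def write_config_safely(config_file_path: str, contents: str) -> None:
    # Write to a sibling temp file first; os.replace() then swaps it into place
    # atomically on the same filesystem, so readers never see a half-written file.
    tmpfile = os.path.join(os.path.dirname(config_file_path), "new_config.tmp")
    try:
        with open(tmpfile, "w", encoding="utf-8") as outfile:
            outfile.write(contents)
        os.replace(tmpfile, config_file_path)
    except OSError as err:
        # As in the hunk above: an unwritable location is reported, not fatal,
        # and the previous config file is left untouched.
        logger.warning(f"Could not modify the config file at {config_file_path}")
        logger.warning(err)
```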