[mm] Do not write diffusers model to disk when convert_cache set to zero (#6072)

* pass model config to _load_model

* make conversion work again

* do not write diffusers to disk when convert_cache set to 0 (see the sketch after this list)

* adding the same model to the cache twice is a no-op, not an assertion error

* fix issues identified by psychedelicious during pr review

* following conversion, avoid redundant read of cached submodels

* fix error introduced while merging
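
A minimal sketch of the convert-cache decision described in the bullets above. The helper names (`convert_checkpoint_to_diffusers`, `load_checkpoint`) and the size check are hypothetical illustrations, not the actual InvokeAI API: when the convert cache is capped at 0, the converted diffusers model stays in memory and is never written to disk.

```python
from pathlib import Path
from typing import Any


def convert_checkpoint_to_diffusers(checkpoint_path: Path) -> Any:
    """Stand-in for the real conversion step; returns an in-memory diffusers model."""
    ...


def load_checkpoint(checkpoint_path: Path, convert_cache_max_size_gb: float) -> Any:
    # Conversion always happens in memory first.
    model = convert_checkpoint_to_diffusers(checkpoint_path)
    if convert_cache_max_size_gb > 0:
        # Persist the converted model only when an on-disk convert cache is configured.
        cache_dir = Path("convert_cache") / checkpoint_path.stem
        model.save_pretrained(cache_dir)  # diffusers models/pipelines expose save_pretrained()
    return model
```

This mirrors the bullets above: the conversion output is used directly, avoiding both the disk write and the redundant re-read of submodels.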

---------

Co-authored-by: Lincoln Stein <lstein@gmail.com>
Lincoln Stein
2024-03-29 16:11:08 -04:00
committed by GitHub
parent 0ac1c0f339
commit 3d6d89feb4
14 changed files with 147 additions and 133 deletions

@@ -122,6 +122,11 @@ class ModelCache(ModelCacheBase[AnyModel]):
         """Return the cap on cache size."""
         return self._max_cache_size
 
+    @max_cache_size.setter
+    def max_cache_size(self, value: float) -> None:
+        """Set the cap on cache size."""
+        self._max_cache_size = value
+
     @property
     def stats(self) -> Optional[CacheStats]:
         """Return collected CacheStats object."""
@@ -157,8 +162,9 @@ class ModelCache(ModelCacheBase[AnyModel]):
     ) -> None:
         """Store model under key and optional submodel_type."""
         key = self._make_cache_key(key, submodel_type)
-        assert key not in self._cached_models
+        if key in self._cached_models:
+            return
         self.make_room(size)
         cache_record = CacheRecord(key, model, size)
         self._cached_models[key] = cache_record
         self._cache_stack.append(key)
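
The behavioral change above is easiest to see in isolation: putting a model under a key that is already cached now returns silently instead of raising an AssertionError. A hedged, self-contained sketch with a simplified CacheRecord and string keys (the real put_model also takes a submodel_type and calls make_room):

```python
from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class CacheRecord:
    key: str
    model: Any
    size: int


class TinyCache:
    """Toy model cache illustrating the idempotent put_model behavior."""

    def __init__(self) -> None:
        self._cached_models: Dict[str, CacheRecord] = {}
        self._cache_stack: List[str] = []

    def put_model(self, key: str, model: Any, size: int) -> None:
        # Adding the same model to the cache twice is a no-op, not an assertion error.
        if key in self._cached_models:
            return
        self._cached_models[key] = CacheRecord(key, model, size)
        self._cache_stack.append(key)


cache = TinyCache()
cache.put_model("main/unet", object(), size=1)
cache.put_model("main/unet", object(), size=1)  # silently ignored instead of asserting
assert len(cache._cache_stack) == 1
```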
@@ -405,6 +411,8 @@ class ModelCache(ModelCacheBase[AnyModel]):
         #
         # Keep in mind that gc is only responsible for handling reference cycles. Most objects should be cleaned up
         # immediately when their reference count hits 0.
+        if self.stats:
+            self.stats.cleared = models_cleared
         gc.collect()
         torch.cuda.empty_cache()
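
The stats update above only runs when a CacheStats object has been attached, so callers that never request statistics pay nothing. A self-contained sketch of the bookkeeping, with the eviction policy reduced to popping entries until a target count is reached:

```python
import gc
from dataclasses import dataclass
from typing import Optional


@dataclass
class CacheStats:
    cleared: int = 0  # number of models evicted by the most recent make_room() call


class TinyCache:
    def __init__(self, stats: Optional[CacheStats] = None) -> None:
        self.stats = stats
        self._models = {"unet": 1, "vae": 2, "text_encoder": 3}

    def make_room(self, keep: int) -> None:
        models_cleared = 0
        while len(self._models) > keep:
            self._models.popitem()
            models_cleared += 1
        if self.stats:
            self.stats.cleared = models_cleared  # record evictions only when stats are enabled
        gc.collect()


stats = CacheStats()
cache = TinyCache(stats)
cache.make_room(keep=1)
print(stats.cleared)  # 2
```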