when unlocking models, offload_unlocked_models should prune to vram limit only (#6450)

Co-authored-by: Lincoln Stein <lstein@gmail.com>
Lincoln Stein 2024-05-28 23:01:21 -04:00 committed by GitHub
parent df91d1b849
commit 21a60af881


@@ -60,5 +60,5 @@ class ModelLocker(ModelLockerBase):
         self._cache_entry.unlock()
         if not self._cache.lazy_offloading:
-            self._cache.offload_unlocked_models(self._cache_entry.size)
+            self._cache.offload_unlocked_models(0)
             self._cache.print_cuda_stats()
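
A minimal sketch of the behavior this change relies on, assuming offload_unlocked_models(size_required) evicts unlocked models until size_required additional bytes fit under the cache's VRAM limit, so passing 0 prunes back down to the limit only instead of also freeing room equal to the just-unlocked model's size. The _Cache and _Entry classes below are hypothetical simplifications for illustration, not InvokeAI's actual cache implementation.

from dataclasses import dataclass, field
from typing import List


@dataclass
class _Entry:
    """Hypothetical cache entry: a model resident in VRAM with a size and lock count."""
    size: int
    locks: int = 0

    @property
    def locked(self) -> bool:
        return self.locks > 0


@dataclass
class _Cache:
    """Simplified stand-in for the model cache; names and logic are illustrative only."""
    max_vram: int
    entries: List[_Entry] = field(default_factory=list)

    def vram_in_use(self) -> int:
        return sum(e.size for e in self.entries)

    def offload_unlocked_models(self, size_required: int) -> None:
        # Evict unlocked models until `size_required` additional bytes would fit
        # under the VRAM limit. With size_required=0 this only prunes back down
        # to the configured limit and frees nothing extra.
        for entry in list(self.entries):
            if self.vram_in_use() + size_required <= self.max_vram:
                break
            if not entry.locked:
                self.entries.remove(entry)  # the real cache would move the model to RAM


# Old call: unlocking a model of size N also forced N extra bytes to be freed.
# New call (0): only models pushing the cache over its VRAM limit are offloaded.
cache = _Cache(max_vram=8, entries=[_Entry(size=4), _Entry(size=3), _Entry(size=2)])
cache.offload_unlocked_models(0)
print(cache.vram_in_use())  # 5: pruned just enough to get back under the 8-unit limit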