Only compile the model the first time it is loaded into the cache :-)

This commit is contained in:
Lincoln Stein
2023-12-20 22:40:56 -05:00
parent 9c1d250665
commit 6cb3031c09

View File

@@ -247,6 +247,9 @@ class ModelCache(object):
snapshot_before = self._capture_memory_snapshot()
with skip_torch_weight_init():
model = model_info.get_model(child_type=submodel, torch_dtype=self.precision)
if sfast_available and submodel:
model = self._compile_model(model, submodel)
snapshot_after = self._capture_memory_snapshot()
end_load_time = time.time()
@@ -284,8 +287,6 @@ class ModelCache(object):
self._cache_stack.remove(key)
self._cache_stack.append(key)
if sfast_available and submodel:
cache_entry.model = self._compile_model(cache_entry.model, submodel)
return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
def _move_model_to_device(self, key: str, target_device: torch.device):
@@ -336,7 +337,6 @@ class ModelCache(object):
config = CompilationConfig.Default()
config.enable_xformers = True
config.enable_triton = True
config.enable_jit_freeze = True
config.enable_cuda_graph = True
if model_type == SubModelType("unet"):
return compile_unet(model, config)