diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index da850075aa..8080d4cade 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -247,6 +247,9 @@ class ModelCache(object):
             snapshot_before = self._capture_memory_snapshot()
             with skip_torch_weight_init():
                 model = model_info.get_model(child_type=submodel, torch_dtype=self.precision)
+            if sfast_available and submodel:
+                model = self._compile_model(model, submodel)
+
             snapshot_after = self._capture_memory_snapshot()
             end_load_time = time.time()
 
@@ -284,8 +287,6 @@ class ModelCache(object):
             self._cache_stack.remove(key)
         self._cache_stack.append(key)
 
-        if sfast_available and submodel:
-            cache_entry.model = self._compile_model(cache_entry.model, submodel)
         return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
 
     def _move_model_to_device(self, key: str, target_device: torch.device):
@@ -336,7 +337,6 @@ class ModelCache(object):
         config = CompilationConfig.Default()
         config.enable_xformers = True
         config.enable_triton = True
-        config.enable_jit_freeze = True
         config.enable_cuda_graph = True
         if model_type == SubModelType("unet"):
             return compile_unet(model, config)
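
For context, here is a minimal sketch of how the `_compile_model` helper that this diff calls might be wired up with stable-fast. Only the `unet` branch and the config flags are visible in the hunks above; the `sfast_available` guard, the import layout, and the `vae` branch are assumptions inferred from the diff, not taken from the full file:

```python
# Sketch only: sfast_available and the _compile_model signature are inferred
# from the diff; the actual module may differ.
try:
    # Module path per stable-fast >= 1.0; older releases exposed the same
    # symbols from sfast.compilers.stable_diffusion_pipeline_compiler.
    from sfast.compilers.diffusion_pipeline_compiler import (
        CompilationConfig,
        compile_unet,
        compile_vae,
    )

    sfast_available = True
except ImportError:
    sfast_available = False

from invokeai.backend.model_management.models import SubModelType


class ModelCache:
    def _compile_model(self, model, model_type):
        """Compile a freshly loaded submodel with stable-fast.

        Returns the model unchanged if its type has no compiler.
        """
        config = CompilationConfig.Default()
        config.enable_xformers = True
        config.enable_triton = True
        # enable_jit_freeze is deliberately omitted: this diff removes it,
        # plausibly because freezing inlines weights into the TorchScript
        # graph and blocks later in-place weight patching (an assumption,
        # not stated in the diff itself).
        config.enable_cuda_graph = True
        if model_type == SubModelType("unet"):
            return compile_unet(model, config)
        if model_type == SubModelType("vae"):
            return compile_vae(model, config)
        return model
```

Net effect of the move: compilation now happens once, in the disk-load path, so the compiled module is what gets cached; previously `_compile_model` ran on the cache-retrieval path and was re-applied every time the submodel was fetched.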