diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index da850075aa..8080d4cade 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -247,6 +247,9 @@ class ModelCache(object):
             snapshot_before = self._capture_memory_snapshot()
             with skip_torch_weight_init():
                 model = model_info.get_model(child_type=submodel, torch_dtype=self.precision)
+            if sfast_available and submodel:
+                model = self._compile_model(model, submodel)
+
             snapshot_after = self._capture_memory_snapshot()
             end_load_time = time.time()
 
@@ -284,8 +287,6 @@ class ModelCache(object):
             self._cache_stack.remove(key)
         self._cache_stack.append(key)
 
-        if sfast_available and submodel:
-            cache_entry.model = self._compile_model(cache_entry.model, submodel)
         return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
 
     def _move_model_to_device(self, key: str, target_device: torch.device):
@@ -336,7 +337,6 @@ class ModelCache(object):
         config = CompilationConfig.Default()
         config.enable_xformers = True
         config.enable_triton = True
-        config.enable_jit_freeze = True
         config.enable_cuda_graph = True
         if model_type == SubModelType("unet"):
             return compile_unet(model, config)
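
For context, here is a minimal sketch of how the `_compile_model` helper that this diff calls might be wired up with stable-fast. Only the `unet` branch and the config flags are visible in the hunks above; the `sfast_available` guard, the import layout, and the `vae` branch are assumptions inferred from the diff, not taken from the full file:

```python
# Sketch only: sfast_available and the _compile_model signature are inferred
# from the diff; the actual module may differ.
try:
    # Module path per stable-fast >= 1.0; older releases exposed the same
    # symbols from sfast.compilers.stable_diffusion_pipeline_compiler.
    from sfast.compilers.diffusion_pipeline_compiler import (
        CompilationConfig,
        compile_unet,
        compile_vae,
    )

    sfast_available = True
except ImportError:
    sfast_available = False

from invokeai.backend.model_management.models import SubModelType


class ModelCache:
    def _compile_model(self, model, model_type):
        """Compile a freshly loaded submodel with stable-fast.

        Returns the model unchanged if its type has no compiler.
        """
        config = CompilationConfig.Default()
        config.enable_xformers = True
        config.enable_triton = True
        # enable_jit_freeze is deliberately omitted: this diff removes it,
        # plausibly because freezing inlines weights into the TorchScript
        # graph and blocks later in-place weight patching (an assumption,
        # not stated in the diff itself).
        config.enable_cuda_graph = True
        if model_type == SubModelType("unet"):
            return compile_unet(model, config)
        if model_type == SubModelType("vae"):
            return compile_vae(model, config)
        return model
```

Net effect of the move: compilation now happens once, in the disk-load path, so the compiled module is what gets cached; previously `_compile_model` ran on the cache-retrieval path and was re-applied every time the submodel was fetched.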