diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py
index 93d763428b..8b947b9a8d 100644
--- a/invokeai/app/invocations/flux_text_to_image.py
+++ b/invokeai/app/invocations/flux_text_to_image.py
@@ -79,8 +79,6 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
         inference_dtype = torch.bfloat16
 
         # Prepare input noise.
-        # TODO(ryand): Does the seed behave the same on different devices? Should we re-implement this to always use a
-        # CPU RNG?
         x = get_noise(
             num_samples=1,
             height=self.height,
diff --git a/invokeai/backend/flux/sampling.py b/invokeai/backend/flux/sampling.py
index 9917d63a8b..82abc0e561 100644
--- a/invokeai/backend/flux/sampling.py
+++ b/invokeai/backend/flux/sampling.py
@@ -20,16 +20,19 @@ def get_noise(
     dtype: torch.dtype,
     seed: int,
 ):
+    # We always generate noise on the same device and dtype then cast to ensure consistency across devices/dtypes.
+    rand_device = "cpu"
+    rand_dtype = torch.float16
     return torch.randn(
         num_samples,
         16,
         # allow for packing
         2 * math.ceil(height / 16),
         2 * math.ceil(width / 16),
-        device=device,
-        dtype=dtype,
-        generator=torch.Generator(device=device).manual_seed(seed),
-    )
+        device=rand_device,
+        dtype=rand_dtype,
+        generator=torch.Generator(device=rand_device).manual_seed(seed),
+    ).to(device=device, dtype=dtype)
 
 
 def prepare(t5: HFEncoder, clip: HFEncoder, img: Tensor, prompt: str | list[str]) -> dict[str, Tensor]: