mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
In exception handlers, clear the torch CUDA cache (if we're using CUDA) to free up memory for other programs using the GPU and to reduce fragmentation. (#2549)
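The same pattern is applied in three places below: when generation is interrupted or fails, PyTorch's cached GPU allocations are released so other processes can use the VRAM and the allocator restarts from a less fragmented state. A minimal self-contained sketch of the pattern (the run_generation callable and device argument are illustrative stand-ins, not names from this commit):

import torch

def generate_safely(run_generation, device):
    # Run one generation step, releasing cached CUDA memory on failure.
    try:
        return run_generation()
    except (KeyboardInterrupt, RuntimeError):
        # empty_cache() returns unused cached blocks to the driver; it is
        # only meaningful on CUDA devices, hence the device check.
        if device.type == "cuda":
            torch.cuda.empty_cache()
        raise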
@@ -1208,12 +1208,18 @@ class InvokeAIWebServer:
             )

         except KeyboardInterrupt:
+            # Clear the CUDA cache on an exception
+            self.empty_cuda_cache()
             self.socketio.emit("processingCanceled")
             raise
         except CanceledException:
+            # Clear the CUDA cache on an exception
+            self.empty_cuda_cache()
             self.socketio.emit("processingCanceled")
             pass
         except Exception as e:
+            # Clear the CUDA cache on an exception
+            self.empty_cuda_cache()
             print(e)
             self.socketio.emit("error", {"message": (str(e))})
             print("\n")
@@ -1221,6 +1227,12 @@ class InvokeAIWebServer:
             traceback.print_exc()
             print("\n")

+    def empty_cuda_cache(self):
+        if self.generate.device.type == "cuda":
+            import torch.cuda
+
+            torch.cuda.empty_cache()
+
     def parameters_to_generated_image_metadata(self, parameters):
         try:
             # top-level metadata minus `image` or `images`
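Because empty_cuda_cache() checks the configured device itself, call sites need no device check of their own and the method is a harmless no-op on CPU or MPS. A roughly equivalent standalone helper, sketched with torch.cuda.is_available() as the guard instead of the server's device setting (an assumption, not code from this commit):

import torch

def empty_cuda_cache() -> None:
    # is_available() is False on CPU-only builds, so this is always safe to call.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()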
@@ -565,11 +565,19 @@ class Generate:
                               image_callback = image_callback)

         except KeyboardInterrupt:
+            # Clear the CUDA cache on an exception
+            if self._has_cuda():
+                torch.cuda.empty_cache()
+
             if catch_interrupts:
                 print('**Interrupted** Partial results will be returned.')
             else:
                 raise KeyboardInterrupt
         except RuntimeError:
+            # Clear the CUDA cache on an exception
+            if self._has_cuda():
+                torch.cuda.empty_cache()
+
             print(traceback.format_exc(), file=sys.stderr)
             print('>> Could not generate image.')

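The RuntimeError branch matters because CUDA out-of-memory errors surface as RuntimeError in PyTorch; clearing the cache there gives a subsequent, smaller attempt a better chance of fitting. A hedged sketch of that retry idea (the generate callable and halving policy are assumptions, not InvokeAI behavior):

import torch

def generate_with_oom_retry(generate, batch_size):
    # On CUDA OOM, clear the cache and retry with half the batch.
    try:
        return generate(batch_size)
    except RuntimeError as e:
        if 'out of memory' not in str(e) or batch_size <= 1:
            raise
        torch.cuda.empty_cache()  # drop cached blocks before retrying
        return generate_with_oom_retry(generate, batch_size // 2)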
@@ -65,6 +65,10 @@ class Txt2Img2Img(Generator):
             mode="bilinear"
         )

+        # Free up memory from the last generation.
+        if self.model.device.type == 'cuda':
+            torch.cuda.empty_cache()
+
         second_pass_noise = self.get_noise_like(resized_latents)

         verbosity = get_verbosity()
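The effect of these calls is visible in PyTorch's own accounting: empty_cache() lowers the memory reserved by the caching allocator (torch.cuda.memory_reserved) without touching live tensors (torch.cuda.memory_allocated). A small sketch to observe this on a CUDA machine:

import torch

if torch.cuda.is_available():
    x = torch.empty(1024, 1024, device='cuda')
    del x  # the tensor dies, but its block stays in PyTorch's cache
    print('reserved before:', torch.cuda.memory_reserved())
    torch.cuda.empty_cache()  # hand cached, unused blocks back to the driver
    print('reserved after: ', torch.cuda.memory_reserved())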