From 8dc7f119e50e9d97307d0b83dfc86f97cb43d9e8 Mon Sep 17 00:00:00 2001
From: Kyle Schouviller
Date: Thu, 10 Nov 2022 21:43:56 -0800
Subject: [PATCH] Fix performance issue introduced by torch cuda cache clear
 during generation

---
 ldm/modules/attention.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py
index 05f6183029..94bb8a2916 100644
--- a/ldm/modules/attention.py
+++ b/ldm/modules/attention.py
@@ -282,7 +282,6 @@ class CrossAttention(nn.Module):
     def get_attention_mem_efficient(self, q, k, v):
         if q.device.type == 'cuda':
-            torch.cuda.empty_cache()
             #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device))
             return self.einsum_op_cuda(q, k, v)
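
For context, a rough sketch of why the removed call was expensive: torch.cuda.empty_cache() releases the caching allocator's unused blocks back to the CUDA driver, so clearing the cache on every attention step forces later allocations to re-request memory from the driver and can stall generation. The hypothetical micro-benchmark below (attention_step and the tensor shapes are illustrative, not taken from ldm/modules/attention.py) sketches how to measure that per-step overhead:

    # Hypothetical micro-benchmark: times an einsum attention step with and without
    # a per-call torch.cuda.empty_cache(). Assumes a CUDA device is available; the
    # shapes are illustrative, not the ones used in ldm/modules/attention.py.
    import time

    import torch


    def attention_step(q, k, v, clear_cache):
        if clear_cache:
            torch.cuda.empty_cache()  # releases cached blocks, forcing re-allocation later
        sim = torch.einsum('b i d, b j d -> b i j', q, k).softmax(dim=-1)
        return torch.einsum('b i j, b j d -> b i d', sim, v)


    if torch.cuda.is_available():
        q, k, v = (torch.randn(8, 4096, 64, device='cuda') for _ in range(3))
        for clear in (True, False):
            torch.cuda.synchronize()
            start = time.perf_counter()
            for _ in range(50):
                attention_step(q, k, v, clear_cache=clear)
            torch.cuda.synchronize()
            print(f'clear_cache={clear}: {time.perf_counter() - start:.3f}s for 50 steps')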