Fix performance issue introduced by torch cuda cache clear during generation

Kyle Schouviller 2022-11-10 21:43:56 -08:00 committed by Lincoln Stein
parent fa3670270e
commit b116715490


@@ -282,7 +282,6 @@ class CrossAttention(nn.Module):
     def get_attention_mem_efficient(self, q, k, v):
         if q.device.type == 'cuda':
-            torch.cuda.empty_cache()
             #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device))
             return self.einsum_op_cuda(q, k, v)
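
For context, the sketch below is a minimal, hypothetical micro-benchmark (not part of this commit) illustrating the cost that the removed torch.cuda.empty_cache() call incurs when it runs on every attention invocation during generation. Emptying the cache returns unused blocks to the CUDA driver, so the next allocation must go through cudaMalloc again instead of reusing a cached block. The tensor shapes and the bench helper are illustrative assumptions, not code from the repository.

import time

import torch

def bench(clear_cache: bool, steps: int = 200) -> float:
    # Attention-style inputs: (heads, sequence, head_dim); sizes are arbitrary.
    q = torch.randn(8, 1024, 64, device='cuda')
    k = torch.randn(8, 1024, 64, device='cuda')
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(steps):
        if clear_cache:
            # The call this commit removes from the hot path: releases all
            # cached allocator blocks, forcing fresh cudaMalloc calls on the
            # next allocation.
            torch.cuda.empty_cache()
        # Similarity matrix, analogous to the workload in einsum_op_cuda.
        _ = torch.einsum('b i d, b j d -> b i j', q, k)
    torch.cuda.synchronize()
    return time.perf_counter() - start

if __name__ == '__main__' and torch.cuda.is_available():
    print(f'without empty_cache: {bench(False):.3f}s')
    print(f'with empty_cache:    {bench(True):.3f}s')

Run on a CUDA machine, the clear_cache=True variant should be measurably slower per step, which is consistent with the slowdown this commit fixes by dropping the cache clear from get_attention_mem_efficient.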