~7% speedup (1.57 → 1.69 it/s) from switching to += in ldm.modules.attention. (#482)

Tested on an 8 GB NVIDIA eGPU setup, so YMMV.
512x512 output; peak VRAM usage stays the same.
Mihai 2022-09-10 16:58:07 +03:00 committed by Armando C. Santisbon
parent 2cf8de9234
commit 529fc57f2b


@@ -235,9 +235,9 @@ class BasicTransformerBlock(nn.Module):
     def _forward(self, x, context=None):
         x = x.contiguous() if x.device.type == 'mps' else x
-        x = self.attn1(self.norm1(x)) + x
-        x = self.attn2(self.norm2(x), context=context) + x
-        x = self.ff(self.norm3(x)) + x
+        x += self.attn1(self.norm1(x))
+        x += self.attn2(self.norm2(x), context=context)
+        x += self.ff(self.norm3(x))
         return x
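
For context, a minimal sketch of the difference between the two residual-add forms (not part of the commit; the helper names and sizes are illustrative). Under torch.no_grad(), as during inference, both forms produce the same values, but the in-place form reuses x's existing storage instead of allocating a fresh output tensor for every residual connection. Note that in-place ops can interfere with autograd if the overwritten tensor is needed for the backward pass, so this trade-off mainly pays off at inference time.

# Illustrative sketch only; function names are hypothetical.
import torch

def residual_out_of_place(x, f):
    # Allocates a new tensor holding f(x) + x, then rebinds x to it.
    return f(x) + x

def residual_in_place(x, f):
    # Adds f(x) into x's existing storage; no new output tensor is created.
    x += f(x)
    return x

if __name__ == "__main__":
    with torch.no_grad():
        f = torch.nn.Linear(64, 64)
        x = torch.randn(4096, 64)
        a = residual_out_of_place(x.clone(), f)
        b = residual_in_place(x.clone(), f)
        print(torch.allclose(a, b))  # True: same result, one fewer allocation per add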