slight efficiency gain by using += in attention.py

This commit is contained in:
Lincoln Stein 2022-09-11 16:03:37 -04:00
parent b86a1deb00
commit 7708f4fb98

View File

@@ -244,9 +244,9 @@ class BasicTransformerBlock(nn.Module):
def _forward(self, x, context=None):
    """Apply the block's three residual sub-layers to ``x`` and return the result.

    In order: ``attn1`` on ``norm1(x)``, ``attn2`` on ``norm2(x)`` (with
    ``context`` forwarded as a keyword argument), and ``ff`` on ``norm3(x)``.
    Each sub-layer's output is added back onto that sub-layer's input.

    Args:
        x: Input exposing ``device.type`` and ``contiguous()`` -- presumably
            a ``torch.Tensor``; TODO confirm against callers.
        context: Optional value handed unchanged to ``attn2``.

    Returns:
        The sum produced by the last residual add. This method never writes
        into the object the caller passed as ``x``.
    """
    # Only inputs on the 'mps' device get an explicit contiguous() call.
    x = x.contiguous() if x.device.type == 'mps' else x

    # Each residual add is applied exactly once and deliberately out-of-place
    # (``x = ... + x`` rather than ``x += ...``). An in-place add would write
    # into the tensor that was just fed to the norm layer, which can
    # invalidate values autograd saved for backward and, when no copy was
    # made above, overwrite the caller's input.
    x = self.attn1(self.norm1(x)) + x
    x = self.attn2(self.norm2(x), context=context) + x
    x = self.ff(self.norm3(x)) + x
    return x