Enable even larger images with one simple torch.nn.functional.silu import (#653)

Fixes:

  File "stable-diffusion/ldm/modules/diffusionmodules/model.py", line 37, in nonlinearity
    return x*torch.sigmoid(x)
RuntimeError: CUDA out of memory. Tried to allocate 1.56 GiB [..]

Now up to 1536x1280 is possible on 8GB VRAM.
Also remove unused SiLU class.
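The saving comes from avoiding an intermediate tensor: x*torch.sigmoid(x) materializes the full sigmoid result before the multiply, while torch.nn.functional.silu runs as a single fused op that writes only the output. A minimal sketch of how one might measure the difference (assumes a CUDA device; the tensor shape and the peak_mem helper are illustrative, not part of this commit):

    import torch
    from torch.nn.functional import silu

    def peak_mem(fn, x):
        # Reset the allocator's high-water mark, run fn, report peak bytes.
        torch.cuda.reset_peak_memory_stats()
        fn(x)
        torch.cuda.synchronize()
        return torch.cuda.max_memory_allocated()

    x = torch.randn(1, 512, 192, 160, device="cuda")
    with torch.no_grad():
        manual = peak_mem(lambda t: t * torch.sigmoid(t), x)  # sigmoid result and product both alive at peak
        fused = peak_mem(silu, x)                             # only the output tensor is allocated
    print(f"manual swish: {manual / 2**20:.0f} MiB, F.silu: {fused / 2**20:.0f} MiB")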
Mihai authored 2022-09-18 01:03:52 +03:00, committed by GitHub
parent ad292b095d, commit 071f65a892
2 changed files with 11 additions and 21 deletions

ldm/modules/diffusionmodules/model.py

@@ -3,6 +3,7 @@ import gc
 import math
 import torch
 import torch.nn as nn
+from torch.nn.functional import silu
 import numpy as np
 from einops import rearrange
@@ -32,11 +33,6 @@ def get_timestep_embedding(timesteps, embedding_dim):
     return emb
 
 
-def nonlinearity(x):
-    # swish
-    return x*torch.sigmoid(x)
-
-
 def Normalize(in_channels, num_groups=32):
     return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
@@ -122,14 +118,14 @@ class ResnetBlock(nn.Module):
     def forward(self, x, temb):
         h = self.norm1(x)
-        h = nonlinearity(h)
+        h = silu(h)
         h = self.conv1(h)
 
         if temb is not None:
-            h = h + self.temb_proj(nonlinearity(temb))[:,:,None,None]
+            h = h + self.temb_proj(silu(temb))[:,:,None,None]
 
         h = self.norm2(h)
-        h = nonlinearity(h)
+        h = silu(h)
         h = self.dropout(h)
         h = self.conv2(h)
@@ -368,7 +364,7 @@ class Model(nn.Module):
             assert t is not None
             temb = get_timestep_embedding(t, self.ch)
             temb = self.temb.dense[0](temb)
-            temb = nonlinearity(temb)
+            temb = silu(temb)
             temb = self.temb.dense[1](temb)
         else:
             temb = None
@@ -402,7 +398,7 @@ class Model(nn.Module):
         # end
         h = self.norm_out(h)
-        h = nonlinearity(h)
+        h = silu(h)
         h = self.conv_out(h)
         return h
@@ -499,7 +495,7 @@ class Encoder(nn.Module):
         # end
         h = self.norm_out(h)
-        h = nonlinearity(h)
+        h = silu(h)
         h = self.conv_out(h)
         return h
@@ -611,7 +607,7 @@ class Decoder(nn.Module):
             return h
 
         h = self.norm_out(h)
-        h = nonlinearity(h)
+        h = silu(h)
         h = self.conv_out(h)
         if self.tanh_out:
             h = torch.tanh(h)
@@ -649,7 +645,7 @@ class SimpleDecoder(nn.Module):
             x = layer(x)
 
         h = self.norm_out(x)
-        h = nonlinearity(h)
+        h = silu(h)
         x = self.conv_out(h)
         return x
@@ -697,7 +693,7 @@ class UpsampleDecoder(nn.Module):
             if i_level != self.num_resolutions - 1:
                 h = self.upsample_blocks[k](h)
         h = self.norm_out(h)
-        h = nonlinearity(h)
+        h = silu(h)
         h = self.conv_out(h)
         return h
@@ -873,7 +869,7 @@ class FirstStagePostProcessor(nn.Module):
         z_fs = self.encode_with_pretrained(x)
         z = self.proj_norm(z_fs)
         z = self.proj(z)
-        z = nonlinearity(z)
+        z = silu(z)
 
         for submodel, downmodel in zip(self.model,self.downsampler):
             z = submodel(z,temb=None)

ldm/modules/diffusionmodules/util.py

@@ -252,12 +252,6 @@ def normalization(channels):
     return GroupNorm32(32, channels)
 
 
-# PyTorch 1.7 has SiLU, but we support PyTorch 1.5.
-class SiLU(nn.Module):
-    def forward(self, x):
-        return x * torch.sigmoid(x)
-
-
 class GroupNorm32(nn.GroupNorm):
     def forward(self, x):
         return super().forward(x.float()).type(x.dtype)
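The deleted SiLU class was a backward-compatibility shim for PyTorch 1.5, and per the commit message nothing referenced it anymore. For reference, a quick check (a sketch, not part of the commit) that the built-ins compute the same function the shim did:

    import torch
    from torch.nn.functional import silu

    x = torch.randn(4, 8)
    manual = x * torch.sigmoid(x)                      # what the removed SiLU class computed
    assert torch.allclose(manual, silu(x))             # fused functional form used by this commit
    assert torch.allclose(manual, torch.nn.SiLU()(x))  # module form, built into PyTorch since 1.7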