Disable autocast for cpu to fix error. Remove unused precision arg. (#518)

When running on just cpu (intel), a call to torch.layer_norm would error with RuntimeError: expected scalar type BFloat16 but found Float
Fix buggy device handling in model.py.
Tested with scripts/dream.py --full_precision on just cpu on intel laptop. Works but slow at ~10s/it.
This commit is contained in:
Mihai 2022-09-12 23:55:21 +03:00 committed by GitHub
parent 6665f4494f
commit 0bc6779361
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 8 deletions

View File

@ -13,8 +13,9 @@ def choose_torch_device() -> str:
def choose_autocast_device(device):
'''Returns an autocast compatible device from a torch device'''
device_type = device.type # this returns 'mps' on M1
# autocast only supports cuda or cpu
if device_type in ('cuda','cpu'):
if device_type == 'cuda':
return device_type,autocast
elif device_type == 'cpu':
return device_type,nullcontext
else:
return 'cpu',nullcontext

View File

@ -111,7 +111,6 @@ class Generate:
height = 512,
sampler_name = 'k_lms',
ddim_eta = 0.0, # deterministic
precision = 'autocast',
full_precision = False,
strength = 0.75, # default in scripts/img2img.py
seamless = False,
@ -129,7 +128,6 @@ class Generate:
self.sampler_name = sampler_name
self.grid = grid
self.ddim_eta = ddim_eta
self.precision = precision
self.full_precision = True if choose_torch_device() == 'mps' else full_precision
self.strength = strength
self.seamless = seamless

View File

@ -209,8 +209,7 @@ class AttnBlock(nn.Module):
h_ = torch.zeros_like(k, device=q.device)
device_type = 'mps' if q.device.type == 'mps' else 'cuda'
if device_type == 'cuda':
if q.device.type == 'cuda':
stats = torch.cuda.memory_stats(q.device)
mem_active = stats['active_bytes.all.current']
mem_reserved = stats['reserved_bytes.all.current']
@ -612,9 +611,8 @@ class Decoder(nn.Module):
del h3
# prepare for up sampling
device_type = 'mps' if h.device.type == 'mps' else 'cuda'
gc.collect()
if device_type == 'cuda':
if h.device.type == 'cuda':
torch.cuda.empty_cache()
# upsampling