Mirror of https://github.com/invoke-ai/InvokeAI
Disable autocast for cpu to fix error. Remove unused precision arg. (#518)
When running on CPU only (Intel), a call to torch.layer_norm would error with RuntimeError: expected scalar type BFloat16 but found Float. Also fixes buggy device handling in model.py. Tested with scripts/dream.py --full_precision on a CPU-only Intel laptop; works, but slow at ~10s/it.
parent 6665f4494f
commit 0bc6779361
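For context, a minimal sketch of the failure mode (a reconstruction assumed from the error text above, not code from the original report). Under CPU autocast, matmul-style ops run in bfloat16, so a float32 LayerNorm downstream sees mixed dtypes; newer PyTorch releases tolerate this combination, but builds current at the time did not:

```python
import torch

x = torch.randn(2, 16, 32)        # float32 activations
ln = torch.nn.LayerNorm(32)       # float32 weights

with torch.autocast(device_type='cpu'):
    y = x @ torch.randn(32, 32)   # CPU autocast computes this matmul in bfloat16
    out = ln(y)                   # historically raised on CPU:
                                  # RuntimeError: expected scalar type BFloat16 but found Float
```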
```diff
@@ -13,8 +13,9 @@ def choose_torch_device() -> str:
 def choose_autocast_device(device):
     '''Returns an autocast compatible device from a torch device'''
     device_type = device.type # this returns 'mps' on M1
-    # autocast only supports cuda or cpu
-    if device_type in ('cuda','cpu'):
+    if device_type == 'cuda':
         return device_type,autocast
+    elif device_type == 'cpu':
+        return device_type,nullcontext
     else:
         return 'cpu',nullcontext
```
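Both branches of the rewritten helper return a (device_type, context-manager factory) pair, so call sites stay uniform: nullcontext accepts the same single positional argument that autocast does. A runnable sketch of the post-commit helper with a hypothetical call site (the call-site code is an assumption, not part of this diff):

```python
import torch
from contextlib import nullcontext
from torch import autocast

def choose_autocast_device(device):
    '''Returns an autocast compatible device from a torch device'''
    device_type = device.type
    if device_type == 'cuda':
        return device_type, autocast     # real mixed precision on CUDA
    elif device_type == 'cpu':
        return device_type, nullcontext  # autocast disabled on CPU by this commit
    else:
        return 'cpu', nullcontext        # e.g. 'mps' falls through to a no-op

# hypothetical call site: both factories take one positional argument,
# since nullcontext(x) simply yields x on __enter__
device_type, scope = choose_autocast_device(torch.device('cpu'))
with scope(device_type):
    y = torch.nn.functional.layer_norm(torch.randn(4, 8), (8,))
```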
```diff
@@ -111,7 +111,6 @@ class Generate:
         height = 512,
         sampler_name = 'k_lms',
         ddim_eta = 0.0, # deterministic
-        precision = 'autocast',
         full_precision = False,
         strength = 0.75, # default in scripts/img2img.py
         seamless = False,
```
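Since precision was accepted but never consumed, removing it only affects keyword callers that passed it explicitly. A hypothetical call site before and after (argument values are illustrative):

```python
# before: the kwarg was accepted and silently ignored
# gr = Generate(sampler_name='k_lms', precision='autocast', full_precision=False)

# after: full_precision alone selects float32 vs. float16
gr = Generate(sampler_name='k_lms', full_precision=False)
```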
```diff
@@ -129,7 +128,6 @@ class Generate:
         self.sampler_name = sampler_name
         self.grid = grid
         self.ddim_eta = ddim_eta
-        self.precision = precision
         self.full_precision = True if choose_torch_device() == 'mps' else full_precision
         self.strength = strength
         self.seamless = seamless
```
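The surviving full_precision line force-enables float32 on MPS regardless of what the caller asked for. A sketch of the kind of downstream consumer this flag typically feeds (the surrounding load logic here is an assumption, not shown in the diff):

```python
# assumed shape of the code that reads self.full_precision later on
if self.full_precision:
    model = model.to(torch.float32)   # keep fp32 weights; forced on MPS above
else:
    model = model.half()              # fp16 weights to halve memory use on CUDA
```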
```diff
@@ -209,8 +209,7 @@ class AttnBlock(nn.Module):
 
         h_ = torch.zeros_like(k, device=q.device)
 
-        device_type = 'mps' if q.device.type == 'mps' else 'cuda'
-        if device_type == 'cuda':
+        if q.device.type == 'cuda':
             stats = torch.cuda.memory_stats(q.device)
             mem_active = stats['active_bytes.all.current']
             mem_reserved = stats['reserved_bytes.all.current']
```
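These allocator statistics feed the sliced-attention heuristic in the surrounding code (not part of this hunk): an estimate of reusable memory decides how many chunks the attention product is computed in. A simplified sketch of that estimate; the exact arithmetic is an assumption based on the common memory-efficient-attention pattern:

```python
import torch

def estimate_free_cuda_bytes(device: torch.device) -> int:
    stats = torch.cuda.memory_stats(device)
    mem_active = stats['active_bytes.all.current']      # bytes in live tensors
    mem_reserved = stats['reserved_bytes.all.current']  # bytes held by the caching allocator
    mem_free_cuda, _ = torch.cuda.mem_get_info(device)  # free bytes per the driver
    # reserved-but-inactive memory can be reused, so count it as free
    return mem_free_cuda + mem_reserved - mem_active
```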
```diff
@@ -612,9 +611,8 @@ class Decoder(nn.Module):
         del h3
 
         # prepare for up sampling
-        device_type = 'mps' if h.device.type == 'mps' else 'cuda'
         gc.collect()
-        if device_type == 'cuda':
+        if h.device.type == 'cuda':
             torch.cuda.empty_cache()
 
         # upsampling
```
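Taken together, the model.py changes settle on one idiom: branch on device.type directly and touch the CUDA allocator only when actually on CUDA. As a standalone sketch:

```python
import gc
import torch

def reclaim_memory(device: torch.device) -> None:
    gc.collect()                  # release Python-side references first
    if device.type == 'cuda':
        torch.cuda.empty_cache()  # then return cached blocks to the CUDA driver
```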