Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
fix: use pad_token for padding (#2381)
Stable Diffusion 2 does not use eos_token for padding. Fixes #2378
Commit 89791d91e8
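Why the old code worked on SD 1.x but breaks on SD 2.x: SD 1.x CLIP tokenizers configure the pad token to be the same token as eos, so padding with eos_token_id happened to be correct; the SD 2.x tokenizer ships a distinct pad token, so the two ids differ. A minimal check, assuming network access to the Hugging Face hub (the repo ids below are illustrative assumptions, not taken from this commit):

from transformers import CLIPTokenizer

# Repo ids are assumptions, used only to compare the two tokenizer configs.
sd1 = CLIPTokenizer.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="tokenizer")
sd2 = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2-1", subfolder="tokenizer")

print(sd1.pad_token_id == sd1.eos_token_id)  # True: eos doubles as the pad token in SD 1.x
print(sd2.pad_token_id == sd2.eos_token_id)  # False: SD 2.x defines its own pad token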
@@ -1,18 +1,16 @@
 import math
-import os.path
+from functools import partial
 from typing import Optional
 
+import clip
+import kornia
 import torch
 import torch.nn as nn
-from functools import partial
-import clip
-from einops import rearrange, repeat
+from einops import repeat
 from transformers import CLIPTokenizer, CLIPTextModel
-import kornia
-from ldm.invoke.devices import choose_torch_device
-from ldm.invoke.globals import Globals, global_cache_dir
-#from ldm.modules.textual_inversion_manager import TextualInversionManager
 
+from ldm.invoke.devices import choose_torch_device
+from ldm.invoke.globals import global_cache_dir
 from ldm.modules.x_transformer import (
     Encoder,
     TransformerWrapper,
@@ -664,12 +662,12 @@ class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder):
         all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
         per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
 
-        # pad out to a 77-entry array: [eos_token, <prompt tokens>, eos_token, ..., eos_token]
+        # pad out to a 77-entry array: [bos_token, <prompt tokens>, eos_token, pad_token…]
         # (77 = self.max_length)
         all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
         per_token_weights = [1.0] + per_token_weights + [1.0]
         pad_length = self.max_length - len(all_token_ids)
-        all_token_ids += [self.tokenizer.eos_token_id] * pad_length
+        all_token_ids += [self.tokenizer.pad_token_id] * pad_length
         per_token_weights += [1.0] * pad_length
 
         all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long).to(self.device)
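The padding scheme this hunk implements, distilled into a standalone sketch; the function name and the way the tokenizer is passed in are mine, not the repo's:

def pad_prompt_tokens(tokenizer, token_ids: list[int], weights: list[float],
                      max_length: int = 77) -> tuple[list[int], list[float]]:
    # Wrap the prompt in bos/eos markers, then fill the tail with pad_token_id
    # (not eos_token_id); weights get a neutral 1.0 at every added position.
    ids = [tokenizer.bos_token_id] + token_ids + [tokenizer.eos_token_id]
    weights = [1.0] + weights + [1.0]
    pad_length = max_length - len(ids)
    return ids + [tokenizer.pad_token_id] * pad_length, weights + [1.0] * pad_length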
@@ -3,8 +3,9 @@ import math
 import torch
 from transformers import CLIPTokenizer, CLIPTextModel
 
-from ldm.modules.textual_inversion_manager import TextualInversionManager
 from ldm.invoke.devices import torch_dtype
+from ldm.modules.textual_inversion_manager import TextualInversionManager
+
 
 class WeightedPromptFragmentsToEmbeddingsConverter():
 
@@ -22,8 +23,8 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
         return self.tokenizer.model_max_length
 
     def get_embeddings_for_weighted_prompt_fragments(self,
-                                                     text: list[str],
-                                                     fragment_weights: list[float],
+                                                     text: list[list[str]],
+                                                     fragment_weights: list[list[float]],
                                                      should_return_tokens: bool = False,
                                                      device='cpu'
                                                      ) -> torch.Tensor:
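A hypothetical call after this signature change, with one inner list per prompt in the batch and one weight per fragment; the converter instance, the prompt text, and the assumption that should_return_tokens=True yields an (embeddings, tokens) pair are all illustrative:

embeddings, tokens = converter.get_embeddings_for_weighted_prompt_fragments(
    text=[["a photo of a cat", "wearing a red hat"]],
    fragment_weights=[[1.0, 1.5]],
    should_return_tokens=True,
    device="cuda",
)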
@@ -198,12 +199,12 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
         all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
         per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
 
-        # pad out to a self.max_length-entry array: [eos_token, <prompt tokens>, eos_token, ..., eos_token]
+        # pad out to a self.max_length-entry array: [bos_token, <prompt tokens>, eos_token, pad_token…]
         # (typically self.max_length == 77)
         all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
         per_token_weights = [1.0] + per_token_weights + [1.0]
         pad_length = self.max_length - len(all_token_ids)
-        all_token_ids += [self.tokenizer.eos_token_id] * pad_length
+        all_token_ids += [self.tokenizer.pad_token_id] * pad_length
         per_token_weights += [1.0] * pad_length
 
         all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device)
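A quick sanity check of the pad_prompt_tokens sketch above against an SD 2.x tokenizer (reusing the sd2 instance loaded earlier; none of this is part of the commit):

prompt_ids = sd2.encode("a photo of a cat", add_special_tokens=False)
ids, weights = pad_prompt_tokens(sd2, prompt_ids, [1.0] * len(prompt_ids))
assert len(ids) == len(weights) == 77
assert ids[-1] == sd2.pad_token_id  # tail is pad_token, not eos_token, on SD 2.x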