mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
fix: use pad_token for padding (#2381)
Stable Diffusion 2 does not use eos_token for padding, so pad with the tokenizer's pad_token instead. Fixes #2378
This commit is contained in:
commit
89791d91e8
@ -1,18 +1,16 @@
|
||||
import math
|
||||
import os.path
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
import clip
|
||||
import kornia
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from functools import partial
|
||||
import clip
|
||||
from einops import rearrange, repeat
|
||||
from einops import repeat
|
||||
from transformers import CLIPTokenizer, CLIPTextModel
|
||||
import kornia
|
||||
from ldm.invoke.devices import choose_torch_device
|
||||
from ldm.invoke.globals import Globals, global_cache_dir
|
||||
#from ldm.modules.textual_inversion_manager import TextualInversionManager
|
||||
|
||||
from ldm.invoke.devices import choose_torch_device
|
||||
from ldm.invoke.globals import global_cache_dir
|
||||
from ldm.modules.x_transformer import (
|
||||
Encoder,
|
||||
TransformerWrapper,
|
||||
@ -664,12 +662,12 @@ class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder):
|
||||
all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
|
||||
per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
|
||||
|
||||
# pad out to a 77-entry array: [bos_token, <prompt tokens>, eos_token, ..., eos_token]
|
||||
# pad out to a 77-entry array: [bos_token, <prompt tokens>, eos_token, pad_token…]
|
||||
# (77 = self.max_length)
|
||||
all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
|
||||
per_token_weights = [1.0] + per_token_weights + [1.0]
|
||||
pad_length = self.max_length - len(all_token_ids)
|
||||
all_token_ids += [self.tokenizer.eos_token_id] * pad_length
|
||||
all_token_ids += [self.tokenizer.pad_token_id] * pad_length
|
||||
per_token_weights += [1.0] * pad_length
|
||||
|
||||
all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long).to(self.device)
|
||||
|
@ -3,8 +3,9 @@ import math
|
||||
import torch
|
||||
from transformers import CLIPTokenizer, CLIPTextModel
|
||||
|
||||
from ldm.modules.textual_inversion_manager import TextualInversionManager
|
||||
from ldm.invoke.devices import torch_dtype
|
||||
from ldm.modules.textual_inversion_manager import TextualInversionManager
|
||||
|
||||
|
||||
class WeightedPromptFragmentsToEmbeddingsConverter():
|
||||
|
||||
@ -22,8 +23,8 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
|
||||
return self.tokenizer.model_max_length
|
||||
|
||||
def get_embeddings_for_weighted_prompt_fragments(self,
|
||||
text: list[str],
|
||||
fragment_weights: list[float],
|
||||
text: list[list[str]],
|
||||
fragment_weights: list[list[float]],
|
||||
should_return_tokens: bool = False,
|
||||
device='cpu'
|
||||
) -> torch.Tensor:
|
||||
@ -198,12 +199,12 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
|
||||
all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
|
||||
per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
|
||||
|
||||
# pad out to a self.max_length-entry array: [bos_token, <prompt tokens>, eos_token, ..., eos_token]
|
||||
# pad out to a self.max_length-entry array: [bos_token, <prompt tokens>, eos_token, pad_token…]
|
||||
# (typically self.max_length == 77)
|
||||
all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
|
||||
per_token_weights = [1.0] + per_token_weights + [1.0]
|
||||
pad_length = self.max_length - len(all_token_ids)
|
||||
all_token_ids += [self.tokenizer.eos_token_id] * pad_length
|
||||
all_token_ids += [self.tokenizer.pad_token_id] * pad_length
|
||||
per_token_weights += [1.0] * pad_length
|
||||
|
||||
all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device)
|
||||
|
Loading…
Reference in New Issue
Block a user