fix: use pad_token for padding (#2381)

Stable Diffusion 2 does not use eos_token for padding; its tokenizer defines a dedicated pad_token, so pad with that instead.

Fixes #2378
commit 89791d91e8
Kevin Turner, 2023-01-21 13:30:03 -08:00 (committed by GitHub)
2 changed files with 14 additions and 15 deletions
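
For context, the mismatch is visible directly on the tokenizers. A minimal sketch, not part of this commit; the Hugging Face model ids are assumptions about where the SD1 and SD2 tokenizers live:

# Sketch only, not from this commit; model ids are assumptions.
from transformers import CLIPTokenizer

# SD1's tokenizer reuses <|endoftext|> as its pad token, so padding with
# eos_token_id happened to be harmless there.
sd1 = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
assert sd1.pad_token_id == sd1.eos_token_id

# SD2 ships a tokenizer with a distinct pad token, so padding with
# eos_token_id feeds the text encoder spurious end-of-text markers.
sd2 = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
                                    subfolder="tokenizer")
assert sd2.pad_token_id != sd2.eos_token_id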


@@ -1,18 +1,16 @@
 import math
 import os.path
+from functools import partial
+from typing import Optional
+import clip
+import kornia
 import torch
 import torch.nn as nn
-from functools import partial
-import clip
-from einops import rearrange, repeat
+from einops import repeat
 from transformers import CLIPTokenizer, CLIPTextModel
-import kornia
-from ldm.invoke.devices import choose_torch_device
-from ldm.invoke.globals import Globals, global_cache_dir
-#from ldm.modules.textual_inversion_manager import TextualInversionManager
+from ldm.invoke.devices import choose_torch_device
+from ldm.invoke.globals import global_cache_dir
 from ldm.modules.x_transformer import (
     Encoder,
     TransformerWrapper,
@@ -664,12 +662,12 @@ class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder):
         all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
         per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
-        # pad out to a 77-entry array: [eos_token, <prompt tokens>, eos_token, ..., eos_token]
+        # pad out to a 77-entry array: [bos_token, <prompt tokens>, eos_token, pad_token…]
         # (77 = self.max_length)
         all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
         per_token_weights = [1.0] + per_token_weights + [1.0]
         pad_length = self.max_length - len(all_token_ids)
-        all_token_ids += [self.tokenizer.eos_token_id] * pad_length
+        all_token_ids += [self.tokenizer.pad_token_id] * pad_length
         per_token_weights += [1.0] * pad_length
         all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long).to(self.device)
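
Distilled from the hunk above: a standalone sketch of the layout the fixed code builds (pad_to_max_length is an illustrative helper, not a function in this codebase):

# Illustrative helper, not part of the codebase: build the fixed-length
# id list [bos, prompt tokens..., eos, pad, pad, ...] that the hunk above
# constructs inline, padding with pad_token_id instead of eos_token_id.
def pad_to_max_length(token_ids: list[int], bos_id: int, eos_id: int,
                      pad_id: int, max_length: int = 77) -> list[int]:
    ids = [bos_id] + token_ids[: max_length - 2] + [eos_id]
    return ids + [pad_id] * (max_length - len(ids))

# With SD2-style special ids (bos=49406, eos=49407, pad=0; prompt ids illustrative):
# pad_to_max_length([320, 2368], 49406, 49407, 0)
# -> [49406, 320, 2368, 49407, 0, 0, ..., 0]   (77 entries)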


@@ -3,8 +3,9 @@ import math
 import torch
 from transformers import CLIPTokenizer, CLIPTextModel
-from ldm.modules.textual_inversion_manager import TextualInversionManager
+from ldm.invoke.devices import torch_dtype
+from ldm.modules.textual_inversion_manager import TextualInversionManager
 class WeightedPromptFragmentsToEmbeddingsConverter():
@@ -22,8 +23,8 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
         return self.tokenizer.model_max_length
     def get_embeddings_for_weighted_prompt_fragments(self,
-                                                     text: list[str],
-                                                     fragment_weights: list[float],
+                                                     text: list[list[str]],
+                                                     fragment_weights: list[list[float]],
                                                      should_return_tokens: bool = False,
                                                      device='cpu'
                                                      ) -> torch.Tensor:
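
The new annotations make the method batched: one inner list of fragments, with a parallel inner list of weights, per prompt. A hypothetical call (converter is an already-constructed WeightedPromptFragmentsToEmbeddingsConverter; values are illustrative):

# Hypothetical usage of the new batched signature.
embeddings = converter.get_embeddings_for_weighted_prompt_fragments(
    text=[["a cat", "wearing a top hat"]],   # one inner list of fragments per prompt
    fragment_weights=[[1.0, 1.5]],           # same nesting as `text`
    device="cuda",
)
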
@@ -198,12 +199,12 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
         all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
         per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
-        # pad out to a self.max_length-entry array: [eos_token, <prompt tokens>, eos_token, ..., eos_token]
+        # pad out to a self.max_length-entry array: [bos_token, <prompt tokens>, eos_token, pad_token…]
         # (typically self.max_length == 77)
         all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
         per_token_weights = [1.0] + per_token_weights + [1.0]
         pad_length = self.max_length - len(all_token_ids)
-        all_token_ids += [self.tokenizer.eos_token_id] * pad_length
+        all_token_ids += [self.tokenizer.pad_token_id] * pad_length
         per_token_weights += [1.0] * pad_length
         all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device)
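
As a sanity check, the hand-built layout agrees with what the tokenizer itself produces, since SD2's tokenizer pads with its own pad_token. A hedged sketch (the model id is an assumption):

# Sketch, not from the diff: compare against the tokenizer's own padding.
from transformers import CLIPTokenizer

tok = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
                                    subfolder="tokenizer")
ids = tok("a photo of an astronaut",
          padding="max_length",
          truncation=True,
          max_length=tok.model_max_length)["input_ids"]
assert ids[0] == tok.bos_token_id
assert tok.eos_token_id in ids
assert ids[-1] == tok.pad_token_id   # trailing entries are pad, not eos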