Add Embedding Parsing (#1973)

* Add Embedding Parsing

* Return token_dim in embedding_info

* Fixes to handle other variants:

1. Handle the case of a .bin file being mislabeled as .pt (seen in the
wild at https://cyberes.github.io/stable-diffusion-textual-inversion-models/);
see the sketch after this message.

2. Handle the "broken" .pt files reported in https://github.com/invoke-ai/InvokeAI/issues/1829

3. When a token name is not available, use the basename of the .pt or .bin file rather
   than the whole path.

fixes #1829

* remove whitespace

Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com>
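
For reference, a minimal standalone sketch of the detection logic the diff below implements. The helper names (classify_embedding_file, fallback_token_name) are illustrative, not part of the InvokeAI API, and the sketch assumes the file deserializes to a dict as textual-inversion files do:

import os
import torch

def classify_embedding_file(path: str) -> str:
    # Load on CPU, as the diff below does, then inspect the keys.
    ckpt = torch.load(path, map_location='cpu')
    if 'string_to_token' in ckpt and 'string_to_param' in ckpt:
        return 'pt'   # classic textual-inversion checkpoint
    if len(ckpt) == 1:
        return 'bin'  # single {token: tensor} dict: a Huggingface Concepts
                      # file, even if it was named .pt
    return 'unknown'

def fallback_token_name(path: str) -> str:
    # Basename without extension, e.g. '/a/b/my-embed.pt' -> 'my-embed'
    return os.path.basename(os.path.splitext(path)[0])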
@@ -256,32 +256,22 @@ class EmbeddingManager(nn.Module):
         return [x for x in expanded_paths if os.path.splitext(x)[1] in ('.pt','.bin')]
 
     def _load(self, ckpt_path, full=True):
         try:
             scan_result = scan_file_path(ckpt_path)
             if scan_result.infected_files == 1:
                 print(f'\n### Security Issues Found in Model: {scan_result.issues_count}')
                 print('### For your safety, InvokeAI will not load this embed.')
                 return
         except Exception:
             print(f"### WARNING::: Invalid or corrupt embeddings found. Ignoring: {ckpt_path}")
             return
-        ckpt = torch.load(ckpt_path, map_location='cpu')
-
-        # Handle .pt textual inversion files
-        if 'string_to_token' in ckpt and 'string_to_param' in ckpt:
-            filename = os.path.basename(ckpt_path)
-            token_str = '.'.join(filename.split('.')[:-1]) # filename excluding extension
-            if len(ckpt["string_to_token"]) > 1:
-                print(f">> {ckpt_path} has >1 embedding, only the first will be used")
-
-            string_to_param_dict = ckpt['string_to_param']
-            embedding = list(string_to_param_dict.values())[0]
-            self.add_embedding(token_str, embedding, full)
-
-        # Handle .bin textual inversion files from Huggingface Concepts
-        # https://huggingface.co/sd-concepts-library
-        else:
-            for token_str in list(ckpt.keys()):
-                embedding = ckpt[token_str]
-                self.add_embedding(token_str, embedding, full)
+        embedding_info = self.parse_embedding(ckpt_path)
+        if embedding_info:
+            self.max_vectors_per_token = embedding_info['num_vectors_per_token']
+            self.add_embedding(embedding_info['name'], embedding_info['embedding'], full)
+        else:
+            print(f'>> Failed to load embedding located at {ckpt_path}. Unsupported file.')
 
     def add_embedding(self, token_str, embedding, full):
         if token_str in self.string_to_param_dict:
@@ -302,6 +292,92 @@ class EmbeddingManager(nn.Module):
         self.string_to_token_dict[token_str] = token
         self.string_to_param_dict[token_str] = torch.nn.Parameter(embedding)
 
+    def parse_embedding(self, embedding_file: str):
+        file_type = embedding_file.split('.')[-1]
+        if file_type == 'pt':
+            return self.parse_embedding_pt(embedding_file)
+        elif file_type == 'bin':
+            return self.parse_embedding_bin(embedding_file)
+        else:
+            print(f'>> Not a recognized embedding file: {embedding_file}')
+
+    def parse_embedding_pt(self, embedding_file):
+        embedding_ckpt = torch.load(embedding_file, map_location='cpu')
+        embedding_info = {}
+
+        # Check if this is a valid embedding file
+        if 'string_to_token' in embedding_ckpt and 'string_to_param' in embedding_ckpt:
+            # Catch variants that do not have the expected keys or values.
+            try:
+                embedding_info['name'] = embedding_ckpt['name'] or os.path.basename(os.path.splitext(embedding_file)[0])
+
+                # Check the number of embeddings and warn that only the first will be used
+                embedding_info['num_of_embeddings'] = len(embedding_ckpt['string_to_token'])
+                if embedding_info['num_of_embeddings'] > 1:
+                    print('>> More than 1 embedding found. Will use the first one')
+
+                embedding = list(embedding_ckpt['string_to_param'].values())[0]
+            except (AttributeError, KeyError):
+                return self.handle_broken_pt_variants(embedding_ckpt, embedding_file)
+
+            embedding_info['embedding'] = embedding
+            embedding_info['num_vectors_per_token'] = embedding.size()[0]
+            embedding_info['token_dim'] = embedding.size()[1]
+
+            try:
+                embedding_info['trained_steps'] = embedding_ckpt['step']
+                embedding_info['trained_model_name'] = embedding_ckpt['sd_checkpoint_name']
+                embedding_info['trained_model_checksum'] = embedding_ckpt['sd_checkpoint']
+            except KeyError:
+                print('>> No training details found. Passing ...')
+
+        # .pt files found at https://cyberes.github.io/stable-diffusion-textual-inversion-models/
+        # are actually .bin files
+        elif len(embedding_ckpt.keys()) == 1:
+            print('>> Detected .bin file masquerading as .pt file')
+            embedding_info = self.parse_embedding_bin(embedding_file)
+
+        else:
+            print('>> Invalid embedding format')
+            embedding_info = None
+
+        return embedding_info
+
+    def parse_embedding_bin(self, embedding_file):
+        embedding_ckpt = torch.load(embedding_file, map_location='cpu')
+        embedding_info = {}
+
+        if len(embedding_ckpt.keys()) == 0:
+            print('>> Invalid concepts file')
+            embedding_info = None
+        else:
+            for token in list(embedding_ckpt.keys()):
+                embedding_info['name'] = token or os.path.basename(os.path.splitext(embedding_file)[0])
+                embedding_info['embedding'] = embedding_ckpt[token]
+                embedding_info['num_vectors_per_token'] = 1  # All Concepts seem to default to 1
+                embedding_info['token_dim'] = embedding_info['embedding'].size()[0]
+        return embedding_info
+
+    def handle_broken_pt_variants(self, embedding_ckpt: dict, embedding_file: str) -> dict:
+        '''
+        This handles the broken .pt file variants. We only know of one at present.
+        '''
+        embedding_info = {}
+        if isinstance(list(embedding_ckpt['string_to_token'].values())[0], torch.Tensor):
+            print('>> Detected .pt file variant 1')  # example at https://github.com/invoke-ai/InvokeAI/issues/1829
+            for token in list(embedding_ckpt['string_to_token'].keys()):
+                embedding_info['name'] = token if token != '*' else os.path.basename(os.path.splitext(embedding_file)[0])
+                embedding_info['embedding'] = embedding_ckpt['string_to_param'].state_dict()[token]
+                embedding_info['num_vectors_per_token'] = embedding_info['embedding'].shape[0]
+                embedding_info['token_dim'] = embedding_info['embedding'].size()[0]
+        else:
+            print('>> Invalid embedding format')
+            embedding_info = None
+
+        return embedding_info
+
     def has_embedding_for_token(self, token_str):
         return token_str in self.string_to_token_dict
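
For context, a rough usage sketch of the new parser, assuming an already-initialized EmbeddingManager instance named manager; the file path is illustrative:

info = manager.parse_embedding('embeddings/my-concept.bin')
if info:
    print(info['name'])                   # token name, or file basename as fallback
    print(info['token_dim'])              # embedding dimension, e.g. 768 for SD 1.x
    print(info['num_vectors_per_token'])  # 1 for Concepts-style .bin files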