Add OMI vendor files

This commit is contained in:
Billy
2025-06-26 17:25:06 +10:00
committed by psychedelicious
parent 17dead3309
commit fe83c2f81f
10 changed files with 472 additions and 0 deletions

View File

View File

@ -0,0 +1,17 @@
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_lora_util import LoraConversionKeySet, map_prefix_range
def map_clip(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("text_projection", "text_projection", parent=key_prefix)]
for k in map_prefix_range("text_model.encoder.layers", "text_model.encoder.layers", parent=key_prefix):
keys += [LoraConversionKeySet("mlp.fc1", "mlp.fc1", parent=k)]
keys += [LoraConversionKeySet("mlp.fc2", "mlp.fc2", parent=k)]
keys += [LoraConversionKeySet("self_attn.k_proj", "self_attn.k_proj", parent=k)]
keys += [LoraConversionKeySet("self_attn.out_proj", "self_attn.out_proj", parent=k)]
keys += [LoraConversionKeySet("self_attn.q_proj", "self_attn.q_proj", parent=k)]
keys += [LoraConversionKeySet("self_attn.v_proj", "self_attn.v_proj", parent=k)]
return keys

View File

@ -0,0 +1,75 @@
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_clip import map_clip
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_lora_util import LoraConversionKeySet, map_prefix_range
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_t5 import map_t5
def __map_double_transformer_block(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("img_attn.qkv.0", "attn.to_q", parent=key_prefix)]
keys += [LoraConversionKeySet("img_attn.qkv.1", "attn.to_k", parent=key_prefix)]
keys += [LoraConversionKeySet("img_attn.qkv.2", "attn.to_v", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_attn.qkv.0", "attn.add_q_proj", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_attn.qkv.1", "attn.add_k_proj", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_attn.qkv.2", "attn.add_v_proj", parent=key_prefix)]
keys += [LoraConversionKeySet("img_attn.proj", "attn.to_out.0", parent=key_prefix)]
keys += [LoraConversionKeySet("img_mlp.0", "ff.net.0.proj", parent=key_prefix)]
keys += [LoraConversionKeySet("img_mlp.2", "ff.net.2", parent=key_prefix)]
keys += [LoraConversionKeySet("img_mod.lin", "norm1.linear", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_attn.proj", "attn.to_add_out", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_mlp.0", "ff_context.net.0.proj", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_mlp.2", "ff_context.net.2", parent=key_prefix)]
keys += [LoraConversionKeySet("txt_mod.lin", "norm1_context.linear", parent=key_prefix)]
return keys
def __map_single_transformer_block(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("linear1.0", "attn.to_q", parent=key_prefix)]
keys += [LoraConversionKeySet("linear1.1", "attn.to_k", parent=key_prefix)]
keys += [LoraConversionKeySet("linear1.2", "attn.to_v", parent=key_prefix)]
keys += [LoraConversionKeySet("linear1.3", "proj_mlp", parent=key_prefix)]
keys += [LoraConversionKeySet("linear2", "proj_out", parent=key_prefix)]
keys += [LoraConversionKeySet("modulation.lin", "norm.linear", parent=key_prefix)]
return keys
def __map_transformer(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("txt_in", "context_embedder", parent=key_prefix)]
keys += [LoraConversionKeySet("final_layer.adaLN_modulation.1", "norm_out.linear", parent=key_prefix, swap_chunks=True)]
keys += [LoraConversionKeySet("final_layer.linear", "proj_out", parent=key_prefix)]
keys += [LoraConversionKeySet("guidance_in.in_layer", "time_text_embed.guidance_embedder.linear_1", parent=key_prefix)]
keys += [LoraConversionKeySet("guidance_in.out_layer", "time_text_embed.guidance_embedder.linear_2", parent=key_prefix)]
keys += [LoraConversionKeySet("vector_in.in_layer", "time_text_embed.text_embedder.linear_1", parent=key_prefix)]
keys += [LoraConversionKeySet("vector_in.out_layer", "time_text_embed.text_embedder.linear_2", parent=key_prefix)]
keys += [LoraConversionKeySet("time_in.in_layer", "time_text_embed.timestep_embedder.linear_1", parent=key_prefix)]
keys += [LoraConversionKeySet("time_in.out_layer", "time_text_embed.timestep_embedder.linear_2", parent=key_prefix)]
keys += [LoraConversionKeySet("img_in.proj", "x_embedder", parent=key_prefix)]
for k in map_prefix_range("double_blocks", "transformer_blocks", parent=key_prefix):
keys += __map_double_transformer_block(k)
for k in map_prefix_range("single_blocks", "single_transformer_blocks", parent=key_prefix):
keys += __map_single_transformer_block(k)
return keys
def convert_flux_lora_key_sets() -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("bundle_emb", "bundle_emb")]
keys += __map_transformer(LoraConversionKeySet("transformer", "lora_transformer"))
keys += map_clip(LoraConversionKeySet("clip_l", "lora_te1"))
keys += map_t5(LoraConversionKeySet("t5", "lora_te2"))
return keys

View File

@ -0,0 +1,211 @@
import torch
from torch import Tensor
from typing_extensions import Self
class LoraConversionKeySet:
def __init__(
self,
omi_prefix: str,
diffusers_prefix: str,
legacy_diffusers_prefix: str | None = None,
parent: Self | None = None,
swap_chunks: bool = False,
filter_is_last: bool | None = None,
next_omi_prefix: str | None = None,
next_diffusers_prefix: str | None = None,
):
if parent is not None:
self.omi_prefix = combine(parent.omi_prefix, omi_prefix)
self.diffusers_prefix = combine(parent.diffusers_prefix, diffusers_prefix)
else:
self.omi_prefix = omi_prefix
self.diffusers_prefix = diffusers_prefix
if legacy_diffusers_prefix is None:
self.legacy_diffusers_prefix = self.diffusers_prefix.replace('.', '_')
elif parent is not None:
self.legacy_diffusers_prefix = combine(parent.legacy_diffusers_prefix, legacy_diffusers_prefix).replace('.', '_')
else:
self.legacy_diffusers_prefix = legacy_diffusers_prefix
self.parent = parent
self.swap_chunks = swap_chunks
self.filter_is_last = filter_is_last
self.prefix = parent
if next_omi_prefix is None and parent is not None:
self.next_omi_prefix = parent.next_omi_prefix
self.next_diffusers_prefix = parent.next_diffusers_prefix
self.next_legacy_diffusers_prefix = parent.next_legacy_diffusers_prefix
elif next_omi_prefix is not None and parent is not None:
self.next_omi_prefix = combine(parent.omi_prefix, next_omi_prefix)
self.next_diffusers_prefix = combine(parent.diffusers_prefix, next_diffusers_prefix)
self.next_legacy_diffusers_prefix = combine(parent.legacy_diffusers_prefix, next_diffusers_prefix).replace('.', '_')
elif next_omi_prefix is not None and parent is None:
self.next_omi_prefix = next_omi_prefix
self.next_diffusers_prefix = next_diffusers_prefix
self.next_legacy_diffusers_prefix = next_diffusers_prefix.replace('.', '_')
else:
self.next_omi_prefix = None
self.next_diffusers_prefix = None
self.next_legacy_diffusers_prefix = None
def __get_omi(self, in_prefix: str, key: str) -> str:
return self.omi_prefix + key.removeprefix(in_prefix)
def __get_diffusers(self, in_prefix: str, key: str) -> str:
return self.diffusers_prefix + key.removeprefix(in_prefix)
def __get_legacy_diffusers(self, in_prefix: str, key: str) -> str:
key = self.legacy_diffusers_prefix + key.removeprefix(in_prefix)
suffix = key[key.rfind('.'):]
if suffix not in ['.alpha', '.dora_scale']: # some keys only have a single . in the suffix
suffix = key[key.removesuffix(suffix).rfind('.'):]
key = key.removesuffix(suffix)
return key.replace('.', '_') + suffix
def get_key(self, in_prefix: str, key: str, target: str) -> str:
if target == 'omi':
return self.__get_omi(in_prefix, key)
elif target == 'diffusers':
return self.__get_diffusers(in_prefix, key)
elif target == 'legacy_diffusers':
return self.__get_legacy_diffusers(in_prefix, key)
return key
def __str__(self) -> str:
return f"omi: {self.omi_prefix}, diffusers: {self.diffusers_prefix}, legacy: {self.legacy_diffusers_prefix}"
def combine(left: str, right: str) -> str:
left = left.rstrip('.')
right = right.lstrip('.')
if left == "" or left is None:
return right
elif right == "" or right is None:
return left
else:
return left + "." + right
def map_prefix_range(
omi_prefix: str,
diffusers_prefix: str,
parent: LoraConversionKeySet,
) -> list[LoraConversionKeySet]:
# 100 should be a safe upper bound. increase if it's not enough in the future
return [LoraConversionKeySet(
omi_prefix=f"{omi_prefix}.{i}",
diffusers_prefix=f"{diffusers_prefix}.{i}",
parent=parent,
next_omi_prefix=f"{omi_prefix}.{i + 1}",
next_diffusers_prefix=f"{diffusers_prefix}.{i + 1}",
) for i in range(100)]
def __convert(
state_dict: dict[str, Tensor],
key_sets: list[LoraConversionKeySet],
source: str,
target: str,
) -> dict[str, Tensor]:
out_states = {}
if source == target:
return dict(state_dict)
# TODO: maybe replace with a non O(n^2) algorithm
for key, tensor in state_dict.items():
for key_set in key_sets:
in_prefix = ''
if source == 'omi':
in_prefix = key_set.omi_prefix
elif source == 'diffusers':
in_prefix = key_set.diffusers_prefix
elif source == 'legacy_diffusers':
in_prefix = key_set.legacy_diffusers_prefix
if not key.startswith(in_prefix):
continue
if key_set.filter_is_last is not None:
next_prefix = None
if source == 'omi':
next_prefix = key_set.next_omi_prefix
elif source == 'diffusers':
next_prefix = key_set.next_diffusers_prefix
elif source == 'legacy_diffusers':
next_prefix = key_set.next_legacy_diffusers_prefix
is_last = not any(k.startswith(next_prefix) for k in state_dict)
if key_set.filter_is_last != is_last:
continue
name = key_set.get_key(in_prefix, key, target)
can_swap_chunks = target == 'omi' or source == 'omi'
if key_set.swap_chunks and name.endswith('.lora_up.weight') and can_swap_chunks:
chunk_0, chunk_1 = tensor.chunk(2, dim=0)
tensor = torch.cat([chunk_1, chunk_0], dim=0)
out_states[name] = tensor
break # only map the first matching key set
return out_states
def __detect_source(
state_dict: dict[str, Tensor],
key_sets: list[LoraConversionKeySet],
) -> str:
omi_count = 0
diffusers_count = 0
legacy_diffusers_count = 0
for key in state_dict:
for key_set in key_sets:
if key.startswith(key_set.omi_prefix):
omi_count += 1
if key.startswith(key_set.diffusers_prefix):
diffusers_count += 1
if key.startswith(key_set.legacy_diffusers_prefix):
legacy_diffusers_count += 1
if omi_count > diffusers_count and omi_count > legacy_diffusers_count:
return 'omi'
if diffusers_count > omi_count and diffusers_count > legacy_diffusers_count:
return 'diffusers'
if legacy_diffusers_count > omi_count and legacy_diffusers_count > diffusers_count:
return 'legacy_diffusers'
return ''
def convert_to_omi(
state_dict: dict[str, Tensor],
key_sets: list[LoraConversionKeySet],
) -> dict[str, Tensor]:
source = __detect_source(state_dict, key_sets)
return __convert(state_dict, key_sets, source, 'omi')
def convert_to_diffusers(
state_dict: dict[str, Tensor],
key_sets: list[LoraConversionKeySet],
) -> dict[str, Tensor]:
source = __detect_source(state_dict, key_sets)
return __convert(state_dict, key_sets, source, 'diffusers')
def convert_to_legacy_diffusers(
state_dict: dict[str, Tensor],
key_sets: list[LoraConversionKeySet],
) -> dict[str, Tensor]:
source = __detect_source(state_dict, key_sets)
return __convert(state_dict, key_sets, source, 'legacy_diffusers')

View File

@ -0,0 +1,122 @@
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_clip import map_clip
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_lora_util import LoraConversionKeySet, map_prefix_range
def __map_unet_resnet_block(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("emb_layers.1", "time_emb_proj", parent=key_prefix)]
keys += [LoraConversionKeySet("in_layers.2", "conv1", parent=key_prefix)]
keys += [LoraConversionKeySet("out_layers.3", "conv2", parent=key_prefix)]
keys += [LoraConversionKeySet("skip_connection", "conv_shortcut", parent=key_prefix)]
return keys
def __map_unet_attention_block(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("proj_in", "proj_in", parent=key_prefix)]
keys += [LoraConversionKeySet("proj_out", "proj_out", parent=key_prefix)]
for k in map_prefix_range("transformer_blocks", "transformer_blocks", parent=key_prefix):
keys += [LoraConversionKeySet("attn1.to_q", "attn1.to_q", parent=k)]
keys += [LoraConversionKeySet("attn1.to_k", "attn1.to_k", parent=k)]
keys += [LoraConversionKeySet("attn1.to_v", "attn1.to_v", parent=k)]
keys += [LoraConversionKeySet("attn1.to_out.0", "attn1.to_out.0", parent=k)]
keys += [LoraConversionKeySet("attn2.to_q", "attn2.to_q", parent=k)]
keys += [LoraConversionKeySet("attn2.to_k", "attn2.to_k", parent=k)]
keys += [LoraConversionKeySet("attn2.to_v", "attn2.to_v", parent=k)]
keys += [LoraConversionKeySet("attn2.to_out.0", "attn2.to_out.0", parent=k)]
keys += [LoraConversionKeySet("ff.net.0.proj", "ff.net.0.proj", parent=k)]
keys += [LoraConversionKeySet("ff.net.2", "ff.net.2", parent=k)]
return keys
def __map_unet_down_blocks(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += __map_unet_resnet_block(LoraConversionKeySet("1.0", "0.resnets.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("2.0", "0.resnets.1", parent=key_prefix))
keys += [LoraConversionKeySet("3.0.op", "0.downsamplers.0.conv", parent=key_prefix)]
keys += __map_unet_resnet_block(LoraConversionKeySet("4.0", "1.resnets.0", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("4.1", "1.attentions.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("5.0", "1.resnets.1", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("5.1", "1.attentions.1", parent=key_prefix))
keys += [LoraConversionKeySet("6.0.op", "1.downsamplers.0.conv", parent=key_prefix)]
keys += __map_unet_resnet_block(LoraConversionKeySet("7.0", "2.resnets.0", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("7.1", "2.attentions.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("8.0", "2.resnets.1", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("8.1", "2.attentions.1", parent=key_prefix))
return keys
def __map_unet_mid_block(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += __map_unet_resnet_block(LoraConversionKeySet("0", "resnets.0", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("1", "attentions.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("2", "resnets.1", parent=key_prefix))
return keys
def __map_unet_up_block(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += __map_unet_resnet_block(LoraConversionKeySet("0.0", "0.resnets.0", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("0.1", "0.attentions.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("1.0", "0.resnets.1", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("1.1", "0.attentions.1", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("2.0", "0.resnets.2", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("2.1", "0.attentions.2", parent=key_prefix))
keys += [LoraConversionKeySet("2.2.conv", "0.upsamplers.0.conv", parent=key_prefix)]
keys += __map_unet_resnet_block(LoraConversionKeySet("3.0", "1.resnets.0", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("3.1", "1.attentions.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("4.0", "1.resnets.1", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("4.1", "1.attentions.1", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("5.0", "1.resnets.2", parent=key_prefix))
keys += __map_unet_attention_block(LoraConversionKeySet("5.1", "1.attentions.2", parent=key_prefix))
keys += [LoraConversionKeySet("5.2.conv", "1.upsamplers.0.conv", parent=key_prefix)]
keys += __map_unet_resnet_block(LoraConversionKeySet("6.0", "2.resnets.0", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("7.0", "2.resnets.1", parent=key_prefix))
keys += __map_unet_resnet_block(LoraConversionKeySet("8.0", "2.resnets.2", parent=key_prefix))
return keys
def __map_unet(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("input_blocks.0.0", "conv_in", parent=key_prefix)]
keys += [LoraConversionKeySet("time_embed.0", "time_embedding.linear_1", parent=key_prefix)]
keys += [LoraConversionKeySet("time_embed.2", "time_embedding.linear_2", parent=key_prefix)]
keys += [LoraConversionKeySet("label_emb.0.0", "add_embedding.linear_1", parent=key_prefix)]
keys += [LoraConversionKeySet("label_emb.0.2", "add_embedding.linear_2", parent=key_prefix)]
keys += __map_unet_down_blocks(LoraConversionKeySet("input_blocks", "down_blocks", parent=key_prefix))
keys += __map_unet_mid_block(LoraConversionKeySet("middle_block", "mid_block", parent=key_prefix))
keys += __map_unet_up_block(LoraConversionKeySet("output_blocks", "up_blocks", parent=key_prefix))
keys += [LoraConversionKeySet("out.0", "conv_norm_out", parent=key_prefix)]
keys += [LoraConversionKeySet("out.2", "conv_out", parent=key_prefix)]
return keys
def convert_sdxl_lora_key_sets() -> list[LoraConversionKeySet]:
keys = []
keys += [LoraConversionKeySet("bundle_emb", "bundle_emb")]
keys += __map_unet(LoraConversionKeySet( "unet", "lora_unet"))
keys += map_clip(LoraConversionKeySet("clip_l", "lora_te1"))
keys += map_clip(LoraConversionKeySet("clip_g", "lora_te2"))
return keys

View File

@ -0,0 +1,16 @@
from invokeai.backend.model_manager.omi.vendor.convert.lora.convert_lora_util import LoraConversionKeySet, map_prefix_range
def map_t5(key_prefix: LoraConversionKeySet) -> list[LoraConversionKeySet]:
keys = []
for k in map_prefix_range("encoder.block", "encoder.block", parent=key_prefix):
keys += [LoraConversionKeySet("layer.0.SelfAttention.k", "layer.0.SelfAttention.k", parent=k)]
keys += [LoraConversionKeySet("layer.0.SelfAttention.o", "layer.0.SelfAttention.o", parent=k)]
keys += [LoraConversionKeySet("layer.0.SelfAttention.q", "layer.0.SelfAttention.q", parent=k)]
keys += [LoraConversionKeySet("layer.0.SelfAttention.v", "layer.0.SelfAttention.v", parent=k)]
keys += [LoraConversionKeySet("layer.1.DenseReluDense.wi_0", "layer.1.DenseReluDense.wi_0", parent=k)]
keys += [LoraConversionKeySet("layer.1.DenseReluDense.wi_1", "layer.1.DenseReluDense.wi_1", parent=k)]
keys += [LoraConversionKeySet("layer.1.DenseReluDense.wo", "layer.1.DenseReluDense.wo", parent=k)]
return keys

View File

@ -0,0 +1,31 @@
stable_diffusion_1_lora = 'stable-diffusion-v1/lora'
stable_diffusion_1_inpainting_lora = 'stable-diffusion-v1-inpainting/lora'
stable_diffusion_2_512_lora = 'stable-diffusion-v2-512/lora'
stable_diffusion_2_768_v_lora = 'stable-diffusion-v2-768-v/lora'
stable_diffusion_2_depth_lora = 'stable-diffusion-v2-depth/lora'
stable_diffusion_2_inpainting_lora = 'stable-diffusion-v2-inpainting/lora'
stable_diffusion_3_medium_lora = 'stable-diffusion-v3-medium/lora'
stable_diffusion_35_medium_lora = 'stable-diffusion-v3.5-medium/lora'
stable_diffusion_35_large_lora = 'stable-diffusion-v3.5-large/lora'
stable_diffusion_xl_1_lora = 'stable-diffusion-xl-v1-base/lora'
stable_diffusion_xl_1_inpainting_lora = 'stable-diffusion-xl-v1-base-inpainting/lora'
wuerstchen_2_lora = 'wuerstchen-v2-prior/lora'
stable_cascade_1_stage_a_lora = 'stable-cascade-v1-stage-a/lora'
stable_cascade_1_stage_b_lora = 'stable-cascade-v1-stage-b/lora'
stable_cascade_1_stage_c_lora = 'stable-cascade-v1-stage-c/lora'
pixart_alpha_lora = 'pixart-alpha/lora'
pixart_sigma_lora = 'pixart-sigma/lora'
flux_dev_1_lora = 'Flux.1-dev/lora'
flux_fill_dev_1_lora = 'Flux.1-fill-dev/lora'
sana_lora = 'sana/lora'
hunyuan_video_lora = 'hunyuan-video/lora'
hi_dream_i1_lora = 'hidream-i1/lora'