diff --git a/invokeai/backend/model_manager/__init__.py b/invokeai/backend/model_manager/__init__.py index 1ec9e385de..09789ff254 100644 --- a/invokeai/backend/model_manager/__init__.py +++ b/invokeai/backend/model_manager/__init__.py @@ -1,14 +1,16 @@ """ Initialization file for invokeai.backend.model_manager.config """ -from invokeai.backend.model_manager.config import ( # noqa F401 - ModelConfigFactory, - ModelConfigBase, - InvalidModelConfigException, +from ..model_management.models.base import read_checkpoint_meta # noqa F401 +from .config import ( # noqa F401 BaseModelType, - ModelType, - SubModelType, - ModelVariantType, + InvalidModelConfigException, + ModelConfigBase, + ModelConfigFactory, ModelFormat, + ModelType, + ModelVariantType, SchedulerPredictionType, + SubModelType, ) +from .model_install import ModelInstall # noqa F401 diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py index bffc523364..8283e806b9 100644 --- a/invokeai/backend/model_manager/config.py +++ b/invokeai/backend/model_manager/config.py @@ -50,7 +50,6 @@ class ModelType(str, Enum): ControlNet = "controlnet" # used by model_probe TextualInversion = "embedding" - class SubModelType(str, Enum): """Submodel type.""" diff --git a/invokeai/backend/model_manager/model_install.py b/invokeai/backend/model_manager/model_install.py new file mode 100644 index 0000000000..c72265fc88 --- /dev/null +++ b/invokeai/backend/model_manager/model_install.py @@ -0,0 +1,106 @@ +# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Development Team +""" +Install/delete models. 
from pathlib import Path
from typing import Optional, List
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.util.logging import InvokeAILogger
from .storage import ModelConfigStore


class ModelInstall(object):
    """Model installer class handles installation from a local path.

    Typical usage:

        from invokeai.app.services.config import InvokeAIAppConfig
        from invokeai.backend.model_manager import ModelInstall
        from invokeai.backend.model_manager.storage import ModelConfigStoreSQL

        config = InvokeAIAppConfig.get_config()
        store = ModelConfigStoreSQL(config.db_path)
        installer = ModelInstall(store=store, config=config)

        # register config, don't move path
        id: str = installer.register('/path/to/model')

        # register config, and install model in `models`
        id: str = installer.install('/path/to/model')

        # unregister, don't delete
        installer.forget(id)

        # unregister and delete model from disk
        installer.delete(id)

        # scan directory recursively and install all new models found
        ids: List[str] = installer.scan_directory('/path/to/directory')

        # unregister any model whose path is no longer valid
        ids: List[str] = installer.garbage_collect()

        hash: str = installer.hash('/path/to/model')  # should be same as id above

    The intended design is that the following exceptions may be raised:
        DuplicateModelException
        UnknownModelTypeException

    NOTE: every method other than ``__init__`` is currently an unimplemented
    stub (its body is ``pass``), so it returns ``None`` regardless of the
    declared return annotation.
    """

    _config: InvokeAIAppConfig
    _logger: InvokeAILogger
    _store: ModelConfigStore

    def __init__(
        self,
        store: Optional[ModelConfigStore] = None,
        config: Optional[InvokeAIAppConfig] = None,
        logger: Optional[InvokeAILogger] = None,
    ):
        """
        Create ModelInstall object.

        :param store: Optional ModelConfigStore. If None passed, a
               ModelConfigStoreYAML is created from the app config's
               `model_conf_path`.
        :param config: Optional InvokeAIAppConfig. If None passed,
               uses the system-wide default app config.
        :param logger: Optional InvokeAILogger. If None passed,
               uses the system-wide default logger.
        """
        self._config = config or InvokeAIAppConfig.get_config()
        self._logger = logger or InvokeAILogger.getLogger()
        if store is None:
            from .storage import ModelConfigStoreYAML

            # Read the path from the resolved self._config, not the raw
            # `config` argument: the latter is None whenever the caller
            # relies on the default, which previously raised AttributeError.
            store = ModelConfigStoreYAML(self._config.model_conf_path)
        self._store = store

    def register(self, model_path: Path) -> str:
        """Probe and register the model at model_path."""
        pass

    def install(self, model_path: Path) -> str:
        """Probe, register and Install the model in the models directory."""
        pass

    def forget(self, id: str) -> str:
        """Unregister the model identified by id."""
        pass

    def delete(self, id: str) -> str:
        """
        Unregister and delete the model identified by id.
        Note that this deletes the model unconditionally.
        """
        pass

    def scan_directory(self, scan_dir: Path, install: bool = False) -> List[str]:
        """Scan directory for new models and register or install them."""
        pass

    def garbage_collect(self) -> List[str]:
        """Unregister any models whose paths are no longer valid."""
        pass

    def hash(self, model_path: Path) -> str:
        """Compute the fast hash of the model."""
        pass
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Callable

import torch
import safetensors.torch

from invokeai.backend.model_management.models.base import (
    read_checkpoint_meta
)
import invokeai.configs.model_probe_templates as templates

from .config import (
    ModelType,
    BaseModelType,
    ModelVariantType,
    ModelFormat,
    SchedulerPredictionType
)


@dataclass
class ModelProbeInfo:
    """Record of the attributes reported for a probed model."""

    model_type: ModelType
    base_type: BaseModelType
    variant_type: ModelVariantType
    prediction_type: SchedulerPredictionType
    format: ModelFormat


class ModelProbe:
    """
    Class to probe a checkpoint, safetensors or diffusers folder.
    """

    def __init__(self):
        """Create a probe object; no instance state is set up."""

    @classmethod
    def heuristic_probe(
        cls,
        model: Path,
        prediction_type_helper: Optional[
            Callable[[Path], SchedulerPredictionType]
        ] = None,
    ) -> ModelProbeInfo:
        """
        Probe the model located at `model` and return a ModelProbeInfo.

        `prediction_type_helper` is an optional callable that receives a
        Path and returns the SchedulerPredictionType.

        NOTE: currently an unimplemented stub; it returns None.
        """
        return None
+ """ + pass diff --git a/invokeai/configs/model_probe_templates/checkpoints/sd-1/main-inpaint.json b/invokeai/configs/model_probe_templates/checkpoints/sd-1/main-inpaint.json new file mode 100644 index 0000000000..d1205c780a --- /dev/null +++ b/invokeai/configs/model_probe_templates/checkpoints/sd-1/main-inpaint.json @@ -0,0 +1 @@ +{"base_type": "sd-1", "model_type": "main", "variant": "inpaint", "template": {"betas": [1000], "alphas_cumprod": [1000], "alphas_cumprod_prev": [1000], "sqrt_alphas_cumprod": [1000], "sqrt_one_minus_alphas_cumprod": [1000], "log_one_minus_alphas_cumprod": [1000], "sqrt_recip_alphas_cumprod": [1000], "sqrt_recipm1_alphas_cumprod": [1000], "posterior_variance": [1000], "posterior_log_variance_clipped": [1000], "posterior_mean_coef1": [1000], "posterior_mean_coef2": [1000], "model.diffusion_model.time_embed.0.weight": [1280, 320], "model.diffusion_model.time_embed.0.bias": [1280], "model.diffusion_model.time_embed.2.weight": [1280, 1280], "model.diffusion_model.time_embed.2.bias": [1280], "model.diffusion_model.input_blocks.0.0.weight": [320, 9, 3, 3], "model.diffusion_model.input_blocks.0.0.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.1.1.norm.weight": [320], "model.diffusion_model.input_blocks.1.1.norm.bias": [320], 
"model.diffusion_model.input_blocks.1.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.1.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_out.weight": [320, 320, 1, 1], 
"model.diffusion_model.input_blocks.1.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.2.1.norm.weight": [320], "model.diffusion_model.input_blocks.2.1.norm.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.2.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], 
"model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.2.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.3.0.op.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.3.0.op.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": [640, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [640, 320, 1, 1], "model.diffusion_model.input_blocks.4.0.skip_connection.bias": [640], 
"model.diffusion_model.input_blocks.4.1.norm.weight": [640], "model.diffusion_model.input_blocks.4.1.norm.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": [640], 
"model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.5.1.norm.weight": [640], "model.diffusion_model.input_blocks.5.1.norm.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.6.0.op.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.6.0.op.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": [1280, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": [1280, 1280, 3, 3], 
"model.diffusion_model.input_blocks.7.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [1280, 640, 1, 1], "model.diffusion_model.input_blocks.7.0.skip_connection.bias": [1280], "model.diffusion_model.input_blocks.7.1.norm.weight": [1280], "model.diffusion_model.input_blocks.7.1.norm.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.7.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": [1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.7.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.8.1.norm.weight": [1280], "model.diffusion_model.input_blocks.8.1.norm.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.8.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.8.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.9.0.op.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.9.0.op.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.0.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.1.norm.weight": [1280], "model.diffusion_model.middle_block.1.norm.bias": [1280], "model.diffusion_model.middle_block.1.proj_in.weight": [1280, 1280, 1, 1], 
"model.diffusion_model.middle_block.1.proj_in.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.middle_block.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.middle_block.1.proj_out.bias": [1280], 
"model.diffusion_model.middle_block.2.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.2.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.0.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.1.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.2.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.1.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.1.conv.bias": [1280], "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": [1280], 
"model.diffusion_model.output_blocks.3.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.3.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.3.1.norm.weight": [1280], "model.diffusion_model.output_blocks.3.1.norm.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.3.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.3.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.4.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.4.1.norm.weight": [1280], "model.diffusion_model.output_blocks.4.1.norm.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.4.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.4.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": [1920], 
"model.diffusion_model.output_blocks.5.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": [1280, 1920, 3, 3], "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [1280, 1920, 1, 1], "model.diffusion_model.output_blocks.5.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.5.1.norm.weight": [1280], "model.diffusion_model.output_blocks.5.1.norm.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.5.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [1280], 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.5.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.2.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.2.conv.bias": [1280], "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": [640, 1920, 3, 3], "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": [640], 
"model.diffusion_model.output_blocks.6.0.skip_connection.weight": [640, 1920, 1, 1], "model.diffusion_model.output_blocks.6.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.6.1.norm.weight": [640], "model.diffusion_model.output_blocks.6.1.norm.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.6.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": [640], 
"model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.6.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": [640, 1280, 3, 3], "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [640, 1280, 1, 1], "model.diffusion_model.output_blocks.7.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.7.1.norm.weight": [640], "model.diffusion_model.output_blocks.7.1.norm.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.7.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [640], 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.7.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": [640, 960, 3, 3], "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": [640], 
"model.diffusion_model.output_blocks.8.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [640, 960, 1, 1], "model.diffusion_model.output_blocks.8.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.8.1.norm.weight": [640], "model.diffusion_model.output_blocks.8.1.norm.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.8.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [640], 
"model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.8.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.2.conv.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.2.conv.bias": [640], "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": [320, 960, 3, 3], "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [320, 960, 1, 1], "model.diffusion_model.output_blocks.9.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.9.1.norm.weight": [320], "model.diffusion_model.output_blocks.9.1.norm.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.9.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], 
"model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.9.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": [640], 
"model.diffusion_model.output_blocks.10.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.10.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.10.1.norm.weight": [320], "model.diffusion_model.output_blocks.10.1.norm.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.10.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.10.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.11.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.11.1.norm.weight": [320], 
"model.diffusion_model.output_blocks.11.1.norm.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.11.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": [320], 
"model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.11.1.proj_out.bias": [320], "model.diffusion_model.out.0.weight": [320], "model.diffusion_model.out.0.bias": [320], "model.diffusion_model.out.2.weight": [4, 320, 3, 3], "model.diffusion_model.out.2.bias": [4], "model_ema.decay": [], "model_ema.num_updates": [], "first_stage_model.encoder.conv_in.weight": [128, 3, 3, 3], "first_stage_model.encoder.conv_in.bias": [128], "first_stage_model.encoder.down.0.block.0.norm1.weight": [128], "first_stage_model.encoder.down.0.block.0.norm1.bias": [128], "first_stage_model.encoder.down.0.block.0.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv1.bias": [128], "first_stage_model.encoder.down.0.block.0.norm2.weight": [128], "first_stage_model.encoder.down.0.block.0.norm2.bias": [128], "first_stage_model.encoder.down.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv2.bias": [128], "first_stage_model.encoder.down.0.block.1.norm1.weight": [128], "first_stage_model.encoder.down.0.block.1.norm1.bias": [128], "first_stage_model.encoder.down.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv1.bias": [128], "first_stage_model.encoder.down.0.block.1.norm2.weight": [128], "first_stage_model.encoder.down.0.block.1.norm2.bias": [128], "first_stage_model.encoder.down.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv2.bias": [128], "first_stage_model.encoder.down.0.downsample.conv.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.downsample.conv.bias": [128], "first_stage_model.encoder.down.1.block.0.norm1.weight": [128], "first_stage_model.encoder.down.1.block.0.norm1.bias": [128], "first_stage_model.encoder.down.1.block.0.conv1.weight": [256, 128, 3, 3], 
"first_stage_model.encoder.down.1.block.0.conv1.bias": [256], "first_stage_model.encoder.down.1.block.0.norm2.weight": [256], "first_stage_model.encoder.down.1.block.0.norm2.bias": [256], "first_stage_model.encoder.down.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.0.conv2.bias": [256], "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [256, 128, 1, 1], "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": [256], "first_stage_model.encoder.down.1.block.1.norm1.weight": [256], "first_stage_model.encoder.down.1.block.1.norm1.bias": [256], "first_stage_model.encoder.down.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv1.bias": [256], "first_stage_model.encoder.down.1.block.1.norm2.weight": [256], "first_stage_model.encoder.down.1.block.1.norm2.bias": [256], "first_stage_model.encoder.down.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv2.bias": [256], "first_stage_model.encoder.down.1.downsample.conv.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.downsample.conv.bias": [256], "first_stage_model.encoder.down.2.block.0.norm1.weight": [256], "first_stage_model.encoder.down.2.block.0.norm1.bias": [256], "first_stage_model.encoder.down.2.block.0.conv1.weight": [512, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv1.bias": [512], "first_stage_model.encoder.down.2.block.0.norm2.weight": [512], "first_stage_model.encoder.down.2.block.0.norm2.bias": [512], "first_stage_model.encoder.down.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.0.conv2.bias": [512], "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [512, 256, 1, 1], "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": [512], "first_stage_model.encoder.down.2.block.1.norm1.weight": [512], "first_stage_model.encoder.down.2.block.1.norm1.bias": [512], 
"first_stage_model.encoder.down.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv1.bias": [512], "first_stage_model.encoder.down.2.block.1.norm2.weight": [512], "first_stage_model.encoder.down.2.block.1.norm2.bias": [512], "first_stage_model.encoder.down.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv2.bias": [512], "first_stage_model.encoder.down.2.downsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.downsample.conv.bias": [512], "first_stage_model.encoder.down.3.block.0.norm1.weight": [512], "first_stage_model.encoder.down.3.block.0.norm1.bias": [512], "first_stage_model.encoder.down.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv1.bias": [512], "first_stage_model.encoder.down.3.block.0.norm2.weight": [512], "first_stage_model.encoder.down.3.block.0.norm2.bias": [512], "first_stage_model.encoder.down.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv2.bias": [512], "first_stage_model.encoder.down.3.block.1.norm1.weight": [512], "first_stage_model.encoder.down.3.block.1.norm1.bias": [512], "first_stage_model.encoder.down.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv1.bias": [512], "first_stage_model.encoder.down.3.block.1.norm2.weight": [512], "first_stage_model.encoder.down.3.block.1.norm2.bias": [512], "first_stage_model.encoder.down.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv2.bias": [512], "first_stage_model.encoder.mid.block_1.norm1.weight": [512], "first_stage_model.encoder.mid.block_1.norm1.bias": [512], "first_stage_model.encoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv1.bias": [512], "first_stage_model.encoder.mid.block_1.norm2.weight": [512], "first_stage_model.encoder.mid.block_1.norm2.bias": [512], 
"first_stage_model.encoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv2.bias": [512], "first_stage_model.encoder.mid.attn_1.norm.weight": [512], "first_stage_model.encoder.mid.attn_1.norm.bias": [512], "first_stage_model.encoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.q.bias": [512], "first_stage_model.encoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.k.bias": [512], "first_stage_model.encoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.v.bias": [512], "first_stage_model.encoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.encoder.mid.block_2.norm1.weight": [512], "first_stage_model.encoder.mid.block_2.norm1.bias": [512], "first_stage_model.encoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv1.bias": [512], "first_stage_model.encoder.mid.block_2.norm2.weight": [512], "first_stage_model.encoder.mid.block_2.norm2.bias": [512], "first_stage_model.encoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv2.bias": [512], "first_stage_model.encoder.norm_out.weight": [512], "first_stage_model.encoder.norm_out.bias": [512], "first_stage_model.encoder.conv_out.weight": [8, 512, 3, 3], "first_stage_model.encoder.conv_out.bias": [8], "first_stage_model.decoder.conv_in.weight": [512, 4, 3, 3], "first_stage_model.decoder.conv_in.bias": [512], "first_stage_model.decoder.mid.block_1.norm1.weight": [512], "first_stage_model.decoder.mid.block_1.norm1.bias": [512], "first_stage_model.decoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv1.bias": [512], "first_stage_model.decoder.mid.block_1.norm2.weight": [512], "first_stage_model.decoder.mid.block_1.norm2.bias": [512], "first_stage_model.decoder.mid.block_1.conv2.weight": [512, 
512, 3, 3], "first_stage_model.decoder.mid.block_1.conv2.bias": [512], "first_stage_model.decoder.mid.attn_1.norm.weight": [512], "first_stage_model.decoder.mid.attn_1.norm.bias": [512], "first_stage_model.decoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.q.bias": [512], "first_stage_model.decoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.k.bias": [512], "first_stage_model.decoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.v.bias": [512], "first_stage_model.decoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.decoder.mid.block_2.norm1.weight": [512], "first_stage_model.decoder.mid.block_2.norm1.bias": [512], "first_stage_model.decoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv1.bias": [512], "first_stage_model.decoder.mid.block_2.norm2.weight": [512], "first_stage_model.decoder.mid.block_2.norm2.bias": [512], "first_stage_model.decoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv2.bias": [512], "first_stage_model.decoder.up.0.block.0.norm1.weight": [256], "first_stage_model.decoder.up.0.block.0.norm1.bias": [256], "first_stage_model.decoder.up.0.block.0.conv1.weight": [128, 256, 3, 3], "first_stage_model.decoder.up.0.block.0.conv1.bias": [128], "first_stage_model.decoder.up.0.block.0.norm2.weight": [128], "first_stage_model.decoder.up.0.block.0.norm2.bias": [128], "first_stage_model.decoder.up.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.0.conv2.bias": [128], "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [128, 256, 1, 1], "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": [128], "first_stage_model.decoder.up.0.block.1.norm1.weight": [128], "first_stage_model.decoder.up.0.block.1.norm1.bias": [128], 
"first_stage_model.decoder.up.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv1.bias": [128], "first_stage_model.decoder.up.0.block.1.norm2.weight": [128], "first_stage_model.decoder.up.0.block.1.norm2.bias": [128], "first_stage_model.decoder.up.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv2.bias": [128], "first_stage_model.decoder.up.0.block.2.norm1.weight": [128], "first_stage_model.decoder.up.0.block.2.norm1.bias": [128], "first_stage_model.decoder.up.0.block.2.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv1.bias": [128], "first_stage_model.decoder.up.0.block.2.norm2.weight": [128], "first_stage_model.decoder.up.0.block.2.norm2.bias": [128], "first_stage_model.decoder.up.0.block.2.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv2.bias": [128], "first_stage_model.decoder.up.1.block.0.norm1.weight": [512], "first_stage_model.decoder.up.1.block.0.norm1.bias": [512], "first_stage_model.decoder.up.1.block.0.conv1.weight": [256, 512, 3, 3], "first_stage_model.decoder.up.1.block.0.conv1.bias": [256], "first_stage_model.decoder.up.1.block.0.norm2.weight": [256], "first_stage_model.decoder.up.1.block.0.norm2.bias": [256], "first_stage_model.decoder.up.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.0.conv2.bias": [256], "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [256, 512, 1, 1], "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": [256], "first_stage_model.decoder.up.1.block.1.norm1.weight": [256], "first_stage_model.decoder.up.1.block.1.norm1.bias": [256], "first_stage_model.decoder.up.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv1.bias": [256], "first_stage_model.decoder.up.1.block.1.norm2.weight": [256], "first_stage_model.decoder.up.1.block.1.norm2.bias": [256], "first_stage_model.decoder.up.1.block.1.conv2.weight": [256, 256, 
3, 3], "first_stage_model.decoder.up.1.block.1.conv2.bias": [256], "first_stage_model.decoder.up.1.block.2.norm1.weight": [256], "first_stage_model.decoder.up.1.block.2.norm1.bias": [256], "first_stage_model.decoder.up.1.block.2.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv1.bias": [256], "first_stage_model.decoder.up.1.block.2.norm2.weight": [256], "first_stage_model.decoder.up.1.block.2.norm2.bias": [256], "first_stage_model.decoder.up.1.block.2.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv2.bias": [256], "first_stage_model.decoder.up.1.upsample.conv.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.upsample.conv.bias": [256], "first_stage_model.decoder.up.2.block.0.norm1.weight": [512], "first_stage_model.decoder.up.2.block.0.norm1.bias": [512], "first_stage_model.decoder.up.2.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv1.bias": [512], "first_stage_model.decoder.up.2.block.0.norm2.weight": [512], "first_stage_model.decoder.up.2.block.0.norm2.bias": [512], "first_stage_model.decoder.up.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv2.bias": [512], "first_stage_model.decoder.up.2.block.1.norm1.weight": [512], "first_stage_model.decoder.up.2.block.1.norm1.bias": [512], "first_stage_model.decoder.up.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv1.bias": [512], "first_stage_model.decoder.up.2.block.1.norm2.weight": [512], "first_stage_model.decoder.up.2.block.1.norm2.bias": [512], "first_stage_model.decoder.up.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv2.bias": [512], "first_stage_model.decoder.up.2.block.2.norm1.weight": [512], "first_stage_model.decoder.up.2.block.2.norm1.bias": [512], "first_stage_model.decoder.up.2.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv1.bias": [512], 
"first_stage_model.decoder.up.2.block.2.norm2.weight": [512], "first_stage_model.decoder.up.2.block.2.norm2.bias": [512], "first_stage_model.decoder.up.2.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv2.bias": [512], "first_stage_model.decoder.up.2.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.upsample.conv.bias": [512], "first_stage_model.decoder.up.3.block.0.norm1.weight": [512], "first_stage_model.decoder.up.3.block.0.norm1.bias": [512], "first_stage_model.decoder.up.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv1.bias": [512], "first_stage_model.decoder.up.3.block.0.norm2.weight": [512], "first_stage_model.decoder.up.3.block.0.norm2.bias": [512], "first_stage_model.decoder.up.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv2.bias": [512], "first_stage_model.decoder.up.3.block.1.norm1.weight": [512], "first_stage_model.decoder.up.3.block.1.norm1.bias": [512], "first_stage_model.decoder.up.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv1.bias": [512], "first_stage_model.decoder.up.3.block.1.norm2.weight": [512], "first_stage_model.decoder.up.3.block.1.norm2.bias": [512], "first_stage_model.decoder.up.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv2.bias": [512], "first_stage_model.decoder.up.3.block.2.norm1.weight": [512], "first_stage_model.decoder.up.3.block.2.norm1.bias": [512], "first_stage_model.decoder.up.3.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv1.bias": [512], "first_stage_model.decoder.up.3.block.2.norm2.weight": [512], "first_stage_model.decoder.up.3.block.2.norm2.bias": [512], "first_stage_model.decoder.up.3.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv2.bias": [512], "first_stage_model.decoder.up.3.upsample.conv.weight": [512, 512, 3, 3], 
"first_stage_model.decoder.up.3.upsample.conv.bias": [512], "first_stage_model.decoder.norm_out.weight": [128], "first_stage_model.decoder.norm_out.bias": [128], "first_stage_model.decoder.conv_out.weight": [3, 128, 3, 3], "first_stage_model.decoder.conv_out.bias": [3], "first_stage_model.quant_conv.weight": [8, 8, 1, 1], "first_stage_model.quant_conv.bias": [8], "first_stage_model.post_quant_conv.weight": [4, 4, 1, 1], "first_stage_model.post_quant_conv.bias": [4], "cond_stage_model.transformer.text_model.embeddings.position_ids": [1, 77], "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight": [49408, 768], "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight": [77, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias": [3072], 
"cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.final_layer_norm.weight": [768], "cond_stage_model.transformer.text_model.final_layer_norm.bias": [768]}} \ No newline at end of file diff --git a/invokeai/configs/model_probe_templates/checkpoints/sd-1/main-normal.json b/invokeai/configs/model_probe_templates/checkpoints/sd-1/main-normal.json new file mode 100644 index 0000000000..3624de1db0 --- /dev/null +++ b/invokeai/configs/model_probe_templates/checkpoints/sd-1/main-normal.json @@ -0,0 +1 @@ +{"base_type": "sd-1", "model_type": "main", "variant": "normal", "template": {"betas": [1000], "alphas_cumprod": [1000], "alphas_cumprod_prev": [1000], "sqrt_alphas_cumprod": [1000], "sqrt_one_minus_alphas_cumprod": [1000], "log_one_minus_alphas_cumprod": [1000], "sqrt_recip_alphas_cumprod": [1000], "sqrt_recipm1_alphas_cumprod": [1000], "posterior_variance": [1000], "posterior_log_variance_clipped": [1000], "posterior_mean_coef1": [1000], "posterior_mean_coef2": [1000], "model.diffusion_model.time_embed.0.weight": [1280, 320], "model.diffusion_model.time_embed.0.bias": [1280], "model.diffusion_model.time_embed.2.weight": [1280, 1280], "model.diffusion_model.time_embed.2.bias": [1280], "model.diffusion_model.input_blocks.0.0.weight": [320, 4, 3, 3], "model.diffusion_model.input_blocks.0.0.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": [320], 
"model.diffusion_model.input_blocks.1.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.1.1.norm.weight": [320], "model.diffusion_model.input_blocks.1.1.norm.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.1.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": [320], 
"model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.1.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.2.1.norm.weight": [320], "model.diffusion_model.input_blocks.2.1.norm.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.2.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], 
"model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.input_blocks.2.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.3.0.op.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.3.0.op.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": [640, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": [640], 
"model.diffusion_model.input_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [640, 320, 1, 1], "model.diffusion_model.input_blocks.4.0.skip_connection.bias": [640], "model.diffusion_model.input_blocks.4.1.norm.weight": [640], "model.diffusion_model.input_blocks.4.1.norm.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": 
[640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.5.1.norm.weight": [640], "model.diffusion_model.input_blocks.5.1.norm.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.input_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.6.0.op.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.6.0.op.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": [1280, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [1280, 640, 1, 1], "model.diffusion_model.input_blocks.7.0.skip_connection.bias": [1280], "model.diffusion_model.input_blocks.7.1.norm.weight": [1280], "model.diffusion_model.input_blocks.7.1.norm.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.7.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.7.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.8.1.norm.weight": [1280], "model.diffusion_model.input_blocks.8.1.norm.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.8.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.input_blocks.8.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.9.0.op.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.9.0.op.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": [1280], 
"model.diffusion_model.input_blocks.10.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.0.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.3.weight": [1280, 1280, 3, 3], 
"model.diffusion_model.middle_block.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.1.norm.weight": [1280], "model.diffusion_model.middle_block.1.norm.bias": [1280], "model.diffusion_model.middle_block.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.middle_block.1.proj_in.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": [1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.middle_block.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.middle_block.1.proj_out.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.2.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.0.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": [2560], 
"model.diffusion_model.output_blocks.1.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.1.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.2.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.1.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.1.conv.bias": [1280], "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": [2560], 
"model.diffusion_model.output_blocks.3.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.3.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.3.1.norm.weight": [1280], "model.diffusion_model.output_blocks.3.1.norm.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.3.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.3.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.4.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.4.1.norm.weight": [1280], 
"model.diffusion_model.output_blocks.4.1.norm.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.4.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": [1280], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.4.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": [1280, 1920, 3, 3], "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [1280, 1920, 1, 1], "model.diffusion_model.output_blocks.5.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.5.1.norm.weight": [1280], "model.diffusion_model.output_blocks.5.1.norm.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.5.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [1280, 768], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [1280, 768], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_out.weight": [1280, 1280, 1, 1], "model.diffusion_model.output_blocks.5.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.2.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.2.conv.bias": [1280], "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": [640, 1920, 3, 3], "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": [640], 
"model.diffusion_model.output_blocks.6.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [640, 1920, 1, 1], "model.diffusion_model.output_blocks.6.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.6.1.norm.weight": [640], "model.diffusion_model.output_blocks.6.1.norm.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.6.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], 
"model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.6.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": [640, 1280, 3, 3], "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [640, 1280, 1, 1], "model.diffusion_model.output_blocks.7.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.7.1.norm.weight": [640], "model.diffusion_model.output_blocks.7.1.norm.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.7.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.7.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": [960], 
"model.diffusion_model.output_blocks.8.0.in_layers.2.weight": [640, 960, 3, 3], "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [640, 960, 1, 1], "model.diffusion_model.output_blocks.8.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.8.1.norm.weight": [640], "model.diffusion_model.output_blocks.8.1.norm.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_in.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.8.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], 
"model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [640, 768], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [640, 768], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_out.weight": [640, 640, 1, 1], "model.diffusion_model.output_blocks.8.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.2.conv.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.2.conv.bias": [640], "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": [320, 960, 3, 3], "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [320, 960, 1, 1], 
"model.diffusion_model.output_blocks.9.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.9.1.norm.weight": [320], "model.diffusion_model.output_blocks.9.1.norm.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.9.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": [320], 
"model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.9.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.10.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.10.1.norm.weight": [320], "model.diffusion_model.output_blocks.10.1.norm.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.10.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": [320], 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.10.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": [320], 
"model.diffusion_model.output_blocks.11.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.11.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.11.1.norm.weight": [320], "model.diffusion_model.output_blocks.11.1.norm.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_in.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.11.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": [320, 768], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": [320, 768], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": [320], 
"model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_out.weight": [320, 320, 1, 1], "model.diffusion_model.output_blocks.11.1.proj_out.bias": [320], "model.diffusion_model.out.0.weight": [320], "model.diffusion_model.out.0.bias": [320], "model.diffusion_model.out.2.weight": [4, 320, 3, 3], "model.diffusion_model.out.2.bias": [4], "model_ema.decay": [], "model_ema.num_updates": [], "first_stage_model.encoder.conv_in.weight": [128, 3, 3, 3], "first_stage_model.encoder.conv_in.bias": [128], "first_stage_model.encoder.down.0.block.0.norm1.weight": [128], "first_stage_model.encoder.down.0.block.0.norm1.bias": [128], "first_stage_model.encoder.down.0.block.0.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv1.bias": [128], "first_stage_model.encoder.down.0.block.0.norm2.weight": [128], "first_stage_model.encoder.down.0.block.0.norm2.bias": [128], "first_stage_model.encoder.down.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv2.bias": [128], "first_stage_model.encoder.down.0.block.1.norm1.weight": [128], "first_stage_model.encoder.down.0.block.1.norm1.bias": [128], "first_stage_model.encoder.down.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv1.bias": [128], "first_stage_model.encoder.down.0.block.1.norm2.weight": [128], "first_stage_model.encoder.down.0.block.1.norm2.bias": [128], "first_stage_model.encoder.down.0.block.1.conv2.weight": [128, 128, 3, 3], 
"first_stage_model.encoder.down.0.block.1.conv2.bias": [128], "first_stage_model.encoder.down.0.downsample.conv.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.downsample.conv.bias": [128], "first_stage_model.encoder.down.1.block.0.norm1.weight": [128], "first_stage_model.encoder.down.1.block.0.norm1.bias": [128], "first_stage_model.encoder.down.1.block.0.conv1.weight": [256, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv1.bias": [256], "first_stage_model.encoder.down.1.block.0.norm2.weight": [256], "first_stage_model.encoder.down.1.block.0.norm2.bias": [256], "first_stage_model.encoder.down.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.0.conv2.bias": [256], "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [256, 128, 1, 1], "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": [256], "first_stage_model.encoder.down.1.block.1.norm1.weight": [256], "first_stage_model.encoder.down.1.block.1.norm1.bias": [256], "first_stage_model.encoder.down.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv1.bias": [256], "first_stage_model.encoder.down.1.block.1.norm2.weight": [256], "first_stage_model.encoder.down.1.block.1.norm2.bias": [256], "first_stage_model.encoder.down.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv2.bias": [256], "first_stage_model.encoder.down.1.downsample.conv.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.downsample.conv.bias": [256], "first_stage_model.encoder.down.2.block.0.norm1.weight": [256], "first_stage_model.encoder.down.2.block.0.norm1.bias": [256], "first_stage_model.encoder.down.2.block.0.conv1.weight": [512, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv1.bias": [512], "first_stage_model.encoder.down.2.block.0.norm2.weight": [512], "first_stage_model.encoder.down.2.block.0.norm2.bias": [512], "first_stage_model.encoder.down.2.block.0.conv2.weight": [512, 
512, 3, 3], "first_stage_model.encoder.down.2.block.0.conv2.bias": [512], "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [512, 256, 1, 1], "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": [512], "first_stage_model.encoder.down.2.block.1.norm1.weight": [512], "first_stage_model.encoder.down.2.block.1.norm1.bias": [512], "first_stage_model.encoder.down.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv1.bias": [512], "first_stage_model.encoder.down.2.block.1.norm2.weight": [512], "first_stage_model.encoder.down.2.block.1.norm2.bias": [512], "first_stage_model.encoder.down.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv2.bias": [512], "first_stage_model.encoder.down.2.downsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.downsample.conv.bias": [512], "first_stage_model.encoder.down.3.block.0.norm1.weight": [512], "first_stage_model.encoder.down.3.block.0.norm1.bias": [512], "first_stage_model.encoder.down.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv1.bias": [512], "first_stage_model.encoder.down.3.block.0.norm2.weight": [512], "first_stage_model.encoder.down.3.block.0.norm2.bias": [512], "first_stage_model.encoder.down.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv2.bias": [512], "first_stage_model.encoder.down.3.block.1.norm1.weight": [512], "first_stage_model.encoder.down.3.block.1.norm1.bias": [512], "first_stage_model.encoder.down.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv1.bias": [512], "first_stage_model.encoder.down.3.block.1.norm2.weight": [512], "first_stage_model.encoder.down.3.block.1.norm2.bias": [512], "first_stage_model.encoder.down.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv2.bias": [512], "first_stage_model.encoder.mid.block_1.norm1.weight": 
[512], "first_stage_model.encoder.mid.block_1.norm1.bias": [512], "first_stage_model.encoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv1.bias": [512], "first_stage_model.encoder.mid.block_1.norm2.weight": [512], "first_stage_model.encoder.mid.block_1.norm2.bias": [512], "first_stage_model.encoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv2.bias": [512], "first_stage_model.encoder.mid.attn_1.norm.weight": [512], "first_stage_model.encoder.mid.attn_1.norm.bias": [512], "first_stage_model.encoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.q.bias": [512], "first_stage_model.encoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.k.bias": [512], "first_stage_model.encoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.v.bias": [512], "first_stage_model.encoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.encoder.mid.block_2.norm1.weight": [512], "first_stage_model.encoder.mid.block_2.norm1.bias": [512], "first_stage_model.encoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv1.bias": [512], "first_stage_model.encoder.mid.block_2.norm2.weight": [512], "first_stage_model.encoder.mid.block_2.norm2.bias": [512], "first_stage_model.encoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv2.bias": [512], "first_stage_model.encoder.norm_out.weight": [512], "first_stage_model.encoder.norm_out.bias": [512], "first_stage_model.encoder.conv_out.weight": [8, 512, 3, 3], "first_stage_model.encoder.conv_out.bias": [8], "first_stage_model.decoder.conv_in.weight": [512, 4, 3, 3], "first_stage_model.decoder.conv_in.bias": [512], "first_stage_model.decoder.mid.block_1.norm1.weight": [512], "first_stage_model.decoder.mid.block_1.norm1.bias": 
[512], "first_stage_model.decoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv1.bias": [512], "first_stage_model.decoder.mid.block_1.norm2.weight": [512], "first_stage_model.decoder.mid.block_1.norm2.bias": [512], "first_stage_model.decoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv2.bias": [512], "first_stage_model.decoder.mid.attn_1.norm.weight": [512], "first_stage_model.decoder.mid.attn_1.norm.bias": [512], "first_stage_model.decoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.q.bias": [512], "first_stage_model.decoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.k.bias": [512], "first_stage_model.decoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.v.bias": [512], "first_stage_model.decoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.decoder.mid.block_2.norm1.weight": [512], "first_stage_model.decoder.mid.block_2.norm1.bias": [512], "first_stage_model.decoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv1.bias": [512], "first_stage_model.decoder.mid.block_2.norm2.weight": [512], "first_stage_model.decoder.mid.block_2.norm2.bias": [512], "first_stage_model.decoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv2.bias": [512], "first_stage_model.decoder.up.0.block.0.norm1.weight": [256], "first_stage_model.decoder.up.0.block.0.norm1.bias": [256], "first_stage_model.decoder.up.0.block.0.conv1.weight": [128, 256, 3, 3], "first_stage_model.decoder.up.0.block.0.conv1.bias": [128], "first_stage_model.decoder.up.0.block.0.norm2.weight": [128], "first_stage_model.decoder.up.0.block.0.norm2.bias": [128], "first_stage_model.decoder.up.0.block.0.conv2.weight": [128, 128, 3, 3], 
"first_stage_model.decoder.up.0.block.0.conv2.bias": [128], "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [128, 256, 1, 1], "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": [128], "first_stage_model.decoder.up.0.block.1.norm1.weight": [128], "first_stage_model.decoder.up.0.block.1.norm1.bias": [128], "first_stage_model.decoder.up.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv1.bias": [128], "first_stage_model.decoder.up.0.block.1.norm2.weight": [128], "first_stage_model.decoder.up.0.block.1.norm2.bias": [128], "first_stage_model.decoder.up.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv2.bias": [128], "first_stage_model.decoder.up.0.block.2.norm1.weight": [128], "first_stage_model.decoder.up.0.block.2.norm1.bias": [128], "first_stage_model.decoder.up.0.block.2.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv1.bias": [128], "first_stage_model.decoder.up.0.block.2.norm2.weight": [128], "first_stage_model.decoder.up.0.block.2.norm2.bias": [128], "first_stage_model.decoder.up.0.block.2.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv2.bias": [128], "first_stage_model.decoder.up.1.block.0.norm1.weight": [512], "first_stage_model.decoder.up.1.block.0.norm1.bias": [512], "first_stage_model.decoder.up.1.block.0.conv1.weight": [256, 512, 3, 3], "first_stage_model.decoder.up.1.block.0.conv1.bias": [256], "first_stage_model.decoder.up.1.block.0.norm2.weight": [256], "first_stage_model.decoder.up.1.block.0.norm2.bias": [256], "first_stage_model.decoder.up.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.0.conv2.bias": [256], "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [256, 512, 1, 1], "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": [256], "first_stage_model.decoder.up.1.block.1.norm1.weight": [256], "first_stage_model.decoder.up.1.block.1.norm1.bias": 
[256], "first_stage_model.decoder.up.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv1.bias": [256], "first_stage_model.decoder.up.1.block.1.norm2.weight": [256], "first_stage_model.decoder.up.1.block.1.norm2.bias": [256], "first_stage_model.decoder.up.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv2.bias": [256], "first_stage_model.decoder.up.1.block.2.norm1.weight": [256], "first_stage_model.decoder.up.1.block.2.norm1.bias": [256], "first_stage_model.decoder.up.1.block.2.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv1.bias": [256], "first_stage_model.decoder.up.1.block.2.norm2.weight": [256], "first_stage_model.decoder.up.1.block.2.norm2.bias": [256], "first_stage_model.decoder.up.1.block.2.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv2.bias": [256], "first_stage_model.decoder.up.1.upsample.conv.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.upsample.conv.bias": [256], "first_stage_model.decoder.up.2.block.0.norm1.weight": [512], "first_stage_model.decoder.up.2.block.0.norm1.bias": [512], "first_stage_model.decoder.up.2.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv1.bias": [512], "first_stage_model.decoder.up.2.block.0.norm2.weight": [512], "first_stage_model.decoder.up.2.block.0.norm2.bias": [512], "first_stage_model.decoder.up.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv2.bias": [512], "first_stage_model.decoder.up.2.block.1.norm1.weight": [512], "first_stage_model.decoder.up.2.block.1.norm1.bias": [512], "first_stage_model.decoder.up.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv1.bias": [512], "first_stage_model.decoder.up.2.block.1.norm2.weight": [512], "first_stage_model.decoder.up.2.block.1.norm2.bias": [512], "first_stage_model.decoder.up.2.block.1.conv2.weight": [512, 512, 3, 3], 
"first_stage_model.decoder.up.2.block.1.conv2.bias": [512], "first_stage_model.decoder.up.2.block.2.norm1.weight": [512], "first_stage_model.decoder.up.2.block.2.norm1.bias": [512], "first_stage_model.decoder.up.2.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv1.bias": [512], "first_stage_model.decoder.up.2.block.2.norm2.weight": [512], "first_stage_model.decoder.up.2.block.2.norm2.bias": [512], "first_stage_model.decoder.up.2.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv2.bias": [512], "first_stage_model.decoder.up.2.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.upsample.conv.bias": [512], "first_stage_model.decoder.up.3.block.0.norm1.weight": [512], "first_stage_model.decoder.up.3.block.0.norm1.bias": [512], "first_stage_model.decoder.up.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv1.bias": [512], "first_stage_model.decoder.up.3.block.0.norm2.weight": [512], "first_stage_model.decoder.up.3.block.0.norm2.bias": [512], "first_stage_model.decoder.up.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv2.bias": [512], "first_stage_model.decoder.up.3.block.1.norm1.weight": [512], "first_stage_model.decoder.up.3.block.1.norm1.bias": [512], "first_stage_model.decoder.up.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv1.bias": [512], "first_stage_model.decoder.up.3.block.1.norm2.weight": [512], "first_stage_model.decoder.up.3.block.1.norm2.bias": [512], "first_stage_model.decoder.up.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv2.bias": [512], "first_stage_model.decoder.up.3.block.2.norm1.weight": [512], "first_stage_model.decoder.up.3.block.2.norm1.bias": [512], "first_stage_model.decoder.up.3.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv1.bias": [512], 
"first_stage_model.decoder.up.3.block.2.norm2.weight": [512], "first_stage_model.decoder.up.3.block.2.norm2.bias": [512], "first_stage_model.decoder.up.3.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv2.bias": [512], "first_stage_model.decoder.up.3.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.upsample.conv.bias": [512], "first_stage_model.decoder.norm_out.weight": [128], "first_stage_model.decoder.norm_out.bias": [128], "first_stage_model.decoder.conv_out.weight": [3, 128, 3, 3], "first_stage_model.decoder.conv_out.bias": [3], "first_stage_model.quant_conv.weight": [8, 8, 1, 1], "first_stage_model.quant_conv.bias": [8], "first_stage_model.post_quant_conv.weight": [4, 4, 1, 1], "first_stage_model.post_quant_conv.bias": [4], "cond_stage_model.transformer.text_model.embeddings.position_ids": [1, 77], "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight": [49408, 768], "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight": [77, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm1.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.0.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.1.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.1.layer_norm2.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.2.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.2.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.3.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.3.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc1.bias": [3072], 
"cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.4.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.4.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.5.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.5.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.6.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.6.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.7.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.7.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.8.mlp.fc2.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.8.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.9.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.9.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm1.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.10.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.10.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": [768, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm1.bias": [768], 
"cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.weight": [3072, 768], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc1.bias": [3072], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.weight": [768, 3072], "cond_stage_model.transformer.text_model.encoder.layers.11.mlp.fc2.bias": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.weight": [768], "cond_stage_model.transformer.text_model.encoder.layers.11.layer_norm2.bias": [768], "cond_stage_model.transformer.text_model.final_layer_norm.weight": [768], "cond_stage_model.transformer.text_model.final_layer_norm.bias": [768]}} \ No newline at end of file diff --git a/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-inpaint-512.json b/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-inpaint-512.json new file mode 100644 index 0000000000..541e373b39 --- /dev/null +++ b/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-inpaint-512.json @@ -0,0 +1 @@ +{"base_type": "sd-2", "model_type": "main", "variant": "inpaint", "template": {"alphas_cumprod": [1000], "alphas_cumprod_prev": [1000], "betas": [1000], "cond_stage_model.model.ln_final.bias": [1024], "cond_stage_model.model.ln_final.weight": [1024], "cond_stage_model.model.logit_scale": [], "cond_stage_model.model.positional_embedding": [77, 1024], "cond_stage_model.model.text_projection": [1024, 1024], "cond_stage_model.model.token_embedding.weight": [49408, 1024], "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.0.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_1.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.0.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.1.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.10.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_2.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.11.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.12.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.weight": [4096, 1024], 
"cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.13.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.14.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.weight": [1024, 4096], 
"cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.15.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.16.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": [3072, 1024], 
"cond_stage_model.model.transformer.resblocks.17.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.17.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.18.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.weight": [1024, 1024], 
"cond_stage_model.model.transformer.resblocks.19.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.2.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.20.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_1.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.20.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.21.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.22.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_2.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.23.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.23.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.3.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.weight": [4096, 1024], 
"cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.4.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.5.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.weight": [1024, 4096], 
"cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.6.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.7.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": [3072, 1024], 
"cond_stage_model.model.transformer.resblocks.8.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.8.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.9.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.weight": [1024, 4096], "first_stage_model.decoder.conv_in.bias": [512], "first_stage_model.decoder.conv_in.weight": [512, 4, 3, 3], "first_stage_model.decoder.conv_out.bias": [3], "first_stage_model.decoder.conv_out.weight": [3, 128, 3, 3], "first_stage_model.decoder.mid.attn_1.k.bias": [512], "first_stage_model.decoder.mid.attn_1.k.weight": [512, 512, 1, 1], 
"first_stage_model.decoder.mid.attn_1.norm.bias": [512], "first_stage_model.decoder.mid.attn_1.norm.weight": [512], "first_stage_model.decoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.decoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.q.bias": [512], "first_stage_model.decoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.v.bias": [512], "first_stage_model.decoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.block_1.conv1.bias": [512], "first_stage_model.decoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv2.bias": [512], "first_stage_model.decoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.norm1.bias": [512], "first_stage_model.decoder.mid.block_1.norm1.weight": [512], "first_stage_model.decoder.mid.block_1.norm2.bias": [512], "first_stage_model.decoder.mid.block_1.norm2.weight": [512], "first_stage_model.decoder.mid.block_2.conv1.bias": [512], "first_stage_model.decoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv2.bias": [512], "first_stage_model.decoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.norm1.bias": [512], "first_stage_model.decoder.mid.block_2.norm1.weight": [512], "first_stage_model.decoder.mid.block_2.norm2.bias": [512], "first_stage_model.decoder.mid.block_2.norm2.weight": [512], "first_stage_model.decoder.norm_out.bias": [128], "first_stage_model.decoder.norm_out.weight": [128], "first_stage_model.decoder.up.0.block.0.conv1.bias": [128], "first_stage_model.decoder.up.0.block.0.conv1.weight": [128, 256, 3, 3], "first_stage_model.decoder.up.0.block.0.conv2.bias": [128], "first_stage_model.decoder.up.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": [128], "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [128, 
256, 1, 1], "first_stage_model.decoder.up.0.block.0.norm1.bias": [256], "first_stage_model.decoder.up.0.block.0.norm1.weight": [256], "first_stage_model.decoder.up.0.block.0.norm2.bias": [128], "first_stage_model.decoder.up.0.block.0.norm2.weight": [128], "first_stage_model.decoder.up.0.block.1.conv1.bias": [128], "first_stage_model.decoder.up.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv2.bias": [128], "first_stage_model.decoder.up.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.norm1.bias": [128], "first_stage_model.decoder.up.0.block.1.norm1.weight": [128], "first_stage_model.decoder.up.0.block.1.norm2.bias": [128], "first_stage_model.decoder.up.0.block.1.norm2.weight": [128], "first_stage_model.decoder.up.0.block.2.conv1.bias": [128], "first_stage_model.decoder.up.0.block.2.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv2.bias": [128], "first_stage_model.decoder.up.0.block.2.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.norm1.bias": [128], "first_stage_model.decoder.up.0.block.2.norm1.weight": [128], "first_stage_model.decoder.up.0.block.2.norm2.bias": [128], "first_stage_model.decoder.up.0.block.2.norm2.weight": [128], "first_stage_model.decoder.up.1.block.0.conv1.bias": [256], "first_stage_model.decoder.up.1.block.0.conv1.weight": [256, 512, 3, 3], "first_stage_model.decoder.up.1.block.0.conv2.bias": [256], "first_stage_model.decoder.up.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": [256], "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [256, 512, 1, 1], "first_stage_model.decoder.up.1.block.0.norm1.bias": [512], "first_stage_model.decoder.up.1.block.0.norm1.weight": [512], "first_stage_model.decoder.up.1.block.0.norm2.bias": [256], "first_stage_model.decoder.up.1.block.0.norm2.weight": [256], "first_stage_model.decoder.up.1.block.1.conv1.bias": [256], 
"first_stage_model.decoder.up.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv2.bias": [256], "first_stage_model.decoder.up.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.norm1.bias": [256], "first_stage_model.decoder.up.1.block.1.norm1.weight": [256], "first_stage_model.decoder.up.1.block.1.norm2.bias": [256], "first_stage_model.decoder.up.1.block.1.norm2.weight": [256], "first_stage_model.decoder.up.1.block.2.conv1.bias": [256], "first_stage_model.decoder.up.1.block.2.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv2.bias": [256], "first_stage_model.decoder.up.1.block.2.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.norm1.bias": [256], "first_stage_model.decoder.up.1.block.2.norm1.weight": [256], "first_stage_model.decoder.up.1.block.2.norm2.bias": [256], "first_stage_model.decoder.up.1.block.2.norm2.weight": [256], "first_stage_model.decoder.up.1.upsample.conv.bias": [256], "first_stage_model.decoder.up.1.upsample.conv.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.2.block.0.conv1.bias": [512], "first_stage_model.decoder.up.2.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv2.bias": [512], "first_stage_model.decoder.up.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.norm1.bias": [512], "first_stage_model.decoder.up.2.block.0.norm1.weight": [512], "first_stage_model.decoder.up.2.block.0.norm2.bias": [512], "first_stage_model.decoder.up.2.block.0.norm2.weight": [512], "first_stage_model.decoder.up.2.block.1.conv1.bias": [512], "first_stage_model.decoder.up.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv2.bias": [512], "first_stage_model.decoder.up.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.norm1.bias": [512], "first_stage_model.decoder.up.2.block.1.norm1.weight": [512], 
"first_stage_model.decoder.up.2.block.1.norm2.bias": [512], "first_stage_model.decoder.up.2.block.1.norm2.weight": [512], "first_stage_model.decoder.up.2.block.2.conv1.bias": [512], "first_stage_model.decoder.up.2.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv2.bias": [512], "first_stage_model.decoder.up.2.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.norm1.bias": [512], "first_stage_model.decoder.up.2.block.2.norm1.weight": [512], "first_stage_model.decoder.up.2.block.2.norm2.bias": [512], "first_stage_model.decoder.up.2.block.2.norm2.weight": [512], "first_stage_model.decoder.up.2.upsample.conv.bias": [512], "first_stage_model.decoder.up.2.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv1.bias": [512], "first_stage_model.decoder.up.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv2.bias": [512], "first_stage_model.decoder.up.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.norm1.bias": [512], "first_stage_model.decoder.up.3.block.0.norm1.weight": [512], "first_stage_model.decoder.up.3.block.0.norm2.bias": [512], "first_stage_model.decoder.up.3.block.0.norm2.weight": [512], "first_stage_model.decoder.up.3.block.1.conv1.bias": [512], "first_stage_model.decoder.up.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv2.bias": [512], "first_stage_model.decoder.up.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.norm1.bias": [512], "first_stage_model.decoder.up.3.block.1.norm1.weight": [512], "first_stage_model.decoder.up.3.block.1.norm2.bias": [512], "first_stage_model.decoder.up.3.block.1.norm2.weight": [512], "first_stage_model.decoder.up.3.block.2.conv1.bias": [512], "first_stage_model.decoder.up.3.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv2.bias": [512], 
"first_stage_model.decoder.up.3.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.norm1.bias": [512], "first_stage_model.decoder.up.3.block.2.norm1.weight": [512], "first_stage_model.decoder.up.3.block.2.norm2.bias": [512], "first_stage_model.decoder.up.3.block.2.norm2.weight": [512], "first_stage_model.decoder.up.3.upsample.conv.bias": [512], "first_stage_model.decoder.up.3.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.conv_in.bias": [128], "first_stage_model.encoder.conv_in.weight": [128, 3, 3, 3], "first_stage_model.encoder.conv_out.bias": [8], "first_stage_model.encoder.conv_out.weight": [8, 512, 3, 3], "first_stage_model.encoder.down.0.block.0.conv1.bias": [128], "first_stage_model.encoder.down.0.block.0.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv2.bias": [128], "first_stage_model.encoder.down.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.norm1.bias": [128], "first_stage_model.encoder.down.0.block.0.norm1.weight": [128], "first_stage_model.encoder.down.0.block.0.norm2.bias": [128], "first_stage_model.encoder.down.0.block.0.norm2.weight": [128], "first_stage_model.encoder.down.0.block.1.conv1.bias": [128], "first_stage_model.encoder.down.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv2.bias": [128], "first_stage_model.encoder.down.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.norm1.bias": [128], "first_stage_model.encoder.down.0.block.1.norm1.weight": [128], "first_stage_model.encoder.down.0.block.1.norm2.bias": [128], "first_stage_model.encoder.down.0.block.1.norm2.weight": [128], "first_stage_model.encoder.down.0.downsample.conv.bias": [128], "first_stage_model.encoder.down.0.downsample.conv.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv1.bias": [256], "first_stage_model.encoder.down.1.block.0.conv1.weight": [256, 128, 3, 3], 
"first_stage_model.encoder.down.1.block.0.conv2.bias": [256], "first_stage_model.encoder.down.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": [256], "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [256, 128, 1, 1], "first_stage_model.encoder.down.1.block.0.norm1.bias": [128], "first_stage_model.encoder.down.1.block.0.norm1.weight": [128], "first_stage_model.encoder.down.1.block.0.norm2.bias": [256], "first_stage_model.encoder.down.1.block.0.norm2.weight": [256], "first_stage_model.encoder.down.1.block.1.conv1.bias": [256], "first_stage_model.encoder.down.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv2.bias": [256], "first_stage_model.encoder.down.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.norm1.bias": [256], "first_stage_model.encoder.down.1.block.1.norm1.weight": [256], "first_stage_model.encoder.down.1.block.1.norm2.bias": [256], "first_stage_model.encoder.down.1.block.1.norm2.weight": [256], "first_stage_model.encoder.down.1.downsample.conv.bias": [256], "first_stage_model.encoder.down.1.downsample.conv.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv1.bias": [512], "first_stage_model.encoder.down.2.block.0.conv1.weight": [512, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv2.bias": [512], "first_stage_model.encoder.down.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": [512], "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [512, 256, 1, 1], "first_stage_model.encoder.down.2.block.0.norm1.bias": [256], "first_stage_model.encoder.down.2.block.0.norm1.weight": [256], "first_stage_model.encoder.down.2.block.0.norm2.bias": [512], "first_stage_model.encoder.down.2.block.0.norm2.weight": [512], "first_stage_model.encoder.down.2.block.1.conv1.bias": [512], 
"first_stage_model.encoder.down.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv2.bias": [512], "first_stage_model.encoder.down.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.norm1.bias": [512], "first_stage_model.encoder.down.2.block.1.norm1.weight": [512], "first_stage_model.encoder.down.2.block.1.norm2.bias": [512], "first_stage_model.encoder.down.2.block.1.norm2.weight": [512], "first_stage_model.encoder.down.2.downsample.conv.bias": [512], "first_stage_model.encoder.down.2.downsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv1.bias": [512], "first_stage_model.encoder.down.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv2.bias": [512], "first_stage_model.encoder.down.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.norm1.bias": [512], "first_stage_model.encoder.down.3.block.0.norm1.weight": [512], "first_stage_model.encoder.down.3.block.0.norm2.bias": [512], "first_stage_model.encoder.down.3.block.0.norm2.weight": [512], "first_stage_model.encoder.down.3.block.1.conv1.bias": [512], "first_stage_model.encoder.down.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv2.bias": [512], "first_stage_model.encoder.down.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.norm1.bias": [512], "first_stage_model.encoder.down.3.block.1.norm1.weight": [512], "first_stage_model.encoder.down.3.block.1.norm2.bias": [512], "first_stage_model.encoder.down.3.block.1.norm2.weight": [512], "first_stage_model.encoder.mid.attn_1.k.bias": [512], "first_stage_model.encoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.norm.bias": [512], "first_stage_model.encoder.mid.attn_1.norm.weight": [512], "first_stage_model.encoder.mid.attn_1.proj_out.bias": [512], 
"first_stage_model.encoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.q.bias": [512], "first_stage_model.encoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.v.bias": [512], "first_stage_model.encoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.block_1.conv1.bias": [512], "first_stage_model.encoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv2.bias": [512], "first_stage_model.encoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.norm1.bias": [512], "first_stage_model.encoder.mid.block_1.norm1.weight": [512], "first_stage_model.encoder.mid.block_1.norm2.bias": [512], "first_stage_model.encoder.mid.block_1.norm2.weight": [512], "first_stage_model.encoder.mid.block_2.conv1.bias": [512], "first_stage_model.encoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv2.bias": [512], "first_stage_model.encoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.norm1.bias": [512], "first_stage_model.encoder.mid.block_2.norm1.weight": [512], "first_stage_model.encoder.mid.block_2.norm2.bias": [512], "first_stage_model.encoder.mid.block_2.norm2.weight": [512], "first_stage_model.encoder.norm_out.bias": [512], "first_stage_model.encoder.norm_out.weight": [512], "first_stage_model.post_quant_conv.bias": [4], "first_stage_model.post_quant_conv.weight": [4, 4, 1, 1], "first_stage_model.quant_conv.bias": [8], "first_stage_model.quant_conv.weight": [8, 8, 1, 1], "log_one_minus_alphas_cumprod": [1000], "model.diffusion_model.input_blocks.0.0.bias": [320], "model.diffusion_model.input_blocks.0.0.weight": [320, 9, 3, 3], "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": [320], 
"model.diffusion_model.input_blocks.1.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.1.norm.bias": [320], "model.diffusion_model.input_blocks.1.1.norm.weight": [320], "model.diffusion_model.input_blocks.1.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_in.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_out.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 
320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": [1280], 
"model.diffusion_model.input_blocks.11.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.1.norm.bias": [320], "model.diffusion_model.input_blocks.2.1.norm.weight": [320], "model.diffusion_model.input_blocks.2.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_in.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_out.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], 
"model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.3.0.op.bias": [320], "model.diffusion_model.input_blocks.3.0.op.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": [640, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.4.0.skip_connection.bias": [640], 
"model.diffusion_model.input_blocks.4.0.skip_connection.weight": [640, 320, 1, 1], "model.diffusion_model.input_blocks.4.1.norm.bias": [640], "model.diffusion_model.input_blocks.4.1.norm.weight": [640], "model.diffusion_model.input_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_out.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": [640], 
"model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.1.norm.bias": [640], "model.diffusion_model.input_blocks.5.1.norm.weight": [640], "model.diffusion_model.input_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_out.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.6.0.op.bias": [640], "model.diffusion_model.input_blocks.6.0.op.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": [1280, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": [1280], 
"model.diffusion_model.input_blocks.7.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.7.0.skip_connection.bias": [1280], "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [1280, 640, 1, 1], "model.diffusion_model.input_blocks.7.1.norm.bias": [1280], "model.diffusion_model.input_blocks.7.1.norm.weight": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.1.norm.bias": [1280], "model.diffusion_model.input_blocks.8.1.norm.weight": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.9.0.op.bias": [1280], "model.diffusion_model.input_blocks.9.0.op.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.0.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.2.weight": [1280, 1280, 3, 3], 
"model.diffusion_model.middle_block.0.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.1.norm.bias": [1280], "model.diffusion_model.middle_block.1.norm.weight": [1280], "model.diffusion_model.middle_block.1.proj_in.bias": [1280], "model.diffusion_model.middle_block.1.proj_in.weight": [1280, 1280], "model.diffusion_model.middle_block.1.proj_out.bias": [1280], "model.diffusion_model.middle_block.1.proj_out.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], 
"model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.2.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.out.0.bias": [320], "model.diffusion_model.out.0.weight": [320], "model.diffusion_model.out.2.bias": [4], "model.diffusion_model.out.2.weight": [4, 320, 3, 3], "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": 
[1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.0.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.1.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.10.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.10.0.skip_connection.weight": 
[320, 640, 1, 1], "model.diffusion_model.output_blocks.10.1.norm.bias": [320], "model.diffusion_model.output_blocks.10.1.norm.weight": [320], "model.diffusion_model.output_blocks.10.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": [320], 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.11.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.11.1.norm.bias": [320], "model.diffusion_model.output_blocks.11.1.norm.weight": [320], "model.diffusion_model.output_blocks.11.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": [320, 
320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": 
[1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.2.1.conv.bias": [1280], "model.diffusion_model.output_blocks.2.1.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.3.1.norm.bias": [1280], "model.diffusion_model.output_blocks.3.1.norm.weight": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": [1280, 2560, 3, 3], 
"model.diffusion_model.output_blocks.4.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.4.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.4.1.norm.bias": [1280], "model.diffusion_model.output_blocks.4.1.norm.weight": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": [1280, 1920, 3, 3], "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [1280, 1920, 1, 1], "model.diffusion_model.output_blocks.5.1.norm.bias": [1280], "model.diffusion_model.output_blocks.5.1.norm.weight": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_out.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.2.conv.bias": [1280], "model.diffusion_model.output_blocks.5.2.conv.weight": [1280, 1280, 3, 3], 
"model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": [640, 1920, 3, 3], "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.6.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [640, 1920, 1, 1], "model.diffusion_model.output_blocks.6.1.norm.bias": [640], "model.diffusion_model.output_blocks.6.1.norm.weight": [640], "model.diffusion_model.output_blocks.6.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": [640], 
"model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": [640, 1280, 3, 3], "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.7.0.skip_connection.bias": [640], 
"model.diffusion_model.output_blocks.7.0.skip_connection.weight": [640, 1280, 1, 1], "model.diffusion_model.output_blocks.7.1.norm.bias": [640], "model.diffusion_model.output_blocks.7.1.norm.weight": [640], "model.diffusion_model.output_blocks.7.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": [640], 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": [640, 960, 3, 3], "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [640, 960, 1, 1], "model.diffusion_model.output_blocks.8.1.norm.bias": [640], "model.diffusion_model.output_blocks.8.1.norm.weight": [640], "model.diffusion_model.output_blocks.8.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], 
"model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.2.conv.bias": [640], "model.diffusion_model.output_blocks.8.2.conv.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": [320, 960, 3, 3], 
"model.diffusion_model.output_blocks.9.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.9.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [320, 960, 1, 1], "model.diffusion_model.output_blocks.9.1.norm.bias": [320], "model.diffusion_model.output_blocks.9.1.norm.weight": [320], "model.diffusion_model.output_blocks.9.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], 
"model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.time_embed.0.bias": [1280], "model.diffusion_model.time_embed.0.weight": [1280, 320], "model.diffusion_model.time_embed.2.bias": [1280], "model.diffusion_model.time_embed.2.weight": [1280, 1280], "model_ema.decay": [], "model_ema.num_updates": [], "posterior_log_variance_clipped": [1000], "posterior_mean_coef1": [1000], "posterior_mean_coef2": [1000], "posterior_variance": [1000], "sqrt_alphas_cumprod": [1000], "sqrt_one_minus_alphas_cumprod": [1000], "sqrt_recip_alphas_cumprod": [1000], "sqrt_recipm1_alphas_cumprod": [1000]}} \ No newline at end of file diff --git a/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-normal-512.json b/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-normal-512.json new file mode 100644 index 0000000000..a4f3c82688 --- /dev/null +++ b/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-normal-512.json @@ -0,0 +1 @@ +{"base_type": "sd-2", "model_type": "main", "variant": "normal", "template": {"cond_stage_model.model.ln_final.bias": [1024], "cond_stage_model.model.ln_final.weight": [1024], "cond_stage_model.model.positional_embedding": [77, 1024], "cond_stage_model.model.token_embedding.weight": [49408, 1024], "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": [3072], 
"cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.0.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.1.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.10.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.10.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.11.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.12.ln_1.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.12.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.13.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.14.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_2.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.14.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.15.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.16.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.bias": [4096], 
"cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.17.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.18.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.19.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.2.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": [3072], 
"cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.20.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.21.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.22.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.3.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.4.ln_1.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.4.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.5.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.6.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_2.bias": [1024], 
"cond_stage_model.model.transformer.resblocks.6.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.7.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.8.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.bias": [4096], 
"cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.9.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.weight": [1024, 4096], "first_stage_model.decoder.conv_in.bias": [512], "first_stage_model.decoder.conv_in.weight": [512, 4, 3, 3], "first_stage_model.decoder.conv_out.bias": [3], "first_stage_model.decoder.conv_out.weight": [3, 128, 3, 3], "first_stage_model.decoder.mid.attn_1.k.bias": [512], "first_stage_model.decoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.norm.bias": [512], "first_stage_model.decoder.mid.attn_1.norm.weight": [512], "first_stage_model.decoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.decoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.q.bias": [512], "first_stage_model.decoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.v.bias": [512], "first_stage_model.decoder.mid.attn_1.v.weight": [512, 512, 1, 1], 
"first_stage_model.decoder.mid.block_1.conv1.bias": [512], "first_stage_model.decoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv2.bias": [512], "first_stage_model.decoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.norm1.bias": [512], "first_stage_model.decoder.mid.block_1.norm1.weight": [512], "first_stage_model.decoder.mid.block_1.norm2.bias": [512], "first_stage_model.decoder.mid.block_1.norm2.weight": [512], "first_stage_model.decoder.mid.block_2.conv1.bias": [512], "first_stage_model.decoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv2.bias": [512], "first_stage_model.decoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.norm1.bias": [512], "first_stage_model.decoder.mid.block_2.norm1.weight": [512], "first_stage_model.decoder.mid.block_2.norm2.bias": [512], "first_stage_model.decoder.mid.block_2.norm2.weight": [512], "first_stage_model.decoder.norm_out.bias": [128], "first_stage_model.decoder.norm_out.weight": [128], "first_stage_model.decoder.up.0.block.0.conv1.bias": [128], "first_stage_model.decoder.up.0.block.0.conv1.weight": [128, 256, 3, 3], "first_stage_model.decoder.up.0.block.0.conv2.bias": [128], "first_stage_model.decoder.up.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": [128], "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [128, 256, 1, 1], "first_stage_model.decoder.up.0.block.0.norm1.bias": [256], "first_stage_model.decoder.up.0.block.0.norm1.weight": [256], "first_stage_model.decoder.up.0.block.0.norm2.bias": [128], "first_stage_model.decoder.up.0.block.0.norm2.weight": [128], "first_stage_model.decoder.up.0.block.1.conv1.bias": [128], "first_stage_model.decoder.up.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv2.bias": [128], 
"first_stage_model.decoder.up.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.norm1.bias": [128], "first_stage_model.decoder.up.0.block.1.norm1.weight": [128], "first_stage_model.decoder.up.0.block.1.norm2.bias": [128], "first_stage_model.decoder.up.0.block.1.norm2.weight": [128], "first_stage_model.decoder.up.0.block.2.conv1.bias": [128], "first_stage_model.decoder.up.0.block.2.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv2.bias": [128], "first_stage_model.decoder.up.0.block.2.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.norm1.bias": [128], "first_stage_model.decoder.up.0.block.2.norm1.weight": [128], "first_stage_model.decoder.up.0.block.2.norm2.bias": [128], "first_stage_model.decoder.up.0.block.2.norm2.weight": [128], "first_stage_model.decoder.up.1.block.0.conv1.bias": [256], "first_stage_model.decoder.up.1.block.0.conv1.weight": [256, 512, 3, 3], "first_stage_model.decoder.up.1.block.0.conv2.bias": [256], "first_stage_model.decoder.up.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": [256], "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [256, 512, 1, 1], "first_stage_model.decoder.up.1.block.0.norm1.bias": [512], "first_stage_model.decoder.up.1.block.0.norm1.weight": [512], "first_stage_model.decoder.up.1.block.0.norm2.bias": [256], "first_stage_model.decoder.up.1.block.0.norm2.weight": [256], "first_stage_model.decoder.up.1.block.1.conv1.bias": [256], "first_stage_model.decoder.up.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv2.bias": [256], "first_stage_model.decoder.up.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.norm1.bias": [256], "first_stage_model.decoder.up.1.block.1.norm1.weight": [256], "first_stage_model.decoder.up.1.block.1.norm2.bias": [256], "first_stage_model.decoder.up.1.block.1.norm2.weight": [256], 
"first_stage_model.decoder.up.1.block.2.conv1.bias": [256], "first_stage_model.decoder.up.1.block.2.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv2.bias": [256], "first_stage_model.decoder.up.1.block.2.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.norm1.bias": [256], "first_stage_model.decoder.up.1.block.2.norm1.weight": [256], "first_stage_model.decoder.up.1.block.2.norm2.bias": [256], "first_stage_model.decoder.up.1.block.2.norm2.weight": [256], "first_stage_model.decoder.up.1.upsample.conv.bias": [256], "first_stage_model.decoder.up.1.upsample.conv.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.2.block.0.conv1.bias": [512], "first_stage_model.decoder.up.2.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv2.bias": [512], "first_stage_model.decoder.up.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.norm1.bias": [512], "first_stage_model.decoder.up.2.block.0.norm1.weight": [512], "first_stage_model.decoder.up.2.block.0.norm2.bias": [512], "first_stage_model.decoder.up.2.block.0.norm2.weight": [512], "first_stage_model.decoder.up.2.block.1.conv1.bias": [512], "first_stage_model.decoder.up.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv2.bias": [512], "first_stage_model.decoder.up.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.norm1.bias": [512], "first_stage_model.decoder.up.2.block.1.norm1.weight": [512], "first_stage_model.decoder.up.2.block.1.norm2.bias": [512], "first_stage_model.decoder.up.2.block.1.norm2.weight": [512], "first_stage_model.decoder.up.2.block.2.conv1.bias": [512], "first_stage_model.decoder.up.2.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv2.bias": [512], "first_stage_model.decoder.up.2.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.norm1.bias": [512], 
"first_stage_model.decoder.up.2.block.2.norm1.weight": [512], "first_stage_model.decoder.up.2.block.2.norm2.bias": [512], "first_stage_model.decoder.up.2.block.2.norm2.weight": [512], "first_stage_model.decoder.up.2.upsample.conv.bias": [512], "first_stage_model.decoder.up.2.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv1.bias": [512], "first_stage_model.decoder.up.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv2.bias": [512], "first_stage_model.decoder.up.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.norm1.bias": [512], "first_stage_model.decoder.up.3.block.0.norm1.weight": [512], "first_stage_model.decoder.up.3.block.0.norm2.bias": [512], "first_stage_model.decoder.up.3.block.0.norm2.weight": [512], "first_stage_model.decoder.up.3.block.1.conv1.bias": [512], "first_stage_model.decoder.up.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv2.bias": [512], "first_stage_model.decoder.up.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.norm1.bias": [512], "first_stage_model.decoder.up.3.block.1.norm1.weight": [512], "first_stage_model.decoder.up.3.block.1.norm2.bias": [512], "first_stage_model.decoder.up.3.block.1.norm2.weight": [512], "first_stage_model.decoder.up.3.block.2.conv1.bias": [512], "first_stage_model.decoder.up.3.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv2.bias": [512], "first_stage_model.decoder.up.3.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.norm1.bias": [512], "first_stage_model.decoder.up.3.block.2.norm1.weight": [512], "first_stage_model.decoder.up.3.block.2.norm2.bias": [512], "first_stage_model.decoder.up.3.block.2.norm2.weight": [512], "first_stage_model.decoder.up.3.upsample.conv.bias": [512], "first_stage_model.decoder.up.3.upsample.conv.weight": [512, 512, 3, 3], 
"first_stage_model.encoder.conv_in.bias": [128], "first_stage_model.encoder.conv_in.weight": [128, 3, 3, 3], "first_stage_model.encoder.conv_out.bias": [8], "first_stage_model.encoder.conv_out.weight": [8, 512, 3, 3], "first_stage_model.encoder.down.0.block.0.conv1.bias": [128], "first_stage_model.encoder.down.0.block.0.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv2.bias": [128], "first_stage_model.encoder.down.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.norm1.bias": [128], "first_stage_model.encoder.down.0.block.0.norm1.weight": [128], "first_stage_model.encoder.down.0.block.0.norm2.bias": [128], "first_stage_model.encoder.down.0.block.0.norm2.weight": [128], "first_stage_model.encoder.down.0.block.1.conv1.bias": [128], "first_stage_model.encoder.down.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv2.bias": [128], "first_stage_model.encoder.down.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.norm1.bias": [128], "first_stage_model.encoder.down.0.block.1.norm1.weight": [128], "first_stage_model.encoder.down.0.block.1.norm2.bias": [128], "first_stage_model.encoder.down.0.block.1.norm2.weight": [128], "first_stage_model.encoder.down.0.downsample.conv.bias": [128], "first_stage_model.encoder.down.0.downsample.conv.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv1.bias": [256], "first_stage_model.encoder.down.1.block.0.conv1.weight": [256, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv2.bias": [256], "first_stage_model.encoder.down.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": [256], "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [256, 128, 1, 1], "first_stage_model.encoder.down.1.block.0.norm1.bias": [128], "first_stage_model.encoder.down.1.block.0.norm1.weight": [128], 
"first_stage_model.encoder.down.1.block.0.norm2.bias": [256], "first_stage_model.encoder.down.1.block.0.norm2.weight": [256], "first_stage_model.encoder.down.1.block.1.conv1.bias": [256], "first_stage_model.encoder.down.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv2.bias": [256], "first_stage_model.encoder.down.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.norm1.bias": [256], "first_stage_model.encoder.down.1.block.1.norm1.weight": [256], "first_stage_model.encoder.down.1.block.1.norm2.bias": [256], "first_stage_model.encoder.down.1.block.1.norm2.weight": [256], "first_stage_model.encoder.down.1.downsample.conv.bias": [256], "first_stage_model.encoder.down.1.downsample.conv.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv1.bias": [512], "first_stage_model.encoder.down.2.block.0.conv1.weight": [512, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv2.bias": [512], "first_stage_model.encoder.down.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": [512], "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [512, 256, 1, 1], "first_stage_model.encoder.down.2.block.0.norm1.bias": [256], "first_stage_model.encoder.down.2.block.0.norm1.weight": [256], "first_stage_model.encoder.down.2.block.0.norm2.bias": [512], "first_stage_model.encoder.down.2.block.0.norm2.weight": [512], "first_stage_model.encoder.down.2.block.1.conv1.bias": [512], "first_stage_model.encoder.down.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv2.bias": [512], "first_stage_model.encoder.down.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.norm1.bias": [512], "first_stage_model.encoder.down.2.block.1.norm1.weight": [512], "first_stage_model.encoder.down.2.block.1.norm2.bias": [512], "first_stage_model.encoder.down.2.block.1.norm2.weight": [512], 
"first_stage_model.encoder.down.2.downsample.conv.bias": [512], "first_stage_model.encoder.down.2.downsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv1.bias": [512], "first_stage_model.encoder.down.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv2.bias": [512], "first_stage_model.encoder.down.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.norm1.bias": [512], "first_stage_model.encoder.down.3.block.0.norm1.weight": [512], "first_stage_model.encoder.down.3.block.0.norm2.bias": [512], "first_stage_model.encoder.down.3.block.0.norm2.weight": [512], "first_stage_model.encoder.down.3.block.1.conv1.bias": [512], "first_stage_model.encoder.down.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv2.bias": [512], "first_stage_model.encoder.down.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.norm1.bias": [512], "first_stage_model.encoder.down.3.block.1.norm1.weight": [512], "first_stage_model.encoder.down.3.block.1.norm2.bias": [512], "first_stage_model.encoder.down.3.block.1.norm2.weight": [512], "first_stage_model.encoder.mid.attn_1.k.bias": [512], "first_stage_model.encoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.norm.bias": [512], "first_stage_model.encoder.mid.attn_1.norm.weight": [512], "first_stage_model.encoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.encoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.q.bias": [512], "first_stage_model.encoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.v.bias": [512], "first_stage_model.encoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.block_1.conv1.bias": [512], "first_stage_model.encoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv2.bias": [512], 
"first_stage_model.encoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.norm1.bias": [512], "first_stage_model.encoder.mid.block_1.norm1.weight": [512], "first_stage_model.encoder.mid.block_1.norm2.bias": [512], "first_stage_model.encoder.mid.block_1.norm2.weight": [512], "first_stage_model.encoder.mid.block_2.conv1.bias": [512], "first_stage_model.encoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv2.bias": [512], "first_stage_model.encoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.norm1.bias": [512], "first_stage_model.encoder.mid.block_2.norm1.weight": [512], "first_stage_model.encoder.mid.block_2.norm2.bias": [512], "first_stage_model.encoder.mid.block_2.norm2.weight": [512], "first_stage_model.encoder.norm_out.bias": [512], "first_stage_model.encoder.norm_out.weight": [512], "first_stage_model.post_quant_conv.bias": [4], "first_stage_model.post_quant_conv.weight": [4, 4, 1, 1], "first_stage_model.quant_conv.bias": [8], "first_stage_model.quant_conv.weight": [8, 8, 1, 1], "model.diffusion_model.input_blocks.0.0.bias": [320], "model.diffusion_model.input_blocks.0.0.weight": [320, 4, 3, 3], "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.1.norm.bias": [320], 
"model.diffusion_model.input_blocks.1.1.norm.weight": [320], "model.diffusion_model.input_blocks.1.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_in.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_out.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": [320], 
"model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": [320], 
"model.diffusion_model.input_blocks.2.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.1.norm.bias": [320], "model.diffusion_model.input_blocks.2.1.norm.weight": [320], "model.diffusion_model.input_blocks.2.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_in.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_out.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": [320], 
"model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.3.0.op.bias": [320], "model.diffusion_model.input_blocks.3.0.op.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": [640, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.4.0.skip_connection.bias": [640], "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [640, 320, 1, 1], "model.diffusion_model.input_blocks.4.1.norm.bias": [640], "model.diffusion_model.input_blocks.4.1.norm.weight": [640], "model.diffusion_model.input_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_out.weight": [640, 640], 
"model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": [640], 
"model.diffusion_model.input_blocks.5.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.1.norm.bias": [640], "model.diffusion_model.input_blocks.5.1.norm.weight": [640], "model.diffusion_model.input_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_out.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 
640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.6.0.op.bias": [640], "model.diffusion_model.input_blocks.6.0.op.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": [1280, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.7.0.skip_connection.bias": [1280], "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [1280, 640, 1, 1], "model.diffusion_model.input_blocks.7.1.norm.bias": [1280], "model.diffusion_model.input_blocks.7.1.norm.weight": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.proj_out.bias": [1280], 
"model.diffusion_model.input_blocks.7.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": [1280], 
"model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.1.norm.bias": [1280], "model.diffusion_model.input_blocks.8.1.norm.weight": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.9.0.op.bias": [1280], "model.diffusion_model.input_blocks.9.0.op.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.0.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.1.norm.bias": [1280], "model.diffusion_model.middle_block.1.norm.weight": [1280], "model.diffusion_model.middle_block.1.proj_in.bias": [1280], "model.diffusion_model.middle_block.1.proj_in.weight": [1280, 1280], "model.diffusion_model.middle_block.1.proj_out.bias": 
[1280], "model.diffusion_model.middle_block.1.proj_out.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.weight": [1280, 1280], 
"model.diffusion_model.middle_block.2.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.out.0.bias": [320], "model.diffusion_model.out.0.weight": [320], "model.diffusion_model.out.2.bias": [4], "model.diffusion_model.out.2.weight": [4, 320, 3, 3], "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.0.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": [1280], 
"model.diffusion_model.output_blocks.1.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.1.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.10.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.10.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.10.1.norm.bias": [320], "model.diffusion_model.output_blocks.10.1.norm.weight": [320], "model.diffusion_model.output_blocks.10.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": [320], 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": [320], 
"model.diffusion_model.output_blocks.11.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.11.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.11.1.norm.bias": [320], "model.diffusion_model.output_blocks.11.1.norm.weight": [320], "model.diffusion_model.output_blocks.11.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], 
"model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.2.1.conv.bias": [1280], "model.diffusion_model.output_blocks.2.1.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.3.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.3.1.norm.bias": [1280], "model.diffusion_model.output_blocks.3.1.norm.weight": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.4.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.4.1.norm.bias": [1280], "model.diffusion_model.output_blocks.4.1.norm.weight": [1280], 
"model.diffusion_model.output_blocks.4.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": [1280], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": [1280, 1920, 3, 3], "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [1280, 1920, 1, 1], "model.diffusion_model.output_blocks.5.1.norm.bias": [1280], "model.diffusion_model.output_blocks.5.1.norm.weight": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.2.conv.bias": [1280], "model.diffusion_model.output_blocks.5.2.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": [640, 1920, 3, 3], "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": [640], 
"model.diffusion_model.output_blocks.6.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.6.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [640, 1920, 1, 1], "model.diffusion_model.output_blocks.6.1.norm.bias": [640], "model.diffusion_model.output_blocks.6.1.norm.weight": [640], "model.diffusion_model.output_blocks.6.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": [640], 
"model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": [640, 1280, 3, 3], "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.7.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [640, 1280, 1, 1], "model.diffusion_model.output_blocks.7.1.norm.bias": [640], "model.diffusion_model.output_blocks.7.1.norm.weight": [640], "model.diffusion_model.output_blocks.7.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": [960], 
"model.diffusion_model.output_blocks.8.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": [640, 960, 3, 3], "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [640, 960, 1, 1], "model.diffusion_model.output_blocks.8.1.norm.bias": [640], "model.diffusion_model.output_blocks.8.1.norm.weight": [640], "model.diffusion_model.output_blocks.8.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], 
"model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.2.conv.bias": [640], "model.diffusion_model.output_blocks.8.2.conv.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": [320, 960, 3, 3], "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.9.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [320, 960, 1, 1], "model.diffusion_model.output_blocks.9.1.norm.bias": [320], "model.diffusion_model.output_blocks.9.1.norm.weight": [320], 
"model.diffusion_model.output_blocks.9.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": [320], 
"model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.time_embed.0.bias": [1280], "model.diffusion_model.time_embed.0.weight": [1280, 320], "model.diffusion_model.time_embed.2.bias": [1280], "model.diffusion_model.time_embed.2.weight": [1280, 1280]}} \ No newline at end of file diff --git a/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-normal-768.json b/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-normal-768.json new file mode 100644 index 0000000000..64bd382406 --- /dev/null +++ b/invokeai/configs/model_probe_templates/checkpoints/sd-2/main-normal-768.json @@ -0,0 +1 @@ +{"base_type": "sd-2", "model_type": "main", "variant": "normal", "template": {"alphas_cumprod": [1000], "alphas_cumprod_prev": [1000], "betas": [1000], "log_one_minus_alphas_cumprod": [1000], "model_ema.decay": [], "posterior_log_variance_clipped": [1000], "posterior_mean_coef1": [1000], "posterior_mean_coef2": [1000], "posterior_variance": [1000], "sqrt_alphas_cumprod": [1000], "sqrt_one_minus_alphas_cumprod": [1000], "sqrt_recip_alphas_cumprod": [1000], "sqrt_recipm1_alphas_cumprod": [1000], "model_ema.num_updates": [], "cond_stage_model.model.ln_final.bias": [1024], "cond_stage_model.model.ln_final.weight": [1024], "cond_stage_model.model.logit_scale": [], "cond_stage_model.model.positional_embedding": [77, 1024], "cond_stage_model.model.text_projection": [1024, 1024], "cond_stage_model.model.token_embedding.weight": [49408, 1024], "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.0.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_1.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.0.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.0.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.1.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.1.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.1.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.1.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.10.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.10.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.ln_2.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.10.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.10.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.11.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.11.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.11.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.11.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.12.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.12.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.12.mlp.c_fc.weight": [4096, 1024], 
"cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.12.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.13.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.13.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.13.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.13.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.14.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.14.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.14.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.14.mlp.c_proj.weight": [1024, 4096], 
"cond_stage_model.model.transformer.resblocks.15.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.15.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.15.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.15.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.15.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.16.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.16.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.16.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.16.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.17.attn.in_proj_weight": [3072, 1024], 
"cond_stage_model.model.transformer.resblocks.17.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.17.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.17.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.17.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.18.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.18.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.18.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.18.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.19.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.attn.out_proj.weight": [1024, 1024], 
"cond_stage_model.model.transformer.resblocks.19.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.19.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.19.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.2.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.2.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.2.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.2.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.20.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.20.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_1.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.20.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.20.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.20.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.21.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.21.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.21.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.21.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.22.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.22.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.ln_2.weight": [1024], 
"cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.22.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.22.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.23.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.23.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.23.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.3.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.3.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.3.mlp.c_fc.weight": [4096, 1024], 
"cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.3.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.4.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.4.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.4.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.4.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.5.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.5.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.5.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.5.mlp.c_proj.weight": [1024, 4096], 
"cond_stage_model.model.transformer.resblocks.6.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.6.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.6.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.6.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.6.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.7.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.7.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.7.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.7.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.8.attn.in_proj_weight": [3072, 1024], 
"cond_stage_model.model.transformer.resblocks.8.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.8.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.8.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.8.mlp.c_proj.weight": [1024, 4096], "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_bias": [3072], "cond_stage_model.model.transformer.resblocks.9.attn.in_proj_weight": [3072, 1024], "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.attn.out_proj.weight": [1024, 1024], "cond_stage_model.model.transformer.resblocks.9.ln_1.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_1.weight": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_2.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.ln_2.weight": [1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.bias": [4096], "cond_stage_model.model.transformer.resblocks.9.mlp.c_fc.weight": [4096, 1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.bias": [1024], "cond_stage_model.model.transformer.resblocks.9.mlp.c_proj.weight": [1024, 4096], "first_stage_model.decoder.conv_in.bias": [512], "first_stage_model.decoder.conv_in.weight": [512, 4, 3, 3], "first_stage_model.decoder.conv_out.bias": [3], "first_stage_model.decoder.conv_out.weight": [3, 128, 3, 3], "first_stage_model.decoder.mid.attn_1.k.bias": [512], "first_stage_model.decoder.mid.attn_1.k.weight": [512, 512, 1, 1], 
"first_stage_model.decoder.mid.attn_1.norm.bias": [512], "first_stage_model.decoder.mid.attn_1.norm.weight": [512], "first_stage_model.decoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.decoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.q.bias": [512], "first_stage_model.decoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.v.bias": [512], "first_stage_model.decoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.block_1.conv1.bias": [512], "first_stage_model.decoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv2.bias": [512], "first_stage_model.decoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.norm1.bias": [512], "first_stage_model.decoder.mid.block_1.norm1.weight": [512], "first_stage_model.decoder.mid.block_1.norm2.bias": [512], "first_stage_model.decoder.mid.block_1.norm2.weight": [512], "first_stage_model.decoder.mid.block_2.conv1.bias": [512], "first_stage_model.decoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv2.bias": [512], "first_stage_model.decoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.norm1.bias": [512], "first_stage_model.decoder.mid.block_2.norm1.weight": [512], "first_stage_model.decoder.mid.block_2.norm2.bias": [512], "first_stage_model.decoder.mid.block_2.norm2.weight": [512], "first_stage_model.decoder.norm_out.bias": [128], "first_stage_model.decoder.norm_out.weight": [128], "first_stage_model.decoder.up.0.block.0.conv1.bias": [128], "first_stage_model.decoder.up.0.block.0.conv1.weight": [128, 256, 3, 3], "first_stage_model.decoder.up.0.block.0.conv2.bias": [128], "first_stage_model.decoder.up.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": [128], "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [128, 
256, 1, 1], "first_stage_model.decoder.up.0.block.0.norm1.bias": [256], "first_stage_model.decoder.up.0.block.0.norm1.weight": [256], "first_stage_model.decoder.up.0.block.0.norm2.bias": [128], "first_stage_model.decoder.up.0.block.0.norm2.weight": [128], "first_stage_model.decoder.up.0.block.1.conv1.bias": [128], "first_stage_model.decoder.up.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv2.bias": [128], "first_stage_model.decoder.up.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.norm1.bias": [128], "first_stage_model.decoder.up.0.block.1.norm1.weight": [128], "first_stage_model.decoder.up.0.block.1.norm2.bias": [128], "first_stage_model.decoder.up.0.block.1.norm2.weight": [128], "first_stage_model.decoder.up.0.block.2.conv1.bias": [128], "first_stage_model.decoder.up.0.block.2.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv2.bias": [128], "first_stage_model.decoder.up.0.block.2.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.norm1.bias": [128], "first_stage_model.decoder.up.0.block.2.norm1.weight": [128], "first_stage_model.decoder.up.0.block.2.norm2.bias": [128], "first_stage_model.decoder.up.0.block.2.norm2.weight": [128], "first_stage_model.decoder.up.1.block.0.conv1.bias": [256], "first_stage_model.decoder.up.1.block.0.conv1.weight": [256, 512, 3, 3], "first_stage_model.decoder.up.1.block.0.conv2.bias": [256], "first_stage_model.decoder.up.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": [256], "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [256, 512, 1, 1], "first_stage_model.decoder.up.1.block.0.norm1.bias": [512], "first_stage_model.decoder.up.1.block.0.norm1.weight": [512], "first_stage_model.decoder.up.1.block.0.norm2.bias": [256], "first_stage_model.decoder.up.1.block.0.norm2.weight": [256], "first_stage_model.decoder.up.1.block.1.conv1.bias": [256], 
"first_stage_model.decoder.up.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv2.bias": [256], "first_stage_model.decoder.up.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.norm1.bias": [256], "first_stage_model.decoder.up.1.block.1.norm1.weight": [256], "first_stage_model.decoder.up.1.block.1.norm2.bias": [256], "first_stage_model.decoder.up.1.block.1.norm2.weight": [256], "first_stage_model.decoder.up.1.block.2.conv1.bias": [256], "first_stage_model.decoder.up.1.block.2.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv2.bias": [256], "first_stage_model.decoder.up.1.block.2.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.norm1.bias": [256], "first_stage_model.decoder.up.1.block.2.norm1.weight": [256], "first_stage_model.decoder.up.1.block.2.norm2.bias": [256], "first_stage_model.decoder.up.1.block.2.norm2.weight": [256], "first_stage_model.decoder.up.1.upsample.conv.bias": [256], "first_stage_model.decoder.up.1.upsample.conv.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.2.block.0.conv1.bias": [512], "first_stage_model.decoder.up.2.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv2.bias": [512], "first_stage_model.decoder.up.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.norm1.bias": [512], "first_stage_model.decoder.up.2.block.0.norm1.weight": [512], "first_stage_model.decoder.up.2.block.0.norm2.bias": [512], "first_stage_model.decoder.up.2.block.0.norm2.weight": [512], "first_stage_model.decoder.up.2.block.1.conv1.bias": [512], "first_stage_model.decoder.up.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv2.bias": [512], "first_stage_model.decoder.up.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.norm1.bias": [512], "first_stage_model.decoder.up.2.block.1.norm1.weight": [512], 
"first_stage_model.decoder.up.2.block.1.norm2.bias": [512], "first_stage_model.decoder.up.2.block.1.norm2.weight": [512], "first_stage_model.decoder.up.2.block.2.conv1.bias": [512], "first_stage_model.decoder.up.2.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv2.bias": [512], "first_stage_model.decoder.up.2.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.norm1.bias": [512], "first_stage_model.decoder.up.2.block.2.norm1.weight": [512], "first_stage_model.decoder.up.2.block.2.norm2.bias": [512], "first_stage_model.decoder.up.2.block.2.norm2.weight": [512], "first_stage_model.decoder.up.2.upsample.conv.bias": [512], "first_stage_model.decoder.up.2.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv1.bias": [512], "first_stage_model.decoder.up.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv2.bias": [512], "first_stage_model.decoder.up.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.norm1.bias": [512], "first_stage_model.decoder.up.3.block.0.norm1.weight": [512], "first_stage_model.decoder.up.3.block.0.norm2.bias": [512], "first_stage_model.decoder.up.3.block.0.norm2.weight": [512], "first_stage_model.decoder.up.3.block.1.conv1.bias": [512], "first_stage_model.decoder.up.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv2.bias": [512], "first_stage_model.decoder.up.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.norm1.bias": [512], "first_stage_model.decoder.up.3.block.1.norm1.weight": [512], "first_stage_model.decoder.up.3.block.1.norm2.bias": [512], "first_stage_model.decoder.up.3.block.1.norm2.weight": [512], "first_stage_model.decoder.up.3.block.2.conv1.bias": [512], "first_stage_model.decoder.up.3.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv2.bias": [512], 
"first_stage_model.decoder.up.3.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.norm1.bias": [512], "first_stage_model.decoder.up.3.block.2.norm1.weight": [512], "first_stage_model.decoder.up.3.block.2.norm2.bias": [512], "first_stage_model.decoder.up.3.block.2.norm2.weight": [512], "first_stage_model.decoder.up.3.upsample.conv.bias": [512], "first_stage_model.decoder.up.3.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.conv_in.bias": [128], "first_stage_model.encoder.conv_in.weight": [128, 3, 3, 3], "first_stage_model.encoder.conv_out.bias": [8], "first_stage_model.encoder.conv_out.weight": [8, 512, 3, 3], "first_stage_model.encoder.down.0.block.0.conv1.bias": [128], "first_stage_model.encoder.down.0.block.0.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv2.bias": [128], "first_stage_model.encoder.down.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.norm1.bias": [128], "first_stage_model.encoder.down.0.block.0.norm1.weight": [128], "first_stage_model.encoder.down.0.block.0.norm2.bias": [128], "first_stage_model.encoder.down.0.block.0.norm2.weight": [128], "first_stage_model.encoder.down.0.block.1.conv1.bias": [128], "first_stage_model.encoder.down.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv2.bias": [128], "first_stage_model.encoder.down.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.norm1.bias": [128], "first_stage_model.encoder.down.0.block.1.norm1.weight": [128], "first_stage_model.encoder.down.0.block.1.norm2.bias": [128], "first_stage_model.encoder.down.0.block.1.norm2.weight": [128], "first_stage_model.encoder.down.0.downsample.conv.bias": [128], "first_stage_model.encoder.down.0.downsample.conv.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv1.bias": [256], "first_stage_model.encoder.down.1.block.0.conv1.weight": [256, 128, 3, 3], 
"first_stage_model.encoder.down.1.block.0.conv2.bias": [256], "first_stage_model.encoder.down.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": [256], "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [256, 128, 1, 1], "first_stage_model.encoder.down.1.block.0.norm1.bias": [128], "first_stage_model.encoder.down.1.block.0.norm1.weight": [128], "first_stage_model.encoder.down.1.block.0.norm2.bias": [256], "first_stage_model.encoder.down.1.block.0.norm2.weight": [256], "first_stage_model.encoder.down.1.block.1.conv1.bias": [256], "first_stage_model.encoder.down.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv2.bias": [256], "first_stage_model.encoder.down.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.norm1.bias": [256], "first_stage_model.encoder.down.1.block.1.norm1.weight": [256], "first_stage_model.encoder.down.1.block.1.norm2.bias": [256], "first_stage_model.encoder.down.1.block.1.norm2.weight": [256], "first_stage_model.encoder.down.1.downsample.conv.bias": [256], "first_stage_model.encoder.down.1.downsample.conv.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv1.bias": [512], "first_stage_model.encoder.down.2.block.0.conv1.weight": [512, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv2.bias": [512], "first_stage_model.encoder.down.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": [512], "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [512, 256, 1, 1], "first_stage_model.encoder.down.2.block.0.norm1.bias": [256], "first_stage_model.encoder.down.2.block.0.norm1.weight": [256], "first_stage_model.encoder.down.2.block.0.norm2.bias": [512], "first_stage_model.encoder.down.2.block.0.norm2.weight": [512], "first_stage_model.encoder.down.2.block.1.conv1.bias": [512], 
"first_stage_model.encoder.down.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv2.bias": [512], "first_stage_model.encoder.down.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.norm1.bias": [512], "first_stage_model.encoder.down.2.block.1.norm1.weight": [512], "first_stage_model.encoder.down.2.block.1.norm2.bias": [512], "first_stage_model.encoder.down.2.block.1.norm2.weight": [512], "first_stage_model.encoder.down.2.downsample.conv.bias": [512], "first_stage_model.encoder.down.2.downsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv1.bias": [512], "first_stage_model.encoder.down.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv2.bias": [512], "first_stage_model.encoder.down.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.norm1.bias": [512], "first_stage_model.encoder.down.3.block.0.norm1.weight": [512], "first_stage_model.encoder.down.3.block.0.norm2.bias": [512], "first_stage_model.encoder.down.3.block.0.norm2.weight": [512], "first_stage_model.encoder.down.3.block.1.conv1.bias": [512], "first_stage_model.encoder.down.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv2.bias": [512], "first_stage_model.encoder.down.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.norm1.bias": [512], "first_stage_model.encoder.down.3.block.1.norm1.weight": [512], "first_stage_model.encoder.down.3.block.1.norm2.bias": [512], "first_stage_model.encoder.down.3.block.1.norm2.weight": [512], "first_stage_model.encoder.mid.attn_1.k.bias": [512], "first_stage_model.encoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.norm.bias": [512], "first_stage_model.encoder.mid.attn_1.norm.weight": [512], "first_stage_model.encoder.mid.attn_1.proj_out.bias": [512], 
"first_stage_model.encoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.q.bias": [512], "first_stage_model.encoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.v.bias": [512], "first_stage_model.encoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.block_1.conv1.bias": [512], "first_stage_model.encoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv2.bias": [512], "first_stage_model.encoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.norm1.bias": [512], "first_stage_model.encoder.mid.block_1.norm1.weight": [512], "first_stage_model.encoder.mid.block_1.norm2.bias": [512], "first_stage_model.encoder.mid.block_1.norm2.weight": [512], "first_stage_model.encoder.mid.block_2.conv1.bias": [512], "first_stage_model.encoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv2.bias": [512], "first_stage_model.encoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.norm1.bias": [512], "first_stage_model.encoder.mid.block_2.norm1.weight": [512], "first_stage_model.encoder.mid.block_2.norm2.bias": [512], "first_stage_model.encoder.mid.block_2.norm2.weight": [512], "first_stage_model.encoder.norm_out.bias": [512], "first_stage_model.encoder.norm_out.weight": [512], "first_stage_model.post_quant_conv.bias": [4], "first_stage_model.post_quant_conv.weight": [4, 4, 1, 1], "first_stage_model.quant_conv.bias": [8], "first_stage_model.quant_conv.weight": [8, 8, 1, 1], "model.diffusion_model.input_blocks.0.0.bias": [320], "model.diffusion_model.input_blocks.0.0.weight": [320, 4, 3, 3], "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": [320], 
"model.diffusion_model.input_blocks.1.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.1.norm.bias": [320], "model.diffusion_model.input_blocks.1.1.norm.weight": [320], "model.diffusion_model.input_blocks.1.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_in.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.1.1.proj_out.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 
320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": [1280], 
"model.diffusion_model.input_blocks.11.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.1.norm.bias": [320], "model.diffusion_model.input_blocks.2.1.norm.weight": [320], "model.diffusion_model.input_blocks.2.1.proj_in.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_in.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.proj_out.bias": [320], "model.diffusion_model.input_blocks.2.1.proj_out.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], 
"model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.input_blocks.3.0.op.bias": [320], "model.diffusion_model.input_blocks.3.0.op.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": [640, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.4.0.skip_connection.bias": [640], 
"model.diffusion_model.input_blocks.4.0.skip_connection.weight": [640, 320, 1, 1], "model.diffusion_model.input_blocks.4.1.norm.bias": [640], "model.diffusion_model.input_blocks.4.1.norm.weight": [640], "model.diffusion_model.input_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_out.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": [640], 
"model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.1.norm.bias": [640], "model.diffusion_model.input_blocks.5.1.norm.weight": [640], "model.diffusion_model.input_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_out.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.6.0.op.bias": [640], "model.diffusion_model.input_blocks.6.0.op.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": [1280, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": [1280], 
"model.diffusion_model.input_blocks.7.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.7.0.skip_connection.bias": [1280], "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [1280, 640, 1, 1], "model.diffusion_model.input_blocks.7.1.norm.bias": [1280], "model.diffusion_model.input_blocks.7.1.norm.weight": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.1.norm.bias": [1280], "model.diffusion_model.input_blocks.8.1.norm.weight": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.9.0.op.bias": [1280], "model.diffusion_model.input_blocks.9.0.op.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.0.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.2.weight": [1280, 1280, 3, 3], 
"model.diffusion_model.middle_block.0.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.1.norm.bias": [1280], "model.diffusion_model.middle_block.1.norm.weight": [1280], "model.diffusion_model.middle_block.1.proj_in.bias": [1280], "model.diffusion_model.middle_block.1.proj_in.weight": [1280, 1280], "model.diffusion_model.middle_block.1.proj_out.bias": [1280], "model.diffusion_model.middle_block.1.proj_out.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], 
"model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.2.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.out.0.bias": [320], "model.diffusion_model.out.0.weight": [320], "model.diffusion_model.out.2.bias": [4], "model.diffusion_model.out.2.weight": [4, 320, 3, 3], "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": 
[1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.0.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.1.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.10.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.10.0.skip_connection.weight": 
[320, 640, 1, 1], "model.diffusion_model.output_blocks.10.1.norm.bias": [320], "model.diffusion_model.output_blocks.10.1.norm.weight": [320], "model.diffusion_model.output_blocks.10.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.10.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": [320], 
"model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.11.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.11.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.11.1.norm.bias": [320], "model.diffusion_model.output_blocks.11.1.norm.weight": [320], "model.diffusion_model.output_blocks.11.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.11.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": [320, 
320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": 
[1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.2.1.conv.bias": [1280], "model.diffusion_model.output_blocks.2.1.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.3.1.norm.bias": [1280], "model.diffusion_model.output_blocks.3.1.norm.weight": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.3.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": [1280, 2560, 3, 3], 
"model.diffusion_model.output_blocks.4.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.4.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.4.1.norm.bias": [1280], "model.diffusion_model.output_blocks.4.1.norm.weight": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.4.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": [1280, 1920, 3, 3], "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.5.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [1280, 1920, 1, 1], "model.diffusion_model.output_blocks.5.1.norm.bias": [1280], "model.diffusion_model.output_blocks.5.1.norm.weight": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.5.1.proj_out.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [1280, 1024], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [1280, 1024], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.5.2.conv.bias": [1280], "model.diffusion_model.output_blocks.5.2.conv.weight": [1280, 1280, 3, 3], 
"model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": [640, 1920, 3, 3], "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.6.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [640, 1920, 1, 1], "model.diffusion_model.output_blocks.6.1.norm.bias": [640], "model.diffusion_model.output_blocks.6.1.norm.weight": [640], "model.diffusion_model.output_blocks.6.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.6.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": [640], 
"model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": [1280], "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": [640, 1280, 3, 3], "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.7.0.skip_connection.bias": [640], 
"model.diffusion_model.output_blocks.7.0.skip_connection.weight": [640, 1280, 1, 1], "model.diffusion_model.output_blocks.7.1.norm.bias": [640], "model.diffusion_model.output_blocks.7.1.norm.weight": [640], "model.diffusion_model.output_blocks.7.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.7.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": [640], 
"model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": [640, 960, 3, 3], "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.8.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [640, 960, 1, 1], "model.diffusion_model.output_blocks.8.1.norm.bias": [640], "model.diffusion_model.output_blocks.8.1.norm.weight": [640], "model.diffusion_model.output_blocks.8.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.8.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], 
"model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [640, 1024], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [640, 1024], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.8.2.conv.bias": [640], "model.diffusion_model.output_blocks.8.2.conv.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": [320, 960, 3, 3], 
"model.diffusion_model.output_blocks.9.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.9.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.9.0.skip_connection.weight": [320, 960, 1, 1], "model.diffusion_model.output_blocks.9.1.norm.bias": [320], "model.diffusion_model.output_blocks.9.1.norm.weight": [320], "model.diffusion_model.output_blocks.9.1.proj_in.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_in.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.proj_out.bias": [320], "model.diffusion_model.output_blocks.9.1.proj_out.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": [320, 1024], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": [320, 320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": [320, 1024], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": [2560], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": [2560, 320], 
"model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": [320, 1280], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": [320], "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": [320], "model.diffusion_model.time_embed.0.bias": [1280], "model.diffusion_model.time_embed.0.weight": [1280, 320], "model.diffusion_model.time_embed.2.bias": [1280], "model.diffusion_model.time_embed.2.weight": [1280, 1280]}} \ No newline at end of file diff --git a/invokeai/configs/model_probe_templates/checkpoints/sdxl/main-normal.json b/invokeai/configs/model_probe_templates/checkpoints/sdxl/main-normal.json new file mode 100644 index 0000000000..d30e6d6e12 --- /dev/null +++ b/invokeai/configs/model_probe_templates/checkpoints/sdxl/main-normal.json @@ -0,0 +1 @@ +{"base_type": "sdxl", "model_type": "main", "variant": "normal", "template": {"conditioner.embedders.0.transformer.text_model.embeddings.position_embedding.weight": [77, 768], "conditioner.embedders.0.transformer.text_model.embeddings.position_ids": [1, 77], "conditioner.embedders.0.transformer.text_model.embeddings.token_embedding.weight": [49408, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.layer_norm2.weight": [768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.0.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.k_proj.bias": [768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.1.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.q_proj.bias": [768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.10.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.11.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm1.bias": [768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.2.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc1.weight": [3072, 768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.3.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.k_proj.weight": [768, 768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.4.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.q_proj.weight": [768, 768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.5.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.6.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm2.bias": 
[768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.7.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.mlp.fc2.weight": [768, 3072], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.out_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.8.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm1.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm1.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.layer_norm2.weight": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc1.bias": [3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc1.weight": [3072, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc2.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.mlp.fc2.weight": [768, 3072], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.k_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.k_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.out_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.out_proj.weight": [768, 768], 
"conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.q_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.q_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.v_proj.bias": [768], "conditioner.embedders.0.transformer.text_model.encoder.layers.9.self_attn.v_proj.weight": [768, 768], "conditioner.embedders.0.transformer.text_model.final_layer_norm.bias": [768], "conditioner.embedders.0.transformer.text_model.final_layer_norm.weight": [768], "conditioner.embedders.1.model.ln_final.bias": [1280], "conditioner.embedders.1.model.ln_final.weight": [1280], "conditioner.embedders.1.model.logit_scale": [], "conditioner.embedders.1.model.positional_embedding": [77, 1280], "conditioner.embedders.1.model.text_projection": [1280, 1280], "conditioner.embedders.1.model.token_embedding.weight": [49408, 1280], "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.0.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.0.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.0.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.0.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.0.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.0.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_bias": [3840], 
"conditioner.embedders.1.model.transformer.resblocks.1.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.1.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.1.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.1.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.1.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.1.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.10.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.10.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.10.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.10.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.10.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_bias": [3840], 
"conditioner.embedders.1.model.transformer.resblocks.11.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.11.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.11.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.11.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.11.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.12.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.12.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.12.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.12.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.12.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_bias": 
[3840], "conditioner.embedders.1.model.transformer.resblocks.13.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.13.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.13.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.13.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.13.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.14.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.14.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.14.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.14.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.14.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.14.mlp.c_proj.weight": [1280, 5120], 
"conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.15.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.15.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.15.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.15.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.15.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.16.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.16.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.16.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.16.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.16.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.16.mlp.c_proj.weight": [1280, 
5120], "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.17.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.17.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.17.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.17.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.17.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.17.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.18.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.18.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.18.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.18.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.18.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.18.mlp.c_proj.weight": 
[1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.19.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.19.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.19.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.19.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.19.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.19.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.2.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.2.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.2.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.2.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.2.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.2.mlp.c_proj.weight": [1280, 
5120], "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.20.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.20.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.20.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.20.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.20.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.20.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.21.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.21.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.21.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.21.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.21.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.21.mlp.c_proj.weight": 
[1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.22.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.22.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.22.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.22.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.22.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.22.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.23.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.23.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.23.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.23.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.23.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.bias": [1280], 
"conditioner.embedders.1.model.transformer.resblocks.23.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.24.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.24.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.24.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.24.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.24.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.24.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.25.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.25.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.25.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.25.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.25.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.bias": 
[1280], "conditioner.embedders.1.model.transformer.resblocks.25.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.26.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.26.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.26.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.26.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.26.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.26.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.27.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.27.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.27.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.27.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.27.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_fc.weight": [5120, 1280], 
"conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.27.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.28.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.28.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.28.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.28.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.28.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.28.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.29.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.29.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.29.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.29.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.29.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_fc.weight": [5120, 
1280], "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.29.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.3.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.3.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.3.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.3.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.3.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.3.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.30.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.30.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.30.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.30.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.30.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_fc.weight": [5120, 1280], 
"conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.30.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.31.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.31.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.31.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.31.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.31.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.31.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.4.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.4.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.4.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.4.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_fc.weight": [5120, 1280], 
"conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.4.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.5.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.5.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.5.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.5.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.5.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.6.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.6.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.6.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.6.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_fc.weight": [5120, 1280], 
"conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.6.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.7.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.7.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.7.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.7.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.7.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.8.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.8.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.8.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.8.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_fc.weight": [5120, 1280], 
"conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.8.mlp.c_proj.weight": [1280, 5120], "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_bias": [3840], "conditioner.embedders.1.model.transformer.resblocks.9.attn.in_proj_weight": [3840, 1280], "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.9.attn.out_proj.weight": [1280, 1280], "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.9.ln_1.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.9.ln_2.weight": [1280], "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_fc.bias": [5120], "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_fc.weight": [5120, 1280], "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias": [1280], "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.weight": [1280, 5120], "first_stage_model.decoder.conv_in.bias": [512], "first_stage_model.decoder.conv_in.weight": [512, 4, 3, 3], "first_stage_model.decoder.conv_out.bias": [3], "first_stage_model.decoder.conv_out.weight": [3, 128, 3, 3], "first_stage_model.decoder.mid.attn_1.k.bias": [512], "first_stage_model.decoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.norm.bias": [512], "first_stage_model.decoder.mid.attn_1.norm.weight": [512], "first_stage_model.decoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.decoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.q.bias": [512], "first_stage_model.decoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.decoder.mid.attn_1.v.bias": [512], "first_stage_model.decoder.mid.attn_1.v.weight": [512, 512, 1, 1], 
"first_stage_model.decoder.mid.block_1.conv1.bias": [512], "first_stage_model.decoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.conv2.bias": [512], "first_stage_model.decoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_1.norm1.bias": [512], "first_stage_model.decoder.mid.block_1.norm1.weight": [512], "first_stage_model.decoder.mid.block_1.norm2.bias": [512], "first_stage_model.decoder.mid.block_1.norm2.weight": [512], "first_stage_model.decoder.mid.block_2.conv1.bias": [512], "first_stage_model.decoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.conv2.bias": [512], "first_stage_model.decoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.mid.block_2.norm1.bias": [512], "first_stage_model.decoder.mid.block_2.norm1.weight": [512], "first_stage_model.decoder.mid.block_2.norm2.bias": [512], "first_stage_model.decoder.mid.block_2.norm2.weight": [512], "first_stage_model.decoder.norm_out.bias": [128], "first_stage_model.decoder.norm_out.weight": [128], "first_stage_model.decoder.up.0.block.0.conv1.bias": [128], "first_stage_model.decoder.up.0.block.0.conv1.weight": [128, 256, 3, 3], "first_stage_model.decoder.up.0.block.0.conv2.bias": [128], "first_stage_model.decoder.up.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.0.nin_shortcut.bias": [128], "first_stage_model.decoder.up.0.block.0.nin_shortcut.weight": [128, 256, 1, 1], "first_stage_model.decoder.up.0.block.0.norm1.bias": [256], "first_stage_model.decoder.up.0.block.0.norm1.weight": [256], "first_stage_model.decoder.up.0.block.0.norm2.bias": [128], "first_stage_model.decoder.up.0.block.0.norm2.weight": [128], "first_stage_model.decoder.up.0.block.1.conv1.bias": [128], "first_stage_model.decoder.up.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.conv2.bias": [128], 
"first_stage_model.decoder.up.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.1.norm1.bias": [128], "first_stage_model.decoder.up.0.block.1.norm1.weight": [128], "first_stage_model.decoder.up.0.block.1.norm2.bias": [128], "first_stage_model.decoder.up.0.block.1.norm2.weight": [128], "first_stage_model.decoder.up.0.block.2.conv1.bias": [128], "first_stage_model.decoder.up.0.block.2.conv1.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.conv2.bias": [128], "first_stage_model.decoder.up.0.block.2.conv2.weight": [128, 128, 3, 3], "first_stage_model.decoder.up.0.block.2.norm1.bias": [128], "first_stage_model.decoder.up.0.block.2.norm1.weight": [128], "first_stage_model.decoder.up.0.block.2.norm2.bias": [128], "first_stage_model.decoder.up.0.block.2.norm2.weight": [128], "first_stage_model.decoder.up.1.block.0.conv1.bias": [256], "first_stage_model.decoder.up.1.block.0.conv1.weight": [256, 512, 3, 3], "first_stage_model.decoder.up.1.block.0.conv2.bias": [256], "first_stage_model.decoder.up.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.0.nin_shortcut.bias": [256], "first_stage_model.decoder.up.1.block.0.nin_shortcut.weight": [256, 512, 1, 1], "first_stage_model.decoder.up.1.block.0.norm1.bias": [512], "first_stage_model.decoder.up.1.block.0.norm1.weight": [512], "first_stage_model.decoder.up.1.block.0.norm2.bias": [256], "first_stage_model.decoder.up.1.block.0.norm2.weight": [256], "first_stage_model.decoder.up.1.block.1.conv1.bias": [256], "first_stage_model.decoder.up.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.conv2.bias": [256], "first_stage_model.decoder.up.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.1.norm1.bias": [256], "first_stage_model.decoder.up.1.block.1.norm1.weight": [256], "first_stage_model.decoder.up.1.block.1.norm2.bias": [256], "first_stage_model.decoder.up.1.block.1.norm2.weight": [256], 
"first_stage_model.decoder.up.1.block.2.conv1.bias": [256], "first_stage_model.decoder.up.1.block.2.conv1.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.conv2.bias": [256], "first_stage_model.decoder.up.1.block.2.conv2.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.1.block.2.norm1.bias": [256], "first_stage_model.decoder.up.1.block.2.norm1.weight": [256], "first_stage_model.decoder.up.1.block.2.norm2.bias": [256], "first_stage_model.decoder.up.1.block.2.norm2.weight": [256], "first_stage_model.decoder.up.1.upsample.conv.bias": [256], "first_stage_model.decoder.up.1.upsample.conv.weight": [256, 256, 3, 3], "first_stage_model.decoder.up.2.block.0.conv1.bias": [512], "first_stage_model.decoder.up.2.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.conv2.bias": [512], "first_stage_model.decoder.up.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.0.norm1.bias": [512], "first_stage_model.decoder.up.2.block.0.norm1.weight": [512], "first_stage_model.decoder.up.2.block.0.norm2.bias": [512], "first_stage_model.decoder.up.2.block.0.norm2.weight": [512], "first_stage_model.decoder.up.2.block.1.conv1.bias": [512], "first_stage_model.decoder.up.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.conv2.bias": [512], "first_stage_model.decoder.up.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.1.norm1.bias": [512], "first_stage_model.decoder.up.2.block.1.norm1.weight": [512], "first_stage_model.decoder.up.2.block.1.norm2.bias": [512], "first_stage_model.decoder.up.2.block.1.norm2.weight": [512], "first_stage_model.decoder.up.2.block.2.conv1.bias": [512], "first_stage_model.decoder.up.2.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.conv2.bias": [512], "first_stage_model.decoder.up.2.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.2.block.2.norm1.bias": [512], 
"first_stage_model.decoder.up.2.block.2.norm1.weight": [512], "first_stage_model.decoder.up.2.block.2.norm2.bias": [512], "first_stage_model.decoder.up.2.block.2.norm2.weight": [512], "first_stage_model.decoder.up.2.upsample.conv.bias": [512], "first_stage_model.decoder.up.2.upsample.conv.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv1.bias": [512], "first_stage_model.decoder.up.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.conv2.bias": [512], "first_stage_model.decoder.up.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.0.norm1.bias": [512], "first_stage_model.decoder.up.3.block.0.norm1.weight": [512], "first_stage_model.decoder.up.3.block.0.norm2.bias": [512], "first_stage_model.decoder.up.3.block.0.norm2.weight": [512], "first_stage_model.decoder.up.3.block.1.conv1.bias": [512], "first_stage_model.decoder.up.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.conv2.bias": [512], "first_stage_model.decoder.up.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.1.norm1.bias": [512], "first_stage_model.decoder.up.3.block.1.norm1.weight": [512], "first_stage_model.decoder.up.3.block.1.norm2.bias": [512], "first_stage_model.decoder.up.3.block.1.norm2.weight": [512], "first_stage_model.decoder.up.3.block.2.conv1.bias": [512], "first_stage_model.decoder.up.3.block.2.conv1.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.conv2.bias": [512], "first_stage_model.decoder.up.3.block.2.conv2.weight": [512, 512, 3, 3], "first_stage_model.decoder.up.3.block.2.norm1.bias": [512], "first_stage_model.decoder.up.3.block.2.norm1.weight": [512], "first_stage_model.decoder.up.3.block.2.norm2.bias": [512], "first_stage_model.decoder.up.3.block.2.norm2.weight": [512], "first_stage_model.decoder.up.3.upsample.conv.bias": [512], "first_stage_model.decoder.up.3.upsample.conv.weight": [512, 512, 3, 3], 
"first_stage_model.encoder.conv_in.bias": [128], "first_stage_model.encoder.conv_in.weight": [128, 3, 3, 3], "first_stage_model.encoder.conv_out.bias": [8], "first_stage_model.encoder.conv_out.weight": [8, 512, 3, 3], "first_stage_model.encoder.down.0.block.0.conv1.bias": [128], "first_stage_model.encoder.down.0.block.0.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.conv2.bias": [128], "first_stage_model.encoder.down.0.block.0.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.0.norm1.bias": [128], "first_stage_model.encoder.down.0.block.0.norm1.weight": [128], "first_stage_model.encoder.down.0.block.0.norm2.bias": [128], "first_stage_model.encoder.down.0.block.0.norm2.weight": [128], "first_stage_model.encoder.down.0.block.1.conv1.bias": [128], "first_stage_model.encoder.down.0.block.1.conv1.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.conv2.bias": [128], "first_stage_model.encoder.down.0.block.1.conv2.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.0.block.1.norm1.bias": [128], "first_stage_model.encoder.down.0.block.1.norm1.weight": [128], "first_stage_model.encoder.down.0.block.1.norm2.bias": [128], "first_stage_model.encoder.down.0.block.1.norm2.weight": [128], "first_stage_model.encoder.down.0.downsample.conv.bias": [128], "first_stage_model.encoder.down.0.downsample.conv.weight": [128, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv1.bias": [256], "first_stage_model.encoder.down.1.block.0.conv1.weight": [256, 128, 3, 3], "first_stage_model.encoder.down.1.block.0.conv2.bias": [256], "first_stage_model.encoder.down.1.block.0.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.0.nin_shortcut.bias": [256], "first_stage_model.encoder.down.1.block.0.nin_shortcut.weight": [256, 128, 1, 1], "first_stage_model.encoder.down.1.block.0.norm1.bias": [128], "first_stage_model.encoder.down.1.block.0.norm1.weight": [128], 
"first_stage_model.encoder.down.1.block.0.norm2.bias": [256], "first_stage_model.encoder.down.1.block.0.norm2.weight": [256], "first_stage_model.encoder.down.1.block.1.conv1.bias": [256], "first_stage_model.encoder.down.1.block.1.conv1.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.conv2.bias": [256], "first_stage_model.encoder.down.1.block.1.conv2.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.1.block.1.norm1.bias": [256], "first_stage_model.encoder.down.1.block.1.norm1.weight": [256], "first_stage_model.encoder.down.1.block.1.norm2.bias": [256], "first_stage_model.encoder.down.1.block.1.norm2.weight": [256], "first_stage_model.encoder.down.1.downsample.conv.bias": [256], "first_stage_model.encoder.down.1.downsample.conv.weight": [256, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv1.bias": [512], "first_stage_model.encoder.down.2.block.0.conv1.weight": [512, 256, 3, 3], "first_stage_model.encoder.down.2.block.0.conv2.bias": [512], "first_stage_model.encoder.down.2.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.0.nin_shortcut.bias": [512], "first_stage_model.encoder.down.2.block.0.nin_shortcut.weight": [512, 256, 1, 1], "first_stage_model.encoder.down.2.block.0.norm1.bias": [256], "first_stage_model.encoder.down.2.block.0.norm1.weight": [256], "first_stage_model.encoder.down.2.block.0.norm2.bias": [512], "first_stage_model.encoder.down.2.block.0.norm2.weight": [512], "first_stage_model.encoder.down.2.block.1.conv1.bias": [512], "first_stage_model.encoder.down.2.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.conv2.bias": [512], "first_stage_model.encoder.down.2.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.2.block.1.norm1.bias": [512], "first_stage_model.encoder.down.2.block.1.norm1.weight": [512], "first_stage_model.encoder.down.2.block.1.norm2.bias": [512], "first_stage_model.encoder.down.2.block.1.norm2.weight": [512], 
"first_stage_model.encoder.down.2.downsample.conv.bias": [512], "first_stage_model.encoder.down.2.downsample.conv.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv1.bias": [512], "first_stage_model.encoder.down.3.block.0.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.conv2.bias": [512], "first_stage_model.encoder.down.3.block.0.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.0.norm1.bias": [512], "first_stage_model.encoder.down.3.block.0.norm1.weight": [512], "first_stage_model.encoder.down.3.block.0.norm2.bias": [512], "first_stage_model.encoder.down.3.block.0.norm2.weight": [512], "first_stage_model.encoder.down.3.block.1.conv1.bias": [512], "first_stage_model.encoder.down.3.block.1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.conv2.bias": [512], "first_stage_model.encoder.down.3.block.1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.down.3.block.1.norm1.bias": [512], "first_stage_model.encoder.down.3.block.1.norm1.weight": [512], "first_stage_model.encoder.down.3.block.1.norm2.bias": [512], "first_stage_model.encoder.down.3.block.1.norm2.weight": [512], "first_stage_model.encoder.mid.attn_1.k.bias": [512], "first_stage_model.encoder.mid.attn_1.k.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.norm.bias": [512], "first_stage_model.encoder.mid.attn_1.norm.weight": [512], "first_stage_model.encoder.mid.attn_1.proj_out.bias": [512], "first_stage_model.encoder.mid.attn_1.proj_out.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.q.bias": [512], "first_stage_model.encoder.mid.attn_1.q.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.attn_1.v.bias": [512], "first_stage_model.encoder.mid.attn_1.v.weight": [512, 512, 1, 1], "first_stage_model.encoder.mid.block_1.conv1.bias": [512], "first_stage_model.encoder.mid.block_1.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.conv2.bias": [512], 
"first_stage_model.encoder.mid.block_1.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_1.norm1.bias": [512], "first_stage_model.encoder.mid.block_1.norm1.weight": [512], "first_stage_model.encoder.mid.block_1.norm2.bias": [512], "first_stage_model.encoder.mid.block_1.norm2.weight": [512], "first_stage_model.encoder.mid.block_2.conv1.bias": [512], "first_stage_model.encoder.mid.block_2.conv1.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.conv2.bias": [512], "first_stage_model.encoder.mid.block_2.conv2.weight": [512, 512, 3, 3], "first_stage_model.encoder.mid.block_2.norm1.bias": [512], "first_stage_model.encoder.mid.block_2.norm1.weight": [512], "first_stage_model.encoder.mid.block_2.norm2.bias": [512], "first_stage_model.encoder.mid.block_2.norm2.weight": [512], "first_stage_model.encoder.norm_out.bias": [512], "first_stage_model.encoder.norm_out.weight": [512], "first_stage_model.post_quant_conv.bias": [4], "first_stage_model.post_quant_conv.weight": [4, 4, 1, 1], "first_stage_model.quant_conv.bias": [8], "first_stage_model.quant_conv.weight": [8, 8, 1, 1], "model.diffusion_model.input_blocks.0.0.bias": [320], "model.diffusion_model.input_blocks.0.0.weight": [320, 4, 3, 3], "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": [320], "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": 
[320], "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": [320], "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": [320], "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.3.0.op.bias": [320], "model.diffusion_model.input_blocks.3.0.op.weight": [320, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": [320], "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": [320], "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": [640, 320, 3, 3], "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.4.0.skip_connection.bias": [640], "model.diffusion_model.input_blocks.4.0.skip_connection.weight": [640, 320, 1, 1], "model.diffusion_model.input_blocks.4.1.norm.bias": [640], "model.diffusion_model.input_blocks.4.1.norm.weight": [640], "model.diffusion_model.input_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.4.1.proj_out.weight": 
[640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 2048], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 2048], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_out.0.bias": [640], 
"model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_k.weight": [640, 2048], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.attn2.to_v.weight": [640, 2048], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm1.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm1.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm2.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm2.weight": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm3.bias": [640], "model.diffusion_model.input_blocks.4.1.transformer_blocks.1.norm3.weight": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": [640], 
"model.diffusion_model.input_blocks.5.0.in_layers.2.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.5.1.norm.bias": [640], "model.diffusion_model.input_blocks.5.1.norm.weight": [640], "model.diffusion_model.input_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_in.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.input_blocks.5.1.proj_out.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 2048], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 2048], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_k.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn1.to_v.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_k.weight": [640, 2048], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_out.0.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_q.weight": [640, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.attn2.to_v.weight": [640, 2048], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.0.proj.bias": [5120], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.ff.net.2.weight": [640, 2560], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm1.bias": [640], 
"model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm1.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm2.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm2.weight": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm3.bias": [640], "model.diffusion_model.input_blocks.5.1.transformer_blocks.1.norm3.weight": [640], "model.diffusion_model.input_blocks.6.0.op.bias": [640], "model.diffusion_model.input_blocks.6.0.op.weight": [640, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": [1280, 640, 3, 3], "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.7.0.skip_connection.bias": [1280], "model.diffusion_model.input_blocks.7.0.skip_connection.weight": [1280, 640, 1, 1], "model.diffusion_model.input_blocks.7.1.norm.bias": [1280], "model.diffusion_model.input_blocks.7.1.norm.weight": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.7.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_q.weight": 
[1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.1.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn1.to_v.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.2.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_out.0.bias": 
[1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.3.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.4.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.attn2.to_v.weight": [1280, 
2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.5.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.ff.net.0.proj.weight": 
[10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.6.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.ff.net.2.weight": 
[1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.7.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.norm1.weight": [1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.8.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.norm2.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.8.norm3.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.norm1.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.norm1.weight": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.norm2.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.norm2.weight": [1280], 
"model.diffusion_model.input_blocks.7.1.transformer_blocks.9.norm3.bias": [1280], "model.diffusion_model.input_blocks.7.1.transformer_blocks.9.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": [1280], "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": [1280], "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": [1280], "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.input_blocks.8.1.norm.bias": [1280], "model.diffusion_model.input_blocks.8.1.norm.weight": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_in.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.proj_out.bias": [1280], "model.diffusion_model.input_blocks.8.1.proj_out.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_q.weight": 
[1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.1.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.attn2.to_v.weight": [1280, 2048], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.2.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.0.proj.weight": [10240, 
1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.3.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.ff.net.2.weight": [1280, 
5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.4.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.norm1.weight": [1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.5.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.5.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.norm2.weight": [1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.6.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.6.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.7.norm3.weight": [1280], 
"model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn1.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.8.norm3.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn1.to_out.0.bias": 
[1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn2.to_out.0.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.ff.net.0.proj.bias": [10240], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.ff.net.2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.ff.net.2.weight": [1280, 5120], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.norm1.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.norm1.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.norm2.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.norm2.weight": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.norm3.bias": [1280], "model.diffusion_model.input_blocks.8.1.transformer_blocks.9.norm3.weight": [1280], "model.diffusion_model.label_emb.0.0.bias": [1280], "model.diffusion_model.label_emb.0.0.weight": [1280, 2816], "model.diffusion_model.label_emb.0.2.bias": [1280], "model.diffusion_model.label_emb.0.2.weight": [1280, 1280], "model.diffusion_model.middle_block.0.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.0.emb_layers.1.weight": [1280, 
1280], "model.diffusion_model.middle_block.0.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.0.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.0.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.0.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.1.norm.bias": [1280], "model.diffusion_model.middle_block.1.norm.weight": [1280], "model.diffusion_model.middle_block.1.proj_in.bias": [1280], "model.diffusion_model.middle_block.1.proj_in.weight": [1280, 1280], "model.diffusion_model.middle_block.1.proj_out.bias": [1280], "model.diffusion_model.middle_block.1.proj_out.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], 
"model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.2.bias": [1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.1.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.1.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.1.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.2.norm1.bias": [1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.2.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.2.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.3.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.norm2.bias": [1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.3.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.3.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.4.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.4.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.4.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.4.norm3.bias": [1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.4.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.5.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.5.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.5.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.5.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn1.to_k.weight": [1280, 1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.6.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.6.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.6.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.6.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.6.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn1.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.7.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.7.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.7.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.7.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.7.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn1.to_v.weight": [1280, 1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.8.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.8.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.8.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.8.norm3.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn1.to_out.0.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_out.0.bias": [1280], 
"model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.middle_block.1.transformer_blocks.9.ff.net.0.proj.bias": [10240], "model.diffusion_model.middle_block.1.transformer_blocks.9.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.ff.net.2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.ff.net.2.weight": [1280, 5120], "model.diffusion_model.middle_block.1.transformer_blocks.9.norm1.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.norm1.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.norm2.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.norm2.weight": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.norm3.bias": [1280], "model.diffusion_model.middle_block.1.transformer_blocks.9.norm3.weight": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.bias": [1280], "model.diffusion_model.middle_block.2.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.middle_block.2.in_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.in_layers.2.bias": [1280], "model.diffusion_model.middle_block.2.in_layers.2.weight": [1280, 1280, 3, 3], "model.diffusion_model.middle_block.2.out_layers.0.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.0.weight": [1280], "model.diffusion_model.middle_block.2.out_layers.3.bias": [1280], "model.diffusion_model.middle_block.2.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.out.0.bias": [320], "model.diffusion_model.out.0.weight": [320], "model.diffusion_model.out.2.bias": [4], "model.diffusion_model.out.2.weight": [4, 
320, 3, 3], "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.0.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.0.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.0.1.norm.bias": [1280], "model.diffusion_model.output_blocks.0.1.norm.weight": [1280], "model.diffusion_model.output_blocks.0.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.0.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.0.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.1.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.2.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.2.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.attn2.to_v.weight": [1280, 2048], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.3.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.ff.net.0.proj.bias": [10240], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.4.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.4.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.ff.net.0.proj.weight": [10240, 1280], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.5.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.5.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.ff.net.2.bias": [1280], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.6.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.6.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.ff.net.2.weight": [1280, 5120], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.7.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.7.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.norm1.weight": 
[1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.norm2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.8.norm3.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.norm1.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.norm1.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.norm2.bias": [1280], 
"model.diffusion_model.output_blocks.0.1.transformer_blocks.9.norm2.weight": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.norm3.bias": [1280], "model.diffusion_model.output_blocks.0.1.transformer_blocks.9.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": [2560], "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": [1280, 2560, 3, 3], "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.1.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.1.0.skip_connection.weight": [1280, 2560, 1, 1], "model.diffusion_model.output_blocks.1.1.norm.bias": [1280], "model.diffusion_model.output_blocks.1.1.norm.weight": [1280], "model.diffusion_model.output_blocks.1.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.1.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.1.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn1.to_v.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.1.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_k.weight": [1280, 2048], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.2.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_out.0.bias": [1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.3.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn2.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.4.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn2.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.5.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.5.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.attn2.to_v.weight": [1280, 2048], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.6.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.6.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.ff.net.0.proj.bias": [10240], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.7.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.7.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.ff.net.0.proj.weight": [10240, 1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.8.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.8.norm3.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.ff.net.2.bias": [1280], 
"model.diffusion_model.output_blocks.1.1.transformer_blocks.9.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.norm1.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.norm1.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.norm2.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.norm2.weight": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.norm3.bias": [1280], "model.diffusion_model.output_blocks.1.1.transformer_blocks.9.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": [1280], "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": [1280], "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": [1280, 1920, 3, 3], "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": [1280], "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.2.0.skip_connection.bias": [1280], "model.diffusion_model.output_blocks.2.0.skip_connection.weight": [1280, 1920, 1, 1], "model.diffusion_model.output_blocks.2.1.norm.bias": [1280], "model.diffusion_model.output_blocks.2.1.norm.weight": [1280], "model.diffusion_model.output_blocks.2.1.proj_in.bias": [1280], "model.diffusion_model.output_blocks.2.1.proj_in.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.proj_out.bias": [1280], "model.diffusion_model.output_blocks.2.1.proj_out.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.0.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_out.0.bias": [1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.1.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.2.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.3.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn1.to_v.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.4.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_k.weight": [1280, 2048], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.5.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.bias": [1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.6.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_out.0.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.7.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_q.weight": [1280, 1280], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.8.attn2.to_v.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.8.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_k.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn1.to_v.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_k.weight": [1280, 2048], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_out.0.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_q.weight": [1280, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.attn2.to_v.weight": [1280, 2048], 
"model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.0.proj.bias": [10240], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.0.proj.weight": [10240, 1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.ff.net.2.weight": [1280, 5120], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm1.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm1.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm2.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm2.weight": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm3.bias": [1280], "model.diffusion_model.output_blocks.2.1.transformer_blocks.9.norm3.weight": [1280], "model.diffusion_model.output_blocks.2.2.conv.bias": [1280], "model.diffusion_model.output_blocks.2.2.conv.weight": [1280, 1280, 3, 3], "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": [1920], "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": [1920], "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": [640, 1920, 3, 3], "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.3.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.3.0.skip_connection.weight": [640, 1920, 1, 1], "model.diffusion_model.output_blocks.3.1.norm.bias": [640], "model.diffusion_model.output_blocks.3.1.norm.weight": 
[640], "model.diffusion_model.output_blocks.3.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.3.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.3.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": [640, 2048], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": [640, 2048], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": [640], 
"model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_k.weight": [640, 2048], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.attn2.to_v.weight": [640, 2048], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm1.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm1.weight": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm2.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm2.weight": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm3.bias": [640], "model.diffusion_model.output_blocks.3.1.transformer_blocks.1.norm3.weight": [640], "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": [640], 
"model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": [1280], "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": [640, 1280, 3, 3], "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.4.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.4.0.skip_connection.weight": [640, 1280, 1, 1], "model.diffusion_model.output_blocks.4.1.norm.bias": [640], "model.diffusion_model.output_blocks.4.1.norm.weight": [640], "model.diffusion_model.output_blocks.4.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.4.1.proj_in.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.4.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": [640, 2048], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": [640, 2048], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_k.weight": [640, 2048], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.attn2.to_v.weight": [640, 2048], 
"model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm1.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm1.weight": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm2.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm2.weight": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm3.bias": [640], "model.diffusion_model.output_blocks.4.1.transformer_blocks.1.norm3.weight": [640], "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": [640], "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": [640, 1280], "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": [640], "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": [640, 960, 3, 3], "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": [640], "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": [640], "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": [640], "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": [640, 640, 3, 3], "model.diffusion_model.output_blocks.5.0.skip_connection.bias": [640], "model.diffusion_model.output_blocks.5.0.skip_connection.weight": [640, 960, 1, 1], "model.diffusion_model.output_blocks.5.1.norm.bias": [640], "model.diffusion_model.output_blocks.5.1.norm.weight": [640], "model.diffusion_model.output_blocks.5.1.proj_in.bias": [640], "model.diffusion_model.output_blocks.5.1.proj_in.weight": [640, 640], 
"model.diffusion_model.output_blocks.5.1.proj_out.bias": [640], "model.diffusion_model.output_blocks.5.1.proj_out.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": [640, 2048], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": [640, 2048], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": [640], 
"model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_k.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_out.0.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn1.to_v.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_k.weight": [640, 2048], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_out.0.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_out.0.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_q.weight": [640, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.attn2.to_v.weight": [640, 2048], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.0.proj.bias": [5120], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.0.proj.weight": [5120, 640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.2.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.ff.net.2.weight": [640, 2560], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm1.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm1.weight": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm2.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm2.weight": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm3.bias": [640], "model.diffusion_model.output_blocks.5.1.transformer_blocks.1.norm3.weight": [640], "model.diffusion_model.output_blocks.5.2.conv.bias": [640], "model.diffusion_model.output_blocks.5.2.conv.weight": [640, 640, 3, 3], 
"model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": [960], "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": [960], "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": [320, 960, 3, 3], "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.6.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.6.0.skip_connection.weight": [320, 960, 1, 1], "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": [640], "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.7.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.7.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": [320], "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": [320, 1280], "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": [640], 
"model.diffusion_model.output_blocks.8.0.in_layers.0.weight": [640], "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": [320], "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": [320, 640, 3, 3], "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": [320], "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": [320], "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": [320], "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": [320, 320, 3, 3], "model.diffusion_model.output_blocks.8.0.skip_connection.bias": [320], "model.diffusion_model.output_blocks.8.0.skip_connection.weight": [320, 640, 1, 1], "model.diffusion_model.time_embed.0.bias": [1280], "model.diffusion_model.time_embed.0.weight": [1280, 320], "model.diffusion_model.time_embed.2.bias": [1280], "model.diffusion_model.time_embed.2.weight": [1280, 1280]}} \ No newline at end of file diff --git a/scripts/create_checkpoint_template.py b/scripts/create_checkpoint_template.py index 4b9bfe0ea2..045df77551 100755 --- a/scripts/create_checkpoint_template.py +++ b/scripts/create_checkpoint_template.py @@ -9,11 +9,35 @@ import json from pathlib import Path -from invokeai.backend.model_management.models.base import read_checkpoint_meta +from invokeai.backend.model_manager import( + read_checkpoint_meta, + ModelType, + ModelVariantType, + BaseModelType, +) -parser = argparse.ArgumentParser(description="Create a .json template from checkpoint/safetensors model") -parser.add_argument("--checkpoint", "--in", type=Path, help="Path to the input checkpoint/safetensors file") +parser = argparse.ArgumentParser( + description="Create a .json template from checkpoint/safetensors model", +) +parser.add_argument('checkpoint', type=Path, help="Path to the input checkpoint/safetensors file") parser.add_argument("--template", "--out", type=Path, help="Path to the output .json file") +parser.add_argument("--base-type", + type=str, + choices=[x.value for x in BaseModelType], + 
help="Base model", + ) +parser.add_argument("--model-type", + type=str, + choices=[x.value for x in ModelType], + default='main', + help="Type of the model", + ) +parser.add_argument("--variant", + type=str, + choices=[x.value for x in ModelVariantType], + default='normal', + help="Base type of the model", + ) opt = parser.parse_args() ckpt = read_checkpoint_meta(opt.checkpoint) @@ -25,9 +49,16 @@ tmpl = {} for key, tensor in ckpt.items(): tmpl[key] = list(tensor.shape) +meta = { + 'base_type': opt.base_type, + 'model_type': opt.model_type, + 'variant': opt.variant, + 'template': tmpl +} + try: - with open(opt.template, "w") as f: - json.dump(tmpl, f) + with open(opt.template, "w", encoding="utf-8") as f: + json.dump(meta, f) print(f"Template written out as {opt.template}") -except Exception as e: +except OSError as e: print(f"An exception occurred while writing template: {str(e)}") diff --git a/scripts/verify_checkpoint_template.py b/scripts/verify_checkpoint_template.py index 15194290f5..dd155557c4 100755 --- a/scripts/verify_checkpoint_template.py +++ b/scripts/verify_checkpoint_template.py @@ -1,6 +1,7 @@ #!/usr/bin/env python """ Read a checkpoint/safetensors file and compare it to a template .json. + Returns True if their metadata match. """ @@ -26,12 +27,14 @@ checkpoint_metadata = {} for key, tensor in ckpt.items(): checkpoint_metadata[key] = list(tensor.shape) -with open(opt.template, "r") as f: +with open(opt.template, "r", encoding="utf-8") as f: template = json.load(f) -if checkpoint_metadata == template: - print("True") - sys.exit(0) -else: - print("False") - sys.exit(-1) +for key in template["template"]: + val1 = checkpoint_metadata.get(key) + val2 = template["template"][key] + if val1 != val2: + print(f"mismatch: {key}: template={val2} != checkpoint={val1}") + sys.exit(-1) +print("Match") +sys.exit(0)