add back the heuristic_import() method and extend repo_ids to arbitrary file paths

This commit is contained in:
Lincoln Stein 2024-02-11 23:37:49 -05:00 committed by Brandon Rising
parent d56337f2d8
commit 195768c9ee
6 changed files with 199 additions and 12 deletions


@@ -446,6 +446,44 @@ required parameters:
Once initialized, the installer will provide the following methods:
#### install_job = installer.heuristic_import(source, [config], [access_token])
This is a simplified interface to the installer which takes a source
string, an optional model configuration dictionary and an optional
access token.
The `source` is a string that can take any of the following forms:
1. A path on the local filesystem (`C:\\users\\fred\\model.safetensors`)
2. A URL pointing to a single downloadable model file (`https://civitai.com/models/58390/detail-tweaker-lora-lora`)
3. A HuggingFace repo_id with any of the following formats:
- `model/name` -- entire model
- `model/name:fp32` -- entire model, using the fp32 variant
- `model/name:fp16:vae` -- vae submodel, using the fp16 variant
- `model/name::vae` -- vae submodel, using default precision
- `model/name:fp16:path/to/model.safetensors` -- an individual model file, fp16 variant
- `model/name::path/to/model.safetensors` -- an individual model file, default variant
Note that by specifying a path relative to the top of the HuggingFace
repo, you can download and install arbitrary model files.
The variant, if not provided, will be automatically filled in with
`fp32` if the user has requested full precision, and `fp16`
otherwise. If a variant that does not exist is requested, then the
method will install whatever HuggingFace returns as its default
revision.
`config` is an optional dict of values that will override the
autoprobed values for model type, base, scheduler prediction type, and
so forth. See [Model configuration and
probing](#Model-configuration-and-probing) for details.
`access_token` is an optional access token for accessing resources
that need authentication.
The method will return a `ModelInstallJob`. This object is discussed
at length in the following section.
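As a sketch, the three-part source syntax above can be split with a small helper; this is hypothetical (not part of the installer API) and applies only once the string is known not to be a local path or URL:

```python
from typing import Optional, Tuple

def split_hf_source(source: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Split 'model/name[:variant[:subpath]]' into (repo_id, variant, subpath).

    An empty variant field ('model/name::vae') yields variant=None,
    meaning the default precision is used.
    """
    parts = source.split(":", 2)
    repo_id = parts[0]
    variant = (parts[1] or None) if len(parts) > 1 else None
    subpath = (parts[2] or None) if len(parts) > 2 else None
    return repo_id, variant, subpath

print(split_hf_source("model/name:fp16:vae"))  # ('model/name', 'fp16', 'vae')
print(split_hf_source("model/name::vae"))      # ('model/name', None, 'vae')
print(split_hf_source("model/name"))           # ('model/name', None, None)
```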
#### install_job = installer.import_model()
The `import_model()` method is the core of the installer. The
@@ -464,9 +502,10 @@ source2 = LocalModelSource(path='/opt/models/sushi_diffusers') # a local dif
source3 = HFModelSource(repo_id='runwayml/stable-diffusion-v1-5') # a repo_id
source4 = HFModelSource(repo_id='runwayml/stable-diffusion-v1-5', subfolder='vae') # a subfolder within a repo_id
source5 = HFModelSource(repo_id='runwayml/stable-diffusion-v1-5', variant='fp16') # a named variant of a HF model
source6 = HFModelSource(repo_id='runwayml/stable-diffusion-v1-5', subfolder='OrangeMix/OrangeMix1.ckpt') # path to an individual model file
source7 = URLModelSource(url='https://civitai.com/api/download/models/63006') # model located at a URL
source8 = URLModelSource(url='https://civitai.com/api/download/models/63006', access_token='letmein') # with an access token
for source in [source1, source2, source3, source4, source5, source6, source7, source8]:
install_job = installer.install_model(source)
@@ -522,7 +561,6 @@ can be passed to `import_model()`.
attributes returned by the model prober. See the section below for
details.
#### LocalModelSource
This is used for a model that is located on a locally-accessible Posix
@@ -715,7 +753,7 @@ and `cancelled`, as well as `in_terminal_state`. The last will return
True if the job is in the complete, errored or cancelled states.
#### Model configuration and probing
The install service uses the `invokeai.backend.model_manager.probe`
module during import to determine the model's type, base type, and
@@ -1106,7 +1144,7 @@ job = queue.create_download_job(
event_handlers=[my_handler1, my_handler2], # if desired
start=True,
)
```
The `filename` argument forces the downloader to use the specified
name for the file rather than the name provided by the remote source,
@@ -1427,9 +1465,9 @@ set of keys to the corresponding model config objects.
Find all model metadata records that have the given author and return
a set of keys to the corresponding model config objects.
***
## The Lowdown on the ModelLoadService
The `ModelLoadService` is responsible for loading a named model into
memory so that it can be used for inference. Despite the fact that it


@@ -251,9 +251,75 @@ async def add_model_record(
return result
@model_manager_v2_router.post(
"/heuristic_import",
operation_id="heuristic_import_model",
responses={
201: {"description": "The model imported successfully"},
415: {"description": "Unrecognized file/folder format"},
424: {"description": "The model appeared to import successfully, but could not be found in the model manager"},
409: {"description": "There is already a model corresponding to this path or repo_id"},
},
status_code=201,
)
async def heuristic_import(
source: str,
config: Optional[Dict[str, Any]] = Body(
description="Dict of fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
default=None,
),
access_token: Optional[str] = None,
) -> ModelInstallJob:
"""Install a model using a string identifier.
`source` can be any of the following.
1. A path on the local filesystem ('C:\\users\\fred\\model.safetensors')
2. A URL pointing to a single downloadable model file
3. A HuggingFace repo_id with any of the following formats:
- model/name
- model/name:fp16:vae
- model/name::vae -- use default precision
- model/name:fp16:path/to/model.safetensors
- model/name::path/to/model.safetensors
`config` is an optional dict containing model configuration values that will override
the ones that are probed automatically.
`access_token` is an optional access token for use with URLs that require
authentication.
Models will be downloaded, probed, configured and installed in a
series of background threads. The return object has a `status` attribute
that can be used to monitor progress.
See the documentation for `import_model_record` for more information on
interpreting the job information returned by this route.
"""
logger = ApiDependencies.invoker.services.logger
try:
installer = ApiDependencies.invoker.services.model_manager.install
result: ModelInstallJob = installer.heuristic_import(
source=source,
config=config,
)
logger.info(f"Started installation of {source}")
except UnknownModelException as e:
logger.error(str(e))
raise HTTPException(status_code=424, detail=str(e))
except InvalidModelException as e:
logger.error(str(e))
raise HTTPException(status_code=415)
except ValueError as e:
logger.error(str(e))
raise HTTPException(status_code=409, detail=str(e))
return result
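A client call to this route might look like the following sketch; the host, port, and route prefix are assumptions (they come from where `model_manager_v2_router` is mounted, which is not shown in this diff):

```python
import json
from urllib.parse import urlencode

# Assumed mount point for model_manager_v2_router (hypothetical).
base = "http://localhost:9090/api/v2/models/heuristic_import"

# `source` travels as a query parameter; the optional config dict is the JSON body.
query = urlencode({"source": "stabilityai/stable-diffusion-2-1:fp16:vae"})
body = json.dumps({"name": "sd21-vae", "description": "SD 2.1 fp16 VAE"})

print(f"POST {base}?{query}")
print(f"payload: {body}")
```

On success the route returns a serialized `ModelInstallJob` with HTTP status 201.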
@model_manager_v2_router.post(
"/import",
operation_id="import_model",
responses={
201: {"description": "The model imported successfully"},
415: {"description": "Unrecognized file/folder format"},
@@ -269,7 +335,7 @@ async def import_model(
default=None,
),
) -> ModelInstallJob:
"""Add a model using its local path, repo_id, or remote URL. """Install a model using its local path, repo_id, or remote URL.
Models will be downloaded, probed, configured and installed in a Models will be downloaded, probed, configured and installed in a
series of background threads. The return object has `status` attribute series of background threads. The return object has `status` attribute


@@ -49,7 +49,6 @@ if True: # hack to make flake8 happy with imports coming after setting up the c
download_queue,
images,
model_manager_v2,
models,
session_queue,
sessions,
utilities,


@@ -127,8 +127,8 @@ class HFModelSource(StringLikeSource):
def __str__(self) -> str:
"""Return string version of repoid when string rep needed."""
base: str = self.repo_id
base += f":{self.variant or ''}"
base += f":{self.subfolder}" if self.subfolder else "" base += f":{self.subfolder}" if self.subfolder else ""
base += f" ({self.variant})" if self.variant else ""
return base
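The effect of the change is that the variant slot is always present (empty when unset) and the subfolder is appended only when given; a standalone mirror of the new `__str__`:

```python
from typing import Optional

def hf_source_str(repo_id: str, variant: Optional[str] = None, subfolder: Optional[str] = None) -> str:
    # Simplified mirror of the updated HFModelSource.__str__.
    base = repo_id
    base += f":{variant or ''}"                    # variant slot always present
    base += f":{subfolder}" if subfolder else ""   # subfolder only when given
    return base

print(hf_source_str("runwayml/stable-diffusion-v1-5", "fp16", "vae"))
# runwayml/stable-diffusion-v1-5:fp16:vae
print(hf_source_str("runwayml/stable-diffusion-v1-5"))
# runwayml/stable-diffusion-v1-5:
```

The point of the new ordering is that the string form round-trips with the `repo_id:variant:subfolder` syntax accepted by `heuristic_import`.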
@@ -324,6 +324,43 @@ class ModelInstallServiceBase(ABC):
:returns id: The string ID of the registered model.
"""
@abstractmethod
def heuristic_import(
self,
source: str,
config: Optional[Dict[str, Any]] = None,
access_token: Optional[str] = None,
) -> ModelInstallJob:
r"""Install the indicated model using heuristics to interpret user intentions.
:param source: String source
:param config: Optional dict. Any fields in this dict
will override corresponding autoassigned probe fields in the
model's config record as described in `import_model()`.
:param access_token: Optional access token for remote sources.
The source can be:
1. A local file path in posix() format (`/foo/bar` or `C:\foo\bar`)
2. An http or https URL (`https://foo.bar/foo`)
3. A HuggingFace repo_id (`foo/bar`, `foo/bar:fp16`, `foo/bar:fp16:vae`)
We extend the HuggingFace repo_id syntax to include the variant and the
subfolder or path. The following are acceptable alternatives:
stabilityai/stable-diffusion-v4
stabilityai/stable-diffusion-v4:fp16
stabilityai/stable-diffusion-v4:fp16:vae
stabilityai/stable-diffusion-v4::/checkpoints/sd4.safetensors
stabilityai/stable-diffusion-v4:onnx:vae
Because a local file path can look like a huggingface repo_id, the logic
first checks whether the path exists on disk, and if not, it is treated as
a parseable huggingface repo.
The previous support for recursing into a local folder and loading all model-like files
has been removed.
"""
pass
@abstractmethod
def import_model(
self,


@@ -50,6 +50,7 @@ from .model_install_base import (
ModelInstallJob,
ModelInstallServiceBase,
ModelSource,
StringLikeSource,
URLModelSource,
)
@@ -177,6 +178,34 @@ class ModelInstallService(ModelInstallServiceBase):
info,
)
def heuristic_import(
self,
source: str,
config: Optional[Dict[str, Any]] = None,
access_token: Optional[str] = None,
) -> ModelInstallJob:
variants = "|".join(ModelRepoVariant.__members__.values())
hf_repoid_re = f"^([^/:]+/[^/:]+)(?::({variants})?(?::/?([^:]+))?)?$"
source_obj: Optional[StringLikeSource] = None
if Path(source).exists(): # A local file or directory
source_obj = LocalModelSource(path=Path(source))
elif match := re.match(hf_repoid_re, source):
source_obj = HFModelSource(
repo_id=match.group(1),
variant=match.group(2) if match.group(2) else None, # pass None rather than ''
subfolder=Path(match.group(3)) if match.group(3) else None,
access_token=access_token,
)
elif re.match(r"^https?://[^/]+", source):
source_obj = URLModelSource(
url=AnyHttpUrl(source),
access_token=access_token,
)
else:
raise ValueError(f"Unsupported model source: '{source}'")
return self.import_model(source_obj, config)
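The repo_id regex above can be exercised in isolation; the variant list here is an assumption standing in for `ModelRepoVariant.__members__.values()`:

```python
import re
from typing import Optional, Tuple

# Assumed variant names; the real list comes from ModelRepoVariant.
variants = "|".join(["fp16", "fp32", "onnx"])
hf_repoid_re = f"^([^/:]+/[^/:]+)(?::({variants})?(?::/?([^:]+))?)?$"

def parse_repo_id(source: str) -> Optional[Tuple[Optional[str], ...]]:
    """Return (repo_id, variant, subpath) if source parses as a repo_id, else None."""
    match = re.match(hf_repoid_re, source)
    return match.groups() if match else None

print(parse_repo_id("foo/bar:fp16:vae"))                       # ('foo/bar', 'fp16', 'vae')
print(parse_repo_id("foo/bar::/checkpoints/sd4.safetensors"))  # ('foo/bar', None, 'checkpoints/sd4.safetensors')
print(parse_repo_id("https://foo.bar/foo"))                    # None (the ':' after 'https' breaks the match)
```

Note that URLs and Windows drive paths fail the match because the first two fields may not contain `:`, which is why the URL and local-path checks can safely coexist with this pattern.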
def import_model(self, source: ModelSource, config: Optional[Dict[str, Any]] = None) -> ModelInstallJob:  # noqa D102
similar_jobs = [x for x in self.list_jobs() if x.source == source and not x.in_terminal_state]
if similar_jobs:
@@ -571,6 +600,8 @@ class ModelInstallService(ModelInstallServiceBase):
# TODO: Replace with tempfile.tmpdir() when multithreading is cleaned up.
# Currently the tmpdir isn't automatically removed at exit because it is
# being held in a daemon thread.
if len(remote_files) == 0:
raise ValueError(f"{source}: No downloadable files found")
tmpdir = Path(
mkdtemp(
dir=self._app_config.models_path,
@@ -586,6 +617,16 @@ class ModelInstallService(ModelInstallServiceBase):
bytes=0,
total_bytes=0,
)
# In the event that there is a subfolder specified in the source,
# we need to remove it from the destination path in order to avoid
# creating unwanted subfolders
if hasattr(source, "subfolder") and source.subfolder:
root = Path(remote_files[0].path.parts[0])
subfolder = root / source.subfolder
else:
root = Path(".")
subfolder = Path(".")
# we remember the path up to the top of the tmpdir so that it may be
# removed safely at the end of the install process.
install_job._install_tmpdir = tmpdir
@@ -595,7 +636,7 @@ class ModelInstallService(ModelInstallServiceBase):
self._logger.debug(f"remote_files={remote_files}")
for model_file in remote_files:
url = model_file.url
path = root / model_file.path.relative_to(subfolder)
self._logger.info(f"Downloading {url} => {path}")
install_job.total_bytes += model_file.size
assert hasattr(source, "access_token")
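The subfolder-stripping arithmetic can be checked on its own with hypothetical paths:

```python
from pathlib import Path

# A remote file path as listed by the metadata fetcher: repo root + subfolder + file.
remote = Path("stable-diffusion-v1-5/vae/diffusion_pytorch_model.safetensors")

root = Path(remote.parts[0])  # repo root directory inside the tmpdir
subfolder = root / "vae"      # the subfolder requested in the source

# Removing the subfolder component keeps the file directly under the root,
# so the install does not create an extra 'vae/' directory level.
dest = root / remote.relative_to(subfolder)
print(dest.as_posix())  # stable-diffusion-v1-5/diffusion_pytorch_model.safetensors
```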


@@ -36,6 +36,11 @@ def filter_files(
"""
variant = variant or ModelRepoVariant.DEFAULT
paths: List[Path] = []
root = files[0].parts[0]
# if the subfolder is a single file, then bypass the selection and just return it
if subfolder and subfolder.suffix in [".safetensors", ".bin", ".onnx", ".xml", ".pth", ".pt", ".ckpt", ".msgpack"]:
return [root / subfolder]
# Start by filtering on model file extensions, discarding images, docs, etc
for file in files:
@@ -61,6 +66,7 @@ def filter_files(
# limit search to subfolder if requested
if subfolder:
subfolder = root / subfolder
paths = [x for x in paths if x.parent == Path(subfolder)]
# _filter_by_variant uniquifies the paths and returns a set
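A self-contained sketch of the two subfolder behaviours added here (abbreviated suffix list; this stands in for, but is not, the real `filter_files` implementation):

```python
from pathlib import Path
from typing import List, Optional

# Abbreviated stand-in for the suffix list in filter_files.
MODEL_SUFFIXES = {".safetensors", ".bin", ".onnx", ".ckpt"}

def select_files(files: List[Path], subfolder: Optional[Path] = None) -> List[Path]:
    root = Path(files[0].parts[0])
    # If the "subfolder" is itself a model file, bypass filtering and return it.
    if subfolder and subfolder.suffix in MODEL_SUFFIXES:
        return [root / subfolder]
    paths = [f for f in files if f.suffix in MODEL_SUFFIXES]
    if subfolder:
        # Anchor the subfolder at the repo root before comparing parents.
        target = root / subfolder
        paths = [p for p in paths if p.parent == target]
    return paths

files = [
    Path("repo/vae/model.safetensors"),
    Path("repo/unet/model.safetensors"),
    Path("repo/README.md"),
]
print([p.as_posix() for p in select_files(files, Path("vae"))])
# ['repo/vae/model.safetensors']
print([p.as_posix() for p in select_files(files, Path("unet/model.safetensors"))])
# ['repo/unet/model.safetensors']
```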