From 2dae5eb7ad6da3fb7e90a948e903430eacc507eb Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lstein@gmail.com>
Date: Thu, 16 May 2024 22:26:18 -0400
Subject: [PATCH] more refactoring; HF subfolders not working

---
 docs/contributing/MODEL_MANAGER.md            |  15 +-
 .../model_install/model_install_base.py       |   9 +-
 .../model_install/model_install_default.py    | 169 ++++++++++--------
 .../model_records/model_records_base.py       |   8 +-
 .../model_install/test_model_install.py       |  14 +-
 5 files changed, 113 insertions(+), 102 deletions(-)

diff --git a/docs/contributing/MODEL_MANAGER.md b/docs/contributing/MODEL_MANAGER.md
index c12046293c..d53198b98e 100644
--- a/docs/contributing/MODEL_MANAGER.md
+++ b/docs/contributing/MODEL_MANAGER.md
@@ -397,26 +397,25 @@ In the event you wish to create a new installer, you may use the
 following initialization pattern:
 
 ```
-from invokeai.app.services.config import InvokeAIAppConfig
+from invokeai.app.services.config import get_config
 from invokeai.app.services.model_records import ModelRecordServiceSQL
 from invokeai.app.services.model_install import ModelInstallService
 from invokeai.app.services.download import DownloadQueueService
-from invokeai.app.services.shared.sqlite import SqliteDatabase
+from invokeai.app.services.shared.sqlite.sqlite_database import SqliteDatabase
 from invokeai.backend.util.logging import InvokeAILogger
 
-config = InvokeAIAppConfig.get_config()
-config.parse_args()
+config = get_config()
 
 logger = InvokeAILogger.get_logger(config=config)
-db = SqliteDatabase(config, logger)
+db = SqliteDatabase(config.db_path, logger)
 record_store = ModelRecordServiceSQL(db)
 queue = DownloadQueueService()
 queue.start()
 
-installer = ModelInstallService(app_config=config, 
+installer = ModelInstallService(app_config=config,
                                 record_store=record_store,
-              download_queue=queue
-           )
+                                download_queue=queue
+                                )
 installer.start()
 ```
 
diff --git a/invokeai/app/services/model_install/model_install_base.py b/invokeai/app/services/model_install/model_install_base.py
index 68cf9591e0..b622c8dade 100644
--- a/invokeai/app/services/model_install/model_install_base.py
+++ b/invokeai/app/services/model_install/model_install_base.py
@@ -466,17 +466,14 @@ class ModelInstallServiceBase(ABC):
         """
 
     @abstractmethod
-    def download_and_cache_ckpt(
+    def download_and_cache_model(
         self,
-        source: str | AnyHttpUrl,
-        access_token: Optional[str] = None,
-        timeout: int = 0,
+        source: str,
     ) -> Path:
         """
         Download the model file located at source to the models cache and return its Path.
 
-        :param source: A Url or a string that can be converted into one.
-        :param access_token: Optional access token to access restricted resources.
+        :param source: A string representing a URL or repo_id.
 
         The model file will be downloaded into the system-wide model cache
         (`models/.cache`) if it isn't already there. Note that the model cache
diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py
index 1d77b2c6e1..a6bb7ad10d 100644
--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@@ -9,7 +9,7 @@ from pathlib import Path
 from queue import Empty, Queue
 from shutil import copyfile, copytree, move, rmtree
 from tempfile import mkdtemp
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
 import torch
 import yaml
@@ -18,7 +18,7 @@ from pydantic.networks import AnyHttpUrl
 from requests import Session
 
 from invokeai.app.services.config import InvokeAIAppConfig
-from invokeai.app.services.download import DownloadQueueServiceBase, MultiFileDownloadJob, TqdmProgress
+from invokeai.app.services.download import DownloadQueueServiceBase, MultiFileDownloadJob
 from invokeai.app.services.events.events_base import EventServiceBase
 from invokeai.app.services.invoker import Invoker
 from invokeai.app.services.model_records import DuplicateModelException, ModelRecordServiceBase
@@ -208,26 +208,12 @@ class ModelInstallService(ModelInstallServiceBase):
         access_token: Optional[str] = None,
         inplace: Optional[bool] = False,
     ) -> ModelInstallJob:
-        variants = "|".join(ModelRepoVariant.__members__.values())
-        hf_repoid_re = f"^([^/:]+/[^/:]+)(?::({variants})?(?::/?([^:]+))?)?$"
-        source_obj: Optional[StringLikeSource] = None
-
-        if Path(source).exists():  # A local file or directory
-            source_obj = LocalModelSource(path=Path(source), inplace=inplace)
-        elif match := re.match(hf_repoid_re, source):
-            source_obj = HFModelSource(
-                repo_id=match.group(1),
-                variant=match.group(2) if match.group(2) else None,  # pass None rather than ''
-                subfolder=Path(match.group(3)) if match.group(3) else None,
-                access_token=access_token,
-            )
-        elif re.match(r"^https?://[^/]+", source):
-            source_obj = URLModelSource(
-                url=AnyHttpUrl(source),
-                access_token=access_token,
-            )
-        else:
-            raise ValueError(f"Unsupported model source: '{source}'")
+        """Install a model using pattern matching to infer the type of source."""
+        source_obj = self._guess_source(source)
+        if isinstance(source_obj, LocalModelSource):
+            source_obj.inplace = inplace
+        elif isinstance(source_obj, HFModelSource) or isinstance(source_obj, URLModelSource):
+            source_obj.access_token = access_token
         return self.import_model(source_obj, config)
 
     def import_model(self, source: ModelSource, config: Optional[Dict[str, Any]] = None) -> ModelInstallJob:  # noqa D102
@@ -383,37 +369,86 @@ class ModelInstallService(ModelInstallServiceBase):
         escaped_source = slugify(str(source))
         return app_config.download_cache_path / escaped_source
 
-    def download_and_cache_ckpt(
+    def download_and_cache_model(
         self,
-        source: str | AnyHttpUrl,
-        access_token: Optional[str] = None,
-        timeout: int = 0,
+        source: str,
     ) -> Path:
         """Download the model file located at source to the models cache and return its Path."""
-        model_path = self._download_cache_path(source, self._app_config)
+        model_path = self._download_cache_path(str(source), self._app_config)
 
-        # We expect the cache directory to contain one and only one downloaded file.
+        # We expect the cache directory to contain one and only one downloaded file or directory.
         # We don't know the file's name in advance, as it is set by the download
         # content-disposition header.
         if model_path.exists():
-            contents = [x for x in model_path.iterdir() if x.is_file()]
+            contents: List[Path] = list(model_path.iterdir())
             if len(contents) > 0:
                 return contents[0]
 
         model_path.mkdir(parents=True, exist_ok=True)
-        job = self._download_queue.download(
-            source=AnyHttpUrl(str(source)),
+        model_source = self._guess_source(source)
+        remote_files, _ = self._remote_files_from_source(model_source)
+        job = self._download_queue.multifile_download(
+            parts=remote_files,
             dest=model_path,
-            access_token=access_token,
-            on_progress=TqdmProgress().update,
         )
-        self._download_queue.wait_for_job(job, timeout)
+        files_string = "file" if len(remote_files) == 1 else "file"
+        self._logger.info(f"Queuing model install: {source} ({len(remote_files)} {files_string})")
+        self._download_queue.wait_for_job(job)
         if job.complete:
             assert job.download_path is not None
             return job.download_path
         else:
             raise Exception(job.error)
 
+    def _remote_files_from_source(
+        self, source: ModelSource
+    ) -> Tuple[List[RemoteModelFile], Optional[AnyModelRepoMetadata]]:
+        metadata = None
+        if isinstance(source, HFModelSource):
+            metadata = HuggingFaceMetadataFetch(self._session).from_id(source.repo_id, source.variant)
+            assert isinstance(metadata, ModelMetadataWithFiles)
+            return metadata.download_urls(
+                variant=source.variant or self._guess_variant(),
+                subfolder=source.subfolder,
+                session=self._session,
+            ), metadata
+
+        if isinstance(source, URLModelSource):
+            try:
+                fetcher = self.get_fetcher_from_url(str(source.url))
+                kwargs: dict[str, Any] = {"session": self._session}
+                metadata = fetcher(**kwargs).from_url(source.url)
+                assert isinstance(metadata, ModelMetadataWithFiles)
+                return metadata.download_urls(session=self._session), metadata
+            except ValueError:
+                pass
+
+            return [RemoteModelFile(url=source.url, path=Path("."), size=0)], None
+
+        raise Exception(f"No files associated with {source}")
+
+    def _guess_source(self, source: str) -> ModelSource:
+        """Turn a source string into a ModelSource object."""
+        variants = "|".join(ModelRepoVariant.__members__.values())
+        hf_repoid_re = f"^([^/:]+/[^/:]+)(?::({variants})?(?::/?([^:]+))?)?$"
+        source_obj: Optional[StringLikeSource] = None
+
+        if Path(source).exists():  # A local file or directory
+            source_obj = LocalModelSource(path=Path(source))
+        elif match := re.match(hf_repoid_re, source):
+            source_obj = HFModelSource(
+                repo_id=match.group(1),
+                variant=match.group(2) if match.group(2) else None,  # pass None rather than ''
+                subfolder=Path(match.group(3)) if match.group(3) else None,
+            )
+        elif re.match(r"^https?://[^/]+", source):
+            source_obj = URLModelSource(
+                url=AnyHttpUrl(source),
+            )
+        else:
+            raise ValueError(f"Unsupported model source: '{source}'")
+        return source_obj
+
     # --------------------------------------------------------------------------------------------
     # Internal functions that manage the installer threads
     # --------------------------------------------------------------------------------------------
@@ -650,18 +685,9 @@ class ModelInstallService(ModelInstallServiceBase):
         config: Optional[Dict[str, Any]] = None,
     ) -> ModelInstallJob:
         # Add user's cached access token to HuggingFace requests
-        source.access_token = source.access_token or HfFolder.get_token()
-        if not source.access_token:
-            self._logger.info("No HuggingFace access token present; some models may not be downloadable.")
-
-        metadata = HuggingFaceMetadataFetch(self._session).from_id(source.repo_id, source.variant)
-        assert isinstance(metadata, ModelMetadataWithFiles)
-        remote_files = metadata.download_urls(
-            variant=source.variant or self._guess_variant(),
-            subfolder=source.subfolder,
-            session=self._session,
-        )
-
+        if source.access_token is None:
+            source.access_token = HfFolder.get_token()
+        remote_files, metadata = self._remote_files_from_source(source)
         return self._import_remote_model(
             source=source,
             config=config,
@@ -674,21 +700,7 @@ class ModelInstallService(ModelInstallServiceBase):
         source: URLModelSource,
         config: Optional[Dict[str, Any]],
     ) -> ModelInstallJob:
-        # URLs from HuggingFace will be handled specially
-        metadata = None
-        fetcher = None
-        try:
-            fetcher = self.get_fetcher_from_url(str(source.url))
-        except ValueError:
-            pass
-        kwargs: dict[str, Any] = {"session": self._session}
-        if fetcher is not None:
-            metadata = fetcher(**kwargs).from_url(source.url)
-        self._logger.debug(f"metadata={metadata}")
-        if metadata and isinstance(metadata, ModelMetadataWithFiles):
-            remote_files = metadata.download_urls(session=self._session)
-        else:
-            remote_files = [RemoteModelFile(url=source.url, path=Path("."), size=0)]
+        remote_files, metadata = self._remote_files_from_source(source)
         return self._import_remote_model(
             source=source,
             config=config,
@@ -733,26 +745,17 @@ class ModelInstallService(ModelInstallServiceBase):
             root = Path(".")
             subfolder = Path(".")
 
-        # we remember the path up to the top of the destdir so that it may be
-        # removed safely at the end of the install process.
-        install_job._install_tmpdir = destdir
-
         parts: List[RemoteModelFile] = []
         for model_file in remote_files:
             assert install_job.total_bytes is not None
             assert model_file.size is not None
             install_job.total_bytes += model_file.size
             parts.append(RemoteModelFile(url=model_file.url, path=model_file.path.relative_to(subfolder)))
-        multifile_job = self._download_queue.multifile_download(
+        multifile_job = self._multifile_download(
             parts=parts,
             dest=destdir,
             access_token=source.access_token,
-            submit_job=False,
-            on_start=self._download_started_callback,
-            on_progress=self._download_progress_callback,
-            on_complete=self._download_complete_callback,
-            on_error=self._download_error_callback,
-            on_cancelled=self._download_cancelled_callback,
+            submit_job=False,  # Important! Don't submit the job until we have set our _download_cache dict
         )
         self._download_cache[multifile_job.id] = install_job
         install_job._download_job = multifile_job
@@ -772,6 +775,21 @@ class ModelInstallService(ModelInstallServiceBase):
                 size += sum(self._stat_size(Path(root, x)) for x in files)
         return size
 
+    def _multifile_download(
+        self, parts: List[RemoteModelFile], dest: Path, access_token: Optional[str] = None, submit_job: bool = True
+    ) -> MultiFileDownloadJob:
+        return self._download_queue.multifile_download(
+            parts=parts,
+            dest=dest,
+            access_token=access_token,
+            submit_job=submit_job,
+            on_start=self._download_started_callback,
+            on_progress=self._download_progress_callback,
+            on_complete=self._download_complete_callback,
+            on_error=self._download_error_callback,
+            on_cancelled=self._download_cancelled_callback,
+        )
+
     # ------------------------------------------------------------------
     # Callbacks are executed by the download queue in a separate thread
     # ------------------------------------------------------------------
@@ -875,10 +893,9 @@ class ModelInstallService(ModelInstallServiceBase):
             assert job.local_path is not None
             assert job.config_out is not None
             key = job.config_out.key
-            self._event_bus.emit_model_install_completed(source=str(job.source),
-                                                         key=key,
-                                                         id=job.id,
-                                                         total_bytes=job.bytes)
+            self._event_bus.emit_model_install_completed(
+                source=str(job.source), key=key, id=job.id, total_bytes=job.bytes
+            )
 
     def _signal_job_errored(self, job: ModelInstallJob) -> None:
         self._logger.error(f"Model install error: {job.source}\n{job.error_type}: {job.error}")
diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py
index 094ade6383..57531cf3c1 100644
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -12,15 +12,13 @@ from pydantic import BaseModel, Field
 
 from invokeai.app.services.shared.pagination import PaginatedResults
 from invokeai.app.util.model_exclude_null import BaseModelExcludeNull
-from invokeai.backend.model_manager import (
+from invokeai.backend.model_manager.config import (
     AnyModelConfig,
     BaseModelType,
-    ModelFormat,
-    ModelType,
-)
-from invokeai.backend.model_manager.config import (
     ControlAdapterDefaultSettings,
     MainModelDefaultSettings,
+    ModelFormat,
+    ModelType,
     ModelVariantType,
     SchedulerPredictionType,
 )
diff --git a/tests/app/services/model_install/test_model_install.py b/tests/app/services/model_install/test_model_install.py
index f73b827534..ca8616238f 100644
--- a/tests/app/services/model_install/test_model_install.py
+++ b/tests/app/services/model_install/test_model_install.py
@@ -222,7 +222,7 @@ def test_delete_register(
         store.get_model(key)
 
 
-@pytest.mark.timeout(timeout=20, method="thread")
+@pytest.mark.timeout(timeout=10, method="thread")
 def test_simple_download(mm2_installer: ModelInstallServiceBase, mm2_app_config: InvokeAIAppConfig) -> None:
     source = URLModelSource(url=Url("https://www.test.foo/download/test_embedding.safetensors"))
 
@@ -253,7 +253,7 @@ def test_simple_download(mm2_installer: ModelInstallServiceBase, mm2_app_config:
     ]
 
 
-@pytest.mark.timeout(timeout=20, method="thread")
+@pytest.mark.timeout(timeout=10, method="thread")
 def test_huggingface_install(mm2_installer: ModelInstallServiceBase, mm2_app_config: InvokeAIAppConfig) -> None:
     source = URLModelSource(url=Url("https://huggingface.co/stabilityai/sdxl-turbo"))
 
@@ -285,9 +285,8 @@ def test_huggingface_install(mm2_installer: ModelInstallServiceBase, mm2_app_con
     }
 
 
-@pytest.mark.timeout(timeout=20, method="thread")
+@pytest.mark.timeout(timeout=10, method="thread")
 def test_huggingface_repo_id(mm2_installer: ModelInstallServiceBase, mm2_app_config: InvokeAIAppConfig) -> None:
-    # TODO: Test subfolder download
     source = HFModelSource(repo_id="stabilityai/sdxl-turbo", variant=ModelRepoVariant.Default)
 
     bus = mm2_installer.event_bus
@@ -323,6 +322,7 @@ def test_huggingface_repo_id(mm2_installer: ModelInstallServiceBase, mm2_app_con
     assert job.total_bytes == completed_events[0].payload["total_bytes"]
     assert job.total_bytes == sum(x["total_bytes"] for x in downloading_events[-1].payload["parts"])
 
+
 def test_404_download(mm2_installer: ModelInstallServiceBase, mm2_app_config: InvokeAIAppConfig) -> None:
     source = URLModelSource(url=Url("https://test.com/missing_model.safetensors"))
     job = mm2_installer.import_model(source)
@@ -371,7 +371,7 @@ def test_other_error_during_install(
         },
     ],
 )
-@pytest.mark.timeout(timeout=20, method="thread")
+@pytest.mark.timeout(timeout=10, method="thread")
 def test_heuristic_import_with_type(mm2_installer: ModelInstallServiceBase, model_params: Dict[str, str]):
     """Test whether or not type is respected on configs when passed to heuristic import."""
     assert "name" in model_params and "type" in model_params
@@ -387,7 +387,7 @@ def test_heuristic_import_with_type(mm2_installer: ModelInstallServiceBase, mode
     }
     assert "repo_id" in model_params
     install_job1 = mm2_installer.heuristic_import(source=model_params["repo_id"], config=config1)
-    mm2_installer.wait_for_job(install_job1, timeout=20)
+    mm2_installer.wait_for_job(install_job1, timeout=10)
     if model_params["type"] != "embedding":
         assert install_job1.errored
         assert install_job1.error_type == "InvalidModelConfigException"
@@ -396,6 +396,6 @@ def test_heuristic_import_with_type(mm2_installer: ModelInstallServiceBase, mode
     assert install_job1.config_out if model_params["type"] == "embedding" else not install_job1.config_out
 
     install_job2 = mm2_installer.heuristic_import(source=model_params["repo_id"], config=config2)
-    mm2_installer.wait_for_job(install_job2, timeout=20)
+    mm2_installer.wait_for_job(install_job2, timeout=10)
     assert install_job2.complete
     assert install_job2.config_out if model_params["type"] == "embedding" else not install_job2.config_out