Change pad_to_multiple_of to be 8 for all cases. Add comment about its temporary status

Wubbbi 2023-11-08 17:04:02 +01:00 committed by Kent Keirsey
parent b9f607be56
commit 6001d3d71d


@@ -166,13 +166,13 @@ class ModelPatcher:
         init_tokens_count = None
         new_tokens_added = None
-        # This is required since Transformers 4.32
-        # see https://github.com/huggingface/transformers/pull/25088
-        # More information: https://docs.nvidia.com/deeplearning/performance/dl-performance-
-        # matrix-multiplication/index.html#requirements-tc
-        if "A100" in torch.cuda.get_device_name():
-            pad_to_multiple_of = 64
-        else:
-            pad_to_multiple_of = 8
+        # TODO: This is required since Transformers 4.32, see
+        # https://github.com/huggingface/transformers/pull/25088
+        # More information by NVIDIA:
+        # https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc
+        # This value might need to be changed in the future to take the GPU model into account, as there seem
+        # to be ideal values for different GPUs. This value is temporary!
+        # For references to the current discussion please see https://github.com/invoke-ai/InvokeAI/pull/4817
+        pad_to_multiple_of = 8
         try:
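
For context, pad_to_multiple_of is the argument that Transformers accepts in PreTrainedModel.resize_token_embeddings since version 4.32 (the pull request linked in the comment above); rounding the embedding table up to a multiple of 8 (or 64) keeps its dimensions tensor-core friendly per the linked NVIDIA guide. The sketch below is a minimal, hypothetical illustration of how such a value feeds into a resize after adding textual-inversion tokens; the model name and token string are placeholder assumptions, not InvokeAI's actual call site.

# Minimal sketch, not InvokeAI's code: assumes transformers >= 4.32 and torch are installed.
from transformers import CLIPTextModel, CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

pad_to_multiple_of = 8  # blanket value chosen by this commit; marked temporary above

# Register a hypothetical textual-inversion trigger token, then resize the
# embedding matrix. pad_to_multiple_of rounds the new vocabulary size up to
# the next multiple of 8 so the embedding dimensions satisfy the tensor-core
# alignment requirements described in the NVIDIA guide.
new_tokens_added = tokenizer.add_tokens(["<my-ti-token>"])
resized = text_encoder.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=pad_to_multiple_of)
print(f"added {new_tokens_added} token(s); embedding rows now {resized.num_embeddings}")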