mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
commit 1c649e4663f37b51b42a561548c7e03d7efb209e Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 12 13:29:16 2022 -0400 fix torchvision dependency version #511 commit 4d197f699e1e8c3b0e7c1b71c30261a49370ee8d Merge: a3e07fb 190ba78 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 12 07:29:19 2022 -0400 Merge branch 'development' of github.com:lstein/stable-diffusion into development commit a3e07fb84ad51eab2aa586edaa011bbd4e01b395 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 12 07:28:58 2022 -0400 fix grid crash commit 9fa1f31bf2f80785492927959c58e4b0825fb2e4 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 12 07:07:05 2022 -0400 fix opencv and realesrgan dependencies in mac install commit 190ba78960c0c45bd1c51626e303b8c78a17b0c1 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 12 01:50:58 2022 -0400 Update requirements-mac.txt Fixed dangling dash on last line. commit 25d9ccc5091cc6452d8597453dcfe6c79327aa3a Author: Any-Winter-4079 <50542132+Any-Winter-4079@users.noreply.github.com> Date: Mon Sep 12 03:17:29 2022 +0200 Update model.py commit 9cdf3aca7d2a7a6e85ec0a2732eb8e5a2dd60329 Author: Any-Winter-4079 <50542132+Any-Winter-4079@users.noreply.github.com> Date: Mon Sep 12 02:52:36 2022 +0200 Update attention.py Performance improvements to generate larger images in M1 #431 Update attention.py Added dtype=r1.dtype to softmax commit 49a96b90d846bcff17582273cacad596eff30658 Author: Mihai <299015+mh-dm@users.noreply.github.com> Date: Sat Sep 10 16:58:07 2022 +0300 ~7% speedup (1.57 to 1.69it/s) from switch to += in ldm.modules.attention. (#482) Tested on 8GB eGPU nvidia setup so YMMV. 512x512 output, max VRAM stays same. commit aba94b85e88cde654dd03bdec493a6d3b232f931 Author: Niek van der Maas <mail@niekvandermaas.nl> Date: Fri Sep 9 15:01:37 2022 +0200 Fix macOS `pyenv` instructions, add code block highlight (#441) Fix: `anaconda3-latest` does not work, specify the correct virtualenv, add missing init. 
commit aac5102cf3850781a635cacc3150dd6bb4f486a8 Author: Henry van Megen <h.vanmegen@gmail.com> Date: Thu Sep 8 05:16:35 2022 +0200 Disabled debug output (#436) Co-authored-by: Henry van Megen <hvanmegen@gmail.com> commit 0ab5a3646424467b459ea878d49cfc23f4a5ea35 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 17:19:46 2022 -0400 fix missing lines in outputs commit 5e433728b550de9f56a2f124c8b325b3a5f2bd2f Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 16:20:14 2022 -0400 upped max_steps in v1-finetune.yaml and fixed TI docs to address #493 commit 7708f4fb98510dff504041231261c039a2c718de Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 16:03:37 2022 -0400 slight efficiency gain by using += in attention.py commit b86a1deb00892f2b5f260659377d27790ef14016 Author: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Mon Sep 12 07:47:12 2022 +1200 Remove print statement styling (#504) Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com> commit 4951e66103878e5d5c8943a710ebce9320888252 Author: chromaticist <mhostick@gmail.com> Date: Sun Sep 11 12:44:26 2022 -0700 Adding support for .bin files from huggingface concepts (#498) * Adding support for .bin files from huggingface concepts * Updating documentation to include huggingface .bin info commit 79b445b0ca43b3592a829909dc4507cb1ecbe9e0 Merge: a323070 f7662c1 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 15:39:38 2022 -0400 Merge branch 'development' of github.com:lstein/stable-diffusion into development commit a323070a4dbb1ce62db94342a2ab8e4adef833d6 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 15:28:57 2022 -0400 update requirements for new location of gfpgan commit f7662c1808acc1704316d3b84d4baeacf1b24018 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 15:00:24 2022 -0400 update requirements for changed location of gfpgan commit 93c242c9fbef91d87a6bbf42db2267dbd51e5739 Author: Lincoln Stein 
<lincoln.stein@gmail.com> Date: Sun Sep 11 14:47:58 2022 -0400 make gfpgan_model_exists flag available to web interface commit c7c6cd7735b5c32e58349ca998a925cbaed7b376 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 14:43:07 2022 -0400 Update UPSCALE.md New instructions needed to accommodate fact that the ESRGAN and GFPGAN packages are now installed by environment.yaml. commit 77ca83e1031639f1e15cb7451e53dd8e37d1e971 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 14:31:56 2022 -0400 Update CLI.md Final documentation tweak. commit 0ea145d1884ce2316452124fd51a879506e2988d Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 14:29:26 2022 -0400 Update CLI.md More doc fixes. commit 162285ae86a2ab0bb26749387186c82b6bbf851d Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 14:28:45 2022 -0400 Update CLI.md Minor documentation fix commit 37c921dfe2aa25342934a101bf83eea4c0f5cfb7 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 14:26:41 2022 -0400 documentation enhancements commit 4f72cb44ad0429874c9ba507d325267e295a040c Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 13:05:38 2022 -0400 moved the notebook files into their own directory commit 878ef2e9e095ab08d00532f8a19556b8949b2dbb Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 12:58:06 2022 -0400 documentation tweaks commit 4923118610ecaced2a670d108aef81c220d3507a Merge: 16f6a67 defafc0 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 12:51:25 2022 -0400 Merge branch 'development' of github.com:lstein/stable-diffusion into development commit defafc0e8e0e69b39fd13db12036e1d01e7a19f1 Author: Dominic Letz <dominic@diode.io> Date: Sun Sep 11 18:51:01 2022 +0200 Enable upscaling on m1 (#474) commit 16f6a6731d80fcc04dcdb693d74fc5c21e753c10 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 12:47:26 2022 -0400 install GFPGAN inside SD repository in order to fix 'dark cast' issue #169 
commit 0881d429f2ddcd288aa673b2b5e9435a8a44371a Author: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Mon Sep 12 03:52:43 2022 +1200 Docs Update (#466) Authored-by: @blessedcoolant Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com> commit 9a29d442b437d650bd42516bbb24ebbcd0d6cd74 Author: Gérald LONLAS <gerald@lonlas.com> Date: Sun Sep 11 23:23:18 2022 +0800 Revert "Add 3x Upscale option on the Web UI (#442)" (#488) This reverts commit f8a540881c79ae657dc05b47bc71f8648e9f9782. commit d301836fbdfce0a3f12b19ae6415e7ae14f53ed2 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 10:52:19 2022 -0400 can select prior output for init_img using -1, -2, etc commit 70aa674e9e10d03eb462249764695ef1d4e1e28c Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 10:34:06 2022 -0400 merge PR #495 - keep using float16 in ldm.modules.attention commit 8748370f44e28b104fbaa23b4e2e54e64102d799 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 10:22:32 2022 -0400 negative -S indexing recovers correct previous seed; closes issue #476 commit 839e30e4b8ca6554017fbab671bdf85fadf9a6ea Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 11 10:02:44 2022 -0400 improve CUDA VRAM monitoring extra check that device==cuda before getting VRAM stats commit bfb278127923fbd461c4549a4b7f2f2c1dd34b8c Author: tildebyte <337875+tildebyte@users.noreply.github.com> Date: Sat Sep 10 10:15:56 2022 -0400 fix(readme): add note about updating env via conda (#475) commit 5c439888626145f94db1fdb00f5787ad27b64602 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 10 10:02:43 2022 -0400 reduce VRAM memory usage by half during model loading * This moves the call to half() before model.to(device) to avoid GPU copy of full model. 
Improves speed and reduces memory usage dramatically * This fix contributed by @mh-dm (Mihai) commit 99122708ca3342e00063c687f149c950cfd87200 Merge: 817c4a2 ecc6b75 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 10 09:54:34 2022 -0400 Merge branch 'development' of github.com:lstein/stable-diffusion into development commit 817c4a26de0d01b109550e6db9d4c3ece9f37c1b Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 10 09:53:27 2022 -0400 remove -F option from normalized prompt; closes #483 commit ecc6b75a3ede6d1d2850d69e998c92c342efdf2d Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 10 09:53:27 2022 -0400 remove -F option from normalized prompt commit 723d07444205a9c3da96926630c1dc705db3f130 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Fri Sep 9 18:49:51 2022 -0400 Allow ctrl c when using --from_file (#472) * added ansi escapes to highlight key parts of CLI session * adjust exception handling so that ^C will abort when reading prompts from a file commit 75f633cda887d7bfcca3ef529d25c52461e11d99 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Fri Sep 9 12:03:45 2022 -0400 re-add new logo commit 10db192cc4be66b3cebbdaa48a1806807578b56f Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Fri Sep 9 09:26:10 2022 -0400 changes to dogettx optimizations to run on m1 * Author @any-winter-4079 * Author @dogettx Thanks to many individuals who contributed time and hardware to benchmarking and debugging these changes. 
commit c85ae00b33d619ab5448246ecda6c8e40d66fa3e Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 23:57:45 2022 -0400 fix bug which caused seed to get "stuck" on previous image even when UI specified -1 commit 1b5aae3ef3218b3f07b9ec48ce72589c0ad33746 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 22:36:47 2022 -0400 add icon to dream web server commit 6abf739315ef83202ff5ad2144888f79f480d88d Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 22:25:09 2022 -0400 add favicon to web server commit db825b813805b7428465e42377d756009e09e836 Merge: 33874ba afee7f9 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 22:17:37 2022 -0400 Merge branch 'deNULL-development' into development commit 33874bae8db71dcdb5525826a1ec93b105e841ad Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 22:16:29 2022 -0400 Squashed commit of the following: commit afee7f9cea2a73a3d62ced667e88aa0fe15020e4 Merge: 6531446 171f8db Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 22:14:32 2022 -0400 Merge branch 'development' of github.com:deNULL/stable-diffusion into deNULL-development commit 171f8db742f18532b6fa03cdfbf4be2bbf6cf3ad Author: Denis Olshin <me@denull.ru> Date: Thu Sep 8 03:15:20 2022 +0300 saving full prompt to metadata when using web ui commit d7e67b62f0ea9b7c8394b7c48786f5cf9c6f9e94 Author: Denis Olshin <me@denull.ru> Date: Thu Sep 8 01:51:47 2022 +0300 better logic for clicking to make variations commit afee7f9cea2a73a3d62ced667e88aa0fe15020e4 Merge: 6531446 171f8db Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 22:14:32 2022 -0400 Merge branch 'development' of github.com:deNULL/stable-diffusion into deNULL-development commit 653144694fbb928d387c615c013ab0f2f1d5ca7f Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 20:41:37 2022 -0400 work around unexplained crash when timesteps=1000 (#440) * work around unexplained crash when timesteps=1000 * this fix seems to work 
commit c33a84cdfdb861a77916cd499e561d4c68ee192a Author: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Fri Sep 9 12:39:51 2022 +1200 Add New Logo (#454) * Add instructions on how to install alongside pyenv (#393) Like probably many others, I have a lot of different virtualenvs, one for each project. Most of them are handled by `pyenv`. After installing according to these instructions I had issues with ´pyenv`and `miniconda` fighting over the $PATH of my system. But then I stumbled upon this nice solution on SO: https://stackoverflow.com/a/73139031 , upon which I have based my suggested changes. It runs perfectly on my M1 setup, with the anaconda setup as a virtual environment handled by pyenv. Feel free to incorporate these instructions as you see fit. Thanks a million for all your hard work. * Disabled debug output (#436) Co-authored-by: Henry van Megen <hvanmegen@gmail.com> * Add New Logo Co-authored-by: Håvard Gulldahl <havard@lurtgjort.no> Co-authored-by: Henry van Megen <h.vanmegen@gmail.com> Co-authored-by: Henry van Megen <hvanmegen@gmail.com> Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com> commit f8a540881c79ae657dc05b47bc71f8648e9f9782 Author: Gérald LONLAS <gerald@lonlas.com> Date: Fri Sep 9 01:45:54 2022 +0800 Add 3x Upscale option on the Web UI (#442) commit 244239e5f656e1f34830b8e8ce99a40decbea324 Author: James Reynolds <magnusviri@users.noreply.github.com> Date: Thu Sep 8 05:36:33 2022 -0600 macOS CI workflow, dream.py exits with an error, but the workflow com… (#396) * macOS CI workflow, dream.py exits with an error, but the workflow completes. 
* Files for testing Co-authored-by: James Reynolds <magnsuviri@me.com> Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com> commit 711d49ed30a0741558ed06d6be38680e00272774 Author: James Reynolds <magnusviri@users.noreply.github.com> Date: Thu Sep 8 05:35:08 2022 -0600 Cache model workflow (#394) * Add workflow that caches the model, step 1 for CI * Change name of workflow job Co-authored-by: James Reynolds <magnsuviri@me.com> Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com> commit 7996a30e3aea1ae9611bbce6e6efaac60aeb95d4 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Thu Sep 8 07:34:03 2022 -0400 add auto-creation of mask for inpainting (#438) * now use a single init image for both image and mask * turn on debugging for now to write out mask and image * add back -M option as a fallback commit a69ca31f349ddcf4c94fd009dc896f4e653f7fa4 Author: elliotsayes <elliotsayes@gmail.com> Date: Thu Sep 8 15:30:06 2022 +1200 .gitignore WebUI temp files (#430) * Add instructions on how to install alongside pyenv (#393) Like probably many others, I have a lot of different virtualenvs, one for each project. Most of them are handled by `pyenv`. After installing according to these instructions I had issues with ´pyenv`and `miniconda` fighting over the $PATH of my system. But then I stumbled upon this nice solution on SO: https://stackoverflow.com/a/73139031 , upon which I have based my suggested changes. It runs perfectly on my M1 setup, with the anaconda setup as a virtual environment handled by pyenv. Feel free to incorporate these instructions as you see fit. Thanks a million for all your hard work. 
* .gitignore WebUI temp files Co-authored-by: Håvard Gulldahl <havard@lurtgjort.no> commit 5c6b612a722ff9cde1a5ddf9b29874842f1d5a26 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Wed Sep 7 22:50:55 2022 -0400 fix bug that caused same seed to be redisplayed repeatedly commit 56f155c5907224b4276adb6ba01bd5c1a3401ee3 Author: Johan Roxendal <johan@roxendal.com> Date: Thu Sep 8 04:50:06 2022 +0200 added support for parsing run log and displaying images in the frontend init state (#410) Co-authored-by: Johan Roxendal <johan.roxendal@litteraturbanken.se> Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com> commit 41687746be5290a4c3d3437957307666d956ae9d Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Wed Sep 7 20:24:35 2022 -0400 added missing initialization of latent_noise to None commit 171f8db742f18532b6fa03cdfbf4be2bbf6cf3ad Author: Denis Olshin <me@denull.ru> Date: Thu Sep 8 03:15:20 2022 +0300 saving full prompt to metadata when using web ui commit d7e67b62f0ea9b7c8394b7c48786f5cf9c6f9e94 Author: Denis Olshin <me@denull.ru> Date: Thu Sep 8 01:51:47 2022 +0300 better logic for clicking to make variations commit d1d044aa87cf8ba95a7e2e553c7fd993ec81a6d7 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Wed Sep 7 17:56:59 2022 -0400 actual image seed now written into web log rather than -1 (#428) commit edada042b318028c77ab50dfbaa0b2671cc69e61 Author: Arturo Mendivil <60411196+artmen1516@users.noreply.github.com> Date: Wed Sep 7 10:42:26 2022 -0700 Improve notebook and add requirements file (#422) commit 29ab3c20280bfa73b9a89c8bd9dc99dc0ad7b651 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Wed Sep 7 13:28:11 2022 -0400 disable neonpixel optimizations on M1 hardware (#414) * disable neonpixel optimizations on M1 hardware * fix typo that was causing random noise images on m1 commit 7670ecc63f3e30e320e2c4197eb7140c6196c168 Author: cody <cnmizell@gmail.com> Date: Wed Sep 7 12:24:41 2022 -0500 add more keyboard support on the web server (#391) add 
ability to submit prompts with the "enter" key add ability to cancel generations with the "escape" key commit dd2aedacaf27d8fe750a342c310bc88de5311931 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Wed Sep 7 13:23:53 2022 -0400 report VRAM usage stats during initial model loading (#419) commit f6284777e6d79bd3d1e85b83aa72d774299a7403 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Tue Sep 6 17:12:39 2022 -0400 Squashed commit of the following: commit 7d1344282d942a33dcecda4d5144fc154ec82915 Merge: caf4ea3 ebeb556 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 5 10:07:27 2022 -0400 Merge branch 'development' of github.com:WebDev9000/stable-diffusion into WebDev9000-development commit ebeb556af9c99b491a83c72f83512683a02a82ad Author: Web Dev 9000 <rirath@gmail.com> Date: Sun Sep 4 18:05:15 2022 -0700 Fixed unintentionally removed lines commit ff2c4b9a1b773b95686d5f3e546e1194de054694 Author: Web Dev 9000 <rirath@gmail.com> Date: Sun Sep 4 17:50:13 2022 -0700 Add ability to recreate variations via image click commit c012929cdae7c37aa3b3b4fa2e7de465458f732a Author: Web Dev 9000 <rirath@gmail.com> Date: Sun Sep 4 14:35:33 2022 -0700 Add files via upload commit 02a601899214adfe4536ce0ba67694a46319fd51 Author: Web Dev 9000 <rirath@gmail.com> Date: Sun Sep 4 14:35:07 2022 -0700 Add files via upload commit eef788981cbed7c68ffd58b4eb22a2df2e59ae0b Author: Olivier Louvignes <olivier@mg-crea.com> Date: Tue Sep 6 12:41:08 2022 +0200 feat(txt2img): allow from_file to work with len(lines) < batch_size (#349) commit 720e5cd6513cd27e6d53feb6475dde20bd39841a Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 5 20:40:10 2022 -0400 Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. 
* The refactoring has moved all the hard-core inference work into ldm.dream.generator.*, where there are submodules for txt2img and img2img. inpaint will go in there as well. * Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> commit 1ad2a8e567b054cfe9df1715aa805218ee185754 Author: thealanle <35761977+thealanle@users.noreply.github.com> Date: Mon Sep 5 17:35:04 2022 -0700 Fix --outdir function for web (#373) * Fix --outdir function for web * Removed unnecessary hardcoded path commit 52d8bb2836cf05994ee5e2c5cf9c8d190dac0524 Author: 
Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 5 10:31:59 2022 -0400 Squashed commit of the following: commit 0cd48e932f1326e000c46f4140f98697eb9bdc79 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Mon Sep 5 10:27:43 2022 -0400 resolve conflicts with development commit d7bc8c12e05535a363ac7c745a3f3abc2773bfcf Author: Scott McMillin <scott@scottmcmillin.com> Date: Sun Sep 4 18:52:09 2022 -0500 Add title attribute back to img tag commit 5397c89184ebfb8260bc2d8c3f23e73e103d24e6 Author: Scott McMillin <scott@scottmcmillin.com> Date: Sun Sep 4 13:49:46 2022 -0500 Remove temp code commit 1da080b50972696db2930681a09cb1c14e524758 Author: Scott McMillin <scott@scottmcmillin.com> Date: Sun Sep 4 13:33:56 2022 -0500 Cleaned up HTML; small style changes; image click opens image; add seed to figcaption beneath image commit caf4ea3d8982416dcf5a80fe4601ac4fbc126cc0 Author: Adam Rice <adam@askadam.io> Date: Mon Sep 5 10:05:39 2022 -0400 Add a 'Remove Image' button to clear the file upload field (#382) * added "remove image" button * styled a new "remove image" button * Update index.js commit 95c088b30342c75ec2ab8c7d7a423ffd11c50099 Author: Kevin Gibbons <bakkot@gmail.com> Date: Sun Sep 4 19:04:14 2022 -0700 Revert "Add CORS headers to dream server to ease integration with third-party web interfaces" (#371) This reverts commit 91e826e5f425333674d1e3bec1fa1ac63cfb382d. 
commit a20113d5a3985a23b7e19301acb57688e31e975c Author: Kevin Gibbons <bakkot@gmail.com> Date: Sun Sep 4 18:59:12 2022 -0700 put no_grad decorator on make_image closures (#375) commit 0f93dadd6ac5aa0fbeee5d72150def775752a153 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 4 21:39:15 2022 -0400 fix several dangling references to --gfpgan option, which no longer exists commit f4004f660e5daba721426cfcd3fe95318fd10bc3 Author: tildebyte <337875+tildebyte@users.noreply.github.com> Date: Sun Sep 4 19:43:04 2022 -0400 TOIL(requirements): Split requirements to per-platform (#355) * toil(reqs): split requirements to per-platform Signed-off-by: Ben Alkov <ben.alkov@gmail.com> * toil(reqs): fix for Win and Lin... ...allow pip to resolve latest torch, numpy Signed-off-by: Ben Alkov <ben.alkov@gmail.com> * toil(install): update reqs in Win install notebook Signed-off-by: Ben Alkov <ben.alkov@gmail.com> Signed-off-by: Ben Alkov <ben.alkov@gmail.com> commit 4406fd138dec0e25409aeaa2b716f88dd95b76d1 Merge: 5116c81 fd7a72e Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 4 08:23:53 2022 -0400 Merge branch 'SebastianAigner-main' into development Add support for full CORS headers for dream server. 
commit fd7a72e147393f32fc40d8f5918ea9bf1401e723 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 4 08:23:11 2022 -0400 remove debugging message commit 3a2be621f36e66b16e60b7f4f9210babfe84c582 Merge: 91e826e 5116c81 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sun Sep 4 08:15:51 2022 -0400 Merge branch 'development' into main commit 5116c8178c67f550e57f5d16fe931ee1a7cdb0ba Author: Justin Wong <1584142+wongjustin99@users.noreply.github.com> Date: Sun Sep 4 07:17:58 2022 -0400 fix save_original flag saving to the same filename (#360) * Update README.md with new Anaconda install steps (#347) pip3 version did not work for me and this is the recommended way to install Anaconda now it seems * fix save_original flag saving to the same filename Before this, the `--save_orig` flag was not working. The upscaled/GFPGAN would overwrite the original output image. Co-authored-by: greentext2 <112735219+greentext2@users.noreply.github.com> commit 91e826e5f425333674d1e3bec1fa1ac63cfb382d Author: Sebastian Aigner <SebastianAigner@users.noreply.github.com> Date: Sun Sep 4 10:22:54 2022 +0200 Add CORS headers to dream server to ease integration with third-party web interfaces commit 6266d9e8d6421ee732338560f825771e461cefb0 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 15:45:20 2022 -0400 remove stray debugging message commit 138956e5162679f6894ce75462907c9eeed83cbb Author: greentext2 <112735219+greentext2@users.noreply.github.com> Date: Sat Sep 3 13:38:57 2022 -0500 Update README.md with new Anaconda install steps (#347) pip3 version did not work for me and this is the recommended way to install Anaconda now it seems commit 60be735e802a1c3cd2812c5d8e63f9ed467ea9d9 Author: Cora Johnson-Roberson <cora.johnson.roberson@gmail.com> Date: Sat Sep 3 14:28:34 2022 -0400 Switch to regular pytorch channel and restore Python 3.10 for Macs. (#301) * Switch to regular pytorch channel and restore Python 3.10 for Macs. 
Although pytorch-nightly should in theory be faster, it is currently causing increased memory usage and slower iterations: https://github.com/lstein/stable-diffusion/pull/283#issuecomment-1234784885 This changes the environment-mac.yaml file back to the regular pytorch channel and moves the `transformers` dep into pip for now (since it cannot be satisfied until tokenizers>=0.11 is built for Python 3.10). * Specify versions for Pip packages as well. commit d0d95d3a2a4b7a91c5c4f570d88af43a2c3afe75 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 14:10:31 2022 -0400 make initimg appear in web log commit b90a21500037f07bb1b5d143045253ee6bc67391 Merge: 1eee811 6270e31 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 13:47:15 2022 -0400 Merge branch 'prixt-seamless' into development commit 6270e313b8d87b33cb914f12558e34bc2f0ae357 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 13:46:29 2022 -0400 add credit to prixt for seamless circular tiling commit a01b7bdc40af5376177de30b76dc075b523b3450 Merge: 1eee811 9d88abe Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 13:43:04 2022 -0400 add web interface for seamless option commit 1eee8111b95241f54b49f58605ab343a52325b89 Merge: 64eca42 fb857f0 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 12:33:39 2022 -0400 Merge branch 'development' of github.com:lstein/stable-diffusion into development commit 64eca42610b92cb73a30c405ab9dad28990c15e1 Merge: 9130ad7 21a1f68 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 12:33:05 2022 -0400 Merge branch 'main' into development * brings in small documentation fixes that were added directly to main during release tweaking. 
commit fb857f05ba0eda5cf9bbe0f60b73a73d75562d85 Author: Lincoln Stein <lincoln.stein@gmail.com> Date: Sat Sep 3 12:07:07 2022 -0400 fix typo in docs commit 9d88abe2ea1fed6231ffd822956614589a1075b7 Author: prixt <paraxite@naver.com> Date: Sat Sep 3 22:42:16 2022 +0900 fixed typo commit a61e49bc974af0fc01c8424d7df9262f63ecf289 Author: prixt <paraxite@naver.com> Date: Sat Sep 3 22:39:35 2022 +0900 * Removed unnecessary code * Added description about --seamless commit 02bee4fdb1534b71c5e609204506efb66699b2bc Author: prixt <paraxite@naver.com> Date: Sat Sep 3 16:08:03 2022 +0900 added --seamless tag logging to normalize_prompt commit d922b53c26f3e9a11ecb920536b9632ec69df5f6 Author: prixt <paraxite@naver.com> Date: Sat Sep 3 15:13:31 2022 +0900 added seamless tiling mode and commands
919 lines
35 KiB
Python
919 lines
35 KiB
Python
# pytorch_diffusion + derived encoder decoder
|
|
import gc
|
|
import math
|
|
import torch
|
|
import torch.nn as nn
|
|
import numpy as np
|
|
from einops import rearrange
|
|
|
|
from ldm.util import instantiate_from_config
|
|
from ldm.modules.attention import LinearAttention
|
|
|
|
import psutil
|
|
|
|
def get_timestep_embedding(timesteps, embedding_dim):
    """
    Build sinusoidal timestep embeddings.

    This matches the implementation in Denoising Diffusion Probabilistic
    Models (which follows Fairseq / tensor2tensor) and differs slightly from
    the description in Section 3.5 of "Attention Is All You Need".

    Args:
        timesteps: 1-D tensor holding one timestep per batch element.
        embedding_dim: width of the embedding to produce.

    Returns:
        A float32 tensor of shape ``(len(timesteps), embedding_dim)`` on the
        same device as ``timesteps``. The first ``embedding_dim // 2`` columns
        are sines, the next ``embedding_dim // 2`` are cosines, and an odd
        ``embedding_dim`` gets one trailing column of zeros.
    """
    assert len(timesteps.shape) == 1

    half_dim = embedding_dim // 2
    # With half_dim == 1 (embedding_dim of 2 or 3) the original denominator
    # ``half_dim - 1`` is zero and raises ZeroDivisionError.  Clamping it to 1
    # yields the single frequency exp(0) == 1 and leaves every other
    # embedding_dim numerically unchanged.
    log_step = math.log(10000) / max(half_dim - 1, 1)
    freqs = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -log_step)
    freqs = freqs.to(device=timesteps.device)
    # Outer product: one row per timestep, one column per frequency.
    angles = timesteps.float()[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
    if embedding_dim % 2 == 1:  # zero pad the last column
        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
    return emb
|
|
|
|
|
|
def nonlinearity(x):
    """Apply the swish activation, ``x * sigmoid(x)``, to ``x``."""
    gate = torch.sigmoid(x)
    return x * gate
|
|
|
|
|
|
def Normalize(in_channels, num_groups=32):
    """Return an affine ``GroupNorm`` over ``in_channels`` channels.

    The layer uses ``num_groups`` groups (32 by default) and ``eps=1e-6``.
    """
    norm_kwargs = dict(
        num_groups=num_groups,
        num_channels=in_channels,
        eps=1e-6,
        affine=True,
    )
    return torch.nn.GroupNorm(**norm_kwargs)
|
|
|
|
|
|
class Upsample(nn.Module):
    """Double the spatial size with nearest-neighbour interpolation.

    When ``with_conv`` is true, the interpolated tensor is additionally passed
    through a 3x3, stride-1, padding-1 convolution that keeps the channel
    count at ``in_channels``.
    """

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not self.with_conv:
            return
        self.conv = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=1, padding=1
        )

    def forward(self, x):
        upsampled = torch.nn.functional.interpolate(
            x, scale_factor=2.0, mode="nearest"
        )
        if not self.with_conv:
            return upsampled
        return self.conv(upsampled)
|
|
|
|
|
|
class Downsample(nn.Module):
    """Halve the spatial size, by strided convolution or average pooling.

    With ``with_conv`` true, the input is zero-padded by one pixel on the
    right and bottom and passed through a 3x3, stride-2 convolution;
    otherwise a 2x2 average pool with stride 2 is used.
    """

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not self.with_conv:
            return
        # torch convolutions have no asymmetric padding, so the conv is built
        # with padding=0 and forward() pads the input explicitly.
        self.conv = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=3, stride=2, padding=0
        )

    def forward(self, x):
        if not self.with_conv:
            return torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
        # (left, right, top, bottom): pad only the right and bottom edges.
        padded = torch.nn.functional.pad(
            x, (0, 1, 0, 1), mode="constant", value=0
        )
        return self.conv(padded)
|
|
|
|
|
|
class ResnetBlock(nn.Module):
    """Residual block: two norm -> swish -> 3x3 conv stages plus a skip path.

    A projected timestep embedding is added after the first convolution when
    ``temb`` is supplied to ``forward``. If the channel count changes, the
    skip path goes through a 3x3 convolution (``conv_shortcut=True``) or a
    1x1 convolution (the default) so the two branches can be summed.
    """

    def __init__(self, *, in_channels, out_channels=None, conv_shortcut=False,
                 dropout, temb_channels=512):
        super().__init__()
        if out_channels is None:
            out_channels = in_channels
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.use_conv_shortcut = conv_shortcut

        conv3x3 = dict(kernel_size=3, stride=1, padding=1)

        self.norm1 = Normalize(in_channels)
        self.conv1 = torch.nn.Conv2d(in_channels, out_channels, **conv3x3)
        # The embedding projection only exists when temb_channels > 0.
        if temb_channels > 0:
            self.temb_proj = torch.nn.Linear(temb_channels, out_channels)
        self.norm2 = Normalize(out_channels)
        self.dropout = torch.nn.Dropout(dropout)
        self.conv2 = torch.nn.Conv2d(out_channels, out_channels, **conv3x3)

        if self.in_channels == self.out_channels:
            return
        # Channel counts differ: the skip path needs its own projection.
        if self.use_conv_shortcut:
            self.conv_shortcut = torch.nn.Conv2d(
                in_channels, out_channels, **conv3x3
            )
        else:
            self.nin_shortcut = torch.nn.Conv2d(
                in_channels, out_channels, kernel_size=1, stride=1, padding=0
            )

    def forward(self, x, temb):
        # Rebinding a single name drops this function's reference to each
        # intermediate as soon as its successor has been computed.
        h = self.norm1(x)
        h = nonlinearity(h)
        h = self.conv1(h)

        if temb is not None:
            h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]

        h = self.norm2(h)
        h = nonlinearity(h)
        h = self.dropout(h)
        h = self.conv2(h)

        if self.in_channels != self.out_channels:
            if self.use_conv_shortcut:
                shortcut = self.conv_shortcut
            else:
                shortcut = self.nin_shortcut
            x = shortcut(x)

        return x + h
|
|
|
|
class LinAttnBlock(LinearAttention):
    """LinearAttention with the same one-argument constructor as AttnBlock."""

    def __init__(self, in_channels):
        # One head whose width equals the channel count.
        super().__init__(dim=in_channels, dim_head=in_channels, heads=1)
|
|
|
|
|
|
class AttnBlock(nn.Module):
    """Single-head self-attention over the spatial positions of a feature map.

    The (hw x hw) attention matrix is never materialised at once when memory
    is tight: queries are processed in slices whose size is chosen from the
    memory currently available on the tensor's device.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels

        self.norm = Normalize(in_channels)
        self.q = torch.nn.Conv2d(in_channels,
                                 in_channels,
                                 kernel_size=1,
                                 stride=1,
                                 padding=0)
        self.k = torch.nn.Conv2d(in_channels,
                                 in_channels,
                                 kernel_size=1,
                                 stride=1,
                                 padding=0)
        self.v = torch.nn.Conv2d(in_channels,
                                 in_channels,
                                 kernel_size=1,
                                 stride=1,
                                 padding=0)
        self.proj_out = torch.nn.Conv2d(in_channels,
                                        in_channels,
                                        kernel_size=1,
                                        stride=1,
                                        padding=0)

    @staticmethod
    def _slice_size(q, k):
        """Return how many query positions to process per attention step.

        ``q`` is (b, hw, c) and ``k`` is (b, c, hw). The result is always
        at least 1.
        """
        n_queries = q.shape[1]
        device_type = q.device.type

        if device_type == 'cuda':
            # Memory that can still be used: free device memory plus memory
            # the caching allocator has reserved but is not actively using.
            stats = torch.cuda.memory_stats(q.device)
            mem_active = stats['active_bytes.all.current']
            mem_reserved = stats['reserved_bytes.all.current']
            # query the device that holds q, not whichever device is current
            mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
            mem_free_total = mem_free_cuda + (mem_reserved - mem_active)

            # bytes of the full score matrix, budgeting 4 bytes per element
            tensor_size = q.shape[0] * n_queries * k.shape[2] * 4
            mem_required = tensor_size * 2.5
            steps = 1
            if mem_required > mem_free_total:
                # round the shortfall ratio up to a power of two;
                # max(..., 1) avoids dividing by zero when nothing is free
                steps = 2**(math.ceil(math.log(mem_required / max(mem_free_total, 1), 2)))

            # only slice when the queries split evenly
            slice_size = n_queries // steps if (n_queries % steps) == 0 else n_queries

        elif device_type == 'mps':
            if psutil.virtual_memory().available / (1024**3) < 12:
                slice_size = 1
            else:
                slice_size = min(n_queries, math.floor(2**30 / (q.shape[0] * n_queries)))

        else:
            # CPU and any other backend: the CUDA allocator statistics are
            # not available there, so attend to everything in one step.
            slice_size = n_queries

        # a zero step would make range() raise
        return max(1, slice_size)

    def forward(self, x):
        """Return ``x + proj_out(attention(norm(x)))`` for a (b, c, h, w) input."""
        h_ = self.norm(x)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)
        del h_

        # compute attention
        b, c, h, w = q.shape

        q = q.reshape(b, c, h*w).permute(0, 2, 1)   # b,hw,c
        k = k.reshape(b, c, h*w)                    # b,c,hw
        v = v.reshape(b, c, h*w)                    # b,c,hw (loop invariant)

        out = torch.zeros_like(k, device=q.device)
        scale = int(c)**(-0.5)
        slice_size = self._slice_size(q, k)

        for i in range(0, q.shape[1], slice_size):
            end = i + slice_size

            # w_[b,i,j] = sum_c q[b,i,c] k[b,c,j]
            w_ = torch.bmm(q[:, i:end], k)
            w_ = w_ * scale
            w_ = torch.nn.functional.softmax(w_, dim=2)

            # attend to values: out[b,c,j] = sum_i v[b,c,i] w_[b,j,i]
            out[:, :, i:end] = torch.bmm(v, w_.permute(0, 2, 1))
            del w_

        out = self.proj_out(out.reshape(b, c, h, w))
        out += x

        return out
|
|
|
|
|
|
def make_attn(in_channels, attn_type="vanilla"):
    """Build the attention module selected by ``attn_type``.

    ``"vanilla"`` gives an AttnBlock, ``"linear"`` a LinAttnBlock and
    ``"none"`` an ``nn.Identity`` placeholder. Any other value fails the
    assertion.
    """
    assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown'
    print(f"making attention of type '{attn_type}' with {in_channels} in_channels")

    if attn_type == "none":
        return nn.Identity(in_channels)
    if attn_type == "vanilla":
        return AttnBlock(in_channels)
    # only "linear" is left after the assertion above
    return LinAttnBlock(in_channels)
|
|
|
|
|
|
class Model(nn.Module):
    """Encoder/decoder network with skip connections and optional timestep conditioning.

    The downsampling path stores every intermediate activation; the
    upsampling path pops them back off and concatenates each one with the
    current feature map along the channel axis.
    """

    def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
                 attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
                 resolution, use_timestep=True, use_linear_attn=False, attn_type="vanilla"):
        super().__init__()
        if use_linear_attn:
            attn_type = "linear"
        self.ch = ch
        self.temb_ch = self.ch * 4
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels
        self.use_timestep = use_timestep

        def resnet(c_in, c_out):
            # every residual block of this model shares these settings
            return ResnetBlock(in_channels=c_in,
                               out_channels=c_out,
                               temb_channels=self.temb_ch,
                               dropout=dropout)

        if self.use_timestep:
            # timestep embedding: two dense layers held in a bare container
            self.temb = nn.Module()
            self.temb.dense = nn.ModuleList([
                torch.nn.Linear(self.ch, self.temb_ch),
                torch.nn.Linear(self.temb_ch, self.temb_ch),
            ])

        # ---- downsampling path ----
        self.conv_in = torch.nn.Conv2d(in_channels, self.ch,
                                       kernel_size=3, stride=1, padding=1)

        curr_res = resolution
        in_ch_mult = (1,) + tuple(ch_mult)
        last_level = self.num_resolutions - 1
        self.down = nn.ModuleList()
        for i_level in range(self.num_resolutions):
            stage = nn.Module()
            stage.block = nn.ModuleList()
            stage.attn = nn.ModuleList()
            block_in = ch * in_ch_mult[i_level]
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks):
                stage.block.append(resnet(block_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    stage.attn.append(make_attn(block_in, attn_type=attn_type))
            if i_level != last_level:
                stage.downsample = Downsample(block_in, resamp_with_conv)
                curr_res = curr_res // 2
            self.down.append(stage)

        # ---- middle ----
        self.mid = nn.Module()
        self.mid.block_1 = resnet(block_in, block_in)
        self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
        self.mid.block_2 = resnet(block_in, block_in)

        # ---- upsampling path (built from the coarsest level upwards) ----
        self.up = nn.ModuleList()
        for i_level in reversed(range(self.num_resolutions)):
            stage = nn.Module()
            stage.block = nn.ModuleList()
            stage.attn = nn.ModuleList()
            block_out = ch * ch_mult[i_level]
            skip_in = ch * ch_mult[i_level]
            for i_block in range(self.num_res_blocks + 1):
                if i_block == self.num_res_blocks:
                    # the last block of a level consumes the skip that
                    # entered the matching down level
                    skip_in = ch * in_ch_mult[i_level]
                stage.block.append(resnet(block_in + skip_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    stage.attn.append(make_attn(block_in, attn_type=attn_type))
            if i_level != 0:
                stage.upsample = Upsample(block_in, resamp_with_conv)
                curr_res = curr_res * 2
            self.up.insert(0, stage)  # prepend to get consistent order

        # ---- output head ----
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(block_in, out_ch,
                                        kernel_size=3, stride=1, padding=1)

    def forward(self, x, t=None, context=None):
        """Run the network on ``x``.

        ``context``, when given, is concatenated to ``x`` along dim 1.
        ``t`` is required when the model was built with ``use_timestep``.
        """
        #assert x.shape[2] == x.shape[3] == self.resolution
        if context is not None:
            # assume aligned context, cat along channel axis
            x = torch.cat((x, context), dim=1)

        temb = None
        if self.use_timestep:
            # timestep embedding
            assert t is not None
            temb = get_timestep_embedding(t, self.ch)
            temb = self.temb.dense[0](temb)
            temb = nonlinearity(temb)
            temb = self.temb.dense[1](temb)

        # downsampling: keep every activation for the skip connections
        hs = [self.conv_in(x)]
        last_level = self.num_resolutions - 1
        for i_level in range(self.num_resolutions):
            stage = self.down[i_level]
            has_attn = len(stage.attn) > 0
            for i_block in range(self.num_res_blocks):
                h = stage.block[i_block](hs[-1], temb)
                if has_attn:
                    h = stage.attn[i_block](h)
                hs.append(h)
            if i_level != last_level:
                hs.append(stage.downsample(hs[-1]))

        # middle
        h = self.mid.block_1(hs[-1], temb)
        h = self.mid.attn_1(h)
        h = self.mid.block_2(h, temb)

        # upsampling: consume the stored activations in reverse order
        for i_level in reversed(range(self.num_resolutions)):
            stage = self.up[i_level]
            has_attn = len(stage.attn) > 0
            for i_block in range(self.num_res_blocks + 1):
                merged = torch.cat([h, hs.pop()], dim=1)
                h = stage.block[i_block](merged, temb)
                if has_attn:
                    h = stage.attn[i_block](h)
            if i_level != 0:
                h = stage.upsample(h)

        # end
        h = self.norm_out(h)
        h = nonlinearity(h)
        return self.conv_out(h)

    def get_last_layer(self):
        """Return the weight tensor of the final convolution."""
        return self.conv_out.weight
|
|
|
|
|
|
class Encoder(nn.Module):
    """Downsampling stack without timestep conditioning.

    The output convolution emits ``2 * z_channels`` channels when
    ``double_z`` is true and ``z_channels`` otherwise. Unknown keyword
    arguments are accepted and ignored.
    """

    def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
                 attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
                 resolution, z_channels, double_z=True, use_linear_attn=False, attn_type="vanilla",
                 **ignore_kwargs):
        super().__init__()
        if use_linear_attn:
            attn_type = "linear"
        self.ch = ch
        self.temb_ch = 0  # residual blocks are built without an embedding projection
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels

        def resnet(c_in, c_out):
            return ResnetBlock(in_channels=c_in,
                               out_channels=c_out,
                               temb_channels=self.temb_ch,
                               dropout=dropout)

        # ---- downsampling ----
        self.conv_in = torch.nn.Conv2d(in_channels, self.ch,
                                       kernel_size=3, stride=1, padding=1)

        curr_res = resolution
        in_ch_mult = (1,) + tuple(ch_mult)
        self.in_ch_mult = in_ch_mult
        last_level = self.num_resolutions - 1
        self.down = nn.ModuleList()
        for i_level in range(self.num_resolutions):
            stage = nn.Module()
            stage.block = nn.ModuleList()
            stage.attn = nn.ModuleList()
            block_in = ch * in_ch_mult[i_level]
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks):
                stage.block.append(resnet(block_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    stage.attn.append(make_attn(block_in, attn_type=attn_type))
            if i_level != last_level:
                stage.downsample = Downsample(block_in, resamp_with_conv)
                curr_res = curr_res // 2
            self.down.append(stage)

        # ---- middle ----
        self.mid = nn.Module()
        self.mid.block_1 = resnet(block_in, block_in)
        self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
        self.mid.block_2 = resnet(block_in, block_in)

        # ---- output head ----
        self.norm_out = Normalize(block_in)
        latent_channels = 2 * z_channels if double_z else z_channels
        self.conv_out = torch.nn.Conv2d(block_in, latent_channels,
                                        kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Encode ``x`` through the down path, the middle stage and the output head."""
        # no timestep embedding in the encoder
        temb = None

        # downsampling
        hs = [self.conv_in(x)]
        last_level = self.num_resolutions - 1
        for i_level in range(self.num_resolutions):
            stage = self.down[i_level]
            has_attn = len(stage.attn) > 0
            for i_block in range(self.num_res_blocks):
                h = stage.block[i_block](hs[-1], temb)
                if has_attn:
                    h = stage.attn[i_block](h)
                hs.append(h)
            if i_level != last_level:
                hs.append(stage.downsample(hs[-1]))

        # middle
        h = self.mid.block_1(hs[-1], temb)
        h = self.mid.attn_1(h)
        h = self.mid.block_2(h, temb)

        # end
        h = self.norm_out(h)
        h = nonlinearity(h)
        return self.conv_out(h)
|
|
|
|
|
|
class Decoder(nn.Module):
    """Upsampling stack that maps a latent ``z`` to ``out_ch`` channels.

    ``give_pre_end`` returns the features before the output head;
    ``tanh_out`` applies ``tanh`` to the final result. Unknown keyword
    arguments are accepted and ignored.
    """

    def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks,
                 attn_resolutions, dropout=0.0, resamp_with_conv=True, in_channels,
                 resolution, z_channels, give_pre_end=False, tanh_out=False, use_linear_attn=False,
                 attn_type="vanilla", **ignorekwargs):
        super().__init__()
        if use_linear_attn:
            attn_type = "linear"
        self.ch = ch
        self.temb_ch = 0  # residual blocks are built without an embedding projection
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels
        self.give_pre_end = give_pre_end
        self.tanh_out = tanh_out

        def resnet(c_in, c_out):
            return ResnetBlock(in_channels=c_in,
                               out_channels=c_out,
                               temb_channels=self.temb_ch,
                               dropout=dropout)

        # channel width and spatial size at the lowest resolution
        coarsest = self.num_resolutions - 1
        block_in = ch * ch_mult[coarsest]
        curr_res = resolution // 2**coarsest
        self.z_shape = (1, z_channels, curr_res, curr_res)
        print("Working with z of shape {} = {} dimensions.".format(
            self.z_shape, np.prod(self.z_shape)))

        # z to block_in
        self.conv_in = torch.nn.Conv2d(z_channels, block_in,
                                       kernel_size=3, stride=1, padding=1)

        # ---- middle ----
        self.mid = nn.Module()
        self.mid.block_1 = resnet(block_in, block_in)
        self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
        self.mid.block_2 = resnet(block_in, block_in)

        # ---- upsampling (built from the coarsest level upwards) ----
        self.up = nn.ModuleList()
        for i_level in reversed(range(self.num_resolutions)):
            stage = nn.Module()
            stage.block = nn.ModuleList()
            stage.attn = nn.ModuleList()
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks + 1):
                stage.block.append(resnet(block_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    stage.attn.append(make_attn(block_in, attn_type=attn_type))
            if i_level != 0:
                stage.upsample = Upsample(block_in, resamp_with_conv)
                curr_res = curr_res * 2
            self.up.insert(0, stage)  # prepend to get consistent order

        # ---- output head ----
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(block_in, out_ch,
                                        kernel_size=3, stride=1, padding=1)

    def forward(self, z):
        """Decode ``z``; also records ``z.shape`` in ``self.last_z_shape``."""
        #assert z.shape[1:] == self.z_shape[1:]
        self.last_z_shape = z.shape

        # no timestep embedding in the decoder
        temb = None

        # Each step rebinds ``h`` so the previous activation can be freed
        # immediately.
        h = self.conv_in(z)          # z to block_in
        h = self.mid.block_1(h, temb)
        h = self.mid.attn_1(h)
        h = self.mid.block_2(h, temb)

        # release unused memory before the upsampling path;
        # the CUDA cache flush is skipped only on mps
        gc.collect()
        if h.device.type != 'mps':
            torch.cuda.empty_cache()

        # upsampling
        for i_level in reversed(range(self.num_resolutions)):
            stage = self.up[i_level]
            has_attn = len(stage.attn) > 0
            for i_block in range(self.num_res_blocks + 1):
                h = stage.block[i_block](h, temb)
                if has_attn:
                    h = stage.attn[i_block](h)
            if i_level != 0:
                h = stage.upsample(h)

        # end
        if self.give_pre_end:
            return h

        h = self.norm_out(h)
        h = nonlinearity(h)
        h = self.conv_out(h)
        if self.tanh_out:
            h = torch.tanh(h)
        return h
|
|
|
|
|
|
class SimpleDecoder(nn.Module):
    """Small decoder: 1x1 conv, three residual blocks, 1x1 conv, upsample, output head.

    Extra positional and keyword arguments are accepted and ignored.
    """

    def __init__(self, in_channels, out_channels, *args, **kwargs):
        super().__init__()
        # (input, output) widths of the three residual blocks
        widths = (
            (in_channels, 2 * in_channels),
            (2 * in_channels, 4 * in_channels),
            (4 * in_channels, 2 * in_channels),
        )
        layers = [nn.Conv2d(in_channels, in_channels, 1)]
        for c_in, c_out in widths:
            layers.append(ResnetBlock(in_channels=c_in,
                                      out_channels=c_out,
                                      temb_channels=0, dropout=0.0))
        layers.append(nn.Conv2d(2*in_channels, in_channels, 1))
        layers.append(Upsample(in_channels, with_conv=True))
        self.model = nn.ModuleList(layers)

        # end
        self.norm_out = Normalize(in_channels)
        self.conv_out = torch.nn.Conv2d(in_channels, out_channels,
                                        kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Apply the layer stack followed by the output head."""
        for idx, layer in enumerate(self.model):
            # positions 1..3 hold the residual blocks, which take (x, temb)
            x = layer(x, None) if idx in (1, 2, 3) else layer(x)

        h = nonlinearity(self.norm_out(x))
        return self.conv_out(h)
|
|
|
|
|
|
class UpsampleDecoder(nn.Module):
    """Residual blocks per level, with an Upsample between consecutive levels."""

    def __init__(self, in_channels, out_channels, ch, num_res_blocks, resolution,
                 ch_mult=(2,2), dropout=0.0):
        super().__init__()
        # upsampling
        self.temb_ch = 0
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        block_in = in_channels
        # tracked for bookkeeping only; nothing below reads it
        curr_res = resolution // 2 ** (self.num_resolutions - 1)
        self.res_blocks = nn.ModuleList()
        self.upsample_blocks = nn.ModuleList()
        final_level = self.num_resolutions - 1
        for i_level in range(self.num_resolutions):
            level_blocks = []
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks + 1):
                level_blocks.append(ResnetBlock(in_channels=block_in,
                                                out_channels=block_out,
                                                temb_channels=self.temb_ch,
                                                dropout=dropout))
                block_in = block_out
            self.res_blocks.append(nn.ModuleList(level_blocks))
            if i_level != final_level:
                self.upsample_blocks.append(Upsample(block_in, True))
                curr_res = curr_res * 2

        # end
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(block_in, out_channels,
                                        kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Run every level's residual blocks, upsampling after all but the last level."""
        h = x
        final_level = self.num_resolutions - 1
        for i_level in range(self.num_resolutions):
            level_blocks = self.res_blocks[i_level]
            for i_block in range(self.num_res_blocks + 1):
                h = level_blocks[i_block](h, None)
            if i_level != final_level:
                h = self.upsample_blocks[i_level](h)
        h = self.norm_out(h)
        h = nonlinearity(h)
        return self.conv_out(h)
|
|
|
|
|
|
class LatentRescaler(nn.Module):
    """Residual blocks, spatial interpolation by ``factor``, attention, residual blocks."""

    def __init__(self, factor, in_channels, mid_channels, out_channels, depth=2):
        super().__init__()

        def residual_stack():
            # ``depth`` width-preserving residual blocks without embedding input
            return nn.ModuleList([
                ResnetBlock(in_channels=mid_channels,
                            out_channels=mid_channels,
                            temb_channels=0,
                            dropout=0.0)
                for _ in range(depth)
            ])

        # residual block, interpolate, residual block
        self.factor = factor
        self.conv_in = nn.Conv2d(in_channels, mid_channels,
                                 kernel_size=3, stride=1, padding=1)
        self.res_block1 = residual_stack()
        self.attn = AttnBlock(mid_channels)
        self.res_block2 = residual_stack()

        self.conv_out = nn.Conv2d(mid_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Rescale the two trailing dimensions of ``x`` by ``self.factor``."""
        x = self.conv_in(x)
        for block in self.res_block1:
            x = block(x, None)

        # target size is the rounded product of each trailing dim and the factor
        target_size = (int(round(x.shape[2]*self.factor)),
                       int(round(x.shape[3]*self.factor)))
        x = torch.nn.functional.interpolate(x, size=target_size)
        x = self.attn(x)

        for block in self.res_block2:
            x = block(x, None)
        return self.conv_out(x)
|
|
|
|
|
|
class MergedRescaleEncoder(nn.Module):
    """An Encoder followed by a LatentRescaler."""

    def __init__(self, in_channels, ch, resolution, out_ch, num_res_blocks,
                 attn_resolutions, dropout=0.0, resamp_with_conv=True,
                 ch_mult=(1,2,4,8), rescale_factor=1.0, rescale_module_depth=1):
        super().__init__()
        # width handed from the encoder to the rescaler
        intermediate_chn = ch * ch_mult[-1]
        encoder_kwargs = dict(
            in_channels=in_channels,
            num_res_blocks=num_res_blocks,
            ch=ch,
            ch_mult=ch_mult,
            z_channels=intermediate_chn,
            double_z=False,
            resolution=resolution,
            attn_resolutions=attn_resolutions,
            dropout=dropout,
            resamp_with_conv=resamp_with_conv,
            out_ch=None,
        )
        self.encoder = Encoder(**encoder_kwargs)
        self.rescaler = LatentRescaler(factor=rescale_factor,
                                       in_channels=intermediate_chn,
                                       mid_channels=intermediate_chn,
                                       out_channels=out_ch,
                                       depth=rescale_module_depth)

    def forward(self, x):
        """Encode ``x`` and then rescale the result."""
        return self.rescaler(self.encoder(x))
|
|
|
|
|
|
class MergedRescaleDecoder(nn.Module):
    """A LatentRescaler followed by a Decoder."""

    def __init__(self, z_channels, out_ch, resolution, num_res_blocks, attn_resolutions, ch, ch_mult=(1,2,4,8),
                 dropout=0.0, resamp_with_conv=True, rescale_factor=1.0, rescale_module_depth=1):
        super().__init__()
        # width handed from the rescaler to the decoder
        tmp_chn = z_channels*ch_mult[-1]
        decoder_kwargs = dict(
            out_ch=out_ch,
            z_channels=tmp_chn,
            attn_resolutions=attn_resolutions,
            dropout=dropout,
            resamp_with_conv=resamp_with_conv,
            in_channels=None,
            num_res_blocks=num_res_blocks,
            ch_mult=ch_mult,
            resolution=resolution,
            ch=ch,
        )
        # the decoder is created first, then the rescaler
        self.decoder = Decoder(**decoder_kwargs)
        self.rescaler = LatentRescaler(factor=rescale_factor,
                                       in_channels=z_channels,
                                       mid_channels=tmp_chn,
                                       out_channels=tmp_chn,
                                       depth=rescale_module_depth)

    def forward(self, x):
        """Rescale ``x`` and then decode the result."""
        return self.decoder(self.rescaler(x))
|
|
|
|
|
|
class Upsampler(nn.Module):
    """A LatentRescaler followed by an attention-free Decoder built for ``out_size``."""

    def __init__(self, in_size, out_size, in_channels, out_channels, ch_mult=2):
        super().__init__()
        assert out_size >= in_size
        # one decoder level per power of two in the integer size ratio, plus one
        num_blocks = int(np.log2(out_size//in_size))+1
        # the remainder of the size division sets the rescaler factor
        factor_up = 1.+ (out_size % in_size)
        print(f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}")
        self.rescaler = LatentRescaler(factor=factor_up,
                                       in_channels=in_channels,
                                       mid_channels=2*in_channels,
                                       out_channels=in_channels)
        # every decoder level uses the same channel multiplier
        level_mults = [ch_mult] * num_blocks
        self.decoder = Decoder(out_ch=out_channels,
                               resolution=out_size,
                               z_channels=in_channels,
                               num_res_blocks=2,
                               attn_resolutions=[],
                               in_channels=None,
                               ch=in_channels,
                               ch_mult=level_mults)

    def forward(self, x):
        """Rescale ``x`` and then decode the result."""
        return self.decoder(self.rescaler(x))
|
|
|
|
|
|
class Resize(nn.Module):
    """Resize a tensor with ``torch.nn.functional.interpolate`` in a fixed mode.

    Args:
        in_channels: only relevant to the learned variant, which is not
            implemented.
        learned: requesting the learned variant prints a note and raises
            ``NotImplementedError``.
        mode: interpolation mode passed to ``interpolate``.

    Raises:
        NotImplementedError: if ``learned`` is true.
    """

    def __init__(self, in_channels=None, learned=False, mode="bilinear"):
        super().__init__()
        self.with_conv = learned
        self.mode = mode
        if self.with_conv:
            # ``__name__`` needs both pairs of underscores: a bare ``__name``
            # inside a class body is name-mangled and raises AttributeError
            # before the intended NotImplementedError is reached.
            print(f"Note: {self.__class__.__name__} uses learned downsampling and will ignore the fixed {mode} mode")
            # The learned variant (a strided convolution) was never finished.
            raise NotImplementedError()

    def forward(self, x, scale_factor=1.0):
        """Return ``x`` unchanged for a factor of 1.0, otherwise the interpolated tensor."""
        if scale_factor == 1.0:
            return x
        return torch.nn.functional.interpolate(x, mode=self.mode, align_corners=False,
                                               scale_factor=scale_factor)
|
|
|
|
class FirstStagePostProcessor(nn.Module):
    """Encode with a pretrained model, then project and downsample the result.

    Either ``pretrained_model`` or ``pretrained_config`` must be supplied;
    the config is used when both are given.
    """

    def __init__(self, ch_mult:list, in_channels,
                 pretrained_model:nn.Module=None,
                 reshape=False,
                 n_channels=None,
                 dropout=0.,
                 pretrained_config=None):
        super().__init__()
        if pretrained_config is not None:
            self.instantiate_pretrained(pretrained_config)
        else:
            assert pretrained_model is not None, 'Either "pretrained_model" or "pretrained_config" must not be None'
            self.pretrained_model = pretrained_model

        self.do_reshape = reshape

        if n_channels is None:
            # fall back to the base width of the pretrained encoder
            n_channels = self.pretrained_model.encoder.ch

        self.proj_norm = Normalize(in_channels, num_groups=in_channels//2)
        self.proj = nn.Conv2d(in_channels, n_channels, kernel_size=3,
                              stride=1, padding=1)

        res_blocks = []
        pools = []
        width = n_channels
        for mult in ch_mult:
            next_width = mult * n_channels
            res_blocks.append(ResnetBlock(in_channels=width, out_channels=next_width, dropout=dropout))
            pools.append(Downsample(next_width, with_conv=False))
            width = next_width

        self.model = nn.ModuleList(res_blocks)
        self.downsampler = nn.ModuleList(pools)

    def instantiate_pretrained(self, config):
        """Build the pretrained model from ``config``, put it in eval mode and freeze its parameters."""
        self.pretrained_model = instantiate_from_config(config).eval()
        # self.pretrained_model.train = False
        for frozen in self.pretrained_model.parameters():
            frozen.requires_grad = False

    @torch.no_grad()
    def encode_with_pretrained(self, x):
        """Encode ``x`` without gradients; a DiagonalGaussianDistribution result is reduced to its mode."""
        encoded = self.pretrained_model.encode(x)
        if isinstance(encoded, DiagonalGaussianDistribution):
            return encoded.mode()
        return encoded

    def forward(self, x):
        """Encode, project, run each residual block with its downsampler, then optionally flatten."""
        z = self.encode_with_pretrained(x)
        z = self.proj_norm(z)
        z = self.proj(z)
        z = nonlinearity(z)

        for res_block, pool in zip(self.model, self.downsampler):
            z = pool(res_block(z, temb=None))

        if self.do_reshape:
            z = rearrange(z, 'b c h w -> b (h w) c')
        return z
|