mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
added assertion checks for out-of-bound arguments; added various copyright and license agreement files
This commit is contained in:
parent
7a67d3d837
commit
3393b8cad1
30
LICENSE
30
LICENSE
@ -1,9 +1,27 @@
|
||||
All rights reserved by the authors.
|
||||
You must not distribute the weights provided to you directly or indirectly without explicit consent of the authors.
|
||||
You must not distribute harmful, offensive, dehumanizing content or otherwise harmful representations of people or their environments, cultures, religions, etc. produced with the model weights
|
||||
or other generated content described in the "Misuse and Malicious Use" section in the model card.
|
||||
The model weights are provided for research purposes only.
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
|
||||
|
||||
This software is derived from a fork of the source code available from
|
||||
https://github.com/pesser/stable-diffusion and
|
||||
https://github.com/CompViz/stable-diffusion. They carry the following
|
||||
copyrights:
|
||||
|
||||
Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich
|
||||
Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors
|
||||
|
||||
Please see individual source code files for copyright and authorship
|
||||
attributions.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
@ -11,4 +29,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
SOFTWARE.
|
||||
|
294
LICENSE-ModelWeights.txt
Normal file
294
LICENSE-ModelWeights.txt
Normal file
@ -0,0 +1,294 @@
|
||||
Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors
|
||||
|
||||
CreativeML Open RAIL-M
|
||||
dated August 22, 2022
|
||||
|
||||
Section I: PREAMBLE
|
||||
|
||||
Multimodal generative models are being widely adopted and used, and
|
||||
have the potential to transform the way artists, among other
|
||||
individuals, conceive and benefit from AI or ML technologies as a tool
|
||||
for content creation.
|
||||
|
||||
Notwithstanding the current and potential benefits that these
|
||||
artifacts can bring to society at large, there are also concerns about
|
||||
potential misuses of them, either due to their technical limitations
|
||||
or ethical considerations.
|
||||
|
||||
In short, this license strives for both the open and responsible
|
||||
downstream use of the accompanying model. When it comes to the open
|
||||
character, we took inspiration from open source permissive licenses
|
||||
regarding the grant of IP rights. Referring to the downstream
|
||||
responsible use, we added use-based restrictions not permitting the
|
||||
use of the Model in very specific scenarios, in order for the licensor
|
||||
to be able to enforce the license in case potential misuses of the
|
||||
Model may occur. At the same time, we strive to promote open and
|
||||
responsible research on generative models for art and content
|
||||
generation.
|
||||
|
||||
Even though downstream derivative versions of the model could be
|
||||
released under different licensing terms, the latter will always have
|
||||
to include - at minimum - the same use-based restrictions as the ones
|
||||
in the original license (this license). We believe in the intersection
|
||||
between open and responsible AI development; thus, this License aims
|
||||
to strike a balance between both in order to enable responsible
|
||||
open-science in the field of AI.
|
||||
|
||||
This License governs the use of the model (and its derivatives) and is
|
||||
informed by the model card associated with the model.
|
||||
|
||||
NOW THEREFORE, You and Licensor agree as follows:
|
||||
|
||||
1. Definitions
|
||||
|
||||
- "License" means the terms and conditions for use, reproduction, and
|
||||
Distribution as defined in this document.
|
||||
|
||||
- "Data" means a collection of information and/or content extracted
|
||||
from the dataset used with the Model, including to train, pretrain,
|
||||
or otherwise evaluate the Model. The Data is not licensed under this
|
||||
License.
|
||||
|
||||
- "Output" means the results of operating a Model as embodied in
|
||||
informational content resulting therefrom.
|
||||
|
||||
- "Model" means any accompanying machine-learning based assemblies
|
||||
(including checkpoints), consisting of learnt weights, parameters
|
||||
(including optimizer states), corresponding to the model
|
||||
architecture as embodied in the Complementary Material, that have
|
||||
been trained or tuned, in whole or in part on the Data, using the
|
||||
Complementary Material.
|
||||
|
||||
- "Derivatives of the Model" means all modifications to the Model,
|
||||
works based on the Model, or any other model which is created or
|
||||
initialized by transfer of patterns of the weights, parameters,
|
||||
activations or output of the Model, to the other model, in order to
|
||||
cause the other model to perform similarly to the Model, including -
|
||||
but not limited to - distillation methods entailing the use of
|
||||
intermediate data representations or methods based on the generation
|
||||
of synthetic data by the Model for training the other model.
|
||||
|
||||
- "Complementary Material" means the accompanying source code and
|
||||
scripts used to define, run, load, benchmark or evaluate the Model,
|
||||
and used to prepare data for training or evaluation, if any. This
|
||||
includes any accompanying documentation, tutorials, examples, etc,
|
||||
if any.
|
||||
|
||||
- "Distribution" means any transmission, reproduction, publication or
|
||||
other sharing of the Model or Derivatives of the Model to a third
|
||||
party, including providing the Model as a hosted service made
|
||||
available by electronic or other remote means - e.g. API-based or
|
||||
web access.
|
||||
|
||||
- "Licensor" means the copyright owner or entity authorized by the
|
||||
copyright owner that is granting the License, including the persons
|
||||
or entities that may have rights in the Model and/or distributing
|
||||
the Model.
|
||||
|
||||
- "You" (or "Your") means an individual or Legal Entity exercising
|
||||
permissions granted by this License and/or making use of the Model
|
||||
for whichever purpose and in any field of use, including usage of
|
||||
the Model in an end-use application - e.g. chatbot, translator,
|
||||
image generator.
|
||||
|
||||
- "Third Parties" means individuals or legal entities that are not
|
||||
under common control with Licensor or You.
|
||||
|
||||
- "Contribution" means any work of authorship, including the original
|
||||
version of the Model and any modifications or additions to that
|
||||
Model or Derivatives of the Model thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Model by the copyright
|
||||
owner or by an individual or Legal Entity authorized to submit on
|
||||
behalf of the copyright owner. For the purposes of this definition,
|
||||
"submitted" means any form of electronic, verbal, or written
|
||||
communication sent to the Licensor or its representatives, including
|
||||
but not limited to communication on electronic mailing lists, source
|
||||
code control systems, and issue tracking systems that are managed
|
||||
by, or on behalf of, the Licensor for the purpose of discussing and
|
||||
improving the Model, but excluding communication that is
|
||||
conspicuously marked or otherwise designated in writing by the
|
||||
copyright owner as "Not a Contribution."
|
||||
|
||||
- "Contributor" means Licensor and any individual or Legal Entity on
|
||||
behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Model.
|
||||
|
||||
Section II: INTELLECTUAL PROPERTY RIGHTS
|
||||
|
||||
Both copyright and patent grants apply to the Model, Derivatives of
|
||||
the Model and Complementary Material. The Model and Derivatives of the
|
||||
Model are subject to additional terms as described in Section III.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare, publicly display, publicly
|
||||
perform, sublicense, and distribute the Complementary Material, the
|
||||
Model, and Derivatives of the Model.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License and where and as applicable, each Contributor hereby
|
||||
grants to You a perpetual, worldwide, non-exclusive, no-charge,
|
||||
royalty-free, irrevocable (except as stated in this paragraph) patent
|
||||
license to make, have made, use, offer to sell, sell, import, and
|
||||
otherwise transfer the Model and the Complementary Material, where
|
||||
such license applies only to those patent claims licensable by such
|
||||
Contributor that are necessarily infringed by their Contribution(s)
|
||||
alone or by combination of their Contribution(s) with the Model to
|
||||
which such Contribution(s) was submitted. If You institute patent
|
||||
litigation against any entity (including a cross-claim or counterclaim
|
||||
in a lawsuit) alleging that the Model and/or Complementary Material or
|
||||
a Contribution incorporated within the Model and/or Complementary
|
||||
Material constitutes direct or contributory patent infringement, then
|
||||
any patent licenses granted to You under this License for the Model
|
||||
and/or Work shall terminate as of the date such litigation is asserted
|
||||
or filed.
|
||||
|
||||
Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION
|
||||
|
||||
4. Distribution and Redistribution. You may host for Third Party
|
||||
remote access purposes (e.g. software-as-a-service), reproduce and
|
||||
distribute copies of the Model or Derivatives of the Model thereof in
|
||||
any medium, with or without modifications, provided that You meet the
|
||||
following conditions: Use-based restrictions as referenced in
|
||||
paragraph 5 MUST be included as an enforceable provision by You in any
|
||||
type of legal agreement (e.g. a license) governing the use and/or
|
||||
distribution of the Model or Derivatives of the Model, and You shall
|
||||
give notice to subsequent users You Distribute to, that the Model or
|
||||
Derivatives of the Model are subject to paragraph 5. This provision
|
||||
does not apply to the use of Complementary Material. You must give
|
||||
any Third Party recipients of the Model or Derivatives of the Model a
|
||||
copy of this License; You must cause any modified files to carry
|
||||
prominent notices stating that You changed the files; You must retain
|
||||
all copyright, patent, trademark, and attribution notices excluding
|
||||
those notices that do not pertain to any part of the Model,
|
||||
Derivatives of the Model. You may add Your own copyright statement to
|
||||
Your modifications and may provide additional or different license
|
||||
terms and conditions - respecting paragraph 4.a. - for use,
|
||||
reproduction, or Distribution of Your modifications, or for any such
|
||||
Derivatives of the Model as a whole, provided Your use, reproduction,
|
||||
and Distribution of the Model otherwise complies with the conditions
|
||||
stated in this License.
|
||||
|
||||
5. Use-based restrictions. The restrictions set forth in Attachment A
|
||||
are considered Use-based restrictions. Therefore You cannot use the
|
||||
Model and the Derivatives of the Model for the specified restricted
|
||||
uses. You may use the Model subject to this License, including only
|
||||
for lawful purposes and in accordance with the License. Use may
|
||||
include creating any content with, finetuning, updating, running,
|
||||
training, evaluating and/or reparametrizing the Model. You shall
|
||||
require all of Your users who use the Model or a Derivative of the
|
||||
Model to comply with the terms of this paragraph (paragraph 5).
|
||||
|
||||
6. The Output You Generate. Except as set forth herein, Licensor
|
||||
claims no rights in the Output You generate using the Model. You are
|
||||
accountable for the Output you generate and its subsequent uses. No
|
||||
use of the output can contravene any provision as stated in the
|
||||
License.
|
||||
|
||||
Section IV: OTHER PROVISIONS
|
||||
|
||||
7. Updates and Runtime Restrictions. To the maximum extent permitted
|
||||
by law, Licensor reserves the right to restrict (remotely or
|
||||
otherwise) usage of the Model in violation of this License, update the
|
||||
Model through electronic means, or modify the Output of the Model
|
||||
based on updates. You shall undertake reasonable efforts to use the
|
||||
latest version of the Model.
|
||||
|
||||
8. Trademarks and related. Nothing in this License permits You to make
|
||||
use of Licensors’ trademarks, trade names, logos or to otherwise
|
||||
suggest endorsement or misrepresent the relationship between the
|
||||
parties; and any rights not expressly granted herein are reserved by
|
||||
the Licensors.
|
||||
|
||||
9. Disclaimer of Warranty. Unless required by applicable law or agreed
|
||||
to in writing, Licensor provides the Model and the Complementary
|
||||
Material (and each Contributor provides its Contributions) on an "AS
|
||||
IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
||||
express or implied, including, without limitation, any warranties or
|
||||
conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR
|
||||
A PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Model, Derivatives of
|
||||
the Model, and the Complementary Material and assume any risks
|
||||
associated with Your exercise of permissions under this License.
|
||||
|
||||
10. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise, unless
|
||||
required by applicable law (such as deliberate and grossly negligent
|
||||
acts) or agreed to in writing, shall any Contributor be liable to You
|
||||
for damages, including any direct, indirect, special, incidental, or
|
||||
consequential damages of any character arising as a result of this
|
||||
License or out of the use or inability to use the Model and the
|
||||
Complementary Material (including but not limited to damages for loss
|
||||
of goodwill, work stoppage, computer failure or malfunction, or any
|
||||
and all other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
11. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Model, Derivatives of the Model and the Complementary Material
|
||||
thereof, You may choose to offer, and charge a fee for, acceptance of
|
||||
support, warranty, indemnity, or other liability obligations and/or
|
||||
rights consistent with this License. However, in accepting such
|
||||
obligations, You may act only on Your own behalf and on Your sole
|
||||
responsibility, not on behalf of any other Contributor, and only if
|
||||
You agree to indemnify, defend, and hold each Contributor harmless for
|
||||
any liability incurred by, or claims asserted against, such
|
||||
Contributor by reason of your accepting any such warranty or
|
||||
additional liability.
|
||||
|
||||
12. If any provision of this License is held to be invalid, illegal or
|
||||
unenforceable, the remaining provisions shall be unaffected thereby
|
||||
and remain valid as if such provision had not been set forth herein.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
|
||||
|
||||
|
||||
Attachment A
|
||||
|
||||
Use Restrictions
|
||||
|
||||
You agree not to use the Model or Derivatives of the Model:
|
||||
|
||||
- In any way that violates any applicable national, federal, state,
|
||||
local or international law or regulation;
|
||||
|
||||
- For the purpose of exploiting, harming or attempting to exploit or
|
||||
harm minors in any way;
|
||||
|
||||
- To generate or disseminate verifiably false information and/or
|
||||
content with the purpose of harming others;
|
||||
|
||||
- To generate or disseminate personal identifiable information that
|
||||
can be used to harm an individual;
|
||||
|
||||
- To defame, disparage or otherwise harass others;
|
||||
|
||||
- For fully automated decision making that adversely impacts an
|
||||
individual’s legal rights or otherwise creates or modifies a
|
||||
binding, enforceable obligation;
|
||||
|
||||
pp- For any use intended to or which has the effect of discriminating
|
||||
against or harming individuals or groups based on online or offline
|
||||
social behavior or known or predicted personal or personality
|
||||
characteristics;
|
||||
|
||||
- To exploit any of the vulnerabilities of a specific group of persons
|
||||
based on their age, social, physical or mental characteristics, in
|
||||
order to materially distort the behavior of a person pertaining to
|
||||
that group in a manner that causes or is likely to cause that person
|
||||
or another person physical or psychological harm;
|
||||
|
||||
- For any use intended to or which has the effect of discriminating
|
||||
against individuals or groups based on legally protected
|
||||
characteristics or categories;
|
||||
|
||||
- To provide medical advice and medical results interpretation;
|
||||
|
||||
- To generate or disseminate information for the purpose to be used
|
||||
for administration of justice, law enforcement, immigration or
|
||||
asylum processes, such as predicting an individual will commit
|
||||
fraud/crime commitment (e.g. by text profiling, drawing causal
|
||||
relationships between assertions made in documents, indiscriminate
|
||||
and arbitrarily-targeted use).
|
210
README-CompViz.md
Normal file
210
README-CompViz.md
Normal file
@ -0,0 +1,210 @@
|
||||
# Original README from CompViz/stable-diffusion
|
||||
*Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*
|
||||
|
||||
[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)<br/>
|
||||
[Robin Rombach](https://github.com/rromb)\*,
|
||||
[Andreas Blattmann](https://github.com/ablattmann)\*,
|
||||
[Dominik Lorenz](https://github.com/qp-qp)\,
|
||||
[Patrick Esser](https://github.com/pesser),
|
||||
[Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)<br/>
|
||||
|
||||
**CVPR '22 Oral**
|
||||
|
||||
which is available on [GitHub](https://github.com/CompVis/latent-diffusion). PDF at [arXiv](https://arxiv.org/abs/2112.10752). Please also visit our [Project page](https://ommer-lab.com/research/latent-diffusion-models/).
|
||||
|
||||
![txt2img-stable2](assets/stable-samples/txt2img/merged-0006.png)
|
||||
[Stable Diffusion](#stable-diffusion-v1) is a latent text-to-image diffusion
|
||||
model.
|
||||
Thanks to a generous compute donation from [Stability AI](https://stability.ai/) and support from [LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) database.
|
||||
Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487),
|
||||
this model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts.
|
||||
With its 860M UNet and 123M text encoder, the model is relatively lightweight and runs on a GPU with at least 10GB VRAM.
|
||||
See [this section](#stable-diffusion-v1) below and the [model card](https://huggingface.co/CompVis/stable-diffusion).
|
||||
|
||||
|
||||
## Requirements
|
||||
|
||||
A suitable [conda](https://conda.io/) environment named `ldm` can be created
|
||||
and activated with:
|
||||
|
||||
```
|
||||
conda env create -f environment.yaml
|
||||
conda activate ldm
|
||||
```
|
||||
|
||||
You can also update an existing [latent diffusion](https://github.com/CompVis/latent-diffusion) environment by running
|
||||
|
||||
```
|
||||
conda install pytorch torchvision -c pytorch
|
||||
pip install transformers==4.19.2
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
## Stable Diffusion v1
|
||||
|
||||
Stable Diffusion v1 refers to a specific configuration of the model
|
||||
architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet
|
||||
and CLIP ViT-L/14 text encoder for the diffusion model. The model was pretrained on 256x256 images and
|
||||
then finetuned on 512x512 images.
|
||||
|
||||
*Note: Stable Diffusion v1 is a general text-to-image diffusion model and therefore mirrors biases and (mis-)conceptions that are present
|
||||
in its training data.
|
||||
Details on the training procedure and data, as well as the intended use of the model can be found in the corresponding [model card](https://huggingface.co/CompVis/stable-diffusion).
|
||||
Research into the safe deployment of general text-to-image models is an ongoing effort. To prevent misuse and harm, we currently provide access to the checkpoints only for [academic research purposes upon request](https://stability.ai/academia-access-form).
|
||||
**This is an experiment in safe and community-driven publication of a capable and general text-to-image model. We are working on a public release with a more permissive license that also incorporates ethical considerations.***
|
||||
|
||||
[Request access to Stable Diffusion v1 checkpoints for academic research](https://stability.ai/academia-access-form)
|
||||
|
||||
### Weights
|
||||
|
||||
We currently provide three checkpoints, `sd-v1-1.ckpt`, `sd-v1-2.ckpt` and `sd-v1-3.ckpt`,
|
||||
which were trained as follows,
|
||||
|
||||
- `sd-v1-1.ckpt`: 237k steps at resolution `256x256` on [laion2B-en](https://huggingface.co/datasets/laion/laion2B-en).
|
||||
194k steps at resolution `512x512` on [laion-high-resolution](https://huggingface.co/datasets/laion/laion-high-resolution) (170M examples from LAION-5B with resolution `>= 1024x1024`).
|
||||
- `sd-v1-2.ckpt`: Resumed from `sd-v1-1.ckpt`.
|
||||
515k steps at resolution `512x512` on "laion-improved-aesthetics" (a subset of laion2B-en,
|
||||
filtered to images with an original size `>= 512x512`, estimated aesthetics score `> 5.0`, and an estimated watermark probability `< 0.5`. The watermark estimate is from the LAION-5B metadata, the aesthetics score is estimated using an [improved aesthetics estimator](https://github.com/christophschuhmann/improved-aesthetic-predictor)).
|
||||
- `sd-v1-3.ckpt`: Resumed from `sd-v1-2.ckpt`. 195k steps at resolution `512x512` on "laion-improved-aesthetics" and 10\% dropping of the text-conditioning to improve [classifier-free guidance sampling](https://arxiv.org/abs/2207.12598).
|
||||
|
||||
Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0,
|
||||
5.0, 6.0, 7.0, 8.0) and 50 PLMS sampling
|
||||
steps show the relative improvements of the checkpoints:
|
||||
![sd evaluation results](assets/v1-variants-scores.jpg)
|
||||
|
||||
|
||||
|
||||
### Text-to-Image with Stable Diffusion
|
||||
![txt2img-stable2](assets/stable-samples/txt2img/merged-0005.png)
|
||||
![txt2img-stable2](assets/stable-samples/txt2img/merged-0007.png)
|
||||
|
||||
Stable Diffusion is a latent diffusion model conditioned on the (non-pooled) text embeddings of a CLIP ViT-L/14 text encoder.
|
||||
|
||||
|
||||
#### Sampling Script
|
||||
|
||||
After [obtaining the weights](#weights), link them
|
||||
```
|
||||
mkdir -p models/ldm/stable-diffusion-v1/
|
||||
ln -s <path/to/model.ckpt> models/ldm/stable-diffusion-v1/model.ckpt
|
||||
```
|
||||
and sample with
|
||||
```
|
||||
python scripts/txt2img.py --prompt "a photograph of an astronaut riding a horse" --plms
|
||||
```
|
||||
|
||||
By default, this uses a guidance scale of `--scale 7.5`, [Katherine Crowson's implementation](https://github.com/CompVis/latent-diffusion/pull/51) of the [PLMS](https://arxiv.org/abs/2202.09778) sampler,
|
||||
and renders images of size 512x512 (which it was trained on) in 50 steps. All supported arguments are listed below (type `python scripts/txt2img.py --help`).
|
||||
|
||||
```commandline
|
||||
usage: txt2img.py [-h] [--prompt [PROMPT]] [--outdir [OUTDIR]] [--skip_grid] [--skip_save] [--ddim_steps DDIM_STEPS] [--plms] [--laion400m] [--fixed_code] [--ddim_eta DDIM_ETA] [--n_iter N_ITER] [--H H] [--W W] [--C C] [--f F] [--n_samples N_SAMPLES] [--n_rows N_ROWS]
|
||||
[--scale SCALE] [--from-file FROM_FILE] [--config CONFIG] [--ckpt CKPT] [--seed SEED] [--precision {full,autocast}]
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--prompt [PROMPT] the prompt to render
|
||||
--outdir [OUTDIR] dir to write results to
|
||||
--skip_grid do not save a grid, only individual samples. Helpful when evaluating lots of samples
|
||||
--skip_save do not save individual samples. For speed measurements.
|
||||
--ddim_steps DDIM_STEPS
|
||||
number of ddim sampling steps
|
||||
--plms use plms sampling
|
||||
--laion400m uses the LAION400M model
|
||||
--fixed_code if enabled, uses the same starting code across samples
|
||||
--ddim_eta DDIM_ETA ddim eta (eta=0.0 corresponds to deterministic sampling
|
||||
--n_iter N_ITER sample this often
|
||||
--H H image height, in pixel space
|
||||
--W W image width, in pixel space
|
||||
--C C latent channels
|
||||
--f F downsampling factor
|
||||
--n_samples N_SAMPLES
|
||||
how many samples to produce for each given prompt. A.k.a. batch size
|
||||
(note that the seeds for each image in the batch will be unavailable)
|
||||
--n_rows N_ROWS rows in the grid (default: n_samples)
|
||||
--scale SCALE unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))
|
||||
--from-file FROM_FILE
|
||||
if specified, load prompts from this file
|
||||
--config CONFIG path to config which constructs model
|
||||
--ckpt CKPT path to checkpoint of model
|
||||
--seed SEED the seed (for reproducible sampling)
|
||||
--precision {full,autocast}
|
||||
evaluate at this precision
|
||||
|
||||
```
|
||||
Note: The inference config for all v1 versions is designed to be used with EMA-only checkpoints.
|
||||
For this reason `use_ema=False` is set in the configuration, otherwise the code will try to switch from
|
||||
non-EMA to EMA weights. If you want to examine the effect of EMA vs no EMA, we provide "full" checkpoints
|
||||
which contain both types of weights. For these, `use_ema=False` will load and use the non-EMA weights.
|
||||
|
||||
|
||||
#### Diffusers Integration
|
||||
|
||||
Another way to download and sample Stable Diffusion is by using the [diffusers library](https://github.com/huggingface/diffusers/tree/main#new--stable-diffusion-is-now-fully-compatible-with-diffusers)
|
||||
```py
|
||||
# make sure you're logged in with `huggingface-cli login`
|
||||
from torch import autocast
|
||||
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-3-diffusers",
|
||||
use_auth_token=True
|
||||
)
|
||||
|
||||
prompt = "a photo of an astronaut riding a horse on mars"
|
||||
with autocast("cuda"):
|
||||
image = pipe(prompt)["sample"][0]
|
||||
|
||||
image.save("astronaut_rides_horse.png")
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Image Modification with Stable Diffusion
|
||||
|
||||
By using a diffusion-denoising mechanism as first proposed by [SDEdit](https://arxiv.org/abs/2108.01073), the model can be used for different
|
||||
tasks such as text-guided image-to-image translation and upscaling. Similar to the txt2img sampling script,
|
||||
we provide a script to perform image modification with Stable Diffusion.
|
||||
|
||||
The following describes an example where a rough sketch made in [Pinta](https://www.pinta-project.com/) is converted into a detailed artwork.
|
||||
```
|
||||
python scripts/img2img.py --prompt "A fantasy landscape, trending on artstation" --init-img <path-to-img.jpg> --strength 0.8
|
||||
```
|
||||
Here, strength is a value between 0.0 and 1.0, that controls the amount of noise that is added to the input image.
|
||||
Values that approach 1.0 allow for lots of variations but will also produce images that are not semantically consistent with the input. See the following example.
|
||||
|
||||
**Input**
|
||||
|
||||
![sketch-in](assets/stable-samples/img2img/sketch-mountains-input.jpg)
|
||||
|
||||
**Outputs**
|
||||
|
||||
![out3](assets/stable-samples/img2img/mountains-3.png)
|
||||
![out2](assets/stable-samples/img2img/mountains-2.png)
|
||||
|
||||
This procedure can, for example, also be used to upscale samples from the base model.
|
||||
|
||||
|
||||
## Comments
|
||||
|
||||
- Our codebase for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion)
|
||||
and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch).
|
||||
Thanks for open-sourcing!
|
||||
|
||||
- The implementation of the transformer encoder is from [x-transformers](https://github.com/lucidrains/x-transformers) by [lucidrains](https://github.com/lucidrains?tab=repositories).
|
||||
|
||||
|
||||
## BibTeX
|
||||
|
||||
```
|
||||
@misc{rombach2021highresolution,
|
||||
title={High-Resolution Image Synthesis with Latent Diffusion Models},
|
||||
author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
|
||||
year={2021},
|
||||
eprint={2112.10752},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
|
243
README.md
243
README.md
@ -102,6 +102,12 @@ do not need to add up to 1.
|
||||
|
||||
## Changes
|
||||
|
||||
* v1.08 (?? August 2022)
|
||||
* Escape single quotes on the dream> command before trying to parse. This avoids
|
||||
parse errors.
|
||||
* Removed instruction to install Python3.8 as first step in Windows installed.
|
||||
Turns out that anaconda3 does it for you!
|
||||
|
||||
* v1.07 (23 August 2022)
|
||||
* Image filenames will now never fill gaps in the sequence, but will be assigned the
|
||||
next higher name in the chosen directory. This ensures that the alphabetic and chronological
|
||||
@ -236,34 +242,31 @@ This will bring your local copy into sync with the remote one.
|
||||
|
||||
### Windows
|
||||
|
||||
1. Install Python version 3.8.5 from here: https://www.python.org/downloads/windows/
|
||||
(note that several users have reported that later versions do not work properly)
|
||||
1. Install Anaconda3 (miniconda3 version) from here: https://docs.anaconda.com/anaconda/install/windows/
|
||||
|
||||
2. Install Anaconda3 (miniconda3 version) from here: https://docs.anaconda.com/anaconda/install/windows/
|
||||
2. Install Git from here: https://git-scm.com/download/win
|
||||
|
||||
3. Install Git from here: https://git-scm.com/download/win
|
||||
3. Launch Anaconda from the Windows Start menu. This will bring up a command window. Type all the remaining commands in this window.
|
||||
|
||||
4. Launch Anaconda from the Windows Start menu. This will bring up a command window. Type all the remaining commands in this window.
|
||||
|
||||
5. Run the command:
|
||||
4. Run the command:
|
||||
```
|
||||
git clone https://github.com/lstein/stable-diffusion.git
|
||||
```
|
||||
This will create stable-diffusion folder where you will follow the rest of the steps.
|
||||
|
||||
6. Enter the newly-created stable-diffusion folder. From this step forward make sure that you are working in the stable-diffusion directory!
|
||||
5. Enter the newly-created stable-diffusion folder. From this step forward make sure that you are working in the stable-diffusion directory!
|
||||
```
|
||||
cd stable-diffusion
|
||||
```
|
||||
|
||||
7. Run the following two commands:
|
||||
6. Run the following two commands:
|
||||
```
|
||||
conda env create -f environment.yaml (step 7a)
|
||||
conda activate ldm (step 7b)
|
||||
conda env create -f environment.yaml (step 6a)
|
||||
conda activate ldm (step 6b)
|
||||
```
|
||||
This will install all python requirements and activate the "ldm" environment which sets PATH and other environment variables properly.
|
||||
|
||||
8. Run the command:
|
||||
7. Run the command:
|
||||
```
|
||||
python scripts\preload_models.py
|
||||
```
|
||||
@ -273,7 +276,7 @@ requires. (Note that this step is required. I created it because some people
|
||||
are using GPU systems that are behind a firewall and the models can't be
|
||||
downloaded just-in-time)
|
||||
|
||||
9. Now you need to install the weights for the big stable diffusion model.
|
||||
8. Now you need to install the weights for the big stable diffusion model.
|
||||
|
||||
For running with the released weights, you will first need to set up
|
||||
an acount with Hugging Face (https://huggingface.co). Use your
|
||||
@ -299,7 +302,7 @@ you stashed this file. If you prefer not to copy or move the .ckpt file,
|
||||
you may instead create a shortcut to it from within
|
||||
"models\ldm\stable-diffusion-v1\".
|
||||
|
||||
10. Start generating images!
|
||||
9. Start generating images!
|
||||
```
|
||||
# for the pre-release weights
|
||||
python scripts\dream.py -l
|
||||
@ -307,7 +310,7 @@ python scripts\dream.py -l
|
||||
# for the post-release weights
|
||||
python scripts\dream.py
|
||||
```
|
||||
11. Subsequently, to relaunch the script, first activate the Anaconda command window (step 4), enter the stable-diffusion directory (step 6, "cd \path\to\stable-diffusion"), run "conda activate ldm" (step 7b), and then launch the dream script (step 10).
|
||||
10. Subsequently, to relaunch the script, first activate the Anaconda command window (step 3), enter the stable-diffusion directory (step 5, "cd \path\to\stable-diffusion"), run "conda activate ldm" (step 6b), and then launch the dream script (step 9).
|
||||
|
||||
#### Updating to newer versions of the script
|
||||
|
||||
@ -378,213 +381,3 @@ to send me an email if you use and like the script.
|
||||
|
||||
*Contributions by:* [Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), [xraxra](https://github.com/xraxra), and [bmaltais](https://github.com/bmaltais)
|
||||
|
||||
# Original README from CompViz/stable-diffusion
|
||||
*Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*
|
||||
|
||||
[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)<br/>
|
||||
[Robin Rombach](https://github.com/rromb)\*,
|
||||
[Andreas Blattmann](https://github.com/ablattmann)\*,
|
||||
[Dominik Lorenz](https://github.com/qp-qp)\,
|
||||
[Patrick Esser](https://github.com/pesser),
|
||||
[Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)<br/>
|
||||
|
||||
**CVPR '22 Oral**
|
||||
|
||||
which is available on [GitHub](https://github.com/CompVis/latent-diffusion). PDF at [arXiv](https://arxiv.org/abs/2112.10752). Please also visit our [Project page](https://ommer-lab.com/research/latent-diffusion-models/).
|
||||
|
||||
![txt2img-stable2](assets/stable-samples/txt2img/merged-0006.png)
|
||||
[Stable Diffusion](#stable-diffusion-v1) is a latent text-to-image diffusion
|
||||
model.
|
||||
Thanks to a generous compute donation from [Stability AI](https://stability.ai/) and support from [LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) database.
|
||||
Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487),
|
||||
this model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts.
|
||||
With its 860M UNet and 123M text encoder, the model is relatively lightweight and runs on a GPU with at least 10GB VRAM.
|
||||
See [this section](#stable-diffusion-v1) below and the [model card](https://huggingface.co/CompVis/stable-diffusion).
|
||||
|
||||
|
||||
## Requirements
|
||||
|
||||
A suitable [conda](https://conda.io/) environment named `ldm` can be created
|
||||
and activated with:
|
||||
|
||||
```
|
||||
conda env create -f environment.yaml
|
||||
conda activate ldm
|
||||
```
|
||||
|
||||
You can also update an existing [latent diffusion](https://github.com/CompVis/latent-diffusion) environment by running
|
||||
|
||||
```
|
||||
conda install pytorch torchvision -c pytorch
|
||||
pip install transformers==4.19.2
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
## Stable Diffusion v1
|
||||
|
||||
Stable Diffusion v1 refers to a specific configuration of the model
|
||||
architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet
|
||||
and CLIP ViT-L/14 text encoder for the diffusion model. The model was pretrained on 256x256 images and
|
||||
then finetuned on 512x512 images.
|
||||
|
||||
*Note: Stable Diffusion v1 is a general text-to-image diffusion model and therefore mirrors biases and (mis-)conceptions that are present
|
||||
in its training data.
|
||||
Details on the training procedure and data, as well as the intended use of the model can be found in the corresponding [model card](https://huggingface.co/CompVis/stable-diffusion).
|
||||
Research into the safe deployment of general text-to-image models is an ongoing effort. To prevent misuse and harm, we currently provide access to the checkpoints only for [academic research purposes upon request](https://stability.ai/academia-access-form).
|
||||
**This is an experiment in safe and community-driven publication of a capable and general text-to-image model. We are working on a public release with a more permissive license that also incorporates ethical considerations.***
|
||||
|
||||
[Request access to Stable Diffusion v1 checkpoints for academic research](https://stability.ai/academia-access-form)
|
||||
|
||||
### Weights
|
||||
|
||||
We currently provide three checkpoints, `sd-v1-1.ckpt`, `sd-v1-2.ckpt` and `sd-v1-3.ckpt`,
|
||||
which were trained as follows,
|
||||
|
||||
- `sd-v1-1.ckpt`: 237k steps at resolution `256x256` on [laion2B-en](https://huggingface.co/datasets/laion/laion2B-en).
|
||||
194k steps at resolution `512x512` on [laion-high-resolution](https://huggingface.co/datasets/laion/laion-high-resolution) (170M examples from LAION-5B with resolution `>= 1024x1024`).
|
||||
- `sd-v1-2.ckpt`: Resumed from `sd-v1-1.ckpt`.
|
||||
515k steps at resolution `512x512` on "laion-improved-aesthetics" (a subset of laion2B-en,
|
||||
filtered to images with an original size `>= 512x512`, estimated aesthetics score `> 5.0`, and an estimated watermark probability `< 0.5`. The watermark estimate is from the LAION-5B metadata, the aesthetics score is estimated using an [improved aesthetics estimator](https://github.com/christophschuhmann/improved-aesthetic-predictor)).
|
||||
- `sd-v1-3.ckpt`: Resumed from `sd-v1-2.ckpt`. 195k steps at resolution `512x512` on "laion-improved-aesthetics" and 10\% dropping of the text-conditioning to improve [classifier-free guidance sampling](https://arxiv.org/abs/2207.12598).
|
||||
|
||||
Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0,
|
||||
5.0, 6.0, 7.0, 8.0) and 50 PLMS sampling
|
||||
steps show the relative improvements of the checkpoints:
|
||||
![sd evaluation results](assets/v1-variants-scores.jpg)
|
||||
|
||||
|
||||
|
||||
### Text-to-Image with Stable Diffusion
|
||||
![txt2img-stable2](assets/stable-samples/txt2img/merged-0005.png)
|
||||
![txt2img-stable2](assets/stable-samples/txt2img/merged-0007.png)
|
||||
|
||||
Stable Diffusion is a latent diffusion model conditioned on the (non-pooled) text embeddings of a CLIP ViT-L/14 text encoder.
|
||||
|
||||
|
||||
#### Sampling Script
|
||||
|
||||
After [obtaining the weights](#weights), link them
|
||||
```
|
||||
mkdir -p models/ldm/stable-diffusion-v1/
|
||||
ln -s <path/to/model.ckpt> models/ldm/stable-diffusion-v1/model.ckpt
|
||||
```
|
||||
and sample with
|
||||
```
|
||||
python scripts/txt2img.py --prompt "a photograph of an astronaut riding a horse" --plms
|
||||
```
|
||||
|
||||
By default, this uses a guidance scale of `--scale 7.5`, [Katherine Crowson's implementation](https://github.com/CompVis/latent-diffusion/pull/51) of the [PLMS](https://arxiv.org/abs/2202.09778) sampler,
|
||||
and renders images of size 512x512 (which it was trained on) in 50 steps. All supported arguments are listed below (type `python scripts/txt2img.py --help`).
|
||||
|
||||
```commandline
|
||||
usage: txt2img.py [-h] [--prompt [PROMPT]] [--outdir [OUTDIR]] [--skip_grid] [--skip_save] [--ddim_steps DDIM_STEPS] [--plms] [--laion400m] [--fixed_code] [--ddim_eta DDIM_ETA] [--n_iter N_ITER] [--H H] [--W W] [--C C] [--f F] [--n_samples N_SAMPLES] [--n_rows N_ROWS]
|
||||
[--scale SCALE] [--from-file FROM_FILE] [--config CONFIG] [--ckpt CKPT] [--seed SEED] [--precision {full,autocast}]
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--prompt [PROMPT] the prompt to render
|
||||
--outdir [OUTDIR] dir to write results to
|
||||
--skip_grid do not save a grid, only individual samples. Helpful when evaluating lots of samples
|
||||
--skip_save do not save individual samples. For speed measurements.
|
||||
--ddim_steps DDIM_STEPS
|
||||
number of ddim sampling steps
|
||||
--plms use plms sampling
|
||||
--laion400m uses the LAION400M model
|
||||
--fixed_code if enabled, uses the same starting code across samples
|
||||
--ddim_eta DDIM_ETA ddim eta (eta=0.0 corresponds to deterministic sampling
|
||||
--n_iter N_ITER sample this often
|
||||
--H H image height, in pixel space
|
||||
--W W image width, in pixel space
|
||||
--C C latent channels
|
||||
--f F downsampling factor
|
||||
--n_samples N_SAMPLES
|
||||
how many samples to produce for each given prompt. A.k.a. batch size
|
||||
(note that the seeds for each image in the batch will be unavailable)
|
||||
--n_rows N_ROWS rows in the grid (default: n_samples)
|
||||
--scale SCALE unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))
|
||||
--from-file FROM_FILE
|
||||
if specified, load prompts from this file
|
||||
--config CONFIG path to config which constructs model
|
||||
--ckpt CKPT path to checkpoint of model
|
||||
--seed SEED the seed (for reproducible sampling)
|
||||
--precision {full,autocast}
|
||||
evaluate at this precision
|
||||
|
||||
```
|
||||
Note: The inference config for all v1 versions is designed to be used with EMA-only checkpoints.
|
||||
For this reason `use_ema=False` is set in the configuration, otherwise the code will try to switch from
|
||||
non-EMA to EMA weights. If you want to examine the effect of EMA vs no EMA, we provide "full" checkpoints
|
||||
which contain both types of weights. For these, `use_ema=False` will load and use the non-EMA weights.
|
||||
|
||||
|
||||
#### Diffusers Integration
|
||||
|
||||
Another way to download and sample Stable Diffusion is by using the [diffusers library](https://github.com/huggingface/diffusers/tree/main#new--stable-diffusion-is-now-fully-compatible-with-diffusers)
|
||||
```py
|
||||
# make sure you're logged in with `huggingface-cli login`
|
||||
from torch import autocast
|
||||
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
|
||||
|
||||
pipe = StableDiffusionPipeline.from_pretrained(
|
||||
"CompVis/stable-diffusion-v1-3-diffusers",
|
||||
use_auth_token=True
|
||||
)
|
||||
|
||||
prompt = "a photo of an astronaut riding a horse on mars"
|
||||
with autocast("cuda"):
|
||||
image = pipe(prompt)["sample"][0]
|
||||
|
||||
image.save("astronaut_rides_horse.png")
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Image Modification with Stable Diffusion
|
||||
|
||||
By using a diffusion-denoising mechanism as first proposed by [SDEdit](https://arxiv.org/abs/2108.01073), the model can be used for different
|
||||
tasks such as text-guided image-to-image translation and upscaling. Similar to the txt2img sampling script,
|
||||
we provide a script to perform image modification with Stable Diffusion.
|
||||
|
||||
The following describes an example where a rough sketch made in [Pinta](https://www.pinta-project.com/) is converted into a detailed artwork.
|
||||
```
|
||||
python scripts/img2img.py --prompt "A fantasy landscape, trending on artstation" --init-img <path-to-img.jpg> --strength 0.8
|
||||
```
|
||||
Here, strength is a value between 0.0 and 1.0, that controls the amount of noise that is added to the input image.
|
||||
Values that approach 1.0 allow for lots of variations but will also produce images that are not semantically consistent with the input. See the following example.
|
||||
|
||||
**Input**
|
||||
|
||||
![sketch-in](assets/stable-samples/img2img/sketch-mountains-input.jpg)
|
||||
|
||||
**Outputs**
|
||||
|
||||
![out3](assets/stable-samples/img2img/mountains-3.png)
|
||||
![out2](assets/stable-samples/img2img/mountains-2.png)
|
||||
|
||||
This procedure can, for example, also be used to upscale samples from the base model.
|
||||
|
||||
|
||||
## Comments
|
||||
|
||||
- Our codebase for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion)
|
||||
and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch).
|
||||
Thanks for open-sourcing!
|
||||
|
||||
- The implementation of the transformer encoder is from [x-transformers](https://github.com/lucidrains/x-transformers) by [lucidrains](https://github.com/lucidrains?tab=repositories).
|
||||
|
||||
|
||||
## BibTeX
|
||||
|
||||
```
|
||||
@misc{rombach2021highresolution,
|
||||
title={High-Resolution Image Synthesis with Latent Diffusion Models},
|
||||
author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
|
||||
year={2021},
|
||||
eprint={2112.10752},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
@ -1,3 +1,10 @@
|
||||
# Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
|
||||
|
||||
# Derived from source code carrying the following copyrights
|
||||
# Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich
|
||||
# Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors
|
||||
|
||||
|
||||
"""Simplified text to image API for stable diffusion/latent diffusion
|
||||
|
||||
Example Usage:
|
||||
@ -161,6 +168,9 @@ The vast majority of these arguments default to reasonable values.
|
||||
|
||||
model = self.load_model() # will instantiate the model or return it from cache
|
||||
|
||||
assert strength<1.0 and strength>=0.0, "strength (-f) must be >=0.0 and <1.0"
|
||||
assert cfg_scale>1.0, "CFG_Scale (-C) must be >1.0"
|
||||
|
||||
# grid and individual are mutually exclusive, with individual taking priority.
|
||||
# not necessary, but needed for compatability with dream bot
|
||||
if (grid is None):
|
||||
@ -282,6 +292,9 @@ The vast majority of these arguments default to reasonable values.
|
||||
iterations = iterations or self.iterations
|
||||
strength = strength or self.strength
|
||||
|
||||
assert strength<1.0 and strength>=0.0, "strength (-f) must be >=0.0 and <1.0"
|
||||
assert cfg_scale>1.0, "CFG_Scale (-C) must be >1.0"
|
||||
|
||||
if init_img is None:
|
||||
print("no init_img provided!")
|
||||
return []
|
||||
|
@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
|
||||
|
||||
import argparse
|
||||
import shlex
|
||||
import atexit
|
||||
@ -158,10 +160,14 @@ def main_loop(t2i,parser,log,infile):
|
||||
print("Try again with a prompt!")
|
||||
continue
|
||||
|
||||
if opt.init_img is None:
|
||||
results = t2i.txt2img(**vars(opt))
|
||||
else:
|
||||
results = t2i.img2img(**vars(opt))
|
||||
try:
|
||||
if opt.init_img is None:
|
||||
results = t2i.txt2img(**vars(opt))
|
||||
else:
|
||||
results = t2i.img2img(**vars(opt))
|
||||
except AssertionError as e:
|
||||
print(e)
|
||||
continue
|
||||
print("Outputs:")
|
||||
write_log_message(t2i,opt,results,log)
|
||||
|
||||
@ -374,3 +380,4 @@ if readline_available:
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein)
|
||||
# Before running stable-diffusion on an internet-isolated machine,
|
||||
# run this script from one with internet connectivity. The
|
||||
# two machines must share a common .cache directory.
|
||||
@ -30,3 +31,4 @@ tokenizer =CLIPTokenizer.from_pretrained(version)
|
||||
transformer=CLIPTextModel.from_pretrained(version)
|
||||
print('\n\n...success')
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user