From 794523de1124cfb62d66817b8e1dc0654cadf420 Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Mon, 1 May 2023 10:57:03 +0530
Subject: [PATCH 1/2] fix: scale_lr and sync example readme and docs.

---
 docs/source/en/training/lora.mdx             | 22 +++++++++++++++++--
 examples/dreambooth/README.md                | 23 +++++++++++++++++---
 examples/dreambooth/train_dreambooth_lora.py |  5 -----
 3 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/docs/source/en/training/lora.mdx b/docs/source/en/training/lora.mdx
index 3c7cc7ebfeec..fecdeaf18ca3 100644
--- a/docs/source/en/training/lora.mdx
+++ b/docs/source/en/training/lora.mdx
@@ -243,8 +243,26 @@ Load the LoRA weights from your finetuned DreamBooth model *on top of the base m
 >>> image.save("bucket-dog.png")
 ```
 
-Note that the use of [`LoraLoaderMixin.load_lora_weights`] is preferred to [`UNet2DConditionLoadersMixin.load_attn_procs`] for loading LoRA parameters. This is because
-[`LoraLoaderMixin.load_lora_weights`] can handle the following situations:
+If you used `--train_text_encoder` during training, then use `pipe.load_lora_weights()` to load the LoRA
+weights. For example:
+
+```python
+from huggingface_hub.repocard import RepoCard
+from diffusers import StableDiffusionPipeline
+import torch
+
+lora_model_id = "sayakpaul/dreambooth-text-encoder-test"
+card = RepoCard.load(lora_model_id)
+base_model_id = card.data.to_dict()["base_model"]
+
+pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.load_lora_weights(lora_model_id)
+image = pipe("A picture of a sks dog in a bucket", num_inference_steps=25).images[0]
+```
+
+Note that the use of [~`LoraLoaderMixin.load_lora_weights`] is preferred to [~`UNet2DConditionLoadersMixin.load_attn_procs`] for loading LoRA parameters. This is because
+[~`LoraLoaderMixin.load_lora_weights`] can handle the following situations:
 
 * LoRA parameters that don't have separate identifiers for the UNet and the text encoder (such as [`"patrickvonplaten/lora_dreambooth_dog_example"`](https://huggingface.co/patrickvonplaten/lora_dreambooth_dog_example)). So, you can just do:
 
diff --git a/examples/dreambooth/README.md b/examples/dreambooth/README.md
index 490e31458988..75d705f89e02 100644
--- a/examples/dreambooth/README.md
+++ b/examples/dreambooth/README.md
@@ -408,9 +408,26 @@ pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.
 ...
 ```
 
-**Note** that we will gradually be depcrecating the use of [`UNet2DConditionLoadersMixin.load_attn_procs`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs) since we now have a more general
-method to load the LoRA parameters -- [`LoraLoaderMixin.load_lora_weights`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.load_lora_weights). This is because
-[`LoraLoaderMixin.load_lora_weights`] can handle the following situations:
+If you used `--train_text_encoder` during training, then use `pipe.load_lora_weights()` to load the LoRA
+weights. For example:
+
+```python
+from huggingface_hub.repocard import RepoCard
+from diffusers import StableDiffusionPipeline
+import torch
+
+lora_model_id = "sayakpaul/dreambooth-text-encoder-test"
+card = RepoCard.load(lora_model_id)
+base_model_id = card.data.to_dict()["base_model"]
+
+pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.load_lora_weights(lora_model_id)
+image = pipe("A picture of a sks dog in a bucket", num_inference_steps=25).images[0]
+```
+
+Note that the use of [`LoraLoaderMixin.load_lora_weights`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.LoraLoaderMixin.load_lora_weights) is preferred to [`UNet2DConditionLoadersMixin.load_attn_procs`](https://huggingface.co/docs/diffusers/main/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs) for loading LoRA parameters. This is because
+`LoraLoaderMixin.load_lora_weights` can handle the following situations:
 
 * LoRA parameters that don't have separate identifiers for the UNet and the text encoder (such as [`"patrickvonplaten/lora_dreambooth_dog_example"`](https://huggingface.co/patrickvonplaten/lora_dreambooth_dog_example)). So, you can just do:
 
diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py
index 5cefc57c614d..9af81aa5a95d 100644
--- a/examples/dreambooth/train_dreambooth_lora.py
+++ b/examples/dreambooth/train_dreambooth_lora.py
@@ -746,11 +746,6 @@ def main(args):
         accelerator.register_for_checkpointing(text_encoder_lora_layers)
         del temp_pipeline
 
-    if args.scale_lr:
-        args.learning_rate = (
-            args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
-        )
-
     # Enable TF32 for faster training on Ampere GPUs,
     # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices
     if args.allow_tf32:

From 8f3e7a076bdd25f55b8967033a41cf3978dd2cda Mon Sep 17 00:00:00 2001
From: Sayak Paul
Date: Mon, 1 May 2023 21:41:08 +0530
Subject: [PATCH 2/2] fix doc link.

---
 docs/source/en/training/lora.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/training/lora.mdx b/docs/source/en/training/lora.mdx
index fecdeaf18ca3..8e41aab5e2d8 100644
--- a/docs/source/en/training/lora.mdx
+++ b/docs/source/en/training/lora.mdx
@@ -261,8 +261,8 @@ pipe.load_lora_weights(lora_model_id)
 image = pipe("A picture of a sks dog in a bucket", num_inference_steps=25).images[0]
 ```
 
-Note that the use of [~`LoraLoaderMixin.load_lora_weights`] is preferred to [~`UNet2DConditionLoadersMixin.load_attn_procs`] for loading LoRA parameters. This is because
-[~`LoraLoaderMixin.load_lora_weights`] can handle the following situations:
+Note that the use of [`~diffusers.loaders.LoraLoaderMixin.load_lora_weights`] is preferred to [`~diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs`] for loading LoRA parameters. This is because
+[`~diffusers.loaders.LoraLoaderMixin.load_lora_weights`] can handle the following situations:
 
 * LoRA parameters that don't have separate identifiers for the UNet and the text encoder (such as [`"patrickvonplaten/lora_dreambooth_dog_example"`](https://huggingface.co/patrickvonplaten/lora_dreambooth_dog_example)). So, you can just do:
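
As a quick sanity check on the two loading paths this patch series documents, here is a minimal sketch (not part of the patch itself). It assumes `diffusers` >= 0.15, where `LoraLoaderMixin.load_lora_weights` is available, a CUDA device, and that `runwayml/stable-diffusion-v1-5` is a suitable base model for the UNet-only example checkpoint mentioned in the docs (an assumption; that checkpoint's model card is not shown here):

```python
import torch
from diffusers import StableDiffusionPipeline

# Assumed base model for the UNet-only LoRA checkpoint referenced above.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

# Preferred path: load_lora_weights handles UNet-only checkpoints as well as
# checkpoints that also carry text encoder LoRA layers (--train_text_encoder).
pipe.load_lora_weights("patrickvonplaten/lora_dreambooth_dog_example")

# Older path, shown for contrast: load_attn_procs only loads attention
# processors into the UNet, so any text encoder LoRA layers in a checkpoint
# would be ignored.
# pipe.unet.load_attn_procs("patrickvonplaten/lora_dreambooth_dog_example")

image = pipe("A picture of a sks dog in a bucket", num_inference_steps=25).images[0]
image.save("bucket-dog.png")
```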