Skip to content

[Feat] add tiny Autoencoder for (almost) instant decoding #4384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 45 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c15d27a
add: model implementation of tiny autoencoder.
sayakpaul Jul 26, 2023
937eb19
add: inits.
sayakpaul Jul 26, 2023
19f834e
push the latest devs.
sayakpaul Jul 26, 2023
58b9bc9
Merge branch 'main' into feat/tiny-autoenc
sayakpaul Jul 31, 2023
cddf45d
add: conversion script and finish.
sayakpaul Jul 31, 2023
f111387
add: scaling factor args.
sayakpaul Jul 31, 2023
258c074
debugging
sayakpaul Jul 31, 2023
8be9d3d
fix denormalization.
sayakpaul Jul 31, 2023
63a95a5
fix: positional argument.
sayakpaul Jul 31, 2023
b470897
handle use_torch_2_0_or_xformers.
sayakpaul Jul 31, 2023
2ce4bf3
handle post_quant_conv
sayakpaul Jul 31, 2023
c8ff8e5
handle dtype
sayakpaul Jul 31, 2023
0c099ca
fix: sdxl image processor for tiny ae.
sayakpaul Jul 31, 2023
8d43bc8
fix: sdxl image processor for tiny ae.
sayakpaul Jul 31, 2023
7d2dca0
unify upcasting logic.
sayakpaul Jul 31, 2023
a7a8f6f
copied from madness.
sayakpaul Jul 31, 2023
d11fd65
remove trailing whitespace.
sayakpaul Jul 31, 2023
92c72e2
set is_tiny_vae = False
sayakpaul Aug 1, 2023
d6808d1
address PR comments.
sayakpaul Aug 1, 2023
5b7635b
change to AutoencoderTiny
sayakpaul Aug 1, 2023
4bd124d
make act_fn an str throughout
sayakpaul Aug 1, 2023
4ddc077
fix: apply_forward_hook decorator call
sayakpaul Aug 1, 2023
ee35ebd
get rid of the special is_tiny_vae flag.
sayakpaul Aug 1, 2023
4a606ef
directly scale the output.
sayakpaul Aug 1, 2023
56eade8
fix dummies?
sayakpaul Aug 1, 2023
ae3b107
fix: act_fn.
sayakpaul Aug 1, 2023
40966b8
get rid of the Clamp() layer.
sayakpaul Aug 2, 2023
0b04133
bring back copied from.
sayakpaul Aug 2, 2023
13f06fa
movement of the blocks to appropriate modules.
sayakpaul Aug 2, 2023
ef8772a
add: docstrings to AutoencoderTiny
sayakpaul Aug 2, 2023
8c0852c
add: documentation.
sayakpaul Aug 2, 2023
6f17fd3
changes to the conversion script.
sayakpaul Aug 2, 2023
29e3370
add doc entry.
sayakpaul Aug 2, 2023
817bb2b
settle tests.
sayakpaul Aug 2, 2023
2995de1
Merge branch 'main' into feat/tiny-autoenc
sayakpaul Aug 2, 2023
bbf0597
style
sayakpaul Aug 2, 2023
9105fb5
add one slow test.
sayakpaul Aug 2, 2023
ef3eae2
fix
sayakpaul Aug 2, 2023
e05b730
fix 2
sayakpaul Aug 2, 2023
0980796
fix 2
sayakpaul Aug 2, 2023
163c035
fix: 4
sayakpaul Aug 2, 2023
dd0b673
fix: 5
sayakpaul Aug 2, 2023
d7ab16f
finish integration tests
sayakpaul Aug 2, 2023
644d125
Apply suggestions from code review
sayakpaul Aug 2, 2023
e2fcccb
style
sayakpaul Aug 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions scripts/convert_tiny_autoencoder_to_diffusers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import argparse

from diffusers.utils import is_safetensors_available


if is_safetensors_available():
import safetensors.torch
else:
raise ImportError("Please install `safetensors`.")

from diffusers import TinyAutoencoder


"""
Example - From the diffusers root directory:

Download the weights:
```sh
$ wget -q https://huggingface.co/madebyollin/taesd/resolve/main/taesd_encoder.safetensors
$ wget -q https://huggingface.co/madebyollin/taesd/resolve/main/taesd_decoder.safetensors
```

Convert the model:
```sh
$ python scripts/convert_tiny_autoencoder_to_diffusers.py \
--encoder_ckpt_path taesd_encoder.safetensors \
--decoder_ckpt_path taesd_decoder.safetensors \
--dump_path taesd-diffusers
```
"""

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.")
    parser.add_argument(
        "--encoder_ckpt_path",
        default=None,
        type=str,
        required=True,
        help="Path to the encoder ckpt.",
    )
    parser.add_argument(
        "--decoder_ckpt_path",
        default=None,
        type=str,
        required=True,
        help="Path to the decoder ckpt.",
    )
    parser.add_argument(
        "--use_safetensors", action="store_true", help="Whether to serialize in the safetensors format."
    )
    args = parser.parse_args()

    print("Loading the original state_dicts of the encoder and the decoder...")
    encoder_state_dict = safetensors.torch.load_file(args.encoder_ckpt_path)
    decoder_state_dict = safetensors.torch.load_file(args.decoder_ckpt_path)

    print("Populating the state_dicts in the diffusers format...")
    tiny_autoencoder = TinyAutoencoder()
    # Remap the flat original checkpoint keys into the diffusers module hierarchy
    # (`encoder.layers.*` / `decoder.layers.*`) in a single pass each, instead of
    # building a throwaway one-entry dict per key inside a loop.
    new_state_dict = {f"encoder.layers.{k}": v for k, v in encoder_state_dict.items()}
    new_state_dict.update({f"decoder.layers.{k}": v for k, v in decoder_state_dict.items()})

    # Assertion tests with the original implementation can be found here:
    # https://gist.github.com/sayakpaul/337b0988f08bd2cf2b248206f760e28f
    # `load_state_dict` is strict by default, so any missing or unexpected key fails loudly.
    tiny_autoencoder.load_state_dict(new_state_dict)
    print("Population successful, serializing...")
    tiny_autoencoder.save_pretrained(args.dump_path, safe_serialization=args.use_safetensors)
1 change: 1 addition & 0 deletions src/diffusers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
PriorTransformer,
T2IAdapter,
T5FilmDecoder,
TinyAutoencoder,
Transformer2DModel,
UNet1DModel,
UNet2DConditionModel,
Expand Down
13 changes: 10 additions & 3 deletions src/diffusers/image_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,17 @@ def normalize(images):
return 2.0 * images - 1.0

@staticmethod
def denormalize(images):
def denormalize(images, is_tiny_vae):
"""
Denormalize an image array to [0,1].

Refer to https://github.com/madebyollin/taesd/issues/3#issuecomment-1657729279 to know why `is_tiny_vae`
exists.
"""
return (images / 2 + 0.5).clamp(0, 1)
if not is_tiny_vae:
return (images / 2 + 0.5).clamp(0, 1)
else:
return images.clamp(0, 1)

@staticmethod
def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
Expand Down Expand Up @@ -217,6 +223,7 @@ def postprocess(
image: torch.FloatTensor,
output_type: str = "pil",
do_denormalize: Optional[List[bool]] = None,
is_tiny_vae: bool = False,
):
if not isinstance(image, torch.Tensor):
raise ValueError(
Expand All @@ -237,7 +244,7 @@ def postprocess(
do_denormalize = [self.config.do_normalize] * image.shape[0]

image = torch.stack(
[self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
[self.denormalize(image[i], is_tiny_vae) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
)

if output_type == "pt":
Expand Down
1 change: 1 addition & 0 deletions src/diffusers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from .modeling_utils import ModelMixin
from .prior_transformer import PriorTransformer
from .t5_film_transformer import T5FilmDecoder
from .tiny_autoencoder import TinyAutoencoder
from .transformer_2d import Transformer2DModel
from .unet_1d import UNet1DModel
from .unet_2d import UNet2DModel
Expand Down
2 changes: 2 additions & 0 deletions src/diffusers/models/activations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,7 @@ def get_activation(act_fn):
return nn.Mish()
elif act_fn == "gelu":
return nn.GELU()
elif act_fn == "relu":
return nn.ReLU()
else:
raise ValueError(f"Unsupported activation function: {act_fn}")
Loading