From e5f71ae5355246b9560aaa9449a4b06f9fae4887 Mon Sep 17 00:00:00 2001 From: Qing Date: Tue, 30 Jan 2024 13:19:13 +0800 Subject: [PATCH] sd: load single file model using local config --- iopaint/download.py | 3 + iopaint/model/controlnet.py | 2 + iopaint/model/original_sd_configs/__init__.py | 19 ++++ .../model/original_sd_configs/sd_xl_base.yaml | 93 +++++++++++++++++++ .../original_sd_configs/sd_xl_refiner.yaml | 86 +++++++++++++++++ .../original_sd_configs/v1-inference.yaml | 70 ++++++++++++++ .../original_sd_configs/v2-inference-v.yaml | 68 ++++++++++++++ iopaint/model/sd.py | 2 + iopaint/model/sdxl.py | 2 + iopaint/tests/test_sd_model.py | 3 +- setup.py | 4 + .../components/SidePanel/DiffusionOptions.tsx | 2 +- 12 files changed, 352 insertions(+), 2 deletions(-) create mode 100644 iopaint/model/original_sd_configs/__init__.py create mode 100644 iopaint/model/original_sd_configs/sd_xl_base.yaml create mode 100644 iopaint/model/original_sd_configs/sd_xl_refiner.yaml create mode 100644 iopaint/model/original_sd_configs/v1-inference.yaml create mode 100644 iopaint/model/original_sd_configs/v2-inference-v.yaml diff --git a/iopaint/download.py b/iopaint/download.py index 5253206..ce916aa 100644 --- a/iopaint/download.py +++ b/iopaint/download.py @@ -14,6 +14,7 @@ from iopaint.const import ( DIFFUSERS_SDXL_INPAINT_CLASS_NAME, ANYTEXT_NAME, ) +from iopaint.model.original_sd_configs import get_config_files from iopaint.model_info import ModelInfo, ModelType @@ -60,6 +61,7 @@ def get_sd_model_type(model_abs_path: str) -> ModelType: load_safety_checker=False, local_files_only=True, num_in_channels=9, + config_files=get_config_files() ) model_type = ModelType.DIFFUSERS_SD_INPAINT except ValueError as e: @@ -84,6 +86,7 @@ def get_sdxl_model_type(model_abs_path: str) -> ModelType: load_safety_checker=False, local_files_only=True, num_in_channels=9, + config_files=get_config_files() ) if model.unet.config.in_channels == 9: # https://github.com/huggingface/diffusers/issues/6610 diff --git a/iopaint/model/controlnet.py b/iopaint/model/controlnet.py index 6ea80fd..c738b13 100644 --- a/iopaint/model/controlnet.py +++ b/iopaint/model/controlnet.py @@ -13,6 +13,7 @@ from .helper.controlnet_preprocess import ( make_inpaint_control_image, ) from .helper.cpu_text_encoder import CPUTextEncoderWrapper +from .original_sd_configs import get_config_files from .utils import ( get_scheduler, handle_from_pretrained_exceptions, @@ -101,6 +102,7 @@ class ControlNet(DiffusionInpaintModel): controlnet=controlnet, load_safety_checker=not disable_nsfw_checker, torch_dtype=torch_dtype, + config_files=get_config_files(), **model_kwargs, ) else: diff --git a/iopaint/model/original_sd_configs/__init__.py b/iopaint/model/original_sd_configs/__init__.py new file mode 100644 index 0000000..23896a7 --- /dev/null +++ b/iopaint/model/original_sd_configs/__init__.py @@ -0,0 +1,19 @@ +from pathlib import Path +from typing import Dict + +CURRENT_DIR = Path(__file__).parent.absolute() + + +def get_config_files() -> Dict[str, Path]: + """ + - `v1`: Config file for Stable Diffusion v1 + - `v2`: Config file for Stable Diffusion v2 + - `xl`: Config file for Stable Diffusion XL + - `xl_refiner`: Config file for Stable Diffusion XL Refiner + """ + return { + "v1": CURRENT_DIR / "v1-inference.yaml", + "v2": CURRENT_DIR / "v2-inference-v.yaml", + "xl": CURRENT_DIR / "sd_xl_base.yaml", + "xl_refiner": CURRENT_DIR / "sd_xl_refiner.yaml", + } diff --git a/iopaint/model/original_sd_configs/sd_xl_base.yaml b/iopaint/model/original_sd_configs/sd_xl_base.yaml new file mode 100644 index 0000000..6047379 --- /dev/null +++ b/iopaint/model/original_sd_configs/sd_xl_base.yaml @@ -0,0 +1,93 @@ +model: + target: sgm.models.diffusion.DiffusionEngine + params: + scale_factor: 0.13025 + disable_first_stage_autocast: True + + denoiser_config: + target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser + params: + num_idx: 1000 + + scaling_config: + target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling + discretization_config: + target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization + + network_config: + target: sgm.modules.diffusionmodules.openaimodel.UNetModel + params: + adm_in_channels: 2816 + num_classes: sequential + use_checkpoint: True + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [4, 2] + num_res_blocks: 2 + channel_mult: [1, 2, 4] + num_head_channels: 64 + use_linear_in_transformer: True + transformer_depth: [1, 2, 10] + context_dim: 2048 + spatial_transformer_attn_type: softmax-xformers + + conditioner_config: + target: sgm.modules.GeneralConditioner + params: + emb_models: + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenCLIPEmbedder + params: + layer: hidden + layer_idx: 11 + + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2 + params: + arch: ViT-bigG-14 + version: laion2b_s39b_b160k + freeze: True + layer: penultimate + always_return_pooled: True + legacy: False + + - is_trainable: False + input_key: original_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 + + - is_trainable: False + input_key: crop_coords_top_left + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 + + - is_trainable: False + input_key: target_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 + + first_stage_config: + target: sgm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + attn_type: vanilla-xformers + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: [1, 2, 4, 4] + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity diff --git a/iopaint/model/original_sd_configs/sd_xl_refiner.yaml b/iopaint/model/original_sd_configs/sd_xl_refiner.yaml new file mode 100644 index 0000000..2d5ab44 --- /dev/null +++ b/iopaint/model/original_sd_configs/sd_xl_refiner.yaml @@ -0,0 +1,86 @@ +model: + target: sgm.models.diffusion.DiffusionEngine + params: + scale_factor: 0.13025 + disable_first_stage_autocast: True + + denoiser_config: + target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser + params: + num_idx: 1000 + + scaling_config: + target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling + discretization_config: + target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization + + network_config: + target: sgm.modules.diffusionmodules.openaimodel.UNetModel + params: + adm_in_channels: 2560 + num_classes: sequential + use_checkpoint: True + in_channels: 4 + out_channels: 4 + model_channels: 384 + attention_resolutions: [4, 2] + num_res_blocks: 2 + channel_mult: [1, 2, 4, 4] + num_head_channels: 64 + use_linear_in_transformer: True + transformer_depth: 4 + context_dim: [1280, 1280, 1280, 1280] + spatial_transformer_attn_type: softmax-xformers + + conditioner_config: + target: sgm.modules.GeneralConditioner + params: + emb_models: + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2 + params: + arch: ViT-bigG-14 + version: laion2b_s39b_b160k + legacy: False + freeze: True + layer: penultimate + always_return_pooled: True + + - is_trainable: False + input_key: original_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 + + - is_trainable: False + input_key: crop_coords_top_left + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 + + - is_trainable: False + input_key: aesthetic_score + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 + + first_stage_config: + target: sgm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + attn_type: vanilla-xformers + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: [1, 2, 4, 4] + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity diff --git a/iopaint/model/original_sd_configs/v1-inference.yaml b/iopaint/model/original_sd_configs/v1-inference.yaml new file mode 100644 index 0000000..d4effe5 --- /dev/null +++ b/iopaint/model/original_sd_configs/v1-inference.yaml @@ -0,0 +1,70 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/iopaint/model/original_sd_configs/v2-inference-v.yaml b/iopaint/model/original_sd_configs/v2-inference-v.yaml new file mode 100644 index 0000000..8ec8dfb --- /dev/null +++ b/iopaint/model/original_sd_configs/v2-inference-v.yaml @@ -0,0 +1,68 @@ +model: + base_learning_rate: 1.0e-4 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + parameterization: "v" + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False # we set this to false because this is an inference only config + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + use_fp16: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" diff --git a/iopaint/model/sd.py b/iopaint/model/sd.py index 3e78cad..4f20a41 100644 --- a/iopaint/model/sd.py +++ b/iopaint/model/sd.py @@ -5,6 +5,7 @@ from loguru import logger from .base import DiffusionInpaintModel from .helper.cpu_text_encoder import CPUTextEncoderWrapper +from .original_sd_configs import get_config_files from .utils import ( handle_from_pretrained_exceptions, get_torch_dtype, @@ -51,6 +52,7 @@ class SD(DiffusionInpaintModel): self.model_id_or_path, dtype=torch_dtype, load_safety_checker=not disable_nsfw_checker, + config_files=get_config_files(), **model_kwargs, ) else: diff --git a/iopaint/model/sdxl.py b/iopaint/model/sdxl.py index 716d4ec..2557e71 100644 --- a/iopaint/model/sdxl.py +++ b/iopaint/model/sdxl.py @@ -10,6 +10,7 @@ from iopaint.schema import InpaintRequest, ModelType from .base import DiffusionInpaintModel from .helper.cpu_text_encoder import CPUTextEncoderWrapper +from .original_sd_configs import get_config_files from .utils import ( handle_from_pretrained_exceptions, get_torch_dtype, @@ -41,6 +42,7 @@ class SDXL(DiffusionInpaintModel): dtype=torch_dtype, num_in_channels=num_in_channels, load_safety_checker=False, + config_files=get_config_files() ) else: model_kwargs = { diff --git a/iopaint/tests/test_sd_model.py b/iopaint/tests/test_sd_model.py index c6e4d11..6865e5a 100644 --- a/iopaint/tests/test_sd_model.py +++ b/iopaint/tests/test_sd_model.py @@ -232,9 +232,10 @@ def test_runway_sd_1_5_cpu_offload(device, strategy, sampler): @pytest.mark.parametrize( "name", [ - "sd-v1-5-inpainting.ckpt", "sd-v1-5-inpainting.safetensors", "v1-5-pruned-emaonly.safetensors", + "sd_xl_base_1.0.safetensors", + "sd_xl_base_1.0_inpainting_0.1.safetensors", ], ) def test_local_file_path(device, sampler, name): diff --git a/setup.py b/setup.py index 02dc088..a358072 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,10 @@ package_files = Path("iopaint/web_app").glob("**/*") package_files = [str(it).replace("iopaint/", "") for it in package_files] package_files += ["model/anytext/ocr_recog/ppocr_keys_v1.txt"] package_files += ["model/anytext/anytext_sd15.yaml"] +package_files += ["model/original_sd_configs/sd_xl_base.yaml"] +package_files += ["model/original_sd_configs/sd_xl_refiner.yaml"] +package_files += ["model/original_sd_configs/v1-inference.yaml"] +package_files += ["model/original_sd_configs/v2-inference-v.yaml"] with open("README.md", "r", encoding="utf-8") as fh: diff --git a/web_app/src/components/SidePanel/DiffusionOptions.tsx b/web_app/src/components/SidePanel/DiffusionOptions.tsx index 265e1c7..19d6820 100644 --- a/web_app/src/components/SidePanel/DiffusionOptions.tsx +++ b/web_app/src/components/SidePanel/DiffusionOptions.tsx @@ -734,7 +734,7 @@ const DiffusionOptions = () => {