commit 371db2d771 (parent 0e5e16ba20)
Qing 2023-12-24 15:32:27 +08:00
31 changed files with 441 additions and 439 deletions

View File

@@ -10,11 +10,8 @@ from lama_cleaner.parse_args import parse_args

 def entry_point():
-    args = parse_args()
-    if args is None:
-        return
     # To make os.environ["XDG_CACHE_HOME"] = args.model_cache_dir works for diffusers
     # https://github.com/huggingface/diffusers/blob/be99201a567c1ccd841dc16fb24e88f7f239c187/src/diffusers/utils/constants.py#L18
-    from lama_cleaner.server import main
-    main(args)
+    from lama_cleaner.server import typer_app
+    typer_app()
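The console entry point no longer builds an argparse namespace and hands it to server.main; it just invokes the Typer application defined in lama_cleaner.server (typer_app, added later in this commit). A minimal sketch of the new wiring, with a hypothetical stand-in command since the real sub-commands live in server.py:

import typer

typer_app = typer.Typer(pretty_exceptions_show_locals=False, add_completion=False)

@typer_app.command()
def start(host: str = "127.0.0.1", port: int = 8080):
    # Stand-in for lama_cleaner.server.start; Typer parses sys.argv itself.
    print(f"would listen on {host}:{port}")

def entry_point():
    typer_app()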

View File

@@ -103,6 +103,5 @@ if __name__ == "__main__":
         device=device,
         disable_nsfw=True,
         sd_cpu_textencoder=True,
-        hf_access_token="123"
     )
     benchmark(model, args.times, args.empty_cache)

View File

@@ -21,19 +21,17 @@ AVAILABLE_MODELS = [
     "zits",
     "mat",
     "fcf",
-    "manga",
-    "cv2",
     "sd1.5",
-    "sdxl",
     "anything4",
     "realisticVision1.4",
+    "cv2",
+    "manga",
     "sd2",
+    "sdxl",
     "paint_by_example",
     "instruct_pix2pix",
     "kandinsky2.2",
-    "sdxl",
 ]
-SD15_MODELS = ["sd1.5", "anything4", "realisticVision1.4"]
 DIFFUSERS_MODEL_FP16_REVERSION = [
     "runwayml/stable-diffusion-inpainting",
     "Sanster/anything-4.0-inpainting",
@@ -46,26 +44,22 @@ AVAILABLE_DEVICES = ["cuda", "cpu", "mps"]
 DEFAULT_DEVICE = "cuda"
 NO_HALF_HELP = """
-Using full precision model.
-If your generate result is always black or green, use this argument. (sd/paint_by_exmaple)
+Using full precision(fp32) model.
+If your diffusion model generate result is always black or green, use this argument.
 """
 CPU_OFFLOAD_HELP = """
-Offloads all models to CPU, significantly reducing vRAM usage. (sd/paint_by_example)
+Offloads diffusion model's weight to CPU RAM, significantly reducing vRAM usage.
 """
 DISABLE_NSFW_HELP = """
-Disable NSFW checker. (sd/paint_by_example)
+Disable NSFW checker for diffusion model.
 """
-SD_CPU_TEXTENCODER_HELP = """
-Run Stable Diffusion text encoder model on CPU to save GPU memory.
+CPU_TEXTENCODER_HELP = """
+Run diffusion models text encoder on CPU to reduce vRAM usage.
 """
-SD_CONTROLNET_HELP = """
-Run Stable Diffusion normal or inpainting model with ControlNet.
-"""
-DEFAULT_SD_CONTROLNET_METHOD = "lllyasviel/control_v11p_sd15_canny"
 SD_CONTROLNET_CHOICES = [
     "lllyasviel/control_v11p_sd15_canny",
     # "lllyasviel/control_v11p_sd15_seg",
@@ -74,46 +68,36 @@ SD_CONTROLNET_CHOICES = [
     "lllyasviel/control_v11f1p_sd15_depth",
 ]
-DEFAULT_SD2_CONTROLNET_METHOD = "thibaud/controlnet-sd21-canny-diffusers"
 SD2_CONTROLNET_CHOICES = [
     "thibaud/controlnet-sd21-canny-diffusers",
     "thibaud/controlnet-sd21-depth-diffusers",
     "thibaud/controlnet-sd21-openpose-diffusers",
 ]
-DEFAULT_SDXL_CONTROLNET_METHOD = "diffusers/controlnet-canny-sdxl-1.0"
 SDXL_CONTROLNET_CHOICES = [
     "thibaud/controlnet-openpose-sdxl-1.0",
-    "destitech/controlnet-inpaint-dreamer-sdxl"
+    "destitech/controlnet-inpaint-dreamer-sdxl",
     "diffusers/controlnet-canny-sdxl-1.0",
     "diffusers/controlnet-canny-sdxl-1.0-mid",
-    "diffusers/controlnet-canny-sdxl-1.0-small"
+    "diffusers/controlnet-canny-sdxl-1.0-small",
     "diffusers/controlnet-depth-sdxl-1.0",
     "diffusers/controlnet-depth-sdxl-1.0-mid",
     "diffusers/controlnet-depth-sdxl-1.0-small",
 ]
-SD_LOCAL_MODEL_HELP = """
-Load Stable Diffusion 1.5 model(ckpt/safetensors) from local path.
-"""
 LOCAL_FILES_ONLY_HELP = """
-Use local files only, not connect to Hugging Face server. (sd/paint_by_example)
-"""
-ENABLE_XFORMERS_HELP = """
-Enable xFormers optimizations. Requires xformers package has been installed. See: https://github.com/facebookresearch/xformers (sd/paint_by_example)
+When loading diffusion models, using local files only, not connect to HuggingFace server.
 """
 DEFAULT_MODEL_DIR = os.getenv(
     "XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache")
 )
-MODEL_DIR_HELP = """
-Model download directory (by setting XDG_CACHE_HOME environment variable), by default model downloaded to ~/.cache
+MODEL_DIR_HELP = f"""
+Model download directory (by setting XDG_CACHE_HOME environment variable), by default model download to {DEFAULT_MODEL_DIR}
 """
 OUTPUT_DIR_HELP = """
-Result images will be saved to output directory automatically without confirmation.
+Result images will be saved to output directory automatically.
 """
 INPUT_HELP = """
INPUT_HELP = """ INPUT_HELP = """
@@ -125,37 +109,45 @@ GUI_HELP = """
 Launch Lama Cleaner as desktop app
 """
-NO_GUI_AUTO_CLOSE_HELP = """
-Prevent backend auto close after the GUI window closed.
-"""
 QUALITY_HELP = """
 Quality of image encoding, 0-100. Default is 95, higher quality will generate larger file size.
 """

-class RealESRGANModelName(str, Enum):
+class Choices(str, Enum):
+    @classmethod
+    def values(cls):
+        return [member.value for member in cls]
+
+
+class RealESRGANModel(Choices):
     realesr_general_x4v3 = "realesr-general-x4v3"
     RealESRGAN_x4plus = "RealESRGAN_x4plus"
     RealESRGAN_x4plus_anime_6B = "RealESRGAN_x4plus_anime_6B"

-RealESRGANModelNameList = [e.value for e in RealESRGANModelName]
+class Device(Choices):
+    cpu = "cpu"
+    cuda = "cuda"
+    mps = "mps"
+
+
+class InteractiveSegModel(Choices):
+    vit_b = "vit_b"
+    vit_l = "vit_l"
+    vit_h = "vit_h"
+    mobile_sam = "mobile_sam"

 INTERACTIVE_SEG_HELP = "Enable interactive segmentation using Segment Anything."
 INTERACTIVE_SEG_MODEL_HELP = "Model size: vit_b < vit_l < vit_h. Bigger model size means better segmentation but slower speed."
-AVAILABLE_INTERACTIVE_SEG_MODELS = ["vit_b", "vit_l", "vit_h", "vit_t"]
-AVAILABLE_INTERACTIVE_SEG_DEVICES = ["cuda", "cpu", "mps"]
 REMOVE_BG_HELP = "Enable remove background. Always run on CPU"
 ANIMESEG_HELP = "Enable anime segmentation. Always run on CPU"
 REALESRGAN_HELP = "Enable realesrgan super resolution"
-REALESRGAN_AVAILABLE_DEVICES = ["cpu", "cuda", "mps"]
 GFPGAN_HELP = (
     "Enable GFPGAN face restore. To enhance background, use with --enable-realesrgan"
 )
-GFPGAN_AVAILABLE_DEVICES = ["cpu", "cuda", "mps"]
 RESTOREFORMER_HELP = "Enable RestoreFormer face restore. To enhance background, use with --enable-realesrgan"
-RESTOREFORMER_AVAILABLE_DEVICES = ["cpu", "cuda", "mps"]
 GIF_HELP = "Enable GIF plugin. Make GIF to compare original and cleaned image"
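Choices combines str and Enum so each member is its own string value: members compare equal to plain strings (existing call sites keep working) and the subclasses can be handed to Typer directly as CLI choice types. A standalone sketch of the behavior, mirroring the Device subclass defined above:

from enum import Enum

class Choices(str, Enum):
    @classmethod
    def values(cls):
        return [member.value for member in cls]

class Device(Choices):
    cpu = "cpu"
    cuda = "cuda"
    mps = "mps"

# str subclassing means members compare equal to raw strings,
# so code that passes "cuda" around keeps working unchanged:
assert Device.cuda == "cuda"
assert Device.values() == ["cpu", "cuda", "mps"]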
@@ -164,8 +156,6 @@ class Config(BaseModel):
     port: int = 8080
     model: str = DEFAULT_MODEL
     sd_local_model_path: str = None
-    sd_controlnet: bool = False
-    sd_controlnet_method: str = DEFAULT_SD_CONTROLNET_METHOD
     device: str = DEFAULT_DEVICE
     gui: bool = False
     no_gui_auto_close: bool = False
@@ -173,7 +163,6 @@ class Config(BaseModel):
     cpu_offload: bool = False
     disable_nsfw: bool = False
     sd_cpu_textencoder: bool = False
-    enable_xformers: bool = False
     local_files_only: bool = False
     model_dir: str = DEFAULT_MODEL_DIR
     input: str = None
@@ -186,7 +175,7 @@ class Config(BaseModel):
     enable_anime_seg: bool = False
     enable_realesrgan: bool = False
     realesrgan_device: str = "cpu"
-    realesrgan_model: str = RealESRGANModelName.realesr_general_x4v3.value
+    realesrgan_model: str = RealESRGANModel.realesr_general_x4v3.value
     realesrgan_no_half: bool = False
     enable_gfpgan: bool = False
     gfpgan_device: str = "cpu"

View File

@@ -6,6 +6,7 @@ from loguru import logger
 from pathlib import Path

 from lama_cleaner.const import DIFFUSERS_MODEL_FP16_REVERSION, DEFAULT_MODEL_DIR
+from lama_cleaner.runtime import setup_model_dir
 from lama_cleaner.schema import (
     ModelInfo,
     ModelType,
@@ -16,16 +17,8 @@ from lama_cleaner.schema import (
 )

-def cli_download_model(model: str, model_dir: str):
-    if os.path.isfile(model_dir):
-        raise ValueError(f"invalid --model-dir: {model_dir} is a file")
-
-    if not os.path.exists(model_dir):
-        logger.info(f"Create model cache directory: {model_dir}")
-        Path(model_dir).mkdir(exist_ok=True, parents=True)
-
-    os.environ["XDG_CACHE_HOME"] = model_dir
+def cli_download_model(model: str, model_dir: Path):
+    setup_model_dir(model_dir)

     from lama_cleaner.model import models

     if model in models:
@@ -38,7 +31,7 @@ def cli_download_model(model: str, model_dir: str):
         downloaded_path = DiffusionPipeline.download(
             pretrained_model_name=model,
-            revision="fp16" if model in DIFFUSERS_MODEL_FP16_REVERSION else "main",
+            variant="fp16" if model in DIFFUSERS_MODEL_FP16_REVERSION else "main",
             resume_download=True,
         )
         logger.info(f"Done. Downloaded to {downloaded_path}")
@@ -101,7 +94,7 @@ def scan_inpaint_models() -> List[ModelInfo]:
     from lama_cleaner.model import models

     for name, m in models.items():
-        if m.is_erase_model:
+        if m.is_erase_model and m.is_downloaded():
             res.append(
                 ModelInfo(
                     name=name,

View File

@@ -41,7 +41,7 @@ class InpaintModel:
     @staticmethod
     @abc.abstractmethod
     def is_downloaded() -> bool:
-        ...
+        return False

     @abc.abstractmethod
     def forward(self, image, mask, config: Config):
@@ -67,6 +67,8 @@ class InpaintModel:
             logger.info(f"final forward pad size: {pad_image.shape}")

+        image, mask = self.forward_pre_process(image, mask, config)
+
         result = self.forward(pad_image, pad_mask, config)
         result = result[0:origin_height, 0:origin_width, :]
@@ -77,6 +79,9 @@ class InpaintModel:
         result = result * (mask / 255) + image[:, :, ::-1] * (1 - (mask / 255))
         return result

+    def forward_pre_process(self, image, mask, config):
+        return image, mask
+
     def forward_post_process(self, result, image, mask, config):
         return result, image, mask
@@ -400,6 +405,13 @@ class DiffusionInpaintModel(InpaintModel):
         scheduler = get_scheduler(sd_sampler, scheduler_config)
         self.model.scheduler = scheduler

+    def forward_pre_process(self, image, mask, config):
+        if config.sd_mask_blur != 0:
+            k = 2 * config.sd_mask_blur + 1
+            mask = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
+
+        return image, mask
+
     def forward_post_process(self, result, image, mask, config):
         if config.sd_match_histograms:
             result = self._match_histograms(result, image[:, :, ::-1], mask)
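The per-model mask-blur blocks deleted from controlnet.py, kandinsky.py, sd.py, and sdxl.py below all move into this single forward_pre_process hook. The expression k = 2 * sd_mask_blur + 1 turns the user-facing blur radius into the odd kernel size that cv2.GaussianBlur requires. A standalone illustration, assuming an 8-bit single-channel mask:

import cv2
import numpy as np

sd_mask_blur = 4                     # blur radius from Config
mask = np.zeros((64, 64), np.uint8)  # hypothetical hard-edged mask
mask[16:48, 16:48] = 255

k = 2 * sd_mask_blur + 1             # 9: ksize must be odd and positive
soft = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
print(soft.shape)                    # (64, 64, 1), channel axis kept for later broadcasting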

View File

@@ -17,14 +17,6 @@ from lama_cleaner.model.helper.cpu_text_encoder import CPUTextEncoderWrapper
 from lama_cleaner.model.utils import get_scheduler
 from lama_cleaner.schema import Config, ModelInfo, ModelType

-# For backward compatibility
-controlnet_name_map = {
-    "control_v11p_sd15_canny": "lllyasviel/control_v11p_sd15_canny",
-    "control_v11p_sd15_openpose": "lllyasviel/control_v11p_sd15_openpose",
-    "control_v11p_sd15_inpaint": "lllyasviel/control_v11p_sd15_inpaint",
-    "control_v11f1p_sd15_depth": "lllyasviel/control_v11f1p_sd15_depth",
-}

 class ControlNet(DiffusionInpaintModel):
     name = "controlnet"
@@ -49,9 +41,6 @@ class ControlNet(DiffusionInpaintModel):
         fp16 = not kwargs.get("no_half", False)
         model_info: ModelInfo = kwargs["model_info"]
         sd_controlnet_method = kwargs["sd_controlnet_method"]
-        sd_controlnet_method = controlnet_name_map.get(
-            sd_controlnet_method, sd_controlnet_method
-        )

         self.model_info = model_info
         self.sd_controlnet_method = sd_controlnet_method
@@ -113,12 +102,6 @@ class ControlNet(DiffusionInpaintModel):
             **model_kwargs,
         )

-        # https://huggingface.co/docs/diffusers/v0.7.0/en/api/pipelines/stable_diffusion#diffusers.StableDiffusionInpaintPipeline.enable_attention_slicing
-        self.model.enable_attention_slicing()
-        # https://huggingface.co/docs/diffusers/v0.7.0/en/optimization/fp16#memory-efficient-attention
-        if kwargs.get("enable_xformers", False):
-            self.model.enable_xformers_memory_efficient_attention()
-
         if kwargs.get("cpu_offload", False) and use_gpu:
             logger.info("Enable sequential cpu offload")
             self.model.enable_sequential_cpu_offload(gpu_id=0)
@@ -162,10 +145,6 @@ class ControlNet(DiffusionInpaintModel):
         scheduler = get_scheduler(config.sd_sampler, scheduler_config)
         self.model.scheduler = scheduler

-        if config.sd_mask_blur != 0:
-            k = 2 * config.sd_mask_blur + 1
-            mask = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
-
         img_h, img_w = image.shape[:2]
         control_image = self._get_control_image(image, mask)
         mask_image = PIL.Image.fromarray(mask[:, :, -1], mode="L")
@@ -190,8 +169,3 @@ class ControlNet(DiffusionInpaintModel):
         output = (output * 255).round().astype("uint8")
         output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
         return output
-
-    @staticmethod
-    def is_downloaded() -> bool:
-        # model will be downloaded when app start, and can't switch in frontend settings
-        return True

View File

@@ -31,30 +31,15 @@ class InstructPix2Pix(DiffusionInpaintModel):
         use_gpu = device == torch.device("cuda") and torch.cuda.is_available()
         torch_dtype = torch.float16 if use_gpu and fp16 else torch.float32
         self.model = StableDiffusionInstructPix2PixPipeline.from_pretrained(
-            "timbrooks/instruct-pix2pix",
-            revision="fp16" if use_gpu and fp16 else "main",
-            torch_dtype=torch_dtype,
-            **model_kwargs
+            self.name, variant="fp16", torch_dtype=torch_dtype, **model_kwargs
         )
-        self.model.enable_attention_slicing()
-        if kwargs.get("enable_xformers", False):
-            self.model.enable_xformers_memory_efficient_attention()
+
         if kwargs.get("cpu_offload", False) and use_gpu:
             logger.info("Enable sequential cpu offload")
             self.model.enable_sequential_cpu_offload(gpu_id=0)
         else:
             self.model = self.model.to(device)

-    @staticmethod
-    def download():
-        from diffusers import StableDiffusionInstructPix2PixPipeline
-
-        StableDiffusionInstructPix2PixPipeline.from_pretrained(
-            "timbrooks/instruct-pix2pix", revision="fp16"
-        )
-
     def forward(self, image, mask, config: Config):
         """Input image and output image have same size
         image: [H, W, C] RGB
@@ -76,8 +61,3 @@ class InstructPix2Pix(DiffusionInpaintModel):
         output = (output * 255).round().astype("uint8")
         output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
         return output
-
-    @staticmethod
-    def is_downloaded() -> bool:
-        # model will be downloaded when app start, and can't switch in frontend settings
-        return True

View File

@@ -24,7 +24,7 @@ class Kandinsky(DiffusionInpaintModel):
         }

         self.model = AutoPipelineForInpainting.from_pretrained(
-            self.model_name, **model_kwargs
+            self.model_id_or_path, **model_kwargs
         ).to(device)

         self.callback = kwargs.pop("callback", None)
@@ -40,9 +40,6 @@ class Kandinsky(DiffusionInpaintModel):
         self.model.scheduler = scheduler

         generator = torch.manual_seed(config.sd_seed)
-        if config.sd_mask_blur != 0:
-            k = 2 * config.sd_mask_blur + 1
-            mask = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
         mask = mask.astype(np.float32) / 255
         img_h, img_w = image.shape[:2]
@@ -66,20 +63,7 @@ class Kandinsky(DiffusionInpaintModel):
         output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
         return output

-    @staticmethod
-    def is_downloaded() -> bool:
-        # model will be downloaded when app start, and can't switch in frontend settings
-        return True

 class Kandinsky22(Kandinsky):
-    name = "kandinsky-community/kandinsky-2-2-decoder-inpaint"
-    model_name = "kandinsky-community/kandinsky-2-2-decoder-inpaint"
-
-    @staticmethod
-    def download():
-        from diffusers import AutoPipelineForInpainting
-
-        AutoPipelineForInpainting.from_pretrained(
-            "kandinsky-community/kandinsky-2-2-decoder-inpaint"
-        )
+    name = "kandinsky2.2"
+    model_id_or_path = "kandinsky-community/kandinsky-2-2-decoder-inpaint"

View File

@@ -31,10 +31,6 @@ class PaintByExample(DiffusionInpaintModel):
             "Fantasy-Studio/Paint-by-Example", torch_dtype=torch_dtype, **model_kwargs
         )

-        self.model.enable_attention_slicing()
-        if kwargs.get("enable_xformers", False):
-            self.model.enable_xformers_memory_efficient_attention()
-
         # TODO: gpu_id
         if kwargs.get("cpu_offload", False) and use_gpu:
             self.model.image_encoder = self.model.image_encoder.to(device)
@@ -68,8 +64,3 @@ class PaintByExample(DiffusionInpaintModel):
         output = (output * 255).round().astype("uint8")
         output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
         return output
-
-    @staticmethod
-    def is_downloaded() -> bool:
-        # model will be downloaded when app start, and can't switch in frontend settings
-        return True

View File

@@ -1,8 +1,5 @@
-import os
-
 import PIL.Image
 import cv2
-import numpy as np
 import torch
 from loguru import logger
@@ -49,23 +46,12 @@ class SD(DiffusionInpaintModel):
         self.model = StableDiffusionInpaintPipeline.from_pretrained(
             self.model_id_or_path,
             revision="fp16"
-            if (
-                self.model_id_or_path in DIFFUSERS_MODEL_FP16_REVERSION
-                and use_gpu
-                and fp16
-            )
+            if self.model_id_or_path in DIFFUSERS_MODEL_FP16_REVERSION
             else "main",
             torch_dtype=torch_dtype,
-            use_auth_token=kwargs["hf_access_token"],
             **model_kwargs,
         )

-        # https://huggingface.co/docs/diffusers/v0.7.0/en/api/pipelines/stable_diffusion#diffusers.StableDiffusionInpaintPipeline.enable_attention_slicing
-        self.model.enable_attention_slicing()
-        # https://huggingface.co/docs/diffusers/v0.7.0/en/optimization/fp16#memory-efficient-attention
-        if kwargs.get("enable_xformers", False):
-            self.model.enable_xformers_memory_efficient_attention()
-
         if kwargs.get("cpu_offload", False) and use_gpu:
             # TODO: gpu_id
             logger.info("Enable sequential cpu offload")
@@ -88,10 +74,6 @@ class SD(DiffusionInpaintModel):
         """
         self.set_scheduler(config)

-        if config.sd_mask_blur != 0:
-            k = 2 * config.sd_mask_blur + 1
-            mask = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
-
         img_h, img_w = image.shape[:2]

         output = self.model(
@@ -114,17 +96,6 @@ class SD(DiffusionInpaintModel):
         output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
         return output

-    @staticmethod
-    def is_downloaded() -> bool:
-        # model will be downloaded when app start, and can't switch in frontend settings
-        return True
-
-    @classmethod
-    def download(cls):
-        from diffusers import StableDiffusionInpaintPipeline
-
-        StableDiffusionInpaintPipeline.from_pretrained(cls.model_id_or_path)
-

 class SD15(SD):
     name = "sd1.5"

View File

@@ -45,16 +45,9 @@ class SDXL(DiffusionInpaintModel):
             self.model_id_or_path,
             revision="main",
             torch_dtype=torch_dtype,
-            use_auth_token=kwargs["hf_access_token"],
             vae=vae,
         )

-        # https://huggingface.co/docs/diffusers/v0.7.0/en/api/pipelines/stable_diffusion#diffusers.StableDiffusionInpaintPipeline.enable_attention_slicing
-        self.model.enable_attention_slicing()
-        # https://huggingface.co/docs/diffusers/v0.7.0/en/optimization/fp16#memory-efficient-attention
-        if kwargs.get("enable_xformers", False):
-            self.model.enable_xformers_memory_efficient_attention()
-
         if kwargs.get("cpu_offload", False) and use_gpu:
             logger.info("Enable sequential cpu offload")
             self.model.enable_sequential_cpu_offload(gpu_id=0)
@@ -65,14 +58,6 @@ class SDXL(DiffusionInpaintModel):

         self.callback = kwargs.pop("callback", None)

-    @staticmethod
-    def download():
-        from diffusers import AutoPipelineForInpainting
-
-        AutoPipelineForInpainting.from_pretrained(
-            "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
-        )
-
     def forward(self, image, mask, config: Config):
         """Input image and output image have same size
         image: [H, W, C] RGB
@@ -81,10 +66,6 @@ class SDXL(DiffusionInpaintModel):
         """
         self.set_scheduler(config)

-        if config.sd_mask_blur != 0:
-            k = 2 * config.sd_mask_blur + 1
-            mask = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
-
         img_h, img_w = image.shape[:2]

         output = self.model(
@@ -106,8 +87,3 @@ class SDXL(DiffusionInpaintModel):
         output = (output * 255).round().astype("uint8")
         output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)
         return output
-
-    @staticmethod
-    def is_downloaded() -> bool:
-        # model will be downloaded when app start, and can't switch in frontend settings
-        return True

View File

@@ -3,7 +3,6 @@ from typing import List, Dict
 import torch
 from loguru import logger

-from lama_cleaner.const import DEFAULT_SD_CONTROLNET_METHOD
 from lama_cleaner.download import scan_models
 from lama_cleaner.helper import switch_mps_device
 from lama_cleaner.model import models, ControlNet, SD, SDXL
@@ -19,16 +18,25 @@ class ModelManager:
         self.available_models: Dict[str, ModelInfo] = {}
         self.scan_models()

-        self.sd_controlnet = kwargs.get("sd_controlnet", False)
-        self.sd_controlnet_method = kwargs.get(
-            "sd_controlnet_method", DEFAULT_SD_CONTROLNET_METHOD
-        )
+        self.sd_controlnet = False
+        self.sd_controlnet_method = ""

         self.model = self.init_model(name, device, **kwargs)

-    def init_model(self, name: str, device, **kwargs):
+    def _map_old_name(self, name: str) -> str:
         for old_name, model_cls in models.items():
             if name == old_name and hasattr(model_cls, "model_id_or_path"):
                 name = model_cls.model_id_or_path
+                break
+        return name
+
+    @property
+    def current_model(self) -> Dict:
+        name = self._map_old_name(self.name)
+        return self.available_models[name].model_dump()
+
+    def init_model(self, name: str, device, **kwargs):
+        name = self._map_old_name(name)
+        logger.info(f"Loading model: {name}")
         if name not in self.available_models:
             raise NotImplementedError(f"Unsupported model: {name}")
@@ -86,6 +94,7 @@ class ModelManager:
         ):
             self.sd_controlnet_method = self.available_models[new_name].controlnets[0]
         try:
+            # TODO: enable/disable controlnet without reload model
             del self.model
             torch_gc()
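_map_old_name keeps configs written against the pre-refactor short names working by rewriting them to the HuggingFace ids the registry now uses; the same pairs appear in the model_name_map deleted from server.py below. An illustrative reduction of the behavior:

# Illustrative only: a few short-name -> HF-id pairs, taken from the
# model_name_map removed from server.py in this same commit.
legacy_map = {
    "sd1.5": "runwayml/stable-diffusion-inpainting",
    "sd2": "stabilityai/stable-diffusion-2-inpainting",
    "sdxl": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    "kandinsky2.2": "kandinsky-community/kandinsky-2-2-decoder-inpaint",
}

def map_old_name(name: str) -> str:
    # Mirrors ModelManager._map_old_name: unknown names pass through untouched.
    return legacy_map.get(name, name)

assert map_old_name("sd1.5") == "runwayml/stable-diffusion-inpainting"
assert map_old_name("lama") == "lama"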

View File

@@ -55,7 +55,7 @@ def parse_args():
     parser.add_argument("--cpu-offload", action="store_true", help=CPU_OFFLOAD_HELP)
     parser.add_argument("--disable-nsfw", action="store_true", help=DISABLE_NSFW_HELP)
     parser.add_argument(
-        "--sd-cpu-textencoder", action="store_true", help=SD_CPU_TEXTENCODER_HELP
+        "--sd-cpu-textencoder", action="store_true", help=CPU_TEXTENCODER_HELP
     )
     parser.add_argument("--sd-controlnet", action="store_true", help=SD_CONTROLNET_HELP)
     parser.add_argument(
@@ -66,16 +66,10 @@ def parse_args():
     parser.add_argument(
         "--local-files-only", action="store_true", help=LOCAL_FILES_ONLY_HELP
     )
-    parser.add_argument(
-        "--enable-xformers", action="store_true", help=ENABLE_XFORMERS_HELP
-    )
     parser.add_argument(
         "--device", default=DEFAULT_DEVICE, type=str, choices=AVAILABLE_DEVICES
     )
     parser.add_argument("--gui", action="store_true", help=GUI_HELP)
-    parser.add_argument(
-        "--no-gui-auto-close", action="store_true", help=NO_GUI_AUTO_CLOSE_HELP
-    )
     parser.add_argument(
         "--gui-size",
         default=[1600, 1000],

View File

@@ -22,7 +22,7 @@ SEGMENT_ANYTHING_MODELS = {
         "url": "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
         "md5": "4b8939a88964f0f4ff5f5b2642c598a6",
     },
-    "vit_t": {
+    "mobile_sam": {
         "url": "https://github.com/Sanster/models/releases/download/MobileSAM/mobile_sam.pt",
         "md5": "f3c0d8cda613564d499310dab6c812cd",
     },

View File

@@ -3,7 +3,7 @@ from enum import Enum
 import cv2
 from loguru import logger

-from lama_cleaner.const import RealESRGANModelName
+from lama_cleaner.const import RealESRGANModel
 from lama_cleaner.helper import download_model
 from lama_cleaner.plugins.base_plugin import BasePlugin
@@ -18,7 +18,7 @@ class RealESRGANUpscaler(BasePlugin):
         from realesrgan.archs.srvgg_arch import SRVGGNetCompact

         REAL_ESRGAN_MODELS = {
-            RealESRGANModelName.realesr_general_x4v3: {
+            RealESRGANModel.realesr_general_x4v3: {
                 "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
                 "scale": 4,
                 "model": lambda: SRVGGNetCompact(
@@ -31,7 +31,7 @@ class RealESRGANUpscaler(BasePlugin):
                 ),
                 "model_md5": "91a7644643c884ee00737db24e478156",
             },
-            RealESRGANModelName.RealESRGAN_x4plus: {
+            RealESRGANModel.RealESRGAN_x4plus: {
                 "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
                 "scale": 4,
                 "model": lambda: RRDBNet(
@@ -44,7 +44,7 @@ class RealESRGANUpscaler(BasePlugin):
                 ),
                 "model_md5": "99ec365d4afad750833258a1a24f44ca",
             },
-            RealESRGANModelName.RealESRGAN_x4plus_anime_6B: {
+            RealESRGANModel.RealESRGAN_x4plus_anime_6B: {
                 "url": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
                 "scale": 4,
                 "model": lambda: RRDBNet(

View File

@@ -109,7 +109,7 @@ sam_model_registry = {
     "vit_h": build_sam,
     "vit_l": build_sam_vit_l,
     "vit_b": build_sam_vit_b,
-    "vit_t": build_sam_vit_t,
+    "mobile_sam": build_sam_vit_t,
 }

View File

@@ -1,10 +1,16 @@
 # https://github.com/huggingface/huggingface_hub/blob/5a12851f54bf614be39614034ed3a9031922d297/src/huggingface_hub/utils/_runtime.py
+import os
 import platform
 import sys
+from pathlib import Path

 import packaging.version
+from loguru import logger
 from rich import print
 from typing import Dict, Any

+from lama_cleaner.const import Device
+
 _PY_VERSION: str = sys.version.split()[0].rstrip("+")
 if packaging.version.Version(_PY_VERSION) < packaging.version.Version("3.8.0"):
@@ -21,7 +27,6 @@ _CANDIDATES = [
     "diffusers",
     "transformers",
     "opencv-python",
-    "xformers",
     "accelerate",
     "lama-cleaner",
     "rembg",
@@ -38,7 +43,7 @@ for name in _CANDIDATES:

 def dump_environment_info() -> Dict[str, str]:
-    """Dump information about the machine to help debugging issues. """
+    """Dump information about the machine to help debugging issues."""

     # Generic machine info
     info: Dict[str, Any] = {
@@ -48,3 +53,34 @@ def dump_environment_info() -> Dict[str, str]:
     info.update(_package_versions)
     print("\n".join([f"- {prop}: {val}" for prop, val in info.items()]) + "\n")
     return info
+
+
+def check_device(device: Device) -> Device:
+    if device == Device.cuda:
+        import platform
+
+        if platform.system() == "Darwin":
+            logger.warning("MacOS does not support cuda, use cpu instead")
+            return Device.cpu
+        else:
+            import torch
+
+            if not torch.cuda.is_available():
+                logger.warning("CUDA is not available, use cpu instead")
+                return Device.cpu
+    elif device == Device.mps:
+        import torch
+
+        if not torch.backends.mps.is_available():
+            logger.warning("mps is not available, use cpu instead")
+            return Device.cpu
+    return device
+
+
+def setup_model_dir(model_dir: Path):
+    model_dir = model_dir.expanduser().absolute()
+    os.environ["U2NET_HOME"] = str(model_dir)
+    os.environ["XDG_CACHE_HOME"] = str(model_dir)
+    if not model_dir.exists():
+        logger.info(f"Create model directory: {model_dir}")
+        model_dir.mkdir(exist_ok=True, parents=True)
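check_device downgrades impossible device requests to CPU up front instead of letting torch fail later during model loading, and setup_model_dir becomes the single owner of cache-directory plumbing (it also replaces the ad-hoc directory handling deleted from download.py above). A hedged usage sketch, assuming the package is installed and importable:

from pathlib import Path

from lama_cleaner.const import Device
from lama_cleaner.runtime import check_device, setup_model_dir

# On a Mac, or any box without CUDA, the request falls back to cpu
# with a logged warning instead of raising deep inside model loading.
device = check_device(Device.cuda)
print(device)  # Device.cpu on machines without CUDA

# One call now owns all cache plumbing: XDG_CACHE_HOME for diffusers/HF,
# U2NET_HOME for rembg, and it creates the directory if needed.
setup_model_dir(Path("~/.cache/lama-cleaner"))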

View File

@@ -1,10 +1,18 @@
 #!/usr/bin/env python3
 import json
 import os
-import hashlib
-import traceback
+
+import typer
+from typer import Option
+
+from lama_cleaner.download import cli_download_model, scan_models
+from lama_cleaner.runtime import setup_model_dir, dump_environment_info, check_device

 os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

+import hashlib
+import traceback
+from dataclasses import dataclass
+
 import imghdr
 import io
@@ -20,12 +28,7 @@ import torch
 from PIL import Image
 from loguru import logger

-from lama_cleaner.const import (
-    SD15_MODELS,
-    SD_CONTROLNET_CHOICES,
-    SDXL_CONTROLNET_CHOICES,
-    SD2_CONTROLNET_CHOICES,
-)
+from lama_cleaner.const import *
 from lama_cleaner.file_manager import FileManager
 from lama_cleaner.model.utils import torch_gc
 from lama_cleaner.model_manager import ModelManager
@@ -39,6 +42,8 @@ from lama_cleaner.plugins import (
 )
 from lama_cleaner.schema import Config

+typer_app = typer.Typer(pretty_exceptions_show_locals=False, add_completion=False)
+
 try:
     torch._C._jit_override_can_fuse_on_cpu(False)
     torch._C._jit_override_can_fuse_on_gpu(False)
@@ -103,23 +108,34 @@ logging.getLogger("werkzeug").addFilter(NoFlaskwebgui())
 app = Flask(__name__, static_folder=os.path.join(BUILD_DIR, "static"))
 app.config["JSON_AS_ASCII"] = False
-CORS(app, expose_headers=["Content-Disposition", "X-seed"])
+CORS(app, expose_headers=["Content-Disposition", "X-seed", "X-Height", "X-Width"])

 sio_logger = logging.getLogger("sio-logger")
 sio_logger.setLevel(logging.ERROR)
 socketio = SocketIO(app, cors_allowed_origins="*", async_mode="threading")

-model: ModelManager = None
-thumb: FileManager = None
-output_dir: str = None
-device = None
-input_image_path: str = None
-is_disable_model_switch: bool = False
-enable_file_manager: bool = False
-enable_auto_saving: bool = False
-is_desktop: bool = False
-image_quality: int = 95
-plugins = {}
+
+@dataclass
+class GlobalConfig:
+    model_manager: ModelManager = None
+    file_manager: FileManager = None
+    output_dir: Path = None
+    input_image_path: Path = None
+    disable_model_switch: bool = False
+    is_desktop: bool = False
+    image_quality: int = 95
+    plugins = {}
+
+    @property
+    def enable_auto_saving(self) -> bool:
+        return self.output_dir is not None
+
+    @property
+    def enable_file_manager(self) -> bool:
+        return self.file_manager is not None
+
+
+global_config = GlobalConfig()
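The scattered module-level globals (and the global statements the old main needed to mutate them) collapse into one GlobalConfig dataclass; enable_auto_saving and enable_file_manager become properties derived from the fields they used to shadow, so they can no longer drift out of sync. A standalone sketch of the pattern, with hypothetical names:

from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

@dataclass
class AppState:
    # Hypothetical reduced version of server.GlobalConfig.
    output_dir: Optional[Path] = None
    plugins: dict = field(default_factory=dict)

    @property
    def enable_auto_saving(self) -> bool:
        # Derived, so there is no separate boolean to forget to update.
        return self.output_dir is not None

state = AppState()
assert not state.enable_auto_saving
state.output_dir = Path("/tmp/out")
assert state.enable_auto_saving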
def get_image_ext(img_bytes): def get_image_ext(img_bytes):
@@ -135,7 +151,7 @@ def diffuser_callback(i, t, latents):

 @app.route("/save_image", methods=["POST"])
 def save_image():
-    if output_dir is None:
+    if global_config.output_dir is None:
         return "--output-dir is None", 500

     input = request.files
@@ -143,7 +159,7 @@ def save_image():
     origin_image_bytes = input["image"].read()  # RGB
     ext = get_image_ext(origin_image_bytes)
     image, alpha_channel, exif_infos = load_img(origin_image_bytes, return_exif=True)
-    save_path = os.path.join(output_dir, filename)
+    save_path = str(global_config.output_dir / filename)

     if alpha_channel is not None:
         if alpha_channel.shape[:2] != image.shape[:2]:
@@ -157,7 +173,7 @@ def save_image():
     img_bytes = pil_to_bytes(
         pil_image,
         ext,
-        quality=image_quality,
+        quality=global_config.image_quality,
         exif_infos=exif_infos,
     )
     with open(save_path, "wb") as fw:
@@ -169,9 +185,11 @@ def save_image():
 @app.route("/medias/<tab>")
 def medias(tab):
     if tab == "image":
-        response = make_response(jsonify(thumb.media_names), 200)
+        response = make_response(jsonify(global_config.file_manager.media_names), 200)
     else:
-        response = make_response(jsonify(thumb.output_media_names), 200)
+        response = make_response(
+            jsonify(global_config.file_manager.output_media_names), 200
+        )
     # response.last_modified = thumb.modified_time[tab]
     # response.cache_control.no_cache = True
     # response.cache_control.max_age = 0
@@ -182,8 +200,8 @@ def medias(tab):
 @app.route("/media/<tab>/<filename>")
 def media_file(tab, filename):
     if tab == "image":
-        return send_from_directory(thumb.root_directory, filename)
-    return send_from_directory(thumb.output_dir, filename)
+        return send_from_directory(global_config.file_manager.root_directory, filename)
+    return send_from_directory(global_config.file_manager.output_dir, filename)

 @app.route("/media_thumbnail/<tab>/<filename>")
@@ -198,10 +216,10 @@ def media_thumbnail_file(tab, filename):
     if height:
         height = int(float(height))

-    directory = thumb.root_directory
+    directory = global_config.file_manager.root_directory
     if tab == "output":
-        directory = thumb.output_dir
-    thumb_filename, (width, height) = thumb.get_thumbnail(
+        directory = global_config.file_manager.output_dir
+    thumb_filename, (width, height) = global_config.file_manager.get_thumbnail(
         directory, filename, width, height
     )
     thumb_filepath = f"{app.config['THUMBNAIL_MEDIA_THUMBNAIL_ROOT']}{thumb_filename}"
@@ -257,13 +275,11 @@ def process():
         croper_y=form["croperY"],
         croper_height=form["croperHeight"],
         croper_width=form["croperWidth"],
         use_extender=form["useExtender"],
         extender_x=form["extenderX"],
         extender_y=form["extenderY"],
         extender_height=form["extenderHeight"],
         extender_width=form["extenderWidth"],
         sd_scale=form["sdScale"],
         sd_mask_blur=form["sdMaskBlur"],
         sd_strength=form["sdStrength"],
@@ -294,7 +310,7 @@ def process():
     start = time.time()
     try:
-        res_np_img = model(image, mask, config)
+        res_np_img = global_config.model_manager(image, mask, config)
     except RuntimeError as e:
         if "CUDA out of memory. " in str(e):
             # NOTE: the string may change?
@@ -322,7 +338,7 @@ def process():
             pil_to_bytes(
                 Image.fromarray(res_np_img),
                 ext,
-                quality=image_quality,
+                quality=global_config.image_quality,
                 exif_infos=exif_infos,
             )
         )
@@ -345,7 +361,7 @@ def run_plugin():
     form = request.form
     files = request.files
     name = form["name"]
-    if name not in plugins:
+    if name not in global_config.plugins:
         return "Plugin not found", 500

     origin_image_bytes = files["image"].read()  # RGB
@@ -359,7 +375,7 @@ def run_plugin():
         if name == InteractiveSeg.name:
             img_md5 = hashlib.md5(origin_image_bytes).hexdigest()
             form["img_md5"] = img_md5
-        bgr_res = plugins[name](rgb_np_img, files, form)
+        bgr_res = global_config.plugins[name](rgb_np_img, files, form)
     except RuntimeError as e:
         torch.cuda.empty_cache()
         if "CUDA out of memory. " in str(e):
@@ -401,7 +417,7 @@ def run_plugin():
             pil_to_bytes(
                 Image.fromarray(rgb_res),
                 ext,
-                quality=image_quality,
+                quality=global_config.image_quality,
                 exif_infos=exif_infos,
             )
         ),
@@ -414,41 +430,40 @@ def run_plugin():
 @app.route("/server_config", methods=["GET"])
 def get_server_config():
     return {
-        "plugins": list(plugins.keys()),
-        "enableFileManager": enable_file_manager,
-        "enableAutoSaving": enable_auto_saving,
-        "enableControlnet": model.sd_controlnet,
-        "controlnetMethod": model.sd_controlnet_method,
-        "disableModelSwitch": is_disable_model_switch,
+        "plugins": list(global_config.plugins.keys()),
+        "enableFileManager": global_config.enable_file_manager,
+        "enableAutoSaving": global_config.enable_auto_saving,
+        "enableControlnet": global_config.model_manager.sd_controlnet,
+        "controlnetMethod": global_config.model_manager.sd_controlnet_method,
+        "disableModelSwitch": global_config.disable_model_switch,
+        "isDesktop": global_config.is_desktop,
     }, 200

 @app.route("/models", methods=["GET"])
 def get_models():
-    return [it.model_dump() for it in model.scan_models()]
+    return [it.model_dump() for it in global_config.model_manager.scan_models()]

 @app.route("/model")
 def current_model():
-    return model.available_models[model.name].model_dump(), 200
-
-
-@app.route("/is_desktop")
-def get_is_desktop():
-    return str(is_desktop), 200
+    return (
+        global_config.model_manager.current_model,
+        200,
+    )

 @app.route("/model", methods=["POST"])
 def switch_model():
-    if is_disable_model_switch:
+    if global_config.disable_model_switch:
         return "Switch model is disabled", 400

     new_name = request.form.get("name")
-    if new_name == model.name:
+    if new_name == global_config.model_manager.name:
         return "Same model", 200

     try:
-        model.switch(new_name)
+        global_config.model_manager.switch(new_name)
     except Exception as e:
         traceback.print_exc()
         error_message = f"{type(e).__name__} - {str(e)}"
@@ -464,160 +479,230 @@ def index():

 @app.route("/inputimage")
 def get_cli_input_image():
-    if input_image_path:
-        with open(input_image_path, "rb") as f:
+    if global_config.input_image_path:
+        with open(global_config.input_image_path, "rb") as f:
             image_in_bytes = f.read()
         return send_file(
-            input_image_path,
+            global_config.input_image_path,
             as_attachment=True,
-            download_name=Path(input_image_path).name,
+            download_name=Path(global_config.input_image_path).name,
             mimetype=f"image/{get_image_ext(image_in_bytes)}",
         )
     else:
         return "No Input Image"

-def build_plugins(args):
-    global plugins
-    if args.enable_interactive_seg:
+def build_plugins(
+    enable_interactive_seg: bool,
+    interactive_seg_model: InteractiveSegModel,
+    interactive_seg_device: Device,
+    enable_remove_bg: bool,
+    enable_anime_seg: bool,
+    enable_realesrgan: bool,
+    realesrgan_device: Device,
+    realesrgan_model: str,
+    enable_gfpgan: bool,
+    gfpgan_device: Device,
+    enable_restoreformer: bool,
+    restoreformer_device: Device,
+    no_half: bool,
+):
+    if enable_interactive_seg:
         logger.info(f"Initialize {InteractiveSeg.name} plugin")
-        plugins[InteractiveSeg.name] = InteractiveSeg(
-            args.interactive_seg_model, args.interactive_seg_device
+        global_config.plugins[InteractiveSeg.name] = InteractiveSeg(
+            interactive_seg_model, interactive_seg_device
         )
-    if args.enable_remove_bg:
+
+    if enable_remove_bg:
         logger.info(f"Initialize {RemoveBG.name} plugin")
-        plugins[RemoveBG.name] = RemoveBG()
-    if args.enable_anime_seg:
+        global_config.plugins[RemoveBG.name] = RemoveBG()
+
+    if enable_anime_seg:
         logger.info(f"Initialize {AnimeSeg.name} plugin")
-        plugins[AnimeSeg.name] = AnimeSeg()
-    if args.enable_realesrgan:
+        global_config.plugins[AnimeSeg.name] = AnimeSeg()
+
+    if enable_realesrgan:
         logger.info(
-            f"Initialize {RealESRGANUpscaler.name} plugin: {args.realesrgan_model}, {args.realesrgan_device}"
+            f"Initialize {RealESRGANUpscaler.name} plugin: {realesrgan_model}, {realesrgan_device}"
         )
-        plugins[RealESRGANUpscaler.name] = RealESRGANUpscaler(
-            args.realesrgan_model,
-            args.realesrgan_device,
-            no_half=args.realesrgan_no_half,
+        global_config.plugins[RealESRGANUpscaler.name] = RealESRGANUpscaler(
+            realesrgan_model,
+            realesrgan_device,
+            no_half=no_half,
         )
-    if args.enable_gfpgan:
+
+    if enable_gfpgan:
         logger.info(f"Initialize {GFPGANPlugin.name} plugin")
-        if args.enable_realesrgan:
+        if enable_realesrgan:
             logger.info("Use realesrgan as GFPGAN background upscaler")
         else:
             logger.info(
                 f"GFPGAN no background upscaler, use --enable-realesrgan to enable it"
             )
-        plugins[GFPGANPlugin.name] = GFPGANPlugin(
-            args.gfpgan_device, upscaler=plugins.get(RealESRGANUpscaler.name, None)
+        global_config.plugins[GFPGANPlugin.name] = GFPGANPlugin(
+            gfpgan_device,
+            upscaler=global_config.plugins.get(RealESRGANUpscaler.name, None),
        )
-    if args.enable_restoreformer:
+
+    if enable_restoreformer:
         logger.info(f"Initialize {RestoreFormerPlugin.name} plugin")
-        plugins[RestoreFormerPlugin.name] = RestoreFormerPlugin(
-            args.restoreformer_device,
-            upscaler=plugins.get(RealESRGANUpscaler.name, None),
+        global_config.plugins[RestoreFormerPlugin.name] = RestoreFormerPlugin(
+            restoreformer_device,
+            upscaler=global_config.plugins.get(RealESRGANUpscaler.name, None),
         )
-def main(args):
-    global model
-    global device
-    global input_image_path
-    global is_disable_model_switch
-    global enable_file_manager
-    global is_desktop
-    global thumb
-    global output_dir
-    global image_quality
-    global enable_auto_saving
-
-    build_plugins(args)
-
-    image_quality = args.quality
-    output_dir = args.output_dir
-    if output_dir:
-        output_dir = os.path.abspath(output_dir)
-        logger.info(f"Output dir: {output_dir}")
-        enable_auto_saving = True
-
-    device = torch.device(args.device)
-    is_disable_model_switch = args.disable_model_switch
-    is_desktop = args.gui
-    if is_disable_model_switch:
-        logger.info(
-            f"Start with --disable-model-switch, model switch on frontend is disable"
-        )
-
-    if args.input and os.path.isdir(args.input):
-        logger.info(f"Initialize file manager")
-        thumb = FileManager(app)
-        enable_file_manager = True
-        app.config["THUMBNAIL_MEDIA_ROOT"] = args.input
-        app.config["THUMBNAIL_MEDIA_THUMBNAIL_ROOT"] = os.path.join(
-            output_dir, "lama_cleaner_thumbnails"
-        )
-        thumb.output_dir = Path(output_dir)
-        # thumb.start()
-        # try:
-        #     while True:
-        #         time.sleep(1)
-        # finally:
-        #     thumb.image_dir_observer.stop()
-        #     thumb.image_dir_observer.join()
-        #     thumb.output_dir_observer.stop()
-        #     thumb.output_dir_observer.join()
-    else:
-        input_image_path = args.input
-
-    # For backward compatibility
-    model_name_map = {
-        "sd1.5": "runwayml/stable-diffusion-inpainting",
-        "anything4": "Sanster/anything-4.0-inpainting",
-        "realisticVision1.4": "Sanster/Realistic_Vision_V1.4-inpainting",
-        "sd2": "stabilityai/stable-diffusion-2-inpainting",
-        "sdxl": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
-        "kandinsky2.2": "kandinsky-community/kandinsky-2-2-decoder-inpaint",
-        "paint_by_example": "Fantasy-Studio/Paint-by-Example",
-        "instruct_pix2pix": "timbrooks/instruct-pix2pix",
-    }
-
-    model = ModelManager(
-        name=model_name_map.get(args.model, args.model),
-        sd_controlnet=args.sd_controlnet,
-        sd_controlnet_method=args.sd_controlnet_method,
-        device=device,
-        no_half=args.no_half,
-        hf_access_token=args.hf_access_token,
-        disable_nsfw=args.sd_disable_nsfw or args.disable_nsfw,
-        sd_cpu_textencoder=args.sd_cpu_textencoder,
-        cpu_offload=args.cpu_offload,
-        enable_xformers=args.sd_enable_xformers or args.enable_xformers,
-        callback=diffuser_callback,
-    )
-
-    if args.gui:
-        app_width, app_height = args.gui_size
-        from flaskwebgui import FlaskUI
-
-        ui = FlaskUI(
-            app,
-            socketio=socketio,
-            width=app_width,
-            height=app_height,
-            host=args.host,
-            port=args.port,
-            close_server_on_exit=not args.no_gui_auto_close,
-        )
-        ui.run()
-    else:
-        socketio.run(
-            app,
-            host=args.host,
-            port=args.port,
-            debug=args.debug,
-            allow_unsafe_werkzeug=True,
-        )
+@typer_app.command(help="Install all plugins dependencies")
+def install_plugins_packages():
+    from lama_cleaner.installer import install_plugins_package
+
+    install_plugins_package()
+
+
+@typer_app.command(help="Download SD/SDXL normal/inpainting model from HuggingFace")
+def download(
+    model: str = Option(
+        ..., help="Model id on HuggingFace e.g: runwayml/stable-diffusion-inpainting"
+    ),
+    model_dir: Path = Option(DEFAULT_MODEL_DIR, help=MODEL_DIR_HELP, file_okay=False),
+):
+    cli_download_model(model, model_dir)
+
+
+@typer_app.command(help="List downloaded models")
+def list_model(
+    model_dir: Path = Option(DEFAULT_MODEL_DIR, help=MODEL_DIR_HELP, file_okay=False),
+):
+    setup_model_dir(model_dir)
+    scanned_models = scan_models()
+    for it in scanned_models:
+        print(it.name)
+
+
+@typer_app.command(help="Start lama cleaner server")
+def start(
+    host: str = Option("127.0.0.1"),
+    port: int = Option(8080),
+    model: str = Option(
+        DEFAULT_MODEL,
+        help=f"Available models: [{', '.join(AVAILABLE_MODELS)}]. "
+        f"You can use download command to download other SD/SDXL normal/inpainting models on huggingface",
+    ),
+    model_dir: Path = Option(
+        DEFAULT_MODEL_DIR, help=MODEL_DIR_HELP, dir_okay=True, file_okay=False
+    ),
+    no_half: bool = Option(False, help=NO_HALF_HELP),
+    cpu_offload: bool = Option(False, help=CPU_OFFLOAD_HELP),
+    disable_nsfw_checker: bool = Option(False, help=DISABLE_NSFW_HELP),
+    cpu_textencoder: bool = Option(False, help=CPU_TEXTENCODER_HELP),
+    local_files_only: bool = Option(False, help=LOCAL_FILES_ONLY_HELP),
+    device: Device = Option(Device.cpu),
+    gui: bool = Option(False, help=GUI_HELP),
+    disable_model_switch: bool = Option(False),
+    input: Path = Option(None, help=INPUT_HELP),
+    output_dir: Path = Option(
+        None, help=OUTPUT_DIR_HELP, dir_okay=True, file_okay=False
+    ),
+    quality: int = Option(95, help=QUALITY_HELP),
+    enable_interactive_seg: bool = Option(False, help=INTERACTIVE_SEG_HELP),
+    interactive_seg_model: InteractiveSegModel = Option(
+        InteractiveSegModel.vit_b, help=INTERACTIVE_SEG_MODEL_HELP
+    ),
+    interactive_seg_device: Device = Option(Device.cpu),
+    enable_remove_bg: bool = Option(False, help=REMOVE_BG_HELP),
+    enable_anime_seg: bool = Option(False, help=ANIMESEG_HELP),
+    enable_realesrgan: bool = Option(False),
+    realesrgan_device: Device = Option(Device.cpu),
+    realesrgan_model: str = Option(RealESRGANModel.realesr_general_x4v3),
+    enable_gfpgan: bool = Option(False),
+    gfpgan_device: Device = Option(Device.cpu),
+    enable_restoreformer: bool = Option(False),
+    restoreformer_device: Device = Option(Device.cpu),
+):
+    global global_config
+    dump_environment_info()
+
+    if input:
+        if not input.exists():
+            logger.error(f"invalid --input: {input} not exists")
+            exit()
+        if input.is_dir():
+            logger.info(f"Initialize file manager")
+            file_manager = FileManager(app)
+            app.config["THUMBNAIL_MEDIA_ROOT"] = input
+            app.config["THUMBNAIL_MEDIA_THUMBNAIL_ROOT"] = os.path.join(
+                output_dir, "lama_cleaner_thumbnails"
+            )
+            file_manager.output_dir = output_dir
+        else:
+            global_config.input_image_path = input
+
+    device = check_device(device)
+    setup_model_dir(model_dir)
+
+    if local_files_only:
+        os.environ["TRANSFORMERS_OFFLINE"] = "1"
+        os.environ["HF_HUB_OFFLINE"] = "1"
+
+    if model not in AVAILABLE_MODELS:
+        scanned_models = scan_models()
+        if model not in [it.name for it in scanned_models]:
+            logger.error(
+                f"invalid --model: {model} not exists. Available models: {AVAILABLE_MODELS} or {[it.name for it in scanned_models]}"
+            )
+            exit()
+
+    global_config.image_quality = quality
+    global_config.disable_model_switch = disable_model_switch
+    global_config.is_desktop = gui
+    build_plugins(
+        enable_interactive_seg,
+        interactive_seg_model,
+        interactive_seg_device,
+        enable_remove_bg,
+        enable_anime_seg,
+        enable_realesrgan,
+        realesrgan_device,
+        realesrgan_model,
+        enable_gfpgan,
+        gfpgan_device,
+        enable_restoreformer,
+        restoreformer_device,
+        no_half,
+    )
+
+    if output_dir:
+        output_dir = output_dir.expanduser().absolute()
+        logger.info(f"Image will auto save to output dir: {output_dir}")
+        global_config.output_dir = output_dir
+
+    global_config.model_manager = ModelManager(
+        name=model,
+        device=torch.device(device),
+        no_half=no_half,
+        disable_nsfw=disable_nsfw_checker,
+        sd_cpu_textencoder=cpu_textencoder,
+        cpu_offload=cpu_offload,
+        callback=diffuser_callback,
+    )
+
+    if gui:
+        from flaskwebgui import FlaskUI
+
+        ui = FlaskUI(
+            app,
+            socketio=socketio,
+            width=1200,
+            height=800,
+            host=host,
+            port=port,
+            close_server_on_exit=True,
+            idle_interval=60,
+        )
+        ui.run()
+    else:
+        socketio.run(
+            app,
+            host=host,
+            port=port,
+            allow_unsafe_werkzeug=True,
+        )
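With the old Flask main() replaced by Typer sub-commands, the CLI surface becomes lama-cleaner start|download|list-model|install-plugins-packages (Typer converts the underscores in the function names to dashes). A hedged smoke-test sketch using Typer's built-in test runner, exercising only argument wiring so no model weights are loaded:

from typer.testing import CliRunner

from lama_cleaner.server import typer_app

runner = CliRunner()

# --help parses the full option set without starting the server.
result = runner.invoke(typer_app, ["start", "--help"])
assert result.exit_code == 0
assert "--model" in result.stdout

result = runner.invoke(typer_app, ["list-model", "--help"])
assert result.exit_code == 0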

View File

@@ -39,7 +39,6 @@ def test_runway_sd_1_5(
         name=model_name,
         sd_controlnet=True,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=disable_nsfw,
         sd_cpu_textencoder=cpu_textencoder,
         sd_controlnet_method=sd_controlnet_method,
@@ -87,7 +86,6 @@ def test_local_file_path(sd_device, sampler):
         name=model_name,
         sd_controlnet=True,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         cpu_offload=True,
@@ -125,7 +123,6 @@ def test_local_file_path_controlnet_native_inpainting(sd_device, sampler):
         name=model_name,
         sd_controlnet=True,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         cpu_offload=True,
@@ -166,7 +163,6 @@ def test_controlnet_switch(sd_device, sampler):
         name=model_name,
         sd_controlnet=True,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         cpu_offload=True,

View File

@@ -21,7 +21,6 @@ def test_instruct_pix2pix(disable_nsfw, cpu_offload):
     model = ModelManager(
         name=model_name,
         device=torch.device(device),
-        hf_access_token="",
         disable_nsfw=disable_nsfw,
         sd_cpu_textencoder=False,
         cpu_offload=cpu_offload,
@@ -52,7 +51,6 @@ def test_instruct_pix2pix_snow(disable_nsfw, cpu_offload):
     model = ModelManager(
         name=model_name,
         device=torch.device(device),
-        hf_access_token="",
         disable_nsfw=disable_nsfw,
         sd_cpu_textencoder=False,
         cpu_offload=cpu_offload,

View File

@@ -17,11 +17,9 @@ def test_load_model():
             name=m,
             device="cpu",
             no_half=False,
-            hf_access_token="",
             disable_nsfw=False,
             sd_cpu_textencoder=True,
             cpu_offload=True,
-            enable_xformers=False,
         )

View File

@@ -16,11 +16,9 @@ def test_model_switch():
         sd_controlnet=True,
         sd_controlnet_method="lllyasviel/control_v11p_sd15_canny",
         device=torch.device("mps"),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=True,
         cpu_offload=False,
-        enable_xformers=False,
         callback=None,
     )
@@ -34,11 +32,9 @@ def test_controlnet_switch_onoff(caplog):
         sd_controlnet=True,
         sd_controlnet_method="lllyasviel/control_v11p_sd15_canny",
         device=torch.device("mps"),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=True,
         cpu_offload=False,
-        enable_xformers=False,
         callback=None,
     )
@@ -61,11 +57,9 @@ def test_controlnet_switch_method(caplog):
         sd_controlnet=True,
         sd_controlnet_method=old_method,
         device=torch.device("mps"),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=True,
         cpu_offload=False,
-        enable_xformers=False,
         callback=None,
     )

View File

@@ -41,7 +41,6 @@ def test_outpainting(name, sd_device, rect):
     model = ModelManager(
         name=name,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         callback=callback,
@@ -86,7 +85,6 @@ def test_kandinsky_outpainting(name, sd_device, rect):
     model = ModelManager(
         name=name,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         callback=callback,

View File

@@ -38,7 +38,6 @@ def test_runway_sd_1_5_all_samplers(
     model = ModelManager(
         name="runwayml/stable-diffusion-inpainting",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
     )
@@ -69,7 +68,6 @@ def test_runway_sd_lcm_lora(sd_device, strategy, sampler):
     model = ModelManager(
         name="runwayml/stable-diffusion-inpainting",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
     )
@@ -102,7 +100,6 @@ def test_runway_sd_freeu(sd_device, strategy, sampler):
     model = ModelManager(
         name="runwayml/stable-diffusion-inpainting",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
     )
@@ -136,7 +133,6 @@ def test_runway_sd_sd_strength(sd_device, strategy, sampler):
     model = ModelManager(
         name="runwayml/stable-diffusion-inpainting",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
     )
@@ -165,7 +161,6 @@ def test_runway_norm_sd_model(sd_device, strategy, sampler):
     model = ModelManager(
         name="runwayml/stable-diffusion-v1-5",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
     )
@@ -192,7 +187,6 @@ def test_runway_sd_1_5_cpu_offload(sd_device, strategy, sampler):
     model = ModelManager(
         name="runwayml/stable-diffusion-inpainting",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         cpu_offload=True,
@@ -229,7 +223,6 @@ def test_local_file_path(sd_device, sampler, name):
     model = ModelManager(
         name=name,
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         cpu_offload=False,

View File

@@ -29,7 +29,6 @@ def test_sdxl(sd_device, strategy, sampler):
     model = ModelManager(
         name="diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         callback=callback,
@@ -70,7 +69,6 @@ def test_sdxl_lcm_lora_and_freeu(sd_device, strategy, sampler):
     model = ModelManager(
         name="diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
         callback=callback,
@@ -131,7 +129,6 @@ def test_sdxl_outpainting(sd_device, rect):
     model = ModelManager(
         name="diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
         device=torch.device(sd_device),
-        hf_access_token="",
         disable_nsfw=True,
         sd_cpu_textencoder=False,
     )

View File

@@ -24,7 +24,6 @@ def save_config(
     cpu_offload,
     disable_nsfw,
     sd_cpu_textencoder,
-    enable_xformers,
     local_files_only,
     model_dir,
     input,
@@ -102,9 +101,6 @@ def main(config_file: str):
         with gr.Column():
             gui = gr.Checkbox(init_config.gui, label=f"{GUI_HELP}")
-            no_gui_auto_close = gr.Checkbox(
-                init_config.no_gui_auto_close, label=f"{NO_GUI_AUTO_CLOSE_HELP}"
-            )
         with gr.Column():
             model_dir = gr.Textbox(
@@ -193,14 +189,11 @@ def main(config_file: str):
                 init_config.cpu_offload, label=f"{CPU_OFFLOAD_HELP}"
             )
             sd_cpu_textencoder = gr.Checkbox(
-                init_config.sd_cpu_textencoder, label=f"{SD_CPU_TEXTENCODER_HELP}"
+                init_config.sd_cpu_textencoder, label=f"{CPU_TEXTENCODER_HELP}"
             )
             disable_nsfw = gr.Checkbox(
                 init_config.disable_nsfw, label=f"{DISABLE_NSFW_HELP}"
             )
-            enable_xformers = gr.Checkbox(
-                init_config.enable_xformers, label=f"{ENABLE_XFORMERS_HELP}"
-            )
             local_files_only = gr.Checkbox(
                 init_config.local_files_only, label=f"{LOCAL_FILES_ONLY_HELP}"
             )
@@ -221,7 +214,6 @@ def main(config_file: str):
             cpu_offload,
             disable_nsfw,
             sd_cpu_textencoder,
-            enable_xformers,
             local_files_only,
             model_dir,
             input,

View File

@@ -71,6 +71,8 @@ const Cropper = (props: Props) => {
     setY,
     setWidth,
     setHeight,
+    isResizing,
+    setIsResizing,
   ] = useStore((state) => [
     state.imageWidth,
     state.imageHeight,
@@ -80,9 +82,11 @@ const Cropper = (props: Props) => {
     state.setCropperY,
     state.setCropperWidth,
     state.setCropperHeight,
+    state.isCropperExtenderResizing,
+    state.setIsCropperExtenderResizing,
   ])
-  const [isResizing, setIsResizing] = useState(false)
+  // const [isResizing, setIsResizing] = useState(false)
   const [isMoving, setIsMoving] = useState(false)
   useEffect(() => {

View File

@@ -65,6 +65,7 @@ export default function Editor(props: EditorProps) {
     updateAppState,
     runMannually,
     runInpainting,
+    isCropperExtenderResizing,
   ] = useStore((state) => [
     state.disableShortCuts,
     state.windowSize,
@@ -87,6 +88,7 @@ export default function Editor(props: EditorProps) {
     state.updateAppState,
     state.runMannually(),
     state.runInpainting,
+    state.isCropperExtenderResizing,
   ])
   const baseBrushSize = useStore((state) => state.editorState.baseBrushSize)
   const brushSize = useStore((state) => state.getBrushSize())
@@ -537,7 +539,7 @@ export default function Editor(props: EditorProps) {
   }
   const toggleShowBrush = (newState: boolean) => {
-    if (newState !== showBrush && !isPanning) {
+    if (newState !== showBrush && !isPanning && !isCropperExtenderResizing) {
       setShowBrush(newState)
     }
   }
@@ -693,7 +695,7 @@ export default function Editor(props: EditorProps) {
           limitToBounds={false}
           doubleClick={{ disabled: true }}
           initialScale={minScale}
-          minScale={minScale * 0.6}
+          minScale={minScale * 0.3}
           onPanning={(ref) => {
             if (!panned) {
               setPanned(true)

View File

@@ -54,6 +54,8 @@ const Extender = (props: Props) => {
     setWidth,
     setHeight,
     extenderDirection,
+    isResizing,
+    setIsResizing,
   ] = useStore((state) => [
     state.isInpainting,
     state.imageHeight,
@@ -64,10 +66,10 @@ const Extender = (props: Props) => {
     state.setExtenderWidth,
     state.setExtenderHeight,
     state.settings.extenderDirection,
+    state.isCropperExtenderResizing,
+    state.setIsCropperExtenderResizing,
   ])
-  const [isResizing, setIsResizing] = useState(false)
   const [evData, setEVData] = useState<EVData>({
     initX: 0,
     initY: 0,
@@ -122,10 +124,9 @@ const Extender = (props: Props) => {
   const moveBottom = () => {
     const newHeight = evData.initHeight + offsetY
     let [clampedY, clampedHeight] = clampTopBottom(evData.initY, newHeight)
     if (extenderDirection === EXTENDER_ALL) {
-      if (clampedY + clampedHeight < imageHeight) {
-        clampedHeight = imageHeight
+      if (clampedHeight < Math.abs(clampedY) + imageHeight) {
+        clampedHeight = Math.abs(clampedY) + imageHeight
       }
     }
     setHeight(clampedHeight)
@@ -155,8 +156,8 @@ const Extender = (props: Props) => {
     const newWidth = evData.initWidth + offsetX
     let [clampedX, clampedWidth] = clampLeftRight(evData.initX, newWidth)
     if (extenderDirection === EXTENDER_ALL) {
-      if (clampedX + clampedWidth < imageWdith) {
-        clampedWidth = imageWdith
+      if (clampedWidth < Math.abs(clampedX) + imageWdith) {
+        clampedWidth = Math.abs(clampedX) + imageWdith
      }
     }
     setWidth(clampedWidth)
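The reworked clamp in moveBottom/moveRight keeps the extender covering the whole image when extenderDirection is EXTENDER_ALL, even after the box origin has been dragged to a negative offset; the old check (clampedY + clampedHeight < imageHeight) missed that case. A small Python sketch of the same rule, with hypothetical names, purely to illustrate the arithmetic:

def clamp_extender_size(clamped_origin: float, new_size: float, image_size: float) -> float:
    # The box must reach from its (possibly negative) origin past the far
    # edge of the image, so its minimum size is |origin| + image size.
    min_size = abs(clamped_origin) + image_size
    return max(new_size, min_size)

assert clamp_extender_size(-50, 400, 512) == 562  # grown to keep the image covered
assert clamp_extender_size(0, 600, 512) == 600    # already large enough, unchanged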

View File

@@ -105,18 +105,22 @@ const LabelTitle = ({
         {text}
       </Label>
     </TooltipTrigger>
-    <TooltipContent className="flex flex-col max-w-xs text-sm" side="left">
-      <p>{toolTip}</p>
-      {url ? (
-        <Button variant="link" className="justify-end">
-          <a href={url} target="_blank">
-            More info
-          </a>
-        </Button>
-      ) : (
-        <></>
-      )}
-    </TooltipContent>
+    {toolTip ? (
+      <TooltipContent className="flex flex-col max-w-xs text-sm" side="left">
+        <p>{toolTip}</p>
+        {url ? (
+          <Button variant="link" className="justify-end">
+            <a href={url} target="_blank">
+              More info
+            </a>
+          </Button>
+        ) : (
+          <></>
+        )}
+      </TooltipContent>
+    ) : (
+      <></>
+    )}
   </Tooltip>
 )
 }
@@ -172,7 +176,11 @@ const SidePanel = () => {
       <div className="flex flex-col gap-4">
         <div className="flex flex-col gap-4">
           <div className="flex justify-between items-center pr-2">
-            <LabelTitle text="Controlnet" />
+            <LabelTitle
+              text="ControlNet"
+              toolTip="Using an additional conditioning image to control how an image is generated"
+              url="https://huggingface.co/docs/diffusers/main/en/using-diffusers/inpaint#controlnet"
+            />
             <Switch
               id="controlnet"
               checked={settings.enableControlnet}
@@ -271,7 +279,11 @@ const SidePanel = () => {
     return (
       <div className="flex flex-col gap-4">
         <div className="flex justify-between items-center pr-2">
-          <LabelTitle text="Freeu" />
+          <LabelTitle
+            text="FreeU"
+            toolTip="FreeU is a technique for improving image quality. Different models may require different FreeU-specific hyperparameters, which can be viewed in the more info section."
+            url="https://huggingface.co/docs/diffusers/main/en/using-diffusers/freeu"
+          />
          <Switch
            id="freeu"
            checked={settings.enableFreeu}
@@ -408,7 +420,10 @@ const SidePanel = () => {
     return (
       <div>
         <RowContainer>
-          <div>Example Image</div>
+          <LabelTitle
+            text="Example Image"
+            toolTip="An example image to guide image generation."
+          />
           <ImageUploadButton
             tooltip="Upload example image"
             onFileUpload={(file) => {
@@ -450,8 +465,9 @@ const SidePanel = () => {
     return (
       <div className="flex flex-col gap-1">
         <LabelTitle
-          htmlFor="image-guidance-scale"
           text="Image guidance scale"
+          toolTip="Push the generated image towards the initial image. Higher image guidance scale encourages generated images that are closely linked to the source image, usually at the expense of lower image quality."
+          url="https://huggingface.co/docs/diffusers/main/en/api/pipelines/pix2pix"
         />
         <RowContainer>
           <Slider
@@ -518,11 +534,17 @@ const SidePanel = () => {
   }
   const renderExtender = () => {
+    if (!settings.model.support_outpainting) {
+      return null
+    }
     return (
       <>
         <div className="flex flex-col gap-4">
           <RowContainer>
-            <LabelTitle text="Extender" />
+            <LabelTitle
+              text="Extender"
+              toolTip="Perform outpainting on images to expand their content."
+            />
             <Switch
               id="extender"
               checked={settings.showExtender}
@@ -709,7 +731,10 @@ const SidePanel = () => {
         >
           <div className="flex flex-col gap-4 mt-4">
             <RowContainer>
-              <LabelTitle text="Cropper" />
+              <LabelTitle
+                text="Cropper"
+                toolTip="Inpaint on part of the image to improve inference speed and reduce memory usage."
+              />
               <Switch
                 id="cropper"
                 checked={settings.showCropper}
@@ -725,7 +750,11 @@ const SidePanel = () => {
           {renderExtender()}
           <div className="flex flex-col gap-1">
-            <LabelTitle htmlFor="steps" text="Steps" />
+            <LabelTitle
+              htmlFor="steps"
+              text="Steps"
+              toolTip="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
+            />
             <RowContainer>
               <Slider
                 className="w-[180px]"

View File

@@ -150,8 +150,11 @@ type AppState = {
   interactiveSegState: InteractiveSegState
   fileManagerState: FileManagerState
   cropperState: CropperState
   extenderState: CropperState
+  isCropperExtenderResizing: boolean
   serverConfig: ServerConfig
   settings: Settings
@@ -177,6 +180,7 @@ type AppAction = {
   setExtenderY: (newValue: number) => void
   setExtenderWidth: (newValue: number) => void
   setExtenderHeight: (newValue: number) => void
+  setIsCropperExtenderResizing: (newValue: boolean) => void
   updateExtenderDirection: (newValue: string) => void
   resetExtender: (width: number, height: number) => void
   updateExtenderByBuiltIn: (direction: string, scale: number) => void
@@ -261,6 +265,7 @@ const defaultValues: AppState = {
     width: 512,
     height: 512,
   },
+  isCropperExtenderResizing: false,
   fileManagerState: {
     sortBy: SortBy.CTIME,
@@ -889,6 +894,11 @@ export const useStore = createWithEqualityFn<AppState & AppAction>()(
         state.extenderState.height = newValue
       }),
+    setIsCropperExtenderResizing: (newValue: boolean) =>
+      set((state) => {
+        state.isCropperExtenderResizing = newValue
+      }),
     updateExtenderDirection: (newValue: string) => {
       console.log(
         `updateExtenderDirection: ${JSON.stringify(get().extenderState)}`