From 0d57e552cfbbcef7114e28a979408549f4e459b9 Mon Sep 17 00:00:00 2001 From: Qing Date: Thu, 29 Sep 2022 09:42:19 +0800 Subject: [PATCH 1/6] add sd-disable-nsfw arg --- lama_cleaner/model/sd.py | 36 +++++++++++++++++++++++++++++++++++- lama_cleaner/parse_args.py | 5 +++++ lama_cleaner/server.py | 1 + 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/lama_cleaner/model/sd.py b/lama_cleaner/model/sd.py index c7a3389..1bc47e0 100644 --- a/lama_cleaner/model/sd.py +++ b/lama_cleaner/model/sd.py @@ -6,6 +6,7 @@ import numpy as np import torch from diffusers import PNDMScheduler, DDIMScheduler from loguru import logger +from transformers import FeatureExtractionMixin, ImageFeatureExtractionMixin from lama_cleaner.helper import norm_img @@ -38,19 +39,52 @@ from lama_cleaner.schema import Config, SDSampler # mask = torch.from_numpy(mask) # return mask +class DummyFeatureExtractorOutput: + def __init__(self, pixel_values): + self.pixel_values = pixel_values + + def to(self, device): + return self + + +class DummyFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def __call__(self, *args, **kwargs): + return DummyFeatureExtractorOutput(torch.empty(0, 3)) + + +class DummySafetyChecker: + def __init__(self, *args, **kwargs): + pass + + def __call__(self, clip_input, images): + return images, False + class SD(InpaintModel): - pad_mod = 64 # current diffusers only support 64 https://github.com/huggingface/diffusers/pull/505 + pad_mod = 64 # current diffusers only support 64 https://github.com/huggingface/diffusers/pull/505 min_size = 512 def init_model(self, device: torch.device, **kwargs): from .sd_pipeline import StableDiffusionInpaintPipeline + model_kwargs = {} + sd_disable_nsfw = kwargs.pop('sd_disable_nsfw', False) + if sd_disable_nsfw: + logger.info("Disable Stable Diffusion Model NSFW checker") + model_kwargs.update(dict( + feature_extractor=DummyFeatureExtractor(), + 
safety_checker=DummySafetyChecker(), + )) + self.model = StableDiffusionInpaintPipeline.from_pretrained( self.model_id_or_path, revision="fp16" if torch.cuda.is_available() else "main", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, use_auth_token=kwargs["hf_access_token"], + **model_kwargs ) # https://huggingface.co/docs/diffusers/v0.3.0/en/api/pipelines/stable_diffusion#diffusers.StableDiffusionInpaintPipeline.enable_attention_slicing self.model.enable_attention_slicing() diff --git a/lama_cleaner/parse_args.py b/lama_cleaner/parse_args.py index f4b720e..359621b 100644 --- a/lama_cleaner/parse_args.py +++ b/lama_cleaner/parse_args.py @@ -17,6 +17,11 @@ def parse_args(): default="", help="huggingface access token. Check how to get token from: https://huggingface.co/docs/hub/security-tokens", ) + parser.add_argument( + "--sd-disable-nsfw", + action="store_true", + help="disable stable diffusion nsfw checker", + ) parser.add_argument("--device", default="cuda", type=str, choices=["cuda", "cpu"]) parser.add_argument("--gui", action="store_true", help="Launch as desktop app") parser.add_argument( diff --git a/lama_cleaner/server.py b/lama_cleaner/server.py index 6397c0b..0fd8119 100644 --- a/lama_cleaner/server.py +++ b/lama_cleaner/server.py @@ -218,6 +218,7 @@ def main(args): name=args.model, device=device, hf_access_token=args.hf_access_token, + sd_disable_nsfw=args.sd_disable_nsfw, callbacks=[diffuser_callback], ) From ec7b2d8e2d7a6c764bed927269639830fc3292c6 Mon Sep 17 00:00:00 2001 From: Qing Date: Thu, 29 Sep 2022 12:20:55 +0800 Subject: [PATCH 2/6] add sd-cpu-textencoder args --- lama_cleaner/model/sd.py | 8 ++++++-- lama_cleaner/parse_args.py | 9 +++++++-- lama_cleaner/server.py | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lama_cleaner/model/sd.py b/lama_cleaner/model/sd.py index 1bc47e0..8aaeab8 100644 --- a/lama_cleaner/model/sd.py +++ b/lama_cleaner/model/sd.py @@ -71,8 +71,7 @@ class SD(InpaintModel): 
from .sd_pipeline import StableDiffusionInpaintPipeline model_kwargs = {} - sd_disable_nsfw = kwargs.pop('sd_disable_nsfw', False) - if sd_disable_nsfw: + if kwargs['sd_disable_nsfw']: logger.info("Disable Stable Diffusion Model NSFW checker") model_kwargs.update(dict( feature_extractor=DummyFeatureExtractor(), @@ -89,6 +88,11 @@ class SD(InpaintModel): # https://huggingface.co/docs/diffusers/v0.3.0/en/api/pipelines/stable_diffusion#diffusers.StableDiffusionInpaintPipeline.enable_attention_slicing self.model.enable_attention_slicing() self.model = self.model.to(device) + + if kwargs['sd_cpu_textencoder']: + logger.info("Run Stable Diffusion TextEncoder on CPU") + self.model.text_encoder = self.model.text_encoder.to(torch.device('cpu')) + self.callbacks = kwargs.pop("callbacks", None) @torch.cuda.amp.autocast() diff --git a/lama_cleaner/parse_args.py b/lama_cleaner/parse_args.py index 359621b..b876dde 100644 --- a/lama_cleaner/parse_args.py +++ b/lama_cleaner/parse_args.py @@ -15,12 +15,17 @@ def parse_args(): parser.add_argument( "--hf_access_token", default="", - help="huggingface access token. Check how to get token from: https://huggingface.co/docs/hub/security-tokens", + help="Huggingface access token. 
Check how to get token from: https://huggingface.co/docs/hub/security-tokens", ) parser.add_argument( "--sd-disable-nsfw", action="store_true", - help="disable stable diffusion nsfw checker", + help="Disable Stable Diffusion nsfw checker", + ) + parser.add_argument( + "--sd-cpu-textencoder", + action="store_true", + help="Always run Stable Diffusion TextEncoder model on CPU", ) parser.add_argument("--device", default="cuda", type=str, choices=["cuda", "cpu"]) parser.add_argument("--gui", action="store_true", help="Launch as desktop app") diff --git a/lama_cleaner/server.py b/lama_cleaner/server.py index 0fd8119..8fcd332 100644 --- a/lama_cleaner/server.py +++ b/lama_cleaner/server.py @@ -219,6 +219,7 @@ def main(args): device=device, hf_access_token=args.hf_access_token, sd_disable_nsfw=args.sd_disable_nsfw, + sd_cpu_textencoder=args.sd_cpu_textencoder, callbacks=[diffuser_callback], ) From 4d6cec0ca2f83592ecf20b988a8b73254682966e Mon Sep 17 00:00:00 2001 From: Qing Date: Thu, 29 Sep 2022 13:13:09 +0800 Subject: [PATCH 3/6] add run-sd-local arg --- README.md | 23 +++++++++++++---------- lama_cleaner/model/sd.py | 2 +- lama_cleaner/parse_args.py | 9 +++++++-- lama_cleaner/server.py | 1 + 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 013e700..04fffca 100644 --- a/README.md +++ b/README.md @@ -57,16 +57,19 @@ lama-cleaner --model=lama --device=cpu --port=8080 Available arguments: -| Name | Description | Default | -| ----------------- | -------------------------------------------------------------------------------------------------------- | -------- | -| --model | lama/ldm/zits/mat/fcf/sd. 
See details in [Inpaint Model](#inpainting-model) | lama | -| --hf_access_token | stable-diffusion(sd) model need huggingface access token https://huggingface.co/docs/hub/security-tokens | | -| --device | cuda or cpu | cuda | -| --port | Port for backend flask web server | 8080 | -| --gui | Launch lama-cleaner as a desktop application | | -| --gui_size | Set the window size for the application | 1200 900 | -| --input | Path to image you want to load by default | None | -| --debug | Enable debug mode for flask web server | | +| Name | Description | Default | +|-------------------|-------------------------------------------------------------------------------------------------------------------------------| -------- | +| --model | lama/ldm/zits/mat/fcf/sd1.4. See details in [Inpaint Model](#inpainting-model) | lama | +| --hf_access_token | stable-diffusion(sd) model needs a [huggingface access token](https://huggingface.co/docs/hub/security-tokens) to download the model | | +| --sd-run-local | Once the model has been downloaded, you can pass this arg and remove `--hf_access_token` | | +| --sd-disable-nsfw | Disable stable-diffusion NSFW checker. | | +| --sd-cpu-textencoder | Always run stable-diffusion TextEncoder model on CPU. 
| | +| --device | cuda or cpu | cuda | +| --port | Port for backend flask web server | 8080 | +| --gui | Launch lama-cleaner as a desktop application | | +| --gui_size | Set the window size for the application | 1200 900 | +| --input | Path to image you want to load by default | None | +| --debug | Enable debug mode for flask web server | | ## Inpainting Model diff --git a/lama_cleaner/model/sd.py b/lama_cleaner/model/sd.py index 8aaeab8..a1cd881 100644 --- a/lama_cleaner/model/sd.py +++ b/lama_cleaner/model/sd.py @@ -70,7 +70,7 @@ class SD(InpaintModel): def init_model(self, device: torch.device, **kwargs): from .sd_pipeline import StableDiffusionInpaintPipeline - model_kwargs = {} + model_kwargs = {"local_files_only": kwargs['sd_run_local']} if kwargs['sd_disable_nsfw']: logger.info("Disable Stable Diffusion Model NSFW checker") model_kwargs.update(dict( diff --git a/lama_cleaner/parse_args.py b/lama_cleaner/parse_args.py index b876dde..e96bb64 100644 --- a/lama_cleaner/parse_args.py +++ b/lama_cleaner/parse_args.py @@ -20,13 +20,18 @@ def parse_args(): parser.add_argument( "--sd-disable-nsfw", action="store_true", - help="Disable Stable Diffusion nsfw checker", + help="Disable Stable Diffusion NSFW checker", ) parser.add_argument( "--sd-cpu-textencoder", action="store_true", help="Always run Stable Diffusion TextEncoder model on CPU", ) + parser.add_argument( + "--sd-run-local", + action="store_true", + help="After first time Stable Diffusion model downloaded, you can add this arg and remove --hf_access_token", + ) parser.add_argument("--device", default="cuda", type=str, choices=["cuda", "cpu"]) parser.add_argument("--gui", action="store_true", help="Launch as desktop app") parser.add_argument( @@ -48,7 +53,7 @@ def parse_args(): if imghdr.what(args.input) is None: parser.error(f"invalid --input: {args.input} is not a valid image file") - if args.model.startswith("sd"): + if args.model.startswith("sd") and not args.sd_run_local: if not 
args.hf_access_token.startswith("hf_"): parser.error( f"sd(stable-diffusion) model requires huggingface access token. Check how to get token from: https://huggingface.co/docs/hub/security-tokens" diff --git a/lama_cleaner/server.py b/lama_cleaner/server.py index 8fcd332..c72c782 100644 --- a/lama_cleaner/server.py +++ b/lama_cleaner/server.py @@ -220,6 +220,7 @@ def main(args): hf_access_token=args.hf_access_token, sd_disable_nsfw=args.sd_disable_nsfw, sd_cpu_textencoder=args.sd_cpu_textencoder, + sd_run_local=args.sd_run_local, callbacks=[diffuser_callback], ) From db1d7d5c4882c6a2c82bed39fa921094c9d5e7ed Mon Sep 17 00:00:00 2001 From: Qing Date: Thu, 29 Sep 2022 21:56:33 +0800 Subject: [PATCH 4/6] add sd run local test --- lama_cleaner/tests/test_model.py | 40 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/lama_cleaner/tests/test_model.py b/lama_cleaner/tests/test_model.py index accdae3..cd70ad7 100644 --- a/lama_cleaner/tests/test_model.py +++ b/lama_cleaner/tests/test_model.py @@ -158,7 +158,7 @@ def test_fcf(strategy): @pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL]) @pytest.mark.parametrize("sampler", [SDSampler.ddim, SDSampler.pndm]) -def test_sd(strategy, sampler, capfd): +def test_sd(strategy, sampler): def callback(step: int): print(f"sd_step_{step}") @@ -184,6 +184,38 @@ def test_sd(strategy, sampler, capfd): mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask_blur.png", ) - # captured = capfd.readouterr() - # for i in range(sd_steps): - # assert f'sd_step_{i}' in captured.out + +@pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL]) +@pytest.mark.parametrize("sampler", [SDSampler.ddim]) +def test_sd_run_local(strategy, sampler): + def callback(step: int): + print(f"sd_step_{step}") + + sd_steps = 50 + model = ModelManager( + name="sd1.4", + device=device, + hf_access_token=None, + sd_run_local=True, + sd_disable_nsfw=True, + sd_cpu_textencoder=True, + callbacks=[callback] + ) + cfg = 
get_config(strategy, prompt='a cat sitting on a bench', sd_steps=sd_steps) + cfg.sd_sampler = sampler + + assert_equal( + model, + cfg, + f"sd_{strategy.capitalize()}_{sampler}_local_result.png", + img_p=current_dir / "overture-creations-5sI6fQgYIuo.png", + mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask.png", + ) + + assert_equal( + model, + cfg, + f"sd_{strategy.capitalize()}_{sampler}_blur_mask_local_result.png", + img_p=current_dir / "overture-creations-5sI6fQgYIuo.png", + mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask_blur.png", + ) From dba7b01da7dcd4116d10d5de33d862b483a308dd Mon Sep 17 00:00:00 2001 From: Qing Date: Fri, 30 Sep 2022 21:39:23 +0800 Subject: [PATCH 5/6] enable text_encoder cpu --- lama_cleaner/model/sd.py | 3 ++- lama_cleaner/model/sd_pipeline.py | 7 ++++--- lama_cleaner/tests/test_model.py | 19 ++++++++++++++----- requirements.txt | 2 +- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/lama_cleaner/model/sd.py b/lama_cleaner/model/sd.py index a1cd881..d9745d4 100644 --- a/lama_cleaner/model/sd.py +++ b/lama_cleaner/model/sd.py @@ -91,7 +91,8 @@ class SD(InpaintModel): if kwargs['sd_cpu_textencoder']: logger.info("Run Stable Diffusion TextEncoder on CPU") - self.model.text_encoder = self.model.text_encoder.to(torch.device('cpu')) + self.model.text_encoder = self.model.text_encoder.to(torch.device('cpu'), non_blocking=True) + self.model.text_encoder = self.model.text_encoder.to(torch.float32, non_blocking=True ) self.callbacks = kwargs.pop("callbacks", None) diff --git a/lama_cleaner/model/sd_pipeline.py b/lama_cleaner/model/sd_pipeline.py index 6616406..827a383 100644 --- a/lama_cleaner/model/sd_pipeline.py +++ b/lama_cleaner/model/sd_pipeline.py @@ -236,7 +236,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): truncation=True, return_tensors="pt", ) - text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0] + text_encoder_device = self.text_encoder.device + + text_embeddings 
= self.text_encoder(text_input.input_ids.to(text_encoder_device, non_blocking=True))[0].to(self.device, non_blocking=True) # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` @@ -248,7 +250,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): uncond_input = self.tokenizer( [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt" ) - uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0] + uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(text_encoder_device, non_blocking=True))[0].to(self.device, non_blocking=True) # For classifier free guidance, we need to do two forward passes. # Here we concatenate the unconditional and text embeddings into a single batch @@ -269,7 +271,6 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline): for i, t in tqdm(enumerate(self.scheduler.timesteps[t_start:])): # expand the latents if we are doing classifier free guidance latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - # predict the noise residual noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample diff --git a/lama_cleaner/tests/test_model.py b/lama_cleaner/tests/test_model.py index cd70ad7..f2eac96 100644 --- a/lama_cleaner/tests/test_model.py +++ b/lama_cleaner/tests/test_model.py @@ -9,6 +9,8 @@ from lama_cleaner.model_manager import ModelManager from lama_cleaner.schema import Config, HDStrategy, LDMSampler, SDSampler current_dir = Path(__file__).parent.absolute().resolve() +save_dir = current_dir / 'result' +save_dir.mkdir(exist_ok=True, parents=True) device = 'cuda' if torch.cuda.is_available() else 'cpu' @@ -40,7 +42,7 @@ def assert_equal(model, config, gt_name, fx=1, fy=1, img_p=current_dir / "image. 
img, mask = get_data(fx=fx, fy=fy, img_p=img_p, mask_p=mask_p) res = model(img, mask, config) cv2.imwrite( - str(current_dir / gt_name), + str(save_dir / gt_name), res, [int(cv2.IMWRITE_JPEG_QUALITY), 100, int(cv2.IMWRITE_PNG_COMPRESSION), 0], ) @@ -163,7 +165,12 @@ def test_sd(strategy, sampler): print(f"sd_step_{step}") sd_steps = 50 - model = ModelManager(name="sd1.4", device=device, hf_access_token=os.environ['HF_ACCESS_TOKEN'], + model = ModelManager(name="sd1.4", + device=device, + hf_access_token=os.environ['HF_ACCESS_TOKEN'], + sd_run_local=False, + sd_disable_nsfw=False, + sd_cpu_textencoder=False, callbacks=[callback]) cfg = get_config(strategy, prompt='a cat sitting on a bench', sd_steps=sd_steps) cfg.sd_sampler = sampler @@ -187,7 +194,8 @@ def test_sd(strategy, sampler): @pytest.mark.parametrize("strategy", [HDStrategy.ORIGINAL]) @pytest.mark.parametrize("sampler", [SDSampler.ddim]) -def test_sd_run_local(strategy, sampler): +@pytest.mark.parametrize("disable_nsfw", [True, False]) +def test_sd_run_local(strategy, sampler, disable_nsfw): def callback(step: int): print(f"sd_step_{step}") @@ -195,11 +203,11 @@ def test_sd_run_local(strategy, sampler): model = ModelManager( name="sd1.4", device=device, + # hf_access_token=os.environ.get('HF_ACCESS_TOKEN', None), hf_access_token=None, sd_run_local=True, - sd_disable_nsfw=True, + sd_disable_nsfw=disable_nsfw, sd_cpu_textencoder=True, - callbacks=[callback] ) cfg = get_config(strategy, prompt='a cat sitting on a bench', sd_steps=sd_steps) cfg.sd_sampler = sampler @@ -219,3 +227,4 @@ def test_sd_run_local(strategy, sampler): img_p=current_dir / "overture-creations-5sI6fQgYIuo.png", mask_p=current_dir / "overture-creations-5sI6fQgYIuo_mask_blur.png", ) + diff --git a/requirements.txt b/requirements.txt index b029bb5..4715cee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -torch>=1.8.2 +torch>=1.9.0 opencv-python flask_cors flask==1.1.4 From 1e398de639e151dc73bc6afe6de9a7f63a145b0d Mon Sep 
17 00:00:00 2001 From: Qing Date: Fri, 30 Sep 2022 22:44:32 +0800 Subject: [PATCH 6/6] 0.21.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a1bb0ab..84d7ca8 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def load_requirements(): # https://setuptools.readthedocs.io/en/latest/setuptools.html#including-data-files setuptools.setup( name="lama-cleaner", - version="0.20.1", + version="0.21.0", author="PanicByte", author_email="cwq1913@gmail.com", description="Image inpainting tool powered by SOTA AI Model",